How to use snowballstemmer - 10 common examples

To help you get started, we’ve selected a few snowballstemmer examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github shibukawa / snowball_py / sample / testapp.py View on Github external
def main():
    """Stem the words given on the command line and print each result.

    With exactly one argument, that argument is the text and the 'english'
    algorithm is used. With two or more, the first argument names the
    algorithm and the remaining ones are the text. Each text argument is
    split on whitespace, '.' and '-'; empty pieces are skipped, the rest
    are lower-cased and printed as "<word> -> <stem>". With no arguments
    at all, usage() is called and nothing is stemmed.
    """
    args = sys.argv
    if len(args) < 2:
        usage()
        return
    if len(args) > 2:
        # The first argument selects the stemming algorithm.
        algorithm, texts = args[1], args[2:]
    else:
        algorithm, texts = 'english', args[1:]
    stemmer = snowballstemmer.stemmer(algorithm)
    splitter = re.compile(r"[\s\.-]")
    for text in texts:
        for token in splitter.split(text):
            if not token:
                # Adjacent separators produce empty pieces; ignore them.
                continue
            lowered = token.lower()
            print(lowered + " -> " + stemmer.stemWord(lowered))
main()
github sphinx-doc / sphinx / sphinx / search / tr.py View on Github external
def init(self, options: Dict) -> None:
        """Create the 'turkish' Snowball stemmer and store it on self.stemmer.

        The options argument is not used by this method.
        """
        self.stemmer = snowballstemmer.stemmer('turkish')
github sphinx-doc / sphinx / sphinx / search / sv.py View on Github external
def init(self, options: Dict) -> None:
        """Create the 'swedish' Snowball stemmer and store it on self.stemmer.

        The options argument is not used by this method.
        """
        self.stemmer = snowballstemmer.stemmer('swedish')
github sphinx-doc / sphinx / sphinx / search / ro.py View on Github external
def init(self, options: Dict) -> None:
        """Create the 'romanian' Snowball stemmer and store it on self.stemmer.

        The options argument is not used by this method.
        """
        self.stemmer = snowballstemmer.stemmer('romanian')
github sphinx-doc / sphinx / sphinx / search / hu.py View on Github external
def init(self, options: Dict) -> None:
        """Create the 'hungarian' Snowball stemmer and store it on self.stemmer.

        The options argument is not used by this method.
        """
        self.stemmer = snowballstemmer.stemmer('hungarian')
github takahiro-777 / nlp_tutorial / nlp100_Python / 72 / main.py View on Github external
'twas,us,wants,was,we,were,what,when,where,which,while,who,whom,why,'
	'will,with,would,yet,you,your').lower().split(',')


def is_stopword(str):
	'''Return whether the given word is a stop word.

	Upper and lower case are treated as the same: the word is
	lower-cased before it is looked up in stop_words.

	Returns:
	True if the word is a stop word, otherwise False.
	'''
	lowered = str.lower()
	return lowered in stop_words


# Feature extraction
stemmer = snowballstemmer.stemmer('english')
word_counter = Counter()

with codecs.open(fname_sentiment, 'r', fencoding) as file_in:
	for line in file_in:
		for word in line[3:].split(' '):		# line[3:] drops the polarity label

			# Strip surrounding whitespace
			word = word.strip()

			# Skip stop words
			if is_stopword(word):
				continue

			# Stemming
			word = stemmer.stemWord(word)
github shibukawa / snowball_py / snowballstemmer / german_stemmer.py View on Github external
'''
    # Class-level constant; nothing in this excerpt reads it.
    serialVersionUID = 1

    # NOTE(review): in each Among(...) below the second argument is either -1
    # or the index of an earlier entry in the same table whose text is a
    # suffix of this entry (e.g. "es" -> 5, which is "s"). The third argument
    # looks like a result/action code. Confirm both against the Among class.

    # a_0: the empty string, "U", "Y", and a/o/u with umlaut
    # (U+00E4, U+00F6, U+00FC). Every non-empty entry points back to index 0.
    a_0 = [
        Among(u"", -1, 6),
        Among(u"U", 0, 2),
        Among(u"Y", 0, 1),
        Among(u"\u00E4", 0, 3),
        Among(u"\u00F6", 0, 4),
        Among(u"\u00FC", 0, 5)
    ]

    # a_1: endings "e", "em", "en", "ern", "er", "s", "es"; only "es" links
    # to another entry ("s" at index 5).
    a_1 = [
        Among(u"e", -1, 2),
        Among(u"em", -1, 1),
        Among(u"en", -1, 2),
        Among(u"ern", -1, 1),
        Among(u"er", -1, 1),
        Among(u"s", -1, 3),
        Among(u"es", 5, 2)
    ]

    # a_2: endings "en", "er", "st", "est"; "est" links to "st" at index 2.
    a_2 = [
        Among(u"en", -1, 1),
        Among(u"er", -1, 1),
        Among(u"st", -1, 2),
        Among(u"est", 2, 1)
    ]

    a_3 = [
        Among(u"ig", -1, 1),
        Among(u"lich", -1, 1)
github shibukawa / snowball_py / snowballstemmer / italian_stemmer.py View on Github external
Among(u"imento", -1, 6),
        Among(u"ivo", -1, 9),
        Among(u"it\u00E0", -1, 8),
        Among(u"ist\u00E0", -1, 1),
        Among(u"ist\u00E8", -1, 1),
        Among(u"ist\u00EC", -1, 1)
    ]

    a_7 = [
        Among(u"isca", -1, 1),
        Among(u"enda", -1, 1),
        Among(u"ata", -1, 1),
        Among(u"ita", -1, 1),
        Among(u"uta", -1, 1),
        Among(u"ava", -1, 1),
        Among(u"eva", -1, 1),
        Among(u"iva", -1, 1),
        Among(u"erebbe", -1, 1),
        Among(u"irebbe", -1, 1),
        Among(u"isce", -1, 1),
        Among(u"ende", -1, 1),
        Among(u"are", -1, 1),
        Among(u"ere", -1, 1),
        Among(u"ire", -1, 1),
        Among(u"asse", -1, 1),
        Among(u"ate", -1, 1),
        Among(u"avate", 16, 1),
        Among(u"evate", 16, 1),
        Among(u"ivate", 16, 1),
        Among(u"ete", -1, 1),
        Among(u"erete", 20, 1),
        Among(u"irete", 20, 1),
github shibukawa / snowball_py / snowballstemmer / spanish_stemmer.py View on Github external
# Class-level constant; nothing in this excerpt reads it.
serialVersionUID = 1

    # NOTE(review): in each Among(...) below the second argument is either -1
    # or the index of an earlier entry in the same table whose text is a
    # suffix of this entry (e.g. "sela" -> 0, which is "la"). The third
    # argument looks like a result/action code. Confirm against the Among class.

    # a_0: the empty string and the acute-accented vowels a, e, i, o, u
    # (U+00E1, U+00E9, U+00ED, U+00F3, U+00FA). Every non-empty entry points
    # back to index 0.
    a_0 = [
        Among(u"", -1, 6),
        Among(u"\u00E1", 0, 1),
        Among(u"\u00E9", 0, 2),
        Among(u"\u00ED", 0, 3),
        Among(u"\u00F3", 0, 4),
        Among(u"\u00FA", 0, 5)
    ]

    # a_1: every entry carries -1 as its third argument. The "se"-prefixed
    # forms link to their shorter counterpart: sela -> la (0), selo -> lo (5),
    # selas -> las (7), selos -> los (10).
    a_1 = [
        Among(u"la", -1, -1),
        Among(u"sela", 0, -1),
        Among(u"le", -1, -1),
        Among(u"me", -1, -1),
        Among(u"se", -1, -1),
        Among(u"lo", -1, -1),
        Among(u"selo", 5, -1),
        Among(u"las", -1, -1),
        Among(u"selas", 7, -1),
        Among(u"les", -1, -1),
        Among(u"los", -1, -1),
        Among(u"selos", 10, -1),
        Among(u"nos", -1, -1)
    ]

    a_2 = [
        Among(u"ando", -1, 6),
        Among(u"iendo", -1, 6),
        Among(u"yendo", -1, 7),
        Among(u"\u00E1ndo", -1, 2),
github shibukawa / snowball_py / snowballstemmer / portuguese_stemmer.py View on Github external
# NOTE(review): in each Among(...) below the second argument is either -1 or
# the index of an earlier entry in the same table (here only index 0, the
# empty string). The third argument looks like a result/action code.
# Confirm both against the Among class.

# a_1: the empty string plus "a~" and "o~", both pointing back to index 0.
a_1 = [
        Among(u"", -1, 3),
        Among(u"a~", 0, 1),
        Among(u"o~", 0, 2)
    ]

    # a_2: "ic", "ad", "os", "iv"; only "iv" has a third argument other than -1.
    a_2 = [
        Among(u"ic", -1, -1),
        Among(u"ad", -1, -1),
        Among(u"os", -1, -1),
        Among(u"iv", -1, 1)
    ]

    # a_3: "ante", "avel" and "ivel" written with i-acute (U+00ED).
    a_3 = [
        Among(u"ante", -1, 1),
        Among(u"avel", -1, 1),
        Among(u"\u00EDvel", -1, 1)
    ]

    # a_4: "ic", "abil", "iv", all with third argument 1.
    a_4 = [
        Among(u"ic", -1, 1),
        Among(u"abil", -1, 1),
        Among(u"iv", -1, 1)
    ]

    a_5 = [
        Among(u"ica", -1, 1),
        Among(u"\u00E2ncia", -1, 1),
        Among(u"\u00EAncia", -1, 4),
        Among(u"ira", -1, 9),
        Among(u"adora", -1, 1),
        Among(u"osa", -1, 1),