import re
import sys

import snowballstemmer


def main():
    argv = sys.argv
    if len(argv) < 2:
        usage()  # usage() is defined elsewhere in the original module
        return
    algorithm = 'english'
    if len(argv) > 2:
        algorithm = argv[1]
        argv = argv[2:]
    else:
        argv = argv[1:]
    stemmer = snowballstemmer.stemmer(algorithm)
    splitter = re.compile(r"[\s\.-]")
    for arg in argv:
        for word in splitter.split(arg):
            if word == '':
                continue
            original = word.lower()
            print(original + " -> " + stemmer.stemWord(original))


main()
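The CLI above stems one word at a time; snowballstemmer also provides stemWords for lists, which does the same thing in batch:

import snowballstemmer

stemmer = snowballstemmer.stemmer('english')
print(stemmer.stemWords('running runs ran'.split()))
# ['run', 'run', 'ran']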
def init(self, options: Dict) -> None:
    self.stemmer = snowballstemmer.stemmer('turkish')

def init(self, options: Dict) -> None:
    self.stemmer = snowballstemmer.stemmer('swedish')

def init(self, options: Dict) -> None:
    self.stemmer = snowballstemmer.stemmer('romanian')

def init(self, options: Dict) -> None:
    self.stemmer = snowballstemmer.stemmer('hungarian')
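The four fragments above follow one pattern: a per-language class whose init wires in the matching Snowball stemmer. A minimal self-contained sketch of that pattern (the enclosing class and its other members are not shown in the snippets, so the names here are assumptions):

from typing import Dict

import snowballstemmer

class SearchLanguageSketch:  # hypothetical class name; the real one is not in the snippets
    def init(self, options: Dict) -> None:
        self.stemmer = snowballstemmer.stemmer('turkish')

    def stem(self, word: str) -> str:
        # Delegate to the Snowball stemmer chosen in init().
        return self.stemmer.stemWord(word.lower())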
stop_words = (
    # Earlier entries of the comma-separated literal are truncated in the source.
    'twas,us,wants,was,we,were,what,when,where,which,while,who,whom,why,'
    'will,with,would,yet,you,your').lower().split(',')

def is_stopword(word):
    '''Return whether a string is a stop word.

    Matching is case-insensitive.

    Returns:
        True if the string is a stop word, False otherwise.
    '''
    return word.lower() in stop_words
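A quick sanity check, using an entry visible in the truncated list above:

print(is_stopword('Was'))   # True: matching is case-insensitive
print(is_stopword('tree'))  # False: not in the list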
# Feature extraction
import codecs
from collections import Counter

import snowballstemmer

stemmer = snowballstemmer.stemmer('english')
word_counter = Counter()
# fname_sentiment and fencoding come from elided context in the source
with codecs.open(fname_sentiment, 'r', fencoding) as file_in:
    for line in file_in:
        for word in line[3:].split(' '):  # line[3:] drops the polarity label
            # strip surrounding whitespace
            word = word.strip()
            # skip stop words
            if is_stopword(word):
                continue
            # stemming
            word = stemmer.stemWord(word)
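            # Assumed continuation (the source snippet cuts off here):
            # tally the stemmed word so frequent features can be inspected later.
            word_counter[word] += 1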
import json
import os
import re
import sys
import uuid

# Make the craftassist package importable before the local imports below.
TTAD_GEN_DIR = os.path.dirname(os.path.realpath(__file__))
CRAFTASSIST_DIR = os.path.join(TTAD_GEN_DIR, "../../")
sys.path.append(CRAFTASSIST_DIR)

from generate_data import *
from dialogue_objects.interpreter_helper import coref_resolve, interpret_shape_schematic
from word_maps import SPECIAL_SHAPE_FNS, SPECIAL_SHAPES_CANONICALIZE, SPAWN_OBJECTS
from size_words import size_str_to_int
import shapes
import block_data
import snowballstemmer

stemmer = snowballstemmer.stemmer("english")

# from word2number.w2n import word_to_num

# Action classes come from generate_data's star import.
CHOICES = [Move, Build, Destroy, Dig, Copy, Fill, Spawn, Dance]
#############################################################
# modified from size_words...
#############################################################
RANGES = {
    "tiny": (2, 3),
    "small": (2, 3),
    "medium": (2, 4),
    "large": (4, 5),
    "huge": (5, 6),
}
import codecs

import snowballstemmer


def stemming(lang, input, output, encoding, pretty):
    stemmer = snowballstemmer.stemmer(lang)
    with codecs.open(output, "w", encoding) as outfile:
        with codecs.open(input, "r", encoding) as infile:
            for original in infile.readlines():
                original = original.strip()
                # Convert only ASCII-letters to lowercase, to match C behavior
                original = ''.join((c.lower() if 'A' <= c <= 'Z' else c for c in original))
                stemmed = stemmer.stemWord(original)
                if pretty == 0:
                    if stemmed != "":
                        outfile.write(stemmed)
                elif pretty == 1:
                    # file.write() takes a single string; the original passed three arguments
                    outfile.write(original + " -> " + stemmed)
                elif pretty == 2:
                    outfile.write(original)
                    if len(original) < 30:
                        outfile.write(" " * (30 - len(original)))
from itertools import combinations

import networkx as nx
import snowballstemmer


def textrank(text, hdr):
    # lang_identifier, LANG_CODES, split_multi, word_tokenizer and similarity
    # come from elided context in the source module.
    # detect the most likely language of the text
    lang_code = lang_identifier.classify(' '.join([hdr, text]))[0]
    # split into sentences
    sentences = [sentence for sentence in split_multi(text)]
    stemmer = snowballstemmer.stemmer(LANG_CODES.get(lang_code, 'english'))
    # one set of stemmed word tokens per sentence
    words = [set(stemmer.stemWord(word) for word in word_tokenizer(sentence.lower()) if word.isalpha())
             for sentence in sentences]
    # score every sentence pair by word-overlap similarity, dropping zero scores
    pairs = combinations(range(len(sentences)), 2)
    scores = [(i, j, similarity(words[i], words[j])) for i, j in pairs]
    scores = filter(lambda x: x[2], scores)
    # rank sentences with PageRank over the similarity graph
    g = nx.Graph()
    g.add_weighted_edges_from(scores)
    pr = nx.pagerank(g)
    return sorted(((i, pr[i], s) for i, s in enumerate(sentences) if i in pr),
                  key=lambda x: pr[x[0]], reverse=True), lang_code
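The similarity function is not shown in this snippet; a common choice in TextRank implementations of this shape is normalized overlap of the stemmed word sets, sketched here as an assumption:

def similarity(s1, s2):
    # Hypothetical measure: shared stems normalized by combined set size.
    # The real similarity() lives elsewhere in the source module.
    if not s1 or not s2:
        return 0.0
    return len(s1 & s2) / (len(s1) + len(s2))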