Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
level=logging.INFO)
# Record the exact command line and the FAST_VERSION value for later diagnosis.
logger.info("running %s", " ".join(sys.argv))
logger.info("using optimization %s", FAST_VERSION)

# Command-line handling: argv[1] (input corpus) is required; argv[2]
# (questions file) and argv[3] (output prefix) are optional.
program = os.path.basename(sys.argv[0])
if len(sys.argv) <= 1:
    # The usage text is the module docstring; any %(name)s placeholders in it
    # are filled from the names in scope here (e.g. `program`).
    print(globals()['__doc__'] % locals())
    sys.exit(1)
infile = sys.argv[1]

# Import through the package path to avoid referencing __main__ in pickle.
from gensim.models.word2vec import Word2Vec

# Don't ignore numpy errors: raise on them instead.
seterr(all='raise')

# Train on the corpus read from `infile`.
# An earlier variant, kept for reference, trained with:
#   Word2Vec(LineSentence(infile), size=200, min_count=5, workers=4)
# NOTE(review): the meaning of the second positional argument (10) of
# Text8Corpus is not visible from this file -- confirm against its definition.
model = Word2Vec(
    Text8Corpus(infile, 10),
    size=256,
    min_count=5,
    workers=4,
    sg=0,
    hs=0,
    cbow_mean=1,
    negative=5,
)

# When an output prefix is given, write the model out as three files: the
# native save plus the word2vec format in binary and in text form.
if len(sys.argv) >= 4:
    outfile = sys.argv[3]
    model.save(outfile + '.model')
    for suffix, as_binary in (('.model.bin', True), ('.model.txt', False)):
        model.save_word2vec_format(outfile + suffix, binary=as_binary)

# When a questions file is given, run the accuracy evaluation on it.
if len(sys.argv) >= 3:
    questions_file = sys.argv[2]
    model.accuracy(questions_file)

logger.info("finished running %s", program)
def load(cls, *args, **kwargs):
model = super(Word2Vec, cls).load(*args, **kwargs)
# update older models
if hasattr(model, 'table'):
delattr(model, 'table') # discard in favor of cum_table
if model.negative and hasattr(model, 'index2word'):
model.make_cum_table() # rebuild cum_table from vocabulary
if not hasattr(model, 'corpus_count'):
model.corpus_count = None
for v in model.vocab.values():
if hasattr(v, 'sample_int'):
break # already 0.12.0+ style int probabilities
elif hasattr(v, 'sample_probability'):
v.sample_int = int(round(v.sample_probability * 2**32))
del v.sample_probability
if not hasattr(model, 'syn0_lockf') and hasattr(model, 'syn0'):
model.syn0_lockf = ones(len(model.syn0), dtype=REAL)
if not hasattr(model, 'random'):