How to use the fasttext.cbow function in fasttext

To help you get started, we’ve selected a few fasttext examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github emanjavacas / seqmod / scripts / w2v.py View on Github external
def fit(self, documents,
            alg='cbow', min_count=5, size=300, max_features=10000, window=5):
        """Train word embeddings on ``documents`` with the configured backend.

        The backend is chosen by ``self.flavor``:

        * ``'w2v'`` -- builds a ``Word2Vec`` model and saves it to
          ``self.path``.
        * ``'ft'`` -- dumps the documents to a private temporary text file
          (one space-joined document per line) and trains a fastText model
          whose output is written to ``self.path``.

        Any other flavor trains nothing, but the instance is still marked
        as fitted (behaviour kept from the original implementation).

        Parameters
        ----------
        documents : iterable of iterables of str
            Tokenised documents.
        alg : {'cbow', 'sg'}
            Training algorithm: continuous bag-of-words or skip-gram.
        min_count : int
            Forwarded to the backend as ``min_count``.
        size : int
            Embedding dimensionality (``size`` for Word2Vec, ``dim`` for
            fastText).
        max_features : int
            Forwarded to Word2Vec as ``max_vocab_size``; not used by the
            fastText branch.
        window : int
            Context window (``window`` for Word2Vec, ``ws`` for fastText).

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``alg`` is neither ``'cbow'`` nor ``'sg'``.
        """
        # Explicit check instead of ``assert``: asserts vanish under
        # ``python -O`` and a bad ``alg`` would then silently train skip-gram.
        if alg not in ('cbow', 'sg'):
            raise ValueError(
                "alg must be 'cbow' or 'sg', got {!r}".format(alg))

        if self.flavor == 'w2v':
            self.model = Word2Vec(
                documents, min_count=min_count, size=size, window=window,
                max_vocab_size=max_features, sg=0 if alg == 'cbow' else 1)
            self.model.save(self.path)
        elif self.flavor == 'ft':
            import os
            import tempfile

            train = fasttext.cbow if alg == 'cbow' else fasttext.skipgram
            # A unique temp file replaces the fixed '/tmp/skiptrain.txt':
            # no clashes between concurrent runs, no predictable path, and
            # it works where '/tmp' does not exist.  ``delete=False`` because
            # the trainer re-opens the file by name after we close it.
            corpus = tempfile.NamedTemporaryFile(
                'w', suffix='.txt', delete=False, encoding='utf-8')
            corpus_path = corpus.name
            try:
                with corpus:
                    corpus.writelines(' '.join(d) + '\n' for d in documents)
                self.model = train(
                    input_file=corpus_path, output=self.path,
                    min_count=min_count, dim=size, ws=window)
            finally:
                # Always clean up, even when writing or training fails.
                os.remove(corpus_path)

        self.size = size
        # All-zeros vector of the embedding size (float64).
        self.default = np.zeros(self.size, dtype='float64')
        self.fitted = True

        return self
github fnielsen / dasem / dasem / models.py View on Github external
model_filename = FAST_TEXT_CBOW_MODEL_FILENAME

        full_model_filename = self.full_filename(model_filename)
        full_input_filename = self.full_filename(input_filename)

        if model_type == 'skipgram':
            self.logger.info(
                'Training fasttext skipgram model on {} to {}'.format(
                    full_input_filename, full_model_filename))
            self.model = fasttext.skipgram(
                full_input_filename, full_model_filename)
        elif model_type == 'cbow':
            self.logger.info(
                'Training fasttext cbow model on {} to {}'.format(
                    full_input_filename, full_model_filename))
            self.model = fasttext.cbow(
                full_input_filename, full_model_filename)
        else:
            raise ValueError('Wrong argument to model_type')

        # Invalidate computed normalized matrix
        self._normalized_matrix = None
github liorshk / wordembedding-hebrew / fasttxt.py View on Github external
def train(inp="wiki.he.text", out_model="wiki.he.fasttext.model",
          alg="CBOW"):
    """Train a fastText model on ``inp``, report progress and save it.

    ``alg == "skipgram"`` selects the skip-gram trainer; every other value
    (including the default ``"CBOW"``) selects the CBOW trainer.  The
    trained model's ``words`` and the elapsed wall-clock time in seconds
    are printed, then ``model.save(out_model)`` is called.
    """
    began = time.time()

    # Pick the trainer once instead of duplicating the call in two branches.
    trainer = fasttext.skipgram if alg == "skipgram" else fasttext.cbow
    model = trainer(inp, out_model)
    print(model.words)  # list of words in dictionary

    print(time.time() - began)

    model.save(out_model)
github ziweipolaris / atec2018-nlp / utils / train_embedding.py View on Github external
def train_embedding_fasttext():
    """Train skip-gram and CBOW fastText models for chars, then for words.

    Four models are trained in a fixed order (char skip-gram, char CBOW,
    word skip-gram, word CBOW).  Input and output paths are formed by
    prefixing the names below with ``model_dir``; every run uses
    ``word_ngrams=2, ws=5, min_count=10, dim=256``.  Nothing is returned.
    """
    # (algorithm, training file, output name) -- order matters.
    jobs = (
        ('skipgram', 'train_char.txt', 'char2vec_fastskip256'),
        ('cbow', 'train_char.txt', 'char2vec_fastcbow256'),
        ('skipgram', 'train_word.txt', 'word2vec_fastskip256'),
        ('cbow', 'train_word.txt', 'word2vec_fastcbow256'),
    )

    for algorithm, corpus_name, output_name in jobs:
        if algorithm == 'skipgram':
            trainer = fasttext.skipgram
        else:
            trainer = fasttext.cbow
        model = trainer(model_dir + corpus_name, model_dir + output_name,
                        word_ngrams=2, ws=5, min_count=10, dim=256)
        # Drop the reference before the next training run starts.
        del model
github ziweipolaris / atec2018-nlp / utils / train_embedding.py View on Github external
def train_embedding_fasttext():
    """Train skip-gram and CBOW fastText models for chars, then for words.

    Four models are trained in a fixed order (char skip-gram, char CBOW,
    word skip-gram, word CBOW).  Input and output paths are formed by
    prefixing the names below with ``model_dir``; every run uses
    ``word_ngrams=2, ws=5, min_count=10, dim=256``.  Nothing is returned.
    """
    # (algorithm, training file, output name) -- order matters.
    jobs = (
        ('skipgram', 'train_char.txt', 'char2vec_fastskip256'),
        ('cbow', 'train_char.txt', 'char2vec_fastcbow256'),
        ('skipgram', 'train_word.txt', 'word2vec_fastskip256'),
        ('cbow', 'train_word.txt', 'word2vec_fastcbow256'),
    )

    for algorithm, corpus_name, output_name in jobs:
        if algorithm == 'skipgram':
            trainer = fasttext.skipgram
        else:
            trainer = fasttext.cbow
        model = trainer(model_dir + corpus_name, model_dir + output_name,
                        word_ngrams=2, ws=5, min_count=10, dim=256)
        # Drop the reference before the next training run starts.
        del model