How to use the fasttext.train_unsupervised function in fasttext

To help you get started, we’ve selected a few fasttext examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github dperezrada / keywords2vec / keywords2vec / __init__.py View on Github external
def train_model(input_filename):
    """Train an unsupervised fastText skip-gram model and return it.

    ``input_filename`` is forwarded unchanged as the first positional
    argument of ``fasttext.train_unsupervised``; the remaining training
    options are fixed (``model='skipgram'``, ``maxn=0``, ``dim=100``,
    ``ws=5``). The value returned by that call is returned as-is.
    """
    # Fixed training options, kept in the same order as they are passed on.
    training_options = {'model': 'skipgram', 'maxn': 0, 'dim': 100, 'ws': 5}
    return fasttext.train_unsupervised(input_filename, **training_options)
github explosion / sense2vec / scripts / 04_fasttext_train_vectors.py View on Github external
input_path = Path(in_dir)
        # Check that the input path exists and is a directory
        if not input_path.exists() or not input_path.is_dir():
            msg.fail("Not a valid input directory", in_dir, exits=1)
        tmp_path = input_path / "s2v_input.tmp"
        input_files = [p for p in input_path.iterdir() if p.suffix == ".s2v"]
        if not input_files:
            msg.fail("Input directory contains no .s2v files", in_dir, exits=1)
        # fastText expects only one input file and only reads from disk and not
        # stdin, so we need to create a temporary file that concatenates the inputs
        with tmp_path.open("a", encoding="utf8") as tmp_file:
            for input_file in input_files:
                with input_file.open("r", encoding="utf8") as f:
                    tmp_file.write(f.read())
        msg.info("Created temporary merged input file", tmp_path)
        fasttext_model = fasttext.train_unsupervised(str(tmp_path), thread=n_threads, epoch=epoch, dim=vector_size,
                                                     minn=0, maxn=0, minCount=min_count, verbose=verbose)
        msg.good("Successfully trained fastText model vectors")

        tmp_path.unlink()
        msg.good("Deleted temporary input file", tmp_path)
        output_file = output_path / f"vectors_w2v_{vector_size}dim.bin"
        if save_fasttext_model:
            fasttext_model.save_model(str(output_file))
            if not output_file.exists() or not output_file.is_file():
                msg.fail("Failed to save fastText model to disk", output_file, exits=1)
            msg.good("Successfully saved fastText model to disk", output_file)
    else:
        fasttext_model = None
        msg.fail("Must provide an input directory or fastText binary filepath", exits=1)

    msg.info("Creating vocabulary file")
github sagorbrur / bnlp / bnlp / bengali_fasttext.py View on Github external
def train_fasttext(self, data, model_name, epoch):
        """Train an unsupervised skip-gram model and save it.

        ``data`` is passed as the first positional argument of
        ``fasttext.train_unsupervised`` together with ``model='skipgram'``,
        ``minCount=1`` and the caller-supplied ``epoch``. The resulting
        model is then written out via ``save_model(model_name)``.

        Nothing is returned; the saved model is the only result.
        """
        skipgram_model = fasttext.train_unsupervised(
            data,
            model='skipgram',
            minCount=1,
            epoch=epoch,
        )
        skipgram_model.save_model(model_name)