How to use the sister.word_embedders.FasttextEmbedding class in sister

To help you get started, we've selected a few sister examples based on popular ways it is used in public projects.
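FasttextEmbedding loads pretrained fastText word vectors for a given language and exposes them through get_word_vector and get_word_vectors, the two methods exercised in the test below. A minimal sketch, assuming 300-dimensional English vectors as in the examples that follow; note that the first call downloads the pretrained fastText model, which is large:

from sister.word_embedders import FasttextEmbedding

# Downloads (and caches) the pretrained English fastText model on first use.
embedding = FasttextEmbedding(lang="en")

vector = embedding.get_word_vector("hello")                # shape: (300,)
vectors = embedding.get_word_vectors(["hello", "world"])   # shape: (2, 300)
print(vector.shape, vectors.shape)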


Example from tofunlp/sister, tests/test_embedders.py (view on GitHub):
# Module-level imports used by this test: numpy as np, unittest.mock.patch,
# and FasttextEmbedding from sister.word_embedders.
def setUp(self):
        # TODO
        # Downloading fasttext and unzipping is too heavy.
        # Needs to be mocked somehow.
        # For now, solved with caching.

        # embedding_patcher = patch('sister.word_embedders.FasttextEmbedding')
        # self.embedding = embedding_patcher.start()(lang='en')
        # self.embedding.get_word_vector.return_value = np.random.rand(300)
        # self.embedding.get_word_vectors.side_effect = lambda words: np.random.rand(len(words), 300)
        # self.embedding_patcher = embedding_patcher
        self.embedding = FasttextEmbedding(lang="en")
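The commented-out block above sketches how to stub out the embedder so the test suite never downloads fastText. A runnable version of that idea, using only the calls already named in the comments plus unittest's standard addCleanup:

import numpy as np
from unittest.mock import patch

def setUp(self):
    # Patch FasttextEmbedding where it is looked up, so no model is downloaded.
    embedding_patcher = patch('sister.word_embedders.FasttextEmbedding')
    self.embedding = embedding_patcher.start()(lang='en')
    self.embedding.get_word_vector.return_value = np.random.rand(300)
    self.embedding.get_word_vectors.side_effect = (
        lambda words: np.random.rand(len(words), 300))
    self.addCleanup(embedding_patcher.stop)
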
Example from tofunlp/sister, sister/core.py (view on GitHub):
# Module-level imports needed: Tokenizer, SimpleTokenizer, JapaneseTokenizer,
# WordEmbedding and FasttextEmbedding (from sister's tokenizer and
# word-embedder modules).
def __init__(
            self,
            lang: str = 'en',
            tokenizer: Tokenizer = None,
            word_embedder: WordEmbedding = None) -> None:
        tokenizer = tokenizer or {"en": SimpleTokenizer(),
                                  "fr": SimpleTokenizer(),
                                  "ja": JapaneseTokenizer()}[lang]
        word_embedder = word_embedder or FasttextEmbedding(lang)
        super().__init__(tokenizer, word_embedder)
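
This constructor selects a language-appropriate tokenizer ("en" and "fr" use SimpleTokenizer, "ja" uses JapaneseTokenizer; any other lang raises a KeyError from the dict lookup) and falls back to FasttextEmbedding(lang) when no word_embedder is given. Assuming this __init__ belongs to sister's MeanEmbedding sentence-embedding class (the class name is not shown in this excerpt), typical usage looks like:

import sister

# MeanEmbedding combines the fastText word vectors of the tokenized sentence.
sentence_embedding = sister.MeanEmbedding(lang="en")
vector = sentence_embedding("I am a dog.")  # a 300-dimensional vector

To override the defaults, pass your own tokenizer or word_embedder instead of relying on the lang lookup above.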