How to use the sister.word_embedders.WordEmbedding class in sister

To help you get started, we’ve selected a few sister examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github tofunlp / sister / tests / test_embedders.py View on Github external
def test_get_word_vector_not_implemented(self):
        """Check that the inherited ``get_word_vector`` raises NotImplementedError."""
        # The subclass overrides only the plural method, so the singular
        # lookup is still the one inherited from WordEmbedding.
        class PluralOnly(WordEmbedding):
            def get_word_vectors(self, w): ...
        with self.assertRaises(NotImplementedError):
            PluralOnly().get_word_vector(self.words)
github tofunlp / sister / sister / word_embedders.py View on Github external
return model


class WordEmbedding(object):
    """Base class for word embedders.

    Subclasses provide ``get_word_vector``; ``get_word_vectors`` is built
    on top of it.
    """

    def get_word_vector(self, word: str) -> np.ndarray:
        """Return the vector for a single word.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError

    def get_word_vectors(self, words: List[str]) -> np.ndarray:
        """Look up each word in ``words`` in order and pack the results into one array."""
        return np.array([self.get_word_vector(token) for token in words])


class FasttextEmbedding(WordEmbedding):
    """Word embedder that delegates lookups to the model returned by ``get_fasttext``."""

    def __init__(self, lang: str = "en") -> None:
        """Fetch the model for ``lang`` via ``get_fasttext`` and keep it on ``self.model``."""
        self.model = get_fasttext(lang)

    def get_word_vector(self, word: str) -> np.ndarray:
        """Return what the wrapped model's ``get_word_vector`` gives for ``word``."""
        vector = self.model.get_word_vector(word)
        return vector


class Word2VecEmbedding(WordEmbedding):

    def __init__(self, lang: str = "en") -> None:
        model = get_word2vec(lang)
        self.model = model

    def get_word_vector(self, word: str) -> np.ndarray:
github tofunlp / sister / sister / word_embedders.py View on Github external
for word in words:
            vectors.append(self.get_word_vector(word))
        return np.array(vectors)


class FasttextEmbedding(WordEmbedding):
    """Word embedder that delegates lookups to the model returned by ``get_fasttext``."""

    def __init__(self, lang: str = "en") -> None:
        """Fetch the model for ``lang`` via ``get_fasttext`` and keep it on ``self.model``."""
        self.model = get_fasttext(lang)

    def get_word_vector(self, word: str) -> np.ndarray:
        """Return what the wrapped model's ``get_word_vector`` gives for ``word``."""
        vector = self.model.get_word_vector(word)
        return vector


class Word2VecEmbedding(WordEmbedding):
    """Word embedder backed by the model returned by ``get_word2vec``."""

    def __init__(self, lang: str = "en") -> None:
        """Fetch the model for ``lang`` via ``get_word2vec`` and keep it on ``self.model``."""
        self.model = get_word2vec(lang)

    def get_word_vector(self, word: str) -> np.ndarray:
        """Return the model's vector for ``word``, or a random one if it is absent.

        A word not contained in the model gets a new ``np.random.rand``
        vector of length ``self.model.vector_size`` on every call, so
        repeated lookups of the same unknown word yield different vectors.
        """
        if word not in self.model:
            return np.random.rand(self.model.vector_size)
        return self.model[word]