How to use the sense2vec.Sense2VecComponent class in sense2vec

To help you get started, we’ve selected a few sense2vec examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github explosion / sense2vec / tests / test_component.py View on Github external
def test_component_to_from_bytes(doc):
    """Check that a component survives a ``to_bytes``/``from_bytes`` round trip.

    The restored component must expose the same keys and vector as the
    original. A key added to the restored component afterwards must not be
    reflected on ``doc`` until the restored component has processed it.
    """
    s2v = Sense2VecComponent(doc.vocab, shape=(1, 4))
    s2v.first_run = False
    vector = numpy.asarray([4, 2, 2, 2], dtype=numpy.float32)
    s2v.s2v.add("world|NOUN", vector)
    assert "world|NOUN" in s2v.s2v
    assert "world|GPE" not in s2v.s2v
    doc = s2v(doc)
    assert doc[0]._.in_s2v is False
    assert doc[1]._.in_s2v is True
    # Serialize, then restore into a fresh component built on the same vocab.
    s2v_bytes = s2v.to_bytes()
    new_s2v = Sense2VecComponent(doc.vocab).from_bytes(s2v_bytes)
    new_s2v.first_run = False
    assert "world|NOUN" in new_s2v.s2v
    assert numpy.array_equal(new_s2v.s2v["world|NOUN"], vector)
    assert "world|GPE" not in new_s2v.s2v
    # The table was created with a single row; grow it so a second key fits.
    new_s2v.s2v.vectors.resize((2, 4))
    new_s2v.s2v.add("hello|INTJ", vector)
    # Verify the newly added key: absent before the restored component runs,
    # present afterwards.
    assert doc[0]._.in_s2v is False
    new_doc = new_s2v(doc)
    assert new_doc[0]._.in_s2v is True
github explosion / sense2vec / tests / test_component.py View on Github external
def test_component_attributes(doc):
    """Verify the token-level ``s2v_*`` extension attributes.

    Only ``"world|NOUN"`` is stored (with frequency 123), so the second
    token must report it as found, with that frequency and vector, while the
    first token reports a key but no table entry.
    """
    vec = numpy.asarray([4, 2, 2, 2], dtype=numpy.float32)
    component = Sense2VecComponent(doc.vocab, shape=(10, 4))
    component.s2v.add("world|NOUN", vec, 123)
    doc = component(doc)
    tok0, tok1 = doc[0], doc[1]
    # Keys are produced for both tokens, whether or not they are in the table.
    assert tok0._.s2v_key == "hello|INTJ"
    assert tok1._.s2v_key == "world|NOUN"
    assert tok0._.in_s2v is False
    assert tok1._.in_s2v is True
    assert tok0._.s2v_freq is None
    assert tok1._.s2v_freq == 123
    assert numpy.array_equal(tok1._.s2v_vec, vec)
github explosion / sense2vec / tests / test_component.py View on Github external
def test_component_lemmatize(doc):
    """Check key generation when the component is built with ``lemmatize=True``.

    The assertions establish that a lemma-based key is reported only when
    that key exists in the table: ``doc[0]`` keeps ``"hello|INTJ"`` after its
    lemma becomes ``"hll"`` and switches to ``"hll|INTJ"`` only once that key
    has been added. The ``lemmatize`` setting must also survive a
    ``to_bytes``/``from_bytes`` round trip.
    """
    lookups = doc.vocab.lookups.add_table("lemma_lookup")
    try:
        lookups["world"] = "wrld"
        s2v = Sense2VecComponent(doc.vocab, shape=(4, 4), lemmatize=True)
        s2v.first_run = False
        vector = numpy.asarray([4, 2, 2, 2], dtype=numpy.float32)
        s2v.s2v.add("hello|INTJ", vector)
        s2v.s2v.add("world|NOUN", vector)
        s2v.s2v.add("wrld|NOUN", vector)
        doc = s2v(doc)
        assert doc[0]._.s2v_key == "hello|INTJ"
        assert doc[1].lemma_ == "wrld"
        assert doc[1]._.s2v_key == "wrld|NOUN"
        # Give the first token a lemma whose key is not in the table yet.
        lookups["hello"] = "hll"
        assert doc[0].lemma_ == "hll"
        assert doc[0]._.s2v_key == "hello|INTJ"
        s2v.s2v.add("hll|INTJ", vector)
        assert doc[0]._.s2v_key == "hll|INTJ"
        new_s2v = Sense2VecComponent().from_bytes(s2v.to_bytes())
        assert new_s2v.s2v.cfg["lemmatize"] is True
    finally:
        # Remove the table even when an assertion fails, so it does not
        # linger on the vocab's lookups after the test.
        doc.vocab.lookups.remove_table("lemma_lookup")
github explosion / sense2vec / tests / test_component.py View on Github external
def test_component_similarity(doc):
    """Entries that share one vector must report a similarity of exactly 1.0.

    Covers both the token-to-token and the span-to-span form of
    ``s2v_similarity``.
    """
    shared_vec = numpy.asarray([4, 2, 2, 2], dtype=numpy.float32)
    component = Sense2VecComponent(doc.vocab, shape=(4, 4))
    component.first_run = False
    # Both keys get the same vector.
    for key in ("hello|INTJ", "world|NOUN"):
        component.s2v.add(key, shared_vec)
    doc = component(doc)
    assert doc[0]._.s2v_similarity(doc[1]) == 1.0
    assert doc[1:3]._.s2v_similarity(doc[1:3]) == 1.0
github explosion / sense2vec / tests / test_component.py View on Github external
s2v = Sense2VecComponent(doc.vocab, shape=(4, 4), lemmatize=True)
    s2v.first_run = False
    vector = numpy.asarray([4, 2, 2, 2], dtype=numpy.float32)
    s2v.s2v.add("hello|INTJ", vector)
    s2v.s2v.add("world|NOUN", vector)
    s2v.s2v.add("wrld|NOUN", vector)
    doc = s2v(doc)
    assert doc[0]._.s2v_key == "hello|INTJ"
    assert doc[1].lemma_ == "wrld"
    assert doc[1]._.s2v_key == "wrld|NOUN"
    lookups["hello"] = "hll"
    assert doc[0].lemma_ == "hll"
    assert doc[0]._.s2v_key == "hello|INTJ"
    s2v.s2v.add("hll|INTJ", vector)
    assert doc[0]._.s2v_key == "hll|INTJ"
    new_s2v = Sense2VecComponent().from_bytes(s2v.to_bytes())
    assert new_s2v.s2v.cfg["lemmatize"] is True
    doc.vocab.lookups.remove_table("lemma_lookup")
github explosion / sense2vec / tests / test_component.py View on Github external
def test_component_attributes_ents(doc):
    """Verify ``s2v_phrases`` and the span-level attributes for entities.

    Before any entity is set the doc has no phrases. After marking token 1
    as a ``GPE`` entity, the doc exposes one phrase whose key uses the entity
    label, while the token inside it keeps its own key.
    """
    vec = numpy.asarray([4, 2, 2, 2], dtype=numpy.float32)
    component = Sense2VecComponent(doc.vocab, shape=(10, 4))
    component.first_run = False
    for key in ("world|NOUN", "world|GPE"):
        component.s2v.add(key, vec)
    doc = component(doc)
    assert len(doc._.s2v_phrases) == 0
    # Entities are assigned only after the component has run.
    doc.ents = [Span(doc, 1, 2, label="GPE")]
    assert len(doc._.s2v_phrases) == 1
    entity_span = doc._.s2v_phrases[0]
    assert entity_span._.s2v_key == "world|GPE"
    assert entity_span[0]._.s2v_key == "world|NOUN"
    assert entity_span._.in_s2v is True
    assert entity_span[0]._.in_s2v is True
github explosion / sense2vec / tests / test_component.py View on Github external
def test_component_to_from_bytes(doc):
    """Round-trip the component through ``to_bytes``/``from_bytes``.

    The restored component must hold the same key and vector as the
    original. A key added to it afterwards must not show up on ``doc`` until
    the restored component has processed the doc.
    """
    vec = numpy.asarray([4, 2, 2, 2], dtype=numpy.float32)
    original = Sense2VecComponent(doc.vocab, shape=(1, 4))
    original.first_run = False
    original.s2v.add("world|NOUN", vec)
    assert "world|NOUN" in original.s2v
    assert "world|GPE" not in original.s2v
    doc = original(doc)
    assert doc[0]._.in_s2v is False
    assert doc[1]._.in_s2v is True

    # Restore into a fresh component that shares the same vocab.
    restored = Sense2VecComponent(doc.vocab).from_bytes(original.to_bytes())
    restored.first_run = False
    assert "world|NOUN" in restored.s2v
    assert numpy.array_equal(restored.s2v["world|NOUN"], vec)
    assert "world|GPE" not in restored.s2v

    # The table was created with one row; resize it before adding a second key.
    restored.s2v.vectors.resize((2, 4))
    restored.s2v.add("hello|INTJ", vec)
    assert doc[0]._.in_s2v is False
    reprocessed = restored(doc)
    assert reprocessed[0]._.in_s2v is True
github neelkamath / spacy-server / src / main.py View on Github external
import dataclasses
import fastapi
import pydantic
import sense2vec
import spacy
import starlette.responses
import starlette.status

# The FastAPI application instance.
app: fastapi.FastAPI = fastapi.FastAPI()
# Name of the spaCy model to load, taken from the SPACY_MODEL environment
# variable; os.getenv returns None when the variable is unset.
model: typing.Optional[str] = os.getenv('SPACY_MODEL')
# Error template: the model name is filled in here, the trailing '{}' is
# filled in later with str.format (see enforce_components).
pipeline_error: str = f"The model ({model}) doesn't support " + '{}.'
# spacy.load returns a Language pipeline object (not the spacy module).
nlp: spacy.language.Language = spacy.load(model)
# Add the sense2vec component, loaded from disk, only when SENSE2VEC is '1'.
if os.getenv('SENSE2VEC') == '1':
    nlp.add_pipe(
        sense2vec.Sense2VecComponent(nlp.vocab).from_disk('src/s2v_old')
    )


def enforce_components(components: typing.List[str], message: str) -> None:
    """Raise an HTTP 400 error unless the pipeline has every component.

    Args:
        components: Names of pipeline components that ``nlp`` must contain.
        message: Text substituted into ``pipeline_error`` to build the
            error detail.

    Raises:
        fastapi.HTTPException: With status code 400, as soon as one of the
            ``components`` is found to be missing from the pipeline.
    """
    # all() short-circuits, so checking stops at the first missing component.
    if all(nlp.has_pipe(name) for name in components):
        return
    raise fastapi.HTTPException(
        status_code=400,
        detail=pipeline_error.format(message),
    )


class NERRequest(pydantic.BaseModel):
    sections: typing.List[str]
    sense2vec: bool = False