How to use the sense2vec.util.registry function in sense2vec

To help you get started, we’ve selected a few sense2vec examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

Source: github.com/explosion/sense2vec — sense2vec/util.py (view on GitHub)
@registry.split_key.register("default")
def split_key(key: str) -> Tuple[str, str]:
    """Split a key into word and sense, e.g. ("usage example", "NOUN").

    key (unicode): The key to split.
    RETURNS (tuple): The split (word, sense) tuple.
    """
    # A valid key is a string containing at least one "|" separator.
    is_valid = isinstance(key, str) and "|" in key
    if not is_valid:
        raise ValueError(f"Invalid key: {key}")
    # Undo the underscore-for-space substitution applied by make_key, then
    # split on the LAST "|" so the word itself may contain "|" characters.
    normalized = key.replace("_", " ")
    word, _, sense = normalized.rpartition("|")
    return word, sense
Source: github.com/explosion/sense2vec — sense2vec/util.py (view on GitHub)
@registry.make_key.register("default")
def make_key(word: str, sense: str) -> str:
    """Create a key from a word and sense, e.g. "usage_example|NOUN".

    word (unicode): The word.
    sense (unicode): The sense.
    RETURNS (unicode): The key.
    """
    # Replace every whitespace character in the word with an underscore so
    # the resulting key is a single whitespace-free token.
    underscored = re.sub(r"\s", "_", word)
    return "|".join((underscored, sense))
Source: github.com/explosion/sense2vec — sense2vec/util.py (view on GitHub)
@registry.get_phrases.register("default")
def get_phrases(doc: Doc) -> List[Span]:
    """Compile a list of sense2vec phrases based on a processed Doc: named
    entities and noun chunks without determiners.

    doc (Doc): The Doc to get phrases from.
    RETURNS (list): The phrases as a list of Span objects.
    """
    spans = list(doc.ents)
    # Token indices (Token.i, an int) covered by entity spans. Fixed the
    # annotation: the original said Set[str], but the set holds ints.
    ent_words: Set[int] = set()
    for span in spans:
        ent_words.update(token.i for token in span)
    for np in get_noun_phrases(doc):
        # Prefer entities over noun chunks if there's overlap: only keep a
        # noun chunk when none of its tokens belong to an entity span.
        if not any(w.i in ent_words for w in np):
            spans.append(np)
    return spans
Source: github.com/explosion/sense2vec — sense2vec/util.py (view on GitHub)
@registry.make_spacy_key.register("default")
def make_spacy_key(
    obj: Union[Token, Span], prefer_ents: bool = False, lemmatize: bool = False
) -> Tuple[str, str]:
    """Create a key from a spaCy object, i.e. a Token or Span. If the object
    is a token, the part-of-speech tag (Token.pos_) is used for the sense
    and a special string is created for URLs. If the object is a Span and
    has a label (i.e. is an entity span), the label is used. Otherwise, the
    span's root part-of-speech tag becomes the sense.

    obj (Token / Span): The spaCy object to create the key for.
    prefer_ents (bool): Prefer entity types for single tokens (i.e.
        token.ent_type instead of tokens.pos_). Should be enabled if phrases
        are merged into single tokens, because otherwise the entity sense would
        never be used.
    lemmatize (bool): Use the object's lemma instead of its text.
    RETURNS (unicode): The key.
Source: github.com/explosion/sense2vec — sense2vec/util.py (view on GitHub; the body of make_spacy_key is truncated in this excerpt)
@registry.merge_phrases.register("default")
def merge_phrases(doc: Doc) -> Doc:
    """Transform a spaCy Doc to match the sense2vec format: merge entities
    into one token and merge noun chunks without determiners.

    doc (Doc): The document to merge phrases in.
    RETURNS (Doc): The Doc with merged tokens.
    """
    # Collect candidate phrases, then drop overlapping spans so the
    # retokenizer never receives conflicting merge requests.
    merge_candidates = filter_spans(get_phrases(doc))
    with doc.retokenize() as retokenizer:
        for candidate in merge_candidates:
            retokenizer.merge(candidate)
    return doc