How to use the spacy.tokens.Doc.set_extension function in spaCy

To help you get started, we've selected a few spaCy examples based on popular ways Doc.set_extension is used in public projects.

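Before digging into the examples, here is a minimal sketch of the three ways Doc.set_extension can register a custom attribute - a stored default, a computed getter, and a callable method - all accessed through the doc._ namespace. The attribute names (counted, n_chars, repeat) are illustrative, not taken from the projects below.

from spacy.tokens import Doc
from spacy.vocab import Vocab

# Stored value: writable per doc, initialised from a default.
Doc.set_extension("counted", default=False)
# Computed value: the getter runs on every access, read-only.
Doc.set_extension("n_chars", getter=lambda doc: len(doc.text))
# Callable: doc._.repeat(...) behaves like a bound method.
Doc.set_extension("repeat", method=lambda doc, n: doc.text * n)

doc = Doc(Vocab(), words=["hello", "world"])
doc._.counted = True
assert doc._.n_chars == len(doc.text)
assert doc._.repeat(2) == doc.text * 2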

github explosion / spaCy / tests / doc / test_underscore.py
def test_underscore_docstring(en_vocab):
    """Test that docstrings are available for extension methods, even though
    they're partials."""

    def test_method(doc, arg1=1, arg2=2):
        """I am a docstring"""
        return (arg1, arg2)

    Doc.set_extension("test_docstrings", method=test_method)
    doc = Doc(en_vocab, words=["hello", "world"])
    assert test_method.__doc__ == "I am a docstring"
    assert doc._.test_docstrings.__doc__.rsplit(". ")[-1] == "I am a docstring"

github explosion / spaCy / tests / doc / test_to_json.py
def test_doc_to_json_underscore_error_serialize(doc):
    """Test that Doc.to_json() raises an error if a custom attribute value
    isn't JSON-serializable."""
    Doc.set_extension("json_test4", method=lambda doc: doc.text)
    with pytest.raises(ValueError):
        doc.to_json(underscore=["json_test4"])

github Roboy / ravestate / test / modules / ravestate_nlp / test_extract_triples.py
def spacy_model():
    nlp = spacy_nlp_en
    from spacy.tokens import Doc
    if not Doc.has_extension('triples'):
        Doc.set_extension('triples', getter=extract_triples)
    return nlp

github explosion / spaCy / tests / doc / test_underscore.py
def test_underscore_mutable_defaults_list(en_vocab):
    """Test that mutable default arguments are handled correctly (see #2581)."""
    Doc.set_extension("mutable", default=[])
    doc1 = Doc(en_vocab, words=["one"])
    doc2 = Doc(en_vocab, words=["two"])
    doc1._.mutable.append("foo")
    assert len(doc1._.mutable) == 1
    assert doc1._.mutable[0] == "foo"
    assert len(doc2._.mutable) == 0
    doc1._.mutable = ["bar", "baz"]
    doc1._.mutable.append("foo")
    assert len(doc1._.mutable) == 3
    assert len(doc2._.mutable) == 0

github explosion / spaCy / tests / serialize / test_serialize_extension_attrs.py
def doc_w_attrs(en_tokenizer):
    Doc.set_extension("_test_attr", default=False)
    Doc.set_extension("_test_prop", getter=lambda doc: len(doc.text))
    Doc.set_extension(
        "_test_method", method=lambda doc, arg: "{}{}".format(len(doc.text), arg)
    )
    doc = en_tokenizer("This is a test.")
    doc._._test_attr = "test"
    return doc

github ICLRandD / Blackstone / blackstone / pipeline / compound_cases.py
def __init__(self, nlp) -> None:
        Doc.set_extension("compound_cases", default=[], force=True)
        self.matcher = Matcher(nlp.vocab)
        common_pattern = [{"ent_type": "CASENAME"}, {"ent_type": "CITATION", "OP": "+"}]
        possessive_pattern = [
            {"ent_type": "CASENAME"},
            {"lower": "case"},
            {"ent_type": "CITATION"},
        ]
        self.matcher.add("compound_case", None, common_pattern, possessive_pattern)
        self.global_matcher = Matcher(nlp.vocab)
        merge_ents = nlp.create_pipe("merge_entities")
        nlp.add_pipe(merge_ents)

github allenai / scispacy / scispacy / abbreviation.py
def __init__(self, nlp) -> None:
        Doc.set_extension("abbreviations", default=[], force=True)
        Span.set_extension("long_form", default=None, force=True)

        self.matcher = Matcher(nlp.vocab)
        self.matcher.add("parenthesis", None, [{'ORTH': '('}, {'OP': '+'}, {'ORTH': ')'}])
        self.global_matcher = Matcher(nlp.vocab)

github explosion / spaCy / examples / pipeline / custom_component_entities.py
"""
        self.label = nlp.vocab.strings[label]  # get entity label ID

        # Set up the PhraseMatcher - it can now take Doc objects as patterns,
        # so even if the list of companies is long, it's very efficient
        patterns = [nlp(org) for org in companies]
        self.matcher = PhraseMatcher(nlp.vocab)
        self.matcher.add("TECH_ORGS", None, *patterns)

        # Register attribute on the Token. We'll be overwriting this based on
        # the matches, so we're only setting a default value, not a getter.
        Token.set_extension("is_tech_org", default=False)

        # Register attributes on Doc and Span via a getter that checks if one of
        # the contained tokens is set to is_tech_org == True.
        Doc.set_extension("has_tech_org", getter=self.has_tech_org)
        Span.set_extension("has_tech_org", getter=self.has_tech_org)

github Roboy / ravestate / modules / ravestate_nlp / __init__.py
    try:
        import en_core_web_sm as spacy_en
    except ImportError:
        # Model is not installed yet: download it, then retry the import.
        from spacy.cli import download as spacy_download
        spacy_download("en_core_web_sm")
        import en_core_web_sm as spacy_en
    spacy_nlp_en = spacy_en.load()
    empty_token = spacy_nlp_en(u" ")[0]

    # TODO: Make agent id configurable, rename nlp:contains-roboy to nlp:agent-mentioned
    about_roboy = ('you', 'roboy', 'robot', 'roboboy', 'your')

    def roboy_getter(doc) -> bool:
        return any(roboy in doc.text.lower() for roboy in about_roboy)

    from spacy.tokens import Doc
    Doc.set_extension('about_roboy', getter=roboy_getter)
    Doc.set_extension('empty_token', getter=lambda doc: empty_token)
    Doc.set_extension('triples', getter=extract_triples)
    Doc.set_extension('yesno', getter=yes_no)
    return spacy_nlp_en

github nickdavidhaynes / spacy-cld / spacy_cld / spacy_cld.py
def __init__(self, attrs=('languages', 'language_scores')):
        self._languages, self._scores = attrs
        Doc.set_extension(self._languages, getter=get_languages)
        Doc.set_extension(self._scores, getter=get_scores)
        Span.set_extension(self._languages, getter=get_languages)
        Span.set_extension(self._scores, getter=get_scores)
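
One pattern shows up throughout these projects: set_extension raises a ValueError if the attribute is already registered, so components either pass force=True to overwrite the registration (as Blackstone and scispacy do above) or check has_extension first (as the ravestate test does). A minimal sketch of both guards, reusing the 'triples' attribute name from the ravestate examples:

from spacy.tokens import Doc

# Overwrite an existing registration outright...
Doc.set_extension("triples", default=None, force=True)

# ...or register only when the attribute does not exist yet.
if not Doc.has_extension("triples"):
    Doc.set_extension("triples", default=None)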