How to use the textacy.constants module in textacy

To help you get started, we’ve selected a few textacy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github chartbeat-labs / textacy / textacy / extract.py View on Github external
"""
    Args:
        patstr (str)

    Returns:
        List[dict]
    """
    pattern = []
    for tokpatstr in constants.RE_MATCHER_TOKPAT_DELIM.split(patstr):
        parts = tokpatstr.split(":")
        if 2 <= len(parts) <= 3:
            attr = parts[0]
            attr_val = parts[1]
            if attr and attr_val:
                # handle special bool and int attribute values
                special_val = constants.RE_MATCHER_SPECIAL_VAL.match(attr_val)
                if special_val:
                    attr_val = eval(special_val.group(0))
                tokpat = {attr: attr_val}
            # handle wildcard tokens
            else:
                tokpat = {}
            # handle quantifier ops
            try:
                op_val = parts[2]
                if op_val in constants.MATCHER_VALID_OPS:
                    tokpat["OP"] = op_val
                else:
                    raise ValueError(
                        "op={} invalid; valid choices are {}".format(
                            op_val, constants.MATCHER_VALID_OPS)
                    )
github chartbeat-labs / textacy / textacy / resources / depeche_mood.py View on Github external
def __init__(
        self,
        data_dir=constants.DEFAULT_DATA_DIR.joinpath(NAME),
        lang="en",
        word_rep="lemmapos",
        min_freq=3,
    ):
        """
        Validate the requested language and word representation, then
        record the language on the instance.

        Args:
            data_dir: Defaults to ``constants.DEFAULT_DATA_DIR.joinpath(NAME)``.
                Not read by the statements shown in this block.
            lang (str): Must be a key of ``self._lang_map``.
            word_rep (str): Must be a member of ``self._word_reps``.
            min_freq (int): Not read by the statements shown in this block.

        Raises:
            ValueError: If ``lang`` or ``word_rep`` is not a valid option.
        """
        super().__init__(NAME, meta=META)

        # Fail fast on an unsupported language, listing the accepted keys
        # in sorted order so the message is stable.
        lang_is_known = lang in self._lang_map
        if not lang_is_known:
            valid_langs = sorted(self._lang_map.keys())
            lang_msg = "lang='{}' is invalid; valid options are {}".format(
                lang, valid_langs)
            raise ValueError(lang_msg)

        # Same check for the word representation.
        word_rep_is_known = word_rep in self._word_reps
        if not word_rep_is_known:
            word_rep_msg = "word_rep='{}' is invalid; valid options are {}".format(
                word_rep, self._word_reps)
            raise ValueError(word_rep_msg)

        self.lang = lang
github chartbeat-labs / textacy / textacy / preprocess.py View on Github external
def normalize_whitespace(text):
    """
    Normalize the whitespace in ``text`` in two passes and strip the result.

    First, every match of ``constants.RE_LINEBREAK`` is replaced with a
    newline; then every match of ``constants.RE_NONBREAKING_SPACE`` in that
    intermediate string is replaced with a single space. Leading and
    trailing whitespace is removed from the final string.
    """
    # Linebreaks are handled first; the spacing pass runs on its output.
    linebreaks_collapsed = constants.RE_LINEBREAK.sub(r"\n", text)
    spacing_collapsed = constants.RE_NONBREAKING_SPACE.sub(" ", linebreaks_collapsed)
    return spacing_collapsed.strip()
github chartbeat-labs / textacy / textacy / preprocess.py View on Github external
def replace_urls(text, replace_with="*URL*"):
    """
    Substitute ``replace_with`` for URLs found in ``text``.

    Matches of ``constants.RE_SHORT_URL`` are replaced first; matches of
    ``constants.RE_URL`` are then replaced in the resulting string.
    """
    short_urls_replaced = constants.RE_SHORT_URL.sub(replace_with, text)
    all_urls_replaced = constants.RE_URL.sub(replace_with, short_urls_replaced)
    return all_urls_replaced
github chartbeat-labs / textacy / textacy / preprocess.py View on Github external
def replace_emails(text, replace_with="*EMAIL*"):
    """
    Substitute ``replace_with`` for every match of ``constants.RE_EMAIL``
    in ``text`` and return the resulting string.
    """
    email_pattern = constants.RE_EMAIL
    emails_replaced = email_pattern.sub(replace_with, text)
    return emails_replaced
github chartbeat-labs / textacy / textacy / preprocess.py View on Github external
def replace_phone_numbers(text, replace_with="*PHONE*"):
    """
    Substitute ``replace_with`` for every match of ``constants.RE_PHONE``
    in ``text`` and return the resulting string.
    """
    phone_pattern = constants.RE_PHONE
    phones_replaced = phone_pattern.sub(replace_with, text)
    return phones_replaced
github chartbeat-labs / textacy / textacy / datasets / udhr.py View on Github external
    def __init__(self, data_dir=constants.DEFAULT_DATA_DIR.joinpath(NAME)):
        """
        Set up the on-disk locations used by this dataset.

        Args:
            data_dir: Directory holding the dataset; converted with
                ``utils.to_path`` and resolved before being stored.
                Defaults to ``constants.DEFAULT_DATA_DIR.joinpath(NAME)``.
        """
        super().__init__(NAME, meta=META)

        # Derive every path from the resolved base directory.
        resolved_dir = utils.to_path(data_dir).resolve()
        texts_dir = resolved_dir.joinpath("udhr_txt")
        index_file = texts_dir.joinpath("index.xml")

        self.data_dir = resolved_dir
        self._texts_dirpath = texts_dir
        self._index_filepath = index_file

        # Left unset here; nothing in this initializer populates them.
        self._index = None
        self.langs = None
github chartbeat-labs / textacy / textacy / spacy_utils.py View on Github external
def get_subjects_of_verb(verb):
    """
    Return all subjects of a verb according to the dependency parse.

    Args:
        verb: Object exposing a ``lefts`` iterable whose items have a
            ``dep_`` attribute (presumably a spaCy ``Token`` -- confirm
            against callers).

    Returns:
        list: Items of ``verb.lefts`` whose ``dep_`` is in
        ``constants.SUBJ_DEPS``, followed by whatever ``_get_conjuncts``
        yields for the subjects visited.
    """
    # direct subjects: left-side items whose dependency label is a subject label
    subjs = [tok for tok in verb.lefts if tok.dep_ in constants.SUBJ_DEPS]
    # get additional conjunct subjects
    # NOTE(review): the generator below iterates ``subjs`` while ``extend``
    # is appending to that same list, so items added during this call may
    # themselves be visited (conjuncts of conjuncts). Confirm this is
    # intended before restructuring this line.
    subjs.extend(tok for subj in subjs for tok in _get_conjuncts(subj))
    return subjs
github chartbeat-labs / textacy / textacy / spacy_utils.py View on Github external
def get_objects_of_verb(verb):
    """
    Return all objects of a verb according to the dependency parse,
    including open clausal complements.

    Args:
        verb: Object exposing a ``rights`` iterable whose items have a
            ``dep_`` attribute (presumably a spaCy ``Token`` -- confirm
            against callers).

    Returns:
        list: Items of ``verb.rights`` whose ``dep_`` is in
        ``constants.OBJ_DEPS``, then items of ``verb.rights`` whose
        ``dep_`` equals ``"xcomp"``, then whatever ``_get_conjuncts``
        yields for the objects visited.
    """
    # direct objects: right-side items whose dependency label is an object label
    objs = [tok for tok in verb.rights if tok.dep_ in constants.OBJ_DEPS]
    # get open clausal complements (xcomp)
    objs.extend(tok for tok in verb.rights if tok.dep_ == "xcomp")
    # get additional conjunct objects
    # NOTE(review): the generator below iterates ``objs`` while ``extend``
    # is appending to that same list, so items added during this call may
    # themselves be visited (conjuncts of conjuncts). Confirm this is
    # intended before restructuring this line.
    objs.extend(tok for obj in objs for tok in _get_conjuncts(obj))
    return objs
github chartbeat-labs / textacy / textacy / cache.py View on Github external
weighting used in building DepecheMood matrix.

    Returns:
        Dict[dict]: Top-level keys are Lemma#POS strings, values are nested dicts
        with emotion names as keys and weights as floats.

    References:
        Staiano, J., & Guerini, M. (2014). "DepecheMood: a Lexicon for Emotion
        Analysis from Crowd-Annotated News". Proceedings of ACL-2014. (arXiv:1405.1605)
        Data available at https://github.com/marcoguerini/DepecheMood/releases .

    See Also:
        :func:`download_depechemood <textacy.lexicon_methods.download_depechemood>`
    """
    if data_dir is None:
        data_dir = os.path.join(constants.DEFAULT_DATA_DIR, "depechemood", "DepecheMood_V1.0")
    filepath = os.path.join(
        data_dir, "DepecheMood_{weighting}.txt".format(weighting=weighting)
    )
    delimiter = b"\t" if compat.PY2 else "\t"
    # HACK: Py2's csv module fail
    try:
        with io.open(filepath, mode="rt") as csvfile:
            csvreader = csv.reader(csvfile, delimiter=delimiter)
            rows = list(csvreader)
    except (OSError, IOError):
        LOGGER.exception(
            "Unable to load DepecheMood from %s."
            "\n\nHave you downloaded the data? If not, you can use the "
            "`textacy.lexicon_methods.download_depechemood()` function."
            "\n\nIf so, have you given the correct `data_dir`? The directory "
            "should have a `DepecheMood_V1.0` subdirectory, within which are "