How to use the textacy.compat.range_ function in textacy

To help you get started, we’ve selected a few textacy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github chartbeat-labs / textacy / textacy / extract.py View on Github external
# get definition window's leading characters and word types
    def_leads = []
    def_types = []
    for tok in window:
        tok_text = tok.text
        if tok.is_stop:
            def_leads.append(tok_text[0])
            def_types.append("s")
        elif text_utils.is_acronym(tok_text):
            def_leads.append(tok_text[0])
            def_types.append("a")
        elif "-" in tok_text and not tok_text.startswith("-"):
            tok_split = [t[0] for t in tok_text.split("-") if t]
            def_leads.extend(tok_split)
            def_types.extend(
                "H" if i == 0 else "h" for i in compat.range_(len(tok_split))
            )
        else:
            def_leads.append(tok_text[0])
            def_types.append("w")
    def_leads = "".join(def_leads).lower()
    def_types = "".join(def_types)

    # extract alphanumeric characters from acronym
    acr_leads = "".join(c for c in acronym if c.isalnum())
    # handle special cases of '&' and trailing 's'
    acr_leads = acr_leads.replace("&", "a")
    if acr_leads.endswith("s"):
        # bail out if it's only a 2-letter acronym to start with, e.g. 'Is'
        if len(acr_leads) == 2:
            return ("", 0)
        acr_leads = acr_leads[:-1]
github chartbeat-labs / textacy / textacy / extract.py View on Github external
def parse_lcs_matrix(b, start_i, start_j, lcs_length, stack, vectors):
        m = b.shape[0]
        n = b.shape[1]
        for i in compat.range_(start_i, m):
            for j in compat.range_(start_j, n):
                if b[i, j] == 1:
                    s = (i, j)
                    stack.append(s)
                    if lcs_length == 1:
                        vec = [np.NaN] * n
                        for k, l in stack:
                            vec[l] = k
                        vectors.append(vec)
                    else:
                        parse_lcs_matrix(
                            b, i + 1, j + 1, lcs_length - 1, stack, vectors
                        )
                    stack = []
        return vectors
github chartbeat-labs / textacy / textacy / ke / graph_base.py View on Github external
[item[0] for item in top_k_sorted_ranks], c
    )

    # compute initial exprel contribution
    taken = collections.defaultdict(bool)
    contrib = {}
    for vertex in nodes_list:
        # get l-step expanded set
        s = get_l_step_expanded_set([vertex], c)
        # sum up neighbors ranks, i.e. l-step expanded relevance
        contrib[vertex] = sum(ranks[v] for v in s)

    sum_contrib = 0.0
    results = {}
    # greedily select to maximize exprel metric
    for _ in compat.range_(k):
        if not contrib:
            break
        # find word with highest l-step expanded relevance score
        max_word_score = sorted(
            contrib.items(), key=operator.itemgetter(1), reverse=True
        )[0]
        sum_contrib += max_word_score[1]  # contrib[max_word[0]]
        results[max_word_score[0]] = max_word_score[1]
        # find its l-step expanded set
        l_step_expanded_set = get_l_step_expanded_set([max_word_score[0]], c)
        # for each vertex found
        for vertex in l_step_expanded_set:
            # already removed its contribution from neighbors
            if taken[vertex] is True:
                continue
            # remove the contribution of vertex (or some fraction) from its l-step neighbors
github chartbeat-labs / textacy / textacy / keyterms.py View on Github external
def get_l_step_expanded_set(vertices, l):
        """
        Args:
            vertices (iterable[str]): vertices to be expanded
            l (int): how many steps to expand vertices set

        Returns:
            set: the l-step expanded set of vertices
        """
        # add vertices to s
        s = set(vertices)
        # s.update(vertices)
        # for each step
        for _ in compat.range_(l):
            # for each node
            next_vertices = []
            for vertex in vertices:
                # add its neighbors to the next list
                neighbors = graph.neighbors(vertex)
                next_vertices.extend(neighbors)
                s.update(neighbors)
            vertices = set(next_vertices)
        return s
github chartbeat-labs / textacy / textacy / extract.py View on Github external
def build_lcs_matrix(X, Y):
        m = len(X)
        n = len(Y)
        b = np.zeros((m, n), dtype=int)
        c = np.zeros((m, n), dtype=int)
        for i in compat.range_(0, m):
            for j in compat.range_(0, n):
                if X[i] == Y[j]:
                    c[i, j] = c[i - 1, j - 1] + 1
                    b[i, j] = 1
                elif c[i - 1, j] >= c[i, j - 1]:
                    c[i, j] = c[i - 1, j]
                else:
                    c[i, j] = c[i, j - 1]
        return c, b
github chartbeat-labs / textacy / textacy / extract.py View on Github external
def build_lcs_matrix(X, Y):
        m = len(X)
        n = len(Y)
        b = np.zeros((m, n), dtype=int)
        c = np.zeros((m, n), dtype=int)
        for i in compat.range_(0, m):
            for j in compat.range_(0, n):
                if X[i] == Y[j]:
                    c[i, j] = c[i - 1, j - 1] + 1
                    b[i, j] = 1
                elif c[i - 1, j] >= c[i, j - 1]:
                    c[i, j] = c[i - 1, j]
                else:
                    c[i, j] = c[i, j - 1]
        return c, b
github chartbeat-labs / textacy / textacy / extract.py View on Github external
def parse_lcs_matrix(b, start_i, start_j, lcs_length, stack, vectors):
        m = b.shape[0]
        n = b.shape[1]
        for i in compat.range_(start_i, m):
            for j in compat.range_(start_j, n):
                if b[i, j] == 1:
                    s = (i, j)
                    stack.append(s)
                    if lcs_length == 1:
                        vec = [np.NaN] * n
                        for k, l in stack:
                            vec[l] = k
                        vectors.append(vec)
                    else:
                        parse_lcs_matrix(
                            b, i + 1, j + 1, lcs_length - 1, stack, vectors
                        )
                    stack = []
        return vectors