How to use the nltk.tree.Tree class in nltk

To help you get started, we’ve selected a few nltk.tree.Tree examples, based on popular ways it is used in public projects.

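The snippets below come from real projects. As a quick orientation, here is a minimal, self-contained sketch of the nltk.tree.Tree basics they all rely on; the sentence, labels, and tags are invented for illustration:

from nltk.tree import Tree

# Build a tree directly: Tree(label, children), where children can be
# plain leaves (strings or tuples) or nested Tree objects.
np = Tree('NP', [('the', 'DT'), ('dog', 'NN')])
sent = Tree('S', [np, ('barked', 'VBD')])

# Or parse one back from its bracketed string form.
parsed = Tree.fromstring('(S (NP (DT the) (NN dog)) (VP (VBD barked)))')

print(sent.label())     # 'S'
print(sent.leaves())    # the leaf tokens, left to right
print(parsed.height())  # number of levels in the tree
parsed.pretty_print()   # ASCII drawing of the tree (recent NLTK versions)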

github nltk / nltk / nltk / corpus / reader / conll.py View on Github external
def _get_chunked_words(self, grid, chunk_types, tagset=None):
        # n.b.: this method is very similar to conllstr2tree.
        words = self._get_column(grid, self._colmap['words'])
        pos_tags = self._get_column(grid, self._colmap['pos'])
        if tagset and tagset != self._tagset:
            pos_tags = [map_tag(self._tagset, tagset, t) for t in pos_tags]
        chunk_tags = self._get_column(grid, self._colmap['chunk'])

        stack = [Tree(self._root_label, [])]

        for (word, pos_tag, chunk_tag) in zip(words, pos_tags, chunk_tags):
            if chunk_tag == 'O':
                state, chunk_type = 'O', ''
            else:
                (state, chunk_type) = chunk_tag.split('-')
            # If it's a chunk we don't care about, treat it as O.
            if chunk_types is not None and chunk_type not in chunk_types:
                state = 'O'
            # Treat a mismatching I like a B.
            if state == 'I' and chunk_type != stack[-1].label():
                state = 'B'
            # For B or O: close any open chunks
            if state in 'BO' and len(stack) == 2:
                stack.pop()
            # For B: start a new chunk.
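
The excerpt above is cut off just before the B branch. A self-contained sketch of the same stack-based IOB-to-tree conversion, using only the public Tree API (the tag sequence and the 'S' root label are invented for illustration):

from nltk.tree import Tree

iob = [('the', 'DT', 'B-NP'), ('dog', 'NN', 'I-NP'), ('barked', 'VBD', 'O')]

stack = [Tree('S', [])]                  # the root tree
for word, pos, chunk_tag in iob:
    state, chunk_type = ('O', '') if chunk_tag == 'O' else chunk_tag.split('-')
    if state in 'BO' and len(stack) == 2:
        stack.pop()                      # close the currently open chunk
    if state == 'B':
        chunk = Tree(chunk_type, [])
        stack[-1].append(chunk)          # attach the new chunk to the root...
        stack.append(chunk)              # ...and make it the open chunk
    stack[-1].append((word, pos))        # add the word/tag pair to whatever is open

print(stack[0])                          # an S tree with 'the dog' grouped into an NP

NLTK also ships a ready-made helper, nltk.chunk.conlltags2tree, which builds the same kind of chunk tree from (word, pos, chunk) triples.
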
github nltk / nltk / nltk / parse / chart.py View on Github external
    def trees(self, edge, tree_class=Tree, complete=False):
        """
        Return an iterator of the tree structures that are associated
        with ``edge``.

        If ``edge`` is incomplete, then the unexpanded children will be
        encoded as childless subtrees, whose node value is the
        corresponding terminal or nonterminal.

        :rtype: iter(Tree)
        :note: If two trees share a common subtree, then the same
            Tree may be used to encode that subtree in
            both trees.  If you need to eliminate this subtree
            sharing, then create a deep copy of each tree.
        """
        return iter(self._trees(edge, complete, memo={}, tree_class=tree_class))
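
You rarely call trees() on an edge directly; the chart parsers expose the same Tree objects through parse(). A minimal usage sketch with a toy grammar (the grammar and sentence are invented):

import nltk

grammar = nltk.CFG.fromstring("""
    S  -> NP VP
    NP -> 'the' 'dog'
    VP -> 'barked'
""")

parser = nltk.ChartParser(grammar)
for tree in parser.parse(['the', 'dog', 'barked']):
    print(tree)           # each parse is an nltk.tree.Tree
    print(tree.label())   # 'S'
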
github rplevy / clojure-nltk / resources / nltk / draw / cfg.py View on Github external
def _selectprod_cb(self, production):
        canvas = self._treelet_canvas

        self._prodlist.highlight(production)
        if self._treelet is not None: self._treelet.destroy()

        # Convert the production to a tree.
        rhs = production.rhs()
        for (i, elt) in enumerate(rhs):
            if isinstance(elt, Nonterminal): elt = Tree(elt)
        tree = Tree(production.lhs().symbol(), *rhs)

        # Draw the tree in the treelet area.
        fontsize = int(self._size.get())
        node_font = ('helvetica', -(fontsize+4), 'bold')
        leaf_font = ('helvetica', -(fontsize+2))
        self._treelet = tree_to_treesegment(canvas, tree,
                                            node_font=node_font,
                                            leaf_font=leaf_font)
        self._treelet['draggable'] = 1

        # Center the treelet.
        (x1, y1, x2, y2) = self._treelet.bbox()
        w, h = int(canvas['width']), int(canvas['height'])
        self._treelet.move((w-x1-x2)/2, (h-y1-y2)/2)
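
Away from the Tkinter canvas, the production-to-treelet conversion can be written against the current Tree(label, children) signature. A hedged sketch that builds a one-level treelet for one production of a toy grammar (the grammar is invented; treelet.draw() opens a Tkinter window):

from nltk import CFG, Tree
from nltk.grammar import Nonterminal

grammar = CFG.fromstring("S -> NP VP")
production = grammar.productions()[0]

# Nonterminals on the right-hand side become childless subtrees;
# terminals would stay as plain leaf strings.
children = [Tree(elt.symbol(), []) if isinstance(elt, Nonterminal) else elt
            for elt in production.rhs()]
treelet = Tree(production.lhs().symbol(), children)

print(treelet)    # the treelet as a bracketed string
treelet.draw()    # renders it in a window, much like the demo's treelet canvas
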
github sloria / TextBlob / textchunk / util.py View on Github external
delimited by whitespace, and each word should have the form
    ``text/tag``.  Words that do not contain a slash are
    assigned a ``tag`` of None.

    :param s: The string to be converted
    :type s: str
    :param chunk_node: The label to use for chunk nodes
    :type chunk_node: str
    :param top_node: The label to use for the root of the tree
    :type top_node: str
    :rtype: Tree
    """

    WORD_OR_BRACKET = re.compile(r'\[|\]|[^\[\]\s]+')

    stack = [Tree(top_node, [])]
    for match in WORD_OR_BRACKET.finditer(s):
        text = match.group()
        if text[0] == '[':
            if len(stack) != 1:
                raise ValueError('Unexpected [ at char %d' % match.start())
            chunk = Tree(chunk_node, [])
            stack[-1].append(chunk)
            stack.append(chunk)
        elif text[0] == ']':
            if len(stack) != 2:
                raise ValueError('Unexpected ] at char %d' % match.start())
            stack.pop()
        else:
            if sep is None:
                stack[-1].append(text)
            else:
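
The excerpt stops inside the final else branch, where a separator such as '/' would normally be used to split each text/tag token into a (word, tag) pair. A self-contained sketch of the same bracket-to-chunk-tree idea (the function name, labels, and input string are invented for illustration):

import re
from nltk.tree import Tree

def bracketed_to_tree(s, chunk_label='NP', root_label='S', sep='/'):
    """Turn '[ the/DT dog/NN ] barked/VBD' into a chunk Tree."""
    word_or_bracket = re.compile(r'\[|\]|[^\[\]\s]+')
    stack = [Tree(root_label, [])]
    for match in word_or_bracket.finditer(s):
        text = match.group()
        if text == '[':                       # open a new chunk under the root
            chunk = Tree(chunk_label, [])
            stack[-1].append(chunk)
            stack.append(chunk)
        elif text == ']':                     # close the current chunk
            stack.pop()
        else:                                 # a word: split into (text, tag)
            word, _, tag = text.partition(sep)
            stack[-1].append((word, tag or None))
    # (the original also validates bracket nesting and raises ValueError)
    return stack[0]

print(bracketed_to_tree('[ the/DT dog/NN ] barked/VBD'))
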
github sobhe / hazm / hazm / DadeganReader.py View on Github external
chunks.append(Tree(label, np_nodes))
						appended = True
					elif node['ctag'] == 'ADJ' and node['rel'] == 'POSDEP' and tree.nodes[node['head']]['ctag'] != 'CONJ':
						np_nodes = [item]
						i = n - node['head']
						while i > 0:
							label = 'ADJP'
							if type(chunks[-1]) == Tree:
								label = chunks[-1].label()
								leaves = chunks.pop().leaves()
								i -= len(leaves)
								np_nodes = leaves + np_nodes
							else:
								i -= 1
								np_nodes.insert(0, chunks.pop())
						chunks.append(Tree(label, np_nodes))
						appended = True
					for d in node_deps(node):
						if d == n - 1 and type(chunks[-1]) == Tree and chunks[
							-1].label() != 'PP' and appended is not True:
							label = chunks[-1].label()
							if node['rel'] == 'ADV':
								label = 'ADVP'
							elif label in {'ADJP', 'ADVP'}:
								if node['ctag'] == 'N':
									label = 'NP'
								elif node['ctag'] == 'ADJ':
									label = 'ADJP'
							leaves = chunks.pop().leaves()
							leaves.append(item)
							chunks.append(Tree(label, leaves))
							appended = True
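
The hazm code keeps popping the most recent chunk, flattening it with leaves(), and re-wrapping the result under a new label. That pop-and-relabel pattern in isolation, on invented data:

from nltk.tree import Tree

chunks = [Tree('NP', [('the', 'DET'), ('dog', 'N')])]
item = ('big', 'ADJ')    # a new word that should be folded into the last chunk

if isinstance(chunks[-1], Tree) and chunks[-1].label() != 'PP':
    leaves = chunks.pop().leaves()      # flatten the old chunk back to (word, tag) pairs
    leaves.append(item)
    chunks.append(Tree('NP', leaves))   # re-wrap everything under the chosen label

print(chunks[-1])                       # one NP covering all three words
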
github ronaldahmed / robot-navigation / neural-navigation-with-lstm / MARCO / nltk / parser / __init__.py View on Github external
        @type remaining_text: C{list} of C{Token}
        @param remaining_text: The portion of the text that is not yet
            covered by C{stack}.
        """
        if production is None: productions = self._grammar.productions()
        else: productions = [production]
        
        # Try each production, in order.
        for production in productions:
            rhslen = len(production.rhs())
                
            # check if the RHS of a production matches the top of the stack
            if self._match_rhs(production.rhs(), stack[-rhslen:]):

                # combine the tree to reflect the reduction
                tree = Tree(production.lhs().symbol(), stack[-rhslen:])
                stack[-rhslen:] = [tree]

                # We reduced something
                if self._trace:
                    self._trace_reduce(stack, production, remaining_text)
                return production

        # We didn't reduce anything
        return None
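
This snippet comes from a very old NLTK fork bundled with MARCO; the reduce step itself is just "match a production's right-hand side against the top of the stack, then replace that slice with one Tree". A hedged sketch of the same step with the current API (the grammar, helper names, and stack contents are invented):

from nltk import CFG, Tree
from nltk.grammar import Nonterminal

grammar = CFG.fromstring("""
    S  -> NP VP
    NP -> 'the' 'dog'
""")

def rhs_matches(rhs_elt, stack_elt):
    # A nonterminal matches a subtree with that label; a terminal matches a leaf.
    if isinstance(rhs_elt, Nonterminal):
        return isinstance(stack_elt, Tree) and stack_elt.label() == rhs_elt.symbol()
    return stack_elt == rhs_elt

def try_reduce(stack, production):
    rhslen = len(production.rhs())
    if len(stack) >= rhslen and all(
            rhs_matches(r, s) for r, s in zip(production.rhs(), stack[-rhslen:])):
        # Combine the matched slice into a single tree, as in the snippet above.
        stack[-rhslen:] = [Tree(production.lhs().symbol(), stack[-rhslen:])]
        return True
    return False

stack = ['the', 'dog']
for prod in grammar.productions():
    if try_reduce(stack, prod):
        print(stack)    # the two words have been reduced to a single NP Tree
        break
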
github meyersbs / SPLAT / splat / complexity / Util.py View on Github external
def get_frazier_score(treestrings):
	""" Average all of the frazier scores for the given input_file. """
	sentences, total_frazier_score, total_word_count = 0, 0, 0
	for tree_line in treestrings:
		if tree_line.strip() == "":
			continue
		tree = Tree.fromstring(tree_line)
		sentences += 1
		raw_frazier_score = calc_frazier_score(tree, 0, "")
		try:
			total_word_count += get_word_score(tree)
			total_frazier_score += raw_frazier_score
		except ZeroDivisionError:
			print('WARNING: ZeroDivisionError for the tree: ' + str(tree))
			pass

	score = float(total_frazier_score) / float(total_word_count)

	return score
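
Tree.fromstring is the usual entry point when, as here, the trees arrive as bracketed strings. A short sketch of parsing one tree string and pulling out the quantities such scoring code typically needs (the sentence is invented; calc_frazier_score is SPLAT-specific and not reproduced):

from nltk.tree import Tree

tree_line = '(S (NP (DT the) (NN dog)) (VP (VBD barked)))'
tree = Tree.fromstring(tree_line)

print(len(tree.leaves()))    # word count
print(tree.height())         # number of levels from root to deepest leaf
print(tree.pos())            # [('the', 'DT'), ('dog', 'NN'), ('barked', 'VBD')]
for np in tree.subtrees(lambda t: t.label() == 'NP'):
    print(np)                # every NP constituent
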
github nltk / nltk / nltk / parse / rd.py View on Github external
def parse(self, tokens):
        # Inherit docs from ParserI

        tokens = list(tokens)
        self._grammar.check_coverage(tokens)

        # Start a recursive descent parse, with an initial tree
        # containing just the start symbol.
        start = self._grammar.start().symbol()
        initial_tree = Tree(start, [])
        frontier = [()]
        if self._trace:
            self._trace_start(initial_tree, frontier, tokens)
        parses = self._parse(tokens, initial_tree, frontier)

        # Return the parses.
        return parses
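
From user code the same parser is normally reached via nltk.RecursiveDescentParser, whose parse() method yields the finished Tree objects. A minimal usage sketch (toy grammar and sentence invented; note that a left-recursive grammar will make this parser loop):

import nltk

grammar = nltk.CFG.fromstring("""
    S  -> NP VP
    NP -> 'the' 'dog'
    VP -> 'barked'
""")

parser = nltk.RecursiveDescentParser(grammar)
for tree in parser.parse(['the', 'dog', 'barked']):
    tree.pprint()    # print the bracketed Tree for each parse found
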
github sobhe / hazm / hazm / DadeganReader.py View on Github external
chunks.append(Tree(label, np_nodes))
						appended = True
					for d in node_deps(node):
						if d == n - 1 and type(chunks[-1]) == Tree and chunks[
							-1].label() != 'PP' and appended is not True:
							label = chunks[-1].label()
							if node['rel'] == 'ADV':
								label = 'ADVP'
							elif label in {'ADJP', 'ADVP'}:
								if node['ctag'] == 'N':
									label = 'NP'
								elif node['ctag'] == 'ADJ':
									label = 'ADJP'
							leaves = chunks.pop().leaves()
							leaves.append(item)
							chunks.append(Tree(label, leaves))
							appended = True
						elif tree.nodes[d]['rel'] == 'NPREMOD' and appended is not True:
							np_nodes = [item]
							i = n - d
							while i > 0:
								if type(chunks[-1]) == Tree:
									leaves = chunks.pop().leaves()
									i -= len(leaves)
									np_nodes = leaves + np_nodes
								else:
									i -= 1
									np_nodes.insert(0, chunks.pop())
							chunks.append(Tree('NP', np_nodes))
							appended = True
					if not appended:
						label = 'NP'
github nltk / nltk / src / nltk / parser / __init__.py View on Github external
Return a tree token that has a node C{M{lhs}.symbol}, and
            C{M{n}} children.  For each nonterminal element
            C{M{elt[i]}} in the production, the tree token has a
            childless subtree with node value C{M{elt[i]}.symbol}; and
            for each terminal element C{M{elt[j]}}, the tree token has
            a leaf token with type C{M{elt[j]}}.

        @param production: The CFG production that licenses the tree
            token that should be returned.
        @type production: C{CFGProduction}
        """
        LEAF = self.property('LEAF')
        children = []
        for elt in production.rhs():
            if isinstance(elt, Nonterminal):
                children.append(Tree(elt.symbol(), []))
            else:
                # This will be matched.
                children.append(Token({LEAF: elt}))
        return Tree(production.lhs().symbol(), children)
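
This last snippet is from a very old NLTK release that still used Token objects and epydoc markup. With today's API, the same idea, a partial tree whose nonterminal children are childless subtrees and whose terminal children are plain leaves, might be sketched like this (the grammar is invented):

from nltk import CFG, Tree
from nltk.grammar import Nonterminal

grammar = CFG.fromstring("S -> NP 'barked'")
production = grammar.productions()[0]

children = []
for elt in production.rhs():
    if isinstance(elt, Nonterminal):
        children.append(Tree(elt.symbol(), []))   # unexpanded nonterminal: childless subtree
    else:
        children.append(elt)                      # terminal: plain leaf string

print(Tree(production.lhs().symbol(), children))  # a partial tree for the production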