How to use the reynir.incparser.IncrementalParser class in reynir

To help you get started, we’ve selected a few reynir examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

Source: github.com/mideind/Greynir — tests/test_processors.py (view on GitHub)
Mér er sagt að Geysir sé hættur að gjósa.
       
       Geysir er hættur að gjósa.
       
       Geysir er gamall goshver.
       
       Fyrirtækið Apple-búðin selur Apple Mac tölvur.
       Fyrirtækið Origo selur IBM tölvur.
       
       Íslendingar stofnuðu skipafélagið Eimskipafélag Íslands hf.
       
    """
    toklist = tokenize(text)
    fp = Fast_Parser(verbose=False)
    ip = IncrementalParser(fp, toklist, verbose=False)
    # Dict of parse trees in string dump format,
    # stored by sentence index (1-based)
    trees = OrderedDict()
    num_sent = 0
    for p in ip.paragraphs():
        for sent in p.sentences():
            num_sent += 1
            num_tokens = len(sent)
            assert sent.parse(), "Sentence does not parse: " + sent.text
            # Obtain a text representation of the parse tree
            token_dicts = TreeUtility.dump_tokens(sent.tokens, sent.tree)
            # Create a verbose text representation of
            # the highest scoring parse tree
            tree = ParseForestDumper.dump_forest(sent.tree, token_dicts=token_dicts)
            # Add information about the sentence tree's score
            # and the number of tokens
Source: github.com/mideind/Greynir — treeutil.py (view on GitHub)
def _process_toklist(parser, session, toklist, xform):
        """ Low-level utility function to parse token lists and return
            the result of a transformation function (xform) for each sentence """
        pgs = []  # Paragraph list, containing sentences, containing tokens
        ip = IncrementalParser(parser, toklist, verbose=True)
        for p in ip.paragraphs():
            pgs.append([])
            for sent in p.sentences():
                if sent.parse():
                    # Parsed successfully
                    pgs[-1].append(xform(sent.tokens, sent.tree, None))
                else:
                    # Error in parse
                    pgs[-1].append(xform(sent.tokens, None, sent.err_index))

        stats = dict(
            num_tokens=ip.num_tokens,
            num_sentences=ip.num_sentences,
            num_parsed=ip.num_parsed,
            ambiguity=ip.ambiguity,
            num_combinations=ip.num_combinations,
Source: github.com/mideind/Greynir — article.py (view on GitHub)
def _parse(self, enclosing_session=None, verbose=False):
        """ Parse the article content to yield parse trees and annotated token list """
        with SessionContext(enclosing_session) as session:

            # Convert the content soup to a token iterable (generator)
            toklist = Fetcher.tokenize_html(self._url, self._html, session)

            bp = self.get_parser()
            ip = IncrementalParser(bp, toklist, verbose=verbose)

            # List of paragraphs containing a list of sentences containing
            # token lists for sentences in string dump format
            # (1-based paragraph and sentence indices)
            pgs = []  # type: List[List[Dict[str, Any]]]

            # Dict of parse trees in string dump format,
            # stored by sentence index (1-based)
            trees = OrderedDict()

            # Word stem dictionary, indexed by (stem, cat)
            words = defaultdict(int)  # type: Dict[Tuple[str, str], int]
            num_sent = 0

            for p in ip.paragraphs():