How to use the reynir.fastparser.Fast_Parser class in reynir

To help you get started, we’ve selected a few reynir examples based on popular ways it is used in public projects.

github mideind / Greynir / tests / test_processors.py (View on GitHub)
    text = """
        Í miðbæ Reykjavíkur er herrafataverslunin Geysir.

        Mér er sagt að Geysir sé hættur að gjósa.

        Geysir er hættur að gjósa.

        Geysir er gamall goshver.

        Fyrirtækið Apple-búðin selur Apple Mac tölvur.
        Fyrirtækið Origo selur IBM tölvur.

        Íslendingar stofnuðu skipafélagið Eimskipafélag Íslands hf.

    """
    toklist = tokenize(text)
    fp = Fast_Parser(verbose=False)
    ip = IncrementalParser(fp, toklist, verbose=False)
    # Dict of parse trees in string dump format,
    # stored by sentence index (1-based)
    trees = OrderedDict()
    num_sent = 0
    for p in ip.paragraphs():
        for sent in p.sentences():
            num_sent += 1
            num_tokens = len(sent)
            assert sent.parse(), "Sentence does not parse: " + sent.text
            # Obtain a text representation of the parse tree
            token_dicts = TreeUtility.dump_tokens(sent.tokens, sent.tree)
            # Create a verbose text representation of
            # the highest scoring parse tree
            tree = ParseForestDumper.dump_forest(sent.tree, token_dicts=token_dicts)
            # Add information about the sentence tree's score
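The excerpt above comes from a test module and omits its imports and setup. Below is a minimal self-contained sketch of the same pattern; the import paths follow the reynir package and the Greynir repository layout, but treat them as assumptions (in particular the module path of IncrementalParser):

from tokenizer import tokenize
from reynir.fastparser import Fast_Parser, ParseForestDumper
from reynir.incparser import IncrementalParser  # module path assumed

text = "Geysir er gamall goshver."  # any Icelandic text
toklist = tokenize(text)
with Fast_Parser(verbose=False) as fp:  # context manager handles cleanup
    ip = IncrementalParser(fp, toklist, verbose=False)
    for p in ip.paragraphs():
        for sent in p.sentences():
            if sent.parse():
                # Text dump of the highest-scoring parse tree
                print(ParseForestDumper.dump_forest(sent.tree))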
github mideind / Greynir / treeutil.py (View on GitHub)
    @staticmethod
    def raw_tag_toklist(session, toklist, root=None):
        """ Parse a token list and return the parsed paragraphs as lists
            of sentences, where each sentence is a list of tagged tokens.
            The result does not include a name register. """

        def xform(tokens, tree, err_index):
            """ Transformation function that simply returns a list of POS-tagged,
                normalized tokens for the sentence """
            return TreeUtility.dump_tokens(tokens, tree, error_index=err_index)

        with Fast_Parser(verbose=False, root=root) as parser:
            return TreeUtility._process_toklist(parser, session, toklist, xform)
github mideind / Greynir / treeutil.py (View on GitHub)
    @staticmethod
    def tag_toklist(session, toklist, all_names=False):
        """ Parse a token list and return the parsed paragraphs as lists
            of sentences, where each sentence is a list of tagged tokens """

        def xform(tokens, tree, err_index):
            """ Transformation function that simply returns a list of POS-tagged,
                normalized tokens for the sentence """
            return TreeUtility.dump_tokens(tokens, tree, error_index=err_index)

        with Fast_Parser(verbose=False) as parser:  # Don't emit diagnostic messages
            pgs, stats = TreeUtility._process_toklist(parser, session, toklist, xform)
        from queries.builtin import create_name_register

        register = create_name_register(toklist, session, all_names=all_names)
        return pgs, stats, register
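tag_toklist() extends raw_tag_toklist() above by also building a name register from the token list after parsing; note that the Fast_Parser context manager wraps only the parsing step. A hedged call-site sketch follows; SessionContext is assumed to be the Greynir repo's database session helper (db package) and is not part of the reynir library:

from db import SessionContext  # Greynir repo module, an assumption
from tokenizer import tokenize
from treeutil import TreeUtility

with SessionContext() as session:  # database session for name lookups
    toklist = list(tokenize("Geysir er gamall goshver."))
    pgs, stats, register = TreeUtility.tag_toklist(session, toklist, all_names=True)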
github mideind / Greynir / treeutil.py (View on GitHub)
        full_tree = None

        def xform(tokens, tree, err_index):
            """ Transformation function that yields a simplified parse tree
                with POS-tagged, normalized terminal leaves for the sentence """
            if err_index is not None:
                return TreeUtility.dump_tokens(tokens, tree, error_index=err_index)
            # Successfully parsed: return a simplified tree for the sentence
            nonlocal full_tree
            # We are assuming that there is only one parsed sentence
            if full_tree is None:
                # Note the full tree of the first parsed paragraph
                full_tree = tree
            return TreeUtility._simplify_tree(tokens, tree)

        with Fast_Parser(verbose=False) as parser:
            pgs, stats, _ = TreeUtility._process_text(
                parser, session, text, all_names, xform
            )

        if (
            not pgs
            or stats["num_parsed"] == 0
            or not pgs[0]
            or any("err" in t for t in pgs[0][0])
        ):
            # The first sentence didn't parse: let's not beat around the bush with that fact
            return (None, None, stats)

        # Return the simplified tree, full tree and stats
        assert full_tree is not None
        return (pgs[0][0], full_tree, stats)
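As this excerpt shows, the xform callback is the extension point of the TreeUtility helpers: it is called once per sentence with the token list, the parse tree, and an err_index that is not None when the sentence failed to parse. A minimal callback, following the contract visible in the excerpts on this page:

def xform(tokens, tree, err_index):
    if err_index is not None:
        # Parse failure: fall back to a flat dump of POS-tagged tokens
        return TreeUtility.dump_tokens(tokens, tree, error_index=err_index)
    # Parse success: return a simplified tree for the sentence
    return TreeUtility._simplify_tree(tokens, tree)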
github mideind / Greynir / query.py (View on GitHub)
            # binary grammar file, regardless of file timestamps. This helps
            # in query development, as query grammar fragment strings may change
            # without any .grammar source file change (which is the default
            # trigger for generating new binary grammar files).
            return self.read_from_generator(
                fname,
                grammar_generator(),
                verbose,
                binary_fname,
                force_new_binary=Settings.DEBUG,
            )
        except (IOError, OSError):
            raise GrammarError("Unable to open or read grammar file", fname, 0)


class QueryParser(Fast_Parser):

    """ A subclass of Fast_Parser, specialized to parse queries """

    _GRAMMAR_BINARY_FILE = Fast_Parser._GRAMMAR_FILE + ".query.bin"

    # Keep a separate grammar class instance and time stamp for
    # QueryParser. This Python sleight-of-hand overrides
    # class attributes that are defined in BIN_Parser, see binparser.py.
    _grammar_ts = None
    _grammar = None
    _grammar_class = QueryGrammar

    # Also keep separate class instances of the C grammar and its timestamp
    _c_grammar = ffi.NULL
    _c_grammar_ts = None
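The same pattern applies to any specialized parser: subclass Fast_Parser and shadow the class-level grammar caches so that the subclass compiles and caches its grammar independently of the base class. A hedged sketch (the ffi import location is an assumption; a custom _grammar_class, as QueryParser uses QueryGrammar above, is optional):

from reynir.fastparser import Fast_Parser, ffi  # ffi location assumed

class MyParser(Fast_Parser):

    """ A Fast_Parser subclass with its own grammar cache """

    # Use a separate binary grammar file for this subclass
    _GRAMMAR_BINARY_FILE = Fast_Parser._GRAMMAR_FILE + ".my.bin"

    # Shadow the cache attributes defined in BIN_Parser (binparser.py)
    # so this subclass does not share the base class's compiled grammar
    _grammar = None
    _grammar_ts = None
    _c_grammar = ffi.NULL
    _c_grammar_ts = None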
github mideind / Greynir / main.py (View on GitHub)
    log_str = (
        "Greynir instance starting with "
        "host={0}:{1}, db_host={2}:{3} on Python {4}".format(
            Settings.HOST,
            Settings.PORT,
            Settings.DB_HOSTNAME,
            Settings.DB_PORT,
            sys.version.replace("\n", " "),
        )
    )
    logging.info(log_str)
    print(log_str)
    sys.stdout.flush()

    # Running as a server module: pre-load the grammar into memory
    with Fast_Parser() as fp:
        pass
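Because the compiled grammar is cached in class-level attributes (see the QueryParser excerpt above), this warm-up instantiation pays the grammar loading cost once at startup; subsequent Fast_Parser instances reuse the cached grammar.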
github mideind / Greynir / article.py (View on GitHub)
    @classmethod
    def _init_class(cls) -> None:
        """ Initialize class attributes """
        if cls._parser is None:
            cls._parser = Fast_Parser(verbose=False)  # Don't emit diagnostic messages
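Here the opposite lifecycle choice is made: rather than opening a short-lived with block per parse, a single Fast_Parser instance is created lazily and cached as a class attribute, to be reused for every article the class processes.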
github mideind / Greynir / treeutil.py (View on GitHub)
                else:
                    # Terminal: append the text
                    result.append(node["x"].replace(" ", "_"))

            # This uses a custom simplification scheme
            simple_tree = TreeUtility._simplify_tree(
                tokens,
                tree,
                nt_map=_TEST_NT_MAP,
                id_map=_TEST_ID_MAP,
                terminal_map=_TEST_TERMINAL_MAP,
            )
            push(simple_tree)
            return "".join(result)

        with Fast_Parser(verbose=False) as parser:
            pgs, stats, _ = TreeUtility._process_text(
                parser, session, text, all_names=None, xform=xform
            )
        # pgs is a list of paragraphs, each being a list of sentences
        # To access the first parsed sentence, use pgs[0][0]
        return (pgs, stats)
github mideind / Greynir / treeutil.py (View on GitHub)
    @staticmethod
    def tag_text(session, text, all_names=False):
        """ Parse plain text and return the parsed paragraphs as lists of sentences
            where each sentence is a list of tagged tokens """
        # Don't emit diagnostic messages
        with Fast_Parser(verbose=False) as parser:
            return TreeUtility.raw_tag_text(parser, session, text, all_names=all_names)