How to use the lark.grammar.NonTerminal function in lark

To help you get started, we’ve selected a few lark examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github lark-parser / lark / lark / parsers / cyk.py View on Github external
def _term(g):
    """Applies the TERM rule on 'g' (see top comment)."""
    all_t = {x for rule in g.rules for x in rule.rhs if isinstance(x, T)}
    t_rules = {t: Rule(NT('__T_%s' % str(t)), [t], weight=0, alias='Term') for t in all_t}
    new_rules = []
    for rule in g.rules:
        if len(rule.rhs) > 1 and any(isinstance(x, T) for x in rule.rhs):
            new_rhs = [t_rules[x].lhs if isinstance(x, T) else x for x in rule.rhs]
            new_rules.append(Rule(rule.lhs, new_rhs, weight=rule.weight, alias=rule.alias))
            new_rules.extend(v for k, v in t_rules.items() if k in rule.rhs)
        else:
            new_rules.append(rule)
    return Grammar(new_rules)
github lark-parser / lark / lark / parsers / grammar_analysis.py View on Github external
if len(rules) != len(set(rules)):
            duplicates = [item for item, count in Counter(rules).items() if count > 1]
            raise GrammarError("Rules defined twice: %s" % ', '.join(str(i) for i in duplicates))

        for r in rules:
            for sym in r.expansion:
                if not (sym.is_term or sym in self.rules_by_origin):
                    raise GrammarError("Using an undefined rule: %s" % sym) # TODO test validation

        self.start_states = {start: self.expand_rule(root_rule.origin)
                             for start, root_rule in root_rules.items()}

        self.end_states = {start: fzset({RulePtr(root_rule, len(root_rule.expansion))})
                           for start, root_rule in root_rules.items()}

        lr0_root_rules = {start: Rule(NonTerminal('$root_' + start), [NonTerminal(start)])
                for start in parser_conf.start}

        lr0_rules = parser_conf.rules + list(lr0_root_rules.values())
        assert(len(lr0_rules) == len(set(lr0_rules)))

        self.lr0_rules_by_origin = classify(lr0_rules, lambda r: r.origin)

        # cache RulePtr(r, 0) in r (no duplicate RulePtr objects)
        self.lr0_start_states = {start: LR0ItemSet([RulePtr(root_rule, 0)], self.expand_rule(root_rule.origin, self.lr0_rules_by_origin))
                for start, root_rule in lr0_root_rules.items()}

        self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(rules)
github lark-parser / lark / lark / parsers / earley.py View on Github external
def parse(self, stream, start):
        assert start, start
        start_symbol = NonTerminal(start)

        columns = [set()]
        to_scan = set()     # The scan buffer. 'Q' in E.Scott's paper.

        ## Predict for the start_symbol.
        # Add predicted items to the first Earley set (for the predictor) if they
        # result in a non-terminal, or the scanner if they result in a terminal.
        for rule in self.predictions[start_symbol]:
            item = Item(rule, 0, 0)
            if item.expect in self.TERMINALS:
                to_scan.add(item)
            else:
                columns[0].add(item)

        to_scan = self._parse(stream, columns, to_scan, start_symbol)
github lark-parser / lark / lark / load_grammar.py View on Github external
# Empty rule; assert all other attributes are equal
                    assert len({(r.alias, r.order, r.options) for r in dups}) == len(dups)

            # Remove duplicates
            compiled_rules = list(set(compiled_rules))


        # Filter out unused rules
        while True:
            c = len(compiled_rules)
            used_rules = {s for r in compiled_rules
                                for s in r.expansion
                                if isinstance(s, NonTerminal)
                                and s != r.origin}
            used_rules |= {NonTerminal(s) for s in start}
            compiled_rules = [r for r in compiled_rules if r.origin in used_rules]
            if len(compiled_rules) == c:
                break

        # Filter out unused terminals
        used_terms = {t.name for r in compiled_rules
                             for t in r.expansion
                             if isinstance(t, Terminal)}
        terminals = [t for t in terminals if t.name in used_terms or t.name in self.ignore]

        return terminals, compiled_rules, self.ignore