How to use the lark.grammar.Terminal class in lark

To help you get started, we've selected a few lark examples based on popular ways Terminal is used in public projects.
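
Terminal lives in lark.grammar alongside NonTerminal; the two classes are the symbol objects lark uses internally to describe grammar rules. As a minimal sketch of constructing them directly (the names NAME, _NL and expr are invented for illustration):

from lark.grammar import Terminal, NonTerminal

name = Terminal('NAME')                     # a named terminal symbol
newline = Terminal('_NL', filter_out=True)  # marked to be dropped from the parse tree
expr = NonTerminal('expr')                  # a rule (non-terminal) symbol

print(name.name, name.filter_out)           # NAME False
print(newline.filter_out)                   # True

The filter_out flag is how the examples below treat underscore-prefixed terminal names: such tokens are filtered out of the resulting tree.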


From lark-parser/lark: lark/load_grammar.py (view on GitHub)
def symbols_from_strcase(expansion):
    return [Terminal(x, filter_out=x.startswith('_')) if x.isupper() else NonTerminal(x) for x in expansion]
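
In this loader helper the symbol kind is decided purely by naming convention: an upper-case name becomes a Terminal, with filter_out=True for names starting with '_' so those anonymous/filtered terminals are dropped from the tree, and every other name becomes a NonTerminal.
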
From lark-parser/lark: lark/reconstruct.py (view on GitHub)
for r in rules:
    recons_exp = [sym if sym in nonterminals else Terminal(sym.name)
                  for sym in r.expansion if not is_discarded_terminal(sym)]

    # Skip self-recursive constructs
    if recons_exp == [r.origin]:
        continue

    sym = NonTerminal(r.alias) if r.alias else r.origin

    yield Rule(sym, recons_exp, alias=MakeMatchTree(sym.name, r.expansion))

for origin, rule_aliases in aliases.items():
    for alias in rule_aliases:
        yield Rule(origin, [Terminal(alias)], alias=MakeMatchTree(origin.name, [NonTerminal(alias)]))
    yield Rule(origin, [Terminal(origin.name)], alias=MakeMatchTree(origin.name, [origin]))
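
The reconstructor inverts the compiled rules: any expansion symbol that is not a known non-terminal is re-wrapped as Terminal(sym.name), discarded (filtered-out) terminals are skipped, and extra unit rules with a single Terminal on the right-hand side are emitted for each alias and each rule origin, so that already-built subtrees can later be matched by name (see the _match example at the end of this page).
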
From lark-parser/lark: lark/grammar.py (view on GitHub)


class Rule(Serialize):
    """
        origin : a symbol
        expansion : a list of symbols
        order : index of this expansion amongst all rules of the same name
    """
    __slots__ = ('origin', 'expansion', 'alias', 'options', 'order', '_hash')

    __serialize_fields__ = 'origin', 'expansion', 'order', 'alias', 'options'
    __serialize_namespace__ = Terminal, NonTerminal, RuleOptions

    def __init__(self, origin, expansion, order=0, alias=None, options=None):
        self.origin = origin
        self.expansion = expansion
        self.alias = alias
        self.order = order
        self.options = options or RuleOptions()
        self._hash = hash((self.origin, tuple(self.expansion)))

    def _deserialize(self):
        self._hash = hash((self.origin, tuple(self.expansion)))

    def __str__(self):
        return '<%s : %s>' % (self.origin.name, ' '.join(x.name for x in self.expansion))

    def __repr__(self):
        return 'Rule(%r, %r, %r, %r)' % (self.origin, self.expansion, self.alias, self.options)
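
Terminal shows up here in __serialize_namespace__: when a serialized Rule is loaded back, lark needs Terminal, NonTerminal and RuleOptions in scope to rebuild its origin and expansion. The same classes can be combined by hand; a minimal sketch, with invented symbol names:

from lark.grammar import Rule, Terminal, NonTerminal

# roughly:  assignment: NAME "=" expr
rule = Rule(NonTerminal('assignment'),
            [Terminal('NAME'), Terminal('EQUAL', filter_out=True), NonTerminal('expr')])
print(rule)   # <assignment : NAME EQUAL expr>, via the __str__ shown above
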
From lark-parser/lark: lark/parsers/cyk.py (view on GitHub)
def _parse(s, g):
    """Parses sentence 's' using CNF grammar 'g'."""
    # The CYK table. Indexed with a 2-tuple: (start pos, end pos)
    table = defaultdict(set)
    # Top-level structure is similar to the CYK table. Each cell is a dict from
    # rule name to the best (lightest) tree for that rule.
    trees = defaultdict(dict)
    # Populate base case with existing terminal production rules
    for i, w in enumerate(s):
        for terminal, rules in g.terminal_rules.items():
            if match(terminal, w):
                for rule in rules:
                    table[(i, i)].add(rule)
                    if (rule.lhs not in trees[(i, i)] or
                        rule.weight < trees[(i, i)][rule.lhs].weight):
                        trees[(i, i)][rule.lhs] = RuleNode(rule, [T(w)], weight=rule.weight)

    # Iterate over lengths of sub-sentences
    for l in xrange(2, len(s) + 1):
        # Iterate over sub-sentences with the given length
        for i in xrange(len(s) - l + 1):
            # Choose partition of the sub-sentence in [1, l)
            for p in xrange(i + 1, i + l):
                span1 = (i, p - 1)
                span2 = (p, i + l - 1)
                for r1, r2 in itertools.product(table[span1], table[span2]):
                    for rule in g.nonterminal_rules.get((r1.lhs, r2.lhs), []):
                        table[(i, i + l - 1)].add(rule)
                        r1_tree = trees[span1][r1.lhs]
                        r2_tree = trees[span2][r2.lhs]
                        rule_total_weight = rule.weight + r1_tree.weight + r2_tree.weight
                        if (rule.lhs not in trees[(i, i + l - 1)]
                                or rule_total_weight < trees[(i, i + l - 1)][rule.lhs].weight):
                            trees[(i, i + l - 1)][rule.lhs] = RuleNode(
                                rule, [r1_tree, r2_tree], weight=rule_total_weight)
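
The base case of the CYK table is driven entirely by terminal rules: for every input token w, each unit rule whose terminal matches w is added to the diagonal cell, keeping only the lightest tree per left-hand side; longer spans are then combined from the binary rules. The terminal keys come from the CnfWrapper shown next.
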
From lark-parser/lark: lark/parsers/cyk.py (view on GitHub)
def __init__(self, grammar):
    super(CnfWrapper, self).__init__()
    self.grammar = grammar
    self.rules = grammar.rules
    self.terminal_rules = defaultdict(list)
    self.nonterminal_rules = defaultdict(list)
    for r in self.rules:
        # Validate that the grammar is CNF and populate auxiliary data structures.
        assert isinstance(r.lhs, NT), r
        if len(r.rhs) not in [1, 2]:
            raise ParseError("CYK doesn't support empty rules")
        if len(r.rhs) == 1 and isinstance(r.rhs[0], T):
            self.terminal_rules[r.rhs[0]].append(r)
        elif len(r.rhs) == 2 and all(isinstance(x, NT) for x in r.rhs):
            self.nonterminal_rules[tuple(r.rhs)].append(r)
        else:
            assert False, r
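
CnfWrapper indexes the grammar for the parser above: unit productions whose right-hand side is a single terminal are grouped in terminal_rules, keyed by that terminal, while binary productions over two non-terminals go into nonterminal_rules, keyed by the pair. In this module Terminal and NonTerminal are used under the short aliases T and NT.
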
From lark-parser/lark: lark/load_grammar.py (view on GitHub)
def value(self, v):
    v, = v  # unpack the single child of this node
    if isinstance(v, Tree):
        return v
    elif v.type == 'RULE':
        return NonTerminal(Str(v.value))
    elif v.type == 'TERMINAL':
        return Terminal(Str(v.value), filter_out=v.startswith('_'))
    assert False
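
This transformer takes the opposite route from the string-case helper at the top of the page: here the token type decides the symbol, with RULE tokens becoming NonTerminal and TERMINAL tokens becoming Terminal, again passing filter_out=True for underscore-prefixed names.
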
From HypothesisWorks/hypothesis: hypothesis-python/src/hypothesis/extra/lark.py (view on GitHub)
        self.grammar = grammar

        if "start" in getfullargspec(grammar.grammar.compile).args:
            terminals, rules, ignore_names = grammar.grammar.compile(start)
        else:  # pragma: no cover
            # This branch is to support lark <= 0.7.1, without the start argument.
            terminals, rules, ignore_names = grammar.grammar.compile()

        self.names_to_symbols = {}

        for r in rules:
            t = r.origin
            self.names_to_symbols[t.name] = t

        for t in terminals:
            self.names_to_symbols[t.name] = Terminal(t.name)

        self.start = st.sampled_from([self.names_to_symbols[s] for s in start])

        self.ignored_symbols = tuple(self.names_to_symbols[n] for n in ignore_names)

        self.terminal_strategies = {
            t.name: st.from_regex(t.pattern.to_regexp(), fullmatch=True)
            for t in terminals
        }
        unknown_explicit = set(explicit) - get_terminal_names(
            terminals, rules, ignore_names
        )
        if unknown_explicit:
            raise InvalidArgument(
                "The following arguments were passed as explicit_strategies, "
                "but there is no such terminal production in this grammar: %r"
From lark-parser/lark: lark/reconstruct.py (view on GitHub)
def _match(self, term, token):
    if isinstance(token, Tree):
        return Terminal(token.data) == term
    elif isinstance(token, Token):
        return term == Terminal(token.type)
    assert False
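
The matching helper works because Terminal instances compare equal by name: a partially rebuilt Tree is probed by wrapping its .data in a Terminal, and a Token by wrapping its .type. A minimal sketch with a hypothetical token:

from lark import Token
from lark.grammar import Terminal

term = Terminal('NUMBER')            # hypothetical terminal name
tok = Token('NUMBER', '42')          # hypothetical token
print(term == Terminal(tok.type))    # True: Terminals compare equal by name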