Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def symbols_from_strcase(expansion):
    """Map each name in *expansion* to a grammar symbol by letter case.

    All-uppercase names become Terminals (marked ``filter_out`` when they
    start with an underscore); every other name becomes a NonTerminal.
    """
    symbols = []
    for name in expansion:
        if name.isupper():
            symbols.append(Terminal(name, filter_out=name.startswith('_')))
        else:
            symbols.append(NonTerminal(name))
    return symbols
# NOTE(review): fragment of a generator body — the enclosing `def` is not
# visible in this chunk (it contains `yield`).  It appears to rewrite grammar
# rules for tree reconstruction; confirm against the full function.
for r in rules:
    # Keep known nonterminals as-is; demote any other symbol to a Terminal
    # carrying its name.  Discarded terminals are dropped entirely.
    recons_exp = [sym if sym in nonterminals else Terminal(sym.name)
                  for sym in r.expansion if not is_discarded_terminal(sym)]

    # Skip self-recursive constructs
    if recons_exp == [r.origin]:
        continue

    # A rule alias introduces its own nonterminal head.
    sym = NonTerminal(r.alias) if r.alias else r.origin

    yield Rule(sym, recons_exp, alias=MakeMatchTree(sym.name, r.expansion))

for origin, rule_aliases in aliases.items():
    # Emit one unit rule per alias, plus one for the origin name itself.
    for alias in rule_aliases:
        yield Rule(origin, [Terminal(alias)], alias=MakeMatchTree(origin.name, [NonTerminal(alias)]))
    yield Rule(origin, [Terminal(origin.name)], alias=MakeMatchTree(origin.name, [origin]))
# NOTE(review): trailing fragment of a multi-line expression (presumably a
# tuple of option fields) whose opening parenthesis is not visible in this
# chunk — confirm against the enclosing method.
self.keep_all_tokens,
self.expand1,
self.priority,
)
class Rule(Serialize):
    """A single grammar production: *origin* expands into *expansion*.

    origin : a symbol
    expansion : a list of symbols
    order : index of this expansion amongst all rules of the same name
    """
    __slots__ = ('origin', 'expansion', 'alias', 'options', 'order', '_hash')

    # _hash is deliberately excluded from serialization; it is rebuilt by
    # _deserialize() after loading.
    __serialize_fields__ = 'origin', 'expansion', 'order', 'alias', 'options'
    __serialize_namespace__ = Terminal, NonTerminal, RuleOptions

    def __init__(self, origin, expansion, order=0, alias=None, options=None):
        self.origin = origin
        self.expansion = expansion
        self.alias = alias
        self.order = order
        self.options = options or RuleOptions()
        # Precompute the hash: a rule's identity is (origin, expansion).
        self._hash = hash((self.origin, tuple(self.expansion)))

    def _deserialize(self):
        # Restore the cached hash that serialization omits.
        self._hash = hash((self.origin, tuple(self.expansion)))

    def __str__(self):
        return '<%s : %s>' % (self.origin.name, ' '.join(x.name for x in self.expansion))
def __repr__(self):
def _parse(s, g):
    """Parses sentence 's' using CNF grammar 'g'."""
    # NOTE(review): this function is truncated in this chunk — the final `if`
    # condition is cut off mid-expression.  Uses `xrange`, so this code
    # targets Python 2 (or defines xrange elsewhere) — confirm.
    # The CYK table. Indexed with a 2-tuple: (start pos, end pos)
    table = defaultdict(set)
    # Top-level structure is similar to the CYK table. Each cell is a dict from
    # rule name to the best (lightest) tree for that rule.
    trees = defaultdict(dict)
    # Populate base case with existing terminal production rules
    for i, w in enumerate(s):
        for terminal, rules in g.terminal_rules.items():
            if match(terminal, w):
                for rule in rules:
                    table[(i, i)].add(rule)
                    # Keep only the lightest derivation per LHS in each cell.
                    if (rule.lhs not in trees[(i, i)] or
                        rule.weight < trees[(i, i)][rule.lhs].weight):
                        trees[(i, i)][rule.lhs] = RuleNode(rule, [T(w)], weight=rule.weight)
    # Iterate over lengths of sub-sentences
    for l in xrange(2, len(s) + 1):
        # Iterate over sub-sentences with the given length
        for i in xrange(len(s) - l + 1):
            # Choose partition of the sub-sentence in [1, l)
            for p in xrange(i + 1, i + l):
                span1 = (i, p - 1)
                span2 = (p, i + l - 1)
                # Combine every rule pair derivable from the two sub-spans.
                for r1, r2 in itertools.product(table[span1], table[span2]):
                    for rule in g.nonterminal_rules.get((r1.lhs, r2.lhs), []):
                        table[(i, i + l - 1)].add(rule)
                        r1_tree = trees[span1][r1.lhs]
                        r2_tree = trees[span2][r2.lhs]
                        rule_total_weight = rule.weight + r1_tree.weight + r2_tree.weight
                        if (rule.lhs not in trees[(i, i + l - 1)]
def __init__(self, grammar):
    """Wrap *grammar*, indexing its CNF rules for CYK parsing.

    Builds two lookup tables: ``terminal_rules`` maps a terminal to the
    unit rules producing it, and ``nonterminal_rules`` maps an (NT, NT)
    pair to the binary rules producing it.
    """
    super(CnfWrapper, self).__init__()
    self.grammar = grammar
    self.rules = grammar.rules
    self.terminal_rules = defaultdict(list)
    self.nonterminal_rules = defaultdict(list)
    for rule in self.rules:
        # Validate that the grammar is CNF and populate auxiliary data structures.
        assert isinstance(rule.lhs, NT), rule
        rhs = rule.rhs
        if len(rhs) not in [1, 2]:
            raise ParseError("CYK doesn't support empty rules")
        if len(rhs) == 1 and isinstance(rhs[0], T):
            # Unit production: NT -> terminal.
            self.terminal_rules[rhs[0]].append(rule)
        elif len(rhs) == 2 and all(isinstance(x, NT) for x in rhs):
            # Binary production: NT -> NT NT.
            self.nonterminal_rules[tuple(rhs)].append(rule)
        else:
            assert False, rule
# NOTE(review): fragment of a generator body (contains `yield`) whose
# enclosing `def` is not visible in this chunk; it rewrites grammar rules for
# reconstruction — confirm against the full function.
for r in rules:
    # Keep recognized nonterminals; turn every other symbol into a Terminal
    # named after it.  Discarded terminals are filtered out.
    recons_exp = [sym if sym in nonterminals else Terminal(sym.name)
                  for sym in r.expansion if not is_discarded_terminal(sym)]

    # Skip self-recursive constructs
    if recons_exp == [r.origin]:
        continue

    # Aliased rules get their own nonterminal head.
    sym = NonTerminal(r.alias) if r.alias else r.origin

    yield Rule(sym, recons_exp, alias=MakeMatchTree(sym.name, r.expansion))

for origin, rule_aliases in aliases.items():
    # Emit a unit rule per alias, plus one for the origin's own name.
    for alias in rule_aliases:
        yield Rule(origin, [Terminal(alias)], alias=MakeMatchTree(origin.name, [NonTerminal(alias)]))
    yield Rule(origin, [Terminal(origin.name)], alias=MakeMatchTree(origin.name, [origin]))
def value(self, v):
    """Convert a single parsed child into a grammar symbol (Trees pass through)."""
    # Unpack the one-element sequence of children.
    (v,) = v
    if isinstance(v, Tree):
        return v
    if v.type == 'RULE':
        # Rule names denote nonterminal symbols.
        return NonTerminal(Str(v.value))
    if v.type == 'TERMINAL':
        # Leading-underscore terminals are filtered out of the tree.
        return Terminal(Str(v.value), filter_out=v.startswith('_'))
    assert False
# NOTE(review): fragment of an __init__ body — the enclosing `def` is not
# visible in this chunk, and the InvalidArgument(...) call at the end is cut
# off mid-expression.
self.grammar = grammar
# Newer lark versions accept a `start` argument to compile(); detect
# support by inspecting the callable's signature.
if "start" in getfullargspec(grammar.grammar.compile).args:
    terminals, rules, ignore_names = grammar.grammar.compile(start)
else:  # pragma: no cover
    # This branch is to support lark <= 0.7.1, without the start argument.
    terminals, rules, ignore_names = grammar.grammar.compile()

# Map every rule origin and terminal name to its symbol object.
self.names_to_symbols = {}

for r in rules:
    t = r.origin
    self.names_to_symbols[t.name] = t

for t in terminals:
    self.names_to_symbols[t.name] = Terminal(t.name)

# Strategy choosing which start symbol a generated example begins from.
self.start = st.sampled_from([self.names_to_symbols[s] for s in start])

self.ignored_symbols = tuple(self.names_to_symbols[n] for n in ignore_names)

# One from_regex strategy per terminal, constrained to full matches.
self.terminal_strategies = {
    t.name: st.from_regex(t.pattern.to_regexp(), fullmatch=True)
    for t in terminals
}

# Reject explicit strategies for terminals the grammar does not define.
unknown_explicit = set(explicit) - get_terminal_names(
    terminals, rules, ignore_names
)
if unknown_explicit:
    raise InvalidArgument(
        "The following arguments were passed as explicit_strategies, "
        "but there is no such terminal production in this grammar: %r"
def _match(self, term, token):
    """Check whether *token* corresponds to the expected symbol *term*."""
    if isinstance(token, Tree):
        # A subtree matches when its rule name equals the terminal's name.
        return term == Terminal(token.data)
    if isinstance(token, Token):
        return term == Terminal(token.type)
    # Unexpected token kind.
    assert False