Secure your code as it's written. Use Snyk Code to scan source code in minutes — no build needed — and fix issues immediately.
# NOTE(review): leading indentation has been lost in this fragment — the
# statements below logically belong inside the for-loops but all sit at
# column 0; restore the original nesting before running.  `toklist`,
# `entities`, `Fast_Parser`, `IncrementalParser`, `TreeUtility`,
# `ParseForestDumper`, `Tree`, `SessionShim` and `OrderedDict` are all
# defined/imported outside this view.
# Parse `toklist` incrementally and collect a text dump of every
# sentence's best parse tree, then feed the dumps through Tree.process().
fp = Fast_Parser(verbose=False)
ip = IncrementalParser(fp, toklist, verbose=False)
# Dict of parse trees in string dump format,
# stored by sentence index (1-based)
trees = OrderedDict()
num_sent = 0
for p in ip.paragraphs():
for sent in p.sentences():
num_sent += 1
num_tokens = len(sent)
# Hard requirement in this (test-style) fragment: every sentence must
# parse, otherwise AssertionError is raised with the sentence text.
assert sent.parse(), "Sentence does not parse: " + sent.text
# Obtain a text representation of the parse tree
token_dicts = TreeUtility.dump_tokens(sent.tokens, sent.tree)
# Create a verbose text representation of
# the highest scoring parse tree
tree = ParseForestDumper.dump_forest(sent.tree, token_dicts=token_dicts)
# Add information about the sentence tree's score
# and the number of tokens
# Each entry is "C<score>\nL<token count>\n<tree dump>".
trees[num_sent] = "\n".join(
["C{0}".format(sent.score), "L{0}".format(num_tokens), tree]
)
# Create a tree representation string out of
# all the accumulated parse trees
# Entries are concatenated as "S<sentence index>\n<entry>\n".
tree_string = "".join("S{0}\n{1}\n".format(key, val) for key, val in trees.items())
tree = Tree()
tree.load(tree_string)
session = SessionShim()
# Walk the loaded trees, registering results into the shim session;
# `entities` is supplied by the enclosing scope — not visible here.
tree.process(session, entities)
# Verify that the expected (subject, verb, definition) triple was
# registered — presumably an entity definition extracted from the text;
# confirm against the SessionShim implementation.
session.check(("Bygma", "er", "dönsk byggingavörukeðja"))
# NOTE(review): this loop re-uses `p` from an enclosing paragraph loop
# that is not visible in this fragment, and indentation has been lost —
# the body lines below should be nested under the for/if/else.
# Second pass over sentences: only parse sentences up to
# MAX_SENTENCE_TOKENS; record either a tree dump or an error index.
for sent in p.sentences():
num_sent += 1
num_tokens = len(sent)
# We don't attempt to parse very long sentences (>85 tokens)
# since they are memory intensive (>16 GB) and may take
# minutes to process
if num_tokens <= MAX_SENTENCE_TOKENS and sent.parse():
# Obtain a text representation of the parse tree
# (`words` is a collector supplied by the enclosing scope)
token_dicts = TreeUtility.dump_tokens(
sent.tokens, sent.tree, words=words
)
# Create a verbose text representation of
# the highest scoring parse tree
tree = ParseForestDumper.dump_forest(
sent.tree, token_dicts=token_dicts
)
# Add information about the sentence tree's score
# and the number of tokens
# Entry format: "C<score>\nL<token count>\n<tree dump>".
trees[num_sent] = "\n".join(
["C{0}".format(sent.score), "L{0}".format(num_tokens), tree]
)
else:
# Error, sentence too long or no parse:
# add an error index entry for this sentence
if num_tokens > MAX_SENTENCE_TOKENS:
# Set the error index at the first
# token outside the maximum limit
eix = MAX_SENTENCE_TOKENS
else:
# Use the parser-reported index of the token
# where the parse failed
eix = sent.err_index
# Parse the accumulated sentence
# NOTE(review): `bp` (parser), `rdc` (reducer) and `num_parsed_sent`
# are defined outside this fragment; indentation has been lost here too.
num = 0
try:
# Parse the sentence
forest = bp.go(sent)
if forest is not None:
# Number of distinct parse combinations in the forest
num = Fast_Parser.num_combinations(forest)
if num > 1:
# Reduce the resulting forest
# (select the single highest-scoring tree)
forest = rdc.go(forest)
except ParseError:
# Treat a parse failure as "no forest"; num stays 0
forest = None
if num > 0:
# At least one parse combination: count the sentence as parsed
# and store a text dump of its (reduced) forest
num_parsed_sent += 1
# Obtain a text representation of the parse tree
trees[num_sent] = ParseForestDumper.dump_forest(forest)
# NOTE(review): these `elif` branches continue an `if t[0] == ...`
# dispatch whose opening branch is outside this fragment; `t` is the
# current token and `sent` the token accumulator for the sentence.
elif t[0] == TOK.P_BEGIN:
# Paragraph-begin marker: no token to accumulate
pass
elif t[0] == TOK.P_END:
# Paragraph-end marker: no token to accumulate
pass
else:
# Ordinary token: append to the current sentence
sent.append(t)
# Return the sentence statistics together with the collected tree dumps
result = dict(num_sent=num_sent, num_parsed_sent=num_parsed_sent)
return result, trees