Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
sys.stderr.write(" Recovered by returning a flat parse.\n")
#sys.stderr.write(' '.join(t.split())+'\n')
return Tree('S', self._tag(t))
def _tag(self, t, simplify_tags=False):
    """
    Extract tagged tokens from a single bracketed parse string.

    @param t: The raw text of one parse; it is passed through
        C{self._normalize} before matching.
    @param simplify_tags: If true, every tag is replaced by the result
        of C{self._tag_mapping_function(tag)}.
    @return: A list of 2-tuples, one per C{TAGWORD} match, with the two
        matched groups swapped (presumably C{(word, tag)}, given that
        the pattern captures the tag first -- confirm against the
        C{TAGWORD} definition).
    """
    normalized = self._normalize(t)
    # Each match is unpacked as (first group, second group) and emitted
    # in the opposite order.
    pairs = [(word, tag) for (tag, word) in TAGWORD.findall(normalized)]
    if not simplify_tags:
        return pairs
    return [(word, self._tag_mapping_function(tag)) for (word, tag) in pairs]
def _word(self, t):
    """
    Return every match of the module-level C{WORD} pattern found in
    the normalized form of C{t}.

    @param t: The raw text of one parse; normalized with
        C{self._normalize} before matching.
    @return: The list produced by C{WORD.findall}.
    """
    normalized = self._normalize(t)
    return WORD.findall(normalized)
class CategorizedBracketParseCorpusReader(CategorizedCorpusReader,
                                          BracketParseCorpusReader):
    """
    A reader for parsed corpora whose documents are
    divided into categories based on their file identifiers.
    @author: Nathan Schneider
    """
    def __init__(self, *args, **kwargs):
        """
        Initialize the corpus reader.  Categorization arguments
        (C{cat_pattern}, C{cat_map}, and C{cat_file}) are passed to
        the L{CategorizedCorpusReader constructor
        <CategorizedCorpusReader.__init__>}.  The remaining arguments
        are passed to the L{BracketParseCorpusReader constructor
        <BracketParseCorpusReader.__init__>}.
        """
        # kwargs is handed over as a single dict, not unpacked.
        # NOTE(review): presumably CategorizedCorpusReader removes the
        # cat_* entries from it in place so that only the remaining
        # keywords reach BracketParseCorpusReader -- confirm.
        CategorizedCorpusReader.__init__(self, kwargs)
        BracketParseCorpusReader.__init__(self, *args, **kwargs)

    # Each accessor below narrows the request with self._resolve(fileids,
    # categories) and then delegates to the BracketParseCorpusReader
    # method of the same name.

    def tagged_sents(self, fileids=None, categories=None, simplify_tags=False):
        """Tagged sentences for the files selected by fileids/categories."""
        return BracketParseCorpusReader.tagged_sents(
            self, self._resolve(fileids, categories), simplify_tags)

    def tagged_paras(self, fileids=None, categories=None, simplify_tags=False):
        """Tagged paragraphs for the files selected by fileids/categories."""
        return BracketParseCorpusReader.tagged_paras(
            self, self._resolve(fileids, categories), simplify_tags)

    def parsed_words(self, fileids=None, categories=None):
        """Parsed words for the files selected by fileids/categories."""
        return BracketParseCorpusReader.parsed_words(
            self, self._resolve(fileids, categories))

    def parsed_sents(self, fileids=None, categories=None):
        """Parsed sentences for the files selected by fileids/categories."""
        return BracketParseCorpusReader.parsed_sents(
            self, self._resolve(fileids, categories))

    def parsed_paras(self, fileids=None, categories=None):
        """Parsed paragraphs for the files selected by fileids/categories."""
        return BracketParseCorpusReader.parsed_paras(
            self, self._resolve(fileids, categories))
class AlpinoCorpusReader(BracketParseCorpusReader):
    """
    Reader for the Alpino Dutch Treebank.
    """
    def __init__(self, root, encoding='ISO-8859-1', tag_mapping_function=None):
        """
        Initialize the reader.

        @param root: Passed to the C{BracketParseCorpusReader}
            constructor as its root argument.
        @param encoding: Encoding forwarded to the base constructor.
        @param tag_mapping_function: Forwarded to the base constructor.
        """
        # Raw string: '\.' in a plain literal is an invalid escape
        # sequence; the pattern value itself is unchanged.
        BracketParseCorpusReader.__init__(self, root, r'alpino\.xml',
                                          detect_blocks='blankline',
                                          encoding=encoding,
                                          tag_mapping_function=tag_mapping_function)

    def _normalize(self, t):
        """
        Convert one block of Alpino XML into bracketed s-expression
        text.

        @param t: The raw text of one block.
        @return: The converted text, or the empty string when the block
            does not start with C{<alpino_ds}.
        """
        # NOTE(review): the XML fragments inside these literals were lost
        # from this copy of the file; the patterns below (and the final
        # two substitutions plus the return) are restored from the known
        # upstream form of this reader -- verify against the canonical
        # source before relying on them.
        if t[:10] != "<alpino_ds":
            return ""
        # Open a bracket for every phrasal node.
        t = re.sub(r'  <node .*? cat="(\w+)".*>', r"(\1", t)
        # Leaf nodes become "(pos word)".
        t = re.sub(r'  <node .*? pos="(\w+)".*? word="([^"]+)".*/>', r"(\1 \2)", t)
        # Closing node tags close the bracket.
        t = re.sub(r"  </node>", r")", t)
        # Drop the plain-text sentence and the document wrapper tags.
        t = re.sub(r"<sentence>.*</sentence>", r"", t)
        t = re.sub(r"</?alpino_ds.*>", r"", t)
        return t
def tagged_words(self, fileids=None, categories=None, simplify_tags=False):
    """
    Delegate to C{BracketParseCorpusReader.tagged_words} for the files
    chosen by C{self._resolve(fileids, categories)}.

    @param simplify_tags: Forwarded unchanged to the base method.
    """
    selection = self._resolve(fileids, categories)
    return BracketParseCorpusReader.tagged_words(self, selection, simplify_tags)
def tagged_sents(self, fileids=None, categories=None, simplify_tags=False):
    """
    Delegate to C{BracketParseCorpusReader.tagged_sents} for the files
    chosen by C{self._resolve(fileids, categories)}.

    @param simplify_tags: Forwarded unchanged to the base method.
    """
    # The header used to appear twice in a row, leaving a bodiless def;
    # a single definition is kept.
    return BracketParseCorpusReader.tagged_sents(
        self, self._resolve(fileids, categories), simplify_tags)
def tagged_paras(self, fileids=None, categories=None, simplify_tags=False):
def raw(self, fileids=None, categories=None):
    """
    Delegate to C{BracketParseCorpusReader.raw} for the files chosen by
    C{self._resolve(fileids, categories)}.
    """
    selection = self._resolve(fileids, categories)
    return BracketParseCorpusReader.raw(self, selection)
def words(self, fileids=None, categories=None):
def parsed_sents(self, fileids=None, categories=None):
    """
    Delegate to C{BracketParseCorpusReader.parsed_sents} for the files
    chosen by C{self._resolve(fileids, categories)}.
    """
    selection = self._resolve(fileids, categories)
    return BracketParseCorpusReader.parsed_sents(self, selection)
def parsed_paras(self, fileids=None, categories=None):
def parsed_words(self, fileids=None, categories=None):
    """
    Delegate to C{BracketParseCorpusReader.parsed_words} for the files
    chosen by C{self._resolve(fileids, categories)}.
    """
    selection = self._resolve(fileids, categories)
    return BracketParseCorpusReader.parsed_words(self, selection)
def parsed_sents(self, fileids=None, categories=None):
def parsed_paras(self, fileids=None, categories=None):
    """
    Delegate to C{BracketParseCorpusReader.parsed_paras} for the files
    chosen by C{self._resolve(fileids, categories)}.
    """
    selection = self._resolve(fileids, categories)
    return BracketParseCorpusReader.parsed_paras(self, selection)
def words(self, fileids=None, categories=None):
    """
    Delegate to C{BracketParseCorpusReader.words} for the files chosen
    by C{self._resolve(fileids, categories)}.
    """
    selection = self._resolve(fileids, categories)
    return BracketParseCorpusReader.words(self, selection)
def sents(self, fileids=None, categories=None):
    """
    Delegate to C{BracketParseCorpusReader.sents} for the files chosen
    by C{self._resolve(fileids, categories)}.
    """
    # The header used to appear twice in a row, leaving a bodiless def;
    # a single definition is kept.
    return BracketParseCorpusReader.sents(
        self, self._resolve(fileids, categories))
def paras(self, fileids=None, categories=None):