How to use the conllu.parser.parse_sentences function in conllu

To help you get started, we’ve selected a few conllu examples based on popular ways it is used in public projects.
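Before the excerpts, here is a minimal sketch of calling parse_sentences yourself. The path example.conllu is a hypothetical placeholder; as the tests below confirm, the function takes an opened file-like object and yields each sentence block as a raw string.

from conllu.parser import parse_sentences

# Hypothetical path; parse_sentences expects an opened file-like object.
with open("example.conllu", "r", encoding="utf-8") as in_file:
    for sentence in parse_sentences(in_file):
        # Each item is the raw text of one sentence block,
        # with the separating blank lines stripped.
        print(sentence)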


From EmilStenstrom/conllu, tests/test_parser.py:
def test_simple(self):
        data = dedent("""\
            1\thej
            2\tdå
            3\thej

            1\thej
            2\tdå
            3\thej
        """)
        sentences = list(parse_sentences(string_to_file(data)))
        self.assertEqual(sentences, [
            '1\thej\n2\tdå\n3\thej',
            '1\thej\n2\tdå\n3\thej',
        ])
From EmilStenstrom/conllu, tests/test_parser.py:
def test_ends_without_newline(self):
        data = "1\thej\n2\tdå"
        sentences = list(parse_sentences(string_to_file(data)))
        self.assertEqual(sentences, [
            '1\thej\n2\tdå',
        ])
From EmilStenstrom/conllu, tests/test_parser.py:
def test_empty(self):
        self.assertEqual(list(parse_sentences(string_to_file(""))), [])
        self.assertEqual(list(parse_sentences(string_to_file(None))), [])
From EmilStenstrom/conllu, tests/test_parser.py:
def test_multiple_newlines(self):
        data = dedent("""\
            1\thej
            2\tdå


            1\thej
            2\tdå



            1\thej
            2\tdå
        """)
        sentences = list(parse_sentences(string_to_file(data)))
        self.assertEqual(sentences, [
            '1\thej\n2\tdå',
            '1\thej\n2\tdå',
            '1\thej\n2\tdå',
        ])
From EmilStenstrom/conllu, conllu/__init__.py:
def parse_incr(in_file, fields=None, field_parsers=None, metadata_parsers=None):
    if not hasattr(in_file, 'read'):
        raise FileNotFoundError("Invalid file, 'parse_incr' needs an opened file as input")

    if not fields:
        fields = parse_conllu_plus_fields(in_file, metadata_parsers=metadata_parsers)

    for sentence in parse_sentences(in_file):
        yield TokenList(*parse_token_and_metadata(
            sentence,
            fields=fields,
            field_parsers=field_parsers,
            metadata_parsers=metadata_parsers
        ))
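For comparison, here is a hedged sketch of calling parse_incr, which wraps parse_sentences and yields a TokenList per sentence. The path corpus.conllu is a hypothetical placeholder; parse_incr requires an already opened file object, as the check at the top of the function shows.

from conllu import parse_incr

# Hypothetical path; parse_incr raises if given anything but an opened file.
with open("corpus.conllu", "r", encoding="utf-8") as data_file:
    for tokenlist in parse_incr(data_file):
        # Each TokenList corresponds to one sentence block from parse_sentences.
        print(tokenlist)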