How to use the conllu.parser.parse_line function in conllu

To help you get started, we’ve selected a few examples of conllu.parser.parse_line, based on popular ways the function is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github EmilStenstrom / conllu / tests / test_parser.py View on Github external
def test_parse_line(self):
        # Parse a single tab-separated, ten-column line using DEFAULT_FIELDS and
        # compare the result with the expected Token.
        line = "1\tThe\tthe\tDET\tDT\tDefinite=Def|PronType=Art\t4\tdet\t_\t_"
        self.assertEqual(
            parse_line(line, fields=DEFAULT_FIELDS),
            # Expected result: 'id' and 'head' come back as ints, 'feats' as a
            # nested Token of key/value pairs, and the "_" columns as None.
            Token([
                ('id', 1),
                ('form', 'The'),
                ('lemma', 'the'),
                ('upos', 'DET'),
                ('xpos', 'DT'),
                ('feats', Token([('Definite', 'Def'), ('PronType', 'Art')])),
                ('head', 4),
                ('deprel', 'det'),
                ('deps', None),
                ('misc', None)
            ])
        # NOTE(review): the excerpt stops here; the closing parenthesis of
        # assertEqual is not part of the visible snippet.
github EmilStenstrom / conllu / tests / test_parser.py View on Github external
def test_parse_fieldparsers_alias_xupostag(self):
        # Custom parsers are registered under the long names "xpostag"/"upostag"
        # while the requested fields use the short names "xpos"/"upos".
        line = "1\t2"
        custom_fieldparsers = {
            # Each parser repeats the raw column text five times, which makes it
            # visible in the result whether the custom parser was applied.
            "xpostag": lambda line, i: line[i] * 5,
            "upostag": lambda line, i: line[i] * 5,
        }
        self.assertEqual(
            parse_line(line, fields=["xpos", "upos"], field_parsers=custom_fieldparsers),
            # Both values are expected to be repeated, i.e. the long-name
            # parsers are expected to be used for the short-name fields.
            Token([
                ('xpos', "11111"),
                ('upos', "22222"),
            ])
        # NOTE(review): the excerpt stops here; the closing parenthesis of
        # assertEqual is not part of the visible snippet.
github EmilStenstrom / conllu / tests / test_parser.py View on Github external
def test_parse_line_with_spaces(self):
        # A line whose columns are separated by single spaces (no tabs) must be
        # rejected by parse_line with a ParseException.
        space_separated = "1 The the DET DT Definite=Def|PronType=Art 4 det _ _"
        prefix = "Invalid line format"

        with self.assertRaises(ParseException) as raised:
            parse_line(space_separated, fields=DEFAULT_FIELDS)

        # Only the beginning of the message is pinned; whatever follows the
        # prefix is deliberately ignored.
        message = str(raised.exception)
        self.assertEqual(message[:len(prefix)], prefix)
github EmilStenstrom / conllu / tests / test_parser.py View on Github external
def test_parse_fieldparsers_alias_two_ways(self):
        # Mixes both naming directions: a parser keyed by the short name "xpos"
        # with the long field name "xpostag", and a parser keyed by the long
        # name "upostag" with the short field name "upos".
        line = "1\t2"
        custom_fieldparsers = {
            # Each parser repeats the raw column text five times, which makes it
            # visible in the result whether the custom parser was applied.
            "xpos": lambda line, i: line[i] * 5,
            "upostag": lambda line, i: line[i] * 5,
        }
        self.assertEqual(
            parse_line(line, fields=["xpostag", "upos"], field_parsers=custom_fieldparsers),
            # Both values are expected to be repeated, so the alias lookup is
            # expected to work in either direction.
            Token([
                ('xpostag', "11111"),
                ('upos', "22222"),
            ])
        # NOTE(review): the excerpt stops here; the closing parenthesis of
        # assertEqual is not part of the visible snippet.
github EmilStenstrom / conllu / tests / test_parser.py View on Github external
def test_parse_custom_fieldparsers(self):
        # A custom parser registered for "id" is expected to replace the
        # default handling of that field.
        line = "1\t2"
        custom_fieldparsers = {
            # Repeats the raw column text five times so the custom parser's
            # effect is visible in the result.
            "id": lambda line, i: line[i] * 5,
        }
        self.assertEqual(
            parse_line(line, fields=["id"], field_parsers=custom_fieldparsers),
            # Only "id" is requested, so the expected Token holds just that one
            # entry even though the line has two columns.
            Token([
                ('id', "11111"),
            ])
        # NOTE(review): the excerpt stops here; the closing parenthesis of
        # assertEqual is not part of the visible snippet.
github EmilStenstrom / conllu / tests / test_parser.py View on Github external
def test_parse_fieldparsers_doesnt_alias_when_exists(self):
        # Parsers are registered under both the short and the long names; the
        # requested fields use the long names.
        line = "1\t2"
        custom_fieldparsers = {
            # The short-name parsers repeat the text five times, the long-name
            # parsers return it unchanged, so the result shows which one ran.
            "xpos": lambda line, i: line[i] * 5,
            "xpostag": lambda line, i: line[i],
            "upos": lambda line, i: line[i] * 5,
            "upostag": lambda line, i: line[i],
        }
        self.assertEqual(
            parse_line(line, fields=["xpostag", "upostag"], field_parsers=custom_fieldparsers),
            # Unrepeated values are expected: the parser registered under the
            # exact field name is expected to win over its alias.
            Token([
                ('xpostag', "1"),
                ('upostag', "2"),
            ])
        # NOTE(review): the excerpt stops here; the closing parenthesis of
        # assertEqual is not part of the visible snippet.
github allenai / scispacy / scispacy / spacy_convert.py View on Github external
def _lazy_parse(text: str, fields=DEFAULT_FIELDS):
    """
    Lazily walk conllu-annotated text one sentence at a time.

    The text is cut into sentences at every blank line ("\\n\\n"). For each
    non-empty sentence, one list is yielded that holds the result of
    ``parse_line(line, fields)`` for every line of the sentence that is
    neither empty nor a comment (first non-whitespace character "#").
    """
    for block in text.split("\n\n"):
        # Consecutive separators produce empty chunks; they carry no sentence.
        if not block:
            continue

        parsed_lines = []
        for raw_line in block.split("\n"):
            # Drop empty lines and comment lines before handing off to the parser.
            if not raw_line or raw_line.strip().startswith("#"):
                continue
            parsed_lines.append(parse_line(raw_line, fields))

        yield parsed_lines