How to use the conllu.exceptions.ParseException exception class in conllu

To help you get started, we’ve selected a few conllu examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github EmilStenstrom / conllu / conllu / parser.py View on Github external
if not value or value == '_':
        return None

    if fullmatch(ID_SINGLE, value):
        return int(value)

    elif fullmatch(ID_RANGE, value):
        from_, to = value.split("-")
        from_, to = int(from_), int(to)
        if to > from_:
            return (int(from_), "-", int(to))

    elif fullmatch(ID_DOT_ID, value):
        return (int(value.split(".")[0]), ".", int(value.split(".")[1]))

    raise ParseException("'{}' is not a valid ID.".format(value))
github EmilStenstrom / conllu / conllu / parser.py View on Github external
def parse_token_and_metadata(data, fields=None, field_parsers=None, metadata_parsers=None):
    """Start parsing `data` line by line into a token list and a Metadata object.

    NOTE(review): this excerpt stops inside the line loop; how each line is
    handled and what the function returns are not visible here.

    Args:
        data: Text to parse; it is split on newline characters and each
            line is stripped of surrounding whitespace.
        fields: Field names to use; falls back to DEFAULT_FIELDS when falsy.
        field_parsers: Dict-like mapping keyed by field name. When falsy, a
            copy of DEFAULT_FIELD_PARSERS is used. When its keys differ from
            `fields`, its entries are layered on top of a copy of the
            defaults.
        metadata_parsers: Not used in the visible part of the function.

    Raises:
        ParseException: If `data` is falsy.
    """
    if not data:
        raise ParseException("Can't create TokenList, no data sent to constructor.")

    fields = fields or DEFAULT_FIELDS

    if not field_parsers:
        # Copy so that later changes never touch the shared defaults.
        field_parsers = DEFAULT_FIELD_PARSERS.copy()
    elif sorted(field_parsers.keys()) != sorted(fields):
        # The supplied parsers do not cover exactly the requested fields:
        # start from the defaults and let the caller's entries override them.
        new_field_parsers = DEFAULT_FIELD_PARSERS.copy()
        new_field_parsers.update(field_parsers)
        field_parsers = new_field_parsers

    tokens = []
    metadata = Metadata()

    for line in data.split('\n'):
        line = line.strip()
github EmilStenstrom / conllu / conllu / parser.py View on Github external
if len(line) == 1:
        raise ParseException("Invalid line format, line must contain either tabs or two spaces.")

    data = Token()

    for i, field in enumerate(fields):
        # Allow parsing CoNLL-U files with fewer columns
        if i >= len(line):
            break

        if field in field_parsers:
            try:
                value = field_parsers[field](line, i)
            except ParseException as e:
                raise ParseException("Failed parsing field '{}': ".format(field) + str(e))

        else:
            value = line[i]

        data[text(field)] = value

    return data
github EmilStenstrom / conllu / conllu / models.py View on Github external
def head_to_token(sentence):
        """Group the tokens of `sentence` by the value of their "head" field.

        NOTE(review): this excerpt ends right after the "no head node" check;
        the rest of the function, including its return value, is not visible
        here.

        Raises:
            ParseException: If `sentence` is falsy, if its first token has no
                "head" key, or if no token ends up grouped under head 0.
        """
        if not sentence:
            raise ParseException("Can't parse tree, need a tokenlist as input.")

        # Only the first token is inspected for the presence of "head".
        if "head" not in sentence[0]:
            raise ParseException("Can't parse tree, missing 'head' field.")

        head_indexed = defaultdict(list)
        for token in sentence:
            # Filter out range and decimal IDs before building tree
            if "id" in token and not isinstance(token["id"], int):
                continue

            # Filter out tokens with negative head, they are sometimes used to
            # specify tokens which should not be included in tree
            # NOTE(review): this comparison presumably assumes "head" is an
            # int; a None head would raise TypeError on Python 3 - confirm
            # against the parser that fills this field.
            if token["head"] < 0:
                continue

            head_indexed[token["head"]].append(token)

        # Looking up key 0 on the defaultdict inserts an empty list when no
        # token has head 0, so the key exists after this check either way.
        if len(head_indexed[0]) == 0:
            raise ParseException("Found no head node, can't build tree")
github EmilStenstrom / conllu / conllu / models.py View on Github external
head_indexed = defaultdict(list)
        for token in sentence:
            # Filter out range and decimal ID:s before building tree
            if "id" in token and not isinstance(token["id"], int):
                continue

            # Filter out tokens with negative head, they are sometimes used to
            # specify tokens which should not be included in tree
            if token["head"] < 0:
                continue

            head_indexed[token["head"]].append(token)

        if len(head_indexed[0]) == 0:
            raise ParseException("Found no head node, can't build tree")

        if len(head_indexed[0]) > 1:
            raise ParseException("Can't parse tree, found multiple root nodes.")

        return head_indexed
github EmilStenstrom / conllu / conllu / models.py View on Github external
def print_tree(self, depth=0, indent=4, exclude_fields=DEFAULT_EXCLUDE_FIELDS):
        if not self.token:
            raise ParseException("Can't print, token is None.")

        if "deprel" not in self.token or "id" not in self.token:
            raise ParseException("Can't print, token is missing either the id or deprel fields.")

        relevant_data = self.token.copy()
        for key in exclude_fields:
            if key in relevant_data:
                del relevant_data[key]

        node_repr = ' '.join([
            '{key}:{value}'.format(key=key, value=value)
            for key, value in relevant_data.items()
        ])

        print(' ' * indent * depth + '(deprel:{deprel}) {node_repr} [{idx}]'.format(
            deprel=self.token['deprel'],
github EmilStenstrom / conllu / conllu / parser.py View on Github external
def parse_int_value(value):
    """Convert a raw field string into an integer.

    Returns ``None`` for the ``'_'`` placeholder and ``int(value)`` when the
    whole string matches the ``INTEGER`` pattern.

    Raises:
        ParseException: If `value` is neither ``'_'`` nor a full match of
            ``INTEGER``.
    """
    if value == '_':
        return None

    # Guard clause: reject anything the INTEGER pattern does not fully match.
    if not fullmatch(INTEGER, value):
        message = "'{}' is not a valid value for parse_int_value.".format(value)
        raise ParseException(message)

    return int(value)