How to use the allennlp.data.fields.SequenceLabelField class in allennlp

To help you get started, we’ve selected a few allennlp examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github allenai / allennlp-semparse / allennlp_semparse / dataset_readers / quarel.py View on Github external
knowledge_graph,
            tokenized_question,
            self._entity_token_indexers,
            tokenizer=self._tokenizer,
        )

        if self._tagger_only:
            fields: Dict[str, Field] = {"tokens": question_field}
            if entity_literals is not None:
                entity_tags = self._get_entity_tags(
                    self._all_entities, table_field, entity_literals, tokenized_question
                )
                if debug_counter > 0:
                    logger.info(f"raw entity tags = {entity_tags}")
                entity_tags_bio = self._convert_tags_bio(entity_tags)
                fields["tags"] = SequenceLabelField(entity_tags_bio, question_field)
                additional_metadata["tags_gold"] = entity_tags_bio
            additional_metadata["words"] = [x.text for x in tokenized_question]
            fields["metadata"] = MetadataField(additional_metadata)
            return Instance(fields)

        world_field = MetadataField(world)

        production_rule_fields: List[Field] = []
        for production_rule in world.all_possible_actions():
            _, rule_right_side = production_rule.split(" -> ")
            is_global_rule = not world.is_table_entity(rule_right_side)
            field = ProductionRuleField(production_rule, is_global_rule)
            production_rule_fields.append(field)
        action_field = ListField(production_rule_fields)

        fields = {
github Hyperparticle / udify / udify / dataset_readers / sigmorphon_2019_task_2.py View on Github external
lemmas: List[str] = None,
                         lemma_rules: List[str] = None,
                         feats: List[str] = None,
                         ids: List[str] = None,
                         multiword_ids: List[str] = None,
                         multiword_forms: List[str] = None) -> Instance:
        fields: Dict[str, Field] = {}

        tokens = TextField([Token(w) for w in words], self._token_indexers)
        fields["tokens"] = tokens

        if lemma_rules:
            fields["lemmas"] = SequenceLabelField(lemma_rules, tokens, label_namespace="lemmas")

        if "feats":
            fields["feats"] = SequenceLabelField(feats, tokens, label_namespace="feats")

            # TODO: parameter to turn this off
            feature_seq = []

            for feat in feats:
                features = feat.lower().split(";") if feat != "_" else "_"
                dimensions = {dimension: "_" for dimension in unimorph_schema}

                if feat != "_":
                    for label in features:
                        # Use regex to handle special cases where multi-labels are contained inside "{}"
                        first_label = re.findall(r"(?#{)([a-zA-Z0-9.\-_]+)(?#\+|\/|})", label)
                        first_label = first_label[0] if first_label else label

                        if first_label not in self.label_to_dimension:
                            if first_label.startswith("arg"):
github allenai / allennlp-reading-comprehension / allennlp_rc / dataset_readers / drop.py View on Github external
question_span_fields: List[Field] = [
                SpanField(span[0], span[1], question_field)
                for span in answer_info["answer_question_spans"]
            ]
            if not question_span_fields:
                question_span_fields.append(SpanField(-1, -1, question_field))
            fields["answer_as_question_spans"] = ListField(question_span_fields)

            add_sub_signs_field: List[Field] = []
            for signs_for_one_add_sub_expression in answer_info["signs_for_add_sub_expressions"]:
                add_sub_signs_field.append(
                    SequenceLabelField(signs_for_one_add_sub_expression, numbers_in_passage_field)
                )
            if not add_sub_signs_field:
                add_sub_signs_field.append(
                    SequenceLabelField([0] * len(number_tokens), numbers_in_passage_field)
                )
            fields["answer_as_add_sub_expressions"] = ListField(add_sub_signs_field)

            count_fields: List[Field] = [
                LabelField(count_label, skip_indexing=True) for count_label in answer_info["counts"]
            ]
            if not count_fields:
                count_fields.append(LabelField(-1, skip_indexing=True))
            fields["answer_as_counts"] = ListField(count_fields)

        metadata.update(additional_metadata)
        fields["metadata"] = MetadataField(metadata)
        return Instance(fields)
github allenai / allennlp / allennlp / data / dataset_readers / reading_comprehension / util.py View on Github external
p1_span_end = span_end
            if num_context_answers > 2:
                p3_answer_marker_list.append(
                    SequenceLabelField(
                        prev_answer_marker_lists[3], passage_field, label_namespace="answer_tags"
                    )
                )
            if num_context_answers > 1:
                p2_answer_marker_list.append(
                    SequenceLabelField(
                        prev_answer_marker_lists[2], passage_field, label_namespace="answer_tags"
                    )
                )
            if num_context_answers > 0:
                p1_answer_marker_list.append(
                    SequenceLabelField(
                        prev_answer_marker_lists[1], passage_field, label_namespace="answer_tags"
                    )
                )
        fields["span_start"] = ListField(span_start_list)
        fields["span_end"] = ListField(span_end_list)
        if num_context_answers > 0:
            fields["p1_answer_marker"] = ListField(p1_answer_marker_list)
            if num_context_answers > 1:
                fields["p2_answer_marker"] = ListField(p2_answer_marker_list)
                if num_context_answers > 2:
                    fields["p3_answer_marker"] = ListField(p3_answer_marker_list)
        fields["yesno_list"] = ListField(
            [LabelField(yesno, label_namespace="yesno_labels") for yesno in yesno_list]
        )
        fields["followup_list"] = ListField(
            [LabelField(followup, label_namespace="followup_labels") for followup in followup_list]
github allenai / allennlp / allennlp / data / dataset_readers / ontonotes_ner.py View on Github external
self,  # type: ignore
        tokens: List[Token],
        ner_tags: List[str] = None,
    ) -> Instance:
        """
        We take `pre-tokenized` input here, because we don't have a tokenizer in this class.
        """

        sequence = TextField(tokens, self._token_indexers)
        instance_fields: Dict[str, Field] = {"tokens": sequence}
        instance_fields["metadata"] = MetadataField({"words": [x.text for x in tokens]})
        # Add "tag label" to instance
        if ner_tags is not None:
            if self._coding_scheme == "BIOUL":
                ner_tags = to_bioul(ner_tags, encoding="BIO")
            instance_fields["tags"] = SequenceLabelField(ner_tags, sequence)
        return Instance(instance_fields)
github allenai / allennlp / allennlp / data / dataset_readers / sequence_tagging.py View on Github external
def text_to_instance(  # type: ignore
        self, tokens: List[Token], tags: List[str] = None
    ) -> Instance:
        """
        Build an ``Instance`` from `pre-tokenized` input — this reader has no
        tokenizer of its own, so callers must supply ``Token`` objects directly.

        The instance always carries a ``"tokens"`` ``TextField`` and a
        ``"metadata"`` field with the raw word strings; a ``"tags"``
        ``SequenceLabelField`` is attached only when ``tags`` is given.
        """

        token_field = TextField(tokens, self._token_indexers)
        instance_fields: Dict[str, Field] = {
            "tokens": token_field,
            "metadata": MetadataField({"words": [token.text for token in tokens]}),
        }
        # Gold labels are optional so the same method serves inference-time input.
        if tags is not None:
            instance_fields["tags"] = SequenceLabelField(tags, token_field)
        return Instance(instance_fields)
github allenai / allennlp / allennlp / data / dataset_readers / conll2003.py View on Github external
if ner_tags is not None
                else None
            )
        else:
            # the default IOB1
            coded_chunks = chunk_tags
            coded_ner = ner_tags

        # Add "feature labels" to instance
        if "pos" in self.feature_labels:
            if pos_tags is None:
                raise ConfigurationError(
                    "Dataset reader was specified to use pos_tags as "
                    "features. Pass them to text_to_instance."
                )
            instance_fields["pos_tags"] = SequenceLabelField(pos_tags, sequence, "pos_tags")
        if "chunk" in self.feature_labels:
            if coded_chunks is None:
                raise ConfigurationError(
                    "Dataset reader was specified to use chunk tags as "
                    "features. Pass them to text_to_instance."
                )
            instance_fields["chunk_tags"] = SequenceLabelField(coded_chunks, sequence, "chunk_tags")
        if "ner" in self.feature_labels:
            if coded_ner is None:
                raise ConfigurationError(
                    "Dataset reader was specified to use NER tags as "
                    " features. Pass them to text_to_instance."
                )
            instance_fields["ner_tags"] = SequenceLabelField(coded_ner, sequence, "ner_tags")

        # Add "tag label" to instance
github manuwhs / Trapyng / Examples / 5.1 AllenNLP / 0. main DataSet example.py View on Github external
def convert_to_instance(list_words, sentiment_label, PoS_labels):
    """Wrap one labelled sentence as an AllenNLP ``Instance``.

    ``list_words`` is a list of raw word strings, ``sentiment_label`` a single
    sentence-level label, and ``PoS_labels`` a per-token tag sequence aligned
    with ``list_words``.
    """
    tokens = [Token(word) for word in list_words]
    # Index each token both by word id and by its characters.
    text_field = TextField(
        tokens,
        token_indexers={
            "tokens": SingleIdTokenIndexer(namespace="token_ids"),
            "chars": TokenCharactersIndexer(namespace="token_chars"),
        },
    )
    sentiment_field = LabelField(sentiment_label, label_namespace="sentiment_tags")
    # The tag sequence must reference the text field it is aligned with.
    pos_field = SequenceLabelField(
        labels=PoS_labels,
        sequence_field=text_field,
        label_namespace="PoS_tags",
    )
    return Instance(
        {
            "text_field": text_field,
            "label_sentiment": sentiment_field,
            "Pos_labels": pos_field,
        }
    )
github plasticityai / magnitude / pymagnitude / third_party / allennlp / data / dataset_readers / conll2003.py View on Github external
if u'chunk' in self.feature_labels:
            if coded_chunks is None:
                raise ConfigurationError(u"Dataset reader was specified to use chunk tags as "
                                         u"features. Pass them to text_to_instance.")
            instance_fields[u'chunk_tags'] = SequenceLabelField(coded_chunks, sequence, u"chunk_tags")
        if u'ner' in self.feature_labels:
            if coded_ner is None:
                raise ConfigurationError(u"Dataset reader was specified to use NER tags as "
                                         u" features. Pass them to text_to_instance.")
            instance_fields[u'ner_tags'] = SequenceLabelField(coded_ner, sequence, u"ner_tags")

        # Add "tag label" to instance
        if self.tag_label == u'ner' and coded_ner is not None:
            instance_fields[u'tags'] = SequenceLabelField(coded_ner, sequence)
        elif self.tag_label == u'pos' and pos_tags is not None:
            instance_fields[u'tags'] = SequenceLabelField(pos_tags, sequence)
        elif self.tag_label == u'chunk' and coded_chunks is not None:
            instance_fields[u'tags'] = SequenceLabelField(coded_chunks, sequence)

        return Instance(instance_fields)
github allenai / allennlp / allennlp / data / dataset_readers / ccgbank.py View on Github external
if "modified_pos" in self.feature_labels:
            if modified_pos_tags is None:
                raise ConfigurationError(
                    "Dataset reader was specified to use modified POS tags as "
                    " features. Pass them to text_to_instance."
                )
            fields["modified_pos_tags"] = SequenceLabelField(
                modified_pos_tags, text_field, "modified_pos_tags"
            )
        if "predicate_arg" in self.feature_labels:
            if predicate_arg_categories is None:
                raise ConfigurationError(
                    "Dataset reader was specified to use predicate arg tags as "
                    " features. Pass them to text_to_instance."
                )
            fields["predicate_arg_tags"] = SequenceLabelField(
                predicate_arg_categories, text_field, "predicate_arg_tags"
            )

        # Add "tag label" to instance
        if self.tag_label == "ccg" and ccg_categories is not None:
            fields["tags"] = SequenceLabelField(ccg_categories, text_field, self.label_namespace)
        elif self.tag_label == "original_pos" and original_pos_tags is not None:
            fields["tags"] = SequenceLabelField(original_pos_tags, text_field, self.label_namespace)
        elif self.tag_label == "modified_pos" and modified_pos_tags is not None:
            fields["tags"] = SequenceLabelField(modified_pos_tags, text_field, self.label_namespace)
        elif self.tag_label == "predicate_arg" and predicate_arg_categories is not None:
            fields["tags"] = SequenceLabelField(
                predicate_arg_categories, text_field, self.label_namespace
            )

        return Instance(fields)