How to use the deeppavlov.agents.coreference.utils.shape function in deeppavlov

To help you get started, we’ve selected a few deeppavlov examples, based on popular ways this function is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github deepmipt / kpi2017 / deeppavlov / agents / coreference / models.py View on Github external
mention_emb_list.append(mention_end_emb)

        mention_width = 1 + mention_ends - mention_starts  # [num_mentions]
        if self.opt["use_features"]:
            mention_width_index = mention_width - 1  # [num_mentions]
            mention_width_emb = tf.gather(tf.get_variable("mention_width_embeddings", [self.opt["max_mention_width"],
                                                                                       self.opt["feature_size"]],
                                                          dtype=tf.float64),
                                          mention_width_index)  # [num_mentions, emb]
            mention_width_emb = tf.nn.dropout(mention_width_emb, self.dropout)
            mention_emb_list.append(mention_width_emb)

        if self.opt["model_heads"]:
            mention_indices = tf.expand_dims(tf.range(self.opt["max_mention_width"]), 0) + tf.expand_dims(
                mention_starts, 1)  # [num_mentions, max_mention_width]
            mention_indices = tf.minimum(utils.shape(text_outputs, 0) - 1,
                                         mention_indices)  # [num_mentions, max_mention_width]
            mention_text_emb = tf.gather(text_emb, mention_indices)  # [num_mentions, max_mention_width, emb]
            self.head_scores = utils.projection(text_outputs, 1)  # [num_words, 1]
            mention_head_scores = tf.gather(self.head_scores, mention_indices)  # [num_mentions, max_mention_width, 1]
            mention_mask = tf.expand_dims(
                tf.sequence_mask(mention_width, self.opt["max_mention_width"], dtype=tf.float64),
                2)  # [num_mentions, max_mention_width, 1]
            mention_attention = tf.nn.softmax(mention_head_scores + tf.log(mention_mask),
                                              dim=1)  # [num_mentions, max_mention_width, 1]
            mention_head_emb = tf.reduce_sum(mention_attention * mention_text_emb, 1)  # [num_mentions, emb]
            mention_emb_list.append(mention_head_emb)

        mention_emb = tf.concat(mention_emb_list, 1)  # [num_mentions, emb]
        return mention_emb
github deepmipt / kpi2017 / deeppavlov / agents / coreference / models.py View on Github external
"""
        Forms a new tensor using special features, mention embeddings, mention scores, etc.,
        and passes it through a fully-connected network that computes antecedent scores.
        Args:
            mention_emb: [num_mentions, emb], a tensor that contains the embeddings of specific mentions
            mention_scores: [num_mentions, 1], output of the fully-connected network that computes the mention scores.
            antecedents: [], obtained from a C++ function
            antecedents_len: [], obtained from a C++ function
            mention_speaker_ids: [num_mentions, speaker_emb_size], tf.float64, Speaker IDs.
            genre_emb: [genre_emb_size], tf.float64, Genre

        Returns: tf.float64, [num_mentions, max_ant + 1], antecedent scores.

        """
        num_mentions = utils.shape(mention_emb, 0)
        max_antecedents = utils.shape(antecedents, 1)

        feature_emb_list = []

        if self.opt["use_metadata"]:
            antecedent_speaker_ids = tf.gather(mention_speaker_ids, antecedents)  # [num_mentions, max_ant]
            same_speaker = tf.equal(tf.expand_dims(mention_speaker_ids, 1),
                                    antecedent_speaker_ids)  # [num_mentions, max_ant]
            speaker_pair_emb = tf.gather(tf.get_variable("same_speaker_emb", [2, self.opt["feature_size"]],
                                                         dtype=tf.float64),
                                         tf.to_int32(same_speaker))  # [num_mentions, max_ant, emb]
            feature_emb_list.append(speaker_pair_emb)

            tiled_genre_emb = tf.tile(tf.expand_dims(tf.expand_dims(genre_emb, 0), 0),
                                      [num_mentions, max_antecedents, 1])  # [num_mentions, max_ant, emb]
            feature_emb_list.append(tiled_genre_emb)
github deepmipt / kpi2017 / deeppavlov / agents / coreference / models.py View on Github external
text_emb_list = [word_emb]

        if self.opt["char_embedding_size"] > 0:
            char_emb = tf.gather(
                tf.get_variable("char_embeddings", [len(self.char_dict), self.opt["char_embedding_size"]]),
                char_index, tf.float64)  # [num_sentences, max_sentence_length, max_word_length, emb]
            flattened_char_emb = tf.reshape(char_emb, [num_sentences * max_sentence_length, utils.shape(char_emb, 2),
                                                       utils.shape(char_emb, 3)])
            # [num_sentences * max_sentence_length, max_word_length, emb]

            flattened_aggregated_char_emb = utils.cnn(flattened_char_emb, self.opt["filter_widths"], self.opt[
                "filter_size"])  # [num_sentences * max_sentence_length, emb]
            aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb,
                                             [num_sentences,
                                              max_sentence_length,
                                              utils.shape(flattened_aggregated_char_emb, 1)])
            # [num_sentences, max_sentence_length, emb]

            text_emb_list.append(aggregated_char_emb)

        text_emb = tf.concat(text_emb_list, 2)
        text_emb = tf.nn.dropout(text_emb, self.lexical_dropout)

        text_len_mask = tf.sequence_mask(text_len, maxlen=max_sentence_length)
        text_len_mask = tf.reshape(text_len_mask, [num_sentences * max_sentence_length])

        text_outputs = self.encode_sentences(text_emb, text_len, text_len_mask)
        text_outputs = tf.nn.dropout(text_outputs, self.dropout)

        genre_emb = tf.gather(tf.get_variable("genre_embeddings", [len(self.genres), self.opt["feature_size"]],
                                              dtype=tf.float64),
                              genre)  # [emb]
github deepmipt / kpi2017 / deeppavlov / agents / coreference / models.py View on Github external
pair_emb = tf.concat([target_emb_tiled, antecedent_emb, similarity_emb, feature_emb], 2)
        # [num_mentions, max_ant, emb]

        with tf.variable_scope("iteration"):
            with tf.variable_scope("antecedent_scoring"):
                antecedent_scores = utils.ffnn(pair_emb, self.opt["ffnn_depth"], self.opt["ffnn_size"], 1,
                                               self.dropout)  # [num_mentions, max_ant, 1]
        antecedent_scores = tf.squeeze(antecedent_scores, 2)  # [num_mentions, max_ant]

        antecedent_mask = tf.log(
            tf.sequence_mask(antecedents_len, max_antecedents, dtype=tf.float64))  # [num_mentions, max_ant]
        antecedent_scores += antecedent_mask  # [num_mentions, max_ant]

        antecedent_scores += tf.expand_dims(mention_scores, 1) + tf.gather(mention_scores,
                                                                           antecedents)  # [num_mentions, max_ant]
        antecedent_scores = tf.concat([tf.zeros([utils.shape(mention_scores, 0), 1], dtype=tf.float64),
                                       antecedent_scores],
                                      1)  # [num_mentions, max_ant + 1]
        return antecedent_scores  # [num_mentions, max_ant + 1]
github deepmipt / kpi2017 / deeppavlov / agents / coreference / models.py View on Github external
genre_emb):
        """
        Forms a new tensor using special features, mention embeddings, mention scores, etc.,
        and passes it through a fully-connected network that computes antecedent scores.
        Args:
            mention_emb: [num_mentions, emb], a tensor that contains the embeddings of specific mentions
            mention_scores: [num_mentions, 1], output of the fully-connected network that computes the mention scores.
            antecedents: [], obtained from a C++ function
            antecedents_len: [], obtained from a C++ function
            mention_speaker_ids: [num_mentions, speaker_emb_size], tf.float64, Speaker IDs.
            genre_emb: [genre_emb_size], tf.float64, Genre

        Returns: tf.float64, [num_mentions, max_ant + 1], antecedent scores.

        """
        num_mentions = utils.shape(mention_emb, 0)
        max_antecedents = utils.shape(antecedents, 1)

        feature_emb_list = []

        if self.opt["use_metadata"]:
            antecedent_speaker_ids = tf.gather(mention_speaker_ids, antecedents)  # [num_mentions, max_ant]
            same_speaker = tf.equal(tf.expand_dims(mention_speaker_ids, 1),
                                    antecedent_speaker_ids)  # [num_mentions, max_ant]
            speaker_pair_emb = tf.gather(tf.get_variable("same_speaker_emb", [2, self.opt["feature_size"]],
                                                         dtype=tf.float64),
                                         tf.to_int32(same_speaker))  # [num_mentions, max_ant, emb]
            feature_emb_list.append(speaker_pair_emb)

            tiled_genre_emb = tf.tile(tf.expand_dims(tf.expand_dims(genre_emb, 0), 0),
                                      [num_mentions, max_antecedents, 1])  # [num_mentions, max_ant, emb]
            feature_emb_list.append(tiled_genre_emb)
github deepmipt / kpi2017 / deeppavlov / agents / coreference / models.py View on Github external
# assert gold_ends.shape == gold_starts.shape,\
        #     ('Amount of starts and ends of gold mentions are not equal: '
        #      'Length of gold starts: {1}; Length of gold ends: {0}'.format(gold_ends.shape, gold_starts.shape))

        num_sentences = tf.shape(word_emb)[0]
        max_sentence_length = tf.shape(word_emb)[1]

        text_emb_list = [word_emb]

        if self.opt["char_embedding_size"] > 0:
            char_emb = tf.gather(
                tf.get_variable("char_embeddings", [len(self.char_dict), self.opt["char_embedding_size"]],
                                dtype=tf.float64),
                char_index)  # [num_sentences, max_sentence_length, max_word_length, emb]
            flattened_char_emb = tf.reshape(char_emb, [num_sentences * max_sentence_length, utils.shape(char_emb, 2),
                                                       utils.shape(char_emb,
                                                                   3)])
            # [num_sentences * max_sentence_length, max_word_length, emb]

            flattened_aggregated_char_emb = utils.cnn(flattened_char_emb, self.opt["filter_widths"], self.opt[
                "filter_size"])  # [num_sentences * max_sentence_length, emb]
            aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb,
                                             [num_sentences,
                                              max_sentence_length,
                                              utils.shape(flattened_aggregated_char_emb, 1)])
            # [num_sentences, max_sentence_length, emb]

            text_emb_list.append(aggregated_char_emb)

        text_emb = tf.concat(text_emb_list, 2)
        text_emb = tf.nn.dropout(text_emb, self.lexical_dropout)
github deepmipt / kpi2017 / deeppavlov / agents / coreference / models.py View on Github external
antecedent_scores], loss
        List of predictions and scores, and Loss function value
        """
        self.dropout = 1 - (tf.cast(is_training, tf.float64) * self.opt["dropout_rate"])
        self.lexical_dropout = 1 - (tf.cast(is_training, tf.float64) * self.opt["lexical_dropout_rate"])

        num_sentences = tf.shape(word_emb)[0]
        max_sentence_length = tf.shape(word_emb)[1]

        text_emb_list = [word_emb]

        if self.opt["char_embedding_size"] > 0:
            char_emb = tf.gather(
                tf.get_variable("char_embeddings", [len(self.char_dict), self.opt["char_embedding_size"]]),
                char_index, tf.float64)  # [num_sentences, max_sentence_length, max_word_length, emb]
            flattened_char_emb = tf.reshape(char_emb, [num_sentences * max_sentence_length, utils.shape(char_emb, 2),
                                                       utils.shape(char_emb, 3)])
            # [num_sentences * max_sentence_length, max_word_length, emb]

            flattened_aggregated_char_emb = utils.cnn(flattened_char_emb, self.opt["filter_widths"], self.opt[
                "filter_size"])  # [num_sentences * max_sentence_length, emb]
            aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb,
                                             [num_sentences,
                                              max_sentence_length,
                                              utils.shape(flattened_aggregated_char_emb, 1)])
            # [num_sentences, max_sentence_length, emb]

            text_emb_list.append(aggregated_char_emb)

        text_emb = tf.concat(text_emb_list, 2)
        text_emb = tf.nn.dropout(text_emb, self.lexical_dropout)