How to use the deeppavlov.core.layers.tf_layers.variational_dropout function in deeppavlov

To help you get started, we’ve selected a few deeppavlov examples that show popular ways variational_dropout is used in public projects.
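
Variational dropout samples one dropout mask per sequence and reuses it across the time dimension, instead of drawing a fresh mask at every timestep. The sketch below illustrates the idea with plain TensorFlow 1.x primitives; it shows what the layer does conceptually and is not a copy of DeepPavlov's implementation.

import tensorflow as tf

def variational_dropout_sketch(units, keep_prob):
    # units: [batch_size, max_time, n_features]. The dropout mask has shape
    # [batch_size, 1, n_features], so the same features are zeroed at every
    # timestep of a sequence rather than being resampled per step.
    shape = tf.shape(units)
    noise_shape = [shape[0], 1, shape[2]]
    return tf.nn.dropout(units, keep_prob=keep_prob, noise_shape=noise_shape)

In the examples below, the DeepPavlov helper itself is called either positionally, variational_dropout(units, keep_prob), or with keep_prob as a keyword argument.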


github deepmipt/DeepPavlov: deeppavlov/models/go_bot/network.py (view on GitHub)
        _units = tf_layers.variational_dropout(_units,
                                               keep_prob=self._dropout_keep_prob)

        # recurrent network unit
        _lstm_cell = tf.nn.rnn_cell.LSTMCell(self.hidden_size)
        _utter_lengths = tf.cast(tf.reduce_sum(self._utterance_mask, axis=-1),
                                 tf.int32)
        # _output: [batch_size, max_time, hidden_size]
        # _state: tuple of two [batch_size, hidden_size]
        _output, _state = tf.nn.dynamic_rnn(_lstm_cell,
                                            _units,
                                            time_major=False,
                                            initial_state=self._initial_state,
                                            sequence_length=_utter_lengths)
        _output = tf.reshape(_output, (self._batch_size, -1, self.hidden_size))
        _output = tf_layers.variational_dropout(_output,
                                                keep_prob=self._dropout_keep_prob)
        # output projection
        _logits = tf.layers.dense(_output, self.action_size,
                                  kernel_regularizer=tf.nn.l2_loss,
                                  kernel_initializer=xav(), name='logits')
        return _logits, _state
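
Here self._dropout_keep_prob is a tensor fed at run time, so dropout can be switched off at inference. A minimal, hypothetical version of that wiring (the placeholder name, shapes, and values are assumptions, not DeepPavlov's exact code):

import tensorflow as tf
from deeppavlov.core.layers import tf_layers

# Hypothetical keep-prob wiring: defaults to 1.0 (no dropout); a smaller
# value is fed only during training steps.
dropout_keep_prob = tf.placeholder_with_default(1.0, shape=[], name='dropout_keep_prob')
utter_features = tf.placeholder(tf.float32, [None, None, 300], name='utterance_features')
dropped = tf_layers.variational_dropout(utter_features, keep_prob=dropout_keep_prob)

# training step:  sess.run(train_op, feed_dict={dropout_keep_prob: 0.5, ...})
# inference:      sess.run(outputs, feed_dict={...})  # keep_prob stays at 1.0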
github deepmipt/DeepPavlov: deeppavlov/models/ner/network.py (view on GitHub)
    def _build_rnn(self, units, n_hidden_list, cell_type, intra_layer_dropout, mask):
        sequence_lengths = tf.to_int32(tf.reduce_sum(mask, axis=1))
        for n, n_hidden in enumerate(n_hidden_list):
            units, _ = bi_rnn(units, n_hidden, cell_type=cell_type,
                              seq_lengths=sequence_lengths, name='Layer_' + str(n))
            units = tf.concat(units, -1)
            if intra_layer_dropout and n != len(n_hidden_list) - 1:
                units = variational_dropout(units, self._dropout_ph)
        return units
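
The same stacking pattern written with plain TensorFlow cells instead of DeepPavlov's bi_rnn helper, as a rough sketch of the "drop between layers, but not after the last one" idea (layer sizes and names are arbitrary):

import tensorflow as tf
from deeppavlov.core.layers.tf_layers import variational_dropout

def stacked_bi_gru(units, seq_lengths, n_hidden_list, keep_prob):
    # Stack bidirectional GRU layers; variational dropout is applied between
    # layers only, so the final layer's output is returned undropped.
    for n, n_hidden in enumerate(n_hidden_list):
        with tf.variable_scope('Layer_' + str(n)):
            fw_cell = tf.nn.rnn_cell.GRUCell(n_hidden)
            bw_cell = tf.nn.rnn_cell.GRUCell(n_hidden)
            (out_fw, out_bw), _ = tf.nn.bidirectional_dynamic_rnn(
                fw_cell, bw_cell, units, sequence_length=seq_lengths,
                dtype=tf.float32)
            units = tf.concat([out_fw, out_bw], axis=-1)
            if n != len(n_hidden_list) - 1:
                units = variational_dropout(units, keep_prob)
    return units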
github deepmipt/DeepPavlov: deeppavlov/models/squad/squad_refactor.py (view on GitHub)
                if not self.use_reattention:
                    match = dot_attention(qc_att, qc_att, mask=self.c_mask, att_size=self.attention_hidden_size,
                                          keep_prob=self.keep_prob_ph, use_gate=self.use_gated_attention,
                                          drop_diag=self.drop_diag_self_att, use_transpose_att=False,
                                          concat_inputs=self.concat_att_inputs)
                else:
                    match, B = dot_reattention(qc_att, qc_att, memory_mask=self.c_mask,
                                               inputs_mask=self.c_mask, att_size=self.attention_hidden_size,
                                               E=B, B=B, drop_diag=self.drop_diag_self_att,
                                               keep_prob=self.keep_prob_ph, concat_inputs=self.concat_att_inputs)

                if self.use_highway_after_selfatt:
                    # Z
                    # match = tf.layers.batch_normalization(match, training=self.is_train_ph)
                    match = highway_layer(variational_dropout(qc_att, keep_prob=self.keep_prob_ph),
                                          variational_dropout(match, keep_prob=self.keep_prob_ph),
                                          use_combinations=True, regularizer=tf.nn.l2_loss)

                if self.use_birnn_after_selfatt:
                    # R
                    rnn = self.GRU(num_layers=self.num_match_layers, num_units=self.hidden_size, batch_size=self.bs,
                                   input_size=match.get_shape().as_list()[-1],
                                   keep_prob=self.keep_prob_ph, share_layers=self.share_layers)
                    match = rnn(match, seq_len=self.c_len, concat_layers=self.concat_bigru_outputs)

                context_representations.append(match)
        
        if self.number_of_hops == 1:
            final_context_repr = context_representations[-1]
        else:
            with tf.variable_scope('aggregation'):
                context_representations = tf.concat(context_representations, axis=-1)
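
The highway step in this excerpt feeds variationally dropped versions of both the attention input and the attention output into highway_layer. A simplified, hypothetical gate with the same shape of computation (not the DeepPavlov helper, and assuming both tensors share their last dimension):

import tensorflow as tf
from deeppavlov.core.layers.tf_layers import variational_dropout

def gated_combine(x, y, keep_prob):
    # Drop both tensors with the same keep_prob, then mix them with a learned
    # sigmoid gate over their concatenation.
    x = variational_dropout(x, keep_prob=keep_prob)
    y = variational_dropout(y, keep_prob=keep_prob)
    n_features = x.get_shape().as_list()[-1]
    gate = tf.layers.dense(tf.concat([x, y], axis=-1), n_features,
                           activation=tf.sigmoid, name='gate')
    return gate * y + (1.0 - gate) * x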
github deepmipt/DeepPavlov: deeppavlov/models/ner/network.py (view on GitHub)
    def _build_cudnn_rnn(self, units, n_hidden_list, cell_type, intra_layer_dropout, mask):
        sequence_lengths = tf.to_int32(tf.reduce_sum(mask, axis=1))
        for n, n_hidden in enumerate(n_hidden_list):
            with tf.variable_scope(cell_type.upper() + '_' + str(n)):
                if cell_type.lower() == 'lstm':
                    units, _ = cudnn_bi_lstm(units, n_hidden, sequence_lengths)
                elif cell_type.lower() == 'gru':
                    units, _ = cudnn_bi_gru(units, n_hidden, sequence_lengths)
                else:
                    raise RuntimeError('Wrong cell type "{}"! Only "gru" and "lstm"!'.format(cell_type))
                units = tf.concat(units, -1)
                if intra_layer_dropout and n != len(n_hidden_list) - 1:
                    units = variational_dropout(units, self._dropout_ph)
        return units
github deepmipt/DeepPavlov: deeppavlov/models/go_bot/network.py (view on GitHub)
                    _attn_output = am.light_general_attention(
                        self._key,
                        self._emb_context,
                        hidden_size=self.attn.hidden_size,
                        projected_align=self.attn.projected_align)
                elif self.attn.type == 'light_bahdanau':
                    _attn_output = am.light_bahdanau_attention(
                        self._key,
                        self._emb_context,
                        hidden_size=self.attn.hidden_size,
                        projected_align=self.attn.projected_align)
                else:
                    raise ValueError("wrong value for attention mechanism type")
            _units = tf.concat([_units, _attn_output], -1)

        _units = tf_layers.variational_dropout(_units,
                                               keep_prob=self._dropout_keep_prob)

        # recurrent network unit
        _lstm_cell = tf.nn.rnn_cell.LSTMCell(self.hidden_size)
        _utter_lengths = tf.cast(tf.reduce_sum(self._utterance_mask, axis=-1),
                                 tf.int32)
        # _output: [batch_size, max_time, hidden_size]
        # _state: tuple of two [batch_size, hidden_size]
        _output, _state = tf.nn.dynamic_rnn(_lstm_cell,
                                            _units,
                                            time_major=False,
                                            initial_state=self._initial_state,
                                            sequence_length=_utter_lengths)
        _output = tf.reshape(_output, (self._batch_size, -1, self.hidden_size))
        _output = tf_layers.variational_dropout(_output,
                                                keep_prob=self._dropout_keep_prob)
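
In this longer excerpt from the same file, the attention output is first concatenated with the base features, and the shared-over-time mask is then applied to the combined tensor before the LSTM. The helper below restates just that step (the names are illustrative, not DeepPavlov's):

import tensorflow as tf
from deeppavlov.core.layers import tf_layers

def concat_attention_and_drop(units, attn_output, keep_prob):
    # Append attention features to the token features, then apply a single
    # dropout mask shared over the time dimension to the combined tensor.
    combined = tf.concat([units, attn_output], axis=-1)
    return tf_layers.variational_dropout(combined, keep_prob=keep_prob)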
github deepmipt/DeepPavlov: deeppavlov/models/squad/squad_refactor.py (view on GitHub)
        for i in range(self.number_of_hops):
            with tf.variable_scope('co-attention_{}'.format(i)):
                if not self.use_reattention:
                    qc_att = dot_attention(context_representations[-1], q, mask=self.q_mask,
                                           att_size=self.attention_hidden_size, keep_prob=self.keep_prob_ph,
                                           use_gate=self.use_gated_attention, use_transpose_att=self.use_transpose_att,
                                           concat_inputs=self.concat_att_inputs)
                else:
                    qc_att, E = dot_reattention(context_representations[-1], q, memory_mask=self.q_mask,
                                                inputs_mask=self.c_mask, att_size=self.attention_hidden_size,
                                                E=E, B=B,
                                                keep_prob=self.keep_prob_ph, concat_inputs=self.concat_att_inputs)

                if self.use_highway_after_coatt:
                    # qc_att = tf.layers.batch_normalization(qc_att, training=self.is_train_ph)
                    qc_att = highway_layer(variational_dropout(context_representations[-1], keep_prob=self.keep_prob_ph),
                                           variational_dropout(qc_att, keep_prob=self.keep_prob_ph),
                                           use_combinations=True, regularizer=tf.nn.l2_loss)

                if self.use_birnn_after_coatt:
                    rnn = self.GRU(num_layers=self.num_match_layers, num_units=self.hidden_size, batch_size=self.bs,
                                   input_size=qc_att.get_shape().as_list()[-1],
                                   keep_prob=self.keep_prob_ph, share_layers=self.share_layers)
                    qc_att = rnn(qc_att, seq_len=self.c_len, concat_layers=self.concat_bigru_outputs)

            with tf.variable_scope('self-attention_{}'.format(i)):
                if not self.use_reattention:
                    match = dot_attention(qc_att, qc_att, mask=self.c_mask, att_size=self.attention_hidden_size,
                                          keep_prob=self.keep_prob_ph, use_gate=self.use_gated_attention,
                                          drop_diag=self.drop_diag_self_att, use_transpose_att=False,
                                          concat_inputs=self.concat_att_inputs)
                else:
github deepmipt/DeepPavlov: deeppavlov/models/squad/utils_refactor.py (view on GitHub)
def dot_reattention(inputs, memory, memory_mask, inputs_mask, att_size, E=None, B=None, gamma_init=3, keep_prob=1.0,
                    drop_diag=False, concat_inputs=False, scope="dot_reattention"):
    # check reinforced mnemonic reader paper for more info about E, B and re-attention
    with tf.variable_scope(scope):
        BS, IL, IH = tf.unstack(tf.shape(inputs))
        BS, ML, MH = tf.unstack(tf.shape(memory))

        d_inputs = variational_dropout(inputs, keep_prob=keep_prob)
        d_memory = variational_dropout(memory, keep_prob=keep_prob)

        with tf.variable_scope("attention"):
            inputs_att = tf.layers.dense(d_inputs, att_size, use_bias=False,
                                         activation=tf.nn.relu,
                                         kernel_initializer=tf.contrib.layers.variance_scaling_initializer(),
                                         kernel_regularizer=tf.nn.l2_loss)
            memory_att = tf.layers.dense(d_memory, att_size, use_bias=False,
                                         activation=tf.nn.relu,
                                         kernel_initializer=tf.contrib.layers.variance_scaling_initializer(),
                                         kernel_regularizer=tf.nn.l2_loss)
            # BS x IL x ML
            logits = tf.matmul(inputs_att, tf.transpose(memory_att, [0, 2, 1])) / (att_size ** 0.5)

            if E is not None and B is not None:
                gamma = tf.Variable(gamma_init, dtype=tf.float32, trainable=True, name='gamma')
                E_softmax = tf.nn.softmax(softmax_mask(E,
github deepmipt/DeepPavlov: deeppavlov/models/squad/utils_refactor.py (view on GitHub)
def mnemonic_reader_answer_selection(q, context_repr, q_mask, c_mask, att_hidden_size, keep_prob, with_poolings=False):
    q_mask = tf.cast(q_mask, tf.float32)
    q_att = simple_attention(q, att_hidden_size, mask=q_mask, keep_prob=keep_prob)
    if with_poolings:
        q_mask_expand = tf.expand_dims(q_mask, axis=-1)
        q_max_pool = tf.reduce_max(softmax_mask(q, mask=q_mask_expand), axis=1)
        q_avg_pool = tf.reduce_sum(q * q_mask_expand, axis=1) / tf.expand_dims(tf.reduce_sum(q_mask, axis=-1), axis=-1)
        init_state = tf.concat([q_att, q_max_pool, q_avg_pool], axis=-1)
    else:
        init_state = q_att
    state = tf.layers.dense(init_state, units=context_repr.get_shape().as_list()[-1],
                            kernel_regularizer=tf.nn.l2_loss)
    context_repr = variational_dropout(context_repr, keep_prob=keep_prob)
    state = tf.nn.dropout(state, keep_prob=keep_prob)
    att, logits_st = attention(context_repr, state, att_hidden_size, c_mask, use_combinations=True, scope='st_att')
    state = highway_layer(state, att, use_combinations=True, regularizer=tf.nn.l2_loss)
    state = tf.nn.dropout(state, keep_prob=keep_prob)
    _, logits_end = attention(context_repr, state, att_hidden_size, c_mask, use_combinations=True, scope='end_att')
    return logits_st, logits_end
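
Note the split in the function above: the 3-D context tensor goes through variational_dropout (one mask per sequence, shared over time), while the 2-D pointer state uses plain tf.nn.dropout. A minimal side-by-side with assumed shapes:

import tensorflow as tf
from deeppavlov.core.layers.tf_layers import variational_dropout

keep_prob = 0.7
context_repr = tf.random_normal([32, 400, 150])  # [batch, time, features]
state = tf.random_normal([32, 150])              # [batch, features]

# Shared-over-time mask for the sequence tensor ...
context_repr = variational_dropout(context_repr, keep_prob=keep_prob)
# ... and an ordinary element-wise mask for the flat state vector.
state = tf.nn.dropout(state, keep_prob=keep_prob)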
github deepmipt/DeepPavlov: deeppavlov/models/seq2seq_go_bot/network_with_ner.py (view on GitHub)
    def _build_encoder(self, scope="Encoder"):
        with tf.variable_scope(scope):
            # _units: [batch_size, max_input_time, embedding_size]
            _units = variational_dropout(self._encoder_inputs, self._dropout_keep_prob)

            # _outputs: [batch_size, max_input_time, embedding_size, 2]
            # _state: [batch_size, hidden_size, 2]
            if self.encoder_use_cudnn:
                if (self.l2_regs[0] > 0) or (self.l2_regs[1] > 0):
                    log.warning("cuDNN RNN are not l2 regularizable")
                if self.encoder_cell_type.lower() == 'lstm':
                    _outputs, _state = cudnn_bi_lstm(_units,
                                                     self.hidden_size,
                                                     self._src_sequence_lengths)
                elif self.encoder_cell_type.lower() == 'gru':
                    _outputs, _state = cudnn_bi_gru(_units,
                                                    self.hidden_size,
                                                    self._src_sequence_lengths)
            else:
                _outputs, _state = bi_rnn(_units,