How to use the mlagents.tf_utils.tf module in mlagents

To help you get started, we've selected a few mlagents examples based on popular ways mlagents.tf_utils.tf is used in public projects.
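
mlagents.tf_utils.tf is ml-agents' single entry point for TensorFlow: it exposes a TF 1.x-compatible tf module so that trainer code can keep using graph-mode constructs such as tf.variable_scope, tf.get_variable, and tf.Session across TensorFlow versions. As a minimal, illustrative sketch (the variable and op names below are made up and not part of ml-agents), the typical import and usage looks like this:

from mlagents.tf_utils import tf  # TF 1.x-compatible module used throughout ml-agents

# Build a tiny graph in the same style as the snippets below.
with tf.variable_scope("example"):
    beta = tf.get_variable(
        "beta", [], dtype=tf.float32, initializer=tf.ones_initializer()
    )
    estimate = tf.sigmoid(tf.random_normal([8]))
    loss = -beta * tf.reduce_mean(tf.log(estimate + 1e-7))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(loss))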


Example from StepNeverStop/RLs: mlagents/trainers/components/reward_signals/gail/model.py

            self.beta = tf.get_variable(
                "gail_beta",
                [],
                trainable=False,
                dtype=tf.float32,
                initializer=tf.ones_initializer(),
            )

        self.discriminator_loss = -tf.reduce_mean(
            tf.log(self.expert_estimate + EPSILON)
            + tf.log(1.0 - self.policy_estimate + EPSILON)
        )

        if self.use_vail:
            # KL divergence loss (encourage latent representation to be normal)
            self.kl_loss = tf.reduce_mean(
                -tf.reduce_sum(
                    1
                    + self.z_log_sigma_sq
                    - 0.5 * tf.square(self.z_mean_expert)
                    - 0.5 * tf.square(self.z_mean_policy)
                    - tf.exp(self.z_log_sigma_sq),
                    1,
                )
            )
            self.loss = (
                self.beta * (self.kl_loss - self.mutual_information)
                + self.discriminator_loss
            )
        else:
            self.loss = self.discriminator_loss
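
For reference, the loss built above is the standard GAIL discriminator objective, and the VAIL branch adds a beta-weighted information-bottleneck penalty. In the snippet's own terms:

    L_D = -E_expert[ log(D(s, a) + EPSILON) ] - E_policy[ log(1 - D(s, a) + EPSILON) ]
    L_VAIL = L_D + beta * (kl_loss - mutual_information)

where D is the discriminator output (expert_estimate on expert pairs, policy_estimate on policy pairs), kl_loss penalizes the latent encoding for drifting from a unit Gaussian, and mutual_information is the information constraint of the VAIL objective.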

Example from StepNeverStop/RLs: mlagents/trainers/sac/models.py

    def create_sac_value_head(
        self, stream_names, hidden_input, num_layers, h_size, scope
    ):
        """
        Creates one value estimator head for each reward signal in stream_names.
        Also creates the node corresponding to the mean of all the value heads in self.value.
        self.value_heads is a dictionary mapping each stream name to the node containing the value estimator head for that signal.
        :param stream_names: The list of reward signal names
        :param hidden_input: The last layer of the Critic. The heads will consist of one dense hidden layer on top
        of the hidden input.
        :param num_layers: Number of hidden layers for value network
        :param h_size: size of hidden layers for value network
        :param scope: TF scope for value network.
        """
        with tf.variable_scope(scope):
            value_hidden = self.create_vector_observation_encoder(
                hidden_input, h_size, self.activ_fn, num_layers, "encoder", False
            )
            if self.use_recurrent:
                value_hidden, memory_out = self.create_recurrent_encoder(
                    value_hidden,
                    self.value_memory_in,
                    self.sequence_length,
                    name="lstm_value",
                )
                self.value_memory_out = memory_out
            self.create_value_heads(stream_names, value_hidden)
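
The create_value_heads call at the end attaches one scalar output per reward signal on top of value_hidden and also exposes their mean as the overall value estimate (see the docstring above). A simplified, self-contained sketch of that pattern, not the exact ml-agents implementation:

from mlagents.tf_utils import tf

def create_value_heads_sketch(stream_names, hidden):
    # One dense scalar head per reward stream, plus their mean.
    heads = {
        name: tf.layers.dense(hidden, 1, name="{}_value".format(name))
        for name in stream_names
    }
    mean_value = tf.reduce_mean(list(heads.values()), axis=0)
    return heads, mean_value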

Example from Unity-Technologies/ml-agents: ml-agents/mlagents/trainers/ppo/models.py

            # v_opt_a (the unclipped squared error) is defined just above this excerpt.
            v_opt_b = tf.squared_difference(
                self.returns_holders[name], clipped_value_estimate
            )
            value_loss = tf.reduce_mean(
                tf.dynamic_partition(tf.maximum(v_opt_a, v_opt_b), self.mask, 2)[1]
            )
            value_losses.append(value_loss)
        self.value_loss = tf.reduce_mean(value_losses)

        r_theta = tf.exp(probs - old_probs)
        p_opt_a = r_theta * advantage
        p_opt_b = (
            tf.clip_by_value(r_theta, 1.0 - decay_epsilon, 1.0 + decay_epsilon)
            * advantage
        )
        self.policy_loss = -tf.reduce_mean(
            tf.dynamic_partition(tf.minimum(p_opt_a, p_opt_b), self.mask, 2)[1]
        )
        # For cleaner stats reporting
        self.abs_policy_loss = tf.abs(self.policy_loss)

        self.loss = (
            self.policy_loss
            + 0.5 * self.value_loss
            - decay_beta
            * tf.reduce_mean(tf.dynamic_partition(entropy, self.mask, 2)[1])
        )
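
This is the standard PPO clipped-surrogate objective, with tf.dynamic_partition(..., self.mask, 2)[1] restricting each mean to the unmasked (valid) timesteps. In the usual notation, noting that probs and old_probs are log-probabilities:

    r_t = exp(log pi(a_t|s_t) - log pi_old(a_t|s_t))
    L_policy = -E_t[ min(r_t * A_t, clip(r_t, 1 - epsilon, 1 + epsilon) * A_t) ]
    L_total = L_policy + 0.5 * L_value - beta * E_t[ H(pi(.|s_t)) ]

where epsilon (decay_epsilon) and beta (decay_beta) are hyperparameters decayed over the course of training.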

Example from StepNeverStop/RLs: mlagents/trainers/components/reward_signals/curiosity/model.py

"""
        Creates inverse model TensorFlow ops for Curiosity module.
        Predicts action taken given current and future encoded states.
        :param encoded_state: Tensor corresponding to encoded current state.
        :param encoded_next_state: Tensor corresponding to encoded next state.
        """
        combined_input = tf.concat([encoded_state, encoded_next_state], axis=1)
        hidden = tf.layers.dense(combined_input, 256, activation=ModelUtils.swish)
        if self.policy.brain.vector_action_space_type == "continuous":
            pred_action = tf.layers.dense(
                hidden, self.policy.act_size[0], activation=None
            )
            squared_difference = tf.reduce_sum(
                tf.squared_difference(pred_action, self.policy.selected_actions), axis=1
            )
            self.inverse_loss = tf.reduce_mean(
                tf.dynamic_partition(squared_difference, self.policy.mask, 2)[1]
            )
        else:
            pred_action = tf.concat(
                [
                    tf.layers.dense(
                        hidden, self.policy.act_size[i], activation=tf.nn.softmax
                    )
                    for i in range(len(self.policy.act_size))
                ],
                axis=1,
            )
            cross_entropy = tf.reduce_sum(
                -tf.log(pred_action + 1e-10) * self.policy.selected_actions, axis=1
            )
            self.inverse_loss = tf.reduce_mean(
                tf.dynamic_partition(cross_entropy, self.policy.mask, 2)[1]
            )
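
The tf.dynamic_partition(x, mask, 2)[1] idiom used in both branches (and in the PPO loss above) keeps only the entries whose mask value is 1, so padded or masked-out timesteps do not contribute to the mean. A tiny standalone illustration with made-up values:

from mlagents.tf_utils import tf

losses = tf.constant([0.5, 0.2, 0.9, 0.1])
mask = tf.constant([1, 1, 0, 1])  # 0 marks padded / masked-out steps
active = tf.dynamic_partition(losses, mask, 2)[1]  # keeps [0.5, 0.2, 0.1]
masked_mean = tf.reduce_mean(active)

with tf.Session() as sess:
    print(sess.run(masked_mean))  # ~0.2667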

Example from StepNeverStop/RLs: mlagents/trainers/components/reward_signals/curiosity/model.py

    def __init__(
        self, policy: TFPolicy, encoding_size: int = 128, learning_rate: float = 3e-4
    ):
        """
        Creates the curiosity model for the Curiosity reward Generator
        :param policy: The policy being trained
        :param encoding_size: The size of the encoding for the Curiosity module
        :param learning_rate: The learning rate for the curiosity module
        """
        self.encoding_size = encoding_size
        self.policy = policy
        self.next_visual_in: List[tf.Tensor] = []
        encoded_state, encoded_next_state = self.create_curiosity_encoders()
        self.create_inverse_model(encoded_state, encoded_next_state)
        self.create_forward_model(encoded_state, encoded_next_state)
        self.create_loss(learning_rate)
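
In ml-agents this constructor is invoked by the curiosity reward signal, which passes in the policy being trained; the forward model's prediction error then serves as the intrinsic reward. A hedged sketch of the wiring, assuming the class defined here is CuriosityModel as in upstream ml-agents (the helper function is illustrative):

from mlagents.trainers.components.reward_signals.curiosity.model import CuriosityModel

def build_curiosity_model(policy):
    # policy: an initialized TFPolicy; the hyperparameters match the defaults above.
    return CuriosityModel(policy, encoding_size=128, learning_rate=3e-4)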