How to use the mlagents.tf_utils.tf.assign function in mlagents

To help you get started, we’ve selected a few mlagents examples, based on popular ways this function is used in public projects.

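Before the project snippets, here is a minimal, self-contained sketch of the call itself. It assumes only that mlagents.tf_utils re-exposes a TF1-style TensorFlow API (the snippets below rely on the same thing); the variable names are illustrative and not part of ml-agents.

from mlagents.tf_utils import tf

# A simple counter variable and an op that overwrites it when run.
counter = tf.Variable(0, name="counter", trainable=False, dtype=tf.int32)
set_counter = tf.assign(counter, 10)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(set_counter)     # applies the assignment
    print(sess.run(counter))  # 10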

github Unity-Technologies / ml-agents / ml-agents / mlagents / trainers / sac / models.py
def create_sac_optimizers(self):
        """
        Creates the Adam optimizers and update ops for SAC, including
        the policy, value, and entropy updates, as well as the target network update.
        """
        policy_optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
        entropy_optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
        value_optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)

        self.target_update_op = [
            tf.assign(target, (1 - self.tau) * target + self.tau * source)
            for target, source in zip(
                self.target_network.value_vars, self.policy_network.value_vars
            )
        ]
        LOGGER.debug("value_vars")
        self.print_all_vars(self.policy_network.value_vars)
        LOGGER.debug("targvalue_vars")
        self.print_all_vars(self.target_network.value_vars)
        LOGGER.debug("critic_vars")
        self.print_all_vars(self.policy_network.critic_vars)
        LOGGER.debug("q_vars")
        self.print_all_vars(self.policy_network.q_vars)
        LOGGER.debug("policy_vars")
        self.print_all_vars(self.policy_network.policy_vars)

        self.target_init_op = [
            tf.assign(target, source)
            for target, source in zip(
                self.target_network.value_vars, self.policy_network.value_vars
            )
        ]
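Each element of target_update_op above is a tf.assign that moves one target-network variable a small step tau toward its policy-network counterpart (a soft, or Polyak, target update), while target_init_op copies the source values outright. A self-contained sketch of the soft-update pattern with made-up variables rather than the SAC network classes:

from mlagents.tf_utils import tf

tau = 0.005
source_vars = [tf.Variable([1.0, 2.0]), tf.Variable([3.0])]
target_vars = [tf.Variable([0.0, 0.0]), tf.Variable([0.0])]

# One assign op per variable pair; running the list applies all of them.
soft_update = [
    tf.assign(target, (1.0 - tau) * target + tau * source)
    for target, source in zip(target_vars, source_vars)
]

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(soft_update)            # typically run once per training update
    print(sess.run(target_vars[0]))  # ~[0.005, 0.01]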
github StepNeverStop / RLs / mlagents / trainers / components / reward_signals / gail / model.py
def make_beta_update(self) -> None:
        """
        Creates the beta parameter and its updater for GAIL
        """

        new_beta = tf.maximum(
            self.beta + self.alpha * (self.kl_loss - self.mutual_information), EPSILON
        )
        with tf.control_dependencies([self.update_batch]):
            self.update_beta = tf.assign(self.beta, new_beta)
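The assign above implements GAIL's adaptive beta rule: beta moves by alpha times the gap between the measured KL term and the mutual-information target, and is clamped at EPSILON so it stays positive. The control dependency ensures the update only happens after update_batch has run. Below is a plain-Python restatement with made-up constants (alpha and the target are illustrative, not the ml-agents defaults):

EPSILON = 1e-7
alpha = 0.0005
mutual_information = 0.5  # target value for the KL term

def update_beta(beta, kl_loss):
    return max(beta + alpha * (kl_loss - mutual_information), EPSILON)

print(update_beta(beta=0.1, kl_loss=0.8))  # ~0.10015, beta grows when KL is above target
print(update_beta(beta=0.1, kl_loss=0.2))  # ~0.09985, beta shrinks when KL is below target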
github StepNeverStop / RLs / mlagents / trainers / models.py
        steps_increment = tf.shape(vector_input)[0]
        total_new_steps = tf.add(steps, steps_increment)

        # Compute the incremental update and divide by the number of new steps.
        input_to_old_mean = tf.subtract(vector_input, running_mean)
        new_mean = running_mean + tf.reduce_sum(
            input_to_old_mean / tf.cast(total_new_steps, dtype=tf.float32), axis=0
        )
        # Compute difference of input to the new mean for Welford update
        input_to_new_mean = tf.subtract(vector_input, new_mean)
        new_variance = running_variance + tf.reduce_sum(
            input_to_new_mean * input_to_old_mean, axis=0
        )
        update_mean = tf.assign(running_mean, new_mean)
        update_variance = tf.assign(running_variance, new_variance)
        update_norm_step = tf.assign(steps, total_new_steps)
        return tf.group([update_mean, update_variance, update_norm_step])
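The same batched, Welford-style update is easier to check outside TensorFlow. Below is an illustrative NumPy helper, not part of ml-agents; running_m2 stands in for what the snippet stores in running_variance, i.e. the running sum of squared deviations, which is divided by the step count when a standard deviation is actually needed.

import numpy as np

def update_normalizer(batch, running_mean, running_m2, steps):
    # Mirrors the TF ops above: incremental mean first, then the M2 accumulator.
    total_new_steps = steps + batch.shape[0]
    input_to_old_mean = batch - running_mean
    new_mean = running_mean + (input_to_old_mean / total_new_steps).sum(axis=0)
    input_to_new_mean = batch - new_mean
    new_m2 = running_m2 + (input_to_new_mean * input_to_old_mean).sum(axis=0)
    return new_mean, new_m2, total_new_steps

batch = np.array([[1.0, 2.0], [3.0, 4.0]])
mean, m2, steps = update_normalizer(batch, np.zeros(2), np.zeros(2), 0)
print(mean, m2 / steps)  # per-feature mean [2. 3.] and biased variance [1. 1.]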
github StepNeverStop / RLs / mlagents / trainers / sac / optimizer.py
        self.target_update_op = [
            tf.assign(target, (1 - self.tau) * target + self.tau * source)
            for target, source in zip(
                self.target_network.value_vars, self.policy_network.value_vars
            )
        ]
        logger.debug("value_vars")
        self.print_all_vars(self.policy_network.value_vars)
        logger.debug("targvalue_vars")
        self.print_all_vars(self.target_network.value_vars)
        logger.debug("critic_vars")
        self.print_all_vars(self.policy_network.critic_vars)
        logger.debug("q_vars")
        self.print_all_vars(self.policy_network.q_vars)
        logger.debug("policy_vars")
        policy_vars = self.policy.get_trainable_variables()
        self.print_all_vars(policy_vars)

        self.target_init_op = [
            tf.assign(target, source)
            for target, source in zip(
                self.target_network.value_vars, self.policy_network.value_vars
            )
        ]

        self.update_batch_policy = policy_optimizer.minimize(
            self.policy_loss, var_list=policy_vars
        )

        # Make sure policy is updated first, then value, then entropy.
        with tf.control_dependencies([self.update_batch_policy]):
            self.update_batch_value = value_optimizer.minimize(
                self.total_value_loss, var_list=self.policy_network.critic_vars
            )
            # Add entropy coefficient optimization operation
            with tf.control_dependencies([self.update_batch_value]):
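The nested tf.control_dependencies blocks are what enforce the policy-then-value-then-entropy ordering: an op created inside a block cannot run until the listed ops have run. A minimal self-contained sketch of the same mechanism with two hypothetical assigns:

from mlagents.tf_utils import tf

a = tf.Variable(0.0)
b = tf.Variable(0.0)

update_a = tf.assign(a, 1.0)
with tf.control_dependencies([update_a]):
    # update_b will not execute until update_a has executed.
    update_b = tf.assign(b, 2.0)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(update_b)       # running update_b also triggers update_a
    print(sess.run([a, b]))  # [1.0, 2.0]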
github StepNeverStop / RLs / mlagents / trainers / sac / models.py
def copy_normalization(self, mean, variance, steps):
        """
        Copies the mean, variance, and steps into the normalizers of the
        input of this SACNetwork. Used to copy the normalizer from the policy network
        to the target network.
        param mean: Tensor containing the mean.
        param variance: Tensor containing the variance
        param steps: Tensor containing the number of steps.
        """
        update_mean = tf.assign(self.running_mean, mean)
        update_variance = tf.assign(self.running_variance, variance)
        update_norm_step = tf.assign(self.normalization_steps, steps)
        return tf.group([update_mean, update_variance, update_norm_step])
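tf.group bundles the three assigns into a single op, so one sess.run applies every write at once. A self-contained sketch of that pattern with placeholder-fed values; the names below are illustrative, not the SACNetwork attributes:

from mlagents.tf_utils import tf

running_mean = tf.Variable([0.0, 0.0, 0.0])
running_variance = tf.Variable([1.0, 1.0, 1.0])
normalization_steps = tf.Variable(0, dtype=tf.int32)

new_mean = tf.placeholder(shape=[3], dtype=tf.float32)
new_variance = tf.placeholder(shape=[3], dtype=tf.float32)
new_steps = tf.placeholder(shape=[], dtype=tf.int32)

copy_op = tf.group([
    tf.assign(running_mean, new_mean),
    tf.assign(running_variance, new_variance),
    tf.assign(normalization_steps, new_steps),
])

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(copy_op, feed_dict={new_mean: [1.0, 2.0, 3.0],
                                 new_variance: [0.1, 0.2, 0.3],
                                 new_steps: 10})
    print(sess.run(running_mean))  # [1. 2. 3.]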
github StepNeverStop / RLs / mlagents / trainers / models.py
def create_global_steps():
        """Creates TF ops to track and increment global training step."""
        global_step = tf.Variable(
            0, name="global_step", trainable=False, dtype=tf.int32
        )
        steps_to_increment = tf.placeholder(
            shape=[], dtype=tf.int32, name="steps_to_increment"
        )
        increment_step = tf.assign(global_step, tf.add(global_step, steps_to_increment))
        return global_step, increment_step, steps_to_increment
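Assuming the create_global_steps shown above is in scope as a plain function, the three returned handles fit together like this (a usage sketch, not trainer code):

from mlagents.tf_utils import tf

global_step, increment_step, steps_to_increment = create_global_steps()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(increment_step, feed_dict={steps_to_increment: 5})
    sess.run(increment_step, feed_dict={steps_to_increment: 3})
    print(sess.run(global_step))  # 8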
github Unity-Technologies / ml-agents / ml-agents / mlagents / trainers / sac / models.py
        self.target_update_op = [
            tf.assign(target, (1 - self.tau) * target + self.tau * source)
            for target, source in zip(
                self.target_network.value_vars, self.policy_network.value_vars
            )
        ]
        LOGGER.debug("value_vars")
        self.print_all_vars(self.policy_network.value_vars)
        LOGGER.debug("targvalue_vars")
        self.print_all_vars(self.target_network.value_vars)
        LOGGER.debug("critic_vars")
        self.print_all_vars(self.policy_network.critic_vars)
        LOGGER.debug("q_vars")
        self.print_all_vars(self.policy_network.q_vars)
        LOGGER.debug("policy_vars")
        self.print_all_vars(self.policy_network.policy_vars)

        self.target_init_op = [
            tf.assign(target, source)
            for target, source in zip(
                self.target_network.value_vars, self.policy_network.value_vars
            )
        ]

        self.update_batch_policy = policy_optimizer.minimize(
            self.policy_loss, var_list=self.policy_network.policy_vars
        )

        # Make sure policy is updated first, then value, then entropy.
        with tf.control_dependencies([self.update_batch_policy]):
            self.update_batch_value = value_optimizer.minimize(
                self.total_value_loss, var_list=self.policy_network.critic_vars
            )
            # Add entropy coefficient optimization operation
            with tf.control_dependencies([self.update_batch_value]):