How to use the mlagents.trainers.models.ModelUtils class in mlagents

To help you get started, we’ve selected a few ModelUtils examples from the mlagents trainers, based on popular ways it is used in public projects.

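Before diving into the snippets, a minimal sketch of importing ModelUtils. It is a collection of static TensorFlow helpers (activations, encoders, schedules, masking utilities), so no instance is needed; the import path matches the module named in the title, and EncoderType is assumed to live alongside it, as it does in the examples below.

from mlagents.trainers.models import ModelUtils, EncoderType

# All helpers are static methods on the class; no instantiation required.
activation = ModelUtils.swish
encoder_fn = ModelUtils.get_encoder_for_type(EncoderType.SIMPLE)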

github StepNeverStop / RLs / mlagents / trainers / sac / optimizer.py
vis_encode_type=vis_encode_type,
                )
                self.target_network = SACTargetNetwork(
                    policy=self.policy,
                    m_size=self.policy.m_size,  # 1x policy.m_size
                    h_size=h_size,
                    normalize=self.policy.normalize,
                    use_recurrent=self.policy.use_recurrent,
                    num_layers=num_layers,
                    stream_names=stream_names,
                    vis_encode_type=vis_encode_type,
                )
                # The optimizer's m_size is 3 times the policy (Q1, Q2, and Value)
                self.m_size = 3 * self.policy.m_size
                self._create_inputs_and_outputs()
                self.learning_rate = ModelUtils.create_schedule(
                    lr_schedule,
                    lr,
                    self.policy.global_step,
                    int(max_step),
                    min_value=1e-10,
                )
                self._create_losses(
                    self.policy_network.q1_heads,
                    self.policy_network.q2_heads,
                    lr,
                    int(max_step),
                    stream_names,
                    discrete=not self.policy.use_continuous_act,
                )
                self._create_sac_optimizer_ops()
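The snippet above uses ModelUtils.create_schedule to build a decaying learning-rate tensor for the SAC optimizer. A minimal sketch of the same call in isolation; ScheduleType.LINEAR, the step variable, and the numeric values are assumptions for illustration, not taken from the snippet.

import tensorflow as tf
from mlagents.trainers.models import ModelUtils, ScheduleType

global_step = tf.Variable(0, trainable=False, dtype=tf.int32, name="global_step")
learning_rate = ModelUtils.create_schedule(
    ScheduleType.LINEAR,  # assumed schedule type; decays toward min_value
    3.0e-4,               # initial learning rate
    global_step,          # training step counter
    int(5.0e5),           # max_step at which the decay bottoms out
    min_value=1e-10,
)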
github StepNeverStop / RLs / mlagents / trainers / models.py
def get_encoder_for_type(encoder_type: EncoderType) -> EncoderFunction:
        ENCODER_FUNCTION_BY_TYPE = {
            EncoderType.SIMPLE: ModelUtils.create_visual_observation_encoder,
            EncoderType.NATURE_CNN: ModelUtils.create_nature_cnn_visual_observation_encoder,
            EncoderType.RESNET: ModelUtils.create_resnet_visual_observation_encoder,
        }
        return ENCODER_FUNCTION_BY_TYPE.get(
            encoder_type, ModelUtils.create_visual_observation_encoder
        )
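The encoder builder returned by get_encoder_for_type takes the argument order used in the curiosity example further down (image tensor, h_size, activation, num_layers, scope, reuse). A minimal sketch, with an assumed 84x84x3 placeholder and illustrative names:

import tensorflow as tf
from mlagents.trainers.models import ModelUtils, EncoderType

visual_in = tf.placeholder(shape=[None, 84, 84, 3], dtype=tf.float32, name="visual_observation_0")
encoder_fn = ModelUtils.get_encoder_for_type(EncoderType.NATURE_CNN)
hidden = encoder_fn(
    visual_in,          # image input
    256,                # h_size of the dense layer after the conv stack
    ModelUtils.swish,   # activation
    1,                  # num_layers
    "visual_obs_encoder",
    False,              # reuse
)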
github StepNeverStop / RLs / mlagents / trainers / components / reward_signals / gail / model.py
Creates the encoder for the discriminator
        :param state_in: The encoded observation input
        :param action_in: The action input
        :param done_in: The done flags input
        :param reuse: If true, the weights will be shared with the previous encoder created
        """
        with tf.variable_scope("GAIL_model"):
            if self.use_actions:
                concat_input = tf.concat([state_in, action_in, done_in], axis=1)
            else:
                concat_input = state_in

            hidden_1 = tf.layers.dense(
                concat_input,
                self.h_size,
                activation=ModelUtils.swish,
                name="gail_d_hidden_1",
                reuse=reuse,
            )

            hidden_2 = tf.layers.dense(
                hidden_1,
                self.h_size,
                activation=ModelUtils.swish,
                name="gail_d_hidden_2",
                reuse=reuse,
            )

            z_mean = None
            if self.use_vail:
                # Latent representation
                z_mean = tf.layers.dense(
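In the discriminator above, ModelUtils.swish is simply passed as the activation for tf.layers.dense. A minimal standalone sketch of that pattern; the feature placeholder and layer size are illustrative.

import tensorflow as tf
from mlagents.trainers.models import ModelUtils

features = tf.placeholder(shape=[None, 64], dtype=tf.float32, name="features")
hidden = tf.layers.dense(
    features,
    128,
    activation=ModelUtils.swish,  # swish(x) = x * sigmoid(x)
    name="hidden_1",
)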
github StepNeverStop / RLs / mlagents / trainers / ppo / optimizer.py
self.memory_out = memory_value_out
        else:
            hidden_value = hidden_stream

        self.value_heads, self.value = ModelUtils.create_value_heads(
            self.stream_names, hidden_value
        )

        self.all_old_log_probs = tf.placeholder(
            shape=[None, sum(self.policy.act_size)],
            dtype=tf.float32,
            name="old_probabilities",
        )

        # Break old log probs into separate branches
        old_log_prob_branches = ModelUtils.break_into_branches(
            self.all_old_log_probs, self.policy.act_size
        )

        _, _, old_normalized_logits = ModelUtils.create_discrete_action_masking_layer(
            old_log_prob_branches, self.policy.action_masks, self.policy.act_size
        )

        action_idx = [0] + list(np.cumsum(self.policy.act_size))

        self.old_log_probs = tf.reduce_sum(
            (
                tf.stack(
                    [
                        -tf.nn.softmax_cross_entropy_with_logits_v2(
                            labels=self.policy.selected_actions[
                                :, action_idx[i] : action_idx[i + 1]
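Two ModelUtils helpers carry most of the weight above: create_value_heads builds one value output per reward stream, and break_into_branches splits a flat tensor by discrete-action branch sizes. A minimal sketch with assumed stream names and branch sizes:

import tensorflow as tf
from mlagents.trainers.models import ModelUtils

hidden_value = tf.placeholder(shape=[None, 128], dtype=tf.float32)
# Returns a dict of per-stream value tensors and their mean.
value_heads, value = ModelUtils.create_value_heads(["extrinsic", "curiosity"], hidden_value)

act_size = [3, 2]  # two discrete branches with 3 and 2 actions
flat_log_probs = tf.placeholder(shape=[None, sum(act_size)], dtype=tf.float32)
log_prob_branches = ModelUtils.break_into_branches(flat_log_probs, act_size)  # list of two tensors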
github StepNeverStop / RLs / mlagents / trainers / components / reward_signals / curiosity / model.py
"""
        Creates state encoders for current and future observations.
        Used for implementation of Curiosity-driven Exploration by Self-supervised Prediction
        See https://arxiv.org/abs/1705.05363 for more details.
        :return: current and future state encoder tensors.
        """
        encoded_state_list = []
        encoded_next_state_list = []

        if self.policy.vis_obs_size > 0:
            self.next_visual_in = []
            visual_encoders = []
            next_visual_encoders = []
            for i in range(self.policy.vis_obs_size):
                # Create input ops for next (t+1) visual observations.
                next_visual_input = ModelUtils.create_visual_input(
                    self.policy.brain.camera_resolutions[i],
                    name="curiosity_next_visual_observation_" + str(i),
                )
                self.next_visual_in.append(next_visual_input)

                # Create the encoder ops for current and next visual input.
                # Note that these encoders are siamese.
                encoded_visual = ModelUtils.create_visual_observation_encoder(
                    self.policy.visual_in[i],
                    self.encoding_size,
                    ModelUtils.swish,
                    1,
                    "curiosity_stream_{}_visual_obs_encoder".format(i),
                    False,
                )
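As the comment above notes, the current- and next-observation encoders are siamese: they share weights by reusing the same variable scope. A minimal sketch of that pattern, assuming illustrative placeholders in place of the policy's real visual inputs:

import tensorflow as tf
from mlagents.trainers.models import ModelUtils

obs_t = tf.placeholder(shape=[None, 84, 84, 3], dtype=tf.float32, name="visual_obs_t")
obs_t1 = tf.placeholder(shape=[None, 84, 84, 3], dtype=tf.float32, name="visual_obs_t1")

encoded_t = ModelUtils.create_visual_observation_encoder(
    obs_t, 128, ModelUtils.swish, 1, "curiosity_stream_0_visual_obs_encoder", False
)
encoded_t1 = ModelUtils.create_visual_observation_encoder(
    obs_t1, 128, ModelUtils.swish, 1, "curiosity_stream_0_visual_obs_encoder", True  # reuse weights
)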
github StepNeverStop / RLs / mlagents / trainers / policy / nn_policy.py
self,
        visual_in: List[tf.Tensor],
        vector_in: tf.Tensor,
        h_size: int,
        num_layers: int,
        vis_encode_type: EncoderType,
    ) -> tf.Tensor:
        """
        Creates an encoder for visual and vector observations.
        :param h_size: Size of hidden linear layers.
        :param num_layers: Number of hidden linear layers.
        :param vis_encode_type: Type of visual encoder to use if visual input.
        :return: The hidden layer (tf.Tensor) after the encoder.
        """
        with tf.variable_scope("policy"):
            encoded = ModelUtils.create_observation_streams(
                self.visual_in,
                self.processed_vector_in,
                1,
                h_size,
                num_layers,
                vis_encode_type,
            )[0]
        return encoded
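create_observation_streams bundles the vector and visual encoders into one hidden stream per consumer; the policy above asks for a single stream and takes element [0]. A minimal vector-only sketch of the same call; the placeholder shape, h_size, and layer count are illustrative.

import tensorflow as tf
from mlagents.trainers.models import ModelUtils, EncoderType

vector_in = tf.placeholder(shape=[None, 8], dtype=tf.float32, name="vector_observation")
hidden = ModelUtils.create_observation_streams(
    [],                  # no visual inputs
    vector_in,
    1,                   # num_streams
    128,                 # h_size
    2,                   # num_layers
    EncoderType.SIMPLE,
)[0]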
github StepNeverStop / RLs / mlagents / trainers / models.py
num_streams: int,
        h_size: int,
        num_layers: int,
        vis_encode_type: EncoderType = EncoderType.SIMPLE,
        stream_scopes: List[str] = None,
    ) -> List[tf.Tensor]:
        """
        Creates encoding stream for observations.
        :param num_streams: Number of streams to create.
        :param h_size: Size of hidden linear layers in stream.
        :param num_layers: Number of hidden linear layers in stream.
        :param stream_scopes: List of strings (length == num_streams), which contains
            the scopes for each of the streams. None if all under the same TF scope.
        :return: List of encoded streams.
        """
        activation_fn = ModelUtils.swish
        vector_observation_input = vector_in

        final_hiddens = []
        for i in range(num_streams):
            # Pick the encoder function based on the EncoderType
            create_encoder_func = ModelUtils.get_encoder_for_type(vis_encode_type)

            visual_encoders = []
            hidden_state, hidden_visual = None, None
            _scope_add = stream_scopes[i] if stream_scopes else ""
            if len(visual_in) > 0:
                for j, vis_in in enumerate(visual_in):
                    ModelUtils._check_resolution_for_encoder(vis_in, vis_encode_type)
                    encoded_visual = create_encoder_func(
                        vis_in,
                        h_size,
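The stream_scopes parameter documented above lets each stream live under its own TF variable scope, which is how separate networks (for example policy and value) keep their encoder weights apart. A minimal sketch, assuming illustrative scope prefixes:

import tensorflow as tf
from mlagents.trainers.models import ModelUtils, EncoderType

vector_in = tf.placeholder(shape=[None, 8], dtype=tf.float32)
policy_stream, value_stream = ModelUtils.create_observation_streams(
    [],                      # visual_in
    vector_in,
    2,                       # num_streams
    256,                     # h_size
    2,                       # num_layers
    EncoderType.SIMPLE,
    ["policy/", "value/"],   # stream_scopes, one prefix per stream (assumed names)
)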
github StepNeverStop / RLs / mlagents / trainers / components / reward_signals / gail / model.py
tf.one_hot(self.action_in_expert[:, i], act_size)
                    for i, act_size in enumerate(self.policy.act_size)
                ],
                axis=1,
            )

        encoded_policy_list = []
        encoded_expert_list = []

        if self.policy.vec_obs_size > 0:
            self.obs_in_expert = tf.placeholder(
                shape=[None, self.policy.vec_obs_size], dtype=tf.float32
            )
            if self.policy.normalize:
                encoded_expert_list.append(
                    ModelUtils.normalize_vector_obs(
                        self.obs_in_expert,
                        self.policy.running_mean,
                        self.policy.running_variance,
                        self.policy.normalization_steps,
                    )
                )
                encoded_policy_list.append(self.policy.processed_vector_in)
            else:
                encoded_expert_list.append(self.obs_in_expert)
                encoded_policy_list.append(self.policy.vector_in)

        if self.policy.vis_obs_size > 0:
            self.expert_visual_in: List[tf.Tensor] = []
            visual_policy_encoders = []
            visual_expert_encoders = []
            for i in range(self.policy.vis_obs_size):
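The expert-observation branch above runs ModelUtils.normalize_vector_obs with the policy's running statistics. A minimal sketch of that call, with freshly created (illustrative) variables standing in for the policy's own running mean, variance, and step counter:

import tensorflow as tf
from mlagents.trainers.models import ModelUtils

vec_obs_size = 8
obs_in = tf.placeholder(shape=[None, vec_obs_size], dtype=tf.float32)
running_mean = tf.Variable(tf.zeros([vec_obs_size]), trainable=False, dtype=tf.float32)
running_variance = tf.Variable(tf.ones([vec_obs_size]), trainable=False, dtype=tf.float32)
normalization_steps = tf.Variable(1, trainable=False, dtype=tf.int32)

normalized_obs = ModelUtils.normalize_vector_obs(
    obs_in, running_mean, running_variance, normalization_steps
)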
github StepNeverStop / RLs / mlagents / trainers / components / reward_signals / curiosity / model.py
def create_forward_model(
        self, encoded_state: tf.Tensor, encoded_next_state: tf.Tensor
    ) -> None:
        """
        Creates forward model TensorFlow ops for Curiosity module.
        Predicts encoded future state based on encoded current state and given action.
        :param encoded_state: Tensor corresponding to encoded current state.
        :param encoded_next_state: Tensor corresponding to encoded next state.
        """
        combined_input = tf.concat(
            [encoded_state, self.policy.selected_actions], axis=1
        )
        hidden = tf.layers.dense(combined_input, 256, activation=ModelUtils.swish)
        pred_next_state = tf.layers.dense(
            hidden,
            self.encoding_size
            * (self.policy.vis_obs_size + int(self.policy.vec_obs_size > 0)),
            activation=None,
        )
        squared_difference = 0.5 * tf.reduce_sum(
            tf.squared_difference(pred_next_state, encoded_next_state), axis=1
        )
        self.intrinsic_reward = squared_difference
        self.forward_loss = tf.reduce_mean(
            tf.dynamic_partition(squared_difference, self.policy.mask, 2)[1]
        )
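The forward_loss above averages only over active agents: tf.dynamic_partition splits the per-sample squared errors by the 0/1 mask, and partition [1] keeps the masked-in samples. A minimal sketch with illustrative values:

import tensorflow as tf

per_sample_loss = tf.constant([0.5, 1.0, 2.0, 4.0])
mask = tf.constant([1, 0, 1, 1], dtype=tf.int32)   # 1 = active sample
active_losses = tf.dynamic_partition(per_sample_loss, mask, 2)[1]  # [0.5, 2.0, 4.0]
forward_loss = tf.reduce_mean(active_losses)       # mean over active samples only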