How to use the mlagents.tf_utils.tf.placeholder function in mlagents

To help you get started, we’ve selected a few mlagents examples based on popular ways tf.placeholder is used in public projects.

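Before the project snippets, here is a minimal, self-contained sketch of the basic pattern. mlagents.tf_utils exposes TensorFlow under a single tf name (on TensorFlow 2.x installs it switches to the tensorflow.compat.v1 API), so tf.placeholder behaves as in graph-mode TensorFlow 1.x: it declares a typed, shaped graph input whose concrete value is supplied through feed_dict when a session runs. The shapes and names below are illustrative and not taken from ml-agents:

import numpy as np
from mlagents.tf_utils import tf

# A graph input for a batch of 8-dimensional vector observations.
# The leading None leaves the batch size flexible between session runs.
vector_obs = tf.placeholder(shape=[None, 8], dtype=tf.float32, name="vector_observation")

# A small dense layer on top of the placeholder, mirroring how the ml-agents
# models build encoders on top of their observation placeholders.
hidden = tf.layers.dense(vector_obs, 32, activation=tf.nn.relu)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Placeholders only receive concrete values at run time, via feed_dict.
    out = sess.run(hidden, feed_dict={vector_obs: np.zeros((4, 8), dtype=np.float32)})
    print(out.shape)  # (4, 32)
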
github Unity-Technologies / ml-agents / ml-agents / mlagents / trainers / ppo / models.py
        hidden = hidden_streams[0]

        if self.use_recurrent:
            self.prev_action = tf.placeholder(
                shape=[None, len(self.act_size)], dtype=tf.int32, name="prev_action"
            )
            prev_action_oh = tf.concat(
                [
                    tf.one_hot(self.prev_action[:, i], self.act_size[i])
                    for i in range(len(self.act_size))
                ],
                axis=1,
            )
            hidden = tf.concat([hidden, prev_action_oh], axis=1)

            self.memory_in = tf.placeholder(
                shape=[None, self.m_size], dtype=tf.float32, name="recurrent_in"
            )
            hidden, memory_out = self.create_recurrent_encoder(
                hidden, self.memory_in, self.sequence_length
            )
            self.memory_out = tf.identity(memory_out, name="recurrent_out")

        policy_branches = []
        for size in self.act_size:
            policy_branches.append(
                tf.layers.dense(
                    hidden,
                    size,
                    activation=None,
                    use_bias=False,
                    kernel_initializer=LearningModel.scaled_init(0.01),

github StepNeverStop / RLs / mlagents / trainers / ppo / models.py
        policy_branches = []
        for size in self.act_size:
            policy_branches.append(
                tf.layers.dense(
                    hidden,
                    size,
                    activation=None,
                    use_bias=False,
                    kernel_initializer=LearningModel.scaled_init(0.01),
                )
            )

        self.all_log_probs = tf.concat(policy_branches, axis=1, name="action_probs")

        self.action_masks = tf.placeholder(
            shape=[None, sum(self.act_size)], dtype=tf.float32, name="action_masks"
        )
        output, _, normalized_logits = self.create_discrete_action_masking_layer(
            self.all_log_probs, self.action_masks, self.act_size
        )

        self.output = tf.identity(output)
        self.normalized_logits = tf.identity(normalized_logits, name="action")

        self.create_value_heads(self.stream_names, hidden)

        self.action_holder = tf.placeholder(
            shape=[None, len(policy_branches)], dtype=tf.int32, name="action_holder"
        )
        self.action_oh = tf.concat(
            [

github StepNeverStop / RLs / mlagents / trainers / ppo / models.py
        self.all_log_probs = tf.concat(policy_branches, axis=1, name="action_probs")

        self.action_masks = tf.placeholder(
            shape=[None, sum(self.act_size)], dtype=tf.float32, name="action_masks"
        )
        output, _, normalized_logits = self.create_discrete_action_masking_layer(
            self.all_log_probs, self.action_masks, self.act_size
        )

        self.output = tf.identity(output)
        self.normalized_logits = tf.identity(normalized_logits, name="action")

        self.create_value_heads(self.stream_names, hidden)

        self.action_holder = tf.placeholder(
            shape=[None, len(policy_branches)], dtype=tf.int32, name="action_holder"
        )
        self.action_oh = tf.concat(
            [
                tf.one_hot(self.action_holder[:, i], self.act_size[i])
                for i in range(len(self.act_size))
            ],
            axis=1,
        )
        self.selected_actions = tf.stop_gradient(self.action_oh)

        self.all_old_log_probs = tf.placeholder(
            shape=[None, sum(self.act_size)], dtype=tf.float32, name="old_probabilities"
        )
        _, _, old_normalized_logits = self.create_discrete_action_masking_layer(
            self.all_old_log_probs, self.action_masks, self.act_size
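
The action_masks placeholder that appears throughout these snippets carries one availability flag per discrete action across all branches, and create_discrete_action_masking_layer applies it so unavailable actions are never selected (its implementation is not shown here). As an illustration of the idea only, the following simplified sketch zeroes out masked actions and renormalizes each branch; the names, branch sizes, and epsilon are assumptions, not the ml-agents implementation:

import numpy as np
from mlagents.tf_utils import tf

act_size = [3, 2]  # two discrete branches, sizes chosen for illustration

all_logits = tf.placeholder(shape=[None, sum(act_size)], dtype=tf.float32, name="action_logits")
action_masks = tf.placeholder(shape=[None, sum(act_size)], dtype=tf.float32, name="action_masks")

branch_idx = [0] + list(np.cumsum(act_size))
normalized_probs = []
for i in range(len(act_size)):
    logits = all_logits[:, branch_idx[i]:branch_idx[i + 1]]
    mask = action_masks[:, branch_idx[i]:branch_idx[i + 1]]
    # Zero out unavailable actions, then renormalize within the branch.
    probs = tf.nn.softmax(logits) * mask
    normalized_probs.append(probs / (tf.reduce_sum(probs, axis=1, keepdims=True) + 1e-10))
masked_probs = tf.concat(normalized_probs, axis=1)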

github StepNeverStop / RLs / mlagents / trainers / components / reward_signals / gail / model.py
        Creates the input layers for the discriminator
        """
        self.done_expert_holder = tf.placeholder(shape=[None], dtype=tf.float32)
        self.done_policy_holder = tf.placeholder(shape=[None], dtype=tf.float32)
        self.done_expert = tf.expand_dims(self.done_expert_holder, -1)
        self.done_policy = tf.expand_dims(self.done_policy_holder, -1)

        if self.policy.brain.vector_action_space_type == "continuous":
            action_length = self.policy.act_size[0]
            self.action_in_expert = tf.placeholder(
                shape=[None, action_length], dtype=tf.float32
            )
            self.expert_action = tf.identity(self.action_in_expert)
        else:
            action_length = len(self.policy.act_size)
            self.action_in_expert = tf.placeholder(
                shape=[None, action_length], dtype=tf.int32
            )
            self.expert_action = tf.concat(
                [
                    tf.one_hot(self.action_in_expert[:, i], act_size)
                    for i, act_size in enumerate(self.policy.act_size)
                ],
                axis=1,
            )

        encoded_policy_list = []
        encoded_expert_list = []

        if self.policy.vec_obs_size > 0:
            self.obs_in_expert = tf.placeholder(
                shape=[None, self.policy.vec_obs_size], dtype=tf.float32

github StepNeverStop / RLs / mlagents / trainers / ppo / models.py
    def create_dc_actor_critic(
        self, h_size: int, num_layers: int, vis_encode_type: EncoderType
    ) -> None:
        """
        Creates Discrete control actor-critic model.
        :param h_size: Size of hidden linear layers.
        :param num_layers: Number of hidden linear layers.
        """
        hidden_streams = self.create_observation_streams(
            1, h_size, num_layers, vis_encode_type
        )
        hidden = hidden_streams[0]

        if self.use_recurrent:
            self.prev_action = tf.placeholder(
                shape=[None, len(self.act_size)], dtype=tf.int32, name="prev_action"
            )
            prev_action_oh = tf.concat(
                [
                    tf.one_hot(self.prev_action[:, i], self.act_size[i])
                    for i in range(len(self.act_size))
                ],
                axis=1,
            )
            hidden = tf.concat([hidden, prev_action_oh], axis=1)

            self.memory_in = tf.placeholder(
                shape=[None, self.m_size], dtype=tf.float32, name="recurrent_in"
            )
            hidden, memory_out = self.create_recurrent_encoder(
                hidden, self.memory_in, self.sequence_length

github StepNeverStop / RLs / mlagents / trainers / components / bc / model.py
    def make_inputs(self) -> None:
        """
        Creates the input layers for the discriminator
        """
        self.done_expert = tf.placeholder(shape=[None, 1], dtype=tf.float32)
        self.done_policy = tf.placeholder(shape=[None, 1], dtype=tf.float32)

        if self.policy.brain.vector_action_space_type == "continuous":
            action_length = self.policy.act_size[0]
            self.action_in_expert = tf.placeholder(
                shape=[None, action_length], dtype=tf.float32
            )
            self.expert_action = tf.identity(self.action_in_expert)
        else:
            action_length = len(self.policy.act_size)
            self.action_in_expert = tf.placeholder(
                shape=[None, action_length], dtype=tf.int32
            )
            self.expert_action = tf.concat(
                [
                    tf.one_hot(self.action_in_expert[:, i], act_size)
                    for i, act_size in enumerate(self.policy.act_size)
                ],
                axis=1,
            )

github StepNeverStop / RLs / mlagents / trainers / components / reward_signals / curiosity / model.py
"curiosity_stream_{}_visual_obs_encoder".format(i),
                    True,
                )
                visual_encoders.append(encoded_visual)
                next_visual_encoders.append(encoded_next_visual)

            hidden_visual = tf.concat(visual_encoders, axis=1)
            hidden_next_visual = tf.concat(next_visual_encoders, axis=1)
            encoded_state_list.append(hidden_visual)
            encoded_next_state_list.append(hidden_next_visual)

        if self.policy.vec_obs_size > 0:
            # Create the encoder ops for current and next vector input.
            # Note that these encoders are siamese.
            # Create input op for next (t+1) vector observation.
            self.next_vector_in = tf.placeholder(
                shape=[None, self.policy.vec_obs_size],
                dtype=tf.float32,
                name="curiosity_next_vector_observation",
            )

            encoded_vector_obs = ModelUtils.create_vector_observation_encoder(
                self.policy.vector_in,
                self.encoding_size,
                ModelUtils.swish,
                2,
                "curiosity_vector_obs_encoder",
                False,
            )
            encoded_next_vector_obs = ModelUtils.create_vector_observation_encoder(
                self.next_vector_in,
                self.encoding_size,

github Unity-Technologies / ml-agents / ml-agents / mlagents / trainers / ppo / models.py
        self.create_value_heads(self.stream_names, hidden)

        self.action_holder = tf.placeholder(
            shape=[None, len(policy_branches)], dtype=tf.int32, name="action_holder"
        )
        self.action_oh = tf.concat(
            [
                tf.one_hot(self.action_holder[:, i], self.act_size[i])
                for i in range(len(self.act_size))
            ],
            axis=1,
        )
        self.selected_actions = tf.stop_gradient(self.action_oh)

        self.all_old_log_probs = tf.placeholder(
            shape=[None, sum(self.act_size)], dtype=tf.float32, name="old_probabilities"
        )
        _, _, old_normalized_logits = self.create_discrete_action_masking_layer(
            self.all_old_log_probs, self.action_masks, self.act_size
        )

        action_idx = [0] + list(np.cumsum(self.act_size))

        self.entropy = tf.reduce_sum(
            (
                tf.stack(
                    [
                        tf.nn.softmax_cross_entropy_with_logits_v2(
                            labels=tf.nn.softmax(
                                self.all_log_probs[:, action_idx[i] : action_idx[i + 1]]
                            ),
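
A pattern that recurs across these snippets is an integer tf.placeholder holding one action index per discrete branch, which is then one-hot encoded branch by branch and concatenated. The following self-contained sketch reproduces that pattern with arbitrary branch sizes and shows how the placeholder is fed at run time; the values are illustrative, not taken from ml-agents:

import numpy as np
from mlagents.tf_utils import tf

act_size = [3, 2]  # two discrete branches with 3 and 2 actions (illustrative)

# One integer action index per branch; the batch dimension stays open.
action_holder = tf.placeholder(shape=[None, len(act_size)], dtype=tf.int32, name="action_holder")

# One-hot encode each branch separately, then concatenate along the feature
# axis, giving a [batch, sum(act_size)] tensor that lines up with the
# action_masks and old_probabilities placeholders above.
action_oh = tf.concat(
    [tf.one_hot(action_holder[:, i], act_size[i]) for i in range(len(act_size))],
    axis=1,
)

with tf.Session() as sess:
    one_hot = sess.run(
        action_oh,
        feed_dict={action_holder: np.array([[2, 0], [1, 1]], dtype=np.int32)},
    )
    print(one_hot)  # [[0. 0. 1. 1. 0.] [0. 1. 0. 0. 1.]]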