How to use mlagents - 10 common examples

To help you get started, we've selected a few mlagents examples based on popular ways the package is used in public projects.

github Unity-Technologies / ml-agents / ml-agents / mlagents / trainers / sac / models.py (View on Github)
        # We assume m_size is divisible by 4
        # Create the non-Policy inputs
        # Use a default placeholder here so nothing has to be provided during
        # Barracuda inference. Note that the default value is just the tiled input
        # for the policy, which is thrown away.
        three_fourths_m_size = m_size * 3 // 4
        self.other_memory_in = tf.placeholder_with_default(
            input=tf.tile(self.inference_memory_in, [1, 3]),
            shape=[None, three_fourths_m_size],
            name="other_recurrent_in",
        )

        # Concat and use this as the "placeholder"
        # for training
        self.memory_in = tf.concat(
            [self.other_memory_in, self.inference_memory_in], axis=1
        )

        # Re-break-up for each network
        num_mems = 4
        mem_ins = []
        for i in range(num_mems):
            _start = m_size // num_mems * i
            _end = m_size // num_mems * (i + 1)
            mem_ins.append(self.memory_in[:, _start:_end])
        self.value_memory_in = mem_ins[0]
        self.q1_memory_in = mem_ins[1]
        self.q2_memory_in = mem_ins[2]
        self.policy_memory_in = mem_ins[3]
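
The excerpt combines two tricks. tf.placeholder_with_default lets Barracuda inference run without feeding the non-policy memories (the tiled default is computed from the policy memory and simply thrown away), and the concatenated memory is then sliced back into four equal segments, one per network (value, q1, q2, policy). Below is a minimal NumPy sketch of the slicing arithmetic; the function and names are ours for illustration, not ml-agents API.

    import numpy as np

    def split_memory(memory_in, num_mems=4):
        # memory_in: [batch, m_size]; m_size must be divisible by num_mems.
        m_size = memory_in.shape[1]
        assert m_size % num_mems == 0, "m_size must be divisible by num_mems"
        seg = m_size // num_mems
        # Slice the concatenated memory into per-network segments, mirroring
        # value / q1 / q2 / policy in the excerpt above.
        return [memory_in[:, i * seg:(i + 1) * seg] for i in range(num_mems)]

    value_mem, q1_mem, q2_mem, policy_mem = split_memory(np.zeros((2, 16)))
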
github StepNeverStop / RLs / mlagents / trainers / sac / models.py (View on Github)
        # We assume m_size is divisible by 4
        # Create the non-Policy inputs
        # Use a default placeholder here so nothing has to be provided during
        # Barracuda inference. Note that the default value is just the tiled input
        # for the policy, which is thrown away.
        three_fourths_m_size = m_size * 3 // 4
        self.other_memory_in = tf.placeholder_with_default(
            input=tf.tile(self.inference_memory_in, [1, 3]),
            shape=[None, three_fourths_m_size],
            name="other_recurrent_in",
        )

        # Concat and use this as the "placeholder"
        # for training
        self.memory_in = tf.concat(
            [self.other_memory_in, self.inference_memory_in], axis=1
        )

        # Re-break-up for each network
        num_mems = 4
        mem_ins = []
        for i in range(num_mems):
            _start = m_size // num_mems * i
            _end = m_size // num_mems * (i + 1)
            mem_ins.append(self.memory_in[:, _start:_end])
        self.value_memory_in = mem_ins[0]
        self.q1_memory_in = mem_ins[1]
        self.q2_memory_in = mem_ins[2]
        self.policy_memory_in = mem_ins[3]
github Unity-Technologies / ml-agents / ml-agents / mlagents / trainers / ppo / models.py (View on Github)
        hidden = hidden_streams[0]

        if self.use_recurrent:
            self.prev_action = tf.placeholder(
                shape=[None, len(self.act_size)], dtype=tf.int32, name="prev_action"
            )
            prev_action_oh = tf.concat(
                [
                    tf.one_hot(self.prev_action[:, i], self.act_size[i])
                    for i in range(len(self.act_size))
                ],
                axis=1,
            )
            hidden = tf.concat([hidden, prev_action_oh], axis=1)

            self.memory_in = tf.placeholder(
                shape=[None, self.m_size], dtype=tf.float32, name="recurrent_in"
            )
            hidden, memory_out = self.create_recurrent_encoder(
                hidden, self.memory_in, self.sequence_length
            )
            self.memory_out = tf.identity(memory_out, name="recurrent_out")

        policy_branches = []
        for size in self.act_size:
            policy_branches.append(
                tf.layers.dense(
                    hidden,
                    size,
                    activation=None,
                    use_bias=False,
                    kernel_initializer=LearningModel.scaled_init(0.01),
                )
            )
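
When use_recurrent is set, the previous discrete action is one-hot encoded branch by branch and concatenated onto the hidden features before the LSTM, so the policy can condition on what it just did. A self-contained NumPy sketch of that encoding step follows; it stands in for the tf.one_hot/tf.concat pattern above, and the names are illustrative.

    import numpy as np

    def one_hot_prev_actions(prev_action, act_size):
        # prev_action: [batch, num_branches] integer action indices, one
        # column per discrete branch; act_size lists each branch's width.
        batch = prev_action.shape[0]
        encoded = []
        for i, size in enumerate(act_size):
            oh = np.zeros((batch, size), dtype=np.float32)
            oh[np.arange(batch), prev_action[:, i]] = 1.0
            encoded.append(oh)
        # Concatenate along the feature axis, like tf.concat(..., axis=1).
        return np.concatenate(encoded, axis=1)

    # Two branches of sizes 3 and 2 give a width-5 encoding.
    print(one_hot_prev_actions(np.array([[2, 0], [1, 1]]), [3, 2]).shape)  # (2, 5)
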
github StepNeverStop / RLs / mlagents / trainers / ppo / models.py (View on Github)
        policy_branches = []
        for size in self.act_size:
            policy_branches.append(
                tf.layers.dense(
                    hidden,
                    size,
                    activation=None,
                    use_bias=False,
                    kernel_initializer=LearningModel.scaled_init(0.01),
                )
            )

        self.all_log_probs = tf.concat(policy_branches, axis=1, name="action_probs")

        self.action_masks = tf.placeholder(
            shape=[None, sum(self.act_size)], dtype=tf.float32, name="action_masks"
        )
        output, _, normalized_logits = self.create_discrete_action_masking_layer(
            self.all_log_probs, self.action_masks, self.act_size
        )

        self.output = tf.identity(output)
        self.normalized_logits = tf.identity(normalized_logits, name="action")

        self.create_value_heads(self.stream_names, hidden)

        self.action_holder = tf.placeholder(
            shape=[None, len(policy_branches)], dtype=tf.int32, name="action_holder"
        )
        self.action_oh = tf.concat(
            [
                tf.one_hot(self.action_holder[:, i], self.act_size[i])
                for i in range(len(self.act_size))
            ],
            axis=1,
        )
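
The masking layer zeroes the probability of forbidden actions and renormalizes each branch, so sampling can never select a masked action. The real create_discrete_action_masking_layer also returns sampled actions and normalized log-probabilities; the sketch below shows only the core masked-softmax idea, in NumPy with illustrative names.

    import numpy as np

    def masked_branch_softmax(logits, mask, act_size):
        # logits, mask: [batch, sum(act_size)]; mask holds 1.0 for allowed
        # actions and 0.0 for forbidden ones. Each branch is renormalized
        # separately so masked actions end up with zero probability.
        outputs, start = [], 0
        for size in act_size:
            branch = logits[:, start:start + size]
            allowed = mask[:, start:start + size]
            probs = np.exp(branch - branch.max(axis=1, keepdims=True)) * allowed
            outputs.append(probs / (probs.sum(axis=1, keepdims=True) + 1e-10))
            start += size
        return np.concatenate(outputs, axis=1)

    probs = masked_branch_softmax(
        np.zeros((1, 5)), np.array([[1.0, 1.0, 0.0, 1.0, 1.0]]), [3, 2]
    )
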
github StepNeverStop / RLs / mlagents / trainers / tensorflow_to_barracuda.py (View on Github)
                inputs_to_op_nodes = list(
                    flatten([list(flatten(n.input)) for n in op_nodes])
                )
                inputs_to_op_nodes = replace_strings_in_list(
                    inputs_to_op_nodes, map_ignored_layer_to_its_input
                )
                inputs_to_op_nodes = [i.split(":")[0] for i in inputs_to_op_nodes]

                const_nodes_by_name = {n.name: n for n in const_nodes}
                tensors = []
                for i in inputs_to_op_nodes:
                    if i in model_tensors:
                        src = model_tensors[i]
                        tensors += [
                            Struct(
                                name=i,
                                obj=src,
                                shape=get_tensor_dims(src),
                                data=get_tensor_data(src),
                            )
                        ]
                    elif i in const_nodes_by_name:
                        src = const_nodes_by_name[i].attr["value"].tensor
                        tensors += [
                            Struct(
                                name=i,
                                obj=src,
                                shape=get_tensor_dims(src),
                                data=get_tensor_data(src),
                            )
                        ]
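
Both converter excerpts walk the inputs of every op node and build a uniform record per tensor, whether the data lives in the checkpoint (model_tensors) or in a Const node embedded in the graph. Here is a generic sketch of that gather step; the dataclass stands in for the converter's Struct helper, and get_dims/get_data are hypothetical stand-ins for get_tensor_dims/get_tensor_data.

    from dataclasses import dataclass
    from typing import Any

    @dataclass
    class TensorRecord:
        name: str
        obj: Any
        shape: Any
        data: Any

    def gather_tensors(input_names, model_tensors, const_nodes_by_name,
                       get_dims, get_data):
        # Prefer the checkpoint tensor for each name; fall back to the value
        # of a Const node with the same name; skip names found in neither.
        tensors = []
        for name in input_names:
            if name in model_tensors:
                src = model_tensors[name]
            elif name in const_nodes_by_name:
                src = const_nodes_by_name[name].attr["value"].tensor
            else:
                continue
            tensors.append(TensorRecord(name, src, get_dims(src), get_data(src)))
        return tensors
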
github Unity-Technologies / marathon-envs / ml-agents / mlagents / trainers / tensorflow_to_barracuda.py (View on Github)
                inputs_to_op_nodes = list(
                    flatten([list(flatten(n.input)) for n in op_nodes])
                )
                inputs_to_op_nodes = replace_strings_in_list(
                    inputs_to_op_nodes, map_ignored_layer_to_its_input
                )
                inputs_to_op_nodes = [i.split(":")[0] for i in inputs_to_op_nodes]

                const_nodes_by_name = {n.name: n for n in const_nodes}
                tensors = []
                for i in inputs_to_op_nodes:
                    if i in model_tensors:
                        src = model_tensors[i]
                        tensors += [
                            Struct(
                                name=i,
                                obj=src,
                                shape=get_tensor_dims(src),
                                data=get_tensor_data(src),
                            )
                        ]
                    elif i in const_nodes_by_name:
                        src = const_nodes_by_name[i].attr["value"].tensor
                        tensors += [
                            Struct(
                                name=i,
                                obj=src,
                                shape=get_tensor_dims(src),
                                data=get_tensor_data(src),
                            )
                        ]
github StepNeverStop / RLs / mlagents / trainers / components / reward_signals / gail / model.py (View on Github)
                z_mean = tf.layers.dense(
                    hidden_2,
                    self.z_size,
                    reuse=reuse,
                    name="gail_z_mean",
                    kernel_initializer=ModelUtils.scaled_init(0.01),
                )

                self.noise = tf.random_normal(tf.shape(z_mean), dtype=tf.float32)

                # Sampled latent code
                self.z = z_mean + self.z_sigma * self.noise * self.use_noise
                estimate_input = self.z
            else:
                estimate_input = hidden_2

            estimate = tf.layers.dense(
                estimate_input,
                1,
                activation=tf.nn.sigmoid,
                name="gail_d_estimate",
                reuse=reuse,
            )
            return estimate, z_mean, concat_input
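
With use_vail enabled, the discriminator scores a sampled latent code rather than hidden_2 directly: z = z_mean + z_sigma * noise, where use_noise switches the stochasticity on for training and off for inference. Below is a minimal NumPy sketch of that sampling step; z_sigma is a plain array here, whereas in the model it is a learned variable.

    import numpy as np

    def sample_latent(z_mean, z_sigma, use_noise):
        # use_noise is 1.0 during training (stochastic latent) and 0.0 at
        # inference, which collapses z to z_mean.
        noise = np.random.normal(size=z_mean.shape).astype(np.float32)
        return z_mean + z_sigma * noise * use_noise

    z = sample_latent(np.zeros((4, 8)), np.ones(8), use_noise=1.0)
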
github StepNeverStop / RLs / mlagents / trainers / components / reward_signals / gail / model.py (View on Github)
            hidden_1 = tf.layers.dense(
                concat_input,
                self.h_size,
                activation=ModelUtils.swish,
                name="gail_d_hidden_1",
                reuse=reuse,
            )

            hidden_2 = tf.layers.dense(
                hidden_1,
                self.h_size,
                activation=ModelUtils.swish,
                name="gail_d_hidden_2",
                reuse=reuse,
            )

            z_mean = None
            if self.use_vail:
                # Latent representation
                z_mean = tf.layers.dense(
                    hidden_2,
                    self.z_size,
                    reuse=reuse,
                    name="gail_z_mean",
                    kernel_initializer=ModelUtils.scaled_init(0.01),
                )

                self.noise = tf.random_normal(tf.shape(z_mean), dtype=tf.float32)

                # Sampled latent code
                self.z = z_mean + self.z_sigma * self.noise * self.use_noise
                estimate_input = self.z
            else:
                estimate_input = hidden_2

            estimate = tf.layers.dense(
                estimate_input,
                1,
                activation=tf.nn.sigmoid,
                name="gail_d_estimate",
                reuse=reuse,
            )
            return estimate, z_mean, concat_input
github Unity-Technologies / ml-agents / ml-agents / mlagents / trainers / ppo / models.py (View on Github)
            prev_action_oh = tf.concat(
                [
                    tf.one_hot(self.prev_action[:, i], self.act_size[i])
                    for i in range(len(self.act_size))
                ],
                axis=1,
            )
            hidden = tf.concat([hidden, prev_action_oh], axis=1)

            self.memory_in = tf.placeholder(
                shape=[None, self.m_size], dtype=tf.float32, name="recurrent_in"
            )
            hidden, memory_out = self.create_recurrent_encoder(
                hidden, self.memory_in, self.sequence_length
            )
            self.memory_out = tf.identity(memory_out, name="recurrent_out")

        policy_branches = []
        for size in self.act_size:
            policy_branches.append(
                tf.layers.dense(
                    hidden,
                    size,
                    activation=None,
                    use_bias=False,
                    kernel_initializer=LearningModel.scaled_init(0.01),
                )
            )

        self.all_log_probs = tf.concat(policy_branches, axis=1, name="action_probs")

        self.action_masks = tf.placeholder(
            shape=[None, sum(self.act_size)], dtype=tf.float32, name="action_masks"
        )
        output, _, normalized_logits = self.create_discrete_action_masking_layer(
            self.all_log_probs, self.action_masks, self.act_size
        )
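
One detail shared by the PPO excerpts: tf.identity is a value-level no-op used purely to pin stable node names ("action", "recurrent_out") onto the graph so the Barracuda importer can find the outputs in the frozen model. A tiny TF1-style illustration via the compat shim; the placeholder shape is arbitrary.

    import tensorflow.compat.v1 as tf

    tf.disable_v2_behavior()
    x = tf.placeholder(tf.float32, shape=[None, 4], name="input")
    logits = tf.layers.dense(x, 2)
    # No-op on the value; it only attaches a stable, exporter-visible name.
    action = tf.identity(logits, name="action")
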
github StepNeverStop / RLs / mlagents / trainers / sac / models.py (View on Github)
            hidden_policy, memory_out = self.create_recurrent_encoder(
                hidden_policy,
                self.policy_memory_in,
                self.sequence_length,
                name="lstm_policy",
            )
            self.policy_memory_out = memory_out
        with tf.variable_scope(scope):
            mu = tf.layers.dense(
                hidden_policy,
                self.act_size[0],
                activation=None,
                name="mu",
                kernel_initializer=LearningModel.scaled_init(0.01),
            )

            # Policy-dependent log_sigma_sq
            log_sigma_sq = tf.layers.dense(
                hidden_policy,
                self.act_size[0],
                activation=None,
                name="log_std",
                kernel_initializer=LearningModel.scaled_init(0.01),
            )

            self.log_sigma_sq = tf.clip_by_value(log_sigma_sq, LOG_STD_MIN, LOG_STD_MAX)

            # "log_std" above is a clipped log standard deviation, so exp()
            # yields the sigma used for sampling (despite the *_sq names).
            sigma_sq = tf.exp(self.log_sigma_sq)

            # Do the reparameterization trick
            policy_ = mu + tf.random_normal(tf.shape(mu)) * sigma_sq

            # Gaussian log-density of the pre-tanh sample
            _gauss_pre = -0.5 * (
                ((policy_ - mu) / (tf.exp(self.log_sigma_sq) + EPSILON)) ** 2
                + 2 * self.log_sigma_sq
                + np.log(2 * np.pi)
            )
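
The reparameterization trick writes the action sample as a deterministic function of mu, the standard deviation, and external standard-normal noise, so gradients flow through the sampling step. SAC then squashes the sample with tanh and corrects the log-probability; the NumPy sketch below covers the sample and its pre-squash Gaussian log-density only, using the same formula as the excerpt (the tanh correction is omitted).

    import numpy as np

    EPSILON = 1e-6

    def reparameterized_sample(mu, log_std):
        # Differentiable w.r.t. mu and log_std: the randomness enters only
        # through external standard-normal noise.
        sigma = np.exp(log_std)
        sample = mu + sigma * np.random.normal(size=mu.shape)
        # Per-dimension Gaussian log-density of the pre-tanh sample.
        log_prob = -0.5 * (
            ((sample - mu) / (sigma + EPSILON)) ** 2
            + 2 * log_std
            + np.log(2 * np.pi)
        )
        return sample, log_prob.sum(axis=-1)

    a, lp = reparameterized_sample(np.zeros((1, 2)), np.full((1, 2), -1.0))
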