How to use the mlagents.trainers.models.LearningModel.__init__ function in mlagents

To help you get started, we’ve selected a few mlagents examples that show popular ways this function is used in public projects.

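The examples below all follow the same pattern: a model class derives from LearningModel and invokes LearningModel.__init__ before building its own graph. In the older PPO/BC snippets the base constructor is called with (m_size, normalize, use_recurrent, brain, seed); the SAC snippet, from a newer release, also forwards a stream_names list. The sketch below illustrates the five-argument form together with the create_observation_streams helper used in the BC examples; MyModel and its hyper-parameters are illustrative, not part of the library.

from mlagents.trainers.models import LearningModel

class MyModel(LearningModel):
    def __init__(self, brain, h_size=128, n_layers=2, m_size=128,
                 normalize=False, use_recurrent=False, seed=0):
        # Set up the shared placeholders, normalization and recurrent plumbing
        # before any model-specific layers are added.
        LearningModel.__init__(self, m_size, normalize, use_recurrent, brain, seed)
        # Model-specific graph construction follows, e.g. a single observation
        # stream feeding the policy and value heads, as in the BC examples below.
        hidden_streams = self.create_observation_streams(1, h_size, n_layers)
        self.hidden = hidden_streams[0]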

github Unity-Technologies / ml-agents / ml-agents / mlagents / trainers / sac / models.py
        :param num_layers: Number of hidden layers between encoded input and policy & value layers.
        :param tau: Strength of soft-Q update.
        :param m_size: Size of brain memory.
        """
        self.tau = tau
        self.gammas = gammas
        self.brain = brain
        self.init_entcoef = init_entcoef
        if stream_names is None:
            stream_names = []
        # Used to reduce the "survivor bonus" when using Curiosity or GAIL.
        self.use_dones_in_backup = {name: tf.Variable(1.0) for name in stream_names}
        self.disable_use_dones = {
            name: self.use_dones_in_backup[name].assign(0.0) for name in stream_names
        }
        LearningModel.__init__(
            self, m_size, normalize, use_recurrent, brain, seed, stream_names
        )
        if num_layers < 1:
            num_layers = 1

        self.target_init_op: List[tf.Tensor] = []
        self.target_update_op: List[tf.Tensor] = []
        self.update_batch_policy: Optional[tf.Operation] = None
        self.update_batch_value: Optional[tf.Operation] = None
        self.update_batch_entropy: Optional[tf.Operation] = None

        self.policy_network = SACPolicyNetwork(
            brain=brain,
            m_size=m_size,
            h_size=h_size,
            normalize=normalize,
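
This SAC snippet comes from a newer ml-agents release in which LearningModel.__init__ also accepts the list of reward stream names as a sixth positional argument. A minimal sketch of that call form, mirroring the lines above; MySACStyleModel and its argument defaults are illustrative only.

from typing import List, Optional

from mlagents.trainers.models import LearningModel

class MySACStyleModel(LearningModel):
    def __init__(self, brain, m_size=None, normalize=False,
                 use_recurrent=False, seed=0,
                 stream_names: Optional[List[str]] = None):
        # Reward stream names (e.g. "extrinsic", "curiosity", "gail") default
        # to an empty list, exactly as in the SAC example above.
        if stream_names is None:
            stream_names = []
        LearningModel.__init__(
            self, m_size, normalize, use_recurrent, brain, seed, stream_names
        )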

github dtransposed / Reinforcement-Learning-With-Unity-G.E.A.R / ml-agents / __backup / trainers / ppo / models.py
        Takes a Unity environment and model-specific hyper-parameters and returns the
        appropriate PPO agent model for the environment.
        :param brain: BrainInfo used to generate specific network graph.
        :param lr: Learning rate.
        :param h_size: Size of hidden layers.
        :param epsilon: Value for policy-divergence threshold.
        :param beta: Strength of entropy regularization.
        :param max_step: Total number of training steps.
        :param normalize: Whether to normalize vector observation input.
        :param use_recurrent: Whether to use an LSTM layer in the network.
        :param num_layers: Number of hidden layers between encoded input and policy & value layers.
        :param m_size: Size of brain memory.
        :return: A subclass of PPOAgent tailored to the environment.
        """
        with tf.variable_scope(scope):
            LearningModel.__init__(self, m_size, normalize, use_recurrent, brain, seed)
            self.use_curiosity = use_curiosity
            if num_layers < 1:
                num_layers = 1
            self.last_reward, self.new_reward, self.update_reward = self.create_reward_encoder()
            if brain.vector_action_space_type == "continuous":
                self.create_cc_actor_critic(h_size, num_layers)
                self.entropy = tf.ones_like(tf.reshape(self.value, [-1])) * self.entropy
            else:
                self.create_dc_actor_critic(h_size, num_layers)
            if self.use_curiosity:
                self.curiosity_enc_size = curiosity_enc_size
                self.curiosity_strength = curiosity_strength
                encoded_state, encoded_next_state = self.create_curiosity_encoders()
                self.create_inverse_model(encoded_state, encoded_next_state)
                self.create_forward_model(encoded_state, encoded_next_state)
            self.create_ppo_optimizer(self.log_probs, self.old_log_probs, self.value,

github dtransposed / Reinforcement-Learning-With-Unity-G.E.A.R / ml-agents / trainers / bc / models.py
    def __init__(self, brain, h_size=128, lr=1e-4, n_layers=2, m_size=128,
                 normalize=False, use_recurrent=False, scope='PPO', seed=0):
        with tf.variable_scope(scope):
            LearningModel.__init__(self, m_size, normalize, use_recurrent, brain, seed)
            num_streams = 1
            hidden_streams = self.create_observation_streams(num_streams, h_size, n_layers)
            hidden = hidden_streams[0]
            self.dropout_rate = tf.placeholder(dtype=tf.float32, shape=[], name="dropout_rate")
            hidden_reg = tf.layers.dropout(hidden, self.dropout_rate)
            if self.use_recurrent:
                tf.Variable(self.m_size, name="memory_size", trainable=False, dtype=tf.int32)
                self.memory_in = tf.placeholder(shape=[None, self.m_size], dtype=tf.float32, name='recurrent_in')
                hidden_reg, self.memory_out = self.create_recurrent_encoder(hidden_reg, self.memory_in,
                                                                            self.sequence_length)
                self.memory_out = tf.identity(self.memory_out, name='recurrent_out')

            if brain.vector_action_space_type == "discrete":
                policy_branches = []
                for size in self.act_size:
                    policy_branches.append(

github dtransposed / Reinforcement-Learning-With-Unity-G.E.A.R / ml-agents / __backup / trainers / bc / models.py
    def __init__(self, brain, h_size=128, lr=1e-4, n_layers=2, m_size=128,
                 normalize=False, use_recurrent=False, scope='PPO', seed=0):
        with tf.variable_scope(scope):
            LearningModel.__init__(self, m_size, normalize, use_recurrent, brain, seed)
            num_streams = 1
            hidden_streams = self.create_observation_streams(num_streams, h_size, n_layers)
            hidden = hidden_streams[0]
            self.dropout_rate = tf.placeholder(dtype=tf.float32, shape=[], name="dropout_rate")
            hidden_reg = tf.layers.dropout(hidden, self.dropout_rate)
            if self.use_recurrent:
                tf.Variable(self.m_size, name="memory_size", trainable=False, dtype=tf.int32)
                self.memory_in = tf.placeholder(shape=[None, self.m_size], dtype=tf.float32, name='recurrent_in')
                hidden_reg, self.memory_out = self.create_recurrent_encoder(hidden_reg, self.memory_in,
                                                                            self.sequence_length)
                self.memory_out = tf.identity(self.memory_out, name='recurrent_out')

            if brain.vector_action_space_type == "discrete":
                policy_branches = []
                for size in self.act_size:
                    policy_branches.append(

github Unity-Technologies / marathon-envs / ml-agents / mlagents / trainers / bc / models.py
    def __init__(self, brain, h_size=128, lr=1e-4, n_layers=2, m_size=128,
                 normalize=False, use_recurrent=False, seed=0):
        LearningModel.__init__(self, m_size, normalize, use_recurrent, brain, seed)
        num_streams = 1
        hidden_streams = self.create_observation_streams(num_streams, h_size, n_layers)
        hidden = hidden_streams[0]
        self.dropout_rate = tf.placeholder(dtype=tf.float32, shape=[], name="dropout_rate")
        hidden_reg = tf.layers.dropout(hidden, self.dropout_rate)
        if self.use_recurrent:
            tf.Variable(self.m_size, name="memory_size", trainable=False, dtype=tf.int32)
            self.memory_in = tf.placeholder(shape=[None, self.m_size], dtype=tf.float32, name='recurrent_in')
            hidden_reg, self.memory_out = self.create_recurrent_encoder(hidden_reg, self.memory_in,
                                                                        self.sequence_length)
            self.memory_out = tf.identity(self.memory_out, name='recurrent_out')

        if brain.vector_action_space_type == "discrete":
            policy_branches = []
            for size in self.act_size:
                policy_branches.append(

github Unity-Technologies / ml-agents / ml-agents / mlagents / trainers / ppo / models.py
"""
        Takes a Unity environment and model-specific hyper-parameters and returns the
        appropriate PPO agent model for the environment.
        :param brain: BrainInfo used to generate specific network graph.
        :param lr: Learning rate.
        :param h_size: Size of hidden layers.
        :param epsilon: Value for policy-divergence threshold.
        :param beta: Strength of entropy regularization.
        :param max_step: Total number of training steps.
        :param normalize: Whether to normalize vector observation input.
        :param use_recurrent: Whether to use an LSTM layer in the network.
        :param num_layers: Number of hidden layers between encoded input and policy & value layers.
        :param m_size: Size of brain memory.
        :return: A subclass of PPOAgent tailored to the environment.
        """
        LearningModel.__init__(self, m_size, normalize, use_recurrent, brain, seed)
        self.use_curiosity = use_curiosity
        if num_layers < 1:
            num_layers = 1
        self.last_reward, self.new_reward, self.update_reward = (
            self.create_reward_encoder()
        )
        if brain.vector_action_space_type == "continuous":
            self.create_cc_actor_critic(h_size, num_layers)
            self.entropy = tf.ones_like(tf.reshape(self.value, [-1])) * self.entropy
        else:
            self.create_dc_actor_critic(h_size, num_layers)
        if self.use_curiosity:
            self.curiosity_enc_size = curiosity_enc_size
            self.curiosity_strength = curiosity_strength
            encoded_state, encoded_next_state = self.create_curiosity_encoders()
            self.create_inverse_model(encoded_state, encoded_next_state)

github Sohojoe / ActiveRagdollStyleTransfer / ml-agents / mlagents / trainers / bc / models.py
    def __init__(self, brain, h_size=128, lr=1e-4, n_layers=2, m_size=128,
                 normalize=False, use_recurrent=False, scope='PPO', seed=0):
        with tf.variable_scope(scope):
            LearningModel.__init__(self, m_size, normalize, use_recurrent, brain, seed)
            num_streams = 1
            hidden_streams = self.create_observation_streams(num_streams, h_size, n_layers)
            hidden = hidden_streams[0]
            self.dropout_rate = tf.placeholder(dtype=tf.float32, shape=[], name="dropout_rate")
            hidden_reg = tf.layers.dropout(hidden, self.dropout_rate)
            if self.use_recurrent:
                tf.Variable(self.m_size, name="memory_size", trainable=False, dtype=tf.int32)
                self.memory_in = tf.placeholder(shape=[None, self.m_size], dtype=tf.float32, name='recurrent_in')
                hidden_reg, self.memory_out = self.create_recurrent_encoder(hidden_reg, self.memory_in,
                                                                            self.sequence_length)
                self.memory_out = tf.identity(self.memory_out, name='recurrent_out')

            if brain.vector_action_space_type == "discrete":
                policy_branches = []
                for size in self.act_size:
                    policy_branches.append(