How to use the parl.layers module in parl

To help you get started, we’ve selected a few parl.layers examples, based on popular ways it is used in public projects.

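Before diving into the examples below, here is a minimal sketch of how parl.layers is typically used, assuming PARL's fluid-based 1.x API where parl.Model and parl.layers are available (the model name and layer sizes are illustrative, not taken from any repo):

import parl
from parl import layers  # parl.layers wraps fluid.layers and manages parameters


class CartpoleModel(parl.Model):
    def __init__(self, act_dim):
        # Layers are declared once here; their parameters are created when
        # the layer object is first called on an input tensor.
        self.fc1 = layers.fc(size=128, act='relu')
        self.fc2 = layers.fc(size=act_dim, act='softmax')

    def policy(self, obs):
        hidden = self.fc1(obs)
        prob = self.fc2(hidden)
        return prob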

github PaddlePaddle / PARL / examples / NeurIPS2018-AI-for-Prosthetics-Challenge / final_submit / submit_model.py
def _define_program(self):
        self.ensemble_predict_program = fluid.Program()
        self.startup_program = fluid.Program()
        with fluid.program_guard(self.ensemble_predict_program,
                                 self.startup_program):
            obs = layers.data(name='obs', shape=[OBS_DIM], dtype='float32')
            action = self._ensemble_predict(obs)
            self.ensemble_predict_output = [action]
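A hedged usage sketch of how the program defined above might then be executed from within the same class (the executor variable exe and the dummy observation are assumptions for illustration; numpy is assumed to be imported as np):

exe = fluid.Executor(fluid.CPUPlace())
exe.run(self.startup_program)            # initialize parameters once

obs_np = np.zeros([1, OBS_DIM], dtype='float32')   # dummy observation batch
[action_np] = exe.run(
    self.ensemble_predict_program,
    feed={'obs': obs_np},
    fetch_list=self.ensemble_predict_output)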
github PaddlePaddle / PARL / examples / A2C / atari_model.py
def __init__(self, act_dim):

        self.conv1 = layers.conv2d(
            num_filters=32, filter_size=8, stride=4, padding=1, act='relu')
        self.conv2 = layers.conv2d(
            num_filters=64, filter_size=4, stride=2, padding=2, act='relu')
        self.conv3 = layers.conv2d(
            num_filters=64, filter_size=3, stride=1, padding=0, act='relu')

        self.fc = layers.fc(size=512, act='relu')

        self.policy_fc = layers.fc(size=act_dim)
        self.value_fc = layers.fc(size=1)
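A plausible way these layers are chained in the model's forward pass. This is a sketch only: the method name policy_and_value and details such as the pixel scaling are assumptions, and the repo's actual code may differ.

    def policy_and_value(self, obs):
        obs = obs / 255.0                          # scale raw pixels to [0, 1]
        conv1 = self.conv1(obs)
        conv2 = self.conv2(conv1)
        conv3 = self.conv3(conv2)
        flatten = layers.flatten(conv3, axis=1)
        fc_output = self.fc(flatten)
        policy_logits = self.policy_fc(fc_output)  # unnormalized action scores
        values = self.value_fc(fc_output)          # state-value estimate
        return policy_logits, values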
github PaddlePaddle / PARL / examples / ES / mujoco_model.py
def __init__(self, act_dim):
        hid1_size = 256
        hid2_size = 256

        self.fc1 = layers.fc(size=hid1_size, act='tanh')
        self.fc2 = layers.fc(size=hid2_size, act='tanh')
        self.fc3 = layers.fc(size=act_dim)
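A sketch of the corresponding forward pass, assuming the model exposes a predict method (the method name is an assumption based on how PARL's ES example is conventionally structured):

    def predict(self, obs):
        hid1 = self.fc1(obs)
        hid2 = self.fc2(hid1)
        means = self.fc3(hid2)    # deterministic action means
        return means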
github PaddlePaddle / PARL / examples / NeurIPS2018-AI-for-Prosthetics-Challenge / multi_head_ddpg.py
def _critic_learn(self, obs, action, reward, next_obs, terminal, critic_lr,
                      model_id):
        next_action = self.target_models[model_id].policy(next_obs)
        next_Q = self.target_models[model_id].value(next_obs, next_action)

        terminal = layers.cast(terminal, dtype='float32')
        target_Q = reward + (1.0 - terminal) * self.gamma * next_Q
        target_Q.stop_gradient = True

        Q = self.models[model_id].value(obs, action)
        cost = layers.square_error_cost(Q, target_Q)
        cost = layers.reduce_mean(cost)
        optimizer = fluid.optimizer.AdamOptimizer(critic_lr)
        optimizer.minimize(cost)
        return cost
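For context, a hedged sketch of the matching actor update for the same model head. Names mirror _critic_learn above, but the repo's actual _actor_learn may differ, for example in restricting the optimized parameter list to the actor's weights:

    def _actor_learn(self, obs, actor_lr, model_id):
        action = self.models[model_id].policy(obs)
        Q = self.models[model_id].value(obs, action)
        cost = layers.reduce_mean(-1.0 * Q)   # maximize Q by minimizing -Q
        optimizer = fluid.optimizer.AdamOptimizer(actor_lr)
        optimizer.minimize(cost)
        return cost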
github PaddlePaddle / PARL / examples / DDPG / mujoco_agent.py
def build_program(self):
        self.pred_program = fluid.Program()
        self.learn_program = fluid.Program()

        with fluid.program_guard(self.pred_program):
            obs = layers.data(
                name='obs', shape=[self.obs_dim], dtype='float32')
            self.pred_act = self.alg.predict(obs)

        with fluid.program_guard(self.learn_program):
            obs = layers.data(
                name='obs', shape=[self.obs_dim], dtype='float32')
            act = layers.data(
                name='act', shape=[self.act_dim], dtype='float32')
            reward = layers.data(name='reward', shape=[], dtype='float32')
            next_obs = layers.data(
                name='next_obs', shape=[self.obs_dim], dtype='float32')
            terminal = layers.data(name='terminal', shape=[], dtype='bool')
            _, self.critic_cost = self.alg.learn(obs, act, reward, next_obs,
                                                 terminal)
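A hedged usage sketch of the prediction program built above, assuming the executor that PARL's fluid-based Agent base class exposes as self.fluid_executor and that numpy is imported as np (the repo's actual predict method may differ):

    def predict(self, obs):
        obs = np.expand_dims(obs, axis=0).astype('float32')
        act = self.fluid_executor.run(
            self.pred_program,
            feed={'obs': obs},
            fetch_list=[self.pred_act])[0]
        return act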
github PaddlePaddle / PARL / examples / NeurIPS2018-AI-for-Prosthetics-Challenge / multi_head_ddpg.py
def ensemble_predict(self, obs):
        """ ensemble predict:
        1. For actions of all actors, each critic will score them
           and normalize its scores;
        2. For each actor, will calculate its score by 
           average scores given by all critics
        3. choose action of the actor whose score is best
        """
        actor_outputs = []
        for i in range(self.ensemble_num):
            actor_outputs.append(self.models[i].policy(obs))
        batch_actions = layers.concat(actor_outputs, axis=0)
        batch_obs = layers.expand(obs, expand_times=[self.ensemble_num, 1])

        critic_outputs = []
        for i in range(self.ensemble_num):
            critic_output = self.models[i].value(batch_obs, batch_actions)
            critic_output = layers.unsqueeze(critic_output, axes=[1])
            critic_outputs.append(critic_output)
        score_matrix = layers.concat(critic_outputs, axis=1)

        # Normalize scores given by each critic
        sum_critic_score = layers.reduce_sum(
            score_matrix, dim=0, keep_dim=True)
        sum_critic_score = layers.expand(
            sum_critic_score, expand_times=[self.ensemble_num, 1])
        norm_score_matrix = score_matrix / sum_critic_score
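The snippet above is cut off after the normalization step. A typical continuation (a sketch, not necessarily the repo's exact code) would average each actor's normalized scores across critics and gather the best-scoring actor's action:

        # Average each actor's normalized scores over all critics
        actions_mean_score = layers.reduce_mean(
            norm_score_matrix, dim=1, keep_dim=True)
        # Pick the actor with the highest mean score and take its action
        best_score_id = layers.argmax(actions_mean_score, axis=0)
        best_score_id = layers.cast(best_score_id, dtype='int32')
        ensemble_predict_action = layers.gather(batch_actions, best_score_id)
        return ensemble_predict_action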
github PaddlePaddle / PARL / examples / A2C / atari_agent.py
def build_program(self):
        self.sample_program = fluid.Program()
        self.predict_program = fluid.Program()
        self.value_program = fluid.Program()
        self.learn_program = fluid.Program()

        with fluid.program_guard(self.sample_program):
            obs = layers.data(
                name='obs', shape=self.obs_shape, dtype='float32')
            sample_actions, values = self.alg.sample(obs)
            self.sample_outputs = [sample_actions, values]

        with fluid.program_guard(self.predict_program):
            obs = layers.data(
                name='obs', shape=self.obs_shape, dtype='float32')
            self.predict_actions = self.alg.predict(obs)

        with fluid.program_guard(self.value_program):
            obs = layers.data(
                name='obs', shape=self.obs_shape, dtype='float32')
            self.values = self.alg.value(obs)

        with fluid.program_guard(self.learn_program):
            obs = layers.data(