How to use the tf2rl.algos.gail.GAIL function in tf2rl

To help you get started, we’ve selected a few tf2rl examples, based on popular ways it is used in public projects.

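Before the project snippets, here is a minimal sketch of constructing a GAIL discriminator and querying it for an imitation reward. It is not an authoritative reference: the environment name ("Pendulum-v0"), the inference(states, actions, next_states) signature, and the assumption that GAIL itself does not use next_states are inferred from the snippets below and should be treated as assumptions.

import gym
import numpy as np

from tf2rl.algos.gail import GAIL

# Continuous-control environment; any Box action space is handled the same way.
env = gym.make("Pendulum-v0")

irl = GAIL(
    state_shape=env.observation_space.shape,
    action_dim=env.action_space.high.size,
    units=[32, 32],       # discriminator hidden layer sizes
    enable_sn=False,      # spectral normalization in the discriminator
    batch_size=32,
    gpu=-1)               # -1 runs on CPU

# Query the discriminator for an imitation reward on a single transition.
# next_states is assumed to be unused by GAIL (kept for API symmetry with
# GAIfO), so the current observation is passed again as a placeholder.
obs = env.reset()
act = env.action_space.sample()
reward = irl.inference(
    np.expand_dims(obs, axis=0).astype(np.float32),
    np.expand_dims(act, axis=0).astype(np.float32),
    np.expand_dims(obs, axis=0).astype(np.float32))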

github keiohta / tf2rl / tests / algos / test_gail.py View on Github external
    @classmethod
    def setUpClass(cls):
        super().setUpClass()
        cls.irl_discrete = GAIL(
            state_shape=cls.discrete_env.observation_space.shape,
            action_dim=cls.discrete_env.action_space.n,
            gpu=-1)
        cls.irl_continuous = GAIL(
            state_shape=cls.continuous_env.observation_space.shape,
            action_dim=cls.continuous_env.action_space.low.size,
            gpu=-1)
github keiohta / tf2rl / examples / run_gail_ddpg.py View on Github external
        exit()

    units = [400, 300]

    env = gym.make(args.env_name)
    test_env = gym.make(args.env_name)
    policy = DDPG(
        state_shape=env.observation_space.shape,
        action_dim=env.action_space.high.size,
        max_action=env.action_space.high[0],
        gpu=args.gpu,
        actor_units=units,
        critic_units=units,
        n_warmup=10000,
        batch_size=100)
    irl = GAIL(
        state_shape=env.observation_space.shape,
        action_dim=env.action_space.high.size,
        units=units,
        enable_sn=args.enable_sn,
        batch_size=32,
        gpu=args.gpu)
    expert_trajs = restore_latest_n_traj(
        args.expert_path_dir, n_path=20, max_steps=1000)
    trainer = IRLTrainer(policy, env, args, irl, expert_trajs["obses"],
                         expert_trajs["acts"], test_env)
    trainer()
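IRLTrainer handles policy updates, discriminator updates, and reward relabelling. If you want to drive GAIL by hand instead, the rough sketch below shows one discriminator update and a reward query. It reuses the irl, env, and expert_trajs objects from the snippet above, and the GAIL.train / GAIL.inference argument names are inferred from how IRLTrainer calls them, so treat the signatures as assumptions.

import numpy as np

# Gather a small batch of agent transitions with a random stand-in policy.
agent_obses, agent_acts = [], []
obs = env.reset()
for _ in range(32):
    act = env.action_space.sample()
    next_obs, _, done, _ = env.step(act)
    agent_obses.append(obs)
    agent_acts.append(act)
    obs = env.reset() if done else next_obs
agent_obses = np.asarray(agent_obses, dtype=np.float32)
agent_acts = np.asarray(agent_acts, dtype=np.float32)

# Sample a matching batch of expert transitions.
idx = np.random.randint(0, expert_trajs["obses"].shape[0], size=32)

# One discriminator update: agent batch vs. expert batch (assumed signature).
irl.train(agent_states=agent_obses,
          agent_acts=agent_acts,
          expert_states=expert_trajs["obses"][idx].astype(np.float32),
          expert_acts=expert_trajs["acts"][idx].astype(np.float32))

# Imitation rewards for the agent batch; next_states is assumed unused by
# GAIL, so the observations are passed again as a placeholder.
rewards = irl.inference(agent_obses, agent_acts, agent_obses)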
github keiohta / tf2rl / examples / run_gail_ddpg.py View on Github external
import roboschool
import gym

from tf2rl.algos.ddpg import DDPG
from tf2rl.algos.gail import GAIL
from tf2rl.experiments.irl_trainer import IRLTrainer
from tf2rl.experiments.utils import restore_latest_n_traj


if __name__ == '__main__':
    parser = IRLTrainer.get_argument()
    parser = GAIL.get_argument(parser)
    parser.add_argument('--env-name', type=str, default="RoboschoolReacher-v1")
    args = parser.parse_args()

    if args.expert_path_dir is None:
        print("Plaese generate demonstrations first")
        print("python examples/run_sac.py --env-name=RoboschoolReacher-v1 --save-test-path --test-interval=50000")
        exit()

    units = [400, 300]

    env = gym.make(args.env_name)
    test_env = gym.make(args.env_name)
    policy = DDPG(
        state_shape=env.observation_space.shape,
        action_dim=env.action_space.high.size,
        max_action=env.action_space.high[0],
github keiohta / tf2rl / tf2rl / algos / wgail.py View on Github external
import tensorflow as tf
from tensorflow.keras.layers import Dense

from tf2rl.algos.policy_base import IRLPolicy
from tf2rl.algos.gail import GAIL, Discriminator
from tf2rl.networks.spectral_norm_dense import SNDense


class DiscriminatorWGAIL(Discriminator):
    def compute_reward(self, inputs):
        # Reward is the negated discriminator output; ideally this is further
        # normalized so that expert-like (positive) examples end up near zero.
        cur_rewards = -self.call(inputs)
        return cur_rewards


class WGAIL(GAIL):
    def __init__(
            self,
            state_shape,
            action_dim,
            units=[32, 32],
            lr=0.001,
            enable_sn=False,
            enable_gp=True,
            enable_gc=False,
            name="WGAIL",
            **kwargs):
        """
        :param enable_sn (bool): If true, add spectral normalization in Dense layer
        :param enable_gp (bool): If true, add gradient penalty to loss function
        :param enable_gc (bool): If true, apply gradient clipping while training
        """
github keiohta / tf2rl / tf2rl / algos / vail.py View on Github external
        logstds = tf.clip_by_value(
            logstds, self.LOG_SIG_CAP_MIN, self.LOG_SIG_CAP_MAX)
        latents = means + tf.random.normal(shape=means.shape) * tf.math.exp(logstds)
        # Binary classifier
        out = self.l3(latents)
        return out, means, logstds

    def compute_reward(self, inputs):
        features = tf.concat(inputs, axis=1)
        features = self.l1(features)
        features = self.l2(features)
        means = self.l_mean(features)
        return tf.math.log(self.l3(means) + 1e-8)


class VAIL(GAIL):
    def __init__(
            self,
            state_shape,
            action_dim,
            units=[32, 32],
            n_latent_unit=32,
            lr=5e-5,
            kl_target=0.5,
            reg_param=0.,
            enable_sn=False,
            enable_gp=False,
            name="VAIL",
            **kwargs):
        """
        :param enable_sn (bool): If true, add spectral normalization in Dense layer
        :param enable_gp (bool): If true, add gradient penalty to loss function
github keiohta / tf2rl / tf2rl / algos / gaifo.py View on Github external
        tf.keras.Model.__init__(self, name=name)

        DenseClass = SNDense if enable_sn else Dense
        self.l1 = DenseClass(units[0], name="L1", activation="relu")
        self.l2 = DenseClass(units[1], name="L2", activation="relu")
        self.l3 = DenseClass(1, name="L3", activation=output_activation)

        dummy_state = tf.constant(
            np.zeros(shape=(1,) + state_shape, dtype=np.float32))
        dummy_next_state = tf.constant(
            np.zeros(shape=(1,) + state_shape, dtype=np.float32))
        with tf.device("/cpu:0"):
            self([dummy_state, dummy_next_state])


class GAIfO(GAIL):
    def __init__(
            self,
            state_shape,
            units=[32, 32],
            lr=0.001,
            enable_sn=False,
            name="GAIfO",
            **kwargs):
        IRLPolicy.__init__(self, name=name, n_training=1, **kwargs)
        self.disc = Discriminator(
            state_shape=state_shape,
            units=units, enable_sn=enable_sn)
        self.optimizer = tf.keras.optimizers.Adam(
            learning_rate=lr, beta_1=0.5)

    def train(self, agent_states, agent_next_states,