How to use the lagom.transform.describe function in lagom

To help you get started, we’ve selected a few lagom.transform.describe examples based on popular ways the function is used in public projects.
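
Before diving into the project excerpts below, here is a minimal, self-contained sketch (not taken from any of these repositories) of what lagom.transform.describe is typically used for: it computes summary statistics over an array and returns a summary object (the repr_* arguments control how it prints), which is why the excerpts pass the result straight into a Logger. The keyword arguments mirror the calls in the excerpts; the random returns are purely illustrative.

import numpy as np
from lagom.transform import describe

# Pretend these are 16 episode returns collected during evaluation.
returns = np.random.randn(16)

# Same keyword arguments as in the excerpts below: summarize along the last
# axis and format the repr with a small indent and a leading newline.
summary = describe(returns, axis=-1, repr_indent=1, repr_prefix='\n')
print('Returns', summary)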


github zuoxingdong / lagom / baselines / sac / logs / _old_default / source_files / _agent.py View on Github
        describe_it = lambda x: describe(torch.cat(x).detach().cpu().numpy().squeeze(), axis=-1, repr_indent=1, repr_prefix='\n')
        out['Q1'] = describe_it(Q1_vals)
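
The excerpt above is only a fragment, so here is a hedged, self-contained version of the same pattern (not from the repository; the tensor shapes and the Q1_vals placeholder are made up): concatenate a list of torch tensors, move them to a CPU NumPy array, squeeze, and summarize with describe.

import torch
from lagom.transform import describe

# Same lambda as in the excerpt: flatten a list of tensors into one NumPy
# array before summarizing it along the last axis.
describe_it = lambda x: describe(
    torch.cat(x).detach().cpu().numpy().squeeze(),
    axis=-1, repr_indent=1, repr_prefix='\n')

# Hypothetical Q-value batches standing in for Q1_vals in the excerpt.
Q1_vals = [torch.randn(8, 1) for _ in range(4)]
out = {'Q1': describe_it(Q1_vals)}
print(out['Q1'])
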
github zuoxingdong / lagom / baselines / openaies / experiment.py View on Github
'rank_transform': config['train.rank_transform']})
    train_logs = []
    checkpoint_count = 0
    with Pool(processes=config['train.popsize']//config['train.worker_chunksize']) as pool:
        print('Finish initialization. Training starts...')
        for generation in range(config['train.generations']):
            t0 = time.perf_counter()
            solutions = es.ask()
            data = [(config, seed, device, solution) for solution in solutions]
            out = pool.map(CloudpickleWrapper(fitness), data, chunksize=config['train.worker_chunksize'])
            Rs, Hs = zip(*out)
            es.tell(solutions, [-R for R in Rs])
            logger = Logger()
            logger('generation', generation+1)
            logger('num_seconds', round(time.perf_counter() - t0, 1))
            logger('Returns', describe(Rs, axis=-1, repr_indent=1, repr_prefix='\n'))
            logger('Horizons', describe(Hs, axis=-1, repr_indent=1, repr_prefix='\n'))
            logger('fbest', es.result.fbest)
            train_logs.append(logger.logs)
            if generation == 0 or (generation+1) % config['log.freq'] == 0:
                logger.dump(keys=None, index=0, indent=0, border='-'*50)
            if (generation+1) >= int(config['train.generations']*(checkpoint_count/(config['checkpoint.num'] - 1))):
                agent.from_vec(tensorify(es.result.xbest, 'cpu'))
                agent.checkpoint(logdir, generation+1)
                checkpoint_count += 1
    pickle_dump(obj=train_logs, f=logdir/'train_logs', ext='.pkl')
    return None
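
The experiment above logs a describe summary of the returns and horizons of every generation. The stripped-down sketch below shows just that Logger + describe pattern (not from the repository; the numbers are invented, and the Logger import path may differ across lagom versions).

from lagom import Logger  # adjust this import if your lagom version places Logger elsewhere
from lagom.transform import describe

Rs = [120.3, 98.7, 143.1, 101.9]   # hypothetical per-solution returns for one generation
Hs = [200, 180, 200, 195]          # hypothetical episode horizons

logger = Logger()
logger('generation', 1)
logger('Returns', describe(Rs, axis=-1, repr_indent=1, repr_prefix='\n'))
logger('Horizons', describe(Hs, axis=-1, repr_indent=1, repr_prefix='\n'))
logger.dump(keys=None, index=0, indent=0, border='-'*50)
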
github zuoxingdong / lagom / examples / reinforcement_learning / ppo / logs / compare_tanh_and_relu_plus_layernorm / relu+layernorm / source_files / engine.py View on Github
logger = Logger()
        logger('train_iteration', n+1)
        logger('num_seconds', round(perf_counter() - start_time, 1))
        [logger(key, value) for key, value in out_agent.items()]
        logger('num_trajectories', len(D))
        logger('num_timesteps', sum([len(traj) for traj in D]))
        logger('accumulated_trained_timesteps', self.agent.total_timestep)
        G = [traj.numpy_rewards.sum() for traj in D]
        logger('return', describe(G, axis=-1, repr_indent=1, repr_prefix='\n'))
        
        infos = [info for info in chain.from_iterable([traj.infos for traj in D]) if 'episode' in info]
        online_returns = [info['episode']['return'] for info in infos]
        online_horizons = [info['episode']['horizon'] for info in infos]
        logger('online_return', describe(online_returns, axis=-1, repr_indent=1, repr_prefix='\n'))
        logger('online_horizon', describe(online_horizons, axis=-1, repr_indent=1, repr_prefix='\n'))
            
        monitor_env = get_wrapper(self.env, 'VecMonitor')
        logger('running_return', describe(monitor_env.return_queue, axis=-1, repr_indent=1, repr_prefix='\n'))
        logger('running_horizon', describe(monitor_env.horizon_queue, axis=-1, repr_indent=1, repr_prefix='\n'))
        return logger
github zuoxingdong / lagom / examples / reinforcement_learning / vpg / logs / default / source_files / agent.py View on Github
if self.config['agent.use_lr_scheduler']:
            self.lr_scheduler.step(self.total_timestep)
        self.optimizer.step()
        
        self.total_timestep += sum([len(traj) for traj in D])
        out = {}
        if self.config['agent.use_lr_scheduler']:
            out['current_lr'] = self.lr_scheduler.get_lr()
        out['loss'] = loss.item()
        out['grad_norm'] = grad_norm
        out['policy_loss'] = policy_loss.mean().item()
        out['entropy_loss'] = entropy_loss.mean().item()
        out['policy_entropy'] = -entropy_loss.mean().item()
        out['value_loss'] = value_loss.mean().item()
        Vs_numpy = Vs.detach().cpu().numpy().squeeze()
        out['V'] = describe(Vs_numpy, axis=-1, repr_indent=1, repr_prefix='\n')
        out['explained_variance'] = ev(y_true=Qs.detach().cpu().numpy(), y_pred=Vs.detach().cpu().numpy())
        return out
github zuoxingdong / lagom / baselines / ddpg / engine.py View on Github
for _ in range(self.config['eval.num_episode']):
            observation = self.eval_env.reset()
            for _ in range(self.eval_env.spec.max_episode_steps):
                with torch.no_grad():
                    action = self.agent.choose_action(observation, mode='eval')['action']
                next_observation, reward, done, info = self.eval_env.step(action)
                if done[0]:  # [0] single environment
                    returns.append(info[0]['episode']['return'])
                    horizons.append(info[0]['episode']['horizon'])
                    break
                observation = next_observation
        logger = Logger()
        logger('num_seconds', round(perf_counter() - start_time, 1))
        logger('accumulated_trained_timesteps', kwargs['accumulated_trained_timesteps'])
        logger('accumulated_trained_episodes', kwargs['accumulated_trained_episodes'])
        logger('online_return', describe(returns, axis=-1, repr_indent=1, repr_prefix='\n'))
        logger('online_horizon', describe(horizons, axis=-1, repr_indent=1, repr_prefix='\n'))
        
        monitor_env = get_wrapper(self.eval_env, 'VecMonitor')
        logger('running_return', describe(monitor_env.return_queue, axis=-1, repr_indent=1, repr_prefix='\n'))
        logger('running_horizon', describe(monitor_env.horizon_queue, axis=-1, repr_indent=1, repr_prefix='\n'))
        logger.dump(keys=None, index=0, indent=0, border=color_str('+'*50, color='green'))
        return logger.logs
github zuoxingdong / lagom / baselines / sac / engine.py View on Github
def eval(self, n=None, **kwargs):
        t0 = time.perf_counter()
        with torch.no_grad():
            D = self.runner(self.agent, self.eval_env, 10, mode='eval')
        
        logger = Logger()
        logger('eval_iteration', n+1)
        logger('num_seconds', round(time.perf_counter() - t0, 1))
        logger('accumulated_trained_timesteps', self.agent.total_timestep)
        logger('online_return', describe([sum(traj.rewards) for traj in D], axis=-1, repr_indent=1, repr_prefix='\n'))
        logger('online_horizon', describe([traj.T for traj in D], axis=-1, repr_indent=1, repr_prefix='\n'))
        logger('running_return', describe(self.eval_env.return_queue, axis=-1, repr_indent=1, repr_prefix='\n'))
        logger('running_horizon', describe(self.eval_env.horizon_queue, axis=-1, repr_indent=1, repr_prefix='\n'))
        logger.dump(keys=None, index=0, indent=0, border=color_str('+'*50, color='green'))
        return logger.logs
github zuoxingdong / lagom / baselines / sac / logs / old_default / source_files / engine.py View on Github
next_observation, reward, done, info = self.eval_env.step(action)
                if done[0]:  # [0] single environment
                    returns.append(info[0]['episode']['return'])
                    horizons.append(info[0]['episode']['horizon'])
                    break
                observation = next_observation
        logger = Logger()
        logger('num_seconds', round(perf_counter() - start_time, 1))
        logger('accumulated_trained_timesteps', kwargs['accumulated_trained_timesteps'])
        logger('accumulated_trained_episodes', kwargs['accumulated_trained_episodes'])
        logger('online_return', describe(returns, axis=-1, repr_indent=1, repr_prefix='\n'))
        logger('online_horizon', describe(horizons, axis=-1, repr_indent=1, repr_prefix='\n'))
        
        monitor_env = get_wrapper(self.eval_env, 'VecMonitor')
        logger('running_return', describe(monitor_env.return_queue, axis=-1, repr_indent=1, repr_prefix='\n'))
        logger('running_horizon', describe(monitor_env.horizon_queue, axis=-1, repr_indent=1, repr_prefix='\n'))
        logger.dump(keys=None, index=0, indent=0, border=color_str('+'*50, color='green'))
        return logger.logs