How to use the torchx.device_scope function in torchx

To help you get started, we’ve selected a few torchx.device_scope examples based on popular ways it is used in public projects. All of the snippets below are excerpts from the SurrealAI/surreal reinforcement learning codebase, where torchx is imported under the alias tx.
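
All of these snippets share one pattern: models and tensors are created inside a tx.device_scope(...) block keyed by a device option taken from configuration. Here is that pattern in isolation. It is a minimal sketch rather than surreal code: it assumes torchx is importable as tx, the gpu_option value is a placeholder (surreal derives it from its learner and session configs), and the device-placement behavior noted in the comment is inferred from how the examples below use the scope.

import torch
import torchx as tx

gpu_option = 'cpu'  # placeholder; surreal builds this value from its configs

with tx.device_scope(gpu_option):
    # Tensors (and, in the examples below, whole models) created inside the
    # scope are tied to the selected device (assumed behavior, inferred from
    # how the surreal code uses the scope).
    weights = torch.zeros(4, 8)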

Example from SurrealAI/surreal, surreal/learner/ppo.py (excerpt):
            self.clip_epsilon = self.clip_epsilon_init
            self.clip_adjust_threshold = self.adjust_threshold
            self.clip_upper = self.clip_range[1]
            self.clip_lower = self.clip_range[0]

        # learning rate setting:
        self.min_lr = self.learner_config.algo.network.anneal.min_lr
        self.lr_update_frequency = self.learner_config.algo.network.anneal.lr_update_frequency
        self.frames_to_anneal = self.learner_config.algo.network.anneal.frames_to_anneal
        num_updates = int(self.frames_to_anneal / self.learner_config.parameter_publish.exp_interval)
        lr_scheduler = eval(self.learner_config.algo.network.anneal.lr_scheduler) 

        self.exp_counter = 0
        self.kl_record = []

        with tx.device_scope(self.gpu_option):
            self.model = PPOModel(
                obs_spec=self.obs_spec,
                action_dim=self.action_dim,
                model_config=self.learner_config.model,
                use_cuda=self.use_cuda,
                init_log_sig=self.init_log_sig,
                use_z_filter=self.use_z_filter,
                if_pixel_input=self.env_config.pixel_input,
                rnn_config=self.learner_config.algo.rnn,
            )
            self.ref_target_model = PPOModel(
                obs_spec=self.obs_spec,
                action_dim=self.action_dim,
                model_config=self.learner_config.model,
                use_cuda=self.use_cuda,
                init_log_sig=self.init_log_sig,

Example from SurrealAI/surreal, surreal/learner/gail.py (excerpt):
def __init__(self, learner_config, env_config, session_config):
        # PPO setup
        super().__init__(learner_config, env_config, session_config)

        # GAIL-specific setup
        self.reward_lambda = self.learner_config.algo.reward_lambda # reward mixing
        self.lr_discriminator = self.learner_config.algo.network.lr_discriminator
        self.epoch_discriminator = self.learner_config.algo.consts.epoch_discriminator
        self.stride = self.learner_config.algo.stride

        # learning rate setting:
        num_updates = int(self.frames_to_anneal / self.learner_config.parameter_publish.exp_interval)
        lr_scheduler = eval(self.learner_config.algo.network.anneal.lr_scheduler)

        with tx.device_scope(self.gpu_option):
            # TODO: what hypers does GAIL need? put them here ###
            # add a discriminator
            self.discriminator_model = GAILModel(
                obs_spec=self.obs_spec,
                action_dim=self.action_dim,
                model_config=self.learner_config.model,
                use_cuda=self.use_cuda,
                use_z_filter=self.use_z_filter
            )

            # Learning parameters and optimizer
            self.clip_discriminator_gradient = self.learner_config.algo.network.clip_discriminator_gradient
            self.discriminator_gradient_clip_value = self.learner_config.algo.network.discriminator_gradient_norm_clip

            self.discriminator_optim = torch.optim.Adam(
                self.discriminator_model.get_discriminator_params(),

Example from SurrealAI/surreal, surreal/learner/ppo.py (excerpt):
def _preprocess_batch_ppo(self, batch):
        '''
            Loading experiences from numpy to torch.FloatTensor type
            Args: 
                batch: BeneDict of experiences containing following attributes
                        'obs' - observation
                        'actions' - actions
                        'rewards' - rewards
                        'obs_next' - next observation
                        'persistent_infos' - action policy
                        'onetime_infos' - RNN hidden cells or None
            Returns:
                BeneDict of torch.FloatTensors
        '''
        with tx.device_scope(self.gpu_option):

            obs, actions, rewards, obs_next, done, persistent_infos, onetime_infos = (
                batch['obs'],
                batch['actions'],
                batch['rewards'],
                batch['obs_next'],
                batch['dones'],
                batch['persistent_infos'],
                batch['onetime_infos'],
            )

            for modality in obs:
                for key in obs[modality]:
                    obs[modality][key] = (torch.tensor(obs[modality][key], dtype=torch.float32)).detach()
                    obs_next[modality][key] = (torch.tensor(obs_next[modality][key], dtype=torch.float32)).detach()
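
The preprocessing loop above, reduced to its essentials with the surreal-specific BeneDict plumbing removed. The nested modality/key layout is taken from the snippet; the array shape and the gpu_option value are made-up placeholders.

import numpy as np
import torch
import torchx as tx

gpu_option = 'cpu'  # placeholder, as in the sketch near the top of this page

batch_obs = {'low_dim': {'flat_inputs': np.zeros((32, 17), dtype=np.float32)}}  # hypothetical batch

with tx.device_scope(gpu_option):
    # Convert every numpy array in the nested dict to a float tensor under the scope
    for modality in batch_obs:
        for key in batch_obs[modality]:
            batch_obs[modality][key] = torch.tensor(
                batch_obs[modality][key], dtype=torch.float32
            ).detach()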

Example from SurrealAI/surreal, surreal/agent/ppo_agent.py (excerpt):
            Args:
                obs: numpy array of (1, obs_dim)

            Returns:
                action_choice: sampled or max likelihood action to input to env
                action_info: list of auxiliary information - [onetime, persistent]
                    Note: this includes probability distribution the action is
                    sampled from, RNN hidden states
        '''
        # Note: we collect two kinds of action info, one persistent and one onetime
        # persistent info is collected for every step in rollout (i.e. policy probability distribution)
        # onetime info is collected for the first step in partial trajectory (i.e. RNN hidden state)
        # see ExpSenderWrapperMultiStepMovingWindowWithInfo in exp_sender_wrapper for more
        action_info = [[], []]

        with tx.device_scope(self.gpu_ids):
            obs_tensor = {}
            for mod in obs.keys():
                obs_tensor[mod] = {}
                for k in obs[mod].keys():
                    obs_tensor[mod][k] = torch.tensor(obs[mod][k], dtype=torch.float32).unsqueeze(0)

            if self.rnn_config.if_rnn_policy:
                action_info[0].append(self.cells[0].squeeze(1).cpu().numpy())
                action_info[0].append(self.cells[1].squeeze(1).cpu().numpy())

            action_pd, self.cells = self.model.forward_actor_expose_cells(obs_tensor, self.cells)
            action_pd = action_pd.detach().cpu().numpy()
            action_pd[:, self.action_dim:] *= np.exp(self.noise)

            if self.agent_mode not in ['eval_deterministic', 'eval_deterministic_local']:
                action_choice = self.pd.sample(action_pd)
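
A condensed version of the agent-side pattern in this snippet: the model is built under a device scope at construction time, and act() re-enters the same scope to convert the observation, run the forward pass, and move the result back to numpy with .cpu().numpy(). The toy linear policy, the shapes, and the gpu_ids value are placeholders, not surreal code.

import numpy as np
import torch
import torch.nn as nn
import torchx as tx

gpu_ids = 'cpu'  # placeholder; surreal derives this from its session config

with tx.device_scope(gpu_ids):
    policy = nn.Linear(17, 6)  # stand-in for the surreal PPO policy model

def act(obs_array):
    with tx.device_scope(gpu_ids):
        obs_tensor = torch.tensor(obs_array, dtype=torch.float32).unsqueeze(0)
        return policy(obs_tensor).detach().cpu().numpy()

print(act(np.zeros(17, dtype=np.float32)).shape)  # (1, 6)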

Example from SurrealAI/surreal, surreal/learner/ppo.py (excerpt):
            main method for optimization that calls _adapt/clip_update and
            _value_update epoch_policy and epoch_baseline times respectively
            return: dictionary of tracked statistics
            Args:
                obs: batch of observations (batch_size, N-step , obs_dim)
                obs_next: batch of next observations (batch_size, 1 , obs_dim)
                actions: batch of actions (batch_size, N-step , act_dim)
                rewards: batch of rewards (batch_size, N-step)
                dones: batch of termination flags (batch_size, N-step)
                action_infos: list of batched other attributes tracked, such as
                    behavior policy, RNN hidden states, etc.
            Returns:
                dictionary of recorded statistics
        '''
        # convert everything to float tensor: 
        with tx.device_scope(self.gpu_option):
            pds = persistent_infos[-1]

            if self.if_rnn_policy:
                h = (onetime_infos[0].transpose(0, 1).contiguous()).detach()
                c = (onetime_infos[1].transpose(0, 1).contiguous()).detach()
                self.cells = (h, c)

            advantages, returns = self._gae_and_return(obs, 
                                                       obs_next,  
                                                       rewards, 
                                                       dones)
            advantages = advantages.detach()
            returns    = returns.detach()

            if self.if_rnn_policy:
                h = self.cells[0].detach()

Example from SurrealAI/surreal, surreal/agent/ddpg_agent.py (excerpt):
def act(self, obs):
        with tx.device_scope(self.gpu_ids):
            if self.sleep_time > 0.0:
                time.sleep(self.sleep_time)
            if not self.frame_stack_concatenate_on_env:
                # The environment outputs pixel observations as a list of frames;
                # we concatenate the frames into a single numpy array
                obs = copy.deepcopy(obs)
                if 'pixel' in obs:
                    for key in obs['pixel']:
                        obs['pixel'][key] = np.concatenate(obs['pixel'][key], axis=0)
            # Convert to pytorch tensor
            obs_tensor = collections.OrderedDict()
            for modality in obs:
                modality_dict = collections.OrderedDict()
                for key in obs[modality]:
                    modality_dict[key] = torch.tensor(obs[modality][key], dtype=torch.float32).unsqueeze(0)
                obs_tensor[modality] = modality_dict
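
Finally, the learner-side snippets (ppo.py and gail.py above) also construct their optimizers and run update steps under the same scope used to build the models. A minimal sketch of that shape, with a toy model and objective standing in for the surreal ones and gpu_option again a placeholder:

import torch
import torch.nn as nn
import torchx as tx

gpu_option = 'cpu'  # placeholder, as in the earlier sketches

with tx.device_scope(gpu_option):
    model = nn.Linear(17, 1)  # stand-in for PPOModel / GAILModel
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

with tx.device_scope(gpu_option):  # e.g. inside an _optimize()-style update
    x = torch.randn(32, 17)
    loss = model(x).pow(2).mean()  # toy objective
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()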