How to use the tianshou.data.to_numpy function in tianshou

To help you get started, we’ve selected a few tianshou examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes — no build needed — and fix issues immediately.

github thu-ml / tianshou / test / base / test_batch.py View on Github external
assert isinstance(data_list_4_torch, list)
    assert all(isinstance(e, torch.Tensor) for e in data_list_4_torch)
    assert all(starmap(np.allclose,
               zip(to_numpy(to_torch(data_list_4)), data_list_4)))
    data_list_5 = [np.zeros(2), np.zeros((3, 3))]
    data_list_5_torch = to_torch(data_list_5)
    assert isinstance(data_list_5_torch, list)
    assert all(isinstance(e, torch.Tensor) for e in data_list_5_torch)
    data_array = np.random.rand(3, 2, 2)
    data_empty_tensor = to_torch(data_array[[]])
    assert isinstance(data_empty_tensor, torch.Tensor)
    assert data_empty_tensor.shape == (0, 2, 2)
    data_empty_array = to_numpy(data_empty_tensor)
    assert isinstance(data_empty_array, np.ndarray)
    assert data_empty_array.shape == (0, 2, 2)
    assert np.allclose(to_numpy(to_torch(data_array)), data_array)
github thu-ml / tianshou / test / base / test_batch.py View on Github external
data_list_2 = [np.random.rand(3, 3), np.random.rand(3, 3)]
    data_list_2_torch = to_torch(data_list_2)
    assert data_list_2_torch.shape == (2, 3, 3)
    assert np.allclose(to_numpy(to_torch(data_list_2)), data_list_2)
    data_list_3 = [np.zeros((3, 2)), np.zeros((3, 3))]
    data_list_3_torch = to_torch(data_list_3)
    assert isinstance(data_list_3_torch, list)
    assert all(isinstance(e, torch.Tensor) for e in data_list_3_torch)
    assert all(starmap(np.allclose,
               zip(to_numpy(to_torch(data_list_3)), data_list_3)))
    data_list_4 = [np.zeros((2, 3)), np.zeros((3, 3))]
    data_list_4_torch = to_torch(data_list_4)
    assert isinstance(data_list_4_torch, list)
    assert all(isinstance(e, torch.Tensor) for e in data_list_4_torch)
    assert all(starmap(np.allclose,
               zip(to_numpy(to_torch(data_list_4)), data_list_4)))
    data_list_5 = [np.zeros(2), np.zeros((3, 3))]
    data_list_5_torch = to_torch(data_list_5)
    assert isinstance(data_list_5_torch, list)
    assert all(isinstance(e, torch.Tensor) for e in data_list_5_torch)
    data_array = np.random.rand(3, 2, 2)
    data_empty_tensor = to_torch(data_array[[]])
    assert isinstance(data_empty_tensor, torch.Tensor)
    assert data_empty_tensor.shape == (0, 2, 2)
    data_empty_array = to_numpy(data_empty_tensor)
    assert isinstance(data_empty_array, np.ndarray)
    assert data_empty_array.shape == (0, 2, 2)
    assert np.allclose(to_numpy(to_torch(data_array)), data_array)
github thu-ml / tianshou / test / base / test_batch.py View on Github external
zip(to_numpy(to_torch(data_list_3)), data_list_3)))
    data_list_4 = [np.zeros((2, 3)), np.zeros((3, 3))]
    data_list_4_torch = to_torch(data_list_4)
    assert isinstance(data_list_4_torch, list)
    assert all(isinstance(e, torch.Tensor) for e in data_list_4_torch)
    assert all(starmap(np.allclose,
               zip(to_numpy(to_torch(data_list_4)), data_list_4)))
    data_list_5 = [np.zeros(2), np.zeros((3, 3))]
    data_list_5_torch = to_torch(data_list_5)
    assert isinstance(data_list_5_torch, list)
    assert all(isinstance(e, torch.Tensor) for e in data_list_5_torch)
    data_array = np.random.rand(3, 2, 2)
    data_empty_tensor = to_torch(data_array[[]])
    assert isinstance(data_empty_tensor, torch.Tensor)
    assert data_empty_tensor.shape == (0, 2, 2)
    data_empty_array = to_numpy(data_empty_tensor)
    assert isinstance(data_empty_array, np.ndarray)
    assert data_empty_array.shape == (0, 2, 2)
    assert np.allclose(to_numpy(to_torch(data_array)), data_array)
github thu-ml / tianshou / test / base / test_batch.py View on Github external
)
    a_torch_float = to_torch(batch.a, dtype=torch.float32)
    assert a_torch_float.dtype == torch.float32
    a_torch_double = to_torch(batch.a, dtype=torch.float64)
    assert a_torch_double.dtype == torch.float64
    batch_torch_float = to_torch(batch, dtype=torch.float32)
    assert batch_torch_float.a.dtype == torch.float32
    assert batch_torch_float.b.c.dtype == torch.float32
    assert batch_torch_float.b.d.dtype == torch.float32
    data_list = [float('nan'), 1]
    data_list_torch = to_torch(data_list)
    assert data_list_torch.dtype == torch.float64
    data_list_2 = [np.random.rand(3, 3), np.random.rand(3, 3)]
    data_list_2_torch = to_torch(data_list_2)
    assert data_list_2_torch.shape == (2, 3, 3)
    assert np.allclose(to_numpy(to_torch(data_list_2)), data_list_2)
    data_list_3 = [np.zeros((3, 2)), np.zeros((3, 3))]
    data_list_3_torch = to_torch(data_list_3)
    assert isinstance(data_list_3_torch, list)
    assert all(isinstance(e, torch.Tensor) for e in data_list_3_torch)
    assert all(starmap(np.allclose,
               zip(to_numpy(to_torch(data_list_3)), data_list_3)))
    data_list_4 = [np.zeros((2, 3)), np.zeros((3, 3))]
    data_list_4_torch = to_torch(data_list_4)
    assert isinstance(data_list_4_torch, list)
    assert all(isinstance(e, torch.Tensor) for e in data_list_4_torch)
    assert all(starmap(np.allclose,
               zip(to_numpy(to_torch(data_list_4)), data_list_4)))
    data_list_5 = [np.zeros(2), np.zeros((3, 3))]
    data_list_5_torch = to_torch(data_list_5)
    assert isinstance(data_list_5_torch, list)
    assert all(isinstance(e, torch.Tensor) for e in data_list_5_torch)
github thu-ml / tianshou / tianshou / policy / modelfree / ppo.py View on Github external
def process_fn(self, batch: Batch, buffer: ReplayBuffer,
                   indice: np.ndarray) -> Batch:
        """Optionally standardize rewards, then attach episodic returns.

        When ``gae_lambda`` is exactly 0 or 1 the critic bootstrap is not
        needed; otherwise next-state values are computed mini-batch-wise
        under ``torch.no_grad`` and passed to ``compute_episodic_return``.
        """
        if self._rew_norm:
            # Standardize rewards unless they are numerically constant.
            mean, std = batch.rew.mean(), batch.rew.std()
            if not np.isclose(std, 0):
                batch.rew = (batch.rew - mean) / std
        # Degenerate lambda values need no value-function bootstrap.
        if self._lambda in [0, 1]:
            return self.compute_episodic_return(
                batch, None, gamma=self._gamma, gae_lambda=self._lambda)
        with torch.no_grad():
            chunk_values = [self.critic(b.obs_next)
                            for b in batch.split(self._batch, shuffle=False)]
        next_values = to_numpy(torch.cat(chunk_values, dim=0))
        return self.compute_episodic_return(
            batch, next_values, gamma=self._gamma, gae_lambda=self._lambda)
github thu-ml / tianshou / tianshou / policy / modelfree / a2c.py View on Github external
def process_fn(self, batch: Batch, buffer: ReplayBuffer,
                   indice: np.ndarray) -> Batch:
        """Attach episodic (GAE) returns to ``batch``.

        With ``gae_lambda`` of exactly 0 or 1 no critic bootstrap is
        required; otherwise next-state values are evaluated mini-batch-wise
        without tracking gradients.
        """
        if self._lambda in [0, 1]:
            return self.compute_episodic_return(
                batch, None, gamma=self._gamma, gae_lambda=self._lambda)
        with torch.no_grad():
            pieces = [to_numpy(self.critic(b.obs_next))
                      for b in batch.split(self._batch, shuffle=False)]
        next_values = np.concatenate(pieces, axis=0)
        return self.compute_episodic_return(
            batch, next_values, gamma=self._gamma, gae_lambda=self._lambda)
github thu-ml / tianshou / tianshou / data / collector.py View on Github external
result = Batch(act=self._make_batch(action_space.sample()))
            else:
                with torch.no_grad():
                    result = self.policy(self.data, last_state)

            # convert None to Batch(), since None is reserved for 0-init
            state = result.get('state', Batch())
            if state is None:
                state = Batch()
            self.data.state = state
            if hasattr(result, 'policy'):
                self.data.policy = to_numpy(result.policy)
            # save hidden state to policy._state, in order to save into buffer
            self.data.policy._state = self.data.state

            self.data.act = to_numpy(result.act)
            if self._action_noise is not None:
                self.data.act += self._action_noise(self.data.act.shape)

            # step in env
            obs_next, rew, done, info = self.env.step(
                self.data.act if self._multi_env else self.data.act[0])

            # move data to self.data
            if not self._multi_env:
                obs_next = self._make_batch(obs_next)
                rew = self._make_batch(rew)
                done = self._make_batch(done)
                info = self._make_batch(info)
            self.data.obs_next = obs_next
            self.data.rew = rew
            self.data.done = done
github thu-ml / tianshou / tianshou / policy / modelfree / dqn.py View on Github external
def learn(self, batch: Batch, **kwargs) -> Dict[str, float]:
        """Perform one gradient step of DQN on ``batch``.

        Supports prioritized replay: if the batch exposes
        ``update_weight``, TD errors are written back to the buffer and the
        squared errors are re-weighted by ``impt_weight``.
        """
        # Periodically refresh the target network, if one is configured.
        if self._target and self._cnt % self._freq == 0:
            self.sync_weight()
        self.optim.zero_grad()
        logits = self(batch).logits
        # Q-values of the actions actually taken.
        chosen_q = logits[np.arange(len(logits)), batch.act]
        target = to_torch_as(batch.returns, chosen_q)
        if hasattr(batch, 'update_weight'):
            # Prioritized replay: feed TD errors back and weight the loss.
            td_error = target - chosen_q
            batch.update_weight(batch.indice, to_numpy(td_error))
            impt_weight = to_torch_as(batch.impt_weight, chosen_q)
            loss = (td_error.pow(2) * impt_weight).mean()
        else:
            loss = F.mse_loss(chosen_q, target)
        loss.backward()
        self.optim.step()
        self._cnt += 1
        return {'loss': loss.item()}
github thu-ml / tianshou / tianshou / data / collector.py View on Github external
action_space = self.env.action_space
                if isinstance(action_space, list):
                    result = Batch(act=[a.sample() for a in action_space])
                else:
                    result = Batch(act=self._make_batch(action_space.sample()))
            else:
                with torch.no_grad():
                    result = self.policy(self.data, last_state)

            # convert None to Batch(), since None is reserved for 0-init
            state = result.get('state', Batch())
            if state is None:
                state = Batch()
            self.data.state = state
            if hasattr(result, 'policy'):
                self.data.policy = to_numpy(result.policy)
            # save hidden state to policy._state, in order to save into buffer
            self.data.policy._state = self.data.state

            self.data.act = to_numpy(result.act)
            if self._action_noise is not None:
                self.data.act += self._action_noise(self.data.act.shape)

            # step in env
            obs_next, rew, done, info = self.env.step(
                self.data.act if self._multi_env else self.data.act[0])

            # move data to self.data
            if not self._multi_env:
                obs_next = self._make_batch(obs_next)
                rew = self._make_batch(rew)
                done = self._make_batch(done)
github thu-ml / tianshou / tianshou / utils / moving_average.py View on Github external
def add(self, x: Union[float, list, np.ndarray, torch.Tensor]) -> float:
        """Add one or more scalars into :class:`MovAvg` and return the
        updated average via ``self.get()``.

        ``x`` may be a python scalar, a list of scalars, a ``np.ndarray``,
        or a ``torch.Tensor`` (flattened first). Values contained in
        ``self.banned`` are skipped. The cache is trimmed to the most
        recent ``self.size`` entries when ``self.size > 0``.
        """
        if isinstance(x, torch.Tensor):
            # Convert to numpy so the elements below are plain scalars.
            x = to_numpy(x.flatten())
        if isinstance(x, (list, np.ndarray)):
            for value in x:
                # NOTE(review): membership uses ``==``, so NaN can never
                # match an entry of ``banned`` (NaN != NaN) — confirm
                # whether NaN filtering is expected here.
                if value not in self.banned:
                    self.cache.append(value)
        elif x not in self.banned:
            self.cache.append(x)
        if self.size > 0 and len(self.cache) > self.size:
            self.cache = self.cache[-self.size:]
        return self.get()