How to use the tensorforce.util.np_dtype function in Tensorforce

To help you get started, we've selected a few Tensorforce examples that show how tensorforce.util.np_dtype is used in public projects.

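Before diving into the examples, here is a minimal sketch of how util.np_dtype is typically used: it translates Tensorforce's dtype strings ('bool', 'int', 'long', 'float') into the corresponding NumPy types, so that arrays, initializers, and type assertions agree with the framework's internal representation. The exact NumPy types it returns (for example np.int64 for 'long') depend on the Tensorforce version, so treat the mapping in the comments below as an assumption.

import numpy as np
from tensorforce import util

# Assumed mapping (version-dependent): 'bool' -> np.bool_, 'int'/'long' -> np.int64,
# 'float' -> np.float32.
terminal = np.zeros(shape=(10,), dtype=util.np_dtype(dtype='long'))
reward = np.zeros(shape=(10,), dtype=util.np_dtype(dtype='float'))

assert terminal.dtype == util.np_dtype(dtype='long')
assert isinstance(reward[0], util.np_dtype(dtype='float'))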

From tensorforce/tensorforce, test/test_parameters.py:
def long_unittest(self, horizon):
        agent, environment = self.prepare(
            min_timesteps=3, reward_estimation=dict(horizon=horizon), memory=20
        )

        states = environment.reset()
        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        _, horizon_output1 = agent.observe(terminal=terminal, reward=reward, query='horizon')
        self.assertIsInstance(horizon_output1, util.np_dtype(dtype='long'))

        if not isinstance(horizon, dict) or horizon['type'] == 'constant':
            actions = agent.act(states=states)
            states, terminal, reward = environment.execute(actions=actions)
            _, horizon_output2 = agent.observe(terminal=terminal, reward=reward, query='horizon')
            self.assertEqual(horizon_output2, horizon_output1)

        else:
            actions = agent.act(states=states)
            states, terminal, reward = environment.execute(actions=actions)
            _, horizon_output2 = agent.observe(terminal=terminal, reward=reward, query='horizon')
            self.assertNotEqual(horizon_output2, horizon_output1)

        actions = agent.act(states=states)
        _, terminal, reward = environment.execute(actions=actions)
        horizon_input = 3
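In this test, np_dtype supplies the expected type for assertIsInstance: the value queried via agent.observe(..., query='horizon') is expected to come back as a NumPy scalar of the framework's 'long' type rather than a plain Python int. A rough standalone equivalent of that assertion, with a hypothetical queried value:

import numpy as np
from tensorforce import util

# Hypothetical stand-in for the value returned by agent.observe(..., query='horizon').
horizon_output = np.asarray(4, dtype=util.np_dtype(dtype='long'))[()]

# Equivalent of self.assertIsInstance(horizon_output1, util.np_dtype(dtype='long')).
assert isinstance(horizon_output, util.np_dtype(dtype='long'))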
From tensorforce/tensorforce, test/unittest_environment.py:
return (lambda action, name, states: (
                (
                    (isinstance(action, util.np_dtype('int')) and shape == ()) or
                    (
                        isinstance(action, np.ndarray) and
                        action.dtype == util.np_dtype('int') and action.shape == shape
                    )
                ) and (0 <= action).all() and (action < num_values).all() and
                np.take_along_axis(
                    states[name + '_mask'], indices=np.expand_dims(action, axis=-1), axis=-1
                ).all()
            ))
From tensorforce/tensorforce, test/unittest_environment.py:
return (lambda action, name, states: (
                (isinstance(action, util.np_dtype('bool')) and shape == ()) or
                (
                    isinstance(action, np.ndarray) and
                    action.dtype == util.np_dtype('bool') and action.shape == shape
                )
            ))
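Both lambdas above validate sampled actions with the same pattern: an action with shape () should be a NumPy scalar of the expected type, while a batched action should be an ndarray whose dtype equals util.np_dtype(...). A standalone sketch of that pattern (is_valid_action is our own helper, not Tensorforce API):

import numpy as np
from tensorforce import util

def is_valid_action(action, dtype, shape):
    # Scalar actions arrive as NumPy scalars, batched actions as ndarrays.
    scalar_ok = shape == () and isinstance(action, util.np_dtype(dtype))
    array_ok = (
        isinstance(action, np.ndarray) and
        action.dtype == util.np_dtype(dtype) and action.shape == shape
    )
    return scalar_ok or array_ok

assert is_valid_action(util.np_dtype('int')(2), dtype='int', shape=())
assert is_valid_action(np.zeros((3,), dtype=util.np_dtype('bool')), dtype='bool', shape=(3,))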
From tensorforce/tensorforce, tensorforce/core/memories/queue.py:
def __init__(self, name, capacity, values_spec, device=None, summary_labels=None):
        # Terminal initialization has to agree with terminal_indices
        terminal_initializer = np.zeros(shape=(capacity,), dtype=util.np_dtype(dtype='long'))
        terminal_initializer[-1] = 1
        initializers = OrderedDict(terminal=terminal_initializer)

        super().__init__(
            name=name, capacity=capacity, values_spec=values_spec, initializers=initializers,
            device=device, summary_labels=summary_labels
        )
From tensorforce/tensorforce, tensorforce/core/memories/queue.py:
def tf_initialize(self):
        super().tf_initialize()

        # Terminal indices
        # (oldest episode terminals first, initially the only terminal is last index)
        initializer = np.zeros(shape=(self.capacity + 1,), dtype=util.np_dtype(dtype='long'))
        initializer[0] = self.capacity - 1
        self.terminal_indices = self.add_variable(
            name='terminal-indices', dtype='long', shape=(self.capacity + 1,), is_trainable=False,
            initializer=initializer
        )

        # Episode count
        self.episode_count = self.add_variable(
            name='episode-count', dtype='long', shape=(), is_trainable=False, initializer='zeros'
        )
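In both queue.py snippets, np_dtype fixes the dtype of NumPy initializer arrays so that they agree with the 'long' variables they seed (the terminal flags and the terminal indices). A minimal sketch of the terminal-indices initializer, with a small hypothetical capacity:

import numpy as np
from tensorforce import util

capacity = 5  # hypothetical capacity, for illustration only

# Oldest episode terminals first; initially the only terminal is the last index.
initializer = np.zeros(shape=(capacity + 1,), dtype=util.np_dtype(dtype='long'))
initializer[0] = capacity - 1

assert initializer.dtype == util.np_dtype(dtype='long')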
From tensorforce/tensorforce, tensorforce/core/module.py:
raise TensorforceError.type(
                name='variable', argument='is_trainable', value=is_trainable
            )
        elif is_trainable and dtype != 'float':
            raise TensorforceError.unexpected()
        # initializer
        initializer_names = (
            'normal', 'normal-relu', 'orthogonal', 'orthogonal-relu', 'zeros', 'ones'
        )
        if not isinstance(initializer, (util.py_dtype(dtype=dtype), np.ndarray, tf.Tensor)) and \
                initializer not in initializer_names:
            raise TensorforceError.value(
                name='variable', argument='initializer', value=initializer
            )
        elif isinstance(initializer, np.ndarray) and \
                initializer.dtype != util.np_dtype(dtype=dtype):
            raise TensorforceError.type(
                name='variable', argument='initializer', value=initializer
            )
        elif isinstance(initializer, tf.Tensor) and util.dtype(x=initializer) != dtype:
            raise TensorforceError.type(
                name='variable', argument='initializer', value=initializer
            )
        # is_saved
        if not isinstance(is_saved, bool):
            raise TensorforceError.type(name='variable', argument='is_saved', value=is_saved)
        # summarize
        if summarize is not None and not isinstance(summarize, bool):
            raise TensorforceError.type(name='variable', argument='summarize', value=summarize)
        # shared
        if shared is not None and not isinstance(shared, str):
            raise TensorforceError.type(name='variable', argument='shared', value=shared)
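Here np_dtype bridges the dtype string passed to add_variable ('float', 'int', and so on) and the dtype of a NumPy array supplied as initializer: if the two disagree, the initializer is rejected. A simplified sketch of just that check, with Tensorforce's error type replaced by a plain ValueError:

import numpy as np
from tensorforce import util

def check_initializer(initializer, dtype):
    # Reject NumPy initializers whose dtype disagrees with the requested variable dtype.
    if isinstance(initializer, np.ndarray) and initializer.dtype != util.np_dtype(dtype=dtype):
        raise ValueError('initializer dtype does not match variable dtype')

check_initializer(np.zeros((2, 2), dtype=util.np_dtype(dtype='float')), dtype='float')  # passes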
From tensorforce/tensorforce, tensorforce/agents/tensorforce.py:
internals = OrderedDict()
        actions = util.normalize_values(
            value_type='action', values=actions, values_spec=self.actions_spec
        )

        if isinstance(terminal, (bool, int)):
            states = util.fmap(function=(lambda x: [x]), xs=states, depth=1)
            actions = util.fmap(function=(lambda x: [x]), xs=actions, depth=1)
            terminal = [terminal]
            reward = [reward]

        states = util.fmap(function=np.asarray, xs=states, depth=1)
        actions = util.fmap(function=np.asarray, xs=actions, depth=1)

        if isinstance(terminal, np.ndarray):
            if terminal.dtype is util.np_dtype(dtype='bool'):
                zeros = np.zeros_like(terminal, dtype=util.np_dtype(dtype='long'))
                ones = np.ones_like(terminal, dtype=util.np_dtype(dtype='long'))
                terminal = np.where(terminal, ones, zeros)
        else:
            terminal = np.asarray([int(x) if isinstance(x, bool) else x for x in terminal])
        reward = np.asarray(reward)

        # Batch experiences split into episodes and at most size buffer_observe
        last = 0
        for index in range(1, len(terminal) + 1):
            if terminal[index - 1] == 0 and index - last < self.experience_size:
                continue

            # Include terminal in batch if possible
            if index < len(terminal) and terminal[index - 1] == 0 and terminal[index] > 0 and \
                    index - last < self.experience_size:
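The block above converts a boolean terminal array into the integer ('long') representation the experience buffer expects, selecting between ones and zeros of that dtype. Roughly, as a standalone sketch (the sketch compares dtypes with ==, the conventional NumPy check, whereas the snippet uses is, which only succeeds if np_dtype returns the identical dtype object):

import numpy as np
from tensorforce import util

terminal = np.asarray([False, False, True])
if terminal.dtype == util.np_dtype(dtype='bool'):
    zeros = np.zeros_like(terminal, dtype=util.np_dtype(dtype='long'))
    ones = np.ones_like(terminal, dtype=util.np_dtype(dtype='long'))
    terminal = np.where(terminal, ones, zeros)

# terminal is now array([0, 0, 1]) in the framework's 'long' dtype.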
From tensorforce/tensorforce, tensorforce/agents/agent.py:
states=states, auxiliaries=auxiliaries, parallel=parallel,
                deterministic=deterministic, independent=independent, query=query, **kwargs
            )

        if self.recorder_spec is not None and not independent:
            for name in self.states_spec:
                self.record_states[name].append(states[name])
            for name, spec in self.actions_spec.items():
                self.record_actions[name].append(actions[name])
                if spec['type'] == 'int':
                    if name + '_mask' in auxiliaries:
                        self.record_states[name + '_mask'].append(auxiliaries[name + '_mask'])
                    else:
                        shape = (1,) + spec['shape'] + (spec['num_values'],)
                        self.record_states[name + '_mask'].append(
                            np.full(shape, True, dtype=util.np_dtype(dtype='bool'))
                        )

        # Unbatch actions
        actions = util.fmap(function=(lambda x: x[0]), xs=actions, depth=1)

        # Reverse normalized actions dictionary
        actions = util.unpack_values(
            value_type='action', values=actions, values_spec=self.actions_spec
        )

        # if independent, return processed state as well?

        if query is None:
            return actions
        else:
            return actions, queried
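When no action mask was supplied for an 'int' action, the recorder above falls back to an all-True mask with shape (1,) + action shape + (num_values,), using np_dtype to get the framework's 'bool' type. A sketch with a hypothetical action spec:

import numpy as np
from tensorforce import util

# Hypothetical int-action spec, for illustration only.
spec = dict(type='int', shape=(2,), num_values=4)

# Default all-True mask for a single recorded timestep.
shape = (1,) + spec['shape'] + (spec['num_values'],)
mask = np.full(shape, True, dtype=util.np_dtype(dtype='bool'))

assert mask.shape == (1, 2, 4) and mask.all()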