How to use the tensorforce.util.fmap function in Tensorforce

To help you get started, we’ve selected a few Tensorforce examples that show how tensorforce.util.fmap is used in public projects.

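In short, util.fmap applies a function to every value of a (possibly nested) dictionary or list and returns a structure of the same shape; the optional depth argument limits how far it recurses. The sketch below is only an approximation of that behavior, inferred from the examples that follow, not the library's actual implementation:

def fmap_sketch(function, xs, depth=-1):
    # Recurse into dicts and lists/tuples until depth reaches 0, then apply the function
    if depth != 0 and isinstance(xs, dict):
        return type(xs)((name, fmap_sketch(function, x, depth - 1)) for name, x in xs.items())
    elif depth != 0 and isinstance(xs, (list, tuple)):
        return type(xs)(fmap_sketch(function, x, depth - 1) for x in xs)
    else:
        return function(xs)

# Example: apply a function to every leaf value of a nested dict
print(fmap_sketch(lambda x: x * 2, {'a': 1, 'nested': {'b': 2}}))
# {'a': 2, 'nested': {'b': 4}}
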
github tensorforce / tensorforce / tensorforce / agents / agent.py
            for name in self.states_spec:
                self.states_buffers[name][parallel, index] = states[name][0]
            for name, spec in self.actions_spec.items():
                self.actions_buffers[name][parallel, index] = actions[name][0]
                if spec['type'] == 'int':
                    name = name + '_mask'
                    if name in auxiliaries:
                        self.states_buffers[name][parallel, index] = auxiliaries[name][0]
                    else:
                        shape = (1,) + spec['shape'] + (spec['num_values'],)
                        self.states_buffers[name][parallel, index] = np.full(
                            shape=shape, fill_value=True, dtype=util.np_dtype(dtype='bool')
                        )

        # Unbatch actions
        actions = util.fmap(function=(lambda x: x[0]), xs=actions, depth=1)

        # Reverse normalized actions dictionary
        actions = util.unpack_values(
            value_type='action', values=actions, values_spec=self.actions_spec
        )

        # if independent, return processed state as well?

        if query is None:
            return actions
        else:
            return actions, queried
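
In the snippet above, fmap with depth=1 and lambda x: x[0] strips the leading batch dimension (of size one) from every entry of the actions dictionary. A rough standalone equivalent, with made-up action names:

import numpy as np

# Hypothetical batched actions dict (batch size 1), mirroring the call above
actions = {'move': np.array([[0.2, -0.1]]), 'jump': np.array([1])}

# Same effect as util.fmap(function=(lambda x: x[0]), xs=actions, depth=1):
# apply the function to each value, one level deep
actions = {name: value[0] for name, value in actions.items()}
print(actions)  # {'move': array([ 0.2, -0.1]), 'jump': 1}
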
github tensorforce / tensorforce / tensorforce / core / optimizers / natural_gradient.py
            # delta = delta' / lambda
            estimated_deltas = [delta / lagrange_multiplier for delta in deltas]

            # improvement = grad(loss) * delta  (= loss_new - loss_old)
            estimated_improvement = tf.add_n(inputs=[
                tf.reduce_sum(input_tensor=(grad * delta))
                for grad, delta in zip(loss_gradients, estimated_deltas)
            ])

            # Apply natural gradient improvement.
            applied = self.apply_step(variables=variables, deltas=estimated_deltas)

            with tf.control_dependencies(control_inputs=(applied,)):
                # Trivial operation to enforce control dependency
                estimated_delta = util.fmap(function=util.identity_operation, xs=estimated_deltas)
                if return_estimated_improvement:
                    return estimated_delta, estimated_improvement
                else:
                    return estimated_delta
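
Here util.identity_operation plays the role of tf.identity: mapping it over the deltas inside the control_dependencies block makes the returned tensors depend on the applied update, so callers cannot read them before apply_step has run. A minimal sketch of the same pattern in plain TensorFlow (the variable and delta values are made up):

import tensorflow as tf

variable = tf.Variable([1.0, 2.0])
deltas = [tf.constant([0.5, -0.5])]

@tf.function
def apply_and_return(deltas):
    # Apply the update first, then return identity-wrapped deltas that
    # carry a control dependency on the applied op
    applied = variable.assign_add(deltas[0])
    with tf.control_dependencies([applied]):
        return [tf.identity(delta) for delta in deltas]

print(apply_and_return(deltas))
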
github tensorforce / tensorforce / tensorforce / agents / tensorforce.py
                (<span style="color:#00C000"><b>default</b></span>: none).
            kwargs: Additional input values, for instance, for dynamic hyperparameters.
        """
        assert (self.buffer_indices == 0).all()
        assert util.reduce_all(predicate=util.not_nan_inf, xs=states)
        assert internals is None  # or util.reduce_all(predicate=util.not_nan_inf, xs=internals)
        assert util.reduce_all(predicate=util.not_nan_inf, xs=actions)
        assert util.reduce_all(predicate=util.not_nan_inf, xs=reward)

        # Auxiliaries
        auxiliaries = OrderedDict()
        if isinstance(states, dict):
            for name, spec in self.actions_spec.items():
                if spec['type'] == 'int' and name + '_mask' in states:
                    auxiliaries[name + '_mask'] = np.asarray(states.pop(name + '_mask'))
        auxiliaries = util.fmap(function=np.asarray, xs=auxiliaries, depth=1)

        # Normalize states/actions dictionaries
        states = util.normalize_values(
            value_type='state', values=states, values_spec=self.states_spec
        )
        if internals is None:
            internals = OrderedDict()
        actions = util.normalize_values(
            value_type='action', values=actions, values_spec=self.actions_spec
        )

        if isinstance(terminal, (bool, int)):
            states = util.fmap(function=(lambda x: [x]), xs=states, depth=1)
            actions = util.fmap(function=(lambda x: [x]), xs=actions, depth=1)
            terminal = [terminal]
            reward = [reward]
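
Both fmap calls in this method normalize the shape of the inputs: the first converts every auxiliary mask to an ndarray, and the second wraps single-timestep values in length-1 lists so a single step looks like a batch of one. A rough equivalent with made-up names:

import numpy as np

# Hypothetical single-timestep inputs
auxiliaries = {'move_mask': [True, True, False]}
states = {'obs': np.zeros(4)}
actions = {'move': 1}

# util.fmap(function=np.asarray, xs=auxiliaries, depth=1)
auxiliaries = {name: np.asarray(x) for name, x in auxiliaries.items()}

# util.fmap(function=(lambda x: [x]), xs=..., depth=1)
states = {name: [x] for name, x in states.items()}
actions = {name: [x] for name, x in actions.items()}
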
github tensorforce / tensorforce / tensorforce / core / optimizers / subsampling_step.py
            batch_size = tf.shape(input=some_argument, out_type=util.tf_dtype(dtype='int'))[0]
        else:
            batch_size = tf.dtypes.cast(
                x=tf.shape(input=some_argument)[0], dtype=util.tf_dtype(dtype='int')
            )
        fraction = self.fraction.value()
        num_samples = fraction * tf.dtypes.cast(x=batch_size, dtype=util.tf_dtype('float'))
        num_samples = tf.dtypes.cast(x=num_samples, dtype=util.tf_dtype('int'))
        one = tf.constant(value=1, dtype=util.tf_dtype('int'))
        num_samples = tf.maximum(x=num_samples, y=one)
        indices = tf.random.uniform(
            shape=(num_samples,), maxval=batch_size, dtype=util.tf_dtype(dtype='int')
        )

        function = (lambda x: tf.gather(params=x, indices=indices))
        subsampled_arguments = util.fmap(function=function, xs=arguments)

        return self.optimizer.step(variables=variables, arguments=subsampled_arguments, **kwargs)
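
The subsampling step draws one set of random indices and then, via fmap, gathers those same indices from every tensor in the arguments dict, so all tensors stay aligned row for row. A small sketch with hypothetical argument tensors:

import tensorflow as tf

# Hypothetical optimizer arguments sharing the same leading batch dimension
arguments = {'states': tf.random.normal([8, 4]), 'reward': tf.random.normal([8])}

# One set of indices, gathered from every tensor (as the fmap call above does)
indices = tf.random.uniform(shape=(3,), maxval=8, dtype=tf.int64)
subsampled = {name: tf.gather(params=x, indices=indices) for name, x in arguments.items()}
print(subsampled['states'].shape)  # (3, 4)
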
github tensorforce / tensorforce / tensorforce / agents / tensorforce.py
                if spec['type'] == 'int':
                    states[name + '_mask'] = list()
            actions = OrderedDict(((name, list()) for name in self.actions_spec))
            terminal = list()
            reward = list()
            for index in selection:
                trace = np.load(files[index])
                for name in states:
                    states[name].append(trace[name])
                for name in actions:
                    actions[name].append(trace[name])
                terminal.append(trace['terminal'])
                reward.append(trace['reward'])

            states = util.fmap(function=np.concatenate, xs=states, depth=1)
            actions = util.fmap(function=np.concatenate, xs=actions, depth=1)
            terminal = np.concatenate(terminal)
            reward = np.concatenate(reward)

            self.experience(states=states, actions=actions, terminal=terminal, reward=reward)
            for _ in range(num_updates):
                self.update()
            # TODO: self.obliviate()
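
After the loop has appended one array per trace file, fmap with np.concatenate merges each per-name list into a single array along the first axis. A standalone sketch with made-up trace shapes:

import numpy as np

# Hypothetical per-trace arrays collected as in the loop above
states = {'obs': [np.zeros((5, 3)), np.zeros((7, 3))]}
actions = {'move': [np.zeros(5), np.zeros(7)]}

# util.fmap(function=np.concatenate, xs=..., depth=1)
states = {name: np.concatenate(parts) for name, parts in states.items()}
actions = {name: np.concatenate(parts) for name, parts in actions.items()}
print(states['obs'].shape)  # (12, 3)
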
github tensorforce / tensorforce / tensorforce / core / module.py
                tensor = tf.math.reduce_sum(input_tensor=tf.squeeze(input=tensor, axis=-1), axis=0)
                summaries.append(tf.summary.scalar(name=name, data=tensor, step=step))
            else:
                # General tensor as histogram
                assert not util.is_iterable(x=label) and label.endswith('-histogram')
                summaries.append(tf.summary.histogram(name=name, data=tensor, step=step))

        # Scope handling
        if Module.scope_stack is not None:
            if len(Module.global_scope) > 0:
                temp_scope.__exit__(None, None, None)
            for scope in Module.scope_stack[1:]:
                scope.__enter__()

        with tf.control_dependencies(control_inputs=summaries):
            return util.fmap(function=util.identity_operation, xs=pass_tensors)
github tensorforce / tensorforce / tensorforce / core / optimizers / natural_gradient.py
            ]

            # delta' * grad(kldiv)
            delta_kldiv_gradients = tf.add_n(inputs=[
                tf.reduce_sum(input_tensor=(delta * grad))
                for delta, grad in zip(deltas, kldiv_gradients)
            ])

            # [delta' * F] = grad(delta' * grad(kldiv))
            return [
                tf.convert_to_tensor(value=grad)
                for grad in tf.gradients(ys=delta_kldiv_gradients, xs=variables)
            ]

        # loss
        arguments = util.fmap(function=tf.stop_gradient, xs=arguments)
        loss = fn_loss(**arguments)

        # grad(loss)
        loss_gradients = tf.gradients(ys=loss, xs=variables)

        # Solve the following system for delta' via the conjugate gradient solver.
        # [delta' * F] * delta' = -grad(loss)
        # --> delta'  (= lambda * delta)
        deltas = self.solver.solve(
            fn_x=fisher_matrix_product, x_init=None, b=[-grad for grad in loss_gradients]
        )

        # delta' * F
        delta_fisher_matrix_product = fisher_matrix_product(deltas=deltas)

        # c' = 0.5 * delta' * F * delta'  (= lambda * c)
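
The fmap call in this excerpt wraps every loss input in tf.stop_gradient, so that tf.gradients only differentiates the loss with respect to the variables, not the arguments. A minimal sketch of that use, with hypothetical argument names:

import tensorflow as tf

# Hypothetical loss inputs; stop_gradient blocks differentiation through them
arguments = {'states': tf.random.normal([8, 4]), 'advantage': tf.random.normal([8])}
arguments = {name: tf.stop_gradient(x) for name, x in arguments.items()}
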
github tensorforce / tensorforce / tensorforce / agents / tensorforce.py
        # Batch experiences split into episodes and at most size buffer_observe
        last = 0
        for index in range(1, len(terminal) + 1):
            if terminal[index - 1] == 0 and index - last < self.experience_size:
                continue

            # Include terminal in batch if possible
            if index < len(terminal) and terminal[index - 1] == 0 and terminal[index] > 0 and \
                    index - last < self.experience_size:
                index += 1

            function = (lambda x: x[last: index])
            states_batch = util.fmap(function=function, xs=states, depth=1)
            internals_batch = util.fmap(function=function, xs=internals, depth=1)
            auxiliaries_batch = util.fmap(function=function, xs=auxiliaries, depth=1)
            actions_batch = util.fmap(function=function, xs=actions, depth=1)
            terminal_batch = terminal[last: index]
            reward_batch = reward[last: index]
            last = index

            # Model.experience()
            if query is None:
                self.timesteps, self.episodes, self.updates = self.model.experience(
                    states=states_batch, internals=internals_batch,
                    auxiliaries=auxiliaries_batch, actions=actions_batch, terminal=terminal_batch,
                    reward=reward_batch, **kwargs
                )

            else:
                self.timesteps, self.episodes, self.updates, queried = self.model.experience(
                    states=states_batch, internals=internals_batch,
                    auxiliaries=auxiliaries_batch, actions=actions_batch, terminal=terminal_batch,