How to use the tensorforce.util module in Tensorforce

To help you get started, we’ve selected a few Tensorforce examples based on popular ways tensorforce.util is used in public projects.

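The snippets below all call the library's internal helper module directly. In the 0.5.x codebase the Tensorforce modules import it at the top of each file; the same import (assumed here) applies to every example on this page:

from tensorforce import util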

github tensorforce / tensorforce / tensorforce / core / module.py (view on GitHub)
            variable = collection[0]

        else:
            tf_dtype = util.tf_dtype(dtype=dtype)

            # Variable initializer
            if isinstance(initializer, util.py_dtype(dtype=dtype)):
                initializer = tf.constant(value=initializer, dtype=tf_dtype, shape=shape)
            elif isinstance(initializer, np.ndarray):
                if initializer.shape != shape:
                    raise TensorforceError(
                        "Invalid variable initializer shape: {}.".format(initializer.shape)
                    )
                initializer = tf.constant(value=initializer, dtype=tf_dtype)
            elif isinstance(initializer, tf.Tensor):
                if util.shape(x=initializer) != shape:
                    raise TensorforceError(
                        "Invalid variable initializer shape: {}.".format(util.shape(x=initializer))
                    )
                initializer = initializer
            elif not isinstance(initializer, str):
                raise TensorforceError("Invalid variable initializer: {}".format(initializer))
            elif initializer[:6] == 'normal':
                if dtype != 'float':
                    raise TensorforceError(
                        message="Invalid variable initializer value for non-float variable: {}.".format(
                            initializer
                        )
                    )
                if initializer[6:] == '-relu':
                    stddev = min(0.1, sqrt(2.0 / util.product(xs=shape[:-1])))
                else:
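
A minimal sketch of the dtype and shape helpers used above, assuming the Tensorforce 0.5.x signatures shown in the snippet (util.tf_dtype, util.py_dtype, util.shape):

import tensorflow as tf
from tensorforce import util

tf_float = util.tf_dtype(dtype='float')             # TensorFlow dtype for the framework-wide 'float' type
py_float = util.py_dtype(dtype='float')             # matching Python type, usable with isinstance()
constant = tf.constant(value=0.5, dtype=tf_float)   # typed constant, as in the initializer branch above
assert isinstance(0.5, py_float)
assert util.shape(x=constant) == ()                 # static shape as a tuple
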
github tensorforce / tensorforce / tensorforce / core / objectives / policy_gradient.py (view on GitHub)
def tf_loss_per_instance(
        self, policy, states, internals, auxiliaries, actions, reward, reference=None
    ):
        assert self.ratio_based or reference is None

        log_probability = policy.log_probability(
            states=states, internals=internals, auxiliaries=auxiliaries, actions=actions,
            reduced=self.early_reduce
        )

        zero = tf.constant(value=0.0, dtype=util.tf_dtype(dtype='float'))
        one = tf.constant(value=1.0, dtype=util.tf_dtype(dtype='float'))

        clipping_value = self.clipping_value.value()

        if self.ratio_based:
            if reference is None:
                reference = log_probability
            scaling = tf.exp(x=(log_probability - tf.stop_gradient(input=reference)))
            min_value = one / (one + clipping_value)
            max_value = one + clipping_value

        else:
            scaling = log_probability
            min_value = -clipping_value
            max_value = log_probability + one
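
The excerpt stops before the surrogate loss itself. As a sketch of the standard PPO-style pattern these min/max bounds typically feed into (an illustration of the technique, not necessarily Tensorforce's exact continuation):

import tensorflow as tf

def clipped_surrogate(scaling, reward, min_value, max_value):
    # Bound the probability ratio, then take the pessimistic (elementwise minimum) objective.
    clipped = tf.clip_by_value(t=scaling, clip_value_min=min_value, clip_value_max=max_value)
    return -tf.minimum(x=(scaling * reward), y=(clipped * reward))
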
github tensorforce / tensorforce / tensorforce / core / models / pg_log_prob_model.py (view on GitHub)
def tf_loss_per_instance(
        self, states, internals, actions, terminal, reward, next_states, next_internals,
        reference=None
    ):
        embedding = self.network.apply(x=states, internals=internals)

        log_probs = list()
        for name, distribution in self.distributions.items():
            parameters = distribution.parametrize(x=embedding)
            action = actions[name]
            log_prob = distribution.log_probability(parameters=parameters, action=action)
            collapsed_size = util.product(xs=util.shape(log_prob)[1:])
            log_prob = tf.reshape(tensor=log_prob, shape=(-1, collapsed_size))
            log_probs.append(log_prob)

        log_probs = tf.concat(values=log_probs, axis=1)
        log_prob_per_instance = tf.reduce_mean(input_tensor=log_probs, axis=1)
        return -log_prob_per_instance * reward
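
The reshape above collapses every non-batch dimension of the per-action log-probabilities into a single axis. A hedged sketch with a concrete shape, assuming the util.product and util.shape signatures shown in the snippet:

import tensorflow as tf
from tensorforce import util

log_prob = tf.zeros(shape=(8, 2, 3))                              # (batch, action dims...)
collapsed_size = util.product(xs=util.shape(x=log_prob)[1:])      # 2 * 3 = 6
flat = tf.reshape(tensor=log_prob, shape=(-1, collapsed_size))    # shape (8, 6)
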
github tensorforce / tensorforce / tensorforce / core / estimators / estimator.py (view on GitHub)
)

            # Overwrite buffer rewards
            with tf.control_dependencies(control_inputs=assertions):
                indices = tf.range(
                    start=self.buffer_index, limit=(self.buffer_index + num_overwritten)
                )
                indices = tf.math.mod(x=indices, y=capacity)
                indices = tf.expand_dims(input=indices, axis=1)

            assignment = self.buffers['reward'].scatter_nd_update(
                indices=indices, updates=discounted_sum
            )

            with tf.control_dependencies(control_inputs=(assignment,)):
                return util.no_operation()
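
Here util.no_operation() serves as a return value that carries the control dependencies without producing data. A minimal graph-mode sketch, assuming the no-argument call shown above (Tensorforce 0.5 builds a TensorFlow graph):

import tensorflow as tf
from tensorforce import util

counter = tf.Variable(initial_value=0.0)
assignment = counter.assign_add(delta=1.0)
with tf.control_dependencies(control_inputs=(assignment,)):
    done = util.no_operation()   # no-op that still forces the assignment to run first
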
github tensorforce / tensorforce / tensorforce / core / memories / replay.py (view on GitHub)
def tf_retrieve_timesteps(self, n, past_padding, future_padding):
        one = tf.constant(value=1, dtype=util.tf_dtype(dtype='long'))
        capacity = tf.constant(value=self.capacity, dtype=util.tf_dtype(dtype='long'))

        # # Start index of oldest episode
        # oldest_episode_start = self.terminal_indices[0] + one + past_padding

        # # Number of timesteps (minus/plus one to prevent zero but allow capacity)
        # num_timesteps = self.buffer_index - oldest_episode_start - future_padding - one 
        # num_timesteps = tf.math.mod(x=num_timesteps, y=capacity) + one

        # Check whether memory contains enough timesteps
        num_timesteps = tf.minimum(x=self.buffer_index, y=capacity) - past_padding - future_padding
        assertion = tf.debugging.assert_less_equal(x=n, y=num_timesteps)

        # Randomly sampled timestep indices
        with tf.control_dependencies(control_inputs=(assertion,)):
            indices = tf.random.uniform(
                shape=(n,), maxval=num_timesteps, dtype=util.tf_dtype(dtype='long')
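
The sampling call is cut off above; the pattern is uniform integer sampling with the framework's 'long' dtype. A hedged standalone sketch using the util.tf_dtype signature shown:

import tensorflow as tf
from tensorforce import util

num_timesteps = tf.constant(value=100, dtype=util.tf_dtype(dtype='long'))
indices = tf.random.uniform(shape=(4,), maxval=num_timesteps, dtype=util.tf_dtype(dtype='long'))
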
github tensorforce / tensorforce / tensorforce / core / models / q_naf_model.py (view on GitHub)
# MemoryModel
            update_mode=update_mode, memory=memory, optimizer=optimizer, discount=discount,
            # DistributionModel
            network=network, distributions=distributions,
            entropy_regularization=entropy_regularization,
            # QModel
            target_sync_frequency=target_sync_frequency, target_update_weight=target_update_weight,
            double_q_model=double_q_model, huber_loss=huber_loss
        )

        self.state_values = OrderedDict()
        self.l_entries = OrderedDict()
        embedding_size = self.network.get_output_spec()['shape'][0]
        input_spec = dict(type='float', shape=(embedding_size,))
        for name, action_spec in self.actions_spec.items():
            action_size = util.product(xs=action_spec['shape'])
            self.state_values[name] = self.add_module(
                name=(name + '-state-value'), module='linear', modules=layer_modules,
                size=action_size, input_spec=input_spec
            )
            self.l_entries[name] = self.add_module(
                name=(name + '-l-entries'), module='linear', modules=layer_modules,
                size=action_size, input_spec=input_spec
            )
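
A small sketch of how the per-action layer size is derived from the action spec, using the util.product call shown above (the spec values here are hypothetical):

from tensorforce import util

action_spec = dict(type='float', shape=(2, 3))          # hypothetical continuous action spec
action_size = util.product(xs=action_spec['shape'])     # 6 output units for each linear head
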
github tensorforce / tensorforce / tensorforce / core / parameters / parameter.py (view on GitHub)
def __init__(self, name, dtype, shape=(), unit=None, summary_labels=None):
        super().__init__(name=name, summary_labels=summary_labels)

        assert unit in (None, 'timesteps', 'episodes', 'updates')

        spec = dict(type=dtype, shape=shape)
        spec = util.valid_value_spec(value_spec=spec, return_normalized=True)
        self.dtype = spec['type']
        self.shape = spec['shape']
        self.unit = unit

        Module.register_tensor(name=self.name, spec=spec, batched=False)
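
A hedged sketch of the spec normalization step above, assuming the util.valid_value_spec signature shown in the snippet:

from tensorforce import util

spec = util.valid_value_spec(value_spec=dict(type='float', shape=()), return_normalized=True)
dtype, shape = spec['type'], spec['shape']   # normalized dtype string and shape tuple
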
github tensorforce / tensorforce / tensorforce / core / layers / preprocessing.py (view on GitHub)
def first_sequence():
            assignment = self.has_previous.assign(
                value=tf.constant(value=True, dtype=util.tf_dtype(dtype='bool')), read_value=False
            )
            with tf.control_dependencies(control_inputs=(assignment,)):
                if self.concatenate:
                    current = x
                else:
                    current = tf.expand_dims(input=x, axis=(self.axis + 1))
                multiples = tuple(
                    self.length if dims == self.axis + 1 else 1
                    for dims in range(util.rank(x=current))
                )
                return tf.tile(input=current, multiples=multiples)
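
A minimal sketch of the rank-driven tiling pattern above, assuming util.rank returns the static rank as an integer (as its use in the snippet suggests):

import tensorflow as tf
from tensorforce import util

x = tf.zeros(shape=(1, 84, 84, 3))           # e.g. a batched image observation
current = tf.expand_dims(input=x, axis=2)    # insert a new sequence axis
multiples = tuple(4 if dims == 2 else 1 for dims in range(util.rank(x=current)))
tiled = tf.tile(input=current, multiples=multiples)   # shape (1, 84, 4, 84, 3)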