How to use the tensorforce.util.shape function in Tensorforce

To help you get started, we’ve selected a few Tensorforce examples based on popular ways the function is used in public projects.

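For reference, judging from the excerpts below, util.shape(x) returns a tensor's static shape as a plain Python tuple, with unknown dimensions reported as -1 by default; the placeholder appears to be overridable via the unknown argument (see the last example). A rough sketch of that behaviour, as an approximation rather than the actual implementation:

import tensorflow as tf

def shape(x, unknown=-1):
    # Approximation of tensorforce.util.shape: the static shape as a Python
    # tuple, with unknown (None) dimensions replaced by the `unknown` value.
    return tuple(unknown if dims is None else dims for dims in x.shape.as_list())

x = tf.zeros(shape=(4, 3, 2))
assert shape(x) == (4, 3, 2)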

Example from tensorforce/tensorforce, tensorforce/core/optimizers/distributed_optimizer.py (view on GitHub):

    def tf_step(self, time, variables, global_variables, **kwargs):
        assert all(
            util.shape(global_var) == util.shape(local_var)
            for global_var, local_var in zip(global_variables, variables)
        )

        local_diffs = self.optimizer.fn_step(time=time, variables=variables, **kwargs)

        with tf.control_dependencies(control_inputs=local_diffs):
            applied = self.optimizer.apply_step(variables=global_variables, diffs=local_diffs)

        with tf.control_dependencies(control_inputs=(applied,)):
            update_diffs = list()
            for global_var, local_var in zip(global_variables, variables):
                diff = global_var - local_var
                update_diffs.append(diff)

            applied = self.apply_step(variables=variables, diffs=update_diffs)

            # TODO: Update time, episode, etc (like in Synchronization)?
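
The assertion above checks that each local variable has the same static shape as its global counterpart before diffs are exchanged between the two sets. A minimal, self-contained version of that check (the variables here are made up for illustration):

import tensorflow as tf
from tensorforce import util

# Two copies of the same parameters, e.g. a local worker copy and a global set.
local_variables = [tf.Variable(tf.zeros(shape=(8, 4))), tf.Variable(tf.zeros(shape=(4,)))]
global_variables = [tf.Variable(tf.zeros(shape=(8, 4))), tf.Variable(tf.zeros(shape=(4,)))]

# Static shapes must match pairwise, otherwise diffs cannot be applied across copies.
assert all(
    util.shape(global_var) == util.shape(local_var)
    for global_var, local_var in zip(global_variables, local_variables)
)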

Example from tensorforce/tensorforce, tensorforce/core/module.py (view on GitHub):

        # Scope handling
        if Module.scope_stack is not None:
            for scope in reversed(Module.scope_stack[1:]):
                scope.__exit__(None, None, None)
            if len(Module.global_scope) > 0:
                temp_scope = tf.name_scope(name='/'.join(Module.global_scope))
                temp_scope.__enter__()
            tensors = util.fmap(function=util.identity_operation, xs=tensors)

        # TensorFlow summaries
        assert Module.global_summary_step is not None
        step = Module.retrieve_tensor(name=Module.global_summary_step)
        summaries = list()
        for name, tensor in tensors.items():
            shape = util.shape(x=tensor)
            if shape == ():
                summaries.append(tf.summary.scalar(name=name, data=tensor, step=step))
            elif shape == (-1,):
                tensor = tf.math.reduce_sum(input_tensor=tensor, axis=0)
                summaries.append(tf.summary.scalar(name=name, data=tensor, step=step))
            elif shape == (1,):
                tensor = tf.squeeze(input=tensor, axis=-1)
                summaries.append(tf.summary.scalar(name=name, data=tensor, step=step))
            elif shape == (-1, 1):
                tensor = tf.math.reduce_sum(input_tensor=tf.squeeze(input=tensor, axis=-1), axis=0)
                summaries.append(tf.summary.scalar(name=name, data=tensor, step=step))
            else:
                # General tensor as histogram (`label` is defined in the enclosing method, outside this excerpt)
                assert not util.is_iterable(x=label) and label.endswith('-histogram')
                summaries.append(tf.summary.histogram(name=name, data=tensor, step=step))
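
The branches above dispatch on the static shape, with -1 apparently standing in for an unknown (batch) dimension. A small sketch of how such a shape shows up inside a graph function, assuming that -1 convention:

import tensorflow as tf
from tensorforce import util

@tf.function(input_signature=[tf.TensorSpec(shape=(None,), dtype=tf.float32)])
def batch_sum(tensor):
    # With an unknown batch dimension, util.shape is assumed to report (-1,),
    # which the summary code above reduces to a scalar via reduce_sum.
    assert util.shape(x=tensor) == (-1,)
    return tf.math.reduce_sum(input_tensor=tensor, axis=0)

batch_sum(tf.ones(shape=(5,)))  # scalar tensor 5.0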

Example from tensorforce/tensorforce, tensorforce/core/models/pg_prob_ratio_model.py (view on GitHub):

    def tf_reference(
        self, states, internals, actions, terminal, reward, next_states, next_internals
    ):
        embedding = self.network.apply(x=states, internals=internals)

        log_probs = list()
        for name, distribution, action in util.zip_items(self.distributions, actions):
            parameters = distribution.parametrize(x=embedding)
            log_prob = distribution.log_probability(parameters=parameters, action=action)
            collapsed_size = util.product(xs=util.shape(log_prob)[1:])
            log_prob = tf.reshape(tensor=log_prob, shape=(-1, collapsed_size))
            log_probs.append(log_prob)

        log_probs = tf.concat(values=log_probs, axis=1)
        return tf.stop_gradient(input=log_probs)
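
The collapsed_size idiom above flattens everything except the batch dimension, so that per-action tensors of different shapes can later be concatenated along axis 1. The same reshape in isolation (util.product is assumed to multiply the entries of the shape tuple, as its use here suggests):

import tensorflow as tf
from tensorforce import util

log_prob = tf.zeros(shape=(32, 4, 5))  # batch of per-action log-probabilities

collapsed_size = util.product(xs=util.shape(log_prob)[1:])          # 4 * 5 = 20
log_prob = tf.reshape(tensor=log_prob, shape=(-1, collapsed_size))  # shape (32, 20)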

Example from tensorforce/tensorforce, tensorforce/core/module.py (view on GitHub):

        else:
            tf_dtype = util.tf_dtype(dtype=dtype)

            # Variable initializer
            if isinstance(initializer, util.py_dtype(dtype=dtype)):
                initializer = tf.constant(value=initializer, dtype=tf_dtype, shape=shape)
            elif isinstance(initializer, np.ndarray):
                if initializer.shape != shape:
                    raise TensorforceError(
                        "Invalid variable initializer shape: {}.".format(initializer.shape)
                    )
                initializer = tf.constant(value=initializer, dtype=tf_dtype)
            elif isinstance(initializer, tf.Tensor):
                if util.shape(x=initializer) != shape:
                    raise TensorforceError(
                        "Invalid variable initializer shape: {}.".format(util.shape(x=initializer))
                    )
                initializer = initializer
            elif not isinstance(initializer, str):
                raise TensorforceError("Invalid variable initializer: {}".format(initializer))
            elif initializer[:6] == 'normal':
                if dtype != 'float':
                    raise TensorforceError(
                        message="Invalid variable initializer value for non-float variable: {}.".format(
                            initializer
                        )
                    )
                if initializer[6:] == '-relu':
                    stddev = min(0.1, sqrt(2.0 / util.product(xs=shape[:-1])))
                else:
                    stddev = min(0.1, sqrt(2.0 / (util.product(xs=shape[:-1]) + shape[-1])))
                initializer = tf.random.normal(shape=shape, stddev=stddev, dtype=tf_dtype)
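
When the initializer is passed as a tensor, its static shape is validated against the requested variable shape via util.shape. A condensed, self-contained sketch of that check (TensorforceError is assumed to be importable from the top-level package):

import tensorflow as tf
from tensorforce import TensorforceError, util

shape = (3, 2)
initializer = tf.zeros(shape=(3, 2), dtype=tf.float32)

# Reject tensor initializers whose static shape does not match the variable shape.
if util.shape(x=initializer) != shape:
    raise TensorforceError(
        "Invalid variable initializer shape: {}.".format(util.shape(x=initializer))
    )
variable = tf.Variable(initial_value=initializer, dtype=tf.float32)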

Example from tensorforce/tensorforce, tensorforce/models/q_model.py (view on GitHub):

    def tf_q_delta(self, q_value, next_q_value, terminal, reward):
        """
        Creates the deltas (or advantage) of the Q values.

        :return: A list of deltas per action
        """
        for _ in range(util.rank(q_value) - 1):
            terminal = tf.expand_dims(input=terminal, axis=1)
            reward = tf.expand_dims(input=reward, axis=1)

        multiples = (1,) + util.shape(q_value)[1:]
        terminal = tf.tile(input=terminal, multiples=multiples)
        reward = tf.tile(input=reward, multiples=multiples)

        zeros = tf.zeros_like(tensor=next_q_value)
        next_q_value = tf.where(condition=terminal, x=zeros, y=(self.discount * next_q_value))

        return reward + next_q_value - q_value  # tf.stop_gradient(q_target)
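
Here util.rank and util.shape line the per-instance terminal and reward tensors up with the per-action Q-values before the element-wise delta is computed. The same broadcasting pattern in isolation (tensor shapes are illustrative):

import tensorflow as tf
from tensorforce import util

q_value = tf.zeros(shape=(32, 4))                # per-action Q-values
terminal = tf.zeros(shape=(32,), dtype=tf.bool)  # per-instance terminal flags
reward = tf.ones(shape=(32,))                    # per-instance rewards

# Add trailing dimensions until the ranks match, then tile over the action dimensions.
for _ in range(util.rank(x=q_value) - 1):
    terminal = tf.expand_dims(input=terminal, axis=1)
    reward = tf.expand_dims(input=reward, axis=1)

multiples = (1,) + util.shape(x=q_value)[1:]              # (1, 4)
terminal = tf.tile(input=terminal, multiples=multiples)   # shape (32, 4)
reward = tf.tile(input=reward, multiples=multiples)       # shape (32, 4)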

Example from tensorforce/tensorforce, tensorforce/core/layers/pooling.py (view on GitHub):

    def tf_apply(self, x):
        if self.reduction == 'concat':
            return tf.reshape(tensor=x, shape=(-1, util.product(xs=util.shape(x)[1:])))

        elif self.reduction == 'max':
            for _ in range(util.rank(x=x) - 2):
                x = tf.reduce_max(input_tensor=x, axis=1)
            return x

        elif self.reduction == 'mean':
            for _ in range(util.rank(x=x) - 2):
                x = tf.reduce_mean(input_tensor=x, axis=1)
            return x

        elif self.reduction == 'product':
            for _ in range(util.rank(x=x) - 2):
                x = tf.reduce_prod(input_tensor=x, axis=1)
            return x
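
For the 'concat' reduction, util.shape supplies the flattened feature size (the same collapse idiom as above), while the other reductions use util.rank to pool spatial axes one at a time. A compact sketch of the rank-driven loop (the input shape is illustrative):

import tensorflow as tf
from tensorforce import util

x = tf.random.normal(shape=(32, 8, 8, 64))  # e.g. a convolutional feature map

# 'max'-style reduction: pool over axis 1 until only (batch, channels) remains.
for _ in range(util.rank(x=x) - 2):
    x = tf.reduce_max(input_tensor=x, axis=1)
# x now has shape (32, 64)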

Example from tensorforce/tensorforce, tensorforce/core/models/q_naf_model.py (view on GitHub):

            parameters = distribution.parametrize(x=embedding)
            target_parameters = target_distribution.parametrize(x=target_embedding)

            q_value = self.tf_q_value(
                embedding=embedding, parameters=parameters, action=actions[name], name=name
            )

            # Note: this is V', not Q', because NAF outputs V(s) separately
            next_state_value = target_distribution.states_value(parameters=target_parameters)

            delta = self.tf_q_delta(
                q_value=q_value, next_q_value=next_state_value, terminal=terminal, reward=reward
            )

            collapsed_size = util.product(xs=util.shape(delta)[1:])
            delta = tf.reshape(tensor=delta, shape=(-1, collapsed_size))

            deltas.append(delta)

        # Surrogate loss as the mean squared error between actual observed rewards and expected rewards
        loss_per_instance = tf.reduce_mean(input_tensor=tf.concat(values=deltas, axis=1), axis=1)

        # Optional Huber loss
        huber_loss = self.huber_loss.value()

        def no_huber_loss():
            return tf.square(x=loss_per_instance)

        def apply_huber_loss():
            return tf.where(
                condition=(tf.abs(x=loss_per_instance) <= huber_loss),
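
The excerpt is cut off inside the tf.where call. For context only, a generic Huber-style selection (quadratic below a threshold, linear above it) typically looks like the sketch below; this is the textbook formulation, not necessarily Tensorforce's exact code:

import tensorflow as tf

def huber(loss_per_instance, threshold):
    # Quadratic for small values, linear beyond the threshold (standard Huber loss).
    abs_loss = tf.abs(x=loss_per_instance)
    return tf.where(
        condition=(abs_loss <= threshold),
        x=0.5 * tf.square(x=loss_per_instance),
        y=threshold * (abs_loss - 0.5 * threshold)
    )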

Example from tensorforce/tensorforce, tensorforce/core/models/distribution_model.py (view on GitHub):

    def tf_regularize(self, states, internals):
        regularization_loss = super().tf_regularize(states=states, internals=internals)

        entropies = list()
        embedding = self.network.apply(x=states, internals=internals)
        for name, distribution in self.distributions.items():
            parameters = distribution.parametrize(x=embedding)
            entropy = distribution.entropy(parameters=parameters)
            collapsed_size = util.product(xs=util.shape(entropy)[1:])
            entropy = tf.reshape(tensor=entropy, shape=(-1, collapsed_size))
            entropies.append(entropy)

        entropies = tf.concat(values=entropies, axis=1)
        entropy_per_instance = tf.reduce_mean(input_tensor=entropies, axis=1)
        entropy = tf.reduce_mean(input_tensor=entropy_per_instance, axis=0)
        # entropy = self.add_summary(label='entropy', name='entropy', tensor=entropy)

        entropy_regularization = self.entropy_regularization.value()

        regularization_loss = regularization_loss - entropy_regularization * entropy

        # def no_entropy_reg():
        #     return regularization_loss

        # def apply_entropy_reg():
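
As in the earlier excerpts, flattening each per-action entropy to (batch, collapsed_size) is what makes the tf.concat across differently shaped actions possible. A small sketch with two hypothetical actions of different shapes:

import tensorflow as tf
from tensorforce import util

# Hypothetical per-action entropies for a batch of 32 instances.
entropy_a = tf.zeros(shape=(32, 4))      # action with shape (4,)
entropy_b = tf.zeros(shape=(32, 3, 2))   # action with shape (3, 2)

entropies = list()
for entropy in (entropy_a, entropy_b):
    collapsed_size = util.product(xs=util.shape(entropy)[1:])   # 4 and 6
    entropies.append(tf.reshape(tensor=entropy, shape=(-1, collapsed_size)))

entropies = tf.concat(values=entropies, axis=1)                      # shape (32, 10)
entropy_per_instance = tf.reduce_mean(input_tensor=entropies, axis=1)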

Example from tensorforce/tensorforce, tensorforce/models/ppo_model.py (view on GitHub):

                # Standard policy gradient log likelihood computation
                log_prob = distribution.log_probability(action=action)
                fixed_log_prob = fixed_distribution.log_probability(action=action)
                log_prob_diff = log_prob - fixed_log_prob
                prob_ratio = tf.exp(x=log_prob_diff)
                prob_ratio = tf.reshape(tensor=prob_ratio, shape=(-1, shape_size))
                prob_ratios.append(prob_ratio)

                entropy = distribution.entropy()
                entropy_penalty = -config.entropy_penalty * entropy
                entropy_penalty = tf.reshape(tensor=entropy_penalty, shape=(-1, shape_size))
                entropy_penalties.append(entropy_penalty)

                self.distribution_tensors[name] = list(distribution.get_tensors())
                prev_distribution = list(
                    tf.placeholder(dtype=tf.float32, shape=util.shape(tensor, unknown=None))
                    for tensor in distribution.get_tensors()
                )
                self.prev_distribution_tensors[name] = prev_distribution
                prev_distribution = distribution.from_tensors(
                    tensors=prev_distribution, deterministic=self.deterministic
                )

                kl_divergence = prev_distribution.kl_divergence(other=distribution)
                kl_divergence = tf.reshape(tensor=kl_divergence, shape=(-1, shape_size))
                kl_divergences.append(kl_divergence)

                entropy = tf.reshape(tensor=entropy, shape=(-1, shape_size))
                entropies.append(entropy)

            # The surrogate loss in PPO is the minimum of clipped loss and
            # target advantage * prob_ratio, which is the CPO loss
            # Presentation on conservative policy iteration:
            # https://www.cs.cmu.edu/~jcl/presentation/RL/RL.ps
            prob_ratio = tf.reduce_mean(input_tensor=tf.concat(values=prob_ratios, axis=1), axis=1)
            clipped_prob_ratio = tf.clip_by_value(prob_ratio, 1.0 - config.loss_clipping, 1.0 + config.loss_clipping)
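
This older, TF 1.x-style excerpt shows the unknown argument in action: unknown=None keeps unknown dimensions as None, which is the form tf.placeholder expects. A minimal sketch under that assumption, using the tf.compat.v1 graph API:

import tensorflow as tf
from tensorforce import util

tf.compat.v1.disable_eager_execution()  # the excerpt targets TF 1.x-style graph code

tensor = tf.zeros(shape=(10, 4))

# With unknown=None, unknown dimensions are reported as None instead of -1
# (assumed behaviour of the `unknown` argument), matching placeholder semantics.
placeholder = tf.compat.v1.placeholder(
    dtype=tf.float32, shape=util.shape(tensor, unknown=None)
)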