How to use the tensorforce.util.rank function in Tensorforce

To help you get started, we’ve selected a few Tensorforce examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github tensorforce / tensorforce / tensorforce / core / models / q_model.py View on Github external
def tf_q_delta(self, q_value, next_q_value, terminal, reward):
        """
        Builds the one-step Q-value delta: reward plus the discounted next Q value
        (replaced by zeros wherever terminal is set) minus the current Q value.

        :param q_value: Q value tensor of the current step
        :param next_q_value: Q value tensor of the following step
        :param terminal: Terminal condition tensor; gets singleton axes appended and is
            tiled by the non-leading dimensions of q_value
        :param reward: Reward tensor; expanded and tiled the same way as terminal
        :return: Tensor computed as reward + masked discounted next_q_value - q_value
        """
        # One extra axis per non-leading dimension of q_value.
        num_extra_axes = util.rank(x=q_value) - 1
        for _ in range(num_extra_axes):
            terminal = tf.expand_dims(input=terminal, axis=1)
            reward = tf.expand_dims(input=reward, axis=1)

        # Keep the leading axis as is, repeat along every other axis of q_value.
        tile_multiples = (1,) + util.shape(q_value)[1:]
        terminal = tf.tile(input=terminal, multiples=tile_multiples)
        reward = tf.tile(input=reward, multiples=tile_multiples)

        no_bootstrap = tf.zeros_like(tensor=next_q_value)
        gamma = self.discount.value()
        discounted_next = gamma * next_q_value
        # Terminal entries contribute no bootstrapped value.
        bootstrap = tf.where(condition=terminal, x=no_bootstrap, y=discounted_next)

        # NOTE: the target term is not wrapped in tf.stop_gradient here.
        target = reward + bootstrap
        return target - q_value
github tensorforce / tensorforce / tensorforce / core / preprocessors / sequence.py View on Github external
def first_run():
            """
            Assigns to states_buffer the input tensor tiled self.length times along
            its first axis (all other axes are left unrepeated).
            """
            leading = (self.length,)
            # One multiple of 1 for every axis after the first.
            trailing = (1,) * (util.rank(tensor) - 1)
            tiled = tf.tile(input=tensor, multiples=leading + trailing)
            return tf.assign(ref=states_buffer, value=tiled)
github tensorforce / tensorforce / tensorforce / core / networks / layer.py View on Github external
def tf_apply(self, x, update, state):
        """
        Applies a single LSTM cell step to the input, using a packed internal state.

        :param x: Input tensor; must be rank 2.
        :param update: Predicate passed to tf.cond; dropout is only active when it is true.
        :param state: Packed internal state, read as state[:, 0, :] for the cell state (c)
            and state[:, 1, :] for the hidden state (h).
        :raises TensorForceError: If the rank of x is not 2.

        NOTE(review): the return statement is not visible in this excerpt.
        """
        if util.rank(x) != 2:
            raise TensorForceError(
                'Invalid input rank for internal lstm layer: {}, must be 2.'.format(util.rank(x))
            )

        # Unpack the combined state tensor into the (c, h) tuple the cell expects.
        state = tf.contrib.rnn.LSTMStateTuple(c=state[:, 0, :], h=state[:, 1, :])

        # The cell is (re)created on every call and stored on the instance.
        self.lstm_cell = tf.contrib.rnn.LSTMCell(num_units=self.size, **self.lstmcell_args)

        if self.dropout is not None:
            # Keep probability is 1 - dropout when update is true, otherwise 1.0 (no dropout).
            keep_prob = tf.cond(pred=update, true_fn=(lambda: 1.0 - self.dropout), false_fn=(lambda: 1.0))
            self.lstm_cell = tf.contrib.rnn.DropoutWrapper(cell=self.lstm_cell, output_keep_prob=keep_prob)

        # Run one step of the cell; x becomes the cell output, state the new (c, h) tuple.
        x, state = self.lstm_cell(inputs=x, state=state)

        # Re-pack c and h along axis 1, mirroring the layout that was unpacked above.
        state = tf.stack(values=(state.c, state.h), axis=1)
github tensorforce / tensorforce / tensorforce / core / preprocessors / standardize.py View on Github external
def tf_process(self, tensor):
        """
        Standardizes the tensor: subtracts the mean and divides by the standard
        deviation, with the divisor bounded below by util.epsilon.

        The statistics are taken over all axes when self.across_batch is set,
        otherwise over every axis except the first.

        :param tensor: Input tensor
        :return: Standardized tensor
        """
        # Axis 0 is only included in the reduction when standardizing across the batch.
        first_axis = 0 if self.across_batch else 1
        axes = tuple(range(first_axis, util.rank(tensor)))

        mean, variance = tf.nn.moments(x=tensor, axes=axes, keep_dims=True)
        centered = tensor - mean
        # Lower-bound the divisor to avoid dividing by (near) zero.
        stddev = tf.maximum(x=tf.sqrt(variance), y=util.epsilon)
        return centered / stddev
github tensorforce / tensorforce / tensorforce / core / networks / layers.py View on Github external
def conv1d(x, size, window=3, stride=1, padding='SAME', bias=False, activation='relu',
           l2_regularization=0.0, scope='conv1d', summary_level=0):
    """A 1d convolutional layer.

    Args:
        x: Input tensor. Must be rank 3; its last dimension is used as the
            number of input channels of the filter variable.
        size: Number of output filters (last dimension of the filter variable)
        window: Filter window size
        stride: Filter stride
        padding: One of [VALID, SAME]
        bias: Bool, indicates whether bias is used
        activation: Non-linearity type, defaults to relu
        l2_regularization: L2-regularisation value; when greater than zero, an
            L2 loss on the filters scaled by this value is added via tf.losses
        scope: Name of the variable scope the layer variables are created in
        summary_level: Not referenced in the visible part of this function

    Returns:
        NOTE(review): not visible in this excerpt, which ends inside the bias branch.

    Raises:
        TensorForceError: If the rank of x is not 3.
    """
    input_rank = util.rank(x)
    if input_rank != 3:
        raise TensorForceError('Invalid input rank for conv1d layer: {}, must be 3'.format(input_rank))

    with tf.variable_scope(scope):
        # Filter variable shape: (window, input channels, output filters).
        filters_shape = (window, x.shape[2].value, size)
        # Initial standard deviation is sqrt(2 / size), capped at 0.1.
        stddev = min(0.1, sqrt(2.0 / size))
        filters_init = tf.random_normal_initializer(mean=0.0, stddev=stddev, dtype=tf.float32)
        filters = tf.get_variable(name='W', shape=filters_shape, dtype=tf.float32, initializer=filters_init)

        # Register the regularization term only when it is actually enabled.
        if l2_regularization > 0.0:
            tf.losses.add_loss(l2_regularization * tf.nn.l2_loss(t=filters))

        x = tf.nn.conv1d(value=x, filters=filters, stride=stride, padding=padding)

        if bias:
            # One bias value per output filter.
            bias_shape = (size,)
github tensorforce / tensorforce / tensorforce / core / layers / layer.py View on Github external
def body(indices, remaining, current_x, current_aggregates):
                """
                Loop body performing one iterative step and advancing the loop state.

                Relies on x, zeros and ones from the enclosing scope.

                :param indices: Index tensor used to gather the current inputs from x
                :param remaining: Tensor of remaining-step counts; entries equal to zeros
                    are treated as finished
                :param current_x: Overwritten immediately by the gathered inputs, so the
                    incoming value is not used
                :param current_aggregates: Aggregates of the previous step, either a dict
                    of tensors or a single tensor
                :return: Tuple (indices, remaining, next_x, next_aggregates)
                """
                # Select the inputs for this step from the enclosing-scope tensor x.
                current_x = tf.gather(params=x, indices=indices)
                next_x, next_aggregates = self.iterative_step(
                    x=current_x, previous=current_aggregates
                )
                # Ops created below only run after current_x and next_x have been computed.
                with tf.control_dependencies(control_inputs=(current_x, next_x)):
                    is_finished = tf.math.equal(x=remaining, y=zeros)
                    if isinstance(next_aggregates, dict):
                        for name, current_aggregate, next_aggregate in util.zip_items(
                            current_aggregates, next_aggregates
                        ):
                            # Append singleton axes so the condition has the aggregate's rank.
                            condition = is_finished
                            for _ in range(util.rank(x=current_aggregate) - 1):
                                condition = tf.expand_dims(input=condition, axis=1)
                            # Finished entries keep their previous aggregate value.
                            next_aggregates[name] = tf.where(
                                condition=condition, x=current_aggregate, y=next_aggregate
                            )
                    else:
                        # Same masking as above for the single-tensor case.
                        condition = is_finished
                        for _ in range(util.rank(x=current_aggregates) - 1):
                            condition = tf.expand_dims(input=condition, axis=1)
                        next_aggregates = tf.where(
                            condition=condition, x=current_aggregates, y=next_aggregates
                        )
                    # Subtract ones from the entries that are not yet finished.
                    remaining -= tf.where(condition=is_finished, x=zeros, y=ones)
                    # Advance the index only where the updated remaining count is non-zero.
                    indices += tf.where(
                        condition=tf.math.equal(x=remaining, y=zeros), x=zeros, y=ones
                    )
                return indices, remaining, next_x, next_aggregates
github tensorforce / tensorforce / tensorforce / core / models / q_nstep_model.py View on Github external
def tf_q_delta(self, q_value, next_q_value, terminal, reward):
        """
        Builds the Q-value delta from the discounted cumulative reward.

        terminal and reward get singleton axes appended and are tiled by the
        non-leading dimensions of q_value; the cumulative reward is then computed
        with the last entry of next_q_value as the final reward.

        :param q_value: Q value tensor of the current steps
        :param next_q_value: Q value tensor of the following steps
        :param terminal: Terminal condition tensor
        :param reward: Reward tensor
        :return: Tensor computed as discounted cumulative reward - q_value
        """
        # One extra axis per non-leading dimension of q_value.
        num_extra_axes = util.rank(x=q_value) - 1
        for _ in range(num_extra_axes):
            terminal = tf.expand_dims(input=terminal, axis=1)
            reward = tf.expand_dims(input=reward, axis=1)

        # Keep the leading axis as is, repeat along every other axis of q_value.
        tile_multiples = (1,) + util.shape(x=q_value)[1:]
        terminal = tf.tile(input=terminal, multiples=tile_multiples)
        reward = tf.tile(input=reward, multiples=tile_multiples)

        # The last next-step Q value serves as the final reward of the sequence.
        final_value = next_q_value[-1]
        cumulative_reward = self.discounted_cumulative_reward(
            terminal=terminal, reward=reward, final_reward=final_value
        )

        return cumulative_reward - q_value
github tensorforce / tensorforce / tensorforce / core / networks / layers.py View on Github external
"""
    Linear layer.

    Args:
        x: Input tensor. Must be rank 2
        size: Neurons in layer
        weights: None for random matrix, otherwise given float or array is used.
        bias: Bool to indicate whether bias is used, otherwise given float or array is used.
        l2_regularization: L2-regularisation value
        weights: Weights for layer. If none, initialisation defaults to Xavier (normal with
        size/shape dependent standard deviation).

    Returns:

    """
    input_rank = util.rank(x)
    if input_rank != 2:
        raise TensorForceError('Invalid input rank for linear layer: {},'
                               ' must be 2.'.format(input_rank))

    with tf.variable_scope(scope):
        weights_shape = (x.shape[1].value, size)

        if weights is None:
            stddev = min(0.1, sqrt(2.0 / (x.shape[1].value + size)))
            weights_init = tf.random_normal_initializer(mean=0.0, stddev=stddev, dtype=tf.float32)

        elif isinstance(weights, float):
            if weights == 0.0:
                weights_init = tf.zeros_initializer(dtype=tf.float32)
            else:
                weights_init = tf.constant_initializer(value=weights, dtype=tf.float32)
github tensorforce / tensorforce / tensorforce / models / model.py View on Github external
if action_spec['type'] == 'bool':
            action = tf.where(
                condition=(tf.random_uniform(shape=action_shape[0]) < exploration_value),
                x=(tf.random_uniform(shape=action_shape) < 0.5),
                y=action
            )

        elif action_spec['type'] == 'int':
            action = tf.where(
                condition=(tf.random_uniform(shape=action_shape) < exploration_value),
                x=tf.random_uniform(shape=action_shape, maxval=action_spec['num_actions'], dtype=util.tf_dtype('int')),
                y=action
            )

        elif action_spec['type'] == 'float':
            for _ in range(util.rank(action) - 1):
                exploration_value = tf.expand_dims(input=exploration_value, axis=-1)
            action += exploration_value
            if 'min_value' in action_spec:
                action = tf.clip_by_value(
                    t=action,
                    clip_value_min=action_spec['min_value'],
                    clip_value_max=action_spec['max_value']
                )

        return action