How to use tensorforce.core.Module in Tensorforce

To help you get started, we’ve selected a few examples showing how tensorforce.core.Module is used in the Tensorforce source itself.

From tensorforce/core/layers/layer.py in the tensorforce/tensorforce repository on GitHub: the Layer base class for neural network layers is built directly on Module.
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

from collections import OrderedDict

import tensorflow as tf

from tensorforce import TensorforceError, util
import tensorforce.core
from tensorforce.core import Module, parameter_modules
from tensorforce.core.parameters import Parameter


class Layer(Module):
    """
    Base class for neural network layers.

    Args:
        name (string): Layer name
            (<span style="color:#00C000"><b>default</b></span>: internally chosen).
        input_spec (specification): Input tensor specification
            (<span style="color:#00C000"><b>internal use</b></span>).
        summary_labels ('all' | iter[string]): Labels of summaries to record
            (<span style="color:#00C000"><b>default</b></span>: inherit value of parent module).
        l2_regularization (float >= 0.0): Scalar controlling L2 regularization
            (<span style="color:#00C000"><b>default</b></span>: inherit value of parent module).
    """

    layers = None

From tensorforce/core/layers/normalization.py: Module.retrieve_tensor reads the 'optimization' and 'independent' flags to decide whether the moving statistics should be updated.
                    read_value=False
                )  # closes an assignment whose opening lines are elided; the excerpt begins mid-function

            with tf.control_dependencies(control_inputs=(assignment,)):
                variance = self.moving_variance.assign(value=variance)
                mean = self.moving_mean.assign(value=mean)

            return mean, variance

        optimization = Module.retrieve_tensor(name='optimization')
        update_on_optimization = tf.where(
            condition=self.after_first_call, x=self.update_on_optimization, y=optimization
        )
        update_on_optimization = self.update_on_optimization.assign(value=update_on_optimization)
        skip_update = tf.math.logical_or(
            x=Module.retrieve_tensor(name='independent'),
            y=tf.math.not_equal(x=update_on_optimization, y=optimization)
        )

        mean, variance = self.cond(pred=skip_update, true_fn=no_update, false_fn=apply_update)

        epsilon = tf.constant(value=util.epsilon, dtype=util.tf_dtype(dtype='float'))
        reciprocal_stddev = tf.math.rsqrt(x=tf.maximum(x=variance, y=epsilon))

        x = (x - tf.stop_gradient(input=mean)) * tf.stop_gradient(input=reciprocal_stddev)

        return x

From tensorforce/core/models/distribution_model.py: Module.retrieve_tensor reads the 'deterministic' flag before sampling actions.
    def tf_core_act(self, states, internals):
        embedding, internals = self.network.apply(
            x=states, internals=internals, return_internals=True
        )

        actions = OrderedDict()
        for name, distribution in self.distributions.items():
            parameters = distribution.parametrize(x=embedding)
            deterministic = Module.retrieve_tensor(name='deterministic')
            deterministic = tf.logical_or(
                x=deterministic,
                y=tf.constant(value=self.requires_deterministic, dtype=util.tf_dtype(dtype='bool'))
            )
            action = distribution.sample(parameters=parameters, deterministic=deterministic)

            entropy = distribution.entropy(parameters=parameters)
            collapsed_size = util.product(xs=util.shape(entropy)[1:])
            entropy = tf.reshape(tensor=entropy, shape=(-1, collapsed_size))
            entropy = tf.reduce_mean(input_tensor=entropy, axis=1)
            actions[name] = self.add_summary(
                label='entropy', name=(name + '-entropy'), tensor=entropy, pass_tensors=action
            )

        return actions, internals

From tensorforce/core/layers/layer.py: Module.retrieve_tensor reads the 'dependency_starts' and 'dependency_lengths' tensors inside a layer's tf_apply.
    def tf_apply(self, x, initial=None):
        zero = tf.constant(value=0, dtype=util.tf_dtype(dtype='long'))
        one = tf.constant(value=1, dtype=util.tf_dtype(dtype='long'))
        dependency_starts = Module.retrieve_tensor(name='dependency_starts')
        dependency_lengths = Module.retrieve_tensor(name='dependency_lengths')
        if util.tf_dtype(dtype='long') in (tf.int32, tf.int64):
            batch_size = tf.shape(input=dependency_starts, out_type=util.tf_dtype(dtype='long'))[0]
        else:
            batch_size = tf.dtypes.cast(
                x=tf.shape(input=dependency_starts)[0], dtype=util.tf_dtype(dtype='long')
            )
        zeros = tf.zeros(shape=(batch_size,), dtype=util.tf_dtype(dtype='long'))
        ones = tf.ones(shape=(batch_size,), dtype=util.tf_dtype(dtype='long'))
        # maximum_iterations = tf.math.reduce_max(input_tensor=lengths, axis=0)
        horizon = self.dependency_horizon.value() + one  # including 0th step
        starts = dependency_starts + tf.maximum(x=(dependency_lengths - horizon), y=zeros)
        lengths = dependency_lengths - tf.maximum(x=(dependency_lengths - horizon), y=zeros)
        horizon = tf.minimum(x=horizon, y=tf.math.reduce_max(input_tensor=lengths, axis=0))

        if self.processing == 'cumulative':

From tensorforce/core/objectives/objective.py: the Objective base class for optimization objectives is also a Module subclass.
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

from collections import OrderedDict

from tensorforce.core import Module


class Objective(Module):
    """
    Base class for optimization objectives.

    Args:
        name (string): Module name
            (<span style="color:#0000C0"><b>internal use</b></span>).
        summary_labels ('all' | iter[string]): Labels of summaries to record
            (<span style="color:#00C000"><b>default</b></span>: inherit value of parent module).
    """

    def __init__(self, name, summary_labels=None):
        super().__init__(name=name, summary_labels=summary_labels)

    def tf_loss_per_instance(self, policy, states, internals, auxiliaries, actions, reward):
        raise NotImplementedError

From tensorforce/core/distributions/categorical.py: Module.update_tensor publishes the computed action probabilities under a per-distribution name.
            states_value = self.value.apply(x=x)
            if len(self.embedding_shape) == 1:
                states_value = tf.reshape(tensor=states_value, shape=value_shape)
            action_values = states_value + action_values - tf.math.reduce_mean(
                input_tensor=action_values, axis=-1, keepdims=True
            )
            states_value = tf.squeeze(input=states_value, axis=-1)
            action_values = tf.where(condition=mask, x=action_values, y=min_float)

        # Softmax for corresponding probabilities
        probabilities = tf.nn.softmax(logits=action_values, axis=-1)

        # "Normalized" logits
        logits = tf.math.log(x=tf.maximum(x=probabilities, y=epsilon))

        Module.update_tensor(name=(self.name + '-probabilities'), tensor=probabilities)

        return logits, probabilities, states_value, action_values

From tensorforce/core/distributions/gaussian.py: Module.update_tensor publishes the mean and standard deviation.
        # Log standard deviation
        log_stddev = self.log_stddev.apply(x=x)
        if len(self.embedding_shape) == 1:
            log_stddev = tf.reshape(tensor=log_stddev, shape=shape)

        # Clip log_stddev for numerical stability
        # epsilon < 1.0, hence negative
        log_stddev = tf.clip_by_value(
            t=log_stddev, clip_value_min=log_epsilon, clip_value_max=-log_epsilon
        )

        # Standard deviation
        stddev = tf.exp(x=log_stddev)

        Module.update_tensor(name=(self.name + '-mean'), tensor=mean)
        Module.update_tensor(name=(self.name + '-stddev'), tensor=stddev)

        return mean, stddev, log_stddev

From tensorforce/core/distributions/bernoulli.py: Module.update_tensor publishes the action probability.
        # States value
        states_value = logit

        # Sigmoid for corresponding probability
        probability = tf.sigmoid(x=logit)

        # Clip probability for numerical stability
        probability = tf.clip_by_value(
            t=probability, clip_value_min=epsilon, clip_value_max=(one - epsilon)
        )

        # "Normalized" logits
        true_logit = tf.math.log(x=probability)
        false_logit = tf.math.log(x=(one - probability))

        Module.update_tensor(name=(self.name + '-probability'), tensor=probability)

        return true_logit, false_logit, probability, states_value

From tensorforce/core/models/memory_model.py: Module.retrieve_tensor reads the 'timestep' and 'episode' counters to schedule periodic updates.
"""
        stored = self.memory.store(
            states=states, internals=internals, actions=actions, terminal=terminal, reward=reward
        )

        # Periodic optimization
        with tf.control_dependencies(control_inputs=(stored,)):
            zero = tf.constant(value=0, dtype=util.tf_dtype(dtype='long'))
            batch_size = self.update_batch_size.value()
            frequency = self.update_frequency.value()
            start = self.update_start.value()
            start = tf.maximum(x=start, y=batch_size)

            if self.update_unit == 'timesteps':
                # Timestep-based batch
                timestep = Module.retrieve_tensor(name='timestep')
                is_frequency = tf.math.equal(x=tf.mod(x=timestep, y=frequency), y=zero)
                at_least_start = tf.math.greater_equal(x=timestep, y=start)

            elif self.update_unit == 'sequences':
                # Timestep-sequence-based batch
                timestep = Module.retrieve_tensor(name='timestep')
                sequence_length = self.update_sequence_length.value()
                is_frequency = tf.math.equal(x=tf.mod(x=timestep, y=frequency), y=zero)
                at_least_start = tf.math.greater_equal(x=timestep, y=(start + sequence_length - 1))

            elif self.update_unit == 'episodes':
                # Episode-based batch
                episode = Module.retrieve_tensor(name='episode')
                is_frequency = tf.math.equal(x=tf.mod(x=episode, y=frequency), y=zero)
                # Only update once per episode increment
                is_frequency = tf.math.logical_and(x=is_frequency, y=terminal[-1])

From tensorforce/core/distributions/beta.py: Module.update_tensor publishes the alpha and beta parameters.
        # Beta
        beta = self.beta.apply(x=x)
        # epsilon < 1.0, hence negative
        beta = tf.clip_by_value(t=beta, clip_value_min=log_epsilon, clip_value_max=-log_epsilon)
        beta = tf.math.softplus(features=beta) + one
        if len(self.embedding_shape) == 1:
            beta = tf.reshape(tensor=beta, shape=shape)

        # Alpha + Beta
        alpha_beta = tf.maximum(x=(alpha + beta), y=epsilon)

        # Log norm
        log_norm = tf.math.lgamma(x=alpha) + tf.math.lgamma(x=beta) - tf.math.lgamma(x=alpha_beta)

        Module.update_tensor(name=(self.name + '-alpha'), tensor=alpha)
        Module.update_tensor(name=(self.name + '-beta'), tensor=beta)

        return alpha, beta, alpha_beta, log_norm