How to use the mathy.state.observations_to_window function in mathy

To help you get started, we’ve selected a few mathy examples based on popular ways observations_to_window is used in public projects.
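
All of the examples below follow the same basic pattern: convert an environment state into a MathyObservation, group one or more observations with observations_to_window, and call to_inputs() on the resulting window to produce model inputs. The following is a minimal sketch of that pattern, assembled from the snippets on this page rather than taken from an additional project:

from mathy import envs
from mathy.agents.base_config import BaseConfig
from mathy.env import MathyEnv
from mathy.state import MathyObservation, observations_to_window

args = BaseConfig()
env: MathyEnv = envs.PolySimplify()
# Convert the initial environment state into a single observation
observation: MathyObservation = env.state_to_observation(
    env.get_initial_state()[0], rnn_size=args.lstm_units
)
# Group one or more observations into a window, then build model inputs
window = observations_to_window([observation])
inputs = window.to_inputs()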


justindujardin/mathy: libraries/website/docs/snippets/ml/embeddings_inference.py (view on GitHub)
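This snippet embeds a window of two identical observations with MathyEmbedding and verifies that the output has shape (num_observations, num_nodes, embedding_units).
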
from mathy import envs
from mathy.agents.base_config import BaseConfig
from mathy.agents.embedding import MathyEmbedding
from mathy.env import MathyEnv
from mathy.state import MathyObservation, observations_to_window


args = BaseConfig()
env: MathyEnv = envs.PolySimplify()
observation: MathyObservation = env.state_to_observation(
    env.get_initial_state()[0], rnn_size=args.lstm_units
)
model = MathyEmbedding(args)
# output shape is: [num_observations, max_nodes_len, embedding_dimensions]
inputs = observations_to_window([observation, observation]).to_inputs()
embeddings = model(inputs)

# We provided two observations in a sequence
assert embeddings.shape[0] == 2
# There is one output per node in the observation
assert embeddings.shape[1] == len(observation.nodes)
# Outputs vectors with the provided embedding units
assert embeddings.shape[-1] == args.embedding_units

justindujardin/mathy: libraries/website/docs/snippets/ml/thinc_model.py (view on GitHub)
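This snippet wraps MathyEmbedding as a Thinc model: a two-observation window supplies the example inputs and input shapes that keras_subclass needs, and TensorFlowWrapper turns the Keras subclass into a Thinc Model.
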
from mathy import envs
from mathy.agents.base_config import BaseConfig
from mathy.agents.embedding import MathyEmbedding
from mathy.env import MathyEnv
from mathy.state import MathyObservation, observations_to_window
from thinc.api import TensorFlowWrapper, keras_subclass
from thinc.layers import Linear, ReLu, Softmax, chain, with_list
from thinc.model import Model
from thinc.shims.tensorflow import TensorFlowShim
from thinc.types import Array, Array1d, Array2d, ArrayNd

args = BaseConfig()
env: MathyEnv = envs.PolySimplify()
observation: MathyObservation = env.state_to_observation(
    env.get_initial_state()[0], rnn_size=args.lstm_units
)

# output shape is: [num_observations, max_nodes_len, embedding_dimensions]
window = observations_to_window([observation, observation])
inputs = window.to_inputs()
input_shape = window.to_input_shapes()


@keras_subclass(
    "MathyEmbedding",
    X=window.to_inputs(),
    Y=window.mask,
    input_shape=input_shape,
    args={"config": args},
)
class ThincEmbeddings(MathyEmbedding):
    pass


embeddings = TensorFlowWrapper(ThincEmbeddings(args))

justindujardin/mathy: libraries/website/docs/snippets/ml/embeddings_rnn_state.py (view on GitHub)
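This snippet inspects the embedding model's recurrent state: it starts zeroed, becomes non-zero after a forward pass over a one-observation window, and is zeroed again by reset_rnn_state().
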
import numpy as np

from mathy import envs
from mathy.agents.base_config import BaseConfig
from mathy.agents.embedding import MathyEmbedding
from mathy.env import MathyEnv
from mathy.state import MathyObservation, observations_to_window

args = BaseConfig()
env: MathyEnv = envs.PolySimplify()
observation: MathyObservation = env.state_to_observation(
    env.get_initial_state()[0], rnn_size=args.lstm_units
)
model = MathyEmbedding(args)
inputs = observations_to_window([observation]).to_inputs()

# Expect that the RNN states are zero to begin
assert np.count_nonzero(model.state_h.numpy()) == 0
assert np.count_nonzero(model.state_c.numpy()) == 0

embeddings = model.call(inputs)

# Expect that the RNN states are non-zero
assert np.count_nonzero(model.state_h.numpy()) > 0
assert np.count_nonzero(model.state_c.numpy()) > 0

# You can reset them
model.reset_rnn_state()

# Expect that the RNN states are zero again
assert np.count_nonzero(model.state_h.numpy()) == 0
assert np.count_nonzero(model.state_c.numpy()) == 0

justindujardin/mathy: mathy/agents/r2d2/learner.py (view on GitHub)
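This excerpt from the R2D2 learner's training step gathers per-frame states into windows with observations_to_window, then batches them, along with RNN states, discounted rewards, and action labels, via windows_to_batch.
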
            for frame in window:
                states.append(frame.state)
                rnn_states[0].append(frame.rnn_state[0][-1])
                rnn_states[1].append(frame.rnn_state[1][-1])
                discounted_rewards.append(frame.discounted)
            action_labels.append(window[-1].action)
            rnn_states = [
                tf.convert_to_tensor(rnn_states[0], dtype=tf.float32),
                tf.convert_to_tensor(rnn_states[1], dtype=tf.float32),
            ]
            discounted_rewards = tf.convert_to_tensor(
                discounted_rewards, dtype=tf.float32
            )
            action_labels = tf.convert_to_tensor(action_labels)

            obs_windows.append(observations_to_window(states))
            batch_rnn_states.append(rnn_states)
            batch_discounted_rewards.append(discounted_rewards)
            batch_action_labels.append(action_labels)
        batch_observations: MathyBatchObservation = windows_to_batch(obs_windows)
        return self.update_loss(
            batch_frames,
            batch_observations,
            batch_rnn_states,
            batch_discounted_rewards,
            batch_action_labels,
        )

justindujardin/mathy: libraries/website/docs/snippets/ml/policy_value_basic.py (view on GitHub)
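This snippet feeds a single-observation window to PolicyValueModel.predict_next and checks the shapes of the returned policy and value outputs.
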
import tensorflow as tf

from mathy import envs
from mathy.agents.base_config import BaseConfig
from mathy.agents.policy_value_model import PolicyValueModel
from mathy.env import MathyEnv
from mathy.state import MathyObservation, observations_to_window

args = BaseConfig()
env: MathyEnv = envs.PolySimplify()
observation: MathyObservation = env.state_to_observation(env.get_initial_state()[0])
model = PolicyValueModel(args, predictions=env.action_size)
inputs = observations_to_window([observation]).to_inputs()
# predict_next only returns a policy for the last observation
# in the sequence, and applies masking and softmax to the output
policy, value = model.predict_next(inputs)

# The policy is a 1D array of size (actions * num_nodes)
assert policy.shape.rank == 1
assert policy.shape == (env.action_size * len(observation.nodes),)

# There should be one floating point output Value
assert value.shape.rank == 0
assert isinstance(float(value.numpy()), float)

justindujardin/mathy: mathy/agents/r2d2/learner.py (view on GitHub)
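This excerpt from the R2D2 learner samples reward-prediction sequences from the experience buffer, labels each sequence as neutral, positive, or negative based on its final reward, and returns the sampled windows batched with windows_to_batch.
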
        rewards: List[float] = []
        if self.experience.is_full() is False:
            return output, rewards
        windows: List[MathyWindowObservation] = []
        for i in range(max_samples):
            frames = self.experience.sample_rp_sequence()
            # 4 frames
            states = [frame.state for frame in frames[:-1]]
            target_reward = frames[-1].reward
            if math.isclose(target_reward, GameRewards.TIMESTEP):
                sample_label = 0  # zero
            elif target_reward > 0:
                sample_label = 1  # positive
            else:
                sample_label = 2  # negative
            windows.append(observations_to_window(states))
            rewards.append(sample_label)
        return windows_to_batch(windows), rewards

justindujardin/mathy: libraries/website/docs/snippets/ml/policy_value_inference.py (view on GitHub)
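This excerpt from an A3C inference loop resets the embedding RNN state at the start of an episode, runs a few warm-up "thinking" steps over a start-observation window, and copies the current RNN state into each new observation for replay training.
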
selector = A3CGreedyActionSelector(
    model=get_or_create_policy_model(args=args, env_actions=env.action_size,),
    episode=0,
    worker_id=0,
)

# Set RNN to 0 state for start of episode
selector.model.embedding.reset_rnn_state()

# Start with the "init" sequence [n] times
for i in range(args.num_thinking_steps_begin + 1):
    rnn_state_h = selector.model.embedding.state_h.numpy()
    rnn_state_c = selector.model.embedding.state_c.numpy()
    seq_start = env_state.to_start_observation([rnn_state_h, rnn_state_c])
    selector.model.call(observations_to_window([seq_start]).to_inputs())

done = False
while not done:
    # store rnn state for replay training
    rnn_state_h = selector.model.embedding.state_h.numpy()
    rnn_state_c = selector.model.embedding.state_c.numpy()
    last_rnn_state = [rnn_state_h, rnn_state_c]

    # named tuples are read-only, so add rnn state to a new copy
    last_observation = MathyObservation(
        nodes=last_observation.nodes,
        mask=last_observation.mask,
        values=last_observation.values,
        type=last_observation.type,
        time=last_observation.time,
        rnn_state=last_rnn_state,