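# Example: embed a window of Mathy observations with the MathyEmbedding model.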
from mathy import envs
from mathy.agents.base_config import BaseConfig
from mathy.agents.embedding import MathyEmbedding
from mathy.env import MathyEnv
from mathy.state import MathyObservation, observations_to_window
args = BaseConfig()
env: MathyEnv = envs.PolySimplify()
observation: MathyObservation = env.state_to_observation(
env.get_initial_state()[0], rnn_size=args.lstm_units
)
model = MathyEmbedding(args)
# output shape is: [num_observations, max_nodes_len, embedding_dimensions]
inputs = observations_to_window([observation, observation]).to_inputs()
embeddings = model(inputs)
# We provided two observations in a sequence
assert embeddings.shape[0] == 2
# There is one output per node in the input sequence
assert embeddings.shape[1] == len(observation.nodes)
# Each output vector has the configured number of embedding units
assert embeddings.shape[-1] == args.embedding_units
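
# Example: wrap the MathyEmbedding Keras model as a Thinc model using the
# keras_subclass decorator and TensorFlowWrapper from thinc.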
from mathy import envs
from mathy.agents.base_config import BaseConfig
from mathy.agents.embedding import MathyEmbedding
from mathy.env import MathyEnv
from mathy.state import MathyObservation, observations_to_window
from thinc.api import TensorFlowWrapper, keras_subclass
from thinc.layers import Linear, ReLu, Softmax, chain, with_list
from thinc.model import Model
from thinc.shims.tensorflow import TensorFlowShim
from thinc.types import Array, Array1d, Array2d, ArrayNd
args = BaseConfig()
env: MathyEnv = envs.PolySimplify()
observation: MathyObservation = env.state_to_observation(
env.get_initial_state()[0], rnn_size=args.lstm_units
)
# output shape is: [num_observations, max_nodes_len, embedding_dimensions]
window = observations_to_window([observation, observation])
inputs = window.to_inputs()
input_shape = window.to_input_shapes()
@keras_subclass(
"MathyEmbedding",
X=window.to_inputs(),
Y=window.mask,
input_shape=input_shape,
args={"config": args},
)
class ThincEmbeddings(MathyEmbedding):
pass
embeddings = TensorFlowWrapper(ThincEmbeddings(args))
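
# Example: the embedding model keeps LSTM hidden/cell state between calls;
# reset_rnn_state() zeroes it again.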
import numpy as np
from mathy import envs
from mathy.agents.base_config import BaseConfig
from mathy.agents.embedding import MathyEmbedding
from mathy.env import MathyEnv
from mathy.state import MathyObservation, observations_to_window
args = BaseConfig()
env: MathyEnv = envs.PolySimplify()
observation: MathyObservation = env.state_to_observation(
env.get_initial_state()[0], rnn_size=args.lstm_units
)
model = MathyEmbedding(args)
inputs = observations_to_window([observation]).to_inputs()
# Expect that the RNN states are zero to begin
assert np.count_nonzero(model.state_h.numpy()) == 0
assert np.count_nonzero(model.state_c.numpy()) == 0
embeddings = model.call(inputs)
# Expect that the RNN states are non-zero
assert np.count_nonzero(model.state_h.numpy()) > 0
assert np.count_nonzero(model.state_c.numpy()) > 0
# You can reset them
model.reset_rnn_state()
# Expect that the RNN states are zero again after resetting
assert np.count_nonzero(model.state_h.numpy()) == 0
assert np.count_nonzero(model.state_c.numpy()) == 0
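
# Fragment from an A3C worker's batching routine: the accumulator lists
# (states, rnn_states, obs_windows, and the batch_* lists) and `window`
# are created by the enclosing method, which iterates the replay windows
# gathered during an episode.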
for frame in window:
    states.append(frame.state)
    # collect the last stored hidden ([0]) and cell ([1]) RNN state vectors
    rnn_states[0].append(frame.rnn_state[0][-1])
    rnn_states[1].append(frame.rnn_state[1][-1])
    discounted_rewards.append(frame.discounted)
# the training label is the action taken at the final frame of the window
action_labels.append(window[-1].action)
rnn_states = [
    tf.convert_to_tensor(rnn_states[0], dtype=tf.float32),
    tf.convert_to_tensor(rnn_states[1], dtype=tf.float32),
]
discounted_rewards = tf.convert_to_tensor(
    discounted_rewards, dtype=tf.float32
)
action_labels = tf.convert_to_tensor(action_labels)
obs_windows.append(observations_to_window(states))
batch_rnn_states.append(rnn_states)
batch_discounted_rewards.append(discounted_rewards)
batch_action_labels.append(action_labels)
# once every window has been converted, batch them and compute the loss
batch_observations: MathyBatchObservation = windows_to_batch(obs_windows)
return self.update_loss(
    batch_frames,
    batch_observations,
    batch_rnn_states,
    batch_discounted_rewards,
    batch_action_labels,
)
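
# Example: predict a policy distribution and a value estimate for an
# observation with PolicyValueModel.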
import tensorflow as tf
from mathy import envs
from mathy.agents.base_config import BaseConfig
from mathy.agents.policy_value_model import PolicyValueModel
from mathy.env import MathyEnv
from mathy.state import MathyObservation, observations_to_window
args = BaseConfig()
env: MathyEnv = envs.PolySimplify()
observation: MathyObservation = env.state_to_observation(env.get_initial_state()[0])
model = PolicyValueModel(args, predictions=env.action_size)
inputs = observations_to_window([observation]).to_inputs()
# predict_next only returns a policy for the last observation
# in the sequence, and applies masking and softmax to the output
policy, value = model.predict_next(inputs)
# The policy is a 1D array of size (actions * num_nodes)
assert policy.shape.rank == 1
assert policy.shape == (env.action_size * len(observation.nodes),)
# The value estimate is a single floating point scalar
assert value.shape.rank == 0
assert isinstance(float(value.numpy()), float)
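
# Fragment from an experience-replay helper: sample short frame sequences
# and label each window by whether its final reward was the default
# timestep reward, positive, or negative. `output` and `max_samples` come
# from the enclosing method.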
rewards: List[float] = []
if self.experience.is_full() is False:
    return output, rewards
windows: List[MathyWindowObservation] = []
for i in range(max_samples):
    frames = self.experience.sample_rp_sequence()
    # 4 frames
    states = [frame.state for frame in frames[:-1]]
    target_reward = frames[-1].reward
    if math.isclose(target_reward, GameRewards.TIMESTEP):
        sample_label = 0  # zero
    elif target_reward > 0:
        sample_label = 1  # positive
    else:
        sample_label = 2  # negative
    windows.append(observations_to_window(states))
    rewards.append(sample_label)
return windows_to_batch(windows), rewards
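
# Fragment from an A3C episode loop: build a greedy action selector, reset
# the model's RNN state, prime it with a few "thinking" steps, and carry
# the latest RNN state into each new observation. `env_state`,
# `last_observation`, and the helper functions come from the surrounding
# worker code.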
selector = A3CGreedyActionSelector(
    model=get_or_create_policy_model(args=args, env_actions=env.action_size),
    episode=0,
    worker_id=0,
)
# Set RNN to 0 state for start of episode
selector.model.embedding.reset_rnn_state()
# Start with the "init" sequence [n] times
for i in range(args.num_thinking_steps_begin + 1):
    rnn_state_h = selector.model.embedding.state_h.numpy()
    rnn_state_c = selector.model.embedding.state_c.numpy()
    seq_start = env_state.to_start_observation([rnn_state_h, rnn_state_c])
    selector.model.call(observations_to_window([seq_start]).to_inputs())

done = False
while not done:
    # store rnn state for replay training
    rnn_state_h = selector.model.embedding.state_h.numpy()
    rnn_state_c = selector.model.embedding.state_c.numpy()
    last_rnn_state = [rnn_state_h, rnn_state_c]
    # named tuples are read-only, so add rnn state to a new copy
    last_observation = MathyObservation(
        nodes=last_observation.nodes,
        mask=last_observation.mask,
        values=last_observation.values,
        type=last_observation.type,
        time=last_observation.time,
        rnn_state=last_rnn_state,
    )