# Secure your code as it's written. Use Snyk Code to scan source code in minutes — no build needed — and fix issues immediately.
def long_unittest(self, horizon):
    """Run a short agent/environment rollout and check the queried 'horizon' tensor.

    Args:
        horizon: Either a plain value or a parameter-spec dict (with a 'type'
            key) for the reward-estimation horizon.

    The first observe() queries 'horizon' and asserts it is a numpy 'long'
    scalar.  A second step then checks that the queried value stays constant
    for constant horizons and changes for scheduled (non-constant) ones.
    """
    agent, environment = self.prepare(
        min_timesteps=3, reward_estimation=dict(horizon=horizon), memory=20
    )
    states = environment.reset()
    actions = agent.act(states=states)
    states, terminal, reward = environment.execute(actions=actions)
    # Query the internal 'horizon' value alongside the observe update.
    _, horizon_output1 = agent.observe(terminal=terminal, reward=reward, query='horizon')
    self.assertIsInstance(horizon_output1, util.np_dtype(dtype='long'))
    if not isinstance(horizon, dict) or horizon['type'] == 'constant':
        # Constant horizon: a second query must yield the same value.
        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        _, horizon_output2 = agent.observe(terminal=terminal, reward=reward, query='horizon')
        self.assertEqual(horizon_output2, horizon_output1)
    else:
        # Scheduled horizon: the queried value is expected to change between steps.
        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        _, horizon_output2 = agent.observe(terminal=terminal, reward=reward, query='horizon')
        self.assertNotEqual(horizon_output2, horizon_output1)
    actions = agent.act(states=states)
    _, terminal, reward = environment.execute(actions=actions)
    # NOTE(review): the chunk is cut here — the matching observe() for the last
    # act() and whatever uses horizon_input lie outside this view.
    horizon_input = 3
# NOTE(review): truncated fragment — the enclosing def (apparently a validator
# factory for 'int' actions) starts before this chunk, and the lambda below is
# missing its closing parentheses; `shape` and `num_values` are presumably
# captured from the enclosing action spec — confirm against the full file.
# The predicate checks dtype/shape, the [0, num_values) range, and that every
# chosen index is permitted by the corresponding action mask.
return (lambda action, name, states: (
    (
        # scalar case: bare numpy 'int' value with empty expected shape
        (isinstance(action, util.np_dtype('int')) and shape == ()) or
        (
            # array case: numpy 'int' array of exactly the expected shape
            isinstance(action, np.ndarray) and
            action.dtype == util.np_dtype('int') and action.shape == shape
        )
    ) and (0 <= action).all() and (action < num_values).all() and
    # masked validity: the selected index must be True in states[name + '_mask']
    np.take_along_axis(
        states[name + '_mask'], indices=np.expand_dims(action, axis=-1), axis=-1
    ).all()
# NOTE(review): truncated fragment — same validator-factory pattern as above
# but for 'bool' actions; the lambda's closing parentheses and any further
# conditions are outside this view.  `shape` is presumably captured from the
# enclosing action spec — confirm.
return (lambda action, name, states: (
    # scalar numpy bool, or a numpy bool array of exactly the expected shape
    (isinstance(action, util.np_dtype('bool')) and shape == ()) or
    (
        isinstance(action, np.ndarray) and
        action.dtype == util.np_dtype('bool') and action.shape == shape
    )
def __init__(self, name, capacity, values_spec, device=None, summary_labels=None):
    """Set up the memory with a 'terminal' buffer whose last slot is marked.

    The initial terminal layout has to agree with the terminal_indices
    variable created in tf_initialize.
    """
    # All-zero terminal column except for a single 1 in the final slot.
    term_init = np.zeros(shape=(capacity,), dtype=util.np_dtype(dtype='long'))
    term_init[capacity - 1] = 1
    super().__init__(
        name=name, capacity=capacity, values_spec=values_spec,
        initializers=OrderedDict(terminal=term_init),
        device=device, summary_labels=summary_labels
    )
def tf_initialize(self):
    """Create the memory's bookkeeping variables on top of the base setup."""
    super().tf_initialize()
    # Terminal indices: oldest episode terminals first; initially the only
    # terminal sits at the last buffer index (must agree with __init__).
    num_slots = self.capacity + 1
    term_idx_init = np.zeros(shape=(num_slots,), dtype=util.np_dtype(dtype='long'))
    term_idx_init[0] = self.capacity - 1
    self.terminal_indices = self.add_variable(
        name='terminal-indices', dtype='long', shape=(num_slots,),
        is_trainable=False, initializer=term_idx_init
    )
    # Number of complete episodes currently held in memory.
    self.episode_count = self.add_variable(
        name='episode-count', dtype='long', shape=(), is_trainable=False,
        initializer='zeros'
    )
# NOTE(review): fragment of an add_variable-style argument validator; the
# opening `if` guarding the first raise (presumably the is_trainable type
# check) precedes this chunk, so the leading raise/elif pair is incomplete
# here.
    raise TensorforceError.type(
        name='variable', argument='is_trainable', value=is_trainable
    )
# Only float variables may be trainable.
elif is_trainable and dtype != 'float':
    raise TensorforceError.unexpected()
# initializer: must be a value of the variable's Python dtype, a numpy array
# or TF tensor of matching dtype, or one of the named schemes below.
initializer_names = (
    'normal', 'normal-relu', 'orthogonal', 'orthogonal-relu', 'zeros', 'ones'
)
if not isinstance(initializer, (util.py_dtype(dtype=dtype), np.ndarray, tf.Tensor)) and \
        initializer not in initializer_names:
    raise TensorforceError.value(
        name='variable', argument='initializer', value=initializer
    )
elif isinstance(initializer, np.ndarray) and \
        initializer.dtype != util.np_dtype(dtype=dtype):
    # numpy initializer present but with the wrong dtype
    raise TensorforceError.type(
        name='variable', argument='initializer', value=initializer
    )
elif isinstance(initializer, tf.Tensor) and util.dtype(x=initializer) != dtype:
    # TF tensor initializer present but with the wrong dtype
    raise TensorforceError.type(
        name='variable', argument='initializer', value=initializer
    )
# is_saved: whether the variable participates in model saving
if not isinstance(is_saved, bool):
    raise TensorforceError.type(name='variable', argument='is_saved', value=is_saved)
# summarize: optional flag controlling summary emission
if summarize is not None and not isinstance(summarize, bool):
    raise TensorforceError.type(name='variable', argument='summarize', value=summarize)
# shared: optional name under which the variable is shared
if shared is not None and not isinstance(shared, str):
    raise TensorforceError.type(name='variable', argument='shared', value=shared)
# NOTE(review): mid-function fragment of an experience/observe routine; the
# enclosing def and the surrounding statements are outside this view.
internals = OrderedDict()
# Normalize the user-facing actions dict against the agent's action spec.
actions = util.normalize_values(
    value_type='action', values=actions, values_spec=self.actions_spec
)
if isinstance(terminal, (bool, int)):
    # Single-step input: wrap everything into a batch of one.
    states = util.fmap(function=(lambda x: [x]), xs=states, depth=1)
    actions = util.fmap(function=(lambda x: [x]), xs=actions, depth=1)
    terminal = [terminal]
    reward = [reward]
states = util.fmap(function=np.asarray, xs=states, depth=1)
actions = util.fmap(function=np.asarray, xs=actions, depth=1)
if isinstance(terminal, np.ndarray):
    # NOTE(review): identity comparison (`is`) of a dtype looks fragile —
    # numpy dtypes are normally compared with `==`; if util.np_dtype returns
    # a scalar type (e.g. np.bool_) rather than a cached np.dtype instance,
    # this branch never fires.  Confirm against util and prefer `==`.
    if terminal.dtype is util.np_dtype(dtype='bool'):
        # Convert boolean terminals to 0/1 'long' values.
        zeros = np.zeros_like(terminal, dtype=util.np_dtype(dtype='long'))
        ones = np.ones_like(terminal, dtype=util.np_dtype(dtype='long'))
        terminal = np.where(terminal, ones, zeros)
else:
    # List input: map Python bools to ints, leave other entries untouched.
    terminal = np.asarray([int(x) if isinstance(x, bool) else x for x in terminal])
reward = np.asarray(reward)
# Batch experiences split into episodes and at most size buffer_observe
last = 0
for index in range(1, len(terminal) + 1):
    if terminal[index - 1] == 0 and index - last < self.experience_size:
        continue
    # Include terminal in batch if possible
    if index < len(terminal) and terminal[index - 1] == 0 and terminal[index] > 0 and \
            index - last < self.experience_size:
# NOTE(review): the chunk splices into an unrelated fragment here — the lines
# below are the keyword-argument tail of an agent act(...) call from elsewhere
# in the file; the body of the `if` above is missing.
        states=states, auxiliaries=auxiliaries, parallel=parallel,
        deterministic=deterministic, independent=independent, query=query, **kwargs
    )
# NOTE(review): fragment of an act() routine; `states`, `actions`,
# `auxiliaries`, `independent`, `query` and `queried` are defined earlier in
# the enclosing def, outside this view.
if self.recorder_spec is not None and not independent:
    # Record the (still batched) states and actions for trace recording.
    for name in self.states_spec:
        self.record_states[name].append(states[name])
    for name, spec in self.actions_spec.items():
        self.record_actions[name].append(actions[name])
        if spec['type'] == 'int':
            if name + '_mask' in auxiliaries:
                # Caller supplied an explicit action mask — record it.
                self.record_states[name + '_mask'].append(auxiliaries[name + '_mask'])
            else:
                # No mask given: record an all-True mask of the action's shape
                # (leading 1 is the batch dimension).
                shape = (1,) + spec['shape'] + (spec['num_values'],)
                self.record_states[name + '_mask'].append(
                    np.full(shape, True, dtype=util.np_dtype(dtype='bool'))
                )
# Unbatch actions
actions = util.fmap(function=(lambda x: x[0]), xs=actions, depth=1)
# Reverse normalized actions dictionary
actions = util.unpack_values(
    value_type='action', values=actions, values_spec=self.actions_spec
)
# if independent, return processed state as well?
if query is None:
    return actions
else:
    # Also hand back whatever tensors were queried alongside the act.
    return actions, queried
# NOTE(review): second copy of the experience/observe fragment seen earlier in
# this chunk; it starts mid-call (the keyword tail of util.normalize_values)
# and ends mid-loop, so the enclosing def is outside this view.
    value_type='action', values=actions, values_spec=self.actions_spec
)
if isinstance(terminal, (bool, int)):
    # Single-step input: wrap everything into a batch of one.
    states = util.fmap(function=(lambda x: [x]), xs=states, depth=1)
    actions = util.fmap(function=(lambda x: [x]), xs=actions, depth=1)
    terminal = [terminal]
    reward = [reward]
states = util.fmap(function=np.asarray, xs=states, depth=1)
actions = util.fmap(function=np.asarray, xs=actions, depth=1)
if isinstance(terminal, np.ndarray):
    # NOTE(review): same `is` dtype comparison as the earlier fragment —
    # numpy dtypes are normally compared with `==`; confirm util.np_dtype's
    # return type before relying on this branch.
    if terminal.dtype is util.np_dtype(dtype='bool'):
        # Convert boolean terminals to 0/1 'long' values.
        zeros = np.zeros_like(terminal, dtype=util.np_dtype(dtype='long'))
        ones = np.ones_like(terminal, dtype=util.np_dtype(dtype='long'))
        terminal = np.where(terminal, ones, zeros)
else:
    # List input: map Python bools to ints, leave other entries untouched.
    terminal = np.asarray([int(x) if isinstance(x, bool) else x for x in terminal])
reward = np.asarray(reward)
# Batch experiences split into episodes and at most size buffer_observe
last = 0
for index in range(1, len(terminal) + 1):
    if terminal[index - 1] == 0 and index - last < self.experience_size:
        continue
    # Include terminal in batch if possible
    if index < len(terminal) and terminal[index - 1] == 0 and terminal[index] > 0 and \
            index - last < self.experience_size:
        index += 1