'hidden_layer_sizes': (256, 256),
'squash': True,
}
},
sampler_params={
'type': 'SimpleSampler',
'kwargs': {
'max_path_length': HORIZON,
'min_pool_size': HORIZON,
'batch_size': 256,
}
},
run_params={
'seed': tune.sample_from(
lambda spec: np.random.randint(0, 10000)),
'checkpoint_at_end': True,
'checkpoint_frequency': HORIZON // N_CHECKPOINTS,
'checkpoint_replay_pool': False,
},
resources_per_trial={
'cpu': N_CPUS,
'gpu': N_GPUS,
'extra_cpu': 0,
'extra_gpu': 0,
}
)
class TestSoftActorCritic(unittest.TestCase):
args = parser.parse_args(argv)
experiment = RayExperiment(
name=f'Fft_factorization_softmax_{args.size}',
run=TrainableFftFactorSoftmax,
local_dir=args.result_dir,
num_samples=args.ntrials,
checkpoint_at_end=True,
resources_per_trial={'cpu': args.nthreads, 'gpu': 0},
stop={
'training_iteration': 1 if args.smoke_test else 99999,
'is_nan': True,
'negative_loss': -1e-8
},
config={
'size': args.size,
'lr': sample_from(lambda spec: math.exp(random.uniform(math.log(1e-4), math.log(5e-1)))),
'seed': sample_from(lambda spec: random.randint(0, 1 << 16)),
'semantic_loss_weight': sample_from(lambda spec: math.exp(random.uniform(math.log(5e-4), math.log(5e-1)))),
'n_steps_per_epoch': args.nsteps,
},
)
return experiment, args
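# Illustrative sketch, not part of the original file: the log-uniform draw used for
# 'lr' and 'semantic_loss_weight' above can be factored into a small helper.
# The name `log_uniform` is an assumption for illustration only.
import math
import random

from ray.tune import sample_from


def log_uniform(low, high):
    """Return a sample_from spec that draws log-uniformly from [low, high)."""
    return sample_from(
        lambda spec: math.exp(random.uniform(math.log(low), math.log(high))))


# Usage mirroring the configs above, e.g. config['lr'] = log_uniform(1e-4, 5e-1)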
def legendreeval_experiment_complex(fixed_order, softmax_fn, size, ntrials, nsteps, result_dir, nthreads, smoke_test):
assert softmax_fn in ['softmax', 'sparsemax']
config={
'fixed_order': fixed_order,
'softmax_fn': softmax_fn,
'size': size,
'lr': sample_from(lambda spec: math.exp(random.uniform(math.log(1e-4), math.log(5e-1)))),
'seed': sample_from(lambda spec: random.randint(0, 1 << 16)),
'perm': sample_from(lambda spec: random.choice(['id', 'br', 'dct'])),
'n_steps_per_epoch': nsteps,
}
if (not fixed_order) and softmax_fn == 'softmax':
config['semantic_loss_weight'] = sample_from(lambda spec: math.exp(random.uniform(math.log(5e-3), math.log(5e-1))))
experiment = RayExperiment(
name=f'LegendreEval_factorization_complex_{fixed_order}_{softmax_fn}_{size}',
run=TrainableLegendreComplex,
local_dir=result_dir,
num_samples=ntrials,
checkpoint_at_end=True,
resources_per_trial={'cpu': nthreads, 'gpu': 0},
stop={
'training_iteration': 1 if smoke_test else 99999,
'negative_loss': -1e-8
},
config=config,
)
return experiment
def fft_experiment_learn_perm(fixed_order, softmax_fn, size, ntrials, nsteps, result_dir, nthreads, smoke_test):
assert softmax_fn in ['softmax', 'sparsemax']
config={
'fixed_order': fixed_order,
'softmax_fn': softmax_fn,
'size': size,
'lr': sample_from(lambda spec: math.exp(random.uniform(math.log(1e-4), math.log(5e-1)))),
'seed': sample_from(lambda spec: random.randint(0, 1 << 16)),
'n_steps_per_epoch': nsteps,
}
if (not fixed_order) and softmax_fn == 'softmax':
config['semantic_loss_weight'] = sample_from(lambda spec: math.exp(random.uniform(math.log(5e-3), math.log(5e-1))))
experiment = RayExperiment(
name=f'Fft_factorization_Learnperm_{fixed_order}_{softmax_fn}_{size}',
run=TrainableFftLearnPerm,
local_dir=result_dir,
num_samples=ntrials,
checkpoint_at_end=True,
resources_per_trial={'cpu': nthreads, 'gpu': 0},
stop={
'training_iteration': 1 if smoke_test else 99999,
# 'negative_loss': -1e-8
},
config=config,
)
return experiment
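# Hedged sketch, not from the source file: how one of the RayExperiment objects built by
# these helpers is typically launched with Ray Tune. Argument values are placeholders.
import ray
from ray import tune

ray.init()
experiment = fft_experiment_learn_perm(
    fixed_order=True, softmax_fn='softmax', size=8, ntrials=4,
    nsteps=200, result_dir='results', nthreads=1, smoke_test=True)
tune.run_experiments(experiment)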
# "kl_target": random.uniform(0.003, 0.03),
# "kl_coeff": random.uniform(0.3, 1),
# "use_gae": random.choice([True, False]),
# "gamma": random.choice([0.99,
# random.uniform(0.8, 0.9997),
# random.uniform(0.8, 0.9997)]),
# "lambda": random.uniform(0.9, 1.0),
# val fn & entropy coeff
# "vf_loss_coeff": random.choice([0.5, 1.0]),
# "entropy_coeff": random.uniform(0, 0.01),
# "lr": random.uniform(5e-6, 0.003),
}
# Seed the initial population with a wide range of hyperparameter values.
# Bind k as a default argument so each lambda captures its own key instead of the
# loop variable's final value.
for k in hyper_params.keys():
config[k] = tune.sample_from(lambda spec, k=k: hyper_params[k])
scheduler = PopulationBasedTraining(time_attr="time_total_s",
reward_attr="episode_reward_mean",
perturbation_interval=120,
resample_probability=0.80,
hyperparam_mutations=hyper_params,
custom_explore_fn=explore)
if agent.lower() == "ddpg":
pass
if agent.lower() == "pg":
pass
return config, scheduler
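# Hedged sketch: the `explore` hook passed to custom_explore_fn above is not shown in
# this excerpt. A typical PBT explore function receives the mutated config and clamps
# values back into valid ranges; the bounds below are illustrative assumptions.
def explore(config):
    # Keep the GAE lambda inside [0.9, 1.0] and entropy_coeff non-negative after mutation.
    if "lambda" in config:
        config["lambda"] = min(max(config["lambda"], 0.9), 1.0)
    if "entropy_coeff" in config:
        config["entropy_coeff"] = max(config["entropy_coeff"], 0.0)
    return config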
tune.run_experiments(
{
"exp": {
"stop": {
"training_iteration": 200
},
"resources_per_trial": {
"cpu": 3,
"gpu": 1
},
"run": "validate",
"num_samples": args.n_samples,
"config": {
"alpha": tune.sample_from(
lambda spec: np.random.uniform(0.1, 0.6)),
"lr": tune.sample_from(
lambda spec: np.random.uniform(1e-6, 1e-3)),
"niters": tune.sample_from(
lambda spec: np.random.randint(1, 10))
}
}
},
verbose=1,
scheduler=sched)
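# Hedged sketch: `sched` is passed to tune.run_experiments above but is not defined in
# this excerpt. One plausible choice (an assumption, not taken from the source) is an
# asynchronous HyperBand scheduler; reward_attr follows the older Ray Tune API used here.
from ray.tune.schedulers import AsyncHyperBandScheduler

sched = AsyncHyperBandScheduler(
    time_attr="training_iteration",
    reward_attr="episode_reward_mean",  # placeholder; should match what the trainable reports
    max_t=200)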
args = parser.parse_args(argv)
experiment = RayExperiment(
name=f'Randn_factorization_softmax_no_perm_{args.size}',
run=TrainableRandnFactorSoftmaxNoPerm,
local_dir=args.result_dir,
num_samples=args.ntrials,
checkpoint_at_end=True,
resources_per_trial={'cpu': args.nthreads, 'gpu': 0},
stop={
'training_iteration': 1 if args.smoke_test else 99999,
'negative_loss': -1e-8
},
config={
'size': args.size,
'lr': sample_from(lambda spec: math.exp(random.uniform(math.log(1e-4), math.log(5e-1)))),
'seed': sample_from(lambda spec: random.randint(0, 1 << 16)),
'n_steps_per_epoch': args.nsteps,
},
)
return experiment, args
def save_activations(score):
score = dict(score)
score["episodes"] = None
# Trajectory length varies a lot between environments and opponents; make sure we have
# a consistent number of data points.
score["timesteps"] = 20000
score["record_traj"] = True
score["transparent_params"] = {"ff_policy": True, "ff_value": True}
score["record_traj_params"] = {
"save_dir": "data/trajectories",
}
spec = { # noqa: F841
"config": {
"record_traj_params": {
"agent_indices": tune.sample_from(
lambda spec: VICTIM_INDEX[spec.config[PATHS_AND_TYPES][0]]
),
}
}
}
exp_prefix = {"activations": None} # noqa: F841
def fft_experiment_block(trainable, size, ntrials, nsteps, nepochsvalid, result_dir, nthreads, smoke_test):
config={
'target_matrix': named_target_matrix('dft', size),
'lr': sample_from(lambda spec: math.exp(random.uniform(math.log(1e-4), math.log(5e-1)))),
'seed': sample_from(lambda spec: random.randint(0, 1 << 16)),
'n_steps_per_epoch': nsteps,
'n_epochs_per_validation': nepochsvalid,
'complex': True,
}
experiment = RayExperiment(
name=f'Fft_factorization_{trainable.__name__}_{size}',
run=trainable,
local_dir=result_dir,
num_samples=ntrials,
checkpoint_at_end=True,
resources_per_trial={'cpu': nthreads, 'gpu': 0},
stop={
'training_iteration': 1 if smoke_test else 99999,
'negative_loss': -1e-8
},
config=config,