test_flag = 0
total_steps = 0
while total_steps < args.train_total_steps:
    trajectories = collect_trajectories(
        env, agent, scaler, episodes=args.episodes_per_batch)
    total_steps += sum([t['obs'].shape[0] for t in trajectories])
    total_train_rewards = sum([np.sum(t['rewards']) for t in trajectories])

    train_obs, train_actions, train_advantages, train_discount_sum_rewards = build_train_data(
        trajectories, agent)

    policy_loss, kl = agent.policy_learn(train_obs, train_actions,
                                         train_advantages)
    value_loss = agent.value_learn(train_obs, train_discount_sum_rewards)

    logger.info(
        'Steps {}, Train reward: {}, Policy loss: {}, KL: {}, Value loss: {}'
        .format(total_steps, total_train_rewards / args.episodes_per_batch,
                policy_loss, kl, value_loss))

    if total_steps // args.test_every_steps >= test_flag:
        while total_steps // args.test_every_steps >= test_flag:
            test_flag += 1
        eval_reward = run_evaluate_episode(env, agent, scaler)
        logger.info('Steps {}, Evaluate reward: {}'.format(
            total_steps, eval_reward))
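
# Illustrative sketch (not from the original code): the evaluation trigger used
# above, isolated for clarity. With test_flag starting at 0, the check
# `total_steps // test_every_steps >= test_flag` is true the first time through
# and again whenever total_steps crosses a multiple of test_every_steps; the
# inner while then advances test_flag past every boundary the last batch may
# have skipped, so evaluation still runs at most once per batch.
def should_evaluate(total_steps, test_every_steps, test_flag):
    run_eval = total_steps // test_every_steps >= test_flag
    while total_steps // test_every_steps >= test_flag:
        test_flag += 1
    return run_eval, test_flag
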
# warm up the replay memory before training
with tqdm(
        total=MEMORY_WARMUP_SIZE, desc='[Replay Memory Warm Up]') as pbar:
    while rpm.size() < MEMORY_WARMUP_SIZE:
        total_reward, steps, _ = run_train_episode(env, agent, rpm)
        pbar.update(steps)

# train
test_flag = 0
pbar = tqdm(total=args.train_total_steps)
total_steps = 0
max_reward = None
while total_steps < args.train_total_steps:
    # start epoch
    total_reward, steps, loss = run_train_episode(env, agent, rpm)
    total_steps += steps
    pbar.set_description('[train]exploration:{}'.format(agent.exploration))
    tensorboard.add_scalar('dqn/score', total_reward, total_steps)
    tensorboard.add_scalar('dqn/loss', loss, total_steps)  # mean of total loss
    tensorboard.add_scalar('dqn/exploration', agent.exploration, total_steps)
    pbar.update(steps)

    if total_steps // args.test_every_steps >= test_flag:
        while total_steps // args.test_every_steps >= test_flag:
            test_flag += 1
        pbar.write("testing")
        eval_rewards = []
        for _ in tqdm(range(3), desc='eval agent'):
            eval_reward = run_evaluate_episode(test_env, agent)
            eval_rewards.append(eval_reward)
        logger.info(
            "eval_agent done, (steps, eval_reward): ({}, {})".format(
                total_steps, np.mean(eval_rewards)))
        eval_test = np.mean(eval_rewards)
        tensorboard.add_scalar('dqn/eval', eval_test, total_steps)
pbar.close()
def _new_ready_actor(self):
    """The actor is ready to start a new episode, but blocks until the
    training thread calls actor_ready_event.set().
    """
    actor_ready_event = threading.Event()
    self.ready_actor_queue.put(actor_ready_event)
    logger.info(
        "[new_available_actor] approximate size of ready actors:{}".format(
            self.ready_actor_queue.qsize()))
    actor_ready_event.wait()
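
# Illustrative sketch (not from the original code): the training-thread side
# that pairs with _new_ready_actor above. It assumes ready_actor_queue is a
# standard queue.Queue holding threading.Event objects; the helper name is
# hypothetical.
def release_one_ready_actor(ready_actor_queue):
    # Block until some actor has registered itself as ready, then wake it up
    # so it can start collecting a new episode.
    actor_ready_event = ready_actor_queue.get()
    actor_ready_event.set()
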
    for file, code in pyfiles['python_files'].items():
        file = os.path.join(envdir, file)
        with open(file, 'wb') as code_file:
            code_file.write(code)

    # save other files to current directory
    for file, content in pyfiles['other_files'].items():
        # create directory (i.e. ./rom_files/)
        if '/' in file:
            try:
                os.makedirs(os.path.join(*file.rsplit('/')[:-1]))
            except OSError as e:
                pass
        with open(file, 'wb') as f:
            f.write(content)
    logger.info('[job] reply')
    reply_socket.send_multipart([remote_constants.NORMAL_TAG])
    return envdir
else:
    logger.error("NotImplementedError:{}, received tag:{}".format(
        job_address, tag))
    raise NotImplementedError
    Returns:
        gpu_count: int
    """
    gpu_count = 0
    env_cuda_devices = os.environ.get('CUDA_VISIBLE_DEVICES', None)
    if env_cuda_devices is not None:
        assert isinstance(env_cuda_devices, str)
        try:
            if not env_cuda_devices:
                return 0
            gpu_count = len(
                [x for x in env_cuda_devices.split(',') if int(x) >= 0])
            logger.info(
                'CUDA_VISIBLE_DEVICES found gpu count: {}'.format(gpu_count))
        except:
            logger.info('Cannot find available GPU devices, using CPU now.')
            gpu_count = 0
    else:
        try:
            gpu_count = str(
                subprocess.check_output(["nvidia-smi", "-L"])).count('UUID')
            logger.info('nvidia-smi -L found gpu count: {}'.format(gpu_count))
        except:
            logger.info('Cannot find available GPU devices, using CPU now.')
            gpu_count = 0
    return gpu_count
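
# Illustrative usage sketch (not from the original code). `get_gpu_count` is a
# hypothetical name for the enclosing function, whose signature is not shown in
# this fragment.
import os

os.environ['CUDA_VISIBLE_DEVICES'] = '0,2'
assert get_gpu_count() == 2  # counted from the environment variable

os.environ['CUDA_VISIBLE_DEVICES'] = ''
assert get_gpu_count() == 0  # an empty string means no visible GPU
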
self.submit_job_socket = self.ctx.socket(zmq.REQ)
self.submit_job_socket.linger = 0
self.submit_job_socket.setsockopt(
    zmq.RCVTIMEO, remote_constants.HEARTBEAT_TIMEOUT_S * 1000)
self.submit_job_socket.connect("tcp://{}".format(master_address))
self.start_time = time.time()
thread = threading.Thread(target=self._reply_heartbeat)
thread.setDaemon(True)
thread.start()
self.heartbeat_socket_initialized.wait()

# check if the master is connected properly
try:
    self.submit_job_socket.send_multipart([
        remote_constants.CLIENT_CONNECT_TAG,
        to_byte(self.heartbeat_master_address),
        to_byte(socket.gethostname())
    ])
    _ = self.submit_job_socket.recv_multipart()
except zmq.error.Again as e:
    logger.warning("[Client] Can not connect to the master, please "
                   "check if master is started and ensure the input "
                   "address {} is correct.".format(master_address))
    self.master_is_alive = False
    raise Exception("Client can not connect to the master, please "
                    "check if master is started and ensure the input "
                    "address {} is correct.".format(master_address))
            logger.error(
                "Memory used by this job exceeds {}. This job will exit."
                .format(self.max_memory))
            time.sleep(5)
            socket.close(0)
            os._exit(1)
    except zmq.error.Again as e:
        logger.warning(
            "[Job] Cannot connect to the client. This job will exit and inform the worker."
        )
        break
socket.close(0)
with self.lock:
    self.kill_job_socket.send_multipart(
        [remote_constants.KILLJOB_TAG,
         to_byte(self.job_address)])
    try:
        _ = self.kill_job_socket.recv_multipart()
    except zmq.error.Again as e:
        pass
logger.warning("[Job] lost connection with the client, will exit")
os._exit(1)