    m.restore_B(hps.restore_path_B)

# If neither model is restored from a checkpoint, run data-dependent
# initialization (actnorm / DDI) on an initialization batch.
if hps.restore_path_A == '' and hps.restore_path_B == '':
    with Z.arg_scope([Z.get_variable_ddi, Z.actnorm], init=True):
        results_init = f_loss(None, False, reuse=True, init=True)
    all_params = tf.global_variables()
    params_A = [param for param in all_params if 'A/' in param.name]
    params_B = [param for param in all_params if 'B/' in param.name]
    sess.run(tf.variables_initializer(params_A))
    sess.run(tf.variables_initializer(params_B))
    feeds_dict = {feeds['x_A']: data_inits['A']['x'],
                  feeds['y_A']: data_inits['A']['y'],
                  feeds['x_B']: data_inits['B']['x'],
                  feeds['y_B']: data_inits['B']['y']}
    sess.run(results_init, feeds_dict)

# Make every worker start from the same weights as rank 0.
sess.run(hvd.broadcast_global_variables(0))
return m
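# The snippet above assumes the usual Horovod TF1 setup has already happened
# earlier in the program. A minimal, self-contained sketch of that pattern
# (hypothetical variable names, not taken from the snippet above): call
# hvd.init() first, pin one GPU per process, initialize variables, then run
# hvd.broadcast_global_variables(0) once so all replicas start identical.
import tensorflow as tf
import horovod.tensorflow as hvd

hvd.init()
config = tf.ConfigProto()
config.gpu_options.visible_device_list = str(hvd.local_rank())

# Each process gets its own random init; the broadcast overwrites it with rank 0's values.
w = tf.get_variable('w', shape=[10], initializer=tf.random_normal_initializer())

with tf.Session(config=config) as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(hvd.broadcast_global_variables(0))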
    while True:
        try:
            feed_dict = {
                iterator.handle: iterator.get_handle(sess, 'test'),
                tx.context.global_mode(): tf.estimator.ModeKeys.PREDICT,
            }
            _preds = sess.run(preds, feed_dict=feed_dict)
            _all_preds.extend(_preds.tolist())
        except tf.errors.OutOfRangeError:
            break

    output_file = os.path.join(FLAGS.output_dir, "test_results.tsv")
    with tf.gfile.GFile(output_file, "w") as writer:
        writer.write('\n'.join(str(p) for p in _all_preds))

# Broadcasts global variables from the rank-0 process
if FLAGS.distributed:
    bcast = hvd.broadcast_global_variables(0)

session_config = tf.ConfigProto()
if FLAGS.distributed:
    # Pin each process to a single, distinct GPU.
    session_config.gpu_options.visible_device_list = str(hvd.local_rank())

with tf.Session(config=session_config) as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())
    sess.run(tf.tables_initializer())

    if FLAGS.distributed:
        # Sync the freshly initialized variables across workers.
        bcast.run()

    # Restores trained model if specified
    saver = tf.train.Saver()
    if FLAGS.checkpoint:
        saver.restore(sess, FLAGS.checkpoint)
placeholders = model.get_placeholders()
train_queue = tf.FIFOQueue(train_params.async_encoding,
                           [x.dtype for x in placeholders], name="train_queue")
evaluator_runner = AysncEvaluatorRunner(evaluators, model, train_params.async_encoding)
train_enqueue = train_queue.enqueue(placeholders)
train_close = train_queue.close(True)

is_train = tf.placeholder(tf.bool, ())
input_tensors = tf.cond(is_train, lambda: train_queue.dequeue(),
                        lambda: evaluator_runner.eval_queue.dequeue())

# TensorFlow can't infer the shape for an unsized queue, so set it manually
for input_tensor, pl in zip(input_tensors, placeholders):
    input_tensor.set_shape(pl.shape)

bcast = hvd.broadcast_global_variables(0)

print("Init model...")
config = tf.ConfigProto(allow_soft_placement=True)
config.gpu_options.allow_growth = True
config.gpu_options.visible_device_list = str(hvd.local_rank())
sess = tf.Session(config=config)
with sess.as_default():
    pred = model.get_predictions_for(dict(zip(placeholders, input_tensors)))

evaluator_runner.set_input(pred)

if parameter_checkpoint is not None:
    print("Restoring parameters from %s" % parameter_checkpoint)
    saver = tf.train.Saver()
    saver.restore(sess, parameter_checkpoint)
    saver = None
    _all_samples_text = []
    for i, s in zip(_all_inputs, _all_samples):
        s_text = proc.decode(s)
        s_text = s_text.replace('\n', ' ')
        _all_samples_text.append(s_text)
    _all_samples_text = tx.utils.strip_eos(_all_samples_text,
                                           eos_token='<|endoftext|>')

    output_file = os.path.join(FLAGS.output_dir, "test_samples.tsv")
    tf.logging.info('Write samples to {}'.format(output_file))
    tx.utils.write_paired_text(
        _all_input_text, _all_samples_text, output_file)

# Broadcasts global variables from the rank-0 process
if FLAGS.distributed:
    bcast = hvd.broadcast_global_variables(0)

session_config = tf.ConfigProto()
if FLAGS.distributed:
    session_config.gpu_options.visible_device_list = str(hvd.local_rank())

with tf.Session(config=session_config) as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())
    sess.run(tf.tables_initializer())

    if FLAGS.distributed:
        # Sync the freshly initialized variables across workers.
        bcast.run()

    # Restores trained model if specified
    if FLAGS.checkpoint:
        tf.logging.info('Restore from {}'.format(FLAGS.checkpoint))
    else:
        saver = None

    ready_for_local_init_op = None
    if self.job_name and not (self.single_session or
                              self.distributed_collective):
        # In distributed mode, we don't want to run local_var_init_op_group
        # until the global variables are initialized, because
        # local_var_init_op_group may use global variables (such as in
        # distributed replicated mode). We don't set this in non-distributed
        # mode, because in non-distributed mode, local_var_init_op_group may
        # itself initialize global variables (such as in replicated mode).
        ready_for_local_init_op = tf.report_uninitialized_variables(
            tf.global_variables())

    if self.params.variable_update == 'horovod':
        import horovod.tensorflow as hvd  # pylint: disable=g-import-not-at-top
        bcast_global_variables_op = hvd.broadcast_global_variables(0)
    else:
        bcast_global_variables_op = None

    if self.params.variable_update == 'collective_all_reduce':
        # It doesn't matter what this collective_graph_key value is,
        # so long as it's > 0 and the same at every worker.
        init_run_options = tf.RunOptions()
        init_run_options.experimental.collective_graph_key = 6
    else:
        init_run_options = tf.RunOptions()

    sv = tf.train.Supervisor(
        # For the purpose of Supervisor, all Horovod workers are 'chiefs',
        # since we want session to be initialized symmetrically on all the
        # workers.
        is_chief=is_chief or (self.params.variable_update == 'horovod'
                              or self.distributed_collective),
config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
if args.eager:
    tf.enable_eager_execution(config)

# Set up standard model.
model = getattr(applications, args.model)(weights=None)
opt = tf.train.GradientDescentOptimizer(0.01)

# Horovod: wrap optimizer with DistributedOptimizer.
# To make a fair comparison with KungFu, we configure Horovod to use CPUs to
# run MPI and gradient averaging.
opt = hvd.DistributedOptimizer(opt)

init = tf.global_variables_initializer()
bcast_op = hvd.broadcast_global_variables(0)


def random_input():
    data = tf.random_uniform([args.batch_size, 224, 224, 3])
    target = tf.random_uniform([args.batch_size, 1],
                               minval=0,
                               maxval=999,
                               dtype=tf.int64)
    return data, target


def disk_input(data_dir):
    from kungfu.tensorflow.v1.helpers import imagenet
    filenames = glob.glob(os.path.join(data_dir, args.file_pattern))
    filenames *= 100  # make it long enough
    return imagenet.create_dataset_from_files(filenames, args.batch_size)
params = Bunch({})
params.epoch = FLAGS.epoch
params.batch_size = FLAGS.batch_size

eval_features = tf_data_utils.eval_input_fn(FLAGS.eval_data_file,
                                            _decode_record, name_to_features,
                                            params, if_shard=FLAGS.if_shard)

[_, eval_loss, eval_per_example_loss, eval_logits] = model_eval_fn(
    eval_features, [], tf.estimator.ModeKeys.EVAL)
result = metric_fn(eval_features, eval_logits, eval_loss)

init_op = tf.group(tf.global_variables_initializer(),
                   tf.local_variables_initializer())
sess.run(init_op)
# All workers evaluate with the same (rank-0) variable values.
sess.run(hvd.broadcast_global_variables(0))
print("===horovod rank==={}".format(hvd.rank()))


def eval_fn(result):
    i = 0
    total_accuracy = 0
    label, label_id, prob = [], [], []
    while True:
        try:
            eval_result = sess.run(result)
            total_accuracy += eval_result["accuracy"]
            label_id.extend(eval_result["label_ids"])
            label.extend(eval_result["pred_label"])
            prob.extend(eval_result["prob"])
            i += 1
        except tf.errors.OutOfRangeError:
            break
def broadcast_global_variables(backend, root_rank):
    return _eval(backend, hvd.broadcast_global_variables(root_rank))
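# For training loops, Horovod also ships a session hook that performs the same
# broadcast automatically when the session is created. A minimal sketch
# (illustrative variable names, not taken from the snippets above):
import tensorflow as tf
import horovod.tensorflow as hvd

hvd.init()
loss = tf.reduce_mean(tf.square(tf.get_variable('v', shape=[4])))
opt = hvd.DistributedOptimizer(tf.train.GradientDescentOptimizer(0.01))
train_op = opt.minimize(loss, global_step=tf.train.get_or_create_global_step())

# BroadcastGlobalVariablesHook(0) pushes rank 0's variables to all workers
# right after session creation, replacing the explicit bcast.run() calls above.
hooks = [hvd.BroadcastGlobalVariablesHook(0)]
# Only rank 0 writes checkpoints, to avoid concurrent writers corrupting them.
ckpt_dir = './checkpoints' if hvd.rank() == 0 else None
with tf.train.MonitoredTrainingSession(checkpoint_dir=ckpt_dir, hooks=hooks) as sess:
    for _ in range(100):
        sess.run(train_op)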