# tail of the loop (not shown) that writes random features through an opened
# feature writer ``f`` and records the expected inputs/targets
uttid = 'uttid{}'.format(i)
f[uttid] = x
desire_xs.append(x)
desire_ys.append(np.array([1, 2, 3, 4]))

batch = []
with open(scp, 'r') as f:
    for line in f:
        uttid, path = line.strip().split()
        batch.append((uttid,
                      {'input': [{'feat': path,
                                  'name': 'input1'}],
                       'output': [{'tokenid': '1 2 3 4',
                                   'name': 'target1'}]}))

load_inputs_and_targets = LoadInputsAndTargets()
xs, ys = load_inputs_and_targets(batch)
for x, xd in zip(xs, desire_xs):
    np.testing.assert_array_equal(x, xd)
for y, yd in zip(ys, desire_ys):
    np.testing.assert_array_equal(y, yd)

import h5py
import numpy as np

from espnet.utils.io_utils import LoadInputsAndTargets

p = 'test.h5'  # any writable path for the HDF5 feature file
desire_xs = []
desire_ys = []
batch = []
with h5py.File(str(p), 'w') as f:
    # batch: List[Tuple[str, Dict[str, List[Dict[str, Any]]]]]
    for i in range(10):
        x = np.random.random((100, 100)).astype(np.float32)
        uttid = 'uttid{}'.format(i)
        f[uttid] = x

        batch.append((uttid,
                      {'input': [{'feat': str(p) + ':' + uttid,
                                  'filetype': 'hdf5',
                                  'name': 'input1'}],
                       'output': [{'tokenid': '1 2 3 4',
                                   'name': 'target1'}]}))
        desire_xs.append(x)
        desire_ys.append(np.array([1, 2, 3, 4]))

load_inputs_and_targets = LoadInputsAndTargets()
xs, ys = load_inputs_and_targets(batch)
for x, xd in zip(xs, desire_xs):
    np.testing.assert_array_equal(x, xd)
for y, yd in zip(ys, desire_ys):
    np.testing.assert_array_equal(y, yd)
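
For reference, a single element of the batch consumed in both tests above has the following shape; the feature path here is only illustrative:

# One batch element: (utterance-id, info-dict).  'feat' points at the stored
# features (an scp/ark path, or 'file.h5:key' when filetype is 'hdf5'), and
# 'tokenid' holds the space-separated target token ids.
entry = ('uttid0',
         {'input': [{'feat': 'test.h5:uttid0',
                     'filetype': 'hdf5',
                     'name': 'input1'}],
          'output': [{'tokenid': '1 2 3 4',
                      'name': 'target1'}]})

# xs, ys = LoadInputsAndTargets()([entry, ...])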
"""

logging.warning("experimental API for custom LMs is selected by --api v2")
if args.batchsize > 1:
    raise NotImplementedError("batch decoding is not implemented")
if args.streaming_mode is not None:
    raise NotImplementedError("streaming mode is not implemented")
if args.word_rnnlm:
    raise NotImplementedError("word LM is not implemented")

set_deterministic_pytorch(args)
model, train_args = load_trained_model(args.model)
assert isinstance(model, ASRInterface)
model.eval()

load_inputs_and_targets = LoadInputsAndTargets(
    mode='asr', load_output=False, sort_in_input_length=False,
    preprocess_conf=train_args.preprocess_conf
    if args.preprocess_conf is None else args.preprocess_conf,
    preprocess_args={'train': False})

if args.rnnlm:
    lm_args = get_model_conf(args.rnnlm, args.rnnlm_conf)
    # NOTE: kept for compatibility with models trained before v0.5.0
    lm_model_module = getattr(lm_args, "model_module", "default")
    lm_class = dynamic_import_lm(lm_model_module, lm_args.backend)
    lm = lm_class(len(train_args.char_list), lm_args)
    torch_load(args.rnnlm, lm)
    lm.eval()
else:
    lm = None
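
From this point decoding applies the loader one utterance at a time; with load_output=False it yields only the input features. A minimal sketch, assuming args.recog_json points at an ESPnet-style data JSON (the model-specific beam-search step is omitted):

import json

with open(args.recog_json, 'rb') as f:
    js = json.load(f)['utts']

for name in js.keys():
    # the loader takes a list of (uttid, info) pairs and returns a tuple of
    # lists; [0][0] picks the feature matrix of this single utterance
    feat = load_inputs_and_targets([(name, js[name])])[0][0]
    # feat is a (frames, dims) numpy array ready for the model-specific decoder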

# closing arguments of the train-side make_batchset(train_json, ...) call (start not shown)
    batch_frames_out=args.batch_frames_out,
    batch_frames_inout=args.batch_frames_inout)

valid = make_batchset(valid_json, args.batch_size,
                      args.maxlen_in, args.maxlen_out, args.minibatches,
                      min_batch_size=args.ngpu if args.ngpu > 1 else 1,
                      count=args.batch_count,
                      batch_bins=args.batch_bins,
                      batch_frames_in=args.batch_frames_in,
                      batch_frames_out=args.batch_frames_out,
                      batch_frames_inout=args.batch_frames_inout)

load_tr = LoadInputsAndTargets(
    mode='asr', load_output=True, preprocess_conf=args.preprocess_conf,
    preprocess_args={'train': True}  # Switch the mode of preprocessing
)
load_cv = LoadInputsAndTargets(
    mode='asr', load_output=True, preprocess_conf=args.preprocess_conf,
    preprocess_args={'train': False}  # Switch the mode of preprocessing
)

# hack to keep the DataLoader's batchsize argument at 1:
# the actual batchsize is already contained in each list element
train_iter = {'main': ChainerDataLoader(
    dataset=TransformDataset(train, lambda data: converter([load_tr(data)])),
    batch_size=1, num_workers=args.n_iter_processes,
    shuffle=not use_sortagrad, collate_fn=lambda x: x[0])}
valid_iter = {'main': ChainerDataLoader(
    dataset=TransformDataset(valid, lambda data: converter([load_cv(data)])),
    batch_size=1, shuffle=False, collate_fn=lambda x: x[0],
    num_workers=args.n_iter_processes)}
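
Because each element of train and valid is already a full minibatch, batching here is effectively a no-op: with batch_size=1 the DataLoader hands collate_fn a one-element list, and the lambda unwraps it rather than letting the default collate stack the arrays into tensors. A minimal illustration of that collate behaviour (the tuple is a stand-in for the converter's output):

collate = lambda x: x[0]
minibatch = ('xs_pad', 'ilens', 'ys_pad')  # stand-in for converter(...)'s output
assert collate([minibatch]) is minibatch   # the length-1 wrapper list is removed
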
# Set up a trainer
updater = CustomUpdater(

def __init__(self, config):
    super().__init__(config)
    taskconf = self.config['data']['task']
    assert taskconf['type'] == TASK_SET['asr']
    self.subsampling_factor = taskconf['src']['subsampling_factor']
    self.preprocess_conf = taskconf['src']['preprocess_conf']

    # mode: asr or tts
    self.load_inputs_and_targets = LoadInputsAndTargets(
        mode=taskconf['type'],
        load_output=True,
        preprocess_conf=self.preprocess_conf)
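
For reference, the constructor above expects a config of roughly this shape; the concrete values are illustrative, and TASK_SET['asr'] is whatever identifier the surrounding project defines for the ASR task:

config = {
    'data': {
        'task': {
            'type': 'asr',  # must equal TASK_SET['asr'] in this project
            'src': {
                'subsampling_factor': 4,                    # illustrative
                'preprocess_conf': 'conf/preprocess.yaml',  # illustrative path
            },
        },
    },
}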

    word_dict, char_dict))  # closing arguments of the word/char LM construction (start not shown)

# gpu
if args.ngpu == 1:
    gpu_id = list(range(args.ngpu))
    logging.info('gpu id: ' + str(gpu_id))
    model.cuda()
    if rnnlm:
        rnnlm.cuda()

# read json data
with open(args.trans_json, 'rb') as f:
    js = json.load(f)['utts']
new_js = {}

load_inputs_and_targets = LoadInputsAndTargets(
    mode='asr', load_output=False, sort_in_input_length=False,
    preprocess_conf=train_args.preprocess_conf
    if args.preprocess_conf is None else args.preprocess_conf,
    preprocess_args={'train': False})

if args.batchsize == 0:
    with torch.no_grad():
        for idx, name in enumerate(js.keys(), 1):
            logging.info('(%d/%d) decoding ' + name, idx, len(js.keys()))
            batch = [(name, js[name])]
            feat = load_inputs_and_targets(batch)[0][0]
            nbest_hyps = model.translate(feat, args, train_args.char_list, rnnlm)
            new_js[name] = add_results_to_json(js[name], nbest_hyps,
                                               train_args.char_list)
else:

    def grouper(n, iterable, fillvalue=None):

# closing arguments of the train-side make_batchset(train_json, ...) call (start not shown)
    iaxis=0, oaxis=0)

valid = make_batchset(valid_json, args.batch_size,
                      args.maxlen_in, args.maxlen_out, args.minibatches,
                      min_batch_size=args.ngpu if args.ngpu > 1 else 1,
                      count=args.batch_count,
                      batch_bins=args.batch_bins,
                      batch_frames_in=args.batch_frames_in,
                      batch_frames_out=args.batch_frames_out,
                      batch_frames_inout=args.batch_frames_inout,
                      iaxis=0, oaxis=0)

load_tr = LoadInputsAndTargets(
    mode='asr', load_output=True, preprocess_conf=args.preprocess_conf,
    preprocess_args={'train': True}  # Switch the mode of preprocessing
)
load_cv = LoadInputsAndTargets(
    mode='asr', load_output=True, preprocess_conf=args.preprocess_conf,
    preprocess_args={'train': False}  # Switch the mode of preprocessing
)

# hack to keep the DataLoader's batchsize argument at 1:
# the actual batchsize is already contained in each list element.
# The default collate function would convert numpy arrays to pytorch tensors,
# so a pass-through collate function that just returns the list element is used.
train_iter = {'main': ChainerDataLoader(
    dataset=TransformDataset(train, lambda data: converter([load_tr(data)])),
    batch_size=1, num_workers=args.n_iter_processes,
    shuffle=not use_sortagrad, collate_fn=lambda x: x[0])}
valid_iter = {'main': ChainerDataLoader(
    dataset=TransformDataset(valid, lambda data: converter([load_cv(data)])),
    batch_size=1, shuffle=False, collate_fn=lambda x: x[0],
    num_workers=args.n_iter_processes)}
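
Each element of train and valid produced by make_batchset is itself a minibatch, i.e. a list of (uttid, info) pairs in the data-JSON format, so the loaders can be applied to one directly. A small sketch:

first_minibatch = train[0]           # a list of (uttid, info) pairs from the data JSON
xs, ys = load_tr(first_minibatch)    # feature arrays and token-id target arrays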

model = model_class(idim, odim, train_args)
assert isinstance(model, ASRInterface)
torch_load(args.model, model)
model.recog_args = args

# gpu
if args.ngpu == 1:
    gpu_id = list(range(args.ngpu))
    logging.info('gpu id: ' + str(gpu_id))
    model.cuda()

# read json data
with open(args.recog_json, 'rb') as f:
    js = json.load(f)['utts']

load_inputs_and_targets = LoadInputsAndTargets(
    mode='asr', load_output=False, sort_in_input_length=False,
    preprocess_conf=None  # Apply pre_process in outer func
)
if args.batchsize == 0:
    args.batchsize = 1

# Creates writers for outputs from the network
if args.enh_wspecifier is not None:
    enh_writer = file_writer_helper(args.enh_wspecifier,
                                    filetype=args.enh_filetype)
else:
    enh_writer = None

# Creates a Transformation instance
preprocess_conf = (
    train_args.preprocess_conf if args.preprocess_conf is None
    else args.preprocess_conf)
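
Downstream, the enhanced features produced by the network are written through this writer. A minimal sketch, assuming the writer follows the usual writer[utt_id] = array convention; the names name and enhanced are illustrative:

if enh_writer is not None:
    # keys are utterance ids, values are numpy arrays of enhanced features
    enh_writer[name] = enhanced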

# closing arguments of the train-side make_batchset(train_json, ...) call (start not shown)
    shortest_first=use_sortagrad,
    count=args.batch_count,
    batch_bins=args.batch_bins,
    batch_frames_in=args.batch_frames_in,
    batch_frames_out=args.batch_frames_out,
    batch_frames_inout=args.batch_frames_inout)

valid = make_batchset(valid_json, args.batch_size,
                      args.maxlen_in, args.maxlen_out, args.minibatches,
                      min_batch_size=args.ngpu if args.ngpu > 1 else 1,
                      count=args.batch_count,
                      batch_bins=args.batch_bins,
                      batch_frames_in=args.batch_frames_in,
                      batch_frames_out=args.batch_frames_out,
                      batch_frames_inout=args.batch_frames_inout)

load_tr = LoadInputsAndTargets(
    mode='asr', load_output=True, preprocess_conf=args.preprocess_conf,
    preprocess_args={'train': True}  # Switch the mode of preprocessing
)
load_cv = LoadInputsAndTargets(
    mode='asr', load_output=True, preprocess_conf=args.preprocess_conf,
    preprocess_args={'train': False}  # Switch the mode of preprocessing
)

# hack to keep the iterator's batchsize argument at 1:
# the actual batchsize is already contained in each list element
if args.n_iter_processes > 0:
    train_iter = ToggleableShufflingMultiprocessIterator(
        TransformDataset(train, load_tr),
        batch_size=1, n_processes=args.n_iter_processes,
        n_prefetch=8, maxtasksperchild=20,
        shuffle=not use_sortagrad)
    valid_iter = ToggleableShufflingMultiprocessIterator(
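
By analogy with train_iter above, the validation iterator would wrap valid with load_cv in the same way but without shuffling; a sketch under that assumption, not the verbatim original:

    valid_iter = ToggleableShufflingMultiprocessIterator(
        TransformDataset(valid, load_cv),
        batch_size=1, repeat=False, shuffle=False,
        n_processes=args.n_iter_processes, n_prefetch=8,
        maxtasksperchild=20)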