def dump(self):
    """Save the model parameters and weights to a klepto file archive."""
    data_location = self.prm.basic["model_location"] + self.prm.basic["model_name"] + ".prm"
    self.pub("Save model: " + data_location)
    d = klepto.archives.file_archive(data_location, cached=True, serialized=True)
    # Evaluate the shared Theano variables into plain numpy arrays before storing them
    d['layer_weights'] = [[np.asarray(w.eval()) for w in layer] for layer in self.layer_weights]
    d['p_basic'] = self.prm.basic
    d['p_struct'] = self.prm.struct
    d['p_optimize'] = self.prm.optimize
    d.dump()
    d.clear()
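Loading the archive written by dump() back is symmetric. The sketch below is not part of the original snippet; it only assumes the file path and key names used by dump() above.

# Hedged sketch: restoring a model saved by dump() (path and keys assumed from dump() above).
import klepto

def load_model(data_location):
    d = klepto.archives.file_archive(data_location, cached=True, serialized=True)
    d.load()                              # pull the serialized entries into the in-memory cache
    layer_weights = d['layer_weights']    # list of lists of numpy arrays, as written by dump()
    p_basic = d['p_basic']
    p_struct = d['p_struct']
    p_optimize = d['p_optimize']
    d.clear()
    return layer_weights, p_basic, p_struct, p_optimize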
def creat_minibatches(data_location, data_set_name, batch_size, mb_set_name):
    rng = np.random.RandomState(seed=1)
    # Load the data set from its klepto archive
    file_name = data_location + data_set_name + ".klepto"
    print(file_name)
    d = klepto.archives.file_archive(file_name, cached=True, serialized=True)
    d.load()
    data_set_x = d['x']
    data_set_y = d['y']
    d.clear()
    # Data set info
    len_set = len(data_set_x)
    print("len_set " + str(len_set))
    len_x = data_set_x[0].shape[1]
    nbatches = int(len_set / batch_size)   # samples beyond the last full batch are dropped
    len_y = data_set_y[0].shape[1]
    print(len_x)
def make_klepto_file(set_name, input_data, output_data):
    file_name = "data_set/little-timer_" + set_name + ".klepto"
    print("data set name: " + file_name)
    d = klepto.archives.file_archive(file_name, cached=True, serialized=True)
    d['x'] = input_data
    d['y'] = output_data
    d.dump()
    d.clear()
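A minimal, hypothetical call to make_klepto_file(); the toy arrays below are made up and only illustrate the expected list-of-2D-arrays layout.

# Hedged usage example for make_klepto_file(); the data is synthetic and the
# data_set/ directory is assumed to exist.
import numpy as np

toy_x = [np.random.rand(20, 2).astype('float32') for _ in range(5)]   # 5 sequences, 2 input features each
toy_y = [np.random.rand(20, 4).astype('float32') for _ in range(5)]   # matching targets with 4 outputs
make_klepto_file("toy", toy_x, toy_y)   # writes data_set/little-timer_toy.klepto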
# Build the CTC target sequence: frame every label with the blank symbol
y1 = [self.prm.struct["net_size"][-1] - 1]
for char in data_set_y[s]:
    y1 += [char, self.prm.optimize['CTC_blank']]
mb_train_y[k, :len(y1)] = y1
if self.prm.data["batch_size"] > 1:
    # Integer division so the result can be used as a slice index (Python 3)
    half_len = mb_train_y.shape[1] // 2
    mb_train_y[k, half_len:half_len + len(y1)] = np.ones(len(y1))
data_mb_x.append(mb_train_x.astype(theano.config.floatX))
data_mb_y.append(mb_train_y.astype(theano.config.floatX))
data_mask.append(mb_mask.astype(theano.config.floatX))
file_name = self.prm.data["mini_batch_location"] + "mb_of_" + self.prm.data[set + "_data_name"]
d = klepto.archives.file_archive(file_name, cached=True, serialized=True)
d['x'] = data_mb_x
d['y'] = data_mb_y
d['m'] = data_mask
d.dump()
d.clear()
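For intuition, a self-contained sketch of the blank interleaving used above; the label values and network size here are made up.

# Hedged illustration of the CTC target layout built above (all values are invented).
net_size_last = 5                     # assume 5 output units, so the blank index is 4
CTC_blank = net_size_last - 1
labels = [1, 3]                       # a made-up label sequence for one sample
y1 = [CTC_blank]
for char in labels:
    y1 += [char, CTC_blank]
print(y1)                             # -> [4, 1, 4, 3, 4]: every label framed by blanks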
if folder_name in train_speaker:
    train_set_x.append(feat)
    train_set_y.append(target)
elif folder_name in valid_speaker:
    valid_set_x.append(feat)
    valid_set_y.append(target)
else:
    assert False, "unknown name"

print("write valid set")
print("valid set length: " + str(len(valid_set_x)))
file_name = drainDir + "timit_" + "valid_" + "xy_" + para_name + ".klepto"
print("valid set name: " + file_name)
d = klepto.archives.file_archive(file_name, cached=True, serialized=True)
d['x'] = valid_set_x
d['y'] = valid_set_y
d.dump()
d.clear()

print("write train set")
print("train set length: " + str(len(train_set_x)))
file_name = drainDir + "timit_" + "train_" + "xy_" + para_name + ".klepto"
print("train set name: " + file_name)
d = klepto.archives.file_archive(file_name, cached=True, serialized=True)
d['x'] = train_set_x
d['y'] = train_set_y
d.dump()
d.clear()
def check_out_data_set(self):
    """Verify that the configured data sets exist and match the network dimensions."""
    for set in ['train', 'valid', 'test']:
        if self.prm.data[set + "_data_name"] is not None:
            file_name = self.prm.data["data_location"] + self.prm.data[set + "_data_name"]
            try:
                d = klepto.archives.file_archive(file_name, cached=True, serialized=True)
                d.load()
                data_set_x = d['x']
                data_set_y = d['y']
                d.clear()
            except KeyError:
                # Placeholder: the original except clause is not part of this snippet
                raise Warning("could not load " + set + " data set from " + file_name)
            self.prm.data[set + "_set_len"] = len(data_set_x)
            if len(data_set_x) != len(data_set_y):
                raise Warning("x and y of " + set + "_data_name do not have the same length")
            self.prm.data["x_size"] = data_set_x[0].shape[1]
            if self.prm.data["x_size"] != int(self.prm.struct["net_size"][0]):
                raise Warning(set + " data x size and net input size are unequal")
            if self.prm.optimize['CTC'] == False:
                self.prm.data["y_size"] = data_set_y[0].shape[1]
                if self.prm.data["y_size"] != int(self.prm.struct["net_size"][-1]):
                    raise Warning(set + " data y size and net output size are unequal")
            else:
                # With CTC the targets are label sequences, so y_size falls back to the output layer size
                self.prm.data["y_size"] = self.prm.struct["net_size"][-1]
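check_out_data_set() only reads a handful of keys from self.prm. Below is a hedged sketch of the minimal dictionaries it touches; every value is a placeholder, not taken from the original configuration.

# Hedged sketch of the parameter dictionaries check_out_data_set() reads; values are placeholders.
prm_data = {
    "data_location": "data_set/",
    "train_data_name": "little-timer_train.klepto",
    "valid_data_name": "little-timer_valid.klepto",
    "test_data_name": None,              # sets that are None are skipped
}
prm_struct = {"net_size": [2, 10, 4]}    # input size, hidden size, output size
prm_optimize = {"CTC": False}            # with CTC enabled, y_size falls back to net_size[-1]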
def __get_class(self):
    import klepto.archives as archives
    return getattr(archives, self.__archive__.__class__.__name__)
def __archive(self, archive):
# Convert to numpy arrays so they can be shuffled with fancy indexing
data_mb_x = np.asarray(data_mb_x)
data_mb_y = np.asarray(data_mb_y)
data_mask = np.asarray(data_mask)
# Permute the train set
data_set_length = len(data_mb_x)
order = rng.permutation(np.arange(0, data_set_length))
data_mb_x = data_mb_x[order]
data_mb_y = data_mb_y[order]
data_mask = data_mask[order]
print("write minibatch set")
print("minibatch set length: " + str(len(data_mb_x)))
file_name = data_location + mb_set_name
print("minibatch set name: " + file_name)
d = klepto.archives.file_archive(file_name, cached=True, serialized=True)
d['x'] = data_mb_x
d['y'] = data_mb_y
d['m'] = data_mask
d.dump()
d.clear()
return
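Reading the minibatch archive back for training is the mirror image of the write above. The sketch below only assumes the 'x'/'y'/'m' keys used by the writer; the file path is hypothetical.

# Hedged sketch: iterating over the minibatch archive written above (keys assumed from the writer).
import klepto

mb_file = "data_set/mb_of_little-timer_train.klepto"   # hypothetical path, assembled like in creat_minibatches()
d = klepto.archives.file_archive(mb_file, cached=True, serialized=True)
d.load()
mb_x, mb_y, mb_m = d['x'], d['y'], d['m']
d.clear()
for batch_x, batch_y, batch_mask in zip(mb_x, mb_y, mb_m):
    # one training step would consume (inputs, targets, sequence mask) here
    pass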