# ShortTextEncoder tests. `generate_sentences` is a test helper (a sketch
# follows these tests); `random` is from the stdlib.
import random

def _test_mean(self, onehot):
    vocab_size = 99 if onehot else 800
    priming_data = generate_sentences(2, 6, vocab_size)
    test_data = random.sample(priming_data, len(priming_data) // 5)

    enc = ShortTextEncoder(combine='mean')
    enc.prepare_encoder(priming_data)

    # Small vocabularies are one-hot encoded directly; larger ones go
    # through the autoencoder.
    if onehot:
        assert not enc.cae.use_autoencoder
    else:
        assert enc.cae.use_autoencoder

    encoded_data = enc.encode(test_data)
    assert len(test_data) == len(encoded_data)

    # 'mean' combine is lossy, so decoding must raise.
    with self.assertRaises(ValueError):
        enc.decode(encoded_data)
def _test_concat(self, onehot):
    vocab_size = 99 if onehot else 800
    priming_data = generate_sentences(2, 6, vocab_size)
    test_data = random.sample(priming_data, len(priming_data) // 5)

    enc = ShortTextEncoder(combine='concat')
    enc.prepare_encoder(priming_data)

    if onehot:
        assert not enc.cae.use_autoencoder
    else:
        assert enc.cae.use_autoencoder

    encoded_data = enc.encode(test_data)
    decoded_data = enc.decode(encoded_data)
    assert len(test_data) == len(encoded_data) == len(decoded_data)

    # 'concat' combine is reversible: each decoded token list, joined back
    # into a sentence, should round-trip to its input.
    for x_sent, y_sent in zip(
            test_data,
            [' '.join(x) for x in decoded_data]
    ):
        assert x_sent == y_sent
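# The tests above rely on a `generate_sentences(min_len, max_len, vocab_size)`
# helper that is not shown on this page. A minimal sketch of what it could
# look like (the name and signature come from the calls above; the body and
# the `n_sentences` parameter are assumptions):
import random

def generate_sentences(min_len, max_len, vocab_size, n_sentences=1000):
    # Build sentences of random length from a synthetic integer vocabulary.
    vocab = [str(i) for i in range(vocab_size)]
    return [
        ' '.join(random.choices(vocab, k=random.randint(min_len, max_len)))
        for _ in range(n_sentences)
    ]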
def run_full_test(USE_CUDA, CACHE_ENCODED_DATA, SELFAWARE, PLINEAR):
    '''
    Run the full test example with the home_rentals dataset.
    '''
    lightwood.config.config.CONFIG.USE_CUDA = USE_CUDA
    lightwood.config.config.CONFIG.PLINEAR = PLINEAR

    config = {
        'input_features': [
            {'name': 'number_of_bathrooms', 'type': 'numeric'},
            {'name': 'sqft', 'type': 'numeric'},
            {'name': 'days_on_market', 'type': 'numeric'},
            {'name': 'neighborhood', 'type': 'categorical', 'dropout': 0.4},
        ],
        'output_features': [
            {'name': 'number_of_rooms', 'type': 'categorical', 'weights': {
                '0': 0.8,
                '1': 0.6,
                '2': 0.5,
                '3': 0.7,
                '4': 1,
            }},
            {'name': 'rental_price', 'type': 'numeric'},
            {'name': 'location', 'type': 'categorical'},
        ],
        'data_source': {'cache_transformed_data': CACHE_ENCODED_DATA},
        'mixer': {'class': lightwood.BUILTIN_MIXERS.NnMixer, 'selfaware': SELFAWARE},
    }
def iter_function(epoch, training_error, test_error, test_error_gradient, test_accuracy):
    print(f'Epoch: {epoch}, Train Error: {training_error}, Test Error: {test_error}, '
          f'Test Error Gradient: {test_error_gradient}, Test Accuracy: {test_accuracy}')
test_cases = [gen_multiply(), gen_correlate(), gen_categorical()]

log_map = {}
for i, data in enumerate(test_cases):
    df_train, df_test, dropout_arr, out_col, name = data

    pmap = {}
    accmap = {}

    # Baseline: train and evaluate with all columns present.
    pmap['normal'] = lightwood.Predictor(output=[out_col])
    pmap['normal'].learn(from_data=df_train, callback_on_iter=iter_function, eval_every_x_epochs=100)
    accmap['normal'] = pmap['normal'].calculate_accuracy(from_data=df_test)[out_col]['value']

    # For each dropout set, compare a predictor trained without those
    # columns ('fit') against the baseline evaluated without them ('unfit').
    for cols in dropout_arr:
        mk = 'missing_' + '_'.join(cols)
        pmap[mk] = lightwood.Predictor(output=[out_col])
        pmap[mk].learn(from_data=df_train.drop(columns=cols), callback_on_iter=iter_function, eval_every_x_epochs=100)
        accmap[mk + '_unfit'] = pmap['normal'].calculate_accuracy(from_data=df_test.drop(columns=cols))[out_col]['value']
        accmap[mk + '_fit'] = pmap[mk].calculate_accuracy(from_data=df_test.drop(columns=cols))[out_col]['value']

    text = f'\n---------\nTest case {name}\n---------\nNormal accuracy of: ' + str(accmap['normal'])
    for cols in dropout_arr:
        mk = 'missing_' + '_'.join(cols)
        text += f'\nSpecially-trained accuracy when {cols} missing: ' + str(accmap[mk + '_fit'])
        text += f'\nNormally-trained accuracy when {cols} missing: ' + str(accmap[mk + '_unfit'])
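# The gen_* helpers above are not shown on this page. Each returns a
# (df_train, df_test, dropout_arr, out_col, name) tuple, matching the
# unpacking in the loop. A hypothetical gen_multiply could look like this
# (the column names and split are assumptions):
import numpy as np
import pandas as pd

def gen_multiply(n=1000):
    # Target is the product of two input columns; 'b' is the candidate
    # column to drop in the dropout experiments.
    a = np.random.rand(n)
    b = np.random.rand(n)
    df = pd.DataFrame({'a': a, 'b': b, 'y': a * b})
    split = int(n * 0.8)
    return df[:split], df[split:], [['b']], 'y', 'multiply'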
def iter_function(epoch, error, test_error, test_error_gradient, test_accuracy):
    print(f'epoch: {epoch}, error: {error}, test_error: {test_error}, '
          f'test_error_gradient: {test_error_gradient}, test_accuracy: {test_accuracy}')
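# `config` below is the dict built in run_full_test above; `df` is assumed
# to be the home_rentals dataset loaded as a pandas DataFrame (the CSV path
# here is hypothetical):
import pandas as pd

df = pd.read_csv('home_rentals.csv')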
predictor = Predictor(config)
# stop_training_after_seconds is set so the Travis CI build doesn't time out.
predictor.learn(from_data=df, callback_on_iter=iter_function, eval_every_x_epochs=4,
                stop_training_after_seconds=80)

# Drop the target columns before predicting.
df = df.drop([x['name'] for x in config['output_features']], axis=1)
predictor.predict(when_data=df)

# Round-trip the predictor through serialization.
predictor.save('test.pkl')
predictor = Predictor(load_from_path='test.pkl')

for j in range(100):
    pred = predictor.predict(when={'sqft': round(j * 10)})['number_of_rooms']['predictions'][0]
    assert isinstance(pred, (str, int))
def test_encoded_cache(self):
    df, config = self.df, self.config
    ds = DataSource(df, config)
    assert not ds.disable_cache
    ds.prepare_encoders()

    for column in ['x1', 'x2', 'y']:
        # A column is cached only once its encoded data is first requested.
        assert column not in ds.encoded_cache
        encoded_column = ds.get_encoded_column_data(column)
        assert (ds.encoded_cache[column] == encoded_column).all()
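# The test above pulls self.df and self.config from the suite's setUp,
# which is not shown on this page. A minimal sketch matching the column
# names used in the test (everything else is an assumption):
import numpy as np
import pandas as pd

def setUp(self):
    n = 100
    self.df = pd.DataFrame({
        'x1': np.random.rand(n),
        'x2': np.random.rand(n),
        'y': np.random.rand(n),
    })
    self.config = {
        'input_features': [
            {'name': 'x1', 'type': 'numeric'},
            {'name': 'x2', 'type': 'numeric'},
        ],
        'output_features': [{'name': 'y', 'type': 'numeric'}],
    }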
def test_padding(self):
    series = [['1', '2', '3'], ['2', '3'], ['3', '4', '5', '6'], ['4', '5', '6']]
    target = [[1.0, 2.0, 3.0, 4.0, 0.0], [2.0, 3.0, 4.0, 5.0, 0.0], [3.0, 0.0, 5.0, 6.0, 0.0]]
    result = tensor_from_series(series, get_devices()[0], n_dims=5, pad_value=0.0, max_len=3).tolist()[0]
    self.assertEqual(result, target)
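# A pure-Python sketch of the layout the assertion above expects, assuming
# tensor_from_series stacks timesteps as rows and series as columns, padding
# both short series and unused dims with pad_value (the helper name
# `pad_layout` is hypothetical):
def pad_layout(series, n_dims, pad_value, max_len):
    return [
        [float(series[j][i]) if j < len(series) and i < len(series[j]) else pad_value
         for j in range(n_dims)]
        for i in range(max_len)
    ]

# Row 2 illustrates both kinds of padding: series[1] has no third element,
# and only four series exist for five dims.
assert pad_layout(
    [['1', '2', '3'], ['2', '3'], ['3', '4', '5', '6'], ['4', '5', '6']],
    n_dims=5, pad_value=0.0, max_len=3,
) == [[1.0, 2.0, 3.0, 4.0, 0.0], [2.0, 3.0, 4.0, 5.0, 0.0], [3.0, 0.0, 5.0, 6.0, 0.0]]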