How to use the lightwood.Predictor class in lightwood

To help you get started, we’ve selected a few lightwood examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github mindsdb / lightwood / tests / development / dropout.py View on Github external
def iter_function(epoch, training_error, test_error, test_error_gradient, test_accuracy):
    """Print a one-line summary of the metrics reported for a training epoch.

    The function is handed to ``Predictor.learn`` as ``callback_on_iter``.
    Every argument is echoed as received; nothing is returned.
    """
    template = 'Epoch: {epoch}, Train Error: {training_error}, Test Error: {test_error}, Test Error Gradient: {test_error_gradient}, Test Accuracy: {test_accuracy}'
    message = template.format(
        epoch=epoch,
        training_error=training_error,
        test_error=test_error,
        test_error_gradient=test_error_gradient,
        test_accuracy=test_accuracy,
    )
    print(message)


# One generated dataset per scenario; the gen_* helpers are defined outside this excerpt.
test_cases = [gen_multiply(),gen_correlate(),gen_categorical()]

# NOTE(review): never written to in the visible part of the script — presumably
# filled with the per-case report further down; confirm.
log_map = {}
for i, data in enumerate(test_cases):
    # Each case unpacks into a train frame, a test frame, the column groups to
    # drop, the target column and a display name.
    df_train, df_test, dropout_arr, out_col, name = data

    # Predictors and their accuracy values, keyed by scenario name.
    pmap = {}
    accmap = {}

    # Baseline: train and score with every column present.
    pmap['normal'] = lightwood.Predictor(output=[out_col])
    pmap['normal'].learn(from_data=df_train, callback_on_iter=iter_function, eval_every_x_epochs=100)
    accmap['normal'] = pmap['normal'].calculate_accuracy(from_data=df_test)[out_col]['value']

    # For each group of columns, compare two predictors on test data without
    # those columns: the baseline ('_unfit') and one trained without them ('_fit').
    for cols in dropout_arr:
        mk = 'missing_' + '_'.join(cols)
        pmap[mk] = lightwood.Predictor(output=[out_col])
        pmap[mk].learn(from_data=df_train.drop(columns=cols), callback_on_iter=iter_function, eval_every_x_epochs=100)
        accmap[mk + '_unfit'] = pmap['normal'].calculate_accuracy(from_data=df_test.drop(columns=cols))[out_col]['value']
        accmap[mk + '_fit'] = pmap[mk].calculate_accuracy(from_data=df_test.drop(columns=cols))[out_col]['value']

    # Build the textual report for this test case from the collected accuracies.
    # NOTE(review): `text` is not used in the visible excerpt — presumably
    # logged or stored after this point; confirm.
    text = f'\n---------\nTest case {name}\n---------\nNormal accuracy of: ' + str(accmap['normal'])
    for cols in dropout_arr:
        mk = 'missing_' + '_'.join(cols)
        text += f'\nSpecially-trained trained accuracy when {cols} missing: ' + str(accmap[mk + '_fit'])
        text += f'\nNormally-trained trained accuracy when {cols} missing: ' + str(accmap[mk + '_unfit'])
github mindsdb / lightwood / tests / ci_tests / ci_tests.py View on Github external
def iter_function(epoch, error, test_error, test_error_gradient, test_accuracy):
        """Print the training metrics handed to the callback on a single line.

        The message template only has placeholders for the five arguments, so
        the function reads nothing outside its parameters. The former
        ``accuracy=predictor.train_accuracy`` keyword matched no placeholder:
        it never reached the output and only forced a lookup of an outer
        ``predictor`` on every call, so it has been dropped.

        Returns None.
        """
        print(
            'epoch: {iter}, error: {error}, test_error: {test_error}, test_error_gradient: {test_error_gradient}, test_accuracy: {test_accuracy}'.format(
                iter=epoch, error=error, test_error=test_error,
                test_error_gradient=test_error_gradient, test_accuracy=test_accuracy))

    # Build a predictor from the config and train it on the full frame,
    # passing iter_function as the per-iteration callback.
    predictor = Predictor(config)
    # stop_training_after_seconds given in order to not get timeouts in travis
    predictor.learn(from_data=df, callback_on_iter=iter_function, eval_every_x_epochs=4, stop_training_after_seconds=80)

    # Drop the configured output columns before asking for batch predictions.
    df = df.drop([x['name'] for x in config['output_features']], axis=1)
    predictor.predict(when_data=df)

    # Round-trip through disk; the remaining checks run on the reloaded predictor.
    predictor.save('test.pkl')
    predictor = Predictor(load_from_path='test.pkl')

    # Single-row predictions from a dict: the first predicted value for
    # 'number_of_rooms' must be a str or an int for every probed 'sqft'.
    for j in range(100):
        pred = predictor.predict(when={'sqft': round(j * 10)})['number_of_rooms']['predictions'][0]
        assert(isinstance(pred, str) or isinstance(pred, int))
github mindsdb / lightwood / docs / examples / time_series.py View on Github external
# Predictor configuration: one time-series input column 'ts' and one numeric
# output column 'next'.
config = {'input_features': [{'name': 'ts', 'type': COLUMN_DATA_TYPES.TIME_SERIES }],
 'output_features': [{'name': 'next', 'type': 'numeric'}]}



def iter_function(epoch, error, test_error, test_error_gradient):
    """Print the metrics for one training iteration plus the current train accuracy.

    The accuracy is not a parameter: it is read from the module-level
    ``predictor`` (``predictor.train_accuracy``) at call time, so that name
    must be bound before the callback first runs. Returns None.
    """
    template = 'epoch: {iter}, error: {error}, test_error: {test_error}, test_error_gradient: {test_error_gradient}, accuracy: {accuracy}'
    values = {
        'iter': epoch,
        'error': error,
        'test_error': test_error,
        'test_error_gradient': test_error_gradient,
        'accuracy': predictor.train_accuracy,
    }
    print(template.format(**values))


# Training frame with columns 'time', 'ts' and 'next'; ts_data is built outside this excerpt.
data = pandas.DataFrame(ts_data, columns=['time', 'ts', 'next'])

predictor = Predictor(config)


predictor.learn(from_data=data, callback_on_iter=iter_function, eval_every_x_epochs=10)


# Query with a space-separated string of sin(i/max) values for i = 11 .. 10+ts_len-1.
# NOTE(review): `max` must be a number defined earlier in the script, which
# shadows the builtin of the same name — confirm.
ret = predictor.predict(when={'ts':" ".join([str(math.sin(i/max)) for i in range(10+1, 10+ts_len)])})
# Print the same series extended by one more value (i up to 10+ts_len), then the prediction.
print(" ".join([str(math.sin(i/max)) for i in range(10+1, 10+ts_len+1)]))
print(ret)
github mindsdb / lightwood / docs / examples / learn_to_classify.py View on Github external
# Target: for each row n, the most common value among x_0 .. x_{nr_inputs-1}.
# NOTE(review): `data`, `nr_inputs` and `nr_ele` are bound outside this excerpt —
# presumably by a loop over the train and test datasets; confirm.
data['y'] = [Counter([data[f'x_{i}'][n] for i in range(nr_inputs)]).most_common(1)[0][0] for n in range(nr_ele)]

# Wrap both datasets in DataFrames for training and evaluation.
data_train = pd.DataFrame(data_train)
data_test = pd.DataFrame(data_test)

def iter_function(epoch, training_error, test_error, test_error_gradient, test_accuracy):
    """Report the metrics of one training epoch on standard output.

    Used as the ``callback_on_iter`` argument of ``Predictor.learn``. The five
    values are formatted into a single line; the function returns None.
    """
    template = 'Epoch: {epoch}, Train Error: {training_error}, Test Error: {test_error}, Test Error Gradient: {test_error_gradient}, Test Accuracy: {test_accuracy}'
    metrics = {
        'epoch': epoch,
        'training_error': training_error,
        'test_error': test_error,
        'test_error_gradient': test_error_gradient,
        'test_accuracy': test_accuracy,
    }
    print(template.format(**metrics))

# Train and persist a new model only when `train` is set; evaluation below
# always runs on the model loaded back from disk.
if train:
    predictor = lightwood.Predictor(output=['y'])
    predictor.learn(from_data=data_train, callback_on_iter=iter_function, eval_every_x_epochs=200)
    predictor.save('/tmp/ltcrl.pkl')


predictor = lightwood.Predictor(load_from_path='/tmp/ltcrl.pkl')
print('Train accuracy: ', predictor.train_accuracy['y']['value'])
print('Test accuracy: ', predictor.calculate_accuracy(from_data=data_test)['y']['value'])

print(f'Accuracy for all columns present: ', predictor.calculate_accuracy(from_data=data_test)['y']['value'])

# Confidences are read from predict() output. calculate_accuracy() results are
# only indexed with 'value' in this script, so looking up
# 'selfaware_confidences' on them (as this code did before) was a mix-up.
predictions = predictor.predict(when_data=data_test)
print(f'Confidence mean for all columns present ', np.mean(predictions['y']['selfaware_confidences']))

# Repeat accuracy and confidence with each input column removed in turn.
for i_drop in range(nr_inputs):
    print(f'Accuracy for x_{i_drop} missing: ', predictor.calculate_accuracy(from_data=data_test.drop(columns=[f'x_{i_drop}']))['y']['value'])

    predictions = predictor.predict(when_data=data_test.drop(columns=[f'x_{i_drop}']))
    print(f'Confidence mean for x_{i_drop} missing: ', np.mean(predictions['y']['selfaware_confidences']))
github mindsdb / lightwood / docs / examples / learn_to_correlate.py View on Github external
# Fill both datasets in place: four random feature columns x_1..x_4 in
# [25, 75), then a target y that is a fixed weighted sum of them.
for data, nr_ele in ((data_train, n), (data_test, m)):
    for i in range(1, 5):
        data[f'x_{i}'] = [random.random()*50 + 25 for _ in range(nr_ele)]

    data['y'] = [
        x_1 * 0.9 + x_2 * 0.09 + x_3 * 0.009 + x_4 * 0.0009
        for x_1, x_2, x_3, x_4 in zip(data['x_1'], data['x_2'], data['x_3'], data['x_4'])
    ]

# Wrap the filled datasets in DataFrames.
data_train, data_test = pd.DataFrame(data_train), pd.DataFrame(data_test)



def iter_function(epoch, training_error, test_error, test_error_gradient, test_accuracy):
    """Write the per-epoch training metrics to standard output as one line.

    Supplied to ``Predictor.learn`` through ``callback_on_iter``. Returns None.
    """
    line = 'Epoch: {epoch}, Train Error: {training_error}, Test Error: {test_error}, Test Error Gradient: {test_error_gradient}, Test Accuracy: {test_accuracy}'.format(
        epoch=epoch,
        training_error=training_error,
        test_error=test_error,
        test_error_gradient=test_error_gradient,
        test_accuracy=test_accuracy,
    )
    print(line)

# Location of the persisted model, shared by the save and the load below.
model_path = '/tmp/ltcrl.pkl'

# Optionally train and persist a new model first.
if train:
    predictor = lightwood.Predictor(output=['y'])
    predictor.learn(from_data=data_train, callback_on_iter=iter_function, eval_every_x_epochs=200)
    predictor.save(model_path)

# Evaluation always runs on the model loaded back from disk.
predictor = lightwood.Predictor(load_from_path=model_path)
train_accuracy = predictor.train_accuracy['y']['value']
print('Train accuracy: ', train_accuracy)
test_accuracy = predictor.calculate_accuracy(from_data=data_test)['y']['value']
print('Test accuracy: ', test_accuracy)

predictions = predictor.predict(when_data=data_test)
confidences = predictions['y']['selfaware_confidences']
print(f'Confidence mean for all columns present ', np.mean(confidences))

# Repeat prediction, accuracy and confidence with each of x_1..x_4 removed in turn.
for i_drop in range(1, 5):
    missing = [f'x_{i_drop}']
    predictions = predictor.predict(when_data=data_test.drop(columns=missing))
    accuracy = predictor.calculate_accuracy(from_data=data_test.drop(columns=missing))['y']['value']
    print(f'Accuracy for x_{i_drop} missing: ', accuracy)
    confidences = predictions['y']['selfaware_confidences']
    print(f'Confidence mean for x_{i_drop} missing: ', np.mean(confidences))
github mindsdb / lightwood / docs / examples / classification.py View on Github external
{'name': 'PAY_5', 'type': 'numeric'}, {'name': 'PAY_6', 'type': 'numeric'},
        {'name': 'BILL_AMT1', 'type': 'numeric'}, {'name': 'BILL_AMT2', 'type': 'numeric'},
        {'name': 'BILL_AMT3', 'type': 'numeric'}, {'name': 'BILL_AMT4', 'type': 'numeric'},
        {'name': 'BILL_AMT5', 'type': 'numeric'}, {'name': 'BILL_AMT6', 'type': 'numeric'},
        {'name': 'PAY_AMT1', 'type': 'numeric'}, {'name': 'PAY_AMT2', 'type': 'numeric'},
        {'name': 'PAY_AMT3', 'type': 'numeric'}, {'name': 'PAY_AMT4', 'type': 'numeric'},
        {'name': 'PAY_AMT5', 'type': 'numeric'}, {'name': 'PAY_AMT6', 'type': 'numeric'}],
        'output_features': [{'name': 'default.payment.next.month', 'type': 'categorical', 'weights': {'0': 0.3, '1': 1}}],
        'mixer': {'class': lightwood.BUILTIN_MIXERS.NnMixer}}

    # Callback to log various training stats (currently the only hook into the training process)
    def train_callback(epoch, error, test_error, test_error_gradient, test_accuracy):
        """Print the epoch number and the metrics reported with it on one line.

        Passed to ``predictor.learn`` as ``callback_on_iter``. Returns None.
        """
        template = 'We reached epoch {epoch} with error: {error}, test_error: {test_error}, test_error_gradient: {test_error_gradient}, test_accuracy: {test_accuracy}'
        print(template.format(
            epoch=epoch,
            error=error,
            test_error=test_error,
            test_error_gradient=test_error_gradient,
            test_accuracy=test_accuracy,
        ))

    # The actual training process: build the predictor from the config, then
    # train it on `df` with train_callback invoked as the per-iteration hook.
    predictor = lightwood.Predictor(config)
    # Note: If `stop_training_after_seconds` is not set, training will stop
    # automatically once we determine the model is overfitting (we separate a
    # testing and a training dataset internally from the dataframe given and
    # only train on the training one, using the testing one to determine
    # overfitting, pick the best model and evaluate model accuracy)
    predictor.learn(from_data=df, callback_on_iter=train_callback, eval_every_x_epochs=5, stop_training_after_seconds=100)

    # Save the lightwood model to 'lightwood_model.dill'
    predictor.save('lightwood_model.dill')
github mindsdb / lightwood / docs / examples / learn_to_correlate.py View on Github external
# Target y is a fixed weighted sum of the four features: 0.9, 0.09, 0.009 and 0.0009.
# NOTE(review): `data` and `nr_ele` are bound outside this excerpt — presumably
# by a loop over the train and test datasets; confirm.
data['y'] = [data['x_1'][i] * 0.9 + data['x_2'][i] * 0.09 + data['x_3'][i] * 0.009 + data['x_4'][i] * 0.0009 for i in range(nr_ele)]

# Wrap both datasets in DataFrames for training and evaluation.
data_train = pd.DataFrame(data_train)
data_test = pd.DataFrame(data_test)



def iter_function(epoch, training_error, test_error, test_error_gradient, test_accuracy):
    """Echo the metrics of a training epoch as a single formatted line.

    Registered with ``Predictor.learn`` via ``callback_on_iter``. Returns None.
    """
    layout = 'Epoch: {epoch}, Train Error: {training_error}, Test Error: {test_error}, Test Error Gradient: {test_error_gradient}, Test Accuracy: {test_accuracy}'
    report = layout.format_map({
        'epoch': epoch,
        'training_error': training_error,
        'test_error': test_error,
        'test_error_gradient': test_error_gradient,
        'test_accuracy': test_accuracy,
    })
    print(report)

# Where the trained model is stored between the save and the load below.
saved_model = '/tmp/ltcrl.pkl'

# Train and persist a new model only when `train` is set.
if train:
    predictor = lightwood.Predictor(output=['y'])
    predictor.learn(
        from_data=data_train,
        callback_on_iter=iter_function,
        eval_every_x_epochs=200,
    )
    predictor.save(saved_model)

# All reporting below uses the model as loaded from disk.
predictor = lightwood.Predictor(load_from_path=saved_model)
print('Train accuracy: ', predictor.train_accuracy['y']['value'])
full_accuracy = predictor.calculate_accuracy(from_data=data_test)
print('Test accuracy: ', full_accuracy['y']['value'])

predictions = predictor.predict(when_data=data_test)
mean_confidence = np.mean(predictions['y']['selfaware_confidences'])
print(f'Confidence mean for all columns present ', mean_confidence)

# Same figures with each of x_1..x_4 dropped from the test frame in turn.
for i_drop in range(1, 5):
    column = f'x_{i_drop}'
    predictions = predictor.predict(when_data=data_test.drop(columns=[column]))
    reduced_accuracy = predictor.calculate_accuracy(from_data=data_test.drop(columns=[column]))
    print(f'Accuracy for x_{i_drop} missing: ', reduced_accuracy['y']['value'])
    mean_confidence = np.mean(predictions['y']['selfaware_confidences'])
    print(f'Confidence mean for x_{i_drop} missing: ', mean_confidence)
github mindsdb / lightwood / docs / examples / learn_to_multiply.py View on Github external
# Replace zero values of 'y' in the training data with 1.
# NOTE(review): the matching loop over the test data below is guarded by
# `if op == '/'`; the guard for this loop is probably cut off above the
# excerpt — confirm.
for i in range(n):
        if data_train['y'][i] == 0:
            data_train['y'][i] = 1
# Same replacement for the test data, only when the operator is division.
if op == '/':
    for i in range(m):
        if data_test['y'][i] == 0:
            data_test['y'][i] = 1

# Target variable z is `x <op> y` for every row, with the operator spliced
# into the expression text.
# NOTE(review): eval() runs a string built from `op`; this is only safe while
# `op` is a trusted constant set by the script itself — confirm.
data_train['z'] = eval(f"""[data_train['x'][i] {op} data_train['y'][i] for i in range(n)]""")
data_test['z'] = eval(f"""[data_test['x'][i] {op} data_test['y'][i] for i in range(m)]""")

# Wrap both datasets in DataFrames.
df_train = pandas.DataFrame(data_train)
df_test = pandas.DataFrame(data_test)

# Predictor with 'z' as its only output column.
predictor = Predictor(output=['z'])

def iter_function(epoch, training_error, test_error, test_error_gradient, test_accuracy):
    """Print the metrics reported for one training epoch.

    Given to ``predictor.learn`` as ``callback_on_iter``. All five values are
    placed into one line of text; the function returns None.
    """
    pattern = 'Epoch: {epoch}, Train Error: {training_error}, Test Error: {test_error}, Test Error Gradient: {test_error_gradient}, Test Accuracy: {test_accuracy}'
    summary = pattern.format(
        epoch=epoch,
        training_error=training_error,
        test_error=test_error,
        test_error_gradient=test_error_gradient,
        test_accuracy=test_accuracy,
    )
    print(summary)

# Train on the training frame and persist the result.
predictor.learn(
    from_data=df_train,
    callback_on_iter=iter_function,
    eval_every_x_epochs=200,
)
predictor.save('ok.pkl')

# Reload from disk so the figures below describe the saved model.
predictor = Predictor(load_from_path='ok.pkl')
train_accuracy = predictor.train_accuracy
print('Train accuracy: ', train_accuracy)
test_accuracy = predictor.calculate_accuracy(from_data=df_test)
print('Test accuracy: ', test_accuracy)

predictions = predictor.predict(when_data=df_test)
z_output = predictions['z']
print('Confidence mean for both x and y present: ', np.mean(z_output['selfaware_confidences']))
# Show the real targets next to the predicted ones for rows 30..59.
sample = slice(30, 60)
print(list(df_test['z'])[sample])
print(z_output['predictions'][sample])