How to use lightwood - 10 common examples

To help you get started, we’ve selected a few lightwood examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github mindsdb / lightwood / tests / unit_tests / test_short_text_encoder.py View on Github external
def _test_mean(self, onehot):
        """Exercise ``ShortTextEncoder(combine='mean')``: encode works, decode raises.

        The encoder is primed on generated sentences, its
        ``cae.use_autoencoder`` flag is checked against ``onehot``, a random
        fifth of the priming sentences is encoded, and ``decode`` is expected
        to raise ``ValueError``.

        :param onehot: truthy -> vocabulary size 99 and the autoencoder flag
            must be falsy; falsy -> vocabulary size 800 and the flag must be
            truthy.
        """
        if onehot:
            vocab_size = 99
        else:
            vocab_size = 800

        sentences = generate_sentences(2, 6, vocab_size)
        held_out = random.sample(sentences, len(sentences) // 5)

        encoder = ShortTextEncoder(combine='mean')
        encoder.prepare_encoder(sentences)

        # The flag must be the opposite of ``onehot``.
        autoencoder_on = encoder.cae.use_autoencoder
        assert (not autoencoder_on) if onehot else autoencoder_on

        encoded = encoder.encode(held_out)
        assert len(held_out) == len(encoded)

        # Decoding a 'mean'-combined encoding is expected to fail.
        self.assertRaises(ValueError, encoder.decode, encoded)
github mindsdb / lightwood / tests / unit_tests / test_short_text_encoder.py View on Github external
def _test_concat(self, onehot):
        # Round-trips sentences through ShortTextEncoder(combine='concat'):
        # prime, check the autoencoder flag, encode, decode, compare lengths.
        # onehot: truthy -> vocabulary size 99 and cae.use_autoencoder must be
        # falsy; falsy -> vocabulary size 800 and the flag must be truthy.
        vocab_size = 99 if onehot else 800
        
        priming_data = generate_sentences(2, 6, vocab_size)
        # Hold out a random fifth of the priming sentences as the test inputs.
        test_data = random.sample(priming_data, len(priming_data) // 5)

        enc = ShortTextEncoder(combine='concat')
        enc.prepare_encoder(priming_data)

        if onehot:
            assert not enc.cae.use_autoencoder
        else:
            assert enc.cae.use_autoencoder

        encoded_data = enc.encode(test_data)
        decoded_data = enc.decode(encoded_data)
        
        # One encoding and one decoding per input sentence.
        assert len(test_data) == len(encoded_data) == len(decoded_data)

        # Pairs each original sentence with its decoded tokens re-joined by spaces.
        # NOTE(review): the excerpt stops at this loop header; the loop body is
        # not shown here, so the per-sentence check cannot be described.
        for x_sent, y_sent in zip(
            test_data,
            [' '.join(x) for x in decoded_data]
        ):
github mindsdb / lightwood / tests / unit_tests / encoders / text / test_short_text_encoder.py View on Github external
def _test_concat(self, onehot):
        # Encode/decode round trip for ShortTextEncoder(combine='concat').
        # onehot selects the vocabulary size (99 when truthy, 800 otherwise)
        # and which value of cae.use_autoencoder is asserted (falsy when
        # onehot is truthy, truthy otherwise).
        vocab_size = 99 if onehot else 800
        
        priming_data = generate_sentences(2, 6, vocab_size)
        # The test inputs are a random sample of one fifth of the priming data.
        test_data = random.sample(priming_data, len(priming_data) // 5)

        enc = ShortTextEncoder(combine='concat')
        enc.prepare_encoder(priming_data)

        if onehot:
            assert not enc.cae.use_autoencoder
        else:
            assert enc.cae.use_autoencoder

        encoded_data = enc.encode(test_data)
        decoded_data = enc.decode(encoded_data)
        
        # Input, encoded and decoded collections must all have the same length.
        assert len(test_data) == len(encoded_data) == len(decoded_data)

        # Each decoded item is joined with single spaces and paired with its input.
        # NOTE(review): this snippet is cut off after the loop header; the body
        # that compares x_sent with y_sent is not visible.
        for x_sent, y_sent in zip(
            test_data,
            [' '.join(x) for x in decoded_data]
        ):
github mindsdb / lightwood / tests / unit_tests / encoders / text / test_short_text_encoder.py View on Github external
def _test_mean(self, onehot):
        """Check that a 'mean'-combining ShortTextEncoder encodes but cannot decode.

        Steps: generate priming sentences, prepare the encoder, assert the
        ``cae.use_autoencoder`` flag, encode a random fifth of the sentences,
        then require ``decode`` to raise ``ValueError``.

        :param onehot: when truthy the vocabulary size is 99 and the flag is
            asserted falsy; otherwise the size is 800 and the flag is asserted
            truthy.
        """
        vocab_size = 800
        if onehot:
            vocab_size = 99

        corpus = generate_sentences(2, 6, vocab_size)
        n_samples = len(corpus) // 5
        samples = random.sample(corpus, n_samples)

        text_encoder = ShortTextEncoder(combine='mean')
        text_encoder.prepare_encoder(corpus)

        if not onehot:
            assert text_encoder.cae.use_autoencoder
        else:
            assert not text_encoder.cae.use_autoencoder

        encodings = text_encoder.encode(samples)
        assert len(samples) == len(encodings)

        # The decode result is never used: the call itself must raise.
        with self.assertRaises(ValueError):
            text_encoder.decode(encodings)
github mindsdb / lightwood / tests / ci_tests / ci_tests.py View on Github external
def run_full_test(USE_CUDA, CACHE_ENCODED_DATA, SELFAWARE, PLINEAR):
    '''
    Run full test example with home_rentals dataset
    '''
    # USE_CUDA and PLINEAR are written onto lightwood's shared CONFIG object,
    # so they apply beyond this call.
    lightwood.config.config.CONFIG.USE_CUDA = USE_CUDA
    lightwood.config.config.CONFIG.PLINEAR = PLINEAR

    # Predictor configuration: four input features (three numeric, one
    # categorical with dropout 0.4) and three output features. The
    # 'number_of_rooms' output carries a per-class 'weights' mapping.
    # CACHE_ENCODED_DATA is passed through as 'cache_transformed_data'.
    # NOTE(review): this excerpt ends before the dict is closed; SELFAWARE is
    # not used in the visible lines.
    config = {'input_features': [
                        {'name': 'number_of_bathrooms', 'type': 'numeric'}, {'name': 'sqft', 'type': 'numeric'},
                        {'name': 'days_on_market', 'type': 'numeric'},
                        {'name': 'neighborhood', 'type': 'categorical','dropout':0.4}],
     'output_features': [{'name': 'number_of_rooms', 'type': 'categorical',
                       'weights':{
                             '0': 0.8,
                             '1': 0.6,
                             '2': 0.5,
                             '3': 0.7,
                             '4': 1,
                       }
    },{'name': 'rental_price', 'type': 'numeric'},{'name': 'location', 'type': 'categorical'}],
    'data_source': {'cache_transformed_data':CACHE_ENCODED_DATA},
github mindsdb / lightwood / tests / ci_tests / ci_tests.py View on Github external
def run_full_test(USE_CUDA, CACHE_ENCODED_DATA, SELFAWARE, PLINEAR):
    """
    Run full test example with home_rentals dataset.

    The part shown here sets the shared lightwood flags and assembles the
    predictor configuration.

    :param USE_CUDA: stored on ``lightwood.config.config.CONFIG.USE_CUDA``.
    :param CACHE_ENCODED_DATA: stored under
        ``config['data_source']['cache_transformed_data']``.
    :param SELFAWARE: stored under ``config['mixer']['selfaware']``.
    :param PLINEAR: stored on ``lightwood.config.config.CONFIG.PLINEAR``.
    """
    shared_flags = lightwood.config.config.CONFIG
    shared_flags.USE_CUDA = USE_CUDA
    shared_flags.PLINEAR = PLINEAR

    input_features = [
        {'name': 'number_of_bathrooms', 'type': 'numeric'},
        {'name': 'sqft', 'type': 'numeric'},
        {'name': 'days_on_market', 'type': 'numeric'},
        {'name': 'neighborhood', 'type': 'categorical', 'dropout': 0.4},
    ]

    # Per-class weights attached to the 'number_of_rooms' output.
    room_weights = {
        '0': 0.8,
        '1': 0.6,
        '2': 0.5,
        '3': 0.7,
        '4': 1,
    }
    output_features = [
        {'name': 'number_of_rooms', 'type': 'categorical', 'weights': room_weights},
        {'name': 'rental_price', 'type': 'numeric'},
        {'name': 'location', 'type': 'categorical'},
    ]

    # Kept under the name ``config`` because later code refers to it by that name.
    config = {
        'input_features': input_features,
        'output_features': output_features,
        'data_source': {'cache_transformed_data': CACHE_ENCODED_DATA},
        'mixer': {'class': lightwood.BUILTIN_MIXERS.NnMixer, 'selfaware': SELFAWARE},
    }
github mindsdb / lightwood / tests / development / dropout.py View on Github external
def iter_function(epoch, training_error, test_error, test_error_gradient, test_accuracy):
    """Print a one-line summary of the metrics reported for an epoch.

    All five arguments are interpolated into the line as-is; nothing is
    returned.
    """
    progress_line = f'Epoch: {epoch}, Train Error: {training_error}, Test Error: {test_error}, Test Error Gradient: {test_error_gradient}, Test Accuracy: {test_accuracy}'
    print(progress_line)


# Each generator call supplies one scenario; the loop below unpacks every
# scenario as (df_train, df_test, dropout_arr, out_col, name).
test_cases = [gen_multiply(),gen_correlate(),gen_categorical()]

# NOTE(review): log_map is not written in the visible part of this script.
log_map = {}
for i, data in enumerate(test_cases):
    df_train, df_test, dropout_arr, out_col, name = data

    # pmap: predictors keyed by scenario label; accmap: accuracy values keyed
    # by scenario label (with '_fit' / '_unfit' suffixes for the missing cases).
    pmap = {}
    accmap = {}

    # Baseline: train on all training columns and score on the full test frame.
    pmap['normal'] = lightwood.Predictor(output=[out_col])
    pmap['normal'].learn(from_data=df_train, callback_on_iter=iter_function, eval_every_x_epochs=100)
    accmap['normal'] = pmap['normal'].calculate_accuracy(from_data=df_test)[out_col]['value']

    # For every group of columns in dropout_arr, train a second predictor
    # without those columns, then score both predictors on the test frame with
    # the same columns dropped.
    for cols in dropout_arr:
        mk = 'missing_' + '_'.join(cols)
        pmap[mk] = lightwood.Predictor(output=[out_col])
        pmap[mk].learn(from_data=df_train.drop(columns=cols), callback_on_iter=iter_function, eval_every_x_epochs=100)
        # '_unfit': the baseline predictor, which was trained with the columns present.
        accmap[mk + '_unfit'] = pmap['normal'].calculate_accuracy(from_data=df_test.drop(columns=cols))[out_col]['value']
        # '_fit': the predictor trained without the columns.
        accmap[mk + '_fit'] = pmap[mk].calculate_accuracy(from_data=df_test.drop(columns=cols))[out_col]['value']

    # Assemble a plain-text report for this test case.
    # NOTE(review): the excerpt ends before `text` is used.
    text = f'\n---------\nTest case {name}\n---------\nNormal accuracy of: ' + str(accmap['normal'])
    for cols in dropout_arr:
        mk = 'missing_' + '_'.join(cols)
        text += f'\nSpecially-trained trained accuracy when {cols} missing: ' + str(accmap[mk + '_fit'])
        text += f'\nNormally-trained trained accuracy when {cols} missing: ' + str(accmap[mk + '_unfit'])
github mindsdb / lightwood / tests / ci_tests / ci_tests.py View on Github external
# Progress callback passed to predictor.learn() below; prints one line of metrics.
# NOTE(review): the indentation in this excerpt is inconsistent (body at 8
# columns, following statements at 4). It looks like a fragment of an enclosing
# function that also provides `config` and `df` -- confirm against the full file.
def iter_function(epoch, error, test_error, test_error_gradient, test_accuracy):
        print(
            'epoch: {iter}, error: {error}, test_error: {test_error}, test_error_gradient: {test_error_gradient}, test_accuracy: {test_accuracy}'.format(
                iter=epoch, error=error, test_error=test_error, test_error_gradient=test_error_gradient,
                # The template has no {accuracy} placeholder, so this value is
                # not rendered; predictor.train_accuracy is still read here.
                accuracy=predictor.train_accuracy, test_accuracy=test_accuracy))

    predictor = Predictor(config)
    # stop_training_after_seconds given in order to not get timeouts in travis
    predictor.learn(from_data=df, callback_on_iter=iter_function, eval_every_x_epochs=4, stop_training_after_seconds=80)

    # Drop every output-feature column, then predict on the remaining columns.
    df = df.drop([x['name'] for x in config['output_features']], axis=1)
    predictor.predict(when_data=df)

    # Save the predictor and load it back; the loop below uses the reloaded one.
    predictor.save('test.pkl')
    predictor = Predictor(load_from_path='test.pkl')

    # 100 single-row predictions with only 'sqft' supplied; the first
    # 'number_of_rooms' prediction of each must be a str or an int.
    for j in range(100):
        pred = predictor.predict(when={'sqft': round(j * 10)})['number_of_rooms']['predictions'][0]
        assert(isinstance(pred, str) or isinstance(pred, int))
github mindsdb / lightwood / tests / unit_tests / api / test_datasource.py View on Github external
def test_encoded_cache(self):
        """Check that fetching a column's encoded data fills ``encoded_cache``.

        Builds a ``DataSource`` from ``self.df`` and ``self.config``, asserts
        ``disable_cache`` is falsy, prepares the encoders, and then for each of
        'x1', 'x2' and 'y' verifies that the column is absent from the cache
        before ``get_encoded_column_data`` is called and that the cached entry
        equals the returned value element-wise afterwards.
        """
        data_source = DataSource(self.df, self.config)
        assert not data_source.disable_cache
        data_source.prepare_encoders()

        for col_name in ['x1', 'x2', 'y']:
            # Nothing may be cached for the column before the first fetch.
            assert col_name not in data_source.encoded_cache
            fetched = data_source.get_encoded_column_data(col_name)
            cached = data_source.encoded_cache[col_name]
            assert (cached == fetched).all()
github mindsdb / lightwood / tests / unit_tests / encoders / time_series / test_rnn.py View on Github external
def test_padding(self):
        """Compare ``tensor_from_series`` output with a fixed expected list.

        The series are passed with ``n_dims=5``, ``pad_value=0.0`` and
        ``max_len=3`` on the first device from ``get_devices()``; the first
        element of the result's ``tolist()`` must equal the expected rows.
        """
        raw_series = [['1', '2', '3 '], ['2', '3'], ['3', '4', '5', '6'], [' 4', '5', '6']]
        expected = [[1.0, 2.0, 3.0, 4.0, 0.0], [2.0, 3.0, 4.0, 5.0, 0.0], [3.0, 0.0, 5.0, 6.0, 0.0]]

        device = get_devices()[0]
        tensor = tensor_from_series(raw_series, device, n_dims=5, pad_value=0.0, max_len=3)
        actual = tensor.tolist()[0]

        self.assertEqual(actual, expected)