How to use the lightwood.constants.lightwood.COLUMN_DATA_TYPES.NUMERIC constant in lightwood

To help you get started, we’ve selected a few lightwood examples, based on popular ways it is used in public projects.

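Before the real-world examples, here is a minimal sketch of the pattern they all share: import the COLUMN_DATA_TYPES constants and compare a column's declared type against COLUMN_DATA_TYPES.NUMERIC to decide how the column should be handled. The columns list below is made up for illustration; only the import path is taken from this page.

from lightwood.constants.lightwood import COLUMN_DATA_TYPES

# Hypothetical column configs; only the 'type' field matters here.
columns = [
    {'name': 'price', 'type': COLUMN_DATA_TYPES.NUMERIC},
    {'name': 'color', 'type': COLUMN_DATA_TYPES.CATEGORICAL},
]

# Branch on the declared type, as the snippets below do.
numeric_columns = [c['name'] for c in columns if c['type'] == COLUMN_DATA_TYPES.NUMERIC]
print(numeric_columns)  # ['price']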

github mindsdb / lightwood / lightwood / mixers / boost / boost.py View on Github
for target_col_name in self.targets:
    Y = train_ds.get_column_original_data(target_col_name)

    if self.targets[target_col_name]['type'] == COLUMN_DATA_TYPES.CATEGORICAL:
        # Categorical targets: weight samples by the configured class weights, if any.
        weight_map = self.targets[target_col_name]['weights']
        if weight_map is None:
            sample_weight = [1 for _ in Y]
        else:
            sample_weight = []
            for val in Y:
                sample_weight.append(weight_map[val])

        self.targets[target_col_name]['model'] = GradientBoostingClassifier(n_estimators=600)
        self.targets[target_col_name]['model'].fit(X, Y, sample_weight=sample_weight)

    elif self.targets[target_col_name]['type'] == COLUMN_DATA_TYPES.NUMERIC:
        # Numeric targets: fit a point-estimate regressor, plus one quantile
        # regressor per requested quantile.
        self.targets[target_col_name]['model'] = GradientBoostingRegressor(n_estimators=600)
        self.targets[target_col_name]['model'].fit(X, Y)
        if self.quantiles is not None:
            self.targets[target_col_name]['quantile_models'] = {}
            for i, quantile in enumerate(self.quantiles):
                self.targets[target_col_name]['quantile_models'][i] = GradientBoostingRegressor(n_estimators=600, loss='quantile', alpha=quantile)
                self.targets[target_col_name]['quantile_models'][i].fit(X, Y)

    else:
        # Unsupported column type: no model is trained for this target.
        self.targets[target_col_name]['model'] = None
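
The NUMERIC branch above trains a point-estimate regressor and, when quantiles are configured, one extra quantile regressor per requested quantile. A stripped-down sketch of that pattern using scikit-learn alone (the data here is synthetic and purely illustrative):

import numpy as np
from sklearn.ensemble import GradientBoostingRegressor

# Synthetic numeric features and target.
X = np.random.rand(200, 3)
Y = X @ np.array([1.0, 2.0, 3.0]) + np.random.randn(200) * 0.1

quantiles = [0.05, 0.5, 0.95]

# Point estimate, mirroring the NUMERIC branch above.
model = GradientBoostingRegressor(n_estimators=600).fit(X, Y)

# One quantile regressor per requested quantile.
quantile_models = {
    i: GradientBoostingRegressor(n_estimators=600, loss='quantile', alpha=q).fit(X, Y)
    for i, q in enumerate(quantiles)
}

# The quantile models give a rough prediction interval around the point estimate.
point = model.predict(X[:5])
lower = quantile_models[0].predict(X[:5])
upper = quantile_models[2].predict(X[:5])
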
github mindsdb / lightwood / lightwood / mixers / nn / nn.py View on Github
def fit_data_source(self, ds):
    self.input_column_names = self.input_column_names \
        if self.input_column_names is not None else ds.get_feature_names('input_features')
    self.output_column_names = self.output_column_names \
        if self.output_column_names is not None else ds.get_feature_names('output_features')

    self.out_types = ds.out_types
    for n, out_type in enumerate(self.out_types):
        if out_type == COLUMN_DATA_TYPES.NUMERIC:
            # Numeric outputs get one extra encoder output per additional quantile.
            ds.encoders[self.output_column_names[n]].extra_outputs = len(self.quantiles) - 1

    transformer_already_initialized = False
    try:
        if len(list(ds.transformer.feature_len_map.keys())) > 0:
            transformer_already_initialized = True
    except Exception:
        # The data source has no usable transformer yet.
        pass

    if not transformer_already_initialized:
        ds.transformer = Transformer(self.input_column_names, self.output_column_names)

    self.encoders = ds.encoders
    self.transformer = ds.transformer
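
Here COLUMN_DATA_TYPES.NUMERIC decides which output encoders are resized: numeric outputs get len(self.quantiles) - 1 extra outputs so the network can emit the additional quantile estimates. The loop in isolation, with stand-in objects (MockEncoder and the column lists are assumptions for this sketch, not lightwood classes):

from lightwood.constants.lightwood import COLUMN_DATA_TYPES

class MockEncoder:
    """Stand-in for a lightwood encoder; only the attribute used above."""
    def __init__(self):
        self.extra_outputs = 0

quantiles = [0.05, 0.5, 0.95]
output_column_names = ['price', 'color']
out_types = [COLUMN_DATA_TYPES.NUMERIC, COLUMN_DATA_TYPES.CATEGORICAL]
encoders = {name: MockEncoder() for name in output_column_names}

# Only NUMERIC outputs are widened to carry the extra quantile estimates.
for n, out_type in enumerate(out_types):
    if out_type == COLUMN_DATA_TYPES.NUMERIC:
        encoders[output_column_names[n]].extra_outputs = len(quantiles) - 1

assert encoders['price'].extra_outputs == 2
assert encoders['color'].extra_outputs == 0
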
github mindsdb / lightwood / lightwood / api / predictor.py View on Github
ds = from_data if isinstance(from_data, DataSource) else DataSource(from_data, self.config)
predictions = self._mixer.predict(ds, include_extra_data=True)
accuracies = {}

for output_column in self._output_columns:
    real = list(map(str, ds.get_column_original_data(output_column)))
    predicted = list(map(str, predictions[output_column]['predictions']))

    weight_map = None
    if 'weights' in ds.get_column_config(output_column):
        weight_map = ds.get_column_config(output_column)['weights']

    accuracy = self.apply_accuracy_function(ds.get_column_config(output_column)['type'], real, predicted, weight_map=weight_map)

    if ds.get_column_config(output_column)['type'] in (COLUMN_DATA_TYPES.NUMERIC,):
        # For numeric targets, also try decoding in log space and keep
        # whichever decoding yields the better accuracy.
        ds.encoders[output_column].decode_log = True
        predicted = ds.get_decoded_column_data(output_column, predictions[output_column]['encoded_predictions'])

        alternative_accuracy = self.apply_accuracy_function(ds.get_column_config(output_column)['type'], real, predicted, weight_map=weight_map)

        if alternative_accuracy['value'] > accuracy['value']:
            accuracy = alternative_accuracy
        else:
            ds.encoders[output_column].decode_log = False

    accuracies[output_column] = accuracy

return accuracies
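
For NUMERIC columns the snippet computes the accuracy twice, once with the encoder's default decoding and once with decode_log enabled, and keeps whichever scores higher. The decision logic in isolation (pick_better_decoding and accuracy_fn are hypothetical names used only for this sketch):

def pick_better_decoding(real, decoded_plain, decoded_log, accuracy_fn):
    # Score both decodings with the same accuracy function ...
    plain_acc = accuracy_fn(real, decoded_plain)
    log_acc = accuracy_fn(real, decoded_log)
    # ... and keep the one with the higher 'value', as the example above does.
    if log_acc['value'] > plain_acc['value']:
        return decoded_log, log_acc, True    # leave decode_log enabled
    return decoded_plain, plain_acc, False   # revert decode_log to False
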
github mindsdb / lightwood / lightwood / api / predictor.py View on Github
def type_map(col_name):
    col_pd_type = from_data[col_name].dtype
    col_pd_type = str(col_pd_type)

    if col_pd_type in ['int64', 'float64', 'timedelta']:
        return COLUMN_DATA_TYPES.NUMERIC
    elif col_pd_type in ['bool', 'category']:
        return COLUMN_DATA_TYPES.CATEGORICAL
    else:
        # if the number of unique values is less than 100, or less than
        # 10% of the total number of rows, keep the column as categorical
        unique = from_data[col_name].nunique()
        if unique < 100 or unique < len(from_data[col_name]) / 10:
            return COLUMN_DATA_TYPES.CATEGORICAL
        # otherwise assume it's text
        return COLUMN_DATA_TYPES.TEXT
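
The heuristic is easy to exercise on a small pandas DataFrame; the frame below is synthetic, and the inlined checks simply mirror the type_map logic above so the sketch runs on its own:

import pandas as pd

from_data = pd.DataFrame({
    'price': [1.5, 2.0, 3.25],       # float64 -> NUMERIC
    'count': [1, 2, 3],              # int64   -> NUMERIC
    'flag':  [True, False, True],    # bool    -> CATEGORICAL
    'city':  ['a', 'b', 'a'],        # object, few uniques -> CATEGORICAL
})

for col in from_data.columns:
    col_pd_type = str(from_data[col].dtype)
    unique = from_data[col].nunique()
    if col_pd_type in ['int64', 'float64', 'timedelta']:
        label = 'NUMERIC'
    elif col_pd_type in ['bool', 'category']:
        label = 'CATEGORICAL'
    elif unique < 100 or unique < len(from_data[col]) / 10:
        label = 'CATEGORICAL'
    else:
        label = 'TEXT'
    print(col, label)
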
github mindsdb / lightwood / lightwood / encoders / text / distilbert.py View on Github
        # build the priming data and matching numeric priming targets
        priming_data.append(str(i) + ''.join(['n'] * i))
        priming_target.append(i)

    # prepare a numeric encoder for the target column
    output_1_encoder = NumericEncoder(is_target=True)
    output_1_encoder.prepare_encoder(priming_target)

    encoded_data_1 = output_1_encoder.encode(priming_target)
    encoded_data_1 = encoded_data_1.tolist()

    enc = DistilBertEncoder()

    # prime the text encoder with two numeric target outputs
    enc.prepare_encoder(priming_data,
                        training_data={'targets': [
                            {'output_type': COLUMN_DATA_TYPES.NUMERIC, 'encoded_output': encoded_data_1},
                            {'output_type': COLUMN_DATA_TYPES.NUMERIC, 'encoded_output': encoded_data_1}
                        ]})

    encoded_predicted_target = enc.encode(test_data).tolist()

    # decode the two predicted target heads back into numeric values
    predicted_targets_1 = output_1_encoder.decode(torch.tensor([x[:3] for x in encoded_predicted_target]))
    predicted_targets_2 = output_1_encoder.decode(torch.tensor([x[3:] for x in encoded_predicted_target]))

    for predicted_targets in [predicted_targets_1, predicted_targets_2]:
        real = list(test_target)
        pred = list(predicted_targets)

        # handle NaN values in the predictions
        for i in range(len(pred)):
            try:
                float(pred[i])