How to use lightwood.constants.lightwood.COLUMN_DATA_TYPES in lightwood

To help you get started, we’ve selected a few lightwood examples based on popular ways COLUMN_DATA_TYPES is used in public projects.

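COLUMN_DATA_TYPES is not a function but a container of column-type constants. Below is a minimal sketch of how it is typically touched, assuming the older lightwood API in which the types are plain strings exposed as class attributes; the import path, attribute names and get_attributes() call are taken from the examples on this page, everything else is illustrative.

from lightwood.constants.lightwood import COLUMN_DATA_TYPES

col_type = COLUMN_DATA_TYPES.NUMERIC

# Branch on a column's declared type, as the excerpts below do.
if col_type in (COLUMN_DATA_TYPES.CATEGORICAL, COLUMN_DATA_TYPES.MULTIPLE_CATEGORICAL):
    print('treat as a classification target')
elif col_type == COLUMN_DATA_TYPES.NUMERIC:
    print('treat as a regression target')

# The container can also enumerate every known type string, which the
# predictor_config.py example relies on via get_attributes().
print(list(COLUMN_DATA_TYPES.get_attributes().values()))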

github mindsdb/lightwood: lightwood/api/predictor.py
def apply_accuracy_function(col_type, real, predicted, weight_map=None):
        if col_type in (COLUMN_DATA_TYPES.CATEGORICAL, COLUMN_DATA_TYPES.MULTIPLE_CATEGORICAL):
            if weight_map is None:
                sample_weight = [1 for x in real]
            else:
                sample_weight = []
                for val in real:
                    sample_weight.append(weight_map[val])

            accuracy = {
                'function': 'accuracy_score',
                'value': accuracy_score(real, predicted, sample_weight=sample_weight)
            }
        else:
            real_fixed = []
            predicted_fixed = []
            for val in real:
                try:
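
The viewer cuts this excerpt off inside the non-categorical branch. As a rough standalone sketch of the same pattern: categorical and multi-categorical columns get accuracy_score, everything else is coerced to floats and scored with a regression metric. Here score_column is a made-up name and r2_score is an illustrative stand-in, not necessarily what the full function uses.

from sklearn.metrics import accuracy_score, r2_score
from lightwood.constants.lightwood import COLUMN_DATA_TYPES

def score_column(col_type, real, predicted, weight_map=None):
    if col_type in (COLUMN_DATA_TYPES.CATEGORICAL, COLUMN_DATA_TYPES.MULTIPLE_CATEGORICAL):
        # Optional per-class weighting, mirroring the weight_map handling above.
        sample_weight = None if weight_map is None else [weight_map[val] for val in real]
        return {'function': 'accuracy_score',
                'value': accuracy_score(real, predicted, sample_weight=sample_weight)}
    real_fixed = [float(val) for val in real]
    predicted_fixed = [float(val) for val in predicted]
    return {'function': 'r2_score', 'value': r2_score(real_fixed, predicted_fixed)}
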
github mindsdb/lightwood: lightwood/data_schemas/predictor_config.py
    'type': And(str, Use(str.lower), lambda s: s in COLUMN_DATA_TYPES.get_attributes().values()),
    Optional('encoder_class'): object,
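
The And, Use and Optional calls in this fragment come from the schema validation package. Wrapping the fragment in a Schema of its own (an assumption made only to get a runnable sketch; the real predictor config schema has more keys around it) shows what the 'type' rule accepts:

from schema import Schema, And, Use, Optional
from lightwood.constants.lightwood import COLUMN_DATA_TYPES

feature_schema = Schema({
    'type': And(str, Use(str.lower),
                lambda s: s in COLUMN_DATA_TYPES.get_attributes().values()),
    Optional('encoder_class'): object,
})

feature_schema.validate({'type': COLUMN_DATA_TYPES.NUMERIC})  # passes
# feature_schema.validate({'type': 'not_a_type'})             # raises SchemaError
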
github mindsdb/lightwood: lightwood/mixers/nn/nn.py
output_vector = outputs[i]
            transformed_output_vectors = when_data_source.transformer.revert(
                output_vector, feature_set='output_features')
            for feature in transformed_output_vectors:
                if feature not in output_transformed_vectors:
                    output_transformed_vectors[feature] = []
                output_transformed_vectors[feature] += [transformed_output_vectors[feature]]

        predictions = {}
        for k, output_column in enumerate(list(output_transformed_vectors.keys())):
            decoded_predictions = when_data_source.get_decoded_column_data(
                output_column,
                when_data_source.encoders[output_column]._pytorch_wrapper(output_transformed_vectors[output_column])
            )

            if self.out_types[k] in (COLUMN_DATA_TYPES.NUMERIC,):
                predictions[output_column] = {'predictions': [x[0] for x in decoded_predictions]}

                if include_extra_data:
                    predictions[output_column]['every_confidence_range'] = [x[1:] for x in decoded_predictions]

            else:
                predictions[output_column] = {'predictions': decoded_predictions}

            if awareness_arr is not None:
                predictions[output_column]['selfaware_confidences'] = [1/abs(x[k]) if x[k] != 0 else 1/0.000001 for x in awareness_arr]

            if self.out_types[k] in (COLUMN_DATA_TYPES.NUMERIC,):
                predictions[output_column]['confidence_range'] = []
                predictions[output_column]['quantile_confidences'] = []

                for i, pred in enumerate(decoded_predictions):
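
The excerpt stops inside the per-prediction loop. Stripped of the mixer's state, the per-column branching it performs is roughly the following; package_predictions and its arguments are made-up names for the sketch, not lightwood API.

from lightwood.constants.lightwood import COLUMN_DATA_TYPES

def package_predictions(out_type, decoded, include_extra_data=False):
    # Numeric columns decode to a point value followed by confidence bounds;
    # other column types are passed through as-is.
    if out_type == COLUMN_DATA_TYPES.NUMERIC:
        packaged = {'predictions': [row[0] for row in decoded]}
        if include_extra_data:
            packaged['every_confidence_range'] = [row[1:] for row in decoded]
        return packaged
    return {'predictions': decoded}
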
github mindsdb/lightwood: lightwood/encoders/text/distilbert.py
test_data_loader=test_data_loader,
                                                       desired_error=self.desired_error,
                                                       max_time=self.max_training_time,
                                                       callback=self._train_callback,
                                                       eval_every_x_epochs=1,
                                                       max_unimproving_models=10,
                                                       custom_train_func=partial(
                                                           self.categorical_train_function,
                                                           test=False),
                                                       custom_test_func=partial(
                                                           self.categorical_train_function, test=True)
                                                       )

            self._model = best_model.to(self.device)

        elif all([x['output_type'] == COLUMN_DATA_TYPES.NUMERIC or x['output_type'] == COLUMN_DATA_TYPES.CATEGORICAL
                  for x in training_data['targets']]):

            self.desired_error = 0.01
            self._model_type = 'generic_target_predictor'
            self._model = self._embeddings_model_class.from_pretrained(self._pretrained_model_name).to(self.device)
            batch_size = 10

            self._head = DefaultNet(dynamic_parameters={}, shape=funnel(
                768, sum([len(x['encoded_output'][0]) for x in training_data['targets']]), depth=5), selfaware=False)

            no_decay = ['bias', 'LayerNorm.weight']
            optimizer_grouped_parameters = [
                {'params': [p for n, p in self._head.named_parameters() if not any(
                    nd in n for nd in no_decay)], 'weight_decay': 0.000001},
                {'params': [p for n, p in self._head.named_parameters() if any(nd in n for nd in no_decay)],
                 'weight_decay': 0.0}
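
The elif near the top of this excerpt is what gates the 'generic_target_predictor' path: it only fires when every training target is numeric or categorical. In isolation that check is simply the following (all_targets_supported is a made-up name):

from lightwood.constants.lightwood import COLUMN_DATA_TYPES

def all_targets_supported(training_data):
    # True only when every target column is numeric or categorical.
    return all(
        target['output_type'] in (COLUMN_DATA_TYPES.NUMERIC, COLUMN_DATA_TYPES.CATEGORICAL)
        for target in training_data['targets']
    )
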
github mindsdb/lightwood: lightwood/mixers/nn/nn.py
when_data_source.encoders[output_column]._pytorch_wrapper(output_transformed_vectors[output_column])
            )

            if self.out_types[k] in (COLUMN_DATA_TYPES.NUMERIC,):
                predictions[output_column] = {'predictions': [x[0] for x in decoded_predictions]}

                if include_extra_data:
                    predictions[output_column]['every_confidence_range'] = [x[1:] for x in decoded_predictions]

            else:
                predictions[output_column] = {'predictions': decoded_predictions}

            if awareness_arr is not None:
                predictions[output_column]['selfaware_confidences'] = [1/abs(x[k]) if x[k] != 0 else 1/0.000001 for x in awareness_arr]

            if self.out_types[k] in (COLUMN_DATA_TYPES.NUMERIC,):
                predictions[output_column]['confidence_range'] = []
                predictions[output_column]['quantile_confidences'] = []

                for i, pred in enumerate(decoded_predictions):
                    if 'selfaware_confidences' in predictions[output_column]:
                        sc = predictions[output_column]['selfaware_confidences'][i]
                    else:
                        sc = pow(10,3)

                    qp = self.select_quantile(sc)
                    predictions[output_column]['confidence_range'].append([pred[qp[0]],pred[qp[1]]])
                    predictions[output_column]['quantile_confidences'].append(self.quantiles[qp[1]] - self.quantiles[qp[0]])

            if loss_confidence_arr[k] is not None:
                predictions[output_column]['loss_confidences'] = loss_confidence_arr[k]