How to use the lightwood.data_schemas.predictor_config.predictor_config_schema.validate function in lightwood

To help you get started, we’ve selected a few lightwood examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github mindsdb / lightwood / tests / unit_tests / api / test_datasource.py View on Github external
{
                    'name': 'x2',
                    'type': 'numeric',

                }
            ],

            'output_features': [
                {
                    'name': 'y',
                    'type': 'categorical',

                }
            ]
        }
        config = predictor_config_schema.validate(config)
        n_points = 100
        data = {'x1': [i for i in range(n_points)],
                'x2': [random.randint(i, i + 20) for i in range(n_points)]}
        nums = [data['x1'][i] * data['x2'][i] for i in range(n_points)]

        data['y'] = ['low' if i < 50 else 'high' for i in nums]

        df = pd.DataFrame(data)

        self.config = config
        self.df = df
github mindsdb / lightwood / lightwood / mixers / nn / nn.py View on Github external
'type': 'numeric'
            }
        ],

        'output_features': [
            {
                'name': 'z',
                'type': 'numeric'
            },
            {
                'name': 'z`',
                'type': 'categorical'
            }
        ]
    }
    config = predictor_config_schema.validate(config)

    data = {'x': [i for i in range(10)], 'y': [random.randint(i, i + 20) for i in range(10)]}
    nums = [data['x'][i] * data['y'][i] for i in range(10)]

    data['z'] = [i + 0.5 for i in range(10)]
    data['z`'] = ['low' if i < 50 else 'high' for i in nums]

    data_frame = pandas.DataFrame(data)
    ds = DataSource(data_frame, config)
    ds.prepare_encoders()

    mixer = NnMixer({}, config)
    mixer.fit(ds,ds, stop_training_after_seconds=50)

    predict_input_ds = DataSource(data_frame[['x', 'y']], config)
    predict_input_ds.prepare_encoders()
github mindsdb / lightwood / lightwood / api / predictor.py View on Github external
logging.info(f'Boosting mixer can\'t be loaded due to error: {e} !')
            print((f'Boosting mixer can\'t be loaded due to error: {e} !'))

        if load_from_path is not None:
            pickle_in = open(load_from_path, "rb")
            self_dict = dill.load(pickle_in)
            pickle_in.close()
            self.__dict__ = self_dict
            self.convert_to_device()
            return

        if output is None and config is None:
            raise ValueError('You must give one argument to the Predictor constructor')
        try:
            if config is not None and output is None:
                self.config = predictor_config_schema.validate(config)
        except:
            error = traceback.format_exc(1)
            raise ValueError('[BAD DEFINITION] argument has errors: {err}'.format(err=error))

        # this is if we need to automatically generate a configuration variable
        self._generate_config = True if output is not None or self.config is None else False

        self._output_columns = output
        self._input_columns = None
        self.train_accuracy = None

        self._mixer = None
        self._helper_mixers = None
github mindsdb / lightwood / lightwood / api / predictor.py View on Github external
# if the number of uniques is less than 100, or less
                # than 10% of the total number of rows, then keep it as categorical
                unique = from_data[col_name].nunique()
                if unique < 100 or unique < len(from_data[col_name]) / 10:
                    return COLUMN_DATA_TYPES.CATEGORICAL
                # else assume it's text
                return COLUMN_DATA_TYPES.TEXT

        # generate the configuration and set the order for the input and output columns
        if self._generate_config is True:
            self._input_columns = [col for col in from_data if col not in self._output_columns]
            self.config = {
                'input_features': [{'name': col, 'type': type_map(col)} for col in self._input_columns],
                'output_features': [{'name': col, 'type': type_map(col)} for col in self._output_columns]
            }
            self.config = predictor_config_schema.validate(self.config)
            logging.info('Automatically generated a configuration')
            logging.info(self.config)
        else:
            self._output_columns = [col['name'] for col in self.config['output_features']]
            self._input_columns = [col['name'] for col in self.config['input_features']]

        if stop_training_after_seconds is None:
            stop_training_after_seconds = round(from_data.shape[0] * from_data.shape[1] / 5)

        if stop_model_building_after_seconds is None:
            stop_model_building_after_seconds = stop_training_after_seconds * 3

        from_data_ds = DataSource(from_data, self.config)

        if test_data is not None:
            test_data_ds = DataSource(test_data, self.config)