How to use the catboost.CatBoostRegressor class in catboost

To help you get started, we’ve selected a few catboost.CatBoostRegressor examples based on popular ways it is used in public projects.

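Before looking at the project excerpts below, here is a minimal, self-contained sketch of the typical CatBoostRegressor workflow: construct the estimator, fit it (optionally flagging categorical columns via a Pool), and predict. The toy data and parameter values are illustrative placeholders, not taken from any of the projects below.

import numpy as np
import pandas as pd
from catboost import CatBoostRegressor, Pool

# Illustrative toy data: two numeric features and one categorical feature.
X = pd.DataFrame({
    "num_a": np.random.rand(100),
    "num_b": np.random.rand(100),
    "cat_c": np.random.choice(["x", "y", "z"], size=100),
})
y = X["num_a"] * 2 + np.random.rand(100)

# cat_features tells CatBoost which columns to treat as categorical.
train_pool = Pool(X, y, cat_features=["cat_c"])

model = CatBoostRegressor(iterations=200, learning_rate=0.1, depth=6, loss_function="RMSE")
model.fit(train_pool, verbose=False)

preds = model.predict(Pool(X, cat_features=["cat_c"]))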

github zyfra / ebonite / tests / ext / catboost / test_model.py View on Github external
def catboost_regressor(pandas_data, catboost_params):
    return CatBoostRegressor(**catboost_params).fit(pandas_data, [1, 0])
github slundberg / shap / tests / explainers / test_tree.py View on Github external
def test_catboost():
    try:
        import catboost
    except ImportError:
        print("Skipping test_catboost!")
        return
    import numpy as np
    import shap

    # train catboost model
    X, y = shap.datasets.boston()
    X["RAD"] = X["RAD"].astype(np.int)
    model = catboost.CatBoostRegressor(iterations=300, learning_rate=0.1, random_seed=123)
    p = catboost.Pool(X, y, cat_features=["RAD"])
    model.fit(p, verbose=False, plot=False)

    # explain the model's predictions using SHAP values
    ex = shap.TreeExplainer(model)
    shap_values = ex.shap_values(p)

    predicted = model.predict(X)

    assert np.abs(shap_values.sum(1) + ex.expected_value - predicted).max() < 1e-6, \
        "SHAP values don't sum to model output!"
github Anfany / Machine-Learning-for-Beginner-by-Python3 / Blending / Blending_Regression_pm25.py View on Github external
def CatBoost_First(self, data, catsign, depth=8, iterations=80000):
        model = cb.CatBoostRegressor(iterations=iterations, depth=depth, learning_rate=0.8, loss_function='RMSE')
        model.fit(data['train'][:, :-1], data['train'][:, -1], cat_features=catsign)
        # Note: results for the validation set and the prediction set are stored differently
        # Predictions on the training set
        xul = model.predict(data['train'][:, :-1])
        # Predictions on the validation set
        yanre = model.predict(data['test'][:, :-1])
        # Predictions on the prediction set
        prer = model.predict(data['predict'][:, :-1])
        # Store the results
        self.yanzhneg_pr.append(yanre)
        self.predi.append(prer)
        # Compute the training, validation, and prediction errors separately
        # After each fold, compute the error on the training, validation, and prediction data
        xx = self.RMSE(xul, data['train'][:, -1])
        yy = self.RMSE(yanre, data['test'][:, -1])
        pp = self.RMSE(prer, data['predict'][:, -1])
github ClimbsRocks / auto_ml / auto_ml / utils_models.py View on Github external
if keras_imported:
        if isinstance(model, KerasRegressor):
            return 'DeepLearningRegressor'
        if isinstance(model, KerasClassifier):
            return 'DeepLearningClassifier'

    if lgb_installed:
        if isinstance(model, LGBMClassifier):
            return 'LGBMClassifier'
        if isinstance(model, LGBMRegressor):
            return 'LGBMRegressor'

    if catboost_installed:
        if isinstance(model, CatBoostClassifier):
            return 'CatBoostClassifier'
        if isinstance(model, CatBoostRegressor):
            return 'CatBoostRegressor'
github h2oai / driverlessai-recipes / models / algorithms / catboost.py View on Github external
else:
            valid_X_shape = None

        X, eval_set = self.process_cats(X, eval_set, orig_cols)

        # modify self.params_base['gpu_id'] based upon actually-available GPU based upon training and valid shapes
        self.acquire_gpus_function(train_shape=X.shape, valid_shape=valid_X_shape)

        params = copy.deepcopy(self.params)  # keep separate, since it can then be pulled from lightgbm params
        params = self.transcribe_and_filter_params(params, eval_set is not None)

        if logger is not None:
            loggerdata(logger, "CatBoost parameters: params_base : %s params: %s catboost_params: %s" % (str(self.params_base), str(self.params), str(params)))

        if self.num_classes == 1:
            model = CatBoostRegressor(**params)
        else:
            model = CatBoostClassifier(**params)
        # Hit sometimes: Exception: catboost/libs/data_new/quantization.cpp:779: All features are either constant or ignored.
        if self.num_classes == 1:
            # assume not mae, which would use median
            # baseline = [np.mean(y)] * len(y)
            baseline = None
        else:
            baseline = None

        kargs=dict(X=X, y=y,
                  sample_weight=sample_weight,
                  baseline=baseline,
                  eval_set=eval_set)
        pickle_path = None
        if config.debug_daimodel_level >= 2:
github h2oai / driverlessai-recipes / models / algorithms / catboost.py View on Github external
def predict(self, X, **kwargs):
        model, features, importances, iterations = self.get_model_properties()
        if not self._save_by_pickle:
            from catboost import CatBoostClassifier, CatBoostRegressor, EFstrType
            if self.num_classes >= 2:
                from_file = CatBoostClassifier()
            else:
                from_file = CatBoostRegressor()
            with open(self.model_path, mode='wb') as f:
                f.write(model)
            model = from_file.load_model(self.model_path)

        # FIXME: Do equivalent throttling of predict size like def _predict_internal(self, X, **kwargs), wrap-up.
        if isinstance(X, dt.Frame) and len(self.params['cat_features']) == 0:
            # dt -> lightgbm internally using buffer leaks, so convert here
            # assume predict is after pipeline collection or in subprocess so needs no protection
            X = X.to_numpy()  # don't assign back to X so don't damage during predict
            X = np.ascontiguousarray(X, dtype=np.float32 if config.data_precision == "float32" else np.float64)

        X, eval_set = self.process_cats(X, None, self.feature_names_fitted)

        pred_contribs = kwargs.get('pred_contribs', None)
        output_margin = kwargs.get('output_margin', None)
        fast_approx = kwargs.pop('fast_approx', False)
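The excerpt above reconstructs a CatBoostRegressor from serialized bytes inside the Driverless AI recipe framework. Outside that framework, the plain catboost save/load round trip it relies on looks roughly like this (the file path and toy data are illustrative placeholders):

from catboost import CatBoostRegressor

# Train and persist a small model (the path is a placeholder).
model = CatBoostRegressor(iterations=50, verbose=False)
model.fit([[1, 2], [3, 4], [5, 6]], [1.0, 2.0, 3.0])
model.save_model("catboost_model.cbm")

# Later, or in another process, restore it into a fresh estimator before predicting.
restored = CatBoostRegressor()
restored.load_model("catboost_model.cbm")
preds = restored.predict([[1, 2]])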
github HunterMcGushion / hyperparameter_hunter / examples / catboost_examples / regression.py View on Github external

# And/or...
#################### 2. Hyperparameter Optimization ####################
# Notice below that `optimizer` still recognizes the results of `experiment` as valid learning material even
# ... though their `verbose` values differ. This is because it knows that `verbose` has no effect on actual results.
optimizer = DummyOptPro(iterations=10, random_state=777)
optimizer.forge_experiment(
    model_initializer=CatBoostRegressor,
    model_init_params=dict(
        iterations=100,
        learning_rate=Real(0.001, 0.2),
        depth=Integer(3, 7),
        bootstrap_type=Categorical(["Bayesian", "Bernoulli"]),
        save_snapshot=False,
        allow_writing_files=False,
    ),
    model_extra_params=dict(
        fit=dict(verbose=False, eval_set=[(env.validation_input, env.validation_target)])
    ),
)
optimizer.go()
github HunterMcGushion / hyperparameter_hunter / examples / catboost_examples / regression.py View on Github external
#################### 1. Perform Experiments ####################
# *Note: If this is your first HyperparameterHunter example, the CatBoost classification example may be a better starting point.*

# In this Experiment, we're also going to use `model_extra_params` to provide arguments to
# ... `CatBoostRegressor`'s `fit` method, just like we would if we weren't using HyperparameterHunter.

# We'll be using the `verbose` argument to print evaluations of our `CatBoostRegressor` every 50 iterations,
# ... and we'll also be using the dataset sentinels offered by `Environment`. You can read more about
# ... the exciting things you can do with the `Environment` sentinels in the documentation and in the
# ... example dedicated to them. For now, though, we'll be using them to provide each fold's
# ... `env.validation_input` and `env.validation_target` to `CatBoostRegressor.fit` via its `eval_set` argument.

# You could also easily add `CatBoostRegressor.fit`'s `early_stopping_rounds` argument to `model_extra_params["fit"]`
# ... to use early stopping, but doing so here with only `iterations=100` doesn't make much sense.
experiment = CVExperiment(
    model_initializer=CatBoostRegressor,
    model_init_params=dict(
        iterations=100,
        learning_rate=0.05,
        depth=5,
        bootstrap_type="Bayesian",
        save_snapshot=False,
        allow_writing_files=False,
    ),
    model_extra_params=dict(
        fit=dict(verbose=50, eval_set=[(env.validation_input, env.validation_target)])
    ),
)

# Notice above that CatBoost printed scores for our `eval_set` every 50 iterations, just as we asked
# ... via `model_extra_params["fit"]`; that output made the results rather difficult to read, though,
# ... so we'll switch back to `verbose=False` during optimization.
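As the comments above note, early stopping can be enabled simply by adding `CatBoostRegressor.fit`'s `early_stopping_rounds` argument to `model_extra_params["fit"]`. A sketch of that variation follows; the larger `iterations` value and the patience of 20 rounds are illustrative, not taken from the original example.

experiment = CVExperiment(
    model_initializer=CatBoostRegressor,
    model_init_params=dict(
        iterations=1000,
        learning_rate=0.05,
        depth=5,
        save_snapshot=False,
        allow_writing_files=False,
    ),
    model_extra_params=dict(
        fit=dict(
            verbose=False,
            eval_set=[(env.validation_input, env.validation_target)],
            early_stopping_rounds=20,  # stop once the eval_set metric stops improving
        )
    ),
)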
github zyfra / ebonite / src / ebonite / ext / catboost / model.py View on Github external
def must_process(self, obj) -> bool:
        """
        Returns `True` if the object is a `catboost.CatBoostClassifier` or `catboost.CatBoostRegressor` instance

        :param obj: obj to check
        :return: `True` or `False`
        """
        return isinstance(obj, (CatBoostClassifier, CatBoostRegressor))