How to use catboost - 10 common examples

To help you get started, we’ve selected a few catboost examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github zyfra / ebonite / tests / ext / catboost / test_model.py View on Github external
def catboost_regressor(pandas_data, catboost_params):
    # Test fixture: build a CatBoostRegressor from the supplied params and fit
    # it on `pandas_data` with hard-coded targets [1, 0].
    # NOTE(review): assumes `pandas_data` has exactly two rows to match the
    # two-element target list -- confirm against the fixture that supplies it.
    return CatBoostRegressor(**catboost_params).fit(pandas_data, [1, 0])
github slundberg / shap / tests / explainers / test_tree.py View on Github external
def test_catboost():
    """Check that TreeExplainer SHAP values sum to the CatBoost model output.

    Trains a small CatBoostRegressor on the Boston housing dataset (with one
    categorical feature) and asserts the local-accuracy property:
    sum(shap_values) + expected_value == model prediction, per sample.

    Skips silently (prints and returns None) when catboost is not installed.
    """
    try:
        import catboost
    except ImportError:  # was a bare except: only skip when catboost is truly absent
        print("Skipping test_catboost!")
        return
    import shap

    # train catboost model
    X, y = shap.datasets.boston()
    # np.int was deprecated in NumPy 1.20 and removed in 1.24; the builtin
    # int is the documented drop-in replacement for astype.
    X["RAD"] = X["RAD"].astype(int)
    model = catboost.CatBoostRegressor(iterations=300, learning_rate=0.1, random_seed=123)
    p = catboost.Pool(X, y, cat_features=["RAD"])
    model.fit(p, verbose=False, plot=False)

    # explain the model's predictions using SHAP values
    ex = shap.TreeExplainer(model)
    shap_values = ex.shap_values(p)

    predicted = model.predict(X)

    # local accuracy: per-sample SHAP contributions + base value must
    # reproduce the raw model output to numerical tolerance
    assert np.abs(shap_values.sum(1) + ex.expected_value - predicted).max() < 1e-6, \
        "SHAP values don't sum to model output!"
github slundberg / shap / tests / explainers / test_tree.py View on Github external
def test_catboost():
    """Check that TreeExplainer SHAP values sum to the CatBoost model output.

    Trains a small CatBoostRegressor on the Boston housing dataset (with one
    categorical feature) and asserts the local-accuracy property:
    sum(shap_values) + expected_value == model prediction, per sample.

    Skips silently (prints and returns None) when catboost is not installed.
    """
    try:
        import catboost
    except ImportError:  # was a bare except: only skip when catboost is truly absent
        print("Skipping test_catboost!")
        return
    import shap

    # train catboost model
    X, y = shap.datasets.boston()
    # np.int was deprecated in NumPy 1.20 and removed in 1.24; the builtin
    # int is the documented drop-in replacement for astype.
    X["RAD"] = X["RAD"].astype(int)
    model = catboost.CatBoostRegressor(iterations=300, learning_rate=0.1, random_seed=123)
    p = catboost.Pool(X, y, cat_features=["RAD"])
    model.fit(p, verbose=False, plot=False)

    # explain the model's predictions using SHAP values
    ex = shap.TreeExplainer(model)
    shap_values = ex.shap_values(p)

    predicted = model.predict(X)

    # local accuracy: per-sample SHAP contributions + base value must
    # reproduce the raw model output to numerical tolerance
    assert np.abs(shap_values.sum(1) + ex.expected_value - predicted).max() < 1e-6, \
        "SHAP values don't sum to model output!"
github slundberg / shap / tests / explainers / test_tree.py View on Github external
def test_catboost():
    """Check that TreeExplainer SHAP values sum to the CatBoost model output.

    Trains a small CatBoostRegressor on the Boston housing dataset (with one
    categorical feature) and asserts the local-accuracy property:
    sum(shap_values) + expected_value == model prediction, per sample.

    Skips silently (prints and returns None) when catboost is not installed.
    """
    try:
        import catboost
    except ImportError:  # was a bare except: only skip when catboost is truly absent
        print("Skipping test_catboost!")
        return
    import shap

    # train catboost model
    X, y = shap.datasets.boston()
    # np.int was deprecated in NumPy 1.20 and removed in 1.24; the builtin
    # int is the documented drop-in replacement for astype.
    X["RAD"] = X["RAD"].astype(int)
    model = catboost.CatBoostRegressor(iterations=300, learning_rate=0.1, random_seed=123)
    p = catboost.Pool(X, y, cat_features=["RAD"])
    model.fit(p, verbose=False, plot=False)

    # explain the model's predictions using SHAP values
    ex = shap.TreeExplainer(model)
    shap_values = ex.shap_values(p)

    predicted = model.predict(X)

    # local accuracy: per-sample SHAP contributions + base value must
    # reproduce the raw model output to numerical tolerance
    assert np.abs(shap_values.sum(1) + ex.expected_value - predicted).max() < 1e-6, \
        "SHAP values don't sum to model output!"
github catboost / catboost / catboost / pytest / cuda_tests / test_gpu.py View on Github external
'-f': train_path,
        '-t': test_path,
        '--column-description': cd_path,
        '-i': '10',
        '-T': '4',
        '-m': model_path,
        '--use-best-model': 'false',
        '--test-err-log': test_error_path
    }

    fit_catboost_gpu(fit_params)

    eval_metric(model_path, METRIC_CHECKING_MULTICLASS_NO_WEIGHTS, test_path, cd_path, eval_error_path)
    compare_metrics_with_diff(METRIC_CHECKING_MULTICLASS_NO_WEIGHTS, test_error_path, eval_error_path)

    py_catboost = catboost.CatBoost()
    py_catboost.load_model(model_path)

    assert json.loads(py_catboost.get_metadata()['multiclass_params'])['class_to_label'] == [0, 1, 2, 3]
    assert json.loads(py_catboost.get_metadata()['multiclass_params'])['class_names'] == ['a', 'b', 'c', 'd']
    assert json.loads(py_catboost.get_metadata()['multiclass_params'])['classes_count'] == 0

    assert json.loads(py_catboost.get_metadata()['params'])['data_processing_options']['class_names'] == ['a', 'b', 'c', 'd']

    return [local_canonical_file(test_error_path)]
github catboost / catboost / catboost / pytest / cuda_tests / test_gpu.py View on Github external
fit_params = {
        '--loss-function': loss_function,
        '--boosting-type': 'Plain',
        '--classes-count': '4',
        '-f': train_path,
        '--column-description': cd_path,
        '-i': '10',
        '-T': '4',
        '-m': model_path,
        '--use-best-model': 'false'
    }

    fit_catboost_gpu(fit_params)

    py_catboost = catboost.CatBoost()
    py_catboost.load_model(model_path)

    assert json.loads(py_catboost.get_metadata()['multiclass_params'])['class_to_label'] == [1, 2]
    assert json.loads(py_catboost.get_metadata()['multiclass_params'])['classes_count'] == 4
    assert json.loads(py_catboost.get_metadata()['multiclass_params'])['class_names'] == []

    calc_cmd = (
        CATBOOST_PATH,
        'calc',
        '--input-path', test_path,
        '--column-description', cd_path,
        '-m', model_path,
        '--output-path', eval_path,
        '--prediction-type', prediction_type
    )
github bsharchilev / influence_boosting / influence_boosting / influence / ut / leaf_refit_ut.py View on Github external
base_dir + 'train_data_catboost_format.tsv'
    )
    train_targets = np.argmax(train_targets, axis=1)

    test_documents, test_targets = read_train_documents_and_one_hot_targets(
        base_dir + 'train_data_catboost_format.tsv'
    )

    train_dir = base_dir + 'ut_tmp/'
    if not isdir(train_dir):
        mkdir(train_dir)
    cbc_params = read_json_params(base_dir + 'catboost_params.json')
    cbc_params['leaf_estimation_method'] = method
    cbc_params['random_seed'] = 10
    cbc_params['train_dir'] = train_dir
    cbc = CatBoostClassifier(**cbc_params)
    cbc.fit(train_documents, train_targets)
    cbc.save_model(train_dir + 'model.bin', format='cbm')
    export_catboost_to_json(train_dir + 'model.bin', train_dir + 'model.json')
    full_model = CBOneStepLeafRefitEnsemble(train_dir + 'model.json', train_documents, train_targets,
                                            learning_rate=0.2, loss_function=BinaryCrossEntropyLoss(),
                                            leaf_method=method,
                                            update_set='AllPoints')
    assert np.allclose(full_model(train_documents), cbc.predict(train_documents, prediction_type='RawFormulaVal'),
                       atol=1e-5),\
                       (full_model(train_documents), cbc.predict(train_documents, prediction_type='RawFormulaVal'))
    assert np.allclose(full_model(test_documents), cbc.predict(test_documents, prediction_type='RawFormulaVal'),
                       atol=1e-5)
github bsharchilev / influence_boosting / influence_boosting / influence / ut / leaf_influence_ut.py View on Github external
train_targets = np.argmax(train_targets, axis=1)

    test_documents, test_targets = read_train_documents_and_one_hot_targets(
        base_dir + 'test_data_catboost_format.tsv'
    )
    test_targets = np.argmax(test_targets, axis=1)

    train_dir = base_dir + 'ut_tmp/'
    if not isdir(train_dir):
        mkdir(train_dir)
    cbc_params = read_json_params(base_dir + 'catboost_params.json')
    cbc_params['iterations'] = 2
    cbc_params['leaf_estimation_method'] = leaf_method
    cbc_params['random_seed'] = 10
    cbc_params['train_dir'] = train_dir
    cbc = CatBoostClassifier(**cbc_params)
    cbc.set_params(boosting_type='Plain')
    cbc.fit(train_documents, train_targets)
    cbc.save_model(train_dir + 'model.bin', format='cbm')
    export_catboost_to_json(train_dir + 'model.bin', train_dir + 'model.json')
    full_model = CBLeafInfluenceEnsemble(train_dir + 'model.json', train_documents, train_targets,
                                         leaf_method=leaf_method,
                                         learning_rate=cbc_params['learning_rate'],
                                         loss_function=BinaryCrossEntropyLoss(),
                                         update_set='AllPoints')
    retrained_model_our = deepcopy(full_model)
    tf_checker = TFGBApplier(full_model, train_documents, train_targets, leaf_method)
    for remove_idx in np.random.randint(len(train_targets), size=30):
        full_model.fit(remove_idx, retrained_model_our)
        pred_ours = full_model(train_documents)
        pred_theirs = tf_checker.get_predicts()
        pred_cbc = cbc.predict(train_documents, prediction_type='RawFormulaVal')
github Anfany / Machine-Learning-for-Beginner-by-Python3 / Blending / Blending_Regression_pm25.py View on Github external
def CatBoost_First(self, data, catsign, depth=8, iterations=80000):
        """Train a CatBoost regressor and record its fold predictions for blending.

        Args:
            data: dict with 'train', 'test' and 'predict' 2-D arrays; the last
                column of each is the target, the rest are features.
            catsign: indices of categorical feature columns for CatBoost.
            depth: tree depth for the regressor.
            iterations: maximum boosting iterations.

        NOTE(review): xx/yy/pp are computed but unused in this excerpt and
        there is no return statement -- the method body likely continues
        beyond this snippet; confirm against the full source.
        """
        model = cb.CatBoostRegressor(iterations=iterations, depth=depth, learning_rate=0.8, loss_function='RMSE')
        model.fit(data['train'][:, :-1], data['train'][:, -1], cat_features=catsign)
        # Note: validation-set results and prediction-set results are stored differently
        # Predictions on the training set
        xul = model.predict(data['train'][:, :-1])
        # Predictions on the validation set
        yanre = model.predict(data['test'][:, :-1])
        # Predictions on the hold-out (prediction) set
        prer = model.predict(data['predict'][:, :-1])
        # Store validation and hold-out predictions for later blending
        self.yanzhneg_pr.append(yanre)
        self.predi.append(prer)
        # Compute the error for train / validation / prediction separately
        # (after each fold, errors for all three splits are computed)
        xx = self.RMSE(xul, data['train'][:, -1])
        yy = self.RMSE(yanre, data['test'][:, -1])
        pp = self.RMSE(prer, data['predict'][:, -1])
github ybabakhin / kaggle-skeleton / models_zoo.py View on Github external
def fit(self, X_train, y_train):
        """Tune the iteration count via CatBoost cross-validation, then fit.

        Runs `cv` with the current params, picks the round where the mean test
        metric peaks, inflates it by 50% for the full-data refit, and trains
        the final CatBoostClassifier stored on `self.model`.
        """
        # Cross-validate with the current parameter set to estimate the
        # best number of boosting rounds.
        bst = cv(
            Pool(X_train, y_train),
            self.params
        )

        # Best CV round scaled by 1.5 (heuristic: CV-optimal rounds tend to
        # under-fit when retraining on the full dataset).
        # NOTE(review): idxmax assumes a higher value of self.metric is
        # better -- confirm for the metric actually configured.
        best_rounds = int(bst['test-{}-mean'.format(self.metric)].idxmax() * 1.5) + 1
        print('Best Iteration: {}'.format(best_rounds))

        # Refit a fresh classifier on all the data with the tuned round count.
        self.params['iterations'] = best_rounds
        self.model = CatBoostClassifier(**self.params)

        self.model.fit(
            X_train, y_train
        )