How to use the catboost.CatBoostClassifier class in catboost

To help you get started, we've selected a few examples of CatBoostClassifier in use, drawn from popular public projects.
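
Before the project snippets, here is a minimal sketch of the basic workflow, using synthetic data and illustrative hyperparameter values (not recommendations):

import numpy as np
from catboost import CatBoostClassifier

# Synthetic binary-classification data: 100 rows, 4 numeric features.
X = np.random.rand(100, 4)
y = np.random.randint(0, 2, size=100)

# Illustrative hyperparameters only; tune for real data.
model = CatBoostClassifier(iterations=50, learning_rate=0.1, depth=4, verbose=False)
model.fit(X, y)

labels = model.predict(X)        # predicted class labels
probs = model.predict_proba(X)   # per-class probabilities, shape (100, 2)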

github bsharchilev / influence_boosting / influence_boosting / influence / ut / leaf_refit_ut.py
    train_documents, train_targets = read_train_documents_and_one_hot_targets(
        base_dir + 'train_data_catboost_format.tsv'
    )
    train_targets = np.argmax(train_targets, axis=1)

    test_documents, test_targets = read_train_documents_and_one_hot_targets(
        base_dir + 'train_data_catboost_format.tsv'
    )

    train_dir = base_dir + 'ut_tmp/'
    if not isdir(train_dir):
        mkdir(train_dir)
    cbc_params = read_json_params(base_dir + 'catboost_params.json')
    cbc_params['leaf_estimation_method'] = method
    cbc_params['random_seed'] = 10
    cbc_params['train_dir'] = train_dir
    cbc = CatBoostClassifier(**cbc_params)
    cbc.fit(train_documents, train_targets)
    cbc.save_model(train_dir + 'model.bin', format='cbm')
    export_catboost_to_json(train_dir + 'model.bin', train_dir + 'model.json')
    full_model = CBOneStepLeafRefitEnsemble(train_dir + 'model.json', train_documents, train_targets,
                                            learning_rate=0.2, loss_function=BinaryCrossEntropyLoss(),
                                            leaf_method=method,
                                            update_set='AllPoints')
    assert np.allclose(full_model(train_documents), cbc.predict(train_documents, prediction_type='RawFormulaVal'),
                       atol=1e-5),\
                       (full_model(train_documents), cbc.predict(train_documents, prediction_type='RawFormulaVal'))
    assert np.allclose(full_model(test_documents), cbc.predict(test_documents, prediction_type='RawFormulaVal'),
                       atol=1e-5)
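
Because the test saves the model in CatBoost's native 'cbm' format, it can be reloaded later without retraining. A minimal sketch, reusing train_dir and train_documents from the snippet above:

from catboost import CatBoostClassifier

restored = CatBoostClassifier()
restored.load_model(train_dir + 'model.bin', format='cbm')
# Raw scores should match the original model's output.
raw = restored.predict(train_documents, prediction_type='RawFormulaVal')
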
github bsharchilev / influence_boosting / influence_boosting / influence / ut / leaf_influence_ut.py
    train_documents, train_targets = read_train_documents_and_one_hot_targets(
        base_dir + 'train_data_catboost_format.tsv'
    )
    train_targets = np.argmax(train_targets, axis=1)

    test_documents, test_targets = read_train_documents_and_one_hot_targets(
        base_dir + 'test_data_catboost_format.tsv'
    )
    test_targets = np.argmax(test_targets, axis=1)

    train_dir = base_dir + 'ut_tmp/'
    if not isdir(train_dir):
        mkdir(train_dir)
    cbc_params = read_json_params(base_dir + 'catboost_params.json')
    cbc_params['iterations'] = 2
    cbc_params['leaf_estimation_method'] = leaf_method
    cbc_params['random_seed'] = 10
    cbc_params['train_dir'] = train_dir
    cbc = CatBoostClassifier(**cbc_params)
    cbc.set_params(boosting_type='Plain')
    cbc.fit(train_documents, train_targets)
    cbc.save_model(train_dir + 'model.bin', format='cbm')
    export_catboost_to_json(train_dir + 'model.bin', train_dir + 'model.json')
    full_model = CBLeafInfluenceEnsemble(train_dir + 'model.json', train_documents, train_targets,
                                         leaf_method=leaf_method,
                                         learning_rate=cbc_params['learning_rate'],
                                         loss_function=BinaryCrossEntropyLoss(),
                                         update_set='AllPoints')
    retrained_model_our = deepcopy(full_model)
    tf_checker = TFGBApplier(full_model, train_documents, train_targets, leaf_method)
    for remove_idx in np.random.randint(len(train_targets), size=30):
        full_model.fit(remove_idx, retrained_model_our)
        pred_ours = full_model(train_documents)
        pred_theirs = tf_checker.get_predicts()
        pred_cbc = cbc.predict(train_documents, prediction_type='RawFormulaVal')
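
Both tests compare against predict with prediction_type='RawFormulaVal', the raw additive score before any link function is applied. For a binary Logloss model, the probability output is simply the logistic transform of that raw score; a sketch reusing cbc and train_documents from above (assuming a binary Logloss objective):

import numpy as np

raw = cbc.predict(train_documents, prediction_type='RawFormulaVal')
probs = cbc.predict_proba(train_documents)

# For a binary Logloss model, P(class 1) is sigmoid(raw score).
assert np.allclose(probs[:, 1], 1.0 / (1.0 + np.exp(-raw)), atol=1e-6)
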
github ClimbsRocks / auto_ml / auto_ml / utils_models.py
    if xgb_installed:
        if isinstance(model, XGBRegressor):
            return 'XGBRegressor'

    if keras_imported:
        if isinstance(model, KerasRegressor):
            return 'DeepLearningRegressor'
        if isinstance(model, KerasClassifier):
            return 'DeepLearningClassifier'

    if lgb_installed:
        if isinstance(model, LGBMClassifier):
            return 'LGBMClassifier'
        if isinstance(model, LGBMRegressor):
            return 'LGBMRegressor'

    if catboost_installed:
        if isinstance(model, CatBoostClassifier):
            return 'CatBoostClassifier'
        if isinstance(model, CatBoostRegressor):
            return 'CatBoostRegressor'
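
The catboost_installed flag reflects the common pattern of treating catboost as an optional dependency. A typical guard looks like this (a sketch, not necessarily auto_ml's exact code):

try:
    from catboost import CatBoostClassifier, CatBoostRegressor
    catboost_installed = True
except ImportError:
    catboost_installed = False
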
github ClimbsRocks / auto_ml / auto_ml / utils_models.py
        model_map['Perceptron'] = Perceptron()
        model_map['PassiveAggressiveClassifier'] = PassiveAggressiveClassifier()
        model_map['SGDRegressor'] = SGDRegressor()
        model_map['PassiveAggressiveRegressor'] = PassiveAggressiveRegressor()

    if xgb_installed:
        model_map['XGBClassifier'] = XGBClassifier()
        model_map['XGBRegressor'] = XGBRegressor()

    if lgb_installed:
        model_map['LGBMRegressor'] = LGBMRegressor()
        model_map['LGBMClassifier'] = LGBMClassifier()

    if catboost_installed:
        model_map['CatBoostRegressor'] = CatBoostRegressor(calc_feature_importance=True)
        model_map['CatBoostClassifier'] = CatBoostClassifier(calc_feature_importance=True)

    if model_name[:12] == 'DeepLearning':
        if not keras_imported:
            # Suppress some TensorFlow logging if TF is installed (but still
            # allow TF to be absent, in which case Theano is used instead)
            try:
                os.environ['TF_CPP_MIN_VLOG_LEVEL'] = '3'
                os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
                from tensorflow import logging
                logging.set_verbosity(logging.INFO)
            except:
                pass

            global maxnorm
            global Dense, Dropout
            global LeakyReLU, PReLU, ThresholdedReLU, ELU
            global Sequential
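
Once populated, the map lets callers turn a model name string into a fresh, ready-to-fit estimator. A hypothetical lookup (X_train and y_train are placeholders):

# Hypothetical usage of the model_map built above.
model = model_map['CatBoostClassifier']
model.fit(X_train, y_train)
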
github RandolphVI / Music-Recommendation-System / CatBoost / model / cb.py
    logger.info('Done creating...')

    # Fitting
    # ==================================================
    logger.info('Training XGBoost & CatBoost model...')

    model_xgb = xgboost.XGBClassifier(
        learning_rate=0.03,
        max_depth=7,
        nthread=50,
        seed=1,
        n_estimators=750
    )

    model_cb = catboost.CatBoostClassifier(
        iterations=2000,
        learning_rate=0.03,
        depth=7,
        loss_function='Logloss',
        thread_count=50,
        random_seed=1
    )

    dh.train_model(model_xgb, model_cb)
    logger.info('Done training...')

    # Model Blending
    # ==================================================
    logger.info('Start models blending...')

    p = dh.blend(pct1=0.6, pct2=0.4)
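
dh.blend is this project's own helper, but a weighted blend of this kind is conventionally a convex combination of the two models' predicted probabilities. A hypothetical sketch of what pct1=0.6, pct2=0.4 implies (X_test is a placeholder for the held-out feature matrix):

# Hypothetical equivalent of dh.blend(pct1=0.6, pct2=0.4).
p_xgb = model_xgb.predict_proba(X_test)[:, 1]
p_cb = model_cb.predict_proba(X_test)[:, 1]
p_blend = 0.6 * p_xgb + 0.4 * p_cb
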
github NVIDIA / gbm-bench / utils.py
    def prepare(self):
        # NOTE: HACK!!
        # Due to an issue with the CatBoostClassifier class, we need to
        # explicitly set the params below to None, or else we get exceptions!
        params = self.params
        params['store_all_simple_ctr'] = None
        params['rsm'] = None
        # CB_THREAD_LIMIT is set to 56 in catboost source!
        if 'thread_count' in params and params['thread_count'] > 56:
            print("Warning! catboost sets max-thread-count to 56!")
            params['thread_count'] = 56
        self.model = cat.CatBoostClassifier(**params)
github HunterMcGushion / hyperparameter_hunter / examples / catboost_examples / classification.py
experiment = CVExperiment(
    model_initializer=CatBoostClassifier,
    model_init_params=dict(
        iterations=100,
        learning_rate=0.03,
        depth=6,
        save_snapshot=False,
        allow_writing_files=False,
        loss_function="MultiClass",
        classes_count=7,
    ),
)

# And/or...
#################### 2. Hyperparameter Optimization ####################
optimizer = GBRT(iterations=8, random_state=42)
optimizer.forge_experiment(
    model_initializer=CatBoostClassifier,
    model_init_params=dict(
        iterations=100,
        learning_rate=Real(low=0.0001, high=0.5),
        depth=Integer(4, 15),
        save_snapshot=False,
        allow_writing_files=False,
        loss_function="MultiClass",
        classes_count=7,
    ),
)
optimizer.go()
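
Here Real(low=0.0001, high=0.5) and Integer(4, 15) mark learning_rate and depth as the dimensions to search, while the remaining init params stay fixed; iterations=8 gives the optimizer eight optimization rounds. Once it settles on values, the winner can be retrained as a plain CatBoostClassifier (the tuned values below are placeholders, not actual results):

from catboost import CatBoostClassifier

best = CatBoostClassifier(
    iterations=100,
    learning_rate=0.05,   # placeholder for the tuned value
    depth=8,              # placeholder for the tuned value
    loss_function="MultiClass",
    classes_count=7,
    save_snapshot=False,
    allow_writing_files=False,
)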