How to use the Pool class (catboost.core.Pool, defined in catboost/python-package/catboost/core.py) in catboost

To help you get started, we’ve selected a few catboost examples that show popular ways the library is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github catboost / catboost / catboost / python-package / catboost / core.py View on Github external
def _build_train_pool(X, y, cat_features, pairs, sample_weight, group_id, group_weight, subgroup_id, pairs_weight, baseline, column_description):
    """
    Return the catboost.Pool to train on for the given fit() arguments.

    Three kinds of X are handled:
        * catboost.Pool - validated and returned unchanged;
        * string - handed to Pool(data=X, ...) together with pairs and
          column_description (presumably a dataset path - confirm against Pool);
        * anything else - wrapped in a new Pool together with y and the
          remaining per-object arguments.

    Raises
    ------
    CatboostError
        If X is a Pool and any of cat_features, sample_weight, group_id,
        group_weight, subgroup_id, pairs_weight, baseline is not None;
        if that Pool has no label and no pairs; if y is given next to a Pool;
        or if y is None while X is neither a Pool nor a string.
    """
    if isinstance(X, Pool):
        # These values must already live inside the Pool, so passing them
        # separately is rejected.
        pool_owned_args = (cat_features, sample_weight, group_id, group_weight, subgroup_id, pairs_weight, baseline)
        for arg in pool_owned_args:
            if arg is not None:
                raise CatboostError("cat_features, sample_weight, group_id, group_weight, subgroup_id, pairs_weight, baseline should have the None type when X has catboost.Pool type.")

        # A Pool without a label is still accepted when it carries pairs.
        label_missing = X.get_label() is None
        if label_missing and X.num_pairs() == 0:
            raise CatboostError("Label in X has not initialized.")

        if y is not None:
            raise CatboostError("Wrong initializing y: X is catboost.Pool object, y must be initialized inside catboost.Pool.")
        return X

    if isinstance(X, STRING_TYPES):
        return Pool(data=X, pairs=pairs, column_description=column_description)

    # Raw in-memory data: the label has to be supplied explicitly.
    if y is None:
        raise CatboostError("y has not initialized in fit(): X is not catboost.Pool object, y must be not None in fit().")
    return Pool(
        X,
        y,
        cat_features=cat_features,
        pairs=pairs,
        weight=sample_weight,
        group_id=group_id,
        group_weight=group_weight,
        subgroup_id=subgroup_id,
        pairs_weight=pairs_weight,
        baseline=baseline,
    )
github catboost / catboost / catboost / python-package / catboost / core.py View on Github external
def _build_train_pool(X, y, cat_features, pairs, sample_weight, group_id, group_weight, subgroup_id, pairs_weight, baseline, column_description):
    """
    Return the catboost.Pool to train on for the given fit() arguments.

    Three kinds of X are handled:
        * catboost.Pool - validated and returned unchanged;
        * string - handed to Pool(data=X, ...) together with pairs and
          column_description (presumably a dataset path - confirm against Pool);
        * anything else - wrapped in a new Pool together with y and the
          remaining per-object arguments.

    Raises
    ------
    CatboostError
        If X is a Pool and any of cat_features, sample_weight, group_id,
        group_weight, subgroup_id, pairs_weight, baseline is not None;
        if that Pool has no label and no pairs; if y is given next to a Pool;
        or if y is None while X is neither a Pool nor a string.
    """
    if isinstance(X, Pool):
        # These values must already live inside the Pool, so passing them
        # separately is rejected.
        pool_owned_args = (cat_features, sample_weight, group_id, group_weight, subgroup_id, pairs_weight, baseline)
        for arg in pool_owned_args:
            if arg is not None:
                raise CatboostError("cat_features, sample_weight, group_id, group_weight, subgroup_id, pairs_weight, baseline should have the None type when X has catboost.Pool type.")

        # A Pool without a label is still accepted when it carries pairs.
        label_missing = X.get_label() is None
        if label_missing and X.num_pairs() == 0:
            raise CatboostError("Label in X has not initialized.")

        if y is not None:
            raise CatboostError("Wrong initializing y: X is catboost.Pool object, y must be initialized inside catboost.Pool.")
        return X

    if isinstance(X, STRING_TYPES):
        return Pool(data=X, pairs=pairs, column_description=column_description)

    # Raw in-memory data: the label has to be supplied explicitly.
    if y is None:
        raise CatboostError("y has not initialized in fit(): X is not catboost.Pool object, y must be not None in fit().")
    return Pool(
        X,
        y,
        cat_features=cat_features,
        pairs=pairs,
        weight=sample_weight,
        group_id=group_id,
        group_weight=group_weight,
        subgroup_id=subgroup_id,
        pairs_weight=pairs_weight,
        baseline=baseline,
    )
github catboost / catboost / catboost / python-package / catboost / core.py View on Github external
def _predict(self, data, prediction_type, ntree_start, ntree_end, thread_count, verbose):
        """
        Validate the arguments and compute predictions for `data`.

        `data` is wrapped in a catboost.Pool when it is not one already.
        For the 'MultiClass' and 'MultiClassOneVsAll' loss functions the
        transposed result of _base_predict_multi() is returned; otherwise the
        result of _base_predict() is converted to a numpy array.

        NOTE(review): this excerpt ends right after the 'Probability'
        post-processing; the final return of the non-multiclass path is not
        visible here.

        Raises
        ------
        CatboostError
            If the model is not fitted, if prediction_type is not a string,
            or if prediction_type is not one of 'Class', 'RawFormulaVal',
            'Probability'.
        """
        # Any falsy `verbose` (None, False, 0) falls back to the model's
        # 'verbose' parameter; if that is None too, logging is turned off.
        verbose = verbose or self.get_param('verbose')
        if verbose is None:
            verbose = False
        if not self.is_fitted():
            raise CatboostError("There is no trained model to use predict(). Use fit() to train model. Then use predict().")
        if not isinstance(data, Pool):
            # Raw data gets the model's categorical feature indices, except
            # for FeaturesData, for which cat_features is passed as None.
            data = Pool(
                data=data,
                cat_features=self._get_cat_feature_indices() if not isinstance(data, FeaturesData) else None
            )
        if not isinstance(prediction_type, STRING_TYPES):
            raise CatboostError("Invalid prediction_type type={}: must be str().".format(type(prediction_type)))
        if prediction_type not in ('Class', 'RawFormulaVal', 'Probability'):
            raise CatboostError("Invalid value of prediction_type={}: must be Class, RawFormulaVal or Probability.".format(prediction_type))
        # The loss function is read from 'loss_function', falling back to
        # the 'objective' parameter when the former is not set.
        loss_function_type = self.get_param('loss_function')
        if loss_function_type is None:
            loss_function_type = self.get_param('objective')
        # TODO(kirillovs): very bad solution. user should be able to use custom multiclass losses
        if loss_function_type is not None and (loss_function_type == 'MultiClass' or loss_function_type == 'MultiClassOneVsAll'):
            return np.transpose(self._base_predict_multi(data, prediction_type, ntree_start, ntree_end, thread_count, verbose))
        predictions = np.array(self._base_predict(data, prediction_type, ntree_start, ntree_end, thread_count, verbose))
        if prediction_type == 'Probability':
            # Pair each value p with its complement as [1 - p, p]
            # (assumes `predictions` is 1-D, one value per object - TODO confirm).
            predictions = np.transpose([1 - predictions, predictions])
github catboost / catboost / catboost / python-package / catboost / core.py View on Github external
export_parameters : dict
            Parameters for CoreML export:
                * prediction_type : string - either 'probability' or 'raw'
                * coreml_description : string
                * coreml_model_version : string
                * coreml_model_author : string
                * coreml_model_license: string
        pool : catboost.Pool or list or numpy.array or pandas.DataFrame or pandas.Series or catboost.FeaturesData
            Training pool.
        """
        if not self.is_fitted():
            raise CatboostError("There is no trained model to use save_model(). Use fit() to train model. Then use save_model().")
        if not isinstance(fname, STRING_TYPES):
            raise CatboostError("Invalid fname type={}: must be str().".format(type(fname)))
        if pool is not None and not isinstance(pool, Pool):
            pool = Pool(
                data=pool,
                cat_features=self._get_cat_feature_indices() if not isinstance(pool, FeaturesData) else None
            )
        self._save_model(fname, format, export_parameters, pool)
github catboost / catboost / catboost / python-package / catboost / core.py View on Github external
if train_pool.is_empty_:
            raise CatboostError("X is empty.")

        allow_clear_pool = not isinstance(X, Pool)

        eval_set_list = eval_set if isinstance(eval_set, list) else [eval_set]
        eval_sets = []
        eval_total_row_count = 0
        for eval_set in eval_set_list:
            if isinstance(eval_set, Pool):
                eval_sets.append(eval_set)
                eval_total_row_count += eval_sets[-1].num_row()
                if eval_sets[-1].num_row() == 0:
                    raise CatboostError("Empty 'eval_set' in Pool")
            elif isinstance(eval_set, STRING_TYPES):
                eval_sets.append(Pool(eval_set, column_description=column_description))
                eval_total_row_count += eval_sets[-1].num_row()
                if eval_sets[-1].num_row() == 0:
                    raise CatboostError("Empty 'eval_set' in file {}".format(eval_set))
            elif isinstance(eval_set, tuple):
                if len(eval_set) != 2:
                    raise CatboostError("Invalid shape of 'eval_set': {}, must be (X, y).".format(str(tuple(type(_) for _ in eval_set))))
                eval_sets.append(Pool(eval_set[0], eval_set[1], cat_features=train_pool.get_cat_feature_indices()))
                eval_total_row_count += eval_sets[-1].num_row()
                if eval_sets[-1].num_row() == 0:
                    raise CatboostError("Empty 'eval_set' in tuple")
            elif eval_set is None:
                if len(eval_set_list) > 1:
                    raise CatboostError("Multiple eval set shall not contain None")
            else:
                raise CatboostError("Invalid type of 'eval_set': {}, while expected Pool or (X, y) or filename, or list thereof.".format(type(eval_set)))
github catboost / catboost / catboost / python-package / catboost / core.py View on Github external
Values are calculated for RawFormulaVal predictions.
            - Interaction
                list of length [n_features] of 3-element lists of (first_feature_index, second_feature_index, interaction_score (float))
        """

        if not isinstance(verbose, bool) and not isinstance(verbose, int):
            raise CatboostError('verbose should be bool or int.')
        verbose = int(verbose)
        if verbose < 0:
            raise CatboostError('verbose should be non-negative.')

        fstr_type = enum_from_enum_or_str(EFstrType, fstr_type)
        empty_data_is_ok = (((fstr_type == EFstrType.PredictionValuesChange) and self._object._has_leaf_weights_in_model())
                            or (fstr_type == EFstrType.Interaction))
        if not empty_data_is_ok:
            if not isinstance(data, Pool):
                raise CatboostError("Invalid metric type={}, must be catboost.Pool.".format(type(data)))
            if data.is_empty_:
                raise CatboostError("data is empty.")

        with log_fixup():
            fstr, feature_names = self._calc_fstr(fstr_type, data, thread_count, verbose)
        if fstr_type == EFstrType.PredictionValuesChange or fstr_type == EFstrType.LossFunctionChange:
            feature_importances = [value[0] for value in fstr]
            if prettified:
                return sorted(zip(feature_names, feature_importances), key=itemgetter(1), reverse=True)
            else:
                return feature_importances
        if fstr_type == EFstrType.ShapValues:
            if isinstance(fstr[0][0], ARRAY_TYPES):
                return np.array([np.array([np.array([
                    value for value in dimension]) for dimension in doc]) for doc in fstr])