def _fit_model(pool, case, fold_id, model_path):
from catboost import CatBoost, CatBoostError
import time
# Learn model
make_dirs_if_not_exists(FoldModelsHandler.__MODEL_DIR)
feature_count = pool.num_col()
if "ignored_features" in case.get_params():
ignored_features = case.get_params()["ignored_features"]
if len(ignored_features) and max(ignored_features) >= feature_count:
raise CatBoostError("Error: input parameter contains feature indices wich are not available in pool: "
"{}\n "
"Check eval_feature set and ignored features options".format(ignored_features))
get_eval_logger().debug('Learn model {} on fold #{}'.format(str(case), fold_id))
cur_time = time.time()
instance = CatBoost(params=case.get_params())
instance.fit(pool)
instance.save_model(fname=model_path)
get_eval_logger().debug('Operation was done in {} seconds'.format(time.time() - cur_time))
return FoldModel(case, model_path, fold_id)
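# --- Usage sketch (added for illustration; not part of the original snippet) ---
# A minimal, self-contained example of the train-and-save pattern that _fit_model
# wraps, using only the public CatBoost API. The fold/case bookkeeping from the
# snippet above (ExecutionCase, FoldModel, logging helpers) is intentionally omitted.
def _train_and_save_example():
    from catboost import CatBoost, Pool

    # Tiny in-memory dataset: two numeric features, binary target.
    pool = Pool(data=[[0.0, 1.0], [1.0, 0.0], [1.0, 1.0], [0.0, 0.0]],
                label=[0, 1, 1, 0])
    params = {"iterations": 10, "loss_function": "Logloss", "verbose": False}

    model = CatBoost(params=params)
    model.fit(pool)
    model.save_model(fname="example_fold_model.cbm")
    return model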
Returns
-------
result : Instance of EvaluationResult class
"""
features_to_eval = set(features_to_eval)
if eval_metrics is None:
eval_metrics = []
eval_metrics = eval_metrics if isinstance(eval_metrics, list) else [eval_metrics]
if isinstance(learn_config, CatBoost):
params = learn_config.get_params()
else:
params = dict(learn_config)
if loss_function is not None:
if "loss_function" in params and params["loss_function"] != loss_function:
raise CatBoostError("Loss function in params {} should be equal to feature evaluation objective "
"function {}".format(params["loss_function"], loss_function))
else:
if "loss_function" not in params:
raise CatBoostError("Provide loss function in params or as option to eval_features method")
if thread_count is not None and thread_count != -1:
params["thread_count"] = thread_count
if eval_step is None:
eval_step = 1
if loss_function is not None:
params["loss_function"] = loss_function
else:
loss_function = params["loss_function"]
def _add(self, model, learning_curve):
if model.get_case() != self._case:
raise CatBoostError("Model case should be equal to result case")
fold_id = model.get_fold_id()
self._fold_curves[fold_id] = learning_curve
score = max(learning_curve) if self._metric_description.is_max_optimal() else min(learning_curve)
position = np.argmax(learning_curve) if self._metric_description.is_max_optimal() else np.argmin(learning_curve)
self._fold_metric.at[fold_id] = score
self._fold_metric_iteration.at[fold_id] = position
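# --- Illustration (added; not part of the original snippet) ---
# How the per-fold best score and best iteration are extracted from a learning
# curve in _add above, shown on a toy curve for a max-optimal metric (e.g. AUC).
import numpy as np

learning_curve = [0.61, 0.68, 0.74, 0.73, 0.75, 0.74]  # metric value per eval step
is_max_optimal = True                                   # True for AUC-like metrics

score = max(learning_curve) if is_max_optimal else min(learning_curve)
position = np.argmax(learning_curve) if is_max_optimal else np.argmin(learning_curve)
# score == 0.75, position == 4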
def _calculate_result_metrics(self, cases, metrics, thread_count=-1, evaluation_step=1):
"""
Calculate the requested metrics and return them.
:param cases: List of the ExecutionCases to evaluate
:param metrics: List of the metrics to be computed
:param thread_count: Number of threads to use
:param evaluation_step: Step used when evaluating metrics
:return: instance of EvaluationResult
"""
cases_set = set(cases)
if len(cases_set) != len(cases):
raise CatBoostError("Found duplicate cases in " + cases)
current_wd = self.__go_to_working_dir()
try:
if self._fold_count <= self._fold_offset:
error_msg = 'Count of folds (folds_count - offset) must be at least one: offset {}, folds_count {}.'
raise AttributeError(error_msg.format(self._fold_offset,
self._fold_count))
handler = FoldModelsHandler(cases=cases,
metrics=metrics,
eval_step=evaluation_step,
thread_count=thread_count,
remove_models=self._remove_models)
reader = _SimpleStreamingFileReader(self._path_to_dataset,
sep=self._delimiter,
group_feature_num=self._group_feature_num)
self._case_results = dict()
self._case_comparisons = dict()
self._cases = [case_result.get_case() for case_result in case_results]
for case_result in case_results:
case = case_result.get_case()
self._case_results[case] = case_result
self._metric_description = case_results[0].get_metric_description()
self._baseline_case = case_results[0].get_case()
self._score_config = ScoreConfig()
for (case, case_result) in self._case_results.items():
if case_result.get_metric_description() != self._metric_description:
raise CatBoostError("Metric names should be equal for all case results")
if case_result.get_fold_ids() != self.get_fold_ids():
raise CatBoostError("Case results should be computed on the same folds")
if case_result.get_eval_step() != self.get_eval_step():
raise CatBoostError("Eval steps should be equal for different cases")
def _change_score_config(self, config):
if config is not None:
if isinstance(config, ScoreType):
if config == ScoreType.Abs:
config = ScoreConfig.abs_score()
elif config == ScoreType.Rel:
config = ScoreConfig.rel_score()
else:
raise CatBoostError("Unknown scoreType {}".format(config))
if self._score_config != config:
self._score_config = config
self.__clear_comparisons()
def __init__(self, metric_results):
if len(metric_results) < 1:
raise CatBoostError("Need at least one result")
self._results = dict()
self._metrics = dict()
self._cases = None
for result in metric_results:
metric_description = result.get_metric_description()
if metric_description in self._results:
raise CatBoostError("Duplicate metric {}".format(metric_description))
if self._cases is None:
self._cases = result.get_cases()
key = metric_description_or_str_to_str(metric_description)
self._results[key] = result
self._metrics[key] = metric_description
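# --- Usage sketch (added for illustration; not part of the original snippet) ---
# The constructor above indexes per-metric results by a string key, so a result can
# later be looked up either by a metric description object or by its name. A
# hypothetical lookup helper following that convention might look like:
def _get_metric_result(results_by_key, metric_description_or_str):
    # metric_description_or_str_to_str is the snippet's own key-normalization helper.
    key = metric_description_or_str_to_str(metric_description_or_str)
    if key not in results_by_key:
        raise KeyError("Unknown metric: {}".format(key))
    return results_by_key[key]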