How to use the imblearn.pipeline.Pipeline class in imblearn

To help you get started, we’ve selected a few imblearn examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github exactpro / nostradamus / exactpro / model.py View on Github external
def training_imbalance_kf(X_, Y_, TFIDF_, IMB_, FS_, pers_, CLF_, name_, model_path):
    """Fit a resampling classification pipeline and pickle it to disk.

    The pipeline chains ``TFIDF_`` -> ``IMB_`` -> ``SelectPercentile(FS_)``
    -> ``CLF_``. It is fitted on the training part of the last of 10
    ``KFold`` splits and then written to ``model_path + name_ + '.sav'``.

    Parameters:
        X_: samples; indexed with the index arrays produced by
            ``KFold.split``;
        Y_: targets; indexed with the same index arrays as ``X_``;
        TFIDF_: first pipeline step (named ``'tfidf'``);
        IMB_: second pipeline step (named ``'imba'``);
        FS_: score function handed to ``SelectPercentile``;
        pers_: value assigned to the selector's ``percentile`` parameter;
        CLF_: final pipeline step (named ``'clf'``);
        name_(str): file name (without extension) of the saved model;
        model_path(str): prefix prepended to ``name_``; it is concatenated
            as-is, so it must already end with a path separator if it
            denotes a directory.

    Returns:
        None. The fitted pipeline is only persisted to disk.
    """
    transform = feature_selection.SelectPercentile(FS_)
    clf_model = Pipeline([
        ('tfidf', TFIDF_),
        ('imba', IMB_),
        ('fs', transform),
        ('clf', CLF_),
    ])

    kf = KFold(n_splits=10)
    # NOTE(review): the previous implementation looped over every fold but
    # fitted only once, after the loop, so just the final split was ever
    # used. That behaviour is kept here on purpose; confirm whether real
    # cross-validation was intended.
    train_index, _ = list(kf.split(X_))[-1]

    clf_model.set_params(fs__percentile=pers_).fit(X_[train_index], Y_[train_index])

    # Use a context manager so the file is flushed and closed even if
    # pickling fails part-way through.
    with open(model_path + name_ + '.sav', 'wb') as model_file:
        pickle.dump(clf_model, model_file)
github exactpro / nostradamus / main / training.py View on Github external
model_path):
    """ Trains models using handled setting and saves them as .sav objects.

        Parameters:
            descr_series(Series): description series;
            classes_codes(Series): series with classes' codes;
            TFIDF_: vectorizer;
            IMB_: SMOTE method;
            FS_: ranking terms method;
            req_percentage(int): percentage to be taken from the ranked list;
            CLF_: classifier;
            model_path(str): the path to the model.

    """
    transformer = feature_selection.SelectPercentile(FS_)
    clf_model = Pipeline([('tfidf', TFIDF_), ('imba', IMB_),
                          ('fs', transformer), ('clf', CLF_)])
    clf_model.set_params(
        fs__percentile=req_percentage).fit(
        descr_series,
        classes_codes)
    dump(clf_model, open(model_path + '.sav', 'wb'))
github akoury / ml-helper / src / ml_helper / helper.py View on Github external
def stack_predict(self, df, holdout, pipes, amount=2):
        X, y = self.split_x_y(df)
        X_test, y_test = self.split_x_y(holdout)

        pipe = Pipeline(self.top_pipeline(pipes).steps[:-1])
        X = pipe.fit_transform(X)
        X_test = pipe.transform(X_test)

        estimators = []

        for i in range(amount):
            estimators.append((str(i), self.top_pipeline(pipes, i).steps[-1][1]))

        regression = False

        if self.METRIC in [
            "explained_variance",
            "neg_mean_absolute_error",
            "neg_mean_squared_error",
            "neg_mean_squared_log_error",
            "neg_median_absolute_error",
github melqkiades / yelp / source / python / evaluation / classifier_evaluator.py View on Github external
def error_estimation(
        x_matrix, y_vector, param_grid, cv=None, scoring=None):
    """Cross-validate a grid search over a resampler/classifier pipeline.

    A two-step pipeline is built with an empty ``'resampler'`` step and a
    ``DummyClassifier`` as the ``'classifier'`` step. It is wrapped in a
    ``GridSearchCV`` driven by ``param_grid``, and that search object is
    then scored with ``cross_val_score``.

    Parameters:
        x_matrix: samples passed to ``cross_val_score``;
        y_vector: targets passed to ``cross_val_score``;
        param_grid: grid handed to ``GridSearchCV``; presumably it
            replaces the placeholder steps with real resamplers and
            classifiers -- verify against the caller;
        cv: cross-validation setting of the grid search only;
        scoring: scoring setting of the grid search only.

    Returns:
        Whatever ``cross_val_score`` returns for the grid-search estimator.
        The outer call receives no ``cv``/``scoring`` and so uses its own
        defaults.
    """
    placeholder_steps = [
        ('resampler', None),
        ('classifier', DummyClassifier()),
    ]
    search = GridSearchCV(
        Pipeline(placeholder_steps), param_grid, cv=cv, scoring=scoring)

    scores = cross_val_score(search, x_matrix, y_vector)
    return scores
github scikit-learn-contrib / imbalanced-learn / imblearn / ensemble / _bagging.py View on Github external
"n_estimators must be an integer, "
                "got {}.".format(type(self.n_estimators))
            )

        if self.n_estimators <= 0:
            raise ValueError(
                "n_estimators must be greater than zero, "
                "got {}.".format(self.n_estimators)
            )

        if self.base_estimator is not None:
            base_estimator = clone(self.base_estimator)
        else:
            base_estimator = clone(default)

        self.base_estimator_ = Pipeline(
            [
                (
                    "sampler",
                    RandomUnderSampler(
                        sampling_strategy=self.sampling_strategy,
                        replacement=self.replacement,
                    ),
                ),
                ("classifier", base_estimator),
            ]
github scikit-learn-contrib / imbalanced-learn / imblearn / pipeline.py View on Github external
>>> make_pipeline(StandardScaler(), GaussianNB(priors=None))
    ... # doctest: +NORMALIZE_WHITESPACE
    Pipeline(memory=None,
             steps=[('standardscaler',
                     StandardScaler(copy=True, with_mean=True, with_std=True)),
                    ('gaussiannb',
                     GaussianNB(priors=None, var_smoothing=1e-09))],
             verbose=False)
    """
    memory = kwargs.pop("memory", None)
    verbose = kwargs.pop('verbose', False)
    if kwargs:
        raise TypeError(
            'Unknown keyword arguments: "{}"'.format(list(kwargs.keys())[0])
        )
    return Pipeline(
        pipeline._name_estimators(steps), memory=memory, verbose=verbose
    )
github melqkiades / yelp / source / python / evaluation / classifier_evaluator.py View on Github external
'kneighborsclassifier': KNeighborsClassifier(),
            'decisiontreeclassifier': DecisionTreeClassifier(),
            'nusvc': NuSVC(),
            'randomforestclassifier': RandomForestClassifier()
        }

        classifier = classifiers[parameters['classifier'].lower()]
        # print(classifier)
        classifier_params = get_classifier_params(parameters)
        classifier.set_params(**classifier_params)
        print(classifier)

        resampler = sampler_factory.create_sampler(
            parameters['resampler'], Constants.DOCUMENT_CLASSIFIER_SEED)

        return Pipeline([('resampler', resampler), ('classifier', classifier)])