How to use the imblearn.utils.Substitution function in imblearn

To help you get started, we’ve selected a few imblearn examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github scikit-learn-contrib / imbalanced-learn / imblearn / ensemble / _easy_ensemble.py View on Github external
from sklearn.base import clone
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import BaggingClassifier

from ..under_sampling import RandomUnderSampler
from ..under_sampling.base import BaseUnderSampler
from ..utils import Substitution, check_target_type
from ..utils._docstring import _n_jobs_docstring
from ..utils._docstring import _random_state_docstring
from ..pipeline import Pipeline

MAX_INT = np.iinfo(np.int32).max


@Substitution(
    sampling_strategy=BaseUnderSampler._sampling_strategy_docstring,
    n_jobs=_n_jobs_docstring,
    random_state=_random_state_docstring,
)
class EasyEnsembleClassifier(BaggingClassifier):
    """Bag of balanced boosted learners also known as EasyEnsemble.

    This algorithm is known as EasyEnsemble [1]_. The classifier is an
    ensemble of AdaBoost learners trained on different balanced boostrap
    samples. The balancing is achieved by random under-sampling.

    Read more in the :ref:`User Guide `.

    Parameters
    ----------
    n_estimators : int, default=10
github scikit-learn-contrib / imbalanced-learn / imblearn / under_sampling / _prototype_selection / _tomek_links.py View on Github external
# Authors: Guillaume Lemaitre 
#          Fernando Nogueira
#          Christos Aridas
# License: MIT

import numpy as np
from sklearn.neighbors import NearestNeighbors
from sklearn.utils import _safe_indexing

from ..base import BaseCleaningSampler
from ...utils import Substitution
from ...utils._docstring import _n_jobs_docstring


@Substitution(
    sampling_strategy=BaseCleaningSampler._sampling_strategy_docstring,
    n_jobs=_n_jobs_docstring,
)
class TomekLinks(BaseCleaningSampler):
    """Under-sampling by removing Tomek's links.

    Read more in the :ref:`User Guide `.

    Parameters
    ----------
    {sampling_strategy}

    {n_jobs}

    Attributes
    ----------
github scikit-learn-contrib / imbalanced-learn / imblearn / under_sampling / _prototype_selection / _condensed_nearest_neighbour.py View on Github external
import numpy as np

from scipy.sparse import issparse

from sklearn.base import clone
from sklearn.neighbors import KNeighborsClassifier
from sklearn.utils import check_random_state, _safe_indexing

from ..base import BaseCleaningSampler
from ...utils import Substitution
from ...utils._docstring import _n_jobs_docstring
from ...utils._docstring import _random_state_docstring


@Substitution(
    sampling_strategy=BaseCleaningSampler._sampling_strategy_docstring,
    n_jobs=_n_jobs_docstring,
    random_state=_random_state_docstring,
)
class CondensedNearestNeighbour(BaseCleaningSampler):
    """Undersample based on the condensed nearest neighbour method.

    Read more in the :ref:`User Guide `.

    Parameters
    ----------
    {sampling_strategy}

    {random_state}

    n_neighbors : int or object, default=\
github scikit-learn-contrib / imbalanced-learn / imblearn / under_sampling / _prototype_selection / _neighbourhood_cleaning_rule.py View on Github external
import numpy as np
from scipy.stats import mode

from sklearn.utils import _safe_indexing

from ..base import BaseCleaningSampler
from ._edited_nearest_neighbours import EditedNearestNeighbours
from ...utils import check_neighbors_object
from ...utils import Substitution
from ...utils._docstring import _n_jobs_docstring

SEL_KIND = ("all", "mode")


@Substitution(
    sampling_strategy=BaseCleaningSampler._sampling_strategy_docstring,
    n_jobs=_n_jobs_docstring,
)
class NeighbourhoodCleaningRule(BaseCleaningSampler):
    """Undersample based on the neighbourhood cleaning rule.

    This class uses ENN and a k-NN to remove noisy samples from the datasets.

    Read more in the :ref:`User Guide `.

    Parameters
    ----------
    {sampling_strategy}

    n_neighbors : int or object, default=3
        If ``int``, size of the neighbourhood to consider to compute the
github scikit-learn-contrib / imbalanced-learn / imblearn / ensemble / _weight_boosting.py View on Github external
import numpy as np

from sklearn.base import clone
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble._base import _set_random_states
from sklearn.utils import _safe_indexing

from ..under_sampling.base import BaseUnderSampler
from ..under_sampling import RandomUnderSampler
from ..pipeline import make_pipeline
from ..utils import Substitution, check_target_type
from ..utils._docstring import _random_state_docstring


@Substitution(
    sampling_strategy=BaseUnderSampler._sampling_strategy_docstring,
    random_state=_random_state_docstring,
)
class RUSBoostClassifier(AdaBoostClassifier):
    """Random under-sampling integrated in the learning of AdaBoost.

    During learning, the problem of class balancing is alleviated by random
    under-sampling the sample at each iteration of the boosting algorithm.

    Read more in the :ref:`User Guide `.

    Parameters
    ----------
    base_estimator : object, default=None
        The base estimator from which the boosted ensemble is built.
        Support for sample weighting is required, as well as proper
github scikit-learn-contrib / imbalanced-learn / imblearn / under_sampling / _prototype_generation / _cluster_centroids.py View on Github external
from scipy import sparse

from sklearn.base import clone
from sklearn.cluster import KMeans
from sklearn.neighbors import NearestNeighbors
from sklearn.utils import _safe_indexing

from ..base import BaseUnderSampler
from ...utils import Substitution
from ...utils._docstring import _n_jobs_docstring
from ...utils._docstring import _random_state_docstring

VOTING_KIND = ("auto", "hard", "soft")


@Substitution(
    sampling_strategy=BaseUnderSampler._sampling_strategy_docstring,
    n_jobs=_n_jobs_docstring,
    random_state=_random_state_docstring,
)
class ClusterCentroids(BaseUnderSampler):
    """Undersample by generating centroids based on clustering methods.

    Method that under samples the majority class by replacing a
    cluster of majority samples by the cluster centroid of a KMeans
    algorithm.  This algorithm keeps N majority samples by fitting the
    KMeans algorithm with N cluster to the majority class and using
    the coordinates of the N cluster centroids as the new majority
    samples.

    Read more in the :ref:`User Guide `.
github scikit-learn-contrib / imbalanced-learn / imblearn / over_sampling / _adasyn.py View on Github external
# License: MIT

import numpy as np
from scipy import sparse

from sklearn.utils import check_random_state
from sklearn.utils import _safe_indexing

from .base import BaseOverSampler
from ..utils import check_neighbors_object
from ..utils import Substitution
from ..utils._docstring import _n_jobs_docstring
from ..utils._docstring import _random_state_docstring


@Substitution(
    sampling_strategy=BaseOverSampler._sampling_strategy_docstring,
    n_jobs=_n_jobs_docstring,
    random_state=_random_state_docstring,
)
class ADASYN(BaseOverSampler):
    """Oversample using Adaptive Synthetic (ADASYN) algorithm.

    This method is similar to SMOTE but it generates different number of
    samples depending on an estimate of the local distribution of the class
    to be oversampled.

    Read more in the :ref:`User Guide `.

    Parameters
    ----------
    {sampling_strategy}
github scikit-learn-contrib / imbalanced-learn / imblearn / combine / _smote_enn.py View on Github external
# License: MIT

from sklearn.base import clone
from sklearn.utils import check_X_y

from ..base import BaseSampler
from ..over_sampling import SMOTE
from ..over_sampling.base import BaseOverSampler
from ..under_sampling import EditedNearestNeighbours
from ..utils import check_target_type
from ..utils import Substitution
from ..utils._docstring import _n_jobs_docstring
from ..utils._docstring import _random_state_docstring


@Substitution(
    sampling_strategy=BaseOverSampler._sampling_strategy_docstring,
    n_jobs=_n_jobs_docstring,
    random_state=_random_state_docstring,
)
class SMOTEENN(BaseSampler):
    """Over-sampling using SMOTE and cleaning using ENN.

    Combine over- and under-sampling using SMOTE and Edited Nearest Neighbours.

    Read more in the :ref:`User Guide `.

    Parameters
    ----------
    {sampling_strategy}

    {random_state}
github AlgoWit / geometric-smote / gsmote / geometric_smote.py View on Github external
# Deformation
    parallel_point_position = np.dot(point, parallel_unit_vector) * parallel_unit_vector
    perpendicular_point_position = point - parallel_point_position
    point = (
        parallel_point_position
        + (1 - deformation_factor) * perpendicular_point_position
    )

    # Translation
    point = center + radius * point

    return point


@Substitution(
    sampling_strategy=BaseOverSampler._sampling_strategy_docstring,
    random_state=_random_state_docstring,
)
class GeometricSMOTE(BaseOverSampler):
    """Class to to perform over-sampling using Geometric SMOTE.

    This algorithm is an implementation of Geometric SMOTE, a geometrically
    enhanced drop-in replacement for SMOTE as presented in [1]_.

    Read more in the :ref:`User Guide `.

    Parameters
    ----------
    {sampling_strategy}

    {random_state}