How to use the imblearn.under_sampling.EditedNearestNeighbours function in imblearn

To help you get started, we’ve selected a few imblearn examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github ZhiningLiu1998 / self-paced-ensemble / canonical_resampling.py View on Github external
def fit(self, X, y, by, random_state=None, visualize=False):
        '''
        by: String
            The method used to perform re-sampling
            currently support: ['RUS', 'CNN', 'ENN', 'NCR', 'Tomek', 'ALLKNN', 'OSS',
                'NM', 'CC', 'SMOTE', 'ADASYN', 'BorderSMOTE', 'SMOTEENN', 'SMOTETomek',
                'ORG']
        '''
        if by == 'RUS':
            sampler = RandomUnderSampler(random_state=random_state)
        elif by == 'CNN':
            sampler = CondensedNearestNeighbour(random_state=random_state)
        elif by == 'ENN':
            sampler = EditedNearestNeighbours(random_state=random_state)
        elif by == 'NCR':
            sampler = NeighbourhoodCleaningRule(random_state=random_state)
        elif by == 'Tomek':
            sampler = TomekLinks(random_state=random_state)
        elif by == 'ALLKNN':
            sampler = AllKNN(random_state=random_state)
        elif by == 'OSS':
            sampler = OneSidedSelection(random_state=random_state)
        elif by == 'NM':
            sampler = NearMiss(random_state=random_state)
        elif by == 'CC':
            sampler = ClusterCentroids(random_state=random_state)
        elif by == 'SMOTE':
            sampler = SMOTE(random_state=random_state)
        elif by == 'ADASYN':
            sampler = ADASYN(random_state=random_state)
github scikit-learn-contrib / imbalanced-learn / examples / under-sampling / plot_enn_renn_allknn.py View on Github external
n_features=5, n_clusters_per_class=1,
                           n_samples=100, random_state=10)

# Instanciate a PCA object for the sake of easy visualisation
pca = PCA(n_components=2)
# Fit and transform x to visualise inside a 2D feature space
X_vis = pca.fit_transform(X)

# Three subplots, unpack the axes array immediately
f, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2)

c0, c1 = plot_resampling(ax1, X_vis, y, 'Original set')

# Apply the ENN
print('ENN')
enn = EditedNearestNeighbours(return_indices=True)
X_resampled, y_resampled, idx_resampled = enn.fit_resample(X, y)
X_res_vis = pca.transform(X_resampled)
idx_samples_removed = np.setdiff1d(np.arange(X_vis.shape[0]), idx_resampled)
reduction_str = ('Reduced {:.2f}%'.format(100 * (1 - float(len(X_resampled)) /
                                                 len(X))))
print(reduction_str)
c3 = ax2.scatter(X_vis[idx_samples_removed, 0],
                 X_vis[idx_samples_removed, 1],
                 alpha=.2, label='Removed samples', c='g')
plot_resampling(ax2, X_res_vis, y_resampled, 'ENN - ' + reduction_str)

# Apply the RENN
print('RENN')
renn = RepeatedEditedNearestNeighbours(return_indices=True)
X_resampled, y_resampled, idx_resampled = renn.fit_resample(X, y)
X_res_vis = pca.transform(X_resampled)
github scikit-learn-contrib / imbalanced-learn / examples / under-sampling / plot_repeated_edited_nearest_neighbours.py View on Github external
pca = PCA(n_components=2)
# Fit and transform x to visualise inside a 2D feature space
X_vis = pca.fit_transform(X)

# Three subplots, unpack the axes array immediately
f, (ax1, ax2, ax3) = plt.subplots(1, 3)

ax1.scatter(X_vis[y == 0, 0], X_vis[y == 0, 1], label="Class #0", alpha=.5,
            edgecolor=almost_black, facecolor=palette[0], linewidth=0.15)
ax1.scatter(X_vis[y == 1, 0], X_vis[y == 1, 1], label="Class #1", alpha=.5,
            edgecolor=almost_black, facecolor=palette[2], linewidth=0.15)
ax1.set_title('Original set')

# Apply the ENN
print('ENN')
enn = EditedNearestNeighbours()
X_resampled, y_resampled = enn.fit_sample(X, y)
X_res_vis = pca.transform(X_resampled)

ax2.scatter(X_res_vis[y_resampled == 0, 0], X_res_vis[y_resampled == 0, 1],
            label="Class #0", alpha=.5, edgecolor=almost_black,
            facecolor=palette[0], linewidth=0.15)
ax2.scatter(X_res_vis[y_resampled == 1, 0], X_res_vis[y_resampled == 1, 1],
            label="Class #1", alpha=.5, edgecolor=almost_black,
            facecolor=palette[2], linewidth=0.15)
ax2.set_title('ENN')

# Apply the RENN
print('RENN')
renn = RepeatedEditedNearestNeighbours()
X_resampled, y_resampled = renn.fit_sample(X, y)
X_res_vis = pca.transform(X_resampled)
github HunterMcGushion / hyperparameter_hunter / examples / feature_engineering_examples / imblearn_resampling_example.py View on Github external
def under_sample_EditedNearestNeighbours(train_inputs, train_targets):
    sampler = EditedNearestNeighbours(random_state=32)
    train_inputs, train_targets = _sampler_helper(sampler, train_inputs, train_targets)
    return train_inputs, train_targets
github scikit-learn-contrib / imbalanced-learn / imblearn / combine / _smote_enn.py View on Github external
sampling_strategy=self.sampling_strategy,
                random_state=self.random_state,
                n_jobs=self.n_jobs,
            )

        if self.enn is not None:
            if isinstance(self.enn, EditedNearestNeighbours):
                self.enn_ = clone(self.enn)
            else:
                raise ValueError(
                    "enn needs to be an EditedNearestNeighbours."
                    " Got {} instead.".format(type(self.enn))
                )
        # Otherwise create a default EditedNearestNeighbours
        else:
            self.enn_ = EditedNearestNeighbours(
                sampling_strategy="all", n_jobs=self.n_jobs
            )
github IBM / lale / lale / lib / imblearn / edited_nearest_neighbours.py View on Github external
def __init__(self, operator = None, sampling_strategy='auto', random_state=None, 
                n_neighbors=3, kind_sel='all', n_jobs=1):
        if operator is None:
            raise ValueError("Operator is a required argument.")

        self._hyperparams = {
            'sampling_strategy': sampling_strategy,
            'random_state': random_state,
            'n_neighbors': n_neighbors,
            'kind_sel': kind_sel,
            'n_jobs': n_jobs}
    
        resampler_instance = OrigModel(**self._hyperparams)
        super(EditedNearestNeighboursImpl, self).__init__(
            operator = operator,
            resampler = resampler_instance)
github melqkiades / yelp / source / python / etl / sampler_factory.py View on Github external
def create_sampler(sampler_name, random_state=None):

    if sampler_name is None or sampler_name == 'None':
        return None
    if sampler_name.lower() == 'randomundersampler':
        return RandomUnderSampler(random_state=random_state)
    if sampler_name.lower() == 'tomeklinks':
        return TomekLinks(random_state=random_state)
    if sampler_name.lower() == 'enn':
        return EditedNearestNeighbours(random_state=random_state)
    if sampler_name.lower() == 'ncl':
        return NeighbourhoodCleaningRule(random_state=random_state)
    if sampler_name.lower() == 'randomoversampler':
        return RandomOverSampler(random_state=random_state)
    if sampler_name.lower() == 'smote':
        return SMOTE(random_state=random_state)
    if sampler_name.lower() == 'smotetomek':
        return SMOTETomek(random_state=random_state)
    if sampler_name.lower() == 'smoteenn':
        return SMOTEENN(random_state=random_state)
    else:
        raise ValueError('Unsupported value \'%s\' for sampler' % sampler_name)
github scikit-learn-contrib / imbalanced-learn / examples / under-sampling / plot_allknn.py View on Github external
pca = PCA(n_components=2)
# Fit and transform x to visualise inside a 2D feature space
X_vis = pca.fit_transform(X)

# Three subplots, unpack the axes array immediately
f, (ax1, ax2, ax3, ax4) = plt.subplots(1, 4)

ax1.scatter(X_vis[y == 0, 0], X_vis[y == 0, 1], label="Class #0", alpha=.5,
            edgecolor=almost_black, facecolor=palette[0], linewidth=0.15)
ax1.scatter(X_vis[y == 1, 0], X_vis[y == 1, 1], label="Class #1", alpha=.5,
            edgecolor=almost_black, facecolor=palette[2], linewidth=0.15)
ax1.set_title('Original set')

# Apply the ENN
print('ENN')
enn = EditedNearestNeighbours()
X_resampled, y_resampled = enn.fit_sample(X, y)
X_res_vis = pca.transform(X_resampled)
print('Reduced {:.2f}\%'.format(100 * (1 - float(len(X_resampled)) / len(X))))

ax2.scatter(X_res_vis[y_resampled == 0, 0], X_res_vis[y_resampled == 0, 1],
            label="Class #0", alpha=.5, edgecolor=almost_black,
            facecolor=palette[0], linewidth=0.15)
ax2.scatter(X_res_vis[y_resampled == 1, 0], X_res_vis[y_resampled == 1, 1],
            label="Class #1", alpha=.5, edgecolor=almost_black,
            facecolor=palette[2], linewidth=0.15)
ax2.set_title('ENN')

# Apply the RENN
print('RENN')
renn = RepeatedEditedNearestNeighbours()
X_resampled, y_resampled = renn.fit_sample(X, y)
github melqkiades / yelp / source / python / evaluation / classifier_evaluator.py View on Github external
from etl import ETLUtils
from etl import sampler_factory
from nlp import nlp_utils
from topicmodeling.context import review_metrics_extractor
from utils.constants import Constants


RANDOM_STATE = 0
SCORE_METRIC = 'accuracy'
# SCORE_METRIC = 'roc_auc'
resamplers = [
    None,
    RandomUnderSampler(random_state=RANDOM_STATE),
    TomekLinks(random_state=RANDOM_STATE),
    EditedNearestNeighbours(random_state=RANDOM_STATE),
    NeighbourhoodCleaningRule(random_state=RANDOM_STATE),
    RandomOverSampler(random_state=RANDOM_STATE),
    SMOTE(random_state=RANDOM_STATE),
    SMOTETomek(random_state=RANDOM_STATE),
    SMOTEENN(random_state=RANDOM_STATE)
]


PARAM_GRID_MAP = {
    'DummyClassifier': {
        'resampler': resamplers,
        'classifier': [DummyClassifier(random_state=RANDOM_STATE)],
        'classifier__strategy': ['most_frequent', 'stratified', 'uniform']
    },
    'LogisticRegression': {
        'resampler': resamplers,