How to use the aif360.datasets.AdultDataset class in aif360

To help you get started, we've selected a few aif360 examples based on popular ways AdultDataset is used in public projects.
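
Before diving into the examples, here is a minimal sketch of constructing the dataset. Note that AIF360 does not bundle the raw data: on first use the constructor raises an error with instructions for downloading the UCI Adult files (adult.data, adult.test, adult.names) into aif360's data directory.

from aif360.datasets import AdultDataset

# Minimal sketch: the default configuration uses binary 'race' and 'sex'
# as protected attributes and '>50K' as the favorable label.
ad = AdultDataset()
print(ad.feature_names[:5])   # one-hot-encoded feature columns
print(ad.labels.mean())       # fraction of favorable labels (~0.2478, see below)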

github IBM / AIF360 / tests / test_differential_fairness.py
def test_epsilon_all_groups():
    def custom_preprocessing(df):
        # slight workaround for non-binary protected attribute
        # feature should be categorical but protected attribute should be numerical
        mapping = {'Black': 0, 'White': 1, 'Asian-Pac-Islander': 2,
                   'Amer-Indian-Eskimo': 3, 'Other': 4}
        df['race-num'] = df.race.map(mapping)
        return df.fillna('Unknown')

    nonbinary_ad = AdultDataset(
            protected_attribute_names=['sex', 'native-country', 'race-num'],
            privileged_classes=[['Male'], ['United-States'], [1]],
            categorical_features=['workclass', 'education', 'marital-status',
                                  'occupation', 'relationship', 'race'],
            custom_preprocessing=custom_preprocessing)
    # drop redundant race feature (not relevant to this test)
    index = nonbinary_ad.feature_names.index('race-num')
    nonbinary_ad.features = np.delete(nonbinary_ad.features, index, axis=1)
    nonbinary_ad.feature_names = np.delete(nonbinary_ad.feature_names, index)

    nonbinary_test, _ = nonbinary_ad.split([16281], shuffle=False)
    dataset_metric = BinaryLabelDatasetMetric(nonbinary_test)
    eps_data = dataset_metric.smoothed_empirical_differential_fairness()
    assert eps_data == 2.063813731996515  # verified with reference implementation
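
For a simpler starting point than the non-binary workaround above, the same metric can be computed on the default binary setup. A hedged sketch; the exact epsilon depends on the dataset configuration:

from aif360.datasets import AdultDataset
from aif360.metrics import BinaryLabelDatasetMetric

ad = AdultDataset()  # defaults: binary 'race' and 'sex' protected attributes
metric = BinaryLabelDatasetMetric(ad)
print(metric.smoothed_empirical_differential_fairness())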
github IBM / AIF360 / tests / sklearn / test_calibrated_equalized_odds.py
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

from aif360.datasets import AdultDataset
from aif360.sklearn.datasets import fetch_adult
from aif360.algorithms.postprocessing import CalibratedEqOddsPostprocessing
from aif360.sklearn.postprocessing import CalibratedEqualizedOdds, PostProcessingMeta


X, y, sample_weight = fetch_adult(numeric_only=True)
adult = AdultDataset(instance_weights_name='fnlwgt', categorical_features=[],
        features_to_keep=['age', 'education-num', 'capital-gain', 'capital-loss',
                          'hours-per-week'], features_to_drop=[])

def test_calib_eq_odds_sex_weighted():
    logreg = LogisticRegression(solver='lbfgs', max_iter=500)
    y_pred = logreg.fit(X, y, sample_weight=sample_weight).predict_proba(X)
    adult_pred = adult.copy()
    adult_pred.scores = y_pred[:, 1]
    orig_cal_eq_odds = CalibratedEqOddsPostprocessing(
            unprivileged_groups=[{'sex': 0}], privileged_groups=[{'sex': 1}])
    orig_cal_eq_odds.fit(adult, adult_pred)
    cal_eq_odds = CalibratedEqualizedOdds('sex')
    cal_eq_odds.fit(y_pred, y, sample_weight=sample_weight)

    assert np.isclose(orig_cal_eq_odds.priv_mix_rate, cal_eq_odds.mix_rates_[1])
    assert np.isclose(orig_cal_eq_odds.unpriv_mix_rate, cal_eq_odds.mix_rates_[0])
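
The test above fits on the full dataset only to compare against the original API; in practice you would fit on held-out predictions. A hedged sketch using the train_test_split imported above (variable names are illustrative):

X_tr, X_te, y_tr, y_te, sw_tr, sw_te = train_test_split(
        X, y, sample_weight, random_state=0)
probas = LogisticRegression(solver='lbfgs', max_iter=500).fit(
        X_tr, y_tr, sample_weight=sw_tr).predict_proba(X_te)
ceo = CalibratedEqualizedOdds('sex')
ceo.fit(probas, y_te, sample_weight=sw_te)  # mirrors the in-sample call above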
github IBM / AIF360 / tests / test_disparate_impact_remover.py
def test_adult():
    protected = 'sex'
    ad = AdultDataset(protected_attribute_names=[protected],
        privileged_classes=[['Male']], categorical_features=[],
        features_to_keep=['age', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week'])

    scaler = MinMaxScaler(copy=False)
    # ad.features = scaler.fit_transform(ad.features)

    train, test = ad.split([32561])
    assert np.any(test.labels)

    train.features = scaler.fit_transform(train.features)
    test.features = scaler.transform(test.features)

    index = train.feature_names.index(protected)
    X_tr = np.delete(train.features, index, axis=1)
    X_te = np.delete(test.features, index, axis=1)
    y_tr = train.labels.ravel()
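
The excerpt is truncated here. A hedged sketch of how such a test plausibly continues, repairing the features with the class under test (DisparateImpactRemover is imported at the top of the real file; see the repair_level example further below for the API):

di = DisparateImpactRemover(repair_level=1.0)
train_repd = di.fit_transform(train)
test_repd = di.fit_transform(test)
X_tr_repd = np.delete(train_repd.features, index, axis=1)
X_te_repd = np.delete(test_repd.features, index, axis=1)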
github IBM / AIF360 / tests / test_standard_datasets.py
def test_adult_test_set():
    ad = AdultDataset()
    # train, test = ad.split([32561])
    train, test = ad.split([30162])
    assert np.any(test.labels)
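
Note the adjusted split point: by default AdultDataset drops rows containing missing values, which shrinks the raw train file from 32,561 rows (the commented-out split) to 30,162.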
github IBM / AIF360 / tests / test_differential_fairness.py
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

from aif360.datasets import AdultDataset
from aif360.metrics import BinaryLabelDatasetMetric, ClassificationMetric

ad = AdultDataset(protected_attribute_names=['race', 'sex', 'native-country'],
                  privileged_classes=[['White'], ['Male'], ['United-States']],
                  categorical_features=['workclass', 'education',
                          'marital-status', 'occupation', 'relationship'],
                  custom_preprocessing=lambda df: df.fillna('Unknown'))
adult_test, adult_train = ad.split([16281], shuffle=False)

scaler = StandardScaler()
X = scaler.fit_transform(adult_train.features)
test_X = scaler.transform(adult_test.features)
clf = LogisticRegression(C=1.0, random_state=0, solver='liblinear')

adult_pred = adult_test.copy()
adult_pred.labels = clf.fit(X, adult_train.labels.ravel()).predict(test_X)

dataset_metric = BinaryLabelDatasetMetric(adult_test)
classifier_metric = BinaryLabelDatasetMetric(adult_pred)
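
The excerpt stops after building the two metrics. A hedged sketch of how they are typically queried from here (the actual test file goes on to assert specific epsilon values):

eps_data = dataset_metric.smoothed_empirical_differential_fairness()
eps_clf = classifier_metric.smoothed_empirical_differential_fairness()
print(eps_data, eps_clf)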
github IBM / AIF360 / tests / test_meta_classifier.py
def test_adult():
    np.random.seed(1)
    # np.random.seed(9876)

    protected = 'sex'
    ad = AdultDataset(protected_attribute_names=[protected],
                      privileged_classes=[['Male']], categorical_features=[],
                      features_to_keep=['age', 'education-num', 'capital-gain',
                                        'capital-loss', 'hours-per-week'])

    # scaler = MinMaxScaler(copy=False)
    # ad.features = scaler.fit_transform(ad.features)

    train, test = ad.split([32561])

    biased_model = MetaFairClassifier(tau=0, sensitive_attr=protected)
    biased_model.fit(train)

    dataset_bias_test = biased_model.predict(test)

    biased_cm = ClassificationMetric(test, dataset_bias_test,
        unprivileged_groups=[{protected: 0}], privileged_groups=[{protected: 1}])
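
The excerpt ends at the metric construction. A hedged sketch of a typical continuation, comparing the deliberately biased model (tau=0) against a debiased one; the tau value here is illustrative:

print(biased_cm.accuracy(), biased_cm.average_odds_difference())

fair_model = MetaFairClassifier(tau=0.8, sensitive_attr=protected)
fair_model.fit(train)
dataset_fair_test = fair_model.predict(test)
fair_cm = ClassificationMetric(test, dataset_fair_test,
    unprivileged_groups=[{protected: 0}], privileged_groups=[{protected: 1}])
print(fair_cm.accuracy(), fair_cm.average_odds_difference())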
github IBM / AIF360 / tests / test_standard_datasets.py
def test_adult():
    ad = AdultDataset()
    # print(ad.feature_names)
    assert np.isclose(ad.labels.mean(), 0.2478, atol=5e-5)

    bldm = BinaryLabelDatasetMetric(ad)
    assert bldm.num_instances() == 45222
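
The 45,222 figure is the combined train and test data after rows with missing values are dropped by default: 30,162 train + 15,060 test.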
github IBM / AIF360 / tests / test_disparate_impact_remover.py
def test_repair0():
    ad = AdultDataset(protected_attribute_names=['sex'],
        privileged_classes=[['Male']], categorical_features=[],
        features_to_keep=['age', 'education-num'])

    di = DisparateImpactRemover(repair_level=0.)
    ad_repd = di.fit_transform(ad)

    assert ad_repd == ad
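
By contrast, a nonzero repair level should alter the feature values. A hedged sketch, relying on the same dataset equality the assertion above uses:

di_full = DisparateImpactRemover(repair_level=1.)
ad_repd_full = di_full.fit_transform(ad)
assert ad_repd_full != ad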
github IBM / AIF360 / tests / test_standard_datasets.py
def test_adult_no_drop():
    ad = AdultDataset(protected_attribute_names=['sex'],
        privileged_classes=[['Male']], categorical_features=[],
        features_to_keep=['age', 'education-num'])
    bldm = BinaryLabelDatasetMetric(ad)
    assert bldm.num_instances() == 48842
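
Here nothing is dropped: missing values are removed after subsetting to the kept features, and neither 'age' nor 'education-num' has any, so all 48,842 rows of the combined dataset (32,561 train + 16,281 test) survive, versus the 45,222 in test_adult above.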
github IBM / AIF360 / aif360 / algorithms / preprocessing / optim_preproc_helpers / data_preproc_functions.py
    XD_features = ['Age (decade)', 'Education Years', 'sex', 'race']
    D_features = ['sex', 'race'] if protected_attributes is None else protected_attributes
    Y_features = ['Income Binary']
    X_features = list(set(XD_features)-set(D_features))
    categorical_features = ['Age (decade)', 'Education Years']

    # privileged classes
    all_privileged_classes = {"sex": [1.0],
                              "race": [1.0]}

    # protected attribute maps
    all_protected_attribute_maps = {"sex": {1.0: 'Male', 0.0: 'Female'},
                                    "race": {1.0: 'White', 0.0: 'Non-white'}}

    return AdultDataset(
        label_name=Y_features[0],
        favorable_classes=['>50K', '>50K.'],
        protected_attribute_names=D_features,
        privileged_classes=[all_privileged_classes[x] for x in D_features],
        instance_weights_name=None,
        categorical_features=categorical_features,
        features_to_keep=X_features+Y_features+D_features,
        na_values=['?'],
        metadata={'label_maps': [{1.0: '>50K', 0.0: '<=50K'}],
                  'protected_attribute_maps': [all_protected_attribute_maps[x]
                                for x in D_features]},
        custom_preprocessing=custom_preprocessing)
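
This snippet appears to be the tail of the load_preproc_data_adult helper in data_preproc_functions.py; the protected_attributes parameter referenced above selects which of 'sex' and 'race' to keep. A hedged usage sketch:

from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions import load_preproc_data_adult

dataset = load_preproc_data_adult(['sex'])
print(dataset.feature_names)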