How to use the aif360.datasets.BinaryLabelDataset class in aif360

To help you get started, we’ve selected a few aif360 examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github IBM / AIF360 / tests / test_classification_metric.py View on Github external
[1, 0],
                     [1, 1],
                     [1, 0],
                     [1, 0],
                     [2, 1],
                     [2, 0],
                     [2, 1],
                     [2, 1]])
    pred = data.copy()
    pred[[3, 9], -1] = 0
    pred[[4, 5], -1] = 1
    df = pd.DataFrame(data, columns=['feat', 'label'])
    df2 = pd.DataFrame(pred, columns=['feat', 'label'])
    bld = BinaryLabelDataset(df=df, label_names=['label'],
        protected_attribute_names=['feat'])
    bld2 = BinaryLabelDataset(df=df2, label_names=['label'],
        protected_attribute_names=['feat'])
    cm = ClassificationMetric(bld, bld2)

    assert cm.theil_index() == 4*np.log(2)/10
github IBM / AIF360 / tests / test_classification_metric.py View on Github external
[1, 0],
                     [1, 1],
                     [1, 0],
                     [1, 0],
                     [2, 1],
                     [2, 0],
                     [2, 1],
                     [2, 1]])
    pred = data.copy()
    pred[[3, 9], -1] = 0
    pred[[4, 5], -1] = 1
    df = pd.DataFrame(data, columns=['feat', 'label'])
    df2 = pd.DataFrame(pred, columns=['feat', 'label'])
    bld = BinaryLabelDataset(df=df, label_names=['label'],
        protected_attribute_names=['feat'])
    bld2 = BinaryLabelDataset(df=df2, label_names=['label'],
        protected_attribute_names=['feat'])
    cm = ClassificationMetric(bld, bld2)

    assert cm.generalized_entropy_index() == 0.2

    pred = data.copy()
    pred[:, -1] = np.array([0, 1, 1, 0, 0, 0, 0, 1, 1, 1])
    df2 = pd.DataFrame(pred, columns=['feat', 'label'])
    bld2 = BinaryLabelDataset(df=df2, label_names=['label'],
        protected_attribute_names=['feat'])
    cm = ClassificationMetric(bld, bld2)

    assert cm.generalized_entropy_index() == 0.3
github IBM / AIF360 / tests / test_classification_metric.py View on Github external
[1, 0],
                     [1, 1],
                     [1, 0],
                     [1, 0],
                     [2, 1],
                     [2, 0],
                     [2, 1],
                     [2, 1]])
    pred = data.copy()
    pred[[3, 9], -1] = 0
    pred[[4, 5], -1] = 1
    df = pd.DataFrame(data, columns=['feat', 'label'])
    df2 = pd.DataFrame(pred, columns=['feat', 'label'])
    bld = BinaryLabelDataset(df=df, label_names=['label'],
        protected_attribute_names=['feat'])
    bld2 = BinaryLabelDataset(df=df2, label_names=['label'],
        protected_attribute_names=['feat'])
    cm = ClassificationMetric(bld, bld2)

    b = np.array([1, 1, 1.25, 1.25, 1.25, 1.25, 0.75, 0.75, 0.75, 0.75])
    assert cm.between_all_groups_generalized_entropy_index() == 1/20*np.sum(b**2 - 1)
github h2oai / driverlessai-recipes / transformers / mli / debiasing_lfr.py View on Github external
self.label_names = [frame.names[-1]]

            self.privileged_groups = config['privileged_groups']
            self.unprivileged_groups = config['unprivileged_groups']
            self.favorable_label = float(config['favorable_label'])
            self.unfavorable_label = float(config['unfavorable_label'])
            self.protected_attribute_names = config['protected_attribute_names']

            self.lfr = LFR(
                unprivileged_groups=self.unprivileged_groups,
                privileged_groups=self.privileged_groups,
                verbose=0,
            )

            self.lfr.fit(
                BinaryLabelDataset(
                    df=frame.to_pandas(),
                    favorable_label=self.favorable_label,
                    unfavorable_label=self.unfavorable_label,
                    label_names=self.label_names,
                    protected_attribute_names=self.protected_attribute_names,
                )
            )
            self.fitted = True
github h2oai / driverlessai-recipes / transformers / mli / debiasing_lfr.py View on Github external
def transform(self, X: dt.Frame, y: np.array = None):
        """Apply the fitted LFR transform to training data.

        The transform only runs when this transformer has been fitted and the
        label is available, either as column(s) of ``X`` or through ``y``.
        In every other case ``X`` is returned unchanged.

        Before transforming, missing values are replaced by the per-column
        mean computed on the pandas view of the frame.

        Args:
            X: Input frame.
            y: Optional labels. Appended to ``X`` as an extra column when the
                label column(s) are not already present in ``X``.

        Returns:
            A new ``dt.Frame`` built from the transformed features, with
            ``"_lfr"`` appended to every feature name, or ``X`` itself when
            no transformation is applied.
        """
        from aif360.datasets import BinaryLabelDataset

        # For predictions no transformation is required
        if not self.fitted:
            return X

        # ``self.label_names`` is handed to BinaryLabelDataset as a collection
        # of column names, so membership must be tested name by name; testing
        # the collection itself against ``X.names`` (expected to hold the
        # column-name strings) never matches. A bare string is accepted too.
        label_names = self.label_names
        if isinstance(label_names, str):
            label_names = [label_names]
        labels_in_X = bool(label_names) and all(
            name in X.names for name in label_names
        )

        # Transformation should only occur during training when y is present
        if not labels_in_X:
            if y is None:
                return X
            X = dt.cbind(X, dt.Frame(y))

        X_pd = X.to_pandas()
        X = dt.Frame(X_pd.fillna(X_pd.mean()))
        transformed_X: BinaryLabelDataset = self.lfr.transform(
            BinaryLabelDataset(
                df=X.to_pandas(),
                favorable_label=self.favorable_label,
                unfavorable_label=self.unfavorable_label,
                label_names=self.label_names,
                protected_attribute_names=self.protected_attribute_names,
            )
        )

        return dt.Frame(
            transformed_X.features,
            names=[name + "_lfr" for name in transformed_X.feature_names],
        )
github IBM / AIF360 / aif360 / algorithms / preprocessing / optim_preproc.py View on Github external
df_transformed[p] = df_transformed[p].replace(pmap_rev)

        # Map the labels to numeric values
        for idx, p in enumerate(Y_feature_names):
            pmap = dataset.metadata["label_maps"][idx]
            pmap_rev = dict(zip(pmap.values(), pmap.keys()))
            df_transformed[p] = df_transformed[p].replace(pmap_rev)

        # Dummy code and convert to a dataset
        df_dum = pd.concat([pd.get_dummies(df_transformed.loc[:, X_feature_names],
                            prefix_sep="="),
                            df_transformed.loc[:, Y_feature_names+D_feature_names]],
                            axis=1)

        # Create a dataset out of df_dum
        dataset_transformed = BinaryLabelDataset(
            df=df_dum,
            label_names=Y_feature_names,
            protected_attribute_names=self.protected_attribute_names,
            privileged_protected_attributes=self.privileged_protected_attributes,
            unprivileged_protected_attributes=self.unprivileged_protected_attributes,
            favorable_label=dataset.favorable_label,
            unfavorable_label=dataset.unfavorable_label,
            metadata=dataset.metadata)

        return dataset_transformed
github IBM / AIF360 / mlops / kubeflow / bias_detector_pytorch / src / fairness.py View on Github external
def dataset_wrapper(outcome, protected, unprivileged_groups, privileged_groups, favorable_label, unfavorable_label):
    """Build an aif360 ``BinaryLabelDataset`` from outcome and protected arrays.

    The outcome values become the ``'outcome'`` label column and the protected
    values become the ``'race'`` protected-attribute column of a two-column
    DataFrame, which is then wrapped in a ``BinaryLabelDataset``.

    Args:
        outcome: Values for the ``'outcome'`` label column.
        protected: Values for the ``'race'`` protected-attribute column.
        unprivileged_groups: Forwarded as ``unprivileged_protected_attributes``.
        privileged_groups: Accepted for interface symmetry; not used here.
        favorable_label: Label value considered favorable.
        unfavorable_label: Label value considered unfavorable.

    Returns:
        The constructed ``BinaryLabelDataset``.
    """
    label_column = 'outcome'
    protected_column = 'race'

    frame = pd.DataFrame(data=outcome, columns=[label_column])
    frame[protected_column] = protected

    # Collect the constructor arguments first so the call itself stays short.
    dataset_kwargs = {
        'df': frame,
        'label_names': [label_column],
        'protected_attribute_names': [protected_column],
        'favorable_label': favorable_label,
        'unfavorable_label': unfavorable_label,
        'unprivileged_protected_attributes': unprivileged_groups,
    }
    return BinaryLabelDataset(**dataset_kwargs)
github IBM / AIF360 / aif360 / metrics / classification_metric.py View on Github external
unprivileged_groups (list(dict)): Unprivileged groups in the same
                format as `privileged_groups`.

        Raises:
            TypeError: `dataset` and `classified_dataset` must be
                :obj:`~aif360.datasets.BinaryLabelDataset` types.
        """
        if not isinstance(dataset, BinaryLabelDataset):
            raise TypeError("'dataset' should be a BinaryLabelDataset")

        # sets self.dataset, self.unprivileged_groups, self.privileged_groups
        super(ClassificationMetric, self).__init__(dataset,
            unprivileged_groups=unprivileged_groups,
            privileged_groups=privileged_groups)

        if isinstance(classified_dataset, BinaryLabelDataset):
            self.classified_dataset = classified_dataset
        else:
            raise TypeError("'classified_dataset' should be a "
                            "BinaryLabelDataset.")

        # Verify if everything except the predictions and metadata are the same
        # for the two datasets
        with self.dataset.temporarily_ignore('labels', 'scores'):
            if self.dataset != self.classified_dataset:
                raise ValueError("The two datasets are expected to differ only "
                                 "in 'labels' or 'scores'.")