How to use the aif360.datasets.StructuredDataset class in aif360

To help you get started, we’ve selected a few aif360 examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github IBM / AIF360 / tests / test_structured_dataset.py View on Github external
def test_k_folds():
    """Check the number and sizes of the folds produced by ``split``."""
    dataset = StructuredDataset(
        df=df, label_names=['label'], protected_attribute_names=['two'])

    quarters = dataset.split(4)
    assert len(quarters) == 4

    # Every per-instance field of every fold must report exactly one row.
    for fold in quarters:
        sizes = {
            fold.features.shape[0],
            fold.labels.shape[0],
            fold.protected_attributes.shape[0],
            len(fold.instance_names),
            fold.instance_weights.shape[0],
        }
        assert sizes == {1}

    # With three folds, the first one is required to hold two rows.
    thirds = dataset.split(3)
    first_fold_rows = thirds[0].features.shape[0]
    assert first_fold_rows == 2
github IBM / AIF360 / tests / test_structured_dataset.py View on Github external
def test_copy():
    """Check that ``copy()`` shares feature storage and ``copy(True)`` does not."""
    original = StructuredDataset(
        df=df, label_names=['label'], protected_attribute_names=['two'])
    shallow = original.copy()
    deep = original.copy(True)

    # Overwrite the first feature row of the source dataset in place.
    original.features[0] = 999

    # The default copy must observe the write; the deep copy must not.
    shallow_hits = shallow.features[0] == 999
    deep_hits = deep.features[0] == 999
    assert np.all(shallow_hits)
    assert not np.any(deep_hits)
github IBM / AIF360 / tests / test_structured_dataset.py View on Github external
def test_eq():
    """Check ``==`` across copies and ``!=`` for different protected attributes."""
    base = StructuredDataset(
        df=df, label_names=['label'], protected_attribute_names=['two'])
    shallow = base.copy()
    deep = base.copy(True)
    other = StructuredDataset(
        df=df, label_names=['label'],
        protected_attribute_names=['one', 'three'])

    # The source and both kinds of copy must all compare equal to each other.
    for left, right in ((base, shallow), (base, deep), (shallow, deep)):
        assert left == right

    # Same frame, different protected attributes: must compare unequal.
    assert base != other
github IBM / AIF360 / tests / test_structured_dataset.py View on Github external
def test_temporarily_ignore():
    """Check that ``temporarily_ignore`` hides a field from ``==`` only inside the block."""
    reference = StructuredDataset(
        df=df, label_names=['label'],
        protected_attribute_names=['one', 'three'])

    # Build a dataset that differs from the reference only in its labels.
    shifted = reference.copy()
    shifted.labels = reference.labels + 1
    assert reference != shifted

    # While 'labels' is ignored, the two datasets must compare equal.
    with reference.temporarily_ignore('labels'):
        assert reference == shifted

    # Leaving the block must remove 'labels' from the ignore list again.
    assert 'labels' not in reference.ignore_fields
github IBM / AIF360 / tests / test_sample_distortion_metric.py View on Github external
# Column names for the test frames; 'label' is used as the label column below.
cols = ['one', 'two', 'three', 'label']
# A (4, 1) column of ones used as the labels.
labs = np.ones((4, 1))

# Baseline dataset: the feature matrix `data` (defined earlier in the module)
# with the label column appended on the right.
df = pd.DataFrame(data=np.concatenate((data, labs), axis=1), columns=cols)
sd = StructuredDataset(df=df, label_names=['label'],
    protected_attribute_names=['one', 'three'])

# Every feature value shifted by one.
distorted = data + 1

# Copy of the baseline (copy(True)) whose features are replaced by the
# shifted values.
sd_distorted = sd.copy(True)
sd_distorted.features = distorted

# Random fixtures: integers drawn from [0, 10). `rand` fills a full 4x4 frame
# (features + label); `rand2` is a 4x3 replacement feature matrix.
rand = np.random.randint(0, 10, (4, 4))
rand2 = np.random.randint(0, 10, (4, 3))
df_rand = pd.DataFrame(data=rand, columns=cols)
sd_rand = StructuredDataset(df=df_rand, label_names=['label'],
    protected_attribute_names=['one', 'three'])
sd_rand2 = sd_rand.copy(True)
sd_rand2.features = rand2


# Group definitions keyed by protected attribute name ('one').
# NOTE(review): not referenced by the tests visible here - confirm usage.
priv = [{'one': 1}]
unpriv = [{'one': 2}]

def test_euclidean_distance():
    """Check the total Euclidean distance between ``sd`` and ``sd_distorted``.

    ``sd_distorted`` holds the features of ``sd`` shifted by one, and the
    expected total for these fixtures is ``4*sqrt(3)``.
    """
    sdm = SampleDistortionMetric(sd, sd_distorted)
    # The result is a floating-point sum of square roots, so compare with a
    # tolerance rather than exact equality, which depends on rounding order.
    assert np.isclose(sdm.total_euclidean_distance(), 4*np.sqrt(3))

def test_manhattan_distance():
    """Check that the total Manhattan distance between ``sd`` and ``sd_distorted`` is 12."""
    metric = SampleDistortionMetric(sd, sd_distorted)
    total = metric.total_manhattan_distance()
    assert total == 12
github IBM / AIF360 / tests / test_structured_dataset.py View on Github external
def test_eq():
    """Check dataset equality for copies and inequality for a different configuration."""
    source = StructuredDataset(
        df=df, label_names=['label'], protected_attribute_names=['two'])
    shallow_copy = source.copy()
    deep_copy = source.copy(True)
    different = StructuredDataset(
        df=df, label_names=['label'],
        protected_attribute_names=['one', 'three'])

    # All three views of the same dataset must compare equal pairwise.
    equal_pairs = (
        (source, shallow_copy),
        (source, deep_copy),
        (shallow_copy, deep_copy),
    )
    for first, second in equal_pairs:
        assert first == second

    # Changing the protected attributes must break equality.
    assert source != different
github IBM / AIF360 / tests / test_sample_distortion_metric.py View on Github external
import numpy as np
import pandas as pd
from scipy.spatial.distance import cdist

from aif360.datasets import StructuredDataset
from aif360.metrics import SampleDistortionMetric


# 4x3 feature matrix: columns are 0..3, 4..7 and 8..11.
data = np.arange(12).reshape((3, 4)).T
# Column names for the test frames; 'label' is used as the label column below.
cols = ['one', 'two', 'three', 'label']
# A (4, 1) column of ones used as the labels.
labs = np.ones((4, 1))

# Baseline dataset: `data` with the label column appended on the right.
df = pd.DataFrame(data=np.concatenate((data, labs), axis=1), columns=cols)
sd = StructuredDataset(df=df, label_names=['label'],
    protected_attribute_names=['one', 'three'])

# Every feature value shifted by one.
distorted = data + 1

# Copy of the baseline (copy(True)) whose features are replaced by the
# shifted values.
sd_distorted = sd.copy(True)
sd_distorted.features = distorted

# Random fixtures: integers drawn from [0, 10). `rand` fills a full 4x4 frame
# (features + label); `rand2` is a 4x3 replacement feature matrix.
rand = np.random.randint(0, 10, (4, 4))
rand2 = np.random.randint(0, 10, (4, 3))
df_rand = pd.DataFrame(data=rand, columns=cols)
sd_rand = StructuredDataset(df=df_rand, label_names=['label'],
    protected_attribute_names=['one', 'three'])
sd_rand2 = sd_rand.copy(True)
sd_rand2.features = rand2
github IBM / AIF360 / aif360 / datasets / binary_label_dataset.py View on Github external
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import numpy as np

from aif360.datasets import StructuredDataset


class BinaryLabelDataset(StructuredDataset):
    """Base class for all structured datasets with binary labels.

    Adds two attributes on top of :obj:`StructuredDataset`:
    `favorable_label` and `unfavorable_label`, both stored as floats.
    """

    def __init__(self, favorable_label=1., unfavorable_label=0., **kwargs):
        """Store the two label values and initialize the parent dataset.

        Args:
            favorable_label (float): Label value which is considered favorable
                (i.e. "positive"). Converted with `float()`.
            unfavorable_label (float): Label value which is considered
                unfavorable (i.e. "negative"). Converted with `float()`.
            **kwargs: StructuredDataset arguments, forwarded unchanged.
        """
        # Set before calling the parent initializer so the attributes already
        # exist while it runs.
        self.favorable_label = float(favorable_label)
        self.unfavorable_label = float(unfavorable_label)

        super(BinaryLabelDataset, self).__init__(**kwargs)
github IBM / AIF360 / aif360 / metrics / dataset_metric.py View on Github external
format as `privileged_groups`.

        Raises:
            TypeError: `dataset` must be a
                :obj:`~aif360.datasets.StructuredDataset` type.
            ValueError: `privileged_groups` and `unprivileged_groups` must be
                disjoint.

        Examples:
            >>> from aif360.datasets import GermanDataset
            >>> german = GermanDataset()
            >>> u = [{'sex': 1, 'age': 1}, {'sex': 0}]
            >>> p = [{'sex': 1, 'age': 0}]
            >>> dm = DatasetMetric(german, unprivileged_groups=u, privileged_groups=p)
        """
        if not isinstance(dataset, StructuredDataset):
            raise TypeError("'dataset' should be a StructuredDataset")

        # sets self.dataset
        super(DatasetMetric, self).__init__(dataset)

        # TODO: should this deepcopy?
        self.privileged_groups = privileged_groups
        self.unprivileged_groups = unprivileged_groups

        # don't check if nothing was provided
        if not self.privileged_groups or not self.unprivileged_groups:
            return

        priv_mask = utils.compute_boolean_conditioning_vector(
            self.dataset.protected_attributes,
            self.dataset.protected_attribute_names, self.privileged_groups)
github IBM / AIF360 / aif360 / metrics / sample_distortion_metric.py View on Github external
of `dicts` where the keys are `protected_attribute_names` and
                the values are values in `protected_attributes`. Each `dict`
                element describes a single group. See examples for more details.
            unprivileged_groups (list(dict)): Unprivileged groups in the same
                format as `privileged_groups`.

        Raises:
            TypeError: `dataset` and `distorted_dataset` must be
                :obj:`~aif360.datasets.StructuredDataset` types.
        """
        # sets self.dataset, self.unprivileged_groups, self.privileged_groups
        super(SampleDistortionMetric, self).__init__(dataset,
            unprivileged_groups=unprivileged_groups,
            privileged_groups=privileged_groups)

        if isinstance(distorted_dataset, StructuredDataset):
            self.distorted_dataset = distorted_dataset
        else:
            raise TypeError("'distorted_dataset' should be a StructuredDataset")

        with dataset.temporarily_ignore('features', 'labels', 'scores'):
            if dataset != distorted_dataset:
                raise ValueError("The two datasets may differ in features and "
                                 "labels/scores only.")