How to use the functions of the aif360.metrics.utils module in aif360

To help you get started, we’ve selected a few aif360 examples, based on popular ways the aif360.metrics.utils module is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github IBM / AIF360 / aif360 / algorithms / postprocessing / calibrated_eq_odds_postprocessing.py View on Github external
def fit(self, dataset_true, dataset_pred):
        """Compute parameters for equalizing generalized odds using true and
        predicted scores, while preserving calibration.

        Args:
            dataset_true (BinaryLabelDataset): Dataset containing true `labels`.
            dataset_pred (BinaryLabelDataset): Dataset containing predicted
                `scores`.

        Returns:
            CalibratedEqOddsPostprocessing: Returns self.
        """

        # Create boolean conditioning vectors for protected groups
        cond_vec_priv = utils.compute_boolean_conditioning_vector(
            dataset_pred.protected_attributes,
            dataset_pred.protected_attribute_names,
            self.privileged_groups)
        cond_vec_unpriv = utils.compute_boolean_conditioning_vector(
            dataset_pred.protected_attributes,
            dataset_pred.protected_attribute_names,
            self.unprivileged_groups)

        cm = ClassificationMetric(dataset_true, dataset_pred,
                                  unprivileged_groups=self.unprivileged_groups,
                                  privileged_groups=self.privileged_groups)
        self.base_rate_priv = cm.base_rate(privileged=True)
        self.base_rate_unpriv = cm.base_rate(privileged=False)

        # Create a dataset with "trivial" predictions
        dataset_trivial = dataset_pred.copy(deepcopy=True)
github IBM / AIF360 / aif360 / metrics / dataset_metric.py View on Github external
# sets self.dataset
        super(DatasetMetric, self).__init__(dataset)

        # TODO: should this deepcopy?
        self.privileged_groups = privileged_groups
        self.unprivileged_groups = unprivileged_groups

        # don't check if nothing was provided
        if not self.privileged_groups or not self.unprivileged_groups:
            return

        priv_mask = utils.compute_boolean_conditioning_vector(
            self.dataset.protected_attributes,
            self.dataset.protected_attribute_names, self.privileged_groups)
        unpriv_mask = utils.compute_boolean_conditioning_vector(
            self.dataset.protected_attributes,
            self.dataset.protected_attribute_names, self.unprivileged_groups)
        if np.any(np.logical_and(priv_mask, unpriv_mask)):
            raise ValueError("'privileged_groups' and 'unprivileged_groups'"
                             " must be disjoint.")
        if not np.all(np.logical_or(priv_mask, unpriv_mask)):
            warn("There are some instances in the dataset which are not "
                 "designated as either privileged or unprivileged. Are you sure"
github IBM / AIF360 / aif360 / algorithms / postprocessing / eq_odds_postprocessing.py View on Github external
# of the upper-bound inequality constraints at x
        # b_ub - 1-D array of values representing the upper-bound of each
        # inequality constraint (row) in A_ub.
        # Just to keep these between zero and one
        A_ub = np.array([[ 1,  0,  0,  0],
                         [-1,  0,  0,  0],
                         [ 0,  1,  0,  0],
                         [ 0, -1,  0,  0],
                         [ 0,  0,  1,  0],
                         [ 0,  0, -1,  0],
                         [ 0,  0,  0,  1],
                         [ 0,  0,  0, -1]], dtype=np.float64)
        b_ub = np.array([1, 0, 1, 0, 1, 0, 1, 0], dtype=np.float64)

        # Create boolean conditioning vectors for protected groups
        cond_vec_priv = utils.compute_boolean_conditioning_vector(
            dataset_pred.protected_attributes,
            dataset_pred.protected_attribute_names,
            self.privileged_groups)
        cond_vec_unpriv = utils.compute_boolean_conditioning_vector(
            dataset_pred.protected_attributes,
            dataset_pred.protected_attribute_names,
            self.unprivileged_groups)

        sconst = np.ravel(
            dataset_pred.labels[cond_vec_priv] == dataset_pred.favorable_label)
        sflip = np.ravel(
            dataset_pred.labels[cond_vec_priv] == dataset_pred.unfavorable_label)
        oconst = np.ravel(
            dataset_pred.labels[cond_vec_unpriv] == dataset_pred.favorable_label)
        oflip = np.ravel(
            dataset_pred.labels[cond_vec_unpriv] == dataset_pred.unfavorable_label)
github IBM / AIF360 / aif360 / algorithms / postprocessing / reject_option_classification.py View on Github external
y_pred = np.zeros(dataset.scores.shape)
        y_pred[fav_pred_inds] = dataset.favorable_label
        y_pred[unfav_pred_inds] = dataset.unfavorable_label

        # Indices of critical region around the classification boundary
        crit_region_inds = np.logical_and(
                dataset.scores <= self.classification_threshold+self.ROC_margin,
                dataset.scores > self.classification_threshold-self.ROC_margin)

        # Indices of privileged and unprivileged groups
        cond_priv = utils.compute_boolean_conditioning_vector(
                        dataset.protected_attributes,
                        dataset.protected_attribute_names,
                        self.privileged_groups)
        cond_unpriv = utils.compute_boolean_conditioning_vector(
                        dataset.protected_attributes,
                        dataset.protected_attribute_names,
                        self.unprivileged_groups)

        # New, fairer labels
        dataset_new.labels = y_pred
        dataset_new.labels[np.logical_and(crit_region_inds,
                            cond_priv.reshape(-1,1))] = dataset.unfavorable_label
        dataset_new.labels[np.logical_and(crit_region_inds,
                            cond_unpriv.reshape(-1,1))] = dataset.favorable_label

        return dataset_new
github IBM / AIF360 / aif360 / algorithms / postprocessing / calibrated_eq_odds_postprocessing.py View on Github external
"""Perturb the predicted scores to obtain new labels that satisfy
        equalized odds constraints, while preserving calibration.

        Args:
            dataset (BinaryLabelDataset): Dataset containing `scores` that needs
                to be transformed.
            threshold (float): Threshold for converting `scores` to `labels`.
                Values greater than or equal to this threshold are predicted to
                be the `favorable_label`. Default is 0.5.
        Returns:
            dataset (BinaryLabelDataset): transformed dataset.
        """
        if self.seed is not None:
            np.random.seed(self.seed)

        cond_vec_priv = utils.compute_boolean_conditioning_vector(
            dataset.protected_attributes,
            dataset.protected_attribute_names,
            self.privileged_groups)
        cond_vec_unpriv = utils.compute_boolean_conditioning_vector(
            dataset.protected_attributes,
            dataset.protected_attribute_names,
            self.unprivileged_groups)

        priv_indices = (np.random.random(sum(cond_vec_priv))
                     <= self.priv_mix_rate)
        priv_new_pred = dataset.scores[cond_vec_priv].copy()
        priv_new_pred[priv_indices] = self.base_rate_priv

        unpriv_indices = (np.random.random(sum(cond_vec_unpriv))
                       <= self.unpriv_mix_rate)
        unpriv_new_pred = dataset.scores[cond_vec_unpriv].copy()
github IBM / AIF360 / aif360 / metrics / sample_distortion_metric.py View on Github external
def mahalanobis_distance(self, privileged=None, returned=False):
        """Average Mahalanobis distance between original and distorted samples.

        The inverse covariance matrix handed to the distance function is
        estimated from the features of both datasets stacked together.

        Args:
            privileged: Group selector forwarded to `self._to_condition`; the
                resulting condition is passed on to `utils.compute_distance`.
            returned (bool): When truthy, also return the instance weights of
                `self.dataset` indexed by the mask that
                `utils.compute_distance` produces.

        Returns:
            The distance computed by `utils.compute_distance`, or the tuple
            `(distance, weights)` when `returned` is truthy.
        """
        condition = self._to_condition(privileged)
        orig_dataset = self.dataset
        features_orig = orig_dataset.features
        features_distorted = self.distorted_dataset.features

        # Stack both feature matrices, then transpose before np.cov, which by
        # default treats each row of its input as one variable.
        pooled = np.vstack([features_orig, features_distorted])
        inv_cov = np.linalg.inv(np.cov(pooled.T)).T
        metric = partial(scdist.mahalanobis, VI=inv_cov)

        distance, mask = utils.compute_distance(
            features_orig,
            features_distorted,
            orig_dataset.protected_attributes,
            orig_dataset.protected_attribute_names,
            dist_fun=metric,
            condition=condition)

        if not returned:
            return distance
        return distance, orig_dataset.instance_weights[mask]
github IBM / AIF360 / aif360 / metrics / classification_metric.py View on Github external
Args:
            groups (list): A list of groups over which to calculate this metric.
                Groups should be disjoint. By default, this will use the
                `privileged_groups` and `unprivileged_groups` as the only two
                groups.
            alpha (int): See :meth:`generalized_entropy_index`.

        References:
            .. [2] T. Speicher, H. Heidari, N. Grgic-Hlaca, K. P. Gummadi, A. Singla, A. Weller, and M. B. Zafar,
               "A Unified Approach to Quantifying Algorithmic Unfairness: Measuring Individual and Group Unfairness via Inequality Indices,"
               ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, 2018.
        """
        b = np.zeros(self.dataset.labels.size, dtype=np.float64)

        for group in groups:
            classified_group = utils.compute_boolean_conditioning_vector(
                self.classified_dataset.protected_attributes,
                self.classified_dataset.protected_attribute_names,
                condition=group)
            true_group = utils.compute_boolean_conditioning_vector(
                self.dataset.protected_attributes,
                self.dataset.protected_attribute_names,
                condition=group)
            # ignore if there are no members of this group present
            if not np.any(true_group):
                continue
            y_pred = self.classified_dataset.labels[classified_group].ravel()
            y_true = self.dataset.labels[true_group].ravel()
            y_pred = (y_pred == self.classified_dataset.favorable_label).astype(
                np.float64)
            y_true = (y_true == self.dataset.favorable_label).astype(np.float64)
            b[true_group] = np.mean(1 + y_pred - y_true)
github IBM / AIF360 / aif360 / metrics / sample_distortion_metric.py View on Github external
def euclidean_distance(self, privileged=None, returned=False):
        """Average Euclidean distance between original and distorted samples.

        Args:
            privileged: Group selector forwarded to `self._to_condition`; the
                resulting condition is passed on to `utils.compute_distance`.
            returned (bool): When truthy, also return the instance weights of
                `self.dataset` indexed by the mask that
                `utils.compute_distance` produces.

        Returns:
            The distance computed by `utils.compute_distance`, or the tuple
            `(distance, weights)` when `returned` is truthy.
        """
        condition = self._to_condition(privileged)
        orig_dataset = self.dataset

        distance, mask = utils.compute_distance(
            orig_dataset.features,
            self.distorted_dataset.features,
            orig_dataset.protected_attributes,
            orig_dataset.protected_attribute_names,
            dist_fun=scdist.euclidean,
            condition=condition)

        if not returned:
            return distance
        return distance, orig_dataset.instance_weights[mask]