How to use the fairlearn.postprocessing._constants.LABEL_KEY constant in fairlearn

To help you get started, we’ve selected a few fairlearn examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github fairlearn / fairlearn / test / unit / postprocessing / test_curve_utilities.py View on Github external
def test_calculate_roc_points():
    """Check the ROC points computed for group "A" against the expected points."""
    frame = pd.DataFrame({
        SENSITIVE_FEATURE_KEY: sensitive_features_ex1,
        SCORE_KEY: scores_ex,
        LABEL_KEY: labels_ex,
    })
    # Isolate group "A", then order its rows from highest to lowest score.
    group_a = frame.groupby(SENSITIVE_FEATURE_KEY).get_group("A")
    group_a_by_score = group_a.sort_values(by=SCORE_KEY, ascending=False)

    actual_points = _calculate_roc_points(group_a_by_score, "A")

    # One threshold operation per expected (x, y) point, in the same order.
    expected_operations = [
        ThresholdOperation('>', np.inf),
        ThresholdOperation('<', 0.5),
        ThresholdOperation('<', 1.5),
        ThresholdOperation('<', 2.5),
        ThresholdOperation('>', -np.inf),
    ]
    expected_points = pd.DataFrame({
        "x": [0, 0.25, 0.5, 0.5, 1],
        "y": [0, 1/3, 2/3, 1, 1],
        "operation": expected_operations,
    })

    _assert_equal_points(expected_points, actual_points)
github fairlearn / fairlearn / test / unit / postprocessing / test_utilities.py View on Github external
def _get_grouped_data_and_base_points(sensitive_feature_value):
    """Build the example data for one sensitive feature group and its expected ROC points.

    NOTE(review): this excerpt stops inside the ``"A"`` branch; any further
    branches and the return value are not visible here.

    :param sensitive_feature_value: the sensitive feature value whose group is
        selected from the example data
    """
    data = pd.DataFrame({
        SENSITIVE_FEATURE_KEY: sensitive_features_ex1,
        SCORE_KEY: scores_ex,
        LABEL_KEY: labels_ex})
    # Keep only the rows of the requested group, ordered from highest to lowest score.
    grouped_data = data.groupby(SENSITIVE_FEATURE_KEY).get_group(sensitive_feature_value) \
        .sort_values(by=SCORE_KEY, ascending=False)
    # 100 evenly spaced values covering [0, 1].
    x_grid = np.linspace(0, 1, 100)

    if sensitive_feature_value == "A":
        # Expected ROC points for group "A": one threshold operation per (x, y) pair.
        expected_roc_points = pd.DataFrame({
            "x": [0, 0.25, 0.5, 0.5, 1],
            "y": [0, 1/3,  2/3, 1,   1],
            "operation": [ThresholdOperation('>', np.inf),
                          ThresholdOperation('<', 0.5),
                          ThresholdOperation('<', 1.5),
                          ThresholdOperation('<', 2.5),
                          ThresholdOperation('>', -np.inf)]
        })
        # NOTE(review): presumably row indices of expected_roc_points to exclude
        # when deriving the base points -- confirm against the rest of the function.
        ignore_for_base_points = [1, 2]
github fairlearn / fairlearn / fairlearn / postprocessing / _threshold_optimizer.py View on Github external
:type scores: pandas.Series, pandas.DataFrame, numpy.ndarray, or list
    :param sensitive_feature_names: list of names for the sensitive features in case they were
        not implicitly provided (e.g. if `sensitive_features` is of type DataFrame); default
        None
    :type sensitive_feature_names: list of strings
    :return: the training data for the mitigator, grouped by sensitive feature value
    :rtype: pandas.DataFrameGroupBy
    """
    data_dict = {}

    # TODO: extend to multiple columns for additional group data
    # and name columns after original column names if possible
    # or store the original column names
    sensitive_feature_name = SENSITIVE_FEATURE_KEY
    if sensitive_feature_names is not None:
        if sensitive_feature_name in [SCORE_KEY, LABEL_KEY]:
            raise ValueError(SENSITIVE_FEATURE_NAME_CONFLICT_DETECTED_ERROR_MESSAGE)
        sensitive_feature_name = sensitive_feature_names[0]

    _reformat_data_into_dict(sensitive_feature_name, data_dict, sensitive_features)
    _reformat_data_into_dict(SCORE_KEY, data_dict, scores)
    _reformat_data_into_dict(LABEL_KEY, data_dict, labels)

    return pd.DataFrame(data_dict).groupby(sensitive_feature_name)
github fairlearn / fairlearn / fairlearn / postprocessing / _threshold_optimizer.py View on Github external
from ._interpolated_prediction import InterpolatedPredictor

# Error message templates. "{}" placeholders that are not filled in here are
# left for the code that uses the message to fill via str.format.
DIFFERENT_INPUT_LENGTH_ERROR_MESSAGE = "{} need to be of equal length."
EMPTY_INPUT_ERROR_MESSAGE = "At least one of sensitive_features, labels, or scores are empty."
NON_BINARY_LABELS_ERROR_MESSAGE = "Labels other than 0/1 were provided."
INPUT_DATA_FORMAT_ERROR_MESSAGE = (
    "The only allowed input data formats are: "
    "list, numpy.ndarray, pandas.DataFrame, pandas.Series. "
    "Your provided data was of types ({}, {}, {})"
)
# Filled in at import time with the names of the two supported constraints.
NOT_SUPPORTED_CONSTRAINTS_ERROR_MESSAGE = (
    "Currently only {} and {} are supported "
    "constraints."
).format(DEMOGRAPHIC_PARITY, EQUALIZED_ODDS)
PREDICT_BEFORE_FIT_ERROR_MESSAGE = "It is required to call 'fit' before 'predict'."
MULTIPLE_DATA_COLUMNS_ERROR_MESSAGE = (
    "Post processing currently only supports a single "
    "column in {}."
)
# Filled in at import time with the reserved score and label column keys.
SENSITIVE_FEATURE_NAME_CONFLICT_DETECTED_ERROR_MESSAGE = (
    "A sensitive feature named {} or {} "
    "was detected. Please rename your column and try again."
).format(SCORE_KEY, LABEL_KEY)
SCORES_DATA_TOO_MANY_COLUMNS_ERROR_MESSAGE = "The provided scores data contains multiple columns."
UNEXPECTED_DATA_TYPE_ERROR_MESSAGE = "Unexpected data type {} encountered."


# Constraint names referenced by NOT_SUPPORTED_CONSTRAINTS_ERROR_MESSAGE above.
_SUPPORTED_CONSTRAINTS = [DEMOGRAPHIC_PARITY, EQUALIZED_ODDS]

# Module-level logger named after this module.
logger = logging.getLogger(__name__)


class ThresholdOptimizer(PostProcessing):
    """An Estimator based on the threshold optimization approach.

    The procedure followed is described in detail in
    `Hardt et al. (2016) `_.

    :param unconstrained_predictor: The trained predictor whose output will be post processed
github fairlearn / fairlearn / fairlearn / postprocessing / _threshold_optimizer.py View on Github external
:type plot: bool
    :return: the postprocessed predictor as a function taking the sensitive feature value
        and the fairness unaware predictor's score as arguments to produce predictions
    """
    n = len(labels)
    selection_error_curve = {}
    x_grid = np.linspace(0, 1, grid_size + 1)
    error_given_selection = 0 * x_grid

    data_grouped_by_sensitive_feature = _reformat_and_group_data(
        sensitive_features, labels, scores)

    for sensitive_feature_value, group in data_grouped_by_sensitive_feature:
        # determine probability of current sensitive feature group based on data
        n_group = len(group)
        n_positive = sum(group[LABEL_KEY])
        n_negative = n_group - n_positive
        p_sensitive_feature_value = n_group / n

        roc_convex_hull = _get_roc(group, sensitive_feature_value, flip=flip)

        fraction_negative_label_positive_sample = (
            n_negative / n_group) * roc_convex_hull['x']
        fraction_positive_label_positive_sample = (
            n_positive / n_group) * roc_convex_hull['y']
        # Calculate selection to represent the proportion of positive predictions.
        roc_convex_hull['selection'] = fraction_negative_label_positive_sample + \
            fraction_positive_label_positive_sample

        fraction_positive_label_negative_sample = \
            (n_positive / n_group) * (1 - roc_convex_hull['y'])
        roc_convex_hull['error'] = fraction_negative_label_positive_sample + \
github fairlearn / fairlearn / fairlearn / postprocessing / _threshold_optimizer.py View on Github external
:rtype: pandas.DataFrameGroupBy
    """
    data_dict = {}

    # TODO: extend to multiple columns for additional group data
    # and name columns after original column names if possible
    # or store the original column names
    sensitive_feature_name = SENSITIVE_FEATURE_KEY
    if sensitive_feature_names is not None:
        if sensitive_feature_name in [SCORE_KEY, LABEL_KEY]:
            raise ValueError(SENSITIVE_FEATURE_NAME_CONFLICT_DETECTED_ERROR_MESSAGE)
        sensitive_feature_name = sensitive_feature_names[0]

    _reformat_data_into_dict(sensitive_feature_name, data_dict, sensitive_features)
    _reformat_data_into_dict(SCORE_KEY, data_dict, scores)
    _reformat_data_into_dict(LABEL_KEY, data_dict, labels)

    return pd.DataFrame(data_dict).groupby(sensitive_feature_name)
github fairlearn / fairlearn / fairlearn / postprocessing / _roc_curve_utilities.py View on Github external
def _get_scores_labels_and_counts(data):
    """Order samples by score and count the positive, negative, and overall samples.

    The samples are sorted into descending order of score (highest score first);
    the returned scores and labels follow that same order.

    :param data: the DataFrame containing scores and labels
    :type data: pandas.DataFrame
    :return: a tuple containing the sorted scores, the labels in the same order,
        the number of samples, the number of positive samples, and the number
        of negative samples
    :rtype: tuple of list, list, int, int, int
    """
    # ascending=False puts the highest score first.
    data_sorted = data.sort_values(by=SCORE_KEY, ascending=False)

    # Both columns come from the same sorted frame, so scores[i] and labels[i]
    # belong to the same sample.
    scores = list(data_sorted[SCORE_KEY])
    labels = list(data_sorted[LABEL_KEY])

    n, n_positive, n_negative = _get_counts(labels)

    return scores, labels, n, n_positive, n_negative