Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def num_instances(self, privileged=None):
    """Count the instances, :math:`n`, in the dataset, optionally restricted
    to one of the protected-attribute groups.

    The dataset's protected attributes, instance weights and protected
    attribute names are forwarded to `utils.compute_num_instances` together
    with the group condition derived from `privileged`.

    Args:
        privileged (bool, optional): Selects the group to condition on:
            `True` for the `privileged_groups`, `False` for the
            `unprivileged_groups`. Defaults to `None`, meaning the count is
            taken over the entire dataset.

    Returns:
        The value returned by `utils.compute_num_instances` for the
        selected condition.

    Raises:
        AttributeError: `privileged_groups` or `unprivileged_groups` must be
            provided at initialization to condition on them.
    """
    # Resolve the group condition first so that a missing group definition
    # surfaces before any dataset field is read.
    group_condition = self._to_condition(privileged)
    data = self.dataset
    return utils.compute_num_instances(
        data.protected_attributes,
        data.instance_weights,
        data.protected_attribute_names,
        condition=group_condition,
    )
"""
# Dirichlet smoothing parameters
if concentration < 0:
raise ValueError("Concentration parameter must be non-negative.")
num_classes = 2 # binary label dataset
dirichlet_alpha = concentration / num_classes
# compute counts for all intersecting groups, e.g. black-women, white-man, etc
intersect_groups = np.unique(self.dataset.protected_attributes, axis=0)
num_intersects = len(intersect_groups)
counts_pos = np.zeros(num_intersects)
counts_total = np.zeros(num_intersects)
for i in range(num_intersects):
condition = [dict(zip(self.dataset.protected_attribute_names,
intersect_groups[i]))]
counts_total[i] = utils.compute_num_instances(
self.dataset.protected_attributes,
self.dataset.instance_weights,
self.dataset.protected_attribute_names, condition=condition)
counts_pos[i] = utils.compute_num_pos_neg(
self.dataset.protected_attributes, labels,
self.dataset.instance_weights,
self.dataset.protected_attribute_names,
self.dataset.favorable_label, condition=condition)
# probability of y given S (p(y=1|S))
return (counts_pos + dirichlet_alpha) / (counts_total + concentration)