# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# NOTE(review): fragment — the enclosing test's `def` line and the definitions of
# `idx`, `l0_series`, `estimator`, `X`, `y`, and `A` are outside this view.
# Candidate per-group Lagrange multiplier vectors; pairs sum to 2.0 across groups.
l1_series = pd.Series([1.5, 0.5], index=idx)
l2_series = pd.Series([1.0, 1.0], index=idx)
l3_series = pd.Series([0.5, 1.5], index=idx)
l4_series = pd.Series([0.0, 2.0], index=idx)
# Assemble the five multiplier vectors as columns of a single grid DataFrame
grid_df = pd.concat([l0_series,
l1_series,
l2_series,
l3_series,
l4_series],
axis=1)
# target1 asks GridSearch to generate its own grid of 5 points
target1 = GridSearch(copy.deepcopy(estimator),
constraints=GroupLossMoment(ZeroOneLoss()),
grid_size=5)
# target2 receives the explicit grid constructed above
target2 = GridSearch(copy.deepcopy(estimator),
constraints=GroupLossMoment(ZeroOneLoss()),
grid=grid_df)
tradeoffs = [0, 0.25, 0.5, 0.75, 1]
target1.fit(X, y, sensitive_features=A)
target2.fit(X, y, sensitive_features=A)
# Both fits should evaluate one candidate per tradeoff point
assert len(target1.all_results) == len(tradeoffs)
assert len(target2.all_results) == len(tradeoffs)
# Check we generated the same multipliers
for i in range(len(tradeoffs)):
lm1 = target1.all_results[i].lambda_vec
lm2 = target2.all_results[i].lambda_vec
assert lm1.equals(lm2)
def test_X_A_different_rows(self, transformX, transformY, transformA):
    """Fitting must raise when X and the sensitive features disagree in row count."""
    gs = GridSearch(self.estimator, self.disparity_criterion)
    X, Y, _ = self._quick_data()
    # One extra row in A relative to X/Y triggers the validation error
    A = np.random.randint(2, size=len(Y) + 1)
    # str() wrapper around the literal was redundant; plain literal suffices
    message = "X and the sensitive features must have same number of rows"
    with pytest.raises(RuntimeError) as execInfo:
        gs.fit(transformX(X),
               transformY(Y),
               sensitive_features=transformA(A))
    assert message == execInfo.value.args[0]
def test_Y_ternary(self, transformX, transformY, transformA):
    """Fitting must raise when y contains a label other than 0 and 1."""
    gs = GridSearch(self.estimator, self.disparity_criterion)
    X, Y, A = self._quick_data()
    # Force a third label value into y
    Y[0] = 0
    Y[1] = 1
    Y[2] = 2
    # str() wrapper around the literal was redundant; plain literal suffices
    message = "Supplied y labels are not 0 or 1"
    with pytest.raises(RuntimeError) as execInfo:
        gs.fit(transformX(X),
               transformY(Y),
               sensitive_features=transformA(A))
    assert message == execInfo.value.args[0]
def test_sensitive_feature_non_binary(self, transformX, transformY, transformA):
    """Fitting must raise when the sensitive feature has more than two values."""
    gs = GridSearch(self.estimator, self.disparity_criterion)
    X, Y, A = self._quick_data()
    # Force a third distinct value into the sensitive feature
    A[0] = 0
    A[1] = 1
    A[2] = 2
    # str() wrapper around the literal was redundant; plain literal suffices
    message = "Sensitive features contain more than two unique values"
    with pytest.raises(RuntimeError) as execInfo:
        gs.fit(transformX(X),
               transformY(Y),
               sensitive_features=transformA(A))
    assert message == execInfo.value.args[0]
def test_Y_df_bad_columns(self, transformX, transformA):
    """Fitting must raise when y is supplied as a multi-column DataFrame."""
    gs = GridSearch(self.estimator, self.disparity_criterion)
    X, Y, A = self._quick_data()
    # Two identical columns are enough to trip the single-column check
    Y_two_col_df = pd.DataFrame({"a": Y, "b": Y})
    # str() wrapper around the literal was redundant; plain literal suffices
    message = "y is a DataFrame with more than one column"
    with pytest.raises(RuntimeError) as execInfo:
        gs.fit(transformX(X),
               Y_two_col_df,
               sensitive_features=transformA(A))
    assert message == execInfo.value.args[0]
# Do the grid search with a zero Lagrange multiplier
# NOTE(review): fragment — the enclosing test's `def` line and the definitions of
# `a0_label`, `a1_label`, `estimator`, `X`, `y`, and `A` are outside this view.
idx = pd.Int64Index(sorted([a0_label, a1_label]))  # NOTE(review): pd.Int64Index is removed in pandas >= 2.0 — confirm pinned pandas version
# Candidate per-group multiplier vectors; pairs sum to 2.0 across the two groups
l0_series = pd.Series([2.0, 0.0], index=idx)
l1_series = pd.Series([1.5, 0.5], index=idx)
l2_series = pd.Series([1.0, 1.0], index=idx)
l3_series = pd.Series([0.5, 1.5], index=idx)
l4_series = pd.Series([0.0, 2.0], index=idx)
# Assemble the five multiplier vectors as columns of a single grid DataFrame
grid_df = pd.concat([l0_series,
l1_series,
l2_series,
l3_series,
l4_series],
axis=1)
# target1 asks GridSearch to generate its own grid of 5 points
target1 = GridSearch(copy.deepcopy(estimator),
constraints=GroupLossMoment(ZeroOneLoss()),
grid_size=5)
# target2 receives the explicit grid constructed above
target2 = GridSearch(copy.deepcopy(estimator),
constraints=GroupLossMoment(ZeroOneLoss()),
grid=grid_df)
tradeoffs = [0, 0.25, 0.5, 0.75, 1]
target1.fit(X, y, sensitive_features=A)
target2.fit(X, y, sensitive_features=A)
# Both fits should evaluate one candidate per tradeoff point
assert len(target1.all_results) == len(tradeoffs)
assert len(target2.all_results) == len(tradeoffs)
# Check we generated the same multipliers
def test_no_predict_proba_before_fit(self):
    """predict_proba before fit must raise NotFittedException with a clear message."""
    gs = GridSearch(self.estimator, self.disparity_criterion)
    X, _, _ = self._quick_data()
    # str() wrapper around the literal was redundant; plain literal suffices
    message = "Must call fit before attempting to make predictions"
    with pytest.raises(NotFittedException) as execInfo:
        gs.predict_proba(X)
    assert message == execInfo.value.args[0]
# NOTE(review): fragment — the enclosing test's `def` line and the definitions of
# `a0_label`, `a1_label`, `estimator`, `X`, `y`, and `A` are outside this view.
# Build a (sign, event, group_id) MultiIndex: 2 signs x 1 event x 2 groups = 4 rows
iterables = [['+', '-'], ['all'], sorted([a0_label, a1_label])]
midx = pd.MultiIndex.from_product(iterables, names=['sign', 'event', 'group_id'])
# Three candidate multiplier vectors: negative, zero, and positive
lagrange_negative_series = pd.Series([0.0, 0.0, 0.0, 2.0], index=midx)
lagrange_zero_series = pd.Series(np.zeros(4), index=midx)
lagrange_positive_series = pd.Series([0.0, 2.0, 0.0, 0.0], index=midx)
grid_df = pd.concat([lagrange_negative_series,
lagrange_zero_series,
lagrange_positive_series],
axis=1)
# target1 asks GridSearch to generate its own grid of 3 points
target1 = GridSearch(copy.deepcopy(estimator),
constraints=DemographicParity(),
grid_size=3)
# target2 receives the explicit grid constructed above
target2 = GridSearch(copy.deepcopy(estimator),
constraints=DemographicParity(),
grid=grid_df)
# Try both ways of specifying the Lagrange multipliers
target2.fit(X, y, sensitive_features=A)
target1.fit(X, y, sensitive_features=A)
assert len(target1.all_results) == 3
assert len(target2.all_results) == 3
# Check we generated the same multipliers
for i in range(3):
lm1 = target1.all_results[i].lambda_vec
lm2 = target2.all_results[i].lambda_vec
assert lm1.equals(lm2)
# NOTE(review): fragment — the enclosing scope and the definitions of
# `a0_label`, `a1_label`, `X`, `y`, and `A` are outside this view.
# Fixed random_state keeps the fitted coefficients reproducible
estimator = LogisticRegression(solver='liblinear',
fit_intercept=True,
random_state=97)
# Train an unmitigated estimator
unmitigated_estimator = copy.deepcopy(estimator)
unmitigated_estimator.fit(X, y)
# Do the grid search with a zero Lagrange multiplier
iterables = [['+', '-'], ['all'], [a0_label, a1_label]]
midx = pd.MultiIndex.from_product(iterables, names=['sign', 'event', 'group_id'])
lagrange_zero_series = pd.Series(np.zeros(4), index=midx)
grid_df = pd.DataFrame(lagrange_zero_series)
target = GridSearch(estimator,
constraints=DemographicParity(),
grid=grid_df)
target.fit(X, y, sensitive_features=A)
# A single grid column should yield exactly one result
assert len(target.all_results) == 1
# Check coefficients: with zero multipliers the mitigated model must match
# the unmitigated one exactly
gs_coeff = target.best_result.predictor.coef_
um_coeff = unmitigated_estimator.coef_
assert np.array_equal(gs_coeff, um_coeff)
def test_X_Y_different_rows(self, transformX, transformY, transformA):
    """Fitting must raise when X and y disagree in row count."""
    gs = GridSearch(self.estimator, self.disparity_criterion)
    X, _, A = self._quick_data()
    # One extra row in Y relative to X/A triggers the validation error
    Y = np.random.randint(2, size=len(A) + 1)
    # str() wrapper around the literal was redundant; plain literal suffices
    message = "X and y must have same number of rows"
    with pytest.raises(RuntimeError) as execInfo:
        gs.fit(transformX(X),
               transformY(Y),
               sensitive_features=transformA(A))
    assert message == execInfo.value.args[0]