Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
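(pd.DataFrame, pd.Series, pd.Series, Any): A four-tuple containing:
* the covariates for individuals under a specific treatment,
* the observed outcomes for these individuals (if y was passed and is not None, otherwise None),
* the sample weights for these individuals (if w was passed and is not None, otherwise None),
* the current treatment value.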
"""
treatment_values = g_tools.get_iterable_treatment_values(None, a)
for treatment_value in treatment_values:
treated = a == treatment_value
cur_X = X.loc[treated, :]
cur_y = y[treated] if y is not None else None
cur_w = w[treated] if w is not None else None
yield cur_X, cur_y, cur_w, treatment_value
class Standardization(IndividualOutcomeEstimator):
"""
Standard standardization model for causal inference.
Learns a model that takes the treatment assignment into account; later, this value can be intervened on,
changing the predicted outcome.
"""
def __init__(self, learner, encode_treatment=False, predict_proba=False):
"""
Args:
learner: Initialized sklearn model.
encode_treatment (bool): Whether to encode the treatment as one-hot matrix.
Usually good if n_treatment > 2.
predict_proba (bool): In case the outcome task is classification and in case `learner` supports the
operation, if True - prediction will utilize learner's `predict_proba` or
`decision_function` which returns a continuous matrix of size (n_samples, n_classes).
# Attribute the provided sample_weights to the final estimator in the pipeline.
# Attribution is done by step name followed by dunder, see:
# https://scikit-learn.org/stable/modules/generated/sklearn.pipeline.Pipeline.html
estimator_name, estimator = estimator.steps[-1]
fit_params = {"{}__sample_weight".format(estimator_name): sample_weight}
else:
fit_params = dict(sample_weight=sample_weight)
if "sample_weight" not in inspect.signature(estimator.fit).parameters and sample_weight is None:
# Estimator does not support "sample_weight" parameter and sample_weight is not provided
fit_params = {}
return fit_params
class StratifiedStandardization(IndividualOutcomeEstimator):
"""
Standardization model that learns a model for each treatment group (i.e. subgroup of subjects with the same
treatment assignment).
"""
def __init__(self, learner, treatment_values=None, predict_proba=False):
"""
Args:
learner: Initialized sklearn model or a mapping (dict) between treatment value and initialized model.
For example: {0: Ridge(alpha=5), 1: Ridge(alpha=0.1)},
or even different models all over: {0: Ridge(), 1: RandomForestRegressor()}.
Make sure these treatment_values keys represent all treatment values found in later use.
treatment_values (list): list of unique values of treatment (can be a single value as well).
If known beforehand (on initialization time), can be passed now to init, otherwise
would be inferred during fit (where treatment assignment must be supplied).