Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
"quantile_gauss_fit": lambda x, p, snr, params: CausalSimulator3._treatment_quantile_gauss_fit(
x, p, snr),
"logistic": lambda x, p, snr, params: CausalSimulator3._treatment_logistic_dichotomous(x, p,
A DataFrame shaped (num_samples x num_of_possible_treatment_categories).
Raises:
ValueError: If the number of categories is not exactly two. This method supports dichotomous treatment only.
"""
if prob_category.size != 2: # this method suited for dichotomous outcome only
raise ValueError("logistic method supports only binary treatment. Got the distribution vector "
"{p_vec} of length {n_cat}".format(n_cat=prob_category.size, p_vec=prob_category))
index_names = x_continuous.index
columns_names = prob_category.index
propensity = pd.DataFrame(index=index_names, columns=columns_names)
# compute propensities:
t = stats.norm(loc=0, scale=1).ppf(prob_category.iloc[1]) # percentile given a distribution
cur_propensity = stats.norm(loc=x_continuous, scale=(1 - snr)).sf(t) # sf is 1 - CDF
# discretize values:
treatment = CausalSimulator3._discretize_col(x_continuous, prob_category)
propensity.loc[:, columns_names[1]] = cur_propensity
propensity.loc[:, columns_names[0]] = np.ones(cur_propensity.size) - cur_propensity
return propensity, treatment
min_p = cur_pdfs.div(cur_pdfs.sum()).min()
cur_propensity = (max_p - min_p) * (cur_pdfs - cur_pdfs.min()) / \
(cur_pdfs.max() - cur_pdfs.min()) + min_p # type: pd.Series
# assign the propensity to the assigned category:
propensity.loc[cur_samples_mask, cur_category] = cur_propensity
# assign the propensity to the other, not assigned, categories:
left_over_ps = prob_category.drop(cur_category) # type: pd.Series
left_over_ps = left_over_ps.div(left_over_ps.sum())
not_propensity = pd.DataFrame(data=np.tile(np.ones_like(cur_propensity) - cur_propensity,
(left_over_ps.size, 1)).transpose(),
index=cur_propensity.index, columns=left_over_ps.index)
not_propensity = not_propensity.mul(left_over_ps)
propensity.loc[cur_samples_mask, left_over_ps.index] = not_propensity
# propensity = propensity.astype(np.float)
# treatment assignment is drawn according to marginal propensities:
treatment = CausalSimulator3._sample_from_row_stochastic_matrix(propensity)
return propensity, treatment
"marginal_structural_model": lambda x, t, m, beta=None: CausalSimulator3._marginal_structural_model_link(
x, t, m, beta=beta),
None: lambda x, beta=None: x
"""
Creates a variable linearly dependent on its parents and then takes the log of its absolute value: log(|beta*X|)
Args:
X_parents (pd.DataFrame): a (num_samples x num_parents) matrix containing the data (over all samples or
patients) of the variables which are topological parents of the current
variable
beta (pd.Series): Optional, a given Series which index corresponds to the parents variables
(X_parents.columns)
Returns:
(pd.Series, pd.Series): 2-element tuple containing:
- **x_new** (*pd.Series*): Newly created signal.
- **beta** (*pd.Series*): The coefficients used to create the linear link.
"""
x_new, beta = CausalSimulator3._affine_link(X_parents=X_parents, beta=beta)
x_new = np.log(np.abs(x_new)) # type: pd.Series
return x_new, beta
"log": lambda x, beta=None: CausalSimulator3._log_linking(x, beta),
"poly": lambda x, beta=None: CausalSimulator3._poly_linking(x, beta)}
TREATMENT_METHODS = {"random": lambda x, p, snr, params: CausalSimulator3._treatment_random(x, p),
"odds_ratio": lambda x, p, snr, params: CausalSimulator3._treatment_odds_ratio(x, p, snr),
"gaussian": lambda x, p, snr, params: CausalSimulator3._treatment_gaussian_dichotomous(x, p,
snr)}
# G for general - applicable to all types of variables
(pd.Series, pd.DataFrame, pd.Series): 3-element tuple containing:
- **treatment** (*pd.Series*): Treatment assignment to each sample.
- **propensity** (*pd.DataFrame*): The marginal conditional probability of treatment given covariates.
A DataFrame shaped (num_samples x num_of_possible_treatment_categories).
- **beta** (*pd.Series*): The coefficients used to generate the current variable from its predecessors.
Raises:
ValueError: if prob_category is None (treatment must be categorical)
ValueError: If prob_category is not a legitimate probability vector (non negative, sums to 1)
"""
# Check input validity:
if prob_category is None:
raise ValueError("Treatment variable must be categorical, therefore it must have a legitimate distribution "
"over its possible values. Got None instead.")
CausalSimulator3._check_for_legitimate_probabilities(prob_category)
# generate only the continuous signal since it is later processed (therefore prob_category = None)
x_continuous, beta = self.generate_covariate_col(X_parents=X_parents, link_type=link_type, snr=snr,
prob_category=None, num_samples=X_parents.index.size,
var_name=var_name)
generation_method = self.TREATMENT_METHODS.get(method)
if generation_method is None:
raise KeyError("The given method {method} is not supported, "
"only {valid_methods}.".format(valid_methods=list(self.TREATMENT_METHODS.keys()),
method=method))
else:
params = self.params.get(var_name, {})
propensity, treatment = generation_method(x_continuous, prob_category, snr=snr, params=params)
return treatment.astype(int), propensity.astype(float), beta
Args:
X_parents (pd.DataFrame): a (num_samples x num_parents) matrix containing the data (over all samples or
patients) of the variables which are topological parents of the current
variable
beta (pd.Series): Optional, a given Series which index corresponds to the parents variables
(X_parents.columns)
Returns:
(pd.Series, pd.Series): 2-element tuple containing:
- **X_new** (*pd.Series*): Newly created signal.
- **beta** (*pd.Series*): The coefficients used to create the linear link.
"""
X_parents = X_parents.copy() # type: pd.DataFrame
X_parents["intercept"] = 1
return CausalSimulator3._linear_link(X_parents, beta=beta)