Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# for now, require one feature per store/product combination
# TODO: would be nice to relax this somehow
assert X.shape[0] == Y.shape[0]
n_weeks = Y.shape[0]
Y = np.reshape(Y, (-1, 1)) # flatten prices into 1-d list
self._model.fit(self._reshape(X), Y)
def predict(self, X):
    """Predict with the wrapped model and regroup the flat output.

    ``X`` is first passed through ``self._reshape``; the wrapped model's
    predictions are then reshaped so that each row has ``block_size``
    entries (``block_size`` is read from the enclosing scope).
    """
    flat_predictions = self._model.predict(self._reshape(X))
    n_columns = block_size
    return np.reshape(flat_predictions, (-1, n_columns))
@property
def coef_(self):
    """Expose the ``coef_`` attribute of the wrapped model."""
    wrapped_model = self._model
    return wrapped_model.coef_
return econml.dml.LinearDMLCateEstimator(
model_t=GroupRegression(t_model, constant_features=[], constant_controls=constant_controls,
compute_gp_avgs=False, is_first_stage=True),
model_y=GroupRegression(y_model, constant_features=features, constant_controls=constant_controls,
compute_gp_avgs=True, is_first_stage=True),
model_final=GroupRegression(f_model, effect_features=features, compute_gp_avgs=True, is_first_stage=False)
).fit(X, Y).coef_
X_final[:, -d_t:],
X_final[:, :d_x],
X_final[:, d_x:-d_t],
sample_weight=n_sum,
sample_var=var_sum,
inference=StatsModelsInference(cov_type=cov_type))
class Splitter:
    """Fixed two-fold splitter that cuts the rows at ``first_half``.

    ``first_half`` is read from the enclosing scope at call time.
    """

    def __init__(self):
        # Stateless: nothing to initialise.
        pass

    def split(self, X, T):
        """Return two index pairs that swap the two halves of ``X``'s rows.

        The first pair is (rows before ``first_half``, rows from
        ``first_half`` on); the second pair is the same two ranges swapped.
        ``T`` is accepted but not used.
        NOTE(review): presumably each pair is (train, test) as in the
        scikit-learn splitter convention -- confirm against the estimator.
        """
        n_rows = X.shape[0]
        fold_a = (np.arange(0, first_half), np.arange(first_half, n_rows))
        fold_b = (np.arange(first_half, n_rows), np.arange(0, first_half))
        return [fold_a, fold_b]
lr = LinearDMLCateEstimator(model_y=first_stage(),
model_t=first_stage(),
n_splits=Splitter(),
linear_first_stages=False,
discrete_treatment=False)
lr.fit(y, X[:, -d_t:], X[:, :d_x], X[:, d_x:-d_t],
inference=StatsModelsInference(cov_type=cov_type))
for alpha in alpha_list:
key = ("n_{}_n_exp_{}_hetero_{}_d_{}_d_x_"
"{}_p_{}_d_t_{}_cov_type_{}_alpha_{}").format(
n, n_exp, hetero_coef, d, d_x, p, d_t, cov_type, alpha)
_append_coverage(key, coverage_est, est, X_test,
alpha, true_coef, true_effect)
_append_coverage(key, coverage_lr, lr, X_test,
alpha, true_coef, true_effect)
if it == n_exp - 1:
n_tests += 1
# columns: prices interacted with products (and constant), features
results = np.empty((n_weeks * block_size, (1 + n_products) + block_features.shape[1]))
# observe n_products * n_stores prices, same number of quantities
for w in range(n_weeks):
prices = gammas + np.random.normal(size=gammas.shape)
quantities[w * block_size: (w + 1) * block_size] = alphas_tiled * prices + \
betas + np.random.normal(size=betas.size)
results[w * block_size: (w + 1) * block_size, 0:1 + n_products] = prices.reshape((-1, 1)) * \
np.concatenate((np.ones((block_features.shape[0], 1)), block_features[:, :n_products]), axis=1)
results[w * block_size: (w + 1) * block_size, 1 + n_products:] = block_features
lassos.append(LassoCV().fit(results, quantities).coef_[0:n_products + 1])
ridges.append(RidgeCV().fit(results, quantities).coef_[0:n_products + 1])
# use features starting at index 1+n_products to skip all prices
doubleMls.append(econml.dml.LinearDMLCateEstimator(model_final=RidgeCV()).fit(results, quantities).coef_)
alphass.append(alphas)
# Persist the collected coefficient lists. The context manager guarantees the
# file is closed even if pickling raises part-way through.
with open('pickledSparse_{0}_{1}_{2}_{3}.pickle'.format(n_exp, n_products, n_stores, n_weeks), 'wb') as pickleFile:
    pickle.dump((alphass, ridges, lassos, doubleMls), pickleFile)
n_sum = np.concatenate((n_sum_first, n_sum_sec))
var_sum = np.concatenate((var_first, var_sec))
first_half_sum = len(y_sum_first)
first_half = len(y1)
for cov_type in cov_type_list:
class SplitterSum:
    """Fixed two-fold splitter that cuts the rows at ``first_half_sum``.

    ``first_half_sum`` is read from the enclosing scope at call time.
    """

    def __init__(self):
        # Stateless: nothing to initialise.
        pass

    def split(self, X, T):
        """Return two index pairs that swap the two halves of ``X``'s rows.

        The first pair is (rows before ``first_half_sum``, rows from
        ``first_half_sum`` on); the second pair is the same two ranges
        swapped. ``T`` is accepted but not used.
        NOTE(review): presumably each pair is (train, test) as in the
        scikit-learn splitter convention -- confirm against the estimator.
        """
        n_rows = X.shape[0]
        fold_a = (np.arange(0, first_half_sum),
                  np.arange(first_half_sum, n_rows))
        fold_b = (np.arange(first_half_sum, n_rows),
                  np.arange(0, first_half_sum))
        return [fold_a, fold_b]
est = LinearDMLCateEstimator(model_y=first_stage(),
model_t=first_stage(),
n_splits=SplitterSum(),
linear_first_stages=False,
discrete_treatment=False)
est.fit(y_sum,
X_final[:, -d_t:],
X_final[:, :d_x],
X_final[:, d_x:-d_t],
sample_weight=n_sum,
sample_var=var_sum,
inference=StatsModelsInference(cov_type=cov_type))
class Splitter:
    """Fixed two-fold splitter that cuts the rows at ``first_half``.

    ``first_half`` is read from the enclosing scope at call time. This is the
    single definition of the class; an earlier truncated duplicate that was
    immediately shadowed has been removed.
    """

    def __init__(self):
        # Stateless: nothing to initialise.
        pass

    def split(self, X, T):
        """Return two index pairs that swap the two halves of ``X``'s rows.

        The first pair is (rows before ``first_half``, rows from
        ``first_half`` on); the second pair is the same two ranges swapped.
        ``T`` is accepted but not used.
        NOTE(review): presumably each pair is (train, test) as in the
        scikit-learn splitter convention -- confirm against the estimator.
        """
        n_rows = X.shape[0]
        return [(np.arange(0, first_half), np.arange(first_half, n_rows)),
                (np.arange(first_half, n_rows), np.arange(0, first_half))]
lr = LinearDMLCateEstimator(model_y=first_stage(),
model_t=first_stage(),
n_splits=Splitter(),
linear_first_stages=False,
discrete_treatment=False)
lr.fit(y, X[:, -d_t:], X[:, :d_x], X[:, d_x:-d_t],
inference=StatsModelsInference(cov_type=cov_type))
for alpha in alpha_list:
key = ("n_{}_n_exp_{}_hetero_{}_d_{}_d_x_"
"{}_p_{}_d_t_{}_cov_type_{}_alpha_{}").format(
n, n_exp, hetero_coef, d, d_x, p, d_t, cov_type, alpha)
_append_coverage(key, coverage_est, est, X_test,
alpha, true_coef, true_effect)
_append_coverage(key, coverage_lr, lr, X_test,
alpha, true_coef, true_effect)
if it == n_exp - 1:
n_tests += 1
mean_coef_cov = np.mean(coverage_est[key]['coef_cov'])
mean_eff_cov = np.mean(coverage_est[key]['effect_cov'])
mean_coef_cov_lr = np.mean(coverage_lr[key]['coef_cov'])
mean_eff_cov_lr = np.mean(coverage_lr[key]['effect_cov'])
[print("{}. Time: {:.2f}, Mean Coef Cov: ({:.4f}, {:.4f}), "
"Mean Effect Cov: ({:.4f}, {:.4f})".format(key,
np.arange(first_half_sum, X.shape[0])),
(np.arange(first_half_sum, X.shape[0]),
np.arange(0, first_half_sum))]
est = LinearDMLCateEstimator(model_y=first_stage(),
model_t=first_stage(),
n_splits=SplitterSum(),
linear_first_stages=False,
discrete_treatment=False)
est.fit(y_sum,
X_final[:, -d_t:],
X_final[:, :d_x],
X_final[:, d_x:-d_t],
sample_weight=n_sum,
sample_var=var_sum,
inference=StatsModelsInference(cov_type=cov_type))
class Splitter:
    """Fixed two-fold splitter that cuts the rows at ``first_half``.

    ``first_half`` is read from the enclosing scope at call time.
    """

    def __init__(self):
        # Stateless: nothing to initialise.
        pass

    def split(self, X, T):
        """Return two index pairs that swap the two halves of ``X``'s rows.

        The first pair is (rows before ``first_half``, rows from
        ``first_half`` on); the second pair is the same two ranges swapped.
        ``T`` is accepted but not used.
        NOTE(review): presumably each pair is (train, test) as in the
        scikit-learn splitter convention -- confirm against the estimator.
        """
        n_rows = X.shape[0]
        fold_a = (np.arange(0, first_half), np.arange(first_half, n_rows))
        fold_b = (np.arange(first_half, n_rows), np.arange(0, first_half))
        return [fold_a, fold_b]
lr = LinearDMLCateEstimator(model_y=first_stage(),
model_t=first_stage(),
n_splits=Splitter(),
linear_first_stages=False,
discrete_treatment=False)
lr.fit(y, X[:, -d_t:], X[:, :d_x], X[:, d_x:-d_t],
inference=StatsModelsInference(cov_type=cov_type))
# Binary covariates: n rows by d columns, each entry drawn as Bernoulli(0.8).
X = np.random.binomial(1, .8, size=(n, d))
# d_t binary treatment columns stacked side by side. Each column is Bernoulli
# with success probability .5 * X[:, 0] + .25, i.e. 0.75 where the first
# covariate is 1 and 0.25 where it is 0.
T = np.hstack([np.random.binomial(1, .5 * X[:, 0] + .25,
size=(n,)).reshape(-1, 1) for _ in range(d_t)])
# Ground-truth coefficient matrix. For each treatment index `it` it stacks a
# column `it + arange(p)`, a column filled with `it + 1`, and d_x - 1 zero
# columns, giving p rows and d_t * (d_x + 1) columns overall.
true_coef = np.hstack([np.hstack([it + np.arange(p).reshape(-1, 1),
it + np.ones((p, 1)), np.zeros((p, d_x - 1))])
for it in range(d_t)])
def true_effect(x, t):
    """Evaluate the ground-truth effect for covariates ``x`` and treatments ``t``.

    A column of ones is prepended to the first ``d_x`` columns of ``x``; that
    matrix is combined with ``t`` by ``cross_product`` and the result is
    multiplied by the transpose of ``true_coef``. ``cross_product``, ``d_x``
    and ``true_coef`` are read from the enclosing scope.
    """
    intercept = np.ones((x.shape[0], 1))
    design = np.hstack([intercept, x[:, :d_x]])
    return cross_product(design, t) @ true_coef.T
# Outcome with p columns: the true effect, plus the first covariate repeated
# in every column, plus Gaussian noise whose scale is hetero_coef * X[:, 0] + 1
# (so the noise is heteroskedastic whenever hetero_coef is non-zero).
y = true_effect(X, T) + X[:, [0] * p] +\
(hetero_coef * X[:, [0]] + 1) * np.random.normal(0, 1, size=(n, p))
# Covariates and treatments side by side; this is the matrix handed to
# _summarize.
XT = np.hstack([X, T])
# _summarize is defined elsewhere; it returns twelve values, unpacked here as a
# "first" and a "second" part of the raw data (X1/X2, y1/y2) and of the
# summarized data (X_final_*, y_sum_*, n_sum_*, var_*).
# NOTE(review): exact semantics of each returned array are not visible here --
# confirm against _summarize.
X1, X2, y1, y2, X_final_first, X_final_sec, y_sum_first, y_sum_sec,\
n_sum_first, n_sum_sec, var_first, var_sec = _summarize(XT, y)
# Re-assemble each pair with the first part on top, so that rows
# [0, first_half) of the raw data and rows [0, first_half_sum) of the
# summarized data come from the first part.
X = np.vstack([X1, X2])
y = np.concatenate((y1, y2))
X_final = np.vstack([X_final_first, X_final_sec])
y_sum = np.concatenate((y_sum_first, y_sum_sec))
n_sum = np.concatenate((n_sum_first, n_sum_sec))
var_sum = np.concatenate((var_first, var_sec))
# Boundaries between the first and second parts; the Splitter / SplitterSum
# classes read these names to build their folds.
first_half_sum = len(y_sum_first)
first_half = len(y1)
for cov_type in cov_type_list:
class SplitterSum:
def __init__(self):
return
def split(self, X, T):
return [(np.arange(0, first_half_sum),
np.arange(first_half_sum, X.shape[0])),
beta[beta_support] = np.random.normal(size=len(beta_support))
beta = beta / np.linalg.norm(beta)
# DGP. Create samples of data (y, T, X) from known truth
y, T, X, _ = econml.dgp.dgp_data_multiple_treatments(
n_samples, n_cov, n_treatments, alpha, beta, effect)
# DML Estimation.
dml_reg = econml.dml.DML(np.arange(X.shape[1]), [], np.arange(X.shape[1], X.shape[1] + T.shape[1]),
model_y=internal_reg_y,
model_t=internal_reg_t,
model_f=internal_reg_f,
model_c=internal_reg_c)
dml_reg.fit(np.concatenate((X, T), axis=1), y)
y_test, T_test, X_test = econml.dgp.dgp_counterfactual_data_multiple_treatments(
n_samples, n_cov, beta, effect, 5. * np.ones(n_treatments))
dml_r2score.append(dml_reg.score(np.concatenate((X_test, T_test), axis=1), y_test))
dml_te.append(dml_reg.effect(np.zeros((1, 1)), np.ones((1, 1)), np.zeros((1, 0))))
# Estimation with other methods for comparison
direct_reg1.fit(np.concatenate((X, T), axis=1), y)
direct_r2score1.append(direct_reg1.score(np.concatenate((X_test, T_test), axis=1), y_test))
direct_te1.append(direct_reg1.coef_[X.shape[1]])
direct_reg2.fit(np.concatenate((X, T), axis=1), y)
direct_r2score2.append(direct_reg2.score(np.concatenate((X_test, T_test), axis=1), y_test))
direct_te2.append(direct_reg2.coef_[X.shape[1]])
dml_r2scores.append(dml_r2score)
direct_r2scores1.append(direct_r2score1)
direct_r2scores2.append(direct_r2score2)
# Sparse coefficients of treatment as a function of co-variates
alpha_sparsity = sparsity
# Pick `alpha_sparsity` distinct covariate indices (no replacement) to carry
# non-zero coefficients.
alpha_support = np.random.choice(n_cov, alpha_sparsity, replace=False)
alpha = np.zeros(n_cov)
alpha[alpha_support] = np.random.normal(size=len(alpha_support))
# Make alpha a column vector, then scale it to unit norm.
alpha = alpha.reshape((-1, 1))
alpha = alpha / np.linalg.norm(alpha)
# Coefficients of outcomes as a function of co-variates
beta_sparsity = sparsity
# Same construction as alpha, but beta stays one-dimensional.
beta_support = np.random.choice(n_cov, beta_sparsity, replace=False)
beta = np.zeros(n_cov)
beta[beta_support] = np.random.normal(size=len(beta_support))
beta = beta / np.linalg.norm(beta)
# DGP. Create samples of data (y, T, X) from known truth
# The fourth returned value is discarded.
y, T, X, _ = econml.dgp.dgp_data_multiple_treatments(
n_samples, n_cov, n_treatments, alpha, beta, effect)
# DML Estimation.
# The first and third positional arguments are column-index ranges into the
# concatenated [X, T] matrix passed to fit below: the X columns first, then the
# T columns. The second argument is an empty list.
# NOTE(review): the meaning of each positional argument is not visible here --
# confirm against the econml.dml.DML signature.
dml_reg = econml.dml.DML(np.arange(X.shape[1]), [], np.arange(X.shape[1], X.shape[1] + T.shape[1]),
model_y=internal_reg_y,
model_t=internal_reg_t,
model_f=internal_reg_f,
model_c=internal_reg_c)
dml_reg.fit(np.concatenate((X, T), axis=1), y)
# Fresh test sample from the counterfactual generator, called with a vector of
# 5.0 of length n_treatments as its last argument.
# NOTE(review): presumably that vector is the treatment value assigned to
# every test sample -- confirm against econml.dgp.
y_test, T_test, X_test = econml.dgp.dgp_counterfactual_data_multiple_treatments(
n_samples, n_cov, beta, effect, 5. * np.ones(n_treatments))
# Record the fitted model's score on the test sample and the value returned by
# effect() for the fixed arguments (zeros((1, 1)), ones((1, 1)), zeros((1, 0))).
dml_r2score.append(dml_reg.score(np.concatenate((X_test, T_test), axis=1), y_test))
dml_te.append(dml_reg.effect(np.zeros((1, 1)), np.ones((1, 1)), np.zeros((1, 0))))
# Estimation with other methods for comparison
alpha[alpha_support] = np.random.normal(size=len(alpha_support))
alpha = alpha.reshape((-1, 1))
alpha = alpha / np.linalg.norm(alpha)
# Coefficients of outcomes as a function of co-variates
beta_sparsity = sparsity
beta_support = np.random.choice(n_cov, beta_sparsity, replace=False)
beta = np.zeros(n_cov)
beta[beta_support] = np.random.normal(size=len(beta_support))
beta = beta / np.linalg.norm(beta)
# DGP. Create samples of data (y, T, X) from known truth
y, T, X, _ = econml.dgp.dgp_data_multiple_treatments(
n_samples, n_cov, n_treatments, alpha, beta, effect)
# DML Estimation.
dml_reg = econml.dml.DML(np.arange(X.shape[1]), [], np.arange(X.shape[1], X.shape[1] + T.shape[1]),
model_y=internal_reg_y,
model_t=internal_reg_t,
model_f=internal_reg_f,
model_c=internal_reg_c)
dml_reg.fit(np.concatenate((X, T), axis=1), y)
y_test, T_test, X_test = econml.dgp.dgp_counterfactual_data_multiple_treatments(
n_samples, n_cov, beta, effect, 5. * np.ones(n_treatments))
dml_r2score.append(dml_reg.score(np.concatenate((X_test, T_test), axis=1), y_test))
dml_te.append(dml_reg.effect(np.zeros((1, 1)), np.ones((1, 1)), np.zeros((1, 0))))
# Estimation with other methods for comparison
direct_reg1.fit(np.concatenate((X, T), axis=1), y)
direct_r2score1.append(direct_reg1.score(np.concatenate((X_test, T_test), axis=1), y_test))
direct_te1.append(direct_reg1.coef_[X.shape[1]])