How to use econml - 10 common examples

To help you get started, we’ve selected a few econml examples based on popular ways the library is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github microsoft / EconML / econml / test_integration.py View on Github external
# for now, require one feature per store/product combination
            # TODO: would be nice to relax this somehow
            assert X.shape[0] == Y.shape[0]
            n_weeks = Y.shape[0]
            Y = np.reshape(Y, (-1, 1))  # flatten prices into 1-d list
            self._model.fit(self._reshape(X), Y)

        def predict(self, X):
            """Predict with the wrapped model and regroup the output.

            ``X`` is first passed through ``self._reshape``; the wrapped model's
            predictions are then reshaped into rows of ``block_size`` columns.
            """
            reshaped_inputs = self._reshape(X)
            flat_predictions = self._model.predict(reshaped_inputs)
            return np.reshape(flat_predictions, (-1, block_size))

        @property
        def coef_(self):
            """Expose the ``coef_`` attribute of the wrapped model."""
            wrapped = self._model
            return wrapped.coef_

    return econml.dml.LinearDMLCateEstimator(
        model_t=GroupRegression(t_model, constant_features=[], constant_controls=constant_controls,
                                compute_gp_avgs=False, is_first_stage=True),
        model_y=GroupRegression(y_model, constant_features=features, constant_controls=constant_controls,
                                compute_gp_avgs=True, is_first_stage=True),
        model_final=GroupRegression(f_model, effect_features=features, compute_gp_avgs=True, is_first_stage=False)
    ).fit(X, Y).coef_
github microsoft / EconML / monte_carlo_tests / monte_carlo_statsmodels.py View on Github external
X_final[:, -d_t:],
                                            X_final[:, :d_x],
                                            X_final[:, d_x:-d_t],
                                            sample_weight=n_sum,
                                            sample_var=var_sum,
                                            inference=StatsModelsInference(cov_type=cov_type))

                                    class Splitter:
                                        """Two-fold splitter with a fixed boundary at ``first_half``.

                                        ``first_half`` is looked up in the enclosing scope each time
                                        ``split`` is called.
                                        """

                                        def split(self, X, T):
                                            """Return the two index-array pairs, one per fold.

                                            The first pair is (rows before ``first_half``, rows from
                                            ``first_half`` up to ``X.shape[0]``); the second pair holds
                                            the same two ranges in swapped order.  ``T`` is unused.
                                            """
                                            n_rows = X.shape[0]
                                            fold_a = (np.arange(0, first_half), np.arange(first_half, n_rows))
                                            fold_b = (np.arange(first_half, n_rows), np.arange(0, first_half))
                                            return [fold_a, fold_b]

                                    lr = LinearDMLCateEstimator(model_y=first_stage(),
                                                                model_t=first_stage(),
                                                                n_splits=Splitter(),
                                                                linear_first_stages=False,
                                                                discrete_treatment=False)
                                    lr.fit(y, X[:, -d_t:], X[:, :d_x], X[:, d_x:-d_t],
                                           inference=StatsModelsInference(cov_type=cov_type))
                                    for alpha in alpha_list:
                                        key = ("n_{}_n_exp_{}_hetero_{}_d_{}_d_x_"
                                               "{}_p_{}_d_t_{}_cov_type_{}_alpha_{}").format(
                                            n, n_exp, hetero_coef, d, d_x, p, d_t, cov_type, alpha)
                                        _append_coverage(key, coverage_est, est, X_test,
                                                         alpha, true_coef, true_effect)
                                        _append_coverage(key, coverage_lr, lr, X_test,
                                                         alpha, true_coef, true_effect)
                                        if it == n_exp - 1:
                                            n_tests += 1
github microsoft / EconML / econml / test_integration.py View on Github external
# columns: prices interacted with products (and constant), features
        results = np.empty((n_weeks * block_size, (1 + n_products) + block_features.shape[1]))

        # observe n_products * n_stores prices, same number of quantities
        for w in range(n_weeks):
            prices = gammas + np.random.normal(size=gammas.shape)
            quantities[w * block_size: (w + 1) * block_size] = alphas_tiled * prices + \
                betas + np.random.normal(size=betas.size)
            results[w * block_size: (w + 1) * block_size, 0:1 + n_products] = prices.reshape((-1, 1)) * \
                np.concatenate((np.ones((block_features.shape[0], 1)), block_features[:, :n_products]), axis=1)
            results[w * block_size: (w + 1) * block_size, 1 + n_products:] = block_features

        lassos.append(LassoCV().fit(results, quantities).coef_[0:n_products + 1])
        ridges.append(RidgeCV().fit(results, quantities).coef_[0:n_products + 1])
        # use features starting at index 1+n_products to skip all prices
        doubleMls.append(econml.dml.LinearDMLCateEstimator(model_final=RidgeCV()).fit(results, quantities).coef_)
        alphass.append(alphas)

    pickleFile = open('pickledSparse_{0}_{1}_{2}_{3}.pickle'.format(n_exp, n_products, n_stores, n_weeks), 'wb')
    pickle.dump((alphass, ridges, lassos, doubleMls), pickleFile)
    pickleFile.close()
github microsoft / EconML / monte_carlo_tests / monte_carlo_statsmodels.py View on Github external
n_sum = np.concatenate((n_sum_first, n_sum_sec))
                                var_sum = np.concatenate((var_first, var_sec))
                                first_half_sum = len(y_sum_first)
                                first_half = len(y1)
                                for cov_type in cov_type_list:
                                    class SplitterSum:
                                        """Two-fold splitter with a fixed boundary at ``first_half_sum``.

                                        ``first_half_sum`` is looked up in the enclosing scope each
                                        time ``split`` is called.
                                        """

                                        def split(self, X, T):
                                            """Return the two index-array pairs, one per fold.

                                            The first pair is (rows before ``first_half_sum``, rows from
                                            ``first_half_sum`` up to ``X.shape[0]``); the second pair
                                            holds the same two ranges in swapped order.  ``T`` is unused.
                                            """
                                            n_rows = X.shape[0]
                                            fold_a = (np.arange(0, first_half_sum),
                                                      np.arange(first_half_sum, n_rows))
                                            fold_b = (np.arange(first_half_sum, n_rows),
                                                      np.arange(0, first_half_sum))
                                            return [fold_a, fold_b]

                                    est = LinearDMLCateEstimator(model_y=first_stage(),
                                                                 model_t=first_stage(),
                                                                 n_splits=SplitterSum(),
                                                                 linear_first_stages=False,
                                                                 discrete_treatment=False)
                                    est.fit(y_sum,
                                            X_final[:, -d_t:],
                                            X_final[:, :d_x],
                                            X_final[:, d_x:-d_t],
                                            sample_weight=n_sum,
                                            sample_var=var_sum,
                                            inference=StatsModelsInference(cov_type=cov_type))

                                    class Splitter:
                                        def __init__(self):
                                            return
github microsoft / EconML / monte_carlo_tests / monte_carlo_statsmodels.py View on Github external
class Splitter:
                                        """Two-fold splitter with a fixed boundary at ``first_half``.

                                        ``first_half`` is looked up in the enclosing scope each time
                                        ``split`` is called.
                                        """

                                        def split(self, X, T):
                                            """Return the two index-array pairs, one per fold.

                                            The first pair is (rows before ``first_half``, rows from
                                            ``first_half`` up to ``X.shape[0]``); the second pair holds
                                            the same two ranges in swapped order.  ``T`` is unused.
                                            """
                                            n_rows = X.shape[0]
                                            fold_a = (np.arange(0, first_half), np.arange(first_half, n_rows))
                                            fold_b = (np.arange(first_half, n_rows), np.arange(0, first_half))
                                            return [fold_a, fold_b]

                                    lr = LinearDMLCateEstimator(model_y=first_stage(),
                                                                model_t=first_stage(),
                                                                n_splits=Splitter(),
                                                                linear_first_stages=False,
                                                                discrete_treatment=False)
                                    lr.fit(y, X[:, -d_t:], X[:, :d_x], X[:, d_x:-d_t],
                                           inference=StatsModelsInference(cov_type=cov_type))
                                    for alpha in alpha_list:
                                        key = ("n_{}_n_exp_{}_hetero_{}_d_{}_d_x_"
                                               "{}_p_{}_d_t_{}_cov_type_{}_alpha_{}").format(
                                            n, n_exp, hetero_coef, d, d_x, p, d_t, cov_type, alpha)
                                        _append_coverage(key, coverage_est, est, X_test,
                                                         alpha, true_coef, true_effect)
                                        _append_coverage(key, coverage_lr, lr, X_test,
                                                         alpha, true_coef, true_effect)
                                        if it == n_exp - 1:
                                            n_tests += 1
                                            mean_coef_cov = np.mean(coverage_est[key]['coef_cov'])
                                            mean_eff_cov = np.mean(coverage_est[key]['effect_cov'])
                                            mean_coef_cov_lr = np.mean(coverage_lr[key]['coef_cov'])
                                            mean_eff_cov_lr = np.mean(coverage_lr[key]['effect_cov'])
                                            [print("{}. Time: {:.2f}, Mean Coef Cov: ({:.4f}, {:.4f}), "
                                                   "Mean Effect Cov: ({:.4f}, {:.4f})".format(key,
github microsoft / EconML / monte_carlo_tests / monte_carlo_statsmodels.py View on Github external
np.arange(first_half_sum, X.shape[0])),
                                                    (np.arange(first_half_sum, X.shape[0]),
                                                     np.arange(0, first_half_sum))]

                                    est = LinearDMLCateEstimator(model_y=first_stage(),
                                                                 model_t=first_stage(),
                                                                 n_splits=SplitterSum(),
                                                                 linear_first_stages=False,
                                                                 discrete_treatment=False)
                                    est.fit(y_sum,
                                            X_final[:, -d_t:],
                                            X_final[:, :d_x],
                                            X_final[:, d_x:-d_t],
                                            sample_weight=n_sum,
                                            sample_var=var_sum,
                                            inference=StatsModelsInference(cov_type=cov_type))

                                    class Splitter:
                                        """Two-fold splitter with a fixed boundary at ``first_half``.

                                        ``first_half`` is looked up in the enclosing scope each time
                                        ``split`` is called.
                                        """

                                        def split(self, X, T):
                                            """Return the two index-array pairs, one per fold.

                                            The first pair is (rows before ``first_half``, rows from
                                            ``first_half`` up to ``X.shape[0]``); the second pair holds
                                            the same two ranges in swapped order.  ``T`` is unused.
                                            """
                                            n_rows = X.shape[0]
                                            fold_a = (np.arange(0, first_half), np.arange(first_half, n_rows))
                                            fold_b = (np.arange(first_half, n_rows), np.arange(0, first_half))
                                            return [fold_a, fold_b]

                                    lr = LinearDMLCateEstimator(model_y=first_stage(),
                                                                model_t=first_stage(),
                                                                n_splits=Splitter(),
                                                                linear_first_stages=False,
                                                                discrete_treatment=False)
                                    lr.fit(y, X[:, -d_t:], X[:, :d_x], X[:, d_x:-d_t],
                                           inference=StatsModelsInference(cov_type=cov_type))
github microsoft / EconML / monte_carlo_tests / monte_carlo_statsmodels.py View on Github external
X = np.random.binomial(1, .8, size=(n, d))
                                T = np.hstack([np.random.binomial(1, .5 * X[:, 0] + .25,
                                                                  size=(n,)).reshape(-1, 1) for _ in range(d_t)])
                                true_coef = np.hstack([np.hstack([it + np.arange(p).reshape(-1, 1),
                                                                  it + np.ones((p, 1)), np.zeros((p, d_x - 1))])
                                                       for it in range(d_t)])

                                def true_effect(x, t):
                                    """Evaluate the effect defined by ``true_coef`` at ``(x, t)``.

                                    A column of ones is prepended to the first ``d_x`` columns of
                                    ``x``; the ``cross_product`` of that matrix with ``t`` is then
                                    multiplied by ``true_coef.T``.
                                    """
                                    intercept = np.ones((x.shape[0], 1))
                                    design = np.hstack([intercept, x[:, :d_x]])
                                    return cross_product(design, t) @ true_coef.T
                                y = true_effect(X, T) + X[:, [0] * p] +\
                                    (hetero_coef * X[:, [0]] + 1) * np.random.normal(0, 1, size=(n, p))

                                XT = np.hstack([X, T])
                                X1, X2, y1, y2, X_final_first, X_final_sec, y_sum_first, y_sum_sec,\
                                    n_sum_first, n_sum_sec, var_first, var_sec = _summarize(XT, y)
                                X = np.vstack([X1, X2])
                                y = np.concatenate((y1, y2))
                                X_final = np.vstack([X_final_first, X_final_sec])
                                y_sum = np.concatenate((y_sum_first, y_sum_sec))
                                n_sum = np.concatenate((n_sum_first, n_sum_sec))
                                var_sum = np.concatenate((var_first, var_sec))
                                first_half_sum = len(y_sum_first)
                                first_half = len(y1)
                                for cov_type in cov_type_list:
                                    class SplitterSum:
                                        def __init__(self):
                                            return

                                        def split(self, X, T):
                                            return [(np.arange(0, first_half_sum),
                                                     np.arange(first_half_sum, X.shape[0])),
github microsoft / EconML / econml / test_integration.py View on Github external
beta[beta_support] = np.random.normal(size=len(beta_support))
            beta = beta / np.linalg.norm(beta)

            # DGP. Create samples of data (y, T, X) from known truth
            y, T, X, _ = econml.dgp.dgp_data_multiple_treatments(
                n_samples, n_cov, n_treatments, alpha, beta, effect)

            # DML Estimation.
            dml_reg = econml.dml.DML(np.arange(X.shape[1]), [], np.arange(X.shape[1], X.shape[1] + T.shape[1]),
                                     model_y=internal_reg_y,
                                     model_t=internal_reg_t,
                                     model_f=internal_reg_f,
                                     model_c=internal_reg_c)
            dml_reg.fit(np.concatenate((X, T), axis=1), y)

            y_test, T_test, X_test = econml.dgp.dgp_counterfactual_data_multiple_treatments(
                n_samples, n_cov, beta, effect, 5. * np.ones(n_treatments))
            dml_r2score.append(dml_reg.score(np.concatenate((X_test, T_test), axis=1), y_test))
            dml_te.append(dml_reg.effect(np.zeros((1, 1)), np.ones((1, 1)), np.zeros((1, 0))))

            # Estimation with other methods for comparison
            direct_reg1.fit(np.concatenate((X, T), axis=1), y)
            direct_r2score1.append(direct_reg1.score(np.concatenate((X_test, T_test), axis=1), y_test))
            direct_te1.append(direct_reg1.coef_[X.shape[1]])

            direct_reg2.fit(np.concatenate((X, T), axis=1), y)
            direct_r2score2.append(direct_reg2.score(np.concatenate((X_test, T_test), axis=1), y_test))
            direct_te2.append(direct_reg2.coef_[X.shape[1]])

        dml_r2scores.append(dml_r2score)
        direct_r2scores1.append(direct_r2score1)
        direct_r2scores2.append(direct_r2score2)
github microsoft / EconML / econml / test_integration.py View on Github external
# Sparse coefficients of treatment as a function of co-variates
            alpha_sparsity = sparsity
            alpha_support = np.random.choice(n_cov, alpha_sparsity, replace=False)
            alpha = np.zeros(n_cov)
            alpha[alpha_support] = np.random.normal(size=len(alpha_support))
            alpha = alpha.reshape((-1, 1))
            alpha = alpha / np.linalg.norm(alpha)
            # Coefficients of outcomes as a function of co-variates
            beta_sparsity = sparsity
            beta_support = np.random.choice(n_cov, beta_sparsity, replace=False)
            beta = np.zeros(n_cov)
            beta[beta_support] = np.random.normal(size=len(beta_support))
            beta = beta / np.linalg.norm(beta)

            # DGP. Create samples of data (y, T, X) from known truth
            y, T, X, _ = econml.dgp.dgp_data_multiple_treatments(
                n_samples, n_cov, n_treatments, alpha, beta, effect)

            # DML Estimation.
            dml_reg = econml.dml.DML(np.arange(X.shape[1]), [], np.arange(X.shape[1], X.shape[1] + T.shape[1]),
                                     model_y=internal_reg_y,
                                     model_t=internal_reg_t,
                                     model_f=internal_reg_f,
                                     model_c=internal_reg_c)
            dml_reg.fit(np.concatenate((X, T), axis=1), y)

            y_test, T_test, X_test = econml.dgp.dgp_counterfactual_data_multiple_treatments(
                n_samples, n_cov, beta, effect, 5. * np.ones(n_treatments))
            dml_r2score.append(dml_reg.score(np.concatenate((X_test, T_test), axis=1), y_test))
            dml_te.append(dml_reg.effect(np.zeros((1, 1)), np.ones((1, 1)), np.zeros((1, 0))))

            # Estimation with other methods for comparison
github microsoft / EconML / econml / test_integration.py View on Github external
alpha[alpha_support] = np.random.normal(size=len(alpha_support))
            alpha = alpha.reshape((-1, 1))
            alpha = alpha / np.linalg.norm(alpha)
            # Coefficients of outcomes as a function of co-variates
            beta_sparsity = sparsity
            beta_support = np.random.choice(n_cov, beta_sparsity, replace=False)
            beta = np.zeros(n_cov)
            beta[beta_support] = np.random.normal(size=len(beta_support))
            beta = beta / np.linalg.norm(beta)

            # DGP. Create samples of data (y, T, X) from known truth
            y, T, X, _ = econml.dgp.dgp_data_multiple_treatments(
                n_samples, n_cov, n_treatments, alpha, beta, effect)

            # DML Estimation.
            dml_reg = econml.dml.DML(np.arange(X.shape[1]), [], np.arange(X.shape[1], X.shape[1] + T.shape[1]),
                                     model_y=internal_reg_y,
                                     model_t=internal_reg_t,
                                     model_f=internal_reg_f,
                                     model_c=internal_reg_c)
            dml_reg.fit(np.concatenate((X, T), axis=1), y)

            y_test, T_test, X_test = econml.dgp.dgp_counterfactual_data_multiple_treatments(
                n_samples, n_cov, beta, effect, 5. * np.ones(n_treatments))
            dml_r2score.append(dml_reg.score(np.concatenate((X_test, T_test), axis=1), y_test))
            dml_te.append(dml_reg.effect(np.zeros((1, 1)), np.ones((1, 1)), np.zeros((1, 0))))

            # Estimation with other methods for comparison
            direct_reg1.fit(np.concatenate((X, T), axis=1), y)
            direct_r2score1.append(direct_reg1.score(np.concatenate((X_test, T_test), axis=1), y_test))
            direct_te1.append(direct_reg1.coef_[X.shape[1]])