How to use the statsmodels.api.distributions.ECDF function in statsmodels

To help you get started, we’ve selected a few statsmodels examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github selective-inference / Python-software / examples / plots.py View on Github external
dfs[label[1]]['naive_coverage'],
                 dfs[label[1]]['naive_pval'],
                 dfs[label[1]]['naive_length'])
    
    print("selective:", np.mean(P), np.std(P), np.mean(L), np.mean(coverage))
    print("naive:", np.mean(naive_P), np.std(naive_P), np.mean(naive_L), np.mean(naive_coverage))
    print("len ratio selective divided by naive:", np.mean(np.array(L) / np.array(naive_L)))


# Pull the 'pval' / 'naive_pval' columns out of the per-model result frames.
probit_P = dfs['probit']['pval']
naive_P = dfs['probit']['naive_pval']
logit_P = dfs['logit']['pval']


# Start from an empty figure and draw one empirical-CDF curve per series,
# each evaluated on the grid U.
plt.clf()
for _series, _fmt, _name in (
    (probit_P, 'c', "fit probit"),
    (logit_P, 'b', "fit logit"),
    (naive_P, 'y', "naive"),
):
    _ecdf = sm.distributions.ECDF(_series)
    plt.plot(U, _ecdf(U), _fmt, linewidth=3, label=_name)

# Dashed black diagonal y = x as a visual reference line.
plt.plot([0, 1], [0, 1], 'k--', linewidth=2)

# Axis labels, title, legend, then write the figure out as a PDF.
plt.xlabel("Observed pivot", fontsize=18)
plt.ylabel("Proportion (empirical CDF)", fontsize=18)
plt.title("Pivots", fontsize=20)
plt.legend(fontsize=18, loc="lower right")
plt.savefig('cv_pivots.pdf')
github selective-inference / Python-software / selectinf / learning / utils.py View on Github external
'Naive': 'r',
                                  'Bonferroni': 'gray',
                                  'Lee':'gray',
                                  'Strawman':'gray'},
                       figsize=(8, 8), straw=False):

        f = plt.figure(figsize=figsize)
        new_df = pd.DataFrame({'Learned': df['pivot'],
                               'Naive': df['naive_pivot']})
        if straw:
            new_df = pd.DataFrame({'Learned': new_df['Learned'],
                                   'Strawman': new_df['Naive']})
        U = np.linspace(0, 1, 101)
        ax = f.gca()
        for k in new_df.keys():
            plt.plot(U, sm.distributions.ECDF(new_df[k])(U), color=palette[k], label=k, linewidth=5)
        plt.plot([0,1], [0,1], 'k--', linewidth=3)
        ax.set_xlabel('pivot', fontsize=20)
        ax.set_ylabel('ECDF(pivot)', fontsize=20)
        ax.legend(fontsize=15)

        pngfile = outbase + '_pivot.png'
        plt.savefig(pngfile, dpi=300)

        return ax, f, pngfile, df, new_df
github selective-inference / Python-software / examples / full_model_example.py View on Github external
# Fragment of a simulation driver (enclosing scope not visible here): calls
# simulate() 300 times, pools its outputs, and plots the empirical CDF of the
# pooled P values.
import statsmodels.api as sm
    import matplotlib.pyplot as plt
    
    n = 100
    # Evaluation grid for the ECDF: 101 evenly spaced points on [0, 1].
    U = np.linspace(0, 1, 101)
    P, L, coverage = [], [], []
    plt.clf()
    for i in range(300):
        # Each simulate() call returns three iterables; their elements are
        # appended to the matching pooled list.
        p, cover, l = simulate(n=n)
        coverage.extend(cover)
        P.extend(p)
        L.extend(l)
        # Running summary after every run: mean and std of P, mean of L divided
        # by 2 * 1.65 / sqrt(n) (presumably a nominal interval width used as a
        # yardstick -- TODO confirm), and the mean of coverage.
        print(np.mean(P), np.std(P), np.mean(L) / (2 * 1.65 / np.sqrt(n)), np.mean(coverage))

    # Clear the figure again, then draw the ECDF of the pooled P values in red
    # over U together with the dashed diagonal y = x, and show it.
    plt.clf()
    plt.plot(U, sm.distributions.ECDF(P)(U), 'r', linewidth=3)
    plt.plot([0,1], [0,1], 'k--', linewidth=2)
    plt.show()
github selimnairb / RHESSysWorkflows / bin / RHESSysPlotMassbalance.py View on Github external
# Fragment of a plotting routine (enclosing function not visible here): draws
# the observed series and each modeled column on `ax`, then configures the
# x-axis tick locator/formatter for the non-CDF plot types.
# Standard or log plot
    obs_y = obs[OBS_HEADER_STREAMFLOW]
    if plottype == PLOT_TYPE_CDF:
        # CDF mode: replace the observed values with their empirical CDF
        # evaluated at x.
        obs_ecdf = sm.distributions.ECDF(obs_y)
        obs_y = obs_ecdf(x)
    # obs_plt stays None when args.supressObs is set.
    obs_plt = None
    if not args.supressObs:
        # ax.plot's result is unpacked as a one-element sequence.
        (obs_plt,) = ax.plot(x, obs_y, obs_color, linewidth=2)
        
    # Plot modeled values
    # One line per entry in `columns`; the line handles are collected in data_plt.
    data_plt = []
    for c in columns:
        # Standard or log plot
        mod_y = data[c]
        if plottype == PLOT_TYPE_CDF:
            # Same ECDF transformation as applied to the observed series.
            mod_ecdf = sm.distributions.ECDF(data[c])
            mod_y = mod_ecdf(x)
        (mod_plt,) = ax.plot(x, mod_y, color=mod_color, linewidth=1)
        data_plt.append(mod_plt)
    
    # X-axis
    # Date ticks are only configured for the standard and log-y plot types.
    # Tick density depends on the span: more than 5 years -> yearly ticks,
    # more than 2 and up to 5 years -> every 3 months, otherwise monthly.
    if plottype == PLOT_TYPE_STD or \
       plottype == PLOT_TYPE_LOGY:
        # NOTE(review): assumes x holds one entry per day -- confirm with caller.
        num_years = len(x) / 365
        if num_years > 2:
            if num_years > 5:
                ax.xaxis.set_major_locator(matplotlib.dates.YearLocator())
            else:
                ax.xaxis.set_major_locator(matplotlib.dates.MonthLocator(interval=3))
        else:
            ax.xaxis.set_major_locator(matplotlib.dates.MonthLocator())
        # Tick labels formatted as abbreviated month and four-digit year ('%b-%Y').
        ax.xaxis.set_major_formatter(matplotlib.dates.DateFormatter('%b-%Y'))
github selective-inference / Python-software / examples / HIV / CV.py View on Github external
# Fragment of a simulation loop body (loop header not visible here): merges the
# current results with those already saved in the CSV, rewrites the CSV, and
# refreshes the pivot plot.
csvfile = 'HIV_CV.csv'
        # Base name without the 4-character '.csv' suffix.
        outbase = csvfile[:-4]

        if df is not None and i > 0:

            # Append previously saved rows when the CSV exists; a missing file
            # is ignored so df keeps only the current results.
            try:
                df = pd.concat([df, pd.read_csv(csvfile)])
            except FileNotFoundError:
                pass
            df.to_csv(csvfile, index=False)

            if len(df['pivot']) > 0:
                # pivot_plot is defined elsewhere; here its result is unpacked
                # into two values, of which only pivot_ax is used below.
                pivot_ax, lengths_ax = pivot_plot(df, outbase)
                liu_pivot = df['liu_pivot']
                # Drop NaN entries before building the ECDF.
                liu_pivot = liu_pivot[~np.isnan(liu_pivot)]
                # Add the ECDF of the remaining liu_pivot values, evaluated on
                # U, as an extra gray curve on the pivot axes.
                pivot_ax.plot(U, sm.distributions.ECDF(liu_pivot)(U), 'gray', label='Liu CV',
                              linewidth=3)
                pivot_ax.legend()
                # Save the figure owning pivot_ax next to the CSV, with a
                # '.pdf' suffix instead of '.csv'.
                fig = pivot_ax.figure
                fig.savefig(csvfile[:-4] + '.pdf')
github selective-inference / Python-software / examples / keras / keras_targets.py View on Github external
# Fragment of a simulation loop body (loop header not visible here): runs one
# simulate() call, merges it with the saved CSV, plots ECDFs, and saves both
# the figure and the merged CSV.
df = simulate(B=10000)
        csvfile = 'keras_targets.csv'

        # Append previously saved rows when the CSV exists; a missing file is
        # ignored so df keeps only the current results.
        try:
            df = pd.concat([df, pd.read_csv(csvfile)])
        except FileNotFoundError:
            pass

        if df is not None and len(df['pivot']) > 0:

            print(df['pivot'], 'pivot')
            plt.clf()
            # Evaluation grid for the ECDFs: 101 evenly spaced points on [0, 1].
            U = np.linspace(0, 1, 101)
            # Blue curve: ECDF of the 'naive_pivot' column over all rows.
            plt.plot(U, sm.distributions.ECDF(df['naive_pivot'])(U), 'b', label='Naive', linewidth=3)
            # One further curve per distinct 'batch_size': ECDF of the 'pivot'
            # values restricted to rows with that batch size.
            for b in np.unique(df['batch_size']):
                plt.plot(U, sm.distributions.ECDF(np.array(df['pivot'])[np.array(df['batch_size']) == b])(U), label='B=%d' % b, linewidth=3)

            # The legend is built before the unlabeled dashed diagonal y = x
            # is drawn.
            plt.legend()
            plt.plot([0,1], [0,1], 'k--', linewidth=2)
            # Figure is written next to the CSV with a '.pdf' suffix.
            plt.savefig(csvfile[:-4] + '.pdf')

            # Persist the merged frame; the next read_csv picks it up again.
            df.to_csv(csvfile, index=False)
github selective-inference / Python-software / doc / learning_examples / standalone / replicate_basic_example.py View on Github external
# Fragment (enclosing scope and the preceding loop are not visible here).
# Prints the mean and std of P and coverage divided by (i+1).
print(np.mean(P), np.std(P), coverage / (i+1))

    # Restrict the y-range of the current axes to [-5, 0] and show the figure.
    plt.gca().set_ylim([-5,0])
    plt.show()

    # Second experiment: coverage and L are reset here. P is not reassigned in
    # the visible code, so it keeps whatever it already held -- NOTE(review):
    # confirm that carrying P over is intended.
    coverage = 0
    L = []
    for i in range(100):
        # Here simulate() results are appended / summed as single values.
        p, cover, l = simulate()
        L.append(l)
        coverage += cover
        P.append(p)
        # Running summary: mean and std of P, mean of L divided by
        # 2 * 1.65 / sqrt(n), and coverage divided by the iterations so far.
        print(np.mean(P), np.std(P), np.mean(L) / (2 * 1.65 / np.sqrt(n)), coverage / (i+1))

    # ECDF of P in red over U together with the dashed diagonal y = x.
    plt.clf()
    plt.plot(U, sm.distributions.ECDF(P)(U), 'r', linewidth=3)
    plt.plot([0,1], [0,1], 'k--', linewidth=2)
    plt.show()
github selective-inference / Python-software / examples / multi_target / gbm_targets.py View on Github external
# Fragment of a simulation loop body (loop header not visible here): on odd
# iterations, merges the current results with the saved CSV, refreshes the
# ECDF plot, and rewrites the CSV.
csvfile = 'gbm_targets.csv'

            # Only odd values of i reach the merge/plot/save step below.
            if i % 2 == 1 and i > 0:

                # Append previously saved rows when the CSV exists; a missing
                # file is ignored so df keeps only the current results.
                try:
                    df = pd.concat([df, pd.read_csv(csvfile)])
                except FileNotFoundError:
                    pass

                if len(df['pivot']) > 0:

                    plt.clf()
                    # Evaluation grid: 101 evenly spaced points on [0, 1].
                    U = np.linspace(0, 1, 101)
                    # Blue curve: ECDF of the 'naive_pivot' column over all rows.
                    plt.plot(U, sm.distributions.ECDF(df['naive_pivot'])(U), 'b', label='Naive', linewidth=3)
                    # One further curve per distinct 'batch_size': ECDF of the
                    # 'pivot' values restricted to rows with that batch size.
                    for b in np.unique(df['batch_size']):
                        plt.plot(U, sm.distributions.ECDF(np.array(df['pivot'])[np.array(df['batch_size']) == b])(U), label='B=%d' % b, linewidth=3)

                    # The legend is built before the unlabeled dashed diagonal
                    # y = x is drawn.
                    plt.legend()
                    plt.plot([0,1], [0,1], 'k--', linewidth=2)
                    # Figure is written next to the CSV with a '.pdf' suffix.
                    plt.savefig(csvfile[:-4] + '.pdf')

                # The merged frame is written back even when no plot was drawn.
                df.to_csv(csvfile, index=False)
github selective-inference / Python-software / examples / keras / keras_targets_BH_strong.py View on Github external
# Repeat the simulation 500 times. Every iteration merges its results with the
# rows already saved in the CSV, redraws the ECDF plot, and rewrites the CSV,
# so results accumulate in the file from one iteration to the next.
for i in range(500):
        df = simulate(B=20000)
        csvfile = 'keras_targets_BH_strong.csv'

        # Append previously saved rows when the CSV exists; a missing file is
        # ignored so df keeps only the current results.
        try:
            df = pd.concat([df, pd.read_csv(csvfile)])
        except FileNotFoundError:
            pass

        if df is not None and len(df['pivot']) > 0:

            print(df['pivot'], 'pivot')
            plt.clf()
            # Evaluation grid for the ECDFs: 101 evenly spaced points on [0, 1].
            U = np.linspace(0, 1, 101)
            # Blue curve: ECDF of the 'naive_pivot' column over all rows.
            plt.plot(U, sm.distributions.ECDF(df['naive_pivot'])(U), 'b', label='Naive', linewidth=3)
            # One further curve per distinct 'batch_size': ECDF of the 'pivot'
            # values restricted to rows with that batch size.
            for b in np.unique(df['batch_size']):
                plt.plot(U, sm.distributions.ECDF(np.array(df['pivot'])[np.array(df['batch_size']) == b])(U), label='B=%d' % b, linewidth=3)

            # The legend is built before the unlabeled dashed diagonal y = x
            # is drawn.
            plt.legend()
            plt.plot([0,1], [0,1], 'k--', linewidth=2)
            # Figure is written next to the CSV with a '.pdf' suffix.
            plt.savefig(csvfile[:-4] + '.pdf')

            # Persist the merged frame; the next iteration reads it back in.
            df.to_csv(csvfile, index=False)