Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
try:
df = pd.concat([df, pd.read_csv(csvfile)])
except FileNotFoundError:
pass
if len(df['pivot']) > 0:
print("selective:", np.mean(df['pivot']), np.std(df['pivot']), np.mean(df['length']), np.std(df['length']), np.mean(df['coverage']))
print("naive:", np.mean(df['naive_pivot']), np.std(df['naive_pivot']), np.mean(df['naive_length']), np.std(df['naive_length']), np.mean(df['naive_coverage']))
print("len ratio selective divided by naive:", np.mean(np.array(df['length']) / np.array(df['naive_length'])))
plt.clf()
U = np.linspace(0, 1, 101)
plt.plot(U, sm.distributions.ECDF(df['pivot'])(U), 'r', label='Selective', linewidth=3)
plt.plot(U, sm.distributions.ECDF(df['naive_pivot'])(U), 'b', label='Naive', linewidth=3)
plt.legend()
plt.plot([0,1], [0,1], 'k--', linewidth=2)
plt.savefig(csvfile[:-4] + '.pdf')
plt.clf()
plt.scatter(df['naive_length'], df['length'])
plt.savefig(csvfile[:-4] + '_lengths.pdf')
df.to_csv(csvfile, index=False)
def plotGraph(args, obs, data, sizeX=1, sizeY=1, dpi=80):
fig = plt.figure(figsize=(sizeX, sizeY), dpi=dpi, tight_layout=True)
ax = fig.add_subplot(111)
if args.plottype == PLOT_TYPE_STD or \
args.plottype == PLOT_TYPE_LOGY:
x = obs.index
elif args.plottype == PLOT_TYPE_CDF:
x = np.linspace(min_x, max_x, num=len(obs) )
# Plot observed values
# Standard or log plot
obs_y = obs
if args.plottype == PLOT_TYPE_CDF:
obs_ecdf = sm.distributions.ECDF(obs)
obs_y = obs_ecdf(x)
obs_plt = None
if not args.supressObs:
(obs_plt,) = ax.plot(x, obs_y, linewidth=2.0, color='black')
# Plot modeled values
data_plt = []
for (i, d) in enumerate(data):
# Standard or log plot
mod_y = d
if args.plottype == PLOT_TYPE_CDF:
mod_ecdf = sm.distributions.ECDF(d)
mod_y = mod_ecdf(x)
# Plot (we could move this outside of the for loop)
if args.linewidth:
# Run simulate() once, merge its output with the rows already stored in the
# CSV below, plot empirical CDFs of the pivot columns, and write the merged
# table back to the same CSV.
# NOTE(review): indentation was lost in this excerpt, so which statements sit
# inside the `if` / `for` bodies cannot be determined from here — confirm
# against the original script before editing.
df = simulate(B=40000)
csvfile = 'keras_targets_BH_weak.csv'
# Prepend the new rows to the previously saved ones; if the CSV does not
# exist yet, df is left as returned by simulate().
try:
df = pd.concat([df, pd.read_csv(csvfile)])
except FileNotFoundError:
pass
# Only plot when df is not None and its 'pivot' column is non-empty.
if df is not None and len(df['pivot']) > 0:
print(df['pivot'], 'pivot')
plt.clf()
# 101 evenly spaced points on [0, 1] at which every ECDF is evaluated.
U = np.linspace(0, 1, 101)
plt.plot(U, sm.distributions.ECDF(df['naive_pivot'])(U), 'b', label='Naive', linewidth=3)
# One ECDF curve of 'pivot' per distinct value of 'batch_size'.
for b in np.unique(df['batch_size']):
plt.plot(U, sm.distributions.ECDF(np.array(df['pivot'])[np.array(df['batch_size']) == b])(U), label='B=%d' % b, linewidth=3)
plt.legend()
# Black dashed line from (0, 0) to (1, 1).
plt.plot([0,1], [0,1], 'k--', linewidth=2)
# Figure name: csvfile with its last four characters ('.csv') replaced by '.pdf'.
plt.savefig(csvfile[:-4] + '.pdf')
# Persist the merged table, without the index column.
df.to_csv(csvfile, index=False)
df = pd.concat([df, pd.read_csv(csvfile)])
except FileNotFoundError:
pass
if len(df['pivot']) > 0:
print("selective:", np.mean(df['pivot']), np.std(df['pivot']), np.mean(df['length']), np.std(df['length']), np.mean(df['coverage']))
print("lee:", np.mean(df['lee_pivot']), np.std(df['lee_pivot']), np.mean(df['lee_length']), np.std(df['lee_length']), np.mean(df['lee_coverage']))
print("naive:", np.mean(df['naive_pivot']), np.std(df['naive_pivot']), np.mean(df['naive_length']), np.std(df['naive_length']), np.mean(df['naive_coverage']))
print("len ratio selective divided by naive:", np.mean(np.array(df['length']) / np.array(df['naive_length'])))
print("len ratio selective divided by lee:", np.mean(np.array(df['length']) / np.array(df['lee_length'])))
plt.clf()
U = np.linspace(0, 1, 101)
plt.plot(U, sm.distributions.ECDF(df['pivot'])(U), 'r', label='Selective', linewidth=3)
plt.plot(U, sm.distributions.ECDF(df['naive_pivot'])(U), 'b', label='Naive', linewidth=3)
plt.plot(U, sm.distributions.ECDF(df['lee_pivot'][~np.isnan(df['lee_pivot'])])(U), 'g', label='Lee', linewidth=3)
plt.legend()
plt.plot([0,1], [0,1], 'k--', linewidth=2)
plt.savefig(csvfile[:-4] + '.pdf')
plt.clf()
plt.scatter(df['naive_length'], df['length'])
plt.scatter(df['naive_length'], df['lee_length'])
plt.savefig(csvfile[:-4] + '_lengths.pdf')
df.to_csv(csvfile, index=False)
if df is not None and i % 2 == 1 and i > 0:
try:
df = pd.concat([df, pd.read_csv(csvfile)])
except FileNotFoundError:
pass
if len(df['pivot']) > 0:
print("selective:", np.mean(df['pivot']), np.std(df['pivot']), np.mean(df['length']), np.std(df['length']), np.mean(df['coverage']))
plt.clf()
U = np.linspace(0, 1, 101)
plt.plot(U, sm.distributions.ECDF(df['pivot'])(U), 'r', label='Selective', linewidth=3)
plt.plot(U, sm.distributions.ECDF(df['naive_pivot'])(U), 'b', label='Naive', linewidth=3)
plt.legend()
plt.plot([0,1], [0,1], 'k--', linewidth=2)
plt.savefig(csvfile[:-4] + '.pdf')
plt.clf()
plt.scatter(df['naive_length'], df['length'])
plt.savefig(csvfile[:-4] + '_lengths.pdf')
df.to_csv(csvfile, index=False)
def ecdf(data, logtransform = True):
    """Evaluate the empirical CDF of ``data`` at automatically chosen bin edges.

    Parameters
    ----------
    data : array-like
        Sample values. They are passed to ``np.log10`` when ``logtransform``
        is true.
    logtransform : bool
        If true, the bin edges are computed on ``log10(data)`` and mapped back
        with ``10**``; otherwise they are computed on ``data`` directly.

    Returns
    -------
    tuple
        ``(edges, values)`` where ``edges`` are the bin edges chosen by
        ``np.histogram_bin_edges(..., bins='auto')`` and ``values`` is the
        ECDF of the untransformed ``data`` evaluated at those edges.
    """
    # Choose the scale on which the edges are picked.
    binned = np.log10(data) if logtransform else data
    grid = np.histogram_bin_edges(binned, bins='auto')
    if logtransform:
        # Map the log-scale edges back to the original scale.
        grid = 10**grid
    # The ECDF itself is always built from the raw data.
    empirical = sm.distributions.ECDF(data)
    return grid, empirical(grid)
'length':l,
'naive_length':naive_l})
try:
df = pd.concat([df, pd.read_csv(csvfile)])
except FileNotFoundError:
pass
if len(df['pivot']) > 0:
print("selective:", np.mean(df['pivot']), np.std(df['pivot']), np.mean(df['length']), np.std(df['length']), np.mean(df['coverage']))
print("naive:", np.mean(df['naive_pivot']), np.std(df['naive_pivot']), np.mean(df['naive_length']), np.std(df['naive_length']), np.mean(df['naive_coverage']))
print("len ratio selective divided by naive:", np.mean(np.array(df['length']) / np.array(df['naive_length'])))
plt.clf()
U = np.linspace(0, 1, 101)
plt.plot(U, sm.distributions.ECDF(df['pivot'])(U), 'r', label='Selective', linewidth=3)
plt.plot(U, sm.distributions.ECDF(df['naive_pivot'])(U), 'b', label='Naive', linewidth=3)
plt.legend()
plt.plot([0,1], [0,1], 'k--', linewidth=2)
plt.savefig('lasso_example_variables_exact_nonnull.pdf')
plt.clf()
plt.scatter(df['naive_length'], df['length'])
plt.savefig('lasso_example_variables_lengths_nonnull.pdf')
df.to_csv(csvfile, index=False)
# Draw the empirical CDFs of two p-value collections side by side on `fig`
# (a 1x2 grid of axes), each with a black line from (0, 0) to (1, 1), then
# save the figure to "P values.png" and display it.
plot_pvalues = fig.add_subplot(121)
plot_pvalues1 = fig.add_subplot(122)

# Left panel: ECDF of true_pvalues_all.
true_pvalues_all = np.asarray(true_pvalues_all, dtype=np.float32)
ecdf = sm.distributions.ECDF(true_pvalues_all)
# np.linspace's default number of points, spanning the observed range.
x = np.linspace(min(true_pvalues_all), max(true_pvalues_all))
y = ecdf(x)
plot_pvalues.plot(x, y, '-o', lw=2)
plot_pvalues.plot([0, 1], [0, 1], 'k-', lw=2)
plot_pvalues.set_title("P values at the truth")
plot_pvalues.set_xlim([0,1])
plot_pvalues.set_ylim([0,1])

# Right panel: ECDF of mle_pvalues_all, built the same way.
mle_pvalues_all = np.asarray(mle_pvalues_all, dtype=np.float32)
ecdf = sm.distributions.ECDF(mle_pvalues_all)
x = np.linspace(min(mle_pvalues_all), max(mle_pvalues_all))
y = ecdf(x)
plot_pvalues1.plot(x, y, '-o', lw=2)
plot_pvalues1.plot([0, 1], [0, 1], 'k-', lw=2)
plot_pvalues1.set_title("P values at the MLE")
plot_pvalues1.set_xlim([0,1])
plot_pvalues1.set_ylim([0,1])

# Save BEFORE showing: when a blocking plt.show() returns, the figure has been
# closed, so a savefig issued afterwards would write an empty image.
plt.savefig("P values.png")
plt.show()
# Run simulate() once, merge its output with the rows already stored in the
# CSV below, plot empirical CDFs of the pivot columns, and write the merged
# table back to the same CSV.
# NOTE(review): indentation was lost in this excerpt, so which statements sit
# inside the `if` / `for` bodies cannot be determined from here — confirm
# against the original script before editing.
df = simulate(B=10000)
csvfile = 'keras_targets_medium.csv'
# Prepend the new rows to the previously saved ones; if the CSV does not
# exist yet, df is left as returned by simulate().
try:
df = pd.concat([df, pd.read_csv(csvfile)])
except FileNotFoundError:
pass
# Only plot when df is not None and its 'pivot' column is non-empty.
if df is not None and len(df['pivot']) > 0:
print(df['pivot'], 'pivot')
plt.clf()
# 101 evenly spaced points on [0, 1] at which every ECDF is evaluated.
U = np.linspace(0, 1, 101)
plt.plot(U, sm.distributions.ECDF(df['naive_pivot'])(U), 'b', label='Naive', linewidth=3)
# One ECDF curve of 'pivot' per distinct value of 'batch_size'.
for b in np.unique(df['batch_size']):
plt.plot(U, sm.distributions.ECDF(np.array(df['pivot'])[np.array(df['batch_size']) == b])(U), label='B=%d' % b, linewidth=3)
plt.legend()
# Black dashed line from (0, 0) to (1, 1).
plt.plot([0,1], [0,1], 'k--', linewidth=2)
# Figure name: csvfile with its last four characters ('.csv') replaced by '.pdf'.
plt.savefig(csvfile[:-4] + '.pdf')
# Persist the merged table, without the index column.
df.to_csv(csvfile, index=False)
pass
if len(df['pivot']) > 0:
print("selective:", np.mean(df['pivot']), np.std(df['pivot']), np.mean(df['length']), np.std(df['length']), np.mean(df['coverage']))
print("liu:", np.mean(df['liu_pivot']), np.std(df['liu_pivot']), np.mean(df['liu_length']), np.std(df['liu_length']), np.mean(df['liu_coverage']))
print("naive:", np.mean(df['naive_pivot']), np.std(df['naive_pivot']), np.mean(df['naive_length']), np.std(df['naive_length']), np.mean(df['naive_coverage']))
print("len ratio selective divided by naive:", np.mean(np.array(df['length']) / np.array(df['naive_length'])))
print("len ratio selective divided by liu:", np.mean(np.array(df['length']) / np.array(df['liu_length'])))
plt.clf()
U = np.linspace(0, 1, 101)
plt.plot(U, sm.distributions.ECDF(df['pivot'])(U), 'r', label='Selective', linewidth=3)
plt.plot(U, sm.distributions.ECDF(df['naive_pivot'])(U), 'b', label='Naive', linewidth=3)
plt.plot(U, sm.distributions.ECDF(df['liu_pivot'][~np.isnan(df['liu_pivot'])])(U), 'g', label='Liu', linewidth=3)
plt.legend()
plt.plot([0,1], [0,1], 'k--', linewidth=2)
plt.savefig(csvfile[:-4] + '.pdf')
plt.clf()
plt.scatter(df['naive_length'], df['length'])
plt.scatter(df['naive_length'], df['liu_length'])
plt.savefig(csvfile[:-4] + '_lengths.pdf')
df.to_csv(csvfile, index=False)