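# Per-model cross-validation: the block below is the body of the loop over the
# candidate models (the enclosing loop header is not shown in this snippet).
# Each model's hyperparameters are pushed to the engines, the model is fit on
# the training data, and it is scored on the held-out (xv) and full datasets.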
set_hyperparameters_on_engines(client[:], model)
add_data_on_engines(client[:], train_data)
if use_existing and \
        os.path.exists(os.path.join(options.resultsDir, 'results.partial.%d.pkl' % i)):
    print "Found existing results for model %d" % i
    with open(os.path.join(options.resultsDir, 'results.partial.%d.pkl' % i)) as f:
        (x_inf, ll_train, ll_xv, ll_total) = cPickle.load(f)
    train_lls[i] = ll_train
    xv_lls[i] = ll_xv
    total_lls[i] = ll_total
else:
    x0 = copy.deepcopy(best_x)
    # set_data_on_engines(client[:], train_data)
    ll0 = parallel_compute_ll(client[:], x0, data['N'])
    print "Training LL0: %f" % ll0

    # Perform inference
    x_inf = parallel_coord_descent(client, data['N'], x0=x0, maxiter=1,
                                   use_hessian=False,
                                   use_rop=False)
    ll_train = parallel_compute_ll(client[:], x_inf, data['N'])
    print "Training LL_inf: %f" % ll_train
    train_lls[i] = ll_train

    # Compute log lkhd on xv data
    add_data_on_engines(client[:], xv_data)
    ll_xv = parallel_compute_ll(client[:], x_inf, data['N'])
    print "Cross Validation LL: %f" % ll_xv
    xv_lls[i] = ll_xv
    # Compute log lkhd on total dataset
    add_data_on_engines(client[:], data)
    ll_total = parallel_compute_ll(client[:], x_inf, data['N'])
    print "Total LL: %f" % ll_total
    total_lls[i] = ll_total

    print "Saving partial results"
    with open(os.path.join(options.resultsDir, 'results.partial.%d.pkl' % i), 'w') as f:
        cPickle.dump((x_inf, ll_train, ll_xv, ll_total), f, protocol=-1)

    # Update best model
    if ll_xv > best_xv_ll:
        best_ind = i
        best_xv_ll = ll_xv
        best_x = copy.deepcopy(x_inf)
        best_model = copy.deepcopy(model)
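
# After the cross-validation loop has scored every candidate, refit the winning
# model (tracked in best_ind / best_model / best_x above) on the full dataset.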
print "Training the best model (%d) with the full dataset" % best_ind
# Set the best hyperparameters
set_hyperparameters_on_engines(client[:], best_model)
add_data_on_engines(client[:], data)

# Fit the best model on the full training data
best_x = parallel_coord_descent(client, data['N'], x0=best_x, maxiter=1,
                                use_hessian=False,
                                use_rop=False)

# Print results summary
for i in np.arange(len(models)):
    print "Model %d:\tTrain LL: %.1f\tXV LL: %.1f\tTotal LL: %.1f" % (i, train_lls[i], xv_lls[i], total_lls[i])
print "Best model: %d" % best_ind
print "Best Total LL: %f" % parallel_compute_ll(client[:], best_x, data['N'])
print "True LL: %f" % popn_true.compute_ll(x_true)

stop_time = time.clock()

# Save results
with open(os.path.join(options.resultsDir, 'results.pkl'), 'w') as f:
    cPickle.dump(best_x, f, protocol=-1)

# Save runtime
with open(os.path.join(options.resultsDir, 'runtime.pkl'), 'w') as f:
    cPickle.dump(stop_time - start_time, f, protocol=-1)
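
# Gibbs-sampling variant of the fit. Here dview and master are assumed to be
# IPython parallel view handles, and serial_updates is assumed to be defined in
# the engines' namespaces; @interactive lets _serial_update execute there.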
@interactive
def _serial_update(i, x):
    return serial_updates[i].update(x)
## DEBUG Profile the Gibbs sampling loop
# import cProfile, pstats, StringIO
# pr = cProfile.Profile()
# pr.enable()
## END DEBUG
# Alternate fitting the network and fitting the GLMs
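# ll_smpls and lp_smpls record the log likelihood and log joint probability of
# the initial state and of every Gibbs sample so that mixing can be monitored.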
lp_smpls = np.zeros(N_samples+1)
lp_smpls[0] = parallel_compute_log_p(dview, master, x0, N)
ll_smpls = np.zeros(N_samples+1)
ll_smpls[0] = parallel_compute_ll(dview, x0, N)
lprior = parallel_compute_log_prior(dview, master, x0, N)
x_smpls = [x0]
x = copy.deepcopy(x0)
import time
start_time = time.time()
for smpl in np.arange(N_samples):
    # Print the current log likelihood
    ll = parallel_compute_ll(dview, x, N)
    lp = parallel_compute_log_p(dview,
                                master,
                                x,
                                N)
    ll_smpls[smpl+1] = ll
    lp_smpls[smpl+1] = lp

    # DEBUG: Look for sharp drops in LP
    # if lp - lp_smpls[smpl] < -25:
    #     import pdb; pdb.set_trace()
    # END DEBUG

    # Compute iters per second
    stop_time = time.time()
    if stop_time - start_time == 0:
        print "Gibbs iteration %d. Iter/s exceeds time resolution. LL: %.3f Log prob: %.3f" % (smpl, ll, lp)