print(results.summary())
print("kinase error:")
y_pred_kinase = clf.predict(feature_kinase)
print(mean_squared_error(label_kinase,y_pred_kinase))
results = sm.OLS(y_pred_kinase,sm.add_constant(label_kinase)).fit()
print(results.summary())
print("GPCR error:")
y_pred_GPCR = clf.predict(feature_GPCR)
print(mean_squared_error(label_GPCR,y_pred_GPCR))
results = sm.OLS(y_pred_GPCR,sm.add_constant(label_GPCR)).fit()
print(results.summary())
print("channel error:")
y_pred_channel = clf.predict(feature_channel)
print(mean_squared_error(label_channel,y_pred_channel))
results = sm.OLS(y_pred_channel,sm.add_constant(label_channel)).fit()
print(results.summary())
######### Saving model
ridge_pkl_filename = 'ridge_20182101.pkl'
with open(ridge_pkl_filename, 'wb') as ridge_model_pkl:
    pickle.dump(clf, ridge_model_pkl)
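# A minimal sketch for loading the pickled model back (mirrors the save
# above; the name `clf_loaded` is illustrative, not from the original script):
with open(ridge_pkl_filename, 'rb') as f:
    clf_loaded = pickle.load(f)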
size = 64
length_channel = len(channel_protein)
num_bins = math.ceil(length_channel/size)
for i in range(num_bins):
    if i == 0:
        y_pred = model.predict([channel_protein[0:size], channel_compound[0:size]])
    elif i < num_bins - 1:
        temp = model.predict([channel_protein[(i*size):((i+1)*size)], channel_compound[(i*size):((i+1)*size)]])
        y_pred = np.concatenate((y_pred, temp), axis=0)
    else:
        # Last batch: predict on the final `size` rows, keep only the uncovered tail.
        temp = model.predict([channel_protein[length_channel-size:length_channel], channel_compound[length_channel-size:length_channel]])
        y_pred = np.concatenate((y_pred, temp[size-length_channel+(i*size):size]), axis=0)
er = 0
for i in range(length_channel):
    er += (y_pred[i] - channel_IC50[i])**2
mse = er / length_channel
print(mse)
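# A vectorized equivalent of the loop above (a sketch, assuming y_pred
# flattens to one prediction per sample; `mse_vec` is an illustrative name):
mse_vec = np.mean((np.ravel(y_pred) - channel_IC50) ** 2)
print(mse_vec)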
results = sm.OLS(y_pred,sm.add_constant(channel_IC50)).fit()
print(results.summary())
print("error on train")
size = 64
length_train = len(train_protein)
print(length_train)
num_bins = math.ceil(length_train/size)
for i in range(num_bins):
    if i == 0:
        y_pred = model.predict([train_protein[0:size], train_compound[0:size]])
    elif i < num_bins - 1:
        temp = model.predict([train_protein[(i*size):((i+1)*size)], train_compound[(i*size):((i+1)*size)]])
        y_pred = np.concatenate((y_pred, temp), axis=0)
    else:
        # Final batch, mirroring the channel loop above.
        temp = model.predict([train_protein[length_train-size:length_train], train_compound[length_train-size:length_train]])
        y_pred = np.concatenate((y_pred, temp[size-length_train+(i*size):size]), axis=0)
y_pred_train = y_pred
print(mean_squared_error(label_train, y_pred_train))
results = sm.OLS(y_pred_train, sm.add_constant(label_train)).fit()
print(results.summary())
print("test error:")
y_pred_test = clf.predict(feature_test)
print(mean_squared_error(label_test,y_pred_test))
results = sm.OLS(y_pred_test,sm.add_constant(label_test)).fit()
print(results.summary())
print("ER error:")
y_pred_ER = clf.predict(feature_ER)
print(mean_squared_error(label_ER,y_pred_ER))
results = sm.OLS(y_pred_ER,sm.add_constant(label_ER)).fit()
print(results.summary())
print("kinase error:")
y_pred_kinase = clf.predict(feature_kinase)
print(mean_squared_error(label_kinase,y_pred_kinase))
results = sm.OLS(y_pred_kinase,sm.add_constant(label_kinase)).fit()
print(results.summary())
print("GPCR error:")
y_pred_GPCR = clf.predict(feature_GPCR)
print(mean_squared_error(label_GPCR,y_pred_GPCR))
results = sm.OLS(y_pred_GPCR, sm.add_constant(label_GPCR)).fit()
print(results.summary())
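# The per-target blocks above repeat the same predict / MSE / OLS-summary
# pattern. A minimal refactoring sketch (the helper name `report_errors` is
# hypothetical, not part of the original script):
def report_errors(name, features, labels):
    print("%s error:" % name)
    y_pred = clf.predict(features)
    print(mean_squared_error(labels, y_pred))
    results = sm.OLS(y_pred, sm.add_constant(labels)).fit()
    print(results.summary())

# e.g. report_errors("GPCR", feature_GPCR, label_GPCR)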
self.update_residual_y_axis_label()
included_vars = [key for key in list(self.correlation.data['1']) if self.multi_var_reg_vars[key]]
included_vars.sort()
for n in GROUP_LABELS:
    if self.time_series.current_dvh_group[n]:
        x = []
        x_count = len(self.correlation.data[n][list(self.correlation.data[n])[0]]['data'])
        for i in range(x_count):
            current_x = []
            for k in included_vars:
                current_x.append(self.correlation.data[n][k]['data'][i])
            x.append(current_x)
        x = sm.add_constant(x)  # explicitly add constant to calculate intercept
        y = self.correlation.data[n][self.y.value]['data']

        fit = sm.OLS(y, x).fit()
        coeff = fit.params
        coeff_p = fit.pvalues
        r_sq = fit.rsquared
        model_p = fit.f_pvalue
        coeff_str = ["%0.3E" % i for i in coeff]
        coeff_p_str = ["%0.3f" % i for i in coeff_p]
        r_sq_str = ["%0.3f" % r_sq]
        model_p_str = ["%0.3f" % model_p]

        # (dict completed from the *_str variables computed above; the snippet
        # was cut off here, so the remaining keys are an assumption)
        getattr(self.sources, 'multi_var_coeff_results_%s' % n).data = {'var_name': ['Constant'] + included_vars,
                                                                        'coeff': coeff.tolist(),
                                                                        'coeff_str': coeff_str,
                                                                        'p': coeff_p.tolist(),
                                                                        'p_str': coeff_p_str}
def trend_estimate(y):
    X = np.arange(len(y))
    X = sm.add_constant(X)
    mod = sm.OLS(y, X)
    res = mod.fit()
    return res
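# Usage sketch (the toy series is made up for illustration). add_constant
# prepends the constant column by default, so params[0] is the intercept
# and params[1] the slope:
series = np.array([1.0, 2.1, 2.9, 4.2, 5.1])
print(trend_estimate(series).params)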
def reg_m(y, x, estimator, weights=None):
    ones = np.ones(len(x[0]))
    X = sm.add_constant(np.column_stack((x[0], ones)))
    for ele in x[1:]:
        # Each new predictor is stacked in front; add_constant is a no-op
        # here because a constant column is already present.
        X = sm.add_constant(np.column_stack((ele, X)))
    if estimator == 'ols':
        return sm.OLS(y, X).fit()
    elif estimator == 'wls':
        return sm.WLS(y, X, weights).fit()
    elif estimator == 'gls':
        return sm.GLS(y, X).fit()
    return None
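# Usage sketch (toy data, made up for illustration). Because each predictor
# is stacked in front, params come back as [x[-1], ..., x[0], intercept]:
rng = np.random.RandomState(0)
x = [rng.rand(50), rng.rand(50)]
y = 1.0 + 2.0 * x[0] + 3.0 * x[1] + 0.1 * rng.randn(50)
print(reg_m(y, x, 'ols').params)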
model0if = GLSAR(Y, X, 2)
res = model0if.iterative_fit(6)
print('iterativefit beta', res.params)
results.tvalues # XXX is this correct? it does equal params/bse
# but isn't the same as the AR example (which was wrong in the first place..)
print(results.t_test([0, 1])) # are sd and t correct? vs
print(results.f_test(np.eye(2)))
rhotrue = np.array([0.5, 0.2])
nlags = np.size(rhotrue)
beta = np.array([0.1, 2])
noiseratio = 0.5
nsample = 2000
x = np.arange(nsample)
X1 = sm.add_constant(x, prepend=False)
wnoise = noiseratio * np.random.randn(nsample + nlags)
#.. noise = noise[1:] + rhotrue*noise[:-1] # wrong this is not AR
#.. find my drafts for univariate ARMA functions
# generate AR(p)
if np.size(rhotrue) == 1:
    # replace with scipy.signal.lfilter, keep for testing
    arnoise = np.zeros(nsample + 1)
    for i in range(1, nsample + 1):
        arnoise[i] = rhotrue * arnoise[i - 1] + wnoise[i]
    noise = arnoise[1:]
    an = signal.lfilter([1], np.hstack((1, -rhotrue)), wnoise[1:])
    print('simulate AR(1) difference', np.max(np.abs(noise - an)))
else:
    noise = signal.lfilter([1], np.hstack((1, -rhotrue)), wnoise)[nlags:]
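# A sketch completing the simulation: build the observed series from X1 and
# beta plus the AR noise, then fit GLSAR to it (the names y1, mod_glsar and
# res_glsar are illustrative, not from the original example):
y1 = np.dot(X1, beta) + noise
mod_glsar = GLSAR(y1, X1, rho=nlags)
res_glsar = mod_glsar.iterative_fit(maxiter=6)
print('GLSAR params', res_glsar.params)
print('estimated rho', mod_glsar.rho)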
bikes_slots_available = np.asarray(list(zip(bikes_available, slots_available)))
# Creating Lags of Bike and Slot Variables
bikes_available_lag0 = bikes_available[1:]
bikes_available_lag1 = bikes_available[0:len(bikes_available)-1]
slots_available_lag1 = slots_available[0:len(slots_available)-1]
bikes_slots_available = bikes_slots_available[1:]
# Calculate the lag-1 log-odds ratio
phat_lag1 = (bikes_available_lag1) / (bikes_available_lag1+slots_available_lag1)
logodds_lag1 = np.log( phat_lag1 / (1-phat_lag1) )
# Add Constant to Exogenous Variables
logodds_lag1 = sm.add_constant(logodds_lag1, prepend=False)
# Fit Binomial Regression. Coefficients constant in time
glm_binom = sm.GLM(bikes_slots_available, logodds_lag1, family=sm.families.Binomial())
res = glm_binom.fit()
print(res.summary())
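# Usage sketch: the fitted binomial GLM predicts the expected success
# probability (share of bikes among bikes + slots) for each observation
# (`phat` is an illustrative name):
phat = res.predict(logodds_lag1)
print(phat[:5])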
# This model includes month effects and one previous time point.
# The previous time point is a very significant predictor; months are not.
import datetime
from urllib.request import urlretrieve
import statsmodels.iolib.foreign as dta  # genfromdta lives here in older statsmodels

print('downloading file')
urlretrieve('http://www.ats.ucla.edu/stat/stata/seminars/svy_stata_intro/srs.dta', 'srs.dta')
srs = dta.genfromdta("srs.dta")
# from statsmodels.datasets import webuse
# srs = webuse('srs', 'http://www.ats.ucla.edu/stat/stata/seminars/svy_stata_intro/')
# (does not currently cache the file)
y = srs['api00']
# older numpy doesn't reorder:
#x = srs[['growth', 'emer', 'yr_rnd']].view(float).reshape(len(y), -1)
# force a column sequence instead
x = np.column_stack([srs[ii] for ii in ['growth', 'emer', 'yr_rnd']])
group = srs['dnum']
#xx = sm.add_constant(x, prepend=True)
xx = sm.add_constant(x, prepend=False) #const at end for Stata compatibility
# remove nan observations (-999.0 is the nan code in the dta file)
mask = (xx != -999.0).all(1)
print(mask.shape)
y = y[mask]
xx = xx[mask]
group = group[mask]
#run OLS
res_srs = sm.OLS(y, xx).fit()
print('params ', res_srs.params)
print('bse_OLS ', res_srs.bse)
#get cluster robust standard errors and compare with STATA
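# A sketch of the cluster-robust fit via statsmodels' cov_type API
# (`res_cluster` is an illustrative name; `group` holds the district ids):
res_cluster = sm.OLS(y, xx).fit(cov_type='cluster', cov_kwds={'groups': group})
print('bse_cluster', res_cluster.bse)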
if equal_lengths(len_list):  # nested blocks (add specified models!)
    mixed_blocks = np.random.permutation(np.random.permutation(len_list))
    rotate_groups = []
    for m in mixed_blocks:
        rotate_groups.append(index_groups[uniq_groups == unique_blocks[m]])
    index_groups = np.array(rotate_groups).flatten()
else:
    index_groups = np.random.permutation(list(range(n)))
if medtype == 'I':
    EXOG_A = sm.add_constant(np.column_stack((leftvar, strip_ones(exog_vars))))
    EXOG_A = EXOG_A[index_groups]
    EXOG_B = np.column_stack((leftvar, rightvar))
    EXOG_B = sm.add_constant(np.column_stack((EXOG_B, strip_ones(exog_vars))))
    # path A
    t_valuesA = full_glm_results(endog_arr, EXOG_A, only_tvals=True)[1, :]
    # path B
    t_valuesB = full_glm_results(endog_arr, EXOG_B, only_tvals=True)[1, :]
elif medtype == 'M':
    EXOG_A = sm.add_constant(np.column_stack((leftvar, strip_ones(exog_vars))))
    EXOG_A = EXOG_A[index_groups]
    EXOG_B = np.column_stack((rightvar, leftvar))
    EXOG_B = sm.add_constant(np.column_stack((EXOG_B, strip_ones(exog_vars))))
    # path A
    t_valuesA = full_glm_results(endog_arr, EXOG_A, only_tvals=True)[1, :]
    # path B (mirrors the medtype == 'I' branch)
    t_valuesB = full_glm_results(endog_arr, EXOG_B, only_tvals=True)[1, :]
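# A sketch of how the two permuted path t-values are commonly combined into
# a Sobel-type mediation statistic (this combination step is an assumption,
# not shown in the snippet above):
z_values = (t_valuesA * t_valuesB) / np.sqrt(t_valuesA**2 + t_valuesB**2)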