Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def _get_residual(pheno, covariates):
e = pandas.DataFrame(covariates)
e["order"] = range(0, e.shape[0])
e["pheno"] = pheno
e_ = e.dropna()
e_ = e_[e_.columns[~(e_ == 0).all()]]
keys = list(e_.columns.values)
keys.remove("pheno")
keys.remove("order")
y, X = dmatrices("pheno ~ {}".format(" + ".join(keys)), data=e_, return_type="dataframe")
model = sm.OLS(y,X)
result = model.fit()
e_["residual"] = result.resid
e = e.merge(e_[["order", "residual"]], on="order", how="left")
return e["residual"]
def main():
df = pd.DataFrame(data=np.hstack((np.load('xx.npy'), np.load('yy.npy'))),
columns=['greedy_b', 'greedy_r', 'score_b', 'score_r'])
df = df[df['score_b'] >= 0]
df['score_diff'] = -df['score_b'] + df['score_r']
y, X = patsy.dmatrices('score_diff ~ '
'greedy_b + '
'greedy_r + '
'square(greedy_b) + '
'square(greedy_r) + '
'greedy_b:greedy_r',
data=df, return_type='dataframe')
result = sm.OLS(y, X).fit()
print result.summary()
x = df['greedy_b']
y = df['greedy_r']
z = df['score_diff']
'''
xi = np.linspace(0, 1, 1000)
yi = np.linspace(0, 1, 1000)
delta_weekday_dummy = delta_dayofweek.copy()
delta_weekday_dummy[delta_dayofweek < 5] = 1
delta_weekday_dummy[delta_dayofweek >= 5] = 0
arrivals_departures['weekday_dummy'] = delta_weekday_dummy
# print arrivals_departures
# print arrivals_departures.head(20)
# Create design matrix for months, hours, and weekday vs. weekend.
# We can't just create a "month" column to toss into our model, because it doesnt
# understand what "June" is. Instead, we need to create a column for each month
# and code each row according to what month it's in. Ditto for hours and weekday (=1).
y_arr, X_arr = patsy.dmatrices("arrivals ~ C(months, Treatment) + C(hours, Treatment) + C(weekday_dummy, Treatment)", arrivals_departures, return_type='dataframe')
y_dep, X_dep = patsy.dmatrices("departures ~ C(months, Treatment) + C(hours, Treatment) + C(weekday_dummy, Treatment)", arrivals_departures, return_type='dataframe')
y_dep[pd.isnull(y_dep)] = 0
# Fit poisson distributions for arrivals and departures, print results
arr_poisson_model = sm.Poisson(y_arr, X_arr)
arr_poisson_results = arr_poisson_model.fit()
dep_poisson_model = sm.Poisson(y_dep, X_dep)
dep_poisson_results = dep_poisson_model.fit()
# print arr_poisson_results.summary(), dep_poisson_results.summary()
poisson_results = [arr_poisson_results, dep_poisson_results]
return poisson_results
def test_nb2():
y, X = patsy.dmatrices('los ~ C(type) + hmo + white', medpar)
y = np.array(y)[:,0]
nb2 = NBin(y,X,'nb2').fit(maxiter=10000, maxfun=5000)
assert_almost_equal(nb2.params,
[2.31027893349935, 0.221248978197356, 0.706158824346228,
-0.067955221930748, -0.129065442248951, 0.4457567],
decimal=2)
def main():
# read the data from the internet into numpy arrays
medpar = pandas.read_csv(g_url)
y_patsy, X_patsy = patsy.dmatrices('los~type2+type3+hmo+white', medpar)
y = numpy.array(y_patsy).flatten()
X = numpy.array(X_patsy)
# define the objective function and the autodiff gradient and hessian
f = functools.partial(get_neg_ll, y, X)
g = functools.partial(eval_grad, f)
h = functools.partial(eval_hess, f)
# init the search for max likelihood parameters
theta0 = numpy.array([
numpy.log(numpy.mean(y)),
0, 0, 0, 0,
0.5,
], dtype=float)
# do the max likelihood search
exog_obs : DataFrame
Current values of the predictors where the variable to be
imputed is observed.
exog_miss : DataFrame
Current values of the predictors where the variable to be
Imputed is missing.
init_kwds : dict-like
The init keyword arguments for `vname`, processed through Patsy
as required.
fit_kwds : dict-like
The fit keyword arguments for `vname`, processed through Patsy
as required.
"""
formula = self.conditional_formula[vname]
endog, exog = patsy.dmatrices(formula, self.data,
return_type="dataframe")
# Rows with observed endog
ixo = self.ix_obs[vname]
endog_obs = np.asarray(endog.iloc[ixo])
exog_obs = np.asarray(exog.iloc[ixo, :])
# Rows with missing endog
ixm = self.ix_miss[vname]
exog_miss = np.asarray(exog.iloc[ixm, :])
predict_obs_kwds = {}
if vname in self.predict_kwds:
kwds = self.predict_kwds[vname]
predict_obs_kwds = self._process_kwds(kwds, ixo)
How many steps ahead would you like to forecast?
oos_data : pd.DataFrame
Data to use for the predictors in the forecast
intervals : boolean (default: False)
Whether to return prediction intervals
Returns
----------
- pd.DataFrame with predicted values
"""
if self.latent_variables.estimated is False:
raise Exception("No latent variables estimated!")
else:
_, X_oos = dmatrices(self.formula, oos_data)
X_oos = np.array([X_oos])[0]
X_pred = X_oos[:h]
lmda, Y, scores = self._model(self.latent_variables.get_z_values())
date_index = self.shift_dates(h)
if self.latent_variables.estimation_method in ['M-H']:
sim_vector = self._sim_prediction_bayes(h, 15000, X_pred)
error_bars = []
for pre in range(5,100,5):
error_bars.append(np.insert([np.percentile(i,pre) for i in sim_vector], 0, lmda[-1]))
forecasted_values = np.array([np.mean(i) for i in sim_vector])
prediction_01 = np.array([np.percentile(i, 1) for i in sim_vector])
prediction_05 = np.array([np.percentile(i, 5) for i in sim_vector])
prediction_95 = np.array([np.percentile(i, 95) for i in sim_vector])
estimated = hdd_res.fittedvalues
r2, rmse = hdd_rsquared, np.sqrt(hdd_res.ssr/hdd_res.nobs)
model_obj, model_res, formula = hdd_mod, hdd_res, hdd_formula
fit_bp_hdd = hdd_bp
elif use_cdd_only:
y, X = patsy.dmatrices(
cdd_formula, df, return_type='dataframe')
estimated = cdd_res.fittedvalues
r2, rmse = cdd_rsquared, np.sqrt(cdd_res.ssr/cdd_res.nobs)
model_obj, model_res, formula = cdd_mod, cdd_res, cdd_formula
fit_bp_cdd = cdd_bp
else:
# Use Intercept-only
y, X = patsy.dmatrices(
int_formula, df, return_type='dataframe')
estimated = int_res.fittedvalues
r2, rmse = int_rsquared, np.sqrt(int_res.ssr/int_res.nobs)
model_obj, model_res, formula = int_mod, int_res, int_formula
if y.mean != 0:
cvrmse = rmse / float(y.values.ravel().mean())
else:
cvrmse = np.nan
n = estimated.shape[0]
self.df = df
self.y, self.X = y, X
self.estimated = estimated
self.r2, self.rmse = r2, rmse
name='', model=None, offset=0., eval_env=0):
"""Creates linear component from `patsy` formula.
Parameters
----------
formula : str - a patsy formula
data : a dict-like object that can be used to look up variables referenced
in `formula`
eval_env : either a `patsy.EvalEnvironment` or else a depth represented as
an integer which will be passed to `patsy.EvalEnvironment.capture()`.
See `patsy.dmatrix` and `patsy.EvalEnvironment` for details.
Other arguments are documented in the constructor.
"""
import patsy
eval_env = patsy.EvalEnvironment.capture(eval_env, reference=1)
y, x = patsy.dmatrices(formula, data, eval_env=eval_env)
labels = x.design_info.column_names
return cls(np.asarray(x), np.asarray(y)[:, -1], intercept=False,
labels=labels, priors=priors, vars=vars, name=name,
model=model, offset=offset)