def two_gaussians(x):
    log_like1 = - 0.5 * number_of_parameters * tt.log(2 * num.pi) \
                - 0.5 * tt.log(dsigma) \
                - 0.5 * (x - mu1).T.dot(isigma).dot(x - mu1)
    log_like2 = - 0.5 * number_of_parameters * tt.log(2 * num.pi) \
                - 0.5 * tt.log(dsigma) \
                - 0.5 * (x - mu2).T.dot(isigma).dot(x - mu2)
    return tt.log(w1 * tt.exp(log_like1) + w2 * tt.exp(log_like2))

with pm.Model() as self.PT_test:
    for data_key in self.data_keys:
        if data_key != "like":
            uniform = pm.Uniform(
                data_key, shape=number_of_parameters,
                lower=-2. * num.ones_like(mu1),
                upper=2. * num.ones_like(mu1),
                testval=-1. * num.ones_like(mu1), transform=None)
        else:
            like = pm.Deterministic('tmp', two_gaussians(uniform))
    pm.Potential(self.data_keys[self.like_index], like)

# create or get test folder to write files
self.test_dir_path = os.path.join(os.path.dirname(__file__), 'PT_TEST')
if not os.path.exists(self.test_dir_path):
    try:
        os.mkdir(self.test_dir_path)
    except IOError as e:
        print(e)

# create data
data_increment = 1
self.lpoint = []
for data_key in self.data_keys:
    if data_key != "like":
        chain_data = num.arange(number_of_parameters).astype(num.float) * data_increment
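The snippet above depends on names defined elsewhere in the test (mu1, mu2, isigma, dsigma, w1, w2). A minimal, self-contained sketch of the same two-Gaussian potential follows; the mixture parameters here are illustrative assumptions, not the test's actual values:

import numpy as num
import theano.tensor as tt
import pymc3 as pm

# illustrative mixture parameters (assumptions, not the test's values)
number_of_parameters = 2
mu1 = num.zeros(number_of_parameters)
mu2 = num.ones(number_of_parameters) * 1.5
sigma = num.eye(number_of_parameters)
isigma = num.linalg.inv(sigma)   # inverse covariance
dsigma = num.linalg.det(sigma)   # determinant of covariance
w1, w2 = 0.5, 0.5                # mixture weights

def two_gaussians(x):
    log_like1 = (- 0.5 * number_of_parameters * tt.log(2 * num.pi)
                 - 0.5 * tt.log(dsigma)
                 - 0.5 * (x - mu1).T.dot(isigma).dot(x - mu1))
    log_like2 = (- 0.5 * number_of_parameters * tt.log(2 * num.pi)
                 - 0.5 * tt.log(dsigma)
                 - 0.5 * (x - mu2).T.dot(isigma).dot(x - mu2))
    return tt.log(w1 * tt.exp(log_like1) + w2 * tt.exp(log_like2))

with pm.Model():
    x = pm.Uniform('x', shape=number_of_parameters, lower=-2., upper=2.)
    pm.Potential('like', two_gaussians(x))   # mixture enters via a Potential
    trace = pm.sample(1000, tune=1000)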
def test_pymc3_convert_dists():
    """Just a basic check that all PyMC3 RVs will convert to and from Theano RVs."""
    tt.config.compute_test_value = "ignore"
    theano.config.cxx = ""

    with pm.Model() as model:
        norm_rv = pm.Normal("norm_rv", 0.0, 1.0, observed=1.0)
        mvnorm_rv = pm.MvNormal("mvnorm_rv", np.r_[0.0], np.c_[1.0], shape=1, observed=np.r_[1.0])
        cauchy_rv = pm.Cauchy("cauchy_rv", 0.0, 1.0, observed=1.0)
        halfcauchy_rv = pm.HalfCauchy("halfcauchy_rv", 1.0, observed=1.0)
        uniform_rv = pm.Uniform("uniform_rv", observed=1.0)
        gamma_rv = pm.Gamma("gamma_rv", 1.0, 1.0, observed=1.0)
        invgamma_rv = pm.InverseGamma("invgamma_rv", 1.0, 1.0, observed=1.0)
        exp_rv = pm.Exponential("exp_rv", 1.0, observed=1.0)
        halfnormal_rv = pm.HalfNormal("halfnormal_rv", 1.0, observed=1.0)
        beta_rv = pm.Beta("beta_rv", 2.0, 2.0, observed=1.0)
        binomial_rv = pm.Binomial("binomial_rv", 10, 0.5, observed=5)
        dirichlet_rv = pm.Dirichlet("dirichlet_rv", np.r_[0.1, 0.1], observed=np.r_[0.1, 0.1])
        poisson_rv = pm.Poisson("poisson_rv", 10, observed=5)
        bernoulli_rv = pm.Bernoulli("bernoulli_rv", 0.5, observed=0)
        betabinomial_rv = pm.BetaBinomial("betabinomial_rv", 0.1, 0.1, 10, observed=5)
        categorical_rv = pm.Categorical("categorical_rv", np.r_[0.5, 0.5], observed=1)
        multinomial_rv = pm.Multinomial("multinomial_rv", 5, np.r_[0.5, 0.5], observed=np.r_[2])
        negbinomial_rv = pm.NegativeBinomial("negbinomial_rv", 10.2, 0.5, observed=5)

    # Convert to a Theano `FunctionGraph`
    fgraph = model_graph(model)

    rvs_by_name = {n.owner.inputs[1].name: n.owner.inputs[1] for n in fgraph.outputs}
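One quick way to exercise rvs_by_name afterwards; a sketch under the assumption that every observed RV above appears among the FunctionGraph outputs (the set below just restates the names defined in the test):

# check that every RV defined in the model survived the conversion
expected = {
    "norm_rv", "mvnorm_rv", "cauchy_rv", "halfcauchy_rv", "uniform_rv",
    "gamma_rv", "invgamma_rv", "exp_rv", "halfnormal_rv", "beta_rv",
    "binomial_rv", "dirichlet_rv", "poisson_rv", "bernoulli_rv",
    "betabinomial_rv", "categorical_rv", "multinomial_rv", "negbinomial_rv",
}
assert expected == set(rvs_by_name)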
plt.scatter(s_short, r_short, color='red')
plt.scatter(s_long, r_long, color='green')
plt.scatter(s_flat, r_flat, color='black')
plt.title("State vs Reward scatter plot")
plt.show()

# qvalues is an [N x 3] array whose columns are:
#   0 - action index
#   1 - state index
#   2 - reward
qvalues = np.array(full_tensor)

with pm.Model() as self.model:
    Qmus = pm.Normal('Qmus', mu=self.Qmus_estimates_mu, sd=self.Qmus_estimates_sd, shape=[3, self.D])
    Qsds = pm.Normal('Qsds', mu=self.Qsds_estimates_mu, sd=self.Qsds_estimates_sd, shape=[3, self.D])

    idx0 = qvalues[:, 0].astype(int)
    idx1 = qvalues[:, 1].astype(int)
    pm.Normal('likelihood', mu=Qmus[idx0, idx1], sd=np.exp(Qsds[idx0, idx1]), observed=qvalues[:, 2])

    mean_field = pm.fit(n=15000, method='advi', obj_optimizer=pm.adam(learning_rate=0.1))
    self.trace = mean_field.sample(5000)

self.Qmus_estimates = np.mean(self.trace['Qmus'], axis=0)
self.Qsds_estimates = np.mean(self.trace['Qsds'], axis=0)

# posterior moments become the priors for the next update
self.Qmus_estimates_mu = self.Qmus_estimates
self.Qmus_estimates_sd = np.std(self.trace['Qmus'], axis=0)
self.Qsds_estimates_mu = self.Qsds_estimates
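Once the posterior is updated, the running estimates can drive action selection. A sketch of one plausible greedy rule; the state index and the argmax-over-actions choice are illustrative assumptions, not part of the snippet:

# greedy policy: pick the action with the highest posterior-mean
# Q-value for a hypothetical current state
state_ix = 0
best_action = int(np.argmax(self.Qmus_estimates[:, state_ix]))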
self.shared_vars = {
    'model_input': model_input,
    'model_output': model_output,
    'model_cats': model_cats
}

model = pm.Model()

with model:
    mu_alpha = pm.Normal('mu_alpha', mu=0, sd=100)
    sigma_alpha = pm.HalfNormal('sigma_alpha', sd=100)
    mu_beta = pm.Normal('mu_beta', mu=0, sd=100)
    sigma_beta = pm.HalfNormal('sigma_beta', sd=100)

    alpha = pm.Normal('alpha', mu=mu_alpha, sd=sigma_alpha, shape=(self.num_cats,))
    beta = pm.Normal('beta', mu=mu_beta, sd=sigma_beta, shape=(self.num_cats, self.num_pred))

    c = model_cats
    temp = alpha[c] + T.sum(beta[c] * model_input, 1)
    p = pm.invlogit(temp)

    o = pm.Bernoulli('o', p, observed=model_output)

return model
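A hedged usage sketch for a builder like the one above, assuming it is wrapped as a hypothetical free function build_model(...) and that the shared variables hold the training data; the shapes and data below are illustrative:

import numpy as np
import theano
import pymc3 as pm

num_cats, num_pred, n = 3, 4, 200                        # illustrative sizes
model_input = theano.shared(np.random.randn(n, num_pred))
model_cats = theano.shared(np.random.randint(num_cats, size=n))
model_output = theano.shared(np.random.randint(2, size=n))

# build_model is a hypothetical wrapper around the builder shown above
model = build_model(model_input, model_output, model_cats,
                    num_cats=num_cats, num_pred=num_pred)
with model:
    trace = pm.sample(1000, tune=1000)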
def fit(self, X, Y, n_samples=10000, tune_steps=1000, n_jobs=4):
    with pm.Model() as self.model:
        # Priors
        std = pm.Uniform("std", 0, self.sps, testval=X.std())
        beta = pm.StudentT("beta", mu=0, lam=self.sps, nu=self.nu)
        alpha = pm.StudentT("alpha", mu=0, lam=self.sps, nu=self.nu, testval=Y.mean())

        # Deterministic model
        mean = pm.Deterministic("mean", alpha + beta * X)

        # Posterior distribution
        obs = pm.Normal("obs", mu=mean, sd=std, observed=Y)

        ## Run MCMC
        # Find search start value with maximum a posteriori estimation
        start = pm.find_MAP()

        # Sample the posterior distribution of the latent variables
        trace = pm.sample(n_samples, njobs=n_jobs, tune=tune_steps, start=start)

        # Recover posterior samples, discarding the first half as burn-in
        self.burned_trace = trace[int(n_samples / 2):]
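A sketch of how the burned trace could back a point prediction; the posterior-mean rule and the predict method are assumptions, not part of the class above:

def predict(self, X_new):
    # average the kept posterior samples to get point estimates
    # of the intercept and slope, then apply the linear model
    alpha_hat = self.burned_trace["alpha"].mean()
    beta_hat = self.burned_trace["beta"].mean()
    return alpha_hat + beta_hat * X_new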
    Returns:
        np.array: imputed dataset.
    """
    # check that the imputer is fitted, then pull the stored model and labels
    check_is_fitted(self, "statistics_")
    model = self.statistics_["param"]["model"]
    labels = self.statistics_["param"]["labels"]

    # add a Deterministic node for each missing value;
    # sampling then pulls from the posterior predictive distribution
    # of each missing data point, i.e. a distribution for EACH missing value
    with model:
        p_pred = pm.Deterministic(
            "p_pred", pm.invlogit(model["alpha"] + model["beta"].dot(X.T))
        )
        tr = pm.sample(
            self.sample,
            tune=self.tune,
            init=self.init,
            **self.sample_kwargs
        )
    self.trace_ = tr

    # decide how to impute: use the mean of the posterior predictive or a random draw
    # (using the MAP is not supported yet, but may be eventually)
    if not self.fill_value or self.fill_value == "mean":
        imp = tr["p_pred"].mean(0)
    elif self.fill_value == "random":
        imp = np.apply_along_axis(np.random.choice, 0, tr["p_pred"])
    else:
        err = f"{self.fill_value} must be 'mean' or 'random'."
        raise ValueError(err)
def on_fit(self, X, y):
    self.X = shared(X)
    self.model_definition(model=self.model, X=self.X, y=y)
    with self.model:
        self.trace = pm.sample()
def _imp_bayes_logistic_reg(X, col_name, x, lm, imp_ix, fv, verbose,
                            sample=1000, tune=1000, init="auto", thresh=0.5):
    """Private method to perform bayesian logistic imputation."""
    progress = _pymc3_logger(verbose)
    model, labels = lm

    # add a Deterministic node for each missing value;
    # sampling then pulls from the posterior predictive distribution
    # of each of the missing data points, i.e. a distribution for EACH missing value
    with model:
        p_pred = pm.Deterministic(
            "p_pred", pm.invlogit(model["alpha"] + model["beta"].dot(x.T))
        )
        # pm.sample takes the draw count positionally and the
        # progress flag as `progressbar`
        tr = pm.sample(
            sample, tune=tune, init=init, progressbar=progress
        )

    # decide how to impute: use the mean of the posterior predictive or a random draw
    # (using the MAP is not supported yet, but may be eventually)
    if not fv or fv == "mean":
        fills = tr["p_pred"].mean(0)
    elif fv == "random":
        fills = np.apply_along_axis(np.random.choice, 0, tr["p_pred"])
    else:
        err = f"{fv} is not an accepted reducer. Choose `mean` or `random`."
        raise ValueError(err)

    # convert probabilities to class membership,
    # then map class membership to the corresponding label
    fill_thresh = np.vectorize(lambda f: 1 if f > thresh else 0)
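The snippet cuts off before fill_thresh is applied. A plausible continuation, an assumption rather than the library's verbatim code, binarizes the probabilities and maps them back to the stored labels:

    # hypothetical continuation: threshold to 0/1 class membership,
    # then look up the corresponding original label
    fills = fill_thresh(fills)
    fill_val = np.array([labels[i] for i in fills])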