Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_term_init(diabetes_data):
    """Build a Term from the BMI column and check its initial attributes."""
    diabetes_model = Model(diabetes_data)
    bmi_term = Term(diabetes_model, 'BMI', diabetes_data['BMI'])

    # Attributes the initializer is expected to have populated
    expected_shape = (442, 1)
    assert bmi_term.name == 'BMI'
    assert bmi_term.categorical == False  # noqa: E712
    assert bmi_term.type_ == 'fixed'
    assert bmi_term.levels is not None
    assert bmi_term.data.shape == expected_shape
def test_add_term_to_model(base_model):
    """Add terms to the model and check the BMI and BP entries are Terms."""
    base_model.add_term('BMI')
    bmi_entry = base_model.terms['BMI']
    assert isinstance(bmi_entry, Term)

    base_model.add_term('age_grp', random=False, categorical=True)

    # Keyword arguments given to add_term should reach the Term initializer
    base_model.add_term('BP', random=True, split_by='age_grp', categorical=True)
    bp_entry = base_model.terms['BP']
    assert isinstance(bp_entry, Term)
# pass in new Y data that has 1 if y=event and 0 otherwise
y_data = y_vector[:, y_vector.design_info.column_names.index(event.group(1))]
y_data = pd.DataFrame({event.group(3): y_data})
self._add_y(y_label, family=family, link=link, data=y_data)
else:
# use Y as-is
self._add_y(y_label, family=family, link=link)
else:
x_matrix = dmatrix(fixed, data=data, NA_action="raise")
# Loop over predictor terms
for _name, _slice in x_matrix.design_info.term_name_slices.items():
cols = x_matrix.design_info.column_names[_slice]
term_data = pd.DataFrame(x_matrix[:, _slice], columns=cols)
prior = priors.pop(_name, priors.get("fixed", None))
self.terms[_name] = Term(_name, term_data, prior=prior)
# Random effects
if random is not None: # pylint: disable=too-many-nested-blocks
random = listify(random)
for random_effect in random:
random_effect = random_effect.strip()
# Split specification into intercept, predictor, and grouper
patt = r"^([01]+)*[\s\+]*([^\|]+)*\|(.*)"
intcpt, pred, grpr = re.search(patt, random_effect).groups()
label = "{}|{}".format(pred, grpr) if pred else grpr
prior = priors.pop(label, priors.get("random", None))
family = self.default_priors.get(family=family)
self.family = family
# Override family's link if another is explicitly passed
if link is not None:
self.family.link = link
if prior is None:
prior = self.family.prior
# implement default Uniform [0, sd(Y)] prior for residual SD
if self.family.name == "gaussian":
prior.update(sd=Prior("Uniform", lower=0, upper=self.clean_data[variable].std()))
data = kwargs.pop("data", self.clean_data[variable])
term = Term(variable, data, prior=prior, *args, **kwargs)
self.y = term
self.built = False
data = np.atleast_2d(data)
self.levels = list(range(data.shape[1]))
self.data = data
# identify and flag intercept and cell-means terms (i.e., full-rank
# dummy codes), which receive special priors
if constant is None:
self.constant = np.atleast_2d(data.T).T.sum(1).var() == 0
else:
self.constant = constant
self.prior = prior
class RandomTerm(Term):
random = True
def __init__(
self, name, data, predictor, grouper, categorical=False, prior=None, constant=None
):
super(RandomTerm, self).__init__(name, data, categorical, prior, constant)
self.grouper = grouper
self.predictor = predictor
self.group_index = self.invert_dummies(grouper)
def invert_dummies(self, dummies):
"""
For the sake of computational efficiency (i.e., to avoid lots of
large matrix multiplications in the backends), invert the dummy-coding
process and represent full-rank dummies as a vector of indices into the