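# NOTE: the imports below are an assumption for this excerpt; they (and the
# pytest fixture `cdnow_customers`, the CDNOW summary data) are defined
# elsewhere in the lifetimes test suite rather than in the original snippet.
from collections import OrderedDict

import numpy as np
import numpy.testing as npt
import pandas as pd

from lifetimes import estimation
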
def test_conditional_probability_of_n_purchases_up_to_time_is_between_0_and_1(self, cdnow_customers):
    """
    Due to the large parameter space we take a random subset.
    """
    ptf = estimation.ParetoNBDFitter()
    ptf.fit(cdnow_customers['frequency'], cdnow_customers['recency'], cdnow_customers['T'])

    for freq in np.random.choice(100, 5):
        for recency in np.random.choice(100, 5):
            for age in recency + np.random.choice(100, 5):
                for t in np.random.choice(100, 5):
                    for n in np.random.choice(10, 5):
                        assert (
                            0.0
                            <= ptf.conditional_probability_of_n_purchases_up_to_time(n, t, freq, recency, age)
                            <= 1.0
                        )
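
# Reference curve for expected_number_of_purchases_up_to_time over t = 0..9;
# per the docstring below, the values come from the R BTYD package documentation.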
def test_expectation_returns_same_value_as_R_BTYD(self, cdnow_customers):
    """ From https://cran.r-project.org/web/packages/BTYD/BTYD.pdf """
    ptf = estimation.ParetoNBDFitter()
    ptf.fit(cdnow_customers['frequency'], cdnow_customers['recency'], cdnow_customers['T'], tol=1e-6)

    expected = np.array([0.00000000, 0.05077821, 0.09916088, 0.14542507, 0.18979930,
                         0.23247466, 0.27361274, 0.31335159, 0.35181024, 0.38909211])
    actual = ptf.expected_number_of_purchases_up_to_time(range(10))
    npt.assert_allclose(expected, actual, atol=0.01)
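
# The expected (r, alpha, s, beta) values are the published Pareto/NBD estimates
# for the CDNOW data from the Hardie paper the test name refers to; the fit
# should match them to 2 decimals.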
def test_params_out_is_close_to_Hardie_paper(self, cdnow_customers):
    ptf = estimation.ParetoNBDFitter()
    ptf.fit(cdnow_customers['frequency'], cdnow_customers['recency'], cdnow_customers['T'], iterative_fitting=3)
    expected = np.array([0.553, 10.578, 0.606, 11.669])
    npt.assert_array_almost_equal(expected, np.array(ptf._unload_params('r', 'alpha', 's', 'beta')), decimal=2)
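
# Every entry of the matrix returned by conditional_probability_alive_matrix()
# should equal a direct call to conditional_probability_alive() with the same
# frequency/recency and the maximum observed T.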
def test_conditional_probability_alive_matrix(self, cdnow_customers):
    ptf = estimation.ParetoNBDFitter()
    ptf.fit(cdnow_customers['frequency'], cdnow_customers['recency'], cdnow_customers['T'])
    Z = ptf.conditional_probability_alive_matrix()
    max_t = int(ptf.data['T'].max())

    for t_x in range(Z.shape[0]):
        for x in range(Z.shape[1]):
            assert Z[t_x][x] == ptf.conditional_probability_alive(x, t_x, max_t)
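
# Summing P(n purchases in (0, t]) over n should be close to 1; n is truncated
# at 20, so the check only requires agreement to 2 decimals.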
def test_conditional_probability_of_n_purchases_up_to_time_adds_up_to_1(self, cdnow_customers):
    """
    Due to the large parameter space we take a random subset. We also restrict our limits to keep the number of
    values of n for which the probability needs to be calculated to a sane level.
    """
    ptf = estimation.ParetoNBDFitter()
    ptf.fit(cdnow_customers['frequency'], cdnow_customers['recency'], cdnow_customers['T'])

    for freq in np.random.choice(10, 5):
        for recency in np.random.choice(9, 5):
            for age in np.random.choice(np.arange(recency, 10, 1), 5):
                for t in 1 + np.random.choice(9, 5):
                    npt.assert_almost_equal(
                        np.sum([
                            ptf.conditional_probability_of_n_purchases_up_to_time(n, t, freq, recency, age)
                            for n in np.arange(0, 20, 1)
                        ]),
                        1.0,
                        decimal=2
                    )
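
# Fitting on the raw customer-level data and on the same data compressed into
# (frequency, recency, T) groups with group sizes as weights should recover
# nearly identical parameters.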
def test_fit_with_and_without_weights(self, cdnow_customers):
    original_dataset_with_weights = cdnow_customers.copy()
    original_dataset_with_weights = original_dataset_with_weights.groupby(['frequency', 'recency', 'T']).size()
    original_dataset_with_weights = original_dataset_with_weights.reset_index()
    original_dataset_with_weights = original_dataset_with_weights.rename(columns={0: 'weights'})

    pnbd_noweights = estimation.ParetoNBDFitter()
    pnbd_noweights.fit(
        cdnow_customers['frequency'],
        cdnow_customers['recency'],
        cdnow_customers['T'],
    )

    pnbd = estimation.ParetoNBDFitter()
    pnbd.fit(
        original_dataset_with_weights['frequency'],
        original_dataset_with_weights['recency'],
        original_dataset_with_weights['T'],
        original_dataset_with_weights['weights'],
    )

    npt.assert_array_almost_equal(
        np.array(pnbd_noweights._unload_params('r', 'alpha', 's', 'beta')),
        np.array(pnbd._unload_params('r', 'alpha', 's', 'beta')),
        decimal=2
    )
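
# The fitter's parameters are set directly (no fitting), so only
# conditional_probability_alive itself is exercised against the BTYD
# walkthrough target.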
def test_conditional_probability_alive(self, cdnow_customers):
    """
    Target taken from page 8,
    https://cran.r-project.org/web/packages/BTYD/vignettes/BTYD-walkthrough.pdf
    """
    ptf = estimation.ParetoNBDFitter()
    ptf.params_ = OrderedDict(
        zip(['r', 'alpha', 's', 'beta'],
            [0.5534, 10.5802, 0.6061, 11.6562]))

    p_alive = ptf.conditional_probability_alive(26.00, 30.86, 31.00)
    assert abs(p_alive - 0.9979) < 0.001
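
# Large frequency values are chosen to stress the _log_A_0 computation; the
# log-terms should stay finite, non-null, and negative instead of overflowing.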
def test_overflow_error(self):
    ptf = estimation.ParetoNBDFitter()
    params = np.array([10.465, 7.98565181e-03, 3.0516, 2.820])
    freq = np.array([400., 500., 500.])
    rec = np.array([5., 1., 4.])
    age = np.array([6., 37., 37.])
    assert all([r < 0 and not np.isinf(r) and not pd.isnull(r)
                for r in ptf._log_A_0(params, freq, rec, age)])