Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_penalizer_term_will_shrink_coefs_to_0(self, cdnow_customers):
    """Check penalizer shrinkage, then compare weighted and unweighted fits.

    First part: fits with ``penalizer_coef`` unset, 0.1 and 10 must give
    element-wise strictly decreasing parameter values.

    Second part: a fit on rows aggregated by (frequency, recency, T) with
    their counts passed as ``weights`` must agree, to 4 decimals, with a
    fit on the raw rows.
    """
    def _fit_and_collect(**fitter_kwargs):
        # Fit a fresh model on the fixture and return its parameters as an array.
        fitter = estimation.BetaGeoFitter(**fitter_kwargs)
        fitter.fit(cdnow_customers['frequency'], cdnow_customers['recency'], cdnow_customers['T'])
        return np.array(list(fitter.params_.values()))

    baseline = _fit_and_collect()
    lightly_penalized = _fit_and_collect(penalizer_coef=0.1)
    assert np.all(lightly_penalized < baseline)

    heavily_penalized = _fit_and_collect(penalizer_coef=10)
    assert np.all(heavily_penalized < lightly_penalized)

    # Collapse identical (frequency, recency, T) rows into a single row
    # whose 'weights' column counts how many rows were merged.
    aggregated = cdnow_customers.copy()
    aggregated['weights'] = 1.0
    aggregated = aggregated.groupby(['frequency', 'recency', 'T'])['weights'].sum()
    aggregated = aggregated.reset_index()
    # The comparison is only meaningful if some rows were actually merged.
    assert (aggregated['weights'] > 1).any()

    weighted_fitter = estimation.BetaGeoFitter(penalizer_coef=0.0)
    weighted_fitter.fit(
        aggregated['frequency'],
        aggregated['recency'],
        aggregated['T'],
        weights=aggregated['weights'],
    )

    unweighted_fitter = estimation.BetaGeoFitter(penalizer_coef=0.0)
    unweighted_fitter.fit(
        cdnow_customers['frequency'],
        cdnow_customers['recency'],
        cdnow_customers['T'],
    )

    param_names = ('r', 'alpha', 'a', 'b')
    unweighted_params = np.array(unweighted_fitter._unload_params(*param_names))
    weighted_params = np.array(weighted_fitter._unload_params(*param_names))
    npt.assert_almost_equal(unweighted_params, weighted_params, decimal=4)
def test_save_load_bgnbd_no_data(self, cdnow_customers):
    """Test saving and loading model for BG/NBD without data.

    A fitted model is saved with ``save_data=False`` and loaded into a
    fresh fitter. The loaded fitter must carry the same penalizer, scale,
    parameters and negative log-likelihood, produce the same predictions,
    and have ``data`` set to ``None``.

    The saved file is removed in a ``finally`` clause so a failing
    assertion or a load error does not leave it on disk.
    """
    bgf = estimation.BetaGeoFitter(penalizer_coef=0.0)
    bgf.fit(cdnow_customers['frequency'], cdnow_customers['recency'], cdnow_customers['T'])
    bgf.save_model(PATH_SAVE_BGNBD_MODEL, save_data=False)

    try:
        bgf_new = estimation.BetaGeoFitter()
        bgf_new.load_model(PATH_SAVE_BGNBD_MODEL)

        assert bgf_new.__dict__['penalizer_coef'] == bgf.__dict__['penalizer_coef']
        assert bgf_new.__dict__['_scale'] == bgf.__dict__['_scale']
        assert bgf_new.__dict__['params_'] == bgf.__dict__['params_']
        assert bgf_new.__dict__['_negative_log_likelihood_'] == bgf.__dict__['_negative_log_likelihood_']
        assert bgf_new.__dict__['predict'](1, 1, 2, 5) == bgf.__dict__['predict'](1, 1, 2, 5)
        assert bgf_new.expected_number_of_purchases_up_to_time(1) == bgf.expected_number_of_purchases_up_to_time(1)

        # Saved with save_data=False, so no data should come back.
        assert bgf_new.__dict__['data'] is None
    finally:
        # remove saved model, even when an assertion above failed
        os.remove(PATH_SAVE_BGNBD_MODEL)
def test_expectation_returns_same_value_Hardie_excel_sheet(self, cdnow_customers):
    """Compare expected purchase counts against fixed reference values.

    Per the test name, the reference numbers presumably come from
    Hardie's Excel sheet; agreement is required to 3 decimals.
    """
    fitter = estimation.BetaGeoFitter()
    fitter.fit(
        cdnow_customers['frequency'],
        cdnow_customers['recency'],
        cdnow_customers['T'],
        tol=1e-6,
    )

    horizons = np.array([0.1429, 1.0, 3.00, 31.8571, 32.00, 78.00])
    reference = np.array([0.0078, 0.0532, 0.1506, 1.0405, 1.0437, 1.8576])

    npt.assert_array_almost_equal(
        fitter.expected_number_of_purchases_up_to_time(horizons),
        reference,
        decimal=3,
    )
def test_penalizer_term_will_shrink_coefs_to_0(self, cdnow_customers):
    """Check that a larger ``penalizer_coef`` gives smaller fitted parameters.

    Fits with the penalizer unset, 0.1 and 10 must produce element-wise
    strictly decreasing parameter values.
    """
    def _fit_and_collect(**fitter_kwargs):
        # Fit a fresh model on the fixture and return its parameters as an array.
        fitter = estimation.BetaGeoFitter(**fitter_kwargs)
        fitter.fit(cdnow_customers['frequency'], cdnow_customers['recency'], cdnow_customers['T'])
        return np.array(list(fitter.params_.values()))

    baseline = _fit_and_collect()
    lightly_penalized = _fit_and_collect(penalizer_coef=0.1)
    assert np.all(lightly_penalized < baseline)

    heavily_penalized = _fit_and_collect(penalizer_coef=10)
    assert np.all(heavily_penalized < lightly_penalized)
def test_penalizer_term_will_shrink_coefs_to_0(self, cdnow_customers):
    """Verify that stronger penalization strictly lowers every fitted parameter."""
    frequency = cdnow_customers['frequency']
    recency = cdnow_customers['recency']
    age = cdnow_customers['T']

    # No penalizer: reference parameter values.
    unpenalized = estimation.BetaGeoFitter()
    unpenalized.fit(frequency, recency, age)
    reference_params = np.array(list(unpenalized.params_.values()))

    # Mild penalizer: every parameter must drop below the reference.
    mild = estimation.BetaGeoFitter(penalizer_coef=0.1)
    mild.fit(frequency, recency, age)
    mild_params = np.array(list(mild.params_.values()))
    assert np.all(mild_params < reference_params)

    # Strong penalizer: every parameter must drop further still.
    strong = estimation.BetaGeoFitter(penalizer_coef=10)
    strong.fit(frequency, recency, age)
    strong_params = np.array(list(strong.params_.values()))
    assert np.all(strong_params < mild_params)
def test_conditional_probability_alive_is_between_0_and_1(self, cdnow_customers):
    """Check that the alive probability lies in [0, 1] over a coarse grid.

    The grid steps the three positional arguments in tens from 0 to 90,
    with the third argument never below the second.
    """
    fitter = estimation.BetaGeoFitter()
    fitter.fit(cdnow_customers['frequency'], cdnow_customers['recency'], cdnow_customers['T'])

    upper, step = 100, 10
    for freq in range(0, upper, step):
        for rec in range(0, upper, step):
            # The third argument starts at the current second argument.
            for age in range(rec, upper, step):
                p_alive = fitter.conditional_probability_alive(freq, rec, age)
                assert 0 <= p_alive <= 1.0
def test_conditional_probability_alive_returns_1_if_no_repeat_purchases(self, cdnow_customers):
    """Check that the alive probability is exactly 1.0 for arguments (0, 1, 1)."""
    fitter = estimation.BetaGeoFitter()
    fitter.fit(
        cdnow_customers['frequency'],
        cdnow_customers['recency'],
        cdnow_customers['T'],
    )

    p_alive = fitter.conditional_probability_alive(0, 1, 1)
    assert p_alive == 1.0
def test_no_runtime_warnings_high_frequency(self, cdnow_customers):
    """Check that a very high frequency triggers no numpy floating-point error.

    While the model is fitted and queried, numpy is set to raise on every
    floating-point error category instead of warning, so any such error
    fails the test. The query with ``frequency=1000, recency=10, T=100``
    must return exactly 0.

    ``np.errstate`` is used as a context manager so the previous numpy
    error settings are restored even if the fit or the query raises.
    """
    with np.errstate(all='raise'):
        bgf = estimation.BetaGeoFitter(penalizer_coef=0.0)
        bgf.fit(
            cdnow_customers['frequency'],
            cdnow_customers['recency'],
            cdnow_customers['T'],
            index=None
        )
        p_alive = bgf.conditional_probability_alive(frequency=1000, recency=10, T=100)

    assert p_alive == 0.
def test_using_weights_col_gives_correct_results(self, cdnow_customers):
cdnow_customers_weights = cdnow_customers.copy()
cdnow_customers_weights['weights'] = 1.0
cdnow_customers_weights = cdnow_customers_weights.groupby(['frequency', 'recency', 'T'])['weights'].sum()
cdnow_customers_weights = cdnow_customers_weights.reset_index()
assert (cdnow_customers_weights['weights'] > 1).any()
bgf_weights = estimation.BetaGeoFitter(penalizer_coef=0.0)
bgf_weights.fit(
cdnow_customers_weights['frequency'],
cdnow_customers_weights['recency'],
cdnow_customers_weights['T'],
weights=cdnow_customers_weights['weights']
)
bgf_no_weights = estimation.BetaGeoFitter(penalizer_coef=0.0)
bgf_no_weights.fit(
cdnow_customers['frequency'],
cdnow_customers['recency'],
cdnow_customers['T']
)
npt.assert_almost_equal(