Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_plot_calibration_purchases_vs_holdout_purchases(self, transaction_data, bgf):
    """Check the calibration-vs-holdout plot: line data, legend, title and axis labels.

    Splits ``transaction_data`` into calibration and holdout periods, fits ``bgf``
    on the calibration columns, draws the plot and compares what was drawn
    against reference values.

    Parameters
    ----------
    transaction_data
        Transactions passed to ``utils.calibration_and_holdout_data`` with ``'id'``
        and ``'date'`` as column names (presumably a pytest fixture; its
        definition is not visible here).
    bgf
        Model object whose ``fit`` is called with the calibration frequency,
        recency and T columns (presumably a pytest fixture).
    """
    holdout_expected = [0.161, 0.233, 0.348, 0.544, 0.710, 0.704, 1.606]
    predictions_expected = [0.270, 0.294, 0.402, 0.422, 0.706, 0.809, 1.019]
    labels = ['frequency_holdout', 'model_predictions']

    summary = utils.calibration_and_holdout_data(transaction_data, 'id', 'date', '2014-09-01', '2014-12-31')
    bgf.fit(summary['frequency_cal'], summary['recency_cal'], summary['T_cal'])

    ax = plotting.plot_calibration_purchases_vs_holdout_purchases(bgf, summary)
    try:
        lines = ax.lines
        # Public accessor instead of the private ``legend_`` attribute.
        legend = ax.get_legend()

        # get_data() returns (x, y); only the y-values are compared.
        holdout = lines[0].get_data()[1]
        predictions = lines[1].get_data()[1]

        assert_allclose(holdout, holdout_expected, atol=0.01)
        assert_allclose(predictions, predictions_expected, atol=0.01)
        assert_array_equal([e.get_text() for e in legend.get_texts()], labels)
        assert_equal(ax.title.get_text(), "Actual Purchases in Holdout Period vs Predicted Purchases")
        assert_equal(ax.xaxis.get_label().get_text(), "Purchases in calibration period")
        assert_equal(ax.yaxis.get_label().get_text(), "Average of Purchases in Holdout Period")
    finally:
        # Close the figure even when an assertion fails so it does not leak
        # into subsequent tests.
        plt.close()
def test_calibration_and_holdout_data(large_transaction_level_data):
    """Check holdout frequencies produced by ``utils.calibration_and_holdout_data``.

    Two scenarios are exercised:

    1. The ``large_transaction_level_data`` fixture split at 2015-02-01 and
       observed until 2015-02-07.
    2. A small hand-built transaction table that probes the date boundaries
       of the holdout window (2015-02-01 to 2015-02-04).

    NOTE(review): the second scenario looks like it may originally have been a
    separate test whose header was lost; the missing list assignment has been
    restored so the block parses -- confirm against the upstream test suite.
    """
    today = '2015-02-07'
    calibration_end = '2015-02-01'
    actual = utils.calibration_and_holdout_data(large_transaction_level_data, 'id', 'date', calibration_end, observation_period_end=today)
    assert actual.loc[1]['frequency_holdout'] == 1
    assert actual.loc[2]['frequency_holdout'] == 0

    # Customer 6 must not appear in the result at all (presumably because it
    # has no calibration-period purchase -- fixture not visible here).
    with pytest.raises(KeyError):
        actual.loc[6]

    # Date-boundary scenario: rows are [customer id, purchase date].
    d = [
        [1, '2015-01-01'],
        [1, '2015-02-06'],  # excluded from both holdout and calibration
        [2, '2015-01-01'],
        [3, '2015-01-01'],
        [3, '2015-01-02'],
        [3, '2015-01-05'],
        [4, '2015-01-16'],
        [4, '2015-02-02'],
        [4, '2015-02-05'],  # excluded from both holdout and calibration
        [5, '2015-01-16'],
        [5, '2015-01-17'],
        [5, '2015-01-18'],
        [6, '2015-02-02'],
    ]
    transactions = pd.DataFrame(d, columns=['id', 'date'])
    actual = utils.calibration_and_holdout_data(transactions, 'id', 'date', calibration_period_end='2015-02-01', observation_period_end='2015-02-04')
    # Customer 1's only later purchase falls after the observation end, so it
    # must not count; customer 4 has exactly one purchase inside the window.
    assert actual['frequency_holdout'].loc[1] == 0
    assert actual['frequency_holdout'].loc[4] == 1
def test_calibration_and_holdout_data_with_monetary_value(large_transaction_level_data_with_monetary_value):
    """Verify the monetary-value columns of the calibration/holdout summary.

    Calls ``utils.calibration_and_holdout_data`` with
    ``monetary_value_col='monetary_value'`` and checks that the
    ``monetary_value_cal`` and ``monetary_value_holdout`` columns match the
    expected per-row values.
    """
    summary = utils.calibration_and_holdout_data(
        large_transaction_level_data_with_monetary_value,
        'id',
        'date',
        '2015-02-01',
        observation_period_end='2015-02-07',
        monetary_value_col='monetary_value',
    )

    # (column name, expected values in row order), checked in this order.
    expectations = (
        ('monetary_value_cal', [0, 0, 3, 0, 4.5]),
        ('monetary_value_holdout', [2, 0, 0, 3, 0]),
    )
    for column, wanted in expectations:
        assert (summary[column] == wanted).all()
def test_calibration_and_holdout_data_works_with_specific_frequency(large_transaction_level_data):
    """Verify the calibration/holdout summary when ``freq='W'`` is requested.

    The result of ``utils.calibration_and_holdout_data`` is compared, ignoring
    dtypes, with a hand-written frame indexed by ``id``.
    """
    column_names = ['id', 'frequency_cal', 'recency_cal', 'T_cal', 'frequency_holdout', 'duration_holdout']
    reference_rows = [
        [1, 0., 0., 4., 1, 1],
        [2, 0., 0., 4., 0, 1],
        [3, 1., 1., 4., 0, 1],
        [4, 0., 0., 2., 1, 1],
        [5, 0., 0., 2., 0, 1],
    ]
    reference = pd.DataFrame(reference_rows, columns=column_names)
    reference = reference.set_index('id')

    result = utils.calibration_and_holdout_data(
        large_transaction_level_data,
        'id',
        'date',
        '2015-02-01',
        observation_period_end='2015-02-07',
        freq='W',
    )

    assert_frame_equal(result, reference, check_dtype=False)