How to use the lifetimes.utils.summary_data_from_transaction_data function in Lifetimes

To help you get started, we’ve selected a few examples of lifetimes.utils.summary_data_from_transaction_data, based on popular ways the function is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github CamDavidsonPilon / lifetimes / tests / test_estimation.py View on Github external
def test_purchase_predictions_do_not_differ_much_if_looking_at_hourly_or_daily_frequencies(self):
        """Compare a 30-day purchase forecast built from daily vs. hourly summaries.

        The same transactions are summarised once with ``freq='D'`` and once
        with ``freq='h'``. One ModifiedBetaGeoFitter instance is fitted to each
        summary in turn (re-seeding NumPy before every fit), and the expected
        number of purchases is requested for 30 time units on the daily fit and
        30 * 24 time units on the hourly fit. The two forecasts must be almost
        equal.
        """
        days = 30
        hours_per_day = 24
        rfm_columns = ('frequency', 'recency', 'T')

        transactions = load_transaction_data(parse_dates=['date'])
        period_end = max(transactions.date)

        # Daily summary is built first, then the hourly one.
        summary_by_freq = {
            freq: utils.summary_data_from_transaction_data(
                transactions, 'id', 'date',
                observation_period_end=period_end, freq=freq)
            for freq in ('D', 'h')
        }
        daily, hourly = summary_by_freq['D'], summary_by_freq['h']

        fitter = estimation.ModifiedBetaGeoFitter()

        # Same seed before each fit so both fits start from the same RNG state.
        np.random.seed(0)
        fitter.fit(*(daily[column] for column in rfm_columns))
        forecast_from_daily = fitter.expected_number_of_purchases_up_to_time(days)

        np.random.seed(0)
        fitter.fit(*(hourly[column] for column in rfm_columns))
        forecast_from_hourly = fitter.expected_number_of_purchases_up_to_time(days * hours_per_day)

        npt.assert_almost_equal(forecast_from_daily, forecast_from_hourly)
github CamDavidsonPilon / lifetimes / tests / test_utils.py View on Github external
freq_multiplier = 1 working and compare with tested data for last 4 records.

    dates = ['1997-01-11', '1997-01-12', '1997-01-13', '1997-01-14']
    actual_trans = [11, 12, 15, 19]
    expected_trans = [10.67, 12.67, 14.87, 17.24]

    """
    datetime_col = 'date'
    customer_id_col = 'id_sample'
    t = 14
    datetime_format = '%Y%m%d'
    freq = 'D'
    observation_period_end = '19970930'
    freq_multiplier = 1

    transactions_summary = utils.summary_data_from_transaction_data(
        cdnow_transactions, customer_id_col, datetime_col,
        datetime_format=datetime_format, freq=freq,
        freq_multiplier=freq_multiplier,
        observation_period_end=observation_period_end)

    transactions_summary = transactions_summary.reset_index()

    model = BetaGeoFitter()
    model.fit(transactions_summary['frequency'],
              transactions_summary['recency'],
              transactions_summary['T'])

    df_cum = utils.expected_cumulative_transactions(
        model, cdnow_transactions, datetime_col, customer_id_col, t,
        datetime_format, freq, set_index_date=True,
        freq_multiplier=freq_multiplier)
github CamDavidsonPilon / lifetimes / tests / test_utils.py View on Github external
def df_cum_transactions(cdnow_transactions):
    """Return the expected-cumulative-transactions frame for ``cdnow_transactions``.

    Steps:
      1. Summarise the transactions per ``id_sample`` with daily frequency,
         ``freq_multiplier=7`` and an observation period ending ``'19970930'``.
      2. Fit a ParetoNBDFitter on the summary's frequency / recency / T columns.
      3. Call ``utils.expected_cumulative_transactions`` with ``t = 25 * 7`` and
         ``set_index_date=False`` and return its result.
    """
    id_column = 'id_sample'
    date_column = 'date'
    date_format = '%Y%m%d'
    period_unit = 'D'
    periods_per_step = 7
    horizon = 25 * 7

    summary = utils.summary_data_from_transaction_data(
        cdnow_transactions,
        id_column,
        date_column,
        datetime_format=date_format,
        freq=period_unit,
        freq_multiplier=periods_per_step,
        observation_period_end='19970930',
    ).reset_index()

    fitter = ParetoNBDFitter()
    fitter.fit(summary['frequency'], summary['recency'], summary['T'])

    # Positional argument order is kept exactly as the helper is called upstream.
    return utils.expected_cumulative_transactions(
        fitter, cdnow_transactions, date_column, id_column, horizon,
        date_format, period_unit,
        set_index_date=False, freq_multiplier=periods_per_step)
github CamDavidsonPilon / lifetimes / tests / test_utils.py View on Github external
def test_summary_data_from_transaction_data_squashes_period_purchases_to_one_purchase():
    """Two same-day purchases by one customer, summarised weekly, give frequency 0."""
    rows = [[1, '2015-01-01'], [1, '2015-01-01']]
    transactions = pd.DataFrame(rows, columns=['id', 't'])

    summary = utils.summary_data_from_transaction_data(transactions, 'id', 't', freq='W')

    customer_row = summary.loc[1]
    # Written as 1 - 1 upstream; presumably "one squashed purchase minus the first one".
    expected_frequency = 1. - 1.
    assert customer_row['frequency'] == expected_frequency
github CamDavidsonPilon / lifetimes / tests / test_utils.py View on Github external
def test_summary_date_from_transaction_data_with_specific_non_daily_frequency(large_transaction_level_data):
    """Summarise the fixture with ``freq='W'`` and compare against a fixed table."""
    columns = ['id', 'frequency', 'recency', 'T']
    rows = [
        [1, 1., 5., 5.],
        [2, 0., 0., 5.],
        [3, 1., 1., 5.],
        [4, 1., 3., 3.],
        [5, 0., 0., 3.],
        [6, 0., 0., 0.],
    ]
    expected = pd.DataFrame(rows, columns=columns).set_index('id')

    actual = utils.summary_data_from_transaction_data(
        large_transaction_level_data, 'id', 'date',
        observation_period_end='20150207', freq='W')

    assert_frame_equal(actual, expected)
github CamDavidsonPilon / lifetimes / tests / test_utils.py View on Github external
def test_summary_data_from_transaction_data_will_choose_the_correct_first_order_to_drop_in_monetary_transactions():
    """The summary must not depend on the row order of the input transactions.

    The same three purchases of customer 2 are summarised twice, once in
    chronological order and once shuffled. Both summaries must be equal and
    the monetary value must be 22.5.
    """
    # this is the correct behaviour. See https://github.com/CamDavidsonPilon/lifetimes/issues/85
    # and test_summary_statistics_are_indentical_to_hardies_paper_confirming_correct_aggregations

    def summarise(dates, amounts):
        # Build a three-row frame for customer 2 and summarise it with monetary values.
        frame = pd.DataFrame({
            'date': pd.to_datetime(pd.Series(dates)),
            'id': pd.Series([2, 2, 2]),
            'sales': pd.Series(amounts),
        })
        return utils.summary_data_from_transaction_data(frame, 'id', 'date', 'sales')

    summary_ordered_data = summarise(
        ['2014-03-14 00:00:00', '2014-04-09 00:00:00', '2014-05-21 00:00:00'],
        [10, 20, 25])

    # Same purchases, with the first two rows swapped.
    summary_unordered_data = summarise(
        ['2014-04-09 00:00:00', '2014-03-14 00:00:00', '2014-05-21 00:00:00'],
        [20, 10, 25])

    assert_frame_equal(summary_ordered_data, summary_unordered_data)
    assert summary_ordered_data['monetary_value'].loc[2] == 22.5
github CamDavidsonPilon / lifetimes / tests / test_utils.py View on Github external
def test_summary_statistics_are_indentical_to_hardies_paper_confirming_correct_aggregations():
    """Check monetary-value summary statistics of the CDNOW sample against fixed values.

    Loads ``lifetimes/datasets/CDNOW_sample.txt``, keeps transactions dated
    before 1997-10-01, summarises them with ``spent`` as the monetary value and
    compares the rounded ``describe()`` statistics of customers with at least
    one repeat (``frequency > 0``) to the expected numbers below.
    """
    # see http://brucehardie.com/papers/rfm_clv_2005-02-16.pdf
    # RFM and CLV: Using Iso-value Curves for Customer Base Analysis
    df = pd.read_csv(
        'lifetimes/datasets/CDNOW_sample.txt',
        # Raw string: '\s' in a normal literal is an invalid escape sequence
        # and triggers a Deprecation/SyntaxWarning on modern Python.
        sep=r'\s+',
        header=None,
        names=['_id', 'id', 'date', 'cds_bought', 'spent'])
    df['date'] = pd.to_datetime(df['date'], format='%Y%m%d')
    df_train = df[df['date'] < '1997-10-01']
    summary = utils.summary_data_from_transaction_data(df_train, 'id', 'date', 'spent')
    results = summary[summary['frequency'] > 0]['monetary_value'].describe()

    assert np.round(results.loc['mean']) == 35
    assert np.round(results.loc['std']) == 30
    assert np.round(results.loc['min']) == 3
    assert np.round(results.loc['50%']) == 27
    assert np.round(results.loc['max']) == 300
    assert np.round(results.loc['count']) == 946
github CamDavidsonPilon / lifetimes / tests / test_utils.py View on Github external
def test_summary_date_from_transaction_with_monetary_values(large_transaction_level_data_with_monetary_value):
    """Summarise the fixture including monetary values and compare to a fixed table."""
    columns = ['id', 'frequency', 'recency', 'T', 'monetary_value']
    rows = [
        [1, 1., 36., 37., 2],
        [2, 0.,  0., 37., 0],
        [3, 2.,  4., 37., 3],
        [4, 2., 20., 22., 3],
        [5, 2.,  2., 22., 4.5],
        [6, 0.,  0.,  5., 0],
    ]
    expected = pd.DataFrame(rows, columns=columns).set_index('id')

    actual = utils.summary_data_from_transaction_data(
        large_transaction_level_data_with_monetary_value,
        'id',
        'date',
        monetary_value_col='monetary_value',
        observation_period_end='20150207')

    assert_frame_equal(actual, expected)
github CamDavidsonPilon / lifetimes / tests / test_generate_data.py View on Github external
def test_beta_geometric_nbd_model_transactional_data(T, r, alpha, a, b, observation_period_end, freq, size):
    """Summarised generated transactions must equal the directly generated summary.

    With the same NumPy seed, the transaction-level generator (summarised via
    ``summary_data_from_transaction_data``) and the summary-level generator
    (with ``recency`` rounded up) must produce equal frequency / recency / T
    frames once both indexes are reset.
    """
    seed = 188898
    model_params = dict(T=T, r=r, alpha=alpha, a=a, b=b)

    # Transaction-level data, then summarised.
    np.random.seed(seed)
    transaction_data = beta_geometric_nbd_model_transactional_data(
        observation_period_end=observation_period_end, freq=freq, size=size,
        **model_params)
    actual = summary_data_from_transaction_data(
        transactions=transaction_data,
        customer_id_col='customer_id',
        datetime_col='date',
        observation_period_end=observation_period_end,
        freq=freq)

    # Summary-level data generated from the same seed.
    np.random.seed(seed)
    expected = beta_geometric_nbd_model(size=size, **model_params)[['frequency', 'recency', 'T']]
    expected['recency'] = expected['recency'].apply(np.ceil)

    expected = expected.reset_index(drop=True)
    actual = actual.reset_index(drop=True)
    assert expected.equals(actual)
github CamDavidsonPilon / lifetimes / tests / test_utils.py View on Github external
def test_summary_data_from_transaction_data_returns_correct_results(transaction_level_data):
    """Summarise the fixture up to 2015-02-07 and compare against a fixed table."""
    columns = ['id', 'frequency', 'recency', 'T']
    rows = [
        [1, 1., 5., 6.],
        [2, 0., 0., 37.],
        [3, 2., 4., 37.],
    ]
    expected = pd.DataFrame(rows, columns=columns).set_index('id')

    actual = utils.summary_data_from_transaction_data(
        transaction_level_data, 'id', 'date',
        observation_period_end='2015-02-07')

    assert_frame_equal(actual, expected)