Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_compute_temperature_features_billing_bimonthly_bad_degree_days(
    il_electricity_cdd_hdd_billing_bimonthly
):
    """An unrecognized ``degree_day_method`` must raise ``ValueError``.

    The meter index and temperature series are taken from the
    ``il_electricity_cdd_hdd_billing_bimonthly`` fixture.
    """
    fixture = il_electricity_cdd_hdd_billing_bimonthly
    meter_data = fixture["meter_data"]
    temperature_data = fixture["temperature_data"]

    # "UNKNOWN" is the deliberately invalid value under test.
    feature_options = {
        "heating_balance_points": [60, 61],
        "cooling_balance_points": [65, 66],
        "degree_day_method": "UNKNOWN",
    }

    with pytest.raises(ValueError):
        compute_temperature_features(
            meter_data.index, temperature_data, **feature_options
        )
def test_compute_temperature_features_shorter_temperature_data(
    il_electricity_cdd_hdd_daily
):
    """Features are computed from a temperature series missing its tail.

    The last 200 entries of the fixture's temperature data are removed
    before the call; the result must still have 810 rows and the three
    columns asserted below.
    """
    meter_data = il_electricity_cdd_hdd_daily["meter_data"]
    # Slice off the final 200 entries so the temperature series is shorter.
    truncated_temperature = il_electricity_cdd_hdd_daily["temperature_data"][:-200]

    features = compute_temperature_features(meter_data.index, truncated_temperature)

    expected_columns = ["n_days_dropped", "n_days_kept", "temperature_mean"]
    assert features.shape == (810, 3)
    assert sorted(features.columns) == expected_columns
    assert round(features.temperature_mean.sum()) == 43958.0
def test_compute_temperature_features_empty_meter_data():
    """Empty inputs produce a zero-row frame with the three asserted columns."""
    empty_index = pd.DatetimeIndex([], tz="UTC", name="dt", freq="H")
    temperature_data = pd.Series({"value": 0}, index=empty_index)

    # Derive an empty index by daily resampling, wrap it in an empty frame,
    # and clear the index's freq attribute before passing it on.
    resampled_index = temperature_data.resample("D").sum().index
    meter_data_hack = pd.DataFrame({"value": []}, index=resampled_index)
    meter_data_hack.index.freq = None

    feature_options = {
        "heating_balance_points": [65],
        "cooling_balance_points": [65],
        "degree_day_method": "daily",
        "use_mean_daily_values": False,
    }
    features = compute_temperature_features(
        meter_data_hack.index, temperature_data, **feature_options
    )

    expected_columns = ["n_days_dropped", "n_days_kept", "temperature_mean"]
    assert features.shape == (0, 3)
    assert sorted(features.columns) == expected_columns
    assert round(features.temperature_mean.sum()) == 0
def test_compute_temperature_features_daily_daily_degree_days(
    il_electricity_cdd_hdd_daily, snapshot
):
    """Check shape and column names of daily-method degree-day features.

    ``temperature_mean`` is switched off, so the asserted columns are the
    hdd/cdd columns for the requested balance points plus the two day-count
    columns.

    NOTE(review): ``snapshot`` is accepted but not used in this body.
    """
    meter_data = il_electricity_cdd_hdd_daily["meter_data"]
    temperature_data = il_electricity_cdd_hdd_daily["temperature_data"]

    feature_options = {
        "heating_balance_points": [60, 61],
        "cooling_balance_points": [65, 66],
        "temperature_mean": False,
        "degree_day_method": "daily",
    }
    features = compute_temperature_features(
        meter_data.index, temperature_data, **feature_options
    )

    expected_columns = [
        "cdd_65",
        "cdd_66",
        "hdd_60",
        "hdd_61",
        "n_days_dropped",
        "n_days_kept",
    ]
    assert features.shape == (810, 6)
    assert sorted(features.columns) == expected_columns
def test_compute_temperature_features_billing_monthly_bad_degree_day_method(
    il_electricity_cdd_hdd_billing_monthly
):
    """An unrecognized ``degree_day_method`` must raise ``ValueError``.

    The meter index and temperature series are taken from the
    ``il_electricity_cdd_hdd_billing_monthly`` fixture.
    """
    fixture = il_electricity_cdd_hdd_billing_monthly
    meter_data = fixture["meter_data"]
    temperature_data = fixture["temperature_data"]

    # "UNKNOWN" is the deliberately invalid value under test.
    feature_options = {
        "heating_balance_points": [60, 61],
        "cooling_balance_points": [65, 66],
        "degree_day_method": "UNKNOWN",
    }

    with pytest.raises(ValueError):
        compute_temperature_features(
            meter_data.index, temperature_data, **feature_options
        )
# Computes temperature features with degree_day_method="daily",
# temperature_mean=False and use_mean_daily_values=False, then checks the
# resulting shape and (sorted) column names.
# NOTE(review): this definition is cut off in this excerpt -- the
# expected-columns list opened below is never closed before the next `def`,
# and the `snapshot` fixture is not used in the visible part.
def test_compute_temperature_features_daily_daily_degree_days_use_mean_false(
il_electricity_cdd_hdd_daily, snapshot
):
meter_data = il_electricity_cdd_hdd_daily["meter_data"]
temperature_data = il_electricity_cdd_hdd_daily["temperature_data"]
df = compute_temperature_features(
meter_data.index,
temperature_data,
heating_balance_points=[60, 61],
cooling_balance_points=[65, 66],
temperature_mean=False,
degree_day_method="daily",
use_mean_daily_values=False,
)
# Two hdd columns + two cdd columns + the two day-count columns = 6.
assert df.shape == (810, 6)
assert list(sorted(df.columns)) == [
"cdd_65",
"cdd_66",
"hdd_60",
"hdd_61",
"n_days_dropped",
"n_days_kept",
# Overwrites the first 4 * 24 rows of a deep copy of the temperature data
# with NaN, then computes temperature features against the billing meter
# index. With percent_hourly_coverage_per_billing_period=0 the test asserts
# that "n_days_kept" contains no nulls.
# NOTE(review): this definition is cut off in this excerpt -- the second
# compute_temperature_features call below is never closed, so the
# constrained-case assertions are not visible here.
def test_caltrack_merge_temperatures_insufficient_temperature_per_period(
baseline_meter_data_billing, baseline_temperature_data
):
# Deep copy so the NaN assignment below does not touch the fixture itself.
baseline_temperature_data_missing = baseline_temperature_data.copy(deep=True)
baseline_temperature_data_missing.iloc[: (4 * 24)] = np.nan
# test without percent_hourly_coverage_per_billing_period constraint
temperature_features_no_constraint = compute_temperature_features(
baseline_meter_data_billing.index,
baseline_temperature_data_missing,
heating_balance_points=range(40, 81),
cooling_balance_points=range(50, 91),
data_quality=True,
keep_partial_nan_rows=False,
percent_hourly_coverage_per_billing_period=0,
)
assert temperature_features_no_constraint["n_days_kept"].isnull().sum() == 0
# test with default percent_hourly_coverage_per_billing_period=0.9 constraint
temperature_features_with_constraint = compute_temperature_features(
baseline_meter_data_billing.index,
baseline_temperature_data_missing,
heating_balance_points=range(40, 81),
# Computes temperature features for the monthly billing fixture with
# degree_day_method="daily", temperature_mean=False and
# use_mean_daily_values=False, then checks the resulting shape and (sorted)
# column names.
# NOTE(review): this definition is cut off in this excerpt -- the
# expected-columns list opened below is never closed, and the `snapshot`
# fixture is not used in the visible part.
def test_compute_temperature_features_billing_monthly_daily_degree_days_use_mean_false(
il_electricity_cdd_hdd_billing_monthly, snapshot
):
meter_data = il_electricity_cdd_hdd_billing_monthly["meter_data"]
temperature_data = il_electricity_cdd_hdd_billing_monthly["temperature_data"]
df = compute_temperature_features(
meter_data.index,
temperature_data,
heating_balance_points=[60, 61],
cooling_balance_points=[65, 66],
temperature_mean=False,
degree_day_method="daily",
use_mean_daily_values=False,
)
# Two hdd columns + two cdd columns + the two day-count columns = 6.
assert df.shape == (27, 6)
assert list(sorted(df.columns)) == [
"cdd_65",
"cdd_66",
"hdd_60",
"hdd_61",
"n_days_dropped",
"n_days_kept",
Parameters
----------
meter_data : :any:`pandas.DataFrame`
Hourly meter data in eemeter format.
temperature_data : :any:`pandas.Series`
Hourly temperature data in eemeter format.
Returns
-------
design_matrix : :any:`pandas.DataFrame`
A design matrix with meter_value, hour_of_week, hdd_50, and cdd_65 features.
"""
time_features = compute_time_features(
meter_data.index, hour_of_week=True, hour_of_day=False, day_of_week=False
)
temperature_features = compute_temperature_features(
meter_data.index,
temperature_data,
heating_balance_points=[50],
cooling_balance_points=[65],
degree_day_method="hourly",
)
design_matrix = merge_features(
[meter_data.value.to_frame("meter_value"), temperature_features, time_features]
)
return design_matrix
Parameters
----------
meter_data : :any:`pandas.DataFrame`
Hourly meter data in eemeter format.
temperature_data : :any:`pandas.Series`
Hourly temperature data in eemeter format.
Returns
-------
design_matrix : :any:`pandas.DataFrame`
A design matrix with mean usage_per_day, hdd_30-hdd_90, and cdd_30-cdd_90
features.
"""
usage_per_day = compute_usage_per_day_feature(meter_data, series_name="meter_value")
temperature_features = compute_temperature_features(
meter_data.index,
temperature_data,
heating_balance_points=range(30, 91),
cooling_balance_points=range(30, 91),
data_quality=True,
tolerance=pd.Timedelta(
"35D"
), # limit temperature data matching to periods of up to 35 days.
)
design_matrix = merge_features([usage_per_day, temperature_features])
return design_matrix