Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def occupancy_precursor_only_nan(il_electricity_cdd_hdd_hourly):
    """Build merged hourly features whose last meter row is NaN.

    Parameters
    ----------
    il_electricity_cdd_hdd_hourly
        Mapping providing ``"meter_data"`` (a frame with a ``value`` column,
        sliceable by datetime) and ``"temperature_data"``.

    Returns
    -------
    The result of ``merge_features`` over the meter values (renamed to
    ``meter_value``), the hourly temperature features, and the time features
    for the 2017-01-04 .. 2017-06-01 window.
    """
    # Take an explicit copy of the window: assigning into a bare slice can
    # write through to the caller's frame (and raises SettingWithCopyWarning
    # on pandas versions without copy-on-write).
    meter_data = il_electricity_cdd_hdd_hourly["meter_data"][
        datetime(2017, 1, 4) : datetime(2017, 6, 1)
    ].copy()
    # Simulates a segment where there is only a single nan value
    meter_data.iloc[-1] = np.nan
    temperature_data = il_electricity_cdd_hdd_hourly["temperature_data"]
    time_features = compute_time_features(meter_data.index)
    temperature_features = compute_temperature_features(
        meter_data.index,
        temperature_data,
        heating_balance_points=[50],
        cooling_balance_points=[65],
        degree_day_method="hourly",
    )
    return merge_features(
        [meter_data.value.to_frame("meter_value"), temperature_features, time_features]
    )
def segmented_data():
    """Return a 24-row hourly (UTC) frame starting 2017-01-01.

    Columns: ``hour_of_week`` taken from ``compute_time_features``,
    ``temperature_mean`` evenly spaced from 0 to 100, ``meter_value`` evenly
    spaced from 10 to 70, and a ``weight`` column of ones.
    """
    hourly_index = pd.date_range(start="2017-01-01", periods=24, freq="H", tz="UTC")
    hour_of_week = compute_time_features(hourly_index).hour_of_week
    columns = {
        "hour_of_week": hour_of_week,
        "temperature_mean": np.linspace(0, 100, 24),
        "meter_value": np.linspace(10, 70, 24),
        "weight": np.ones(24),
    }
    return pd.DataFrame(columns, index=hourly_index)
def occupancy_precursor(il_electricity_cdd_hdd_hourly):
    """Merge meter values with hourly temperature and time features.

    Reads ``"meter_data"`` and ``"temperature_data"`` from the supplied
    mapping, computes features over the meter data's index, and returns the
    output of ``merge_features``.
    """
    meter_frame = il_electricity_cdd_hdd_hourly["meter_data"]
    temperatures = il_electricity_cdd_hdd_hourly["temperature_data"]
    meter_index = meter_frame.index

    time_part = compute_time_features(meter_index)
    temperature_part = compute_temperature_features(
        meter_index,
        temperatures,
        heating_balance_points=[50],
        cooling_balance_points=[65],
        degree_day_method="hourly",
    )
    meter_part = meter_frame.value.to_frame("meter_value")
    return merge_features([meter_part, temperature_part, time_part])
def test_compute_time_features_none():
    """Disabling every time feature must raise ``ValueError``."""
    one_week_hourly = pd.date_range("2017-01-01", periods=168, freq="H", tz="UTC")
    all_disabled = {"hour_of_week": False, "day_of_week": False, "hour_of_day": False}
    with pytest.raises(ValueError):
        compute_time_features(one_week_hourly, **all_disabled)
def segmented_data_nans():
    """Return a 200-row hourly (UTC) frame starting 2017-01-01.

    Columns: ``hour_of_week`` from ``compute_time_features``,
    ``temperature_mean`` evenly spaced 0..100, ``meter_value`` evenly spaced
    10..70, and ``weight`` set to ones.

    NOTE(review): despite the name, no NaN values are inserted here;
    presumably the consuming tests add them -- confirm against callers.
    """
    n_hours = 200
    hourly_index = pd.date_range(
        start="2017-01-01", periods=n_hours, freq="H", tz="UTC"
    )
    hour_of_week = compute_time_features(hourly_index).hour_of_week
    columns = {
        "hour_of_week": hour_of_week,
        "temperature_mean": np.linspace(0, 100, n_hours),
        "meter_value": np.linspace(10, 70, n_hours),
        "weight": np.ones(n_hours),
    }
    return pd.DataFrame(columns, index=hourly_index)
def test_compute_time_features_all():
    """Default call over one week of hours yields all three time features."""
    one_week_hourly = pd.date_range("2017-01-01", periods=168, freq="H", tz="UTC")
    features = compute_time_features(one_week_hourly)

    feature_names = ["day_of_week", "hour_of_day", "hour_of_week"]
    assert list(features.columns) == feature_names
    assert features.shape == (168, 3)
    assert features.sum().sum() == 16464.0

    # Summing an individual column directly is expected to fail
    # (original note: categoricals).
    for name in feature_names:
        with pytest.raises(TypeError):
            features[name].sum()

    # Once cast to float, each column sums to the closed-form total.
    expected_totals = {
        "day_of_week": sum(range(7)) * 24,
        "hour_of_day": sum(range(24)) * 7,
        "hour_of_week": sum(range(168)),
    }
    for name in feature_names:
        assert features[name].astype("float").sum() == expected_totals[name]

    # The input index endpoints are preserved.
    assert features.index[0] == one_week_hourly[0]
    assert features.index[-1] == one_week_hourly[-1]
def test_compute_occupancy_feature_hour_of_week_has_nan(even_occupancy):
    """Occupancy feature is still produced when the last hour_of_week is NaN."""
    three_days_hourly = pd.date_range("2017-01-01", periods=72, freq="H", tz="UTC")
    features = compute_time_features(three_days_hourly, hour_of_week=True)

    hours = features.hour_of_week
    # Blank out the final entry to exercise the NaN path.
    hours.iloc[-1] = np.nan

    result = compute_occupancy_feature(hours, even_occupancy)
    assert result.name == "occupancy"
    assert result.shape == (72,)
    assert result.sum() == 36
def segmented_data_nans_less_than_week():
    """Return a 4-row hourly (UTC) frame starting 2017-01-01.

    Columns: ``hour_of_week`` from ``compute_time_features``,
    ``temperature_mean`` evenly spaced 0..100, ``meter_value`` evenly spaced
    10..70, and ``weight`` set to ones.

    NOTE(review): no NaN values are inserted here despite the name --
    confirm how callers use this frame.
    """
    n_hours = 4
    hourly_index = pd.date_range(
        start="2017-01-01", periods=n_hours, freq="H", tz="UTC"
    )
    hour_of_week = compute_time_features(hourly_index).hour_of_week
    columns = {
        "hour_of_week": hour_of_week,
        "temperature_mean": np.linspace(0, 100, n_hours),
        "meter_value": np.linspace(10, 70, n_hours),
        "weight": np.ones(n_hours),
    }
    return pd.DataFrame(columns, index=hourly_index)
def caltrack_hourly_prediction_feature_processor(
segment_name, segmented_data, occupancy_lookup, temperature_bins
):
# hour of week feature
hour_of_week_feature = compute_time_features(
segmented_data.index, hour_of_week=True, day_of_week=False, hour_of_day=False
)
# occupancy feature
occupancy = occupancy_lookup[segment_name]
occupancy_feature = compute_occupancy_feature(
hour_of_week_feature.hour_of_week, occupancy
)
# get temperature bin features
temperatures = segmented_data
bin_endpoints_list = (
temperature_bins[segment_name].index[temperature_bins[segment_name]].tolist()
)
temperature_bin_features = compute_temperature_bin_features(
segmented_data.temperature_mean, bin_endpoints_list
def caltrack_hourly_prediction_feature_processor(
segment_name, segmented_data, occupancy_lookup, temperature_bins
):
# hour of week feature
hour_of_week_feature = compute_time_features(segmented_data.index)
# occupancy feature
occupancy = occupancy_lookup[segment_name]
occupancy_feature = compute_occupancy_feature(
hour_of_week_feature.hour_of_week, occupancy
)
# get temperature bin features
temperatures = segmented_data
bin_endpoints_list = (
temperature_bins[segment_name].index[temperature_bins[segment_name]].tolist()
)
# TODO(philngo): combine with compute_temperature_features
temperature_bin_features = compute_temperature_bin_features(
segmented_data.temperature_mean, bin_endpoints_list
)