Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def occupancy_precursor_only_nan(il_electricity_cdd_hdd_hourly):
    """Build a merged feature frame whose last meter reading is NaN.

    Parameters
    ----------
    il_electricity_cdd_hdd_hourly : mapping
        Must provide ``"meter_data"`` and ``"temperature_data"`` entries.

    Returns
    -------
    The result of ``merge_features`` applied to the meter values (as a
    ``meter_value`` column), the temperature features and the time features.
    """
    meter_data = il_electricity_cdd_hdd_hourly["meter_data"]
    # Copy the slice before mutating it: assigning into a bare slice can
    # write through to the shared source frame (or raise
    # SettingWithCopyWarning), which would leak the NaN into other users of
    # the same data.
    meter_data = meter_data[datetime(2017, 1, 4) : datetime(2017, 6, 1)].copy()
    # Simulates a segment where there is only a single nan value
    meter_data.iloc[-1] = np.nan
    temperature_data = il_electricity_cdd_hdd_hourly["temperature_data"]
    time_features = compute_time_features(meter_data.index)
    temperature_features = compute_temperature_features(
        meter_data.index,
        temperature_data,
        heating_balance_points=[50],
        cooling_balance_points=[65],
        degree_day_method="hourly",
    )
    return merge_features(
        [meter_data.value.to_frame("meter_value"), temperature_features, time_features]
    )
def test_merge_features():
    """Check the frame merge_features returns for a Series plus two DataFrames.

    All inputs share one 100-row hourly UTC index and hold the constant
    values 1-4 under the names ``a``-``d``. The assertions pin the column
    order, the shape, the totals and the index endpoints of the result.
    """
    hourly_index = pd.date_range("2017-01-01", periods=100, freq="H", tz="UTC")
    inputs = [
        pd.Series(1, index=hourly_index, name="a"),
        pd.DataFrame({"b": 2}, index=hourly_index),
        pd.DataFrame({"c": 3, "d": 4}, index=hourly_index),
    ]
    merged = merge_features(inputs)

    assert list(merged.columns) == ["a", "b", "c", "d"]
    assert merged.shape == (100, 4)
    assert merged.sum().sum() == 1000

    # Each column holds a constant, so its total is that constant * 100 rows.
    expected_totals = {"a": 100, "b": 200, "c": 300, "d": 400}
    for column, total in expected_totals.items():
        assert merged[column].sum() == total

    assert merged.index[0] == hourly_index[0]
    assert merged.index[-1] == hourly_index[-1]
def occupancy_precursor(il_electricity_cdd_hdd_hourly):
    """Merge meter values, temperature features and time features.

    Parameters
    ----------
    il_electricity_cdd_hdd_hourly : mapping
        Must provide ``"meter_data"`` and ``"temperature_data"`` entries.

    Returns
    -------
    The result of ``merge_features`` applied to the meter values (as a
    ``meter_value`` column), the temperature features and the time features.
    """
    readings = il_electricity_cdd_hdd_hourly["meter_data"]
    temperatures = il_electricity_cdd_hdd_hourly["temperature_data"]
    reading_index = readings.index

    hourly_time = compute_time_features(reading_index)
    # Degree-day features use fixed balance points: 50 heating, 65 cooling.
    hourly_temperature = compute_temperature_features(
        reading_index,
        temperatures,
        heating_balance_points=[50],
        cooling_balance_points=[65],
        degree_day_method="hourly",
    )

    meter_column = readings.value.to_frame("meter_value")
    feature_parts = [meter_column, hourly_temperature, hourly_time]
    return merge_features(feature_parts)
def test_merge_features_empty_raises():
    """merge_features is expected to raise ValueError for an empty list."""
    with pytest.raises(ValueError):
        # Only the raised exception matters, so the result is not bound.
        merge_features([])
# NOTE(review): this is an excerpt from inside a function whose header is not
# visible; `occupancy_lookup`, `segment_name`, `hour_of_week_feature`,
# `segmented_data` and `temperature_bins` are defined outside these lines.
# Look up the occupancy entry for this segment and derive the occupancy
# feature from the hour_of_week column.
occupancy = occupancy_lookup[segment_name]
occupancy_feature = compute_occupancy_feature(
hour_of_week_feature.hour_of_week, occupancy
)
# Temperature bin features.
# NOTE(review): `temperatures` is assigned but not used in the visible lines.
temperatures = segmented_data
# Index labels of this segment's temperature_bins entry, selected by indexing
# with the entry itself (presumably a boolean mask; confirm).
bin_endpoints_list = (
temperature_bins[segment_name].index[temperature_bins[segment_name]].tolist()
)
temperature_bin_features = compute_temperature_bin_features(
segmented_data.temperature_mean, bin_endpoints_list
)
# Combine hour-of-week, occupancy, temperature-bin and weight features.
# NOTE(review): the closing parenthesis of this call is not in the excerpt.
return merge_features(
[
hour_of_week_feature,
occupancy_feature,
temperature_bin_features,
segmented_data.weight,
]
# NOTE(review): excerpt from inside a function whose header is not visible;
# `temperature_file`, `meter_data`, `heating_balance_points` and
# `cooling_balance_points` are defined outside these lines.
# Temperature data is mandatory: load it from the given file, otherwise abort
# with a click error.
if temperature_file is not None:
# Treat the file as gzipped when its name ends with ".gz".
gzipped = temperature_file.name.endswith(".gz")
temperature_data = temperature_data_from_csv(
temperature_file, gzipped=gzipped, freq="hourly"
)
else:
raise click.ClickException("Temperature data not specified.")
# Build the usage-per-day feature from the meter data and the temperature
# features on the meter data index, then merge the two.
usage_per_day = compute_usage_per_day_feature(meter_data)
temperature_features = compute_temperature_features(
meter_data.index,
temperature_data,
heating_balance_points=heating_balance_points,
cooling_balance_points=cooling_balance_points,
)
return merge_features([usage_per_day, temperature_features])
# NOTE(review): excerpt from inside a function whose header is not visible;
# `segmented_data`, `occupancy_lookup`, `segment_name` and `temperature_bins`
# are defined outside these lines.
# Occupancy feature: derived from the hour_of_week column and this segment's
# occupancy lookup entry.
hour_of_week = segmented_data.hour_of_week
occupancy = occupancy_lookup[segment_name]
occupancy_feature = compute_occupancy_feature(hour_of_week, occupancy)
# Temperature bin features.
# NOTE(review): `temperatures` is assigned but not used in the visible lines;
# the call below reads segmented_data.temperature_mean directly.
temperatures = segmented_data.temperature_mean
# Index labels of this segment's temperature_bins entry, selected by indexing
# with the entry itself (presumably a boolean mask; confirm).
bin_endpoints_list = (
temperature_bins[segment_name].index[temperature_bins[segment_name]].tolist()
)
temperature_bin_features = compute_temperature_bin_features(
segmented_data.temperature_mean, bin_endpoints_list
)
# Combine the meter_value/hour_of_week columns with the occupancy,
# temperature-bin and weight features.
# NOTE(review): the closing parenthesis of this call is not in the excerpt.
return merge_features(
[
segmented_data[["meter_value", "hour_of_week"]],
occupancy_feature,
temperature_bin_features,
segmented_data.weight,
]
Returns
-------
design_matrix : :any:`pandas.DataFrame`
A design matrix with meter_value, hour_of_week, hdd_50, and cdd_65 features.
"""
# Time features: only hour_of_week is requested; hour_of_day and day_of_week
# are switched off.
time_features = compute_time_features(
meter_data.index, hour_of_week=True, hour_of_day=False, day_of_week=False
)
# Temperature features on the meter data index, using a heating balance point
# of 50, a cooling balance point of 65 and the "hourly" degree-day method.
temperature_features = compute_temperature_features(
meter_data.index,
temperature_data,
heating_balance_points=[50],
cooling_balance_points=[65],
degree_day_method="hourly",
)
# Merge the meter values (renamed to a "meter_value" column) with the
# temperature and time features.
design_matrix = merge_features(
[meter_data.value.to_frame("meter_value"), temperature_features, time_features]
)
return design_matrix
# NOTE(review): excerpt from inside a function whose header is not visible;
# `segmented_data`, `occupancy_lookup`, `segment_name` and `temperature_bins`
# are defined outside these lines.
# Occupancy feature: derived from the hour_of_week column and this segment's
# occupancy lookup entry.
hour_of_week = segmented_data.hour_of_week
occupancy = occupancy_lookup[segment_name]
occupancy_feature = compute_occupancy_feature(hour_of_week, occupancy)
# Temperature bin features.
# NOTE(review): `temperatures` is assigned but not used in the visible lines;
# the call below reads segmented_data.temperature_mean directly.
temperatures = segmented_data.temperature_mean
# Index labels of this segment's temperature_bins entry, selected by indexing
# with the entry itself (presumably a boolean mask; confirm).
bin_endpoints_list = (
temperature_bins[segment_name].index[temperature_bins[segment_name]].tolist()
)
# TODO(philngo): combine with compute_temperature_features
temperature_bin_features = compute_temperature_bin_features(
segmented_data.temperature_mean, bin_endpoints_list
)
# Combine the meter_value/hour_of_week columns with the occupancy,
# temperature-bin and weight features.
# NOTE(review): the closing parenthesis of this call is not in the excerpt.
return merge_features(
[
segmented_data[["meter_value", "hour_of_week"]],
occupancy_feature,
temperature_bin_features,
segmented_data.weight,
]