Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_min_contiguous_months(input_df):
    """Check that fitting raises when nine contiguous months are required.

    An ``HourlyDayOfWeekModel`` is built with ``min_contiguous_months=9`` and
    ``fit`` is called on ``input_df``; the call is expected to raise
    ``model_exceptions.DataSufficiencyException``.

    Args:
        input_df: data handed to ``model.fit`` (presumably a pytest fixture
            defined elsewhere -- confirm).
    """
    required_months = 9
    hourly_model = HourlyDayOfWeekModel(min_contiguous_months=required_months)
    expected_error = model_exceptions.DataSufficiencyException
    with pytest.raises(expected_error) as sufficiency_exception:
        hourly_model.fit(input_df)
def meets_sufficiency_or_error(self, df):
    """Raise if ``df`` is shorter than the contiguous-month minimum.

    The threshold is ``self.min_contiguous_months * 30 * 24`` rows
    (presumably 30 days of 24 hourly rows per month -- confirm against the
    data frequency callers pass in).

    Args:
        df: any object supporting ``len()``.

    Raises:
        model_exceptions.DataSufficiencyException: when ``len(df)`` is below
            the threshold.
    """
    required_rows = self.min_contiguous_months * 30 * 24
    if len(df) < required_rows:
        # The message text is reproduced verbatim (spelling included), since
        # it is runtime output.
        message = (
            "Min Contigous Month criteria not satisifed: Min Months Reqd: "
            + str(self.min_contiguous_months)
        )
        raise model_exceptions.DataSufficiencyException(message)
direction=direction,
req=_n,
mp=mp_type,
upd_n=upd_n_non_nan,
temp_n=temp_n_non_nan
)
)
if reason is not None:
raise model_exceptions.DataSufficiencyException(
'Data does not meet minimum contiguous months requirement. {}'
.format(reason)
)
if not np.nansum(upd) > 0.01:
raise model_exceptions.DataSufficiencyException(
"Energy trace data is all or nearly all zero")
return
def meets_sufficiency_or_error(self, df):
    """Validate usage coverage and row count of ``df``, raising on failure.

    Checks are applied in order:

    1. The number of finite entries in ``df['usage']`` must not be below
       ``self.min_fraction_coverage * len(df)``.
    2. ``len(df)`` must not be below ``self.min_contiguous_months * 30``.

    Args:
        df: indexable by ``'usage'`` and supporting ``len()`` (presumably a
            pandas DataFrame -- confirm against callers).

    Raises:
        model_exceptions.DataSufficiencyException: with message
            ``"Insufficient coverage"`` or ``"Insufficient data"`` depending
            on which check fails first.
    """
    finite_usage_count = np.sum(np.isfinite(df['usage']))
    coverage_floor = self.min_fraction_coverage * len(df)
    if finite_usage_count < coverage_floor:
        raise model_exceptions.DataSufficiencyException("Insufficient coverage")

    row_count = len(df)
    if row_count < self.min_contiguous_months * 30:
        raise model_exceptions.DataSufficiencyException("Insufficient data")
    return
def meets_sufficiency_or_error(self, df):
    """Raise unless ``df`` has enough finite usage values and enough rows.

    The coverage check runs first: the count of finite values in
    ``df['usage']`` is compared with ``self.min_fraction_coverage * len(df)``.
    The length check runs second: ``len(df)`` is compared with
    ``self.min_contiguous_months * 30``.

    Args:
        df: indexable by ``'usage'`` and supporting ``len()`` (presumably a
            pandas DataFrame -- confirm against callers).

    Raises:
        model_exceptions.DataSufficiencyException: ``"Insufficient coverage"``
            when the finite count is below the coverage threshold, otherwise
            ``"Insufficient data"`` when the row count is below the length
            threshold.
    """
    usage_is_finite = np.isfinite(df['usage'])
    if np.sum(usage_is_finite) < self.min_fraction_coverage * len(df):
        coverage_message = "Insufficient coverage"
        raise model_exceptions.DataSufficiencyException(coverage_message)

    n_rows = len(df)
    if n_rows < self.min_contiguous_months * 30:
        length_message = "Insufficient data"
        raise model_exceptions.DataSufficiencyException(length_message)
    return None
def billing_to_monthly_avg(self, trace_and_temp):
''' Helper function to handle monthly billing or other irregular data.
'''
(energy_data, temp_data) = trace_and_temp
# Handle empty series
if energy_data.empty:
raise model_exceptions.DataSufficiencyException("No energy trace data")
if temp_data.empty:
raise model_exceptions.DataSufficiencyException("No temperature data")
# Convert billing multiindex to straight index
temp_data.index = temp_data.index.droplevel()
# Resample temperature data to daily
temp_data_daily = temp_data.resample('D').apply(np.mean)[0]
# Drop any duplicate indices
energy_data = energy_data[
~energy_data.index.duplicated(keep='last')].sort_index()
# Check for empty series post-resampling and deduplication
if energy_data.empty:
raise model_exceptions.DataSufficiencyException(
def billing_to_monthly_avg(self, trace_and_temp):
''' Helper function to handle monthly billing or other irregular data.
'''
(energy_data, temp_data) = trace_and_temp
# Handle empty series
if energy_data.empty:
raise model_exceptions.DataSufficiencyException("No energy trace data")
if temp_data.empty:
raise model_exceptions.DataSufficiencyException("No temperature data")
# Convert billing multiindex to straight index
temp_data.index = temp_data.index.droplevel()
# Resample temperature data to daily
temp_data_daily = temp_data.resample('D').apply(np.mean)[0]
# Drop any duplicate indices
energy_data = energy_data[
~energy_data.index.duplicated(keep='last')].sort_index()
# Check for empty series post-resampling and deduplication
if energy_data.empty:
raise model_exceptions.DataSufficiencyException(
"No energy trace data after deduplication")
if temp_data_daily.empty:
# Convert billing multiindex to straight index
temp_data.index = temp_data.index.droplevel()
# Resample temperature data to daily
temp_data_daily = temp_data.resample('D').apply(np.mean)[0]
# Drop any duplicate indices
energy_data = energy_data[
~energy_data.index.duplicated(keep='last')].sort_index()
# Check for empty series post-resampling and deduplication
if energy_data.empty:
raise model_exceptions.DataSufficiencyException(
"No energy trace data after deduplication")
if temp_data_daily.empty:
raise model_exceptions.DataSufficiencyException(
"No temperature data after resampling")
# get daily mean values
upd_data_daily_mean_values = [
value / (e - s).days for value, s, e in
zip(energy_data, energy_data.index, energy_data.index[1:])
] + [np.nan] # add missing last data point, which is null by convention anyhow
usage_data_daily_mean_values = [
value for value, s, e in
zip(energy_data, energy_data.index, energy_data.index[1:])
] + [np.nan] # add missing last data point, which is null by convention anyhow
# Create arrays to hold computed CDD and HDD for each
# balance point temperature.
cdd = {i: [0] for i in self.bp_cdd}
hdd = {i: [0] for i in self.bp_hdd}