grouped_data = self._transform_target(self._left_pad_data, dataset)
# splits test dataset with rolling date into N R^d time series where
# N is the number of rolling evaluation dates
split_dataset = np.split(
grouped_data[FieldName.TARGET], self.num_test_dates
)
all_entries = list()
for dataset_at_test_date in split_dataset:
grouped_data = dict()
grouped_data[FieldName.TARGET] = np.array(
list(dataset_at_test_date), dtype=np.float32
)
grouped_data = self._restrict_max_dimensionality(grouped_data)
grouped_data[FieldName.START] = self.first_timestamp
grouped_data[FieldName.FEAT_STATIC_CAT] = [0]
all_entries.append(grouped_data)
return ListDataset(
all_entries, freq=self.frequency, one_dim_target=False
)
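# As the comment above describes, the stacked test target is cut along the first
# axis into one block per rolling evaluation date. A minimal standalone sketch of
# that np.split step (the shapes and values below are made up for illustration):
import numpy as np

# pretend grouped target: 3 rolling test dates, each holding 2 series of length 4
stacked_target = np.arange(3 * 2 * 4, dtype=np.float32).reshape(6, 4)
num_test_dates = 3

# np.split cuts the array into num_test_dates equal pieces along axis 0
per_date = np.split(stacked_target, num_test_dates)
print(len(per_date), per_date[0].shape)  # -> 3 (2, 4)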
def create_transformation(self) -> Transformation:
return Chain(
[
AsNumpyArray(field=FieldName.TARGET, expected_ndim=1),
AddTimeFeatures(
start_field=FieldName.START,
target_field=FieldName.TARGET,
output_field=FieldName.FEAT_TIME,
time_features=self.time_features,
pred_length=self.prediction_length,
),
SetFieldIfNotPresent(
field=FieldName.FEAT_STATIC_CAT, value=[0.0]
),
AsNumpyArray(field=FieldName.FEAT_STATIC_CAT, expected_ndim=1),
CanonicalInstanceSplitter(
target_field=FieldName.TARGET,
is_pad_field=FieldName.IS_PAD,
start_field=FieldName.START,
forecast_start_field=FieldName.FORECAST_START,
instance_sampler=TestSplitSampler(),
time_series_fields=[FieldName.FEAT_TIME],
instance_length=self.context_length,
use_prediction_features=True,
prediction_length=self.prediction_length,
                ),
            ]
        )
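# For context, a chain like the one above is applied lazily to a dataset. Below is a
# cut-down sketch in the same spirit (not the estimator's exact chain), assuming a
# GluonTS version where a Transformation is callable with (data_iterator, is_train);
# the toy frequency and length values are arbitrary:
from gluonts.dataset.common import ListDataset
from gluonts.dataset.field_names import FieldName
from gluonts.time_feature import time_features_from_frequency_str
from gluonts.transform import AddTimeFeatures, AsNumpyArray, Chain

toy_ds = ListDataset(
    [{"start": "2020-01-01 00:00:00", "target": list(range(48))}],
    freq="H",
)

# cut-down chain: convert the target and attach calendar time features
toy_chain = Chain(
    [
        AsNumpyArray(field=FieldName.TARGET, expected_ndim=1),
        AddTimeFeatures(
            start_field=FieldName.START,
            target_field=FieldName.TARGET,
            output_field=FieldName.FEAT_TIME,
            time_features=time_features_from_frequency_str("H"),
            pred_length=24,
        ),
    ]
)

for entry in toy_chain(iter(toy_ds), is_train=False):
    print(entry[FieldName.FEAT_TIME].shape)  # time features cover target + horizon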
            # interior of a second create_transformation chain (continues through
            # QuantizeScaled below)
            AddTimeFeatures(
                start_field=FieldName.START,
                target_field=FieldName.TARGET,
                output_field=FieldName.FEAT_TIME,
                time_features=time_features_from_frequency_str(self.freq),
                pred_length=self.prediction_length,
            ),
AddAgeFeature(
target_field=FieldName.TARGET,
output_field=FieldName.FEAT_AGE,
pred_length=self.prediction_length,
),
VstackFeatures(
output_field=FieldName.FEAT_TIME,
input_fields=[FieldName.FEAT_TIME, FieldName.FEAT_AGE],
),
SetFieldIfNotPresent(
field=FieldName.FEAT_STATIC_CAT, value=[0.0]
),
AsNumpyArray(field=FieldName.FEAT_STATIC_CAT, expected_ndim=1),
InstanceSplitter(
target_field=FieldName.TARGET,
is_pad_field=FieldName.IS_PAD,
start_field=FieldName.START,
forecast_start_field=FieldName.FORECAST_START,
train_sampler=ExpectedNumInstanceSampler(num_instances=1),
past_length=self.context_length,
future_length=pred_length,
output_NTC=False,
time_series_fields=[
FieldName.FEAT_TIME,
FieldName.OBSERVED_VALUES,
],
),
QuantizeScaled(
bin_edges=bin_edges.tolist(),
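# QuantizeScaled buckets the (scaled) target into the supplied bin_edges so values
# can be modelled as class indices. A rough standalone sketch of that idea in plain
# NumPy (the mean scaling and edge values here are illustrative assumptions, not
# GluonTS's exact logic):
import numpy as np

target = np.array([0.0, 1.0, 3.0, 10.0, 2.0])
scale = max(target.mean(), 1e-8)            # simple mean scale (illustrative)
bin_edges = np.linspace(0.0, 2.0, num=5)    # made-up edges for the sketch

# np.digitize maps each scaled value to the index of the bin it falls into
quantized = np.digitize(target / scale, bins=bin_edges)
print(quantized)  # integer bin ids usable as classification targets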
# This function generates the test and train JSON files, which will later be converted to CSV format
if not os.path.exists(os.path.dirname(filename)):
os.makedirs(os.path.dirname(filename))
with open(filename, "w") as json_file:
for ts in time_series:
if is_missing:
target = [] # type: List
# For Forecast don't output feat_static_cat and feat_static_real
for j, val in enumerate(ts[FieldName.TARGET]):
# replace every num_missing-th value with None to mark it as missing
if j != 0 and j % num_missing == 0:
target.append(None)
else:
target.append(val)
ts[FieldName.TARGET] = target
ts.pop(FieldName.FEAT_STATIC_CAT, None)
ts.pop(FieldName.FEAT_STATIC_REAL, None)
# Chop features in training set
if (
FieldName.FEAT_DYNAMIC_REAL in ts.keys()
and "train" in filename
):
# TODO: Fix for missing values
for i, feat_dynamic_real in enumerate(
ts[FieldName.FEAT_DYNAMIC_REAL]
):
ts[FieldName.FEAT_DYNAMIC_REAL][i] = feat_dynamic_real[
: len(ts[FieldName.TARGET])
]
json.dump(ts, json_file)
json_file.write("\n")
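# Each series is written as one JSON object per line (JSON Lines). For illustration,
# a record produced by the loop above could be read back like this; the field values
# are made up, and null is how a missing (None) target entry is serialized:
import json

line = '{"start": "2020-01-01 00:00:00", "target": [1.0, null, 3.0]}'
record = json.loads(line)
print(record["target"])  # [1.0, None, 3.0]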
transform.AsNumpyArray(
field=FieldName.TARGET, expected_ndim=1
),
transform.AddTimeFeatures(
start_field=FieldName.START,
target_field=FieldName.TARGET,
output_field=FieldName.FEAT_TIME,
time_features=time_features_from_frequency_str(self.freq),
pred_length=self.prediction_length,
),
transform.VstackFeatures(
output_field=FieldName.FEAT_DYNAMIC_REAL,
input_fields=[FieldName.FEAT_TIME],
),
transform.SetFieldIfNotPresent(
field=FieldName.FEAT_STATIC_CAT, value=[0.0]
),
transform.AsNumpyArray(
field=FieldName.FEAT_STATIC_CAT, expected_ndim=1
),
transform.InstanceSplitter(
target_field=FieldName.TARGET,
is_pad_field=FieldName.IS_PAD,
start_field=FieldName.START,
forecast_start_field=FieldName.FORECAST_START,
train_sampler=ExpectedNumInstanceSampler(num_instances=1),
past_length=self.context_length,
future_length=self.prediction_length,
time_series_fields=[FieldName.FEAT_DYNAMIC_REAL],
),
def create_transformation(self) -> Transformation:
return Chain(
trans=[
AsNumpyArray(field=FieldName.TARGET, expected_ndim=1),
AddTimeFeatures(
start_field=FieldName.START,
target_field=FieldName.TARGET,
output_field=FieldName.FEAT_TIME,
time_features=time_features_from_frequency_str(self.freq),
pred_length=self.prediction_length,
),
SetFieldIfNotPresent(
field=FieldName.FEAT_STATIC_CAT, value=[0.0]
),
AsNumpyArray(field=FieldName.FEAT_STATIC_CAT, expected_ndim=1),
InstanceSplitter(
target_field=FieldName.TARGET,
is_pad_field=FieldName.IS_PAD,
start_field=FieldName.START,
forecast_start_field=FieldName.FORECAST_START,
train_sampler=TestSplitSampler(),
time_series_fields=[FieldName.FEAT_TIME],
past_length=self.context_length,
future_length=self.prediction_length,
                ),
            ]
        )
CategoricalFeatureInfo(
name="feat_static_cat_000", cardinality="10"
)
],
feat_static_real=[BasicFeatureInfo(name="feat_static_real_000")],
)
start_date = "2000-01-01 00:00:00"
train_ds = ListDataset(
data_iter=[
{
FieldName.ITEM_ID: str(i),
FieldName.START: start_date,
FieldName.TARGET: [float(i)] * 24,
FieldName.FEAT_STATIC_CAT: [i],
FieldName.FEAT_STATIC_REAL: [float(i)],
}
for i in range(10)
],
freq=metadata.freq,
)
test_ds = ListDataset(
data_iter=[
{
FieldName.ITEM_ID: str(i),
FieldName.START: start_date,
FieldName.TARGET: [float(i)] * 30,
FieldName.FEAT_STATIC_CAT: [i],
FieldName.FEAT_STATIC_REAL: [float(i)],
}