# This snippet starts mid-block: `converted` was built and written to
# minmax_fp_ts_csv by an `if not os.path.isfile(minmax_fp_ts_csv):` branch
# (the same pattern is shown in full further below).
    del converted
else:
    print('file found %s, using for data' % minmax_fp_ts_csv)

if not os.path.isfile(minmax_fp_ts_csv):
    print('error :: file not found %s' % minmax_fp_ts_csv)
else:
    print('file exists to create the minmax_fp_ts data frame from - %s' % minmax_fp_ts_csv)

try:
    df = pd.read_csv(
        minmax_fp_ts_csv, delimiter=',', header=None,
        names=['metric', 'timestamp', 'value'])
    df.columns = ['metric', 'timestamp', 'value']
except:
    print('error :: failed to create data frame from %s' % (str(minmax_fp_ts_csv)))

try:
    df_features = extract_features(
        df, column_id='metric', column_sort='timestamp', column_kind=None,
        column_value=None, feature_extraction_settings=tsf_settings)
except:
    print('error :: failed to create df_features from %s' % (str(minmax_fp_ts_csv)))
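# Aside: `ReasonableFeatureExtractionSettings` and the
# `feature_extraction_settings` kwarg belong to an old tsfresh API. A
# minimal, self-contained sketch of the equivalent call against the
# current tsfresh API - the DataFrame contents and names here are made
# up for illustration, not part of the snippet above:
#
# import pandas as pd
# from tsfresh import extract_features
# from tsfresh.feature_extraction import EfficientFCParameters
#
# toy_df = pd.DataFrame({
#     'metric': ['m1'] * 5,
#     'timestamp': [1, 2, 3, 4, 5],
#     'value': [0.1, 0.4, 0.2, 0.5, 0.3],
# })
# toy_features = extract_features(
#     toy_df, column_id='metric', column_sort='timestamp',
#     default_fc_parameters=EfficientFCParameters(),
#     disable_progressbar=True)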
# Create transposed features csv
if not os.path.isfile(minmax_fp_fname_out):
    # Transpose
    df_t = df_features.transpose()
    df_t.to_csv(minmax_fp_fname_out)

try:
    # Calculate the count and sum of the feature values
    df_sum = pd.read_csv(
        minmax_fp_fname_out, delimiter=',', header=0,
        names=['feature_name', 'value'])
    df_sum.columns = ['feature_name', 'value']
except:
    # (except clause restored - the original snippet was truncated here)
    print('error :: failed to create df_sum from %s' % (str(minmax_fp_fname_out)))
converted = []
for datapoint in datapoints:
    try:
        new_datapoint = [float(datapoint[0]), float(datapoint[1])]
        converted.append(new_datapoint)
    except:  # nosec
        continue

for ts, value in converted:
    utc_ts_line = '%s,%s,%s\n' % (base_name, str(int(ts)), str(value))
    with open(anomalous_ts_csv, 'a') as fh:
        fh.write(utc_ts_line)
del converted
df = pd.read_csv(
    anomalous_ts_csv, delimiter=',', header=None,
    names=['metric', 'timestamp', 'value'])
df.columns = ['metric', 'timestamp', 'value']
df_features_current = extract_features(
    df, column_id='metric', column_sort='timestamp', column_kind=None,
    column_value=None, feature_extraction_settings=tsf_settings)
del df

# Create transposed features csv
if not os.path.isfile(anomalous_fp_fname_out):
    # Transpose
    df_t = df_features_current.transpose()
    df_t.to_csv(anomalous_fp_fname_out)
    # del df_t only exists inside this branch, so free it here
    del df_t
del df_features_current

# Calculate the count and sum of the feature values
df_sum_2 = pd.read_csv(
    anomalous_fp_fname_out, delimiter=',', header=0,
    names=['feature_name', 'value'])
df_sum_2.columns = ['feature_name', 'value']
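# Aside: a minimal sketch of the comparison this snippet is building
# towards - summing each feature vector and checking whether the two
# sums agree. The 1% tolerance below is illustrative only; Skyline's
# real similarity logic is configured elsewhere:
#
# fp_sum = df_sum['value'].sum()
# anomalous_sum = df_sum_2['value'].sum()
# tolerance = abs(fp_sum) * 0.01  # hypothetical 1% band
# if abs(fp_sum - anomalous_sum) <= tolerance:
#     print('feature sums match within tolerance')
# else:
#     print('feature sums differ')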
if os.path.isfile(ts_csv):
    os.remove(ts_csv)
for ts, value in converted:
    # print('%s,%s' % (str(int(ts)), str(value)))
    utc_ts_line = '%s,%s,%s\n' % (metric, str(int(ts)), str(value))
    with open(ts_csv, 'a') as fh:
        fh.write(utc_ts_line)
del converted

df = pd.read_csv(ts_csv, delimiter=',', header=None, names=['metric', 'timestamp', 'value'])
# print('DataFrame created with %s' % ts_csv)
df.columns = ['metric', 'timestamp', 'value']

tsf_settings = ReasonableFeatureExtractionSettings()
# Disable tqdm progress bar
tsf_settings.disable_progressbar = True

df_features = extract_features(
    df, column_id='metric', column_sort='timestamp', column_kind=None,
    column_value=None, feature_extraction_settings=tsf_settings)
del df
# print('features extracted from %s data' % ts_csv)

# write to disk
fname_out = fname_in + '.features.csv'

# Transpose
df_t = df_features.transpose()
# print('features transposed')

# Create transposed features csv
t_fname_out = fname_in + '.features.transposed.csv'
df_t.to_csv(t_fname_out)
del df_t

# Calculate the count and sum of the feature values
# (the read_csv call was truncated in the original snippet; it is
# completed here following the identical calls shown above)
df_sum = pd.read_csv(
    t_fname_out, delimiter=',', header=0,
    names=['feature_name', 'value'])
df_sum.columns = ['feature_name', 'value']
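# Aside: the same CSV -> features -> transposed-CSV pipeline as one small
# reusable function against the current tsfresh API. The function name
# and the choice of MinimalFCParameters are illustrative assumptions,
# not part of the original code:
#
# import pandas as pd
# from tsfresh import extract_features
# from tsfresh.feature_extraction import MinimalFCParameters
#
# def features_csv_from_timeseries_csv(ts_csv_path, out_path):
#     # Expects rows of "metric,timestamp,value" with no header
#     df = pd.read_csv(ts_csv_path, delimiter=',', header=None,
#                      names=['metric', 'timestamp', 'value'])
#     df_features = extract_features(
#         df, column_id='metric', column_sort='timestamp',
#         default_fc_parameters=MinimalFCParameters(),
#         disable_progressbar=True)
#     # One row per metric; transpose so each feature becomes a row
#     df_features.transpose().to_csv(out_path)
#     return out_path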
minmax_fp_ts_csv = '%s/fpid.%s.%s.minmax_fp_ts.tsfresh.input.std.csv' % (
    settings.SKYLINE_TMP_DIR, str(fp_id), base_name)
if os.path.isfile(minmax_fp_ts_csv):
    os.remove(minmax_fp_ts_csv)
minmax_fp_fname_out = minmax_fp_ts_csv + '.transposed.csv'
if os.path.isfile(minmax_fp_fname_out):
    os.remove(minmax_fp_fname_out)
anomalous_ts_csv = '%s/%s.%s.minmax_anomalous_ts.tsfresh.std.csv' % (
    settings.SKYLINE_TMP_DIR, metric_timestamp, base_name)
if os.path.isfile(anomalous_ts_csv):
    os.remove(anomalous_ts_csv)
anomalous_fp_fname_out = anomalous_ts_csv + '.transposed.csv'
if os.path.isfile(anomalous_fp_fname_out):
    os.remove(anomalous_fp_fname_out)

tsf_settings = ReasonableFeatureExtractionSettings()
tsf_settings.disable_progressbar = True
minmax_fp_features_sum = None
minmax_anomalous_features_sum = None

if minmax_anomalous_ts and minmax_fp_ts:
    if not os.path.isfile(minmax_fp_ts_csv):
        datapoints = minmax_fp_ts
        converted = []
        for datapoint in datapoints:
            try:
                new_datapoint = [float(datapoint[0]), float(datapoint[1])]
                converted.append(new_datapoint)
            except:  # nosec
                continue
        for ts, value in converted:
            try:
                utc_ts_line = '%s,%s,%s\n' % (base_name, str(int(ts)), str(value))
                # (write restored - the original snippet was truncated here;
                # it follows the same open/write pattern used above)
                with open(minmax_fp_ts_csv, 'a') as fh:
                    fh.write(utc_ts_line)
            except:  # nosec
                continue
        del converted
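# Aside: the float()-and-skip conversion above exists because fetched
# series can contain datapoints that do not coerce to floats (e.g. None
# values). A self-contained illustration with made-up datapoints:
#
# datapoints = [(1609459200, 1.0), (1609459260, None), (1609459320, 2.5)]
# converted = []
# for datapoint in datapoints:
#     try:
#         converted.append([float(datapoint[0]), float(datapoint[1])])
#     except (TypeError, ValueError):
#         continue  # drop datapoints that cannot be coerced to floats
# # converted == [[1609459200.0, 1.0], [1609459320.0, 2.5]]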
all_values = []
for i, values in enumerate(column_data):
    if values is None:
        values = [0] * self.max_series_len
    elif isinstance(values, list):
        values = list(map(float, values))
    else:
        values = list(map(float, values.split(' ')))
    all_values.append(values)

    # One synthetic id so tsfresh treats the whole column as one series
    df = pd.DataFrame({'main_feature': values, 'id': [1] * len(values)})
    try:
        features = extract_features(
            df, column_id='id', disable_progressbar=True,
            default_fc_parameters=default_fc_parameters, n_jobs=self.n_jobs)
    except:
        # Retry serially if parallel extraction fails
        self.n_jobs = 1
        features = extract_features(
            df, column_id='id', disable_progressbar=True,
            default_fc_parameters=default_fc_parameters, n_jobs=self.n_jobs)
    features.fillna(value=0, inplace=True)
    features = list(features.iloc[0])
    ret.append(features)

for i, values in enumerate(all_values):
    while len(values) < self.max_series_len:
        values.append(0)
    encoded_values = self.numerical_encoder.encode(values)
    encoded_numbers_list = []
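# Aside: a self-contained sketch of the per-series extraction pattern
# above - a single synthetic id column per series, with the fallback to
# one job when parallel extraction fails. The data and the choice of
# MinimalFCParameters are illustrative assumptions:
#
# import pandas as pd
# from tsfresh import extract_features
# from tsfresh.feature_extraction import MinimalFCParameters
#
# values = [0.0, 1.0, 0.5, 2.0]
# df = pd.DataFrame({'main_feature': values, 'id': [1] * len(values)})
# try:
#     features = extract_features(
#         df, column_id='id', disable_progressbar=True,
#         default_fc_parameters=MinimalFCParameters(), n_jobs=4)
# except Exception:
#     # Multiprocessing can fail in constrained environments; retry serially
#     features = extract_features(
#         df, column_id='id', disable_progressbar=True,
#         default_fc_parameters=MinimalFCParameters(), n_jobs=1)
# feature_vector = list(features.fillna(0).iloc[0])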
    # (this snippet begins inside the docstring of a compression helper)
    :param column_value: The name for the column keeping the value itself.
    :type column_value: str
    """
    dd, column_id, column_kind, column_value = \
        _normalize_input_to_internal_representation(ts, column_id, column_sort, column_kind, column_value)

    def create_bins(v):
        # Label consecutive interval_length-sized chunks 0, 1, 2, ...
        n_bins = np.ceil(len(v) / interval_length)
        return np.repeat(np.arange(n_bins), interval_length)[:len(v)]

    dd[column_id] = dd[column_id].apply(str) + "_bin_" + \
        dd.groupby([column_id, column_kind])[column_value].transform(create_bins).apply(str)

    dd = extract_features(dd,
                          column_id=column_id,
                          column_value=column_value,
                          column_kind=column_kind,
                          default_fc_parameters=compression_functions)

    dd.columns = [x.replace("__", "_") for x in dd.columns]
    dd.columns = [x.replace("feature", "map") for x in dd.columns]
    dd.reset_index(drop=False, inplace=True)

    # Split the synthetic "<id>_bin_<n>" ids back into id and bin number.
    # float() replaces the original eval() - same result for these numeric
    # strings, without executing arbitrary input - and dd[column_id] is
    # used consistently instead of the hard-coded dd["id"].
    ids = dd[column_id].str.split("_bin_").apply(lambda s: s[0])
    bin_number = dd[column_id].str.split("_bin_").apply(lambda s: float(s[1]))
    dd[column_id] = ids
    dd["bin"] = bin_number

    return dd.sort_values(by=[column_id, "bin"])
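# Aside: what create_bins() produces on a toy vector - consecutive
# interval_length-sized chunks share one bin label (values illustrative):
#
# import numpy as np
#
# interval_length = 3
# v = np.arange(8)
# n_bins = np.ceil(len(v) / interval_length)              # 3.0
# labels = np.repeat(np.arange(n_bins), interval_length)[:len(v)]
# # labels == array([0., 0., 0., 1., 1., 1., 2., 2.])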
local_dt = local.localize(naive, is_dst=None)
utc_dt = local_dt.astimezone(pytz.utc)
# NOTE: strftime('%s') relies on a platform (glibc) extension; a portable
# alternative is sketched after this snippet
timestamp = utc_dt.strftime('%s')
utc_ts_line = '%s,%s,%s\n' % (metric, str(timestamp), value)
with open(tmp_csv, 'a') as fh:
    fh.write(utc_ts_line)

df = pd.read_csv(tmp_csv, delimiter=',', header=None, names=['metric', 'timestamp', 'value'])
# if os.path.isfile(tmp_csv):
#     os.remove(tmp_csv)
df.columns = ['metric', 'timestamp', 'value']

start_feature_extraction = timer()
try:
    df_features = extract_features(
        df, column_id='metric', column_sort='timestamp',
        column_kind=None, column_value=None)
except:
    # traceback.print_exc() returns None, so format_exc() is printed instead
    print(traceback.format_exc())
    print(colored('error: extracting features with tsfresh', 'red'))
    sys.exit(1)
end_feature_extraction = timer()
print(colored('notice: extracting features with tsfresh took %.6f seconds', 'cyan') % (end_feature_extraction - start_feature_extraction))

# write to disk
fname_out = fname_in + '.features.csv'
df_features.to_csv(fname_out)

# Transpose
df_t = df_features.transpose()
t_fname_out = fname_in + '.features.transposed.csv'
df_t.to_csv(t_fname_out)
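# Aside: a portable replacement for the strftime('%s') epoch conversion
# above. The 'Europe/London' zone and the naive datetime are made up for
# illustration:
#
# import calendar
# from datetime import datetime
# import pytz
#
# local = pytz.timezone('Europe/London')
# naive = datetime(2021, 1, 1, 12, 0, 0)
# local_dt = local.localize(naive, is_dst=None)
# utc_dt = local_dt.astimezone(pytz.utc)
# timestamp = calendar.timegm(utc_dt.timetuple())  # portable epoch seconds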
def time_series_has_duplicate_min(x):
    """
    Checks if the minimal value of x is observed more than once

    :param x: the time series to calculate the feature of
    :type x: pandas.Series
    :return: the value of this feature
    :return type: bool
    """
    return ts_feature_calculators.has_duplicate_min(x)
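# Aside: a quick usage sketch of the wrapped calculator; the alias matches
# the snippet's `ts_feature_calculators` import name:
#
# import pandas as pd
# from tsfresh.feature_extraction import feature_calculators as ts_feature_calculators
#
# x = pd.Series([3, 1, 2, 1, 5])
# print(ts_feature_calculators.has_duplicate_min(x))  # True - the min (1) appears twice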