How to use the `tsfresh.extract_features` function

To help you get started, we’ve selected a few tsfresh examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

Example from MaxBenChrist/tspreprocess — tspreprocess/compress/compress.py (view on GitHub)
:param column_value: The name for the column keeping the value itself.
    :type column_value: str
    """

    dd, column_id, column_kind, column_value = \
        _normalize_input_to_internal_representation(ts, column_id, column_sort, column_kind, column_value)

    def create_bins(v):
        n_bins = np.ceil(len(v) / interval_length)
        return np.repeat(np.arange(n_bins), interval_length)[:len(v)]

    dd[column_id] = dd[column_id].apply(str) + "_bin_" + \
                    dd.groupby([column_id, column_kind])[column_value].transform(create_bins).apply(str)

    dd = extract_features(dd,
                          column_id=column_id,
                          column_value=column_value,
                          column_kind=column_kind,
                          default_fc_parameters=compression_functions)

    dd.columns = [x.replace("__", "_") for x in dd.columns]
    dd.columns = [x.replace("feature", "map") for x in dd.columns]
    dd.reset_index(drop=False, inplace=True)

    ids = dd[column_id].str.split("_bin_").apply(lambda s: s[0])
    bin_number = dd["id"].str.split("_bin_").apply(lambda s: eval(s[1]))

    dd[column_id] = ids
    dd["bin"] =  bin_number

    return dd.sort_values(by=[column_id, "bin"])
Example from earthgecko/skyline — skyline/tsfresh_features/scripts/tsfresh_graphite_csv.py (view on GitHub)
local_dt = local.localize(naive, is_dst=None)
            utc_dt = local_dt.astimezone(pytz.utc)
            timestamp = utc_dt.strftime('%s')
            utc_ts_line = '%s,%s,%s\n' % (metric, str(timestamp), value)
            with open(tmp_csv, 'a') as fh:
                fh.write(utc_ts_line)

    # `names=` already labels the columns, so no separate df.columns
    # assignment is needed.
    df = pd.read_csv(tmp_csv, delimiter=',', header=None, names=['metric', 'timestamp', 'value'])

    # NOTE(review): tmp_csv cleanup is deliberately disabled in this variant
    # (the generate_tsfresh_features script removes it) -- confirm intent.
    # if os.path.isfile(tmp_csv):
    #     os.remove(tmp_csv)

    start_feature_extraction = timer()
    try:
        df_features = extract_features(df, column_id='metric', column_sort='timestamp', column_kind=None, column_value=None)
    except Exception:
        # Catch only real errors (a bare except would also swallow
        # KeyboardInterrupt/SystemExit).  Use format_exc(): print_exc()
        # writes to stderr and returns None, so the old
        # print(traceback.print_exc()) also printed a stray "None".
        print(traceback.format_exc())
        print(colored('error: extracting features with tsfresh', 'red'))
        sys.exit(1)

    end_feature_extraction = timer()
    print(colored('notice: extracting features with tsfresh took %.6f seconds', 'cyan') % (end_feature_extraction - start_feature_extraction))

    # Persist the extracted feature matrix next to the input file.
    fname_out = fname_in + '.features.csv'
    df_features.to_csv(fname_out)

    # Also write a transposed copy (one feature per row).
    df_t = df_features.transpose()
    t_fname_out = fname_in + '.features.transposed.csv'
    df_t.to_csv(t_fname_out)
Example from earthgecko/skyline — skyline/tsfresh_features/generate_tsfresh_features.py (view on GitHub)
utc_ts_line = '%s,%s,%s\n' % (metric, str(timestamp), value)
        with open(tmp_csv, 'a') as fh:
            fh.write(utc_ts_line)
    # TO HERE

    start = timer()

    # `names=` already labels the columns, so no separate df.columns
    # assignment is needed.
    df = pd.read_csv(tmp_csv, delimiter=',', header=None, names=['metric', 'timestamp', 'value'])

    # The temporary CSV has served its purpose; remove it before the
    # (potentially long) feature-extraction step.
    if os.path.isfile(tmp_csv):
        os.remove(tmp_csv)

    start_feature_extraction = timer()
    try:
        df_features = extract_features(df, column_id='metric', column_sort='timestamp', column_kind=None, column_value=None)
    except Exception:
        # Catch only real errors (a bare except would also swallow
        # KeyboardInterrupt/SystemExit).  Use format_exc(): print_exc()
        # writes to stderr and returns None, so the old
        # print(traceback.print_exc()) also printed a stray "None".
        print(traceback.format_exc())
        print(colored('error: extracting features with tsfresh', 'red'))
        sys.exit(1)

    end_feature_extraction = timer()
    print(colored('notice: extracting features with tsfresh took %.6f seconds', 'cyan') % (end_feature_extraction - start_feature_extraction))

    # Persist the extracted feature matrix next to the input file.
    fname_out = fname_in + '.features.csv'
    df_features.to_csv(fname_out)

    # Also write a transposed copy (one feature per row).
    df_t = df_features.transpose()
    t_fname_out = fname_in + '.features.transposed.csv'
    df_t.to_csv(t_fname_out)
Example from carlomazzaferro/kryptoflow — kryptoflow/ml/__deprecated__/transforms.py (view on GitHub)
def transform(self, X, y=None):
        """Extract tsfresh features from X; y is passed through unchanged."""
        # `unzip` separates the paired samples back into series and labels.
        X, y = unzip(X, y)
        feature_frame = extract_features(X,
                                         column_id="id",
                                         column_sort="time",
                                         column_value="value",
                                         impute_function=impute,
                                         show_warnings=False)
        return feature_frame, y
Example from ow2-proactive/proactive-examples — MachineLearningScripts/resources/catalog/Tsfresh_Features_Extraction.py (view on GitHub)
"length": None,
        "absolute_sum_of_changes": None,
        "abs_energy": None,
        # "sample_entropy": None,
        "number_peaks": [{"n": 2}],
        "number_cwt_peaks": [{"n": 2}, {"n": 3}],
        "autocorrelation": [{"lag": 2}, {"lag": 3}]
        # "value_count": #"large_standard_deviation": [{"r": 0.05}, {"r": 0.1}]
    }
# For convenience, three dictionaries are predefined and can be used right away
# ComprehensiveFCParameters, MinimalFCParameters, EfficientFCParameters
# MinimalFCParameters is set by default
else:
    extraction_settings = MinimalFCParameters()

# Run tsfresh feature extraction: rows are grouped by ref_column and each
# series is ordered by time_column; the feature set is controlled by
# extraction_settings (chosen above).
extracted_features = extract_features(dataframe_df,
                                      column_id=ref_column,
                                      column_sort=time_column,
                                      default_fc_parameters=extraction_settings)
# extract_features returns a frame indexed by the id values; copy the index
# back into a regular column so downstream consumers can read it by name.
extracted_features[ref_column] = extracted_features.index

# Serialize ('split' orientation keeps index/columns/data separate) and
# bz2-compress before handing the payload off.
dataframe_json = extracted_features.to_json(orient='split').encode()
compressed_data = bz2.compress(dataframe_json)

# Store the compressed dataframe under a fresh UUID key.
# NOTE(review): `variables` appears to be the ProActive task variable
# store -- confirm against the workflow runtime.
dataframe_id = str(uuid.uuid4())
variables.put(dataframe_id, compressed_data)

print("dataframe id: ", dataframe_id)
print('dataframe size (original):   ', sys.getsizeof(dataframe_json), " bytes")
print('dataframe size (compressed): ', sys.getsizeof(compressed_data), " bytes")

# Record this script's filename as the task name in the result metadata
# (resultMetadata is presumably the ProActive result-metadata map -- verify).
resultMetadata.put("task.name", __file__)