How to use tsfresh - 10 common examples

To help you get started, we’ve selected a few tsfresh examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github earthgecko / skyline / utils / test_ionosphere_echo.py View on Github external
del converted
        else:
            print('file found %s, using for data' % minmax_fp_ts_csv)

        if not os.path.isfile(minmax_fp_ts_csv):
            print('error :: file not found %s' % minmax_fp_ts_csv)
        else:
            print('file exists to create the minmax_fp_ts data frame from - %s' % minmax_fp_ts_csv)

        try:
            df = pd.read_csv(minmax_fp_ts_csv, delimiter=',', header=None, names=['metric', 'timestamp', 'value'])
            df.columns = ['metric', 'timestamp', 'value']
        except:
            print('error :: failed to created data frame from %s' % (str(minmax_fp_ts_csv)))
        try:
            df_features = extract_features(
                df, column_id='metric', column_sort='timestamp', column_kind=None,
                column_value=None, feature_extraction_settings=tsf_settings)
        except:
            print('error :: failed to created df_features from %s' % (str(minmax_fp_ts_csv)))
        # Create transposed features csv
        if not os.path.isfile(minmax_fp_fname_out):
            # Transpose
            df_t = df_features.transpose()
            df_t.to_csv(minmax_fp_fname_out)

        try:
            # Calculate the count and sum of the features values
            df_sum = pd.read_csv(
                minmax_fp_fname_out, delimiter=',', header=0,
                names=['feature_name', 'value'])
            df_sum.columns = ['feature_name', 'value']
github earthgecko / skyline / utils / test_ionosphere_echo.py View on Github external
converted = []
            for datapoint in datapoints:
                try:
                    new_datapoint = [float(datapoint[0]), float(datapoint[1])]
                    converted.append(new_datapoint)
                except:  # nosec
                    continue
            for ts, value in converted:
                utc_ts_line = '%s,%s,%s\n' % (base_name, str(int(ts)), str(value))
                with open(anomalous_ts_csv, 'a') as fh:
                    fh.write(utc_ts_line)
            del converted

        df = pd.read_csv(anomalous_ts_csv, delimiter=',', header=None, names=['metric', 'timestamp', 'value'])
        df.columns = ['metric', 'timestamp', 'value']
        df_features_current = extract_features(
            df, column_id='metric', column_sort='timestamp', column_kind=None,
            column_value=None, feature_extraction_settings=tsf_settings)
        del df

        # Create transposed features csv
        if not os.path.isfile(anomalous_fp_fname_out):
            # Transpose
            df_t = df_features_current.transpose()
            df_t.to_csv(anomalous_fp_fname_out)
            del df_t
            del df_features_current
        # Calculate the count and sum of the features values
        df_sum_2 = pd.read_csv(
            anomalous_fp_fname_out, delimiter=',', header=0,
            names=['feature_name', 'value'])
        df_sum_2.columns = ['feature_name', 'value']
github earthgecko / skyline / utils / test_ionosphere_echo.py View on Github external
os.remove(ts_csv)

    for ts, value in converted:
        # print('%s,%s' % (str(int(ts)), str(value)))
        utc_ts_line = '%s,%s,%s\n' % (metric, str(int(ts)), str(value))
        with open(ts_csv, 'a') as fh:
            fh.write(utc_ts_line)
    del converted

    df = pd.read_csv(ts_csv, delimiter=',', header=None, names=['metric', 'timestamp', 'value'])
#    print('DataFrame created with %s' % ts_csv)
    df.columns = ['metric', 'timestamp', 'value']
    tsf_settings = ReasonableFeatureExtractionSettings()
    # Disable tqdm progress bar
    tsf_settings.disable_progressbar = True
    df_features = extract_features(
        df, column_id='metric', column_sort='timestamp', column_kind=None,
        column_value=None, feature_extraction_settings=tsf_settings)
    del df
#    print('features extracted from %s data' % ts_csv)
    # write to disk
    fname_out = fname_in + '.features.csv'
    # Transpose
    df_t = df_features.transpose()
#    print('features transposed')
    # Create transposed features csv
    t_fname_out = fname_in + '.features.transposed.csv'
    df_t.to_csv(t_fname_out)
    del df_t
    # Calculate the count and sum of the features values
    df_sum = pd.read_csv(
        t_fname_out, delimiter=',', header=0,
github earthgecko / skyline / utils / test_ionosphere_echo.py View on Github external
minmax_fp_ts_csv = '%s/fpid.%s.%s.minmax_fp_ts.tsfresh.input.std.csv' % (
        settings.SKYLINE_TMP_DIR, str(fp_id), base_name)
    if os.path.isfile(minmax_fp_ts_csv):
        os.remove(minmax_fp_ts_csv)
    minmax_fp_fname_out = minmax_fp_ts_csv + '.transposed.csv'
    if os.path.isfile(minmax_fp_fname_out):
        os.remove(minmax_fp_fname_out)
    anomalous_ts_csv = '%s/%s.%s.minmax_anomalous_ts.tsfresh.std.csv' % (
        settings.SKYLINE_TMP_DIR, metric_timestamp, base_name)
    if os.path.isfile(anomalous_ts_csv):
        os.remove(anomalous_ts_csv)
    anomalous_fp_fname_out = anomalous_ts_csv + '.transposed.csv'
    if os.path.isfile(anomalous_fp_fname_out):
        os.remove(anomalous_fp_fname_out)

    tsf_settings = ReasonableFeatureExtractionSettings()
    tsf_settings.disable_progressbar = True
    minmax_fp_features_sum = None
    minmax_anomalous_features_sum = None
    if minmax_anomalous_ts and minmax_fp_ts:
        if not os.path.isfile(minmax_fp_ts_csv):
            datapoints = minmax_fp_ts
            converted = []
            for datapoint in datapoints:
                try:
                    new_datapoint = [float(datapoint[0]), float(datapoint[1])]
                    converted.append(new_datapoint)
                except:  # nosec
                    continue
            for ts, value in converted:
                try:
                    utc_ts_line = '%s,%s,%s\n' % (base_name, str(int(ts)), str(value))
github earthgecko / skyline / utils / test_ionosphere_echo.py View on Github external
continue

    if os.path.isfile(ts_csv):
        os.remove(ts_csv)

    for ts, value in converted:
        # print('%s,%s' % (str(int(ts)), str(value)))
        utc_ts_line = '%s,%s,%s\n' % (metric, str(int(ts)), str(value))
        with open(ts_csv, 'a') as fh:
            fh.write(utc_ts_line)
    del converted

    df = pd.read_csv(ts_csv, delimiter=',', header=None, names=['metric', 'timestamp', 'value'])
#    print('DataFrame created with %s' % ts_csv)
    df.columns = ['metric', 'timestamp', 'value']
    tsf_settings = ReasonableFeatureExtractionSettings()
    # Disable tqdm progress bar
    tsf_settings.disable_progressbar = True
    df_features = extract_features(
        df, column_id='metric', column_sort='timestamp', column_kind=None,
        column_value=None, feature_extraction_settings=tsf_settings)
    del df
#    print('features extracted from %s data' % ts_csv)
    # write to disk
    fname_out = fname_in + '.features.csv'
    # Transpose
    df_t = df_features.transpose()
#    print('features transposed')
    # Create transposed features csv
    t_fname_out = fname_in + '.features.transposed.csv'
    df_t.to_csv(t_fname_out)
    del df_t
github mindsdb / lightwood / lightwood / encoders / time_series / ts_fresh_ts.py View on Github external
all_values = []


        for i, values in enumerate(column_data):
            if values is None:
                values = [0] * self.max_series_len
            elif type(values) == type([]):
                values = list(map(float,values))
            else:
                values = list(map(lambda x: float(x), values.split(' ')))

            all_values.append(values)
            df = pd.DataFrame({'main_feature': values, 'id': [1] * len(values)})

            try:
                features = extract_features(df, column_id='id',disable_progressbar=True, default_fc_parameters=default_fc_parameters,n_jobs=self.n_jobs)
            except:
                self.n_jobs = 1
                features = extract_features(df, column_id='id',disable_progressbar=True, default_fc_parameters=default_fc_parameters,n_jobs=self.n_jobs)

            features.fillna(value=0, inplace=True)

            features = list(features.iloc[0])
            ret.append(features)

        for i, values in  enumerate(all_values):
            while len(values) < self.max_series_len:
                values.append(0)

            encoded_values = self.numerical_encoder.encode(values)

            encoded_numbers_list = []
github MaxBenChrist / tspreprocess / tspreprocess / compress / compress.py View on Github external
:param column_value: The name for the column keeping the value itself.
    :type column_value: str
    """

    dd, column_id, column_kind, column_value = \
        _normalize_input_to_internal_representation(ts, column_id, column_sort, column_kind, column_value)

    def create_bins(v):
        n_bins = np.ceil(len(v) / interval_length)
        return np.repeat(np.arange(n_bins), interval_length)[:len(v)]

    dd[column_id] = dd[column_id].apply(str) + "_bin_" + \
                    dd.groupby([column_id, column_kind])[column_value].transform(create_bins).apply(str)

    dd = extract_features(dd,
                          column_id=column_id,
                          column_value=column_value,
                          column_kind=column_kind,
                          default_fc_parameters=compression_functions)

    dd.columns = [x.replace("__", "_") for x in dd.columns]
    dd.columns = [x.replace("feature", "map") for x in dd.columns]
    dd.reset_index(drop=False, inplace=True)

    ids = dd[column_id].str.split("_bin_").apply(lambda s: s[0])
    bin_number = dd["id"].str.split("_bin_").apply(lambda s: eval(s[1]))

    dd[column_id] = ids
    dd["bin"] =  bin_number

    return dd.sort_values(by=[column_id, "bin"])
github earthgecko / skyline / skyline / tsfresh_features / scripts / tsfresh_graphite_csv.py View on Github external
local_dt = local.localize(naive, is_dst=None)
            utc_dt = local_dt.astimezone(pytz.utc)
            timestamp = utc_dt.strftime('%s')
            utc_ts_line = '%s,%s,%s\n' % (metric, str(timestamp), value)
            with open(tmp_csv, 'a') as fh:
                fh.write(utc_ts_line)

    df = pd.read_csv(tmp_csv, delimiter=',', header=None, names=['metric', 'timestamp', 'value'])

    # if os.path.isfile(tmp_csv):
    #     os.remove(tmp_csv)

    df.columns = ['metric', 'timestamp', 'value']
    start_feature_extraction = timer()
    try:
        df_features = extract_features(df, column_id='metric', column_sort='timestamp', column_kind=None, column_value=None)
    except:
        print(traceback.print_exc())
        print(colored('error: extracting features with tsfresh', 'red'))
        sys.exit(1)

    end_feature_extraction = timer()
    print(colored('notice: extracting features with tsfresh took %.6f seconds', 'cyan') % (end_feature_extraction - start_feature_extraction))

    # write to disk
    fname_out = fname_in + '.features.csv'
    df_features.to_csv(fname_out)

    # Transpose
    df_t = df_features.transpose()
    t_fname_out = fname_in + '.features.transposed.csv'
    df_t.to_csv(t_fname_out)
github earthgecko / skyline / skyline / tsfresh_features / generate_tsfresh_features.py View on Github external
utc_ts_line = '%s,%s,%s\n' % (metric, str(timestamp), value)
        with open(tmp_csv, 'a') as fh:
            fh.write(utc_ts_line)
    # TO HERE

    start = timer()

    df = pd.read_csv(tmp_csv, delimiter=',', header=None, names=['metric', 'timestamp', 'value'])

    if os.path.isfile(tmp_csv):
        os.remove(tmp_csv)

    df.columns = ['metric', 'timestamp', 'value']
    start_feature_extraction = timer()
    try:
        df_features = extract_features(df, column_id='metric', column_sort='timestamp', column_kind=None, column_value=None)
    except:
        print(traceback.print_exc())
        print(colored('error: extracting features with tsfresh', 'red'))
        sys.exit(1)

    end_feature_extraction = timer()
    print(colored('notice: extracting features with tsfresh took %.6f seconds', 'cyan') % (end_feature_extraction - start_feature_extraction))

    # write to disk
    fname_out = fname_in + '.features.csv'
    df_features.to_csv(fname_out)

    # Transpose
    df_t = df_features.transpose()
    t_fname_out = fname_in + '.features.transposed.csv'
    df_t.to_csv(t_fname_out)
github Tencent / Metis / time_series_detector / feature / statistical_features.py View on Github external
def time_series_has_duplicate_min(x):
    """
    Determine whether the smallest value of the series occurs more
    than once.

    Thin wrapper delegating to tsfresh's built-in feature calculator.

    :param x: the time series to calculate the feature of
    :type x: pandas.Series
    :return: the value of this feature
    :return type: bool
    """
    has_dup_min = ts_feature_calculators.has_duplicate_min(x)
    return has_dup_min