How to use the sktime.transformers.base.BaseTransformer class in sktime

To help you get started, we’ve selected a few sktime examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github alan-turing-institute / sktime / sktime / contrib / transformers / template.py View on Github external
#dummy transform
import numpy as np
import pandas as pd
from sktime.transformers.base import BaseTransformer


class DummyTransformer(BaseTransformer):

    def __init__(self, check_input=True):
        """Store the configuration and reset the recorded data dimensions.

        Parameters
        ----------
        check_input : bool, optional (default=True)
            Flag stored on the instance; ``fit`` reads it.
        """
        # Dimensions are unknown until ``fit`` is called.
        self.check_input = check_input
        self.num_cases = self.num_dimensions = None

    def fit(self, X, y=None):
        """Record the dimensions of the training data.

        Parameters
        ----------
        X : object exposing a two-element ``shape``
            Training data; ``X.shape`` is unpacked into the number of cases
            and the number of dimensions.
        y : ignored
            Not used by this method.

        Returns
        -------
        self
            The fitted transformer, so that calls such as
            ``fit(X).transform(X)`` can be chained.
        """
        if self.check_input:
            # Placeholder: this template performs no input validation yet.
            pass

        self.num_cases, self.num_dimensions = X.shape
        return self

    def transform(self, X, y=None):
        if not isinstance(X, pd.DataFrame):
            raise TypeError("Input should be a pandas dataframe containing Series objects")
github alan-turing-institute / sktime / sktime / transformers / dictionary_based / SFA.py View on Github external
import numpy as np
import pandas as pd
import math
import sys

from sktime.transformers.dictionary_based.SAX import BitWord
from sktime.transformers.base import BaseTransformer


# TO DO: Finish comments


class SFA(BaseTransformer):
    __author__ = "Matthew Middlehurst"
    """ SFA Transformer, as described in 

    @inproceedings{schafer2012sfa,
      title={SFA: a symbolic fourier approximation and index for similarity search in high dimensional datasets},
      author={Sch{\"a}fer, Patrick and H{\"o}gqvist, Mikael},
      booktitle={Proceedings of the 15th International Conference on Extending Database Technology},
      pages={516--527},
      year={2012},
      organization={ACM}
    }

    Overview: for each series: 
        run a sliding window across the series
        for each window
            shorten the series with DFT
github alan-turing-institute / sktime / sktime / transformers / forecasting.py View on Github external
# find out by how much we have to shift seasonal_components to align with new index
        shift = -time_index[0] % self.sp

        # align seasonal components with new starting point of new time_index
        return np.roll(self.seasonal_components_, shift=shift, axis=1)

    def _fit_seasonal_decomposition_model(self, X):
        """Fit seasonal decomposition model and return fitted seasonal components.

        ``X`` is transposed before the call because statsmodels'
        ``seasonal_decompose`` expects time series to be in columns rather
        than rows; the seasonal part of the result is transposed back and
        returned as an array with at least two dimensions.
        """
        decomposition = seasonal_decompose(
            X.T,
            model=self.model,
            freq=self.sp,
            filt=None,
            two_sided=True,
            extrapolate_trend=0,
        )
        return np.atleast_2d(decomposition.seasonal.T)


class Detrender(BaseTransformer):
    """A transformer that removes trend of given polynomial order from time series/panel data

    Parameters
    ----------
    order : int
        Polynomial order, zero: mean, one: linear, two: quadratic, etc
    check_input : bool, optional (default=True)
        When set to ``True``, inputs will be validated, otherwise inputs are assumed to be valid
        and no checks are performed. Use with caution.
    """

    def __init__(self, order=0):

        if not (isinstance(order, int) and (order >= 0)):
            raise ValueError(f"order must be a positive integer, but found: {type(order)}")
        self.order = order
github alan-turing-institute / sktime / sktime / transformers / forecasting.py View on Github external
import numpy as np
import pandas as pd
from sklearn.utils.validation import check_is_fitted
from statsmodels.tsa.seasonal import seasonal_decompose

from sktime.transformers.base import BaseTransformer
from sktime.transformers.compose import Tabulariser
from sktime.utils.data_container import get_time_index
from sktime.utils.time_series import fit_trend, remove_trend, add_trend
from sktime.utils.validation.forecasting import validate_sp, check_is_fitted_in_transform
from sktime.utils.validation.supervised import validate_X, check_X_is_univariate


class Deseasonaliser(BaseTransformer):
    """A transformer that removes a seasonal component from time series/panel data

    Parameters
    ----------
    sp : int, optional (default=1)
        Seasonal periodicity
    model : str {'additive', 'multiplicative'}, optional (default='additive')
        Model to use for estimating seasonal component
    check_input : bool, optional (default=True)
        When set to ``True``, inputs will be validated, otherwise inputs are assumed to be valid
        and no checks are performed. Use with caution.
    """

    def __init__(self, sp=1, model='additive', check_input=True):
        self.sp = validate_sp(sp)
        allowed_models = ('additive', 'multiplicative')
github alan-turing-institute / sktime / sktime / transformers / compose.py View on Github external
rows_t.append(row_t)  # append transformed rows
                cols_t.append(rows_t)  # append transformed columns

            # if series-to-series transform, flatten transformed series
            Xt = concat_nested_arrays(cols_t)  # concatenate transformed columns

            # tabularise/unnest series-to-primitive transforms
            xt = Xt.iloc[0, 0]
            if isinstance(xt, (pd.Series, np.ndarray)) and len(xt) == 1:
                Xt = tabularize(Xt)
        return Xt




class Tabularizer(BaseTransformer):
    """
    A transformer that turns time series/panel data into tabular data.

    This estimator converts nested pandas dataframe containing time-series/panel data with numpy arrays or pandas Series in
    dataframe cells into a tabular pandas dataframe with only primitives in cells. This is useful for transforming
    time-series/panel data into a format that is accepted by standard validation learning algorithms (as in sklearn).

    Parameters
    ----------
    check_input: bool, optional (default=True)
        When set to ``True``, inputs will be validated, otherwise inputs are assumed to be valid
        and no checks are performed. Use with caution.
    """

    # TODO: allow to keep column names, but unclear how to handle multivariate data
github alan-turing-institute / sktime / sktime / transformers / summarise.py View on Github external
# filter out single points
            starts = starts[lengths >= self.min_length]
            lengths = lengths[lengths >= self.min_length]

            self._starts.append(starts)
            self._lengths.append(lengths)

        # put into dataframe
        Xt = pd.DataFrame()
        column_prefix = "%s_%s" % (column_name, "nan" if np.isnan(self.value) else str(self.value))
        Xt["%s_starts" % column_prefix] = pd.Series(self._starts)
        Xt["%s_lengths" % column_prefix] = pd.Series(self._lengths)
        return Xt


class DerivativeSlopeTransformer(BaseTransformer):
    # TODO add docstrings
    def transform(self, X, y=None):
        """Apply ``row_wise_get_der`` to every column of ``X``.

        Parameters
        ----------
        X : object with a two-element ``shape`` and ``iloc`` indexing
            Input data; each column is passed to ``row_wise_get_der`` in turn.
        y : ignored
            Not used by this method.

        Returns
        -------
        pd.DataFrame
            One column per input column, named ``der_dim_<position>``.
        """
        _, n_dims = X.shape
        derivatives = pd.DataFrame()
        for position in range(n_dims):
            column = X.iloc[:, position]
            derivatives['der_dim_' + str(position)] = pd.Series(
                DerivativeSlopeTransformer.row_wise_get_der(column)
            )

        return derivatives

    @staticmethod
    def row_wise_get_der(X):

        def get_der(x):
            der = []
github alan-turing-institute / sktime / sktime / contrib / transformers / discrete_fourier_transformer.py View on Github external
# dummy transform
import numpy as np
import pandas as pd
from sktime.transformers.base import BaseTransformer
from enum import Enum



class DiscreteFourierType(Enum):
    """Kinds of discrete Fourier transform selectable via ``DiscreteFourierTransformer``'s ``fourier_type`` argument."""
    # NOTE(review): the members presumably correspond to numpy's standard, real and
    # Hermitian FFT families -- confirm against DiscreteFourierTransformer.transform.
    STANDARD = 1  # default value of ``fourier_type``
    REAL = 2
    HERMITIAN = 3


class DiscreteFourierTransformer(BaseTransformer):

    def __init__(self, fourier_type=DiscreteFourierType.STANDARD, axis=None, norm=None, check_input=True):
        """Validate and store the transform configuration.

        Parameters
        ----------
        fourier_type : DiscreteFourierType, optional (default=DiscreteFourierType.STANDARD)
            Kind of transform to apply; stored as ``self.type``.
        axis : optional (default=None)
            Stored as ``self.axis``; ``transform`` forwards it to the numpy FFT call.
        norm : optional (default=None)
            Stored as ``self.norm``; ``transform`` forwards it to the numpy FFT call.
        check_input : bool, optional (default=True)
            Stored as ``self.check_input``.

        Raises
        ------
        TypeError
            If ``fourier_type`` is not a ``DiscreteFourierType`` member.
        """
        # Validate the argument itself: ``self.type`` does not exist yet at this
        # point, so checking the attribute would raise AttributeError instead.
        if not isinstance(fourier_type, DiscreteFourierType):
            raise TypeError("fourier_type should be a DiscreteFourierType member")

        self.check_input = check_input
        self.type = fourier_type
        self.norm = norm
        self.axis = axis

    def transform(self, X, y=None):
        if not isinstance(X, pd.DataFrame):
            raise TypeError("Input should be a pandas dataframe containing Series objects")

        if self.type == 1:
            return np.fft.fftn(X, axis=self.axis, norm=self.norm)
github alan-turing-institute / sktime / sktime / transformers / spectral_based.py View on Github external
def __init__(self, fs=1.0, window='boxcar', nfft=None, detrend='constant', return_onesided=True, scaling='density',
                 axis=-1, check_input=True):
        """Collect the transform settings and mark the instance as fitted.

        Every argument except ``check_input`` is gathered into the
        ``transform_parameters`` dict under its own name; ``check_input`` is
        stored as ``check_input_``.

        NOTE(review): the argument names match the keyword arguments of
        scipy.signal.periodogram -- confirm that this is the function the
        settings are eventually passed to.
        """
        self.transform_parameters = {
            'fs': fs,
            'window': window,
            'nfft': nfft,
            'detrend': detrend,
            'return_onesided': return_onesided,
            'scaling': scaling,
            'axis': axis,
        }

        self.type_ = FunctionConfigs.FuncType.POWER_SPECTRUM
        self.input_key_ = 'x'
        self.check_input_ = check_input
        # Flagged as fitted at construction time.
        self.is_fitted_ = True

    def get_transform_params(self):
        """Return the ``transform_parameters`` attribute of this instance."""
        return self.transform_parameters


class CosineTransformer(BaseTransformer):
    """Transformer that applies the cosine function to its input."""

    def __init__(self):
        """Create the transformer; it takes no parameters and stores no state."""

    def transform(self, x, y=None):
        """Return the cosine of ``x``.

        Parameters
        ----------
        x : any value accepted by ``np.cos``
            Input to transform.
        y : ignored
            Not used by this method.

        Returns
        -------
        The result of ``np.cos(x)``.
        """
        cosine = np.cos(x)
        return cosine
github alan-turing-institute / sktime / sktime / contrib / transformers / reshape.py View on Github external
import numpy as np
import pandas as pd
from sktime.transformers.base import BaseTransformer
from enum import Enum


__all__ = ['Resizer']
__author__ = ["Tony Bagnall"]

class ResizeType(Enum):
    """Resizing strategies accepted by ``Resizer`` (descriptions follow the ``Resizer`` docstring)."""
    PADDER = 1  # pad all series to the length of the longest series
    TRUNCATOR = 2  # shorten every series to the size of the smallest
    RESIZER = 3  # resize each series to run from start to end (inclusive), padding if necessary

class Resizer(BaseTransformer):
    """
        transformer to standardise the size of the data set in one of three ways.
        1. type == PADDER: Pads all series to the length of the longest series
        2. type == TRUNCATOR: Shortens every series to be the same size as the smallest
        3. type == RESIZER: Resizes so each series goes from start to end (inclusive). It will pad if necessary, and data at
        start will be at position 0 in the new series. If this type is set transform will throw an exception if start and end are not set.
            ----------
        in the case where there is an empty series, TRUNCATOR will make everything empty
    """

    def __init__(self, type=ResizeType.PADDER, start=None, end=None, pad_value=0):
        """Store the resizing configuration.

        Parameters
        ----------
        type : ResizeType, optional (default=ResizeType.PADDER)
            Resizing strategy.
        start : optional (default=None)
            Start position; per the class docstring it must be set when
            ``type`` is ``ResizeType.RESIZER``.
        end : optional (default=None)
            End position (inclusive); per the class docstring it must be set
            when ``type`` is ``ResizeType.RESIZER``.
        pad_value : optional (default=0)
            Stored as ``self.pad_value``.
            NOTE(review): presumably the fill value used when padding --
            confirm against ``transform``.
        """
        self.type = type
        self.start = start
        self.end = end
        # Keep the caller-supplied value rather than a hard-coded 0.
        self.pad_value = pad_value
    # No need for fit for Padding transformer
github alan-turing-institute / sktime / sktime / transformers / dictionary_based / PAA.py View on Github external
import numpy as np
import pandas as pd

from sktime.transformers.base import BaseTransformer
from sktime.utils.load_data import load_from_tsfile_to_dataframe as load_ts


class PAA(BaseTransformer):
    __author__ = "Matthew Middlehurst"
    """ (PAA) Piecewise Aggregate Approximation Transformer, as described in 
 Eamonn Keogh, Kaushik Chakrabarti, Michael Pazzani, and Sharad Mehrotra. 
 Dimensionality reduction for fast similarity search in large time series databases. 
 Knowledge and information Systems, 3(3), 263-286, 2001.  
 For each series reduce the dimensionality to num_intervals, where each value is the mean of values in 
 the interval. 
TO DO: pythonise it to make it more efficient. Maybe check vs this version
        http://vigne.sh/posts/piecewise-aggregate-approx/
Could have: Tune the interval size in fit somehow?
        
    Parameters
    ----------
    num_intervals   : int, dimension of the transformed data (default 8)

         """