How to use the sktime.classifiers.base.BaseClassifier class in sktime

To help you get started, we’ve selected a few sktime examples based on popular ways BaseClassifier is used in public projects.
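Most of the examples below follow the same pattern: subclass BaseClassifier, implement fit and predict, then use the estimator like any scikit-learn classifier. A minimal hedged sketch of that pattern (the majority-class logic is illustrative, not taken from any of the projects below):

import numpy as np

from sktime.classifiers.base import BaseClassifier


class MajorityClassClassifier(BaseClassifier):
    """Illustrative classifier that always predicts the training majority class."""

    def fit(self, X, y):
        # remember the most frequent class label seen during training
        values, counts = np.unique(y, return_counts=True)
        self.majority_ = values[np.argmax(counts)]
        self.is_fitted_ = True
        return self

    def predict(self, X):
        # predict the stored majority class for every instance in X
        return np.full(X.shape[0], self.majority_)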

github alan-turing-institute / sktime / sktime / classifiers / example_classifiers.py
    def predict(self, X):
        """ A reference implementation of a prediction function.

        Parameters
        ----------
        X : array-like, pandas DataFrame or Series, shape (n_samples, ...)
            The input samples.

        Returns
        -------
        y : ndarray, shape (n_samples,)
            Returns the dummy predictions.
        """
        X = check_ts_array(X)
        check_is_fitted(self, 'is_fitted_')
        return np.ones(X.shape[0], dtype=np.int64) * self.theta_


class TSExampleClassifier(BaseClassifier):
    """ An example regressor that makes use of the xpandas input.
    """

    def __init__(self, func=np.mean, columns=None, estimator=RandomForestClassifier()):
        self.func = func
        self.columns = columns
        self.estimator = estimator

    def fit(self, X, y):
        """ A reference implementation of a fitting function.

        Parameters
        ----------
        X : array-like, pandas DataFrame or Series, shape (n_samples, ...)
            The training input samples.
        y : array-like, pandas DataFrame or Series, shape (n_samples,)
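The snippet is cut off here, but usage follows the familiar scikit-learn pattern. A hedged sketch (the nested-DataFrame toy data is an assumption about the input format of early sktime versions; the import path mirrors the snippet header above):

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sktime.classifiers.example_classifiers import TSExampleClassifier

# toy data: 20 cases, each a univariate series of length 10 (illustrative)
X = pd.DataFrame({'dim_0': [pd.Series(np.random.randn(10)) for _ in range(20)]})
y = np.random.randint(0, 2, size=20)

clf = TSExampleClassifier(func=np.mean, columns=['dim_0'],
                          estimator=RandomForestClassifier(n_estimators=10))
clf.fit(X, y)
predictions = clf.predict(X)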
github alan-turing-institute / sktime / sktime / classifiers / distance_based / proximity_forest.py
get_gain=proximity.get_gain,
                verbosity=proximity.verbosity,
                n_jobs=proximity.n_jobs
            )
            # grow the stump
            stump.fit(proximity.X, proximity.y)
            stump.grow()
            stumps.append(stump)
        # pick the best stump based upon gain
        stump = comparison.max(stumps, proximity.random_state, lambda stump: stump.entropy)
        return stump

    return find_best_stump


class ProximityStump(BaseClassifier):

    """
    Proximity Stump class to model a decision stump which uses a distance measure to partition data.

    Attributes:
        label_encoder: label encoder to change string labels to numeric indices
        y_exemplar: class label list of the exemplar instances
        X_exemplar: dataframe of the exemplar instances
        X_branches: dataframes for each branch, one per exemplar
        y_branches: class label list for each branch, one per exemplar
        classes_: unique list of classes
        entropy: the gain associated with the split of the data
        random_state: the random state
        get_exemplars: function to extract exemplars from a dataframe and class value list
        setup_distance_measure: function to set up the distance measure getters from a dataframe and class value list
        get_distance_measure: distance measure getters
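The attributes above describe a nearest-exemplar split: each instance is routed to the branch of the exemplar it is closest to under the chosen distance measure. A self-contained sketch of that idea (function and variable names are illustrative, not the library's API):

import numpy as np

def partition_by_nearest_exemplar(X, exemplars, distance):
    # one branch per exemplar; route each instance to its nearest exemplar
    branches = [[] for _ in exemplars]
    for instance in X:
        distances = [distance(instance, exemplar) for exemplar in exemplars]
        branches[int(np.argmin(distances))].append(instance)
    return branches

euclidean = lambda a, b: np.linalg.norm(a - b)
X = np.array([[0.0, 0.1], [5.0, 5.2], [0.3, 0.0], [4.9, 5.0]])
exemplars = np.array([[0.0, 0.0], [5.0, 5.0]])
branches = partition_by_nearest_exemplar(X, exemplars, euclidean)
# branches[0] holds the instances nearest exemplar 0, branches[1] the rest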
github sktime / sktime-dl / sktime_dl / meta / _dlensemble.py
import gc
import os
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.base import clone
from sklearn.utils.multiclass import class_distribution
from sktime.classifiers.base import BaseClassifier
from sktime.utils.validation.supervised import validate_X
from tensorflow import keras

from sktime_dl.deeplearning import InceptionTimeClassifier


class DeepLearnerEnsembleClassifier(BaseClassifier):
    """
    Simplified, streamlined class to ensemble over homogeneous network
    architectures with different random initialisations.

    This may be refactored to use standard scikit-learn ensemble mechanisms
    in the future; it is currently somewhat bespoke for speed of
    implementation.

    Originally proposed by:

    @article{fawaz2019deep,
      title={Deep neural network ensembles for time series classification},
      author={Fawaz, H Ismail and Forestier, Germain and Weber, Jonathan and
              Idoumghar, Lhassane and Muller, P},
      journal={arXiv preprint arXiv:1903.06602},
      year={2019}
    }
github alan-turing-institute / sktime / sktime / model_selection.py
def __init__(self, estimator, param_grid, scoring=None, fit_params=None,
                 n_jobs=None, iid='warn', refit=True, cv='warn', verbose=0,
                 pre_dispatch='2*n_jobs', error_score='raise-deprecating',
                 return_train_score="warn"):

        super(GridSearchCV, self).__init__(estimator, param_grid, scoring=scoring, fit_params=fit_params,
                                           n_jobs=n_jobs, iid=iid, refit=refit, cv=cv, verbose=verbose,
                                           pre_dispatch=pre_dispatch, error_score=error_score,
                                           return_train_score=return_train_score)

        if self.scoring is None:
            # using accuracy score as default for classifiers
            if isinstance(self.estimator, BaseClassifier):
                self.scoring = make_scorer(accuracy_score)
            # using mean squared error as default for regressors
            elif isinstance(self.estimator, BaseRegressor):
                self.scoring = make_scorer(mean_squared_error)
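In practice this means the scoring argument can simply be omitted when tuning a sktime estimator. A hedged usage sketch (the estimator and parameter grid are illustrative; any BaseClassifier subclass with tunable parameters would do):

import numpy as np
from sktime.model_selection import GridSearchCV  # the subclass shown above
from sktime.classifiers.example_classifiers import TSExampleClassifier

# scoring is omitted: because the estimator is a BaseClassifier, the
# constructor above defaults it to make_scorer(accuracy_score)
grid = GridSearchCV(estimator=TSExampleClassifier(),
                    param_grid={'func': [np.mean, np.std]},
                    cv=3)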
github alan-turing-institute / mlaut / mlaut / estimators / decision_trees.py
CLASSIFICATION,
                                      REGRESSION,
                                      GRIDSEARCH_NUM_CV_FOLDS,
                                      GRIDSEARCH_CV_NUM_PARALLEL_JOBS,
                                      VERBOSE)
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import DecisionTreeRegressor

from sklearn.model_selection import GridSearchCV
import numpy as np


from sktime.classifiers.base import BaseClassifier
from sktime.regressors.base import BaseRegressor

class Decision_Tree_Classifier(BaseClassifier):
    """
    Wrapper for the sklearn Decision Tree Classifier.
    """
    def __init__(self, hyperparameters=None, 
                       n_jobs=GRIDSEARCH_CV_NUM_PARALLEL_JOBS, 
                       cv=GRIDSEARCH_NUM_CV_FOLDS):
        self.fitted_classifier = None
        self.n_jobs = n_jobs
        self.cv = cv
        if hyperparameters is None:
            self.hyperparameters = {"max_depth": [10, 100, None],
                                    "criterion": ['gini', 'entropy'],
                                    "max_features": ['auto', 'sqrt', 'log2'],
                                    "min_samples_leaf": np.arange(1, 11)}
        else:
            self.hyperparameters = hyperparameters
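The constructor only stores the search settings; a plausible fit method, sketched here purely as an assumption (mlaut's actual implementation may differ), would feed them into the GridSearchCV imported above:

    # hedged sketch of a fit method using the stored settings; the real
    # mlaut implementation may differ
    def fit(self, X, y):
        search = GridSearchCV(DecisionTreeClassifier(),
                              param_grid=self.hyperparameters,
                              cv=self.cv,
                              n_jobs=self.n_jobs,
                              verbose=VERBOSE)
        self.fitted_classifier = search.fit(X, y)
        return self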
github alan-turing-institute / mlaut / mlaut / estimators / cluster_estimators.py
import numpy as np
from sklearn import neighbors
from sklearn.model_selection import GridSearchCV
from mlaut.shared.static_variables import GRIDSEARCH_CV_NUM_PARALLEL_JOBS, GRIDSEARCH_NUM_CV_FOLDS
from sktime.classifiers.base import BaseClassifier

class K_Neighbours(BaseClassifier):
    """
    Wrapper for the sklearn KNeighbors classifier.
    """
    def __init__(self,
                 hyperparameters=None,
                 n_jobs=GRIDSEARCH_CV_NUM_PARALLEL_JOBS,
                 cv=GRIDSEARCH_NUM_CV_FOLDS):
        self.fitted_classifier = None
        self.n_jobs = n_jobs
        self.cv = cv

        # avoid a mutable dict as a default argument
        if hyperparameters is None:
            hyperparameters = {
                'n_neighbors': np.arange(1, 31),
                'p': [1, 2]
            }
        self.hyperparameters = hyperparameters
github alan-turing-institute / sktime / sktime / classifiers / distance_based / proximity_forest.py
indices = np.ravel(indices)
                sub_tree = self.branches[index]
                if sub_tree is None:
                    sub_distribution = np.zeros((1, n_classes))
                    class_label = self.stump.y_exemplar[index]
                    sub_distribution[0][class_label] = 1
                else:
                    sub_X = X.iloc[indices, :]
                    sub_distribution = sub_tree.predict_proba(sub_X)
                assert sub_distribution.shape[1] == n_classes
                np.add.at(distribution, indices, sub_distribution)
        normalize(distribution, copy=False, norm='l1')
        return distribution


class ProximityForest(BaseClassifier):

    """
    Proximity Forest class to model a decision tree forest which uses distance measures to
    partition data.

@article{lucas19proximity,

  title={Proximity Forest: an effective and scalable distance-based classifier for time series},
  author={B. Lucas and A. Shifaz and C. Pelletier and L. O’Neill and N. Zaidi and B. Goethals and F. Petitjean and G. Webb},
  journal={Data Mining and Knowledge Discovery},
  volume={33},
  number={3},
  pages={607--635},
  year={2019}
  }
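In the predict_proba fragment at the top of this snippet, np.add.at accumulates each branch's probability rows into the parent distribution, and the final L1 normalisation rescales every non-empty row to sum to one. A self-contained illustration of those two calls on toy values:

import numpy as np
from sklearn.preprocessing import normalize

distribution = np.zeros((3, 2))           # 3 instances, 2 classes
indices = np.array([0, 2])                # instances routed to one branch
sub_distribution = np.array([[1.0, 0.0],  # that branch's predictions
                             [0.5, 0.5]])
np.add.at(distribution, indices, sub_distribution)
normalize(distribution, copy=False, norm='l1')
# rows 0 and 2 now hold the branch's probabilities; row 1 remains zero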
github alan-turing-institute / mlaut / mlaut / estimators / ensemble_estimators.py
from sklearn.model_selection import GridSearchCV
from mlaut.shared.static_variables import(GRIDSEARCH_NUM_CV_FOLDS,
                                      GRIDSEARCH_CV_NUM_PARALLEL_JOBS)
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import BaggingRegressor

from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor
import numpy as np
from sktime.classifiers.base import BaseClassifier
from sktime.regressors.base import BaseRegressor


class Random_Forest_Classifier(BaseClassifier):
    """
    Wrapper for the sklearn Random Forest Classifier.
    """
    def __init__(self, hyperparameters=None, 
                       n_jobs=GRIDSEARCH_CV_NUM_PARALLEL_JOBS, 
                       cv=GRIDSEARCH_NUM_CV_FOLDS):
        self.fitted_classifier = None
        self.n_jobs = n_jobs
        self.cv = cv
        if hyperparameters is None:
            self.hyperparameters = {"max_depth": [10, 100, None],
                                    "max_features": ['auto', 'sqrt', 'log2', None],
                                    "min_samples_split": [2, 3, 10],
                                    "bootstrap": [True, False],
                                    "criterion": ["gini", "entropy"],
                                    "n_estimators": [10, 100, 200, 500]}
github alan-turing-institute / sktime / sktime / classifiers / shapelet_based / stc.py
import numpy as np
import random
import sys
import pandas as pd
import time
import math

from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.utils.multiclass import class_distribution

from sktime.transformers.shapelets import ContractedShapeletTransform
from sktime.classifiers.base import BaseClassifier

class ShapeletTransformClassifier(BaseClassifier):

    """ Shapelet Transform Classifier
        Basic implementation along the lines of
    @article{hills14shapelet,
      title={Classification of time series by shapelet transformation},
      author={J. Hills  and  J. Lines and E. Baranauskas and J. Mapp and A. Bagnall},
      journal={Data Mining and Knowledge Discovery},
      volume={28},
      number={4},
      pages={851--881},
      year={2014}
    }
    but with some of the refinements presented in
    @article{bostrom17binary,
      author={A. Bostrom and A. Bagnall},
      title={Binary Shapelet Transform for Multiclass Time Series Classification},
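The imports above hint at the classifier's internal structure: a contracted shapelet transform feeding a random forest. A hedged sketch of that pipeline (parameter values are illustrative assumptions, not the class's actual defaults):

from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sktime.transformers.shapelets import ContractedShapeletTransform

# hedged sketch of the transform-then-classify structure the imports suggest
pipeline = Pipeline([
    ('shapelet_transform', ContractedShapeletTransform()),
    ('random_forest', RandomForestClassifier(n_estimators=500)),
])
# pipeline.fit(X_train, y_train); pipeline.predict(X_test)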
github alan-turing-institute / sktime / sktime / model_selection.py
def __init__(self, estimator, param_grid, scoring=None, fit_params=None,
                 n_jobs=None, iid='warn', refit=True, cv='warn', verbose=0,
                 pre_dispatch='2*n_jobs', error_score='raise-deprecating',
                 return_train_score="warn"):
        super().__init__(estimator, param_grid, scoring=scoring, fit_params=fit_params,
                         n_jobs=n_jobs, iid=iid, refit=refit, cv=cv, verbose=verbose,
                         pre_dispatch=pre_dispatch, error_score=error_score,
                         return_train_score=return_train_score)
        if self.scoring is None:
            # using accuracy score as default for classifiers
            if isinstance(self.estimator, BaseClassifier):
                self.scoring = make_scorer(accuracy_score)
            # using mean squared error as default for regressors
            elif isinstance(self.estimator, BaseRegressor):
                self.scoring = make_scorer(mean_squared_error)