"""
performs a random search on the NN meta algo to find the best params
:param inputs: pd.DataFrame chosen as input
:param outputs: pd.DataFrame chosen as output
:param iterations: Number of parameter settings that are sampled
:param save_model: boolean set to True if the model needs to be saved
:return: best meta_algo with parameters
:rtype: scikit learn RandomizedSearchCV object
"""
X, y, cols, original_cols = self._transform_data(inputs, outputs)
if self.meta_algo != 'NN':
raise KeyError(f'''meta algo {self.meta_algo} not supported for random search''')
parameter_space = config("random_search_params")
meta_algo = MLPRegressor(max_iter=200)
X_train, X_test, y_train, y_test \
= train_test_split(X, y, test_size=0.20, random_state=42)
X_train, X_test = self._scale_data(X_train, X_test, save_model)
meta_algo = RandomizedSearchCV(meta_algo, parameter_space,
n_iter=iterations, n_jobs=2)
meta_algo.fit(X_train, y_train)
if self.verbose >= 2:
self.logger.info(f'''Best parameters found: {meta_algo.best_estimator_}''')
return meta_algo
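# Minimal usage sketch for the random-search excerpt above. The Model constructor
# arguments shown here are illustrative assumptions (only meta_algo, verbose and
# drop_rate appear elsewhere in these snippets), not a documented signature.
from scitime._model import Model

trainer = Model(meta_algo='NN', verbose=2)      # assumed constructor arguments
inputs, outputs, _ = trainer._generate_data()   # as in the fit excerpt below
search = trainer._random_search(inputs, outputs, iterations=20)
print(search.best_params_)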
# Excerpt from the meta-model fitting routine in scitime._model (the enclosing
# method signature is not included in the source snippet).
meta_algo_params = \
    {'criterion': 'mse', 'max_depth': 100, 'max_features': 10}

if generate_data:
    inputs, outputs, _ = self._generate_data()
else:
    if csv_name is not None:
        inputs, outputs = self._transform_from_csv(csv_name=csv_name)

if inputs is None or outputs is None:
    raise NameError('''no inputs / outputs found: please enter a csv name or set generate_data to True''')

X, y, cols, original_cols = self._transform_data(inputs, outputs)

# we decide on a meta-algorithm
if self.meta_algo not in config('supported_meta_algos'):
    raise KeyError(
        f'''meta algo {self.meta_algo} currently not supported''')
if self.meta_algo == 'RF':
    meta_algo = RandomForestRegressor(**meta_algo_params)
if self.meta_algo == 'NN':
    meta_algo = MLPRegressor(**meta_algo_params)

if self.verbose >= 2:
    self.logger.info(f'''Fitting {self.meta_algo} to estimate training durations for model {self.algo}''')

# dividing into train/test
X_train, X_test, y_train, y_test = \
    train_test_split(X, y, test_size=0.20, random_state=42)

if self.meta_algo == 'NN':
    X_train_scaled, X_test_scaled = \
        self._scale_data(X_train, X_test, save_model)  # continuation inferred from the random-search excerpt above
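# Illustrative sketch only of what a _scale_data-style helper typically does with
# scikit-learn: fit the scaler on the training fold, reuse it on the test fold.
# The actual scitime implementation may differ (e.g. it presumably also persists
# the fitted scaler when save_model is True).
from sklearn.preprocessing import StandardScaler

def scale_train_test(X_train, X_test):
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    return X_train_scaled, X_test_scaled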
def _fetch_algo_metadata(algo):
    """
    retrieves algo name, algo params and meta params from sklearn model

    :param algo: sklearn model
    :return: dictionary
    :rtype: dict
    """
    algo_name = type(algo).__name__
    algo_params = algo.get_params()
    params = config(algo_name)

    param_dic = {'name': algo_name,
                 'params': algo_params, 'config': params}

    return param_dic
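# Quick illustration of the returned dictionary's shape, assuming the function
# above is in scope and config() (from scitime._utils) has an entry keyed by the
# sklearn class name, as the snippet implies:
from sklearn.ensemble import RandomForestRegressor

param_dic = _fetch_algo_metadata(RandomForestRegressor(n_estimators=50))
# param_dic['name']   -> 'RandomForestRegressor'
# param_dic['params'] -> the model's get_params() dict (n_estimators, max_depth, ...)
# param_dic['config'] -> the matching entry from scitime's _config.json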
def params(self):
    """
    retrieves the estimated algorithm's parameters if the algo is supported,
    otherwise raises a KeyError

    :return: dictionary
    """
    if self.algo not in config("supported_algos"):
        raise KeyError(
            f'''{self.algo} not currently supported by this package''')
    return config(self.algo)
import argparse

import numpy as np

from scitime._model import Model
from scitime._utils import config

SUPPORTED_META_ALGOS = config('supported_meta_algos')
SUPPORTED_ALGOS = config('supported_algos')

if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='''Gather & Persist Data of model training runtimes''')

    parser.add_argument('--drop_rate', required=False, default=0.999,
                        help='''drop rate of number of data generated
                        (from all param combinations taken from _config.json).
                        Default is 0.999''')

    parser.add_argument('--meta_algo', required=False,
                        choices=SUPPORTED_META_ALGOS,
                        help='''meta algo used to fit the meta model (NN or RF)
                        - default is RF''')

    parser.add_argument('--verbose', required=False, default=1,
                        help='verbose mode (0, 1, 2 or 3)')
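    # Sketch of the typical follow-up, not shown in this excerpt: parse the flags
    # and cast them, since argparse returns them as strings. A hypothetical
    # invocation of this script could look like:
    #     python _data.py --drop_rate 0.9995 --meta_algo NN --verbose 2
    # (the file name is an assumption; it is not given in the snippet)
    args = parser.parse_args()
    drop_rate = float(args.drop_rate)
    verbose = int(args.verbose)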
"""
estimates the model's training time given that the fit starts
:param X: np.array of inputs to be trained
:param y: np.array of outputs to be trained
(set to None if unsupervised algo)
:param algo: algo whose runtime the user wants to predict
:return: predicted runtime,
low and high values of the confidence interval
:rtype: tuple
"""
# fetching sklearn model of the end user
param_dic = self._fetch_algo_metadata(algo)
algo_name = param_dic['name']
if algo_name not in config("supported_algos"):
raise NotImplementedError(f'''{algo_name} not currently supported by this package''')
if self.meta_algo not in config('supported_meta_algos'):
raise KeyError(f'''meta algo {self.meta_algo} currently not supported''')
if self.verbose >= 3:
self.logger.debug(f'''Fetching estimator: {self.meta_algo}_{algo_name}_estimator.pkl''')
model_path = f'''{get_path("models")}/{self.meta_algo}_{algo_name}_estimator.pkl'''
meta_estimator = joblib.load(model_path)
# retrieving all parameters of interest:
df = self._fetch_params(algo, X, y)
# Transforming the inputs:
if self.meta_algo == 'NN':
    meta_X = self._transform_params(algo, df, scaled=True)
else:
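# End-to-end usage sketch of the estimation path above, as an end user would call
# it. The public class name and the time(...) call shape are assumptions (recent
# releases expose roughly the names below, older ones used `Estimator`) - check
# against the installed scitime version. What is grounded in the docstring above
# is the (runtime, low, high) return tuple.
import numpy as np
from sklearn.ensemble import RandomForestRegressor

from scitime import RuntimeEstimator  # assumption: public entry-point name

estimator = RuntimeEstimator(meta_algo='RF', verbose=0)   # 'NN' is the other option
rf = RandomForestRegressor(n_estimators=100)
X, y = np.random.rand(10_000, 10), np.random.rand(10_000)

# predicted fit duration plus the bounds of its confidence interval
estimation, lower_bound, upper_bound = estimator.time(rf, X, y)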