Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_custom_metric_and_scores_1_estimator(self):
    """Collect MSE scores from cross-validation and from a one-estimator stack.

    Reference scores are produced by ``cross_val_score`` on a plain
    ``LinearRegression`` using a scorer built from ``mean_squared_error``.
    The same metric is then passed to a ``StackingTransformer`` holding a
    single linear-regression estimator, and the first entry of its
    ``scores_`` attribute is copied after fitting.

    NOTE(review): the visible part of this test stops after taking the mean
    of the reference scores; no assertion is shown here.
    """
    # Reference scores from scikit-learn cross-validation
    scores_1 = cross_val_score(
        LinearRegression(),
        X_train,
        y=y_train,
        cv=n_folds,
        scoring=make_scorer(mean_squared_error),
        n_jobs=1,
        verbose=0,
    )

    # fit then transform
    stack = StackingTransformer(
        [('lr', LinearRegression())],
        regression=True,
        metric=mean_squared_error,
        n_folds=n_folds,
        shuffle=False,
        variant='B',
        random_state=0,
        verbose=0,
    )
    stack = stack.fit(X_train, y_train)
    # Copy so the values are detached from the transformer's own attribute
    scores_2 = stack.scores_[0].copy()

    # mean and std
    mean_1 = np.mean(scores_1)
# Train model
ls.fit(X=x, y=y)

# Get prediction results, one sample at a time.  Each row is reshaped to
# 2-D before being handed to predict(): a length-1 row becomes a single
# column, a longer row becomes a single row (presumably because the
# estimator rejects 1-D input -- confirm against the estimator in use).
result = []
for row in tX:
    if len(row) == 1:
        row = row.reshape(-1, 1)
    elif len(row) > 1:
        row = row.reshape(1, -1)
    result.append(ls.predict(row)[0])

# Build the prediction array once; both metrics below use it.
predicted = np.array(result)

# Analyze performance.
# print is called as a function: the Python 2 statement form is a syntax
# error on Python 3, and the printed text is the same either way.
print("Performance")
print("-----------")
print("Root Mean Squared Error", mean_squared_error(tY, predicted) ** 0.5)
print("Mean Absolute Error", mean_absolute_error(tY, predicted))

# Dump pickle files
joblib.dump(df_mapper, "mapper.pkl", compress=3)
joblib.dump(ls, "estimator.pkl", compress=3)

# Build pmml by running the external converter jar on the two pickles
# written above; the command string is a fixed literal.
command = "java -jar converter-executable-1.1-SNAPSHOT.jar --pkl-mapper-input mapper.pkl --pkl-estimator-input estimator.pkl --pmml-output mapper-estimator.pmml"
os.system(command)
F0min = np.min(F0nz)
F0skew = st.skew(F0nz)
F0kurt = st.kurtosis(F0nz)

tilt = []
mse = []

# Cut F0 right after every zero-valued sample, keep only the pieces that
# hold more than one sample, and drop the last sample of each kept piece.
F0nzlist = np.split(F0, np.where(F0 == 0)[0] + 1)
F0nzlist = [segment[:-1] for segment in F0nzlist if len(segment) > 1]

# Fit a straight line to every remaining piece that still has more than one
# sample; record its slope and the MSE of the linear reconstruction.
# NOTE(review): the slope and the MSE are both recorded only when the fitted
# slope is not NaN -- nesting inferred from the flattened source, confirm.
for k, segment in enumerate(F0nzlist):
    if len(segment) <= 1:
        continue
    t = np.arange(len(segment)) * tsf0
    pol = np.polyfit(t, segment, 1)
    if np.isnan(pol[0]):
        continue
    tilt.append(pol[0])
    f0rec = t * pol[0] + pol[1]
    msef0t = mean_squared_error(segment, f0rec)
    mse.append(msef0t)

# Summary statistics over the per-piece slopes and reconstruction errors
tilt_arr = np.asarray(tilt)
mse_arr = np.asarray(mse)
tiltmean = np.mean(tilt_arr)
msemean = np.mean(mse_arr)
tiltstd = np.std(tilt_arr)
msestd = np.std(mse_arr)
tiltmax = np.max(tilt_arr)
msemax = np.max(mse_arr)
tiltmin = np.min(tilt_arr)
msemin = np.min(mse_arr)
tiltskw = st.skew(tilt_arr)
mseskw = st.skew(mse_arr)
tiltku = st.kurtosis(tilt_arr)
mseku = st.kurtosis(mse_arr)

# Mean of the first kept piece
f01mean = np.mean(F0nzlist[0])
def performAdaBoostReg(train, test, features, output):
    """
    Ada Boost Regression

    Fits a default ``AdaBoostRegressor`` on ``train[features]`` against
    ``train[output]``, predicts on ``test[features]``, plots the actual
    values together with the predictions (predictions in red) and shows
    the figure.

    Returns a tuple ``(mean squared error, r2 score)`` computed between
    ``test[output]`` and the predictions.
    """
    regressor = AdaBoostRegressor()
    regressor.fit(train[features], train[output])

    actual = test[output]
    predicted = regressor.predict(test[features])

    # Actual values in the default colour, predictions overlaid in red
    plt.plot(actual)
    plt.plot(predicted, color='red')
    plt.show()

    error = mean_squared_error(actual, predicted)
    score = r2_score(actual, predicted)
    return error, score
def root_mean_squared_error(*args, **kwargs):
    """Return the square root of scikit-learn's mean squared error.

    Every positional and keyword argument is forwarded unchanged to
    ``sklearn.metrics.mean_squared_error``; the result is passed through
    ``math.sqrt``.
    """
    # Imported lazily, inside the function, as in the original
    from sklearn.metrics import mean_squared_error

    squared_error = mean_squared_error(*args, **kwargs)
    return math.sqrt(squared_error)
def internal_mse_wrapper(y_true, y_pred, sample_weight=None):
    """Call ``skm.mean_squared_error`` with a fixed ``multioutput`` setting.

    ``multioutput`` is not a parameter of this wrapper; it is read from the
    surrounding scope.  ``sample_weight`` is forwarded as given.
    """
    options = {
        "multioutput": multioutput,
        "sample_weight": sample_weight,
    }
    return skm.mean_squared_error(y_true, y_pred, **options)
def evaluate(Y_test, predictions, Y_test_inv_scaled, predictions_inv_scaled):
    """Print error metrics for normalized and inverse-normalized predictions.

    For the normalized pair (``Y_test``, ``predictions``) this prints RMSE,
    RMSE divided by the mean of ``Y_test``, MAE, MAPE (via ``compute_mape``),
    the off-diagonal correlation coefficient and r^2, and gathers them in
    ``normalized_metrics``.  For the inverse-normalized pair only the RMSE
    is computed and printed in the part of the function visible here.

    NOTE(review): no return statement is visible; the function appears to
    continue beyond this excerpt.
    """
    # evaluating the model on the normalized dataset
    mse = mean_squared_error(Y_test, predictions)
    rmse = mse ** 0.5
    print('\nNormalized RMSE: %.3f' % rmse)

    nrmse = rmse / np.mean(Y_test)
    print('Normalized NRMSE: %.3f' % nrmse)

    mae = mean_absolute_error(Y_test, predictions)
    print('Normalized MAE: %.3f' % mae)

    mape = compute_mape(Y_test, predictions)
    print('Normalized MAPE: %.3f' % mape)

    correlation = np.corrcoef(Y_test.T, predictions.T)
    corr_coef = correlation[0, 1]
    print("Normalized Correlation: %.3f" % corr_coef)

    r2 = r2_score(Y_test, predictions)
    print("Normalized r^2: %.3f" % r2)

    normalized_metrics = [rmse, nrmse, mae, mape, corr_coef, r2]

    # evaluating the model on the inverse-normalized dataset
    mse_inv = mean_squared_error(Y_test_inv_scaled, predictions_inv_scaled)
    rmse = mse_inv ** 0.5
    print('\nInverse-Normalized Outsample RMSE: %.3f' % rmse)
def _compute_mse(self, target_data, eval_data):
    """Compute the mean squared error between target and evaluated data.

    Args:
        target_data: the target data
        eval_data: the evaluated data

    Returns:
        The value returned by ``sklearn.metrics.mean_squared_error`` for
        the two inputs.
    """
    # scikit-learn is imported lazily, only when the measure is requested
    import sklearn.metrics

    error = sklearn.metrics.mean_squared_error(target_data, eval_data)
    return error
# save model
joblib.dump(model, 'models/model.joblib')
joblib.dump(column_order, 'models/column_order.joblib')

# MLflow logging is opt-in via the settings flag (compared as a string).
if settings.SHOULD_USE_MLFLOW != 'true':
    print('Not logging training run because MLFlow tracking server is not up, or its URL is not set in train.py')
else:
    # log training run to mlflow
    mlflow.set_tracking_uri(uri=f'http://{settings.MLFLOW_IP}:5000')
    # CI runs and development runs go to separate experiments
    mlflow.set_experiment('CI' if settings.CI == 'true' else 'dev')

    with mlflow.start_run() as run:
        # calculate evaluation metrics
        y_test_pred = model.predict(x_test)
        mse = metrics.mean_squared_error(y_true=y_test, y_pred=y_test_pred)
        rmse = sqrt(mse)
        r2_score = metrics.r2_score(y_true=y_test, y_pred=y_test_pred)

        # log hyperparameters to mlflow
        mlflow.log_param('n_estimators', N_ESTIMATORS)
        mlflow.log_param('max_depth', MAX_DEPTH)

        # log metrics to mlflow
        mlflow.log_metric("rmse_validation_data", rmse)
        mlflow.log_metric("r2_score_validation_data", r2_score)
# run n_trials for self-supervised sweep
for i in range(args.n_trials):
    # Fresh split of the counts for this trial, square-root transformed
    split_X, split_Y = ut.split_molecules(umis, data_split, overlap, random_state)
    umis_X = np.sqrt(split_X)
    umis_Y = np.sqrt(split_Y)

    # One truncated SVD per trial; every rank in k_range reuses it
    U, S, V = randomized_svd(umis_X, n_components=args.max_components)
    US = np.dot(U, np.diag(S))

    for j, k in enumerate(k_range):
        # Reconstruction from the first k components
        pca_X = np.dot(US[:, :k], V[:k, :])
        conv_exp = ut.convert_expectations(pca_X, data_split, data_split_complement)

        rec_loss[i, j] = mean_squared_error(umis_X, pca_X)
        mcv_loss[i, j] = mean_squared_error(umis_Y, conv_exp)
        gt1_loss[i, j] = mean_squared_error(exp_split_means, conv_exp)

# Bundle the sweep description and the loss arrays for serialization.
# gt0_loss is not filled in this loop; it comes from outside this fragment.
results = {
    "dataset": dataset_name,
    "method": "pca",
    "loss": "mse",
    "normalization": "sqrt",
    "param_range": k_range,
    "rec_loss": rec_loss,
    "mcv_loss": mcv_loss,
    "gt0_loss": gt0_loss,
    "gt1_loss": gt1_loss,
}

with open(output_file, "wb") as handle:
    pickle.dump(results, handle)