How to use sklearn - 10 common examples

To help you get started, we've selected ten sklearn examples based on popular ways the library is used in public projects. Each excerpt links back to its source file; a minimal self-contained sketch of the same pattern follows each one.


Example 1: slinderman / pyhawkes / experiments / synthetic_comparison.py (view on GitHub)
    aucs['bfgs'] = roc_auc_score(A_true,
                                 bfgs_model.W.ravel())

    if sgd_model is not None:
        assert isinstance(sgd_model, DiscreteTimeStandardHawkesModel)
        aucs['sgd'] = roc_auc_score(A_true,
                                     sgd_model.W.ravel())

    if gibbs_samples is not None:
        # Compute ROC based on mean value of W_effective in second half of samples
        Weff_samples = np.array([s.weight_model.W_effective for s in gibbs_samples])
        N_samples    = Weff_samples.shape[0]
        offset       = N_samples // 2
        Weff_mean    = Weff_samples[offset:,:,:].mean(axis=0)

        aucs['gibbs'] = roc_auc_score(A_true, Weff_mean.ravel())

    if gibbs_ss_samples is not None:
        # Compute ROC based on mean value of W_effective in second half of samples
        Weff_samples = np.array([s.weight_model.W_effective for s in gibbs_ss_samples])
        N_samples    = Weff_samples.shape[0]
        offset       = N_samples // 2
        Weff_mean    = Weff_samples[offset:,:,:].mean(axis=0)

        aucs['gibbs_ss'] = roc_auc_score(A_true, Weff_mean.ravel())

    if vb_models is not None:
        # Compute ROC based on E[A] under variational posterior
        aucs['vb'] = roc_auc_score(A_true,
                                   vb_models[-1].weight_model.expected_A().ravel())

    if svi_models is not None:
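The excerpt breaks off at the SVI branch, but every case above follows the same recipe: flatten the true adjacency matrix and the model's estimated weights, then pass both to roc_auc_score. A minimal sketch of that recipe with made-up data (the arrays below are illustrative, not from pyhawkes):

import numpy as np
from sklearn.metrics import roc_auc_score

# Hypothetical ground-truth adjacency and estimated edge weights.
A_true = np.array([[0, 1, 0], [1, 0, 1], [0, 0, 1]])
W_est = np.array([[0.1, 0.9, 0.3], [0.8, 0.2, 0.7], [0.4, 0.1, 0.6]])

# Higher AUC means the weight estimates rank true edges above absent ones.
print(roc_auc_score(A_true.ravel(), W_est.ravel()))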
Example 2: mne-tools / mne-python / examples / realtime / offline_testing / test_pipeline.py (view on GitHub)
y = np.concatenate(y)

from sklearn import preprocessing
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
# sklearn.cross_validation was removed in scikit-learn 0.20; use model_selection
from sklearn.model_selection import ShuffleSplit

cv = ShuffleSplit(n_splits=10, test_size=0.2)

pipe = True  # use pipeline?

for train_idx, test_idx in cv.split(X):
    y_train, y_test = y[train_idx], y[test_idx]

    # define transformer objects
    scaler = preprocessing.StandardScaler()
    concatenator = ConcatenateChannels()  # custom transformer defined earlier in the script
    clf = SVC(C=1, kernel='linear')

    if pipe is not True:

        # Concatenate channels
        concatenator = concatenator.fit(X[train_idx, :, :], y_train)
        X_train = concatenator.transform(X[train_idx, :, :])

        # Scale data across trials
        X_train = scaler.fit_transform(X_train)

        X_test = concatenator.transform(X[test_idx, :, :])
        X_test = scaler.transform(X_test)  # transform only; never refit the scaler on test data

        clf = clf.fit(X_train, y_train)
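The pipe flag presumably gates an equivalent Pipeline branch that the excerpt cuts off before reaching. A minimal sketch of that variant on synthetic data (the shapes and labels below are made up):

import numpy as np
from sklearn.model_selection import ShuffleSplit
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

X = np.random.randn(40, 6)   # hypothetical feature matrix
y = np.repeat([0, 1], 20)    # hypothetical binary labels

cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=0)
for train_idx, test_idx in cv.split(X):
    # The pipeline fits the scaler on the training fold and applies it to both folds.
    clf = Pipeline([('scaler', StandardScaler()),
                    ('svc', SVC(C=1, kernel='linear'))])
    clf.fit(X[train_idx], y[train_idx])
    print(clf.score(X[test_idx], y[test_idx]))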
Example 3: onnx / sklearn-onnx / tests / test_sklearn_pipeline.py (view on GitHub)
def test_combine_inputs_floats_ints(self):
        data = [[0, 0.0], [0, 0.0], [1, 1.0], [1, 1.0]]
        scaler = StandardScaler()
        scaler.fit(data)
        model = Pipeline([("scaler1", scaler), ("scaler2", scaler)])

        model_onnx = convert_sklearn(
            model,
            "pipeline",
            [
                ("input1", Int64TensorType([None, 1])),
                ("input2", FloatTensorType([None, 1])),
            ],
        )
        self.assertTrue(model_onnx is not None)
        self.assertTrue(len(model_onnx.graph.node[-1].output) == 1)
        data = numpy.array(data)
        data = {
            "input1": data[:, 0].reshape((-1, 1)).astype(numpy.int64),
            "input2": data[:, 1].reshape((-1, 1)).astype(numpy.float32),
        }
Example 4: vecxoz / vecstack / tests / test_sklearn_api_regression.py (view on GitHub)
def test_custom_metric_and_scores_1_estimator(self):

        model = LinearRegression()
        scorer = make_scorer(mean_squared_error)
        scores_1 = cross_val_score(model, X_train, y=y_train,
                                   cv=n_folds, scoring=scorer,
                                   n_jobs=1, verbose=0)
        
        # fit then transform
        estimators = [('lr', LinearRegression())]
        stack = StackingTransformer(estimators, regression=True,
                                    metric=mean_squared_error,
                                    n_folds=n_folds, shuffle=False,
                                    variant='B', random_state=0,
                                    verbose=0)
        stack = stack.fit(X_train, y_train)
        scores_2 = stack.scores_[0].copy()
        
        # mean and std
        mean_1 = np.mean(scores_1)
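StackingTransformer here comes from vecstack (the package under test), not sklearn; it records per-fold metric values in scores_, which the test then compares against plain cross_val_score. A minimal sketch of the same pattern on synthetic regression data:

import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from vecstack import StackingTransformer  # third-party package

X_train, y_train = make_regression(n_samples=100, n_features=5, random_state=0)

stack = StackingTransformer([('lr', LinearRegression())], regression=True,
                            metric=mean_squared_error, n_folds=4,
                            shuffle=False, variant='B', random_state=0)
stack = stack.fit(X_train, y_train)

print(np.mean(stack.scores_[0]), np.std(stack.scores_[0]))  # per-fold MSE
S_train = stack.transform(X_train)  # out-of-fold predictions, one column per estimator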
Example 5: rbaravalle / imfractal / tests / test_classifier_sea.py (view on GitHub)
    tr = args.transform[0]
    if tr in transformation_values:
        transform_str = tr
        if args.equalize[0] in true_values:
            transform_str += '_eq'
    else:
        transform_str = "no_transform"
    dfs_str = str(args.dfs[0])

    clf = cfr.fit(X_train, y_train)
    outdir_clf = filename_model_RF_tt.format(resol, transform_str, dfs_str)
    if not os.path.exists(model_directory):
        os.makedirs(model_directory)
    print("Saving classifier...", outdir_clf)
    joblib.dump(clf, outdir_clf)

    clf2 = cfr2.fit(X_train, y_train)
    outdir_clf2 = filename_model_SVC_tt.format(resol, transform_str, dfs_str)
    print("Saving classifier...", outdir_clf2)
    joblib.dump(clf2, outdir_clf2)

    print("RF, SVM:", round(clf.score(X_test, y_test), 4), round(clf2.score(X_test, y_test), 4))
Example 6: onnx / sklearn-onnx / tests / test_algebra_onnx_operator_mixin_syntax.py (view on GitHub)
def test_way4_mixin_fit(self):

        X = np.arange(20).reshape(10, 2)
        try:
            tr = wrap_as_onnx_mixin(KMeans(n_clusters=2))
        except KeyError as e:
            assert "SklearnGaussianProcessRegressor" in str(e)
            return
        tr.fit(X)

        onx = tr.to_onnx(X.astype(np.float32))

        dump_data_and_model(
            X.astype(np.float32), tr, onx,
            basename="MixinWay4OnnxMixin2")
Example 7: TeamHG-Memex / eli5 / tests / test_sklearn_explain_prediction.py (view on GitHub)
    [SVC(kernel='linear', random_state=42, probability=True)],
    [NuSVC(kernel='linear', random_state=42)],
    [NuSVC(kernel='linear', random_state=42, decision_function_shape='ovr')],
])
def test_explain_linear_binary(newsgroups_train_binary, clf):
    assert_binary_linear_classifier_explained(newsgroups_train_binary, clf,
                                              explain_prediction)
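The parametrized list (its head is truncated in this excerpt) covers the linear-kernel SVM variants eli5 can explain. A minimal sketch of explain_prediction on a toy text corpus (the documents and labels are made up):

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.svm import SVC
import eli5

docs = ["good movie", "bad movie", "great film", "awful film"]
y = [1, 0, 1, 0]

vec = CountVectorizer()
clf = SVC(kernel='linear', random_state=42, probability=True)
clf.fit(vec.fit_transform(docs), y)

# Attribute the prediction for one document back to its terms.
expl = eli5.explain_prediction(clf, docs[0], vec=vec)
print(eli5.format_as_text(expl))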
Example 8: lyst / lightfm / tests / test_evaluation.py (view on GitHub)
    for user_id, row in enumerate(ground_truth):
        uid_array = np.empty(no_items, dtype=np.int32)
        uid_array.fill(user_id)
        predictions = model.predict(uid_array, pid_array,
                                    user_features=user_features,
                                    item_features=item_features,
                                    num_threads=4)

        true_pids = row.indices[row.data == 1]

        grnd = np.zeros(no_items, dtype=np.int32)
        grnd[true_pids] = 1

        if len(true_pids):
            scores.append(roc_auc_score(grnd, predictions))

    return scores
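This helper re-implements per-user AUC by hand: predict scores for every item, build a binary relevance vector from the ground-truth row, and hand both to roc_auc_score (lightfm.evaluation.auc_score does the same thing in one call). A minimal end-to-end sketch, assuming lightfm and scipy are installed and using a made-up 4-user by 6-item interaction matrix:

import numpy as np
from scipy.sparse import coo_matrix
from lightfm import LightFM
from sklearn.metrics import roc_auc_score

# Hypothetical binary interactions: rows are users, columns are items.
ground_truth = coo_matrix(np.array([
    [1, 0, 1, 0, 0, 1],
    [0, 1, 0, 1, 0, 0],
    [1, 1, 0, 0, 1, 0],
    [0, 0, 1, 1, 0, 1],
], dtype=np.int32))

model = LightFM(loss='warp')
model.fit(ground_truth, epochs=10)

no_items = ground_truth.shape[1]
pid_array = np.arange(no_items, dtype=np.int32)
scores = []
for user_id in range(ground_truth.shape[0]):
    uid_array = np.full(no_items, user_id, dtype=np.int32)
    predictions = model.predict(uid_array, pid_array)
    truth = ground_truth.tocsr()[user_id].toarray().ravel()
    scores.append(roc_auc_score(truth, predictions))
print(np.mean(scores))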
Example 9: jaredthecoder / BioPy / neuralnetworks / backpropagation / run_tests.py (view on GitHub)
logger.info("###################################RUNNING EXPERIMENT NUM %s#########################", str(experiment_number))
    logger.info("Program Arguments:")
    args_dict = vars(args)
    for key, value in args_dict.items():  # dict.iteritems() is Python 2 only
        logger.info("%s=%s" % (str(key), str(value)))

    test_suite = Tests(logger, args)
    target_test, Y_pred, cost_list, cost_test_list, learning_rates, rmse = test_suite.run_tests()

    Y_pred_copy = np.copy(Y_pred)
    accuracy_score_Y_pred =  np.rint(Y_pred_copy).astype(int)

    if args.test_type != 'f':
        logger.info('###################################Accuracy Results###############################')
        logger.info('Accuracy: ' + str(accuracy_score(target_test, accuracy_score_Y_pred)))
        logger.info('\n' + str(classification_report(target_test, accuracy_score_Y_pred)))
    else:
        logger.info('###################################Accuracy Results###############################')

        target_test_1d = target_test.ravel()
        Y_pred_1d = Y_pred.ravel()
        distance = 0

        for i in range(len(target_test_1d)):
            distance += abs(Y_pred_1d[i] - target_test_1d[i])

        avg_distance = distance / len(target_test_1d)
        logger.info("Accuracy Score: %s" % (str(avg_distance)))
        logger.info("NOTE: Accuracy Score is avg. distance between expected and predicted y-values")
        logger.info("NOTE: Computed using the following code:")
        logger.info("for i in range(len(target_test_1d)):")
        logger.info("\tdistance += abs(Y_pred_1d[i] - target_test_1d[i])")
Example 10: PUTvision / decision_tree / decision_trees / dataset_tester.py (view on GitHub)
def _report_classifier(clf, expected: np.ndarray, predicted: np.ndarray):
    print("Detailed classification report:")

    print("Classification report for classifier %s:\n%s\n"
          % (clf, metrics.classification_report(expected, predicted)))
    cm = metrics.confusion_matrix(expected, predicted)
    cm = cm / cm.sum(axis=1)[:, None] * 100  # row-normalise: each row sums to 100%

    #np.set_printoptions(formatter={'float': '{: 2.2f}'.format})
    print(f"Confusion matrix:\n {cm}")

    f1_score = metrics.f1_score(expected, predicted, average='weighted')
    precision = metrics.precision_score(expected, predicted, average='weighted')
    recall = metrics.recall_score(expected, predicted, average='weighted')
    accuracy = metrics.accuracy_score(expected, predicted)
    print(f"f1_score: {f1_score:{2}.{4}}")
    print(f"precision: {precision:{2}.{4}}")
    print(f"recall: {recall:{2}.{4}}")
    print(f"accuracy: {accuracy:{2}.{4}}")