How to use the sklearn.externals.joblib.load function in sklearn

To help you get started, we’ve selected a few sklearn examples based on popular ways the function is used in public projects.

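joblib.load deserializes a Python object, typically a fitted estimator, that was previously written to disk with joblib.dump. Before turning to the project examples below, here is a minimal sketch of that round trip. Note that sklearn.externals.joblib was deprecated in scikit-learn 0.21 and removed in 0.23, so new code should import the standalone joblib package directly; the file name used here is a placeholder.

import joblib  # in older code: from sklearn.externals import joblib
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

X, y = load_iris(return_X_y=True)
clf = LogisticRegression(max_iter=1000).fit(X, y)

# Persist the fitted estimator, then restore it (e.g. in another process).
joblib.dump(clf, 'model.pkl')
restored = joblib.load('model.pkl')
print(restored.predict(X[:5]))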

github AnthonyMRios / leml / example / example_parallel.py
def main():
    print 'Loading data'
    sys.stdout.flush()
    X = joblib.load('./test_data/bibtex-train.pkl')
    labels = joblib.load('./test_data/bibtex-Y-train.pkl')
    X_test = joblib.load('./test_data/bibtex-test.pkl')
    labels_test = joblib.load('./test_data/bibtex-Y-test.pkl')
    print X.shape, labels.shape, X.getformat(), labels.getformat()

    print 'Training LEML'
    sys.stdout.flush()
    t0 = time()
    leml = LEML.get_instance('parallel', num_factors=64, num_iterations=25, reg_param=1., verbose=True)
    leml.fit(X.tocsc(), labels.tocsc().astype('float'))
    print 'Train time', time() - t0, 'seconds'
    sys.stdout.flush()
    preds = leml.predict(X_test)
    preds_top_k = preds.argsort()[:,::-1]
    preds_top_k = preds_top_k[:,:1]
    new_preds = np.zeros((preds.shape[0], preds.shape[1]))
    new_preds[np.arange(preds.shape[0]).repeat(1),preds_top_k.flatten()] = 1
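The example above assumes the BibTeX features and labels were pickled as SciPy sparse matrices beforehand. joblib round-trips any picklable Python object, not just estimators; a minimal sketch of dumping and reloading a sparse matrix (file name and sizes are placeholders):

import joblib
import scipy.sparse as sp

X = sp.random(1000, 500, density=0.01, format='csr')  # stand-in feature matrix
joblib.dump(X, 'features.pkl')

X_loaded = joblib.load('features.pkl')
print(X_loaded.shape, X_loaded.getformat())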
github YCG09 / tf-text-classification / eval.py
            # Evaluate the model
            print("Start evaluating...\n")
            y_logits = []
            start = time.time()
            data_size = len(x_eval)
            # Generate eval batches
            eval_batches = data_helpers.batch_iter(x_eval, FLAGS.batch_size, shuffle=False)
            for x_batch in eval_batches:
                feed_dict[model.input_x] = x_batch
                if FLAGS.model_type == 'RNN':
                    feed_dict[model.seq_len] = data_helpers.real_len(x_batch)
                batch_predictions = sess.run(model.logits, feed_dict=feed_dict)
                y_logits.extend(batch_predictions)
            print("Mission complete, total number of eval examples: {}, evaluating speed: {:.0f} examples/sec\n".format(
                data_size, data_size / (time.time() - start)))
            label_transformer = joblib.load(os.path.join(root_dir, 'label_transformer.pkl'))
            y_logits_original = label_transformer.inverse_transform(np.array(y_logits))

            # Print accuracy if eval examples have label
            if FLAGS.has_label == True:
                df = pd.DataFrame([line.strip().split("\t") for line in open(FLAGS.eval_data, 'r', encoding='utf-8').readlines()
                    if len(line.strip().split("\t")) == 2], columns=['content', 'real_label'])
                y_eval_original = label_transformer.inverse_transform(y_eval)
                eval_accuracy = sum(y_logits_original == y_eval_original) / data_size
                print("Evaluating Accuracy: {:.3f}\n".format(eval_accuracy))
                print("Precision, Recall and F1-Score:\n\n", classification_report(y_eval_original, y_logits_original))
            else:
                df = pd.DataFrame([line.strip() for line in open(FLAGS.eval_data, 'r', encoding='utf-8').readlines()
                    if line.strip()], columns=['content'])

            # Save prediction result
            timestamp = str(int(time.time()))
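The script relies on a label_transformer.pkl produced at training time and uses its inverse_transform to map network outputs back to label names. A minimal sketch of that save/load round trip, assuming a scikit-learn LabelBinarizer (the project's actual transformer class may differ):

import joblib
import numpy as np
from sklearn.preprocessing import LabelBinarizer

# Training side: fit on the label names and persist the transformer.
label_transformer = LabelBinarizer()
label_transformer.fit(['news', 'sports', 'tech'])
joblib.dump(label_transformer, 'label_transformer.pkl')

# Evaluation side: reload it and decode per-class scores via argmax.
label_transformer = joblib.load('label_transformer.pkl')
y_logits = np.array([[0.1, 0.7, 0.2], [0.8, 0.1, 0.1]])
print(label_transformer.inverse_transform(y_logits))  # ['sports' 'news']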
github alzmcr / kaggle-Fast-Iron / train_and_predict.py
compute_importances = True, random_state=7354)
if trainRF_models:
    print "fitting random forest regressor"
    init_time = time.time()
    rf1.fit(train_fea.drop(colToDropRF1, axis=1), train)
    print "RF1 done - elapsed time"+str((time.time() - init_time) / 60)
    init_time = time.time()
    rf2.fit(train_fea.drop(colToDropRF2, axis=1), train)
    print "RF2 done - elapsed time"+str((time.time() - init_time) / 60)
    ## DUMP MODELS
    if dumpModels:
        joblib.dump(rf1,'Models\\rf1_final.pk1')
        joblib.dump(rf2,'Models\\rf2_final.pk1')
else:
    print "loading serialized model - RF"
    rf1 = joblib.load('Models\\rf1_final.pk1')
    rf2 = joblib.load('Models\\rf2_final.pk1')    

### GRADIENT BOOSTING REGRESSORS - set trainGB_models for switch training on\off
gb1 = GradientBoostingRegressor(n_estimators=400,max_depth=8, random_state=9874, loss='huber')
gb2 = GradientBoostingRegressor(n_estimators=400,max_depth=8, random_state=9874, loss='huber')

if trainGB_models:
    print "fitting gradient boosting regressor"
    init_time = time.time()
    gb1.fit(train_fea.drop(colToDropGB1, axis=1), train)
    print "GB1 done - elapsed time"+str((time.time() - init_time) / 60)
    init_time = time.time()
    gb2.fit(train_fea.drop(colToDropGB2, axis=1), train)
    print "GB2 done - elapsed time"+str((time.time() - init_time) / 60)
    ## DUMP MODELS
    if dumpModels:
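The flags trainRF_models, trainGB_models and dumpModels above switch between fitting fresh models (and serializing them with joblib.dump) and reloading previously saved ones with joblib.load. A minimal, self-contained sketch of the same train-or-load toggle, with placeholder path and data:

import os
import joblib
from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor

X_train, y_train = make_regression(n_samples=200, n_features=10, random_state=0)
model_path = 'rf1_final.pkl'  # placeholder path

if os.path.exists(model_path):
    # Reuse the serialized model from a previous run.
    rf1 = joblib.load(model_path)
else:
    # Train from scratch and persist for next time.
    rf1 = RandomForestRegressor(n_estimators=100, random_state=7354)
    rf1.fit(X_train, y_train)
    joblib.dump(rf1, model_path)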
github xiangzhemeng / Kaggle-Twitter-Sentiment-Analysis / cnn3.py
import numpy as np
import csv
from keras.models import Sequential
from keras.layers import Dense
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from keras.layers import LSTM
from sklearn.externals import joblib

train_sequences = np.array(joblib.load('train_features_FT_50.sav'))
test_sequences = np.array(joblib.load('test_feature_FT_50.sav'))
y = np.array(joblib.load('train_labels.sav'))

# CNN model
model = Sequential()
model.add(Dense(50, input_dim=50))
model.add(Conv1D(padding="same", kernel_size=3, filters=32, activation="relu"))
model.add(MaxPooling1D(pool_size=2))
model.add(LSTM(100))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())
print("Build model finished!")

model.fit(train_sequences, y, validation_split=0.1, epochs=1, batch_size=128, verbose=1, shuffle=True)
print("Fit model finished!")
github neptune-ml / steppy-toolkit / toolkit / sklearn_transformers / models.py
def load(self, filepath):
        self.estimator = joblib.load(filepath)
        return self
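Here joblib.load restores a previously persisted estimator into a wrapper object, and returning self allows calls to be chained. A minimal sketch of the matching persist/load pair; the class and method names below are placeholders, not the project's API:

import joblib

class EstimatorWrapper:
    def __init__(self, estimator=None):
        self.estimator = estimator

    def persist(self, filepath):
        # Serialize only the wrapped estimator, not the wrapper itself.
        joblib.dump(self.estimator, filepath)
        return self

    def load(self, filepath):
        # Restore the estimator and return self so calls can be chained.
        self.estimator = joblib.load(filepath)
        return self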
github uhh-lt / sensegram / vector_representations / sparse_word_vectors.py
def load(self, lmi_fpath):
        """ Load a pre-built model from numpy files. """

        matrix_fpath = lmi_fpath + self.MATRIX_EXT
        vectorizer_fpath = lmi_fpath + self.VECTORIZER_EXT
        word2idx_fpath = lmi_fpath + self.WORD2IDX_EXT

        if exists(matrix_fpath) and exists(vectorizer_fpath) and exists(word2idx_fpath):
            word_vectors = joblib.load(matrix_fpath)
            vectorizer = joblib.load(vectorizer_fpath)
            word2idx = joblib.load(word2idx_fpath)
            print("Loaded word vectors from:", lmi_fpath)
        else:
            print("Some input files are missing. Cannot load the model.")
            print(exists(matrix_fpath), matrix_fpath)
            print(exists(vectorizer_fpath), vectorizer_fpath)
            print(exists(word2idx_fpath), word2idx_fpath)
            word_vectors = None
            vectorizer = None
            word2idx = None

        return word_vectors, vectorizer, word2idx
github minerva-ml / open-solution-home-credit / feature_extraction.py
def load(self, filepath):
        self.categorical_encoder = joblib.load(filepath)
        return self
github gorlins / salted / salted_demo.py
def run(self):

        with self.input().open() as f:
            svc = joblib.load(f)

        digits = load_digits()
        predictions = svc.predict(digits.data[1::2])
        with self.output().open('w') as f:
            f.write('Accuracy: {}'.format(
                (predictions == digits.target[1::2]).mean()
            ))
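Unlike most examples on this page, this Luigi task passes an open file handle to joblib.load rather than a path string; joblib.load accepts either, provided file objects are opened in binary mode. A minimal sketch of the file-object variant outside Luigi (the file name is a placeholder):

import joblib
from sklearn.datasets import load_digits
from sklearn.svm import SVC

digits = load_digits()
svc = SVC(gamma=0.001).fit(digits.data[::2], digits.target[::2])

with open('svc.pkl', 'wb') as f:
    joblib.dump(svc, f)

with open('svc.pkl', 'rb') as f:   # binary mode is required
    svc_loaded = joblib.load(f)

predictions = svc_loaded.predict(digits.data[1::2])
print('Accuracy: {}'.format((predictions == digits.target[1::2]).mean()))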
github agr-ayush / Landsat-Time-Series-Analysis-for-Multi-Temporal-Land-Cover-Classification / GUI.py
def loadmodel(test_file):
    print("Loading Model........................................................")
    if v.get()==0:
        rf = joblib.load('DefaultModel.pkl') 
    else:
        rf = joblib.load(str(directoryname + '\LandCoverModel.pkl'))
    print("Predicting...........................................................")
    if v.get()==0:
        predict_arr = gdal.Open(str(test_file + "\\stacked_image.tif"))
    else:
        predict_arr = gdal.Open(str(test_file + "\\stacked_image1.tif"))
    img_predict = predict_arr.ReadAsArray()
    img_predict = img_predict.T
    B = []
    count=0
    for i in range(len(img_predict[1])):
        class_predict = rf.predict(img_predict[:,i,:])
        B.append(class_predict)
    B = np.array(B)
    a,b = B.shape
github intel / Resilient-ML-Research-Platform / atdml / tasks / ml / predict_single_file_pattern.py
        out_f.close()
        print "INFO: End Feature list ===================================="
    
    predict_val=None
    sing_label_pred=None
    
    
    #print "lib_mode=",lib_mode
    # fit model here =============================================== FIT ===============================
    if lib_mode == "scikit": #"SKlean":
        print "INFO: Predict by sklearn library ***"
        # get the ML model
        model_file  = os.path.join(local_out_dir , row_id_str + '_model/' + row_id_str + '.pkl')

        # load clf from model file
        sk_model = joblib.load(model_file)
        clf_coef_arr=None
        intercept_arr=None
        
        print "INFO: clf=",sk_model
        #print "sk_model __class__=",sk_model.__class__," __name__=",sk_model.__class__.__name__

        try:
            if type(sk_model) in ( classes.SVC , classes.NuSVC) :# svm didn't have coef_
                col_num=sk_model.support_vectors_.shape[1]
            elif learning_algorithm in ('kmeans') :
                print "INFO: Kmeans cluster_centers_ =", sk_model.cluster_centers_ 
                # to convert sample to np array
                col_num=dic_len
            else: #linear only
                col_num = len(sk_model.coef_[0])
                clf_coef_arr=sk_model.coef_[0]
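After loading the pickled model, the script above branches on the estimator type to recover the number of input features (support_vectors_ for SVMs, cluster_centers_ for k-means, coef_ for linear models). A minimal sketch of the same idea for a linear classifier, with placeholder data and file name:

import joblib
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

X, y = make_classification(n_samples=200, n_features=20, random_state=0)
clf = LogisticRegression(max_iter=1000).fit(X, y)
joblib.dump(clf, 'model.pkl')

sk_model = joblib.load('model.pkl')
clf_coef_arr = sk_model.coef_[0]      # per-feature weights
intercept_arr = sk_model.intercept_   # bias term
col_num = len(clf_coef_arr)           # number of input features
print(col_num, clf_coef_arr[:3], intercept_arr)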