def main():
    print 'Loading data'
    sys.stdout.flush()
    X = joblib.load('./test_data/bibtex-train.pkl')
    labels = joblib.load('./test_data/bibtex-Y-train.pkl')
    X_test = joblib.load('./test_data/bibtex-test.pkl')
    labels_test = joblib.load('./test_data/bibtex-Y-test.pkl')
    print X.shape, labels.shape, X.getformat(), labels.getformat()
    print 'Training LEML'
    sys.stdout.flush()
    t0 = time()
    leml = LEML.get_instance('parallel', num_factors=64, num_iterations=25, reg_param=1., verbose=True)
    leml.fit(X.tocsc(), labels.tocsc().astype('float'))
    print 'Train time', time() - t0, 'seconds'
    sys.stdout.flush()
    preds = leml.predict(X_test)
    # keep only the top-1 predicted label per sample and turn it into a one-hot matrix
    preds_top_k = preds.argsort()[:, ::-1]
    preds_top_k = preds_top_k[:, :1]
    new_preds = np.zeros((preds.shape[0], preds.shape[1]))
    new_preds[np.arange(preds.shape[0]).repeat(1), preds_top_k.flatten()] = 1
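# --- hedged sketch: the imports and entry point the snippet above assumes ---
# (the LEML module path and whether joblib comes from the standalone package or
#  sklearn.externals are assumptions; adjust to the actual project layout)
import sys
from time import time

import numpy as np
import joblib  # or: from sklearn.externals import joblib (older scikit-learn)

import LEML  # assumed module exposing LEML.get_instance(...)

if __name__ == '__main__':
    main()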
# Evaluate the model
print("Start evaluating...\n")
y_logits = []
start = time.time()
data_size = len(x_eval)
# Generate eval batches
eval_batches = data_helpers.batch_iter(x_eval, FLAGS.batch_size, shuffle=False)
for x_batch in eval_batches:
    feed_dict[model.input_x] = x_batch
    if FLAGS.model_type == 'RNN':
        feed_dict[model.seq_len] = data_helpers.real_len(x_batch)
    batch_predictions = sess.run(model.logits, feed_dict=feed_dict)
    y_logits.extend(batch_predictions)
print("Mission complete, total number of eval examples: {}, evaluating speed: {:.0f} examples/sec\n".format(
    data_size, data_size / (time.time() - start)))
label_transformer = joblib.load(os.path.join(root_dir, 'label_transformer.pkl'))
y_logits_original = label_transformer.inverse_transform(np.array(y_logits))
# Print accuracy if the eval examples carry labels
if FLAGS.has_label:
    df = pd.DataFrame([line.strip().split("\t") for line in open(FLAGS.eval_data, 'r', encoding='utf-8').readlines()
                       if len(line.strip().split("\t")) == 2], columns=['content', 'real_label'])
    y_eval_original = label_transformer.inverse_transform(y_eval)
    eval_accuracy = sum(y_logits_original == y_eval_original) / data_size
    print("Evaluating Accuracy: {:.3f}\n".format(eval_accuracy))
    print("Precision, Recall and F1-Score:\n\n", classification_report(y_eval_original, y_logits_original))
else:
    df = pd.DataFrame([line.strip() for line in open(FLAGS.eval_data, 'r', encoding='utf-8').readlines()
                       if line.strip()], columns=['content'])
# Save prediction result
timestamp = str(int(time.time()))
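# --- hedged sketch: one way the prediction result might be written out ---
# (the output directory, file name, and column name are assumptions; the
#  original snippet is truncated right after computing `timestamp`)
df['predicted_label'] = y_logits_original
out_path = os.path.join(root_dir, 'predictions_{}.csv'.format(timestamp))
df.to_csv(out_path, index=False, encoding='utf-8')
print("Predictions saved to {}\n".format(out_path))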
# (the RandomForestRegressor constructors are truncated in this excerpt; the visible tail was:
#  ... compute_importances=True, random_state=7354)
if trainRF_models:
    print "fitting random forest regressor"
    init_time = time.time()
    rf1.fit(train_fea.drop(colToDropRF1, axis=1), train)
    print "RF1 done - elapsed time " + str((time.time() - init_time) / 60)
    init_time = time.time()
    rf2.fit(train_fea.drop(colToDropRF2, axis=1), train)
    print "RF2 done - elapsed time " + str((time.time() - init_time) / 60)
    ## DUMP MODELS
    if dumpModels:
        joblib.dump(rf1, 'Models\\rf1_final.pk1')
        joblib.dump(rf2, 'Models\\rf2_final.pk1')
else:
    print "loading serialized model - RF"
    rf1 = joblib.load('Models\\rf1_final.pk1')
    rf2 = joblib.load('Models\\rf2_final.pk1')

### GRADIENT BOOSTING REGRESSORS - set trainGB_models to switch training on/off
gb1 = GradientBoostingRegressor(n_estimators=400, max_depth=8, random_state=9874, loss='huber')
gb2 = GradientBoostingRegressor(n_estimators=400, max_depth=8, random_state=9874, loss='huber')
if trainGB_models:
    print "fitting gradient boosting regressor"
    init_time = time.time()
    gb1.fit(train_fea.drop(colToDropGB1, axis=1), train)
    print "GB1 done - elapsed time " + str((time.time() - init_time) / 60)
    init_time = time.time()
    gb2.fit(train_fea.drop(colToDropGB2, axis=1), train)
    print "GB2 done - elapsed time " + str((time.time() - init_time) / 60)
    ## DUMP MODELS
    if dumpModels:
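        # hedged completion: the excerpt cuts off here; dumping the GB models
        # presumably mirrors the RF branch above (file names are assumptions)
        joblib.dump(gb1, 'Models\\gb1_final.pk1')
        joblib.dump(gb2, 'Models\\gb2_final.pk1')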
import numpy as np
import csv
from keras.models import Sequential
from keras.layers import Dense
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from keras.layers import LSTM
from sklearn.externals import joblib
train_sequences = np.array(joblib.load('train_features_FT_50.sav'))
test_sequences = np.array(joblib.load('test_feature_FT_50.sav'))
y = np.array(joblib.load('train_labels.sav'))
# CNN + LSTM model
model = Sequential()
model.add(Dense(50, input_dim=50))
model.add(Conv1D(padding="same", kernel_size=3, filters=32, activation="relu"))
model.add(MaxPooling1D(pool_size=2))
model.add(LSTM(100))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())
print("Build model finished!")
model.fit(train_sequences, y, validation_split=0.1, epochs=1, batch_size=128, verbose=1, shuffle=True)
print("Fit model finished!")
def load(self, filepath):
    self.estimator = joblib.load(filepath)
    return self
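# hedged counterpart: the matching save method is presumably the mirror image
# (the method name and use of joblib.dump are assumptions)
def save(self, filepath):
    joblib.dump(self.estimator, filepath)
    return self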
def load(self, lmi_fpath):
    """ Load a pre-built model from numpy files. """
    matrix_fpath = lmi_fpath + self.MATRIX_EXT
    vectorizer_fpath = lmi_fpath + self.VECTORIZER_EXT
    word2idx_fpath = lmi_fpath + self.WORD2IDX_EXT
    if exists(matrix_fpath) and exists(vectorizer_fpath) and exists(word2idx_fpath):
        word_vectors = joblib.load(matrix_fpath)
        vectorizer = joblib.load(vectorizer_fpath)
        word2idx = joblib.load(word2idx_fpath)
        print("Loaded word vectors from:", lmi_fpath)
    else:
        print("Some input files are missing. Cannot load the model.")
        print(exists(matrix_fpath), matrix_fpath)
        print(exists(vectorizer_fpath), vectorizer_fpath)
        print(exists(word2idx_fpath), word2idx_fpath)
        word_vectors = None
        vectorizer = None
        word2idx = None
    return word_vectors, vectorizer, word2idx
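# hedged counterpart: persisting the same three artifacts with joblib.dump
# (the method name is an assumption; the *_EXT constants come from the class above)
def save(self, lmi_fpath, word_vectors, vectorizer, word2idx):
    joblib.dump(word_vectors, lmi_fpath + self.MATRIX_EXT)
    joblib.dump(vectorizer, lmi_fpath + self.VECTORIZER_EXT)
    joblib.dump(word2idx, lmi_fpath + self.WORD2IDX_EXT)
    print("Saved word vectors to:", lmi_fpath)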
def load(self, filepath):
    self.categorical_encoder = joblib.load(filepath)
    return self
def run(self):
    with self.input().open() as f:
        svc = joblib.load(f)
    digits = load_digits()
    predictions = svc.predict(digits.data[1::2])
    with self.output().open('w') as f:
        f.write('Accuracy: {}'.format(
            (predictions == digits.target[1::2]).mean()
        ))
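# --- hedged sketch: the Luigi task this run() method plausibly lives in ---
# (the task and target names are assumptions; only run() appears in the snippet above)
import luigi
from sklearn.datasets import load_digits
from sklearn.externals import joblib  # or: import joblib

class EvaluateDigits(luigi.Task):
    def requires(self):
        return TrainDigits()  # assumed upstream task that dumps the fitted SVC

    def output(self):
        return luigi.LocalTarget('digits_accuracy.txt')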
def loadmodel(test_file):
    print("Loading Model........................................................")
    if v.get() == 0:
        rf = joblib.load('DefaultModel.pkl')
    else:
        rf = joblib.load(str(directoryname + '\\LandCoverModel.pkl'))
    print("Predicting...........................................................")
    if v.get() == 0:
        predict_arr = gdal.Open(str(test_file + "\\stacked_image.tif"))
    else:
        predict_arr = gdal.Open(str(test_file + "\\stacked_image1.tif"))
    img_predict = predict_arr.ReadAsArray()
    img_predict = img_predict.T
    B = []
    count = 0
    # predict a class for each pixel, one slice of the transposed raster at a time
    for i in range(len(img_predict[1])):
        class_predict = rf.predict(img_predict[:, i, :])
        B.append(class_predict)
    B = np.array(B)
    a, b = B.shape
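    # --- hedged continuation: writing the classified array back out as a GeoTIFF ---
    # (the output file name is an assumption, and B may need a transpose depending
    #  on how the original raster is oriented)
    driver = gdal.GetDriverByName('GTiff')
    out_ds = driver.Create('classified.tif', predict_arr.RasterXSize, predict_arr.RasterYSize, 1, gdal.GDT_Byte)
    out_ds.SetGeoTransform(predict_arr.GetGeoTransform())
    out_ds.SetProjection(predict_arr.GetProjection())
    out_ds.GetRasterBand(1).WriteArray(B.T)
    out_ds.FlushCache()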
out_f.close()
print "INFO: End Feature list ===================================="
predict_val = None
sing_label_pred = None
#print "lib_mode=", lib_mode
# fit model here =============================================== FIT ===============================
if lib_mode == "scikit":  # "SKlearn"
    print "INFO: Predict by sklearn library ***"
    # get the ML model
    model_file = os.path.join(local_out_dir, row_id_str + '_model/' + row_id_str + '.pkl')
    # load clf from model file
    sk_model = joblib.load(model_file)
    clf_coef_arr = None
    intercept_arr = None
    print "INFO: clf=", sk_model
    #print "sk_model __class__=", sk_model.__class__, " __name__=", sk_model.__class__.__name__
    try:
        if type(sk_model) in (classes.SVC, classes.NuSVC):  # kernel SVMs do not expose coef_
            col_num = sk_model.support_vectors_.shape[1]
        elif learning_algorithm in ('kmeans',):
            print "INFO: Kmeans cluster_centers_ =", sk_model.cluster_centers_
            # to convert sample to np array
            col_num = dic_len
        else:  # linear models only
            col_num = len(sk_model.coef_[0])
            clf_coef_arr = sk_model.coef_[0]
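# --- hedged, self-contained round trip (not taken from any snippet above) ---
# the minimal pattern all of these examples share: fit, dump, load, reuse
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
import joblib

X, y = load_iris(return_X_y=True)
clf = LogisticRegression(max_iter=1000).fit(X, y)

joblib.dump(clf, 'model.pkl')          # persist the fitted estimator
restored = joblib.load('model.pkl')    # reload it later, e.g. in a serving process
print(restored.coef_.shape, restored.score(X, y))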