# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
[SVC(kernel='linear', random_state=42, probability=True)],
[NuSVC(kernel='linear', random_state=42)],
[NuSVC(kernel='linear', random_state=42, decision_function_shape='ovr')],
])
def test_explain_linear_binary(newsgroups_train_binary, clf):
    """Each parametrized linear SVC/NuSVC must pass the shared binary-explanation checks."""
    assert_binary_linear_classifier_explained(
        newsgroups_train_binary, clf, explain_prediction
    )
iris = datasets.load_iris()
X = iris.data
Y = iris.target
# 80% data to keep
hold_80 = np.random.rand(len(Y)) < 0.8
train, = np.where(hold_80)
# 20% test data
test, = np.where(hold_80 == False)
X_all = X[train]
Y_all = Y[train]
svc = svm.SVC(kernel='rbf')
svc.fit(X_all, Y_all)
print "Limited Label data example"
print "Test name\tprecision\trecall \tf1"
print "SVM 80.0pct\t%0.6f\t%0.6f\t%0.6f" %\
(precision_score(svc.predict(X[test]), Y[test]),\
recall_score(svc.predict(X[test]), Y[test]),\
f1_score(svc.predict(X[test]), Y[test]))
print "-------"
for num in [0.2, 0.3, 0.4, 1.0]:
lp = label_propagation.LabelPropagation()
hold_new = np.random.rand(len(train)) > num
train_new, = np.where(hold_new)
Y_dup = np.copy(Y_all)
Y_dup[train_new] = -1
def train_svm():
    """Train an SVC on precomputed bottleneck features and print validation accuracy.

    Loads ``bottleneck_features_train.npy`` and ``bottleneck_features_validation.npy``,
    flattens each 4-D feature array to 2-D (scikit-learn expects
    ``(n_samples, n_features)``), and labels the first half of each split as
    class 0 and the second half as class 1.  ``nb_train_samples`` and
    ``nb_validation_samples`` are module-level globals defined elsewhere.
    """
    # np.load accepts a path directly; the original wrapped it in open(),
    # which opens in text mode and breaks under Python 3.
    train_data = np.load('bottleneck_features_train.npy')
    nsamples, nx, ny, nz = train_data.shape
    # scikit-learn expects 2d arrays for the training dataset for fit().
    train_data = train_data.reshape((nsamples, nx * ny * nz))
    # Floor division: plain `/` yields a float under Python 3 and would make
    # the list repetition below raise TypeError.
    train_labels = np.array(
        [0] * (nb_train_samples // 2) + [1] * (nb_train_samples // 2))
    clf = SVC(gamma='auto')
    clf.fit(train_data, train_labels)
    validation_data = np.load('bottleneck_features_validation.npy')
    nsamples, nx, ny, nz = validation_data.shape
    validation_data = validation_data.reshape((nsamples, nx * ny * nz))
    validation_labels = np.array(
        [0] * (nb_validation_samples // 2) + [1] * (nb_validation_samples // 2))
    accuracy = clf.score(validation_data, validation_labels)
    print("SVM accuracy: %f" % accuracy)
# --- Fragment: tail of a feature-extraction / classifier-building routine.
# --- The enclosing `def` and the `if` that pairs with the `else:` below are
# --- above this excerpt; indentation was lost in extraction.
img = io.imread(filename)
# Pick the feature representation for this test image.
if(featureRepresentation == 'image'):
test_data[i] = img.flatten()
elif(featureRepresentation == 'pca'):
# NOTE(review): fitting PCA on a single flattened image is dubious — PCA is
# normally fitted on a matrix of many samples; confirm intent.
test_data[i] = decomposition.PCA(n_components=8).fit_transform(img.flatten())
elif(featureRepresentation == 'glcm'):
test_data[i] = Helper.get_textural_features(img, glcm_distance, glcm_isMultidirectional)
i = i + 1;
# This `else:` belongs to a condition outside the excerpt (presumably "a
# serialized dataset file already exists") — verify against the full file.
else:
(train_data, train_targets, test_data, expected) = Helper.unserialize(dataset_file)
# Perform build iterations
for i in tqdm.tqdm(range(0, iters)):
# Build Classifier
param_grid = {'C': [1e0, 5e0, 1e1, 5e1, 1e2], 'kernel': ['rbf', 'poly'] }
classifier = svm.SVC(kernel='rbf')#grid_search.GridSearchCV(svm.SVC(kernel='rbf', class_weight='balanced'), param_grid)
classifier.fit(train_data, train_targets)
# Get previous classifier and assess
serialized_classifier = Helper.unserialize(SVM_FILE)
if(serialized_classifier):
predictions = serialized_classifier.predict(test_data)
confusion_matrix = metrics.confusion_matrix(expected, predictions)
# Correct count = sum of the confusion-matrix diagonal (binary case).
serialized_n_correct = confusion_matrix[0][0] + confusion_matrix[1][1]
predictions = classifier.predict(test_data)
confusion_matrix = metrics.confusion_matrix(expected, predictions)
n_correct = confusion_matrix[0][0] + confusion_matrix[1][1]
if(n_correct > serialized_n_correct):
Helper.serialize(SVM_FILE, classifier)
# NOTE(review): both branches serialize the NEW classifier, so the
# comparison above has no effect — the else branch probably should keep the
# previously serialized model instead.
else:
Helper.serialize(SVM_FILE, classifier)
# --- Haxby fMRI decoding fragment (nilearn + scikit-learn) ---
# Mask the 4-D fMRI image down to a (n_timepoints, n_voxels) 2-D matrix,
# standardizing each voxel's time series.
nifti_masker = NiftiMasker(mask_img=mask_filename, standardize=True)
func_filename = haxby_dataset.func[0]
# We give the nifti_masker a filename and retrieve a 2D array ready
# for machine learning with scikit-learn
fmri_masked = nifti_masker.fit_transform(func_filename)
# Restrict the classification to the face vs cat discrimination
fmri_masked = fmri_masked[condition_mask]
###########################################################################
# The decoding
# Here we use a Support Vector Classification, with a linear kernel
from sklearn.svm import SVC
svc = SVC(kernel='linear')
# And we run it
# NOTE(review): training and predicting on the same data — the in-sample
# prediction below is illustrative only; real scores come from the CV loop.
svc.fit(fmri_masked, target)
prediction = svc.predict(fmri_masked)
###########################################################################
# Compute prediction scores using cross-validation
# NOTE(review): sklearn.cross_validation was removed in scikit-learn 0.20;
# the modern equivalent is sklearn.model_selection.KFold(n_splits=5).split(X).
from sklearn.cross_validation import KFold
cv = KFold(n=len(fmri_masked), n_folds=5)
cv_scores = []
for train, test in cv:
svc.fit(fmri_masked[train], target[train])
prediction = svc.predict(fmri_masked[test])
# NOTE(review): nothing is ever appended to cv_scores — the scoring line of
# this loop appears to have been lost in extraction.
if args.TrainAll:
C_final = C_final/float(fold)
gamma_final = gamma_final/float(gamma_final)
print 'C_final: ', C_final
print 'gamma_final: ', gamma_final
X_final = data_preprocessing(X, data_preprocessing_method=data_preprocessing_method, output_path=args.output_dir+os.sep+class_data_num_str+'.iter'+str(args.i)+'.final'+'.metric.'+m)
if args.exhaustive:
C_final_range = np.logspace(np.log2(C_final)-1, np.log2(C_final)+1, 3, base=2)
g_final_range = np.logspace(np.log2(gamma_final)-1, np.log2(gamma_final)+1, 3, base=2)
print 'C_final_range: ', C_final_range
print 'g_final_range: ', g_final_range
for C in C_final_range:
for g in g_final_range:
clf_final = SVC(C=C , gamma=g, class_weight='balanced')
clf_final.fit(X_final, y)
# save model
np.save(args.output_dir+os.sep+class_data_num_str+'.iter'+str(args.i)+'.final'+'.metric.'+m+'.C_'+str(C)+'.g_'+str(g)+'.model', clf_final)
else:
clf_final = SVC(C=C_final , gamma=gamma_final, class_weight='balanced', probability=args.proba)
clf_final.fit(X_final, y)
# save model
np.save(args.output_dir+os.sep+class_data_num_str+'.iter'+str(args.i)+'.final'+'.metric.'+m+'.C_'+str(C_final)+'.g_'+str(gamma_final)+'.model', clf_final)
def _train_and_evaluate_svm_one_fold_(self, train_set, test_set):
"""
Train a multi-class SVM (one-vs-one decision function) on all training
folds, then gather the held-out fold's features for evaluation.

:param train_set: dict of folds; each fold holds feature lists ("x_ex",
    "x_im", "x_ex_im") and one-hot labels under 'y'.
:param test_set: a single fold dict with at least "consult_id", 'y' and
    "x_ex".
:return: nothing is returned in this excerpt — the method appears
    truncated after collecting the test features (see note below).
"""
# Which feature variant to train/test on, read from self.parameter.
train_feature = self.parameter.get("train_feature")
test_feature = self.parameter.get("test_feature")
clf = svm.SVC(decision_function_shape="ovo")
Xs = []
Ys = []
# Concatenate every training fold; class labels are the argmax of the
# one-hot 'y' rows.
for fold in train_set.values():
Ys = Ys + list(np.argmax(fold['y'], axis=1))
# "ex" / "im" / "ex&im" select among the three precomputed feature sets
# (presumably explicit / implicit / combined — confirm against callers).
if train_feature == "ex":
Xs = Xs + fold["x_ex"]
elif train_feature == "im":
Xs = Xs + fold["x_im"]
elif train_feature == "ex&im":
Xs = Xs + fold["x_ex_im"]
clf.fit(X=Xs, y=Ys)
# Test
IDs = test_set["consult_id"]
Ys = list(np.argmax(test_set['y'],axis=1))
# NOTE(review): Xs_ex (and test_feature/IDs) are never used below — the
# prediction/scoring tail of this method is missing from the excerpt.
Xs_ex = test_set["x_ex"]
class NuSVR(SklearnMixin, _NuSVR):
"""sklearn NuSVR with SklearnMixin behaviour layered on top."""
# Underlying estimator class; reuse its __init__ so the constructor
# signature is unchanged.
_cls = _NuSVR
__init__ = _NuSVR.__init__
class OneClassSVM(SklearnMixin, _OneClassSVM):
"""sklearn OneClassSVM with SklearnMixin behaviour layered on top."""
# Underlying estimator class; reuse its __init__ so the constructor
# signature is unchanged.
_cls = _OneClassSVM
__init__ = _OneClassSVM.__init__
class SVC(SklearnMixin, _SVC):
"""sklearn SVC with SklearnMixin behaviour layered on top."""
# Underlying estimator class; reuse its __init__ so the constructor
# signature is unchanged.
_cls = _SVC
__init__ = _SVC.__init__
class SVR(SklearnMixin, _SVR):
"""sklearn SVR with SklearnMixin behaviour layered on top."""
# Underlying estimator class; reuse its __init__ so the constructor
# signature is unchanged.
_cls = _SVR
__init__ = _SVR.__init__
def build_subsemble():
    """Build a subsemble with random partitions"""
    # Three random partitions, each cross-validated over two folds, with an
    # SVC + logistic-regression base layer.
    ensemble = Subsemble(partitions=3, folds=2)
    ensemble.add([SVC(), LogisticRegression()])
    return ensemble
#CROSS VALIDATION
# Grid search over (C, gamma) for an RBF SVC, scored by leave-one-out
# cross-validated accuracy.  The best combination seen so far is tracked by
# `best_index` into the parallel lists scores/c_values/gamma_values.
scores = []
loo = LeaveOneOut()
C_2d_range = range(200, 1200, 100)
gamma_2d_range = np.arange(0.0001, 0.01, 0.002)
#C_2d_range = [1e-2, 1, 1e2]
#gamma_2d_range = [1e-1, 1, 1e1]
best_index = None
c_values = []
gamma_values = []
index = 0
for C in C_2d_range:
    for gamma in gamma_2d_range:
        clf = svm.SVC(C=C, gamma=gamma)
        predicted = model_selection.cross_val_predict(clf, features, labels, cv=loo)
        score = accuracy_score(labels, predicted)
        scores.append(score)
        c_values.append(C)
        gamma_values.append(gamma)
        # Merged the original's duplicated "first result" / "better result"
        # branches into one equivalent condition; use identity tests for None
        # (`is None`) instead of `== None`.
        if best_index is None or scores[best_index] < score:
            best_index = index
            print('Best C: ' + str(C))
            print('Best gamma: ' + str(gamma))
        index += 1
        print('Current score is ' + str(score))