Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_pandas_integration(self):
    """
    ROCAUC should fit, score and render correctly when given pandas input
    """
    features, target = load_occupancy(return_dataset=True).to_pandas()

    # Hold out 20% of the rows for scoring; the fixed seed keeps the split stable
    X_train, X_test, y_train, y_test = tts(
        features, target, test_size=0.2, random_state=4512
    )

    oz = ROCAUC(GaussianNB())
    oz.fit(X_train, y_train)
    oz.score(X_test, y_test)

    # Finalize the figure before running the image comparison
    oz.finalize()
    self.assert_images_similar(oz)
def test_pandas_integration(self):
    """
    ConfusionMatrix should fit and score on pandas DataFrame and Series input
    """
    _, ax = plt.subplots()

    # Occupancy fixture, converted to pandas objects
    dataset = load_occupancy(return_dataset=True)
    X, y = dataset.to_pandas()

    # 80/20 train/test partition with a fixed seed
    X_train, X_test, y_train, y_test = tts(X, y, test_size=0.2, random_state=8873)

    # Build the visualizer on the prepared axes, then fit and score it
    cm = ConfusionMatrix(GaussianNB(), ax=ax, classes=None)
    cm.fit(X_train, y_train)
    cm.score(X_test, y_test)

    self.assert_images_similar(cm, tol=0.1)

    # The computed matrix itself must match the known counts exactly
    expected = np.array([[3012, 114], [1, 985]])
    npt.assert_array_equal(cm.confusion_matrix_, expected)
def test_pandas_integration(self):
    """
    Test RFECV on a real dataset supplied as a pandas DataFrame and Series
    """
    data = load_occupancy(return_dataset=True)
    X, y = data.to_pandas()

    # Use only the first 100 samples so the test will run faster
    X_t = X[:100]
    y_t = y[:100]

    # Slicing must keep the pandas types, otherwise this test exercises nothing
    assert isinstance(X_t, pd.DataFrame)
    assert isinstance(y_t, pd.Series)

    # No random_state here: the splitter is not shuffling, so a seed never
    # influenced the folds, and scikit-learn >= 0.24 raises ValueError when
    # random_state is set while shuffle is False.
    cv = StratifiedKFold(n_splits=4)
    oz = RFECV(RandomForestClassifier(random_state=83), cv=cv)
    oz.fit(X_t, y_t)
    oz.finalize()

    self.assert_images_similar(oz, remove_legend=True)
def test_fit_class_labels_class_names_edge_case(self):
    """
    Edge case: more class labels are supplied than exist in the dataset
    """
    knn = neighbors.KNeighborsClassifier(3)
    class_names = ['one', 'two', 'three', 'four', 'five']
    viz = DecisionBoundariesVisualizer(knn, classes=class_names)

    # The mismatch is only expected to surface at fit time
    with pytest.raises(YellowbrickTypeError):
        viz.fit(X_two_cols, y=y)
def test_binary_macro_error(self):
    """
    ROCAUC with macro=True must raise a ModelError for a binary decision
    """
    # A linear model is used to force the binary decision code path
    svc = LinearSVC(random_state=42)
    oz = ROCAUC(svc, macro=True)

    binary = self.binary
    oz.fit(binary.X.train, binary.y.train)

    # Macro curves aren't defined for binary decisions, so scoring must fail
    with pytest.raises(ModelError):
        oz.score(binary.X.test, binary.y.test)
def test_classes_greater_than_indices(self):
    """
    A ModelError is expected when y and y_pred contain no values for
    one of the specified classes
    """
    X, y = load_occupancy(return_dataset=True).to_numpy()

    # Presumably "partytime" is the label with no matching samples
    labels = ["unoccupied", "occupied", "partytime"]

    svc = LinearSVC(random_state=42)
    svc.fit(X, y)

    # Construction and scoring both stay inside the context manager
    with pytest.raises(ModelError):
        oz = ClassPredictionError(svc, classes=labels)
        oz.score(X, y)
def test_binary_micro_error(self):
    """
    ROCAUC with micro=True must raise a ModelError for a binary decision
    """
    # A linear model is used to force the binary decision code path
    svc = LinearSVC(random_state=42)
    oz = ROCAUC(svc, micro=True)

    binary = self.binary
    oz.fit(binary.X.train, binary.y.train)

    # Micro curves aren't defined for binary decisions, so scoring must fail
    with pytest.raises(ModelError):
        oz.score(binary.X.test, binary.y.test)
def test_extra_classes(self):
    """
    Extra entries in the classes list must raise an exception at fit time
    """
    labels = [0, 1, 2, 11]
    estimator = LogisticRegression(random_state=93)
    cm = ConfusionMatrix(estimator, classes=labels)

    # The error message must mention the decoding failure
    with pytest.raises(ModelError, match="could not decode"):
        cm.fit(self.digits.X.train, self.digits.y.train)
def test_locate_elbow(self):
"""
Test the addition of locate_elbow to an image
"""
X, y = make_blobs(
n_samples=1000, n_features=5, centers=3, shuffle=True, random_state=42
)
visualizer = KElbowVisualizer(
KMeans(random_state=0),
k=6,
metric="calinski_harabasz",
timings=False,
locate_elbow=True,
)
visualizer.fit(X)
assert len(visualizer.k_scores_) == 5
assert visualizer.elbow_value_ == 3
expected = np.array(
[
4286.4798481306625,
12463.383743070379,
8763.75791732466,
6942.167328461612,
5859.608884917707,
def test_no_knee(self):
    """
    A YellowbrickWarning must be issued when no knee can be detected
    """
    # Only the feature matrix is needed; the blob labels are discarded
    X, _ = make_blobs(n_samples=1000, centers=3, n_features=12, random_state=12)

    # Adjacent literals concatenate into the single pattern passed to match=
    message = (
        "No 'knee' or 'elbow point' detected "
        "This could be due to bad clustering, no "
        "actual clusters being formed etc."
    )

    with pytest.warns(YellowbrickWarning, match=message):
        oz = KElbowVisualizer(KMeans(random_state=12), k=(4, 12), locate_elbow=True)
        oz.fit(X)