How to use yellowbrick - 10 common examples

To help you get started, we’ve selected a few yellowbrick examples based on popular ways the library is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github DistrictDataLabs / yellowbrick / tests / test_classifier / test_rocauc.py View on Github external
def test_pandas_integration(self):
        """
        Check that ROCAUC can be fit and scored on data loaded via to_pandas()

        The occupancy dataset is split 80/20 with a fixed seed, a visualizer
        wrapping GaussianNB is fit on the training part and scored on the
        held-out part, and the finalized figure is passed to the image
        comparison helper.
        """
        features, target = load_occupancy(return_dataset=True).to_pandas()

        # Fixed seed so the train/test partition is the same on every run
        X_train, X_test, y_train, y_test = tts(
            features, target, test_size=0.2, random_state=4512
        )

        oz = ROCAUC(GaussianNB())
        oz.fit(X_train, y_train)
        oz.score(X_test, y_test)

        # Finish drawing before handing the visualizer to the image comparison
        oz.finalize()
        self.assert_images_similar(oz)
github DistrictDataLabs / yellowbrick / tests / test_classifier / test_confusion_matrix.py View on Github external
def test_pandas_integration(self):
        """
        Check ConfusionMatrix on occupancy data loaded via to_pandas()

        Besides the image comparison, the raw ``confusion_matrix_`` attribute
        is checked against the counts expected for this seeded 80/20 split.
        """
        _, ax = plt.subplots()

        # Occupancy dataset in its pandas form
        features, target = load_occupancy(return_dataset=True).to_pandas()

        # Seeded split so the expected counts below stay valid
        X_train, X_test, y_train, y_test = tts(
            features, target, test_size=0.2, random_state=8873
        )

        # Fit and score the visualizer on the explicitly provided axes
        oz = ConfusionMatrix(GaussianNB(), ax=ax, classes=None)
        oz.fit(X_train, y_train)
        oz.score(X_test, y_test)

        self.assert_images_similar(oz, tol=0.1)

        # The underlying matrix must match the known counts exactly
        expected_counts = np.array([[3012, 114], [1, 985]])
        npt.assert_array_equal(oz.confusion_matrix_, expected_counts)
github DistrictDataLabs / yellowbrick / tests / test_model_selection / test_rfecv.py View on Github external
def test_pandas_integration(self):
        """
        Test RFECV on a real dataset with pandas DataFrame and Series

        Only the first 100 rows of the occupancy dataset are used so the
        recursive feature elimination runs quickly. The sliced inputs are
        asserted to still be pandas objects before fitting.
        """
        data = load_occupancy(return_dataset=True)
        X, y = data.to_pandas()

        # Use only the first 100 samples so the test will run faster
        X_t = X[:100]
        y_t = y[:100]

        assert isinstance(X_t, pd.DataFrame)
        assert isinstance(y_t, pd.Series)

        # No random_state here: without shuffle=True the folds are already
        # deterministic, and newer scikit-learn releases raise a ValueError
        # when random_state is set while shuffle is False.
        cv = StratifiedKFold(n_splits=4)
        oz = RFECV(RandomForestClassifier(random_state=83), cv=cv)
        oz.fit(X_t, y_t)
        oz.finalize()

        self.assert_images_similar(oz, remove_legend=True)
github DistrictDataLabs / yellowbrick / tests / test_contrib / test_classifier / test_boundaries.py View on Github external
def test_fit_class_labels_class_names_edge_case(self):
        """
        Edge case: more class labels are supplied than exist in the dataset

        Fitting the visualizer in that situation is expected to raise
        YellowbrickTypeError.
        """
        class_names = ['one', 'two', 'three', 'four', 'five']
        knn = neighbors.KNeighborsClassifier(3)
        oz = DecisionBoundariesVisualizer(knn, classes=class_names)

        with pytest.raises(YellowbrickTypeError):
            oz.fit(X_two_cols, y=y)
github DistrictDataLabs / yellowbrick / tests / test_classifier / test_rocauc.py View on Github external
def test_binary_macro_error(self):
        """
        ROCAUC with macro=True must raise when scoring a binary decision model
        """
        # A linear model is used to force the binary decision code path
        estimator = LinearSVC(random_state=42)
        oz = ROCAUC(estimator, macro=True)
        oz.fit(self.binary.X.train, self.binary.y.train)

        # Macro curves aren't defined for binary decisions, so score must fail
        with pytest.raises(ModelError):
            oz.score(self.binary.X.test, self.binary.y.test)
github DistrictDataLabs / yellowbrick / tests / test_classifier / test_class_prediction_error.py View on Github external
def test_classes_greater_than_indices(self):
        """
        Assert an error is raised when y and y_pred contain zero values
        for one of the specified classes
        """
        X, y = load_occupancy(return_dataset=True).to_numpy()

        # Three names are passed for the visualizer's classes
        class_names = ["unoccupied", "occupied", "partytime"]

        fitted = LinearSVC(random_state=42)
        fitted.fit(X, y)

        # Construction and scoring both stay inside the context so the error
        # is accepted from either call
        with pytest.raises(ModelError):
            oz = ClassPredictionError(fitted, classes=class_names)
            oz.score(X, y)
github DistrictDataLabs / yellowbrick / tests / test_classifier / test_rocauc.py View on Github external
def test_binary_micro_error(self):
        """
        ROCAUC with micro=True must raise when scoring a binary decision model
        """
        # A linear model is used to force the binary decision code path
        estimator = LinearSVC(random_state=42)
        oz = ROCAUC(estimator, micro=True)
        oz.fit(self.binary.X.train, self.binary.y.train)

        # Micro curves aren't defined for binary decisions, so score must fail
        with pytest.raises(ModelError):
            oz.score(self.binary.X.test, self.binary.y.test)
github DistrictDataLabs / yellowbrick / tests / test_classifier / test_confusion_matrix.py View on Github external
def test_extra_classes(self):
        """
        Extra entries in ``classes`` must make fit raise a ModelError
        """
        class_labels = [0, 1, 2, 11]
        oz = ConfusionMatrix(LogisticRegression(random_state=93), classes=class_labels)

        # The error text is expected to mention the decoding failure
        with pytest.raises(ModelError, match="could not decode"):
            oz.fit(self.digits.X.train, self.digits.y.train)
github DistrictDataLabs / yellowbrick / tests / test_cluster / test_elbow.py View on Github external
def test_locate_elbow(self):
        """
        Test the addition of locate_elbow to an image
        """
        X, y = make_blobs(
            n_samples=1000, n_features=5, centers=3, shuffle=True, random_state=42
        )

        visualizer = KElbowVisualizer(
            KMeans(random_state=0),
            k=6,
            metric="calinski_harabasz",
            timings=False,
            locate_elbow=True,
        )
        visualizer.fit(X)
        assert len(visualizer.k_scores_) == 5
        assert visualizer.elbow_value_ == 3
        expected = np.array(
            [
                4286.4798481306625,
                12463.383743070379,
                8763.75791732466,
                6942.167328461612,
                5859.608884917707,
github DistrictDataLabs / yellowbrick / tests / test_cluster / test_elbow.py View on Github external
def test_no_knee(self):
        """
        A YellowbrickWarning must be issued when no knee is detected
        """
        X, _ = make_blobs(n_samples=1000, centers=3, n_features=12, random_state=12)

        # Text handed to pytest.warns as the ``match`` pattern
        expected_warning = (
            "No 'knee' or 'elbow point' detected "
            "This could be due to bad clustering, no "
            "actual clusters being formed etc."
        )

        # Construction and fitting both happen inside the warning context
        with pytest.warns(YellowbrickWarning, match=expected_warning):
            oz = KElbowVisualizer(
                KMeans(random_state=12), k=(4, 12), locate_elbow=True
            )
            oz.fit(X)