How to use the yellowbrick.cluster.elbow.KElbowVisualizer class in yellowbrick

To help you get started, we’ve selected a few yellowbrick examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github DistrictDataLabs / yellowbrick / tests / test_cluster / test_elbow.py View on Github external
def test_locate_elbow(self):
        """
        Test the addition of locate_elbow to an image
        """
        # Synthetic data with a known cluster count (3) so the detected
        # elbow can be asserted exactly; random_state fixes the layout.
        X, y = make_blobs(
            n_samples=1000, n_features=5, centers=3, shuffle=True, random_state=42
        )

        # k=6 expands to candidate values 2..6, hence five scores below.
        visualizer = KElbowVisualizer(
            KMeans(random_state=0),
            k=6,
            metric="calinski_harabasz",
            timings=False,
            locate_elbow=True,
        )
        visualizer.fit(X)
        assert len(visualizer.k_scores_) == 5
        # The knee-point detector should land on the true cluster count.
        assert visualizer.elbow_value_ == 3
        # NOTE(review): snippet is truncated here by the scrape -- the
        # expected-score array is never closed or compared in this view.
        expected = np.array(
            [
                4286.4798481306625,
                12463.383743070379,
                8763.75791732466,
                6942.167328461612,
                5859.608884917707,
github DistrictDataLabs / yellowbrick / tests / test_cluster / test_elbow.py View on Github external
def test_no_knee(self):
        """
        Assert that a warning is issued if there is no knee detected
        """
        # Data whose elbow curve has no clear knee over the searched range.
        X, _ = make_blobs(n_samples=1000, centers=3, n_features=12, random_state=12)

        oz = KElbowVisualizer(
            KMeans(random_state=12), k=(4, 12), locate_elbow=True
        )

        expected_warning = (
            "No 'knee' or 'elbow point' detected "
            "This could be due to bad clustering, no "
            "actual clusters being formed etc."
        )

        # Fitting over k=4..11 should fail to locate a knee and warn about it.
        with pytest.warns(YellowbrickWarning, match=expected_warning):
            oz.fit(X)
github DistrictDataLabs / yellowbrick / tests / test_cluster / test_elbow.py View on Github external
def test_integrated_mini_batch_kmeans_elbow(self):
        """
        Test no exceptions for mini-batch kmeans k-elbow visualizer
        """
        # NOTE #182: cannot use occupancy dataset because of memory usage

        # Generate a blobs data set
        X, y = make_blobs(
            n_samples=1000, n_features=12, centers=6, shuffle=True, random_state=42
        )

        # No try/except wrapper: pytest already reports any raised exception
        # as a test failure, and re-raising via pytest.fail("{}".format(e))
        # would discard the original traceback, making diagnosis harder.
        _, ax = plt.subplots()

        visualizer = KElbowVisualizer(MiniBatchKMeans(random_state=42), k=4, ax=ax)
        visualizer.fit(X)
        visualizer.finalize()

        # Compare the rendered figure against the stored baseline image.
        self.assert_images_similar(visualizer)
github DistrictDataLabs / yellowbrick / tests / test_cluster / test_elbow.py View on Github external
def test_timings(self):
        """
        Test the twinx double axes with k-elbow timings
        """
        # timings=True plots fit times on a secondary (twinx) axis;
        # locate_elbow=False keeps the knee annotation out of the figure.
        visualizer = KElbowVisualizer(
            KMeans(random_state=0), k=5, timings=True, locate_elbow=False
        )
        visualizer.fit(self.clusters.X)

        # Check that we kept track of time
        # k=5 expands to candidates 2..5, so four timings are recorded.
        assert len(visualizer.k_timers_) == 4
        assert all([t > 0 for t in visualizer.k_timers_])

        # Check that we plotted time on a twinx
        assert hasattr(visualizer, "axes")
        assert len(visualizer.axes) == 2

        # delete the timings axes and
        # overwrite k_timers_, k_values_ for image similarity Tests
        visualizer.axes[1].remove()
        # NOTE(review): snippet is truncated here by the scrape -- the
        # replacement k_timers_ list is never closed in this view.
        visualizer.k_timers_ = [
github DistrictDataLabs / yellowbrick / tests / test_cluster / test_elbow.py View on Github external
def test_silhouette_metric(self):
        """
        Test the silhouette metric of the k-elbow visualizer
        """
        # Score each candidate k with the silhouette metric; timings and
        # knee detection are disabled so only the metric curve is exercised.
        visualizer = KElbowVisualizer(
            KMeans(random_state=0),
            k=5,
            metric="silhouette",
            timings=False,
            locate_elbow=False,
        )
        visualizer.fit(self.clusters.X)

        # Expected silhouette scores for candidates k=2..5 on the fixture data.
        # NOTE(review): snippet is truncated by the scrape -- the comparison
        # against visualizer.k_scores_ is not visible in this view.
        expected = np.array(
            [
                0.6916363804000003,
                0.456645663683503,
                0.26918583373704463,
                0.25523298106687914,
            ]
        )
github DistrictDataLabs / yellowbrick / tests / test_cluster / test_elbow.py View on Github external
def test_distortion_metric(self):
        """
        Test the distortion metric of the k-elbow visualizer
        """
        # Score each candidate k with the distortion metric; timings and
        # knee detection are disabled so only the metric curve is exercised.
        visualizer = KElbowVisualizer(
            KMeans(random_state=0),
            k=5,
            metric="distortion",
            timings=False,
            locate_elbow=False,
        )
        visualizer.fit(self.clusters.X)

        # Expected distortion scores for candidates k=2..5 on the fixture data.
        # NOTE(review): snippet is truncated by the scrape -- the comparison
        # against visualizer.k_scores_ is not visible in this view.
        expected = np.array(
            [
                69.10006514142941,
                54.081571290449936,
                44.491830981793605,
                33.99887993254433,
            ]
        )
github DistrictDataLabs / yellowbrick / tests / test_cluster / test_elbow.py View on Github external
def test_bad_metric(self):
        """
        Assert KElbow raises an exception when a bad metric is supplied
        """
        # An unrecognized scoring metric must be rejected at construction time.
        estimator = KMeans()
        with pytest.raises(YellowbrickValueError):
            KElbowVisualizer(estimator, k=5, metric="foo")
github DistrictDataLabs / yellowbrick / tests / test_cluster / test_elbow.py View on Github external
def test_invalid_k(self):
        """
        Assert that invalid values of K raise exceptions
        """
        # Neither a tuple containing a non-integer nor a bare string is a
        # valid k specification; both must raise at construction time.
        for bad_k in ((1, 2, 3, "foo", 5), "foo"):
            with pytest.raises(YellowbrickValueError):
                KElbowVisualizer(KMeans(), k=bad_k)
github DistrictDataLabs / yellowbrick / tests / test_cluster / test_elbow.py View on Github external
Assert that valid values of K generate correct k_values_
        """
        # NOTE(review): the `def` line and docstring opener for this test are
        # missing from this view -- the snippet starts mid-docstring.
        # if k is an int, k_values_ = range(2, k+1)
        # if k is a tuple of 2 ints, k_values = range(k[0], k[1])
        # if k is an iterable, k_values_ = list(k)

        # Integer k: candidates start at 2 and include k itself.
        visualizer = KElbowVisualizer(KMeans(), k=8)
        assert visualizer.k_values_ == list(np.arange(2, 8 + 1))

        # Tuple k: half-open range, upper bound excluded.
        visualizer = KElbowVisualizer(KMeans(), k=(4, 12))
        assert visualizer.k_values_ == list(np.arange(4, 12))

        # Iterable k (array or list): the values are used verbatim.
        visualizer = KElbowVisualizer(KMeans(), k=np.arange(10, 100, 10))
        assert visualizer.k_values_ == list(np.arange(10, 100, 10))

        visualizer = KElbowVisualizer(KMeans(), k=[10, 20, 30, 40, 50, 60, 70, 80, 90])
        assert visualizer.k_values_ == list(np.arange(10, 100, 10))
github DistrictDataLabs / yellowbrick / yellowbrick / cluster / elbow.py View on Github external
Automatically find the "elbow" or "knee" which likely corresponds to the optimal
        value of k using the "knee point detection algorithm". The knee point detection
        algorithm finds the point of maximum curvature, which in a well-behaved
        clustering problem also represents the pivot of the elbow curve. The point is
        labeled with a dashed line and annotated with the score and k values.

    kwargs : dict
        Keyword arguments that are passed to the base class and may influence
        the visualization as defined in other Visualizers.

    Returns
    -------
    viz : KElbowVisualizer
        The kelbow visualizer, fitted and finalized.
    """
    # NOTE(review): the function signature and the start of this docstring are
    # missing from this view -- the snippet begins mid-docstring.
    # Build the visualizer, forwarding all display configuration to KElbow.
    oz = KElbow(
        model,
        ax=ax,
        k=k,
        metric=metric,
        timings=timings,
        locate_elbow=locate_elbow,
        **kwargs
    )

    # Fit and finalize immediately so the caller gets a ready-to-show plot.
    oz.fit(X, y)
    oz.finalize()
    return oz