How to use the yellowbrick.datasets.load_occupancy function in yellowbrick

To help you get started, we’ve selected a few yellowbrick examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github DistrictDataLabs / yellowbrick / tests / test_classifier / test_rocauc.py View on Github external
def test_pandas_integration(self):
        """
        Run ROCAUC on the occupancy dataset loaded as pandas objects
        """
        features, target = load_occupancy(return_dataset=True).to_pandas()

        # Hold out 20% of the rows for scoring, using a fixed seed
        X_train, X_test, y_train, y_test = tts(
            features, target, test_size=0.2, random_state=4512
        )

        oz = ROCAUC(GaussianNB())
        oz.fit(X_train, y_train)
        oz.score(X_test, y_test)

        # Finalize the figure, then check it against the baseline image
        oz.finalize()
        self.assert_images_similar(oz)
github DistrictDataLabs / yellowbrick / tests / test_classifier / test_confusion_matrix.py View on Github external
def test_pandas_integration(self):
        """
        Check ConfusionMatrix when fed the pandas form of the occupancy data
        """
        _, ax = plt.subplots()

        # Occupancy dataset from the fixtures, converted to pandas
        features, target = load_occupancy(return_dataset=True).to_pandas()

        # 80/20 train/test split with a fixed seed
        X_train, X_test, y_train, y_test = tts(
            features, target, test_size=0.2, random_state=8873
        )

        # Fit and score the confusion matrix visualizer on a naive Bayes model
        oz = ConfusionMatrix(GaussianNB(), ax=ax, classes=None)
        oz.fit(X_train, y_train)
        oz.score(X_test, y_test)

        self.assert_images_similar(oz, tol=0.1)

        # The counts stored on the visualizer must match the known values
        expected = np.array([[3012, 114], [1, 985]])
        npt.assert_array_equal(oz.confusion_matrix_, expected)
github DistrictDataLabs / yellowbrick / tests / test_model_selection / test_rfecv.py View on Github external
def test_pandas_integration(self):
        """
        Test on a real dataset with pandas DataFrame and Series
        """
        data = load_occupancy(return_dataset=True)
        X, y = data.to_pandas()

        # Use only the first 100 samples so the test will run faster
        X_t = X[:100]
        y_t = y[:100]

        # Slicing must keep the pandas types intact
        assert isinstance(X_t, pd.DataFrame)
        assert isinstance(y_t, pd.Series)

        # No random_state here: the folds are not shuffled, so a seed has no
        # effect on them, and recent scikit-learn releases raise a ValueError
        # when random_state is set while shuffle is False.
        cv = StratifiedKFold(n_splits=4)
        oz = RFECV(RandomForestClassifier(random_state=83), cv=cv)
        oz.fit(X_t, y_t)
        oz.finalize()

        # Compare against the baseline image, ignoring the legend
        self.assert_images_similar(oz, remove_legend=True)
github DistrictDataLabs / yellowbrick / tests / test_features / test_pcoords.py View on Github external
def test_parallel_coordinates_quickmethod(self):
        """
        Check that the parallel_coordinates quick method yields a valid figure
        """
        dataset = load_occupancy(return_dataset=True)
        X, y = dataset.to_numpy()

        # Draw only 100 samples so the test runs faster, and do not show the figure
        oz = parallel_coordinates(X, y, sample=100, show=False)

        # Compare the result with the baseline image
        self.assert_images_similar(oz)
github DistrictDataLabs / yellowbrick / tests / test_features / test_radviz.py View on Github external
def test_integrated_radviz_numpy_classes_features(self):
        """
        Test RadViz on numpy arrays with explicit class and feature names
        """
        # Pull the occupancy dataset from the fixture as numpy arrays
        dataset = load_occupancy(return_dataset=True)
        X, y = dataset.to_numpy()

        # First three feature names, and class names ordered by their label value
        features = dataset.meta["features"][:3]
        label_map = dataset.meta["labels"]
        classes = sorted(label_map, key=label_map.get)

        assert isinstance(X, np.ndarray)
        assert isinstance(y, np.ndarray)

        # Keep only the first three columns so the data matches the feature
        # names, and make the target integer valued rather than class names
        X = X[:, :3]
        y = y.astype(int)

        # Build the visualizer under test
        oz = RadViz(features=features, classes=classes)
github DistrictDataLabs / yellowbrick / tests / test_contrib / test_scatter.py View on Github external
def test_integrated_scatter_with_pandas(self):
        """
        Run ScatterViz on the real occupancy dataset in pandas form
        """
        # Occupancy data from the fixture, converted to pandas
        dataset = load_occupancy(return_dataset=True)
        X, y = dataset.to_pandas()

        # Plot two named columns against each other
        # NOTE(review): newer yellowbrick releases appear to rename the "poof"
        # methods to "show" -- confirm against the pinned version before changing.
        oz = ScatterViz(features=["temperature", "relative humidity"])
        oz.fit_transform_poof(X, y)
github DistrictDataLabs / yellowbrick / docs / gallery.py View on Github external
def rocauc(dataset):
    """
    Fit and score a ROCAUC visualizer, then hand it to ``savefig``.

    Parameters
    ----------
    dataset : str
        ``"binary"`` uses the occupancy data with ``GaussianNB``;
        ``"multiclass"`` uses the game data, ordinal-encoded, with
        ``RidgeClassifier``.

    Raises
    ------
    ValueError
        If ``dataset`` is neither ``"binary"`` nor ``"multiclass"``.
    """
    if dataset == "binary":
        X, y = load_occupancy()
        model = GaussianNB()
    elif dataset == "multiclass":
        X, y = load_game()
        # Encode the game features as ordinals before fitting
        X = OrdinalEncoder().fit_transform(X)
        model = RidgeClassifier()
    else:
        # Message spelling matches the one used by manifold()
        raise ValueError("unknown dataset")

    # Hold out 20% of the data for scoring (no fixed seed is passed)
    X_train, X_test, y_train, y_test = tts(X, y, test_size=0.2)
    oz = ROCAUC(model, ax=newfig())
    oz.fit(X_train, y_train)
    oz.score(X_test, y_test)
    savefig(oz, "rocauc_{}".format(dataset))
github DistrictDataLabs / yellowbrick / docs / gallery.py View on Github external
def radviz():
    """
    Fit a RadViz visualizer on the occupancy data and pass it to ``savefig``
    under the name "radviz".
    """
    features, target = load_occupancy()
    visualizer = RadViz(ax=newfig())
    visualizer.fit_transform(features, target)
    savefig(visualizer, "radviz")
github DistrictDataLabs / yellowbrick / docs / gallery.py View on Github external
def scatter():
    """
    Fit a ScatterVisualizer of "light" against "CO2" on the occupancy data
    and pass it to ``savefig`` under the name "scatter".
    """
    features, target = load_occupancy()
    visualizer = ScatterVisualizer(x="light", y="CO2", ax=newfig())
    visualizer.fit_transform(features, target)
    savefig(visualizer, "scatter")
github DistrictDataLabs / yellowbrick / docs / gallery.py View on Github external
def manifold(dataset, manifold):
    """
    Fit a Manifold visualizer on the chosen dataset and pass it to ``savefig``.

    Parameters
    ----------
    dataset : str
        Either ``"concrete"`` or ``"occupancy"``.
    manifold : str
        Forwarded to ``Manifold(manifold=...)`` and used in the saved name.

    Raises
    ------
    ValueError
        If ``dataset`` is not one of the two supported names.
    """
    # Reject unsupported names up front
    if dataset not in ("concrete", "occupancy"):
        raise ValueError("unknown dataset")

    loader = load_concrete if dataset == "concrete" else load_occupancy
    X, y = loader()

    visualizer = Manifold(manifold=manifold, ax=newfig())
    visualizer.fit_transform(X, y)

    figure_name = "{}_{}_manifold".format(dataset, manifold)
    savefig(visualizer, figure_name)