How to use the yellowbrick.exceptions.DatasetsError exception class in yellowbrick

To help you get started, we’ve selected a few yellowbrick examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github DistrictDataLabs / yellowbrick / tests / test_datasets / test_loaders.py View on Github external
def assert_valid_numpy(data):
    """
    Assert that the dataset yields valid numpy arrays from both to_numpy and
    to_data, and that to_pandas raises DatasetsError while pandas is patched
    out by the caller.
    """
    __tracebackhide__ = True  # hide this helper frame in pytest tracebacks
    X, y = data.to_numpy()
    assert isinstance(X, np.ndarray), "X is not a numpy array"
    assert isinstance(y, np.ndarray), "y is not a numpy array"
    assert X.ndim == 2 and y.ndim == 1, "X and y dimensions are incorrect"

    # Patch pandas and make defaults assertions
    X, y = data.to_data()
    assert isinstance(X, np.ndarray), "to_data does not return numpy"
    assert isinstance(y, np.ndarray), "to_data does not return numpy"

    # BUG FIX: the original line read `data.to_pandas(), "exception not
    # raised..."` — that builds a throwaway tuple and the message string was
    # dead code. pytest.raises itself reports the failure, so the bare call
    # is all that belongs inside the context manager.
    with pytest.raises(DatasetsError):
        data.to_pandas()
github DistrictDataLabs / yellowbrick / tests / test_datasets / test_path.py View on Github external
def test_missing_find_dataset_path(tmpdir):
    """
    Test find_dataset_path when the dataset does not exist
    """
    fixtures = tmpdir.mkdir("fixtures")
    home = str(fixtures)

    # Case 1: the dataset directory does not exist at all
    with pytest.raises(DatasetsError):
        find_dataset_path("foo", data_home=home)

    # Case 2: the dataset directory exists but holds no files
    dataset_dir = fixtures.mkdir("foo")
    with pytest.raises(DatasetsError):
        find_dataset_path("foo", data_home=home)

    # Case 3: a file exists, but not one with the requested extension
    dataset_dir.join("foo.csv").write("1,2,3")
    with pytest.raises(DatasetsError):
        find_dataset_path("foo", data_home=home, ext=".npz")
github DistrictDataLabs / yellowbrick / yellowbrick / datasets / base.py View on Github external
def to_pandas(self):
        """
        Returns the dataset as two pandas objects: X and y.

        Returns
        -------
        X : DataFrame with shape (n_instances, n_features)
            A pandas DataFrame containing feature data and named columns.

        y : Series with shape (n_instances,)
            A pandas Series containing target data and an index that matches
            the feature DataFrame index.

        Raises
        ------
        DatasetsError
            If the downloaded package has no metadata, or the metadata does
            not name the features and target columns.
        """
        # Ensure the metadata is valid before continuing
        if self.meta is None:
            raise DatasetsError(
                (
                    "the downloaded dataset was improperly packaged without meta.json "
                    "- please report this bug to the Yellowbrick maintainers!"
                )
            )

        if "features" not in self.meta or "target" not in self.meta:
            raise DatasetsError(
                (
                    "the downloaded dataset was improperly packaged without features "
                    "or target - please report this bug to the Yellowbrick maintainers!"
                )
            )

        # BUG FIX: this snippet was truncated after the validation checks and
        # fell through returning None; restore the load-and-return tail so the
        # method actually produces (X, y).
        # Load data frame and return features and target
        # TODO: Return y as None if there is no self.meta["target"]
        df = self.to_dataframe()
        return df[self.meta["features"]], df[self.meta["target"]]
github DistrictDataLabs / yellowbrick / yellowbrick / datasets / base.py View on Github external
def to_dataframe(self):
        """
        Return the entire dataset as a single pandas DataFrame.

        Returns
        -------
        df : DataFrame with shape (n_instances, n_columns)
            The complete original data table, including all targets specified
            by the metadata and all features (even those that might have been
            filtered out).
        """
        # pandas is an optional dependency; fail loudly when it is missing
        if pd is None:
            raise DatasetsError(
                "pandas is required to load DataFrame, it can be installed with pip"
            )

        # Locate the gzipped CSV on disk and parse it directly
        csv_path = find_dataset_path(self.name, ext=".csv.gz", data_home=self.data_home)
        return pd.read_csv(csv_path, compression="gzip")
github DistrictDataLabs / yellowbrick / yellowbrick / datasets / path.py View on Github external
if fname is None:
        if ext is None:
            path = os.path.join(data_home, dataset)
        else:
            path = os.path.join(data_home, dataset, "{}{}".format(dataset, ext))
    else:
        path = os.path.join(data_home, dataset, fname)

    # Determine if the path exists
    if not os.path.exists(path):

        # Suppress exceptions if required
        if not raises:
            return None

        raise DatasetsError(
            ("could not find dataset at {} - does it need to be downloaded?").format(
                path
            )
        )

    return path
github DistrictDataLabs / yellowbrick / yellowbrick / datasets / base.py View on Github external
y : Series with shape (n_instances,)
            A pandas Series containing target data and an index that matches
            the feature DataFrame index.
        """
        # Ensure the metadata is valid before continuing
        if self.meta is None:
            raise DatasetsError(
                (
                    "the downloaded dataset was improperly packaged without meta.json "
                    "- please report this bug to the Yellowbrick maintainers!"
                )
            )

        if "features" not in self.meta or "target" not in self.meta:
            raise DatasetsError(
                (
                    "the downloaded dataset was improperly packaged without features "
                    "or target - please report this bug to the Yellowbrick maintainers!"
                )
            )

        # Load data frame and return features and target
        # TODO: Return y as None if there is no self.meta["target"]
        df = self.to_dataframe()
        return df[self.meta["features"]], df[self.meta["target"]]
github DistrictDataLabs / yellowbrick / yellowbrick / datasets / base.py View on Github external
def to_numpy(self):
        """
        Return the dataset as two numpy arrays: X and y.

        Returns
        -------
        X : array-like with shape (n_instances, n_features)
            A numpy array describing the instance features.

        y : array-like with shape (n_instances,)
            A numpy array describing the target vector.

        Raises
        ------
        DatasetsError
            If the .npz archive does not contain both "X" and "y" arrays.
        """
        # Locate the .npz archive for this dataset on disk
        npz_path = find_dataset_path(self.name, ext=".npz", data_home=self.data_home)

        # allow_pickle=False guards against code execution from a tampered
        # archive; the context manager closes the underlying file handle.
        with np.load(npz_path, allow_pickle=False) as archive:
            if "X" not in archive or "y" not in archive:
                raise DatasetsError(
                    (
                        "the downloaded dataset was improperly packaged without numpy "
                        "arrays - please report this bug to the Yellowbrick maintainers!"
                    )
                )

            # TODO: How to handle the case where y is None?
            return archive["X"], archive["y"]
github DistrictDataLabs / yellowbrick / yellowbrick / datasets / download.py View on Github external
Extract the archive file after downloading it
    """
    data_home = get_data_home(data_home)

    # Get the name of the file from the URL
    basename = os.path.basename(url)
    name, _ = os.path.splitext(basename)

    # Get the archive and data directory paths
    archive = os.path.join(data_home, basename)
    datadir = os.path.join(data_home, name)

    # If the archive exists cleanup or raise override exception
    if os.path.exists(archive):
        if not replace:
            raise DatasetsError((
                "dataset already exists at {}, set replace=False to overwrite"
            ).format(archive))

        cleanup_dataset(name, data_home=data_home)

    # Create the output directory if it does not exist
    if not os.path.exists(datadir):
        os.mkdir(datadir)

    # Fetch the response in a streaming fashion and write it to disk.
    response = urlopen(url)

    with open(archive, 'wb') as f:
        while True:
            chunk = response.read(CHUNK)
            if not chunk: