How to use the yellowbrick.utils.is_dataframe function in yellowbrick

To help you get started, we’ve selected a few yellowbrick examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github DistrictDataLabs / yellowbrick / yellowbrick / features / pcoords.py View on Github external
y : ndarray or Series of length n
            An array or series of target or class values

        kwargs : dict
            Pass generic arguments to the drawing method

        Returns
        -------
        self : instance
            Returns the instance of the transformer/visualizer
        """
        # Determine the features, classes, and colors
        super(ParallelCoordinates, self).fit(X, y)

        # Convert from pandas data types
        if is_dataframe(X):
            X = X.values
        if is_series(y):
            y = y.values

        # Ticks for each feature specified
        self._increments = np.arange(len(self.features_))

        # Subsample instances
        X, y = self._subsample(X, y)

        # Normalize instances
        if self.normalize is not None:
            X = self.NORMALIZERS[self.normalize].fit_transform(X)

        self.draw(X, y, **kwargs)
        return self
github DistrictDataLabs / yellowbrick / yellowbrick / features / base.py View on Github external
if self.features is not None:
            # Use the user-specified features with some checking
            # TODO: allow the user specified features to filter the dataset
            if len(self.features) != n_columns:
                raise YellowbrickValueError(
                    (
                        "number of supplied feature names does not match the number "
                        "of columns in the training data."
                    )
                )

            self.features_ = np.array(self.features)

        else:
            # Attempt to determine the feature names from the input data
            if is_dataframe(X):
                self.features_ = np.array(X.columns)

            # Otherwise create numeric labels for each column.
            else:
                self.features_ = np.arange(0, n_columns)

        # Ensure super is called and fit is returned
        super(MultiFeatureVisualizer, self).fit(X, y)
        return self
github DistrictDataLabs / yellowbrick / yellowbrick / features / rankd.py View on Github external
-------
        ranks : ndarray
            An n-dimensional, symmetric array of rank scores, where n is the
            number of features. E.g. for 1D ranking, it is (n,), for a
            2D ranking it is (n,n) and so forth.
        """
        algorithm = algorithm or self.ranking_
        algorithm = algorithm.lower()

        if algorithm not in self.ranking_methods:
            raise YellowbrickValueError(
                "'{}' is unrecognized ranking method".format(algorithm)
            )

        # Extract matrix from dataframe if necessary
        if is_dataframe(X):
            X = X.values

        return self.ranking_methods[algorithm](X)
github DistrictDataLabs / yellowbrick / yellowbrick / model_selection / importances.py View on Github external
# Apply absolute value filter before normalization
        if self.absolute:
            self.feature_importances_ = np.abs(self.feature_importances_)

        # Normalize features relative to the maximum
        if self.relative:
            maxv = np.abs(self.feature_importances_).max()
            self.feature_importances_ /= maxv
            self.feature_importances_ *= 100.0

        # Create labels for the feature importances
        # NOTE: this code is duplicated from MultiFeatureVisualizer
        if self.labels is None:
            # Use column names if a dataframe
            if is_dataframe(X):
                self.features_ = np.array(X.columns)

            # Otherwise use the column index as the labels
            else:
                _, ncols = X.shape
                self.features_ = np.arange(0, ncols)
        else:
            self.features_ = np.array(self.labels)

        # Sort the features and their importances
        if self.stack:
            sort_idx = np.argsort(np.mean(self.feature_importances_, 0))
            self.features_ = self.features_[sort_idx]
            self.feature_importances_ = self.feature_importances_[:, sort_idx]
        else:
            sort_idx = np.argsort(self.feature_importances_)
github DistrictDataLabs / yellowbrick / yellowbrick / target / feature_correlation.py View on Github external
def _create_labels_for_features(self, X):
        """
        Create labels for the features

        NOTE: this code is duplicated from MultiFeatureVisualizer
        """
        if self.labels is None:
            # Use column names if a dataframe
            if is_dataframe(X):
                self.features_ = np.array(X.columns)
            # Otherwise use the column index as the labels
            else:
                _, ncols = X.shape
                self.features_ = np.arange(0, ncols)
        else:
            self.features_ = np.array(self.labels)
github DistrictDataLabs / yellowbrick / yellowbrick / contrib / classifier / boundaries.py View on Github external
""" """

        if len(X.shape) == 1:
            X_flat = X.copy().view(np.float64).reshape(len(X), -1)
        else:
            X_flat = X

        _, ncols = X_flat.shape

        if ncols == 2:
            X_two_cols = X
            if self.features_ is None:
                self.features_ = ["Feature One", "Feature Two"]

        # Handle the feature names if they're None.
        elif self.features_ is not None and is_dataframe(X):
            X_two_cols = X[self.features_].as_matrix()

        # handle numpy named/ structured array
        elif self.features_ is not None and is_structured_array(X):
            X_selected = X[self.features_]
            X_two_cols = X_selected.copy().view(np.float64).reshape(len(X_selected), -1)

        # handle features that are numeric columns in ndarray matrix
        elif self.features_ is not None and has_ndarray_int_columns(self.features_, X):
            f_one, f_two = self.features_
            X_two_cols = X[:, [int(f_one), int(f_two)]]

        else:
            raise YellowbrickValueError("""
                ScatterVisualizer only accepts two features, please
                explicitly set these two features in the init kwargs or
github DistrictDataLabs / yellowbrick / yellowbrick / features / radviz.py View on Github external
def draw(self, X, y, **kwargs):
        """
        Called from the fit method, this method creates the radviz canvas and
        draws each instance as a class or target colored point, whose location
        is determined by the feature data set.
        """
        # Convert from dataframe
        if is_dataframe(X):
            X = X.values

        # Clean out nans and warn the user that they aren't plotted
        nan_warnings.warn_if_nans_exist(X)
        X, y = nan_warnings.filter_missing(X, y)

        # Get the shape of the data
        nrows, ncols = X.shape

        # Set the axes limits
        self.ax.set_xlim([-1, 1])
        self.ax.set_ylim([-1, 1])

        # Create a data structure to hold scatter plot representations
        to_plot = {label: [[], []] for label in self.classes_}
github DistrictDataLabs / yellowbrick / yellowbrick / contrib / missing / base.py View on Github external
kwargs : dict
            Pass generic arguments to the drawing method

        Returns
        -------
        self : instance
            Returns the instance of the transformer/visualizer
        """
        # Do not call super here - the data visualizer has been refactored
        # to provide increased functionality that is not yet compatible with
        # the current implementation. This mimics the previous functionality.
        # TODO: Refactor MissingDataVisualizer to make use of new features.
        self.features_ = self.features

        if is_dataframe(X):
            self.X = X.values
            if self.features_ is None:
                self.features_ = X.columns
        else:
            self.X = X

        self.y = y

        self.draw(X, y, **kwargs)
        return self