How to use the modin.error_message.ErrorMessage function in modin

To help you get started, we’ve selected a few modin examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github modin-project / modin / modin / pandas / groupby.py View on Github external
def _index_grouped(self):
        if self._index_grouped_cache is None:
            if self._is_multi_by:
                # Because we are doing a collect (to_pandas) here and then groupby, we
                # end up using pandas implementation. Add the warning so the user is
                # aware.
                ErrorMessage.catch_bugs_and_request_email(self._axis == 1)
                ErrorMessage.default_to_pandas("Groupby with multiple columns")
                self._index_grouped_cache = {
                    k: v.index
                    for k, v in self._df._query_compiler.getitem_column_array(self._by)
                    .to_pandas()
                    .groupby(by=self._by)
                }
            else:
                if isinstance(self._by, type(self._query_compiler)):
                    by = self._by.to_pandas().squeeze()
                else:
                    by = self._by
                if self._axis == 0:
                    self._index_grouped_cache = self._index.groupby(by)
                else:
                    self._index_grouped_cache = self._columns.groupby(by)
        return self._index_grouped_cache
github modin-project / modin / modin / pandas / general.py View on Github external
left,
    right,
    on=None,
    left_on=None,
    right_on=None,
    left_by=None,
    right_by=None,
    fill_method=None,
    suffixes=("_x", "_y"),
    how="outer",
):
    if not isinstance(left, DataFrame):
        raise ValueError(
            "can not merge DataFrame with instance of type {}".format(type(right))
        )
    ErrorMessage.default_to_pandas("`merge_ordered`")
    if isinstance(right, DataFrame):
        right = to_pandas(right)
    return DataFrame(
        pandas.merge_ordered(
            to_pandas(left),
            right,
            on=on,
            left_on=left_on,
            right_on=right_on,
            left_by=left_by,
            right_by=right_by,
            fill_method=fill_method,
            suffixes=suffixes,
            how=how,
        )
github modin-project / modin / modin / pandas / base.py View on Github external
def _default_to_pandas(self, op, *args, **kwargs):
        """Helper method to use default pandas function"""
        empty_self_str = "" if not self.empty else " for empty DataFrame"
        ErrorMessage.default_to_pandas(
            "`{}.{}`{}".format(
                self.__name__,
                op if isinstance(op, str) else op.__name__,
                empty_self_str,
            )
        )
        args = (a._to_pandas() if hasattr(a, "_to_pandas") else a for a in args)
        kwargs = {
            k: v._to_pandas() if hasattr(v, "_to_pandas") else v
            for k, v in kwargs.items()
        }
        if callable(op):
            result = op(self._to_pandas(), *args, **kwargs)
        elif isinstance(op, str):
            # The inner `getattr` is ensuring that we are treating this object (whether
            # it is a DataFrame, Series, etc.) as a pandas object. The outer `getattr`
github modin-project / modin / modin / pandas / reshape.py View on Github external
columns: Which columns to encode.
        sparse (bool): Not Implemented: If True, returns SparseDataFrame.
        drop_first (bool): Whether to remove the first level of encoded data.
        dtype: The dtype for the get_dummies call.

    Returns:
        DataFrame or one-hot encoded data.
    """
    if sparse:
        raise NotImplementedError(
            "SparseDataFrame is not implemented. "
            "To contribute to Modin, please visit "
            "github.com/modin-project/modin."
        )
    if not isinstance(data, DataFrame):
        ErrorMessage.default_to_pandas("`get_dummies` on non-DataFrame")
        return DataFrame(
            pandas.get_dummies(
                data,
                prefix=prefix,
                prefix_sep=prefix_sep,
                dummy_na=dummy_na,
                columns=columns,
                sparse=sparse,
                drop_first=drop_first,
                dtype=dtype,
            )
        )
    else:
        new_manager = data._query_compiler.get_dummies(
            columns,
            prefix=prefix,
github modin-project / modin / modin / pandas / base.py View on Github external
**kwds
    ):
        """Apply a function along input axis of DataFrame.

        Args:
            func: The function to apply
            axis: The axis over which to apply the func.
            broadcast: Whether or not to broadcast.
            raw: Whether or not to convert to a Series.
            reduce: Whether or not to try to apply reduction procedures.

        Returns:
            Series or DataFrame, depending on func.
        """
        axis = self._get_axis_number(axis)
        ErrorMessage.non_verified_udf()
        if isinstance(func, str):
            if axis == 1:
                kwds["axis"] = axis
            result = self._string_function(func, *args, **kwds)
            # Sometimes we can return a scalar here
            if isinstance(result, BasePandasDataset):
                return result._query_compiler
            return result
        elif isinstance(func, dict):
            if axis == 1:
                raise TypeError(
                    "(\"'dict' object is not callable\", "
                    "'occurred at index {0}'".format(self.index[0])
                )
            if len(self.columns) != len(set(self.columns)):
                warnings.warn(
github modin-project / modin / modin / engines / base / io / io.py View on Github external
def read_parquet(cls, path, engine, columns, **kwargs):
        """Load a parquet object from the file path, returning a DataFrame.
           Ray DataFrame only supports pyarrow engine for now.

        Args:
            path: The filepath of the parquet file.
                  We only support local files for now.
            engine: Ray only support pyarrow reader.
                    This argument doesn't do anything for now.
            kwargs: Pass into parquet's read_pandas function.

        Notes:
            ParquetFile API is used. Please refer to the documentation here
            https://arrow.apache.org/docs/python/parquet.html
        """
        ErrorMessage.default_to_pandas("`read_parquet`")
        return cls.from_pandas(pandas.read_parquet(path, engine, columns, **kwargs))
github modin-project / modin / modin / pandas / reshape.py View on Github external
def lreshape(data, groups, dropna=True, label=None):
    if not isinstance(data, DataFrame):
        raise ValueError("can not lreshape with instance of type {}".format(type(data)))
    ErrorMessage.default_to_pandas("`lreshape`")
    return DataFrame(
        pandas.lreshape(to_pandas(data), groups, dropna=dropna, label=label)
    )