How to use the awswrangler.data_types.athena2pandas function in awswrangler

To help you get started, we’ve selected a few awswrangler examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github awslabs / aws-data-wrangler / awswrangler / pandas.py View on Github external
def _cast_pandas(dataframe: pd.DataFrame, cast_columns: Dict[str, str]) -> pd.DataFrame:
        for col, athena_type in cast_columns.items():
            pandas_type: str = data_types.athena2pandas(dtype=athena_type)
            if pandas_type == "datetime64":
                dataframe[col] = pd.to_datetime(dataframe[col])
            elif pandas_type == "date":
                dataframe[col] = pd.to_datetime(dataframe[col]).dt.date.replace(to_replace={pd.NaT: None})
            else:
                dataframe[col] = dataframe[col].astype(pandas_type, skipna=True)
        return dataframe
github awslabs / aws-data-wrangler / awswrangler / pandas.py View on Github external
def _get_query_dtype(self, query_execution_id: str) -> Tuple[Dict[str, str], List[str], List[str], Dict[str, Any]]:
        cols_metadata: Dict[str, str] = self._session.athena.get_query_columns_metadata(
            query_execution_id=query_execution_id)
        logger.debug(f"cols_metadata: {cols_metadata}")
        dtype: Dict[str, str] = {}
        parse_timestamps: List[str] = []
        parse_dates: List[str] = []
        converters: Dict[str, Any] = {}
        col_name: str
        col_type: str
        for col_name, col_type in cols_metadata.items():
            pandas_type: str = data_types.athena2pandas(dtype=col_type)
            if pandas_type in ["datetime64", "date"]:
                parse_timestamps.append(col_name)
                if pandas_type == "date":
                    parse_dates.append(col_name)
            elif pandas_type == "list":
                converters[col_name] = Pandas._list_parser
            elif pandas_type == "bool":
                logger.debug(f"Ignoring bool column: {col_name}")
            elif pandas_type == "decimal":
                converters[col_name] = lambda x: Decimal(str(x)) if str(x) != "" else None
            else:
                dtype[col_name] = pandas_type
        logger.debug(f"dtype: {dtype}")
        logger.debug(f"parse_timestamps: {parse_timestamps}")
        logger.debug(f"parse_dates: {parse_dates}")
        logger.debug(f"converters: {converters}")