How to use the pyarrow.bool_ function in pyarrow

To help you get started, we've selected a few pyarrow.bool_ examples drawn from popular public projects, showing the ways it is commonly used.
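Before the project examples, here is a minimal sketch of the function itself: pa.bool_() returns Arrow's boolean DataType, which you can pass wherever pyarrow expects a type.

import pyarrow as pa

# pa.bool_() returns the Arrow boolean DataType.
t = pa.bool_()
print(t)  # bool

# Build a nullable boolean array; None becomes a null.
arr = pa.array([True, False, None], type=pa.bool_())
print(arr.null_count)  # 1

# Declare a typed field in a schema.
schema = pa.schema([pa.field("flag", pa.bool_())])
print(schema.field("flag").type == pa.bool_())  # True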

From omnisci/pymapd (tests/test_integration.py):
        # excerpt from a pytest test; it assumes `import datetime` and
        # `import pyarrow as pa`, plus the test fixtures `c` (cursor) and
        # `con` (connection); the CREATE TABLE head is reconstructed from
        # the column list below
        create = ('''CREATE TABLE all_types (
            boolean_ BOOLEAN,
            smallint_ SMALLINT,
            int_ INT,
            bigint_ BIGINT,
            float_ FLOAT,
            double_ DOUBLE,
            varchar_ VARCHAR(40),
            text_ TEXT,
            time_ TIME,
            timestamp_ TIMESTAMP,
            date_ DATE
        );''')
        # skipping decimal for now
        c.execute(create)

        names = ['boolean_', 'smallint_', 'int_', 'bigint_',
                 'float_', 'double_', 'varchar_', 'text_',
                 'time_', 'timestamp_', 'date_']

        columns = [pa.array([True, False, None], type=pa.bool_()),
                   pa.array([1, 0, None]).cast(pa.int16()),
                   pa.array([1, 0, None]).cast(pa.int32()),
                   pa.array([1, 0, None]),
                   pa.array([1.0, 1.1, None]).cast(pa.float32()),
                   pa.array([1.0, 1.1, None]),
                   # no fixed-width string
                   pa.array(['a', 'b', None]),
                   pa.array(['a', 'b', None]),
                   (pa.array([1, 2, None]).cast(pa.int32())
                    .cast(pa.time32('s'))),
                   pa.array([datetime.datetime(2016, 1, 1, 12, 12, 12),
                             datetime.datetime(2017, 1, 1), None]),
                   pa.array([datetime.date(2016, 1, 1),
                             datetime.date(2017, 1, 1), None])]
        table = pa.Table.from_arrays(columns, names=names)
        con.load_table_arrow("all_types", table)
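Note how pa.array([True, False, None], type=pa.bool_()) yields a nullable boolean column: the None entry is stored as a null rather than being coerced to False.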
From tensorflow/io (tests/test_arrow_eager.py):
def get_arrow_type(self, dt, is_list):
    """get_arrow_type"""
    if dt == dtypes.bool:
      arrow_type = pa.bool_()
    elif dt == dtypes.int8:
      arrow_type = pa.int8()
    elif dt == dtypes.int16:
      arrow_type = pa.int16()
    elif dt == dtypes.int32:
      arrow_type = pa.int32()
    elif dt == dtypes.int64:
      arrow_type = pa.int64()
    elif dt == dtypes.uint8:
      arrow_type = pa.uint8()
    elif dt == dtypes.uint16:
      arrow_type = pa.uint16()
    elif dt == dtypes.uint32:
      arrow_type = pa.uint32()
    elif dt == dtypes.uint64:
      arrow_type = pa.uint64()
    # the tail of the chain is reconstructed here from the same pattern
    elif dt == dtypes.float16:
      arrow_type = pa.float16()
    elif dt == dtypes.float32:
      arrow_type = pa.float32()
    elif dt == dtypes.float64:
      arrow_type = pa.float64()
    elif dt == dtypes.string:
      arrow_type = pa.string()
    else:
      raise TypeError("Unsupported dtype for Arrow: {}".format(dt))
    if is_list:
      arrow_type = pa.list_(arrow_type)
    return arrow_type
From IntelPython/sdc (sdc/io/parquet_pio.py):
def _get_numba_typ_from_pa_typ(pa_typ):
    # `types` below is numba's types module; `string_type` is defined in sdc
    import pyarrow as pa
    _typ_map = {
        # boolean
        pa.bool_(): types.bool_,
        # signed int types
        pa.int8(): types.int8,
        pa.int16(): types.int16,
        pa.int32(): types.int32,
        pa.int64(): types.int64,
        # unsigned int types
        pa.uint8(): types.uint8,
        pa.uint16(): types.uint16,
        pa.uint32(): types.uint32,
        pa.uint64(): types.uint64,
        # float types (TODO: float16?)
        pa.float32(): types.float32,
        pa.float64(): types.float64,
        # String
        pa.string(): string_type,
        # date (the tail of the map and the lookup are reconstructed here)
        pa.date32(): types.NPDatetime('ns'),
    }
    if pa_typ not in _typ_map:
        raise ValueError("Arrow data type {} is not supported".format(pa_typ))
    return _typ_map[pa_typ]
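This pattern works because pyarrow DataType objects are hashable and compare by value, so pa.bool_() can serve directly as a dictionary key.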
From andrewgross/json2parquet (json2parquet/client.py):
        elif column.type.id == pa.date32().id:
            # list() guards against map() returning an iterator on Python 3
            _converted_col = list(map(_date_converter, _col))
            array_data.append(pa.array(_converted_col, type=pa.date32()))
        # Float types are ambiguous for conversions, need to specify the exact type
        elif column.type.id == pa.float64().id:
            array_data.append(pa.array(_col, type=pa.float64()))
        elif column.type.id == pa.float32().id:
            # Python doesn't have a native float32 type
            # and PyArrow cannot cast float64 -> float32
            _col = pd.to_numeric(_col, downcast='float')
            array_data.append(pa.Array.from_pandas(_col, type=pa.float32()))
        elif column.type.id == pa.int32().id:
            # PyArrow 0.8.0 can cast int64 -> int32
            _col64 = pa.array(_col, type=pa.int64())
            array_data.append(_col64.cast(pa.int32()))
        elif column.type.id == pa.bool_().id:
            _col = list(map(_boolean_converter, _col))  # list() for Python 3
            array_data.append(pa.array(_col, type=column.type))
        else:
            array_data.append(pa.array(_col, type=column.type))
        if isinstance(field_aliases, dict):
            schema_names.append(field_aliases.get(column.name, column.name))
        else:
            schema_names.append(column.name)
    return pa.RecordBatch.from_arrays(array_data, schema_names)
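Comparing column.type.id against pa.bool_().id is one way to test for a boolean column; pyarrow also exposes type predicates under pa.types, and plain DataType equality works too. A short sketch of the equivalent checks:

import pyarrow as pa

col = pa.array([True, None, False], type=pa.bool_())

# three equivalent ways to ask "is this column boolean?"
assert col.type.id == pa.bool_().id
assert col.type == pa.bool_()
assert pa.types.is_boolean(col.type)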
From kylebarron/medicare_utils (medicare_utils/parquet.py):
    fields = []
    # the loop head and first branch are reconstructed from the pattern below
    for varname, vartype in dtypes.items():
        if vartype == np.int8:
            fields.append(pa.field(varname, pa.int8()))
        elif vartype == np.int16:
            fields.append(pa.field(varname, pa.int16()))
        elif vartype == np.int32:
            fields.append(pa.field(varname, pa.int32()))
        elif vartype == np.int64:
            fields.append(pa.field(varname, pa.int64()))
        elif vartype == np.uint8:
            fields.append(pa.field(varname, pa.uint8()))
        elif vartype == np.uint16:
            fields.append(pa.field(varname, pa.uint16()))
        elif vartype == np.uint32:
            fields.append(pa.field(varname, pa.uint32()))
        elif vartype == np.uint64:
            fields.append(pa.field(varname, pa.uint64()))
        elif vartype == np.bool_:
            fields.append(pa.field(varname, pa.bool_()))
        elif (vartype == object) | (vartype.name == 'category'):
            fields.append(pa.field(varname, pa.string()))
        elif np.issubdtype(vartype, np.datetime64):
            fields.append(pa.field(varname, pa.timestamp('ns')))

    assert len(dtypes) == len(fields)
    schema = pa.schema(fields)
    return schema
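To see what this produces, here is a toy run of the same pattern on a two-column dtypes mapping (a sketch; the real function receives dtypes built elsewhere in medicare_utils):

import numpy as np
import pyarrow as pa

dtypes = {'alive': np.dtype(np.bool_), 'age': np.dtype(np.int16)}
fields = []
for varname, vartype in dtypes.items():
    if vartype == np.bool_:
        fields.append(pa.field(varname, pa.bool_()))
    elif vartype == np.int16:
        fields.append(pa.field(varname, pa.int16()))

print(pa.schema(fields))
# alive: bool
# age: int16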
From JDASoftwareGroup/kartothek (kartothek/core/index.py):
    else:
        probe = None
        has_probe = False

    # type inference
    if (dtype is None) and has_probe:
        if isinstance(probe, np.datetime64):
            dtype = pa.timestamp(
                "ns"
            )  # workaround pyarrow type inference bug (ARROW-2554)
        elif isinstance(probe, pd.Timestamp):
            dtype = pa.timestamp(
                "ns"
            )  # workaround pyarrow type inference bug (ARROW-2554)
        elif isinstance(probe, (np.bool_, bool)):
            dtype = pa.bool_()

    # fix pyarrow input
    if dtype is None:
        keys = np.asarray(list(keys_it))
    else:
        if pa.types.is_unsigned_integer(dtype):
            # numpy might create object ndarrays here, which pyarrow might (for some reason) convert to floats
            keys = list(keys_it)
        elif (
            dtype == pa.timestamp("ns")
            and has_probe
            and isinstance(probe, pd.Timestamp)
        ):
            keys = np.asarray([d.to_datetime64() for d in keys_it])
        else:
            keys = np.asarray(list(keys_it))
From apache/arrow (python/pyarrow/jvm.py):
    if not jvm_type.isComplex():
        type_str = jvm_type.getTypeID().toString()
        if type_str == 'Null':
            typ = pa.null()
        elif type_str == 'Int':
            typ = _from_jvm_int_type(jvm_type)
        elif type_str == 'FloatingPoint':
            typ = _from_jvm_float_type(jvm_type)
        elif type_str == 'Utf8':
            typ = pa.string()
        elif type_str == 'Binary':
            typ = pa.binary()
        elif type_str == 'FixedSizeBinary':
            typ = pa.binary(jvm_type.getByteWidth())
        elif type_str == 'Bool':
            typ = pa.bool_()
        elif type_str == 'Time':
            typ = _from_jvm_time_type(jvm_type)
        elif type_str == 'Timestamp':
            typ = _from_jvm_timestamp_type(jvm_type)
        elif type_str == 'Date':
            typ = _from_jvm_date_type(jvm_type)
        elif type_str == 'Decimal':
            typ = pa.decimal128(jvm_type.getPrecision(), jvm_type.getScale())
        else:
            raise NotImplementedError(
                "Unsupported JVM type: {}".format(type_str))
    else:
        # TODO: The following JVM types are not implemented:
        #       Struct, List, FixedSizeList, Union, Dictionary
        raise NotImplementedError(
            "JVM field conversion only implemented for primitive types.")