# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
float_ FLOAT,
double_ DOUBLE,
varchar_ VARCHAR(40),
text_ TEXT,
time_ TIME,
timestamp_ TIMESTAMP,
date_ DATE
);''')
# NOTE(review): the CREATE TABLE DDL above is the tail of a triple-quoted
# string whose opening (and the assignment to `create`) lies outside the
# visible lines -- confirm against the original file.
# skipping decimal for now
c.execute(create)
# Column names; must appear in the same order as the DDL columns above and
# as the arrays in `columns` below.
names = ['boolean_', 'smallint_', 'int_', 'bigint_',
'float_', 'double_', 'varchar_', 'text_',
'time_', 'timestamp_', 'date_']
# One pyarrow array per column; each ends with None to exercise NULL
# handling for every type.
columns = [pa.array([True, False, None], type=pa.bool_()),
pa.array([1, 0, None]).cast(pa.int16()),
pa.array([1, 0, None]).cast(pa.int32()),
pa.array([1, 0, None]),
pa.array([1.0, 1.1, None]).cast(pa.float32()),
pa.array([1.0, 1.1, None]),
# no fixed-width string
pa.array(['a', 'b', None]),
pa.array(['a', 'b', None]),
# TIME column: build int32 seconds, then cast to time32('s')
(pa.array([1, 2, None]).cast(pa.int32())
.cast(pa.time32('s'))),
pa.array([datetime.datetime(2016, 1, 1, 12, 12, 12),
datetime.datetime(2017, 1, 1), None]),
pa.array([datetime.date(2016, 1, 1),
datetime.date(2017, 1, 1), None])]
# Assemble the arrays into an Arrow table and load it through the
# connection's Arrow ingest path.
table = pa.Table.from_arrays(columns, names=names)
con.load_table_arrow("all_types", table)
def get_arrow_type(self, dt, is_list):
    """Translate a ``dtypes`` scalar constant into a pyarrow DataType.

    Fix: the original assigned ``arrow_type`` but never returned it, so
    every call yielded ``None``; it also fell through silently for
    unsupported types.

    Parameters
    ----------
    dt : a ``dtypes`` type constant; bool and the fixed-width
        signed/unsigned integer types are supported here.
    is_list : bool
        When true, the scalar type is wrapped in ``pa.list_`` so the
        column maps to an Arrow list type.

    Returns
    -------
    pyarrow.DataType

    Raises
    ------
    NotImplementedError
        If ``dt`` is not one of the supported scalar types.
    """
    if dt == dtypes.bool:
        arrow_type = pa.bool_()
    elif dt == dtypes.int8:
        arrow_type = pa.int8()
    elif dt == dtypes.int16:
        arrow_type = pa.int16()
    elif dt == dtypes.int32:
        arrow_type = pa.int32()
    elif dt == dtypes.int64:
        arrow_type = pa.int64()
    elif dt == dtypes.uint8:
        arrow_type = pa.uint8()
    elif dt == dtypes.uint16:
        arrow_type = pa.uint16()
    elif dt == dtypes.uint32:
        arrow_type = pa.uint32()
    elif dt == dtypes.uint64:
        arrow_type = pa.uint64()
    else:
        # Fail loudly instead of returning None for unmapped types.
        raise NotImplementedError("Unsupported dtype: {}".format(dt))
    # NOTE(review): ``is_list`` was unused in the original (possibly
    # truncated) body; wrapping in pa.list_ is the presumed intent of the
    # parameter -- confirm against the full original source.
    if is_list:
        arrow_type = pa.list_(arrow_type)
    return arrow_type
# NOTE(review): this is a byte-for-byte duplicate of the preceding
# get_arrow_type definition (in Python the later def would shadow the
# earlier one if both are in the same scope); the branch chain also stops
# at uint64 with no return statement, so the fragment looks truncated --
# confirm against the original file.
def get_arrow_type(self, dt, is_list):
"""Map a ``dtypes`` scalar constant to the matching pyarrow type.

Only bool and the fixed-width signed/unsigned integer types are handled
in the visible branches.  ``is_list`` is not used in the visible body,
and ``arrow_type`` is assigned but never returned here.
"""
if dt == dtypes.bool:
arrow_type = pa.bool_()
elif dt == dtypes.int8:
arrow_type = pa.int8()
elif dt == dtypes.int16:
arrow_type = pa.int16()
elif dt == dtypes.int32:
arrow_type = pa.int32()
elif dt == dtypes.int64:
arrow_type = pa.int64()
elif dt == dtypes.uint8:
arrow_type = pa.uint8()
elif dt == dtypes.uint16:
arrow_type = pa.uint16()
elif dt == dtypes.uint32:
arrow_type = pa.uint32()
elif dt == dtypes.uint64:
arrow_type = pa.uint64()
def _get_numba_typ_from_pa_typ(pa_typ):
"""Map a pyarrow DataType to the corresponding numba type.

NOTE(review): this fragment is cut off -- the ``_typ_map`` dict literal
below is never closed in the visible lines, so the lookup/return logic
that presumably follows is missing here.
"""
# Imported locally, presumably to keep pyarrow an optional dependency.
import pyarrow as pa
# pyarrow DataType instances are hashable, so they can serve as dict keys.
_typ_map = {
# boolean
pa.bool_(): types.bool_,
# signed int types
pa.int8(): types.int8,
pa.int16(): types.int16,
pa.int32(): types.int32,
pa.int64(): types.int64,
# unsigned int types
pa.uint8(): types.uint8,
pa.uint16(): types.uint16,
pa.uint32(): types.uint32,
pa.uint64(): types.uint64,
# float types (TODO: float16?)
pa.float32(): types.float32,
pa.float64(): types.float64,
# String
pa.string(): string_type,
# NOTE(review): fragment begins mid ``if``/``elif`` chain -- the earlier
# branches and the surrounding loop over ``column``/``_col`` are not
# visible.  Each branch converts one input column ``_col`` into a pyarrow
# array matching ``column.type`` and appends it to ``array_data``.
# date
elif column.type.id == pa.date32().id:
# Dates are passed through _date_converter before array construction.
_converted_col = map(_date_converter, _col)
array_data.append(pa.array(_converted_col, type=pa.date32()))
# Float types are ambiguous for conversions, need to specify the exact type
elif column.type.id == pa.float64().id:
array_data.append(pa.array(_col, type=pa.float64()))
elif column.type.id == pa.float32().id:
# Python doesn't have a native float32 type
# and PyArrow cannot cast float64 -> float32
_col = pd.to_numeric(_col, downcast='float')
array_data.append(pa.Array.from_pandas(_col, type=pa.float32()))
elif column.type.id == pa.int32().id:
# PyArrow 0.8.0 can cast int64 -> int32
_col64 = pa.array(_col, type=pa.int64())
array_data.append(_col64.cast(pa.int32()))
elif column.type.id == pa.bool_().id:
# Booleans go through _boolean_converter first.
_col = map(_boolean_converter, _col)
array_data.append(pa.array(_col, type=column.type))
else:
# Default: let pyarrow build the array directly with the column's type.
array_data.append(pa.array(_col, type=column.type))
# Optionally rename the output column via the field_aliases mapping,
# falling back to the original column name.
if isinstance(field_aliases, dict):
schema_names.append(field_aliases.get(column.name, column.name))
else:
schema_names.append(column.name)
# Assemble the per-column arrays into a single RecordBatch.
return pa.RecordBatch.from_arrays(array_data, schema_names)
# NOTE(review): fragment begins mid ``elif`` chain that maps numpy dtypes
# to pyarrow schema fields; the opening ``if`` (and the loop that yields
# ``varname``/``vartype``) is not visible here.
elif vartype == np.int16:
fields.append(pa.field(varname, pa.int16()))
elif vartype == np.int32:
fields.append(pa.field(varname, pa.int32()))
elif vartype == np.int64:
fields.append(pa.field(varname, pa.int64()))
elif vartype == np.uint8:
fields.append(pa.field(varname, pa.uint8()))
elif vartype == np.uint16:
fields.append(pa.field(varname, pa.uint16()))
elif vartype == np.uint32:
fields.append(pa.field(varname, pa.uint32()))
elif vartype == np.uint64:
fields.append(pa.field(varname, pa.uint64()))
elif vartype == np.bool_:
fields.append(pa.field(varname, pa.bool_()))
# object dtype and pandas 'category' dtype are both stored as strings
elif (vartype == object) | (vartype.name == 'category'):
fields.append(pa.field(varname, pa.string()))
# any datetime64 flavor becomes a nanosecond-resolution timestamp
elif np.issubdtype(vartype, np.datetime64):
fields.append(pa.field(varname, pa.timestamp('ns')))
# sanity check: every dtype must have produced exactly one field
assert len(dtypes) == len(fields)
schema = pa.schema(fields)
return schema
# NOTE(review): fragment begins at a bare ``else`` whose matching ``if``
# (which presumably pulls a ``probe`` element out of the key iterable) is
# not visible here.
else:
probe = None
has_probe = False
# type inference: only attempted when no dtype was supplied and a probe
# value is available.
if (dtype is None) and has_probe:
if isinstance(probe, np.datetime64):
dtype = pa.timestamp(
"ns"
) # workaround pyarrow type inference bug (ARROW-2554)
elif isinstance(probe, pd.Timestamp):
dtype = pa.timestamp(
"ns"
) # workaround pyarrow type inference bug (ARROW-2554)
elif isinstance(probe, (np.bool_, bool)):
dtype = pa.bool_()
# fix pyarrow input: choose a key container pyarrow will convert
# faithfully for the inferred/supplied dtype.
if dtype is None:
keys = np.asarray(list(keys_it))
else:
if pa.types.is_unsigned_integer(dtype):
# numpy might create object ndarrays here, which pyarrow might (for some reason) convert to floats
keys = list(keys_it)
elif (
dtype == pa.timestamp("ns")
and has_probe
and isinstance(probe, pd.Timestamp)
):
# convert pd.Timestamp keys to datetime64 before handing to numpy
keys = np.asarray([d.to_datetime64() for d in keys_it])
else:
keys = np.asarray(list(keys_it))
# NOTE(review): fragment from a JVM->pyarrow type conversion helper; the
# enclosing function signature is outside the visible lines.
if not jvm_type.isComplex():
# Primitive types: dispatch on the Arrow type-id string reported by the
# JVM type object.
type_str = jvm_type.getTypeID().toString()
if type_str == 'Null':
typ = pa.null()
elif type_str == 'Int':
# width/signedness handled by the helper
typ = _from_jvm_int_type(jvm_type)
elif type_str == 'FloatingPoint':
typ = _from_jvm_float_type(jvm_type)
elif type_str == 'Utf8':
typ = pa.string()
elif type_str == 'Binary':
typ = pa.binary()
elif type_str == 'FixedSizeBinary':
# fixed-width binary carries its byte width on the JVM type
typ = pa.binary(jvm_type.getByteWidth())
elif type_str == 'Bool':
typ = pa.bool_()
elif type_str == 'Time':
typ = _from_jvm_time_type(jvm_type)
elif type_str == 'Timestamp':
typ = _from_jvm_timestamp_type(jvm_type)
elif type_str == 'Date':
typ = _from_jvm_date_type(jvm_type)
elif type_str == 'Decimal':
# decimal128 parameterized by the JVM type's precision and scale
typ = pa.decimal128(jvm_type.getPrecision(), jvm_type.getScale())
else:
raise NotImplementedError(
"Unsupported JVM type: {}".format(type_str))
else:
# TODO: The following JVM types are not implemented:
# Struct, List, FixedSizeList, Union, Dictionary
raise NotImplementedError(
"JVM field conversion only implemented for primitive types.")