export function arrowTableToDataFrame(table: Table): ArrowDataFrame {
  const fields: Field[] = [];
  for (let i = 0; i < table.numCols; i++) {
    const col = table.getColumnAt(i);
    if (col) {
      const schema = table.schema.fields[i];
      let type = FieldType.other;
      const values: Vector = col;
      // Map the Arrow column type onto a DataFrame field type
      switch ((schema.typeId as unknown) as ArrowType) {
        case ArrowType.Decimal:
        case ArrowType.Int:
        case ArrowType.FloatingPoint: {
          type = FieldType.number;
          break;
        }
        case ArrowType.Bool: {
          type = FieldType.boolean;
          break;
        }
        case ArrowType.Timestamp: {
          type = FieldType.time;
          break;
        }
        default:
          console.log('UNKNOWN Type:', schema);
      }
      fields.push({
        name: col.name,
        type,
        config: {}, // TODO, pull from metadata
        values,
      });
    }
  }
  // The source snippet was truncated here; this return shape is an assumption
  // consistent with the signature: the fields plus the total row count
  return {
    fields,
    length: table.length,
    table,
  };
}
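For orientation, a minimal usage sketch, assuming the same apache-arrow `Table.from` API the last example on this page uses; `arrowBytes` is a hypothetical ArrayBuffer of Arrow IPC data:

import { Table } from 'apache-arrow';

// Hypothetical input: `arrowBytes` holds a serialized Arrow IPC stream
const table = Table.from([new Uint8Array(arrowBytes)]);
const frame = arrowTableToDataFrame(table);

// Each field now carries the mapped FieldType and its backing column vector
for (const field of frame.fields) {
  console.log(field.name, field.type, field.values.length);
}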
function getArrowSchema(schema) {
  const arrowFields = [];
  for (const key in schema) {
    const field = schema[key];
    if (field.type === Float32Array) {
      const metadata = field; // just store the original field as metadata
      // arrow: new Field(name, type, nullable, metadata)
      const arrowField = new Field(field.name, new Float32(), field.nullable, metadata);
      arrowFields.push(arrowField);
    }
  }
  if (arrowFields.length === 0) {
    throw new Error('No arrow convertible fields');
  }
  return new Schema(arrowFields);
}
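A sketch of the input shape getArrowSchema appears to expect; the property names below are assumptions inferred from the accesses inside the function:

// Hypothetical loaders.gl-style schema: only Float32Array columns are converted
const schema = {
  positions: { name: 'positions', type: Float32Array, nullable: false },
  labels: { name: 'labels', type: Array, nullable: true }, // skipped by the type filter
};

const arrowSchema = getArrowSchema(schema);
// -> Schema containing a single Float32 field named 'positions'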
export async function parseArrowInBatches(asyncIterator, options) {
  // Creates the appropriate RecordBatchReader subclasses from the input
  // This will also close the underlying source in case of early termination or errors
  const readers = await RecordBatchReader.readAll(asyncIterator);

  // As an optimization, return a non-async iterator
  if (isIterable(readers)) {
    return (function* arrowIterator() {
      for (const reader of readers) {
        for (const batch of reader) {
          yield processBatch(batch, reader);
        }
        break; // only processing one stream of batches
      }
    })();
  }

  return (async function* arrowAsyncIterator() {
    for await (const reader of readers) {
      for await (const batch of reader) {
        // (tail completed here on the assumption that it mirrors the
        // synchronous branch above; the source snippet was truncated)
        yield processBatch(batch, reader);
      }
      break; // only processing one stream of batches
    }
  })();
}
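A hedged consumption sketch for the async path; `fetchChunks` is a hypothetical stand-in for any async iterable of ArrayBuffer chunks:

// Hypothetical: stream Arrow IPC chunks and handle batches as they arrive
async function consumeBatches(url) {
  const batches = await parseArrowInBatches(fetchChunks(url));
  for await (const batch of batches) {
    console.log('processed batch', batch);
  }
}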
export function parseArrowInBatchesSync(iterator, options) {
  // Creates the appropriate RecordBatchReader subclasses from the input
  // This will also close the underlying source in case of early termination or errors
  const readers = RecordBatchReader.readAll(iterator);

  // Check that `readers` is not a Promise, and is iterable
  if (isIterable(readers) || isIterator(readers)) {
    return (function* arrowIterator() {
      for (const reader of readers) {
        for (const batch of reader) {
          yield processBatch(batch);
        }
        break; // only processing one stream of batches
      }
    })();
  }
  return assert(false);
}
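The synchronous variant can be driven the same way from a plain iterator; the single in-memory chunk below is illustrative only:

// Hypothetical: wrap one ArrayBuffer of Arrow IPC data in an iterator
const batches = parseArrowInBatchesSync([arrowBytes][Symbol.iterator]());
for (const batch of batches) {
  console.log('processed batch', batch);
}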
getNormalizedBatch() {
  const batch = super.getNormalizedBatch();
  if (batch) {
    // Get the arrow schema (computed once, then cached on the instance)
    this.arrowSchema = this.arrowSchema || getArrowSchema(batch.schema);
    // Get arrow format vectors
    const arrowVectors = getArrowVectors(this.arrowSchema, batch.data);
    // Create the record batch
    // new RecordBatch(schema, numRows, vectors, ...)
    return new RecordBatch(this.arrowSchema, batch.length, arrowVectors);
  }
  return null;
}
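getArrowVectors is not shown in this excerpt; a minimal stand-in consistent with the Float32-only schema built above might look like this (an assumption, not the library's actual implementation):

import { Float32Vector } from 'apache-arrow';

// Hypothetical stand-in: build one Float32 vector per schema field,
// assuming `data` maps field names to Float32Arrays
function getArrowVectors(arrowSchema, data) {
  return arrowSchema.fields.map(field => Float32Vector.from(data[field.name]));
}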
export default function parseArrowSync(arrayBuffer, options) {
  const arrowTable = Table.from([new Uint8Array(arrayBuffer)]);

  // Extract columns
  // TODO - avoid calling `getColumn` on columns we are not interested in?
  // Add options object?
  const columnarTable = {};

  arrowTable.schema.fields.forEach(field => {
    // This (is intended to) coalesce all record batches into a single typed array
    const arrowColumn = arrowTable.getColumn(field.name);
    const values = arrowColumn.toArray();
    columnarTable[field.name] = values;
  });

  return columnarTable;
}
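Putting it together, a hypothetical end-to-end call for the sync parser; the URL and the surrounding async context are assumptions:

// Hypothetical usage: fetch an Arrow IPC file and read it as typed arrays
const response = await fetch('data/example.arrow');
const columnarTable = parseArrowSync(await response.arrayBuffer());
// columnarTable maps each column name to a flat typed array
console.log(Object.keys(columnarTable));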