# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# NOTE(review): indentation in this file appears to have been stripped by an
# extraction tool, and this function is truncated — the 'bytes' branch at the
# end has no body (its continuation seems to appear near the end of the file).
# Code lines are left byte-identical; only comments were added.
# Converts *datum* from its Avro JSON encoding into a Python value, driven by
# the parsed Avro *schema* — TODO confirm against the complete upstream source.
def _read_json(datum, schema):
record_type = extract_record_type(schema)
# Primitive and enum values have the same representation in Avro JSON and
# Python, so they are returned unchanged.
passthrough_schemas = (
'null',
'boolean',
'string',
'int',
'long',
'float',
'double',
'enum',
)
if record_type in passthrough_schemas:
return datum
# Truncated here: the 'bytes' branch has no body in this chunk.
elif record_type == 'bytes':
# NOTE(review): truncated fragment (indentation stripped; the 'bytes' branch
# has no body — more of this function appears further down the file, around
# the array/map branches). Only comments were added; code is byte-identical.
# Converts a Python value *datum* into its Avro JSON encoding, driven by the
# parsed Avro *schema* — TODO confirm against the complete upstream source.
def _write_json(datum, schema):
record_type = extract_record_type(schema)
# Primitive and enum values pass through unchanged: their JSON encoding is
# the Python value itself.
passthrough_schemas = (
'null',
'boolean',
'string',
'int',
'long',
'float',
'double',
'enum',
)
if record_type in passthrough_schemas:
return datum
# Truncated here: the 'bytes' branch has no body in this chunk.
elif record_type == 'bytes':
def write_data(encoder, datum, schema):
    """Write a datum of data to output stream.

    Dispatches on the schema's record type: known types are handled by the
    matching writer in ``WRITERS`` (after applying any logical-type
    conversion); otherwise the type is treated as a named schema and resolved
    through ``SCHEMA_DEFS`` recursively.

    Parameters
    ----------
    encoder: encoder
        Type of encoder (e.g. binary or json)
    datum: object
        Data to write
    schema: dict
        Schema to use
    """
    record_type = extract_record_type(schema)
    logical_type = extract_logical_type(schema)
    fn = WRITERS.get(record_type)
    if fn:
        if logical_type:
            # Logical types (e.g. decimal, timestamp) are converted to their
            # underlying Avro representation before encoding.
            prepare = LOGICAL_WRITERS.get(logical_type)
            if prepare:
                datum = prepare(datum, schema)
        return fn(encoder, datum, schema)
    else:
        # Not a built-in type: assume it names a previously-defined schema
        # and recurse with the resolved definition.
        return write_data(encoder, datum, SCHEMA_DEFS[record_type])
# NOTE(review): truncated fragment — the try block is cut off after the final
# 'else:'. Only comments were added; code lines are byte-identical.
def read_data(decoder, writer_schema, reader_schema=None,
return_record_name=False):
"""Read data from file object according to schema."""
record_type = extract_record_type(writer_schema)
if reader_schema and record_type in AVRO_TYPES:
# If the schemas are the same, set the reader schema to None so that no
# schema resolution is done for this call or future recursive calls
if writer_schema == reader_schema:
reader_schema = None
else:
match_schemas(writer_schema, reader_schema)
reader_fn = READERS.get(record_type)
if reader_fn:
try:
# Complex types need both schemas (and the record-name flag) passed
# through for resolution; presumably scalar readers take fewer
# arguments — the else branch is truncated here, so cannot confirm.
if record_type in ['array', 'map', 'record', 'union']:
data = reader_fn(decoder, writer_schema, reader_schema,
return_record_name)
else:
# NOTE(review): truncated fragment — the body is cut off inside the
# best-match loop (an unrelated '_parse' method is spliced in below; the
# continuation of this loop appears to resume afterwards). Only comments
# were added; code lines are byte-identical.
def write_union(encoder, datum, schema):
"""A union is encoded by first writing a long value indicating the
zero-based position within the union of the schema of its value. The value
is then encoded per the indicated schema within the union."""
# A (name, value) tuple explicitly selects the union branch by name.
if isinstance(datum, tuple):
(name, datum) = datum
for index, candidate in enumerate(schema):
# Records are matched by their declared name; other candidates are
# matched against the schema value itself.
if extract_record_type(candidate) == 'record':
schema_name = candidate['name']
else:
schema_name = candidate
if name == schema_name:
break
else:
# for/else: no candidate matched the supplied name.
msg = 'provided union type name %s not found in schema %s' \
% (name, schema)
raise ValueError(msg)
else:
# No explicit branch given: pick the best-validating candidate.
# (Loop body truncated in this chunk.)
pytype = type(datum)
best_match_index = -1
most_fields = -1
for index, candidate in enumerate(schema):
if validate(datum, candidate, raise_errors=False):
if extract_record_type(candidate) == 'record':
# NOTE(review): truncated fragment of a method spliced into the middle of
# write_union — the 'union' branch stops after 'symbols = []'. Only comments
# were added; code lines are byte-identical.
def _parse(self, schema):
record_type = extract_record_type(schema)
if record_type == 'record':
production = []
production.append(RecordStart())
# NOTE(review): fields are insert(0)-ed after RecordStart was appended,
# which reverses field order relative to schema["fields"] — looks
# intentional for whatever Sequence consumes, but confirm upstream.
for field in schema["fields"]:
production.insert(0, FieldStart(field["name"]))
production.insert(0, self._parse(field["type"]))
production.insert(0, FieldEnd())
production.insert(0, RecordEnd())
seq = Sequence(*production)
return seq
elif record_type == 'union':
symbols = []
# NOTE(review): orphan fragment — this resumes the best-match loop of
# write_union (above) and then runs into the array/map branches of
# _write_json. The two pieces do not belong to one function; left
# byte-identical with comments only.
best_match_index = index
most_fields = fields
else:
best_match_index = index
break
if best_match_index < 0:
pytype = type(datum)
msg = '%r (type %s) do not match %s' % (datum, pytype, schema)
raise ValueError(msg)
best_match_schema = schema[best_match_index]
if best_match_schema == 'null':
return None
else:
# Named types (record/enum/fixed) are keyed by their name in the
# JSON union encoding; other types are keyed by the type string.
best_records_type = extract_record_type(best_match_schema)
if best_records_type in ('record', 'enum', 'fixed'):
key = best_match_schema['name']
else:
key = best_match_schema
return {key: _write_json(datum, best_match_schema)}
elif record_type == 'array':
# Arrays encode element-wise with the items schema.
dtype = schema['items']
return [_write_json(item, dtype) for item in datum]
elif record_type == 'map':
# Maps encode value-wise with the values schema; keys pass through.
vtype = schema['values']
result = {}
for key, value in iteritems(datum):
result[key] = _write_json(value, vtype)
return result
# NOTE(review): orphan fragment — appears to be the union/array/map/record
# branches of _read_json (truncated above). Left byte-identical with
# comments only.
'null',
'boolean',
'string',
'int',
'long',
'float',
'double',
'enum',
'bytes',
'map',
'array',
):
return _read_json(value, dtype)
for single_schema in schema:
# NOTE(review): likely bug — ('record, enum, fixed') is a SINGLE string,
# so 'in' performs a substring test ('enum' in 'record, enum, fixed' is
# True, but so would be e.g. 'cor'); almost certainly meant the tuple
# ('record', 'enum', 'fixed'). TODO confirm against upstream before fixing.
if (extract_record_type(single_schema) in ('record, enum, fixed')
and single_schema.get('name') == dtype):
return _read_json(value, single_schema)
elif record_type == 'array':
dtype = schema['items']
return [_read_json(item, dtype) for item in datum]
elif record_type == 'map':
vtype = schema['values']
result = {}
for key, value in iteritems(datum):
result[key] = _read_json(value, vtype)
return result
# Record-like types build a dict of decoded fields (body truncated here).
elif record_type in ('record', 'error', 'request',):
result = {}