Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_appending_records_different_schema_fails(tmpdir):
    """Appending records whose schema does not match the file's
    writer schema must raise ValueError.

    Regression test for
    https://github.com/fastavro/fastavro/issues/276
    """
    string_schema = {
        "type": "record",
        "name": "test_appending_records_different_schema_fails",
        "fields": [{"name": "field", "type": "string"}],
    }
    int_schema = {
        "type": "record",
        "name": "test_appending_records",
        "fields": [{"name": "field", "type": "int"}],
    }
    test_file = str(tmpdir.join("test.avro"))

    # Create the container file with the string schema first.
    with open(test_file, "wb") as avro_file:
        fastavro.writer(avro_file, string_schema, [{"field": "foo"}])

    # Re-opening for append with an incompatible schema must fail.
    with open(test_file, "a+b") as avro_file:
        with pytest.raises(
            ValueError, match="does not match file writer_schema"
        ):
            fastavro.writer(avro_file, int_schema, [{"field": 1}])
fastavro.writer(new_file, schema, [{"field": "foo"}])
different_schema = {
"type": "record",
"name": "test_appending_records",
"fields": [{
"name": "field",
"type": "int",
}]
}
with open(test_file, "a+b") as new_file:
with pytest.raises(
ValueError, match="does not match file writer_schema"
):
fastavro.writer(new_file, different_schema, [{"field": 1}])
"fields": [
{"name": "station", "type": "string"},
{"name": "time", "type": "long"},
{"name": "temp", "type": "int"},
],
}
records = [
{"station": "011990-99999", "temp": 0, "time": 1433269388},
{"station": "011990-99999", "temp": 22, "time": 1433270389},
{"station": "011990-99999", "temp": -11, "time": 1433273379},
{"station": "012650-99999", "temp": 111, "time": 1433275478},
]
file = MemoryIO()
fastavro.writer(file, schema, records, codec=codec)
file.seek(0)
out_records = list(fastavro.reader(file))
assert records == out_records
"name": "test_schema_migration_remove_field",
"fields": [{
"name": "test",
"type": "string",
}]
}
new_schema = {
"type": "record",
"name": "test_schema_migration_remove_field_new",
"fields": []
}
new_file = MemoryIO()
records = [{'test': 'test'}]
fastavro.writer(new_file, schema, records)
new_file.seek(0)
new_reader = fastavro.reader(new_file, new_schema)
new_records = list(new_reader)
assert new_records == [{}]
"type": "enum",
"name": "test",
"symbols": ["A", "B"],
}
new_schema = {
"type": "enum",
"name": "test",
"symbols": ["C", "D"],
"default": "C",
}
original_records = ["A"]
bio = BytesIO()
fastavro.writer(bio, original_schema, original_records)
bio.seek(0)
new_records = list(fastavro.reader(bio, new_schema))
assert new_records == ["C"]
def roundtrip(schema, records, new_schema):
    """Write *records* under *schema*, then read them back resolved
    against *new_schema*, returning the decoded records."""
    buffer = MemoryIO()
    fastavro.writer(buffer, schema, records)
    buffer.seek(0)
    return list(fastavro.reader(buffer, new_schema))
def generate_avro(src_file: Text, output_file: Text):
    """Generate an avro file from the Chicago taxi CSV dataset.

    Args:
        src_file: path to the Chicago taxi dataset CSV.
        output_file: output path for the avro file.
    """
    frame = pd.read_csv(src_file)
    # Avro needs explicit nulls, so map pandas NaN values to None
    # before handing rows to the avro writer.
    frame = frame.where(pd.notnull(frame), None)
    row_dicts = frame.to_dict(orient='records')
    with open(output_file, 'wb') as out:
        fastavro.writer(out, fastavro.parse_schema(get_schema()), row_dicts)
def roundtrip(record, writer_schema, reader_schema):
    """Serialize a single *record* with *writer_schema*, deserialize it
    with *reader_schema*, and return the resolved record."""
    buffer = MemoryIO()
    fastavro.writer(buffer, writer_schema, [record])
    buffer.seek(0)
    decoded = list(fastavro.reader(buffer, reader_schema))
    return decoded[0]
new_schema = {
"type": "record",
"name": "test_schema_migration_array_failure_new",
"fields": [{
"name": "test",
"type": {
"type": "array",
"items": ["string", "boolean"]
},
}]
}
new_file = MemoryIO()
records = [{"test": [1, 2, 3]}]
fastavro.writer(new_file, schema, records)
new_file.seek(0)
new_reader = fastavro.reader(new_file, new_schema)
with pytest.raises(fastavro.read.SchemaResolutionError):
list(new_reader)
def put_avro(self, schema, records, blob_name, codec='snappy'):
    """Serialize *records* as an avro file stored under *blob_name*.

    The file is written beneath this store's base path (the parent
    directory is created by ``_get_path_and_create_dir``) and a
    ``Blob`` describing the written file is returned.
    """
    target = self._get_path_and_create_dir(blob_name)
    with open(target, "wb") as out:
        fastavro.writer(out, schema, records, codec)
    return Blob(blob_name, os.path.getsize(target))