"""
import avro.io as avio
from spavro.datafile import DataFileReader,DataFileWriter
from spavro import schema
#recursively make all directories
dparts=fname.split(os.sep)[:-1]
for i in range(len(dparts)):
pdir=os.sep+os.sep.join(dparts[:i+1])
if not(os.path.exists(pdir)):
os.mkdir(pdir)
with file(fname,'w') as hf:
inschema="""{"type":"string"}"""
writer=DataFileWriter(hf,avio.DatumWriter(inschema),writers_schema=schema.parse(inschema))
#encoder = avio.BinaryEncoder(writer)
#datum_writer = avio.DatumWriter()
for datum in lines:
writer.append(datum)
writer.close()
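The imported DataFileReader can be used to read the file back; a minimal read-back sketch, assuming fname still points at the file written above and that spavro.io exposes the usual DatumReader:

from spavro.datafile import DataFileReader
from spavro.io import DatumReader

# Read back the string records written above (fname is assumed from the snippet).
with open(fname, 'rb') as hf:
    reader = DataFileReader(hf, DatumReader())
    for datum in reader:
        print(datum)
    reader.close()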
parser.add_argument('avro_file', help='Avro file to iterate over')
parser.add_argument('--pyavro', default=False, action='store_true',
                    help='run the avro python benchmark as well')
args = parser.parse_args(argv[1:])

from fastavro import reader
print('Using {0}'.format(reader))
with open(args.avro_file, 'rb') as fo:
    timeit('fastavro', reader(fo))

if args.pyavro:
    import avro.io
    import avro.datafile
    with open(args.avro_file, 'rb') as fo:
        reader = avro.datafile.DataFileReader(fo, avro.io.DatumReader())
        timeit('avro', reader)
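This benchmark fragment relies on a local timeit helper rather than the stdlib module; a minimal sketch of what such a helper might look like (the name and output format are assumptions):

import time

def timeit(name, records):
    # Hypothetical benchmark helper: drain the record iterator and report timing.
    start = time.time()
    count = sum(1 for _ in records)
    elapsed = time.time() - start
    print('{0}: read {1} records in {2:.4f}s'.format(name, count, elapsed))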
def getInvalidValue(self, cls, fieldName):
    """
    Returns a value that should trigger a schema validation failure.
    """
    fieldType = self.getAvroSchema(cls, fieldName).type
    if isinstance(fieldType, avro.schema.UnionSchema):
        types = list(t.type for t in fieldType.schemas)
        val = self.instanceGenerator.generateInvalidateTypeValue(*types)
    else:
        val = self.instanceGenerator.generateInvalidateTypeValue(fieldType)
    return val
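generateInvalidateTypeValue is not shown in this fragment; a hypothetical sketch, assuming it should return a value that none of the listed Avro primitive types accepts:

def generateInvalidateTypeValue(self, *types):
    # Hypothetical helper: return a value that is not a valid instance of any
    # of the supplied Avro primitive type names.
    accepts = {
        'string': lambda v: isinstance(v, str),
        'int': lambda v: isinstance(v, int) and not isinstance(v, bool),
        'long': lambda v: isinstance(v, int) and not isinstance(v, bool),
        'float': lambda v: isinstance(v, float),
        'double': lambda v: isinstance(v, float),
        'boolean': lambda v: isinstance(v, bool),
        'null': lambda v: v is None,
    }
    for candidate in (3.14, 'a string', 42, True, None, [], {}):
        if not any(accepts.get(t, lambda v: False)(candidate) for t in types):
            return candidate
    return object()  # a bare object matches no Avro type at all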
def getContainerEventSchema(schema_files_location):
    # Read all the schemas needed in order to produce the final Container Event Schema
    known_schemas = avro.schema.Names()
    container_status_schema = LoadAvsc(schema_files_location + "/container_status.avsc", known_schemas)
    container_event_payload_schema = LoadAvsc(schema_files_location + "/container_event_payload.avsc", known_schemas)
    container_event_type_schema = LoadAvsc(schema_files_location + "/container_event_type.avsc", known_schemas)
    container_event_schema = LoadAvsc(schema_files_location + "/container_event.avsc", known_schemas)
    return container_event_schema
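LoadAvsc is assumed to parse each .avsc file against the shared Names registry so that later schemas can reference the named types defined earlier; a minimal sketch, assuming the avro-python3 API (newer avro releases expose the same behaviour as make_avsc_object):

import json
import avro.schema

def LoadAvsc(file_path, names=None):
    # Parse one .avsc file, registering its named types in the shared registry.
    with open(file_path) as f:
        json_data = json.load(f)
    return avro.schema.SchemaFromJSONData(json_data, names)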
def test_enum(self):
    test_schema = schema.EnumSchema('test_enum', None, ['A', 'B'], schema.Names())
    self.assertEqual(self.converter.to_json_object('A', test_schema), 'A')
    self.assertEqual(self.converter.from_json_object('B', test_schema), 'B')
if isinstance(typ, avro.schema.UnionSchema):
    # for nullable unions, pick the non-null branch
    t0 = typ.schemas[0]
    t1 = typ.schemas[1]
    if isinstance(t0, avro.schema.PrimitiveSchema):
        if t0.type == "null":
            typ = t1
        elif t1.type == "null":
            typ = t0
        else:
            raise Exception(err)
ret = None
if isinstance(typ, avro.schema.MapSchema):
    ret = {"key": ["value1", "value2"]}
    if not isinstance(typ.values, avro.schema.ArraySchema):
        raise Exception(err)
elif isinstance(typ, avro.schema.ArraySchema):
    if cls.isEmbeddedType(field.name):
        embeddedClass = cls.getEmbeddedType(field.name)
        ret = [self.getTypicalInstance(embeddedClass)]
    else:
        # look up a representative value for the array's item type
        ret = [self.typicalValueMap[typ.items.type]]
elif isinstance(typ, avro.schema.EnumSchema):
    ret = typ.symbols[0]
elif isinstance(typ, avro.schema.RecordSchema):
    self.assertTrue(cls.isEmbeddedType(fieldName))
    embeddedClass = cls.getEmbeddedType(fieldName)
    ret = self.getTypicalInstance(embeddedClass)
elif typ.type in self.typicalValueMap:
    ret = self.typicalValueMap[typ.type]
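The lookups above assume a typicalValueMap keyed by Avro primitive type names, for example:

# Hypothetical mapping from Avro primitive type names to representative values.
typicalValueMap = {
    "null": None,
    "boolean": True,
    "int": 42,
    "long": 42,
    "float": 3.14,
    "double": 3.14,
    "string": "a string",
    "bytes": b"some bytes",
}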
def _python_create_file(filename):
    if sys.version_info >= (3,):
        schema = avro.schema.Parse(json_schema)
    else:
        schema = avro.schema.parse(json_schema)
    fp = open(filename, 'wb')
    writer = avro.datafile.DataFileWriter(fp, avro.io.DatumWriter(), schema)
    for i in range(1):
        writer.append({"name": "Alyssa", "favorite_number": 256})
        writer.append({"name": "Ben", "favorite_number": 7, "favorite_color": "red"})
    writer.close()
    fp.close()
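The records appended above match the User schema from the Avro getting-started guide, so a json_schema along these lines is assumed:

# Assumed schema for the records written above (namespace and record name are guesses).
json_schema = """
{
  "namespace": "example.avro",
  "type": "record",
  "name": "User",
  "fields": [
    {"name": "name", "type": "string"},
    {"name": "favorite_number", "type": ["int", "null"]},
    {"name": "favorite_color", "type": ["string", "null"]}
  ]
}
"""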
tweet.metadata.isBareRetweet.known = True
tweet.metadata.isBareRetweet.data = False
tweet.metadata.isRetweet.known = True
tweet.metadata.isRetweet.data = True
tweet.metadata.venueID.known = False
tweet.metadata.venueID.data = None
tweet.metadata.venuePoint.known = False
tweet.metadata.venuePoint.data = None
tmp_file = tempfile.mktemp()
with open(tmp_file, "w+b") as f:
df = datafile.DataFileWriter(f, io.DatumWriter(), schema.parse(schema_json))
df.append(tweet)
df.close()
with open(tmp_file, "rb") as f:
df = datafile.DataFileReader(f, SpecificDatumReader())
tweet1 = next(df)
df.close()
self.assertEqual(tweet.ID, tweet1.ID)
self.assertEqual(tweet.text, tweet1.text)
self.assertEqual(tweet.authorScreenName, tweet1.authorScreenName)
self.assertEqual(tweet.authorProfileImageURL, tweet1.authorProfileImageURL)
self.assertEqual(tweet.authorUserID, tweet1.authorUserID)
self.assertTrue(isinstance(tweet1.location, AvroPoint))
self.assertEqual(tweet.location.latitude, tweet1.location.latitude)
def read_avro(fin):
    reader = avro.datafile.DataFileReader(fin, avro.io.DatumReader())
    return list(reader)
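Typical usage just opens the container file in binary mode and hands it to read_avro:

import avro.datafile
import avro.io

# 'records.avro' is a placeholder filename.
with open('records.avro', 'rb') as fin:
    records = read_avro(fin)
print('read {0} records'.format(len(records)))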
def deserialize(self, rec_bytes):
    return self.reader.read(BinaryDecoder(string_io(rec_bytes)))
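deserialize assumes the surrounding class has already built a DatumReader against the writer's schema and that string_io is a BytesIO-style factory; a minimal sketch of that setup, with the class name and helper names assumed:

from io import BytesIO

from avro.io import BinaryDecoder, DatumReader

# Assumed helper: the fragment's string_io is treated as a BytesIO factory.
string_io = BytesIO

class RecordDeserializer(object):  # hypothetical class name
    def __init__(self, writers_schema):
        # DatumReader decodes records written with this schema.
        self.reader = DatumReader(writers_schema)

    def deserialize(self, rec_bytes):
        # Decode one schemaless Avro-encoded record from raw bytes.
        return self.reader.read(BinaryDecoder(string_io(rec_bytes)))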