How to use the avro.io.BinaryDecoder class in avro

To help you get started, we’ve selected a few avro.io.BinaryDecoder examples, based on popular ways the class is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github Byhiras / pyavroc / tests / test_serialize.py View on Github external
def deserialize(self, rec_bytes):
        """Read one datum out of ``rec_bytes`` through ``self.reader``.

        The bytes are wrapped with ``string_io`` and handed to a
        ``BinaryDecoder``; whatever ``self.reader.read`` produces is returned.
        """
        read_datum = self.reader.read
        byte_stream = string_io(rec_bytes)
        return read_datum(BinaryDecoder(byte_stream))
github apache / beam / sdks / python / apache_beam / io / avroio.py View on Github external
# raw data (without headers). See zlib and Avro documentations for more
      # details.
      return zlib.decompress(data, -zlib.MAX_WBITS)
    elif codec == 'snappy':
      # Snappy is an optional avro codec.
      # See Snappy and Avro documentation for more details.
      try:
        import snappy
      except ImportError:
        raise ValueError('Snappy does not seem to be installed.')

      # Compressed data includes a 4-byte CRC32 checksum which we verify.
      # We take care to avoid extra copies of data while slicing large objects
      # by use of a buffer.
      result = snappy.decompress(buffer(data)[:-4])
      avroio.BinaryDecoder(cStringIO.StringIO(data[-4:])).check_crc32(result)
      return result
    else:
      raise ValueError('Unknown codec: %r', codec)
github johnj / php5-xcom / src / py / avro / datafile.py View on Github external
def __init__(self, reader, datum_reader):
    """Prepare the reader: load the header, check the codec, set the schema.

    Args:
      reader: input wrapped in an io.BinaryDecoder and kept as self._reader.
      datum_reader: kept as self._datum_reader; its writers_schema is set
        from the file's 'schema' metadata entry.

    Raises:
      DataFileException: if the file declares a codec outside VALID_CODECS.
    """
    self._reader = reader
    self._decoder = io.BinaryDecoder(reader)
    self._datum_reader = datum_reader

    # The header (magic, meta, sync) is read before any metadata lookup.
    self._read_header()

    # A file without a 'codec' entry is accepted; a declared one must be known.
    declared_codec = self.get_meta('codec')
    if not (declared_codec is None or declared_codec in VALID_CODECS):
      raise DataFileException('Unknown codec: %s.' % declared_codec)

    # Record the file length.
    self._file_length = self.determine_file_length()

    # No block has been read yet; install the schema stored in the file.
    self._block_count = 0
    writers_schema = schema.parse(self.get_meta('schema'))
    self.datum_reader.writers_schema = writers_schema
github cloudera / hue / desktop / core / ext-py / avro-1.7.6 / src / avro / datafile.py View on Github external
def __init__(self, reader, datum_reader):
    """Set up decoder state, read the file header and validate the codec.

    Args:
      reader: input wrapped in an io.BinaryDecoder and kept as self._reader
        (presumably a file-like object open on an Avro data file -- confirm).
      datum_reader: kept as self._datum_reader.

    Raises:
      DataFileException: if the resolved codec is not in VALID_CODECS.
    """
    self._reader = reader
    # Decoder over the raw input itself.
    self._raw_decoder = io.BinaryDecoder(reader)
    self._datum_decoder = None # Maybe reset at every block.
    self._datum_reader = datum_reader
    
    # read the header: magic, meta, sync
    self._read_header()

    # ensure codec is valid
    self.codec = self.get_meta('avro.codec')
    # A missing 'avro.codec' entry is treated as the "null" codec.
    if self.codec is None:
      self.codec = "null"
    if self.codec not in VALID_CODECS:
      raise DataFileException('Unknown codec: %s.' % self.codec)

    # get file length
    self._file_length = self.determine_file_length()
github apache / avro / lang / py / avro / datafile.py View on Github external
def __init__(self, reader, datum_reader):
    """Set up decoder state, read the file header and validate the codec.

    Args:
      reader: input wrapped in an avro.io.BinaryDecoder and kept as
        self._reader (presumably a file-like object open on an Avro data
        file -- confirm).
      datum_reader: kept as self._datum_reader.

    Raises:
      DataFileException: if the resolved codec is not in VALID_CODECS.
    """
    self._reader = reader
    # Decoder over the raw input itself.
    self._raw_decoder = avro.io.BinaryDecoder(reader)
    self._datum_decoder = None # Maybe reset at every block.
    self._datum_reader = datum_reader

    # read the header: magic, meta, sync
    self._read_header()

    # ensure codec is valid
    self.codec = self.get_meta('avro.codec')
    # A missing 'avro.codec' entry is treated as the "null" codec.
    if self.codec is None:
      self.codec = "null"
    if self.codec not in VALID_CODECS:
      raise DataFileException('Unknown codec: %s.' % self.codec)

    # get file length
    self._file_length = self.determine_file_length()
github Chabane / bigdata-playground / ml / spark / stream.py View on Github external
def deserialize(flight_info_bytes):
    """Decode an Avro-encoded flight-info record and return a JSON string.

    Args:
        flight_info_bytes: the Avro binary payload, or None.

    Returns:
        None when ``flight_info_bytes`` is None; otherwise a JSON string.

    The schema is loaded from ``flight-info.schema.avsc`` under ``dir_path``
    on every call.
    """
    if flight_info_bytes is None:
        return None

    decoder = BinaryDecoder(BytesIO(flight_info_bytes))

    # Use a context manager so the schema file handle is always closed,
    # even if reading or parsing fails.
    with open(dir_path + "/flight-info.schema.avsc", "rb") as schema_file:
        schema_flight_info = Parse(schema_file.read())

    reader = DatumReader(schema_flight_info)
    # The payload is still decoded so malformed input raises here, but the
    # decoded record is not used for the result.
    reader.read(decoder)

    # NOTE(review): the returned JSON is a fixed value and does not depend on
    # the decoded record -- looks like a placeholder; confirm with callers.
    return json.dumps([{"id": 907955534287978496}])
github Yelp / data_pipeline_avro_util / data_pipeline_avro_util / avro_string_reader.py View on Github external
def decode(self, encoded_message):
        """ Turns `encoded_message` back into its decoded representation.

        The message is expected to have been encoded with the same schema as
        `self.writer_schema`; the result is shaped by `self.reader_schema`.

        Args:
            encoded_message (string): An encoded object

        Returns (dict):
            The decoded dictionary representation.
        """
        # Wrap the raw message in a stream and decode it in one step.
        binary_decoder = avro.io.BinaryDecoder(
            cStringIO.StringIO(encoded_message)
        )
        return self.avro_reader.read(binary_decoder)
github cloudera / hue / desktop / core / ext-py / avro-1.8.2 / src / avro / ipc.py View on Github external
def respond(self, call_request):
    """
    Called by a server to deserialize a request, compute and serialize
    a response or error. Compare to 'handle()' in Thrift.
    """
    buffer_reader = StringIO(call_request)
    buffer_decoder = io.BinaryDecoder(buffer_reader)
    buffer_writer = StringIO()
    buffer_encoder = io.BinaryEncoder(buffer_writer)
    error = None
    response_metadata = {}
    
    try:
      remote_protocol = self.process_handshake(buffer_decoder, buffer_encoder)
      # handshake failure
      if remote_protocol is None:  
        return buffer_writer.getvalue()

      # read request using remote protocol
      request_metadata = META_READER.read(buffer_decoder)
      remote_message_name = buffer_decoder.read_utf8()

      # get remote and local request schemas so we can do
github pluralsight / spavro / benchmark / benchmark.py View on Github external
def read_func(data):
        """Decode ``data`` with ``reader``, a name defined outside this function."""
        binary_decoder = avro.io.BinaryDecoder(io.BytesIO(data))
        return reader.read(binary_decoder)
    return read_func
github apache / avro / lang / py3 / avro / datafile.py View on Github external
def _read_block_header(self):
    """Read the next block's header and choose the decoder for its datums.

    Stores the value read first in ``self._block_count`` and sets
    ``self._datum_decoder`` according to ``self.codec``.

    Raises:
      DataFileException: if ``self.codec`` is none of "null", "deflate" or
        "snappy".
    """
    self._block_count = self.raw_decoder.read_long()
    codec = self.codec

    if codec == "null":
      # The length field is not needed here, so it is skipped; datums are
      # read straight from the raw decoder.
      self.raw_decoder.skip_long()
      self._datum_decoder = self._raw_decoder
      return

    if codec == 'deflate':
      # The block is stored as (length, data), i.e. the same layout as an
      # encoded "bytes" value.
      compressed = self.raw_decoder.read_bytes()
      # A negative window size (-15) selects raw inflate without zlib
      # headers.  See zlib.h.
      payload = zlib.decompress(compressed, -15)
      self._datum_decoder = avro_io.BinaryDecoder(io.BytesIO(payload))
      return

    if codec == 'snappy':
      # The last 4 bytes of the block are a CRC32 checksum, not payload.
      block_size = self.raw_decoder.read_long()
      compressed = self.raw_decoder.read(block_size - 4)
      payload = snappy.decompress(compressed)
      self._datum_decoder = avro_io.BinaryDecoder(io.BytesIO(payload))
      self.raw_decoder.check_crc32(payload)
      return

    raise DataFileException("Unknown codec: %r" % self.codec)