How to use the pyarrow.deserialize function in pyarrow. Note: `pyarrow.serialize`/`pyarrow.deserialize` were deprecated in pyarrow 2.0 and removed in later releases; for new code, prefer pickle protocol 5 (with out-of-band buffers) or the Arrow IPC format instead.

To help you get started, we’ve selected a few pyarrow examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github PistonY / torch-toolbox / torchtoolbox / tools / convert_lmdb.py View on Github external
def load_pyarrow(buf):
    """Deserialize ``buf`` into a Python object using pyarrow.

    Args:
        buf: a bytes-like buffer produced by ``pyarrow.serialize``.

    Returns:
        The deserialized Python object.

    Raises:
        AssertionError: if ``buf`` is None.

    NOTE(review): ``pyarrow.deserialize`` is deprecated (pyarrow >= 2.0)
    and removed in recent releases, and is unsafe on untrusted input —
    confirm the pinned pyarrow version before relying on this.
    """
    # Explicit raise instead of `assert`: asserts are stripped under
    # `python -O`, which would let a None buf reach pyarrow.deserialize.
    # AssertionError is kept so existing callers see the same exception.
    if buf is None:
        raise AssertionError('buf should not be None.')
    return pyarrow.deserialize(buf)
github catalyst-team / catalyst / catalyst / utils / serialization.py View on Github external
def pyarrow_deserialize(data):
    """
    Deserialize bytes into an object using pyarrow

    Args:
        data: a bytes object containing data serialized with pyarrow.

    Returns:
        Returns a value deserialized from the bytes-like object.

    NOTE(review): ``pyarrow.deserialize`` is deprecated since pyarrow 2.0
    and removed in later releases, and is unsafe on untrusted input —
    verify the pyarrow version this project pins.
    """
    return pyarrow.deserialize(data)
github cyoon1729 / Distributed-Reinforcement-Learning / common / utils / buffer_helper.py View on Github external
def recv_data(self):
    """Non-blockingly drain one replay message from the pull socket.

    If a message is available, it is pyarrow-deserialized into an
    iterable of ``(replay_data, priorities)`` pairs; each pair is added
    to the buffer and its priority updated at the slot just written.
    If no message is pending (``zmq.Again``), this is a no-op.
    """
    incoming = False
    try:
        incoming = self.pull_socket.recv(zmq.DONTWAIT)
    except zmq.Again:
        # Nothing queued right now; fall through with incoming == False.
        pass

    if not incoming:
        return

    # NOTE(review): pa.deserialize is deprecated/removed in recent
    # pyarrow releases — confirm the pinned version.
    for replay_data, priorities in pa.deserialize(incoming):
        self.buffer.add(*replay_data)
        # The element just added lives one slot behind _next_idx
        # (modulo the ring-buffer capacity).
        last_slot = (self.buffer._next_idx - 1) % self.buffer._maxsize
        self.buffer.update_priorities([last_slot], priorities)
github nismod / smif / src / smif / data_layer / datafile_interface.py View on Github external
def _get_data_from_native_file(filepath):
    """Memory-map ``filepath`` and pyarrow-deserialize its full contents.

    Args:
        filepath: path to a file written with pyarrow serialization.

    Returns:
        The deserialized Python object.
    """
    with pa.memory_map(filepath, 'rb') as source:
        # Rewind defensively, then hand the whole buffer to pyarrow.
        source.seek(0)
        return pa.deserialize(source.read_buffer())
github guanfuchen / semseg / semseg / dataloader / folder2lmdb.py View on Github external
def __getitem__(self, index):
        """Fetch the serialized sample for ``index`` from LMDB and decode it.

        NOTE(review): this snippet appears truncated — no ``return``
        statement is visible even though ``img`` is built; confirm against
        the original file.
        """
        img = None
        env = self.env
        # Read the raw byte payload for this key inside a read-only txn.
        with env.begin(write=False) as txn:
            byteflow = txn.get(self.keys[index])
        # SECURITY NOTE(review): pyarrow.deserialize is unsafe on untrusted
        # data and is deprecated/removed in recent pyarrow releases.
        img = pyarrow.deserialize(byteflow)

        # load image
        # imgbuf = unpacked[0]
        # print('imgbuf:', imgbuf)
        # print('imgbuf.shape:', imgbuf.shape)
        # cv2.imshow('img:', img)
        # cv2.waitKey(1)
        # Presumably the stored payload is already a numeric array — TODO confirm.
        img = torch.FloatTensor(img)
        # buf = six.BytesIO()
        # buf.write(imgbuf)
        # buf.seek(0)
        # img = Image.open(buf).convert('RGB')
        # print('buf:', buf)

        # if self.transform is not None:
        #     img = self.transform(img)
github nismod / smif / src / smif / data_layer / datafile_interface.py View on Github external
def _get_data_from_native_file(filepath):
    """Load and pyarrow-deserialize the contents of a memory-mapped file.

    Args:
        filepath: path to a file written with pyarrow serialization.

    Returns:
        The deserialized Python object.
    """
    with pa.memory_map(filepath, 'rb') as mapped:
        mapped.seek(0)
        contents = pa.deserialize(mapped.read_buffer())
    return contents
github Fangyh09 / Image2LMDB / folder2lmdb.py View on Github external
def loads_pyarrow(buf):
    """Decode a pyarrow-serialized buffer back into a Python object.

    Args:
        buf: the output of `dumps`.

    Returns:
        The deserialized Python object.
    """
    # NOTE(review): pa.deserialize is deprecated/removed in recent
    # pyarrow releases — confirm the pinned version.
    result = pa.deserialize(buf)
    return result
github bdqnghi / bi-tbcnn / ast2vec / ast2vec / fast_pickle_file_to_nodes.py View on Github external
print (lang)
    
    with open(infile, 'rb') as file_handler:
        data_source = pickle.load(file_handler)

    print ('Pickle load finished')

    node_counts = defaultdict(int)
    samples = []

    maps = {}
    map_filename = 'maps%s.pa' % lang
    if os.path.exists(map_filename):
       with open(map_filename, 'rb') as f:
           buf = f.read()
           maps = pyarrow.deserialize(buf)
           f.close()
    OrderedDict(sorted(maps.items(), key=lambda t: t[0]))
    #for key, value in maps.items() :
    #    print("%s<=%s" % (value, key))
    #print (map_filename)

    has_capacity = lambda x: -1 < 0 or node_counts[x] < -1
    can_add_more = lambda: 10000 < 0 or len(samples) < -1

    for item in data_source:
        root = item['tree']
        if root.HasField("element"):
            element = root.element
            new_samples = [
                {
                    'node': element.kind,
github hydro-project / droplet / droplet / shared / serializer.py View on Github external
def _load_numpy(self, msg):
    """Pyarrow-deserialize ``msg``; falsy messages pass through unchanged.

    Args:
        msg: a bytes-like serialized payload, or a falsy placeholder.

    Returns:
        The deserialized object, or ``msg`` itself when it is falsy.
    """
    if msg:
        return pa.deserialize(msg)
    return msg
github deepdrive / deepdrive / sim / uepy_client.py View on Github external
start_serialize = time.time()
            msg = pyarrow.serialize([method, args, kwargs]).to_buffer()

            start_send = time.time()
            self.socket.send(msg)
            log.debug('send took %r' % (time.time() - start_send))

            start_receive = time.time()
            resp = self.socket.recv()
            log.debug('receive took %r', (time.time() - start_receive))

            size_formatted = sizeof_fmt(sys.getsizeof(resp))
            log.debug('receive size was %s', size_formatted)

            start_deserialize = time.time()
            ret = pyarrow.deserialize(resp)
            log.debug('deserialize took %r', (time.time() - start_deserialize))
        except zmq.error.Again:
            print('Waiting for uepy server')
            self.create_socket()
            return None
        finally:
            if ret is None:
                raise RuntimeError(
                    'Could not get response from uepy server. '
                    'Ensure your Arrow/pyarrow versions are compatible, and/or '
                    'try restarting sim or Unreal Editor. ')
            if not ret['success']:
                log.error(ret['result'])
                raise RuntimeError(
                    'Error executing %s(%s, %s) in Unreal - '
                    'Traceback above' % (method, str(args), str(kwargs)))