How to use the pyarrow.plasma.ObjectID function in pyarrow

To help you get started, we’ve selected a few pyarrow examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github ray-project / ray / python / ray / experimental / async_plasma.py View on Github external
def as_future(self, object_id, check_ready=True):
        """Turn an object_id into a Future object.

        Args:
            object_id: A Ray's object_id.
            check_ready (bool): If true, check if the object_id is ready.

        Returns:
            PlasmaObjectFuture: A future object that waits the object_id.
        """
        if not isinstance(object_id, ray.ObjectID):
            raise TypeError("Input should be an ObjectID.")

        plain_object_id = plasma.ObjectID(object_id.binary())
        fut = PlasmaObjectFuture(loop=self._loop, object_id=plain_object_id)

        if check_ready:
            ready, _ = ray.wait([object_id], timeout=0)
            if ready:
                if self._loop.get_debug():
                    logger.debug("%s has been ready.", plain_object_id)
                self._complete_future(fut)
                return fut

        if plain_object_id not in self._waiting_dict:
            linked_list = PlasmaObjectLinkedList(self._loop, plain_object_id)
            linked_list.add_done_callback(self._unregister_callback)
            self._waiting_dict[plain_object_id] = linked_list
        self._waiting_dict[plain_object_id].append(fut)
        if self._loop.get_debug():
github russellromney / brain-plasma / brain_plasma / brain_plasma.py View on Github external
def _brain_new_ids_or_existing_ids(self,name,client):
        '''Return ``(thing_id, name_id)`` for ``name`` on ``client``.

        When ``self._brain_name_exists(name, client)`` is false, the name id
        comes from ``self._brain_create_named_object(name)`` and the thing id
        is a random ``plasma.ObjectID``.

        Otherwise the stored record whose ``'name'`` equals ``name`` is looked
        up, both of its objects are deleted from ``client``, and ObjectIDs
        rebuilt from the record's ``'id'`` and ``'name_id'`` are returned.
        '''
        if not self._brain_name_exists(name, client):
            # Unknown name: allocate a fresh pair of ids.
            name_id = self._brain_create_named_object(name)
            thing_id = plasma.ObjectID.from_random()
            return thing_id, name_id

        # Known name: locate its record among the stored name objects.
        # If nothing matches, the collection itself is used as the "record",
        # so the key lookups below fail the same way they always have.
        records = self._brain_names_objects(client)
        record = next(
            (candidate for candidate in records if candidate['name'] == name),
            records,
        )

        name_id = plasma.ObjectID(record['name_id'])
        thing_id = plasma.ObjectID(record['id'])

        # Drop the old name and thing objects so the ids can be reused.
        client.delete([name_id, thing_id])
        return thing_id, name_id
github ray-project / ray / python / ray / worker.py View on Github external
local object store.

        Args:
            object_ids (List[object_id.ObjectID]): A list of the object IDs
                whose values should be retrieved.
        """
        # Make sure that the values are object IDs.
        for object_id in object_ids:
            if not isinstance(object_id, ray.ObjectID):
                raise Exception("Attempting to call `get` on the value {}, "
                                "which is not an ObjectID.".format(object_id))
        # Do an initial fetch for remote objects. We divide the fetch into
        # smaller fetches so as to not block the manager for a prolonged period
        # of time in a single call.
        plain_object_ids = [
            plasma.ObjectID(object_id.id()) for object_id in object_ids
        ]
        for i in range(0, len(object_ids),
                       ray._config.worker_fetch_request_size()):
            if not self.use_raylet:
                self.plasma_client.fetch(plain_object_ids[i:(
                    i + ray._config.worker_fetch_request_size())])
            else:
                self.local_scheduler_client.reconstruct_objects(
                    object_ids[i:(
                        i + ray._config.worker_fetch_request_size())], True)

        # Get the objects. We initially try to get the objects immediately.
        final_results = self.retrieve_and_deserialize(plain_object_ids, 0)
        # Construct a dictionary mapping object IDs that we haven't gotten yet
        # to their original index in the object_ids argument.
        unready_ids = {
github russellromney / brain-plasma / brain_plasma / brain_plasma_hash.py View on Github external
f"Namespace wrong length; 5 >= namespace >= 15; name {namespace} is {len(namespace)}"
            )

        # CHANGE THE NAMESPACE AND ACKNOWLEDGE THE CHANGE
        self.namespace = namespace

        # IF THE NAMESPACE OBJECT EXISTS ALREADY, JUST ADD THE NEW NAMESPACE
        if plasma.ObjectID(b"brain_namespaces_set") in self.client.list().keys():
            # ADD TO NAMESPACES
            namespaces = self.client.get(
                plasma.ObjectID(b"brain_namespaces_set")
            ).union([self.namespace, "default"])
            # REMOVE OLD NAMESPACES OBJECT
            self.client.delete([plasma.ObjectID(b"brain_namespaces_set")])
            # ASSIGN NEW NAMESPACES OBJECT
            self.client.put(namespaces, plasma.ObjectID(b"brain_namespaces_set"))
        
        # OTHERWISE, CREATE THE NAMESPACES OBJECT AND ADD TO PLASMA
        else:
            self.client.put(
                set([self.namespace, "default"]),
                plasma.ObjectID(b"brain_namespaces_set"),
            )
        
        # RETURN THE CURRENT NAMESPACE
        return self.namespace
github Angel-ML / PyAngelPS / pyangel / datastore.py View on Github external
async def aget(self, object_id):
        """Fetch the buffer stored under ``object_id`` and parse it.

        ``object_id`` is wrapped in ``plasma.ObjectID`` unless it already is a
        ``pyarrow._plasma.ObjectID``. The first buffer returned by
        ``self.plasma_client.get_buffers`` is exposed as a ``memoryview`` and
        handed to ``data_head.from_buffer`` and then ``data_head.parse_data``,
        whose result is returned.
        """
        if isinstance(object_id, pyarrow._plasma.ObjectID):
            key = object_id
        else:
            key = plasma.ObjectID(object_id)

        fetched = self.plasma_client.get_buffers([key])
        view = memoryview(fetched[0])

        # NOTE(review): ``data_head`` is not assigned anywhere in this method
        # (a local ``DataHead()`` construction was previously commented out);
        # presumably it is provided by an enclosing scope -- confirm.
        data_head.from_buffer(view)
        return data_head.parse_data(view)
github russellromney / brain-plasma / brain_plasma / brain_plasma_hash.py View on Github external
def object_ids(self) -> dict:
        """
        Map each name reported by ``self.metadata()`` to its ObjectID.

        Every metadata entry contributes one item: the key is the entry's
        ``"name"`` and the value is ``plasma.ObjectID`` built from its ``"id"``.
        """
        ids_by_name = {}
        for entry in self.metadata():
            entry_name = entry["name"]
            ids_by_name[entry_name] = plasma.ObjectID(entry["id"])
        return ids_by_name
github apache / arrow / python / examples / plasma / sorting / sort_df.py View on Github external
def put_df(df):
    """Store a pandas DataFrame in the object store and return its ObjectID.

    The DataFrame is converted to a single Arrow record batch, which is
    written in the record-batch stream format into a buffer created through
    ``client`` (a name resolved from the enclosing scope, not defined here).
    The object is sealed before its id is returned.
    """
    batch = pa.RecordBatch.from_pandas(df)
    schema = batch.schema

    # Dry run against a mock sink: nothing is stored, only the number of
    # bytes the stream would occupy is counted.
    size_probe = pa.MockOutputStream()
    writer = pa.RecordBatchStreamWriter(size_probe, schema)
    writer.write_batch(batch)
    nbytes = size_probe.size()

    # Pick a random 20-byte id and reserve a buffer of exactly that size.
    object_id = plasma.ObjectID(np.random.bytes(20))
    target = client.create(object_id, nbytes)

    # Real run: serialize the batch straight into the reserved buffer.
    writer = pa.RecordBatchStreamWriter(pa.FixedSizeBufferWriter(target), schema)
    writer.write_batch(batch)

    # Seal the object now that its buffer has been filled.
    client.seal(object_id)

    return object_id
github russellromney / brain-plasma / brain_plasma / brain_plasma_hash.py View on Github external
name: "this"
        namespace: "default"
        16-byte hash digest: b'%\x14\x997F\x08I\xfb\xe4\xc3\xf8V\x98\x13\x0e\xee'
        combined (20-byte): b'this%\x14\x997F\x08I\xfb\xe4\xc3\xf8V\x98\x13\x0e\xee'
        return object id: ObjectID(7468697325149937460849fbe4c3f85698130eee)
        """
        if not namespace:
            namespace = self.namespace

        # NAMESPACE CAN'T BE SET TO AN INCORRECT SIZE
        namespace_len = len(namespace)
        hash_len = 20 - namespace_len
        encoded = namespace.encode()
        name_hash = self._hash(name, hash_len)
        combined = encoded + name_hash
        return plasma.ObjectID(combined)
github russellromney / brain-plasma / brain_plasma / brain_plasma.py View on Github external
def show_namespaces(self):
        '''Return whatever ``self.client`` holds under the ``brain_namespaces_set`` ObjectID.'''
        namespaces_key = plasma.ObjectID(b'brain_namespaces_set')
        return self.client.get(namespaces_key)