How to use the datasketch.storage.UnorderedStorage class in datasketch

To help you get started, we’ve selected a few datasketch examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github ekzhu / datasketch / datasketch / experimental / aio / storage.py View on Github external
self._collection.aggregate([{'$group': {'_id': '$key', 'count': {'$sum': 1}}}])}

        async def has_key(self, key):
            """Return whether at least one document is stored under ``key``.

            :param key: value matched against the ``key`` field of the collection.
            :return: ``True`` if ``find_one`` yields a truthy document, else ``False``.
            """
            document = await self._collection.find_one({'key': key})
            return bool(document)

        async def status(self):
            """Return the parsed Mongo configuration extended with the key count.

            :return: the result of ``_parse_config`` for the ``'mongo'`` section
                of ``self.config``, with ``'keyspace_size'`` set to the awaited
                result of ``size()``.
            """
            report = self._parse_config(self.config['mongo'])
            keyspace_size = await self.size()
            report.update(keyspace_size=keyspace_size)
            return report

        async def empty_buffer(self):
            """Flush every kind of buffered command.

            Calls ``self._buffer.execute`` once for each of ``'insert'``,
            ``'delete_by_key'`` and ``'delete_by_val'`` and awaits all of the
            results together through ``asyncio.gather``.
            """
            pending = [
                self._buffer.execute(kind)
                for kind in ('insert', 'delete_by_key', 'delete_by_val')
            ]
            await asyncio.gather(*pending)


    class AsyncMongoSetStorage(UnorderedStorage, AsyncMongoListStorage):
        async def get(self, key):
            """Return the values stored under ``key`` as a ``frozenset``.

            Queries the collection for documents whose ``key`` field equals
            ``key`` (projecting away ``_id`` and ``key``) and collects the
            ``vals`` field of each returned document.

            :param key: value matched against the ``key`` field.
            :return: ``frozenset`` of the ``vals`` entries; empty when nothing matches.
            """
            cursor = self._collection.find(filter={'key': key},
                                           projection={'_id': False, 'key': False})
            found = []
            async for doc in cursor:
                found.append(doc['vals'])
            return frozenset(found)

        async def _insert(self, obj, key, *values):
            """Insert one ``{'key': key, 'vals': values[0]}`` document through ``obj``.

            Only the first element of ``values`` is written; any further
            positional values are not used by this method.

            :param obj: object exposing an awaitable ``insert_one(document=...)``.
            :param key: value stored in the document's ``key`` field.
            :param values: positional values; ``values[0]`` becomes ``vals``.
            """
            document = {'key': key, 'vals': values[0]}
            await obj.insert_one(document=document)

        async def remove(self, *keys, **kwargs):
            """Accept the ``remove`` call signature without doing anything.

            NOTE(review): nothing is deleted here; confirm that a no-op is the
            intended behaviour for this storage.
            """
            return None

        async def remove_val(self, key, val, **kwargs):
            buffer = kwargs.pop('buffer', False)
            if buffer:
                await self._buffer.delete_many_by_val(val=val)
            else:
github ekzhu / datasketch / datasketch / storage.py View on Github external
def size(self):
        '''Return the number of keys held in the underlying dict.'''
        container = self._dict
        return len(container)

    def itemcounts(self, **kwargs):
        '''Map every key of the container to the number of values stored
        under it.

        The result is a new dict; its values are the *lengths* of the value
        collections, not the collections themselves. Keyword arguments are
        accepted and not used.
        '''
        counts = {}
        for key, values in self._dict.items():
            counts[key] = len(values)
        return counts

    def has_key(self, key):
        '''Tell whether ``key`` is present in the underlying dict.'''
        present = key in self._dict
        return present


class DictSetStorage(UnorderedStorage, DictListStorage):
    '''Set-valued storage kept in a ``defaultdict(set)``.

    Wraps the dict so that it offers an API consistent with `Storage`.
    '''

    def __init__(self, config):
        # ``config`` is not read by this initialiser.
        self._dict = defaultdict(set)

    def get(self, key):
        '''Return the set stored under ``key``, or a new empty set when the
        key is absent. The lookup never creates the key.
        '''
        if key in self._dict:
            return self._dict[key]
        return set()

    def insert(self, key, *vals, **kwargs):
        '''Add every value in ``vals`` to the set stored under ``key``.

        Keyword arguments are accepted and not used.
        '''
        bucket = self._dict[key]
        bucket.update(vals)


if cassandra is not None:

    class CassandraSharedSession(object):
github ekzhu / datasketch / datasketch / storage.py View on Github external
        @staticmethod
        def _get_len(r, k):
            """Return ``r.llen(k)``, the length ``r`` reports for key ``k``."""
            length = r.llen(k)
            return length

        def has_key(self, key):
            """Return whether ``key`` is a field of the Redis hash ``self._name``."""
            exists = self._redis.hexists(self._name, key)
            return exists

        def empty_buffer(self):
            """Execute the buffered commands, then re-initialise this object.

            After ``self._buffer.execute()`` the instance calls its own
            ``__init__`` again with the current ``config`` and ``_name``.
            """
            self._buffer.execute()
            # Re-running __init__ recreates the connection objects once the
            # buffer has been emptied, which avoids broken pipes.
            current_name = self._name
            self.__init__(self.config, name=current_name)


    class RedisSetStorage(UnorderedStorage, RedisListStorage):
        def __init__(self, config, name=None):
            """Initialise the storage by delegating to ``RedisListStorage``.

            :param config: passed through unchanged to ``RedisListStorage.__init__``.
            :param name: optional name, passed through as the ``name`` keyword.
            """
            # The parent initialiser is called explicitly rather than through
            # ``super()``.
            RedisListStorage.__init__(self, config, name=name)

        @staticmethod
        def _get_items(r, k):
            """Return ``r.smembers(k)``, the members ``r`` reports for key ``k``."""
            members = r.smembers(k)
            return members

        def remove_val(self, key, val):
            """Remove ``val`` from the set stored under ``key``.

            When the removal leaves no Redis entry for the set, ``key`` is also
            dropped from the index hash ``self._name`` so that the key is no
            longer reported as present.

            :param key: storage key whose set should lose ``val``.
            :param val: member to remove from the set.
            """
            redis_key = self.redis_key(key)
            self._redis.srem(redis_key, val)
            if not self._redis.exists(redis_key):
                # The index hash uses ``key`` as its field (the derived Redis
                # key is only the field's value), so ``key`` is what must be
                # deleted; deleting ``redis_key`` would leave the entry behind.
                self._redis.hdel(self._name, key)

        def _insert(self, r, key, *values):
            redis_key = self.redis_key(key)
            r.hset(self._name, key, redis_key)
github ekzhu / datasketch / datasketch / storage.py View on Github external
return len(self.keys())

        def itemcounts(self):
            """Implement interface.

            Returns whatever the client's ``select_count`` yields for the
            current ``keys()``.
            """
            all_keys = self.keys()
            return self._client.select_count(all_keys)

        def has_key(self, key):
            """Implement interface.

            A key counts as present when the client's ``one(key)`` returns
            anything other than ``None``.
            """
            found = self._client.one(key)
            return found is not None

        def empty_buffer(self):
            """Implement interface.

            Delegates to the client's own ``empty_buffer``.
            """
            client = self._client
            client.empty_buffer()


    class CassandraSetStorage(UnorderedStorage, CassandraListStorage):
        """
        UnorderedStorage implementation using Cassandra as backend.

        Note: data is written with the client's ``upsert``, ignoring what the
            timestamp actually means.
            NOTE(review): confirm how duplicates are meant to be treated;
            ``get`` collapses the stored values into a ``set``.
        """

        def get(self, key):
            """Implement interface and override super-class.

            Returns the super-class result for ``key`` converted to a ``set``.
            """
            stored = super(CassandraSetStorage, self).get(key)
            return set(stored)

        def insert(self, key, *vals, **kwargs):
            """Implement interface and override super-class.

            Upserts ``vals`` under ``key``. The optional ``buffer`` keyword
            (default ``False``) is forwarded to the client's ``upsert``.
            """
            use_buffer = kwargs.pop('buffer', False)
            self._client.upsert(key, vals, use_buffer)