How to use the mmh3.hash_bytes function in mmh3

To help you get started, we’ve selected a few mmh3.hash_bytes examples based on popular ways it is used in public projects.
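Before the project examples, here is a minimal sketch of calling mmh3.hash_bytes directly. It returns the raw 16-byte (128-bit) MurmurHash3 digest and accepts an optional integer seed; the input string and seed below are arbitrary illustrations.

import mmh3

digest = mmh3.hash_bytes(b"hello world")        # 16-byte MurmurHash3 x64 128-bit digest
seeded = mmh3.hash_bytes(b"hello world", 42)    # the same input hashed with a different seed
assert len(digest) == 16
print(digest.hex())                             # hex form is common for display and comparison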


github huydhn / cuckoo-filter / tests / test_bucket.py
                'transformer': lambda string: str(int(IPAddress(string))),

                'action': bucket.delete,
                'expected': True,

                'full': False,
                'included': False,
            },
        ]

        for case in cases:
            item = case['transformer'](case['item'])

            # Generate all the fingerprints
            fingerprint = bitarray()
            fingerprint.frombytes(mmh3.hash_bytes(item))

            self.assertEqual(case['action'](fingerprint), case['expected'], 'Save {0} into the bucket ok'.format(item))
            self.assertEqual(bucket.is_full(), case['full'], 'Bucket capacity is ok')

            # Make sure that all items are in the bucket
            self.assertEqual(bucket.contains(fingerprint), case['included'], 'Item {0} is in the bucket'.format(item))
            self.assertEqual(fingerprint in bucket, case['included'], 'Item {0} is in the bucket'.format(item))
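The pattern above — loading the raw digest into a bitarray to use as a fingerprint — can be reproduced on its own. A minimal sketch, assuming the bitarray package is installed; the item value is an arbitrary stand-in for a transformed item (an IP address as a decimal string):

import mmh3
from bitarray import bitarray

item = "3232235521"                   # arbitrary stand-in for a transformed item
fingerprint = bitarray()
fingerprint.frombytes(mmh3.hash_bytes(item))
assert len(fingerprint) == 128        # 16 bytes of digest become 128 bits
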
github dask / dask / dask / hashing.py
def _hash_murmurhash(buf):
        """
        Produce a 16-bytes hash of *buf* using MurmurHash.
        """
        return mmh3.hash_bytes(buf)
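A short usage note for a helper like the one above: mmh3.hash_bytes is deterministic, so equal buffers always map to the same 16-byte digest, which is what makes it usable as a hashing primitive here. A small sketch with arbitrary buffer contents:

import mmh3

a = mmh3.hash_bytes(b"block-0")
b = mmh3.hash_bytes(b"block-0")
assert a == b and len(a) == 16        # the same input always yields the same 16-byte digest
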
github kalafut / py-imohash / imohash / imohash.py
def hashfileobject(f, sample_threshhold=SAMPLE_THRESHOLD, sample_size=SAMPLE_SIZE, hexdigest=False):
    #get file size from file object
    f.seek(0, os.SEEK_END)
    size = f.tell()
    f.seek(0, os.SEEK_SET)

    if size < sample_threshhold or sample_size < 1:
        data = f.read()
    else:
        data = f.read(sample_size)
        f.seek(size//2)
        data += f.read(sample_size)
        f.seek(-sample_size, os.SEEK_END)
        data += f.read(sample_size)

    hash_tmp = mmh3.hash_bytes(data)
    # reverse each 8-byte half of the 128-bit digest
    hash_ = hash_tmp[7::-1] + hash_tmp[16:7:-1]
    # prepend the varint-encoded file size, overwriting the leading digest bytes
    enc_size = varint.encode(size)
    digest = enc_size + hash_[len(enc_size):]

    return binascii.hexlify(digest).decode() if hexdigest else digest
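A minimal usage sketch for the function above, with a hypothetical file path; passing hexdigest=True returns the digest as a hex string instead of raw bytes:

with open("example.bin", "rb") as f:          # hypothetical file
    print(hashfileobject(f, hexdigest=True))
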
github cmusatyalab / opendiamond / opendiamond / helpers.py
def murmur(data):
    '''Return a lower-case hex string representing the MurmurHash3-x64-128
    hash of the specified data using the seed 0xbb40e64d (the first 10
    digits of pi, in hex).'''
    return mmh3.hash_bytes(data, 0xbb40e64d).hex()
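A brief usage sketch of the helper above, only to illustrate the output format; the input bytes are arbitrary:

hexdigest = murmur(b"example payload")
assert len(hexdigest) == 32                   # 16-byte digest rendered as 32 hex characters
assert hexdigest == hexdigest.lower()         # lower-case, as the docstring promises
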
github huydhn / cuckoo-filter / cuckoo / filter.py
def index(self, item):
        '''
        Calculate the (first) index of an item in the filter.
        '''
        item_hash = mmh3.hash_bytes(item)
        # Because of this modular computation, it will be tricky to increase
        # the capacity of the filter directly
        return int(codecs.encode(item_hash, 'hex'), 16) % self.capacity
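The same index computation can be sketched outside the class: the 128-bit digest is interpreted as a big-endian integer and reduced modulo the filter capacity. The capacity and item values below are arbitrary illustrations.

import mmh3

capacity = 1024                                       # arbitrary example capacity
item_hash = mmh3.hash_bytes("example-item")
index = int.from_bytes(item_hash, "big") % capacity   # equivalent to the hex round-trip above
assert 0 <= index < capacity
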
github ClusterHQ / flocker / flocker / control / _persistence.py
# different hashes.
        object_to_process = frozenset(object_to_process.iteritems()).union(
            [_MAPPING_TOKEN]
        )

    if isinstance(object_to_process, Set):
        sub_hashes = (generation_hash(x) for x in object_to_process)
        result = bytes(
            reduce(_xor_bytes, sub_hashes, bytearray(_NULLSET_TOKEN))
        )
    elif isinstance(object_to_process, Iterable):
        result = mmh3_hash_bytes(b''.join(
            generation_hash(x) for x in object_to_process
        ))
    else:
        result = mmh3_hash_bytes(wire_encode(object_to_process))

    if is_pyrsistent:
        _generation_hash_cache[input_object] = result

    return result
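The Set branch above combines per-element digests with XOR so that element order cannot affect the result. A standalone sketch of that idea, using hypothetical element values and a local _xor_bytes helper in place of the module's own:

from functools import reduce
import mmh3

def _xor_bytes(acc, digest):
    # XOR a 16-byte digest into the running accumulator, byte by byte
    return bytearray(a ^ b for a, b in zip(acc, digest))

elements = {b"alpha", b"beta", b"gamma"}              # hypothetical set members
combined = bytes(reduce(_xor_bytes, (mmh3.hash_bytes(e) for e in elements), bytearray(16)))
# iterating the set in any order yields the same combined value
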
github ClusterHQ / flocker / flocker / control / _persistence.py
def _sync_save(self, deployment):
        """
        Save and flush new configuration to disk synchronously.
        """
        config = Configuration(version=_CONFIG_VERSION, deployment=deployment)
        data = wire_encode(config)
        self._hash = b16encode(mmh3_hash_bytes(data)).lower()
        self._config_path.setContent(data)
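The hashing step in _sync_save can be sketched on its own: the serialized bytes are hashed with mmh3 and base16-encoded into a lower-case fingerprint. The payload below is an arbitrary stand-in for wire_encode(config).

from base64 import b16encode
import mmh3

data = b'{"version": 1, "deployment": {}}'               # stand-in for the encoded configuration
fingerprint = b16encode(mmh3.hash_bytes(data)).lower()    # 32-character lower-case hex fingerprint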

mmh3

Python extension for MurmurHash (MurmurHash3), a set of fast and robust hash functions.

License: MIT