How to use the perception.hashers module in Perception

To help you get started, we've selected a few Perception examples based on popular ways it is used in public projects.
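
Every hasher in perception.hashers follows the same pattern: construct the hasher, call compute to get a hash string for an image, and call compute_distance to compare two hashes. Here is a minimal sketch of that flow; the image paths are placeholders.

from perception import hashers

hasher = hashers.PHash()
hash1 = hasher.compute('image1.jpg')  # compute accepts an image filepath
hash2 = hasher.compute('image2.jpg')
distance = hasher.compute_distance(hash1, hash2)  # 0.0 means the hashes match exactly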


From thorn-oss/perception: tests/test_tools.py (view on GitHub)
def test_unletterbox_noblackbars():
    image = hashers.tools.read(testing.DEFAULT_TEST_IMAGES[0])
    (x1, x2), (y1, y2) = hashers.tools.unletterbox(image)
    assert x1 == 0
    assert y1 == 0
    assert x2 == image.shape[1]
    assert y2 == image.shape[0]
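
The unletterbox helper tested above can also be called directly. A minimal sketch, assuming a local placeholder file: hashers.tools.read returns the image as a NumPy array, and unletterbox returns the ((x1, x2), (y1, y2)) crop box that strips solid black bars; for an image without bars, as in the test, the box spans the whole image.

from perception import hashers

image = hashers.tools.read('letterboxed.jpg')  # placeholder filepath
(x1, x2), (y1, y2) = hashers.tools.unletterbox(image)
cropped = image[y1:y2, x1:x2]  # image with any letterbox bars removed
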
From thorn-oss/perception: perception/testing/__init__.py (view on GitHub)
    assert hasher.compute_distance(hash1_1, hash1_3) == 0

    # Ensure the conversion to and from vectors works for both base64 and hex.
    assert hasher.vector_to_string(hasher.string_to_vector(hash2_1)) == hash2_1
    assert hasher.vector_to_string(
        hasher.string_to_vector(
            hasher.vector_to_string(
                hasher.string_to_vector(hash2_1), hash_format='hex'),
            hash_format='hex')) == hash2_1

    # Ensure parallelization works properly.
    test_hasher_parallelization(hasher=hasher, test_filepaths=test_images)

    # Ensure the isometric hash computation works properly.
    for image in test_images:
        transforms = hashers.tools.get_isometric_transforms(image)
        hashes_exp = {
            key: hasher.compute(value)
            for key, value in transforms.items()
        }
        hashes_act = hasher.compute_isometric(transforms['r0'])
        for transform_name in hashes_exp.keys():
            assert hasher.compute_distance(
                hashes_exp[transform_name],
                hashes_act[transform_name]) < transform_threshold

    # Verify that hashes are the correct length.
    hash_bits = hasher.hash_length * SIZES[hasher.dtype]

    words_base64 = math.ceil(
        hash_bits / 6)  # Base64 encodes 6 bits of data per character
    words_base64 += 0 if words_base64 % 4 == 0 else 4 - (
        words_base64 % 4)  # pad the length to a multiple of 4 characters
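
Outside of the test suite, the same string/vector conversions and isometric hashing can be used directly. A minimal sketch with a placeholder image path; as in the test above, the default string format is base64, and compute_isometric returns a dict of hashes keyed by transform name (e.g. 'r0' for the untransformed image).

from perception import hashers

hasher = hashers.PHash()
hash_b64 = hasher.compute('image.jpg')                         # base64 by default
vector = hasher.string_to_vector(hash_b64)                     # decode to a numpy vector
hash_hex = hasher.vector_to_string(vector, hash_format='hex')  # re-encode as hex
assert hasher.vector_to_string(
    hasher.string_to_vector(hash_hex, hash_format='hex')) == hash_b64

image = hashers.tools.read('image.jpg')
isometric_hashes = hasher.compute_isometric(image)  # one hash per rotation/flip
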
From thorn-oss/perception: perception/testing/__init__.py (view on GitHub)
def test_opencv_hasher(hasher: hashers.ImageHasher, image1: str, image2: str):
    # For OpenCV-backed hashers, make sure the distance we compute
    # matches the distance OpenCV computes internally.
    f1 = image1
    f2 = image2
    opencv_distance = hasher.hasher.compare(
        hasher.hasher.compute(hashers.tools.read(f1)),
        hasher.hasher.compute(hashers.tools.read(f2)))
    if hasher.distance_metric == 'hamming':
        opencv_distance /= hasher.hash_length
    np.testing.assert_approx_equal(
        opencv_distance,
        hasher.compute_distance(hasher.compute(f1), hasher.compute(f2)),
        significant=4)
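
For a quick check of an OpenCV-backed hasher outside the tests, the same filepath-in, distance-out flow applies. A sketch using BlockMean, one of the OpenCV-backed hashers exercised by the tests; the filepaths are placeholders.

from perception import hashers

hasher = hashers.BlockMean()
hash1 = hasher.compute('image1.jpg')  # compute accepts a filepath directly
hash2 = hasher.compute('image2.jpg')
print(hasher.distance_metric, hasher.compute_distance(hash1, hash2))
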
From thorn-oss/perception: tests/test_hashers.py (view on GitHub)
@pytest.mark.parametrize(
    'hasher_class,pil_opencv_threshold,transform_threshold,opencv_hasher',
    [(hashers.AverageHash, 0.1, 0.1, False),
     (hashers.WaveletHash, 0.1, 0.1, False), (hashers.PHash, 0.1, 0.1, False),
     (PDQHash, 0.1, 0.15, False), (hashers.DHash, 0.1, 0.1, False),
     (hashers.MarrHildreth, 0.1, 0.1, True),
     (hashers.BlockMean, 0.1, 0.1, True),
     (hashers.ColorMoment, 10, 0.1, True)])
def test_image_hashing_common(hasher_class, pil_opencv_threshold,
                              transform_threshold, opencv_hasher):
    testing.test_image_hasher_integrity(
        hasher=hasher_class(),
        pil_opencv_threshold=pil_opencv_threshold,
        transform_threshold=transform_threshold,
        opencv_hasher=opencv_hasher)
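
The hashers in the parametrized list above are interchangeable because they share the ImageHasher interface, so one can be swapped for another without changing calling code. A small sketch comparing several of them on the same placeholder image:

from perception import hashers

image = 'image.jpg'  # placeholder filepath
for hasher in [hashers.AverageHash(), hashers.PHash(),
               hashers.DHash(), hashers.WaveletHash()]:
    print(type(hasher).__name__, hasher.compute(image))
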
From thorn-oss/perception: tests/test_benchmarking.py (view on GitHub)
        'clip1s':
        benchmarking.video_transforms.get_simple_transform(clip_s=(1, None)),
        'blackpad':
        benchmarking.video_transforms.get_black_frame_padding_transform(
            duration_s=1),
        'slideshow':
        benchmarking.video_transforms.get_slideshow_transform(
            frame_input_rate=1, frame_output_rate=1),
    }
    transformed = video_dataset.transform(
        storage_dir=tempfile.TemporaryDirectory().name, transforms=transforms)
    assert len(transformed._df) == len(transforms) * len(video_dataset._df)
    assert transformed._df['filepath'].isnull().sum() == 0

    # We will compute hashes for each of the transformed
    # videos and check the results for correctness.
    phash_framewise_hasher = hashers.FramewiseHasher(
        frame_hasher=hashers.PHash(),
        interframe_threshold=-1,
        frames_per_second=2)
    hashes = transformed.compute_hashes(
        hashers={'phashframewise': phash_framewise_hasher})

    guid = hashes._df.guid.iloc[0]
    df = hashes._df[hashes._df['guid'] == guid]
    clip1s = df[(df.transform_name == 'clip1s')]
    noop = df[(df.transform_name == 'noop')]
    blackpad = df[(df.transform_name == 'blackpad')]
    slideshow = df[(df.transform_name == 'slideshow')]

    # We should have dropped two hashes from the beginning
    # on the clipped video.
    assert len(clip1s) == len(noop) - 2
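
The FramewiseHasher used in this benchmark can also be applied to a single video. A hedged sketch: the video path is a placeholder, and it assumes FramewiseHasher.compute accepts a video filepath and returns one hash per retained frame.

from perception import hashers

hasher = hashers.FramewiseHasher(
    frame_hasher=hashers.PHash(),  # hash each sampled frame with PHash
    interframe_threshold=0.2,      # skip frames too similar to the previous retained frame
    frames_per_second=1)
frame_hashes = hasher.compute('video.mp4')  # assumed: a list of frame hashes
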
From thorn-oss/perception: tests/test_benchmarking.py (view on GitHub)
def test_deduplicate():
    tempdir = tempfile.TemporaryDirectory()
    new_file = os.path.join(tempdir.name, 'dup_file.jpg')
    shutil.copy(files[0], new_file)
    duplicated_files = files + [new_file]
    deduplicated, duplicates = benchmarking.BenchmarkImageDataset.from_tuples(
        [(fn, i % 2) for i, fn in enumerate(duplicated_files)]).deduplicate(
            hasher=hashers.AverageHash(), threshold=1e-2)
    assert len(duplicates) == 1
    assert len(deduplicated._df) == len(files)
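
Deduplication is also available outside the benchmarking datasets via perception.tools.deduplicate, which takes a list of files plus (hasher, distance threshold) pairs and returns the filepath pairs it considers duplicates. A minimal sketch with placeholder filepaths:

from perception import hashers, tools

files = ['a.jpg', 'b.jpg', 'c.jpg']  # placeholder filepaths
duplicate_pairs = tools.deduplicate(
    files=files,
    hashers=[(hashers.PHash(hash_size=16), 0.2)])  # (hasher, distance threshold)
for file1, file2 in duplicate_pairs:
    print(file1, 'and', file2, 'appear to be duplicates')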