How to use the pikepdf.PdfImage class in pikepdf

To help you get started, we’ve selected a few pikepdf examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github pikepdf / pikepdf / tests / test_image_access.py View on Github external
def test_jp2(resources):
    """Inspect and extract the first image of ``pike-jp2.pdf``.

    The image is expected to carry the ``/JPXDecode`` filter, report an
    8-bit, non-indexed, non-inline ``/DeviceRGB`` colorspace, and map to
    the ``'RGB'`` mode. After extracting it to an in-memory stream, the
    explicit ``ColorSpace`` entry is removed from the image object and a
    fresh ``PdfImage`` wrapper is built on top of it.
    """
    document = Pdf.open(resources / 'pike-jp2.pdf')
    page_images = document.pages[0].images
    xobj = next(iter(page_images.values()))
    jp2_image = PdfImage(xobj)

    # Properties reported while the ColorSpace entry is still present.
    assert '/JPXDecode' in jp2_image.filters
    assert jp2_image.colorspace == '/DeviceRGB'
    assert not jp2_image.is_inline
    assert not jp2_image.indexed
    assert jp2_image.mode == 'RGB'
    assert jp2_image.bits_per_component == 8

    buffer = BytesIO()
    jp2_image.extract_to(stream=buffer)

    # Drop the wrapper first, then strip the ColorSpace entry from the
    # underlying image object.
    del jp2_image
    del xobj.ColorSpace

    # Without explicit ColorSpace metadata, the colorspace should be taken
    # from the compressed data stream instead.
    stripped_image = PdfImage(xobj)
github pikepdf / pikepdf / tests / test_image_access.py View on Github external
def test_ccitt_encodedbytealign(sandwich):
    """Converting an ``EncodedByteAlign`` image to PIL must be rejected.

    The ``sandwich`` fixture supplies an ``(image object, pdf)`` pair; the
    pdf is not otherwise used in this test.
    """
    xobj, _pdf = sandwich

    # Pretend this image is "EncodedByteAlign". We don't have a FOSS
    # example of such an image, so the flag is forced on by hand.
    xobj.DecodeParms.EncodedByteAlign = True

    byte_aligned = PdfImage(xobj)
    with pytest.raises(UnsupportedImageTypeError):
        byte_aligned.as_pil_image()
github pikepdf / pikepdf / tests / test_image_access.py View on Github external
def test_direct_extract(resources, filename, bpc, filters, ext, mode, format_):
    """Extract the first image of ``filename`` and verify what was written.

    Args:
        resources: Directory containing the test PDF files.
        filename: Name of the PDF, relative to ``resources``.
        bpc: Expected ``bits_per_component`` of the image.
        filters: Expected value of the image's ``filters`` attribute.
        ext: Expected return value of ``extract_to``.
        mode: Expected mode of the extracted file when opened with PIL.
        format_: Expected format of the extracted file when opened with PIL.
    """
    image_obj, _pdf = first_image_in(resources / filename)
    pdf_image = PdfImage(image_obj)

    # Metadata reported by the PDF image wrapper.
    assert pdf_image.bits_per_component == bpc
    assert pdf_image.filters == filters

    buffer = BytesIO()
    written_ext = pdf_image.extract_to(stream=buffer)
    assert written_ext == ext, 'unexpected output file'

    # Rewind so that PIL reads the extracted bytes from the beginning.
    buffer.seek(0)
    decoded = Image.open(buffer)
    assert decoded.mode == mode
    assert decoded.format == format_
github pikepdf / pikepdf / tests / test_image_access.py View on Github external
def test_icc_extract(resources):
    """The PIL image's ``icc_profile`` must equal the PdfImage ICC bytes.

    Uses the first image found in ``tree-icc.pdf``.
    """
    image_obj, _pdf = first_image_in(resources / 'tree-icc.pdf')
    pdf_image = PdfImage(image_obj)

    # Profile as attached to the converted PIL image ...
    profile_in_pil = pdf_image.as_pil_image().info['icc_profile']
    # ... versus the profile exposed by the PdfImage wrapper itself.
    profile_in_pdf = pdf_image.icc.tobytes()

    assert profile_in_pil == profile_in_pdf
github pikepdf / pikepdf / tests / test_image_access.py View on Github external
def test_icc_use(resources):
    """Check how a 1-bit ICC-based image from ``1biticc.pdf`` is reported.

    The image should have mode ``'1'``, the ``/ICCBased`` colorspace and one
    bit per component, and its ICC profile should describe a ``'GRAY'``
    color space.
    """
    image_obj, _pdf = first_image_in(resources / '1biticc.pdf')
    bilevel = PdfImage(image_obj)

    assert bilevel.mode == '1'
    assert bilevel.colorspace == '/ICCBased'
    assert bilevel.bits_per_component == 1

    icc_profile = bilevel.icc.profile
    assert icc_profile.xcolor_space == 'GRAY'
github pikepdf / pikepdf / tests / test_image_access.py View on Github external
def test_extract_filepath(congress, outdir):
    """Extract an image using a file prefix and check the file on disk.

    The ``congress`` fixture supplies an ``(image object, pdf)`` pair and
    ``outdir`` is the directory the image is written into. The path returned
    by ``extract_to`` must exist, and so must ``image.jpg`` inside ``outdir``.
    """
    image_obj, _pdf = congress
    pdf_image = PdfImage(image_obj)

    # fspath is for Python 3.5
    prefix = fspath(outdir / 'image')
    written_path = pdf_image.extract_to(fileprefix=prefix)

    assert Path(written_path).exists()
    assert (outdir / 'image.jpg').exists()
github jbarlow83 / OCRmyPDF / src / ocrmypdf / optimize.py View on Github external
def extract_image_filter(pike, root, log, image, xref):
    """Decide whether ``image`` should be processed and describe its filter.

    Args:
        pike: Not used in this function body.
        root: Not used in this function body.
        log: Logger used to report why an image was skipped.
        image: PDF object to examine.
        xref: Identifier of the object; only used in log messages.

    Returns:
        ``None`` when the image is skipped, otherwise a tuple
        ``(pim, filtdp)`` where ``pim`` is the ``pikepdf.PdfImage`` wrapper
        and ``filtdp`` is the first entry of ``pim.filter_decodeparms``.
    """
    # Only image objects are of interest.
    if image.Subtype != Name.Image:
        return None

    if image.Length < 100:
        log.debug("Skipping small image, xref %s", xref)
        return None

    pim = pikepdf.PdfImage(image)
    filter_chain = pim.filter_decodeparms

    if len(filter_chain) > 1:
        log.debug("Skipping multiply filtered, xref %s", xref)
        return None

    # NOTE(review): this raises IndexError if the list is empty; confirm
    # whether an image without any filter can reach this point.
    filtdp = filter_chain[0]

    # Remaining reasons to leave the image alone, checked in order:
    #   * more than 8 bits per component (wide gamut images)
    #   * JPXDecode as the filter (JPEG2000)
    #   * a custom Decode table on the image
    if (
        pim.bits_per_component > 8
        or filtdp[0] == Name.JPXDecode
        or Name.Decode in image
    ):
        return None

    return pim, filtdp