How to use the pikepdf.open function in pikepdf

To help you get started, we’ve selected a few pikepdf examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github jbarlow83 / OCRmyPDF / tests / test_graft.py View on Github external
def test_no_glyphless_graft(resources, outdir):
    """Run OCR on a PDF assembled from three source documents.

    Builds ``test.pdf`` in ``outdir`` by appending the pages of
    ``aspect.pdf`` and ``cmyk.pdf`` to ``francais.pdf``, then calls
    ``ocrmypdf.ocr`` on it with ``ocrmypdf._graft.MAX_REPLACE_PAGES``
    patched to 2.  The test has no explicit assertions; it passes when the
    OCR call completes without raising.

    Args:
        resources: Directory (path-like supporting ``/``) holding the input
            PDFs.
        outdir: Directory (path-like supporting ``/``) for generated files.
    """
    # Keep all three documents open until the merged file has been saved,
    # then close them deterministically instead of leaking the handles.
    with pikepdf.open(resources / 'francais.pdf') as pdf, \
            pikepdf.open(resources / 'aspect.pdf') as pdf_aspect, \
            pikepdf.open(resources / 'cmyk.pdf') as pdf_cmyk:
        pdf.pages.extend(pdf_aspect.pages)
        pdf.pages.extend(pdf_cmyk.pages)
        pdf.save(outdir / 'test.pdf')

    with patch('ocrmypdf._graft.MAX_REPLACE_PAGES', 2):
        ocrmypdf.ocr(
            outdir / 'test.pdf', outdir / 'out.pdf', deskew=True, tesseract_timeout=0
        )
github pikepdf / pikepdf / tests / test_encrypt.py View on Github external
def test_encrypt_info(trivial, outpdf):
    """Check the encryption details reported for a freshly encrypted PDF.

    Saves ``trivial`` to ``outpdf`` with ``R=4`` encryption (owner password
    ``'foo'``, user password ``'bar'``), reopens it with the owner password
    and asserts the reported user password and key length.

    Args:
        trivial: An object with a ``save(path, encryption=...)`` method
            (presumably an open pikepdf document fixture).
        outpdf: Path the encrypted PDF is written to.
    """
    trivial.save(outpdf, encryption=dict(R=4, owner='foo', user='bar'))
    # Use a with-block so the reopened file is closed even if an assertion
    # fails, rather than leaking the handle.
    with pikepdf.open(outpdf, password='foo') as pdf:
        assert pdf.encryption.user_password == b'bar'
        assert pdf.encryption.bits == 128
github jbarlow83 / OCRmyPDF / tests / test_ghostscript.py View on Github external
def linn(resources):
    """Return ``(path, pdf)`` for ``linn.pdf`` inside ``resources``.

    The second element is the result of ``pikepdf.open`` on that path; it is
    handed to the caller still open.
    """
    linn_path = resources / 'linn.pdf'
    opened = pikepdf.open(linn_path)
    return linn_path, opened
github jbarlow83 / OCRmyPDF / tests / test_metadata.py View on Github external
def test_preserve_metadata(spoof_tesseract_noop, output_type, resources, outpdf):
    """Check that ``/Title`` and ``/Author`` survive processing of graph.pdf.

    Runs ``check_ocrmypdf`` on ``graph.pdf`` with the requested
    ``--output-type`` and compares the two docinfo entries between the input
    and the produced file.  Finally asserts that ``file_claims_pdfa`` reports
    ``output_type`` under its ``'output'`` key.

    Args:
        spoof_tesseract_noop: Environment passed to ``check_ocrmypdf`` as
            ``env``.
        output_type: Value passed after ``--output-type`` and expected back
            from ``file_claims_pdfa``.
        resources: Directory (path-like supporting ``/``) holding graph.pdf.
        outpdf: Destination path for the processed PDF.
    """
    # with-blocks close both documents even when an assertion fails; the
    # original left both handles open.  The input is still opened before
    # the run, as before.
    with pikepdf.open(resources / 'graph.pdf') as pdf_before:
        output = check_ocrmypdf(
            resources / 'graph.pdf',
            outpdf,
            '--output-type',
            output_type,
            env=spoof_tesseract_noop,
        )

        with pikepdf.open(output) as pdf_after:
            for key in ('/Title', '/Author'):
                assert pdf_before.docinfo[key] == pdf_after.docinfo[key]

    pdfa_info = file_claims_pdfa(str(output))
    assert pdfa_info['output'] == output_type
github pikepdf / pikepdf / tests / test_pdf.py View on Github external
def test_with_block_abuse(resources):
    """Reading an object after its PDF's with-block has exited raises PdfError.

    The image object is fetched inside the with-block but deliberately used
    only after the block has ended.
    """
    pdf_path = resources / 'pal-1bit-trivial.pdf'
    with pikepdf.open(pdf_path) as closed_pdf:
        first_page = closed_pdf.pages[0]
        stale_image = first_page.Resources.XObject['/Im0']

    # The with-block has exited, so this read is expected to fail.
    with pytest.raises(PdfError):
        stale_image.read_bytes()
github jbarlow83 / OCRmyPDF / tests / test_graft.py View on Github external
def test_no_glyphless_graft(resources, outdir):
    """Run OCR on a PDF assembled from three source documents.

    Builds ``test.pdf`` in ``outdir`` by appending the pages of
    ``aspect.pdf`` and ``cmyk.pdf`` to ``francais.pdf``, then calls
    ``ocrmypdf.ocr`` on it with ``ocrmypdf._graft.MAX_REPLACE_PAGES``
    patched to 2.  The test has no explicit assertions; it passes when the
    OCR call completes without raising.

    Args:
        resources: Directory (path-like supporting ``/``) holding the input
            PDFs.
        outdir: Directory (path-like supporting ``/``) for generated files.
    """
    # Keep all three documents open until the merged file has been saved,
    # then close them deterministically instead of leaking the handles.
    with pikepdf.open(resources / 'francais.pdf') as pdf, \
            pikepdf.open(resources / 'aspect.pdf') as pdf_aspect, \
            pikepdf.open(resources / 'cmyk.pdf') as pdf_cmyk:
        pdf.pages.extend(pdf_aspect.pages)
        pdf.pages.extend(pdf_cmyk.pages)
        pdf.save(outdir / 'test.pdf')

    with patch('ocrmypdf._graft.MAX_REPLACE_PAGES', 2):
        ocrmypdf.ocr(
            outdir / 'test.pdf', outdir / 'out.pdf', deskew=True, tesseract_timeout=0
        )
github pikepdf / pikepdf / tests / test_encrypt.py View on Github external
def test_encrypt_basic(trivial, outpdf, R, owner, user):
    """Check that an encrypted PDF opens with either password.

    Saves ``trivial`` to ``outpdf`` with the given encryption parameters,
    then reopens it once with the owner password and once with the user
    password, asserting ``is_encrypted`` each time.

    Args:
        trivial: An object with a ``save(path, encryption=...)`` method
            (presumably an open pikepdf document fixture).
        outpdf: Path the encrypted PDF is written to.
        R: Value passed as the ``R`` entry of the encryption settings.
        owner: Owner password.
        user: User password.
    """
    trivial.save(outpdf, encryption=dict(R=R, owner=owner, user=user))
    # Each reopen gets its own with-block so the handle is closed even when
    # the assertion fails; the original left both files open.
    with pikepdf.open(outpdf, password=owner) as pdf_owner:
        assert pdf_owner.is_encrypted
    with pikepdf.open(outpdf, password=user) as pdf_user:
        assert pdf_user.is_encrypted
github jbarlow83 / OCRmyPDF / src / ocrmypdf / _pipeline.py View on Github external
def metadata_fixup(working_file, context):
    """Load the original document's docinfo into the working file's XMP metadata.

    Opens ``context.origin`` and ``working_file`` with pikepdf, obtains a
    docinfo via ``get_docinfo(original, options)`` and loads it into the
    working file's metadata without deleting entries that are missing from
    it.  Logs a warning when the original has metadata keys that are absent
    from the result and the output type starts with ``'pdfa'``.

    NOTE(review): this excerpt appears to stop partway through the function;
    ``output_file`` is assigned but not used in the visible part, and nothing
    is saved or returned here.  Confirm against the full source.

    Args:
        working_file: Path of the PDF whose metadata is edited.
        context: Object providing ``get_path``, ``options``, ``origin`` and
            ``log``.
    """
    output_file = context.get_path('metafix.pdf')
    options = context.options
    original = pikepdf.open(context.origin)
    docinfo = get_docinfo(original, options)
    pdf = pikepdf.open(working_file)
    with pdf.open_metadata() as meta:
        # delete_missing=False: keep existing entries that docinfo lacks.
        meta.load_from_docinfo(docinfo, delete_missing=False)
        # If xmp:CreateDate is missing, set it to the modify date to
        # match Ghostscript, for consistency
        if 'xmp:CreateDate' not in meta:
            meta['xmp:CreateDate'] = meta.get('xmp:ModifyDate', '')

        # Keys present in the original's metadata but absent from the
        # working file's metadata.
        meta_original = original.open_metadata()
        not_copied = set(meta_original.keys()) - set(meta.keys())
        if not_copied:
            if options.output_type.startswith('pdfa'):
                context.log.warning(
                    "Some input metadata could not be copied because it is not "
                    "permitted in PDF/A. You may wish to examine the output "
                    "PDF's XMP metadata."
                )
github jbarlow83 / OCRmyPDF / src / ocrmypdf / optimize.py View on Github external
# NOTE(review): excerpt from the middle of a function; the enclosing ``def``
# and the definitions of input_file, target_file, output_file, save_settings
# and log are not visible here, and the first line has lost its indentation.
# Compare the on-disk sizes of the input and the optimized target.
input_size = Path(input_file).stat().st_size
    output_size = Path(target_file).stat().st_size
    # A zero-byte target is treated as a failed write; raising here also
    # keeps the ratio division below from dividing by zero.
    if output_size == 0:
        raise OutputFileAccessError(
            f"Output file not created after optimizing. We probably ran "
            f"out of disk space in the temporary folder: {tempfile.gettempdir()}."
        )
    ratio = input_size / output_size
    # Fraction of the input size that was saved; negative when the target is
    # larger than the input.
    # NOTE(review): input_size == 0 is not guarded in the visible code and
    # would raise ZeroDivisionError here.
    savings = 1 - output_size / input_size
    log.info(f"Optimize ratio: {ratio:.2f} savings: {(100 * savings):.1f}%")

    if savings < 0:
        log.info("Image optimization did not improve the file - discarded")
        # We still need to save the file
        # The output is written from input_file rather than target_file.
        with pikepdf.open(input_file) as pike:
            pike.remove_unreferenced_resources()
            pike.save(output_file, **save_settings)
    else:
        # Presumably makes output_file refer to target_file; safe_symlink is
        # defined elsewhere - TODO confirm its exact semantics.
        safe_symlink(target_file, output_file)