How to use the ocrmypdf.exec.ghostscript.rasterize_pdf function in ocrmypdf

To help you get started, we’ve selected a few ocrmypdf examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github jbarlow83 / OCRmyPDF / tests / test_optimize.py View on Github external
def test_mono_not_inverted(resources, outdir):
    """Check that optimizing a PDF does not invert its background.

    Runs the optimizer at level 3 on ``2400dpi.pdf``, rasterizes the result
    to a grayscale PNG and asserts that the top-left pixel is 255.
    """
    source_pdf = resources / '2400dpi.pdf'
    optimized_pdf = outdir / 'out.pdf'
    raster_png = outdir / 'im.png'

    opt.main(source_pdf, optimized_pdf, level=3)

    test_log = logging.getLogger(name='test_mono_not_inverted')
    # A very low resolution is enough here: only a single pixel is inspected.
    rasterize_pdf(
        optimized_pdf,
        raster_png,
        xres=10,
        yres=10,
        raster_device='pnggray',
        log=test_log,
    )

    with Image.open(fspath(raster_png)) as im:
        corner_pixel = im.getpixel((0, 0))
        assert corner_pixel == 255, "Expected white background"
github jbarlow83 / OCRmyPDF / tests / test_ghostscript.py View on Github external
def test_rasterize_size(linn, outdir, caplog):
    """Check the pixel size and DPI metadata of a rasterized page.

    The resolution passed to ``rasterize_pdf`` is derived from the first
    page's MediaBox so that the output should come out at exactly
    200 x 150 pixels; ``page_dpi`` is expected to be recorded in the
    image's ``dpi`` info unchanged.
    """
    path, pdf = linn
    media_box = pdf.pages[0].MediaBox
    page_size_pts = (media_box[2], media_box[3])
    # The size computation below only holds if the box starts at the origin.
    assert media_box[0] == media_box[1] == 0

    # Presumably points -> inches (72 pt per inch); Decimal keeps it exact.
    page_width, page_height = (pts / Decimal(72) for pts in page_size_pts)
    target_size = (Decimal('200.0'), Decimal('150.0'))
    target_width, target_height = target_size
    target_dpi = (42.0, 4242.0)

    log = logging.getLogger()
    rasterize_pdf(
        path,
        outdir / 'out.png',
        target_width / page_width,
        target_height / page_height,
        raster_device='pngmono',
        log=log,
        page_dpi=target_dpi,
    )

    with Image.open(outdir / 'out.png') as im:
        assert im.size == target_size
        assert im.info['dpi'] == target_dpi
github jbarlow83 / OCRmyPDF / tests / test_rotation.py View on Github external
def rasterize(pdf, pageno, png):
    """Rasterize page *pageno* of *pdf* into *png* unless *png* exists.

    When the target file is already present, its path is printed and
    nothing is rendered. Otherwise the page is rendered through
    ``ghostscript.rasterize_pdf`` with the ``pngmono`` device at
    xres/yres of 100 and no rotation.
    """
    if not png.exists():
        ghostscript.rasterize_pdf(
            pdf,
            png,
            xres=100,
            yres=100,
            raster_device='pngmono',
            log=gslog,
            pageno=pageno,
            rotation=0,
        )
        return

    # Existing output is kept as-is; just report which file was skipped.
    print(png)
github jbarlow83 / OCRmyPDF / tests / test_ghostscript.py View on Github external
def test_rasterize_rotated(linn, outdir, caplog):
    """Check that ``rotation=90`` swaps the output size and DPI axes.

    The resolution is derived from the first page's MediaBox for a
    200 x 150 pixel target; with a 90 rotation the image is expected to
    come out as 150 x 200 with the ``page_dpi`` pair swapped as well.
    """
    path, pdf = linn
    media_box = pdf.pages[0].MediaBox
    page_size_pts = (media_box[2], media_box[3])
    # The size computation below only holds if the box starts at the origin.
    assert media_box[0] == media_box[1] == 0

    # Presumably points -> inches (72 pt per inch); Decimal keeps it exact.
    page_width, page_height = (pts / Decimal(72) for pts in page_size_pts)
    target_width, target_height = Decimal('200.0'), Decimal('150.0')
    dpi_x, dpi_y = target_dpi = (42.0, 4242.0)

    log = logging.getLogger()
    caplog.set_level(logging.DEBUG)
    rasterize_pdf(
        path,
        outdir / 'out.png',
        target_width / page_width,
        target_height / page_height,
        raster_device='pngmono',
        log=log,
        page_dpi=target_dpi,
        rotation=90,
    )

    with Image.open(outdir / 'out.png') as im:
        # Width/height and the two DPI values trade places after rotation.
        assert im.size == (target_height, target_width)
        assert im.info['dpi'] == (dpi_y, dpi_x)