How to use the ocrmypdf.exec.tesseract module in ocrmypdf

To help you get started, we’ve selected a few ocrmypdf examples based on popular ways the library is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github jbarlow83 / OCRmyPDF / tests / test_rotation.py View on Github external
def test_tesseract_orientation(resources, tmp_path):
    """Smoke-test ``tesseract.get_orientation`` on a rotated copy of crom.png.

    The image is rotated, written to ``tmp_path`` and handed to the
    orientation detector. The return value is intentionally not checked,
    because the detector's results are unreliable; the test only verifies
    that the call completes.
    """
    rotated_path = tmp_path / '000001.png'

    original = leptonica.Pix.open(resources / 'crom.png')
    # Two orthogonal steps, i.e. 180 degrees clockwise.
    original.rotate_orth(2).write_implied_format(rotated_path)

    # Test results of this are unreliable, so nothing is asserted.
    tesseract.get_orientation(
        rotated_path,
        engine_mode='3',
        timeout=10,
        log=logging.getLogger(),
    )
github jbarlow83 / OCRmyPDF / tests / test_main.py View on Github external
@pytest.mark.skipif(not tesseract.has_user_words(), reason='not functional until 4.1.0')
def test_user_words_ocr(resources, outdir):
    # Does not actually test if --user-words causes output to differ
    word_list = outdir / 'wordlist.txt'
    sidecar_after = outdir / 'sidecar.txt'

    with word_list.open('w') as f:
        f.write('cromulent\n')  # a perfectly cromulent word

    check_ocrmypdf(
        resources / 'crom.png',
        outdir / 'out.pdf',
        '--image-dpi',
        150,
        '--sidecar',
        sidecar_after,
        '--user-words',
github jbarlow83 / OCRmyPDF / tests / test_tess3.py View on Github external
@pytest.mark.skipif(
    tesseract.has_textonly_pdf(),
    reason="check that missing dep is reported on old tess3",
)
def test_textonly_pdf_on_older_tess3(resources, no_outpdf):
    """Check the exit code when the sandwich renderer is unavailable.

    Skipped whenever ``tesseract.has_textonly_pdf()`` reports support.
    Otherwise runs ocrmypdf on linn.pdf with ``--pdf-renderer sandwich``
    and expects the process to exit with ``ExitCode.missing_dependency``.
    """
    input_pdf = resources / 'linn.pdf'
    renderer_args = ('--pdf-renderer', 'sandwich')

    # The helper returns a 3-tuple; only the first element is needed here.
    proc, _unused_a, _unused_b = pytest.helpers.run_ocrmypdf(
        input_pdf, no_outpdf, *renderer_args
    )

    assert proc.returncode == ExitCode.missing_dependency