Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_skip_pages_does_not_replicate(resources, basename, outdir):
    """Check that processing keeps one image per page and the page widths.

    Runs ``resources / basename`` through ``check_ocrmypdf`` with the
    sandwich renderer, ``--force-ocr`` and ``--tesseract-timeout 0``, then
    compares the output PDF against the input PDF.
    """
    source_pdf = resources / basename
    result_pdf = outdir / basename
    cli_options = (
        '--pdf-renderer',
        'sandwich',
        '--force-ocr',
        '--tesseract-timeout',
        '0',
    )
    check_ocrmypdf(source_pdf, result_pdf, *cli_options)

    original_info = pdfinfo.PdfInfo(source_pdf)
    result_info = pdfinfo.PdfInfo(result_pdf)

    # Every output page must carry exactly one image.
    # NOTE(review): presumably the zero timeout makes OCR give up on each
    # page, so this guards the "skipped page" path -- confirm.
    for result_page in result_info:
        assert len(result_page.images) == 1, "skipped page was replicated"

    # Each output page must be exactly as wide as its input counterpart.
    for index, original_page in enumerate(original_info):
        assert result_info[index].width_inches == original_page.width_inches
def test_pickle(resources):
    """Check that a ``PdfInfo`` object can be pickled.

    For multiprocessing we must be able to pickle our information. If this
    fails, we are probably keeping some unpicklable pikepdf object or other
    external data around.
    """
    pdf_path = resources / 'graph_ocred.pdf'
    # Only serializability matters here; the pickled bytes are discarded.
    pickle.dumps(pdfinfo.PdfInfo(pdf_path))
def test_content_preservation(resources, outpdf):
    """Check that the first page of ``masks.pdf`` keeps more than one image.

    The file is processed with the sandwich renderer and
    ``--tesseract-timeout 0``; the first output page is then inspected.
    """
    source_pdf = resources / 'masks.pdf'
    cli_options = ('--pdf-renderer', 'sandwich', '--tesseract-timeout', '0')
    check_ocrmypdf(source_pdf, outpdf, *cli_options)

    first_page = pdfinfo.PdfInfo(outpdf)[0]
    # NOTE(review): per the assertion message, a single remaining image
    # presumably means the masks were flattened into one raster -- confirm.
    assert len(first_page.images) > 1, "masks were rasterized"
def test_single_page_text(outdir):
    """Check ``PdfInfo`` on a generated one-page, text-only PDF.

    Writes ``text.pdf`` into ``outdir`` with a single line of Helvetica text,
    then asserts that ``PdfInfo`` reports one page that has text and no
    images.
    """
    pdf_path = outdir / 'text.pdf'

    # Draw one line of text on a single page and write the file.
    canvas = Canvas(str(pdf_path), pagesize=(8 * 72, 6 * 72))
    text_object = canvas.beginText()
    text_object.setFont('Helvetica', 12)
    text_object.setTextOrigin(1 * 72, 3 * 72)
    text_object.textLine(
        "Methink'st thou art a general offence and every"
        " man should beat thee."
    )
    canvas.drawText(text_object)
    canvas.showPage()
    canvas.save()

    # Inspect what PdfInfo reports for the freshly written file.
    info = pdfinfo.PdfInfo(pdf_path)
    assert len(info) == 1

    only_page = info[0]
    assert only_page.has_text
    assert len(only_page.images) == 0