Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_image_to_pdf_or_hocr(test_file, extension):
    """Check the type and framing of ``image_to_pdf_or_hocr`` output.

    Args:
        test_file: Input passed straight through to ``image_to_pdf_or_hocr``.
        extension: Requested output format; ``'pdf'`` and ``'hocr'`` are the
            two values this test has assertions for.

    For ``'pdf'`` the result must be ``str`` on Python 2 (starting with
    ``%PDF`` and ending with ``EOF`` once stripped) and ``bytes`` otherwise.
    For ``'hocr'`` the result must be ``bytes`` that decode as UTF-8 into an
    XML document.
    """
    result = image_to_pdf_or_hocr(test_file, extension=extension)

    # Compare by value: ``is`` against a string literal only succeeds when
    # both strings happen to be interned, which the language does not promise.
    if extension == 'pdf':
        if IS_PYTHON_2:
            assert isinstance(result, str)
            result = str(result).strip()
            assert result.startswith('%PDF')
            assert result.endswith('EOF')
        else:
            assert isinstance(result, bytes)

    if extension == 'hocr':
        assert isinstance(result, bytes)  # type
        result = result.decode('utf-8') if IS_PYTHON_2 else str(result, 'utf-8')
        result = str(result).strip()
        # The previous check was ``startswith('')``, which is true for every
        # string and therefore verified nothing. hOCR output is an XML
        # document, so assert on the XML declaration instead.
        assert result.startswith('<?xml')
def _image_to_pdf(path):
    """Convert the image at *path* to a PDF saved where the user chooses.

    The image is run through ``pytesseract.image_to_pdf_or_hocr`` with
    ``extension='pdf'``; a save-as dialog is then shown, pre-filled with the
    image's directory and its base name with a ``.pdf`` extension.

    Args:
        path: File system path of the source image.

    Returns:
        The path the PDF was written to, or ``None`` when the dialog
        returned no path (for example because it was cancelled).
    """
    # Use the image as a context manager so its file handle is released as
    # soon as the conversion is done.
    with Image.open(path) as image:
        pdf = pytesseract.image_to_pdf_or_hocr(image, extension='pdf')

    # splitext keeps the whole name when there is no extension (or the name
    # is a dotfile); splitting on '.' and dropping the last piece would
    # reduce such names to an empty string.
    stem = os.path.splitext(os.path.basename(path))[0]
    filename = stem + '.pdf'
    dirname = os.path.dirname(path)

    save_path = filedialog.asksaveasfilename(
        title='Save Converted PDF As',
        defaultextension='.pdf',
        initialdir=dirname,
        initialfile=filename,
        filetypes=[('PDF files', '*.pdf'), ('all files', '.*')],
    )
    # Treat any empty result ('' / None / empty tuple) as "nothing chosen".
    if not save_path:
        return None

    with open(save_path, 'wb') as out:
        out.write(pdf)
    return save_path
def convert_png(png):
    """Render *png* to a PDF via pytesseract and write it to ``png + '.pdf'``.

    Args:
        png: Image path handed to ``pytesseract.image_to_pdf_or_hocr``; the
            output name is formed by appending ``'.pdf'`` to it.

    Returns:
        The name of the PDF file that was written.
    """
    pdf_data = pytesseract.image_to_pdf_or_hocr(png, extension='pdf')
    # Build the target name only after the OCR call, as the original did.
    target = png + '.pdf'
    with open(target, 'wb') as handle:
        handle.write(pdf_data)
    return target