How to use the pdf2image.convert_from_bytes function in pdf2image

To help you get started, we’ve selected a few pdf2image examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github Belval / pdf2image / tests.py View on Github external
def test_conversion_from_bytes_14_last_page_12(self):
        start_time = time.time()
        with open("./tests/test_14.pdf", "rb") as pdf_file:
            images_from_bytes = convert_from_bytes(pdf_file.read(), last_page=12)
            self.assertTrue(len(images_from_bytes) == 12)
        print(
            "test_conversion_from_bytes_14_last_page_12: {} sec".format(
                (time.time() - start_time) / 14.0
            )
github Belval / pdf2image / tests.py View on Github external
def test_not_locked_pdf(self):
        # Converts ./tests/test.pdf from bytes while passing a user password,
        # writing the rendered pages into a temporary folder, and checks that
        # exactly one image comes back.
        start_time = time.time()
        with TemporaryDirectory() as path:
            with open("./tests/test.pdf", "rb") as pdf_file:
                images_from_bytes = convert_from_bytes(
                    pdf_file.read(), output_folder=path, fmt=".jpg", userpw="pdf2image"
                )
            try:
                # assertEqual reports both values on failure, unlike assertTrue.
                self.assertEqual(len(images_from_bytes), 1)
            finally:
                # Close the images even when the assertion fails so no handle
                # into the temporary folder is left open when it is removed.
                for im in images_from_bytes:
                    im.close()
        # The label now matches this test's name (it previously reported
        # itself as a different test).
        print("test_not_locked_pdf: {} sec".format(time.time() - start_time))
github Belval / pdf2image / tests.py View on Github external
def test_conversion_from_bytes_14(self):
        start_time = time.time()
        with open("./tests/test_14.pdf", "rb") as pdf_file:
            images_from_bytes = convert_from_bytes(pdf_file.read())
            self.assertTrue(len(images_from_bytes) == 14)
        print(
            "test_conversion_from_bytes_14: {} sec".format(
                (time.time() - start_time) / 14.0
            )
github Belval / pdf2image / tests.py View on Github external
def test_conversion_from_bytes_using_dir_241(self):  # pragma: no cover
        start_time = time.time()
        with TemporaryDirectory() as path:
            with open("./tests/test_241.pdf", "rb") as pdf_file:
                images_from_bytes = convert_from_bytes(
                    pdf_file.read(), output_folder=path
                )
                self.assertTrue(len(images_from_bytes) == 241)
                [im.close() for im in images_from_bytes]
        print(
            "test_conversion_from_bytes_using_dir_241: {} sec".format(
                (time.time() - start_time) / 241.0
            )
github Belval / pdf2image / tests.py View on Github external
def test_conversion_from_bytes_with_quality_and_progressive_and_optimize(self):
        start_time = time.time()
        with open("./tests/test.pdf", "rb") as pdf_file:
            images_from_bytes = convert_from_bytes(
                pdf_file.read(),
                fmt="jpg",
                jpegopt={"quality": 100, "progressive": True, "optimize": True},
            )
            self.assertTrue(len(images_from_bytes) == 1)
        print(
            "test_conversion_from_bytes_with_quality_and_progressive_and_optimize: {} sec".format(
                time.time() - start_time
            )
github Belval / pdf2image / tests.py View on Github external
def test_conversion_to_grayscale_from_bytes(self):
        start_time = time.time()
        with open("./tests/test_14.pdf", "rb") as pdf_file:
            images_from_bytes = convert_from_bytes(pdf_file.read(), grayscale=True)
            self.assertTrue(images_from_bytes[0].mode == "L")
        print(
            "test_conversion_to_grayscale_from_bytes_14: {} sec".format(
                (time.time() - start_time) / 14.0
            )
github dbrainio / wrappa / wrappa / resources / predict.py View on Github external
tmp = obj_url.split('/')[-1].split('?')
            if len(tmp) <= 1:
                filename = ''.join(tmp)
            else:
                filename = ''.join(tmp[:-1])
        content_type = key.split('-')[0]
        ext = filename.split('.')[-1]
        if filename is not None and buf is not None:
            if content_type == 'image':
                try:
                    buf.seek(0)
                    _ = Image.open(buf)
                    return [WrappaImage(
                        payload=buf.getvalue(), ext=ext, name=filename)]
                except:
                    imgs = convert_from_bytes(buf.getvalue())
                    for i, img in enumerate(imgs):
                        buf = io.BytesIO()
                        img.save(buf, format='JPEG')
                        buf.flush()
                        imgs[i] = WrappaImage(
                            payload=buf.getvalue(),
                            ext='jpeg',
                            name='{}-{}.jpeg'.format(
                                filename.split('.')[0], str(i)
                            )
                        )
                    return imgs
            data = {
                'payload': buf.getvalue(),
                'ext': ext,
                'name': filename
github pkorus / neural-imaging / helpers / results_data.py View on Github external
\end{preview}
        \end{document}
        """.replace('[]', latex)

    pdf = build_pdf(latex)
    
    if format == 'file':
        filename = filename or '/tmp/{}.pdf'.format(''.join(np.random.choice(list('abcdef'), 10, replace=True)))
        
        if filename.endswith('.pdf'):
            with open(filename, 'wb') as f:
                f.write(pdf.data)
            
        elif filename.endswith('.png'):
            from pdf2image import convert_from_bytes
            image = convert_from_bytes(pdf.data)
            imageio.imwrite(filename, image)
        
        return filename
    
    elif format == 'bytes':
        return pdf
    
    elif format == 'array':
        from pdf2image import convert_from_bytes
        return np.array(convert_from_bytes(pdf.data)[0])
    
    elif format == 'fig':
        from pdf2image import convert_from_bytes
        from matplotlib.figure import Figure
        dpi, scale = 300, 0.75
        image = np.array(convert_from_bytes(pdf.data, dpi=dpi)[0])
github algoo / preview-generator / preview_generator / preview / builder / image__wand.py View on Github external
def convert_pdf_to_jpeg(pdf: typing.IO[bytes], preview_size: ImgDims) -> BytesIO:
    """Render the pages of a PDF stream as resized JPEG data.

    The whole stream is read, converted to page images with
    ``convert_from_bytes``, and each page is resized to the dimensions that
    ``compute_resize_dims`` returns for ``preview_size`` before being saved as
    JPEG into one shared in-memory buffer.

    :param pdf: binary stream holding the PDF document
    :param preview_size: requested preview dimensions
    :return: the buffer, rewound to its start
    """
    pages = convert_from_bytes(pdf.read())

    jpeg_buffer = BytesIO()
    # NOTE(review): every page is saved into the same buffer one after another;
    # confirm callers only expect single-page input or handle the result.
    for page in pages:
        target = compute_resize_dims(ImgDims(page.width, page.height), preview_size)
        # NOTE(review): resample=True is passed where a resampling filter
        # constant would normally go - confirm which filter is intended.
        scaled = page.resize((target.width, target.height), resample=True)
        scaled.save(jpeg_buffer, format="JPEG")

    jpeg_buffer.seek(0, 0)
    return jpeg_buffer
github dothealth / pdf-utils / server / app.py View on Github external
def process_hocr_pdf(savepath, bind=True):
    """OCR every page of a PDF and merge the per-page results with pdftk.

    Each page is rendered at 300 dpi, saved as a PNG in a temporary folder,
    passed through ``pytesseract.image_to_pdf_or_hocr`` to get a one-page PDF,
    and the page PDFs are concatenated by ``pdftk`` into
    ``./tmp/<name>_hocr.pdf``.

    Args:
        savepath: Path of the PDF to process.
        bind: Not used by this function; kept for backward compatibility.

    Returns:
        A dict ``{'output': <merged file name>}``. The dict is returned even
        when pdftk fails; the failure is logged as an error.
    """
    def convert_png(png):
        # OCR a single page image and write the resulting PDF next to it.
        ocr_output = pytesseract.image_to_pdf_or_hocr(png, extension='pdf')
        pdf_path = png + '.pdf'
        with open(pdf_path, 'wb') as f:
            f.write(ocr_output)
        return pdf_path

    # Only the bytes are needed, so release the file handle before the
    # (potentially slow) OCR work starts.
    with open(savepath, 'rb') as inputfile:
        pdf_bytes = inputfile.read()

    basename = os.path.basename(savepath)
    filename = os.path.splitext(basename)[0]
    logger.info('processing file ' + savepath)

    with tempfile.TemporaryDirectory() as path:
        pngs = convert_from_bytes(pdf_bytes, dpi=300, output_folder=path)
        pdfs = []
        for i, png in enumerate(pngs):
            # Page files are numbered from 1.
            png_path = os.path.join(path, filename + '_' + str(i + 1) + '.png')
            png.save(png_path)
            pdfs.append(convert_png(png_path))

        outputfilename = filename + '_hocr.pdf'
        status = subprocess.call(
            ['pdftk'] + pdfs + ['cat', 'output', os.path.join('./tmp', outputfilename)]
        )
        if status != 0:
            # Previously a failed merge was silently reported as success.
            logger.error(
                'pdftk exited with status ' + str(status) + ' for ' + outputfilename
            )
        else:
            logger.info('processed file ' + outputfilename)
        return {'output': outputfilename}