How to use the pdf2image.convert_from_path function in pdf2image

To help you get started, we’ve selected a few pdf2image examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github Belval / pdf2image / tests.py View on Github external
def test_conversion_from_path_using_dir_14(self):
        start_time = time.time()
        with TemporaryDirectory() as path:
            images_from_path = convert_from_path(
                "./tests/test_14.pdf", output_folder=path
            )
            self.assertTrue(len(images_from_path) == 14)
            [im.close() for im in images_from_path]
        print(
            "test_conversion_from_path_using_dir_14: {} sec".format(
                (time.time() - start_time) / 14.0
            )
github Belval / pdf2image / tests.py View on Github external
def test_pdfinfo_not_installed_throws(self):
        start_time = time.time()
        try:
            images_from_path = convert_from_path("./tests/test_14.pdf")
            raise Exception("This should not happen")
        except PDFInfoNotInstalledError as ex:
            pass

        print(
            "test_pdfinfo_not_installed_throws: {} sec".format(
                (time.time() - start_time) / 14.0
            )
github Belval / pdf2image / tests.py View on Github external
def test_conversion_from_path_using_dir_14_first_page_12(self):
        start_time = time.time()
        with TemporaryDirectory() as path:
            images_from_path = convert_from_path(
                "./tests/test_14.pdf", output_folder=path, first_page=12
            )
            self.assertTrue(len(images_from_path) == 3)
            [im.close() for im in images_from_path]
        print(
            "test_conversion_from_path_using_dir_14_first_page_12: {} sec".format(
                (time.time() - start_time) / 14.0
            )
github Belval / pdf2image / tests.py View on Github external
def test_conversion_from_path_14_first_page_12_last_page_1(self):
        start_time = time.time()
        images_from_path = convert_from_path(
            "./tests/test_14.pdf", first_page=12, last_page=1
        )
        self.assertTrue(len(images_from_path) == 0)
        print(
            "test_conversion_from_path_14: {} sec".format(
                (time.time() - start_time) / 14.0
            )
github Belval / pdf2image / tests.py View on Github external
def test_conversion_from_pathlib_path_14(self):
        start_time = time.time()
        images_from_path = convert_from_path(pathlib.Path("./tests/test_14.pdf"))
        self.assertTrue(len(images_from_path) == 14)
        print(
            "test_conversion_from_pathlib_path_14: {} sec".format(
                (time.time() - start_time) / 14.0
            )
github Belval / pdf2image / tests.py View on Github external
def test_conversion_from_path_14_last_page_12(self):
        start_time = time.time()
        images_from_path = convert_from_path("./tests/test_14.pdf", last_page=12)
        self.assertTrue(len(images_from_path) == 12)
        print(
            "test_conversion_from_path_14_last_page_12: {} sec".format(
                (time.time() - start_time) / 14.0
            )
github Belval / pdf2image / tests.py View on Github external
def test_conversion_from_path_14_with_4_threads(self):
        start_time = time.time()
        images_from_path = convert_from_path("./tests/test_14.pdf", thread_count=4)
        self.assertTrue(len(images_from_path) == 14)
        print(
            "test_conversion_from_path_14_with_4_thread: {} sec".format(
                (time.time() - start_time) / 14.0
            )
github MaliParag / TFD-ICDAR2019 / Task3_Detection / convert_pdf_to_image.py View on Github external
:param pdf_dir: Directory with PDF files
    :param output_dir: Output directory for storing image files for each PDF file
    :return: None
    '''
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    pdf_files = []
    for _, _, fileList in os.walk(pdf_dir):
        pdf_files.extend(fileList)
        break
    for pdf_file in pdf_files:
        pdf_name = pdf_file.split(".pdf")[0]
        output_path = os.path.join(output_dir, pdf_name)
        if not os.path.exists(output_path):
            os.makedirs(output_path)
        pages = convert_from_path(os.path.join(pdf_dir, pdf_file), 600)
        for i in range(len(pages)):
            pages[i].save(os.path.join(output_path, str(i + 1) + ".png"), 'PNG')
github TomAnthony / pdf-to-powerpoint / convert.py View on Github external
# Script excerpt: render a PDF to page images and start building a slide deck
# from them. Only the beginning of the per-slide loop is visible here; names
# such as `sys`, `Presentation`, `convert_from_path` and `BytesIO` are imported
# outside this excerpt.

# The PDF to convert is the first command-line argument.
pdf_file = sys.argv[1]
print()
print("Converting file: " + pdf_file)
print()

# Prep presentation
# NOTE(review): `Presentation` is presumably python-pptx's; layout index 6 is
# taken to be the blank layout (per the variable name) — confirm.
prs = Presentation()
blank_slide_layout = prs.slide_layouts[6]

# Derive a base name: everything before the first ".pdf" in the given path.
# NOTE(review): the original comment said "Create working folder", but no
# folder is created in the visible code — presumably base_name is used for
# output naming further down; confirm.
base_name = pdf_file.split(".pdf")[0]

# Convert PDF to list of images
# 300 is the second positional argument (dpi in pdf2image's signature); pages
# are produced in PPM format using two worker threads.
print("Starting conversion...")
slideimgs = convert_from_path(pdf_file, 300, fmt='ppm', thread_count=2)
print("...complete.")
print()

# Loop over slides
for i, slideimg in enumerate(slideimgs):
	# Progress message on every tenth page, starting with index 0.
	if i % 10 == 0:
		print("Saving slide: " + str(i))

	# Re-encode the page as TIFF into an in-memory buffer.
	imagefile = BytesIO()
	slideimg.save(imagefile, format='tiff')
	# Snapshot of the encoded bytes; not used in the visible part of the loop.
	imagedata = imagefile.getvalue()
	# Rewind the buffer so a later reader starts from the first byte.
	imagefile.seek(0)
	width, height = slideimg.size

	# Set slide dimensions
	# NOTE(review): 9525 is presumably EMU per pixel (914400 EMU per inch at
	# 96 px per inch) — confirm against the python-pptx length units.
	prs.slide_height = height * 9525
github coreybobco / generativepoetry-py / generativepoetry / pdf.py View on Github external
def generate_png(self, input_filepath=None):
        """Render the PDF at ``input_filepath`` and save it as a PNG beside it.

        The PDF is converted with ``convert_from_path`` and each resulting
        page is saved in PNG format. The output path is the input path with
        its last three characters replaced by ``png``.

        NOTE(review): every page is written to the same output path, so for a
        multi-page PDF each page overwrites the previous one and only the last
        page remains on disk. The three-character slice also assumes the input
        ends in a three-letter extension such as ``pdf``. The default of
        ``None`` is passed straight to ``convert_from_path``.

        :param input_filepath: Path of the PDF file to render.
        :return: None
        """
        rendered_pages = convert_from_path(input_filepath)
        for rendered in rendered_pages:
            # Recomputed on every pass, exactly as before: same target each time.
            target = f'{input_filepath[:-3]}png'
            rendered.save(target, 'PNG')