How to use pdfplumber - 10 common examples

To help you get started, we’ve selected a few pdfplumber examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github jsvine / pdfplumber / tests / test-la-precinct-bulletin-2014-p1.py View on Github external
def precinct(self):
        # Build a pipe-joined identifier from the last text line found in the
        # left part of the "h1" bounding box.
        left_bbox = list(self.bboxes["h1"])
        # Halve the second-to-last bbox coordinate to narrow the region.
        left_bbox[-2] = float(left_bbox[-2]) / 2
        chars_in_region = intersects_bbox(self.chars, left_bbox)
        # One collated entry per distinct "top" value; keep the last one.
        collated = chars_in_region.groupby("top").apply(collate_chars)
        last_line = collated.iloc[-1]
        # Fields are separated by runs of two or more whitespace characters;
        # the identifier is made of the second and third fields.
        fields = re.split(r"\s{2,}", last_line)
        return "|".join(fields[1:3])
github jsvine / pdfplumber / tests / test-basics.py View on Github external
def test_password(self):
        # Open pdfs/password-example.pdf with the password "test" and check
        # that at least one character is extracted from it.
        path = os.path.join(HERE, "pdfs/password-example.pdf")
        pdf = pdfplumber.open(path, password="test")
        try:
            assert len(pdf.chars) > 0
        finally:
            # Close the PDF even when the assertion fails, so a failing test
            # does not leave the document open.
            pdf.close()
github jsvine / pdfplumber / tests / test-ca-warn-report.py View on Github external
def test_pandas(self):

        rect_x0_clusters = utils.cluster_list([ r["x0"]
            for r in self.pdf.pages[1].rects ], tolerance=3)

        v_lines = [ x[0] for x in rect_x0_clusters ]

        def parse_page(page):
            data = page.extract_table({
                "vertical_strategy": "explicit",
                "explicit_vertical_lines": v_lines
            })
            without_spaces = [ fix_row_spaces(row) for row in data ]
            return without_spaces

        parsed = parse_page(self.pdf.pages[0])

        assert(parsed[0] == [
            "NoticeDate",
github jsvine / pdfplumber / tests / test-basics.py View on Github external
def test_rotation(self):
        # Compare the first page of self.pdf with the first page of the
        # "-rotated" variant of the same fixture: width and height are
        # swapped, the cropbox is equal, and the bbox differs.
        rotated_path = os.path.join(
            HERE, "pdfs/nics-background-checks-2015-11-rotated.pdf"
        )
        rotated = pdfplumber.from_path(rotated_path)

        assert self.pdf.pages[0].width == 1008
        assert self.pdf.pages[0].height == 612

        assert rotated.pages[0].width == 612
        assert rotated.pages[0].height == 1008

        assert rotated.pages[0].cropbox == self.pdf.pages[0].cropbox
        assert rotated.pages[0].bbox != self.pdf.pages[0].bbox
github jsvine / pdfplumber / tests / test-issues.py View on Github external
def test_issue_21(self):
        # Load the fixture and touch pdf.objects; the length is discarded, so
        # the test only requires that the access completes without raising.
        fixture = os.path.join(HERE, "pdfs/150109DSP-Milw-505-90D.pdf")
        pdf = pdfplumber.from_path(fixture)
        len(pdf.objects)
github jsvine / pdfplumber / tests / test-issues.py View on Github external
def test_issue_14(self):
        # Load the fixture and touch pdf.objects; the length is discarded, so
        # the test only requires that the access completes without raising.
        fixture = os.path.join(HERE, "pdfs/cupertino_usd_4-6-16.pdf")
        pdf = pdfplumber.from_path(fixture)
        len(pdf.objects)
github jsvine / pdfplumber / tests / test-ca-warn-report.py View on Github external
def setUp(self):
        # Open the WARN report fixture and record the width of its first page.
        self.pdf = pdfplumber.from_path(
            os.path.join(HERE, "pdfs/WARN-Report-for-7-1-2015-to-03-25-2016.pdf")
        )
        first_page = self.pdf.pages[0]
        self.PDF_WIDTH = first_page.width
github jsvine / pdfplumber / tests / test-nics-background-checks-2015-11.py View on Github external
def setUp(self):
        # Open the NICS background-checks fixture and record the width of its
        # first page.
        self.pdf = pdfplumber.from_path(
            os.path.join(HERE, "pdfs/nics-background-checks-2015-11.pdf")
        )
        first_page = self.pdf.pages[0]
        self.PDF_WIDTH = first_page.width
github jsvine / pdfplumber / tests / test-la-precinct-bulletin-2014-p1.py View on Github external
def setUp(self):
        # Open the LA precinct bulletin fixture and record the width of its
        # first page.
        self.pdf = pdfplumber.from_path(
            os.path.join(HERE, "pdfs/la-precinct-bulletin-2014-p1.pdf")
        )
        first_page = self.pdf.pages[0]
        self.PDF_WIDTH = first_page.width
github jsvine / pdfplumber / tests / test-basics.py View on Github external
def setUp(self):
        # Open the NICS background-checks fixture used by the tests.
        self.pdf = pdfplumber.from_path(
            os.path.join(HERE, "pdfs/nics-background-checks-2015-11.pdf")
        )