How to use the pdfplumber.utils.collate_chars function in pdfplumber

To help you get started, we’ve selected a few examples of pdfplumber.utils.collate_chars, based on popular ways the function is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github jsvine / pdfplumber / tests / test-la-precinct-bulletin-2014-p1.py View on Github external
def parse_results_line(chars):
    """Split the characters of one results line into three fields.

    Characters are partitioned on their ``x0rel`` value: below 125 is
    the label text, strictly between 125 and 155 is the affiliation,
    and above 155 is the vote count, which is converted to ``int``.
    A field that receives no characters is reported as ``None``.

    NOTE(review): every comparison is strict, so a character whose
    ``x0rel`` is exactly 125 or 155 is assigned to no field.
    """
    x0rel = chars["x0rel"]

    def text_or_none(selection):
        # collate_chars is only invoked on non-empty selections.
        if len(selection):
            return collate_chars(selection)
        return None

    label = text_or_none(chars[x0rel < 125])

    vote_chars = chars[x0rel > 155]
    if len(vote_chars):
        votes = int(collate_chars(vote_chars))
    else:
        votes = None

    affiliation = text_or_none(chars[(x0rel > 125) & (x0rel < 155)])
    return {"text": label, "aff": affiliation, "votes": votes}
github jsvine / pdfplumber / tests / test-la-precinct-bulletin-2014-p1.py View on Github external
def parse_results_line(chars):
    """Break a results line into label text, affiliation and votes.

    The ``x0rel`` value of each character decides its field:
    ``< 125`` -> ``"text"``, ``125 < x0rel < 155`` -> ``"aff"``,
    ``> 155`` -> ``"votes"`` (parsed with ``int``). Fields with no
    characters come back as ``None``.

    NOTE(review): the bounds are exclusive on both sides, so values of
    exactly 125 or 155 match none of the three selections.
    """
    result = {"text": None, "aff": None, "votes": None}

    left_chars = chars[chars["x0rel"] < 125]
    if len(left_chars):
        result["text"] = collate_chars(left_chars)

    right_chars = chars[chars["x0rel"] > 155]
    if len(right_chars):
        result["votes"] = int(collate_chars(right_chars))

    in_middle = (chars["x0rel"] > 125) & (chars["x0rel"] < 155)
    mid_chars = chars[in_middle]
    if len(mid_chars):
        result["aff"] = collate_chars(mid_chars)

    return result
github jsvine / pdfplumber / tests / test-la-precinct-bulletin-2014-p1.py View on Github external
def registered_voters(self):
        """Return the registered-voter count read from the "h2" region.

        The characters inside ``self.bboxes["h2"]`` are grouped by their
        ``top`` value and each group is collated into text; the second
        group (position 1) must start with ``"<digits> REGISTERED VOTERS"``,
        and the digits are returned as an ``int``.
        """
        header_chars = within_bbox(self.chars, self.bboxes["h2"])
        text_by_top = header_chars.groupby("top").apply(collate_chars)
        second_line = text_by_top.iloc[1]
        found = re.match(r"(\d+) REGISTERED VOTERS", second_line)
        return int(found.group(1))
github jsvine / pdfplumber / tests / test-la-precinct-bulletin-2014-p1.py View on Github external
def parse_results_line(chars):
    """Parse one results line into ``text``, ``aff`` and ``votes``.

    Three boolean masks over ``chars["x0rel"]`` pick out the fields:
    values under 125 form the text, values strictly between 125 and
    155 form the affiliation, and values over 155 form the vote count
    (converted to ``int``). An empty selection produces ``None``.

    NOTE(review): characters with ``x0rel`` equal to 125 or 155 fall
    outside all three masks because the comparisons are strict.
    """
    positions = chars["x0rel"]
    is_left = positions < 125
    is_right = positions > 155
    is_mid = (positions > 125) & (positions < 155)

    left_part = chars[is_left]
    text = collate_chars(left_part) if len(left_part) else None

    right_part = chars[is_right]
    votes = int(collate_chars(right_part)) if len(right_part) else None

    mid_part = chars[is_mid]
    aff = collate_chars(mid_part) if len(mid_part) else None

    return {"text": text, "aff": aff, "votes": votes}
github jsvine / pdfplumber / tests / test-nics-background-checks-2015-11.py View on Github external
return int(x.replace(",", ""))

        def parse_row(row):
            """Build a dict for one row, keyed by the matching COLUMNS entry.

            Each cell is passed through ``parse_value`` together with
            its position in the row.
            """
            return {
                COLUMNS[position]: parse_value(position, cell)
                for position, cell in enumerate(row)
            }

        # Convert every raw table row into a dict keyed by column name.
        parsed_table = [ parse_row(row) for row in table ]

        # [1:] because first column is state name
        for c in COLUMNS[1:]:
            # The last row's value is used as the reported total for this column.
            total = parsed_table[-1][c]
            # Sum the column over every row (the last one included),
            # counting falsy cells such as None as 0.
            colsum = sum(row[c] or 0 for row in parsed_table)
            # Because the sum also includes the last row, the expected value is
            # twice the total (presumably the last row is a totals row -- confirm).
            assert(colsum == (total * 2))

        # Collate the characters inside the box (0, 35, PDF_WIDTH, 65) and
        # check that they read as the expected month heading.
        month_chars = within_bbox(page.chars, (0, 35, self.PDF_WIDTH, 65))
        month_text = collate_chars(month_chars)
        assert(month_text == "November - 2015")
github jsvine / pdfplumber / tests / test-nics-background-checks-2015-11.py View on Github external
def parse_value(x):
            """Convert a cell to ``int``, or ``None`` when it is blank.

            Null values (per ``pd.isnull``) and empty strings give
            ``None``; anything else has its commas removed and is then
            parsed with ``int``.
            """
            blank = pd.isnull(x) or x == ""
            if not blank:
                digits = x.replace(",", "")
                return int(digits)
            return None

        # Label the table's columns with the expected column names.
        table.columns = COLUMNS
        # Run parse_value over every cell of every column except the first.
        table[table.columns[1:]] = table[table.columns[1:]].applymap(parse_value)

        # [1:] because first column is state name
        for c in COLUMNS[1:]:
            # The last row's value is used as the reported total for this column.
            total = table[c].iloc[-1]
            # Sum the whole column, last row included.
            colsum = table[c].sum()
            # Because the sum also includes the last row, the expected value is
            # twice the total (presumably the last row is a totals row -- confirm).
            assert(colsum == (total * 2))

        # Collate the characters inside the box (0, 35, PDF_WIDTH, 65) and
        # check that they read as the expected month heading.
        month_chars = within_bbox(page.chars, (0, 35, self.PDF_WIDTH, 65))
        month_text = collate_chars(month_chars)
        assert(month_text == "November - 2015")