How to use the pdfplumber.utils.filter_edges function in pdfplumber

To help you get started, we've selected a few examples of pdfplumber.utils.filter_edges, based on popular ways the function is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

Source: GitHub, jsvine/pdfplumber, file pdfplumber/table.py (View on GitHub, external link)
}
            else:
                edge = {
                    "x0": desc,
                    "x1": desc,
                    "top": self.page.bbox[1],
                    "bottom": self.page.bbox[3],
                }
            edge["height"] = edge["bottom"] - edge["top"]
            edge["orientation"] = "v"
            return edge

        v_explicit = list(map(v_edge_desc_to_edge, settings["explicit_vertical_lines"]))

        if  v_strat == "lines":
            v_base = utils.filter_edges(self.page.edges, "v")
        elif v_strat == "lines_strict":
            v_base = utils.filter_edges(self.page.edges, "v",
                edge_type="lines")
        elif v_strat == "text":
            v_base = words_to_edges_v(words,
                word_threshold=settings["min_words_vertical"])
        elif v_strat == "explicit":
            v_base = []

        v = v_base + v_explicit
        
        def h_edge_desc_to_edge(desc):
            if isinstance(desc, dict):
                edge = {
                    "x0": desc.get("x0", self.page.bbox[0]),
                    "x1": desc.get("x1", self.page.bbox[2]),
Source: GitHub, jsvine/pdfplumber, file pdfplumber/table.py (View on GitHub, external link)
}
            else:
                edge = {
                    "x0": self.page.bbox[0],
                    "x1": self.page.bbox[2],
                    "top": desc,
                    "bottom": desc,
                }
            edge["width"] = edge["x1"] - edge["x0"]
            edge["orientation"] = "h"
            return edge

        h_explicit = list(map(h_edge_desc_to_edge, settings["explicit_horizontal_lines"]))

        if  h_strat == "lines":
            h_base = utils.filter_edges(self.page.edges, "h")
        elif h_strat == "lines_strict":
            h_base = utils.filter_edges(self.page.edges, "h",
                edge_type="lines")
        elif h_strat == "text":
            h_base = words_to_edges_h(words,
                word_threshold=settings["min_words_horizontal"])
        elif h_strat == "explicit":
            h_base = []

        h = h_base + h_explicit

        edges = list(v) + list(h)
        if settings["snap_tolerance"] > 0 or settings["join_tolerance"] > 0:
            edges = merge_edges(edges,
                snap_tolerance=settings["snap_tolerance"],
                join_tolerance=settings["join_tolerance"],
Source: GitHub, jsvine/pdfplumber, file pdfplumber/table.py (View on GitHub, external link)
edge = {
                    "x0": desc,
                    "x1": desc,
                    "top": self.page.bbox[1],
                    "bottom": self.page.bbox[3],
                }
            edge["height"] = edge["bottom"] - edge["top"]
            edge["orientation"] = "v"
            return edge

        v_explicit = list(map(v_edge_desc_to_edge, settings["explicit_vertical_lines"]))

        if  v_strat == "lines":
            v_base = utils.filter_edges(self.page.edges, "v")
        elif v_strat == "lines_strict":
            v_base = utils.filter_edges(self.page.edges, "v",
                edge_type="lines")
        elif v_strat == "text":
            v_base = words_to_edges_v(words,
                word_threshold=settings["min_words_vertical"])
        elif v_strat == "explicit":
            v_base = []

        v = v_base + v_explicit
        
        def h_edge_desc_to_edge(desc):
            if isinstance(desc, dict):
                edge = {
                    "x0": desc.get("x0", self.page.bbox[0]),
                    "x1": desc.get("x1", self.page.bbox[2]),
                    "top": desc.get("top", desc.get("bottom")),
                    "bottom": desc.get("bottom", desc.get("top")),
Source: GitHub, jsvine/pdfplumber, file pdfplumber/table.py (View on GitHub, external link)
edge_type="lines")
        elif h_strat == "text":
            h_base = words_to_edges_h(words,
                word_threshold=settings["min_words_horizontal"])
        elif h_strat == "explicit":
            h_base = []

        h = h_base + h_explicit

        edges = list(v) + list(h)
        if settings["snap_tolerance"] > 0 or settings["join_tolerance"] > 0:
            edges = merge_edges(edges,
                snap_tolerance=settings["snap_tolerance"],
                join_tolerance=settings["join_tolerance"],
            )
        return utils.filter_edges(edges,
            min_length=settings["edge_min_length"])
Source: GitHub, jsvine/pdfplumber, file pdfplumber/table.py (View on GitHub, external link)
edge = {
                    "x0": self.page.bbox[0],
                    "x1": self.page.bbox[2],
                    "top": desc,
                    "bottom": desc,
                }
            edge["width"] = edge["x1"] - edge["x0"]
            edge["orientation"] = "h"
            return edge

        h_explicit = list(map(h_edge_desc_to_edge, settings["explicit_horizontal_lines"]))

        if  h_strat == "lines":
            h_base = utils.filter_edges(self.page.edges, "h")
        elif h_strat == "lines_strict":
            h_base = utils.filter_edges(self.page.edges, "h",
                edge_type="lines")
        elif h_strat == "text":
            h_base = words_to_edges_h(words,
                word_threshold=settings["min_words_horizontal"])
        elif h_strat == "explicit":
            h_base = []

        h = h_base + h_explicit

        edges = list(v) + list(h)
        if settings["snap_tolerance"] > 0 or settings["join_tolerance"] > 0:
            edges = merge_edges(edges,
                snap_tolerance=settings["snap_tolerance"],
                join_tolerance=settings["join_tolerance"],
            )
        return utils.filter_edges(edges,