How to use pyocr - 10 common examples

To help you get started, we’ve selected a few pyocr examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github openpaperwork / pyocr / tests / tests_tesseract.py View on Github external
def set_builder(self):
        """Store a newly created ``builders.TextBuilder`` in ``self._builder``."""
        text_builder = builders.TextBuilder()
        self._builder = text_builder
github openpaperwork / pyocr / tests / tests_base.py View on Github external
def set_builder(self):
        """Store a newly created ``builders.DigitLineBoxBuilder`` in ``self._builder``."""
        digit_builder = builders.DigitLineBoxBuilder()
        self._builder = digit_builder
github openpaperwork / pyocr / tests / tests_base.py View on Github external
def set_builder(self):
        """Store a newly created ``builders.LineBoxBuilder`` in ``self._builder``."""
        line_builder = builders.LineBoxBuilder()
        self._builder = line_builder
github openpaperwork / pyocr / run_tests.py View on Github external
from tests import tests_libtesseract

if __name__ == '__main__':
    # First pass: describe every tool listed in pyocr.TOOLS, usable or not.
    for ocr_tool in pyocr.TOOLS:
        print("- OCR: %s" % ocr_tool.get_name())
        is_usable = ocr_tool.is_available()
        print("  is_available(): %s" % (str(is_usable)))
        if not is_usable:
            # Nothing more to report for this tool; emit the separator only.
            print("")
            continue
        print("  get_version(): %s" % (str(ocr_tool.get_version())))
        print("  get_available_languages(): ")
        print("    " + ", ".join(ocr_tool.get_available_languages()))
        print("")
    print("")

    # Second pass: list only the tools pyocr reports as available.
    print("OCR tool found:")
    for ocr_tool in pyocr.get_available_tools():
        print("- %s" % ocr_tool.get_name())

    # (backend module, heading to print, module providing get_all_tests()),
    # in the order the suites are run.
    suites = (
        (libtesseract, "Tesseract C-API:", tests_libtesseract),
        (tesseract, "Tesseract SH:", tests_tesseract),
        (cuneiform, "Cuneiform SH:", tests_cuneiform),
    )
    for backend, heading, test_module in suites:
        if not backend.is_available():
            continue
        print("---")
        print(heading)
        unittest.TextTestRunner().run(test_module.get_all_tests())
github openpaperwork / pyocr / run_tests.py View on Github external
print("  is_available(): %s" % (str(available)))
        if available:
            print("  get_version(): %s" % (str(tool.get_version())))
            print("  get_available_languages(): ")
            print("    " + ", ".join(tool.get_available_languages()))
        print("")
    print("")

    print("OCR tool found:")
    for tool in pyocr.get_available_tools():
        print("- %s" % tool.get_name())
    if libtesseract.is_available():
        print("---")
        print("Tesseract C-API:")
        unittest.TextTestRunner().run(tests_libtesseract.get_all_tests())
    if tesseract.is_available():
        print("---")
        print("Tesseract SH:")
        unittest.TextTestRunner().run(tests_tesseract.get_all_tests())
    if cuneiform.is_available():
        print("---")
        print("Cuneiform SH:")
        unittest.TextTestRunner().run(tests_cuneiform.get_all_tests())
github openpaperwork / pyocr / tests / tests_tesseract.py View on Github external
def test_langs(self):
        """Check that the language data the tests depend on is installed.

        Asks ``tesseract.get_available_languages()`` for the installed
        languages and asserts that "eng", "fra" and "jpn" are all among
        them. The first missing language fails the test with a message
        naming the training data that is absent.
        """
        langs = tesseract.get_available_languages()
        # assertIn instead of assertTrue(... in ...): with unittest's long
        # failure messages the report then also shows the languages that
        # were actually returned, not just "False is not true".
        self.assertIn("eng", langs,
                      ("English training does not appear to be installed."
                       " (required for the tests)"))
        self.assertIn("fra", langs,
                      ("French training does not appear to be installed."
                       " (required for the tests)"))
        self.assertIn("jpn", langs,
                      ("Japanese training does not appear to be installed."
                       " (required for the tests)"))
github openpaperwork / pyocr / tests / tests_tesseract.py View on Github external
def test_can_detect_orientation(self):
        """Assert that ``tesseract.can_detect_orientation()`` returns a truthy value."""
        supported = tesseract.can_detect_orientation()
        self.assertTrue(supported)
github openpaperwork / pyocr / tests / tests_tesseract.py View on Github external
def test_orientation_0(self):
        """Assert that orientation detection on ``test.png`` reports angle 0.

        The image is located through ``self._path_to_img``, opened with
        ``base.Image.open`` and passed to ``tesseract.detect_orientation``
        with ``lang='eng'``; the ``'angle'`` entry of the result must be 0.
        """
        image_file = self._path_to_img("test.png")
        picture = base.Image.open(image_file)
        detected = tesseract.detect_orientation(picture, lang='eng')
        self.assertEqual(detected['angle'], 0)
github openpaperwork / pyocr / tests / tests_tesseract.py View on Github external
def set_builder(self):
        """Store a newly created ``tesseract.CharBoxBuilder`` in ``self._builder``."""
        char_builder = tesseract.CharBoxBuilder()
        self._builder = char_builder
github openpaperwork / pyocr / tests / tests_base.py View on Github external
def _read_from_img(self, image_path, lang=None):
        """Run OCR on the image at ``image_path`` and return the sorted result.

        The image is opened with ``Image.open`` and passed to
        ``tesseract.image_to_string`` together with ``lang`` and the builder
        held in ``self._builder``. The object that call returns is sorted in
        place through its own ``sort()`` method and then returned.
        """
        run_ocr = tesseract.image_to_string
        page = Image.open(image_path)
        found = run_ocr(page, lang=lang, builder=self._builder)
        # In-place sort: the very object produced by the OCR call is returned.
        found.sort()
        return found