How to use tesserocr - 10 common examples

To help you get started, we’ve selected a few tesserocr examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github sirfz / tesserocr / tests / test_api.py View on Github external
def test_layout_getcomponents(self):
        """Check that block-level component extraction yields a large first block.

        Initializes the API, loads the test image from its file path and asks
        for the BLOCK-level component images. The first returned entry must
        expose a box with ``'w'`` and ``'h'`` keys whose product exceeds 400000.
        """
        self._api.Init()
        self._api.SetImageFile(self._image_file)
        components = self._api.GetComponentImages(tesserocr.RIL.BLOCK, True)
        # A falsy result means nothing was extracted at all.
        self.assertTrue(components)
        # Every entry unpacks into four items; only the second (the box) is used.
        first_component = components[0]
        _, box, _, _ = first_component
        for dimension in ('w', 'h'):
            self.assertIn(dimension, box)
        # The first block is expected to be quite large.
        self.assertGreater(box['w'] * box['h'], 400000)
github sirfz / tesserocr / tests / test_api.py View on Github external
def test_layout_boundingbox(self):
        """Check that layout analysis reports a large block-level bounding box.

        Runs ``AnalyseLayout`` on the test image, verifies that the returned
        layout object is truthy and not empty at BLOCK level, then asserts
        that the area of its BLOCK bounding box exceeds 400000.
        """
        self._api.Init()
        self._api.SetImageFile(self._image_file)
        page_layout = self._api.AnalyseLayout()
        # The analysis must produce something, and it must contain a block.
        self.assertTrue(page_layout)
        block_level = tesserocr.RIL.BLOCK
        self.assertFalse(page_layout.Empty(block_level))
        bbox = page_layout.BoundingBox(block_level)
        self.assertIsNot(bbox, None)
        # The box unpacks into two corner points (x0, y0) and (x1, y1).
        x_start, y_start, x_end, y_end = bbox
        width = x_end - x_start
        height = y_end - y_start
        # The block is expected to be quite large.
        self.assertGreater(width * height, 400000)
github sirfz / tesserocr / tests / test_api.py View on Github external
def test_init(self):
        """Test Init calls with different lang and oem.

        Re-initializes the shared API twice with different language strings
        and checks each is reported back unchanged, then re-initializes with
        an explicit engine mode and checks ``oem()`` returns it.
        """
        api = self._api
        for lang in ('eng+osd', 'eng'):
            api.Init(lang=lang)
            self.assertEqual(api.GetInitLanguagesAsString(), lang)
        engine_mode = tesserocr.OEM.TESSERACT_ONLY
        api.Init(oem=engine_mode)
        self.assertEqual(api.oem(), engine_mode)
github sirfz / tesserocr / tests / test_api.py View on Github external
def test_init(self):
        """Test Init calls with different lang and oem.

        Each language string passed to ``Init`` must be returned verbatim by
        ``GetInitLanguagesAsString``; an engine mode passed via ``oem`` must
        be returned by ``oem()``.
        """
        languages = ('eng+osd', 'eng')
        for requested in languages:
            self._api.Init(lang=requested)
            reported = self._api.GetInitLanguagesAsString()
            self.assertEqual(reported, requested)

        requested_oem = tesserocr.OEM.TESSERACT_ONLY
        self._api.Init(oem=requested_oem)
        reported_oem = self._api.oem()
        self.assertEqual(reported_oem, requested_oem)
github UB-Mannheim / ocromore / test_tesserocr.py View on Github external
def check_test():
    """Print each recognized symbol of a fixed image region with its choices.

    Uses the module-level ``api`` and ``image``: loads the image, sets the
    ``save_blob_choices`` variable, limits processing to a hard-coded
    rectangle and runs recognition. For every symbol-level result it prints
    the symbol text and confidence (when the text is non-empty), followed by
    one line per entry of the symbol's choice iterator, and a separator line.
    """
    api.SetImageFile(image)
    api.SetVariable("save_blob_choices", "T")
    # NOTE(review): per the tesserocr docs the arguments are
    # (left, top, width, height) -- confirm against the target image.
    api.SetRectangle(37, 228, 548, 31)
    api.Recognize()

    ri = api.GetIterator()
    level = RIL.SYMBOL
    for r in iterate_level(ri, level):
        symbol = r.GetUTF8Text(level)  # r == ri
        conf = r.Confidence(level)
        if symbol:
            print("symbol ", symbol, " confidence", conf)
        indent = False
        ci = r.GetChoiceIterator()
        for c in ci:
            # The prefixes must stay on the same line as the choice text.
            # A Python 2 style trailing comma after print(...) does not
            # suppress the newline when print is a function, so use end=''.
            if indent:
                print('\t\t ', end='')
            print('\t- ', end='')
            choice = c.GetUTF8Text()  # c == ci
            print(u'{} conf: {}'.format(choice, c.Confidence()))
            indent = True
        print('---------------------------------------------')
github UB-Mannheim / ocromore / test_tesserocr.py View on Github external
def check_test():
    """Print each recognized symbol of a fixed image region with its choices.

    Uses the module-level ``api`` and ``image``: loads the image, sets the
    ``save_blob_choices`` variable, limits processing to a hard-coded
    rectangle and runs recognition. For every symbol-level result it prints
    the symbol text and confidence (when the text is non-empty), followed by
    one line per entry of the symbol's choice iterator, and a separator line.
    """
    api.SetImageFile(image)
    api.SetVariable("save_blob_choices", "T")
    # NOTE(review): per the tesserocr docs the arguments are
    # (left, top, width, height) -- confirm against the target image.
    api.SetRectangle(37, 228, 548, 31)
    api.Recognize()

    ri = api.GetIterator()
    level = RIL.SYMBOL
    for r in iterate_level(ri, level):
        symbol = r.GetUTF8Text(level)  # r == ri
        conf = r.Confidence(level)
        if symbol:
            print("symbol ", symbol, " confidence", conf)
        indent = False
        ci = r.GetChoiceIterator()
        for c in ci:
            # The prefixes must stay on the same line as the choice text.
            # A Python 2 style trailing comma after print(...) does not
            # suppress the newline when print is a function, so use end=''.
            if indent:
                print('\t\t ', end='')
            print('\t- ', end='')
            choice = c.GetUTF8Text()  # c == ci
            print(u'{} conf: {}'.format(choice, c.Confidence()))
            indent = True
        print('---------------------------------------------')
github UB-Mannheim / ocromore / test_tesserocr.py View on Github external
def orientation_stuff():
    """Detect and print the orientation and script of a sample image.

    Creates a dedicated API instance in OSD-only page segmentation mode,
    loads a hard-coded sample image and prints the detection result.

    Original author's note (translated from German): both ``DetectOS()`` and
    ``DetectOrientationScript()`` caused the error
    'Segmentation fault (core dumped)' in their environment.
    """
    # The context manager releases the API instance even if detection raises.
    with PyTessBaseAPI(psm=PSM.OSD_ONLY, path=MY_TESSDATA_PATH) as api2:
        api2.SetImageFile('/home/johannes/Repos/tesseract/testing/eurotext.tif')
        # Named `osd` rather than `os` to avoid shadowing the stdlib module.
        osd = api2.DetectOrientationScript()

    # NOTE(review): tesserocr documents a None result when nothing is
    # detected -- confirm for the installed version.
    if not osd:
        print("Orientation/script detection returned no result")
        return

    # DetectOrientationScript() reports 'orient_deg', 'orient_conf',
    # 'script_name' and 'script_conf'; the template below uses the DetectOS()
    # key names, so map the values explicitly instead of unpacking the dict.
    print("Orientation: {orientation}\nOrientation confidence: {oconfidence}\n Script: {script}\nScript confidence: {sconfidence}".format(
        orientation=osd['orient_deg'],
        oconfidence=osd['orient_conf'],
        script=osd['script_name'],
        sconfidence=osd['script_conf'],
    ))
github sirfz / tesserocr / tests / test_api.py View on Github external
def setUp(self):
        """Create the API instance used by each test.

        When PIL is available, the test image is also opened from
        ``self._image_file`` and stored on ``self._image``.
        """
        if pil_installed:
            image_path = self._image_file
            with open(image_path, 'rb') as image_stream:
                self._image = Image.open(image_stream)
                # load() is called inside the `with`, i.e. before the
                # underlying file object is closed.
                self._image.load()
        self._api = tesserocr.PyTessBaseAPI(init=True)
github sirfz / tesserocr / tests / test_api.py View on Github external
def test_page_seg_mode(self):
        """Test SetPageSegMode and GetPageSegMode.

        Each mode that is set must be the one reported back by the getter.
        """
        modes = (tesserocr.PSM.SINGLE_WORD, tesserocr.PSM.AUTO)
        for mode in modes:
            self._api.SetPageSegMode(mode)
            self.assertEqual(self._api.GetPageSegMode(), mode)
github sirfz / tesserocr / tests / test_api.py View on Github external
def test_detect_os(self):
        """Test DetectOS and DetectOrientationScript (tesseract v4+).

        Runs orientation/script detection on the test image in OSD-only mode
        and checks that every expected key is present and that the reported
        orientation is 0. ``DetectOrientationScript`` is only exercised when
        ``_TESSERACT_VERSION`` is at least ``0x3999800``.
        """
        self._api.SetPageSegMode(tesserocr.PSM.OSD_ONLY)
        self._api.SetImageFile(self._image_file)
        orientation = self._api.DetectOS()
        # assertIn() returns None, so wrapping the checks in all() would stop
        # after the first key; loop explicitly so every key is verified.
        for key in ('sconfidence', 'oconfidence', 'script', 'orientation'):
            self.assertIn(key, orientation)
        self.assertEqual(orientation['orientation'], 0)
        languages = tesserocr.get_languages()[1]  # this is sorted alphabetically!
        self.assertLess(orientation['script'], len(languages))
        # Because the list is sorted alphabetically, indexing it with the
        # script value does not reliably give the script name, so the name
        # itself is not asserted for DetectOS.
        if _TESSERACT_VERSION >= 0x3999800:
            orientation = self._api.DetectOrientationScript()
            for key in ('orient_deg', 'orient_conf', 'script_name', 'script_conf'):
                self.assertIn(key, orientation)
            self.assertEqual(orientation['orient_deg'], 0)
            self.assertEqual(orientation['script_name'], 'Latin')