How to use the tesserocr.RIL.SYMBOL function in tesserocr

To help you get started, we’ve selected a few tesserocr examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github UB-Mannheim / ocromore / test_tesserocr.py View on Github external
def check_test():
    """Print each recognized symbol of a fixed image region with its choices.

    Loads ``image`` into the module-level ``api``, restricts recognition to
    the rectangle (37, 228, 548, 31), runs recognition and walks the result
    at ``RIL.SYMBOL`` level.  For every symbol the text and confidence are
    printed, followed by one line per entry of the symbol's choice iterator
    and a separator line.

    The choice lines are emitted with a single ``print`` call each.  The
    earlier ``print('...'),`` form only suppressed the newline under
    Python 2; under Python 3 it put the prefix and the choice text on
    separate lines.
    """
    api.SetImageFile(image)
    # NOTE(review): presumably required so that GetChoiceIterator() yields
    # alternatives -- confirm against the Tesseract parameter documentation.
    api.SetVariable("save_blob_choices", "T")
    api.SetRectangle(37, 228, 548, 31)
    api.Recognize()

    ri = api.GetIterator()
    level = RIL.SYMBOL
    for r in iterate_level(ri, level):
        symbol = r.GetUTF8Text(level)  # r == ri
        conf = r.Confidence(level)
        if symbol:
            print("symbol ", symbol, " confidence", conf)
        for choice_no, c in enumerate(r.GetChoiceIterator()):
            # Every choice after the first gets an extra leading indent.
            prefix = '\t- ' if choice_no == 0 else '\t\t \t- '
            choice = c.GetUTF8Text()
            print(u'{}{} conf: {}'.format(prefix, choice, c.Confidence()))
        print('---------------------------------------------')
github OCR-D / ocrd_tesserocr / ocrd_tesserocr / recognize.py View on Github external
def _process_glyphs_in_word(self, result_it, word, word_xywh):
        """Attach a glyph, with its recognition alternatives, to ``word``.

        For the symbol at the current position of ``result_it`` a
        ``GlyphType`` is created (id derived from ``word.id`` and a running
        counter), given coordinates computed from the symbol's bounding box,
        and added to ``word``.  The entries of the symbol's choice iterator
        are then added to the glyph as ``TextEquivType`` alternatives until
        a threshold is hit.

        Args:
            result_it: result iterator queried at ``RIL.SYMBOL`` level; a
                falsy or empty iterator makes the method return at once.
            word: object providing ``id`` and ``add_Glyph``.
            word_xywh: passed through to ``coordinates_for_segment``.

        NOTE(review): this excerpt shows no call that advances ``result_it``
        or increments ``glyph_no`` inside the ``while`` loop; the listing
        appears to be cut off before the end of the loop body -- confirm
        against the complete source.
        """
        if not result_it or result_it.Empty(RIL.SYMBOL):
            LOG.debug("No glyph in word '%s'", word.id)
            return
        # iterate until IsAtFinalElement(RIL.WORD, RIL.SYMBOL):
        glyph_no = 0
        while result_it and not result_it.Empty(RIL.SYMBOL):
            glyph_id = '%s_glyph%04d' % (word.id, glyph_no)
            LOG.debug("Decoding text in glyph '%s'", glyph_id)
            #  glyph_text = result_it.GetUTF8Text(RIL.SYMBOL) # equals first choice?
            # Confidence is divided by 100 (presumably percent -> fraction; confirm).
            glyph_conf = result_it.Confidence(RIL.SYMBOL)/100 # equals first choice?
            #LOG.debug('best glyph: "%s" [%f]', glyph_text, glyph_conf)
            bbox = result_it.BoundingBox(RIL.SYMBOL)
            # convert to absolute coordinates:
            # (the configured 'padding' parameter is subtracted from the result)
            polygon = coordinates_for_segment(polygon_from_x0y0x1y1(bbox),
                                              None, word_xywh) - self.parameter['padding']
            points = points_from_polygon(polygon)
            glyph = GlyphType(id=glyph_id, Coords=CoordsType(points))
            word.add_Glyph(glyph)
            choice_it = result_it.GetChoiceIterator()
            for (choice_no, choice) in enumerate(choice_it):
                alternative_text = choice.GetUTF8Text()
                alternative_conf = choice.Confidence()/100
                #LOG.debug('alternative glyph: "%s" [%f]', alternative_text, alternative_conf)
                # Stop collecting alternatives once one falls too far below the
                # symbol's own confidence, or once the index exceeds the limit.
                if (glyph_conf - alternative_conf > CHOICE_THRESHOLD_CONF or
                    choice_no > CHOICE_THRESHOLD_NUM):
                    break
                # todo: consider SymbolIsSuperscript (TextStyle), SymbolIsDropcap (RelationType) etc
                glyph.add_TextEquiv(TextEquivType(index=choice_no, Unicode=alternative_text, conf=alternative_conf))
github OCR-D / ocrd_tesserocr / ocrd_tesserocr / recognize.py View on Github external
def _process_glyphs_in_word(self, result_it, word, word_xywh):
        """Create a glyph for the current symbol of ``result_it`` and add it to ``word``.

        Returns immediately (after a debug message) when ``result_it`` is
        falsy or empty at ``RIL.SYMBOL`` level.  Otherwise a ``GlyphType``
        is built from the symbol's bounding box and added to ``word``, and
        the symbol's choice iterator is walked until a threshold is hit.

        NOTE(review): the excerpt ends at the ``break`` of the choice loop.
        Neither the use of the accepted alternatives nor any advance of
        ``result_it`` is visible here, so the listing appears truncated --
        confirm against the complete source.
        """
        if not result_it or result_it.Empty(RIL.SYMBOL):
            LOG.debug("No glyph in word '%s'", word.id)
            return
        # iterate until IsAtFinalElement(RIL.WORD, RIL.SYMBOL):
        glyph_no = 0
        while result_it and not result_it.Empty(RIL.SYMBOL):
            # Glyph id: the word id plus a zero-padded four-digit counter.
            glyph_id = '%s_glyph%04d' % (word.id, glyph_no)
            LOG.debug("Decoding text in glyph '%s'", glyph_id)
            #  glyph_text = result_it.GetUTF8Text(RIL.SYMBOL) # equals first choice?
            glyph_conf = result_it.Confidence(RIL.SYMBOL)/100 # equals first choice?
            #LOG.debug('best glyph: "%s" [%f]', glyph_text, glyph_conf)
            bbox = result_it.BoundingBox(RIL.SYMBOL)
            # convert to absolute coordinates:
            polygon = coordinates_for_segment(polygon_from_x0y0x1y1(bbox),
                                              None, word_xywh) - self.parameter['padding']
            points = points_from_polygon(polygon)
            glyph = GlyphType(id=glyph_id, Coords=CoordsType(points))
            word.add_Glyph(glyph)
            choice_it = result_it.GetChoiceIterator()
            for (choice_no, choice) in enumerate(choice_it):
                alternative_text = choice.GetUTF8Text()
                alternative_conf = choice.Confidence()/100
                #LOG.debug('alternative glyph: "%s" [%f]', alternative_text, alternative_conf)
                # Leave the loop when the confidence gap to the symbol exceeds
                # CHOICE_THRESHOLD_CONF or the index exceeds CHOICE_THRESHOLD_NUM.
                if (glyph_conf - alternative_conf > CHOICE_THRESHOLD_CONF or
                    choice_no > CHOICE_THRESHOLD_NUM):
                    break
github OCR-D / ocrd_tesserocr / ocrd_tesserocr / recognize.py View on Github external
glyph_image.height + 2 * pad), bg)
                padded.paste(glyph_image, (pad, pad))
                tessapi.SetImage(padded)
            else:
                tessapi.SetImage(glyph_image)
            tessapi.SetPageSegMode(PSM.SINGLE_CHAR)
            LOG.debug("Recognizing text in glyph '%s'", glyph.id)
            if glyph.get_TextEquiv():
                LOG.warning("Glyph '%s' already contained text results", glyph.id)
                glyph.set_TextEquiv([])
            #glyph_text = tessapi.GetUTF8Text().rstrip("\n\f")
            glyph_conf = tessapi.AllWordConfidences()
            glyph_conf = glyph_conf[0]/100.0 if glyph_conf else 1.0
            #LOG.debug('best glyph: "%s" [%f]', glyph_text, glyph_conf)
            result_it = tessapi.GetIterator()
            if not result_it or result_it.Empty(RIL.SYMBOL):
                LOG.error("No text in glyph '%s'", glyph.id)
                continue
            choice_it = result_it.GetChoiceIterator()
            for (choice_no, choice) in enumerate(choice_it):
                alternative_text = choice.GetUTF8Text()
                alternative_conf = choice.Confidence()/100
                #LOG.debug('alternative glyph: "%s" [%f]', alternative_text, alternative_conf)
                if (glyph_conf - alternative_conf > CHOICE_THRESHOLD_CONF or
                    choice_no > CHOICE_THRESHOLD_NUM):
                    break
                # todo: consider SymbolIsSuperscript (TextStyle), SymbolIsDropcap (RelationType) etc
                glyph.add_TextEquiv(TextEquivType(index=choice_no, Unicode=alternative_text, conf=alternative_conf))
github OCR-D / ocrd_tesserocr / ocrd_tesserocr / recognize.py View on Github external
def _process_glyphs_in_word(self, result_it, word, word_xywh):
        """Build a ``GlyphType`` for the current symbol of ``result_it``.

        Returns immediately (after a debug message) when ``result_it`` is
        falsy or empty at ``RIL.SYMBOL`` level.  Otherwise the symbol's
        confidence and bounding box are read, the box is converted with
        ``coordinates_for_segment`` using ``word_xywh``, and a ``GlyphType``
        with those coordinates is constructed.

        NOTE(review): the excerpt stops right after the glyph is constructed;
        what happens to the glyph, and how the loop advances, is not visible
        in this part of the listing -- confirm against the complete source.
        """
        if not result_it or result_it.Empty(RIL.SYMBOL):
            LOG.debug("No glyph in word '%s'", word.id)
            return
        # iterate until IsAtFinalElement(RIL.WORD, RIL.SYMBOL):
        glyph_no = 0
        while result_it and not result_it.Empty(RIL.SYMBOL):
            # Glyph id: the word id plus a zero-padded four-digit counter.
            glyph_id = '%s_glyph%04d' % (word.id, glyph_no)
            LOG.debug("Decoding text in glyph '%s'", glyph_id)
            #  glyph_text = result_it.GetUTF8Text(RIL.SYMBOL) # equals first choice?
            glyph_conf = result_it.Confidence(RIL.SYMBOL)/100 # equals first choice?
            #LOG.debug('best glyph: "%s" [%f]', glyph_text, glyph_conf)
            bbox = result_it.BoundingBox(RIL.SYMBOL)
            # convert to absolute coordinates:
            # (the configured 'padding' parameter is subtracted from the result)
            polygon = coordinates_for_segment(polygon_from_x0y0x1y1(bbox),
                                              None, word_xywh) - self.parameter['padding']
            points = points_from_polygon(polygon)
            glyph = GlyphType(id=glyph_id, Coords=CoordsType(points))
github OCR-D / ocrd_tesserocr / ocrd_tesserocr / recognize.py View on Github external
word.add_Glyph(glyph)
            choice_it = result_it.GetChoiceIterator()
            for (choice_no, choice) in enumerate(choice_it):
                alternative_text = choice.GetUTF8Text()
                alternative_conf = choice.Confidence()/100
                #LOG.debug('alternative glyph: "%s" [%f]', alternative_text, alternative_conf)
                if (glyph_conf - alternative_conf > CHOICE_THRESHOLD_CONF or
                    choice_no > CHOICE_THRESHOLD_NUM):
                    break
                # todo: consider SymbolIsSuperscript (TextStyle), SymbolIsDropcap (RelationType) etc
                glyph.add_TextEquiv(TextEquivType(index=choice_no, Unicode=alternative_text, conf=alternative_conf))
            if result_it.IsAtFinalElement(RIL.WORD, RIL.SYMBOL):
                break
            else:
                glyph_no += 1
                result_it.Next(RIL.SYMBOL)