How to use the tesserocr.OEM.LSTM_ONLY enum value in tesserocr

To help you get started, we’ve selected a few tesserocr examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github alephdata / aleph / services / recognize-text / textrecognizer / recognize.py View on Github external
def configure_engine(self, languages, mode):
        """Return the OCR engine cached on ``self.tl``, set up as requested.

        An engine is created with the LSTM-only engine mode when none is
        cached yet. An existing engine is re-initialised if it was loaded
        with a different language string, and its page segmentation mode is
        changed only when it differs from ``mode``.

        :param languages: language string handed to Tesseract.
        :param mode: page segmentation mode the engine should end up in.
        :return: the configured ``PyTessBaseAPI`` instance.
        """
        engine = getattr(self.tl, 'api', None)
        if engine is None:
            # First use on this holder: build the engine and cache it.
            engine = PyTessBaseAPI(lang=languages, oem=OEM.LSTM_ONLY)
            self.tl.api = engine

        loaded_languages = engine.GetInitLanguagesAsString()
        if languages != loaded_languages:
            engine.Init(lang=languages, oem=OEM.LSTM_ONLY)

        # Checked after any re-init so the requested mode always wins.
        current_mode = engine.GetPageSegMode()
        if mode != current_mode:
            engine.SetPageSegMode(mode)
        return engine
github alephdata / aleph / services / ingest-file / ingestors / support / ocr.py View on Github external
def configure_engine(self, languages):
        """Return the OCR engine cached on ``self.tl``, loaded for ``languages``.

        The engine is created on first use with the LSTM-only engine mode and
        automatic page segmentation with orientation/script detection
        (``PSM.AUTO_OSD``). If the cached engine was initialised with a
        different language string it is re-initialised in place.

        :param languages: language string handed to Tesseract.
        :return: the configured ``PyTessBaseAPI`` instance.
        """
        # Imported lazily so the module can be loaded without tesserocr.
        from tesserocr import PyTessBaseAPI, PSM, OEM
        # NOTE(review): self.tl is presumably a threading.local - confirm.
        if not hasattr(self.tl, 'api') or self.tl.api is None:
            log.info("Configuring OCR engine (%s)", languages)
            self.tl.api = PyTessBaseAPI(lang=languages,
                                        oem=OEM.LSTM_ONLY,
                                        psm=PSM.AUTO_OSD)
        if languages != self.tl.api.GetInitLanguagesAsString():
            log.info("Re-initialising OCR engine (%s)", languages)
            self.tl.api.Init(lang=languages, oem=OEM.LSTM_ONLY)
            # Init() is not given a page segmentation mode here, so the
            # engine would fall back to its default instead of the AUTO_OSD
            # mode chosen at construction time; restore it explicitly.
            self.tl.api.SetPageSegMode(PSM.AUTO_OSD)
        return self.tl.api
github alephdata / aleph / services / ingest-file / ingestors / support / ocr.py View on Github external
def configure_engine(self, languages):
        """Return the cached Tesseract engine, (re)initialised for ``languages``.

        On first use an engine is built with ``OEM.LSTM_ONLY`` and
        ``PSM.AUTO_OSD`` and stored on ``self.tl``. When the stored engine
        reports a different initialised language string, it is re-initialised
        for the new languages and its page segmentation mode is re-applied.

        :param languages: language string handed to Tesseract.
        :return: the configured ``PyTessBaseAPI`` instance.
        """
        # Function-scope import: tesserocr is only needed when OCR is used.
        from tesserocr import PyTessBaseAPI, PSM, OEM
        if not hasattr(self.tl, 'api') or self.tl.api is None:
            log.info("Configuring OCR engine (%s)", languages)
            self.tl.api = PyTessBaseAPI(lang=languages,
                                        oem=OEM.LSTM_ONLY,
                                        psm=PSM.AUTO_OSD)
        if languages != self.tl.api.GetInitLanguagesAsString():
            log.info("Re-initialising OCR engine (%s)", languages)
            self.tl.api.Init(lang=languages, oem=OEM.LSTM_ONLY)
            # Re-initialising without a psm argument drops the AUTO_OSD mode
            # requested when the engine was created, so set it again to keep
            # behaviour the same before and after a language switch.
            self.tl.api.SetPageSegMode(PSM.AUTO_OSD)
        return self.tl.api
github alephdata / aleph / services / recognize-text / textrecognizer / recognize.py View on Github external
def configure_engine(self, languages, mode):
        """Fetch (creating if needed) the engine stored on ``self.tl``.

        The engine is brought in line with the requested language string and
        page segmentation mode before it is returned; nothing is touched when
        it already matches.

        :param languages: language string handed to Tesseract.
        :param mode: desired page segmentation mode.
        :return: the ``PyTessBaseAPI`` instance held in ``self.tl.api``.
        """
        store = self.tl
        missing = not hasattr(store, 'api') or store.api is None
        if missing:
            # Nothing cached yet: create an LSTM-only engine.
            store.api = PyTessBaseAPI(lang=languages, oem=OEM.LSTM_ONLY)
        api = store.api

        language_changed = languages != api.GetInitLanguagesAsString()
        if language_changed:
            api.Init(lang=languages, oem=OEM.LSTM_ONLY)

        # Evaluated after a possible re-init, which may have changed the mode.
        mode_changed = mode != api.GetPageSegMode()
        if mode_changed:
            api.SetPageSegMode(mode)
        return api
github alephdata / memorious / memorious / helpers / ocr.py View on Github external
def get_ocr():
    """Return the shared OCR engine, creating it on first call.

    The result is cached on ``settings._ocr``. If ``tesserocr`` cannot be
    imported, a warning is logged and ``None`` is cached and returned, so
    callers must handle a missing engine themselves.
    """
    # Already resolved (possibly to None) by an earlier call.
    if hasattr(settings, '_ocr'):
        return settings._ocr

    try:
        from tesserocr import PyTessBaseAPI, PSM, OEM
        log.info("Configuring OCR engine...")
        engine = PyTessBaseAPI(
            lang=LANGUAGES,
            oem=OEM.LSTM_ONLY,
            psm=PSM.AUTO_OSD,
        )
    except ImportError:
        log.warning("OCR engine is not available")
        engine = None

    settings._ocr = engine
    return settings._ocr