How to use the ocrd.utils.getLogger function in ocrd

To help you get started, we’ve selected a few ocrd examples based on popular ways the function is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github OCR-D / core / ocrd / resolver_cache.py View on Github external
import os

from ocrd.constants import DEFAULT_CACHE_FOLDER

from ocrd.utils import getLogger, safe_filename
log = getLogger('ocrd.cache')

class ResolverCache(object):
    """
    Cache of downloads, based on URL.

    Args:
        cache_directory (string): Where to store cached files

    """

    def __init__(self, cache_directory=DEFAULT_CACHE_FOLDER):
        """
        Instantiate a cache
        """
        self.directory = cache_directory
        if not os.path.isdir(self.directory):
github OCR-D / core / ocrd / processor / __init__.py View on Github external
from ocrd.utils import getLogger
log = getLogger('ocrd.processor')

def run_processor(processor, mets_url=None, resolver=None, workspace=None):
    """
    Run a processor against a workspace and persist the workspace afterwards.

    If no ``workspace`` is supplied, one is created by calling
    ``resolver.create_workspace(mets_url)``; in that case both ``resolver``
    and ``mets_url`` must be given.

    Args:
        processor: Callable invoked with the workspace; ``process()`` is
            called on whatever it returns.
        mets_url: Passed to ``resolver.create_workspace`` when a workspace
            has to be created.
        resolver: Object providing ``create_workspace``; only needed when
            ``workspace`` is None.
        workspace: Existing workspace to operate on, if any.

    Raises:
        Exception: If ``workspace`` is None and ``resolver`` or ``mets_url``
            is missing (``resolver`` is checked first).
    """
    active_workspace = workspace
    if active_workspace is None:
        # A workspace has to be built, which needs both a resolver and a URL.
        if resolver is None:
            raise Exception("Need to pass a resolver to create a workspace")
        if mets_url is None:
            raise Exception("Need to pass mets_url to create a workspace")
        active_workspace = resolver.create_workspace(mets_url)

    log.debug("Running processor %s", processor)
    processor_instance = processor(active_workspace)
    processor_instance.process()
    active_workspace.persist()

class Processor(object):
github OCR-D / core / ocrd / validator.py View on Github external
import json
import re

from jsonschema import Draft4Validator, validators # pylint: disable=import-error

from ocrd.constants import FILE_GROUP_CATEGORIES, FILE_GROUP_PREFIX, OCRD_TOOL_SCHEMA
from ocrd.utils import getLogger

log = getLogger('ocrd.validator')


# http://python-jsonschema.readthedocs.io/en/latest/faq/
def extend_with_default(validator_class):
    """
    Build a validator class whose "properties" check also fills in defaults.

    The returned class is produced by ``validators.extend`` and replaces the
    "properties" validator of ``validator_class`` with a wrapper that first
    copies each subschema's "default" into the instance (only for keys the
    instance does not already have, via ``setdefault``) and then yields the
    errors reported by the original "properties" validator.

    Args:
        validator_class: Validator class exposing a ``VALIDATORS`` mapping
            with a "properties" entry.

    Returns:
        The result of ``validators.extend(validator_class, ...)``.
    """
    original_properties_check = validator_class.VALIDATORS["properties"]

    def _fill_defaults(validator, properties, instance, schema):
        # Inject defaults before delegating, mutating the instance in place.
        for name in properties:
            property_schema = properties[name]
            if "default" not in property_schema:
                continue
            instance.setdefault(name, property_schema["default"])

        # Re-emit every error from the stock "properties" validator.
        errors = original_properties_check(validator, properties, instance, schema)
        for problem in errors:
            yield problem

    overrides = {"properties": _fill_defaults}
    return validators.extend(validator_class, overrides)
github OCR-D / core / ocrd / processor / recognize / tesserocr.py View on Github external
from __future__ import absolute_import

from ocrd.model import OcrdPage
from ocrd.processor.base import Processor
from ocrd.utils import getLogger, mets_file_id
from ocrd.constants import MIMETYPE_PAGE, TESSDATA_PREFIX

import tesserocr

log = getLogger('processor.Tesseract3Recognizer')

# Default recognition model, computed once at import time: the last item of
# the second element returned by tesserocr.get_languages().
# NOTE(review): presumably that second element is the list of installed
# languages — confirm against the tesserocr API; if it can be empty, the
# [-1] index would fail at import.
DEFAULT_MODEL = tesserocr.get_languages()[1][-1]

class Tesseract3Recognizer(Processor):

    def process(self):
        """
        Performs the (text) recognition.
        """
        with tesserocr.PyTessBaseAPI(path=TESSDATA_PREFIX, lang=DEFAULT_MODEL) as tessapi:
            log.info("Using model %s in %s for recognition", tesserocr.get_languages()[0], tesserocr.get_languages()[1][-1])
            tessapi.SetPageSegMode(tesserocr.PSM.SINGLE_LINE)
            for (n, input_file) in enumerate(self.input_files):
                log.info("INPUT FILE %i / %s", n, input_file)
                self.workspace.download_file(input_file)
                page = OcrdPage.from_file(input_file)
github OCR-D / core / ocrd / processor / segment_line / tesserocr.py View on Github external
from __future__ import absolute_import

from ocrd.model import OcrdPage
from ocrd.processor.base import Processor
from ocrd.utils import getLogger, mets_file_id
from ocrd.constants import MIMETYPE_PAGE, TESSDATA_PREFIX

import tesserocr

log = getLogger('processor.segment_line.tesserocr')

class Tesseract3LineSegmenter(Processor):

    def process(self):
        """
        Performs the line segmentation.
        """
        with tesserocr.PyTessBaseAPI(path=TESSDATA_PREFIX) as tessapi:
            for (n, input_file) in enumerate(self.input_files):
                page = OcrdPage.from_file(self.workspace.download_file(input_file))
                image_url = page.imageFileName
                for region in page.list_textregions():
                    log.debug("Detecting lines in %s with tesseract", region)
                    image = self.workspace.resolve_image_as_pil(image_url, region.coords)
                    tessapi.SetImage(image)
                    for component in tessapi.GetComponentImages(tesserocr.RIL.TEXTLINE, True):
github OCR-D / core / ocrd / processor / segment_region / tesserocr.py View on Github external
from __future__ import absolute_import
import os

from ocrd.model import OcrdPage
from ocrd.processor.base import Processor
from ocrd.utils import getLogger, mets_file_id
from ocrd.constants import MIMETYPE_PAGE, TESSDATA_PREFIX

import tesserocr

log = getLogger('Tesseract3RegionSegmenter')

class Tesseract3RegionSegmenter(Processor):

    def process(self):
        """
        Performs the region segmentation.
        """
        with tesserocr.PyTessBaseAPI(path=TESSDATA_PREFIX) as tessapi:
            for (n, input_file) in enumerate(self.input_files):
                page = OcrdPage.from_file(self.workspace.download_file(input_file))
                image = self.workspace.resolve_image_as_pil(page.imageFileName)
                log.debug("Detecting regions with tesseract")
                tessapi.SetImage(image)
                for component in tessapi.GetComponentImages(tesserocr.RIL.BLOCK, True):
                    box, index = component[1], component[2]
                    # the region reference in the reading order element