How to use ocrd - 10 common examples

To help you get started, we’ve selected a few ocrd examples that show popular ways the library is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github OCR-D / core / tests / test_decorators.py View on Github external
@ocrd_cli_options
def cli_param_dumper(*args, **kwargs):      # pylint: disable=unused-argument
    # Print the value passed as the 'parameter' keyword argument, serialized
    # as JSON. Positional arguments are accepted but not used. A missing
    # 'parameter' key raises KeyError.
    parameter = kwargs['parameter']
    serialized = json.dumps(parameter)
    print(serialized)
github OCR-D / core / tests / test_decorators.py View on Github external
def cli_dummy_processor(*args, **kwargs):
    # Hand every positional and keyword argument, unchanged, to
    # ocrd_cli_wrap_processor with DummyProcessor as the first argument,
    # and return whatever that call returns.
    outcome = ocrd_cli_wrap_processor(DummyProcessor, *args, **kwargs)
    return outcome
github OCR-D / core / tests / validator / test_ocrd_zip_validator.py View on Github external
def setUp(self):
        """Prepare a resolver, a bagger and a workspace on a copy of the test asset.

        Sets ``resolver``, ``bagger``, ``tempdir``, ``bagdir``,
        ``workspace_dir`` and ``workspace`` on the test instance.
        """
        resolver = Resolver()
        self.resolver = resolver
        self.bagger = WorkspaceBagger(resolver)

        # Copy the 'kant_aufklaerung_1784' asset into a directory obtained
        # from mkdtemp(), so the test works on its own copy.
        scratch_dir = mkdtemp()
        self.tempdir = scratch_dir
        bag_path = join(scratch_dir, 'kant_aufklaerung_1784')
        self.bagdir = bag_path
        copytree(assets.path_to('kant_aufklaerung_1784'), bag_path)

        # The workspace is rooted at the 'data' subdirectory of the copy.
        data_path = join(bag_path, 'data')
        self.workspace_dir = data_path
        # NOTE(review): join() with a single argument looks redundant here;
        # kept so the call sequence stays exactly as it was.
        self.workspace = Workspace(resolver, directory=join(data_path))
github OCR-D / core / tests / processor / test_processor.py View on Github external
def setUp(self):
        """Create a resolver and open the workspace for the SBB0000F29300010000 METS asset.

        Sets ``resolver`` and ``workspace`` on the test instance.
        """
        resolver = Resolver()
        self.resolver = resolver
        mets_url = assets.url_of('SBB0000F29300010000/data/mets.xml')
        self.workspace = resolver.workspace_from_url(mets_url)
github OCR-D / core / tests / test_task_sequence.py View on Github external
def test_validate_sequence(self):
        """Check that validate_tasks rejects task lists with unknown input file groups.

        Two cases are exercised against a workspace created from the
        'kant_aufklaerung_1784' METS file:

        * the second task reads from ``FOO``, which the first task does not
          write to;
        * a single task reads from ``IN``.

        Both must raise an exception whose message matches the given regex.
        """
        resolver = Resolver()
        with TemporaryDirectory() as tempdir:
            mets_path = assets.path_to('kant_aufklaerung_1784/data/mets.xml')
            workspace = resolver.workspace_from_url(mets_path, dst_dir=tempdir)

            # Parameter file handed to every task through ``-p``.
            params_path = Path(tempdir, 'params.json')
            params_path.write_text('{"param1": true}')

            # Case 1: second task reads from FOO.
            unknown_input_msg = "Input file group not contained in METS or produced by previous steps: FOO'"
            with self.assertRaisesRegex(Exception, unknown_input_msg):
                # Parsing stays inside the context so that an error raised
                # while parsing is checked against the regex as well.
                commands = [
                    'sample-processor-required-param -I OCR-D-IMG -O OUT1 -p %s' % params_path,
                    'sample-processor-required-param -I FOO -O OUT2 -p %s' % params_path,
                ]
                validate_tasks(list(map(ProcessorTask.parse, commands)), workspace)

            # Case 2: the only task reads from IN.
            missing_grp_msg = "Input fileGrp.@USE='IN'. not in METS!"
            with self.assertRaisesRegex(Exception, missing_grp_msg):
                commands = [
                    'sample-processor-required-param -I IN -O OUT1 -p %s' % params_path,
                ]
                validate_tasks(list(map(ProcessorTask.parse, commands)), workspace)
github OCR-D / core / tests / validator / test_workspace_bagger.py View on Github external
def setUp(self):
        """Clear any existing BACKUPDIR, then prepare resolver, bagger and workspace.

        Sets ``resolver``, ``bagger``, ``tempdir``, ``bagdir``,
        ``workspace_dir`` and ``workspace`` on the test instance.
        """
        # Remove BACKUPDIR first if it already exists.
        backup_present = exists(BACKUPDIR)
        if backup_present:
            rmtree(BACKUPDIR)

        resolver = Resolver()
        self.resolver = resolver
        self.bagger = WorkspaceBagger(resolver)

        # Copy the 'kant_aufklaerung_1784' asset to '<tempdir>/bag'.
        scratch_dir = mkdtemp()
        self.tempdir = scratch_dir
        bag_path = join(scratch_dir, 'bag')
        self.bagdir = bag_path
        copytree(assets.path_to('kant_aufklaerung_1784'), bag_path)

        # The workspace is rooted at the 'data' subdirectory of the copy.
        data_path = join(bag_path, 'data')
        self.workspace_dir = data_path
        self.workspace = Workspace(resolver, directory=data_path)
github OCR-D / ocrd_tesserocr / test / test_segment_line.py View on Github external
def runTest(self):
        """Run region segmentation, then line segmentation, and save the METS once.

        The region step reads file group ``INPUT`` and writes
        ``OCR-D-SEG-BLOCK``; the line step reads ``OCR-D-SEG-BLOCK`` and
        writes ``OCR-D-SEG-LINE``.
        """
        workspace = Resolver().workspace_from_url(METS_HEROLD_SMALL, dst_dir=WORKSPACE_DIR)

        region_step = TesserocrSegmentRegion(
            workspace,
            input_file_grp="INPUT",
            output_file_grp="OCR-D-SEG-BLOCK",
        )
        region_step.process()
        # No workspace.save_mets() between the two steps (the call is disabled).

        line_step = TesserocrSegmentLine(
            workspace,
            input_file_grp="OCR-D-SEG-BLOCK",
            output_file_grp="OCR-D-SEG-LINE",
        )
        line_step.process()

        workspace.save_mets()
github OCR-D / core / test / processor / recognize / test_tesserocr.py View on Github external
def runTest(self):
        """Run region and line segmentation, saving the METS after each step.

        The region step reads file group ``INPUT`` and writes
        ``OCR-D-SEG-BLOCK``; the line step reads ``OCR-D-SEG-BLOCK`` and
        writes ``OCR-D-SEG-LINE``. The recognition step is disabled.
        """
        caching_resolver = Resolver(cache_enabled=True)
        workspace = caching_resolver.workspace_from_url(METS_HEROLD_SMALL, directory=WORKSPACE_DIR)

        region_step = Tesseract3RegionSegmenter(
            workspace,
            inputGrp="INPUT",
            outputGrp="OCR-D-SEG-BLOCK",
        )
        region_step.process()
        workspace.save_mets()

        line_step = Tesseract3LineSegmenter(
            workspace,
            inputGrp="OCR-D-SEG-BLOCK",
            outputGrp="OCR-D-SEG-LINE",
        )
        line_step.process()
        workspace.save_mets()

        #  TODO takes too long
        #  Tesseract3Recognizer(workspace, inputGrp="OCR-D-SEG-LINE", outputGrp="OCR-D-OCR-TESS").process()
        # The METS is saved once more here even though the recognizer is disabled.
        workspace.save_mets()
github OCR-D / core / tests / test_task_sequence.py View on Github external
def test_422(self):
        """
        # OCR-D/core#422

        Validate a four-step task chain in which every step reads the file
        group written by the step before it. The test passes when
        validate_tasks raises nothing.
        """
        commands = (
            "sample-processor -I OCR-D-IMG       -O OCR-D-SEG-BLOCK",
            "sample-processor -I OCR-D-SEG-BLOCK -O OCR-D-SEG-LINE",
            "sample-processor -I OCR-D-SEG-LINE  -O OCR-D-SEG-WORD",
            "sample-processor -I OCR-D-SEG-WORD  -O OCR-D-OCR-TESS",
        )
        resolver = Resolver()
        with TemporaryDirectory() as tempdir:
            mets_path = assets.path_to('kant_aufklaerung_1784/data/mets.xml')
            workspace = resolver.workspace_from_url(mets_path, dst_dir=tempdir)
            tasks = list(map(ProcessorTask.parse, commands))
            validate_tasks(tasks, workspace)
github OCR-D / core / tests / test_workspace.py View on Github external
def setUp(self):
        self.resolver = Resolver()