Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def setUp(self):
    """Prepare a resolver, a bagger and a workspace on a copied asset tree.

    The ``kant_aufklaerung_1784`` asset directory is copied into a newly
    created temporary directory, and the workspace is opened on the
    ``data`` subdirectory of that copy.
    """
    self.resolver = Resolver()
    self.bagger = WorkspaceBagger(self.resolver)
    # NOTE(review): nothing in this method removes self.tempdir again;
    # confirm that the matching tearDown cleans it up.
    self.tempdir = mkdtemp()
    self.bagdir = join(self.tempdir, 'kant_aufklaerung_1784')
    copytree(assets.path_to('kant_aufklaerung_1784'), self.bagdir)
    self.workspace_dir = join(self.bagdir, 'data')
    # workspace_dir is already a complete path, so it is passed as-is
    # rather than through a single-argument join().
    self.workspace = Workspace(self.resolver, directory=self.workspace_dir)
def setUp(self):
    """Create a resolver and open a workspace from a METS asset URL.

    The workspace is resolved from the URL of the
    ``SBB0000F29300010000/data/mets.xml`` asset.
    """
    resolver = Resolver()
    self.resolver = resolver
    mets_url = assets.url_of('SBB0000F29300010000/data/mets.xml')
    self.workspace = resolver.workspace_from_url(mets_url)
def test_validate_sequence(self):
    """Check that validate_tasks raises for unusable input file groups.

    Two task sequences are validated against a workspace created from the
    ``kant_aufklaerung_1784`` METS asset. Each sequence is expected to
    raise an exception whose message matches the given pattern.
    """
    resolver = Resolver()
    with TemporaryDirectory() as tempdir:
        mets_path = assets.path_to('kant_aufklaerung_1784/data/mets.xml')
        workspace = resolver.workspace_from_url(mets_path, dst_dir=tempdir)

        # Parameter file handed to every task through its -p option.
        params_path = Path(tempdir, 'params.json')
        params_path.write_text('{"param1": true}')

        # The second task reads FOO, which the first task does not output.
        chained_cmds = [
            'sample-processor-required-param -I OCR-D-IMG -O OUT1 -p %s' % params_path,
            'sample-processor-required-param -I FOO -O OUT2 -p %s' % params_path
        ]
        unknown_group_msg = "Input file group not contained in METS or produced by previous steps: FOO'"
        with self.assertRaisesRegex(Exception, unknown_group_msg):
            # Parsing stays inside the context manager, as in the
            # original, so errors raised while parsing are checked too.
            validate_tasks(list(map(ProcessorTask.parse, chained_cmds)), workspace)

        # A single task whose input group is IN.
        single_cmd = [
            'sample-processor-required-param -I IN -O OUT1 -p %s' % params_path,
        ]
        missing_in_mets_msg = "Input fileGrp.@USE='IN'. not in METS!"
        with self.assertRaisesRegex(Exception, missing_in_mets_msg):
            validate_tasks(list(map(ProcessorTask.parse, single_cmd)), workspace)
def setUp(self):
    """Remove any existing backup directory and build the bag fixture.

    The ``kant_aufklaerung_1784`` asset directory is copied to ``bag``
    inside a newly created temporary directory, and the workspace is
    opened on the ``data`` subdirectory of that copy.
    """
    # Remove BACKUPDIR if it already exists.
    if exists(BACKUPDIR):
        rmtree(BACKUPDIR)

    resolver = Resolver()
    self.resolver = resolver
    self.bagger = WorkspaceBagger(resolver)

    scratch_dir = mkdtemp()
    self.tempdir = scratch_dir
    bag_root = join(scratch_dir, 'bag')
    self.bagdir = bag_root
    copytree(assets.path_to('kant_aufklaerung_1784'), bag_root)

    data_dir = join(bag_root, 'data')
    self.workspace_dir = data_dir
    self.workspace = Workspace(resolver, directory=data_dir)
def runTest(self):
    """Run region segmentation, then line segmentation, then save the METS.

    The workspace is created from METS_HEROLD_SMALL with WORKSPACE_DIR as
    destination directory. The region step writes file group
    ``OCR-D-SEG-BLOCK``, which is the input of the line step.
    """
    workspace = Resolver().workspace_from_url(METS_HEROLD_SMALL, dst_dir=WORKSPACE_DIR)

    region_step = TesserocrSegmentRegion(
        workspace,
        input_file_grp="INPUT",
        output_file_grp="OCR-D-SEG-BLOCK",
    )
    region_step.process()
    # workspace.save_mets()

    line_step = TesserocrSegmentLine(
        workspace,
        input_file_grp="OCR-D-SEG-BLOCK",
        output_file_grp="OCR-D-SEG-LINE",
    )
    line_step.process()
    workspace.save_mets()
def runTest(self):
    """Run region and line segmentation, saving the METS after each step.

    The workspace is created from METS_HEROLD_SMALL in WORKSPACE_DIR using
    a resolver constructed with ``cache_enabled=True``. The recognition
    step is kept disabled (see the TODO below), but the save that follows
    it still runs.
    """
    caching_resolver = Resolver(cache_enabled=True)
    workspace = caching_resolver.workspace_from_url(METS_HEROLD_SMALL, directory=WORKSPACE_DIR)

    region_step = Tesseract3RegionSegmenter(
        workspace,
        inputGrp="INPUT",
        outputGrp="OCR-D-SEG-BLOCK",
    )
    region_step.process()
    workspace.save_mets()

    line_step = Tesseract3LineSegmenter(
        workspace,
        inputGrp="OCR-D-SEG-BLOCK",
        outputGrp="OCR-D-SEG-LINE",
    )
    line_step.process()
    workspace.save_mets()

    # TODO takes too long
    # Tesseract3Recognizer(workspace, inputGrp="OCR-D-SEG-LINE", outputGrp="OCR-D-OCR-TESS").process()
    workspace.save_mets()
def test_422(self):
    """
    Validate a four-step chain of sample-processor tasks (OCR-D/core#422).

    Every task after the first reads the file group written by the task
    before it. Nothing is asserted explicitly: the test passes as long as
    validate_tasks does not raise.
    """
    resolver = Resolver()
    with TemporaryDirectory() as tempdir:
        mets_path = assets.path_to('kant_aufklaerung_1784/data/mets.xml')
        workspace = resolver.workspace_from_url(mets_path, dst_dir=tempdir)
        chained_cmds = [
            "sample-processor -I OCR-D-IMG -O OCR-D-SEG-BLOCK",
            "sample-processor -I OCR-D-SEG-BLOCK -O OCR-D-SEG-LINE",
            "sample-processor -I OCR-D-SEG-LINE -O OCR-D-SEG-WORD",
            "sample-processor -I OCR-D-SEG-WORD -O OCR-D-OCR-TESS",
        ]
        parsed_tasks = list(map(ProcessorTask.parse, chained_cmds))
        validate_tasks(parsed_tasks, workspace)
def setUp(self):
    """Attach a newly constructed Resolver to the instance as ``self.resolver``."""
    new_resolver = Resolver()
    self.resolver = new_resolver
from flask import Flask
from flask import request
from ocrd.processor.base import run_processor
# from ocrd.processor.segment_line.tesserocr import Tesseract3LineSegmenter
# from ocrd.processor.segment_region.tesserocr import Tesseract3RegionSegmenter
from ocrd.resolver import Resolver
# Resolver instance created ahead of create(); in the visible code only the
# commented-out route handlers inside create() refer to it.
resolver = Resolver()
def create():
    """Build and return a new Flask application.

    No routes are registered at the moment: the two tesserocr processor
    endpoints below are kept commented out.
    """
    flask_app = Flask(__name__)

    # @flask_app.route('/processor/segment_line/tesserocr', methods=['PUT'])
    # def _segment_line_tesserocr():
    #     run_processor(Tesseract3LineSegmenter, request.args['mets_url'], resolver)
    #     return 'DONE', 200

    # @flask_app.route('/processor/segment_region/tesserocr', methods=['PUT'])
    # def _segment_region_tesserocr():
    #     run_processor(Tesseract3RegionSegmenter, request.args['mets_url'], resolver)
    #     return 'DONE', 200

    return flask_app
def run_tasks(mets, log_level, page_id, task_strs):
resolver = Resolver()
workspace = resolver.workspace_from_url(mets)
log = getLogger('ocrd.task_sequence.run_tasks')
tasks = [ProcessorTask.parse(task_str) for task_str in task_strs]
validate_tasks(tasks, workspace)
# Run the tasks
for task in tasks:
log.info("Start processing task '%s'", task)
# execute cli
returncode = run_cli(
task.executable,
mets,
resolver,