How to use the ocrd.workspace.Workspace class in ocrd

To help you get started, we’ve selected a few ocrd examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github OCR-D / core / tests / validator / test_ocrd_zip_validator.py View on Github external
def setUp(self):
        """
        Prepare a throw-away copy of the ``kant_aufklaerung_1784`` asset.

        The asset is copied into a freshly created temporary directory and a
        ``Workspace`` is opened on the ``data`` subdirectory of that copy.
        A ``Resolver`` and a ``WorkspaceBagger`` using it are stored on the
        test case as well.
        """
        self.resolver = Resolver()
        self.bagger = WorkspaceBagger(self.resolver)
        self.tempdir = mkdtemp()
        self.bagdir = join(self.tempdir, 'kant_aufklaerung_1784')
        copytree(assets.path_to('kant_aufklaerung_1784'), self.bagdir)
        self.workspace_dir = join(self.bagdir, 'data')
        # workspace_dir is already a complete path; wrapping it in a
        # single-argument join() would be a no-op.
        self.workspace = Workspace(self.resolver, directory=self.workspace_dir)
github OCR-D / core / tests / validator / test_workspace_bagger.py View on Github external
def setUp(self):
        """
        Build the per-test fixture: resolver, bagger and a workspace on a copied asset.

        An existing ``BACKUPDIR`` is deleted first. The
        ``kant_aufklaerung_1784`` asset is then copied to ``<tempdir>/bag``
        and a ``Workspace`` is opened on its ``data`` subdirectory.
        """
        # Remove BACKUPDIR if it is still present on disk.
        if exists(BACKUPDIR):
            rmtree(BACKUPDIR)

        self.resolver = resolver = Resolver()
        self.bagger = WorkspaceBagger(resolver)

        self.tempdir = scratch = mkdtemp()
        self.bagdir = bag_root = join(scratch, 'bag')
        copytree(assets.path_to('kant_aufklaerung_1784'), bag_root)

        self.workspace_dir = data_dir = join(bag_root, 'data')
        self.workspace = Workspace(resolver, directory=data_dir)
github OCR-D / core / tests / test_workspace.py View on Github external
def test_remove_file_force(self):
        """
        ``remove_file`` on an unknown ID raises ``FileNotFoundError`` unless ``force=True``.
        """
        missing_id = 'non-existing-id'
        with copy_of_directory(assets.path_to('SBB0000F29300010000/data')) as workdir:
            ws = Workspace(self.resolver, directory=workdir)
            # Without force, the unknown ID must be reported as an error.
            with self.assertRaisesRegex(FileNotFoundError, "not found"):
                ws.remove_file(missing_id)
            # With force, the same call must go through without raising.
            ws.remove_file(missing_id, force=True)
github OCR-D / core / tests / test_workspace.py View on Github external
def test_remove_file_group_force(self):
        """
        ``remove_file_group`` on an unknown group raises unless ``force=True``.
        """
        unknown_group = 'I DO NOT EXIST'
        with copy_of_directory(assets.path_to('SBB0000F29300010000/data')) as workdir:
            ws = Workspace(self.resolver, directory=workdir)
            # Without force, the unknown fileGrp must be reported as an error.
            with self.assertRaisesRegex(Exception, "No such fileGrp"):
                ws.remove_file_group(unknown_group)
            # With force, the same call must go through without raising.
            ws.remove_file_group(unknown_group, force=True)
github OCR-D / core / tests / test_workspace.py View on Github external
def test_superfluous_copies_in_ws_dir(self):
        """
        Check the number of files in a workspace directory after downloading.

        Starting from a directory that contains only ``mets.xml``, every file
        listed by the METS is downloaded; afterwards exactly two files must
        exist, one of them ``OCR-D-IMG/FILE_0005_IMAGE.tif``.

        https://github.com/OCR-D/core/issues/227
        """
        def find_recursive(root):
            # Flat list of every file name found anywhere below root.
            return [name for _, _, names in walk(root) for name in names]

        with TemporaryDirectory() as wsdir:
            # Seed the directory with a copy of the asset METS as 'mets.xml'.
            mets_source = assets.path_to('SBB0000F29300010000/data/mets_one_file.xml')
            with open(mets_source, 'r') as f_in, open(join(wsdir, 'mets.xml'), 'w') as f_out:
                f_out.write(f_in.read())
            self.assertEqual(len(find_recursive(wsdir)), 1)

            ws1 = Workspace(self.resolver, wsdir)
            for mets_file in ws1.mets.find_files():
                ws1.download_file(mets_file)

            self.assertEqual(len(find_recursive(wsdir)), 2)
            expected_image = join(wsdir, 'OCR-D-IMG/FILE_0005_IMAGE.tif')
            self.assertTrue(exists(expected_image))
github OCR-D / core / tests / test_workspace.py View on Github external
def test_remove_file_group_rmdir(self):
        """
        ``remove_file_group(..., recursive=True)`` must leave no ``OCR-D-IMG`` directory behind.
        """
        group = 'OCR-D-IMG'
        with copy_of_directory(assets.path_to('SBB0000F29300010000/data')) as workdir:
            ws = Workspace(self.resolver, directory=workdir)
            group_dir = join(workdir, group)
            # The directory is present in the copied asset before removal ...
            self.assertTrue(exists(group_dir))
            ws.remove_file_group(group, recursive=True)
            # ... and must be gone afterwards.
            self.assertFalse(exists(group_dir))
github OCR-D / core / ocrd / ocrd / resolver.py View on Github external
def workspace_from_nothing(self, directory, mets_basename='mets.xml', clobber_mets=False):
        """
        Create an empty workspace.

        Args:
            directory: Directory to hold the workspace; it is created,
                including parents, if it does not exist. When ``None``, a new
                temporary directory prefixed with ``TMP_PREFIX`` is used.
            mets_basename: File name of the METS file inside ``directory``.
            clobber_mets: Whether an already existing METS file may be
                overwritten.

        Returns:
            A ``Workspace`` on ``directory`` built from the newly written
            empty METS.

        Raises:
            FileExistsError: If the METS file already exists and
                ``clobber_mets`` is false.
        """
        if directory is None:
            directory = tempfile.mkdtemp(prefix=TMP_PREFIX)

        workspace_dir = Path(directory)
        workspace_dir.mkdir(parents=True, exist_ok=True)

        mets_file = workspace_dir / mets_basename
        if mets_file.exists() and not clobber_mets:
            message = "METS '%s' already exists in '%s' and clobber_mets not set." % (mets_basename, directory)
            raise FileExistsError(message)

        empty_mets = OcrdMets.empty_mets()
        log.info("Writing METS to %s", mets_file)
        mets_file.write_bytes(empty_mets.to_xml(xmllint=True))

        return Workspace(self, directory, empty_mets, mets_basename=mets_basename)
github OCR-D / core / ocrd / ocrd / resolver.py View on Github external
log.debug("Deriving dst_dir %s from %s", Path(mets_url).parent, mets_url)
                dst_dir = Path(mets_url).parent
            else:
                log.debug("Creating ephemeral workspace '%s' for METS @ <%s>", dst_dir, mets_url)
                dst_dir = tempfile.mkdtemp(prefix=TMP_PREFIX)
        # XXX Path.resolve is always strict in Python <= 3.5, so create dst_dir unless it exists consistently
        if not Path(dst_dir).exists():
            Path(dst_dir).mkdir(parents=True, exist_ok=False)
        dst_dir = str(Path(dst_dir).resolve())

        log.debug("workspace_from_url\nmets_basename='%s'\nmets_url='%s'\nsrc_baseurl='%s'\ndst_dir='%s'",
            mets_basename, mets_url, src_baseurl, dst_dir)

        self.download_to_directory(dst_dir, mets_url, basename=mets_basename, if_exists='overwrite' if clobber_mets else 'skip')

        workspace = Workspace(self, dst_dir, mets_basename=mets_basename, baseurl=src_baseurl)

        if download:
            for f in workspace.mets.find_files():
                workspace.download_file(f)

        return workspace