Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def setUp(self):
    """
    Build the fixture: a resolver, a bagger and a workspace rooted in the
    ``data`` directory of a temporary copy of the ``kant_aufklaerung_1784`` asset.
    """
    resolver = Resolver()
    self.resolver = resolver
    self.bagger = WorkspaceBagger(resolver)

    # The asset is copied into a fresh temp directory; the workspace is
    # opened on that copy, not on the asset itself.
    scratch = mkdtemp()
    self.tempdir = scratch
    bag_root = join(scratch, 'kant_aufklaerung_1784')
    self.bagdir = bag_root
    copytree(assets.path_to('kant_aufklaerung_1784'), bag_root)

    data_dir = join(bag_root, 'data')
    self.workspace_dir = data_dir
    self.workspace = Workspace(resolver, directory=join(data_dir))
def setUp(self):
    """
    Remove any existing ``BACKUPDIR``, then build the fixture: a resolver,
    a bagger and a workspace rooted in the ``data`` directory of a temporary
    copy of the ``kant_aufklaerung_1784`` asset (copied to ``<tempdir>/bag``).
    """
    # Start from a clean slate: drop BACKUPDIR if it is present.
    if exists(BACKUPDIR):
        rmtree(BACKUPDIR)

    resolver = Resolver()
    self.resolver = resolver
    self.bagger = WorkspaceBagger(resolver)

    scratch = mkdtemp()
    self.tempdir = scratch
    bag_root = join(scratch, 'bag')
    self.bagdir = bag_root
    copytree(assets.path_to('kant_aufklaerung_1784'), bag_root)

    data_dir = join(bag_root, 'data')
    self.workspace_dir = data_dir
    self.workspace = Workspace(resolver, directory=data_dir)
def test_remove_file_force(self):
    """
    Removing an unknown file ID raises ``FileNotFoundError`` unless
    ``force=True`` is passed.
    """
    with copy_of_directory(assets.path_to('SBB0000F29300010000/data')) as tempdir:
        workspace = Workspace(self.resolver, directory=tempdir)
        # without force: must raise
        self.assertRaisesRegex(
            FileNotFoundError, "not found",
            workspace.remove_file, 'non-existing-id')
        # with force: must not raise
        workspace.remove_file('non-existing-id', force=True)
def test_remove_file_group_force(self):
    """
    Removing an unknown file group raises ("No such fileGrp") unless
    ``force=True`` is passed.
    """
    with copy_of_directory(assets.path_to('SBB0000F29300010000/data')) as tempdir:
        workspace = Workspace(self.resolver, directory=tempdir)
        # without force: an error is expected
        self.assertRaisesRegex(
            Exception, "No such fileGrp",
            workspace.remove_file_group, 'I DO NOT EXIST')
        # with force: passes without error
        workspace.remove_file_group('I DO NOT EXIST', force=True)
def test_superfluous_copies_in_ws_dir(self):
    """
    https://github.com/OCR-D/core/issues/227

    Downloading the files of a one-file METS must leave exactly two files
    in the workspace directory: the METS itself and the downloaded image.
    """
    def find_recursive(root):
        # Collect the names of all regular files below ``root``.
        found = []
        for _dirpath, _dirnames, filenames in walk(root):
            found.extend(filenames)
        return found

    mets_source = assets.path_to('SBB0000F29300010000/data/mets_one_file.xml')
    with TemporaryDirectory() as wsdir:
        # Seed the workspace directory with nothing but the METS file.
        with open(mets_source, 'r') as f_in, open(join(wsdir, 'mets.xml'), 'w') as f_out:
            f_out.write(f_in.read())
        self.assertEqual(len(find_recursive(wsdir)), 1)

        ws1 = Workspace(self.resolver, wsdir)
        for mets_file in ws1.mets.find_files():
            ws1.download_file(mets_file)

        self.assertEqual(len(find_recursive(wsdir)), 2)
        self.assertTrue(exists(join(wsdir, 'OCR-D-IMG/FILE_0005_IMAGE.tif')))
def test_remove_file_group_rmdir(self):
    """
    After ``remove_file_group(..., recursive=True)`` the group's directory
    no longer exists on disk.
    """
    with copy_of_directory(assets.path_to('SBB0000F29300010000/data')) as tempdir:
        workspace = Workspace(self.resolver, directory=tempdir)
        group_dir = join(tempdir, 'OCR-D-IMG')
        self.assertTrue(exists(group_dir))
        workspace.remove_file_group('OCR-D-IMG', recursive=True)
        self.assertFalse(exists(group_dir))
def workspace_from_nothing(self, directory, mets_basename='mets.xml', clobber_mets=False):
    """
    Create an empty workspace: write a fresh, empty METS file into
    ``directory`` and return a workspace for it.

    Args:
        directory: Target directory. If ``None``, a temporary directory
            (prefixed with ``TMP_PREFIX``) is created and used instead.
            Missing directories are created, including parents.
        mets_basename: File name of the METS file inside ``directory``.
        clobber_mets: If true, an already existing METS file is overwritten.

    Returns:
        A ``Workspace`` constructed from this resolver, ``directory`` and
        the empty METS.

    Raises:
        FileExistsError: If the METS file already exists and
            ``clobber_mets`` is not set.
    """
    if directory is None:
        directory = tempfile.mkdtemp(prefix=TMP_PREFIX)
    # exist_ok=True: the directory may already be there (caller-supplied
    # or just created by mkdtemp).
    Path(directory).mkdir(parents=True, exist_ok=True)

    mets_path = Path(directory, mets_basename)
    already_present = mets_path.exists()
    if already_present and not clobber_mets:
        raise FileExistsError(
            "METS '%s' already exists in '%s' and clobber_mets not set." % (mets_basename, directory))

    empty_mets = OcrdMets.empty_mets()
    log.info("Writing METS to %s", mets_path)
    serialized = empty_mets.to_xml(xmllint=True)
    mets_path.write_bytes(serialized)

    return Workspace(self, directory, empty_mets, mets_basename=mets_basename)
log.debug("Deriving dst_dir %s from %s", Path(mets_url).parent, mets_url)
dst_dir = Path(mets_url).parent
else:
log.debug("Creating ephemeral workspace '%s' for METS @ <%s>", dst_dir, mets_url)
dst_dir = tempfile.mkdtemp(prefix=TMP_PREFIX)
# XXX Path.resolve is always strict in Python <= 3.5, so create dst_dir unless it exists consistently
if not Path(dst_dir).exists():
Path(dst_dir).mkdir(parents=True, exist_ok=False)
dst_dir = str(Path(dst_dir).resolve())
log.debug("workspace_from_url\nmets_basename='%s'\nmets_url='%s'\nsrc_baseurl='%s'\ndst_dir='%s'",
mets_basename, mets_url, src_baseurl, dst_dir)
self.download_to_directory(dst_dir, mets_url, basename=mets_basename, if_exists='overwrite' if clobber_mets else 'skip')
workspace = Workspace(self, dst_dir, mets_basename=mets_basename, baseurl=src_baseurl)
if download:
for f in workspace.mets.find_files():
workspace.download_file(f)
return workspace