def test_dataset_deserialization(client, dataset):
    """Test Dataset deserialization."""
    import datetime

    from renku.core.models.datasets import Dataset

    dataset_ = Dataset.from_yaml(
        client.get_dataset_path('dataset'), client=client
    )

    dataset_types = {
        'created': datetime.datetime,
        'creator': list,
        'description': str,
        'files': list,
        'identifier': str,
        'keywords': list,
    }

    for attribute, type_ in dataset_types.items():
        assert type(getattr(dataset_, attribute)) is type_

    creator_types = {'email': str, '_id': str, 'name': str, 'affiliation': str}
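    # The snippet is truncated here; a plausible continuation (a sketch, the
    # real test body may differ): validate creator attribute types the same
    # way the dataset attributes were validated above.
    for creator in dataset_.creator:
        for attribute, type_ in creator_types.items():
            assert type(getattr(creator, attribute)) is type_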
def datasets_from_commit(self, commit=None):
    """Return datasets defined in a commit."""
    commit = commit or self.repo.head.commit

    try:
        datasets = commit.tree / self.renku_home / self.DATASETS
    except KeyError:
        return

    for tree in datasets:
        try:
            blob = tree / self.METADATA
        except KeyError:
            continue
        dataset = Dataset.from_yaml(
            self.path / Path(blob.path), client=self
        )
        dataset.commit = commit
        yield dataset
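# A minimal, self-contained sketch of the tree-traversal pattern above, using
# GitPython directly. The '.renku/datasets' layout and 'metadata.yml' file
# name are assumptions here, not part of this snippet.
from pathlib import Path

from git import Repo

repo = Repo('.')
commit = repo.head.commit
try:
    # Tree supports '/' for sub-path lookup and raises KeyError when missing.
    datasets = commit.tree / '.renku' / 'datasets'
except KeyError:
    datasets = None

if datasets is not None:
    for tree in datasets:
        try:
            blob = tree / 'metadata.yml'
        except KeyError:
            continue
        print(Path(blob.path))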
# Refuse to overwrite an existing dataset with the same short name.
if self.load_dataset(name=short_name):
    raise errors.DatasetExistsError(
        'Dataset exists: "{}".'.format(short_name)
    )

# A fresh UUID names the directory that holds the dataset metadata.
identifier = str(uuid.uuid4())
path = self.renku_datasets_path / identifier / self.METADATA
try:
    path.parent.mkdir(parents=True, exist_ok=False)
except FileExistsError:
    raise errors.DatasetExistsError(
        'Dataset with reference {} exists'.format(path.parent)
    )

with with_reference(path):
    dataset = Dataset(
        client=self,
        identifier=identifier,
        name=name,
        short_name=short_name,
        description=description,
        creator=creators,
    )

# Register a named reference (datasets/<short_name>) pointing at the file.
dataset_ref = LinkReference.create(
    client=self, name='datasets/' + short_name
)
dataset_ref.set_reference(path)
dataset.to_yaml()

return dataset, path, dataset_ref
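# A minimal, self-contained sketch of the creation pattern above: a fresh
# UUID names the metadata directory, and exist_ok=False turns a rare
# collision into an explicit error. Paths here are illustrative, not the
# library's guaranteed layout.
import uuid
from pathlib import Path

renku_datasets_path = Path('.renku/datasets')  # assumed layout
identifier = str(uuid.uuid4())
path = renku_datasets_path / identifier / 'metadata.yml'
try:
    path.parent.mkdir(parents=True, exist_ok=False)
except FileExistsError:
    raise RuntimeError('Dataset with reference {} exists'.format(path.parent))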
def migrate_datasets_pre_v0_3(client):
    """Migrate datasets from Renku 0.3.x."""
    for old_path in dataset_pre_0_3(client):
        name = str(old_path.parent.relative_to(client.path / 'data'))

        dataset = Dataset.from_yaml(old_path, client=client)
        new_path = client.renku_datasets_path / dataset.uid / client.METADATA
        new_path.parent.mkdir(parents=True, exist_ok=True)

        # Remove git submodules that carried the dataset in the old layout.
        with client.with_metadata(read_only=True) as meta:
            for module in client.repo.submodules:
                if Path(module.url).name == meta.name:
                    module.remove()

        # Repair file paths recorded relative to the old data directory.
        for file_ in dataset.files:
            if not Path(file_.path).exists():
                expected_path = (
                    client.path / 'data' / dataset.name / file_.path
                )
                if expected_path.exists():
                    file_.path = expected_path.relative_to(client.path)
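# A minimal sketch of the path-repair idea above: if a recorded relative path
# no longer resolves, look for the file under data/<dataset name>/ and, when
# found, re-relativize it against the project root. All names illustrative.
from pathlib import Path

project = Path('.').resolve()
recorded = Path('file.csv')                        # stale relative path
candidate = project / 'data' / 'my-dataset' / recorded
if not recorded.exists() and candidate.exists():
    recorded = candidate.relative_to(project)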
@jsonld.s(
    type='prov:Generation',
    context={
        'prov': 'http://www.w3.org/ns/prov#',
    },
    cmp=False,
)
class Generation(EntityProxyMixin):
    """Represent an act of generating a file."""

    entity = jsonld.ib(
        context={
            '@reverse': 'prov:qualifiedGeneration',
        },
        type=[
            'renku.core.models.entities.Entity',
            'renku.core.models.entities.Collection',
            Dataset,
            DatasetFile,
        ],
    )

    role = jsonld.ib(context='prov:hadRole', default=None)

    _activity = attr.ib(
        default=None,
        kw_only=True,
        converter=lambda value: weakref.ref(value)
        if value is not None else None,
    )
    _id = jsonld.ib(context='@id', kw_only=True)

    @property
    def activity(self):
        """Return the activity object."""
        # Dereference the weakref stored by the converter above.
        return self._activity() if self._activity is not None else None
def edit_dataset(client, dataset_id, transform_fn, commit_message=None):
    """Edit dataset metadata."""
    dataset = client.load_dataset(dataset_id)

    if not dataset:
        raise DatasetNotFound()

    edited = yaml.safe_load(transform_fn(dataset))
    updated_ = Dataset(client=client, **edited)
    dataset.update_metadata(updated_)
    dataset.to_yaml()
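# A minimal, self-contained sketch of the transform round-trip above:
# transform_fn receives the current metadata and must return YAML text, which
# is then parsed back with yaml.safe_load. The Dataset re-construction is
# renku-specific; a plain dict stands in for it here.
import yaml


def transform_fn(metadata):
    data = dict(metadata)
    data['description'] = 'New description'
    return yaml.dump(data)


edited = yaml.safe_load(transform_fn({'name': 'demo', 'description': 'old'}))
assert edited['description'] == 'New description'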
def load_dataset_from_path(self, path, commit=None):
    """Return a dataset from a given path."""
    path = Path(path)
    if not path.is_absolute():
        path = self.path / path
    return Dataset.from_yaml(path, client=self, commit=commit)
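# A short sketch of the path normalization above (standard pathlib behavior;
# the base path is illustrative): relative inputs are resolved against the
# project root, absolute inputs pass through unchanged.
from pathlib import Path

base = Path('/project')
path = Path('datasets/abc/metadata.yml')  # relative input
if not path.is_absolute():
    path = base / path
assert path == Path('/project/datasets/abc/metadata.yml')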