How to use the renku.core.models.datasets.Dataset class in renku

To help you get started, we’ve selected a few renku examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github SwissDataScienceCenter / renku-python / tests / core / commands / test_serialization.py View on Github external
def test_dataset_deserialization(client, dataset):
    """Test Dataset deserialization.

    Loads the metadata file located by ``client.get_dataset_path('dataset')``
    through ``Dataset.from_yaml`` and checks that every attribute listed in
    ``dataset_types`` has exactly the expected Python type.

    :param client: fixture that provides ``get_dataset_path`` and is passed
        to ``Dataset.from_yaml`` as ``client``.
    :param dataset: fixture that is requested but never read in this body
        (presumably it creates the ``'dataset'`` metadata -- TODO confirm).
    """
    from renku.core.models.datasets import Dataset
    dataset_ = Dataset.from_yaml(
        client.get_dataset_path('dataset'), client=client
    )

    dataset_types = {
        'created': datetime.datetime,
        'creator': list,
        'description': str,
        'files': list,
        'identifier': str,
        'keywords': list,
    }

    for attribute, type_ in dataset_types.items():
        value = getattr(dataset_, attribute)
        # ``type(...) is`` demands an exact match: a subclass instance fails.
        assert type(value) is type_, (
            '{0}: expected {1}, got {2}'.format(attribute, type_, type(value))
        )

    # Expected types of a creator's fields; not read within this body.
    creator_types = {'email': str, '_id': str, 'name': str, 'affiliation': str}
github SwissDataScienceCenter / renku-python / renku / core / management / datasets.py View on Github external
def datasets_from_commit(self, commit=None):
        """Yield each dataset whose metadata is found in a commit's tree.

        :param commit: commit to inspect; a falsy value selects
            ``self.repo.head.commit``.
        :returns: generator of the objects produced by ``Dataset.from_yaml``,
            each with its ``commit`` attribute set to the inspected commit.
            The generator is empty when looking up
            ``self.renku_home / self.DATASETS`` in the tree raises
            ``KeyError``.
        """
        if not commit:
            commit = self.repo.head.commit

        try:
            datasets_tree = commit.tree / self.renku_home / self.DATASETS
        except KeyError:
            # The lookup in the commit tree failed: nothing to list.
            return

        for entry in datasets_tree:
            try:
                metadata_blob = entry / self.METADATA
            except KeyError:
                # This entry carries no metadata file; move on to the next.
                continue

            metadata_path = self.path / Path(metadata_blob.path)
            loaded = Dataset.from_yaml(metadata_path, client=self)
            loaded.commit = commit
            yield loaded
github SwissDataScienceCenter / renku-python / renku / core / management / datasets.py View on Github external
if self.load_dataset(name=short_name):
            raise errors.DatasetExistsError(
                'Dataset exists: "{}".'.format(short_name)
            )

        identifier = str(uuid.uuid4())
        path = (self.renku_datasets_path / identifier / self.METADATA)
        try:
            path.parent.mkdir(parents=True, exist_ok=False)
        except FileExistsError:
            raise errors.DatasetExistsError(
                'Dataset with reference {} exists'.format(path.parent)
            )

        with with_reference(path):
            dataset = Dataset(
                client=self,
                identifier=identifier,
                name=name,
                short_name=short_name,
                description=description,
                creator=creators
            )

        dataset_ref = LinkReference.create(
            client=self, name='datasets/' + short_name
        )
        dataset_ref.set_reference(path)

        dataset.to_yaml()

        return dataset, path, dataset_ref
github SwissDataScienceCenter / renku-python / renku / core / commands / checks / migration.py View on Github external
def migrate_datasets_pre_v0_3(client):
    """Migrate datasets from Renku 0.3.x.

    For every metadata file produced by ``dataset_pre_0_3(client)`` this
    code loads the dataset, creates the directory of its new metadata
    location, removes submodules whose URL basename equals ``meta.name``
    and repoints recorded file paths that do not exist.

    :param client: project client; this code reads its ``path``,
        ``renku_datasets_path``, ``METADATA`` and ``repo`` attributes and
        calls its ``with_metadata`` method.
    """
    for old_path in dataset_pre_0_3(client):
        # Directory of the old metadata file, relative to ``<client.path>/data``.
        # NOTE(review): ``name`` is not read again anywhere below.
        name = str(old_path.parent.relative_to(client.path / 'data'))

        dataset = Dataset.from_yaml(old_path, client=client)
        # New location: ``<renku_datasets_path>/<dataset.uid>/<METADATA>``.
        new_path = (client.renku_datasets_path / dataset.uid / client.METADATA)
        # ``exist_ok=True`` makes the directory creation safe to repeat.
        new_path.parent.mkdir(parents=True, exist_ok=True)

        with client.with_metadata(read_only=True) as meta:
            for module in client.repo.submodules:
                # Drop submodules whose URL ends in the metadata name.
                if Path(module.url).name == meta.name:
                    module.remove()

        for file_ in dataset.files:
            # ``exists()`` on a relative path is checked against the current
            # working directory.
            if not Path(file_.path).exists():
                expected_path = (
                    client.path / 'data' / dataset.name / file_.path
                )
                if expected_path.exists():
                    # Store the found location relative to the project root.
                    file_.path = expected_path.relative_to(client.path)
github SwissDataScienceCenter / renku-python / renku / core / models / provenance / qualified.py View on Github external
type='prov:Generation',
    context={
        'prov': 'http://www.w3.org/ns/prov#',
    },
    cmp=False,
)
class Generation(EntityProxyMixin):
    """Represent an act of generating a file."""

    # Field declared with the reverse JSON-LD property
    # ``prov:qualifiedGeneration``. ``type`` lists four classes, two by
    # dotted path and two by direct reference -- presumably the types this
    # field accepts; confirm against ``jsonld.ib``.
    entity = jsonld.ib(
        context={
            '@reverse': 'prov:qualifiedGeneration',
        },
        type=[
            'renku.core.models.entities.Entity',
            'renku.core.models.entities.Collection', Dataset, DatasetFile
        ]
    )

    # Optional field mapped to ``prov:hadRole``; defaults to ``None``.
    role = jsonld.ib(context='prov:hadRole', default=None)

    # Plain ``attr.ib`` (not ``jsonld.ib``), keyword-only. The converter
    # wraps a non-None value in ``weakref.ref`` and leaves ``None`` as is,
    # so the attribute holds a weak reference rather than the object itself.
    _activity = attr.ib(
        default=None,
        kw_only=True,
        converter=lambda value: weakref.ref(value)
        if value is not None else None,
    )
    # Keyword-only field mapped to the JSON-LD ``@id`` key; no default given.
    _id = jsonld.ib(context='@id', kw_only=True)

    @property
    def activity(self):
        """Return the activity object."""
github SwissDataScienceCenter / renku-python / renku / core / commands / dataset.py View on Github external
def edit_dataset(client, dataset_id, transform_fn, commit_message=None):
    """Apply edited metadata to an existing dataset and write it out.

    :param client: object providing ``load_dataset``; also passed to the
        ``Dataset`` built from the edited values.
    :param dataset_id: identifier handed to ``client.load_dataset``.
    :param transform_fn: callable that receives the loaded dataset; its
        return value is parsed with ``yaml.safe_load`` and the result is
        expanded into keyword arguments for ``Dataset``.
    :param commit_message: not read inside this function.
    :raises DatasetNotFound: when ``client.load_dataset`` returns a falsy
        value.
    """
    current = client.load_dataset(dataset_id)
    if not current:
        raise DatasetNotFound()

    edited_yaml = transform_fn(current)
    fields = yaml.safe_load(edited_yaml)
    replacement = Dataset(client=client, **fields)

    current.update_metadata(replacement)
    current.to_yaml()
github SwissDataScienceCenter / renku-python / renku / core / management / datasets.py View on Github external
def load_dataset_from_path(self, path, commit=None):
        """Load the dataset stored in the metadata file at ``path``.

        :param path: location of the metadata file; a path that is not
            absolute is joined onto ``self.path``.
        :param commit: forwarded unchanged to ``Dataset.from_yaml``.
        :returns: the result of ``Dataset.from_yaml``.
        """
        candidate = Path(path)
        metadata_path = (
            candidate if candidate.is_absolute() else self.path / candidate
        )
        return Dataset.from_yaml(metadata_path, client=self, commit=commit)