How to use datalad - 10 common examples

To help you get started, we’ve selected a few datalad examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github datalad / datalad / datalad / distribution / create_test_dataset.py View on Github external
Up to how many leading directories within a dataset could lead to a
      sub-dataset

    Yields
    ------
    str
       Path to the generated dataset(s)

    """
    # we apparently can't import api functionality within api
    from datalad.api import add
    # To simplify managing all the file paths etc
    if not isabs(path):
        path = abspath(path)
    # make it a git (or annex??) repository... ok - let's do randomly one or another ;)
    RepoClass = GitRepo if random.randint(0, 1) else AnnexRepo
    lgr.info("Generating repo of class %s under %s", RepoClass, path)
    repo = RepoClass(path, create=True)
    # let's create some dummy file and add it to the beast
    fn = opj(path, "file%d.dat" % random.randint(1, 1000))
    with open(fn, 'w') as f:
        f.write(fn)
    repo.add(fn, git=True)
    repo.commit(msg="Added %s" % fn)

    yield path

    if levels:
        # make a dataset for that one since we want to add sub datasets
        ds_ = Dataset(path)
        # Process the levels
        level, levels_ = levels[0], levels[1:]
github datalad / datalad / datalad / interface / rerun.py View on Github external
def _revs_as_results(dset, revs):
    """Generate one "run" status record per revision in `revs`.

    For each revision the full commit message is obtained through
    ``dset.repo.format_commit("%B", rev)`` and handed to `get_run_info`.
    If that returns run information, it is attached to the record under
    the keys "run_info" and "run_message".

    This is a generator: nothing is evaluated (and no error is raised)
    until the result is iterated.

    Parameters
    ----------
    dset
      Dataset whose repository holds the revisions.
    revs : iterable
      Revisions to inspect, processed in the given order.

    Yields
    ------
    dict
      Copy of the status record with ``status="ok"``.

    Raises
    ------
    ValueError
      If `get_run_info` raises ValueError for a commit message; the
      re-raised error names the offending revision.
    """
    for commit in revs:
        record = get_status_dict("run", ds=dset, commit=commit)
        commit_body = dset.repo.format_commit("%B", commit)
        try:
            run_msg, run_info = get_run_info(dset, commit_body)
        except ValueError as err:
            # Re-raise with the revision included so the failing commit
            # can be identified from the message alone.
            raise ValueError(
                "Error on {}'s message: {}".format(commit, exc_str(err)))

        if run_info is not None:
            record.update(run_info=run_info, run_message=run_msg)
        yield dict(record, status="ok")
github datalad / datalad / datalad / cmd.py View on Github external
shlex.split(cmd, posix=not on_windows)
                    if isinstance(cmd, string_types)
                    else cmd)
            try:
                proc = subprocess.Popen(cmd,
                                        stdout=outputstream,
                                        stderr=errstream,
                                        shell=shell,
                                        cwd=popen_cwd,
                                        env=popen_env,
                                        stdin=stdin)

            except Exception as e:
                prot_exc = e
                lgr.log(11, "Failed to start %r%r: %s" %
                        (cmd, " under %r" % cwd if cwd else '', exc_str(e)))
                raise

            finally:
                if self.protocol.records_ext_commands:
                    self.protocol.end_section(prot_id, prot_exc)

            try:
                if log_online:
                    out = self._get_output_online(proc,
                                                  log_stdout, log_stderr,
                                                  outputstream, errstream,
                                                  expect_stderr=expect_stderr,
                                                  expect_fail=expect_fail)
                else:
                    out = proc.communicate()
github datalad / datalad / datalad / support / external_versions.py View on Github external
def _get_system_ssh_version():
    """Return version of ssh available system-wide

    Annex prior to 20170302 was using a bundled version, but now would use
    the system one if installed.

    The version is taken from the first word of the ``ssh -V`` banner: it is
    the last item of the "_"-separated list in that word, which may be
    followed by a space or a comma.  Examples of the banner:

      OpenSSH_7.4p1 Debian-10+deb9u6, OpenSSL 1.0.2l  25 May 2017
      OpenSSH_for_Windows_7.7p1, LibreSSL 2.6.5

    Returns
    -------
    str or None
      Version string (e.g. "7.4p1"), or None if running ``ssh -V`` raised
      CommandError.

    Raises
    ------
    AssertionError
      If the reported banner does not start with "OpenSSH".
    """
    try:
        out, err = _runner.run('ssh -V'.split(),
                               expect_fail=True, expect_stderr=True)
        # apparently spits out to err but I wouldn't trust it blindly
        if err.startswith('OpenSSH'):
            out = err
        assert out.startswith('OpenSSH')  # that is the only one we care about atm
        first_word = out.split(' ', 1)[0].rstrip(',.')
        # Take the LAST "_"-separated item: the product name itself may
        # contain underscores (e.g. "OpenSSH_for_Windows_7.7p1"), in which
        # case the second item would be "for" rather than the version.
        return first_word.split('_')[-1]
    except CommandError as exc:
        lgr.debug("Could not determine version of ssh available: %s", exc_str(exc))
        return None
github datalad / datalad / datalad / interface / base.py View on Github external
# Don't add result_filter if it's None because then
                # eval_results can't distinguish between --report-{status,type}
                # not specified via the CLI and None passed via the Python API.
                kwargs['result_filter'] = res_filter
            kwargs['proc_pre'] = args.common_proc_pre
            kwargs['proc_post'] = args.common_proc_post
        try:
            ret = cls.__call__(**kwargs)
            if inspect.isgenerator(ret):
                ret = list(ret)
            if args.common_output_format == 'tailored' and \
                    hasattr(cls, 'custom_result_summary_renderer'):
                cls.custom_result_summary_renderer(ret)
            return ret
        except KeyboardInterrupt as exc:
            ui.error("\nInterrupted by user while doing magic: %s" % exc_str(exc))
            if cls._interrupted_exit_code is not None:
                sys.exit(cls._interrupted_exit_code)
            else:
                raise
github datalad / datalad / datalad / interface / rerun.py View on Github external
In the standard case, the information in these results will be used to
    actually re-execute the commands.
    """
    revs = dset.repo.repo.git.rev_list("--reverse", revrange, "--").split()
    try:
        results = _revs_as_results(dset, revs)
    except ValueError as exc:
        yield get_status_dict("run", status="error", message=exc_str(exc))
        return

    if since is not None and since.strip() == "":
        # For --since='', drop any leading commits that don't have
        # a run command.
        results = list(dropwhile(lambda r: "run_info" not in r, results))
        if not results:
            yield get_status_dict(
                "run", status="impossible", ds=dset,
                message=("No run commits found in history of %s", revrange))
            return
    else:
        results = list(results)
        if not results:
            yield get_status_dict(
                "run", status="impossible", ds=dset,
                message=("No commits found in %s", revrange))
            return

    if onto is not None and onto.strip() == "":
        # Special case: --onto='' is the value of --since. Because we're
        # currently aborting if the revision list contains merges, we know
        # that, regardless of if and how --since is specified, the effective
        # value for --since is the parent of the first revision.
github datalad / datalad / datalad / interface / run_procedure.py View on Github external
yield res
            return

        if not isinstance(spec, (tuple, list)):
            # maybe coming from config
            import shlex
            spec = shlex.split(spec)
        name = spec[0]
        args = spec[1:]

        try:
            # get the first match and run with it
            procedure_file, cmd_name, cmd_tmpl, cmd_help = \
                next(_get_procedure_implementation(name, ds=ds))
        except StopIteration:
            res = get_status_dict(
                    action='run_procedure',
                    # TODO: Default renderer requires a key "path" to exist.
                    # Doesn't make a lot of sense in this case
                    path=name,
                    logger=lgr,
                    refds=ds.path if ds else None,
                    status='impossible',
                    message="Cannot find procedure with name '%s'" % name)
            yield res
            return

        ex = _guess_exec(procedure_file)
        # configured template (call-format string) takes precedence:
        if cmd_tmpl:
            ex['template'] = cmd_tmpl
github datalad / datalad / datalad / interface / rerun.py View on Github external
def _revs_as_results(dset, revs):
    """Generate one "run" status record per revision in `revs`.

    For each revision the full commit message is obtained through
    ``dset.repo.format_commit("%B", rev)`` and handed to `get_run_info`.
    If that returns run information, it is attached to the record under
    the keys "run_info" and "run_message".

    This is a generator: nothing is evaluated (and no error is raised)
    until the result is iterated.

    Parameters
    ----------
    dset
      Dataset whose repository holds the revisions.
    revs : iterable
      Revisions to inspect, processed in the given order.

    Yields
    ------
    dict
      Copy of the status record with ``status="ok"``.

    Raises
    ------
    ValueError
      If `get_run_info` raises ValueError for a commit message; the
      re-raised error names the offending revision.
    """
    for commit in revs:
        record = get_status_dict("run", ds=dset, commit=commit)
        commit_body = dset.repo.format_commit("%B", commit)
        try:
            run_msg, run_info = get_run_info(dset, commit_body)
        except ValueError as err:
            # Re-raise with the revision included so the failing commit
            # can be identified from the message alone.
            raise ValueError(
                "Error on {}'s message: {}".format(commit, exc_str(err)))

        if run_info is not None:
            record.update(run_info=run_info, run_message=run_msg)
        yield dict(record, status="ok")
github datalad / datalad / datalad / interface / utils.py View on Github external
# far as I'm aware.
        mod = sys.modules[wrapped.__module__]
        if PY2:
            # we rely on:
            # - decorated function is method of a subclass of Interface
            # - the name of the class matches the last part of the module's name
            #   if converted to lower
            # for example:
            # ..../where/ever/mycommand.py:
            # class MyCommand(Interface):
            #     @eval_results
            #     def __call__(..)
            command_class_names = \
                [i for i in mod.__dict__
                 if type(mod.__dict__[i]) == type and
                 issubclass(mod.__dict__[i], Interface) and
                 i.lower().startswith(wrapped.__module__.split('.')[-1].replace('datalad_', '').replace('_', ''))]
            assert len(command_class_names) == 1, (command_class_names, mod.__name__)
            command_class_name = command_class_names[0]
        else:
            command_class_name = wrapped.__qualname__.split('.')[-2]
        _func_class = mod.__dict__[command_class_name]
        lgr.debug("Determined class of decorated function: %s", _func_class)

        # retrieve common options from kwargs, and fall back on the command
        # class attributes, or general defaults if needed
        kwargs = kwargs.copy()  # we will pop, which might cause side-effect
        common_params = {
            p_name: kwargs.pop(
                p_name,
                getattr(_func_class, p_name, eval_defaults[p_name]))
            for p_name in eval_params}
github datalad / datalad / datalad / customremotes / base.py View on Github external
def initiate(self):
        """Prepare the protocol file under ``<repopath>/.git/bin`` (once).

        Does nothing if this instance was already initiated.  Otherwise the
        ``.git/bin`` directory is created if missing and the target file
        ``git-annex-remote-datalad[-<custom_remote_name>]`` (trailing ":"
        stripped from the remote name) is stored in ``self._file``.

        - If the file already exists, every line past the header that
          starts with "recv " or "send " is commented out with a leading
          "#", and the file is rewritten as UTF-8.
        - Otherwise ``self.HEADER`` is written to the new file and the file
          is made executable (mode 0o755).
        """
        if self._initiated:
            return
        self._initiated = True
        d = opj(self.repopath, '.git', 'bin')
        if not exists(d):
            os.makedirs(d)

        suf = '-' + self.custom_remote_name.rstrip(':') if self.custom_remote_name else ''
        self._file = _file = opj(d, 'git-annex-remote-datalad' + suf)

        if exists(_file):
            lgr.debug("Commenting out previous entries")
            # comment out all the past entries
            with open(_file, 'rb') as f:
                entries = list(map(assure_unicode, f.readlines()))
            # Skip the leading lines that belong to the header; the count is
            # derived from HEADER itself so only recorded entries are touched.
            for i in range(len(self.HEADER.split(os.linesep)), len(entries)):
                e = entries[i]
                if e.startswith(('recv ', 'send ')):
                    entries[i] = '#' + e
            with open(_file, 'wb') as f:
                f.write(u''.join(entries).encode('utf-8'))
            return  # nothing else to be done

        # Path of the file is logged relative to the repository so the
        # message reads as a ready-to-paste "cd <repo>; vim <file>" hint.
        lgr.debug("Initiating protocoling. cd %s; vim %s",
                  realpath(self.repopath),
                  _file[len(self.repopath) + 1:])
        with open(_file, 'a') as f:
            f.write(self.HEADER)
        os.chmod(_file, 0o755)