How to use the datalad.support.gitrepo.GitRepo class in datalad

To help you get started, we’ve selected a few datalad examples based on popular ways GitRepo is used in public projects.
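Before the project snippets below, here is a minimal sketch of the pattern most of them build on: open (or create) a repository with GitRepo, add a file, and commit it. The path and file name are placeholders, the calls mirror the first snippet, and exact keyword arguments may vary between datalad versions.

from os.path import join as opj
from datalad.support.gitrepo import GitRepo

path = '/tmp/gitrepo-demo'                 # hypothetical location for a throwaway repo
repo = GitRepo(path, create=True)          # create the repository if it does not exist yet

fn = opj(path, 'example.dat')              # hypothetical file name
with open(fn, 'w') as f:
    f.write('some content')

repo.add(fn, git=True)                     # stage the file (straight into git)
repo.commit(msg="Added %s" % fn)           # record the change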


github datalad/datalad · datalad/distribution/create_test_dataset.py (view on GitHub)
Up to how many leading directories within a dataset could lead to a
      sub-dataset

    Yields
    ------
    str
       Path to the generated dataset(s)

    """
    # we apparently can't import api functionality within api
    from datalad.api import add
    # To simplify managing all the file paths etc
    if not isabs(path):
        path = abspath(path)
    # make it a git (or annex??) repository... ok - let's randomly do one or the other ;)
    RepoClass = GitRepo if random.randint(0, 1) else AnnexRepo
    lgr.info("Generating repo of class %s under %s", RepoClass, path)
    repo = RepoClass(path, create=True)
    # let's create some dummy file and add it to the beast
    fn = opj(path, "file%d.dat" % random.randint(1, 1000))
    with open(fn, 'w') as f:
        f.write(fn)
    repo.add(fn, git=True)
    repo.commit(msg="Added %s" % fn)

    yield path

    if levels:
        # make a dataset for that one since we want to add sub datasets
        ds_ = Dataset(path)
        # Process the levels
        level, levels_ = levels[0], levels[1:]
github datalad/datalad · 3rd/datalad-revolution/datalad_revolution/gitrepo.py (view on GitHub)
# helper to commit changes reported in status
        _datalad_msg = False
        if not message:
            message = 'Recorded changes'
            _datalad_msg = True

        # TODO remove pathobj stringification when commit() can
        # handle it
        to_commit = [str(f.relative_to(self.pathobj))
                     for f, props in iteritems(status)] \
                    if partial_commit else None
        if not partial_commit or to_commit:
            # we directly call GitRepo.commit() to avoid a whole slew
            # of direct-mode safeguards and workarounds in the AnnexRepo
            # implementation (which also runs an additional dry-run commit)
            GitRepo.commit(
                self,
                files=to_commit,
                msg=message,
                _datalad_msg=_datalad_msg,
                options=None,
                # do not raise on empty commit
                # it could be that the `add` in this save-cycle has already
                # brought back a 'modified' file into a clean state
                careless=True,
            )
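The snippet above calls GitRepo.commit() as an unbound method, so the commit runs against the base class and skips AnnexRepo's own commit() override. Below is a rough sketch of that pattern in isolation; it assumes git-annex is installed, the repository location and file name are placeholders, and the keyword names mirror the call above.

from os.path import join as opj
from datalad.support.annexrepo import AnnexRepo
from datalad.support.gitrepo import GitRepo

path = '/tmp/annex-demo'                   # hypothetical location
repo = AnnexRepo(path, create=True)

fn = opj(path, 'file1.dat')                # hypothetical file
with open(fn, 'w') as f:
    f.write('content')
repo.add(fn, git=True)                     # stage directly into git, bypassing the annex

# call the base-class commit directly, bypassing AnnexRepo.commit()
GitRepo.commit(
    repo,
    files=[fn],
    msg="Recorded changes",
    careless=True,                         # do not raise if there is nothing to commit
)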
github datalad/datalad · datalad/crawler/nodes/annex.py (view on GitHub)
def _initiate_dataset(self, path, name):
        lgr.info("Initiating dataset %s" % name)

        if self.branch is not None:
            raise NotImplementedError("Disabled for now")
            # because all the 'create' magic is stuffed into the constructor ATM
            # we need first to initiate a git repository
            git_repo = GitRepo(path, create=True)
            # since we are initiating, that branch shouldn't exist yet, thus --orphan
            git_repo.checkout(self.branch, options=["--orphan"])
            # TODO: RF whenever create becomes a dedicated factory/method
            # and/or branch becomes an option for the "creator"

        backend = self.backend or cfg.obtain('datalad.crawl.default_backend', default='MD5E')
        direct = cfg.obtain('datalad.crawl.init_direct', default=False)

        if direct:
            raise NotImplementedError("Disabled for now to init direct mode ones")

        ds = create(
                path=path,
                force=False,
                # no_annex=False,  # TODO: add as an arg
                # Passing save arg based on backend was that we need to save only if
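The orphan-branch trick in the disabled block above also works on its own: create the repository first, then check out a branch with no history. A small sketch follows; the path and branch name are placeholders, and options is passed straight through to git checkout.

from datalad.support.gitrepo import GitRepo

repo = GitRepo('/tmp/crawl-demo', create=True)     # hypothetical location
# the branch does not exist yet, hence --orphan
repo.checkout('incoming', options=["--orphan"])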
github datalad/datalad · datalad/cmdline/helpers.py (view on GitHub)
elif class_ == GitRepo:
            type_ = "git"
        else:
            raise RuntimeError("Unknown class %s." % str(class_))

    while not ismount(dir_):  # TODO: always correct termination?
        if exists(opj(dir_, '.git')):
            # found git dir
            if class_ is None:
                # detect repo type:
                try:
                    return AnnexRepo(dir_, create=False)
                except RuntimeError as e:
                    pass
                try:
                    return GitRepo(dir_, create=False)
                except InvalidGitRepositoryError as e:
                    raise RuntimeError("No datalad repository found in %s" %
                                       abspath_)
            else:
                try:
                    return class_(dir_, create=False)
                except (RuntimeError, InvalidGitRepositoryError) as e:
                    raise RuntimeError("No %s repository found in %s." %
                                       (type_, abspath_))
        else:
            dir_ = normpath(opj(dir_, ".."))

    if class_ is not None:
        raise RuntimeError("No %s repository found in %s" % (type_, abspath_))
    else:
        raise RuntimeError("No datalad repository found in %s" % abspath_)
github datalad/datalad · datalad/utils.py (view on GitHub)
def knows_annex(path):
    """Returns whether at a given path there is information about an annex

    It is just a thin wrapper around GitRepo.is_with_annex() that also
    checks that `path` exists first.

    This includes actually present annexes, but also uninitialized ones, or
    even the presence of a remote annex branch.
    """
    from os.path import exists
    if not exists(path):
        lgr.debug("No annex: test path {0} doesn't exist".format(path))
        return False
    from datalad.support.gitrepo import GitRepo
    return GitRepo(path, init=False, create=False).is_with_annex()
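Usage is a single call; the helper returns False both for paths that do not exist and (via GitRepo.is_with_annex()) for plain git repositories without any annex information. The paths below are placeholders:

from datalad.utils import knows_annex

print(knows_annex('/tmp/does-not-exist'))      # False: the path does not exist
print(knows_annex('/tmp/plain-git-checkout'))  # False for a git repo without annex info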
github datalad/datalad · datalad/support/annexrepo.py (view on GitHub)
def is_valid_repo(cls, path, allow_noninitialized=False):
        """Return True if given path points to an annex repository
        """
        # Note: default value for allow_noninitialized=False is important
        # for invalidating an instance via self._flyweight_invalid. If this is
        # changed, we also need to override _flyweight_invalid and explicitly
        # pass allow_noninitialized=False!

        initialized_annex = GitRepo.is_valid_repo(path) and \
            exists(opj(path, '.git', 'annex'))
        if allow_noninitialized:
            try:
                return initialized_annex \
                    or GitRepo(path, create=False, init=False).is_with_annex()
            except (NoSuchPathError, InvalidGitRepositoryError):
                return False
        else:
            return initialized_annex
github datalad/datalad · datalad/support/gitrepo.py (view on GitHub)
def _fixup_submodule_dotgit_setup(ds, relativepath):
    """Implementation of our current of .git in a subdataset

    Each subdataset/module has its own .git directory where a standalone
    repository would have it. No gitdir files, no symlinks.
    """
    # move .git to superrepo's .git/modules, remove .git, create
    # .git-file
    path = opj(ds.path, relativepath)
    subds_dotgit = opj(path, ".git")
    src_dotgit = GitRepo.get_git_dir(path)

    if src_dotgit == '.git':
        # this is what we want
        return

    # first we want to remove any conflicting worktree setup
    # done by git to find the checkout at the mountpoint of the
    # submodule, if we keep that, any git command will fail
    # after we move .git
    GitRepo(path, init=False).config.unset(
        'core.worktree', where='local')
    # what we have here is some kind of reference, remove and
    # replace by the target
    os.remove(subds_dotgit)
    # make absolute
    src_dotgit = opj(path, src_dotgit)
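GitRepo.get_git_dir() used at the top of this snippet can also be called on its own, e.g. to tell a standalone checkout (plain .git directory) from a submodule whose git directory lives elsewhere. A sketch with a hypothetical path:

from datalad.support.gitrepo import GitRepo

git_dir = GitRepo.get_git_dir('/tmp/some-checkout')    # hypothetical checkout location
if git_dir == '.git':
    print("standalone layout: .git lives inside the checkout")
else:
    print("git directory is elsewhere: %s" % git_dir)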
github datalad/datalad · datalad/metadata/metadata.py (view on GitHub)
#recursion_limit=recursion_limit,
                action='metadata',
                # uninstalled subdatasets could be queried via aggregated metadata
                # -> no 'error'
                unavailable_path_status='',
                nondataset_path_status='error',
                # we need to know when to look into aggregated data
                force_subds_discovery=True,
                force_parentds_discovery=True,
                return_type='generator',
                on_failure='ignore'):
            if ap.get('status', None):
                # this is done
                yield ap
                continue
            if ap.get('type', None) == 'dataset' and GitRepo.is_valid_repo(ap['path']):
                ap['process_content'] = True
            to_query = None
            if ap.get('state', None) == 'absent' or \
                    ap.get('type', 'dataset') != 'dataset':
                # this is a lonely absent dataset/file or content in a present dataset
                # -> query through parent
                # there must be a parent, otherwise this would be a non-dataset path
                # and would have errored during annotation
                to_query = ap['parentds']
            else:
                to_query = ap['path']
            if to_query:
                pcontent = content_by_ds.get(to_query, [])
                pcontent.append(ap)
                content_by_ds[to_query] = pcontent
github datalad/datalad · datalad/crawler/nodes/annex.py (view on GitHub)
"""
        if path is None:
            path = realpath(curdir)
        # TODO: commented out to ease developing for now
        # self.repo = _call(AnnexRepo, path, **kwargs)
        # TODO: backend -- should be fetched from the config I guess... or should we
        # give that duty to the dataset initialization routine to change default backend?
        # Well -- different annexifiers might have different ideas for the backend, but
        # then those could be overridden via options

        if exists(path):
            if not exists(opj(path, '.git')):
                if (len(listdir(path))) and (not allow_dirty):
                    raise RuntimeError("Directory %s is not empty." % path)

        self.repo = (GitRepo if no_annex else AnnexRepo)(path, always_commit=False, **kwargs)

        git_remotes = self.repo.get_remotes()
        if special_remotes:
            if no_annex: # isinstance(self.repo, GitRepo):
                raise ValueError("Cannot have special remotes in a simple git repo")

            # TODO: move under AnnexRepo with proper testing etc
            repo_info_repos = [v for k, v in self.repo.repo_info().items()
                               if k.endswith(' repositories')]
            annex_remotes = {r['description']: r for r in sum(repo_info_repos, [])}

            for remote in special_remotes:
                if remote not in git_remotes:
                    if remote in annex_remotes:
                        # Already known - needs only enabling
                        lgr.info("Enabling existing special remote %s" % remote)