How to use the datalad.utils.assure_list function in datalad

To help you get started, we’ve selected a few assure_list examples, based on popular ways it is used in public datalad projects.
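
If you have not used it before: assure_list normalizes a value that may be a scalar, an iterable, or None into a plain list, which is why it appears throughout DataLad wherever an argument can be "one or many". Below is a minimal sketch of the expected behavior (note that newer DataLad releases rename this helper to ensure_list and keep assure_list as a deprecated alias):

from datalad.utils import assure_list

assert assure_list('one.txt') == ['one.txt']   # a scalar is wrapped into a list
assert assure_list(['a', 'b']) == ['a', 'b']   # a list is passed through unchanged
assert assure_list(None) == []                 # None becomes an empty list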


github datalad / datalad / datalad / distribution / siblings.py View on Github external
        if publish_depends:
            if depvar in ds.config:
                # config vars are incremental, so make sure we start from
                # scratch
                ds.config.unset(depvar, where='local', reload=False)
            for d in assure_list(publish_depends):
                lgr.info(
                    'Configure additional publication dependency on "%s"',
                    d)
                ds.config.add(depvar, d, where='local', reload=False)
            ds.config.reload()

        if publish_by_default:
            if dfltvar in ds.config:
                ds.config.unset(dfltvar, where='local', reload=False)
            for refspec in assure_list(publish_by_default):
                lgr.info(
                    'Configure additional default publication refspec "%s"',
                    refspec)
                ds.config.add(dfltvar, refspec, 'local')
            ds.config.reload()

        assert isinstance(ds.repo, GitRepo)  # just against silly code
        if isinstance(ds.repo, AnnexRepo):
            # we need to check if the added sibling is an annex, and try to enable it
            # another part of the fix for #463 and #432
            try:
                exc = None
                if not ds.config.obtain(
                        'remote.{}.annex-ignore'.format(name),
                        default=False,
                        valtype=EnsureBool(),
github datalad / datalad / datalad / interface / run_procedure.py View on Github external
within ~/.gitconfig or your local repository's .git/config.
    So, local definitions take precedence over remote ones and more specific
    ones over more general ones.

    Returns
    -------
    tuple
      path, name, format string, help message
    """

    ds = ds if isinstance(ds, Dataset) else Dataset(ds) if ds else None

    # 1. check system and user account for procedure
    for loc in (cfg.obtain('datalad.locations.user-procedures'),
                cfg.obtain('datalad.locations.system-procedures')):
        for dir in assure_list(loc):
            for m, n in _get_file_match(dir, name):
                yield (m, n,) + _get_proc_config(n)
    # 2. check dataset for procedure
    if ds is not None and ds.is_installed():
        # could be more than one
        dirs = assure_list(
                ds.config.obtain('datalad.locations.dataset-procedures'))
        for dir in dirs:
            # TODO `get` dirs if necessary
            for m, n in _get_file_match(op.join(ds.path, dir), name):
                yield (m, n,) + _get_proc_config(n, ds=ds)
        # 2.1. check subdatasets recursively
        for subds in ds.subdatasets(return_type='generator',
                                    result_xfm='datasets'):
            for m, n, f, h in _get_procedure_implementation(name=name, ds=subds):
                yield m, n, f, h
github datalad / datalad / datalad / support / annexrepo.py View on Github external
'description': 'web',
                  'here': False,
                  'urls': ['http://127.0.0.1:43442/about.txt', 'http://example.com/someurl']
                }}
        """
        if batch:
            lgr.warning("TODO: --batch mode for whereis.  Operating serially")

        OUTPUTS = {'descriptions', 'uuids', 'full'}
        if output not in OUTPUTS:
            raise ValueError(
                "Unknown value output=%r. Known are %s"
                % (output, ', '.join(map(repr, OUTPUTS)))
            )

        options = assure_list(options, copy=True)
        options += ["--key"] if key else []

        json_objects = self._run_annex_command_json('whereis', args=options + files)
        if output in {'descriptions', 'uuids'}:
            return [
                [remote.get(output[:-1]) for remote in j.get('whereis')]
                if j.get('success') else []
                for j in json_objects]
        elif output == 'full':
            # TODO: we might want to optimize storage since many remotes entries will be the
            # same so we could just reuse them instead of brewing copies
            return {j['key' if (key or '--all' in options) else 'file']:
                        self._whereis_json_to_dict(j)
                    for j in json_objects
                    if not j.get('key').endswith('.this-is-a-test-key')}
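
Note the copy=True in the assure_list call above: by default a passed-in list is returned as-is, so the in-place `options += [...]` that follows would mutate the caller's argument. A small sketch of the difference, using hypothetical variable names and assuming copy=True produces a shallow copy as the helper's docstring describes:

from datalad.utils import assure_list

caller_options = ['--in', 'here']
options = assure_list(caller_options, copy=True)  # shallow copy; the caller's list stays intact
options += ['--key']
assert caller_options == ['--in', 'here']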
github datalad / datalad / datalad / plugin / add_readme.py View on Github external
            # flatten possibly existing multiple metadata sources
            for src in dsinfo['metadata']:
                if src.startswith('@'):
                    # not a source
                    continue
                meta.update(dsinfo['metadata'][src])

        metainfo = ''
        for label, content in (
                ('', meta.get('description', meta.get('shortdescription', ''))),
                ('Author{}'.format('s' if isinstance(meta.get('author', None), list) else ''),
                    u'\n'.join([u'- {}'.format(a) for a in assure_list(meta.get('author', []))])),
                ('Homepage', meta.get('homepage', '')),
                ('Reference', meta.get('citation', '')),
                ('License', meta.get('license', '')),
                ('Keywords', u', '.join([u'`{}`'.format(k) for k in assure_list(meta.get('tag', []))])),
                ('Funding', meta.get('fundedby', '')),
                ):
            if label and content:
                metainfo += u'\n\n### {}\n\n{}'.format(label, content)
            elif content:
                metainfo += u'\n\n{}'.format(content)

        for key in 'title', 'name', 'shortdescription':
            if 'title' in meta:
                break
            if key in meta:
                meta['title'] = meta[key]

        default_content=u"""\
# {title}{metainfo}
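
The pattern in this excerpt is that metadata fields such as author or tag may hold either a single string or a list of strings; assure_list lets the same formatting code handle both. A small illustration with made-up metadata values:

from datalad.utils import assure_list

meta = {'author': 'Jane Doe', 'tag': ['bids', 'demo']}
authors = u'\n'.join(u'- {}'.format(a) for a in assure_list(meta.get('author', [])))
keywords = u', '.join(u'`{}`'.format(k) for k in assure_list(meta.get('tag', [])))
assert authors == u'- Jane Doe'
assert keywords == u'`bids`, `demo`'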
github datalad / datalad / datalad / metadata / metadata.py View on Github external
        # Ugly? Jep: #2055
        content_info = zip(paths, ds.repo.file_has_content(paths), ds.repo.is_under_annex(paths))
        paths = [p for p, c, a in content_info if not a or c]
        nocontent = len(fullpathlist) - len(paths)
        if nocontent:
            # TODO better fail, or support incremental and label this file as not present
            lgr.warning(
                '{} files have no content present, '
                'some extractors will not operate on {}'.format(
                    nocontent,
                    'them' if nocontent > 10
                           else [p for p, c, a in content_info if not c and a])
            )

    # pull out potential metadata field blacklist config settings
    blacklist = [re.compile(bl) for bl in assure_list(ds.config.obtain(
        'datalad.metadata.aggregate-ignore-fields',
        default=[]))]
    # enforce size limits
    max_fieldsize = ds.config.obtain('datalad.metadata.maxfieldsize')
    # keep local, who knows what some extractors might pull in
    from pkg_resources import iter_entry_points  # delayed heavy import
    extractors = {ep.name: ep for ep in iter_entry_points('datalad.metadata.extractors')}

    # we said that we want to fail, rather than just moan about less metadata
    # Do an early check if all extractors are available so not to wait hours
    # and then crash for some obvious reason
    absent_extractors = [t for t in types if t not in extractors]
    if absent_extractors:
        raise ValueError(
            '%d enabled metadata extractor%s not available in this installation'
            ': %s' %
github datalad / datalad / datalad / support / gitrepo.py View on Github external
    def add_(self, files, git=True, git_options=None, update=False):
        """Like `add`, but returns a generator"""
        # TODO: git_options is used as options for the git-add here,
        # instead of options to the git executable => rename for consistency

        if not git:
            lgr.warning(
                'GitRepo.add() called with git=%s, this should not happen',
                git)
            git = True

        # there is no other way than to collect all files into a list
        # at this point, because we need to pass them at once to a single
        # `git add` call
        files = [_normalize_path(self.path, f) for f in assure_list(files) if f]

        if not (files or git_options or update):
            # wondering why just a warning? in cmdline this is also not an error
            lgr.warning("add was called with empty file list and no options.")
            return

        try:
            # without --verbose git 2.9.3  add does not return anything
            add_out = self._git_custom_command(
                files,
                # Set annex.largefiles to prevent storing files in annex when
                # GitRepo() is instantiated with a v6+ annex repo.
                ['git', '-c', 'annex.largefiles=nothing', 'add'] +
                assure_list(git_options) +
                to_options(update=update) + ['--verbose']
            )
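
The normalize-then-filter idiom above works for any function that should accept either a single path or a list of paths. A short sketch of the same pattern outside of GitRepo (the function name and behavior here are illustrative, not part of DataLad's API):

from datalad.utils import assure_list

def normalize_paths(files):
    # accept a single path, a list of paths, or None; drop empty entries
    return [f for f in assure_list(files) if f]

assert normalize_paths('a.txt') == ['a.txt']
assert normalize_paths(['a.txt', '', 'b.txt']) == ['a.txt', 'b.txt']
assert normalize_paths(None) == []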
github datalad / datalad / datalad / interface / run_procedure.py View on Github external

    # 3. check extensions for procedure
    # delay heavy import until here
    from pkg_resources import iter_entry_points
    from pkg_resources import resource_isdir
    from pkg_resources import resource_filename
github datalad / datalad-neuroimaging / datalad_neuroimaging / bids2scidata.py View on Github external
    if not exists(output_directory):
        lgr.info(
            "creating output directory at '{}'".format(output_directory))
        os.makedirs(output_directory)

    # prep for assay table info
    protocols = OrderedDict()
    for prop in assay_props:
        info[prop] = []

    # pull out essential metadata bits about the dataset itself
    # (for study description)
    dsbidsmeta = getprop(dsmeta, ['metadata', 'bids'], {})
    info['name'] = dsbidsmeta.get('shortdescription', dsbidsmeta.get('name', 'TODO'))
    info['author'] = '\t'.join(assure_list(dsbidsmeta.get('author', [])))
    info['keywords'] = '\t'.join(assure_list(dsbidsmeta.get('tag', [])))
    # generate: s_study.txt
    study_df = _get_study_df(dsmeta)
    if study_df.empty:
        # no samples, no assays, no metadataset
        return None

    _gather_protocol_parameters_from_df(study_df, protocols)
    _store_beautiful_table(
        study_df,
        output_directory,
        "s_study.txt")
    info['studytab_filename'] = 's_study.txt'

    deface_df = None
    # all imaging modalities recognized in BIDS
    #TODO maybe fold 'defacemask' into each suffix as a derivative
github datalad / datalad / datalad / support / annexrepo.py View on Github external
('{}-={}', remove)):
            if d:
                spec.extend(_genspec(expr, d))
        # prefix all with '-s' and extend arg list
        args.extend(j for i in zip(['-s'] * len(spec), spec) for j in i)
        if purge:
            # and all '-r' args
            args.extend(j for i in zip(['-r'] * len(purge), purge)
                        for j in i)
        if not args:
            return

        if recursive:
            args.append('--force')
        # append actual file path arguments
        args.extend(assure_list(files))

        # XXX do we need the return values for anything?
        self._run_annex_command_json(
            'metadata',
            args)
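
A detail worth calling out in the `args.extend(assure_list(files))` line above: list.extend iterates its argument, so a bare string would be appended character by character. Normalizing with assure_list first guards against that. A minimal illustration (the file name is made up):

from datalad.utils import assure_list

args = ['metadata']
files = 'one file.txt'               # caller passed a single path, not a list
args.extend(assure_list(files))      # -> ['metadata', 'one file.txt']

# without assure_list, extend() would iterate the string character by character
bad = ['metadata']
bad.extend(files)
assert bad != args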
github datalad / datalad / datalad / metadata / search.py View on Github external
    def get_query(self, query):
        query = assure_list(query)
        simple_fieldspec = re.compile(r"(?P<field>\S*?):(?P<query>.*)")
        quoted_fieldspec = re.compile(r"'(?P<field>[^']+?)':(?P<query>.*)")
        query_rec_matches = [
            simple_fieldspec.match(q) or
            quoted_fieldspec.match(q) or
            q
            for q in query]
        query_group_dicts_only = [
            q.groupdict() for q in query_rec_matches if hasattr(q, 'groupdict')
        ]
        self._queried_keys = [
            qgd['field']
            for qgd in query_group_dicts_only
            if ('field' in qgd and qgd['field'])
        ]
        if len(query_group_dicts_only) != len(query_rec_matches):