How to use the datalad.utils.assure_unicode function in datalad

To help you get started, we’ve selected a few datalad examples based on popular ways assure_unicode is used in public projects.

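Under the hood, assure_unicode lives in datalad.utils and normalizes whatever it is given to text: bytes are decoded, text is returned unchanged. A minimal sketch of that contract, assuming a datalad release that still ships assure_unicode (later releases rename the assure_* helpers to ensure_*):

from datalad.utils import assure_unicode

assert assure_unicode(b'caf\xc3\xa9') == u'caf\xe9'  # bytes are decoded to text
assert assure_unicode(u'caf\xe9') == u'caf\xe9'      # text passes through unchanged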

github datalad / datalad / datalad/customremotes/base.py
def initiate(self):
        if self._initiated:
            return
        self._initiated = True
        d = opj(self.repopath, '.git', 'bin')
        if not exists(d):
            os.makedirs(d)

        suf = '-' + self.custom_remote_name.rstrip(':') if self.custom_remote_name else ''
        self._file = _file = opj(d, 'git-annex-remote-datalad' + suf)

        if exists(_file):
            lgr.debug("Commenting out previous entries")
            # comment out all the past entries
            with open(_file, 'rb') as f:
                entries = list(map(assure_unicode, f.readlines()))
            for i in range(len(self.HEADER.split(os.linesep)), len(entries)):
                e = entries[i]
                if e.startswith('recv ') or e.startswith('send '):
                    entries[i] = '#' + e
            with open(_file, 'wb') as f:
                f.write(u''.join(entries).encode('utf-8'))
            return  # nothing else to be done

        lgr.debug("Initiating protocoling."
                  "cd %s; vim %s"
                  % (realpath(self.repopath),
                     _file[len(self.repopath) + 1:]))
        with open(_file, 'a') as f:
            f.write(self.HEADER)
        os.chmod(_file, 0o755)
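
The pattern above reads the protocol file as raw bytes, normalizes each line to text with assure_unicode, edits the text, and re-encodes once when writing back. A reduced sketch of the same round trip (protocol.log is a hypothetical file name):

from datalad.utils import assure_unicode

with open('protocol.log', 'rb') as f:
    entries = [assure_unicode(line) for line in f.readlines()]
# comment out any recv/send entries, exactly as the snippet does
entries = ['#' + e if e.startswith(('recv ', 'send ')) else e for e in entries]
with open('protocol.log', 'wb') as f:
    f.write(u''.join(entries).encode('utf-8'))
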
github datalad / datalad / datalad/distribution/add.py
            annex_opts=None,
            annex_add_opts=None,
            jobs=None):
        # parameter constraints:
        if not path:
            raise InsufficientArgumentsError(
                "insufficient information for adding: requires at least a path")
        refds_path = Interface.get_refds_path(dataset)
        common_report = dict(action='add', logger=lgr, refds=refds_path)

        if message and message_file:
            raise ValueError("Both a message and message file were specified")

        if message_file:
            with open(message_file, "rb") as mfh:
                message = assure_unicode(mfh.read())

        to_add = []
        subds_to_add = {}
        ds_to_annotate_from_recursion = {}
        got_nothing = True
        for ap in AnnotatePaths.__call__(
                path=path,
                dataset=dataset,
                # never recursion, need to handle manually below to be able to
                # discover untracked content
                recursive=False,
                action='add',
                # speed things up by using Git's modification detection, if there
                # is a repo with at least one commit
                modified='HEAD' \
                if dataset and \
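
In this excerpt, assure_unicode normalizes a commit-message file that was deliberately opened in binary mode, so the message is text regardless of how the file was encoded. A minimal sketch of just that idiom (msg.txt is a hypothetical path):

from datalad.utils import assure_unicode

with open('msg.txt', 'rb') as mfh:
    message = assure_unicode(mfh.read())
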
github datalad / datalad / datalad/cmd.py
def _process_one_line(self, out_type, proc, log_, log_is_callable,
                          expected=False, line=None, suf=None):
        if line is None:
            lgr.log(3, "Reading line from %s", out_type)
            line = {'stdout': proc.stdout, 'stderr': proc.stderr}[out_type].readline()
        else:
            lgr.log(3, "Processing provided line")
        if line and log_is_callable:
            # Let it be processed
            line = log_(assure_unicode(line))
            if line is not None:
                # we are working with binary type here
                line = assure_bytes(line)
        if line:
            if out_type == 'stdout':
                self._log_out(assure_unicode(line))
            elif out_type == 'stderr':
                self._log_err(line.decode('utf-8') if PY3 else line,
                              expected)
            else:  # pragma: no cover
                raise RuntimeError("must not get here")
            return (line + suf) if suf else line
        # it was already output directly, but return b'' so the calling code works
        return binary_type()
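
This snippet decodes each output line to text before handing it to a user-supplied log callback, then converts the callback's result back to bytes (assure_bytes is the bytes-side counterpart in datalad.utils). A reduced sketch of that round trip:

from datalad.utils import assure_unicode, assure_bytes

def filter_line(raw_line, callback):
    # the callback always sees text; the pipeline always gets bytes back
    line = callback(assure_unicode(raw_line))
    return assure_bytes(line) if line is not None else b''

print(filter_line(b'send hello\n', lambda l: l.upper()))  # b'SEND HELLO\n'
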
github datalad / datalad-neuroimaging / datalad_neuroimaging/extractors/bids.py
def yield_participant_info(bids):
    for bidsvars in bids.get_collections(
            level='dataset')[0].to_df().to_dict(orient='records'):
        props = dict(id=assure_unicode(bidsvars.pop('subject')))
        for p in bidsvars:
            # take away some ambiguity
            normk = assure_unicode(p).lower()
            hk = content_metakey_map.get(normk, normk)
            val = assure_unicode(bidsvars[p])
            if hk in ('sex', 'gender'):
                if hasattr(val, 'lower'):
                    val = val.lower()
                elif isinstance(val, float) and isnan(val):
                    # pybids reports 'n/a' as NaN
                    val = 'n/a'
                val = sex_label_map.get(val, val)
            if hk == 'suffix' and val == 'participants':
                # regression in PyBIDS 0.7.1, should be fixed in 0.8
                # https://github.com/bids-standard/pybids/issues/380
                # TODO: remove workaround whenever we depend on pybids >= 0.8
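
Here both the keys and the values coming out of a pandas DataFrame record are passed through assure_unicode before any further mapping, so later lookups never mix bytes and text. A minimal sketch with a hypothetical record:

from datalad.utils import assure_unicode

bidsvars = {b'subject': b'01', b'SEX': b'F'}
props = {assure_unicode(k).lower(): assure_unicode(v) for k, v in bidsvars.items()}
print(props)  # {'subject': '01', 'sex': 'F'}
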
github datalad / datalad / datalad/core/local/run.py
            #
            # FIXME: This covers the predominant command-line case, but, for
            # Python API callers, it means values like ["./script with spaces"]
            # requires additional string-like escaping, which is inconsistent
            # with the handling of multi-item lists (and subprocess's
            # handling). Once we have a way to detect "running from Python API"
            # (discussed in gh-2986), update this.
            command = command[0]
        else:
            if command and command[0] == "--":
                # Strip disambiguation marker. Note: "running from Python API"
                # FIXME from below applies to this too.
                command = command[1:]
            command = " ".join(shlex_quote(c) for c in command)
    else:
        command = assure_unicode(command)
    return command
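
When the command arrives as a list, each element is quoted and joined; when it arrives as a single string (or bytes), assure_unicode alone is enough. A reduced sketch of that branch:

from datalad.utils import assure_unicode
try:
    from shlex import quote as shlex_quote   # Python 3
except ImportError:
    from pipes import quote as shlex_quote   # Python 2 fallback

def normalize_command(command):
    if isinstance(command, list):
        return " ".join(shlex_quote(c) for c in command)
    return assure_unicode(command)

print(normalize_command(['echo', 'two words']))  # echo 'two words'
print(normalize_command(b'echo hi'))             # echo hi (as text)
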
github datalad / datalad / datalad/metadata/search.py
                    lgr.debug(
                        'Added %s on dataset %s',
                        single_or_plural(
                            'document',
                            'documents',
                            idx_size - old_idx_size,
                            include_count=True),
                        old_ds_rpath)
                log_progress(lgr.info, 'autofieldidxbuild',
                             'Indexed dataset at %s', old_ds_rpath,
                             update=1, increment=True)
                old_idx_size = idx_size
                old_ds_rpath = admin['path']
                admin['id'] = res.get('dsid', None)

            doc.update({k: assure_unicode(v) for k, v in admin.items()})
            lgr.debug("Adding document to search index: {}".format(doc))
            # inject into index
            idx.add_document(**doc)
            idx_size += 1

        if old_ds_rpath:
            lgr.debug(
                'Added %s on dataset %s',
                single_or_plural(
                    'document',
                    'documents',
                    idx_size - old_idx_size,
                    include_count=True),
                old_ds_rpath)

        lgr.debug("Committing index")
github datalad / datalad / datalad/cmd.py
def run(self, cmd, env=None, *args, **kwargs):
        out, err = super(GitRunner, self).run(
            cmd, env=self.get_git_environ_adjusted(env), *args, **kwargs)
        # All communication here will be returned as unicode
        # TODO: do that instead within the super's run!
        return assure_unicode(out), assure_unicode(err)
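
GitRunner decodes both output streams in one place so callers always receive text. The same post-processing can be sketched with plain subprocess (git is assumed to be installed):

import subprocess
from datalad.utils import assure_unicode

def run_decoded(cmd):
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = proc.communicate()
    return assure_unicode(out), assure_unicode(err)  # bytes -> str

out, err = run_decoded(['git', '--version'])
print(out.strip())
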
github datalad / datalad-neuroimaging / datalad_neuroimaging/extractors/bids.py
        )
        meta = {
            self._key2stdkey.get(k, k): v
            for k, v in dsdesc_dict.items()
        }

        # TODO maybe normalize labels of standard licenses to definition URIs
        # perform mapping

        README_fname = opj(self.ds.path, 'README')
        if not meta.get('description') and exists(README_fname):
            # BIDS uses README to provide the description, so if one was not
            # explicitly provided (possibly to override a longer README), just
            # load the README
            with open(README_fname, 'rb') as f:
                desc = assure_unicode(f.read())
            meta['description'] = desc.strip()

        # special case
        # Could be None (which we can't strip) or ''
        bids_version = (meta.get('BIDSVersion', '') or '').strip()
        bids_defurl = 'http://bids.neuroimaging.io'
        if bids_version:
            bids_defurl += '/bids_spec{}.pdf'.format(bids_version)
        meta['conformsto'] = bids_defurl
        context['bids'] = {
            # not really a working URL, but BIDS doesn't provide term defs in
            # any accessible way
            '@id': '{}#'.format(bids_defurl),
            'description': 'ad-hoc vocabulary for the Brain Imaging Data Structure (BIDS) standard',
            'type': vocabulary_id,
        }
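
The README fallback reads raw bytes and relies on assure_unicode for decoding, since README files in the wild carry no declared encoding. A minimal sketch of just that fallback:

from os.path import exists, join as opj
from datalad.utils import assure_unicode

README_fname = opj('.', 'README')
if exists(README_fname):
    with open(README_fname, 'rb') as f:
        description = assure_unicode(f.read()).strip()
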
github datalad / datalad / datalad/support/archives.py
    and that leading directory will be removed.
    """
    if not exists(dir_):
        lgr.debug("Creating directory %s to extract archive into" % dir_)
        os.makedirs(dir_)

    with swallow_outputs() as cmo:
        archive = assure_bytes(archive)
        dir_ = assure_bytes(dir_)
        patoolib.util.check_existing_filename(archive)
        patoolib.util.check_existing_filename(dir_, onlyfiles=False)
        # Call protected one to avoid the checks on existence on unixified path
        outdir = unixify_path(dir_)
        if not PY2:
            # should be supplied in PY3 to avoid b''
            outdir = assure_unicode(outdir)
            archive = assure_unicode(archive)

        format_compression = patoolib.get_archive_format(archive)
        if format_compression == ('gzip', None):
            # Yarik fell into the trap of being lazy and not providing proper
            # support for .gz .xz etc "stream archivers" formats in handling
            # of archives. ATM our support for .gz relies on behavior of 7z while
            # extracting them and respecting possibly present .gz filename
            # header field.
            # See more https://github.com/datalad/datalad/pull/3176#issuecomment-466819861
            # TODO: provide proper handling of all those archives without
            # relying on any filename being stored in the header
            program = patoolib.find_archive_program(
                format_compression[0], 'extract')
            if basename(program) != '7z':
                raise MissingExternalDependency(
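
This last excerpt converts the paths to bytes before patool's existence checks and then back to text on Python 3, where the extraction call needs str paths. A reduced sketch of that dance (paths are hypothetical):

import sys
from datalad.utils import assure_bytes, assure_unicode

archive, outdir = 'data.tar.gz', 'extracted'
# bytes for the existence checks ...
archive_b, outdir_b = assure_bytes(archive), assure_bytes(outdir)
if sys.version_info[0] >= 3:
    # ... but text again on Python 3, to avoid passing b'' paths around
    archive, outdir = assure_unicode(archive_b), assure_unicode(outdir_b)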