How to use the luigi.six.iteritems function in luigi

To help you get started, we’ve selected a few luigi examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github spotify / luigi / test / helpers.py View on Github external
        @functools.wraps(fun)
        def wrapper(*args, **kwargs):
            """
            Call ``fun`` while a temporary ``LuigiConfigParser`` is installed as the singleton.

            The temporary parser is filled from the dict returned by
            ``self._make_dict``, which receives a ``{section: {name: value}}``
            snapshot of the currently active configuration. The original parser
            is always put back before returning, whether ``fun`` returns or raises.

            :return: whatever ``fun(*args, **kwargs)`` returns
            """
            import luigi.configuration
            orig_conf = luigi.configuration.LuigiConfigParser.instance()
            new_conf = luigi.configuration.LuigiConfigParser()
            luigi.configuration.LuigiConfigParser._instance = new_conf
            # The try block starts right after the swap so that a failure while
            # building the temporary config (e.g. the parser rejecting a section
            # name or a value) cannot leave the global singleton pointing at a
            # half-populated parser.
            try:
                orig_dict = {k: dict(orig_conf.items(k)) for k in orig_conf.sections()}
                new_dict = self._make_dict(orig_dict)
                for (section, settings) in six.iteritems(new_dict):
                    new_conf.add_section(section)
                    for (name, value) in six.iteritems(settings):
                        new_conf.set(section, name, value)
                return fun(*args, **kwargs)
            finally:
                luigi.configuration.LuigiConfigParser._instance = orig_conf
        return wrapper
github spotify / luigi / luigi / task_register.py View on Github external
def get_all_params(cls):
        """
        Walk the task registry and emit every parameter of every registered task.

        Registry entries equal to ``cls.AMBIGUOUS_CLASS`` are skipped.

        :return: a generator of tuples ``(task_name, not use_cmdline_section, param_name, param_obj)``
                 (TODO: we should make this more elegant)
        """
        for reg_name, reg_cls in six.iteritems(cls._get_reg()):
            if not (reg_cls == cls.AMBIGUOUS_CLASS):
                for p_name, p_obj in reg_cls.get_params():
                    without_section = not reg_cls.use_cmdline_section
                    yield reg_name, without_section, p_name, p_obj
github spotify / luigi / luigi / contrib / hadoop.py View on Github external
def _flush_batch_incr_counter(self):
        """
        Emit every pending (non-zero) counter value and reset it to zero.

        Each key of ``self._counter_dict`` is expanded into positional
        arguments for ``self._incr_counter``, followed by the pending count.
        """
        for counter_key, pending in six.iteritems(self._counter_dict):
            if not (pending == 0):
                call_args = list(counter_key)
                call_args.append(pending)
                self._incr_counter(*call_args)
                # Overwriting an existing key does not resize the dict, so this
                # is safe while iterating.
                self._counter_dict[counter_key] = 0
github spotify / luigi / luigi / task.py View on Github external
def _dump(self):
                    with self.no_unpicklable_properties():
                        pickle.dumps(self)

        """
        unpicklable_properties = tuple(luigi.worker.TaskProcess.forward_reporter_attributes.values())
        reserved_properties = {}
        for property_name in unpicklable_properties:
            if hasattr(self, property_name):
                reserved_properties[property_name] = getattr(self, property_name)
                setattr(self, property_name, 'placeholder_during_pickling')

        yield

        for property_name, value in six.iteritems(reserved_properties):
            setattr(self, property_name, value)
github spotify / luigi / luigi / batch_notifier.py View on Github external
def _email_body(self, fail_counts, disable_counts, scheduling_counts, fail_expls):
        """
        Assemble the text of the notification e-mail.

        One explanation body is built per task named in ``fail_counts``; the
        explanations are grouped via ``self._task_expl_groups``, sorted with
        ``self._expls_key``, and rendered as alternating task lines and
        messages. Empty lines are dropped and trailing whitespace is stripped.

        :return: the body text, wrapped in ``<ul>`` tags when ``self._email_format`` is ``'html'``
        """
        explanations = {
            (task, n_failed, disable_counts[task], scheduling_counts[task]): self._expl_body(fail_expls[task])
            for task, n_failed in six.iteritems(fail_counts)
        }
        ordered_groups = sorted(self._task_expl_groups(explanations), key=self._expls_key)

        lines = []
        for group_tasks, group_msg in ordered_groups:
            lines.extend((self._format_tasks(group_tasks), group_msg))

        non_empty = [line for line in lines if line]
        text = six.u('\n').join(non_empty).rstrip()

        if self._email_format == 'html':
            return six.u('<ul>\n{}\n</ul>').format(text)
        return text
github groupon / luigi-warehouse / luigi_warehouse / sources.py View on Github external
def _get_s3_config(self, key=None, s3_cfg='s3'):
        """
        Read the ``s3_cfg`` section of the luigi configuration as a dict.

        Values that parse as integers are converted to ``int``; all others are
        left as they are.

        :param key: when truthy, return only this option's value (``None`` if absent)
        :param s3_cfg: name of the configuration section to read
        :return: the whole settings dict, a single value, or ``{}`` when the section does not exist
        """
        try:
            section_items = luigi.configuration.get_config().items(s3_cfg)
            settings = dict(section_items)
        except NoSectionError:
            return {}

        # Coerce integer-looking values (ports etc.) so callers do not have to
        # declare a type for every option.
        for option, raw in luigi.six.iteritems(settings):
            try:
                as_int = int(raw)
            except ValueError:
                continue
            settings[option] = as_int

        return settings.get(key) if key else settings
github spotify / luigi / luigi / tools / range.py View on Github external
Makes a wildcard expression for the set, a bit readable, e.g. [1-5].
        """
        chars = sorted(chars)
        if len(chars) > 1 and ord(chars[-1]) - ord(chars[0]) == len(chars) - 1:
            return '[%s-%s]' % (chars[0], chars[-1])
        else:
            return '[%s]' % ''.join(chars)

    current = {glob: paths}
    while True:
        pos = list(current.keys())[0].find('[0-9]')
        if pos == -1:
            # no wildcard expressions left to specialize in the glob
            return list(current.keys())
        char_sets = {}
        for g, p in six.iteritems(current):
            char_sets[g] = sorted({path[pos] for path in p})
        if sum(len(s) for s in char_sets.values()) > limit:
            return [g.replace('[0-9]', digit_set_wildcard(char_sets[g]), 1) for g in current]
        for g, s in six.iteritems(char_sets):
            for c in s:
                new_glob = g.replace('[0-9]', c, 1)
                new_paths = list(filter(lambda p: p[pos] == c, current[g]))
                current[new_glob] = new_paths
            del current[g]
github spotify / luigi / luigi / task.py View on Github external
.. code-block:: python

        >>> sorted(flatten({'a': 'foo', 'b': 'bar'}))
        ['bar', 'foo']
        >>> sorted(flatten(['foo', ['bar', 'troll']]))
        ['bar', 'foo', 'troll']
        >>> flatten('foo')
        ['foo']
        >>> flatten(42)
        [42]
    """
    if struct is None:
        return []
    flat = []
    if isinstance(struct, dict):
        for _, result in six.iteritems(struct):
            flat += flatten(result)
        return flat
    if isinstance(struct, six.string_types):
        return [struct]

    try:
        # if iterable
        iterator = iter(struct)
    except TypeError:
        return [struct]

    for result in iterator:
        flat += flatten(result)
    return flat
github spotify / luigi / luigi / contrib / hadoop.py View on Github external
# Add static files and directories
        extra_files = get_extra_files(job.extra_files())

        files = []
        for src, dst in extra_files:
            dst_tmp = '%s_%09d' % (dst.replace('/', '_'), random.randint(0, 999999999))
            files += ['%s#%s' % (src, dst_tmp)]
            # -files doesn't support subdirectories, so we need to create the dst_tmp -> dst manually
            job.add_link(dst_tmp, dst)

        if files:
            arglist += ['-files', ','.join(files)]

        jobconfs = job.jobconfs()

        for k, v in six.iteritems(self.jobconfs):
            jobconfs.append('%s=%s' % (k, v))

        for conf in jobconfs:
            arglist += ['-D', conf]

        arglist += self.streaming_args

        # Add additional non-generic per-job streaming args
        extra_streaming_args = job.extra_streaming_arguments()
        for (arg, value) in extra_streaming_args:
            if not arg.startswith('-'):  # safety first
                arg = '-' + arg
            arglist += [arg, value]

        arglist += ['-mapper', map_cmd]