Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
@functools.wraps(fun)
def wrapper(*args, **kwargs):
    # Runs ``fun`` with a temporarily replaced global Luigi configuration:
    # a fresh LuigiConfigParser is installed as the singleton, filled with
    # the sections/options that ``self._make_dict`` derives from a snapshot
    # of the original configuration, and the original instance is restored
    # once ``fun`` has returned or raised.  The return value of ``fun`` is
    # passed through unchanged.
    import luigi.configuration
    orig_conf = luigi.configuration.LuigiConfigParser.instance()
    new_conf = luigi.configuration.LuigiConfigParser()
    luigi.configuration.LuigiConfigParser._instance = new_conf
    # The singleton is swapped from this point on, so everything that
    # follows -- including populating the replacement config -- must run
    # under the ``finally`` that restores it.  Otherwise an error while
    # building ``new_conf`` would leave a half-built config installed.
    try:
        # Snapshot of the original config as {section: {option: value}}.
        orig_dict = {k: dict(orig_conf.items(k)) for k in orig_conf.sections()}
        new_dict = self._make_dict(orig_dict)
        for (section, settings) in six.iteritems(new_dict):
            new_conf.add_section(section)
            for (name, value) in six.iteritems(settings):
                new_conf.set(section, name, value)
        return fun(*args, **kwargs)
    finally:
        luigi.configuration.LuigiConfigParser._instance = orig_conf
return wrapper
def get_all_params(cls):
    """
    Iterates over the parameters of every :py:class:`Task` in the registry.

    Registry entries that compare equal to ``cls.AMBIGUOUS_CLASS`` are skipped.

    :return: a generator of 4-tuples ``(task_name, flag, param_name, param_obj)``
             where ``flag`` is ``not task_cls.use_cmdline_section``.
    """
    registry = cls._get_reg()
    for reg_name, reg_cls in six.iteritems(registry):
        skip_entry = reg_cls == cls.AMBIGUOUS_CLASS
        if skip_entry:
            continue
        for p_name, p_obj in reg_cls.get_params():
            without_section = not reg_cls.use_cmdline_section
            yield reg_name, without_section, p_name, p_obj
def _flush_batch_incr_counter(self):
    """
    Reports every counter that holds a non-zero value and resets it to zero.
    """
    for counter_key, pending in six.iteritems(self._counter_dict):
        if pending == 0:
            # nothing to report for this counter
            continue
        # the items of the key are the leading arguments, the amount comes last
        self._incr_counter(*(list(counter_key) + [pending]))
        self._counter_dict[counter_key] = 0
def _dump(self):
with self.no_unpicklable_properties():
pickle.dumps(self)
"""
unpicklable_properties = tuple(luigi.worker.TaskProcess.forward_reporter_attributes.values())
reserved_properties = {}
for property_name in unpicklable_properties:
if hasattr(self, property_name):
reserved_properties[property_name] = getattr(self, property_name)
setattr(self, property_name, 'placeholder_during_pickling')
yield
for property_name, value in six.iteritems(reserved_properties):
setattr(self, property_name, value)
def _email_body(self, fail_counts, disable_counts, scheduling_counts, fail_expls):
    """
    Builds the text of the notification e-mail.

    Every name in ``fail_counts`` is keyed by
    ``(name, fail_count, disable_count, scheduling_count)`` and mapped to the
    result of ``self._expl_body`` for its explanations.  The groups returned
    by ``self._task_expl_groups`` are sorted with ``self._expls_key`` and
    emitted as a formatted task line followed by the message; empty lines
    are dropped.

    :return: the body text, wrapped in ``<ul>`` tags when the e-mail
             format is ``'html'``.
    """
    explanations = {
        (task, count, disable_counts[task], scheduling_counts[task]): self._expl_body(fail_expls[task])
        for task, count in six.iteritems(fail_counts)
    }
    groups = sorted(self._task_expl_groups(explanations), key=self._expls_key)

    lines = []
    for group_tasks, message in groups:
        lines.extend((self._format_tasks(group_tasks), message))

    non_empty = [line for line in lines if line]
    body = six.u('\n').join(non_empty).rstrip()

    if self._email_format == 'html':
        body = six.u('<ul>\n{}\n</ul>').format(body)
    return body
def _get_s3_config(self, key=None, s3_cfg='s3'):
    """
    Reads the ``s3_cfg`` section of the Luigi configuration.

    :param key: optional option name; when truthy only that option's value
                is returned (``None`` if the option is absent).
    :param s3_cfg: name of the configuration section to read.
    :return: the section as a dict (or the single requested value), with
             every value that ``int()`` accepts converted to an integer.
             An empty dict is returned when the section does not exist,
             regardless of ``key``.
    """
    try:
        settings = dict(luigi.configuration.get_config().items(s3_cfg))
    except NoSectionError:
        return {}

    # Turn integer-looking values (e.g. ports) into ints so callers do not
    # have to convert each option themselves; everything else stays as is.
    for option, raw_value in luigi.six.iteritems(settings):
        try:
            as_int = int(raw_value)
        except ValueError:
            continue
        settings[option] = as_int

    return settings.get(key) if key else settings
Makes a reasonably readable wildcard expression for the set of characters, e.g. [1-5].
"""
chars = sorted(chars)
if len(chars) > 1 and ord(chars[-1]) - ord(chars[0]) == len(chars) - 1:
return '[%s-%s]' % (chars[0], chars[-1])
else:
return '[%s]' % ''.join(chars)
current = {glob: paths}
while True:
pos = list(current.keys())[0].find('[0-9]')
if pos == -1:
# no wildcard expressions left to specialize in the glob
return list(current.keys())
char_sets = {}
for g, p in six.iteritems(current):
char_sets[g] = sorted({path[pos] for path in p})
if sum(len(s) for s in char_sets.values()) > limit:
return [g.replace('[0-9]', digit_set_wildcard(char_sets[g]), 1) for g in current]
for g, s in six.iteritems(char_sets):
for c in s:
new_glob = g.replace('[0-9]', c, 1)
new_paths = list(filter(lambda p: p[pos] == c, current[g]))
current[new_glob] = new_paths
del current[g]
.. code-block:: python
>>> sorted(flatten({'a': 'foo', 'b': 'bar'}))
['bar', 'foo']
>>> sorted(flatten(['foo', ['bar', 'troll']]))
['bar', 'foo', 'troll']
>>> flatten('foo')
['foo']
>>> flatten(42)
[42]
"""
if struct is None:
return []
flat = []
if isinstance(struct, dict):
for _, result in six.iteritems(struct):
flat += flatten(result)
return flat
if isinstance(struct, six.string_types):
return [struct]
try:
# if iterable
iterator = iter(struct)
except TypeError:
return [struct]
for result in iterator:
flat += flatten(result)
return flat
# Add static files and directories
extra_files = get_extra_files(job.extra_files())
files = []
for src, dst in extra_files:
dst_tmp = '%s_%09d' % (dst.replace('/', '_'), random.randint(0, 999999999))
files += ['%s#%s' % (src, dst_tmp)]
# -files doesn't support subdirectories, so we need to create the dst_tmp -> dst manually
job.add_link(dst_tmp, dst)
if files:
arglist += ['-files', ','.join(files)]
jobconfs = job.jobconfs()
for k, v in six.iteritems(self.jobconfs):
jobconfs.append('%s=%s' % (k, v))
for conf in jobconfs:
arglist += ['-D', conf]
arglist += self.streaming_args
# Add additional non-generic per-job streaming args
extra_streaming_args = job.extra_streaming_arguments()
for (arg, value) in extra_streaming_args:
if not arg.startswith('-'): # safety first
arg = '-' + arg
arglist += [arg, value]
arglist += ['-mapper', map_cmd]