How to use the mrjob.conf.combine_dicts function in mrjob

To help you get started, we’ve selected a few mrjob.conf.combine_dicts examples, based on popular ways it is used in public projects.

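For reference, a minimal sketch of what combine_dicts() does, based on the tests below: it shallow-merges any number of dicts left to right, values from later dicts take precedence, and None arguments are skipped entirely.

from mrjob.conf import combine_dicts

# no arguments yields an empty dict
assert combine_dicts() == {}

# later dicts win; None arguments are ignored
assert combine_dicts({'FOO': '1', 'BAR': '2'}, None, {'FOO': '3'}) == \
    {'FOO': '3', 'BAR': '2'}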

github Yelp / mrjob / tests / test_conf.py
def test_empty(self):
        self.assertEqual(combine_dicts(), {})
github Yelp / mrjob / tests / mock_boto3 / util.py
def __iter__(self):
        result = self._make_request()

        values = result[self.result_key]

        for page_start in range(0, len(values), self.page_size):
            page = values[page_start:page_start + self.page_size]
            yield combine_dicts(result, {self.result_key: page})
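Here combine_dicts() builds each page as a shallow copy of the full response with only the paged key replaced, so any other fields in the response carry over unchanged. A hypothetical illustration (the response shape and key names are made up, not real boto3 data):

result = {'Clusters': [1, 2, 3, 4, 5], 'Marker': 'abc'}  # made-up response
page = combine_dicts(result, {'Clusters': [1, 2]})
# page == {'Clusters': [1, 2], 'Marker': 'abc'}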
github Yelp / mrjob / tests / test_hadoop.py
def test_spark_step(self):
        job = MRNullSpark(['-r', 'hadoop', '--cmdenv', 'FOO=bar'])
        job.sandbox()

        with job.make_runner() as runner:
            self.assertEqual(
                runner._env_for_step(0),
                combine_dicts(os.environ,
                              dict(FOO='bar', PYSPARK_PYTHON=PYTHON_BIN))
            )
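The same pattern works for environment overlays: start from os.environ and layer job-specific variables on top. A minimal sketch, assuming only that combine_dicts() copies its arguments left to right:

import os
from mrjob.conf import combine_dicts

env = combine_dicts(os.environ, {'FOO': 'bar'})
assert env['FOO'] == 'bar'                 # override/addition applied
assert env['PATH'] == os.environ['PATH']   # everything else carried over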
github Yelp / mrjob / tests / test_conf.py
def test_skip_None(self):
        self.assertEqual(
            combine_dicts(None, {'USER': 'dave'}, None,
                          {'TERM': 'xterm'}, None),
            {'USER': 'dave', 'TERM': 'xterm'})
github Yelp / mrjob / mrjob / launch.py
def _job_kwargs(self):
        """Keyword arguments to the runner class that can be specified
        by the job/launcher itself."""
        # use the most basic combiners; leave magic like resolving paths
        # and blanking out jobconf values to the runner
        return dict(
            # command-line has the final say on jobconf and libjars
            jobconf=combine_dicts(
                self.jobconf(), self.options.jobconf),
            libjars=combine_lists(
                self.libjars(), self.options.libjars),
            partitioner=self.partitioner(),
            sort_values=self.sort_values(),
            # TODO: should probably put self.options last below for consistency
            upload_archives=combine_lists(
                self.options.upload_archives, self.archives()),
            upload_dirs=combine_lists(
                self.options.upload_dirs, self.dirs()),
            upload_files=combine_lists(
                self.options.upload_files, self.files()),
        )
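The "final say" comment above works because combine_dicts() gives later arguments precedence, so self.options.jobconf (from the command line) overrides values the job class itself returns. A sketch with made-up jobconf values:

job_jobconf = {'mapreduce.job.reduces': '1'}     # from the job class
switch_jobconf = {'mapreduce.job.reduces': '4'}  # from --jobconf
combine_dicts(job_jobconf, switch_jobconf)
# -> {'mapreduce.job.reduces': '4'}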
github Yelp / mrjob / mrjob / sim.py
def _opt_combiners(self):
        """Combine *cmdenv* with :py:func:`~mrjob.conf.combine_local_envs`"""
        return combine_dicts(
            super(SimMRJobRunner, self)._opt_combiners(),
            dict(cmdenv=combine_local_envs),
        )
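_opt_combiners() maps option names to combiner functions, and combine_dicts() lets a subclass override a single entry while inheriting the rest. The same pattern in a hypothetical runner subclass (MyRunner and the choice of combine_envs are illustrative, not from mrjob):

from mrjob.conf import combine_dicts, combine_envs
from mrjob.sim import SimMRJobRunner

class MyRunner(SimMRJobRunner):  # hypothetical subclass
    def _opt_combiners(self):
        # swap in a different combiner for cmdenv; every other
        # option keeps the combiner the parent class defined
        return combine_dicts(
            super(MyRunner, self)._opt_combiners(),
            dict(cmdenv=combine_envs),
        )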
github Yelp / mrjob / mrjob / launch.py
def _runner_kwargs(self):
        # just use combine_dicts() and not combine_confs(); leave the
        # magic to the runner
        return combine_dicts(
            self._non_option_kwargs(),
            # don't screen out irrelevant opts (see #1898)
            self._kwargs_from_switches(set(_RUNNER_OPTS)),
            self._job_kwargs(),
        )
github Yelp / mrjob / mrjob / options.py
# … (snippet begins mid-way through an earlier option's definition)
' many megabytes. Default is 100 MiB. Set to 0 to'
                      ' disable multipart uploading entirely.'),
                type=float,
            )),
        ],
    ),
    cluster_id=dict(
        switches=[
            (['--cluster-id'], dict(
                help='ID of an existing cluster to run our job on',
            )),
        ],
    ),
    cluster_properties=dict(
        cloud_role='launch',
        combiner=combine_dicts,
        switches=[
            (['--cluster-property'], dict(
                action=_KeyValueAction,
                help=('Properties to set in Hadoop config files on Dataproc.'
                      ' Args take the form file_prefix:property=value.'
                      ' You can use --cluster-property multiple times.'
                      ' For more info, see'
                      ' https://cloud.google.com/dataproc/docs/concepts'
                      '/configuring-clusters/cluster-properties'),
            )),
        ],
    ),
    cmdenv=dict(
        combiner=combine_envs,
        switches=[
            (['--cmdenv'], dict(
                # … (snippet truncated)
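Setting combiner=combine_dicts on an opt like cluster_properties above means values from each config source are merged rather than replaced, with later sources (e.g. the command line) taking precedence. A hedged sketch with made-up property values:

from mrjob.conf import combine_dicts

from_mrjob_conf = {'core:io.file.buffer.size': '65536'}  # made-up value
from_command_line = {'hdfs:dfs.replication': '2'}        # made-up value
combine_dicts(from_mrjob_conf, from_command_line)
# -> {'core:io.file.buffer.size': '65536', 'hdfs:dfs.replication': '2'}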