How to use the mrjob.conf.combine_lists function in mrjob

To help you get started, we’ve selected a few mrjob examples that show popular ways combine_lists is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github Yelp / mrjob / tests / test_conf.py View on Github external
def test_mix_lists_and_scalars(self):
    # lists, tuples and bare scalars are passed side by side; the
    # expected result is one flat list in argument order
    expected = [1, 2, 3, 4, 5, 6]
    combined = combine_lists([1, 2], 3, (4, 5), 6)
    self.assertEqual(combined, expected)
github Yelp / mrjob / tests / test_conf.py View on Github external
def test_concatenation(self):
    # a None argument sits between the two sequences and is expected
    # to contribute nothing to the combined list
    expected = [1, 2, 3, 4]
    combined = combine_lists([1, 2], None, (3, 4))
    self.assertEqual(combined, expected)
github Yelp / mrjob / mrjob / launch.py View on Github external
def _job_kwargs(self):
    """Build the runner keyword arguments that the job/launcher itself
    is able to specify.

    Only the most basic combiners are applied here; magic such as
    resolving paths and blanking out jobconf values is left to the
    runner.
    """
    # command-line options go last for jobconf and libjars, so that the
    # command line has the final say
    jobconf = combine_dicts(self.jobconf(), self.options.jobconf)
    libjars = combine_lists(self.libjars(), self.options.libjars)

    partitioner = self.partitioner()
    sort_values = self.sort_values()

    # TODO: should probably put self.options last below for consistency
    upload_archives = combine_lists(
        self.options.upload_archives, self.archives())
    upload_dirs = combine_lists(
        self.options.upload_dirs, self.dirs())
    upload_files = combine_lists(
        self.options.upload_files, self.files())

    return {
        'jobconf': jobconf,
        'libjars': libjars,
        'partitioner': partitioner,
        'sort_values': sort_values,
        'upload_archives': upload_archives,
        'upload_dirs': upload_dirs,
        'upload_files': upload_files,
    }
github Yelp / mrjob / mrjob / launch.py View on Github external
"""Keyword arguments to the runner class that can be specified
        by the job/launcher itself."""
        # use the most basic combiners; leave magic like resolving paths
        # and blanking out jobconf values to the runner
        return dict(
            # command-line has the final say on jobconf and libjars
            jobconf=combine_dicts(
                self.jobconf(), self.options.jobconf),
            libjars=combine_lists(
                self.libjars(), self.options.libjars),
            partitioner=self.partitioner(),
            sort_values=self.sort_values(),
            # TODO: should probably put self.options last below for consistency
            upload_archives=combine_lists(
                self.options.upload_archives, self.archives()),
            upload_dirs=combine_lists(
                self.options.upload_dirs, self.dirs()),
            upload_files=combine_lists(
                self.options.upload_files, self.files()),
        )
github Yelp / mrjob / mrjob / launch.py View on Github external
# use the most basic combiners; leave magic like resolving paths
        # and blanking out jobconf values to the runner
        return dict(
            # command-line has the final say on jobconf and libjars
            jobconf=combine_dicts(
                self.jobconf(), self.options.jobconf),
            libjars=combine_lists(
                self.libjars(), self.options.libjars),
            partitioner=self.partitioner(),
            sort_values=self.sort_values(),
            # TODO: should probably put self.options last below for consistency
            upload_archives=combine_lists(
                self.options.upload_archives, self.archives()),
            upload_dirs=combine_lists(
                self.options.upload_dirs, self.dirs()),
            upload_files=combine_lists(
                self.options.upload_files, self.files()),
        )
github Yelp / mrjob / mrjob / job.py View on Github external
def _job_kwargs(self):
    """Build the runner keyword arguments that the job/launcher itself
    is able to specify.

    Only the most basic combiners are applied here; magic such as
    resolving paths and blanking out jobconf values is left to the
    runner.
    """
    # command-line options go last for jobconf and libjars, so that the
    # command line has the final say
    jobconf = combine_dicts(self.jobconf(), self.options.jobconf)
    libjars = combine_lists(self.libjars(), self.options.libjars)

    partitioner = self.partitioner()
    sort_values = self.sort_values()

    # TODO: should probably put self.options last below for consistency
    upload_archives = combine_lists(
        self.options.upload_archives, self.archives())
    upload_dirs = combine_lists(
        self.options.upload_dirs, self.dirs())
    upload_files = combine_lists(
        self.options.upload_files, self.files())

    return {
        'jobconf': jobconf,
        'libjars': libjars,
        'partitioner': partitioner,
        'sort_values': sort_values,
        'upload_archives': upload_archives,
        'upload_dirs': upload_dirs,
        'upload_files': upload_files,
    }
github Yelp / mrjob / mrjob / options.py View on Github external
#   'append' and None otherwise.
#
# the list of which options apply to which runner is in the runner class
# itself (e.g. EMRJobRunner.OPT_NAMES)
_RUNNER_OPTS = dict(
    additional_emr_info=dict(
        cloud_role='launch',
        switches=[
            (['--additional-emr-info'], dict(
                help='A JSON string for selecting additional features on EMR',
            )),
        ],
    ),
    applications=dict(
        cloud_role='launch',
        combiner=combine_lists,
        switches=[
            (['--applications', '--application'], dict(
                action=_AppendCommaSeparatedItemsAction,
                help=('Additional applications to run on 4.x and 5.x'
                      ' AMIs, separated by commas (e.g.'
                      ' "Ganglia,Spark")'),
            )),
        ],
    ),
    aws_access_key_id=dict(
        cloud_role='connect',
    ),
    aws_secret_access_key=dict(
        cloud_role='connect',
    ),
    aws_session_token=dict(
github Yelp / mrjob / mrjob / launch.py View on Github external
def _job_kwargs(self):
    """Build the runner keyword arguments that the job/launcher itself
    is able to specify.

    Only the most basic combiners are applied here; magic such as
    resolving paths and blanking out jobconf values is left to the
    runner.
    """
    # command-line options go last for jobconf and libjars, so that the
    # command line has the final say
    jobconf = combine_dicts(self.jobconf(), self.options.jobconf)
    libjars = combine_lists(self.libjars(), self.options.libjars)

    partitioner = self.partitioner()
    sort_values = self.sort_values()

    # TODO: should probably put self.options last below for consistency
    upload_archives = combine_lists(
        self.options.upload_archives, self.archives())
    upload_dirs = combine_lists(
        self.options.upload_dirs, self.dirs())
    upload_files = combine_lists(
        self.options.upload_files, self.files())

    return {
        'jobconf': jobconf,
        'libjars': libjars,
        'partitioner': partitioner,
        'sort_values': sort_values,
        'upload_archives': upload_archives,
        'upload_dirs': upload_dirs,
        'upload_files': upload_files,
    }