How to use the mrjob.step.MRStep class in mrjob

To help you get started, we’ve selected a few mrjob examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github Yelp / mrjob / tests / test_step.py View on Github external
def _test_explicit(self, m=False, c=False, r=False, **kwargs):
        """Build an MRStep from ``kwargs`` and check its explicit-task flags.

        ``m``, ``c`` and ``r`` are the expected values of the step's
        ``has_explicit_mapper``, ``has_explicit_combiner`` and
        ``has_explicit_reducer`` attributes, respectively.
        """
        step = MRStep(**kwargs)
        # Checked in the same order as before: mapper, combiner, reducer.
        expected_flags = (
            ('has_explicit_mapper', m),
            ('has_explicit_combiner', c),
            ('has_explicit_reducer', r),
        )
        for attr_name, expected in expected_flags:
            self.assertEqual(getattr(step, attr_name), expected)
github Yelp / mrjob / tests / test_job.py View on Github external
def test_mapper_combiner(self):
        # A single step with a mapper and a combiner, but no reducer.
        steps = [
            MRStep(mapper=self._yield_none, combiner=self._yield_none),
        ]
        # One dict per step, mapping each task to its expected protocol pair
        # (presumably (read, write) -- confirm against
        # _assert_script_protocols).
        expected_protocols = [
            {
                'mapper': (PickleProtocol, JSONValueProtocol),
                'combiner': (JSONValueProtocol, JSONValueProtocol),
            },
        ]
        self._assert_script_protocols(steps, expected_protocols)
github Yelp / mrjob / tests / test_step.py View on Github external
def test_render_reducer_pre_filter(self):
        self.assertEqual(
            MRStep(
                reducer=identity_reducer,
                reducer_pre_filter='cat').description(1),
            {
                'type': 'streaming',
                'reducer': {
                    'type': 'script',
                    'pre_filter': 'cat',
                },
github Yelp / mrjob / tests / mr_count_lines_by_filename.py View on Github external
def steps(self):
        """Return this job's two steps.

        The first step pairs ``self.mapper`` with ``self.reducer``; the
        second is a mapper-only step running ``self.mapper2``.
        """
        map_reduce_step = MRStep(mapper=self.mapper, reducer=self.reducer)
        map_only_step = MRStep(mapper=self.mapper2)
        return [map_reduce_step, map_only_step]
github Yelp / mrjob / tests / test_job.py View on Github external
def test_mapper_combiner_reducer(self):
        # A single step that defines all three tasks.
        steps = [
            MRStep(mapper=self._yield_none,
                   combiner=self._yield_none,
                   reducer=self._yield_none),
        ]
        # One dict per step, mapping each task to its expected protocol pair
        # (presumably (read, write) -- confirm against
        # _assert_script_protocols).
        expected_protocols = [
            {
                'mapper': (PickleProtocol, JSONProtocol),
                'combiner': (JSONProtocol, JSONProtocol),
                'reducer': (JSONProtocol, JSONValueProtocol),
            },
        ]
        self._assert_script_protocols(steps, expected_protocols)
github Yelp / mrjob / tests / spark / test_runner.py View on Github external
def steps(self):
                """Return four mapper-only steps that differ only in jobconf.

                The first step sets no jobconf, the next two each set
                ``foo='bar'`` (with separate dict objects), and the last
                sets ``foo='baz'``.
                """
                no_conf = MRStep(mapper=self.mapper)
                first_bar = MRStep(mapper=self.mapper, jobconf={'foo': 'bar'})
                second_bar = MRStep(mapper=self.mapper, jobconf={'foo': 'bar'})
                baz = MRStep(mapper=self.mapper, jobconf={'foo': 'baz'})
                return [no_conf, first_bar, second_bar, baz]
github Yelp / mrjob / mrjob / job.py View on Github external
def _combine_or_reduce_pairs(self, pairs, mrc, step_num=0):
        """Helper for :py:meth:`combine_pairs` and :py:meth:`reduce_pairs`."""
        step = self._get_step(step_num, MRStep)

        task = step[mrc]
        task_init = step[mrc + '_init']
        task_final = step[mrc + '_final']
        if task is None:
            raise ValueError('No %s in step %d' % (mrc, step_num))

        if task_init:
            for k, v in task_init() or ():
                yield k, v

        # group all values of the same key together, and pass to the reducer
        #
        # be careful to use generators for everything, to allow for
        # very large groupings of values
        for key, pairs_for_key in itertools.groupby(pairs, lambda k_v: k_v[0]):
github AmazaspShumik / MapReduce-Machine-Learning / Gaussian Mixture Model MapReduce / IterationGaussianMixtureMR.py View on Github external
def steps(self):
        """Return the job's single step.

        The step wires up ``mapper_gmm_init``, ``mapper_gmm`` and
        ``mapper_final_gmm`` as the mapper's init/main/final hooks, with
        ``reducer_gmm`` as the reducer.
        """
        gmm_step = MRStep(
            mapper_init=self.mapper_gmm_init,
            mapper=self.mapper_gmm,
            mapper_final=self.mapper_final_gmm,
            reducer=self.reducer_gmm,
        )
        return [gmm_step]
github alexcomu / hadoop-mapreduce / src / 02-advanced-mapreduce / 07_most_rated_movie.py View on Github external
def steps(self):
        """Return the job's two steps.

        Step one runs ``get_movies_rating`` as mapper and
        ``reducer_movie_rating`` as reducer; step two is reducer-only and
        runs ``reducer_output``.
        """
        rating_step = MRStep(
            mapper=self.get_movies_rating,
            reducer=self.reducer_movie_rating,
        )
        output_step = MRStep(reducer=self.reducer_output)
        return [rating_step, output_step]
github alexcomu / hadoop-mapreduce / src / 02-advanced-mapreduce / 09_most_popular_superhero_ver2.py View on Github external
def steps(self):
        """Return the job's two steps.

        Step one runs ``mapper_count_friends_per_line`` with
        ``reducer_combine_friends``. Step two runs ``mapper_prep_for_sort``
        (with ``load_name_dictionary`` as its mapper_init hook) and
        ``reducer_find_max_friends``.
        """
        count_step = MRStep(
            mapper=self.mapper_count_friends_per_line,
            reducer=self.reducer_combine_friends,
        )
        max_step = MRStep(
            mapper=self.mapper_prep_for_sort,
            mapper_init=self.load_name_dictionary,
            reducer=self.reducer_find_max_friends,
        )
        return [count_step, max_step]