How to use the mrjob.step.StepFailedException class in mrjob

To help you get started, we’ve selected a few mrjob examples based on popular ways StepFailedException is used in public projects.

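StepFailedException lives in mrjob.step and is what every runner raises when a step exits non-zero. Here is a minimal sketch of catching it around runner.run(); the MRMyJob class, its module, and the input path are hypothetical:

from mrjob.step import StepFailedException

from my_job import MRMyJob  # hypothetical job module

job = MRMyJob(['-r', 'local', 'input.txt'])

with job.make_runner() as runner:
    try:
        runner.run()
    except StepFailedException as e:
        # the runner has already logged the probable cause in detail;
        # the exception itself is a one-line summary of which step failed
        print('job failed: %s' % e)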

Yelp/mrjob · tests/test_step.py (view on GitHub)
def test_step_num(self):
        ex = StepFailedException(step_num=0)
        # step_num is 0-indexed, but the message counts steps from 1
        self.assertEqual(str(ex), 'Step 1 failed')
        self.assertEqual(repr(ex), 'StepFailedException(step_num=0)')

Yelp/mrjob · tests/test_hadoop.py (view on GitHub)
def test_no_such_file(self):
        missing_hadoop_bin = os.path.join(self.tmp_dir, 'no-hadoop-here')

        job = MRTwoStepJob(['-r', 'hadoop'])
        job.sandbox()

        with job.make_runner() as runner:
            self.patch_args_for_step(runner, missing_hadoop_bin)
            self.assertRaises(StepFailedException, runner.run)
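
Note that the failure here isn't in user code at all: the runner can't even invoke the hadoop binary, and that still surfaces as a StepFailedException rather than a bare OSError. A sketch of the same situation outside the test harness, using --hadoop-bin (a real mrjob option) and MRTwoStepJob from mrjob's own test suite; the binary path is made up:

from mrjob.step import StepFailedException
from tests.mr_two_step_job import MRTwoStepJob

job = MRTwoStepJob(['-r', 'hadoop', '--hadoop-bin', '/no/such/hadoop'])
job.sandbox()

with job.make_runner() as runner:
    try:
        runner.run()
    except StepFailedException as e:
        print(e)  # e.g. 'Step 1 of 2 failed: ...'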

Yelp/mrjob · tests/spark/test_runner.py (view on GitHub)
def test_spark_job_failure(self):
        job = MRSparKaboom(['-r', 'spark'])
        job.sandbox(stdin=BytesIO(b'line\n'))

        with job.make_runner() as runner:
            self.assertRaises(StepFailedException, runner.run)

Yelp/mrjob · tests/test_step.py (view on GitHub)
def test_num_steps_with_no_step_num(self):
        ex = StepFailedException(num_steps=4)
        # without a step_num, the message omits step numbering entirely
        self.assertEqual(str(ex), 'Step failed')
        self.assertEqual(repr(ex), 'StepFailedException(num_steps=4)')
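
As the runner code further down shows, the constructor also accepts reason and num_steps together, and the Spark runner adds last_step_num. A hedged sketch, assuming the keyword arguments are stored as attributes of the same names:

from mrjob.step import StepFailedException

ex = StepFailedException(reason='OOM in reducer', step_num=0, num_steps=4)

# the instance keeps the 0-indexed value...
assert ex.step_num == 0
# ...while the rendered message counts from 1
print(str(ex))  # e.g. 'Step 1 of 4 failed: OOM in reducer'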

Yelp/mrjob · tests/test_emr_pooling.py (view on GitHub)
def test_dont_destroy_other_pooled_cluster_on_failure(self):
        # Issue 242: job failure shouldn't kill the pooled clusters
        _, cluster_id = self.make_pooled_cluster()

        self.mock_emr_failures = set([(cluster_id, 0)])

        mr_job = MRTwoStepJob(['-r', 'emr', '-v',
                               '--pool-clusters'])
        mr_job.sandbox()

        with mr_job.make_runner() as runner:
            self.assertIsInstance(runner, EMRJobRunner)
            self.prepare_runner_for_ssh(runner)
            self.assertRaises(StepFailedException, runner.run)

            self.assertEqual(runner.get_cluster_id(), cluster_id)

            for _ in range(10):
                self.simulate_emr_progress(runner.get_cluster_id())

            cluster = runner._describe_cluster()
            self.assertEqual(cluster['Status']['State'], 'WAITING')

        # cluster shouldn't get terminated by cleanup
        for _ in range(10):
            self.simulate_emr_progress(runner.get_cluster_id())

        cluster = runner._describe_cluster()
        self.assertEqual(cluster['Status']['State'], 'WAITING')

Yelp/mrjob · tests/examples/test_mr_spark_most_used_word.py (view on GitHub)
def test_empty(self):
        # this doesn't work in the inline runner because Spark
        # doesn't have a working dir to upload stop_words.txt to
        # (see the full test file for what does and doesn't work
        # in the inline runner)
        job = MRSparkMostUsedWord(['-r', 'local'])
        job.sandbox()

        with job.make_runner() as runner:
            # still doesn't work because you can't run max() on no records
            self.assertRaises(StepFailedException, runner.run)
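
The comment above is ordinary Python behavior: max() has no identity element, so calling it on an empty sequence raises ValueError, which the runner then surfaces as a StepFailedException. For illustration:

try:
    max([])
except ValueError as e:
    print(e)  # max() arg is an empty sequence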

Yelp/mrjob · tests/test_bin.py (view on GitHub)
def test_non_oserror_exception(self):
        self.start(patch('os.execvpe', side_effect=KeyboardInterrupt))

        job = MRSparkWordcount(['-r', 'local'])
        job.sandbox()

        with job.make_runner() as runner:
            self.assertRaises(StepFailedException, runner.run)

Yelp/mrjob · mrjob/launch.py (view on GitHub)
Called from :py:meth:`run`. You'd probably only want to call this
        directly from automated tests.
        """
        # self.stderr is strictly binary, need to wrap it so it's possible
        # to log to it in Python 3
        log_stream = codecs.getwriter('utf_8')(self.stderr)

        self.set_up_logging(quiet=self.options.quiet,
                            verbose=self.options.verbose,
                            stream=log_stream)

        with self.make_runner() as runner:
            try:
                runner.run()
            except StepFailedException as e:
                # no need for a runner stacktrace if step failed; runners will
                # log more useful information anyway
                log.error(str(e))
                sys.exit(1)

            if self._should_cat_output():
                for chunk in runner.cat_output():
                    self.stdout.write(chunk)
                self.stdout.flush()
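
This is the contract command-line jobs get for free: a StepFailedException becomes a one-line error message and exit status 1, so shell scripts can test $? instead of parsing a traceback. A minimal standalone sketch of the same pattern (the run_or_exit helper name is made up):

import logging
import sys

from mrjob.step import StepFailedException

log = logging.getLogger(__name__)


def run_or_exit(runner):
    """Run a job; on step failure, log a one-line error and exit 1."""
    try:
        runner.run()
    except StepFailedException as e:
        # no stack trace needed; the runner has already logged
        # the useful details
        log.error(str(e))
        sys.exit(1)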

Yelp/mrjob · mrjob/hadoop.py (view on GitHub)
if 'output_dir' not in step_interpretation:
                step_interpretation['output_dir'] = (
                    self._step_output_uri(step_num))

            log_interpretation['step'] = step_interpretation

            self._log_counters(log_interpretation, step_num)

            if returncode:
                error = self._pick_error(log_interpretation, step_type)
                if error:
                    _log_probable_cause_of_failure(log, error)

                # use CalledProcessError's well-known message format
                reason = str(CalledProcessError(returncode, step_args))
                raise StepFailedException(
                    reason=reason, step_num=step_num,
                    num_steps=self._num_steps())
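
Both the Hadoop runner above and the Spark runner below borrow subprocess.CalledProcessError's well-known message format for the reason string, so a failed step reads like any failed shell command:

from subprocess import CalledProcessError

reason = str(CalledProcessError(256, ['hadoop', 'jar', 'streaming.jar']))
print(reason)
# Command '['hadoop', 'jar', 'streaming.jar']' returned non-zero exit status 256.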

Yelp/mrjob · mrjob/spark/runner.py (view on GitHub)
log.info(_format_counters(
                        counter_dict,
                        desc=('Counters for step %d' % desc_num)))

        # for non-streaming steps, there are no counters.
        # pad self._counters to match number of steps
        while len(self._counters) < (last_step_num or step_num) + 1:
            self._counters.append({})

        if returncode:
            error = _pick_error(dict(step=step_interpretation))
            if error:
                _log_probable_cause_of_failure(log, error)

            reason = str(CalledProcessError(returncode, spark_submit_args))
            raise StepFailedException(
                reason=reason, step_num=step_num, last_step_num=last_step_num,
                num_steps=self._num_steps())