How to use the goodtables.pipeline.Batch class in goodtables

To help you get started, we’ve selected a few goodtables examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github frictionlessdata / goodtables-py / tests / test_batch.py View on Github external
def custom_sleep_time():
            """Time a batch run configured with a 3-second sleep between pipelines."""
            sleepy_batch = Batch(self.batch_csv, sleep=3, pipeline_options=self.pipeline_options)
            started = timer()
            sleepy_batch.run()
            finished = timer()
            return finished - started
github frictionlessdata / goodtables-py / tests / test_batch.py View on Github external
def test_batch_from_csv(self):
        """A batch built from the CSV source should discover all four pipelines."""
        loaded = Batch(self.batch_csv, pipeline_options=self.pipeline_options)
        expected_count = 4
        self.assertEqual(len(loaded.dataset), expected_count)
github frictionlessdata / goodtables-py / tests / test_batch.py View on Github external
def default_time():
            """Time a batch run using the default (no extra sleep) configuration."""
            plain_batch = Batch(self.batch_csv, pipeline_options=self.pipeline_options)
            started = timer()
            plain_batch.run()
            finished = timer()
            return finished - started
github frictionlessdata / goodtables-py / tests / test_batch.py View on Github external
def test_bad_pipeline_post_task_raises(self):
        """Passing a non-callable as pipeline_post_task must raise InvalidHandlerError."""
        not_a_callable = 'Say Hi!'
        self.assertRaises(
            exceptions.InvalidHandlerError,
            Batch,
            self.batch_csv,
            pipeline_post_task=not_a_callable,
        )
github frictionlessdata / data-quality-cli / tests / tasks / test_aggregate.py View on Github external
def test_agregator_batch_run(self):
        """Test that Aggregator task updates run file after each batch"""
        config = self.config
        aggregator_task = tasks.Aggregator(config)

        def fake_post_task(instance):
            # Each finished batch should append a new entry to the run file.
            aggregator_task.write_run()

        batch_options = config['goodtables']['arguments']['batch']
        batch_options['post_task'] = fake_post_task
        batch_options['pipeline_options'] = config['goodtables']['arguments']['pipeline']
        batch = pipeline.Batch(aggregator_task.source_file, **batch_options)

        runs_before = self.read_file_contents(aggregator_task.run_file)
        batch.run()
        runs_after = self.read_file_contents(aggregator_task.run_file)

        self.assertGreater(len(runs_after), len(runs_before))
github frictionlessdata / goodtables-py / tests / test_batch.py View on Github external
def test_batch_with_pipeline_post_process_handler(self):
        """A callable pipeline post-task is accepted; the batch run still reports falsy."""

        def greet(pipeline):
            return 'Hi!'

        configured = Batch(self.batch_csv, pipeline_post_task=greet,
                           pipeline_options=self.pipeline_options)
        outcome = configured.run()

        self.assertFalse(outcome)
github frictionlessdata / data-quality-cli / data_quality / main.py View on Github external
assesser.run()
            deployer = tasks.Deployer(config)
            deployer.run()

    else:

        def batch_handler(instance):
            aggregator.write_run()
            assesser = tasks.PerformanceAssessor(config)
            assesser.run()

    post_tasks = {'post_task': batch_handler, 'pipeline_post_task': aggregator.run}
    config['goodtables']['arguments']['batch'].update(post_tasks)
    batch_options = config['goodtables']['arguments']['batch']
    batch_options['pipeline_options'] = config['goodtables']['arguments']['pipeline']
    batch = pipeline.Batch(source_filepath, **batch_options)
    batch.run()