How to use the goodtables.pipeline.Pipeline class in goodtables

To help you get started, we’ve selected a few goodtables examples based on popular ways the library is used in public projects.

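All of the snippets below follow the same basic pattern: construct a Pipeline against a data source, call run(), and inspect the resulting (result, report) pair. Here is a minimal, self-contained sketch of that pattern, assembled from the examples that follow; 'data.csv' is a placeholder path:

from goodtables.pipeline import Pipeline

# Validate a local CSV file; 'data.csv' is a placeholder path.
pipeline = Pipeline('data.csv')
result, report = pipeline.run()  # result is truthy when the data passes all checks

if not result:
    # report.generate() returns a dict; its 'results' list holds one entry per issue
    for issue in report.generate()['results']:
        print(issue['result_message'])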

github frictionlessdata / goodtables-py / tests / test_structure.py
def test_pipeline_report_limit_in_range(self):

        filepath = os.path.join(self.data_dir, 'report_limit_structure.csv')
        options = {}
        validator = Pipeline(filepath, processors=('structure',),
                             report_limit=1, options=options)
        result, report = validator.run()

        self.assertEqual(len(report.generate()['results']), 1)

github frictionlessdata / goodtables-py / tests / test_jungle.py
def test_messytables_source_two(self):

        data = 'https://raw.githubusercontent.com/okfn/messytables/master/horror/utf-16le_encoded.csv'
        pipeline = Pipeline(data)
        result, report = pipeline.run()

        self.assertTrue(pipeline.data)

github frictionlessdata / goodtables-py / tests / test_structure.py
def test_pipeline_row_limit_out_range(self):

        filepath = os.path.join(self.data_dir, 'valid.csv')
        limit = Pipeline.ROW_LIMIT_MAX
        validator = Pipeline(filepath, row_limit=(limit + 1))

        self.assertEqual(validator.row_limit, limit)
        self.assertEqual(validator.pipeline[0].row_limit, limit)

github frictionlessdata / goodtables-py / tests / test_schema.py
def test_pipeline_report_stream_none(self):
        filepath = os.path.join(self.data_dir, 'valid.csv')
        report_stream = None
        options = {}
        validator = Pipeline(filepath, processors=('schema',),
                             report_stream=report_stream, options=options)
        result, report = validator.run()

        self.assertTrue(result)

github frictionlessdata / goodtables-py / tests / test_pipeline.py
def test_from_url(self):

        pipeline = Pipeline(self.data_url)
        result, report = pipeline.run()

        self.assertTrue(pipeline.data)

github frictionlessdata / goodtables-py / tests / test_structure.py
def test_pipeline_ignore_duplicate_columns_false(self):

        filepath = os.path.join(self.data_dir, 'duplicate_columns.csv')
        validator = Pipeline(filepath, processors=('structure',))
        result, report = validator.run()

        self.assertFalse(result)

github frictionlessdata / goodtables-py / tests / test_pipeline.py
def test_pipeline_error_report_when_invalid_excel_error(self):
        
        data_source = os.path.join(self.data_dir, 'hmt', 'invalid_excel.xlsx')
        validator = Pipeline(data_source, fail_fast=True, format='excel')
        result, report = validator.run()
        generated_report = report.generate()
        report_results = generated_report['results']
        
        self.assertFalse(result)
        self.assertEqual(len(report_results), 1)
        self.assertEqual(report_results[0]['result_id'], 'invalid_excel_error')

github frictionlessdata / goodtables-py / tests / test_jungle.py
def test_messytables_source_three(self):

        data = 'https://raw.githubusercontent.com/okfn/messytables/master/horror/sparse_with_column_errors.csv'
        pipeline = Pipeline(data)
        result, report = pipeline.run()

        self.assertTrue(pipeline.data)

github frictionlessdata / goodtables-py / tests / test_jungle.py
def test_gla_source_two(self):

        data = os.path.join(self.data_dir, 'jungle', 'gla-250-report-2014-15-P07.csv')
        pipeline = Pipeline(data)
        result, report = pipeline.run()

        self.assertFalse(result)
        self.assertTrue(pipeline.data)

github frictionlessdata / data-quality-cli / data_quality / tasks / check_datapackage.py
def check_database_content(self):
        """Check that the database content is compliant with the datapackage"""

        self.run()
        for resource in self.datapackage.resources:
            resource_path = resource.local_data_path
            if os.path.exists(resource_path):
                options = {'schema': {'schema': resource.descriptor['schema']}}
                pipe = pipeline.Pipeline(resource_path, processors=['schema'],
                                         options=options)
                result, report = pipe.run()
                if result is False:
                    issues = [res['result_message'] for res in report.generate()['results']]
                    msg = ('The file {0} is not compliant with the schema '
                       'you declared for it in "datapackage.json". '
                       'Errors: {1}'
                          ).format(resource_path, ';'.join(issues))
                    raise ValueError(msg)
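
The last example also shows the shape of processor-specific configuration: options is a dict keyed by processor name. A minimal sketch of validating against a Table Schema, assuming the descriptor lives in a local JSON file ('schema.json' and 'data.csv' are placeholder paths):

import json
from goodtables.pipeline import Pipeline

# Load a Table Schema descriptor; 'schema.json' is a placeholder path.
with open('schema.json') as f:
    schema = json.load(f)

# Options are keyed by processor name ('schema' here), mirroring the snippet above.
options = {'schema': {'schema': schema}}
validator = Pipeline('data.csv', processors=('schema',), options=options)
result, report = validator.run()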