How to use goodtables - 10 common examples

To help you get started, we’ve selected a few goodtables examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github frictionlessdata / goodtables-py / tests / test_validate.py View on Github external
def test_validate_warnings_table_limit():
    """Validating with ``table_limit=1`` yields one table-limit warning."""
    warnings = validate(
        'data/datapackages/invalid/datapackage.json',
        preset='datapackage',
        table_limit=1,
    )['warnings']
    # Exactly one warning is expected, and it must mention the table limit.
    assert len(warnings) == 1
    assert 'table(s) limit' in warnings[0]
github frictionlessdata / goodtables-py / tests / presets / test_table.py View on Github external
def test_validate_table_invalid_row_limit(log):
    """Validating ``data/invalid.csv`` with ``row_limit=2`` logs four errors."""
    # NOTE(review): tuples presumably read (table, row, column, error code);
    # confirm against the ``log`` fixture.
    expected_entries = [
        (1, None, 3, 'blank-header'),
        (1, None, 4, 'duplicate-header'),
        (1, 2, 3, 'missing-value'),
        (1, 2, 4, 'missing-value'),
    ]
    report = validate('data/invalid.csv', row_limit=2, infer_schema=True)
    assert log(report) == expected_entries
github frictionlessdata / goodtables-py / tests / test_inspector.py View on Github external
def test_inspector_warnings_table_and_error_limit():
    """An Inspector with both limits set to 1 reports two limit warnings."""
    report = Inspector(table_limit=1, error_limit=1).inspect(
        'data/datapackages/invalid/datapackage.json',
        preset='datapackage',
    )
    warnings = report['warnings']
    assert len(warnings) == 2
    # The table-limit warning comes first, the error-limit warning second.
    table_warning, error_warning = warnings[0], warnings[1]
    assert 'table(s) limit' in table_warning
    assert 'error(s) limit' in error_warning
github frictionlessdata / goodtables-py / tests / test_structure.py View on Github external
def test_pipeline_report_limit_in_range(self):
        """With ``report_limit=1`` the generated report holds one result."""
        csv_path = os.path.join(self.data_dir, 'report_limit_structure.csv')
        pipeline = Pipeline(
            csv_path,
            processors=('structure',),
            report_limit=1,
            options={},
        )
        _result, report = pipeline.run()

        generated = report.generate()
        self.assertEqual(len(generated['results']), 1)
github frictionlessdata / goodtables-py / tests / test_jungle.py View on Github external
def test_messytables_source_two(self):
        """Run a Pipeline on a remote UTF-16LE CSV and check it has data."""
        source_url = (
            'https://raw.githubusercontent.com/okfn/messytables/master/'
            'horror/utf-16le_encoded.csv'
        )
        pipeline = Pipeline(source_url)
        _result, _report = pipeline.run()

        self.assertTrue(pipeline.data)
github frictionlessdata / goodtables-py / tests / test_structure.py View on Github external
def test_pipeline_row_limit_out_range(self):
        """A ``row_limit`` above ``ROW_LIMIT_MAX`` ends up equal to the max."""
        max_rows = Pipeline.ROW_LIMIT_MAX
        csv_path = os.path.join(self.data_dir, 'valid.csv')
        pipeline = Pipeline(csv_path, row_limit=max_rows + 1)

        # Both the pipeline and its first processor must report the maximum.
        self.assertEqual(pipeline.row_limit, max_rows)
        self.assertEqual(pipeline.pipeline[0].row_limit, max_rows)
github frictionlessdata / goodtables-py / tests / test_schema.py View on Github external
def test_pipeline_report_stream_none(self):
        """A schema-only Pipeline run with ``report_stream=None`` succeeds."""
        csv_path = os.path.join(self.data_dir, 'valid.csv')
        pipeline = Pipeline(
            csv_path,
            processors=('schema',),
            report_stream=None,
            options={},
        )
        outcome, _report = pipeline.run()

        self.assertTrue(outcome)
github frictionlessdata / goodtables-py / tests / test_pipeline.py View on Github external
def test_from_url(self):
        """Run a Pipeline built from ``self.data_url`` and check it has data."""
        url_pipeline = Pipeline(self.data_url)
        _result, _report = url_pipeline.run()

        self.assertTrue(url_pipeline.data)
github frictionlessdata / goodtables-py / tests / test_structure.py View on Github external
def test_pipeline_ignore_duplicate_columns_false(self):
        """A structure-only run on ``duplicate_columns.csv`` must fail."""
        csv_path = os.path.join(self.data_dir, 'duplicate_columns.csv')
        pipeline = Pipeline(csv_path, processors=('structure',))
        outcome, _report = pipeline.run()

        self.assertFalse(outcome)
github frictionlessdata / goodtables-py / tests / test_pipeline.py View on Github external
def test_pipeline_error_report_when_invalid_excel_error(self):
        """An invalid Excel source fails with one ``invalid_excel_error``."""
        xlsx_path = os.path.join(self.data_dir, 'hmt', 'invalid_excel.xlsx')
        pipeline = Pipeline(xlsx_path, fail_fast=True, format='excel')
        outcome, report = pipeline.run()
        results = report.generate()['results']

        self.assertFalse(outcome)
        # The report must hold exactly one entry, carrying the Excel error id.
        self.assertEqual(len(results), 1)
        self.assertEqual(results[0]['result_id'], 'invalid_excel_error')