How to use the goodtables.processors module in goodtables

To help you get started, we’ve selected a few goodtables examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github datasets / s-and-p-500 / scripts / test_data.py View on Github external
def test_structure(self):
        # Runs a StructureProcessor over the path of the data package's first
        # resource and asserts that the run reports the data as valid.
        structure_processor = processors.StructureProcessor(
            format='csv',
            fail_fast=False,
            row_limit=ROW_LIMIT,
            report_limit=REPORT_LIMIT,
        )

        source = dp.descriptor['resources'][0]['path']
        is_valid, report, _ = structure_processor.run(source)

        # The text rendering of the report doubles as the assertion message;
        # the names below are handed to generate() as its `exclude` argument.
        hidden_fields = [
            'result_context', 'processor', 'row_name', 'result_category',
            'column_index', 'column_name', 'result_level',
        ]
        failure_message = report.generate('txt', exclude=hidden_fields)

        self.assertTrue(is_valid, failure_message)
github frictionlessdata / goodtables-py / tests / test_jungle.py View on Github external
def test_standalone_hmt_bbsrc(self):
        # Runs a default StructureProcessor over the HMT BBSRC spend CSV under
        # self.data_dir and asserts that the third value returned by run() is truthy.
        csv_path = os.path.join(
            self.data_dir, 'hmt', '1011-bbsrc-25k-spend-return.csv')
        processor = processors.StructureProcessor()

        # NOTE(review): encoding=None is passed explicitly; presumably this
        # leaves encoding detection to the processor -- confirm.
        _, _, processed = processor.run(csv_path, encoding=None)

        self.assertTrue(processed)
github datasets / s-and-p-500 / scripts / test_data.py View on Github external
def test_schema(self):
        # Runs a SchemaProcessor, configured with the first resource's schema,
        # over that resource's path and asserts that the run reports valid.
        first_resource = dp.descriptor['resources'][0]
        source = first_resource['path']
        resource_schema = first_resource['schema']

        schema_processor = processors.SchemaProcessor(
            schema=resource_schema,
            format='csv',
            row_limit=ROW_LIMIT,
            report_limit=REPORT_LIMIT,
        )
        is_valid, report, _ = schema_processor.run(source)

        # The text rendering of the report doubles as the assertion message;
        # the names below are handed to generate() as its `exclude` argument.
        hidden_fields = [
            'result_context', 'processor', 'row_name', 'result_category',
            'column_name', 'result_id', 'result_level',
        ]
        failure_message = report.generate('txt', exclude=hidden_fields)

        self.assertTrue(is_valid, failure_message)
github frictionlessdata / goodtables-py / tests / test_schema.py View on Github external
def test_standalone_report_stream_none(self):
        # A SchemaProcessor constructed with report_stream=None is run over an
        # open stream of valid.csv; the run must report the data as valid.
        csv_path = os.path.join(self.data_dir, 'valid.csv')
        with io.open(csv_path) as handle:
            processor = processors.SchemaProcessor(report_stream=None)
            is_valid, _, _ = processor.run(handle)

            self.assertTrue(is_valid)
github frictionlessdata / goodtables-py / tests / test_structure.py View on Github external
def test_standalone_report_limit_out_range(self):
        # Requesting a report limit one above REPORT_LIMIT_MAX must leave the
        # processor's report_limit equal to REPORT_LIMIT_MAX.
        max_limit = processors.StructureProcessor.REPORT_LIMIT_MAX
        processor = processors.StructureProcessor(report_limit=max_limit + 1)

        self.assertEqual(processor.report_limit, max_limit)
github frictionlessdata / example-continuous-data-integration / scripts / test_data.py View on Github external
def test_structure(self):
        # Runs a StructureProcessor over the path of the data package's first
        # resource and asserts that the run reports the data as valid.
        # TODO: infer from data package format field (and default to csv)
        structure_processor = processors.StructureProcessor(
            format='csv',
            fail_fast=False,
            row_limit=row_limit,
            report_limit=report_limit,
        )

        source = dp.metadata['resources'][0]['path']
        is_valid, report, _ = structure_processor.run(source)

        # The text rendering of the report doubles as the assertion message;
        # the names below are handed to generate() as its `exclude` argument.
        hidden_fields = [
            'result_context', 'processor', 'row_name', 'result_category',
            'column_index', 'column_name', 'result_level',
        ]
        failure_message = report.generate('txt', exclude=hidden_fields)

        self.assertTrue(is_valid, failure_message)
github frictionlessdata / example-continuous-data-integration / scripts / test_data.py View on Github external
def test_schema(self):
        # Runs a SchemaProcessor, configured with the first resource's schema,
        # over that resource's path and asserts that the run reports valid.
        first_resource = dp.metadata['resources'][0]
        source = first_resource['path']
        resource_schema = first_resource['schema']

        # TODO: check JTS is valid JTS before we go on

        schema_processor = processors.SchemaProcessor(
            schema=resource_schema,
            format='csv',
            row_limit=row_limit,
            report_limit=report_limit,
        )
        is_valid, report, _ = schema_processor.run(source)

        # The text rendering of the report doubles as the assertion message;
        # the names below are handed to generate() as its `exclude` argument.
        hidden_fields = [
            'result_context', 'processor', 'row_name', 'result_category',
            'column_name', 'result_id', 'result_level',
        ]
        failure_message = report.generate('txt', exclude=hidden_fields)

        self.assertTrue(is_valid, failure_message)
github frictionlessdata / goodtables-py / goodtables / cli / main.py View on Github external
def schema(data, schema, format, fail_fast, row_limit, report_limit, output):

    """Run a Good Tables SchemaProcessor."""

    # Every argument except `data` and `output` is forwarded unchanged to the
    # SchemaProcessor constructor.
    processor = processors.SchemaProcessor(schema=schema, format=format,
                                           fail_fast=fail_fast, row_limit=row_limit,
                                           report_limit=report_limit)

    # run() yields three values; `data` is rebound to the third one.
    valid, report, data = processor.run(data)

    # No exclusion list for 'json' output; any other output value gets the
    # field names below.
    if output == 'json':
        exclude = None
    else:
        exclude = ['result_context', 'processor', 'row_name', 'result_category',
                   'column_index', 'column_name', 'result_level']

    # Both messages are upper-cased before being shown.
    valid_msg = 'Well done! The data is valid :)\n'.upper()
    invalid_msg = 'Oops.The data is invalid :(\n'.upper()

    # NOTE(review): `report`, `exclude` and `invalid_msg` are not consumed in
    # the visible part of this function; the excerpt appears to end before the
    # invalid branch and the report output -- confirm against the full file.
    if valid:
        click.echo(click.style(valid_msg, fg='green'))
github frictionlessdata / goodtables-py / goodtables / utilities / helpers.py View on Github external
def builtin_processors():
    """Map each public builtin processor's ``name`` to its class.

    ``processors`` is imported inside the function body to avoid a circular
    import.
    """
    from .. import processors
    builtin = (processors.StructureProcessor, processors.SchemaProcessor)
    return {processor.name: processor for processor in builtin}
github frictionlessdata / goodtables-py / goodtables / utilities / helpers.py View on Github external
def builtin_processors():
    """Build the ``name`` -> class lookup of the public builtin processors.

    The ``processors`` import is deferred to call time to avoid a circular
    import.
    """
    from .. import processors
    registry = {}
    registry[processors.StructureProcessor.name] = processors.StructureProcessor
    registry[processors.SchemaProcessor.name] = processors.SchemaProcessor
    return registry