How to use the goodtables.Inspector class in goodtables

To help you get started, we’ve selected a few goodtables examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github frictionlessdata / goodtables-py / tests / test_inspector.py View on Github external
def test_inspector_warnings_table_and_error_limit():
    """Check that table and error limits of 1 each produce a warning.

    The invalid data package is inspected with ``table_limit=1`` and
    ``error_limit=1``; the report must carry exactly two warnings, the
    table-limit one first and the error-limit one second.
    """
    limited = Inspector(table_limit=1, error_limit=1)
    report = limited.inspect(
        'data/datapackages/invalid/datapackage.json',
        preset='datapackage',
    )
    warnings = report['warnings']
    assert len(warnings) == 2
    # Order matters: the table limit warning precedes the error limit one.
    assert 'table(s) limit' in warnings[0]
    assert 'error(s) limit' in warnings[1]
github frictionlessdata / goodtables-py / tests / test_inspector.py View on Github external
def test_inspector_datapackage_valid(log, dp_path):
    """Check that inspecting the data package at ``dp_path`` logs nothing.

    ``log`` and ``dp_path`` are supplied by the test suite's fixtures;
    ``log(report)`` is expected to be an empty list for a valid package.
    """
    report = Inspector().inspect(dp_path)
    logged = log(report)
    assert logged == []
github frictionlessdata / goodtables-py / tests / test_inspector.py View on Github external
def test_inspector_catch_all_open_exceptions(log):
    """Check that a failure while opening the source is reported, not raised.

    ``data/latin1.csv`` is inspected with ``encoding='utf-8'``; the report
    must log a single ``source-error`` entry.
    """
    report = Inspector().inspect('data/latin1.csv', encoding='utf-8')
    expected = [(1, None, None, 'source-error')]
    assert log(report) == expected
github frictionlessdata / goodtables-py / tests / test_inspector.py View on Github external
def test_nested_presets_set_default_preset():
    """Check the ``nested`` preset with an entry that names no preset.

    The single nested entry only gives a ``source``; the resulting report
    must be valid and carry no warnings.
    """
    nested_sources = [
        {'source': 'data/datapackages/valid/datapackage.json'},
    ]
    report = Inspector(infer_schema=True).inspect(
        nested_sources, preset='nested')
    assert report['valid']
    assert report['warnings'] == []
github frictionlessdata / goodtables-py / tests / test_inspector.py View on Github external
def test_inspector_table_invalid(log):
    """Check the full list of errors logged for ``data/invalid.csv``.

    The file is inspected with ``infer_schema=True`` and the logged entries
    must match the expected list exactly, including order.
    """
    # NOTE(review): tuples are presumably (table, row, column, code) --
    # confirm against the ``log`` fixture.
    expected = [
        (1, None, 3, 'blank-header'),
        (1, None, 4, 'duplicate-header'),
        (1, 2, 3, 'missing-value'),
        (1, 2, 4, 'missing-value'),
        (1, 3, None, 'duplicate-row'),
        (1, 4, None, 'blank-row'),
        (1, 5, 5, 'extra-value'),
    ]
    report = Inspector(infer_schema=True).inspect('data/invalid.csv')
    assert log(report) == expected
github frictionlessdata / goodtables-py / tests / test_inspector.py View on Github external
def test_inspector_no_headers():
    """Check inspection of a CSV file when ``headers=None`` is passed.

    The first table in the report must have three rows and exactly one
    error, whose code is ``extra-value``.
    """
    report = Inspector().inspect('data/invalid_no_headers.csv', headers=None)
    first_table = report['tables'][0]
    assert first_table['row-count'] == 3
    assert first_table['error-count'] == 1
    assert first_table['errors'][0]['code'] == 'extra-value'
github openknowledge-archive / dpm-py / dpm / client / __init__.py View on Github external
tables = []
    for resource in datapackage.resources:
        is_tabular = resource.descriptor.get('format', None) == 'csv' \
                or resource.descriptor.get('mediatype', None) == 'text/csv' \
                or resource.local_data_path.endswith('csv')

        if is_tabular:
            path = resource.remote_data_path or resource.local_data_path
            tables.append({
                'source': path,
                'stream': Stream(path, headers=1),
                'schema': Schema(resource.descriptor['schema']),
                'extra': {}
            })
    inspector = Inspector()

    reports = []
    errors = []
    for table in tables:
        report = inspector._Inspector__inspect_table(table)
        errors.extend(report['errors'])
        reports.append(report)

    # Stop timer
    stop = datetime.datetime.now()
    errors = errors[:1000]
    report = {
        'time': round((stop - start).total_seconds(), 3),
        'valid': True if len(reports) == 0 else all(report['valid'] for report in reports),
        'table-count': len(tables),
        'error-count': sum(len(report['errors']) for report in reports),
github frictionlessdata / goodtables-py / examples / ckan.py View on Github external
for package in data['result']['results']:
        for resource in package['resources']:
            if resource['url'].endswith('.csv'):
                tables.append({
                    'source': resource['url'],
                    'stream': Stream(resource['url'], headers=1),
                    'schema': None,
                    'extra': {
                        'dataset': package['title'],
                        'resource': resource['name'],
                        'publisher': package['organization']['name']
                    },
                })
    return warnings, tables

# Hand the custom preset to the inspector, then select it by name when
# inspecting the CKAN site URL.
ckan_url = 'http://data.surrey.ca'
inspector = Inspector(custom_presets=[ckan_preset])
report = inspector.inspect(ckan_url, preset='ckan')
pprint(report)
github frictionlessdata / goodtables-py / examples / datapackages.py View on Github external
from pprint import pprint
from goodtables import Inspector

# Inspect one valid and one invalid data package in a single run using the
# 'datapackages' preset, then pretty-print the combined report.
sources = [
    {'source': 'data/datapackages/valid/datapackage.json'},
    {'source': 'data/datapackages/invalid/datapackage.json'},
]
inspector = Inspector()
report = inspector.inspect(sources, preset='datapackages')
pprint(report)