How to use the tabulator.Stream class in tabulator

To help you get started, we’ve selected a few tabulator examples based on popular ways the library is used in public projects.

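All of the snippets below revolve around tabulator's Stream class. As a quick orientation, here is a minimal sketch of the typical pattern, opening a source with the first row as headers and reading the data rows; the 'data/table.csv' path simply mirrors the fixture used by the tests below.

from tabulator import Stream

# Open a local CSV, treat the first row as headers, and iterate the data rows.
# 'data/table.csv' mirrors the test fixture referenced in the examples below.
with Stream('data/table.csv', headers=1) as stream:
    print(stream.headers)   # e.g. ['id', 'name']
    for row in stream:      # each row is a list of cell values
        print(row)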

From frictionlessdata/tabulator-py: tests/test_stream.py
def test_stream_encoding_explicit_latin1():
    with Stream('data/special/latin1.csv', encoding='latin1') as stream:
        assert stream.encoding == 'iso8859-1'
        assert stream.read() == [['id', 'name'], ['1', 'english'], ['2', '©']]

From frictionlessdata/tabulator-py: tests/test_stream.py
def test_stream_skip_blank_at_the_end_issue_bco_dmo_33():
    source = 'data/special/skip-blank-at-the-end.csv'
    with Stream(source, headers=1, skip_rows=['#']) as stream:
        assert stream.headers == ['test1', 'test2']
        assert stream.read() == [['1', '2'], []]

From frictionlessdata/tabulator-py: tests/formats/test_xls.py
def test_stream_xls_merged_cells():
    source = 'data/special/merged-cells.xls'
    with Stream(source) as stream:
        assert stream.read() == [['data', ''], ['', ''], ['', '']]

From frictionlessdata/tabulator-py: tests/test_stream.py
def test_stream_compression_error_gz():
    source = 'id,filename\n1,dump.tar.gz'
    stream = Stream(source, scheme='text', format='csv')
    stream.open()

From frictionlessdata/tabulator-py: tests/test_stream.py
def test_stream_local_csv_zip_multiple_open():
    # That's how `tableschema.iter()` acts
    stream = Stream('data/table.csv.zip')
    stream.open()
    assert stream.headers is None
    assert stream.read() == [['id', 'name'], ['1', 'english'], ['2', '中国人']]
    stream.close()
    stream.open()
    assert stream.headers is None
    assert stream.read() == [['id', 'name'], ['1', 'english'], ['2', '中国人']]
    stream.close()

From frictionlessdata/tabulator-py: tests/formats/test_csv.py
def test_stream_csv_dialect_should_not_persist_if_sniffing_fails_issue_goodtables_228():
    source1 = 'a;b;c\n#comment'
    source2 = 'a,b,c\n#comment'
    with Stream(source1, scheme='text', format='csv', headers=1, delimiter=';') as stream:
        assert stream.headers == ['a', 'b', 'c']
    with Stream(source2, scheme='text', format='csv', headers=1) as stream:
        assert stream.headers == ['a', 'b', 'c']

From frictionlessdata/tabulator-py: tests/formats/test_xls.py
def test_stream_xls_with_boolean():
    with Stream('data/special/table-with-booleans.xls') as stream:
        assert stream.headers is None
        assert stream.read() == [['id', 'boolean'], [1, True], [2, False]]

From frictionlessdata/tabulator-py: tests/formats/test_xls.py
def test_stream_xls_sheet_by_name():
    source = 'data/special/sheet2.xls'
    with Stream(source, sheet='Sheet2') as stream:
        assert stream.fragment == 'Sheet2'
        assert stream.read() == [['id', 'name'], [1, 'english'], [2, '中国人']]
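
The test above selects a worksheet by name; tabulator's sheet option also accepts a numeric position (1-based), so a roughly equivalent sketch, reusing the same fixture path, would be:

from tabulator import Stream

# Same fixture as the test above, but selecting the worksheet by its
# 1-based position instead of its name (assumed equivalent here).
with Stream('data/special/sheet2.xls', sheet=2) as stream:
    print(stream.read())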

From os-data/eu-structural-funds: common/processors/stream_from_path.py
            parameters['post_parse'].append(drop_bad_rows)
            parameters.update(encoding=get_encoding(parameters, resource))

        if extension in ('.xls', '.xlsx'):
            parameters['post_parse'].append(force_strings)

        if extension == '.json':
            fill_missing_fields(path)
            parameters['post_parse'].append(force_strings)

        info('Ingesting file = %s', path)
        info('Ingestion parameters = %s', format_to_json(parameters))

        parameters.update(headers=get_headers(parameters, path))

        with Stream(path, **parameters) as stream:
            check_fields_match(resource, stream)
            log_sample_table(stream)
            yield stream.iter(keyed=True)
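
The processor above yields stream.iter(keyed=True), which returns each row as a dict keyed by the stream's headers rather than as a plain list. A self-contained sketch of that behaviour, using an inline text source instead of a file path, looks roughly like this:

from tabulator import Stream

# Inline CSV via the text scheme so the sketch runs without any files;
# keyed iteration yields one dict per data row, keyed by the headers.
source = 'id,name\n1,english\n2,french'
with Stream(source, scheme='text', format='csv', headers=1) as stream:
    for row in stream.iter(keyed=True):
        print(row)  # {'id': '1', 'name': 'english'}, then {'id': '2', 'name': 'french'}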

From frictionlessdata/tabulator-py: examples/stream.py
import io
import sys
from tabulator import Stream


print('Parse csv format:')
source = 'data/table.csv'
with Stream(source, headers='row1') as stream:
    print(stream.headers)
    for row in stream:
        print(row)


print('\nParse linear tsv format:')
source = 'data/table.tsv'
with Stream(source, headers='row1') as stream:
    print(stream.headers)
    for row in stream:
        print(row)


print('\nParse json with dicts:')
source = 'file://data/table-dicts.json'
with Stream(source) as stream:
    print(stream.headers)
    for row in stream:
        print(row)


print('\nParse json with lists:')
source = 'file://data/table-lists.json'
with Stream(source, headers='row1') as stream: