How to use the dataflows.filter_rows function in dataflows

To help you get started, we’ve selected a few dataflows examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github datahq / dataflows / tests / test_lib.py View on Github external
def test_filter_rows():
    from dataflows import filter_rows

    f = Flow(
        [
            {'a': 1, 'b': 3},
            {'a': 2, 'b': 3},
            {'a': 1, 'b': 4},
            {'a': 2, 'b': 4},
        ],
        filter_rows(equals=[dict(a=1)]),
        filter_rows(not_equals=[dict(b=3)]),
    )
    results, _, _ = f.results()
    assert results[0][0] == dict(a=1, b=4)
    assert len(results[0]) == 1
    assert len(results) == 1
github datahq / dataflows / tests / test_lib.py View on Github external
def test_filter_rows():
    from dataflows import filter_rows

    f = Flow(
        [
            {'a': 1, 'b': 3},
            {'a': 2, 'b': 3},
            {'a': 1, 'b': 4},
            {'a': 2, 'b': 4},
        ],
        filter_rows(equals=[dict(a=1)]),
        filter_rows(not_equals=[dict(b=3)]),
    )
    results, _, _ = f.results()
    assert results[0][0] == dict(a=1, b=4)
    assert len(results[0]) == 1
    assert len(results) == 1
github datahq / dataflows / tests / test_lib.py View on Github external
def test_filter_rows_callable():
    from dataflows import filter_rows

    f = Flow(
        [
            {'a': 1, 'b': 3},
            {'a': 2, 'b': 3},
            {'a': 1, 'b': 4},
            {'a': 2, 'b': 4},
        ],
        filter_rows(condition=lambda row: row['a'] > 1 and row['b'] < 4),
    )
    results, _, _ = f.results()
    assert results[0][0] == dict(a=2, b=3)
    assert len(results[0]) == 1
    assert len(results) == 1
github datahq / dataflows / tests / test_examples.py View on Github external
f = Flow(
        # Emmy award nominees and winners
        load('data/emmy.csv', name='emmies'),
        filter_rows(equals=[dict(winner=1)]),
        concatenate(dict(
                emmy_nominee=['nominee'],
            ), 
            dict(name='emmies_filtered'),
            resources='emmies'),
        # Academy award nominees and winners
        load('data/academy.csv', encoding='utf8', name='oscars'),
        join('emmies_filtered', ['emmy_nominee'],  # Source resource
             'oscars', ['Name'],                   # Target resource
             full=False   # Don't add new fields, remove unmatched rows
        ),
        filter_rows(equals=[dict(Winner='1')]),
        dump_to_path('out/double_winners')
    )
    _ = f.process()
github datahq / dataflows / tests / test_examples.py View on Github external
def test_example_9():
    from dataflows import Flow, load, dump_to_path, join, concatenate, filter_rows

    f = Flow(
        # Emmy award nominees and winners
        load('data/emmy.csv', name='emmies'),
        filter_rows(equals=[dict(winner=1)]),
        concatenate(dict(
                emmy_nominee=['nominee'],
            ), 
            dict(name='emmies_filtered'),
            resources='emmies'),
        # Academy award nominees and winners
        load('data/academy.csv', encoding='utf8', name='oscars'),
        join('emmies_filtered', ['emmy_nominee'],  # Source resource
             'oscars', ['Name'],                   # Target resource
             full=False   # Don't add new fields, remove unmatched rows
        ),
        filter_rows(equals=[dict(Winner='1')]),
        dump_to_path('out/double_winners')
    )
    _ = f.process()
github frictionlessdata / datapackage-pipelines / datapackage_pipelines / lib / filter.py View on Github external
def flow(parameters):
    return Flow(
        filter_rows(
            equals = parameters.get('in', []),
            not_equals = parameters.get('out', []),
            resources = parameters.get('resources'),
        )