How to use the datapackage.Package class in datapackage

To help you get started, we’ve selected a few datapackage.Package examples, based on how it is commonly used in public projects.

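Before diving into the project snippets, here is a minimal sketch of the typical workflow: construct a Package from a descriptor, infer resources, and check validity. The package name and the CSV glob below are placeholders, not taken from any of the projects shown.

from datapackage import Package

# Build a package from an in-memory descriptor (placeholder name),
# infer resources from CSV files matching a glob, then validate.
package = Package({'name': 'example-package'})
package.infer('data/*.csv')   # add a resource for each matching file
print(package.valid)          # True if the descriptor passes validation
print(package.errors)         # list of validation errors, if any
for resource in package.resources:
    print(resource.name)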

github frictionlessdata / datapackage-py / tests / test_package.py
def test_local_with_relative_resources_paths_is_safe():
    package = Package('data/datapackage_with_foo.txt_resource.json', {})
    assert package.safe()
github frictionlessdata / datapackage-py / tests / test_package.py
def test_init_raises_if_url_isnt_a_json():
    url = 'http://someplace.com/datapackage.json'
    body = 'Not a JSON'
    httpretty.register_uri(httpretty.GET, url, body=body, content_type='application/json')
    with pytest.raises(exceptions.DataPackageException):
        Package(url)
github frictionlessdata / datapackage-py / tests / test_package.py
def test_base_path_defaults_to_none():
    assert Package().base_path is None
github frictionlessdata / datapackage-py / tests / test_package.py
def test_init_accepts_dicts():
    descriptor = {
        'profile': 'data-package',
    }
    package = Package(descriptor)
    assert package.descriptor == descriptor
github frictionlessdata / datapackage-py / tests / test_package.py
def test_should_raise_if_zipfile_raised_LargeZipFile(zipfile_mock, tmpfile):
    zipfile_mock.side_effect = zipfile.LargeZipFile()
    package = Package({}, {})
    with pytest.raises(exceptions.DataPackageException):
        package.save(tmpfile)
github datasets / five-thirty-eight-datasets / main.py
def datapackage_creator(location, title, name, source_title, source_path):
    package = Package()

    package.descriptor['title'] = title
    package.descriptor['name'] = name

    package.descriptor['sources'] = [{}]
    package.descriptor['sources'][0]['title'] = source_title
    package.descriptor['sources'][0]['path'] = source_path

    package.descriptor['licenses'] = [{}]
    package.descriptor['licenses'][0]['name'] = 'odc-pddl'
    package.descriptor['licenses'][0]['title'] = 'Open Data Commons Public Domain Dedication and Licence (PDDL)'
    package.descriptor['licenses'][0]['path'] = 'http://opendatacommons.org/licenses/pddl/'

    package.commit()
    package.infer(location + '/data/*.csv')
    package_json = package.descriptor
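The excerpt above ends once the inferred descriptor has been captured in package_json. One way to persist it, sketched here with a placeholder output path that is not part of the original project:

import json

# write the inferred descriptor to disk (output filename is a placeholder)
with open('datapackage.json', 'w') as out:
    json.dump(package_json, out, indent=4)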
github catalyst-cooperative / pudl / results / datapkg-msha / pudl_msha_pkg.py
resources[res][f"{d}_df"] = \
                pd.read_csv(f"{archive_dir}/{resources[res][d]}",
                            delimiter="|",
                            encoding="iso-8859-1")
        # Read the input tabular data resource JSON file we've prepared
        resources[res]["json"] = json.load(
            open(os.path.join(input_dir, f"{res}.json")))

    # OMFG even the MSHA data is broken. *sigh*
    resources["employment-production-quarterly"]["data_df"].columns = \
        list(resources["employment-production-quarterly"]
             ["defs_df"]['COLUMN_NAME'])

    # Create a data package to contain our resources, based on the template
    # JSON file that we have already prepared as an input.
    pkg = datapackage.Package(os.path.join(input_dir, "datapackage.json"))

    for res in resources:
        # Convert the definitions to a dictionary of field descriptions
        field_desc = resources[res]["defs_df"].set_index(
            'COLUMN_NAME').to_dict()['FIELD_DESCRIPTION']

        # Set the description attribute of the fields in the schema using field
        # descriptions.
        for field in resources[res]["json"]["schema"]["fields"]:
            field['description'] = field_desc[field['name']]
        resources[res]["resource"] = datapackage.Resource(
            descriptor=resources[res]["json"])

        # Make sure we didn't miss or re-name any fields accidentally
        json_fields = resources[res]["resource"].schema.field_names
        defs_fields = list(resources[res]["defs_df"]['COLUMN_NAME'])
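The excerpt stops before the prepared resources are attached to the package. With datapackage-py this is usually done through Package.add_resource; the sketch below illustrates that call, though it is not shown in the original script.

for res in resources:
    # attach each prepared tabular data resource descriptor to the package
    pkg.add_resource(resources[res]["json"])
pkg.commit()       # sync and re-validate the package descriptor
print(pkg.valid)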
github frictionlessdata / FrictionlessDarwinCore / FrictionlessDarwinCore / package.py
from datapackage import Package

class DwCPackage(Package):

    def __init__(self, path):
        Package.__init__(self, path)


if __name__ == '__main__':
#    p = Package(base_path='../tmp/fdwc/')
#    p.infer('occurrence.csv')
    p = Package('../tmp/fdwc.zip')
    print(p.valid)
    print(p.errors)
github augusto-herrmann / transparencia-dados-abertos-brasil / tools / import / archive / klein-2017 / archive-klein-2017.py
# concatenate both sources

dp = Package(os.path.join(OUTPUT_PATH,'datapackage.json'))
fields = dp.get_resource('brazilian-transparency-and-open-data-portals').schema.fields
columns = [field.name for field in fields]

df = pd.concat([
        pd.DataFrame(columns=columns),
        df_state,
        df_municipality
], sort=False)

# look up municipal codes

geo_package = Package(os.path.join(IBGE_CODE_PATH, 'datapackage.json'))
municipalities = pd.DataFrame(
    geo_package.get_resource('municipality')
    .read(keyed=True)
)

municipalities.rename( # line up column names in preparation for merge
    columns={
        'uf': 'state_code',
        'name': 'municipality',
        'code': 'municipality_code'
    },
    inplace=True
)

df = (
    df.drop('municipality_code', axis=1) # discard original column