def test_local_with_relative_resources_paths_is_safe():
    package = Package('data/datapackage_with_foo.txt_resource.json', {})
    assert package.safe()
def test_init_raises_if_url_isnt_a_json():
    url = 'http://someplace.com/datapackage.json'
    body = 'Not a JSON'
    httpretty.register_uri(httpretty.GET, url, body=body,
                           content_type='application/json')
    with pytest.raises(exceptions.DataPackageException):
        Package(url)
def test_base_path_defaults_to_none():
    assert Package().base_path is None
def test_init_accepts_dicts():
    descriptor = {
        'profile': 'data-package',
    }
    package = Package(descriptor)
    assert package.descriptor == descriptor
def test_should_raise_if_zipfile_raised_LargeZipFile(zipfile_mock, tmpfile):
    zipfile_mock.side_effect = zipfile.LargeZipFile()
    package = Package({}, {})
    with pytest.raises(exceptions.DataPackageException):
        package.save(tmpfile)
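The test snippets above omit their imports and fixtures. A minimal sketch of what they appear to assume; the fixture definitions for tmpfile and zipfile_mock are guesses based on the signatures above, not the original conftest:

import zipfile
from unittest import mock

import pytest
import httpretty
from datapackage import Package, exceptions


@pytest.fixture
def tmpfile(tmp_path):
    # Hypothetical fixture: a path for package.save() to write to
    return str(tmp_path / 'datapackage.zip')


@pytest.fixture
def zipfile_mock():
    # Hypothetical fixture: patch zipfile.ZipFile so a test can inject errors
    with mock.patch('zipfile.ZipFile') as mocked:
        yield mocked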
def datapackage_creator(location, title, name, source_title, source_path):
    # Build a Data Package descriptor with title, name, source, and license
    # metadata, then infer resources from the CSV files under `location`.
    package = Package()
    package.descriptor['title'] = title
    package.descriptor['name'] = name
    package.descriptor['sources'] = [{}]
    package.descriptor['sources'][0]['title'] = source_title
    package.descriptor['sources'][0]['path'] = source_path
    package.descriptor['licenses'] = [{}]
    package.descriptor['licenses'][0]['name'] = 'odc-pddl'
    package.descriptor['licenses'][0]['title'] = 'Open Data Commons Public Domain Dedication and Licence (PDDL)'
    package.descriptor['licenses'][0]['path'] = 'http://opendatacommons.org/licenses/pddl/'
    package.commit()
    package.infer(location + '/data/*.csv')
    package_json = package.descriptor
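A sketch of how this helper might be called; all argument values below are invented for illustration and are not from the original project:

# Hypothetical invocation of the helper above (values are illustrative only)
datapackage_creator(
    location='.',
    title='Example Dataset',
    name='example-dataset',
    source_title='Example Source',
    source_path='http://example.com/source',
)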
resources[res][f"{d}_df"] = \
pd.read_csv(f"{archive_dir}/{resources[res][d]}",
delimiter="|",
encoding="iso-8859-1")
# Read the input tabular data resource JSON file we've prepared
resources[res]["json"] = json.load(
open(os.path.join(input_dir, f"{res}.json")))
# OMFG even the MSHA data is broken. *sigh*
resources["employment-production-quarterly"]["data_df"].columns = \
list(resources["employment-production-quarterly"]
["defs_df"]['COLUMN_NAME'])
# Create a data package to contain our resources, based on the template
# JSON file that we have already prepared as an input.
pkg = datapackage.Package(os.path.join(input_dir, "datapackage.json"))
for res in resources:
    # Convert the definitions to a dictionary of field descriptions
    field_desc = resources[res]["defs_df"].set_index(
        'COLUMN_NAME').to_dict()['FIELD_DESCRIPTION']
    # Set the description attribute of the fields in the schema using the
    # field descriptions.
    for field in resources[res]["json"]["schema"]["fields"]:
        field['description'] = field_desc[field['name']]
    resources[res]["resource"] = datapackage.Resource(
        descriptor=resources[res]["json"])
    # Make sure we didn't miss or rename any fields accidentally
    json_fields = resources[res]["resource"].schema.field_names
    defs_fields = list(resources[res]["defs_df"]['COLUMN_NAME'])
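The loop excerpt ends before the actual consistency check and before the resource is attached to the package; a plausible continuation, written here as an assumption rather than the original code, would be:

    # Sketch of a plausible continuation (assumption, not the original code)
    if set(json_fields) != set(defs_fields):
        raise ValueError(f"Field mismatch in {res}: "
                         f"{set(json_fields) ^ set(defs_fields)}")
    pkg.add_resource(resources[res]["json"])  # attach the finished descriptor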
from datapackage import Package


class DwCPackage(Package):
    def __init__(self, path):
        super().__init__(path)


if __name__ == '__main__':
    # p = Package(base_path='../tmp/fdwc/')
    # p.infer('occurrence.csv')
    p = Package('../tmp/fdwc.zip')
    print(p.valid)
    print(p.errors)
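Beyond printing p.valid and p.errors, the package's resources can also be read directly; a minimal sketch using the same datapackage API as the other examples on this page:

    # Sketch: list the package's resources and read a few rows from each
    for resource in p.resources:
        print(resource.name)
        rows = resource.read(keyed=True)  # list of row dicts for tabular resources
        print(rows[:3])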
# concatenate both sources
dp = Package(os.path.join(OUTPUT_PATH, 'datapackage.json'))
fields = dp.get_resource('brazilian-transparency-and-open-data-portals').schema.fields
columns = [field.name for field in fields]
df = pd.concat([
    pd.DataFrame(columns=columns),
    df_state,
    df_municipality
], sort=False)

# look up municipal codes
geo_package = Package(os.path.join(IBGE_CODE_PATH, 'datapackage.json'))
municipalities = pd.DataFrame(
    geo_package.get_resource('municipality')
    .read(keyed=True)
)
municipalities.rename(  # line up column names in preparation for merge
    columns={
        'uf': 'state_code',
        'name': 'municipality',
        'code': 'municipality_code'
    },
    inplace=True
)
df = (
    df.drop('municipality_code', axis=1)  # discard original column