def test_generates_filenames_for_named_resources(self, tmpfile):
    descriptor = {
        'name': 'proverbs',
        'resources': [
            {'name': 'proverbs', 'format': 'TXT', 'path': 'unicode.txt'},
            {'name': 'proverbs_without_format', 'path': 'unicode.txt'}
        ]
    }
    schema = {}
    dp = datapackage.DataPackage(
        descriptor, schema, default_base_path='tests/fixtures')
    dp.save(tmpfile)
    with zipfile.ZipFile(tmpfile, 'r') as z:
        assert 'data/proverbs.txt' in z.namelist()
        assert 'data/proverbs_without_format' in z.namelist()
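
For context, a minimal standalone sketch of the naming rule the test above asserts (the descriptor and file names here are illustrative, not from the source): when a resource has both a name and a format, save() writes it into the zip as data/<name>.<lowercased format>; without a format, the bare resource name is used.

import zipfile

import datapackage

# Hypothetical package; 'unicode.txt' stands in for any local data file.
dp = datapackage.DataPackage(
    {'name': 'example',
     'resources': [{'name': 'proverbs', 'format': 'TXT', 'path': 'unicode.txt'}]},
    default_base_path='tests/fixtures')
dp.save('example.zip')
with zipfile.ZipFile('example.zip', 'r') as z:
    print(z.namelist())  # expected to include 'data/proverbs.txt'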
def setUp(self):
    # GIVEN datapackage that can be treated as valid by the dpm
    self.valid_dp = datapackage.DataPackage({
        "name": "some-datapackage",
        "resources": [
            {"name": "some-resource", "path": "./data/some_data.csv"}
        ]
    },
        default_base_path='.')
    patch('dpm.client.DataPackage', lambda *a: self.valid_dp).start()
    patch('dpm.client.exists', lambda *a: True).start()
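
One caveat about the patching style above: patches started with patch(...).start() stay active until explicitly stopped, so they can leak into later tests. A common guard (an addition here, not shown in the source) is to register mock.patch.stopall as a cleanup:

import unittest
from unittest.mock import patch

class ClientTestCase(unittest.TestCase):  # hypothetical test-case name
    def setUp(self):
        patch('dpm.client.exists', lambda *a: True).start()
        # Undo every patch(...).start() once each test finishes.
        self.addCleanup(patch.stopall)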
def test_schema(self):
    descriptor = {}
    schema = {'foo': 'bar'}
    dp = datapackage.DataPackage(descriptor, schema=schema)
    assert dp.schema.to_dict() == schema

def test_resources_are_empty_tuple_by_default(self):
    descriptor = {}
    dp = datapackage.DataPackage(descriptor)
    assert dp.resources == ()

def test_base_path_cant_be_set_directly(self):
    dp = datapackage.DataPackage()
    with pytest.raises(AttributeError):
        dp.base_path = 'foo'

def test_should_raise_if_path_doesnt_exist(self):
    dp = datapackage.DataPackage({}, {})
    with pytest.raises(datapackage.exceptions.DataPackageException):
        dp.save('/non/existent/file/path')
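
The base_path test above hints at the intended API: base_path is a read-only property, set indirectly through the default_base_path constructor argument rather than by assignment. A sketch of that usage, with an illustrative path (assuming base_path falls back to default_base_path when the descriptor itself carries no base):

import datapackage

dp = datapackage.DataPackage({}, default_base_path='/data/packages')
print(dp.base_path)   # '/data/packages'
dp.base_path = 'foo'  # raises AttributeError: the property has no setter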
False (the default), fail if an existing PUDL DB is found.
"""
# prepping the sqlite engine
pudl_engine = sa.create_engine(sqlite_url)
logger.info("Dropping the current PUDL DB, if it exists.")
try:
    # So that we can wipe it out
    pudl.helpers.drop_tables(pudl_engine, clobber=clobber)
except sa.exc.OperationalError:
    pass
# And start anew
pudl_engine = sa.create_engine(sqlite_url)
# grab the merged datapackage metadata file:
pkg = datapackage.DataPackage(
    descriptor=str(pathlib.Path(out_path, 'datapackage.json')))
# we want to grab the dictionary of columns that need autoincrement id cols
try:
    autoincrement = pkg.descriptor['autoincrement']
# in case there are no autoincrement columns in the metadata
except KeyError:
    autoincrement = {}
logger.info("Loading merged datapackage into SQLite.")
logger.info("This could take a while. It might be a good time")
logger.info("to get a drink of water. Hydrate or die!")
try:
    # Save the data package in SQL
    pkg.save(storage='sql', engine=pudl_engine, merge_groups=True,
             autoincrement=autoincrement)
except exceptions.TableSchemaException as exception:
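
A side note on the autoincrement lookup above: since pkg.descriptor behaves like a plain dict here, the try/except KeyError is equivalent to a single .get() call (just an alternative phrasing, not how the source writes it):

autoincrement = pkg.descriptor.get('autoincrement', {})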
self.data_dir = self.config['data_dir']
self.result_file = os.path.join(self.data_dir, self.config['result_file'])
self.run_file = os.path.join(self.data_dir, self.config['run_file'])
self.source_file = os.path.join(self.data_dir, self.config['source_file'])
self.performance_file = os.path.join(self.data_dir,
                                     self.config['performance_file'])
self.publisher_file = os.path.join(self.data_dir,
                                   self.config['publisher_file'])
self.cache_dir = self.config['cache_dir']
self.data_key = self.config['goodtables']['arguments']['batch']['data_key']
datapkg_file_path = self.config.get('datapackage_file', 'datapackage.json')
if not os.path.isabs(datapkg_file_path):
    datapkg_file_path = os.path.join(os.path.dirname(self.data_dir),
                                     datapkg_file_path)
try:
    self.datapackage = datapackage.DataPackage(datapkg_file_path)
except datapackage.exceptions.DataPackageException as e:
    raise ValueError(('A datapackage couldn\'t be created because of the '
                      'following error: "{0}". Make sure the file is not '
                      'empty and use the "dq init" command.').format(e))
self.all_scores = []
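
To make the expected configuration shape explicit, here is a hypothetical self.config that would satisfy the reads above (the keys are taken from the code; the values are invented):

config = {
    'data_dir': 'data',
    'result_file': 'results.csv',
    'run_file': 'runs.csv',
    'source_file': 'sources.csv',
    'performance_file': 'performance.csv',
    'publisher_file': 'publishers.csv',
    'cache_dir': 'fetched',
    'datapackage_file': 'datapackage.json',  # optional; this is the default
    'goodtables': {'arguments': {'batch': {'data_key': 'data'}}},
}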
def validate(self, dp):
    if isinstance(dp, datapackage.DataPackage) and not is_tabular(dp):
        raise ValueError("data package must be a tabular data package")
    else:
        dp = datapackage.DataPackage(dp, schema="tabular")
    dp.validate()
    self.logger.debug("valid tabular data package")
    if len(dp.resources) < 2:
        self.__error("data package must have at least two resources")
    res_map = dict((_.descriptor['name'], _) for _ in dp.resources)
    try:
        objects = res_map[cmso.OBJECTS_TABLE]
    except KeyError:
        self.__error("objects table not found")
    else:
        self.validate_objects(objects.descriptor)
    try:
        links = res_map[cmso.LINKS_TABLE]
    except KeyError:
        # presumably handled like the objects table above; the excerpt
        # is cut off at this point, so this completion is an assumption
        self.__error("links table not found")
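
Assuming the method above lives on a validator class (the class name below is invented for illustration), calling it would look like:

validator = Validator()  # hypothetical class exposing validate()
# Accepts a datapackage.DataPackage instance or anything the DataPackage
# constructor accepts, e.g. a path to a descriptor file:
validator.validate('path/to/datapackage.json')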