How to use the datapackage.DataPackage class in datapackage

To help you get started, we’ve selected a few datapackage.DataPackage examples, drawn from popular ways the class is used in public projects.
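
At its simplest, DataPackage takes a descriptor (an inline dict, or a path or URL to a datapackage.json file) plus an optional schema and base path. A minimal sketch, with illustrative descriptor contents:

import datapackage

# An inline descriptor; a path or URL to a datapackage.json works the same way.
descriptor = {
    'name': 'example-package',
    'resources': [
        {'name': 'example-resource', 'path': 'data/example.csv'},
    ],
}

dp = datapackage.DataPackage(descriptor)
print(dp.descriptor['name'])  # 'example-package'
print(len(dp.resources))      # 1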

github frictionlessdata / datapackage-py / tests / test_datapackage.py
def test_generates_filenames_for_named_resources(self, tmpfile):
        descriptor = {
            'name': 'proverbs',
            'resources': [
                {'name': 'proverbs', 'format': 'TXT', 'path': 'unicode.txt'},
                {'name': 'proverbs_without_format', 'path': 'unicode.txt'}
            ]
        }
        schema = {}
        dp = datapackage.DataPackage(
            descriptor, schema, default_base_path='tests/fixtures')
        dp.save(tmpfile)
        with zipfile.ZipFile(tmpfile, 'r') as z:
            assert 'data/proverbs.txt' in z.namelist()
            assert 'data/proverbs_without_format' in z.namelist()
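
Outside a test suite, the same save-to-zip flow looks roughly like the sketch below. The fixture directory and file names are illustrative; note that older datapackage-py releases spell the base-path argument default_base_path, as in the test above, while newer ones use base_path, and the exact archive layout can differ between versions:

import zipfile

import datapackage

descriptor = {
    'name': 'proverbs',
    'resources': [
        {'name': 'proverbs', 'format': 'TXT', 'path': 'unicode.txt'},
    ],
}
dp = datapackage.DataPackage(descriptor, default_base_path='tests/fixtures')
dp.save('proverbs.zip')

# The archive holds the descriptor plus one data file per resource.
with zipfile.ZipFile('proverbs.zip') as z:
    print(z.namelist())  # e.g. ['data/proverbs.txt', 'datapackage.json']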
github openknowledge-archive / dpm-py / tests / tests_cli / test_publish_connerror.py
def setUp(self):
        # GIVEN datapackage that can be treated as valid by the dpm
        self.valid_dp = datapackage.DataPackage({
                "name": "some-datapackage",
                "resources": [
                    {"name": "some-resource", "path": "./data/some_data.csv", }
                ]
            },
            default_base_path='.')
        patch('dpm.client.DataPackage', lambda *a: self.valid_dp).start()
        patch('dpm.client.exists', lambda *a: True).start()
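
The same setup can also be written with context-manager patches, which are undone automatically when the block exits (the .start() style above needs a matching .stop() or addCleanup call). A sketch against the same dpm.client targets:

from unittest.mock import patch

import datapackage

valid_dp = datapackage.DataPackage(
    {
        'name': 'some-datapackage',
        'resources': [
            {'name': 'some-resource', 'path': './data/some_data.csv'},
        ],
    },
    default_base_path='.')

# Patch the names where dpm.client looks them up, not where they are defined.
with patch('dpm.client.DataPackage', lambda *a: valid_dp), \
     patch('dpm.client.exists', lambda *a: True):
    pass  # exercise the publish code path here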
github frictionlessdata / datapackage-py / tests / test_datapackage.py
def test_schema(self):
        descriptor = {}
        schema = {'foo': 'bar'}
        dp = datapackage.DataPackage(descriptor, schema=schema)
        assert dp.schema.to_dict() == schema
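
In this older datapackage-py API, the schema argument accepts either an explicit dict, as above, or a registry id such as 'tabular' (used in the biotracks example near the end of this page). A hedged sketch of both forms:

import datapackage

# A dict is used verbatim as the validation schema...
custom = datapackage.DataPackage({}, schema={'foo': 'bar'})
assert custom.schema.to_dict() == {'foo': 'bar'}

# ...while a string selects a bundled profile from the schema registry.
tabular = datapackage.DataPackage({}, schema='tabular')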
github frictionlessdata / datapackage-py / tests / test_datapackage.py
def test_resources_are_empty_tuple_by_default(self):
        descriptor = {}
        dp = datapackage.DataPackage(descriptor)
        assert dp.resources == ()
github frictionlessdata / datapackage-py / tests / test_datapackage.py
def test_base_path_cant_be_set_directly(self):
        dp = datapackage.DataPackage()
        with pytest.raises(AttributeError):
            dp.base_path = 'foo'
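
base_path is derived from where the descriptor was loaded, or from the default_base_path constructor argument, and is read-only afterwards. The supported route, sketched:

import datapackage

dp = datapackage.DataPackage({}, default_base_path='tests/fixtures')
print(dp.base_path)  # 'tests/fixtures'

# dp.base_path = 'foo' would raise AttributeError, as the test above asserts.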
github frictionlessdata / datapackage-py / tests / test_datapackage.py
def test_should_raise_if_path_doesnt_exist(self):
        dp = datapackage.DataPackage({}, {})

        with pytest.raises(datapackage.exceptions.DataPackageException):
            dp.save('/non/existent/file/path')
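
Application code usually catches this failure rather than asserting it, for example:

import datapackage

dp = datapackage.DataPackage({}, {})
try:
    dp.save('/non/existent/file/path')
except datapackage.exceptions.DataPackageException as e:
    print('could not save package: {}'.format(e))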
github catalyst-cooperative / pudl / src / pudl / convert / datapkg_to_sqlite.py
        False (the default), fail if an existing PUDL DB is found.

    """
    # prepping the sqlite engine
    pudl_engine = sa.create_engine(sqlite_url)
    logger.info("Dropping the current PUDL DB, if it exists.")
    try:
        # So that we can wipe it out
        pudl.helpers.drop_tables(pudl_engine, clobber=clobber)
    except sa.exc.OperationalError:
        pass
    # And start anew
    pudl_engine = sa.create_engine(sqlite_url)

    # grab the merged datapackage metadata file:
    pkg = datapackage.DataPackage(
        descriptor=str(pathlib.Path(out_path, 'datapackage.json')))
    # we want to grab the dictionary of columns that need autoincrement id cols
    try:
        autoincrement = pkg.descriptor['autoincrement']
    # in case there is no autoincrement columns in the metadata..
    except KeyError:
        autoincrement = {}

    logger.info("Loading merged datapackage into SQLite.")
    logger.info("This could take a while. It might be a good time")
    logger.info("to get a drink of water. Hydrate or die!")
    try:
        # Save the data package in SQL
        pkg.save(storage='sql', engine=pudl_engine, merge_groups=True,
                 autoincrement=autoincrement)
    except exceptions.TableSchemaException as exception:
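
Two details of this snippet generalize well: a package can be loaded straight from a datapackage.json path, and optional descriptor keys can be read with dict.get instead of try/except KeyError. A condensed sketch, with an illustrative output path:

import pathlib

import datapackage

pkg = datapackage.DataPackage(
    descriptor=str(pathlib.Path('out', 'datapackage.json')))

# Equivalent to the try/except KeyError in the snippet above.
autoincrement = pkg.descriptor.get('autoincrement', {})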
github frictionlessdata / data-quality-cli / data_quality / tasks / base_task.py
        self.data_dir = self.config['data_dir']
        self.result_file = os.path.join(self.data_dir, self.config['result_file'])
        self.run_file = os.path.join(self.data_dir, self.config['run_file'])
        self.source_file = os.path.join(self.data_dir, self.config['source_file'])
        self.performance_file = os.path.join(self.data_dir,
                                             self.config['performance_file'])
        self.publisher_file = os.path.join(self.data_dir,
                                           self.config['publisher_file'])
        self.cache_dir = self.config['cache_dir']
        self.data_key = self.config['goodtables']['arguments']['batch']['data_key']
        datapkg_file_path = self.config.get('datapackage_file', 'datapackage.json')
        if not os.path.isabs(datapkg_file_path):
            datapkg_file_path = os.path.join(os.path.dirname(self.data_dir),
                                             datapkg_file_path)
        try:
            self.datapackage = datapackage.DataPackage(datapkg_file_path)
        except datapackage.exceptions.DataPackageException as e:
            raise ValueError(('A datapackage couldn\'t be created because of the '
                              'following error: "{0}". Make sure the file is not '
                              'empty and use "dq init" command.').format(e))
        self.all_scores = []
github CellMigStandOrg / biotracks / biotracks / validation.py View on Github external
def validate(self, dp):
        if isinstance(dp, datapackage.DataPackage) and not is_tabular(dp):
            raise ValueError("data package must be a tabular data package")
        else:
            dp = datapackage.DataPackage(dp, schema="tabular")
        dp.validate()
        self.logger.debug("valid tabular data package")
        if len(dp.resources) < 2:
            self.__error("data package must have at least two resources")
        res_map = dict((_.descriptor['name'], _) for _ in dp.resources)
        try:
            objects = res_map[cmso.OBJECTS_TABLE]
        except KeyError:
            self.__error("objects table not found")
        else:
            self.validate_objects(objects.descriptor)
        try:
            links = res_map[cmso.LINKS_TABLE]
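
Stripped of the project-specific helpers (is_tabular and the cmso table-name constants belong to biotracks), the validation pattern here is: build or coerce a tabular data package, call validate(), then index resources by name for per-table checks. A hedged sketch:

import datapackage

dp = datapackage.DataPackage('datapackage.json', schema='tabular')
dp.validate()  # raises on descriptor/schema violations

# Index resources by name for targeted per-table checks.
res_map = {r.descriptor['name']: r for r in dp.resources}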