How to use the clldutils.jsonlib module in clldutils

To help you get started, we’ve selected a few clldutils.jsonlib examples based on popular ways the module is used in public projects. Each snippet is an excerpt from the linked file.

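The snippets below revolve around two helpers: jsonlib.load, which takes a file path and returns the parsed JSON object, and jsonlib.dump, which serializes an object to a file. A minimal sketch of the round trip (the file name is invented; extra keyword arguments appear to be passed through to the stdlib json calls):

from pathlib import Path

from clldutils import jsonlib

# Serialize a dict to a file; indent is forwarded to json.dump.
jsonlib.dump({'triples': 42}, Path('stats.json'), indent=4)

# Read it back; load accepts a str or pathlib.Path.
stats = jsonlib.load(Path('stats.json'))
print(stats['triples'])  # 42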

github clld/clld · clld/scripts/llod.py · View on GitHub
            {'name': 'linguistics'},
            {'name': 'lod'},
            {'name': 'llod'},
        ]}
    if dataset.contact:
        md['maintainer_email'] = dataset.contact
    if dataset.license:
        if 'creativecommons.org/licenses/by/' in dataset.license:
            md['license_id'] = 'cc-by-sa'
            md['license_title'] = "Creative Commons Attribution Share-Alike"
        elif 'creativecommons.org/' in dataset.license and '-nc' in dataset.license:
            md['license_id'] = 'cc-nc'
            md['license_title'] = "Creative Commons Non-Commercial (Any)"
    # jsonlib.load takes a file path and returns the parsed JSON object.
    rdf_md = args.data_file('rdf-metadata.json')
    if rdf_md.exists():
        rdf_md = jsonlib.load(rdf_md)
        md['extras'] = [
            {'key': k, 'value': str(rdf_md[k])} for k in rdf_md.keys()
            if k.split(':')[0] in ['triples', 'resources', 'links']]

    package = datahub('package_update', id=name, **md)
    resources = [rsc['name'] for rsc in package['resources']]
    if 'VoID description' not in resources:
        rsc = datahub(
            'resource_create',
            package_id=package['id'],
            name='VoID description',
            url='http://%s/void.ttl' % dataset.domain,
            format='meta/void',
            mimetype='text/turtle')
        assert rsc
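
The rdf-metadata.json handling above shows the typical read pattern: check that the path exists, then let jsonlib.load hand back a plain dict. The same idea in isolation (file name and keys mirror the snippet; everything else is simplified):

from pathlib import Path

from clldutils import jsonlib

rdf_md_path = Path('rdf-metadata.json')
extras = []
if rdf_md_path.exists():
    rdf_md = jsonlib.load(rdf_md_path)
    # Only forward the triple/resource/link statistics as catalog extras.
    extras = [
        {'key': k, 'value': str(v)} for k, v in rdf_md.items()
        if k.split(':')[0] in ('triples', 'resources', 'links')]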
github clld/clld · src/clld/scripts/util.py · View on GitHub
def gbs_func(command, args, sources=None):  # pragma: no cover
    def words(s):
        return set(slug(s.strip(), remove_whitespace=False).split())

    log = args.log
    count = 0
    api_url = "https://www.googleapis.com/books/v1/volumes?"

    if command == 'cleanup':
        for fname in args.data_file('gbs').glob('*.json'):
            try:
                fname = Path(fname)
                data = jsonlib.load(fname)
                # Drop cached Google Books responses that came back empty.
                if data.get('totalItems') == 0:
                    fname.unlink()
            except ValueError:
                # Not valid JSON - drop the broken cache file as well.
                fname.unlink()
        return

    if not sources:
        sources = DBSession.query(common.Source)\
            .order_by(common.Source.id)\
            .options(joinedload(common.Source.data))
    if callable(sources):
        sources = sources()

    for i, source in enumerate(page_query(sources, verbose=True, commit=True)):
        filepath = args.data_file('gbs', 'source%s.json' % source.id)
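
The cleanup branch relies on jsonlib.load raising ValueError for files that are not valid JSON (json.JSONDecodeError is a ValueError subclass), so truncated downloads are unlinked along with empty results. The same defensive pattern as a standalone, illustrative helper:

from pathlib import Path

from clldutils import jsonlib

def cleanup_cache(cache_dir: Path) -> None:
    """Delete cached API responses that are empty or unparseable."""
    for fname in cache_dir.glob('*.json'):
        try:
            if jsonlib.load(fname).get('totalItems') == 0:
                fname.unlink()
        except ValueError:  # not valid JSON, e.g. a truncated download
            fname.unlink()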
github clld/clld · clld/scripts/llod.py · View on GitHub
            try:
                q = DBSession.query(rsc.model)
            except InvalidRequestError:
                args.log.info('... skipping')
                continue
            for obj in page_query(q.order_by(rsc.model.pk), n=10000, verbose=True):
                graph = get_graph(obj, args.env['request'], rsc.name)
                count_triples += len(graph)
                count_rsc += 1
                fp.write(n3(graph, with_head=count_rsc == 1))
            args.log.info('... finished')

    # put in args.data_file('..', 'static', 'download')?
    md = {'path': as_posix(tmp), 'resources': count_rsc, 'triples': count_triples}
    md.update(count_links(as_posix(tmp_dump)))
    jsonlib.dump(md, args.data_file('rdf-metadata.json'))
    print(md)

    dataset = Dataset.first()
    rdf_dump = args.module_dir.joinpath(
        'static', 'download', '%s-dataset.n3' % dataset.id)
    tmp_dump.copy(rdf_dump)
    check_call('gzip -f %s' % rdf_dump, shell=True)
    print(str(rdf_dump))
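
This is the write side of the first snippet: the computed counts end up in rdf-metadata.json via jsonlib.dump, from where the package_update step later picks them up. Boiled down (the numbers are placeholders):

from pathlib import Path

from clldutils import jsonlib

md = {'path': '/tmp/dump.n3', 'resources': 1200, 'triples': 54321}  # placeholder stats
jsonlib.dump(md, Path('rdf-metadata.json'))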
github clld/glottolog3 · glottolog3/static_archive.py · View on GitHub
    langs, identifiers = {}, {}
    for version in versions:
        aggregate(version, langs, identifiers)

    for version in versions:
        dump(
            out.joinpath('glottolog-{0}'.format(version)),
            version,
            langs,
            {pk: list(c) for pk, c in itertools.groupby(identifiers[version], lambda i: i.lpk)})

    gc2v = {}
    for v in versions:
        for gc in sorted(langs[v].keys()):
            gc2v[gc] = v
    jsonlib.dump(gc2v, out.joinpath('glottocode2version.json'), indent=4)
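
Note the indent=4 argument: as far as I can tell, extra keyword arguments to jsonlib.dump are forwarded to the underlying json.dump, so anything it accepts (e.g. sort_keys) should work as well:

from pathlib import Path

from clldutils import jsonlib

gc2v = {'abcd1234': '4.1', 'efgh5678': '3.4'}  # made-up glottocode -> version map
jsonlib.dump(gc2v, Path('glottocode2version.json'), indent=4, sort_keys=True)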
github clld/clld · src/clld/scripts/util.py · View on GitHub
            log.info('%s' % sorted(iwords))
            if needs_check:
                log.info('------- %s -> %s' % (
                    source.id, item['volumeInfo'].get('industryIdentifiers')))
                log.info('%s %s' % (
                    item['volumeInfo']['title'], item['volumeInfo'].get('subtitle', '')))
                log.info(stitle)
                log.info(item['volumeInfo'].get('publishedDate'))
                log.info(source.year)
                log.info(item['volumeInfo'].get('authors'))
                log.info(source.author)
                log.info(item['volumeInfo'].get('publisher'))
                log.info(source.publisher)
                if not confirm('Are the records the same?'):
                    log.warn('---- removing ----')
                    jsonlib.dump({"totalItems": 0}, filepath)
        elif command == 'update':
            source.google_book_search_id = item['id']
            source.update_jsondata(gbs=item)
            count += 1
        elif command == 'download':
            if source.author and (source.title or source.booktitle):
                title = source.title or source.booktitle
                if filepath.exists():
                    continue
                q = [
                    'inauthor:' + quote_plus(source.author.encode('utf8')),
                    'intitle:' + quote_plus(title.encode('utf8')),
                ]
                if source.publisher:
                    q.append('inpublisher:' + quote_plus(
                        source.publisher.encode('utf8')))
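
In the cleanup interaction above, rejecting a match writes a {"totalItems": 0} sentinel with jsonlib.dump, which the cleanup branch from the earlier util.py snippet removes on the next run. Sketched on its own (the path is hypothetical):

from pathlib import Path

from clldutils import jsonlib

filepath = Path('gbs') / 'source123.json'  # hypothetical cache file
filepath.parent.mkdir(exist_ok=True)
# Mark the cached lookup as a non-match; the cleanup command unlinks such files.
jsonlib.dump({'totalItems': 0}, filepath)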
github clld/clld · clld/scripts/freeze.py · View on GitHub
def load(table, csv, engine):
    # The table's CSVW description sits in a JSON sidecar file next to the CSV.
    schema = jsonlib.load(csv.parent.joinpath(csv.stem + '.' + CsvmJsonAdapter.extension))
    converter = get_converter(schema['tableSchema'], table)
    engine.execute(
        table.insert(), [converted(d, converter) for d in reader(csv, dicts=True)])
    # dc:identifier carries the alembic revision recorded at freeze time.
    return schema.get("dc:identifier")
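
The sidecar convention is worth spelling out: for every table's CSV there is a like-named JSON file holding the CSVW description. A load-only sketch of the path arithmetic (I am assuming CsvmJsonAdapter.extension resolves to something like 'csvm'):

from pathlib import Path

from clldutils import jsonlib

csv = Path('dump') / 'language.csv'                # hypothetical dump file
sidecar = csv.parent.joinpath(csv.stem + '.csvm')  # assumed adapter extension
if sidecar.exists():
    schema = jsonlib.load(sidecar)
    print(schema['tableSchema']['columns'])        # CSVW column descriptions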
github clld/clld · clld/scripts/freeze.py · View on GitHub
    db_version = get_alembic_version(DBSession)

    for table in Base.metadata.sorted_tables:
        csv = dump_dir.joinpath('%s.csv' % table.name)
        if with_history or not table.name.endswith('_history'):
            _freeze(table, csv)

        if csv.exists():
            csvm = '%s.%s' % (table.name, CsvmJsonAdapter.extension)
            doc = CsvmJsonAdapter.csvm_doc(
                csvm, args.env['request'], [(col.name, col) for col in table.columns])
            if db_version:
                # We (ab)use a dc:identifier property to pass the alembic revision of the
                # database to the unfreeze script.
                doc["dc:identifier"] = db_version  # pragma: no cover
            jsonlib.dump(doc, dump_dir.joinpath(csvm))

    with ZipFile(
            as_posix(args.data_file('..', 'data.zip')), 'w', ZIP_DEFLATED) as zipfile:
        for f in dump_dir.iterdir():
            if f.is_file():
                with f.open('rb') as fp:
                    zipfile.writestr(f.name, fp.read())
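
This is where those sidecar files come from: jsonlib.dump(doc, ...) writes one CSVW document per table, smuggling the alembic revision along as dc:identifier for the load function in the freeze.py snippet above to read back. In miniature (revision and table name invented):

from pathlib import Path

from clldutils import jsonlib

dump_dir = Path('dump')
dump_dir.mkdir(exist_ok=True)

doc = {'tableSchema': {'columns': []}, 'dc:identifier': 'abc123'}  # fake revision
jsonlib.dump(doc, dump_dir.joinpath('language.csvm'))

# The unfreeze side recovers the revision:
print(jsonlib.load(dump_dir / 'language.csvm').get('dc:identifier'))  # 'abc123'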
github clld/clld · src/clld/db/meta.py · View on GitHub
def __json__(self, req):
        """Custom JSON serialization of an object.

        :param req: pyramid Request object.
        :return: ``dict`` suitable for serialization as JSON.
        """
        exclude = {'active', 'version', 'created', 'updated', 'polymorphic_type'}
        cols = [
            col.key for om in inspect(self).mapper.iterate_to_root()
            for col in om.local_table.c
            # set.add returns None, so the condition also dedupes keys
            # repeated across the inheritance hierarchy.
            if col.key not in exclude and not exclude.add(col.key)]
        return {col: jsonlib.format(getattr(self, col)) for col in cols}
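
jsonlib.format is what makes the column values JSON-safe here: if I remember the implementation correctly, it renders date/datetime values as ISO strings and passes everything else through unchanged (a parse helper reportedly goes the other direction). For instance:

import datetime

from clldutils import jsonlib

print(jsonlib.format(datetime.date(2020, 1, 31)))  # expected: '2020-01-31'
print(jsonlib.format(42))                          # non-date values pass through: 42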