def test_CldfDownload(env, tmppath, mocker, capsys):
    from clld.web.adapters.cldf import CldfDownload

    mocker.patch('clld.web.adapters.cldf.transaction')
    tmp = tmppath / 'dl.zip'
    dl = CldfDownload(Dataset, 'clld')
    dl.create(env['request'], outfile=tmp, verbose=True)
    out, err = capsys.readouterr()
    assert 'Value' in out

    outdir = tmppath / 'cldf'
    with ZipFile(tmp.as_posix()) as zip:
        assert 'Wordlist-metadata.json' in zip.namelist()
        zip.extractall(str(outdir))

    ds = CldfDataset.from_metadata(outdir.joinpath('Wordlist-metadata.json'))
    assert ds.module == 'Wordlist'
    values = list(ds[ds.primary_table])
    assert len(values) == 3
    for v in values:
        list(ds.sources.expand_refs(v['Source']))
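
# A hedged follow-up sketch (not part of the test above): the extracted CLDF
# directory could additionally be checked with pycldf's schema validation.
# `outdir` is assumed to be a pathlib.Path pointing at the extracted files.
def check_extracted_cldf(outdir):
    from pycldf.dataset import Dataset as CldfDataset

    ds = CldfDataset.from_metadata(outdir.joinpath('Wordlist-metadata.json'))
    # validate() checks the data against the CLDF schema.
    assert ds.validate()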
    }
    kwargs.update(kw)

    if isinstance(namespace, tuple):
        namespace = dict(namespace)
    # get the datatypes for the namespace from the configuration
    datatypes = read_conf(kwargs['conf'])[1]

    # Load the dataset.
    fname = Path(path)
    if not fname.exists():
        raise compat.FileNotFoundError(
            '{:} does not exist'.format(fname))
    if fname.suffix == '.json':
        dataset = pycldf.dataset.Dataset.from_metadata(fname)
    else:
        dataset = pycldf.dataset.Dataset.from_data(fname)

    if dataset.module == "Wordlist":
        # First, make a list of cognate codes if they are in a separate table.
        cognateset_assignments = {}
        try:
            form_reference = dataset["CognateTable", "formReference"].name
            for row in dataset["CognateTable"].iterdicts():
                cognateset_assignments[row[form_reference]] = row
        except KeyError:
            # Either there are no cognate codes, or they are in the form
            # table. Both options are fine.
            pass

        f_id = dataset["FormTable", "id"].name
        # Access columns by type, not by name.
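        # (hedged continuation sketch: the snippet is cut off here; `l_id` and
        # `p_id` are illustrative names showing the lookup-by-property pattern
        # used above, which works regardless of the actual CSV column names)
        l_id = dataset["FormTable", "languageReference"].name
        p_id = dataset["FormTable", "parameterReference"].name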
def write_cldf(req, contrib, valuesets, features, outdir):
    ds = Dataset('wals-chapter-%s' % contrib.id)
    ds.fields = (
        'ID',
        'Language_ID',
        'Language_name',
        'Parameter_ID',
        'Value',
        'DomainElement',
        'Source',
        'Comment')
    ds.table.schema.aboutUrl = url_template(req, 'valueset', 'ID')
    ds.table.schema.columns['Language_ID'].valueUrl = Identifier(
        type='glottolog', name='{Language_ID}').url()
    ds.table.schema.columns['Parameter_ID'].valueUrl = url_template(
        req, 'parameter', 'Parameter_ID')
    ds.metadata['dc:bibliographicCitation'] = text_citation(req, contrib)
    Parameters
    ----------
    fname : str or Path
        Path to a CLDF dataset

    Returns
    -------
    Dataset
    """
    fname = Path(fname)
    if not fname.exists():
        raise FileNotFoundError(
            '{:} does not exist'.format(fname))
    if fname.suffix == '.json':
        return pycldf.dataset.Dataset.from_metadata(fname)
    return pycldf.dataset.Dataset.from_data(fname)
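
# A brief, hedged usage sketch for the loader shown above: the metadata
# filename is an assumed example, and the iteration mirrors the
# `list(ds[ds.primary_table])` pattern used in the test snippet earlier.
import pycldf

ds = pycldf.dataset.Dataset.from_metadata('StructureDataset-metadata.json')
print(ds.module)  # e.g. 'StructureDataset' or 'Wordlist'
for row in list(ds[ds.primary_table]):
    print(row)  # each row is a dict keyed by column name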
def bibrecord2source(req, src):
    rec = src.bibtex()
    rec['wals_url'] = req.resource_url(src)
    return Source(rec.genre.value if rec.genre else 'misc', rec.id, **dict(rec.items()))


def source2source(req, source):
    """Harmonize the different Source implementations in clld and pycldf."""
    bibrecord = source.bibtex()
    fields = OrderedDict({'%s_url' % req.dataset.id: req.resource_url(source)})
    for key, value in bibrecord.items():
        fields[key] = '; '.join(value) if isinstance(value, list) else value
    return sources.Source(
        getattr(bibrecord.genre, 'value', bibrecord.genre) if bibrecord.genre else 'misc',
        source.id,
        **fields)
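
# For comparison, a minimal sketch of constructing a pycldf Source directly;
# the genre, key and BibTeX fields below are made-up example values.
from pycldf.sources import Source

src = Source('book', 'Meier2005', author='Hans Meier', year='2005',
             title='An example grammar')
# Such Source objects can then be attached to a dataset, e.g. via
# ds.add_sources(src), and referenced from rows through their 'Source' column.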
def create(self, req, filename=None, verbose=True, outfile=None):
    cldf_cfg = req.registry.getUtility(ICldfConfig)

    with TemporaryDirectory() as tmpd:
        cls = getattr(dataset, cldf_cfg.module)
        ds = cls.in_dir(tmpd)
        ds.properties['dc:bibliographicCitation'] = text_citation(req, req.dataset)
        ds.properties['dc:publisher'] = '%s, %s' % (
            req.dataset.publisher_name, req.dataset.publisher_place)
        ds.properties['dc:license'] = req.dataset.license
        ds.properties['dc:issued'] = req.dataset.published.isoformat()
        ds.properties['dc:title'] = req.dataset.name
        ds.properties['dc:creator'] = req.dataset.formatted_editors()
        ds.properties['dc:identifier'] = req.resource_url(req.dataset)
        ds.properties['dcat:accessURL'] = req.route_url('download')
        if DBSession.query(Sentence).count():
            ds.add_component('ExampleTable')
        if DBSession.query(DomainElement).count():
            ds.add_component('CodeTable', {'name': 'Number', 'datatype': 'integer'})
        ds.add_component('ParameterTable')
        ds.add_component('LanguageTable')
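
# A hedged, standalone sketch of the step that usually follows component
# setup: writing actual rows with pycldf. The rows below are invented example
# data and a StructureDataset is assumed.
from pycldf import StructureDataset

sd = StructureDataset.in_dir('cldf')
sd.add_component('LanguageTable')
sd.write(
    ValueTable=[{'ID': '1', 'Language_ID': 'l1', 'Parameter_ID': 'p1', 'Value': 'x'}],
    LanguageTable=[{'ID': 'l1', 'Name': 'Example language'}],
)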
    form : str (default="ipa")
        The column in which the unsegmented phonetic strings are stored.
    note : str (default=None)
        The column in which you store your comments.
    form_in_source : str (default=None)
        The column in which you store the original form in the source.
    source : str (default=None)
        The column in which you store your source information.
    alignment : str (default="alignment")
        The column in which you store the alignments.
    """
    if not cldf:
        raise ValueError('The package pycldf needs to be installed')

    # create cldf-dataset
    ds = CLDF_Wordlist.in_dir(path)
    # add sources if they are available
    ds.add_sources(
        read_text(source_path) if source_path else '')
    # add components
    ds.add_component('LanguageTable')
    ds.add_component('ParameterTable', 'Concepticon_ID')
    ds.add_component('CognateTable')
    ds.add_columns('FormTable', 'form_in_source')

    languages, parameters, forms, cognates = {}, {}, [], []
    for idx in wordlist:
        lid = slug(wordlist[idx, 'doculect'])
        if lid not in languages:
            languages[lid] = dict(
                ID=lid,
                Name=wordlist[idx, 'doculect'],