Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_Data(db):
    """Round-trip a key/value data record attached to a Language.

    Builds a Language with one Language_data entry, persists it via the
    session, and verifies that datadict() exposes the stored pair after
    a refresh from the database.
    """
    from clld.db.models.common import Language, Language_data
    lang = Language(id='abc', name='Name')
    datum = Language_data(key='abstract', value='c')
    lang.data.append(datum)
    DBSession.add(lang)
    DBSession.flush()
    DBSession.refresh(lang)
    assert lang.datadict()['abstract'] == 'c'
# NOTE(review): excerpt of a larger bibliography-loading function -- `kw`,
# `entry`, `fts`, `btype`, `models`, `data`, `lgsources` and `DBSession`
# are bound outside this view, and the original indentation was lost in
# extraction. Code left byte-identical; comments only.
# Fill in the remaining Ref attributes from the parsed bibliography entry.
kw.update(
publisher=entry.publisher_and_address[0],
address=entry.publisher_and_address[1],
year_int=entry.year_int,
pages_int=entry.pages_int,
# med_index is the negated first weight component -- presumably so a
# smaller stored value ranks higher; TODO confirm against Ref ordering.
med_index=-entry.weight[0],
med_pages=entry.weight[1],
med_type=entry.med_type.id,
id=entry.fields['glottolog_ref_id'],
# Full-text-search vector built from all field values except the abstract.
fts=fts.tsvector('\n'.join(v for k, v in entry.fields.items() if k != 'abstract')),
# Citation-style display name "<author> <year>" with na/nd fallbacks.
name='{} {}'.format(entry.fields.get('author', 'na'), entry.fields.get('year', 'nd')),
description=entry.fields.get('title') or entry.fields.get('booktitle'),
bibtex_type=btype)
ref = models.Ref(**kw)
DBSession.add(ref)
# Flush so ref.pk is assigned before Refprovider rows reference it below.
DBSession.flush()
reflangs, trigger = [], None
# Provider-id sets for which computer-assigned languoids get discarded
# later (used by code outside this excerpt).
no_ca = [{'degruyter'}, {'benjamins'}]
provs = set()
# Each comma-separated 'srctrickle' item is '<provider>#<key>': collect any
# manually assigned languoids for the key and record the provider link.
for key in entry.fields['srctrickle'].split(','):
key = key.strip()
if key:
reflangs.extend(lgsources.get(key, []))
prov, key = key.split('#', 1)
provs.add(prov)
DBSession.add(models.Refprovider(
provider_pk=data['Provider'][prov].pk,
ref_pk=ref.pk,
id='{0}:{1}'.format(prov, key)))
# NOTE(review): excerpt -- these lines continue a keyword-argument list whose
# opening call is not visible; `year`, `title`, `author`, `crno`, `affected`,
# `dtid`, `pid`, `dt`, `provider`, `transaction` and the query names are all
# bound outside this view. Code left byte-identical; comments only.
year_int=int(year),
title=title,
author=author,
address='Dallas',
publisher='SIL International',
# Link to the ISO 639-3 change-request PDF for this request number.
url='http://www.sil.org/iso639-3/cr_files/%s.pdf' % crno,
# Human-readable list of "Name [identifier]" for every affected language.
language_note=', '.join('%(Language Name)s [%(Affected Identifier)s]' % spec for spec in affected),
jsondata=dict(hhtype=dtid, src=pid))
ref.doctypes.append(dt)
ref.providers.append(provider)
# Associate the reference with each affected languoid, looked up by 'hid';
# unknown identifiers are silently skipped.
for spec in affected:
lang = Languoid.get(spec['Affected Identifier'], key='hid', default=None)
if lang and lang not in ref.languages:
ref.languages.append(lang)
DBSession.add(ref)
transaction.commit()
transaction.begin()
matched = 0
near = 0
# Highest Identifier pk currently in use -- presumably a starting point for
# minting new pks; the consuming code is outside this excerpt.
max_identifier_pk = DBSession.query(
Identifier.pk).order_by(desc(Identifier.pk)).first()[0]
families = []
# Iterate over all active family-level languoids; the inner query chain is
# cut off mid-expression at the end of this excerpt.
for family in DBSession.query(Languoid)\
.filter(Languoid.level == LanguoidLevel.family)\
.filter(Language.active == True)\
.all():
isoleafs = set()
for row in DBSession.query(TreeClosureTable.child_pk, Languoid.hid)\
.filter(family.pk == TreeClosureTable.parent_pk)\
# NOTE(review): overlapping duplicate excerpt of the Ref-loading code earlier
# in this file; the opening of the keyword-argument list is not visible and
# indentation was lost in extraction. Code left byte-identical; comments only.
description=entry.fields.get('title') or entry.fields.get('booktitle'),
bibtex_type=btype)
ref = models.Ref(**kw)
DBSession.add(ref)
# Flush so ref.pk is available for the Refprovider rows created below.
DBSession.flush()
reflangs, trigger = [], None
# Provider-id sets for which computer-assigned languoids are discarded.
no_ca = [{'degruyter'}, {'benjamins'}]
provs = set()
# 'srctrickle' items are '<provider>#<key>' pairs.
for key in entry.fields['srctrickle'].split(','):
key = key.strip()
if key:
reflangs.extend(lgsources.get(key, []))
prov, key = key.split('#', 1)
provs.add(prov)
DBSession.add(models.Refprovider(
provider_pk=data['Provider'][prov].pk,
ref_pk=ref.pk,
id='{0}:{1}'.format(prov, key)))
# No manual assignment found: fall back to languoids computed from the
# entry itself; `trigger` records what caused the automatic assignment.
if not reflangs:
reflangs, trigger = entry.languoids(lgcodes)
if trigger and ((provs in no_ca) or (reflangs)):
# Discard computerized assigned languoids for bibs where this does not make sense,
# or for bib entries that have been manually assigned in a Languoid's ini file.
reflangs, trigger = [], None
# Link the reference to each assigned languoid; automatic assignments
# (trigger set) are stored as inactive.
for lid in set(reflangs):
DBSession.add(
common.LanguageSource(
language_pk=data['Languoid'][lid].pk, source_pk=ref.pk, active=not bool(trigger)))
# Truncated: the body of this conditional lies outside the excerpt.
if trigger:
def add_language_codes(data, lang, isocode, glottocodes=None, glottocode=None):
    """Attach ISO 639-3 and Glottolog identifiers to *lang*.

    A three-letter *isocode* yields an 'iso' identifier. A Glottolog
    identifier is added when *glottocode* is given explicitly, or when it
    can be looked up from *glottocodes* by *isocode*.
    """
    def _shared_identifier(kind, code):
        # presumably data.add caches rows under the '<type>:<id>' key so
        # repeated codes reuse one Identifier -- confirm against data.add.
        key = '%s:%s' % (kind, code)
        return data.add(
            common.Identifier, key,
            id=key,
            name=code,
            type=getattr(common.IdentifierType, kind).value)

    if isocode and len(isocode) == 3:
        DBSession.add(common.LanguageIdentifier(
            language=lang, identifier=_shared_identifier('iso', isocode)))

    if glottocode or (glottocodes and isocode and isocode in glottocodes):
        glottocode = glottocode or glottocodes[isocode]
        DBSession.add(common.LanguageIdentifier(
            language=lang, identifier=_shared_identifier('glottolog', glottocode)))
# NOTE(review): excerpt of a languoid create/update routine -- `attrs`,
# `enum`, `name`, `languoids`, `args`, `gl_names`, `hname`, `references`
# and the model classes are bound outside this view, and indentation was
# lost in extraction. Code left byte-identical; comments only.
attrs[name] = enum.from_string(attrs[name])
# Update an existing languoid in place, or create and register a new one.
l = languoids.get(attrs['pk'])
if l:
for k, v in attrs.items():
setattr(l, k, v)
#
# We do not assign ISO codes for existing languages, because it could be
# that the ISO code is now assigned to a family node, due to a change
# request, e.g. see https://github.com/clld/glottolog-data/issues/40
#
if len(l.hid or '') == 3 and not l.iso_code:
args.log.warn('Language with hid %s but no iso code!' % l.hid)
else:
l = Languoid(**attrs)
DBSession.add(l)
languoids[l.pk] = l
# A three-letter hid doubles as an ISO 639-3 code for new languoids.
if len(attrs.get('hid', '')) == 3:
create_identifier(
None, l, name=attrs['hid'], type=IdentifierType.iso.value)
# NOTE(review): `gl_name` (singular) below has no visible assignment --
# presumably `gl_name = gl_names.get(l.name)` plus a guard was lost in
# extraction; as written this would raise NameError. Verify in full file.
create_identifier(
gl_names.get(l.name),
l,
name=l.name,
description=gl_name.description,
type=gl_name.type)
if hname:
l.update_jsondata(hname=hname)
# Snapshot of already-existing (source_pk, language_pk) links.
old_sl = {}
for pair in DBSession.query(common.LanguageSource):
old_sl[(pair.source_pk, pair.language_pk)] = True
references = list(references)
references.extend([
(common.ValueSetReference, 'valueset'),
(common.SentenceReference, 'sentence')])
# Collect every (source, language) pair implied by reference rows ...
sl = {}
for model, attr in references:
for ref in DBSession.query(model):
sl[(ref.source_pk, getattr(ref, attr).language_pk)] = True
# ... and create only the LanguageSource links that don't exist yet.
for s, l in sl:
if (s, l) not in old_sl:
DBSession.add(common.LanguageSource(language_pk=l, source_pk=s))
def create_identifier(identifier, l, **kw):
    """Link languoid *l* to *identifier*, minting a new Identifier if needed.

    When *identifier* is None a fresh Identifier row is created from **kw
    with the next pk from the module-global MAX_IDENTIFIER_PK counter
    (which is advanced as a side effect); otherwise the existing row's pk
    is reused. In both cases a LanguageIdentifier link row is added.
    """
    global MAX_IDENTIFIER_PK
    if identifier is not None:
        identifier_pk = identifier.pk
    else:
        MAX_IDENTIFIER_PK += 1
        identifier_pk = MAX_IDENTIFIER_PK
        DBSession.add(
            Identifier(pk=identifier_pk, id=str(identifier_pk), **kw))
    DBSession.add(
        LanguageIdentifier(language_pk=l.pk, identifier_pk=identifier_pk))