# get indices for header
if cols:
    indices = [self._header[x] for x in cols]
    header = [c.upper() for c in cols]
else:
    indices = [r for r in range(len(self.header))]

if rows:
    stmts = []
    for key, value in rows.items():
        if key == 'ID':
            stmts += ["key " + value]
        else:
            idx = self._header[key]
            stmts += ["line[{0}] ".format(idx) + value]
    log.debug("calculated what should be excluded")

# get the data
out = {}
for key, line in self._data.items():
    log.debug(key)
    if rows:
        if eval(" and ".join(stmts)):
            out[key] = [line[i] for i in indices]
    else:
        out[key] = [line[i] for i in indices]

log.debug("passing data to wl2qlc")
return wl2qlc(header, out, **keywords)
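
# --- Minimal, self-contained sketch (not part of the original source) of the
# --- filter-string mechanism used above: `rows` maps column names to comparison
# --- strings, which are joined with "and" and passed to eval() against each data
# --- line. All names below (_header, rows, key, line) are hypothetical stand-ins.
_header = {'DOCULECT': 0, 'CONCEPT': 1}
rows = {'ID': ' > 5', 'DOCULECT': ' == "German"'}
stmts = []
for k, v in rows.items():
    if k == 'ID':
        stmts += ["key " + v]
    else:
        stmts += ["line[{0}] ".format(_header[k]) + v]
key, line = 7, ['German', 'hand']
print(eval(" and ".join(stmts)))  # True: key > 5 and line[0] == "German"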
# output dst-format (phylip)
    merge_vowels=rcParams['merge_vowels'],
    model=rcParams['model'])

distances = []
for key, idxs in wordlist.get_etymdict(ref=ref).items():
    # get only valid numbers for index-search
    idx = [idx[0] for idx in idxs if idx != 0][0]
    log.debug('{0}, {1}'.format(idx, idxs))

    # get proto and consensus from wordlist
    proto = wordlist[idx, gold]
    consensus = wordlist[idx, test]
    log.debug('{0}, {1}'.format(proto, consensus))

    if tokens or classes:
        proto = ipa2tokens(proto, **keywords)
        consensus = ipa2tokens(consensus, **keywords)

        if classes:
            proto = tokens2class(proto, **keywords)
            consensus = tokens2class(consensus, **keywords)

    distances.append(edit_dist(proto, consensus, normalized=False))

med = sum(distances) / len(distances)
log.info('MEAN ED: {0:.2f}'.format(med))
return med
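
# --- Hedged usage sketch (not from the original source): the loop above compares a
# --- gold and a test form per cognate set using lingpy's public helpers; the same
# --- comparison can be run directly on two strings. The example forms are made up.
from lingpy import ipa2tokens, tokens2class, edit_dist

proto = ipa2tokens('woldemort')
consensus = ipa2tokens('waldemar')
print(edit_dist(proto, consensus, normalized=False))         # distance on segments
print(edit_dist(tokens2class(proto, 'sca'),
                tokens2class(consensus, 'sca'),
                normalized=False))                           # distance on sound classes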
# check for duplicates in the orthography profile (fail if dups)
if grapheme not in self.op_graphemes:
    self.op_graphemes[grapheme] = 1
else:
    raise Exception("You have a duplicate in your orthography profile.")

if len(tokens) == 1:
    continue

for i, token in enumerate(tokens):
    token = token.strip()
    self.mappings[grapheme, self.column_labels[i].lower()] = token
    log.debug('%s %s' % (grapheme, self.column_labels[i].lower()))

# print the tree structure if logging is set to INFO or below
if log.get_logger().getEffectiveLevel() <= logging.INFO:
    log.debug("A graphical representation of your orthography profile in a tree ('*' denotes sentinels):\n")
    printTree(self.root, "")
    print()
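
# --- Self-contained sketch (hypothetical data, not from the original source) of the
# --- profile parsing above: each grapheme may appear only once, and every further
# --- column of the profile is stored under the key (grapheme, column_label).
op_graphemes, mappings = {}, {}
column_labels = ['graphemes', 'ipa']
for row in ['th\tθ', 'sh\tʃ']:
    tokens = row.split('\t')
    grapheme = tokens[0].strip()
    if grapheme not in op_graphemes:
        op_graphemes[grapheme] = 1
    else:
        raise Exception("You have a duplicate in your orthography profile.")
    for i, token in enumerate(tokens):
        mappings[grapheme, column_labels[i].lower()] = token.strip()
print(mappings['th', 'ipa'])  # θ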
lingpy.data.model.Model
compile_dvt
"""
log.info("Compiling model <" + model + ">...")

# get the path to the models
new_path = lambda *cmps: os.path.join(path or util.data_path('models'), model, *cmps)
log.debug("Model-Path: %s" % new_path())

# load the sound classes
sound_classes = _import_sound_classes(new_path('converter'))

# dump the data
cache.dump(sound_classes, model + '.converter')
log.info("... successfully created the converter.")

# try to load the scoring function or the score tree
scorer = False

if os.path.isfile(new_path('matrix')):
    scorer = read_scorer(new_path('matrix'))
elif os.path.isfile(new_path('scorer')):
    score_tree = _import_score_tree(new_path('scorer'))

    # calculate the scoring dictionary
    score_dict = _make_scoring_dictionary(score_tree)

    # make score_dict a ScoreDict instance
    chars = sorted(set([s[0] for s in score_dict.keys()]))
    matrix = [[0 for i in range(len(chars))] for j in range(len(chars))]
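
# --- Hedged usage sketch: the fragment above looks like the body of
# --- lingpy.data.derive.compile_model (the function name is an assumption; it is not
# --- shown in the fragment), which rebuilds a sound-class model from its 'converter',
# --- 'matrix' and 'scorer' files and caches the result.
from lingpy.data.derive import compile_model
compile_model('sca')  # recompile and cache the SCA sound-class model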
"""
Module provides functions for the transformation of text data into visually appealing
format.
"""
from __future__ import unicode_literals, print_function, division
from lingpy.settings import rcParams
from lingpy import log
import numpy as np
import networkx as nx
try:
import matplotlib.pyplot as plt
import matplotlib as mpl
except:
log.missing_module('matplotlib')
plt, mpl = False, False
try:
import scipy.cluster.hierarchy as sch
except:
log.missing_module('scipy')
sch = False
from lingpy.thirdparty import cogent as cg
from lingpy.convert.tree import nwk2tree_matrix
from lingpy.convert.graph import gls2gml, radial_layout
def plot_gls(
gls,
treestring,
degree=90,
# get the two dictionaries
dictA, dictB = [wl.get_dict(col=tax, entry=ref) for tax in [taxA, taxB]]

# count amount of shared concepts
shared, missing = 0, 0
for concept in getattr(wl, concepts_attr):
    if concept not in dictA or concept not in dictB:
        missing += 1 if not ignore_missing else 0
    elif [k for k in dictA[concept] if k in dictB[concept]]:
        shared += 1

try:
    return 1 - shared / (wl.height - missing)
except ZeroDivisionError:
    log.get_logger().exception(
        "Zero-division error encountered in '{0}' and '{1}'.".format(taxA, taxB))
    return 1.0
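
# --- Worked example (hypothetical counts, not from the original source) of the
# --- distance formula above: shared / (height - missing) compares only concepts
# --- attested in both taxa.
shared, missing, height = 60, 20, 200
print(1 - shared / (height - missing))  # 0.666...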
def __init__(self, seqs, **keywords):
    self.log = log.get_logger()

    # store input sequences, check whether tokens or strings are passed
    if isinstance(seqs[0], (list, tuple)):
        self.seqs = [' '.join(s) for s in seqs]
        self.tokens = [s for s in seqs]
    else:
        self.seqs = seqs
        self.tokens = []

    # define a tokenizer function for convenience
    kw = {
        "diacritics": rcParams['diacritics'],
        "vowels": rcParams['vowels'],
        "tones": rcParams['tones'],
        "combiners": rcParams['combiners'],
        "breaks": rcParams['breaks'],
        "stress": rcParams["stress"],
        idx += 1
        if not D[0]:
            columns = list(s.keys())
            D[0] = [c.lower() for c in columns]
        D[idx] = [
            datatypes.get(namespace.get(column, ''), lambda x: x)(s.get(column, ''))
            for column in columns]

    D[0] = [namespace.get(c, c) for c in columns]
    if len(D[0]) != len(set(D[0])):
        log.warning('|'.join(columns))
        log.warning('|'.join(D[0]))
        raise ValueError('name space clashes, cannot parse data')

    # convert to wordlist and return
    return cls(D, **kwargs)
else:
    # For most LingPy applications, it might be best to see whether we got
    # a Wordlist module.
    raise ValueError("LingPy has no procedures for CLDF {:} data.".format(
        dataset.module))
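
# --- Hedged usage sketch: the fragment above appears to be the tail of
# --- Wordlist.from_cldf (name assumed; it is not shown in the fragment), which loads
# --- a CLDF Wordlist dataset into a LingPy wordlist. The metadata path is a placeholder.
from lingpy import Wordlist
wl = Wordlist.from_cldf('cldf/cldf-metadata.json')
print(wl.height, wl.width)  # number of concepts and number of languages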
def _get_colexifications(wordlist, entry='ipa', concept='concept', family='family'):
    """
    Helper function that computes colexifications for a given set of languages in a
    wordlist.
    """
    if family not in wordlist.header:
        family = 'doculect'
    taxa = wordlist.cols
    colexifications = []
    for taxon in taxa:
        log.info('Analyzing taxon {0}...'.format(taxon))
        tmp_idxs = wordlist.get_list(taxon=taxon, flat=True)
        tmp_family = wordlist[tmp_idxs[0], family]
        tmp_concepts = wordlist.get_list(taxon=taxon, flat=True, entry=concept)
        tmp_entries = wordlist.get_list(taxon=taxon, flat=True, entry=entry)

        # iterate over all pairs of concepts and collect identical entries
        for (i, c1), (j, c2) in combinations2(enumerate(tmp_concepts)):
            if tmp_entries[i] == tmp_entries[j] and c1 != c2:
                colexifications += [(c1, c2, taxon, tmp_family, tmp_entries[i])]
    return colexifications
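
# --- Hedged usage sketch (the input file name is a placeholder): the helper above
# --- returns (concept1, concept2, taxon, family, form) tuples for identical forms
# --- that express different concepts within the same language.
from lingpy import Wordlist
wl = Wordlist('wordlist-with-family-column.tsv')
for c1, c2, taxon, family, form in _get_colexifications(wl, entry='ipa'):
    print('{0} ({1}): {2} = {3} <{4}>'.format(taxon, family, c1, c2, form))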