How to use the lingpy.log module in lingpy

To help you get started, we’ve selected a few lingpy.log examples based on popular ways it is used in public projects.

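All of the snippets below follow the same basic pattern: import the log module from the top-level lingpy package and call its module-level helpers (debug, info, warning, missing_module), or fetch the underlying standard-library logger with get_logger. Here is a minimal sketch of that pattern; the message strings are placeholders rather than real lingpy output, and lowering the logger level is assumed to make the debug messages visible.

import logging
from lingpy import log

# Module-level helpers used throughout the snippets on this page.
log.info('starting analysis ...')          # progress messages
log.debug('raw value: {0}'.format(42))     # verbose diagnostics, hidden by default
log.warning('possible name space clash')   # recoverable problems

# For anything the helpers do not cover, work with the underlying
# logging.Logger instance directly.
logger = log.get_logger()
logger.setLevel(logging.DEBUG)             # assumed to make log.debug() calls visible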

github lingpy/lingpy: lingpy/basic/wordlist.py (View on GitHub)
# get indices for header
                indices = [self._header[x] for x in cols]
                header = [c.upper() for c in cols]
            else:
                indices = [r for r in range(len(self.header))]

            if rows:
                stmts = []
                for key, value in rows.items():
                    if key == 'ID':
                        stmts += ["key " + value]
                    else:
                        idx = self._header[key]
                        stmts += ["line[{0}] ".format(idx) + value]

            log.debug("calculated what should be excluded")

            # get the data
            out = {}
            for key, line in self._data.items():
                log.debug(key)

                if rows:
                    if eval(" and ".join(stmts)):
                        out[key] = [line[i] for i in indices]
                else:
                    out[key] = [line[i] for i in indices]

            log.debug("passing data to wl2qlc")
            return wl2qlc(header, out, **keywords)

        # output dst-format (phylip)
github lingpy/lingpy: lingpy/evaluate/alr.py (View on GitHub)
merge_vowels=rcParams['merge_vowels'],
        model=rcParams['model'])

    distances = []

    for key, idxs in wordlist.get_etymdict(ref=ref).items():
        # get only valid numbers for index-search
        idx = [idx[0] for idx in idxs if idx != 0][0]

        log.debug('{0}, {1}'.format(idx, idxs))

        # get proto and consensus from wordlist
        proto = wordlist[idx, gold]
        consensus = wordlist[idx, test]

        log.debug('{0}, {1}'.format(proto, consensus))

        if tokens or classes:
            proto = ipa2tokens(proto, **keywords)
            consensus = ipa2tokens(consensus, **keywords)

            if classes:
                proto = tokens2class(proto, **keywords)
                consensus = tokens2class(consensus, **keywords)

        distances.append(edit_dist(proto, consensus, normalized=False))

    med = sum(distances) / len(distances)
    log.info('MEAN ED: {0:.2f}'.format(med))
    return med
github lingpy/lingpy: lingpy/sequence/tokenizer.py (View on GitHub)
# check for duplicates in the orthography profile (fail if dups)
            if grapheme not in self.op_graphemes:
                self.op_graphemes[grapheme] = 1
            else:
                raise Exception("You have a duplicate in your orthography profile.")

            if len(tokens) == 1:
                continue

            for i, token in enumerate(tokens):
                token = token.strip()
                self.mappings[grapheme, self.column_labels[i].lower()] = token
                log.debug('%s %s' % (grapheme, self.column_labels[i].lower()))

        # print the tree structure if the logger is set to INFO or a more verbose level
        if log.get_logger().getEffectiveLevel() <= logging.INFO:
            log.debug("A graphical representation of your orthography profile in a tree ('*' denotes sentinels):\n")
            printTree(self.root, "")
            print()
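The level check at the end of this snippet is a pattern worth noting on its own: expensive diagnostic output is only assembled when the logger is verbose enough to display it. A minimal sketch of the same guard, where dump_diagnostics is a hypothetical helper rather than a lingpy function:

import logging
from lingpy import log

def dump_diagnostics(data):
    # Only build the expensive report when DEBUG output will actually be shown.
    if log.get_logger().getEffectiveLevel() <= logging.DEBUG:
        report = '\n'.join('{0}: {1}'.format(k, v) for k, v in sorted(data.items()))
        log.debug(report)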
github lingpy/lingpy: lingpy/data/derive.py (View on GitHub)
lingpy.data.model.Model
    compile_dvt

    """
    log.info("Compiling model <" + model + ">...")
    # get the path to the models
    new_path = lambda *cmps: os.path.join(path or util.data_path('models'), model, *cmps)

    log.debug("Model-Path: %s" % new_path)

    # load the sound classes
    sound_classes = _import_sound_classes(new_path('converter'))

    # dump the data
    cache.dump(sound_classes, model + '.converter')
    log.info("... successfully created the converter.")

    # try to load the scoring function or the score tree
    scorer = False

    if os.path.isfile(new_path('matrix')):
        scorer = read_scorer(new_path('matrix'))
    elif os.path.isfile(new_path('scorer')):
        score_tree = _import_score_tree(new_path('scorer'))

        # calculate the scoring dictionary
        score_dict = _make_scoring_dictionary(score_tree)

        # make score_dict a ScoreDict instance
        chars = sorted(set([s[0] for s in score_dict.keys()]))
        matrix = [[0 for i in range(len(chars))] for j in
                  range(len(chars))]
github lingpy/lingpy: lingpy/convert/plot.py (View on GitHub)
"""
Module provides functions for the transformation of text data into a visually
appealing format.
"""
from __future__ import unicode_literals, print_function, division

from lingpy.settings import rcParams
from lingpy import log

import numpy as np
import networkx as nx
try:
    import matplotlib.pyplot as plt
    import matplotlib as mpl
except ImportError:
    log.missing_module('matplotlib')
    plt, mpl = False, False

try:
    import scipy.cluster.hierarchy as sch
except ImportError:
    log.missing_module('scipy')
    sch = False

from lingpy.thirdparty import cogent as cg
from lingpy.convert.tree import nwk2tree_matrix
from lingpy.convert.graph import gls2gml, radial_layout

def plot_gls(
    gls,
    treestring,
    degree=90,
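The import block at the top of this module shows how lingpy reports optional dependencies: if the import fails, log.missing_module() names the missing package and the module falls back to a sentinel value that is checked later. A short sketch of both halves of that pattern; plot_heatmap is a hypothetical function, not part of lingpy:

from lingpy import log

try:
    import matplotlib.pyplot as plt
except ImportError:
    # Report the missing optional dependency and fall back to a sentinel.
    log.missing_module('matplotlib')
    plt = False

def plot_heatmap(matrix):
    # Check the sentinel before using the optional feature.
    if not plt:
        raise ValueError('matplotlib is required for plotting')
    plt.imshow(matrix)
    plt.show()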
github lingpy/lingpy: lingpy/basic/ops.py (View on GitHub)
# get the two dictionaries
    dictA, dictB = [wl.get_dict(col=tax, entry=ref) for tax in [taxA, taxB]]

    # count amount of shared concepts
    shared, missing = 0, 0

    for concept in getattr(wl, concepts_attr):
        if concept not in dictA or concept not in dictB:
            missing += 1 if not ignore_missing else 0
        elif [k for k in dictA[concept] if k in dictB[concept]]:
            shared += 1

    try:
        return 1 - shared / (wl.height - missing)
    except ZeroDivisionError:
        log.get_logger().exception(
            "Zero-division error encountered in '{0}' and '{1}'.".format(
                taxA, taxB))
        return 1.0
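The except branch above calls exception() on the Logger returned by log.get_logger(), which logs at ERROR level and attaches the full traceback to the record. A small sketch of the same idea, where safe_ratio is a hypothetical helper:

from lingpy import log

def safe_ratio(shared, total):
    try:
        return 1 - shared / total
    except ZeroDivisionError:
        # Logger.exception() logs at ERROR level and appends the traceback.
        log.get_logger().exception('Zero-division error for total={0}.'.format(total))
        return 1.0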
github lingpy/lingpy: lingpy/align/multiple.py (View on GitHub)
def __init__(self, seqs, **keywords):
        self.log = log.get_logger()
        # store input sequences, check whether tokens or strings are passed
        if isinstance(seqs[0], (list, tuple)):
            self.seqs = [' '.join(s) for s in seqs]
            self.tokens = [s for s in seqs]
        else:
            self.seqs = seqs
            self.tokens = []

        # define a tokenizer function for convenience
        kw = {
            "diacritics": rcParams['diacritics'],
            "vowels": rcParams['vowels'],
            "tones": rcParams['tones'],
            "combiners": rcParams['combiners'],
            "breaks": rcParams['breaks'],
            "stress": rcParams["stress"],
github lingpy/lingpy: lingpy/basic/wordlist.py (View on GitHub)
idx += 1

                if not D[0]:
                    columns = list(s.keys())
                    D[0] = [c.lower() for c in columns]

                D[idx] = [
                    datatypes.get(namespace.get(column, ''), lambda x: x)(
                        s.get(column, ''))
                    for column in columns]
            D[0] = [namespace.get(c, c) for c in columns]
            if len(D[0]) != len(set(D[0])):
                log.warning('|'.join(columns))
                log.warning('|'.join(D[0]))
                raise ValueError('name space clashes, cannot parse data')

            # convert to wordlist and return
            return cls(D, **kwargs)
        else:
            # For most LingPy applications, it might be best to see whether we got
            # a Wordlist module.
            raise ValueError("LingPy has no procedures for CLDF {:} data.".format(
                dataset.module))
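Note how the snippet above uses log.warning() to dump the clashing column names before raising, keeping the exception message itself short. A minimal sketch of that pattern, where check_unique is a hypothetical helper:

from lingpy import log

def check_unique(names):
    if len(names) != len(set(names)):
        # Log the full detail first, then raise a concise error for the caller.
        log.warning('|'.join(names))
        raise ValueError('name space clashes, cannot parse data')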
github lingpy/lingpy: lingpy/meaning/colexification.py (View on GitHub)
def _get_colexifications(wordlist, entry='ipa', concept='concept', family='family'):
    """
    Helper function that computes colexifications for a given set of languages
    in a wordlist.
    """
    if family not in wordlist.header:
        family = 'doculect'

    taxa = wordlist.cols
    colexifications = []
    for taxon in taxa:
        log.info('Analyzing taxon {0}...'.format(taxon))

        tmp_idxs = wordlist.get_list(taxon=taxon, flat=True)
        tmp_family = wordlist[tmp_idxs[0], family]
        tmp_concepts = wordlist.get_list(taxon=taxon, flat=True, entry=concept)
        tmp_entries = wordlist.get_list(taxon=taxon, flat=True, entry=entry)

        # iterate over all concepts and add them to the graph
        for (i, c1), (j, c2) in combinations2(enumerate(tmp_concepts)):
            if tmp_entries[i] == tmp_entries[j] and c1 != c2:
                colexifications += [(c1, c2, taxon, tmp_family, tmp_entries[i])]

    return colexifications