How to use the lingpy.log.info function in lingpy

To help you get started, we’ve selected a few lingpy examples, based on popular ways it is used in public projects.

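Before turning to the project code, here is a minimal sketch of the function itself. It assumes, as the snippets below do, that the log module is imported from the top-level lingpy package; info routes a message through LingPy's preconfigured logger, next to its siblings debug, warning, and error, all of which appear in the excerpts that follow:

from lingpy import log

# route an informational message through LingPy's logger
log.info("Loaded a wordlist with {0} entries.".format(200))

# the other log levels that appear in the snippets below
log.debug("Only emitted when verbose logging is switched on.")
log.warning("Something looks off, but processing continues.")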

github lingpy / lingpy / lingpy / compare / _phylogeny / utils.py View on Github
                        tracer[char][2] += 1
                        bor += 1

            states[name] = [ret, inn, bor]

    # calculate the scores
    ret = sum([c[0] for c in tracer.values()])
    inn = sum([c[1] for c in tracer.values()])
    tra = sum([c[2] for c in tracer.values()])

    ipn = inn / len(acs)
    tpn = tra / len(acs)

    total2 = ipn + tpn

    log.info("Innovations: {0}, {1:.2f}, {2:.2f}".format(inn, ipn, ipn / total2))
    log.info("Transferred: {0}, {1:.2f}, {2:.2f}".format(tra, tpn, tpn / total2))

    if return_dists:
        leaves = []
        nodes = []
        for node in [n for n in tree.getNodeNames() if n != 'root']:
            innovations = states[node][1] + states[node][2]
            if node in tree.taxa:
                leaves += [innovations]
            else:
                nodes += [innovations]

        # evaluate the difference between leaves and internal nodes with a
        # Kruskal-Wallis test (scipy returns the H statistic and the p-value)
        h, p = sps.mstats.kruskalwallis(leaves, nodes)

        return h, p
github lingpy / lingpy / lingpy / compare / lexstat.py View on Github
        tasks = (self.width ** 2) / 2

        if method == 'markov':
            seqs, pros, weights = {}, {}, {}

            # get a random distribution for all pairs
            sample = random.sample(
                [(i, j) for i in range(kw['rands']) for j in
                    range(kw['rands'])], kw['runs'])

            with util.pb(
                    desc='SEQUENCE GENERATION',
                    total=len(self.cols)) as progress:
                for i, taxon in enumerate(self.cols):
                    progress.update(1)
                    log.info("Analyzing taxon {0}.".format(taxon))
                    tokens = self.get_list(col=taxon, entry="tokens", flat=True)
                    prostrings = self.get_list(
                            col=taxon, entry=self._prostrings, flat=True)
                    m = MCPhon(tokens, True, prostrings)
                    words = []
                    j, k = 0, 0
                    while j < kw['rands']:
                        s = m.get_string(new=False)
                        if s in words:
                            k += 1
                            if k > kw['limit']:
                                break
                        else:
                            j += 1
                            words += [s]
                    if len(words) < kw['rands']:
github lingpy / lingpy / lingpy / basic / spreadsheet.py View on Github
            self.sep,
            strip_lines=False  # this is of crucial importance, otherwise
            )

        # columns that have language data
        language_indices = []
        concept_id = 0

        # first row must be the header in the input; TODO: add more functionality
        header = spreadsheet[0] 

        log.info('%s' % header[0:10])

        for i, cell in enumerate(header):
            cell = cell.strip()
            log.info('%s' % cell)
            if cell == self.meanings:
                concept_id = i
            if self.language_id in cell:
                language_indices.append(i)

        matrix_header = []
        matrix_header.append(header[concept_id])        
        for i in language_indices:
            matrix_header.append(header[i].replace(self.language_id, "").strip())
        self.matrix.append(matrix_header)

        # append the concept and the word per language for each row (skip the header row)
        for i in range(1, len(spreadsheet)):
            matrix_row = []
            # skip rows with an empty concept cell, if flagged
            if spreadsheet[i][concept_id] == "" and self.skip_empty_concepts:
github lingpy / lingpy / lingpy / compare / lexstat.py View on Github
        This is an iterator object and it yields the indices of a given
        concept, the matrix, and the concept.
        """
        # currently, there are no defaults XXX
        kw = dict(
            defaults=False,
            external_scorer=False,  # external scoring function
        )
        kw.update(keywords)
        function = self._distance_method(
                method, scale=scale, factor=factor,
                restricted_chars=restricted_chars, mode=mode, gop=gop,
                restriction=restriction, external_scorer=kw['external_scorer'])
        concepts = [concept] if concept else sorted(self.rows)
        for c in concepts:
            log.info("Analyzing words for concept <{0}>.".format(c))
            indices = self.get_list(row=c, flat=True)
            matrix = []
            for idxA, idxB in util.combinations2(indices):
                try:
                    d = function(idxA, idxB)
                except ZeroDivisionError:
                    log.warning(
                        "Encountered Zero-Division for the comparison of "
                        "{0} ({2}) and {1} ({3})".format(
                            ''.join(self[idxA, self._segments]),
                            ''.join(self[idxB, self._segments]),
                            idxA, idxB
                            ))
                    d = 100
                matrix += [d]
            matrix = misc.squareform(matrix)
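
The docstring above describes a generator: for each concept it yields the word indices, the pairwise distance matrix, and the concept itself. Below is a hedged consumption sketch; the method name _get_matrices and the input file are assumptions, since the excerpt omits the signature:

from lingpy import log
from lingpy.compare.lexstat import LexStat

lex = LexStat('wordlist.qlc')  # hypothetical input file

# unpack in the order the docstring gives: indices, matrix, concept
for indices, matrix, concept in lex._get_matrices(method='sca'):
    log.info("Concept <{0}>: comparing {1} words.".format(concept, len(indices)))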
github lingpy / lingpy / lingpy / data / derive.py View on Github
        The information in such a file is automatically converted into a
        scoring dictionary (see :evobib:`List2012b` for details).

    Based on the information provided by the files, a dictionary for the
    conversion of IPA-characters to sound classes and a scoring dictionary are
    created and stored as a binary.  The model can be loaded with help of the
    :py:class:`~lingpy.data.model.Model` class and used in the various classes
    and functions provided by the library.
    
    See also
    --------
    lingpy.data.model.Model
    compile_dvt

    """
    log.info("Compiling model <" + model + ">...")
    # get the path to the models
    new_path = lambda *cmps: os.path.join(path or util.data_path('models'), model, *cmps)

    log.debug("Model-Path: %s" % new_path)

    # load the sound classes
    sound_classes = _import_sound_classes(new_path('converter'))

    # dump the data
    cache.dump(sound_classes, model + '.converter')
    log.info("... successfully created the converter.")

    # try to load the scoring function or the score tree
    scorer = False

    if os.path.isfile(new_path('matrix')):
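
Once a model has been compiled, the docstring points to lingpy.data.model.Model for loading it. A minimal sketch, assuming the model was stored under the name 'sca' (one of the sound-class models that ship with LingPy; the name attribute is also an assumption):

from lingpy import log
from lingpy.data.model import Model

# load a compiled sound-class model by the name it was stored under
sca = Model('sca')
log.info("Loaded model <{0}>.".format(sca.name))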
github lingpy / lingpy / lingpy / basic / ops.py View on Github
    elif data in ['groups', 'cluster']:
        if 'distances' not in wordlist._meta:
            distances = wl2dst(wordlist, taxa, concepts, ref, **keywords)
        else:
            distances = wordlist._meta['distances']
        if 'groups' in wordlist._meta and not keywords['force']:
            log.warning(
                    "Groups have already been calculated, "
                    "force overwrite by "
                    "setting 'force' to 'True'.")
            return
        wordlist._meta['groups'] = clustering.matrix2groups(
            keywords['threshold'], distances, these_taxa,
            keywords['cluster_method'])
    log.info("Successfully calculated {0}.".format(data))
github lingpy / lingpy / lingpy / data / derive.py View on Github
    :py:obj:`rcParams['vowels']`, :py:obj:`rcParams['diacritics']`, and
    :py:obj:`rcParams['tones']`. Their core purpose is to guide the
    tokenization of IPA strings (cf.
    :py:func:`~lingpy.sequence.sound_classes.ipa2tokens`). In order to change the
    variables, one simply has to change the text files :file:`diacritics`,
    :file:`tones`, and
    :file:`vowels` in the :file:`data/models/dv` directory. The structure of
    these files is fairly simple: Each line contains a vowel or a diacritic
    character, with diacritics preceded by a dash.
    
    See also
    --------
    lingpy.data.model.Model
    lingpy.data.derive.compile_model
    """
    log.info("Compiling diacritics and vowels...")

    # get the path to the models
    if not path:
        file_path = util.data_path('models', 'dvt')
    elif path in ['evolaemp', 'el']:
        file_path = util.data_path('models', 'dvt_el')
    else:
        file_path = path

    def _read_string(name):
        # read the file, NFC-normalize its content, and strip newlines
        # TODO: this is potentially dangerous and it is important to decide whether
        # TODO: switching to NFD might not be a better choice
        return util.read_text_file(
            os.path.join(file_path, name), normalize='NFC').replace('\n', '')
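
The vowels, diacritics, and tones compiled here feed the tokenizer the docstring mentions. The example below is the ipa2tokens call from the LingPy documentation; the exact segmentation depends on the compiled data:

from lingpy.sequence.sound_classes import ipa2tokens

# segmentation is guided by the compiled diacritics, vowels, and tones
tokens = ipa2tokens('tʰɔxtər')
print(tokens)  # ['tʰ', 'ɔ', 'x', 't', 'ə', 'r']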
github lingpy / lingpy / lingpy / read / starling.py View on Github
"""
    Converts a file directly output from starling to LingPy-QLC format.
    """
    cleant = clean_taxnames or identity
    data = csv2list(filename)

    # strip a byte-order mark (BOM) that some editors prepend to the file
    data[0][0] = data[0][0].replace('\ufeff', '')

    # get the header
    header = data[0]

    # debugging
    if debug:
        error = False
        log.info("Header line has length {0}.".format(len(header)))
        for line in data[1:]:
            if len(line) != len(header):  # pragma: no cover
                log.error("Error for item {0} with length {1}, expected {2}.".format(
                    '/'.join(line[0:2]), len(line), len(header)))
                error = True
        if error:  # pragma: no cover
            log.error("Errors were found, aborting function call.")
            return
        else:
            log.info("Everything went fine, carrying on with function call.")

    # determine language names in header
    taxa = []
    for i in range(len(header) - 1):
        prev = header[i]
        post = header[i + 1]
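
A hedged call sketch for the conversion above; the function name star2qlc and the input file are assumptions, as the excerpt only shows the body. Running with debug=True first triggers the row-length validation shown above:

from lingpy.read.starling import star2qlc

# validate that every row matches the header before converting
data = star2qlc('starling_export.csv', debug=True)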
github lingpy / lingpy / lingpy / align / sca.py View on Github
        Create confidence scores for a given set of alignments.

        Parameters
        ----------
        scorer : :py:class:`~lingpy.algorithm._misc.ScoreDict`
            A *ScoreDict* object which gives similarity scores for all segments in
            the alignment.
        ref : str (default="lexstatid")
            The reference entry-type, referring to the cognate-set to be used for
            the analysis.
        gap_weight : float (default=1.0)
            Determine the weight assigned to matches containing gaps.

        """
        corrs = confidence.get_confidence(self, scorer, ref, gap_weight)
        log.info("Successfully calculated confidence values for alignments.")
        return corrs
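
A hedged end-to-end sketch of calling this method. The dataset name, the clustering threshold, and the use of the LexStat cscorer as the ScoreDict are assumptions drawn from a typical LexStat-to-Alignments workflow, not from the excerpt itself:

from lingpy import log
from lingpy.compare.lexstat import LexStat
from lingpy.align.sca import Alignments

lex = LexStat('wordlist.qlc')        # hypothetical dataset
lex.get_scorer(runs=100)             # build the LexStat scorer
lex.cluster(method='lexstat', threshold=0.6, ref='lexstatid')

# write the scored wordlist out and align the cognate sets
lex.output('tsv', filename='wordlist_scored')
alms = Alignments('wordlist_scored.tsv', ref='lexstatid')
alms.align()

# per-segment confidence values; passing cscorer as the scorer is an assumption
corrs = alms.get_confidence(lex.cscorer, ref='lexstatid', gap_weight=1.0)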