How to use the lingpy.log.debug function in lingpy

To help you get started, we’ve selected a few lingpy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github lingpy / lingpy / lingpy / sequence / tokenizer.py View on Github external
# if no orthography profile is specified, simply return 
        # Unicode grapheme clusters, regex pattern "\X"
        if self.orthography_profile == None:
            return self.grapheme_clusters(string)

        parses = []
        for word in string.split():
            parse = getParse(self.root, word)

            # case where the parsing fails
            if len(parse) == 0:
                # replace characters in string but not in orthography profile with 
                parse = " "+self.find_missing_characters(self.characters(word))
                # write problematic stuff to standard error
                log.debug("The string '{0}' does not parse given the specified orthography profile {1}.\n".format(word, self.orthography_profile))
            
            parses.append(parse)

        # remove the outer word boundaries
        result = "".join(parses).replace("##", "#")
        result = result.rstrip("#")
        result = result.lstrip("#")
        return result.strip()
github lingpy / lingpy / lingpy / sequence / tokenizer.py View on Github external
tokens = line.split("\t") 
            grapheme = tokens[0].strip()

            # check for duplicates in the orthography profile (fail if dups)
            if not grapheme in self.op_graphemes:
                self.op_graphemes[grapheme] = 1
            else:
                raise Exception("You have a duplicate in your orthography profile.")

            if len(tokens) == 1:
                continue

            for i, token in enumerate(tokens):
                token = token.strip()
                self.mappings[grapheme, self.column_labels[i].lower()] = token
                log.debug('%s %s' % (grapheme, self.column_labels[i].lower()))

        # print the tree structure if debug mode is on
        if log.get_logger().getEffectiveLevel() <= logging.INFO:
            log.debug("A graphical representation of your orthography profile in a tree ('*' denotes sentinels):\n")
            printTree(self.root, "")
            print()
github lingpy / lingpy / lingpy / basic / ops.py View on Github external
def tsv2triple(wordlist, outfile=None):
    """
    Flatten a wordlist into a list of (ID, COLUMN, VALUE) triples.

    Parameters
    ----------
    wordlist : object
        Must provide a ``header`` sequence of column names, iteration over
        its row keys, and ``wordlist[key, column]`` lookup (presumably a
        lingpy wordlist -- confirm against callers).
    outfile : str, optional
        If given (and truthy), the triples are additionally written to this
        path, one tab-separated triple per line, NFC-normalized.

    Returns
    -------
    list of tuple
        One ``(key, COLUMN, value)`` tuple per cell, grouped by column in
        header order; the column name is upper-cased.

    Notes
    -----
    The basic values of which the triples consist are:
      * ID (the ID in the TSV file)
      * COLUMN (the column in the TSV file)
      * VALUE (the entry in the TSV file)

    When writing to ``outfile``, list values are rendered as their items
    joined by single spaces, and triples whose value equals ``'-'`` are left
    out of the file. The returned list always contains every triple.
    """
    triples = []
    for column in wordlist.header:
        log.debug('tsv2triple: ' + column)
        triples.extend(
            (key, column.upper(), wordlist[key, column]) for key in wordlist
        )

    if outfile:
        rows = []
        for ident, label, value in triples:
            # lists are serialized as space-separated text
            if isinstance(value, list):
                value = ' '.join(text_type(item) for item in value)
            # '-' marks an empty entry and is not written out
            if value != '-':
                rows.append('{0}\t{1}\t{2}\n'.format(ident, label, value))
        util.write_text_file(outfile, ''.join(rows), normalize='NFC')

    return triples
github lingpy / lingpy / lingpy / sequence / tokenizer.py View on Github external
"""
        # if no orthography profile was initiated, this method can't be called
        # if self.orthography_profile == None:
        #    raise Exception("This function requires that an orthography profile is specified.")

        # if no orthography profile rules file has been specified, simply return the string
        if self.orthography_profile_rules == None:
            return string

        result = unicodedata.normalize("NFD", string)
        for i in range(0, len(self.op_rules)):
            match = self.op_rules[i].search(result)
            if match:
                result = re.sub(self.op_rules[i], self.op_replacements[i], result)
                log.debug("Input/output:"+"\t"+string+"\t"+result)
                log.debug("Pattern/replacement:"+"\t"+self.op_rules[i].pattern+"\t"+self.op_replacements[i])

        # this is in case someone introduces a non-NFD ordered sequence of characters
        # in the orthography profile
        result = unicodedata.normalize("NFD", result)
        return result
github lingpy / lingpy / lingpy / sequence / tokenizer.py View on Github external
"""
        # if no orthography profile was initiated, this method can't be called
        # if self.orthography_profile == None:
        #    raise Exception("This function requires that an orthography profile is specified.")

        # if no orthography profile rules file has been specified, simply return the string
        if self.orthography_profile_rules == None:
            return string

        result = unicodedata.normalize("NFD", string)
        for i in range(0, len(self.op_rules)):
            match = self.op_rules[i].search(result)
            if match:
                result = re.sub(self.op_rules[i], self.op_replacements[i], result)
                log.debug("Input/output:"+"\t"+string+"\t"+result)
                log.debug("Pattern/replacement:"+"\t"+self.op_rules[i].pattern+"\t"+self.op_replacements[i])

        # this is in case someone introduces a non-NFD ordered sequence of characters
        # in the orthography profile
        result = unicodedata.normalize("NFD", result)
        return result
github lingpy / lingpy / lingpy / basic / wordlist.py View on Github external
stmts += ["line[{0}] ".format(idx) + value]

            log.debug("calculated what should be excluded")

            # get the data
            out = {}
            for key, line in self._data.items():
                log.debug(key)

                if rows:
                    if eval(" and ".join(stmts)):
                        out[key] = [line[i] for i in indices]
                else:
                    out[key] = [line[i] for i in indices]

            log.debug("passing data to wl2qlc")
            return wl2qlc(header, out, **keywords)

        # output dst-format (phylip)
        if fileformat == 'dst':
            # check for distances as keyword
            if 'distances' not in self._meta:
                self._meta['distances'] = wl2dst(self, **keywords)

            out = matrix2dst(self._meta['distances'], self.taxa,
                    stamp=keywords['stamp'], taxlen=keywords.get('taxlen', 0))
            return _write_file(keywords['filename'], out, fileformat)

        # output tre-format (newick)
        if fileformat in ['tre', 'nwk']:  # ,'cluster','groups']:
            if 'tree' not in self._meta:
                # check for distances
github lingpy / lingpy / lingpy / basic / spreadsheet.py View on Github external
def _normalize(self):
        """
        Bring every cell of ``self.matrix`` into Unicode NFD form, in place.

        Cells that are already equal to their NFD form are left untouched.
        For each cell that differs, a debug message containing the original
        cell text and its [row,column] position is logged, and the cell is
        then overwritten with the normalized text.
        """
        for row_idx, row in enumerate(self.matrix):
            for col_idx, cell in enumerate(row):
                nfd_cell = unicodedata.normalize("NFD", cell)
                # nothing to do when the cell is already in NFD
                if nfd_cell == cell:
                    continue
                log.debug("Cell at <"+cell+"> ["+str(row_idx)+","+str(col_idx)+"] not in Unicode NFD. Normalizing.")
                row[col_idx] = nfd_cell
github lingpy / lingpy / lingpy / sequence / tokenizer.py View on Github external
if not grapheme in self.op_graphemes:
                self.op_graphemes[grapheme] = 1
            else:
                raise Exception("You have a duplicate in your orthography profile.")

            if len(tokens) == 1:
                continue

            for i, token in enumerate(tokens):
                token = token.strip()
                self.mappings[grapheme, self.column_labels[i].lower()] = token
                log.debug('%s %s' % (grapheme, self.column_labels[i].lower()))

        # print the tree structure if debug mode is on
        if log.get_logger().getEffectiveLevel() <= logging.INFO:
            log.debug("A graphical representation of your orthography profile in a tree ('*' denotes sentinels):\n")
            printTree(self.root, "")
            print()
github lingpy / lingpy / lingpy / read / qlc.py View on Github external
# then check for alms consisting only of gaps
    cols = misc.transpose(alm_clone)
    idxs = []
    for i, col in enumerate(cols):
        if set(col) == set('-'):
            idxs += [i]
    for idx in idxs[::-1]:
        for i, alm in enumerate(alm_clone):
            del alm_clone[i][idx]
    if alignment != alm_clone:
        lgtxt = 'Modified the alignment:\n'
        for i in range(len(alignment)):
            lgtxt += '[!] ' + ' '.join(alignment[i]) + '->'
            lgtxt += ' '.join(alm_clone[i]) + '\n'
        log.debug(lgtxt)
        return alm_clone
    else:
        return alignment