How to use the lingpy.util.setdefaults function in lingpy

To help you get started, we’ve selected a few lingpy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github lingpy / lingpy / lingpy / align / sca.py View on Github external
* "html" -- output of the multiple alignment in ``html``-format.

        filename : str
            Select a specific name for the outfile, otherwise, the name of
            the infile will be taken by default.

        sorted_seqs : bool
            Indicate whether the sequences should be sorted or not (applies only
            to 'msa' and 'msq' output).

        unique_seqs : bool
            Indicate whether only unique sequences should be written to file or
            not.

        """
        util.setdefaults(keywords, wordlist=False, timestamp=False)

        if fileformat in ['html', 'tex']:
            with util.TemporaryPath(suffix='.msa') as tmp:
                self.output(
                    fileformat='msa',
                    filename=os.path.splitext(tmp)[0],
                    sorted_seqs=sorted_seqs,
                    unique_seqs=unique_seqs)
                if 'filename' not in keywords:
                    keywords['input_file'] = os.path.split(self.infile)[1]
                    keywords['filename'] = filename

                getattr(html, 'msa2' + fileformat)(tmp, **keywords)
                return

        # create a specific format string in order to receive taxa of equal length
github lingpy / lingpy / lingpy / basic / wordlist.py View on Github external
def _output(self, fileformat, **keywords):
        """
        Internal function that eases its modification by daughter classes.
        """
        # check for stamp attribute
        keywords["stamp"] = getattr(self, '_stamp', '')

        # add the default parameters, they will be checked against the keywords
        util.setdefaults(
            keywords,
            cols=False,
            distances=False,
            entries=("concept", "counterpart"),
            entry='concept',
            fileformat=fileformat,
            filename=rcParams['filename'],
            formatter='concept',
            modify_ref=False,
            meta=self._meta,
            missing=0,
            prettify='false',
            ignore='all',
            ref='cogid',
            rows=False,
            subset=False,  # setup a subset of the data,
github lingpy / lingpy / lingpy / align / multiple.py View on Github external
gap_weight : float (default=0)
            The factor by which gaps in aligned columns contribute to the
            calculation of the column score. When set to 0, gaps will be
            ignored in the calculation. When set to 0.5, gaps will count half
            as much as other characters.

        restricted_chars : string (default="T")
            Define which characters of the prosodic string of a sequence
            reflect its secondary structure (cf. :evobib:`List2012b`) and
            should therefore be aligned specifically. This defaults to "T",
            since this is the character that represents tones in the prosodic
            strings of sequences.

        """
        setdefaults(
            keywords,
            new_calc=True,
            model=rcParams['sca'],
            mode='global',
            gop=-3,
            scale=0.5,
            factor=1,
            restricted_chars='T_',
            classes=True,
            sonar=True,
            scorer={})

        if keywords['new_calc']:
            # define the class model
            self._set_model(
                keywords['model'],
github lingpy / lingpy / lingpy / align / sca.py View on Github external
A sound class model according to which the IPA strings shall be
        converted to sound-class strings.
    local : { c{bool}, "peaks", "gaps" }(default=False)
        Specify whether local pre-processing should be applied to the data. If
        set to c{peaks}, the average alignment score of each column is taken as
        reference to remove low-scoring columns from the alignment. If set to
        "gaps", the columns with the highest proportion of gaps will be
        excluded.

    Returns
    -------
    cons : c{str}
        A consensus string of the given MSA.
    """

    util.setdefaults(
        keywords,
        model=rcParams['sca'],
        stress=rcParams['stress'],
        cldf=False,
        diacritics=rcParams['diacritics'],
        gap_scale=1.0,
        mode='majority',
        gap_score=-10,
        weights=[1 for i in range(len(msa[0]))],
        local=False)

    # transform the matrix
    matrix = misc.transpose(getattr(msa, 'alm_matrix', msa))
    
    # custom function for tokens2class
    tk2k = lambda x: token2class(x, keywords['model'], cldf=keywords['cldf'],
github lingpy / lingpy / lingpy / align / sca.py View on Github external
tree : {c{str} ~lingpy.thirdparty.cogent.PhyloNode}
            A tree object or a Newick string along which the consensus shall be
            calculated.
        gaps : c{bool} (default=False)
            If set to c{True}, return the gap positions in the consensus.
        classes : c{bool} (default=False)
            Specify whether sound classes shall be used to calculate the consensus.
        model : ~lingpy.data.model.Model
            A sound class model according to which the IPA strings shall be
            converted to sound-class strings.
        return_data : c{bool} (default=False)
            Return the data instead of adding it in a column to the wordlist
            object.

        """
        util.setdefaults(
            keywords, model=rcParams['sca'], gap_scale=1.0,
            ref=rcParams['ref'], stress=rcParams['stress'],
            diacritics=rcParams['diacritics'], cldf=False)

        # switch ref
        if keywords['ref'] != rcParams['ref']:
            rcParams['ref'] = keywords['ref']

        # reassign ref for convenience
        ref = keywords['ref']

        # check for existing alignments
        test = list(self.msa[ref].keys())[0]
        if 'alignment' not in self.msa[ref][test]:
            log.error(
                "No alignments could be found. You should carry out"
github lingpy / lingpy / lingpy / sequence / sound_classes.py View on Github external
def check_tokens(tokens, **keywords):
    """
    Report the tokens for which the articulation model yields class '0'.

    Parameters
    ----------
    tokens : iterable
        The tokens to be checked, in order.
    **keywords
        Optional settings forwarded to ``token2class``: ``stress`` and
        ``diacritics`` (both default to the matching ``rcParams`` entries)
        and ``cldf`` (defaults to ``False``).

    Returns
    -------
    errors : list
        A list of ``(index, token)`` tuples, one for every token whose
        conversion with the ``rcParams['art']`` model returned ``'0'``
        (presumably the marker for an unrecognized token -- confirm against
        ``token2class``). The list is empty if no such token was found.
    """
    setdefaults(
        keywords,
        stress=rcParams['stress'],
        diacritics=rcParams['diacritics'],
        cldf=False)

    def _fails_conversion(segment):
        # look the segment up in the articulation model; '0' flags a failure
        converted = token2class(
            segment,
            rcParams['art'],
            stress=keywords['stress'],
            cldf=keywords['cldf'],
            diacritics=keywords['diacritics'])
        return converted == '0'

    return [
        (position, segment)
        for position, segment in enumerate(tokens)
        if _fails_conversion(segment)]
github lingpy / lingpy / lingpy / evaluate / apa.py View on Github external
def diff(self, **keywords):
        """
        Write all differences between two sets to a file.

        Parameters
        ----------

        filename : str (default='eval_psa_diff')
            Default

        """
        setdefaults(keywords, filename=self.gold.infile)
        if not keywords['filename'].endswith('.diff'):
            keywords['filename'] = keywords['filename'] + '.diff'

        out = []
        for i, (a, b) in enumerate(zip(self.gold.alignments, self.test.alignments)):
            g1, g2, g3 = a
            t1, t2, t3 = b
            maxL = max([len(g1), len(t1)])
            if g1 != t1 or g2 != t2:
                taxA, taxB = self.gold.taxa[i]
                taxlen = max(len(taxA), len(taxB))
                seq_id = self.gold.seq_ids[i]
                out.append('{0}\n{1}\t{2}\n{3}\t{4}\n{5}\n{1}\t{6}\n{3}\t{7}\n\n'.format(
                    seq_id,
                    taxA,
                    '\t'.join(g1),
github lingpy / lingpy / lingpy / convert / html.py View on Github external
def psa2html(infile, **kw):
    """
    Function converts a PSA-file into colored html-format.
    """
    util.setdefaults(
        kw,
        template=False,
        css=False,
        comment='#',
        filename=infile[:-4]+'.html',
        compact=True)

    template = util.read_text_file(kw['template'] or template_path('psa.html'))
    css = util.read_text_file(kw['css'] or template_path('psa.css'))

    data = []
    for line in util.read_text_file(infile, lines=True):
        if not line.startswith(kw['comment']):
            data.append(line)

    seq_ids = []
github lingpy / lingpy / lingpy / basic / wordlist.py View on Github external
h1=('concept', r'\section{{Concept: ``{0}"}}' + '\n'),
                    h2=('cogid', r'\subsection{{Cognate Set: ``{0}"}}' + '\n'))
            elif fileformat == 'html':
                sections = dict(
                    h1=('concept', '<h1>Concept: {0}</h1>'),
                    h2=('cogid', '<h2>Cognate Set: {0}</h2>'))

        if not entries:
            if fileformat == 'txt':
                entries = [('language', '{0} '), ('ipa', '{0}\n')]
            elif fileformat == 'tex':
                entries = [('language', '{0} '), ('ipa', '[{0}]' + '\n')]
            elif fileformat == 'html':
                entries = [('language', '{0}&nbsp;'), ('ipa', '[{0}]\n')]

        util.setdefaults(keywords, filename=rcParams['filename'])

        # get the temporary dictionary
        out = wl2dict(self, sections, entries, exclude)

        # assign the output string
        out_string = ''

        # iterate over the dictionary and start to fill the string
        for key in sorted(out, key=lambda x: str(x).lower()):
            # write key to file
            out_string += key[1]

            # reassign tmp
            tmp = out[key]

            # set the pointer and the index