How to use the lingpy.sequence.sound_classes.tokens2class function in lingpy

To help you get started, we’ve selected a few lingpy examples based on popular ways the library is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github lingpy / lingpy / lingpy / compare / lexstat.py View on Github external
if k > kw['limit']:
                                break
                        else:
                            j += 1
                            words += [s]
                    if len(words) < kw['rands']:
                        log.warning(
                                "Could not generate enough distinct words for"
                                " the random distribution. "
                                "Will expand automatically")
                        while len(words) < kw['rands']:
                            words += [words[random.randint(0, len(words)-1)]]

                    seqs[taxon], pros[taxon], weights[taxon] = [], [], []
                    for w in words:
                        cls = tokens2class(w.split(' '), self.model,
                                cldf=self._cldf)
                        pros[taxon].append(prosodic_string(w.split(' ')))
                        weights[taxon].append(prosodic_weights(pros[taxon][-1]))
                        seqs[taxon].append([
                            '{0}.{1}'.format(c, p) for c, p in zip(
                                 cls,
                                 [self._transform[pr] for pr in pros[taxon][-1]]
                                 )])

            with util.pb(
                    desc='RANDOM CORRESPONDENCE CALCULATION',
                    total=tasks) as progress:
                for (i, tA), (j, tB) in util.multicombinations2(
                        enumerate(self.cols)):
                    progress.update(1)
                    log.info(
github lingpy / lingpy / lingpy / compare / lexstat.py View on Github external
                lambda x: ''.join(tokens2class(x, kw["model"], cldf=self._cldf,
                    stress=rcParams['stress'])))
        # create IDs for the languages
github lingpy / lingpy / lingpy / sequence / generate.py View on Github external
# start filling the dictionary
        for i, w in enumerate(words):

            # check for tokenized string
            if not tokens:
                tk = ipa2tokens(w, **keywords)
            else:
                tk = w[:]
            self.tokens += [tk]

            # create prosodic string
            if prostrings:
                p = prostrings[i]
            else:
                tt = tokens2class(tk, rcParams['art'])
                p = prosodic_string(
                        tk, 
                        rcParams['art'],
                        cldf=keywords['cldf'],
                        diacritics=keywords['diacritics'],
                        stress=keywords['stress'])
            # create classes
            if classes:
                c = tokens2class(tk, class_model, cldf=keywords['cldf'],
                        diacritics=keywords['diacritics'],
                        stress=keywords['stress'])
                bigrams = list(zip(p, c))
                self.classes += [c]
            else:
                # zip the stuff
                bigrams = list(zip(p, tk))
github lingpy / lingpy / lingpy / align / multiple.py View on Github external
            classify = lambda x: tokens2class(x, self.model)
github lingpy / lingpy / lingpy / align / sca.py View on Github external
if cog in self.msa[ref]:
                    log.debug("Analyzing cognate set number '{0}'...".format(cog))

                    # temporary solution for sound-class integration
                    if classes == True:
                        _classes = []
                        if weights:
                            keywords['weights'] = prosodic_weights(
                                prosodic_string(self.msa[ref][cog]['_sonority_consensus'])
                            )
                        else:
                            keywords['weights'] = [
                                1.0 for i in range(len(self.msa[ref][cog]['alignment']))]

                        for alm in self.msa[ref][cog]['alignment']:
                            cls = [c for c in tokens2class(
                                alm,
                                keywords['model'], 
                                stress=keywords['stress'],
                                cldf=keywords['cldf'],
                                diacritics=keywords['diacritics']
                            ) if c != '0']
                            cls = class2tokens(cls, alm)
                            _classes.append(cls)
                        _classes = misc.transpose(_classes)
                    else:
                        _classes = classes

                    cons = get_consensus(
                        self.msa[ref][cog]['alignment'],
                        classes=_classes,
                        tree=tree,
github lingpy / lingpy / lingpy / align / pairwise.py View on Github external
lambda x: (
                tokens2class(x[0], self.model, stress=keywords['stress']),
                tokens2class(x[1], self.model, stress=keywords['stress'])),
            self.tokens
github lingpy / lingpy / lingpy / align / pairwise.py View on Github external
cognacy : {0, 1}
        The cognacy assertion which is either 0 (words are probably cognate) or
        1 (words are not likely to be cognate).

    """
    if text_type(model) == model:
        model = rcParams[model]
    elif not hasattr(model, 'info'):
        raise ValueError("[!] No valid model instance selected.")

    if isinstance(seqA, string_types):
        seqA = ipa2tokens(seqA)
        seqB = ipa2tokens(seqB)

    classA = tokens2class(seqA, model)
    classB = tokens2class(seqB, model)

    if classA[0] in model.vowels:
        classA[0] = 'H'
    if classB[0] in model.vowels:
        classB[0] = 'H'

    return int(''.join([k for k in classA if k not in model.vowels])[:2] !=
               ''.join([k for k in classB if k not in model.vowels])[:2])
github lingpy / lingpy / lingpy / align / multiple.py View on Github external
                map(lambda x: [int(t) for t in tokens2class(
                    x, rcParams['art'], stress=rcParams['stress'])],
                    [self.tokens[key] for key in keys]))