How to use the lingpy.sequence.sound_classes.prosodic_string function in lingpy

To help you get started, we’ve selected a few lingpy examples based on popular ways prosodic_string is used in public projects.
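
Before looking at the project code below, here is a minimal, self-contained sketch of a typical call, assuming lingpy is installed. The IPA form and the example output in the comments are illustrative assumptions, not values taken from the projects quoted further down.

from lingpy.sequence.sound_classes import ipa2tokens, prosodic_string, prosodic_weights

# segment an IPA string into tokens (hypothetical example form)
tokens = ipa2tokens('tʰɔxtər')

# derive the prosodic string for the tokenized form,
# e.g. something along the lines of 'AXMBYN'
pstring = prosodic_string(tokens)

# turn the prosodic string into one numeric weight per segment
weights = prosodic_weights(pstring)

print(tokens, pstring, weights)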


From lingpy/compare/lexstat.py (github.com/lingpy/lingpy)
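In LexStat's random correspondence calculation, each randomly assembled word is split into its segments and passed to prosodic_string; the result feeds prosodic_weights and, together with the tokens2class output, the combined class-plus-prosody representation of the sequences: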
                        else:
                            j += 1
                            words += [s]
                    if len(words) < kw['rands']:
                        log.warning(
                                "Could not generate enough distinct words for"
                                " the random distribution. "
                                "Will expand automatically")
                        while len(words) < kw['rands']:
                            words += [words[random.randint(0, len(words)-1)]]

                    seqs[taxon], pros[taxon], weights[taxon] = [], [], []
                    for w in words:
                        cls = tokens2class(w.split(' '), self.model,
                                cldf=self._cldf)
                        pros[taxon].append(prosodic_string(w.split(' ')))
                        weights[taxon].append(prosodic_weights(pros[taxon][-1]))
                        seqs[taxon].append([
                            '{0}.{1}'.format(c, p) for c, p in zip(
                                 cls,
                                 [self._transform[pr] for pr in pros[taxon][-1]]
                                 )])

            with util.pb(
                    desc='RANDOM CORRESPONDENCE CALCULATION',
                    total=tasks) as progress:
                for (i, tA), (j, tB) in util.multicombinations2(
                        enumerate(self.cols)):
                    progress.update(1)
                    log.info(
                        "Calculating random alignments"
                        " for pair {0}/{1}.".format(tA, tB)
From lingpy/align/multiple.py (github.com/lingpy/lingpy)
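During profile alignment, the consensus sequences consA and consB (derived from the sonority profiles sonarA and sonarB) are turned into prosodic strings, those into prosodic weights, and both are passed on to calign.align_profile: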
                self.log.warning("There are empty segments in the consensus.")
                self.log.info(
                    '',
                    extra=dict(lines=[' '.join([str(x) for x in cons])
                                      for cons in [consA, consB]]))
            except:
                self.log.error(
                    "Failed to compute the consensus string.",
                    extra=dict(lines=[
                        sonarA, sonarB,
                        almsA[0], [self._get(n_, 'tokens') for n_ in almsA[0]],
                        almsB[0], [self._get(n_, 'tokens') for n_ in almsB[0]]
                    ]))

        prosA = prosodic_string(consA)
        prosB = prosodic_string(consB)

        self.log.debug('', extra=dict(lines=[(prosA, consA), (prosB, consB)]))
        weightsA, weightsB = prosodic_weights(prosA), prosodic_weights(prosB)

        # carry out the alignment
        almA, almB, sim = calign.align_profile(
            profileA,
            profileB,
            weightsA,
            weightsB,
            prosA,
            prosB,
            gop,
            scale,
            factor,
            self.scorer,
From lingpy/align/sca.py (github.com/lingpy/lingpy)
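When consensus forms are computed for the cognate sets, the sonority consensus stored with each MSA is converted with prosodic_string and weighted with prosodic_weights before the sound classes of the aligned forms are collected: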
        # go on with the analysis
        cons_dict = {}
        with util.pb(desc='CONSENSUS', total=len(self.etd[ref])) as progress:
            for cog in self.etd[ref]:
                progress.update(1)

                if cog in self.msa[ref]:
                    log.debug("Analyzing cognate set number '{0}'...".format(cog))

                    # temporary solution for sound-class integration
                    if classes == True:
                        _classes = []
                        if weights:
                            keywords['weights'] = prosodic_weights(
                                prosodic_string(self.msa[ref][cog]['_sonority_consensus'])
                            )
                        else:
                            keywords['weights'] = [
                                1.0 for i in range(len(self.msa[ref][cog]['alignment']))]

                        for alm in self.msa[ref][cog]['alignment']:
                            cls = [c for c in tokens2class(
                                alm,
                                keywords['model'], 
                                stress=keywords['stress'],
                                cldf=keywords['cldf'],
                                diacritics=keywords['diacritics']
                            ) if c != '0']
                            cls = class2tokens(cls, alm)
                            _classes.append(cls)
                        _classes = misc.transpose(_classes)
From lingpy/align/multiple.py (github.com/lingpy/lingpy)
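Here the multiple-alignment code builds prosodic strings for all input sequences, either directly from user-supplied sonority profiles or from sonority values derived with tokens2class and the articulation model: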
        if sonar and sonars:  # == list:
            self._sonars = [sonars[key] for key in keys]
            self._prostrings = list([prosodic_string(s) for s in self._sonars])
        # create sonars if the argument is true
        elif sonar:
            self._sonars = list(
                map(lambda x: [int(t) for t in tokens2class(
                    x, rcParams['art'], stress=rcParams['stress'])],
                    [self.tokens[key] for key in keys]))
            if log.get_level() <= logging.DEBUG:
                for _i, _sonar in enumerate(self._sonars):
                    if 0 in _sonar:
                        self.log.warning(
                            "Sequence {0} contains unrecognized characters!".format(
                                self.seqs[self.int2ext[_i][0]]))
            self._prostrings = list([prosodic_string(s) for s in self._sonars])
        # do nothing if no arguments are passed
        else:
            self._sonars = False
            self._prostrings = False

        # create a scoredict for the calculation of alignment analyses
        # append the scorer if it is given with the model
        def scorer(x, y):
            if classes:
                return self.model.scorer[x, y]
            if scoredict:
                return scoredict[x, y]
            return 1.0 if x == y else -1.0

        self.scoredict = {}
        for (i, seqA), (j, seqB) in combinations_with_replacement(
From lingpy/sequence/generate.py (github.com/lingpy/lingpy)
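The word generator pairs every segment with its prosodic environment: input words are tokenized with ipa2tokens if necessary, prosodic_string yields the prosodic profile, and the profile is zipped with the segments or their sound classes into bigrams: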
        # start filling the dictionary
        for i, w in enumerate(words):

            # check for tokenized string
            if not tokens:
                tk = ipa2tokens(w, **keywords)
            else:
                tk = w[:]
            self.tokens += [tk]

            # create prosodic string
            if prostrings:
                p = prostrings[i]
            else:
                tt = tokens2class(tk, rcParams['art'])
                p = prosodic_string(
                        tk, 
                        rcParams['art'],
                        cldf=keywords['cldf'],
                        diacritics=keywords['diacritics'],
                        stress=keywords['stress'])
            # create classes
            if classes:
                c = tokens2class(tk, class_model, cldf=keywords['cldf'],
                        diacritics=keywords['diacritics'],
                        stress=keywords['stress'])
                bigrams = list(zip(p, c))
                self.classes += [c]
            else:
                # zip the stuff
                bigrams = list(zip(p, tk))
From lingpy/compare/lexstat.py (github.com/lingpy/lingpy)
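Finally, LexStat's keyword defaults register prosodic_string as the get_prostring callback, alongside ipa2tokens as the default tokenizer: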
"apply_checks": False,
            "defaults": False,
            "no_bscorer": False,
            "errors": "errors.log",
            "expand_nasals": False,
            "segments": "tokens",
            "numbers": "numbers",
            "classes": "classes",
            "transcription": "ipa",
            "prostrings": "prostrings",
            "weights": "weights",
            "sonars": "sonars",
            "langid": "langid",
            "duplicates": "duplicates",
            "tokenize": ipa2tokens,
            "get_prostring": prosodic_string,
            "row": "concept",
            "col": "doculect",
            "conf": None,
            'cldf': True
        }
        kw.update(keywords)

        # make segments, numbers and classes persistent across classes
        self._segments = kw['segments']
        self._numbers = kw['numbers']
        self._classes = kw['classes']
        self._weights = kw['weights']
        self._prostrings = kw['prostrings']
        self._sonars = kw['sonars']
        self._langid = kw['langid']
        self._duplicates = kw['duplicates']