Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
* "html" -- output of the multiple alignment in ``html``-format.
filename : str
Select a specific name for the outfile, otherwise, the name of
the infile will be taken by default.
sorted_seqs : bool
Indicate whether the sequences should be sorted or not (applies only
to 'msa' and 'msq' output).
unique_seqs : bool
Indicate whether only unique sequences should be written to file or
not.
"""
util.setdefaults(keywords, wordlist=False, timestamp=False)
if fileformat in ['html', 'tex']:
with util.TemporaryPath(suffix='.msa') as tmp:
self.output(
fileformat='msa',
filename=os.path.splitext(tmp)[0],
sorted_seqs=sorted_seqs,
unique_seqs=unique_seqs)
if 'filename' not in keywords:
keywords['input_file'] = os.path.split(self.infile)[1]
keywords['filename'] = filename
getattr(html, 'msa2' + fileformat)(tmp, **keywords)
return
# create a specific format string in order to receive taxa of equal length
def _output(self, fileformat, **keywords):
"""
Internal function that eases its modification by daughter classes.
"""
# check for stamp attribute
keywords["stamp"] = getattr(self, '_stamp', '')
# add the default parameters, they will be checked against the keywords
util.setdefaults(
keywords,
cols=False,
distances=False,
entries=("concept", "counterpart"),
entry='concept',
fileformat=fileformat,
filename=rcParams['filename'],
formatter='concept',
modify_ref=False,
meta=self._meta,
missing=0,
prettify='false',
ignore='all',
ref='cogid',
rows=False,
subset=False, # setup a subset of the data,
gap_weight : float (default=0)
The factor by which gaps in aligned columns contribute to the
calculation of the column score. When set to 0, gaps will be
ignored in the calculation. When set to 0.5, gaps will count half
as much as other characters.
restricted_chars : string (default="T")
Define which characters of the prosodic string of a sequence
reflect its secondary structure (cf. :evobib:`List2012b`) and
should therefore be aligned specifically. This defaults to "T",
since this is the character that represents tones in the prosodic
strings of sequences.
"""
setdefaults(
keywords,
new_calc=True,
model=rcParams['sca'],
mode='global',
gop=-3,
scale=0.5,
factor=1,
restricted_chars='T_',
classes=True,
sonar=True,
scorer={})
if keywords['new_calc']:
# define the class model
self._set_model(
keywords['model'],
A sound class model according to which the IPA strings shall be
converted to sound-class strings.
local : { c{bool}, "peaks", "gaps" }(default=False)
Specify whether local pre-processing should be applied to the data. If
set to c{peaks}, the average alignment score of each column is taken as
reference to remove low-scoring columns from the alignment. If set to
"gaps", the columns with the highest proportion of gaps will be
excluded.
Returns
-------
cons : c{str}
A consensus string of the given MSA.
"""
util.setdefaults(
keywords,
model=rcParams['sca'],
stress=rcParams['stress'],
cldf=False,
diacritics=rcParams['diacritics'],
gap_scale=1.0,
mode='majority',
gap_score=-10,
weights=[1 for i in range(len(msa[0]))],
local=False)
# transform the matrix
matrix = misc.transpose(getattr(msa, 'alm_matrix', msa))
# custom function for tokens2class
tk2k = lambda x: token2class(x, keywords['model'], cldf=keywords['cldf'],
tree : {c{str} ~lingpy.thirdparty.cogent.PhyloNode}
A tree object or a Newick string along which the consensus shall be
calculated.
gaps : c{bool} (default=False)
If set to c{True}, return the gap positions in the consensus.
classes : c{bool} (default=False)
Specify whether sound classes shall be used to calculate the consensus.
model : ~lingpy.data.model.Model
A sound class model according to which the IPA strings shall be
converted to sound-class strings.
return_data : c{bool} (default=False)
Return the data instead of adding it in a column to the wordlist
object.
"""
util.setdefaults(
keywords, model=rcParams['sca'], gap_scale=1.0,
ref=rcParams['ref'], stress=rcParams['stress'],
diacritics=rcParams['diacritics'], cldf=False)
# switch ref
if keywords['ref'] != rcParams['ref']:
rcParams['ref'] = keywords['ref']
# reassign ref for convenience
ref = keywords['ref']
# check for existing alignments
test = list(self.msa[ref].keys())[0]
if 'alignment' not in self.msa[ref][test]:
log.error(
"No alignments could be found. You should carry out"
def check_tokens(tokens, **keywords):
    """
    Collect the tokens that the articulation model cannot classify.

    Parameters
    ----------
    tokens : iterable
        The tokens to inspect, in order.
    **keywords
        Optional ``stress``, ``diacritics`` and ``cldf`` settings that are
        forwarded to ``token2class``. Defaults for them are supplied through
        ``setdefaults`` (``rcParams['stress']``, ``rcParams['diacritics']``
        and ``False`` respectively).

    Returns
    -------
    errors : list
        One ``(index, token)`` tuple for every token whose class under
        ``rcParams['art']`` is ``'0'``; empty when no such token is found.
    """
    setdefaults(
        keywords,
        stress=rcParams['stress'],
        diacritics=rcParams['diacritics'],
        cldf=False)

    def _articulation_class(segment):
        # convert a single token with the articulation model
        return token2class(
            segment,
            rcParams['art'],
            stress=keywords['stress'],
            cldf=keywords['cldf'],
            diacritics=keywords['diacritics'])

    # a class of '0' marks a token that could not be converted
    return [
        (position, segment)
        for position, segment in enumerate(tokens)
        if _articulation_class(segment) == '0']
def diff(self, **keywords):
"""
Write all differences between two sets to a file.
Parameters
----------
filename : str (default=self.gold.infile)
Name of the output file. The suffix ``.diff`` is appended if the name
does not already end with it.
"""
setdefaults(keywords, filename=self.gold.infile)
if not keywords['filename'].endswith('.diff'):
keywords['filename'] = keywords['filename'] + '.diff'
out = []
for i, (a, b) in enumerate(zip(self.gold.alignments, self.test.alignments)):
g1, g2, g3 = a
t1, t2, t3 = b
maxL = max([len(g1), len(t1)])
if g1 != t1 or g2 != t2:
taxA, taxB = self.gold.taxa[i]
taxlen = max(len(taxA), len(taxB))
seq_id = self.gold.seq_ids[i]
out.append('{0}\n{1}\t{2}\n{3}\t{4}\n{5}\n{1}\t{6}\n{3}\t{7}\n\n'.format(
seq_id,
taxA,
'\t'.join(g1),
def psa2html(infile, **kw):
"""
Function converts a PSA-file into colored html-format.
"""
util.setdefaults(
kw,
template=False,
css=False,
comment='#',
filename=infile[:-4]+'.html',
compact=True)
template = util.read_text_file(kw['template'] or template_path('psa.html'))
css = util.read_text_file(kw['css'] or template_path('psa.css'))
data = []
for line in util.read_text_file(infile, lines=True):
if not line.startswith(kw['comment']):
data.append(line)
seq_ids = []
h1=('concept', r'\section{{Concept: ``{0}"}}' + '\n'),
h2=('cogid', r'\subsection{{Cognate Set: ``{0}"}}' + '\n'))
elif fileformat == 'html':
sections = dict(
h1=('concept', '<h1>Concept: {0}</h1>'),
h2=('cogid', '<h2>Cognate Set: {0}</h2>'))
if not entries:
if fileformat == 'txt':
entries = [('language', '{0} '), ('ipa', '{0}\n')]
elif fileformat == 'tex':
entries = [('language', '{0} '), ('ipa', '[{0}]' + '\n')]
elif fileformat == 'html':
entries = [('language', '{0} '), ('ipa', '[{0}]\n')]
util.setdefaults(keywords, filename=rcParams['filename'])
# get the temporary dictionary
out = wl2dict(self, sections, entries, exclude)
# assign the output string
out_string = ''
# iterate over the dictionary and start to fill the string
for key in sorted(out, key=lambda x: str(x).lower()):
# write key to file
out_string += key[1]
# reassign tmp
tmp = out[key]
# set the pointer and the index