with util.pb(
        desc='THRESHOLD DETERMINATION',
        total=len(self.pairs) - len(self.cols)) as progress:
    for l1, l2 in self.pairs:
        progress.update(1)
        if l1 != l2:
            pairs = self.pairs[l1, l2]
            for p1, p2 in pairs:
                # align p1 against a sample of randomly chosen second
                # elements to estimate the distance distribution of
                # unrelated sequence pairs
                dx = [align(p1, pairs[random.randint(0, len(pairs) - 1)][1])
                      for _ in range(len(pairs) // 20 or 5)]
                thresholds.extend(dx)
if thresholds:
    threshold = sum(thresholds) / len(thresholds) * 0.5
    self._meta['guessed_threshold'] = threshold

with util.pb(
        desc='SEQUENCE CLUSTERING',
        total=len(self.rows)) as progress:
    for concept, indices, matrix in matrices:
        progress.update(1)
        # check for keyword to guess the threshold
        if kw['guess_threshold'] and kw['gt_mode'] == 'item':
            t = clustering.best_threshold(matrix, kw['gt_trange'])
        # FIXME: considering new function here JML
        # elif kw['guess_threshold'] and kw['gt_mode'] == 'nullditem':
        #     pass
        else:
            t = threshold
        c = fclust(matrix, t)
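The heuristic above guesses a global threshold by aligning each first element against randomly chosen second elements and halving the mean distance. A self-contained sketch of the same idea, with a toy distance function standing in for `align` (both the helper and the sample data are hypothetical):

import random

def guess_threshold(pairs, distance, min_samples=5):
    """Estimate a clustering cutoff from randomly re-paired items.

    Random re-pairing approximates the distance distribution of
    unrelated sequences; half of its mean serves as a rough cutoff.
    """
    scores = []
    n = len(pairs) // 20 or min_samples   # same sampling rate as above
    for first, _ in pairs:
        for _ in range(n):
            _, second = random.choice(pairs)
            scores.append(distance(first, second))
    return 0.5 * sum(scores) / len(scores)

# toy usage: normalized Hamming-style distance on word pairs
word_pairs = [('hand', 'hant'), ('mano', 'main'), ('ruka', 'reka')]
dist = lambda a, b: sum(x != y for x, y in zip(a, b)) / max(len(a), len(b))
print(guess_threshold(word_pairs, dist))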
def read_conf(conf=''):
    # load the configuration file
    if not conf:
        conf = util.data_path('conf', 'qlc.rc')
    # read the file defined by its path in conf
    tmp = [line.split('\t') for line in util.read_config_file(conf)]
    aliasD, classD, class_stringD, alias2D = {}, {}, {}, {}
    for name, cls, alias in tmp:
        # make sure the name itself is there
        aliasD[name.lower()] = aliasD[name.upper()] = name
        classD[name.lower()] = classD[name.upper()] = eval(cls)
        class_stringD[name.lower()] = class_stringD[name.upper()] = cls
        # add the aliases
        for a in alias.split(','):
            aliasD[a.lower()] = aliasD[a.upper()] = name
            classD[a.lower()] = classD[a.upper()] = eval(cls)
            class_stringD[a.lower()] = class_stringD[a.upper()] = cls
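`read_conf` builds its class lookup with `eval(cls)`, so the class column of the config file executes as Python. A hedged sketch of the same registration logic with an explicit whitelist instead of `eval` (`KNOWN_CLASSES` and `parse_conf_line` are hypothetical names; the tab-separated name/class/aliases layout comes from the loop above):

# one entry per line: name<TAB>class<TAB>comma-separated aliases
KNOWN_CLASSES = {'str': str, 'int': int, 'float': float}

def parse_conf_line(line):
    name, cls, alias = line.rstrip('\n').split('\t')
    if cls not in KNOWN_CLASSES:  # refuse anything not whitelisted
        raise ValueError('unknown class: %r' % cls)
    entry = (name, KNOWN_CLASSES[cls], cls)
    keys = {name} | {a for a in alias.split(',') if a}
    # register every key in lower and upper case, as read_conf does
    return {k: entry for key in keys for k in (key.lower(), key.upper())}

print(parse_conf_line('doculect\tstr\tlanguage,taxon'))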
def _init_rules(self, f):
    # Process the orthography rules file.
    for line in util.read_config_file(f, normalize='NFD'):
        rule, replacement = line.split("\t")
        rule = rule.strip()  # strip whitespace, just in case
        replacement = replacement.strip()
        self.op_rules.append(re.compile(rule))
        self.op_replacements.append(replacement)
    # sanity check: every rule must have exactly one replacement
    if len(self.op_rules) != len(self.op_replacements):
        raise ValueError("Number of inputs does not match number of outputs in the rules file.")
# open the infile
if not os.path.exists(infile):
    infile = infile + '.alm'
data = util.read_text_file(infile)

# create the outfile
if not filename:
    filename = rcParams['filename']

# read in the templates
html = util.read_text_file(main_template or template_path('alm2html.html'))
if not table_template:
    table_template = template_path(
        'alm2html.table.js.html' if confidence else 'alm2html.table.html')
table = util.read_text_file(table_template)
css = util.read_text_file(template_path('alm.css'))
js = util.read_text_file(template_path('alm.js'))

# define a label function for the taxa
label = lambda x: keywords['labels'].get(x, x)

# check for windows-compatibility
data = data.replace(os.linesep, '\n')[:-1]

# split the data into blocks
blocks = data.split('\n\n')

# retrieve the dataset
dataset = dataset or blocks[0]

# create the outstring
tmp_str = ''
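The block logic above treats an `.alm` file as blank-line-separated blocks whose first block names the dataset. A small self-contained illustration of that convention (the sample content is made up):

import os

raw = ('KSL\n'
       '\n'
       'Concept: hand\n'
       'German\th\ta\tn\tt\n'
       'English\th\tæ\tn\td\n'
       '\n'
       'Concept: mountain\n'
       'German\tb\te\tr\tg\n')
data = raw.replace(os.linesep, '\n')
blocks = data.split('\n\n')
dataset = blocks[0]       # the first block names the dataset
alignments = blocks[1:]   # each further block holds one alignment
print(dataset, len(alignments))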
"""
Convert a PSA file into colored HTML format.
"""
util.setdefaults(
    kw,
    template=False,
    css=False,
    comment='#',
    filename=infile[:-4] + '.html',
    compact=True)
template = util.read_text_file(kw['template'] or template_path('psa.html'))
css = util.read_text_file(kw['css'] or template_path('psa.css'))

data = []
for line in util.read_text_file(infile, lines=True):
    if not line.startswith(kw['comment']):
        data.append(line)

seq_ids = []
pairs = []
taxa = []
alignments = []

# drop the first line (dataset header); records follow in blocks of three
del data[0]
i = 0
while i <= len(data) - 3:
    try:
        seq_ids.append(data[i])
        datA = data[i + 1].split('\t')
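The loop above walks the remaining lines in three-line records. A minimal sketch of that traversal on toy data (the exact field layout is inferred from the `split('\t')` calls; the sample rows are invented):

data = [
    '1 (hand)',
    'German\th\ta\tn\tt',
    'English\th\tæ\tn\td',
]
records = []
i = 0
while i <= len(data) - 3:
    seq_id = data[i]
    row_a = data[i + 1].split('\t')   # taxon followed by aligned segments
    row_b = data[i + 2].split('\t')
    records.append((seq_id, (row_a[0], row_a[1:]), (row_b[0], row_b[1:])))
    i += 3
print(records)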
A list-representation of the CSV file.
"""
# check for correct fileformat
if fileformat:
    infile = filename + '.' + fileformat
else:
    infile = filename

if dtype is None:
    dtype = []

l = []
# open the file
infile = read_text_file(infile, lines=True, normalize="NFC")
# check for header
idx = 0 if header else -1
for i, line in enumerate(infile):
    # skip empty lines, comment lines, and the header line
    if line and (not comment or not line.startswith(comment)) and idx != i:
        if strip_lines:
            cells = [c.strip() for c in line.strip().split(sep)]
        else:
            cells = [c.strip() for c in line.split(sep)]
        if not dtype:
            l += [cells]
        else:
            # apply the per-column converters in dtype
            l += [[f(c) for f, c in zip(dtype, cells)]]
return l
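The `dtype` parameter zips one converter per column over each row. A minimal illustration of that pattern (the sample CSV content is made up):

rows = ['1\thand\t0.75', '2\tfoot\t0.50']
dtype = [int, str, float]
parsed = [[f(c) for f, c in zip(dtype, row.split('\t'))] for row in rows]
print(parsed)  # [[1, 'hand', 0.75], [2, 'foot', 0.5]]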
set this value to 0 and make sure to use tabstops as separators between
values in your matrix file.
comment : str (default = '#')
    The comment character to be used if your file contains additional
    information which should be ignored.

Returns
-------
data : tuple
    A tuple consisting of a list of taxa and a matrix.
"""
if '\n' in filename:
    lines = [f for f in filename.split('\n') if f.strip()]
else:
    lines = read_text_file(filename, normalize="NFC", lines=True)

taxa, matrix = [], []
for line in lines[1:]:
    if not line.startswith(comment):
        if taxlen > 0:
            # fixed-width mode: the first taxlen characters hold the taxon
            taxa.append(line[:taxlen].strip())
            matrix.append([float(val) for val in
                           re.split(r'\s+', line[taxlen + 1:].strip())])
        else:
            # tab-separated mode: first cell is the taxon, rest the row
            splits = line.split('\t')
            taxa.append(splits[0])
            matrix.append([float(val.strip()) for val in splits[1:]])
return taxa, matrix
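Because the function accepts raw content whenever the argument contains a newline, a distance matrix can be parsed directly from a string. A toy illustration of the tab-separated branch (data invented; the skipped first line is assumed to be a Phylip-style taxon count, matching the `lines[1:]` slice above):

content = ('3\n'
           'German\t0.0\t0.3\t0.7\n'
           'English\t0.3\t0.0\t0.8\n'
           'Russian\t0.7\t0.8\t0.0')
lines = [f for f in content.split('\n') if f.strip()]
taxa, matrix = [], []
for line in lines[1:]:            # skip the leading count header
    cells = line.split('\t')
    taxa.append(cells[0])
    matrix.append([float(v) for v in cells[1:]])
print(taxa, matrix[0])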
# if no orthography profile is specified, simply return
# Unicode grapheme clusters, regex pattern "\X"
if self.orthography_profile is None:
    return self.grapheme_clusters(string)

parses = []
for word in string.split():
    parse = getParse(self.root, word)
    # if the parse fails, replace characters that appear in the string
    # but not in the orthography profile with a missing-character mark
    if len(parse) == 0:
        parse = " " + self.find_missing_characters(self.characters(word))
        # write problematic stuff to standard error
        log.debug(
            "The string '{0}' does not parse given the specified "
            "orthography profile {1}.\n".format(word, self.orthography_profile))
    parses.append(parse)

# remove the outer word boundaries
result = "".join(parses).replace("##", "#")
result = result.rstrip("#")
result = result.lstrip("#")
return result.strip()
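The fallback path tokenizes into Unicode grapheme clusters via the `\X` pattern, which the standard-library `re` module does not support; the third-party `regex` package does. A minimal sketch of that fallback:

import regex  # third-party package: pip install regex

def grapheme_clusters(string):
    # \X matches one extended grapheme cluster, so combining marks
    # stay attached to their base character
    return regex.findall(r'\X', string)

print(grapheme_clusters('pʰa\u0301t'))  # ['p', 'ʰ', 'á', 't']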
tokens = line.split("\t")
grapheme = tokens[0].strip()
# check for duplicates in the orthography profile (fail if dups)
if not grapheme in self.op_graphemes:
self.op_graphemes[grapheme] = 1
else:
raise Exception("You have a duplicate in your orthography profile.")
if len(tokens) == 1:
continue
for i, token in enumerate(tokens):
token = token.strip()
self.mappings[grapheme, self.column_labels[i].lower()] = token
log.debug('%s %s' % (grapheme, self.column_labels[i].lower()))
# print the tree structure if debug mode is on
if log.get_logger().getEffectiveLevel() <= logging.INFO:
log.debug("A graphical representation of your orthography profile in a tree ('*' denotes sentinels):\n")
printTree(self.root, "")
print()
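`printTree` renders the profile's grapheme trie with `'*'` marking sentinel (end-of-grapheme) nodes. A minimal sketch of such a trie and its printer (the dict-based node layout is an assumption; only the `'*'` sentinel convention comes from the log message above):

def insert(root, grapheme):
    node = root
    for ch in grapheme:
        node = node.setdefault(ch, {})
    node['*'] = {}  # sentinel: a complete grapheme ends here

def print_tree(node, indent=''):
    for ch, child in sorted(node.items()):
        print(indent + ch)
        print_tree(child, indent + '  ')

root = {}
for g in ['a', 'aa', 'ph']:
    insert(root, g)
print_tree(root)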