Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
###For each column of values, summarize the numerical values (take the mean) for each GO/MAPP term
for goid in go_values_db:
index_db={}; index_ls=[]
for vals in go_values_db[goid]:
index = 0
for val in vals:
if val != 0: ###only occurs when the value was non-numeric, otherwise it's a float
try: index_db[index].append(val)
except KeyError: index_db[index] = [val]
index_ls.append(index) ###for sorting through
index+=1
index_ls = unique.unique(index_ls); index_ls.sort()
summary_values = []; summary_stdev_values = []
for index in index_ls:
try:
try: avg_val = statistics.avg(index_db[index]); summary_values.append(str(avg_val))
except KeyError: summary_values.append('')
try: stdev_val = statistics.stdev(index_db[index]); summary_stdev_values.append(str(stdev_val))
except KeyError: summary_stdev_values.append('')
except ValueError: summary_values.append(''); summary_stdev_values.append('')
go_values_db[goid] = summary_values, summary_stdev_values
#print 'Gene counts (re-derived) for GO-Elite results written to:\n',gene_ranking_filename
return combined_associations,combined_gene_ranking,go_gene_annotation_db,go_values_db,value_headers,goids_with_redundant_genes,len(gene_to_goid)
groups = map(str, group_index_db) ### store group names
new_sample_list = map(lambda x: group_db[x], sample_list) ### lookup index of each sample in the ordered group sample list
title = string.join([t[0]]+groups,'\t')+'\n' ### output the new sample order (group file order)
export_data.write(title)
row_number=1
else:
tissue = t[0]
if platform == 'RNASeq' and 'LineageCorrelations' not in filename:
### Convert to log2 RPKM values - or counts
values = map(lambda x: math.log(float(x),2), t[1:])
else:
values = map(float,t[1:])
avg_z=[]
for group_name in group_index_db:
group_values = map(lambda x: values[x], group_index_db[group_name]) ### simple and fast way to reorganize the samples
avg = statistics.avg(group_values)
avg_z.append(str(avg))
export_data.write(string.join([tissue]+avg_z,'\t')+'\n')
export_data.close()
return export_path
group_name = group_db[x]
sample_index = t[1:].index(x)
try: group_index_db[group_name].append(sample_index)
except Exception: group_index_db[group_name] = [sample_index] ### dictionary of group to input file sample indexes
row_number=1
else:
gene = t[0]
if platform == 'RNASeq':
### Convert to log2 RPKM values - or counts
values = map(lambda x: math.log(float(x),2), t[1:])
else:
values = map(float,t[1:])
### Calculate log-fold values relative to the mean of all sample expression values
values = map(lambda x: values[x], sample_index_list) ### simple and fast way to reorganize the samples
avg = statistics.avg(values)
log_folds = map(lambda x: (x-avg), values)
if gene in genes_to_import:
### Genes regulated in any user-indicated comparison according to the fold and pvalue cutoffs provided
log_folds = map(lambda x: str(x), log_folds)
try: gene = gene+' '+probeset_symbol[gene]
except Exception: gene = gene
export_data.write(string.join([gene]+log_folds,'\t')+'\n')
if exportRelative:
### Calculate log-fold values relative to the mean of each valid group comparison
control_group_avg={}; comps_exp_db={}
for group_name in comps_name_db: ### control group names
con_group_values = map(lambda x: values[x], group_index_db[group_name]) ### simple and fast way to reorganize the samples
control_group_avg[group_name] = statistics.avg(con_group_values) ### store the mean value of each control group
for exp_group in comps_name_db[group_name]:
except KeyError: null =[] ###occurs if the expression exon list is missing some of these exons
try:
if len(exp_list)==0:
for exon in exon_list:
try:
exp_list.append(exp_dbase[exon][0][x]); len_list.append(exp_dbase[exon][1])
#kill
except KeyError: null=[] ### Gene entries will cause this error, since they are in the database but not in the count file
if normalize_feature_exp == 'RPKM':
sum_const_exp=sum(map(float,exp_list)); gene_sum+=sum_const_exp
sum_length=sum(len_list) ### can have different lengths for each sample, since only expressed exons are considered
### Add only one avg-expression value for each array, this loop
try: steady_state_db[gene].append((sum_const_exp,sum_length))
except KeyError: steady_state_db[gene] = [(sum_const_exp,sum_length)]
else:
avg_const_exp=statistics.avg(exp_list)
if avg_const_exp != 1: gene_sum+=avg_const_exp
### Add only one avg-expression value for each array, this loop
try: steady_state_db[gene].append(avg_const_exp)
except KeyError: steady_state_db[gene] = [avg_const_exp]
except ZeroDivisionError: null=[] ### Occurs when processing a truncated dataset (for testing usually) - no values for the gene should be included
x += 1
if gene_sum==0:
try:
del steady_state_db[gene] ### Hence, no genes showed evidence of expression (most critical for RNA-Seq)
except Exception: null=[] ### Error occurs when a gene is added to the database from location_gene_db, but is not expressed
return steady_state_db
### store the minimal information to recover the original count and ID data prior to quantile normalization
try: condition_unnormalized_db[condition].append([count,key])
except Exception: condition_unnormalized_db[condition]=[[count,key]]
quantile_normalize_db={}; key_db={}
for condition in condition_unnormalized_db:
condition_unnormalized_db[condition].sort() ### Sort lists by count number
rank=0 ### thus, the ID is the rank order of counts
for (count,key) in condition_unnormalized_db[condition]:
try: quantile_normalize_db[rank].append(count)
except KeyError: quantile_normalize_db[rank] = [count]
rank+=1
### Get the average value for each index
for rank in quantile_normalize_db:
quantile_normalize_db[rank] = statistics.avg(quantile_normalize_db[rank])
for condition in condition_unnormalized_db:
rank=0
count_db = condition_count_db[condition]
for (count,key) in condition_unnormalized_db[condition]:
avg_count = quantile_normalize_db[rank]
rank+=1
count_db[key] = str(avg_count) ### re-set this value to the normalized value
clearObjectsFromMemory(condition_unnormalized_db); condition_unnormalized_db = []
clearObjectsFromMemory(quantile_normalize_db); quantile_normalize_db = []
return condition_count_db