How to use the statistics.avg function in statistics

To help you get started, we’ve selected a few examples of statistics.avg, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github nsalomonis / altanalyze / AltAnalyze_release / gene_associations.py View on Github external
###For each column of values, summarize the numerical values (take the mean) for each GO/MAPP term
    for goid in go_values_db:
        index_db={}; index_ls=[]
        for vals in go_values_db[goid]:
            index = 0
            for val in vals:
                if val != 0: ###only occurs when the value was non-numeric, otherwise it's a float
                    try: index_db[index].append(val)
                    except KeyError: index_db[index] = [val]
                    index_ls.append(index) ###for sorting through
                index+=1
        index_ls = unique.unique(index_ls); index_ls.sort()
        summary_values = []; summary_stdev_values = []
        for index in index_ls:
            try:
                try: avg_val = statistics.avg(index_db[index]); summary_values.append(str(avg_val))
                except KeyError: summary_values.append('')
                try: stdev_val = statistics.stdev(index_db[index]); summary_stdev_values.append(str(stdev_val))
                except KeyError: summary_stdev_values.append('')
            except ValueError: summary_values.append(''); summary_stdev_values.append('')
        go_values_db[goid] = summary_values, summary_stdev_values
    #print 'Gene counts (re-derived) for GO-Elite results writen to:\n',gene_ranking_filename
    return combined_associations,combined_gene_ranking,go_gene_annotation_db,go_values_db,value_headers,goids_with_redundant_genes,len(gene_to_goid)
github nsalomonis / altanalyze / AltAnalyze_release / ExpressionBuilder.py View on Github external
groups = map(str, group_index_db) ### store group names
            new_sample_list = map(lambda x: group_db[x], sample_list) ### lookup index of each sample in the ordered group sample list
            title = string.join([t[0]]+groups,'\t')+'\n' ### output the new sample order (group file order)
            export_data.write(title)
            row_number=1
        else:
            tissue = t[0]
            if platform == 'RNASeq' and 'LineageCorrelations' not in filename:
                ### Convert to log2 RPKM values - or counts
                values = map(lambda x: math.log(float(x),2), t[1:])
            else:
                values = map(float,t[1:])
            avg_z=[]
            for group_name in group_index_db:
                group_values = map(lambda x: values[x], group_index_db[group_name]) ### simple and fast way to reorganize the samples
                avg = statistics.avg(group_values)
                avg_z.append(str(avg))
            export_data.write(string.join([tissue]+avg_z,'\t')+'\n')
    export_data.close()
    return export_path
github nsalomonis / altanalyze / AltAnalyze_release / ExpressionBuilder.py View on Github external
group_name = group_db[x]
                        sample_index = t[1:].index(x)
                        try: group_index_db[group_name].append(sample_index)
                        except Exception: group_index_db[group_name] = [sample_index] ### dictionary of group to input file sample indexes
                row_number=1
            else:
                gene = t[0]
                if platform == 'RNASeq':
                    ### Convert to log2 RPKM values - or counts
                    values = map(lambda x: math.log(float(x),2), t[1:])
                else:
                    values = map(float,t[1:])
                
                ### Calculate log-fold values relative to the mean of all sample expression values
                values = map(lambda x: values[x], sample_index_list) ### simple and fast way to reorganize the samples
                avg = statistics.avg(values)
                log_folds = map(lambda x: (x-avg), values)
                                
                if gene in genes_to_import:
                    ### Genes regulated in any user-indicated comparison according to the fold and pvalue cutoffs provided
                    log_folds = map(lambda x: str(x), log_folds)
                    try: gene = gene+' '+probeset_symbol[gene]
                    except Exception: gene = gene
                    export_data.write(string.join([gene]+log_folds,'\t')+'\n')
                    
                    if exportRelative:
                        ### Calculate log-fold values relative to the mean of each valid group comparison
                        control_group_avg={}; comps_exp_db={}
                        for group_name in comps_name_db: ### control group names
                            con_group_values = map(lambda x: values[x], group_index_db[group_name]) ### simple and fast way to reorganize the samples
                            control_group_avg[group_name] = statistics.avg(con_group_values) ### store the mean value of each control group
                            for exp_group in comps_name_db[group_name]:
github nsalomonis / altanalyze / AltAnalyze_release / RNASeq.py View on Github external
except KeyError: null =[] ###occurs if the expression exon list is missing some of these exons
            try:
                if len(exp_list)==0:
                    for exon in exon_list:
                        try:
                            exp_list.append(exp_dbase[exon][0][x]); len_list.append(exp_dbase[exon][1])
                            #kill
                        except KeyError: null=[] ### Gene entries will cause this error, since they are in the database but not in the count file
                if normalize_feature_exp == 'RPKM':
                    sum_const_exp=sum(map(float,exp_list)); gene_sum+=sum_const_exp
                    sum_length=sum(len_list) ### can have different lengths for each sample, since only expressed exons are considered
                    ### Add only one avg-expression value for each array, this loop
                    try: steady_state_db[gene].append((sum_const_exp,sum_length))
                    except KeyError: steady_state_db[gene] = [(sum_const_exp,sum_length)]
                else:
                    avg_const_exp=statistics.avg(exp_list)
                    if avg_const_exp != 1: gene_sum+=avg_const_exp
                    ### Add only one avg-expression value for each array, this loop
                    try: steady_state_db[gene].append(avg_const_exp)
                    except KeyError: steady_state_db[gene] = [avg_const_exp]
            except ZeroDivisionError: null=[] ### Occurs when processing a truncated dataset (for testing usually) - no values for the gene should be included
            x += 1
        if gene_sum==0:
            try:
                del steady_state_db[gene] ### Hence, no genes showed evidence of expression (most critical for RNA-Seq)
            except Exception: null=[] ### Error occurs when a gene is added to the database from location_gene_db, but is not expressed
            
    return steady_state_db
github nsalomonis / altanalyze / AltAnalyze_release / RNASeq.py View on Github external
### store the minimal information to recover the original count and ID data prior to quantile normalization
            try: condition_unnormalized_db[condition].append([count,key])
            except Exception: condition_unnormalized_db[condition]=[[count,key]]
                    
    quantile_normalize_db={}; key_db={}
    for condition in condition_unnormalized_db:
        condition_unnormalized_db[condition].sort() ### Sort lists by count number
        rank=0 ### thus, the ID is the rank order of counts
        for (count,key) in condition_unnormalized_db[condition]:
            try: quantile_normalize_db[rank].append(count)
            except KeyError: quantile_normalize_db[rank] = [count]
            rank+=1
            
    ### Get the average value for each index
    for rank in quantile_normalize_db:
        quantile_normalize_db[rank] = statistics.avg(quantile_normalize_db[rank])

    for condition in condition_unnormalized_db:
        rank=0
        count_db = condition_count_db[condition]
        for (count,key) in condition_unnormalized_db[condition]:
            avg_count = quantile_normalize_db[rank]
            rank+=1
            count_db[key] = str(avg_count) ### re-set this value to the normalized value
            
    clearObjectsFromMemory(condition_unnormalized_db); condition_unnormalized_db =  []
    clearObjectsFromMemory(quantile_normalize_db); quantile_normalize_db = []
    return condition_count_db