Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# ZINB header line has names of conditions, organized as 3+4*n+3 (4 groups X n conditions)
if self.filetype=="anova": n = int((len(w)-6)/2)
elif self.filetype=="zinb": n = int((len(headers)-6)/4)
headers = headers[3:3+n]
headers = [x.replace("Mean_","") for x in headers]
vals = [float(x) for x in w[3:3+n]] # take just the columns of means
qval = float(w[-2])
if qval<0.05: data.append(w); means.append(vals)
else: print("filetype not recognized: %s" % self.filetype); sys.exit(-1)
print("correlations based on %s genes" % len(means))
genenames = ["%s/%s" % (w[0],w[1]) for w in data]
hash = {}
headers = [h.replace("Mean_","") for h in headers]
for i,col in enumerate(headers): hash[col] = FloatVector([x[i] for x in means])
df = DataFrame(hash) # can't figure out how to set rownames
corrplotFunc = self.make_corrplotFunc()
corrplotFunc(df,StrVector(headers),StrVector(genenames),self.outfile) # pass headers to put cols in order, since df comes from dict
self.finish()
self.transit_message("Finished Corrplot")
val = rn[n]
if val is None:
val = kv[2]
connectors[n][1].append(val)
r_connectors = [(kv[0], connector_cols[n][1](kv[1]))
for n, kv in enumerate(connectors)]
# Tags in Rpy2 format
r_tags = {}
for tag, node_ids in raw_tags.items():
r_tags[tag] = rinterface.IntSexpVector(node_ids)
# Construct output similar to rcatmaid's request response parsing function.
skeleton_data = robjects.ListVector({
'nodes': robjects.DataFrame(rlc.OrdDict(r_nodes)),
'connectors': robjects.DataFrame(rlc.OrdDict(r_connectors)),
'tags': robjects.ListVector(r_tags),
})
skeleton_envelope = {}
skeleton_envelope[str(skeleton_id)] = skeleton_data
cs_r[str(skeleton_id)] = read_neuron_local(str(skeleton_id),
robjects.ListVector(skeleton_envelope), project_id)
# Make sure all temporary R values are garbage collected. With many
# skeletons, this can otherwise become a memory problem quickly (the
# Python GC doesn't know about the R memory).
del r_nodes, r_connectors, r_tags, skeleton_data
# Explicitly garbage collect after each skeleton is loaded.
gc.collect()
def write(iteration, kME):
    """Write the eigengene connectivity (kME) dictionary to file.

    An R data frame is built from ``kME`` via ``ro.DataFrame``, its row
    names are set to ``iteration``, and the result is handed to
    ``write_data_frame`` together with the fixed output name
    ``'eigengene-connectivity.txt'`` and the label ``'Iteration'``.

    Args:
        iteration: Value assigned to the data frame's ``rownames``.
            NOTE(review): presumably one label per row of ``kME`` -- confirm
            against the caller.
        kME: Mapping passed directly to ``ro.DataFrame``.
    """
    connectivity_frame = ro.DataFrame(kME)
    connectivity_frame.rownames = iteration
    # NOTE(review): 'Iteration' is presumably the header for the row-name
    # column; verify against write_data_frame's signature.
    write_data_frame(
        connectivity_frame, 'eigengene-connectivity.txt', 'Iteration')
if zero_start:
BaseStart.append(unicode(i))
else:
BaseStart.append(unicode(i + reg_data.start))
else:
if zero_start:
BaseStart.append(unicode(
reg_data.end - reg_data.start - i - 1))
else:
BaseStart.append(unicode(
reg_data.end - i - 1))
Bases.append(base)
BaseRegion.append(reg_data.reg_id)
BaseStrand.append(REV_STRAND)
return r.DataFrame({
'Position':r.FloatVector(BaseStart),
'Base':r.StrVector(Bases),
'Region':r.StrVector(BaseRegion),
'Strand':r.FactorVector(
r.StrVector(BaseStrand),
ordered=True, levels=r.StrVector((FWD_STRAND, REV_STRAND)))})
# vals[vals > 1000] = 0
vals[isnan(PAmask)] = nan
indice = '%s_%s' % (var, agg)
data[str(indice)] = ro.FloatVector(ravel(vals))
if i == 0:
form = form + 's(%s, k=3)' % indice
else:
form = form + ' + s(%s, k=3)' % indice
LOGGER.info('form string generated for gam model')
except:
msg = 'form string generation for gam failed'
LOGGER.exception(msg)
# raise Exception
try:
dataf = ro.DataFrame(data)
eq = ro.Formula(str(form))
gam_model = mgcv.gam(base.eval(eq),
data=dataf,
family=stats.binomial(),
scale=-1,
na_action=stats.na_exclude)
LOGGER.info('GAM model trained')
except:
msg = 'failed to train the GAM model'
LOGGER.exception(msg)
# ####################
# plot response curves
# ####################
try:
cat_type = ldf[colname].dtype
levels = rpy2.robjects.StrVector(list(cat_type.categories))
ordered = cat_type.ordered
# if colname == 'Bsmt_Full_Bath':
# print('col: {}, levels: {}, ordered: {}'.format(colname, levels, ordered))
lds = ldf[colname]
factorized_column = None
if colname in latent:
factorized_column = rpy2.robjects.vectors.FactorVector(rpy2.robjects.StrVector(NA_lds), levels=levels, ordered=ordered)
else:
factorized_column = rpy2.robjects.vectors.FactorVector(rpy2.robjects.StrVector(lds), levels=levels, ordered=ordered)
cols += [factorized_column]
od = rpy2.rlike.container.OrdDict([(colnames[i], col) for i,col in enumerate(cols)])
r_df = rpy2.robjects.DataFrame(od)
return r_df
read_id for _ in range(len(running_diffs))])}
# add region if applicable
if region_name is not None:
old_dat['Region'] = r.StrVector([
region_name for _ in range(len(old_bases))])
new_dat['Region'] = r.StrVector([
region_name for _ in range(len(new_bases))])
sig_dat['Region'] = r.StrVector([
region_name for _ in range(len(norm_reg_signal))])
diff_dat['Region'] = r.StrVector([
region_name for _ in range(len(running_diffs))])
old_dat = r.DataFrame(old_dat)
new_dat = r.DataFrame(new_dat)
sig_dat = r.DataFrame(sig_dat)
diff_dat = r.DataFrame(diff_dat)
return old_dat, new_dat, sig_dat, diff_dat
unicode(r_cov[3]) + r_ovp[3] + " -")
else:
if include_chrm:
reg_title += ":" + int_i.strand
reg_title += ' ' + int_i.reg_text
if include_cov:
cov_str = (
'Sample (Red): ' + unicode(r_cov[0]) + r_ovp[0] +
'; Control (Black): ' + unicode(r_cov[2]) + r_ovp[2]
) if int_i.strand == '+' else (
'Sample (Red): ' + unicode(r_cov[1]) + r_ovp[1] +
'; Control (Black): ' + unicode(r_cov[3]) + r_ovp[3])
reg_title += " Coverage: " + cov_str
titles.append(reg_title)
Titles = r.DataFrame({
'Title':r.StrVector(titles),
'Region':r.StrVector([int_i.reg_id for int_i in all_reg_data])})
return Titles, plot_types