Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
@jobs_limit(1, "R")
@transform(runMetavelvet
, suffix(".contigs.fa")
, ".stats.pdf")
def plotCoverageHistogram(infile, outfile):
'''
plot the coverage over kmers
'''
inf = P.snip(infile, ".contigs.fa") + ".stats.txt"
outf = P.snip(inf, ".txt") + ".pdf"
R('''library(plotrix)''')
R('''data = read.table("%s", header=TRUE)''' % inf)
R('''pdf("%s", height = 7, width = 7 )''' % outf)
R('''weighted.hist(data$short1_cov, data$lgth, breaks=seq(0, 200, by=1))''')
R["dev.off"]()
# estimate common dispersion
R('''countsTable = estimateGLMCommonDisp(countsTable, design)''')
# estimate tagwise dispersion
R('''countsTable = estimateGLMTagwiseDisp(countsTable, design)''')
# fitting model to each tag
R('''fit = glmFit(countsTable, design)''')
else:
# fitting model to each tag
if dispersion is None:
raise ValueError("no replicates and no dispersion")
E.warn("no replicates - using a fixed dispersion value")
R('''fit = glmFit(countsTable, design, dispersion=%f)''' %
dispersion)
# perform LR test
R('''lrt = glmLRT(fit)''')
E.info("Generating output")
# output cpm table
R('''suppressMessages(library(reshape2))''')
R('''countsTable.cpm <- cpm(countsTable, normalized.lib.sizes=TRUE)''')
R('''melted <- melt(countsTable.cpm)''')
R('''names(melted) <- c("test_id", "sample", "ncpm")''')
# melt columns are factors - convert to string for sorting
R('''melted$test_id = levels(melted$test_id)[as.numeric(melted$test_id)]''')
R('''melted$sample = levels(melted$sample)[as.numeric(melted$sample)]''')
# sort cpm table by test_id and sample
R('''sorted = melted[with(melted, order(test_id, sample)),]''')
R('''gz = gzfile("%(outfile_prefix)scpm.tsv.gz", "w" )''' % locals())
R('''write.table(sorted, file=gz, sep = "\t",
row.names=FALSE, quote=FALSE)''')
)
swc$Parent[is.na(swc$Parent)]=-1L
sp=somapos.catmaidneuron(swc=swc, tags=tags)
soma_id_in_neuron = if(nrow(sp)==0) NULL else sp$PointNo
n=nat::as.neuron(swc, origin=soma_id_in_neuron, skid=skid)
# add all fields from input list except for nodes themselves
n[names(res[-1])]=res[-1]
# we expect connectors field to be null when there are no connectors
if(length(n$connectors)<1) n$connectors=NULL
class(n)=c('catmaidneuron', 'neuron')
n
}
''')
concat_neurons_local = robjects.r('''
get.neuron.catmaid_local<-function(skid, sk_data, pid=1L, ...) {
n=sk_data[[toString(skid)]]
n
}
concat.neurons.catmaid_local<-function(skids, sk_data, pid=1L, OmitFailures=NA, df=NULL,
fetch.annotations=FALSE, ...) {
# Assume is list of integers
if(is.null(df)) {
names(skids)=as.character(skids)
df=data.frame(pid=pid, skid=skids,
# We don't need full names, otherwise use:
# name=catmaid_get_neuronnames(skids, pid),
name=names(skids),
stringsAsFactors = F)
rownames(df)=names(skids)
R('''data <- read.csv("%s", header = T, stringsAsFactors = F, sep = "\t")''' %
infile)
for name in names_list:
outname = os.path.join("genome_coverage.dir", name) + ".coverage.png"
R('''data2 <- data[data$track == "%s",]''' % name)
R('''png("%s", height = 500, width = 1000)''' % outname)
R('''par(mai = c(2,1,1,1))''')
R('''barplot(data2$pcoverage[order(data2$pcoverage)]
, names.arg = data2$gi[order(data2$pcoverage)]
, las = 2
, cex.names = 0.75
, ylim = c(0,1)
, col = "blue")''')
R('''abline(h = 0.95, cex = 2)''')
R('''abline(h = 0.90, cex = 2, lty = 2)''')
R["dev.off"]()
import functools
import numpy as np
from scipy.stats import norm as ndist
import regreg.api as rr
from selection.tests.instance import gaussian_instance
from selection.algorithms.lasso import lasso, ROSI
from knockoffs import lasso_glmnet
import rpy2.robjects as rpy
from rpy2.robjects import numpy2ri
rpy.r('library(knockoff); library(glmnet)')
from rpy2 import rinterface
from core import (infer_full_target,
split_sampler, # split_sampler not working yet
normal_sampler,
logit_fit,
probit_fit)
def simulate(n=200, p=100, s=10, signal=(0, 0), sigma=2, alpha=0.1):
# description of statistical problem
X, y, truth = gaussian_instance(n=n,
p=p,
s=s,
equicorrelated=False,
import os
import sys
import time
import tempfile
import numpy as np
from numpy.lib.recfunctions import append_fields
from scipy.linalg import diagsvd
from scipy.stats import rankdata
import scipy.stats.distributions as dists
from joblib import Parallel,delayed
# Connect to an R session
import rpy2.robjects
r = rpy2.robjects.r
# For a Pythonic interface to R
from rpy2.robjects.packages import importr
from rpy2.robjects import Formula, FactorVector
from rpy2.robjects.environments import Environment
from rpy2.robjects.vectors import DataFrame, Vector, FloatVector
from rpy2.rinterface import MissingArg,SexpVector
# Make it so we can send numpy arrays to R
import rpy2.robjects.numpy2ri
rpy2.robjects.numpy2ri.activate()
#import rpy2.robjects as ro
#import rpy2.robjects.numpy2ri as numpy2ri
#ro.conversion.py2ri = numpy2ri
#numpy2ri.activate()
if reg_num_reads >= num_reads_per_plot:
break
if reg_num_reads < num_reads_per_plot:
th.warning_message(
'Fewer reads found than requested for this region.')
pass
try:
OldSegDat = _R_DF.rbind(*OldSegDat)
except:
OldSegDat = None
NewSegDat = _R_DF.rbind(*NewSegDat)
SigDat = _R_DF.rbind(*SigDat)
if VERBOSE: th.status_message('Plotting.')
r.r(resource_string(__name__, 'R_scripts/plotMultiReadCorr.R').decode())
r.r('pdf("' + pdf_fn + '", height=5, width=11)')
if include_orig_bcs and OldSegDat is not None:
r.globalenv[str('plotMultiReadCorr')](OldSegDat, NewSegDat, SigDat)
else:
r.globalenv[str('plotMultiReadCorrNoOrig')](NewSegDat, SigDat)
r.r('dev.off()')
return
If groups is *True*, the variable ``groups`` is
used for colouring. If *False*, the groups are
determined by sample labels.
'''
R('''suppressMessages(library(ggplot2))''')
R('''pca = prcomp(t(countsTable))''')
# Build factor groups by splitting labels at "."
R('''colour=groups''')
R('''shape=0''')
R('''size=1''')
if groups is False:
R('''mm = matrix(
unlist(sapply(colnames(countsTable),strsplit,'[.]')),
nrow=length(colnames(countsTable)),
byrow=T)''')
nrows, nlevels = R('''dim(mm)''')
if nlevels > 1:
R('''colour=mm[,1]''')
if nlevels > 2:
R('''shape=mm[,2]''')
try:
R('''p1 = ggplot(
as.data.frame(pca$x),
aes(x=PC1, y=PC2,
colour=colour,
shape=shape,
label=rownames(pca$x))) \
+ geom_text(size=4, vjust=1) \
+ geom_point()''')
R('''p2 = qplot(x=PC1, y=PC3,
data = as.data.frame(pca$x),
p = "P.Value"
elif threshold_stat == "padj":
p = "adj.P.Val"
else:
p = "adj.P.Val"
R('''library(ggplot2)''')
R('''dat <- read.csv("%s",
header = T,
stringsAsFactors = F, sep = "\t")''' % infile)
R('''dat$sig <- ifelse(dat$%s < %f & abs(dat$logFC) > %f, 1, 0)'''
% (p, p_threshold, fc_threshold))
R('''a <- aes(x = AveExpr, y = logFC, colour = factor(sig))''')
R('''plot1 <- ggplot(dat, a)''')
R('''plot2 <- plot1 + geom_point(alpha = 0.5)''')
R('''plot3 <- plot2 + scale_colour_manual(values = c("black", "blue"))''')
R('''ggsave("%s")''' % outfile)