Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
----------
segmentation : :obj:`dict`
Dictionary with CNV matrices for each resolution.
"""
# TODO: implement distributed mode
import warnings
from tqdm import tqdm
import rpy2
from rpy2.rinterface import RRuntimeWarning
from rpy2.robjects import numpy2ri, pandas2ri
from rpy2.robjects.packages import STAP
warnings.filterwarnings("ignore", category=RRuntimeWarning)
numpy2ri.activate()
pandas2ri.activate()
DNAcopy = rpy2.robjects.packages.importr("DNAcopy")
if matrix is None:
matrix = self.matrix_norm
elif isinstance(matrix, str):
matrix = getattr(self, matrix)
if resolutions is None:
resolutions = self.resolutions
if samples is None:
samples = self.samples
segmentation = dict()
for resolution in tqdm(resolutions, desc="Resolution"):
chrom = np.array(list(map(lambda x: x[0], matrix[resolution].index.str.split(":"))))
start = np.array(
list(
def full_converter() -> conversion.Converter:
    """Build the combined converter used for "anndata conversion".

    The new converter is created from ``conversion.converter`` as its
    template while pandas conversion is activated, and the scipy2ri converter
    and this module's own ``converter`` are then overlaid on it, in that order.

    Returns:
        The newly built ``conversion.Converter``.
    """
    # pandas2ri.activate() changes global conversion state; it is only needed
    # while the template is captured, so always undo it, even when building
    # the converter fails.
    pandas2ri.activate()
    try:
        new_converter = conversion.Converter(
            "anndata conversion", template=conversion.converter
        )
    finally:
        pandas2ri.deactivate()

    overlay_converter(scipy2ri.converter, new_converter)
    # overwrite the scipy2ri Sexp4 converter and add our others
    overlay_converter(converter, new_converter)
    return new_converter
dims = rmatrix.names # map from dim to levels
# print(dims)
coords = {}
dim_names = []
if len(dims) == 1:
dname = 'dimension_0'
dim_names += [dname]
levels = list(dims[0])
coords.update({dname: levels})
else:
for dname in dims.names:
dim_names += [dname]
levels = list(dims.rx(dname)[0])
coords.update({dname: levels})
with rpy2.robjects.conversion.localconverter(ro.default_converter + rpy2.robjects.pandas2ri.converter):
values = ro.conversion.rpy2py(rmatrix)
ar = xr.DataArray(values, dims=dim_names, coords=coords)
return ar
def read_rdata(rdata_fullpath, table_name):
    """
    Load an RData file into the R session and return a table from it as a
    pandas DataFrame, with missing values replaced by 0.

    Progress and per-column NA counts are printed to stdout.

    rdata_fullpath: path to the RData file; backslashes are converted to
                    forward slashes before the path is handed to R.
    table_name:     NOTE(review): currently unused -- the object read back
                    from R is always 'model_summary'. Confirm with callers
                    whether this should select the object instead.

    Returns the pandas DataFrame
    """
    from rpy2.robjects import pandas2ri, r
    pandas2ri.activate()

    def _report_nulls(frame, message_format):
        # Print one line per column that still contains missing values.
        for col in frame.columns:
            nullcount = sum(pandas.isnull(frame[col]))
            if nullcount > 0:
                print(message_format % (nullcount, col))

    # we want forward slashes for R
    rdata_fullpath_forR = rdata_fullpath.replace("\\", "/")
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print("Loading %s" % rdata_fullpath_forR)

    # read in the data from the R session with python
    r['load'](rdata_fullpath_forR)
    table_df = pandas2ri.ri2py(r['model_summary'])

    # report NA values, replace them with 0, then report whatever remains
    _report_nulls(table_df, " Found %5d NA values in column %s")
    table_df = table_df.fillna(0)
    _report_nulls(table_df, " -> Found %5d NA values in column %s")

    print("Read %d lines from %s" % (len(table_df), rdata_fullpath))
    return table_df
# load if MFE library is installed
mfe = importr(mfe_name)
else:
# if MFE not found
raise RLibNotFound(mfe_name)
# extracting meta-features from R MFE
if not isinstance(X, pd.DataFrame):
    X = pd.DataFrame(X)
# Test y itself here: X is always a DataFrame by this point, so guarding on X
# would leave y unconverted.
if not isinstance(y, pd.DataFrame):
    y = pd.DataFrame(y)
pandas2ri.activate()
result = mfe.metafeatures(X, y, self.group, self.summary)
pandas2ri.deactivate()
return pd.DataFrame(np.array(result), index=np.array(result.names))
def get_tf_factor(var, from_to, value_col="IMPUTED"):
    """Call the R function ``tf_factor_tbl`` and return its result as a DataFrame.

    ``var`` is turned into an R factor and then into a character vector,
    ``from_to`` into an R integer vector; both are passed to ``tf_factor_tbl``
    together with ``value_col``. The first element of ``var`` is printed and a
    garbage collection is forced before returning.

    NOTE(review): ``tf_factor_tbl`` is looked up in the R session by name and
    is presumably defined or sourced elsewhere -- confirm.
    """
    as_character = r['as.character']

    # Python inputs -> R vectors
    factor_as_text = as_character(robjects.FactorVector(var))
    from_to_vec = robjects.IntVector(from_to)

    # The R side is handed a character vector (as.character is applied again,
    # exactly as before).
    r_table = r['tf_factor_tbl'](
        as_character(factor_as_text),
        from_to_vec,
        value_col,
    )
    frame = pandas2ri.ri2py_dataframe(r_table)

    print(var[0])
    gc.collect()
    return frame
brewer.pal(length(levels(fact_df$factor_value)),"Dark2"))(
length(levels(fact_df$factor_value)))
side_colours = colours[as.numeric((fact_df$factor_value))]
print(side_colours)
# plot
png("%(plotfile)s", width=1000, heigh=1000)
heatmap.3(as.dist(1- as.matrix(int_df)),
Rowv=FALSE, Colv=FALSE,
ColIndividualColors = side_colours,
RowIndividualColors = side_colours,
breaks=100, main="%(factor)s")
dev.off()
}
''' % locals())
plotHeatmap(pandas2ri.py2ri(intersection_pivot),
pandas2ri.py2ri(factors_df))
P.touch(outfile)
from joblib import Parallel, delayed
from collections import defaultdict
import pandas as pd
import numpy as np
from typing import DefaultDict, Dict, Iterable
import pkg_resources, os
from natsort import natsorted
from io import StringIO
# from helper.functions
import logging
from rpy2.robjects import r, pandas2ri
pandas2ri.activate()
from rpy2.robjects.robject import RObject
from rpy2.robjects.packages import importr
importr("S4Vectors")
bioc = importr("GenomicRanges")
from epic.scripts.overlaps.files_to_chromosome_coverage import files_to_chromosome_coverage
__author__ = "Endre Bakken Stovner https://github.com/endrebak/"
__license__ = "MIT"
def overlap_matrix_nucleotides(all_files, nb_cpu):
# type: (Iterable[str], int) -> pd.DataFrame
rles = files_to_chromosome_coverage(all_files, nb_cpu)
nucleotide_overlaps = Parallel(n_jobs=nb_cpu)(delayed(_overlap_matrix_nucleotides)(
**kwargs: :obj:`dict`
Additional keyword arguments to be passed to the DESeq function of DESeq2.
Returns
-------
:obj:`pandas.DataFrame`
Data frame with results, statistics for each feature.
"""
from tqdm import tqdm
from ngs_toolkit.utils import r2pandas_df, recarray2pandas_df
from rpy2.robjects import numpy2ri, pandas2ri, r
from rpy2.robjects.packages import importr
numpy2ri.activate()
pandas2ri.activate()
importr("DESeq2")
# order experiment and count matrices in same way
experiment_matrix = experiment_matrix.set_index("sample_name").loc[count_matrix.columns, :]
# save the matrices just in case
if save_inputs:
count_matrix.to_csv(os.path.join(output_dir, output_prefix + ".count_matrix.tsv"), sep="\t")
experiment_matrix.to_csv(
os.path.join(output_dir, output_prefix + ".experiment_matrix.tsv"), sep="\t"
)
comparison_table.to_csv(
os.path.join(output_dir, output_prefix + ".comparison_table.tsv"), sep="\t"
)
def nnd_hotdeck_using_rpy2(receiver = None, donor = None, matching_variables = None,
z_variables = None, donor_classes = None):
from rpy2.robjects.packages import importr
from rpy2.robjects import pandas2ri
assert receiver is not None and donor is not None
assert matching_variables is not None
pandas2ri.activate()
StatMatch = importr("StatMatch")
if isinstance(donor_classes, str):
assert donor_classes in receiver, 'Donor class not present in receiver'
assert donor_classes in donor, 'Donor class not present in donor'
try:
if donor_classes:
out_NND = StatMatch.NND_hotdeck(
data_rec = receiver,
data_don = donor,
match_vars = pd.Series(matching_variables),
don_class = pd.Series(donor_classes)
)
else:
out_NND = StatMatch.NND_hotdeck(