Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Module-level setup for the R model test script.
#
# `os` and `sys` are imported first because the path manipulation below
# uses both of them before any other import runs.
import os
import sys
import json
import numpy as np

# Put the parent directory of this file at the front of sys.path; this must
# happen before `clipper_admin` is imported on the next line.
cur_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, os.path.abspath("%s/.." % cur_dir))
from clipper_admin import Clipper
import time
import subprocess32 as subprocess
import pprint
import random
import socket
# NOTE(review): wildcard import kept as-is; names it provides may be used
# further down the file.
from pandas import *
from rpy2.robjects.packages import importr
import rpy2.robjects as ro
from rpy2.robjects import r, pandas2ri

# Turn on rpy2's pandas conversion, then load the R packages used below.
pandas2ri.activate()
stats = importr('stats')
base = importr('base')

# Constants shared by the rest of the script.
headers = {'Content-type': 'application/json'}
app_name = "R_model_test"
model_name = "R_model"

# StringIO lives in a different module on Python 2 and Python 3.
if sys.version_info[0] < 3:
    from StringIO import StringIO
else:
    from io import StringIO
class BenchmarkException(Exception):
def __init__(self, value):
from inspect import Parameter
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
from sklearn.metrics import mean_squared_error
from rpy2.robjects.packages import importr
from rpy2.robjects import pandas2ri
from .base import BetterModel
from .spark import GridSearchCV
pandas2ri.activate()
rLME4 = importr('lme4')
class BetterLME4(BaseEstimator, RegressorMixin, BetterModel):
_params = [Parameter('formulas', Parameter.POSITIONAL_OR_KEYWORD,
default=()),
Parameter('LME4Params', Parameter.POSITIONAL_OR_KEYWORD,
default={})]
def fit(self, X, Y):
'''Doc String'''
check_X_y(X, Y, multi_output=True, dtype=None)
data = pd.concat((pd.DataFrame(X, columns=self.xLabels),
pd.DataFrame(Y, columns=self.yLabels)), axis=1)
def intersectionHeatmap(infiles, outfile):
''' calculate the intersection between the infiles and plot'''
pandas2ri.activate()
name2genes = {}
df = pd.DataFrame(columns=["id_1", "id_2", "intersection", "perc"])
ix = 0
for inf in infiles:
name = P.snip(os.path.basename(inf)).split(".")[0]
name = name.replace(".", "_")
with IOTools.openFile(inf, "r") as f:
genes = set()
for line in f:
if line[0] == "#":
continue
def query_log_source(source, time_filter, time_column):
    """Query recent rows from a log source and return them as an R object.

    Builds a ``SELECT *`` statement restricted to rows whose ``time_column``
    is newer than ``time_filter`` days before the current timestamp, runs it
    through ``db.fetch``, passes the rows through ``pack`` and converts the
    resulting pandas DataFrame with ``pandas2ri.py2rpy``.

    Args:
        source: Table/view name; interpolated verbatim into the SQL text.
        time_filter: Number of days to look back; interpolated verbatim.
        time_column: Column compared against the cutoff; interpolated
            verbatim.

    Returns:
        The value produced by ``pandas2ri.py2rpy`` for the result frame.

    Raises:
        Exception: Whatever ``db.fetch`` raised. The error is logged and then
            re-raised, instead of falling through to an ``UnboundLocalError``
            on the never-assigned ``data``.
    """
    from rpy2.robjects import pandas2ri

    # NOTE(review): all three arguments are spliced straight into the SQL
    # string (identifiers cannot be bound as parameters). Callers must pass
    # trusted values only.
    cutoff = f"DATEADD(day, -{time_filter}, CURRENT_TIMESTAMP())"
    query = f"SELECT * FROM {source} WHERE {time_column} > {cutoff};"

    try:
        data = list(db.fetch(query))
    except Exception as e:
        # NOTE(review): if `log` is a stdlib logging.Logger this call needs a
        # "%s" placeholder for `e`; left unchanged because `log` is defined
        # outside this block -- confirm.
        log.error("Failed to query log source: ", e)
        # Without this, execution continued with `data` unbound and the real
        # failure was masked by an UnboundLocalError at `pack(data)`.
        raise

    # `pack` is defined elsewhere in the file; presumably it reshapes the
    # fetched rows into something DataFrame-friendly -- verify.
    packed = pack(data)
    frame = pandas.DataFrame(packed)

    pandas2ri.activate()
    r_dataframe = pandas2ri.py2rpy(frame)
    return r_dataframe
except:
from dcptree.debug import ipsh
ipsh()
r_assign(cvindices, "cvindices")
# feature matrix
var_type_to_col_type = {'boolean': 'bool',
'categorical': 'str',
'numeric': 'float',
'ordinal': 'str',
}
col_types = {n: var_type_to_col_type[data['variable_types'][n]] for n in data['variable_names']}
pandas2ri.activate()
X_df = pd.DataFrame(data = data['X'])
X_df.columns = data['variable_names']
X_df = X_df.astype(col_types)
rn.r.assign('X', X_df)
# test set
has_test_set = ('X_test' in data) and ('Y_test' in data) and ('sample_weights_test' in data)
if has_test_set:
X_test_df = pd.DataFrame(data = data['X_test'])
X_test_df.columns = data['variable_names']
X_test_df = X_test_df.astype(col_types)
rn.r.assign('X_test', pandas2ri.py2ri(X_test_df))
r_assign(data['Y_test'], 'Y_test')
r_assign(data['sample_weights_test'], 'sample_weights_test')
else:
def load_rds(filename, types=None):
import os
import pandas as pd, numpy as np
import rpy2.robjects as RO
import rpy2.robjects.vectors as RV
import rpy2.rinterface as RI
from rpy2.robjects import numpy2ri
numpy2ri.activate()
from rpy2.robjects import pandas2ri
pandas2ri.activate()
def load(data, types, rpy2_version=3):
if types is not None and not isinstance(data, types):
return np.array([])
# FIXME: I'm not sure if I should keep two versions here
# rpy2_version 2.9.X is more tedious but it handles BoolVector better
# rpy2 version 3.0.1 converts bool to integer directly without dealing with
# NA properly. It gives something like (0,1,-234235).
# Possibly the best thing to do is to open an issue for it to the developers.
if rpy2_version == 2:
# below works for rpy2 version 2.9.X
if isinstance(data, RI.RNULLType):
res = None
elif isinstance(data, RV.BoolVector):
data = RO.r['as.integer'](data)
res = np.array(data, dtype=int)
def deseq_analysis(
count_matrix, experiment_matrix, comparison_table, formula,
output_dir, output_prefix,
overwrite=True, alpha=0.05):
"""
Perform differential comparisons with DESeq2.
"""
import pandas as pd
from tqdm import tqdm
from rpy2.robjects import numpy2ri, pandas2ri
import rpy2.robjects as robjects
numpy2ri.activate()
pandas2ri.activate()
def r2pandas_df(r_df):
    """Build a pandas DataFrame from an R data-frame-like object.

    The values of ``r_df`` are read with ``np.asarray`` and the resulting
    frame is transposed. Column labels come from ``r_df.colnames`` and row
    labels from ``r_df.rownames``, each converted to ``str``.

    Relies on ``pd`` (pandas) being available from the surrounding scope.
    """
    import numpy as np

    # Transpose after construction so the label assignments below line up
    # with colnames/rownames.
    frame = pd.DataFrame(np.asarray(r_df)).T
    frame.columns = list(map(str, r_df.colnames))
    frame.index = list(map(str, r_df.rownames))
    return frame
robjects.r('require("DESeq2")')
_as_formula = robjects.r('as.formula')
_DESeqDataSetFromMatrix = robjects.r('DESeqDataSetFromMatrix')
_DESeq = robjects.r('DESeq')
_results = robjects.r('results')
_as_data_frame = robjects.r('as.data.frame')
# order experiment and count matrices in same way
def generate(self, S: int) -> np.ndarray:
from rpy2.robjects import pandas2ri, r as R
pandas2ri.activate()
R.assign('Data', self.Data)
R.assign('N', N)
R("""
library(rmgarch)
def main():
args = parser.parse_args()
pandas2ri.activate()
predict_func_probs = read_in_rds(args.rds)
func_names = predict_func_probs.names
# Read in and convert input biom table to pandas dataframe.
# (Based on James Morton's blog post here:
# http://mortonjt.blogspot.ca/2016/07/behind-scenes-with-biom-tables.html)
study_seq_counts = biom_to_pandas_df(biom.load_table(args.input))
exp_marker_copy = pd.read_table(filepath_or_buffer=args.marker,
sep="\t",
index_col="sequence")
study_seq_counts = norm_by_marker_copies(study_seq_counts,
exp_marker_copy,