How to use the rpy2.robjects module in rpy2

To help you get started, we’ve selected a few rpy2 examples based on popular ways it is used in public projects.

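All of the examples below build on the same basic pattern: rpy2.robjects.r is the embedded R session, and calling it with a string evaluates that string as R code and returns the result as an R object. A minimal sketch of that pattern (not taken from any of the projects below):

import rpy2.robjects as robjects

# Evaluate an R expression; the result comes back as an R vector.
result = robjects.r('sum(1:10)')
print(result[0])  # 55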

github jonathan-taylor / formula / tests / test_design.py View on Github external
    X = random_from_categorical_formula(d, size)

    X = ML.rec_append_fields(X, 'response', np.random.standard_normal(size))

    fname = tempfile.mktemp()
    ML.rec2csv(X, fname)

    Rstr = '''
    data = read.table("%s", sep=',', header=T)
    cur.lm = lm(response ~ %s, data)
    COEF = coef(cur.lm)
    ''' % (fname, d.Rstr)

    rpy2.robjects.r(Rstr)
    remove(fname)
    nR = list(np.array(rpy2.robjects.r("names(COEF)")))

    nt.assert_true('(Intercept)' in nR)
    nR.remove("(Intercept)")
    nF = [str(t).replace("_","").replace("*",":") for t in d.formula.terms]
             
    nR = sorted([sorted(n.split(":")) for n in nR])

    nt.assert_true('1' in nF)
    nF.remove('1')

    nF = sorted([sorted(n.split(":")) for n in nF])
    nt.assert_equal(nR, nF)

    return d, X, nR, nF
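
In the test above, rpy2.robjects.r(Rstr) runs a whole R script, and any variable it defines (COEF here) stays in the R global environment where it can be read back by name. A small hedged sketch of turning such a named R vector into a Python dict (the values are illustrative stand-ins for coef(cur.lm)):

import rpy2.robjects as robjects

robjects.r('COEF <- c("(Intercept)" = 1.5, x = -0.3)')  # stand-in for the fitted coefficients
coef_vec = robjects.r('COEF')
coef_dict = dict(zip(coef_vec.names, coef_vec))
print(coef_dict)  # {'(Intercept)': 1.5, 'x': -0.3}
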
github jaredwo / topowx / twx / infill / infill_post_process.py View on Github external
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
from twx.db.reanalysis import NNRNghData
from scipy import stats
from twx.interp.clibs import clib_wxTopo
import twx.db.ushcn as ushcn
from datetime import datetime
import twx.utils as utils
import mpl_toolkits.basemap as bm

#rpy2
import rpy2
import rpy2.robjects as robjects
from rpy2.robjects.numpy2ri import numpy2ri
robjects.conversion.py2ri = numpy2ri
r = robjects.r

NCDF_CHK_COLS = 50
USE_ALL_IMP_THRESHOLD = np.round(365.25 * 5.0)

RM_STN_FLAG = "RM_STN_FLAG"
RM_STN_DUP = 1
RM_STN_BAD_DATA = 2
RM_STN_NO_TDI = 3

DTYPE_RM_STN = [(STN_ID,"
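
The robjects.conversion.py2ri = numpy2ri assignment above is the legacy way of enabling automatic numpy-to-R conversion; newer rpy2 releases expose the same behaviour through numpy2ri.activate(), as the ptsa_new example further down does. A minimal sketch, assuming numpy and a reasonably recent rpy2:

import numpy as np
import rpy2.robjects as robjects
from rpy2.robjects import numpy2ri

numpy2ri.activate()  # numpy arrays are now converted to R vectors automatically
arr = np.array([1.0, 2.0, 3.0])
print(robjects.r['mean'](arr)[0])  # 2.0
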
github charanpald / APGL / exp / sandbox / predictors / AbstractTreeRankR.py View on Github external
def __loadLeafRanks(self):
        utilFileName = PathDefaults.getSourceDir() + "/apgl/metabolomics/R/Util.R"
        leafRanksFileName = PathDefaults.getSourceDir() + "/apgl/metabolomics/R/MSLeafRanks.R"
        robjects.r["source"](utilFileName)
        robjects.r["source"](leafRanksFileName)
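
robjects.r["source"] is simply R's source() function looked up by name; once a script has been sourced, any function it defines can be looked up the same way and called from Python. A small sketch of that lookup pattern, with the R function defined inline as a stand-in for what Util.R would provide:

import rpy2.robjects as robjects

# Stand-in for robjects.r["source"]("Util.R"): define an R function directly.
robjects.r('util_scale <- function(x) (x - mean(x)) / sd(x)')
util_scale = robjects.r['util_scale']  # look the R function up by name
print(list(util_scale(robjects.FloatVector([1.0, 2.0, 3.0]))))  # [-1.0, 0.0, 1.0]
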
github dchaplinsky / declarations.com.ua / declarations_site / catalog / management / commands / analytics.py View on Github external
def _generate_table(self, declarations):
        """Generates an R data frame table from the list of declarations."""
        decl_table = defaultdict(list)
        for decl in declarations:
            decl_dict = self._map_fields(decl)
            # R DataFrame is column-major.
            for k, v in decl_dict.items():
                decl_table[k].append(v)
        return robjects.DataFrame(
            # Have to translate into a properly typed vector, otherwise R will treat the data in a bad way.
            {k: (robjects.StrVector(v) if k in STR_COLUMNS else robjects.FloatVector(v)) for k, v in decl_table.items()}
        )
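
robjects.DataFrame takes a mapping of column names to R vectors, which is why each column above is wrapped in StrVector or FloatVector first rather than handed over as a plain Python list. A minimal sketch of the same construction with made-up columns:

import rpy2.robjects as robjects

decl_table = {
    'name': robjects.StrVector(['a', 'b', 'c']),
    'income': robjects.FloatVector([100.0, 250.5, 80.0]),
}
r_frame = robjects.DataFrame(decl_table)
print(robjects.r['summary'](r_frame))
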
github CommitAnalyzingService / CAS_CodeRepoAnalyzer / analyzer / linear_reg_model.py View on Github external
def __init__(self, metrics, repo_id, testingCommits):
    """
    @metrics - this is the list of metrics from the TRAINING data set.
    @repo_id - the repository repo_id
    @testingCommits - this is commits from the TESTING data set
    """
    self.metrics = metrics
    self.repo_id = repo_id
    self.stats = importr('stats', robject_translations={'format_perc': '_format_perc'})
    self.base = importr('base')
    self.readcsv = robjects.r['read.csv']
    self.sig_threshold = 0.05
    self.data = None 
    self.commits = testingCommits
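
importr exposes an R package as a Python namespace, replacing dots in R names with underscores; the robject_translations argument above handles the rare case where that renaming would clash with an existing name. Looking a function up directly with robjects.r['read.csv'] keeps the original R name and avoids the issue altogether. A brief sketch, assuming only R's built-in base and stats packages:

import rpy2.robjects as robjects
from rpy2.robjects.packages import importr

base = importr('base')
stats = importr('stats')
read_csv = robjects.r['read.csv']  # original R name, no translation involved
print(stats.rnorm(3))              # R's rnorm(), exposed as a Python callable
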
github cesium-ml / cesium / mltsp / TCP / Algorithms / rpy2_classifiers.py View on Github external
def get_confident_sources(self, combo_result_dict={}, n_sources_per_class=10, prob_thresh=0.5):
        """ Generate a N-list of confident sources which should be a good representations
        of each science class.
        """
        robjects.globalenv['pred'] = robjects.IntVector(combo_result_dict['all.pred'])
        robjects.globalenv['maxprob'] = robjects.FloatVector(combo_result_dict['all_top_prob'])

        # KLUDGEY
        srcid_list = []
        for str_srcid in combo_result_dict['srcid_list']:
            srcid_list.append(int(str_srcid))
        robjects.globalenv['ID'] = robjects.IntVector(srcid_list)

        r_str  = '''
 m = %d
 probThresh= %f
 whichConf = which(maxprob>probThresh) # only look at sources with maxProb>probThresh
 tabConf = table(pred[whichConf]) # class distribution of confident sources
 confAdd = NULL # sources to add
 for(ii in 1:length(tabConf)){
   if(tabConf[ii]>0){ # cycle thru confident classes
     if(tabConf[ii]
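
Assigning into robjects.globalenv is what makes the Python data visible to the R code in r_str above: pred, maxprob and ID can then be referred to by name inside R. A minimal sketch of the same round trip with made-up values:

import rpy2.robjects as robjects

robjects.globalenv['maxprob'] = robjects.FloatVector([0.9, 0.4, 0.8])
robjects.globalenv['probThresh'] = 0.5
n_confident = robjects.r('length(which(maxprob > probThresh))')
print(n_confident[0])  # 2
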
github pennmem / ptsa_new / ptsa / stats / lmer.py View on Github external
from joblib import Parallel,delayed

# Connect to an R session
import rpy2.robjects
r = rpy2.robjects.r

# For a Pythonic interface to R
from rpy2.robjects.packages import importr
from rpy2.robjects import Formula, FactorVector
from rpy2.robjects.environments import Environment
from rpy2.robjects.vectors import DataFrame, Vector, FloatVector
from rpy2.rinterface import MissingArg,SexpVector

# Make it so we can send numpy arrays to R
import rpy2.robjects.numpy2ri
rpy2.robjects.numpy2ri.activate()

# load some required packages
# PBS: Eventually we should try/except these to get people 
# to install missing packages
lme4 = importr('lme4')
rstats = importr('stats')
fdrtool = importr('fdrtool')
ssvd = importr('ssvd')
if hasattr(lme4,'coef'):
    r_coef  = lme4.coef
else:
    r_coef = rstats.coef
if hasattr(lme4,'model_matrix'):
    r_model_matrix = lme4.model_matrix
else:
    r_model_matrix = rstats.model_matrix
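
As the PBS comment above suggests, importr raises an error when the R package is missing, so wrapping the call is a sensible way to give users an actionable message. A hedged sketch of that idea (the exact exception class differs between rpy2 versions, so a broad except is used here):

from rpy2.robjects.packages import importr

try:
    lme4 = importr('lme4')
except Exception as exc:  # e.g. PackageNotInstalledError / RRuntimeError, depending on rpy2 version
    raise ImportError(
        "The R package 'lme4' is required; install it in R with install.packages('lme4')"
    ) from exc
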
github tabdelaal / scRNAseq_Benchmark / Scripts / run_scVI.py View on Github external
    LabelsPath : Cell population annotations file path (.csv).
    CV_RDataPath : Cross validation RData file path (.RData), obtained from Cross_Validation.R function.
    OutputDir : Output directory defining the path of the exported file.
    GeneOrderPath : Gene order file path (.csv) obtained from feature selection, 
    defining the genes order for each cross validation fold, default is NULL.
    NumGenes : Number of genes used in case of feature selection (integer), default is 0.
    '''
    
    # read the Rdata file
    robjects.r['load'](CV_RDataPath)

    nfolds = np.array(robjects.r['n_folds'], dtype = 'int')
    tokeep = np.array(robjects.r['Cells_to_Keep'], dtype = 'bool')
    col = np.array(robjects.r['col_Index'], dtype = 'int')
    col = col - 1 
    test_ind = np.array(robjects.r['Test_Idx'])
    train_ind = np.array(robjects.r['Train_Idx'])

    # read the data
    data = pd.read_csv(DataPath,index_col=0,sep=',')
    labels = pd.read_csv(LabelsPath, header=0,index_col=None, sep=',', usecols = col)

    labels = labels.iloc[tokeep]
    data = data.iloc[tokeep] 
    
    # read the feature file
    if (NumGenes > 0):
        features = pd.read_csv(GeneOrderPath,header=0,index_col=None, sep=',')
        
    os.chdir(OutputDir)
    
    if (NumGenes == 0):
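
robjects.r['load'] is R's load(), so every object stored in the .RData file reappears in the R global environment and can be read back by name, which is what the np.array(robjects.r['n_folds'], ...) lines rely on. One easily missed detail: load() also returns the names of the objects it restored, which is handy when inspecting an unfamiliar file. A hedged sketch (the file name is illustrative):

import rpy2.robjects as robjects

loaded_names = robjects.r['load']('Cross_Validation.RData')  # illustrative .RData path
print(list(loaded_names))  # names of the objects load() just restored into the R session
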
github Nexedi / dream / dream / simulation / outputanalysis / ConfidenceIntervals.py View on Github external
def ConfidIntervals(self,data,p): 
  
        data=robjects.FloatVector(data)  # Convert the Python list into an R float vector so rpy2 can pass it to R
        alpha=1-p
        rsqrt=robjects.r['sqrt']     #Call square root function - R function 
        rsd=robjects.r['sd']       #Call standard deviation function - R function
        rmean=robjects.r['mean']   #Call mean function - R function
        t=len(data)
        n=rsqrt(t)
        b=rsd(data)

        rqt=robjects.r['qt']       # Call qt, the quantile function (inverse CDF) of the t distribution
        q=rqt((1-(alpha/2)),t-1)
        m=rmean(data)               #Calculate the sample average value

        me=q[0]*(b[0]/n[0])        #Calculate the margin of error

        #Calculate the lower and the upper bound 
        lo=m[0]-me
        up=m[0]+me
        l=[lo,up]
        return l
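
Every one of those R calls returns an R vector even when the result is a single number, which is why the code indexes q[0], b[0], n[0] and m[0] before doing arithmetic in Python. A two-line illustration:

import rpy2.robjects as robjects

m = robjects.r['mean'](robjects.FloatVector([2.0, 4.0, 6.0]))
print(len(m), m[0])  # 1 4.0 -- a length-1 R vector, not a bare Python float
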
github mcveanlab / treeseq-inference / src / ARG_metrics.py View on Github external
import warnings
import rpy2.robjects as robjects
import rpy2.rinterface as rinterface
from rpy2.robjects.packages import importr

# Suppress noisy warnings from R.
if hasattr(rinterface, "RRuntimeWarning"):
    warnings.simplefilter("ignore", rinterface.RRuntimeWarning)
else:
    # older versions of rpy2 don't have RRuntimeWarning, they use UserWarning instead
    warnings.simplefilter("ignore", UserWarning)

try:
    ape = importr("ape")
    ARGmetrics = importr("ARGmetrics")
    if not robjects.r('packageVersion("ARGmetrics") >= "0.0.2.0"')[0]:
        raise ImportError
except (ImportError, rinterface.RRuntimeError):
    logging.warning("ARGmetrics in R not installed or too old (requires >= 0.0.2.0). "
    'Install the latest version from source by syncing with git and doing e.g.\n'
    '> R CMD INSTALL ARGmetrics')
    raise


def get_metric_names():
    """
    Returns the list of the names of the computed metrics.
    """
    # We could do it with :
    # return list(pandas.DataFrame(columns=ARGmetrics.genome_trees_dist().names))
    # but it's extremely slow. Just return the list of strings instead.
    return [n for n in ARGmetrics.genome_trees_dist().names if n!='rgt']
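
The version guard above runs packageVersion() inside R and reads the single logical value back in Python; the same one-liner works for any installed R package. A minimal sketch (assuming the R package ape is installed, as the try block above also requires):

import rpy2.robjects as robjects

if not robjects.r('packageVersion("ape") >= "3.0"')[0]:  # length-1 logical vector
    raise ImportError("R package 'ape' is too old")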