How to use the rpy2.robjects.pandas2ri.activate function in rpy2

To help you get started, we’ve selected a few rpy2 examples based on popular ways pandas2ri.activate is used in public projects.

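Calling pandas2ri.activate() installs a global converter so that pandas DataFrames passed to R are translated into R data.frame objects automatically (and R data frames coming back can be turned into pandas objects). A minimal sketch of the pattern, assuming pandas, rpy2 and a working R installation:

import pandas as pd
from rpy2.robjects import r, pandas2ri

pandas2ri.activate()  # enable automatic pandas <-> R conversion

df = pd.DataFrame({"x": [1.0, 2.0, 3.0], "y": [2.1, 3.9, 6.2]})
r.assign("df", df)                         # converted to an R data.frame by the active converter
print(r("summary(lm(y ~ x, data = df))"))  # run R code against the converted frame

The examples below show the same activate-then-convert pattern as it appears in the selected projects.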

github ucbrise / clipper / integration-tests / deploy_R_models.py
import os
import sys
import json
import numpy as np
cur_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, os.path.abspath("%s/.." % cur_dir))
from clipper_admin import Clipper
import time
import subprocess32 as subprocess
import pprint
import random
import socket

from pandas import *
from rpy2.robjects.packages import importr
import rpy2.robjects as ro
from rpy2.robjects import r, pandas2ri
pandas2ri.activate()
stats = importr('stats')
base = importr('base')

headers = {'Content-type': 'application/json'}
app_name = "R_model_test"
model_name = "R_model"

import sys
if sys.version_info[0] < 3:
    from StringIO import StringIO
else:
    from io import StringIO


class BenchmarkException(Exception):
    def __init__(self, value):
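Once pandas2ri.activate() has run, packages imported with importr (stats and base above) accept pandas objects directly. A hypothetical continuation, not part of the Clipper test itself:

import pandas as pd
df = pd.DataFrame({"label": [0, 1, 1], "score": [0.2, 0.8, 0.9]})
print(base.summary(df))             # the DataFrame is converted to an R data.frame on the way in
print(stats.quantile(df["score"]))  # a pandas Series becomes an R numeric vector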

github matteosox / statcast / statcast / better / mixed.py
from inspect import Parameter

import numpy as np
import pandas as pd

from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
from sklearn.metrics import mean_squared_error

from rpy2.robjects.packages import importr
from rpy2.robjects import pandas2ri

from .base import BetterModel
from .spark import GridSearchCV

pandas2ri.activate()
rLME4 = importr('lme4')


class BetterLME4(BaseEstimator, RegressorMixin, BetterModel):

    _params = [Parameter('formulas', Parameter.POSITIONAL_OR_KEYWORD,
                         default=()),
               Parameter('LME4Params', Parameter.POSITIONAL_OR_KEYWORD,
                         default={})]

    def fit(self, X, Y):
        '''Doc String'''

        check_X_y(X, Y, multi_output=True, dtype=None)
        data = pd.concat((pd.DataFrame(X, columns=self.xLabels),
                          pd.DataFrame(Y, columns=self.yLabels)), axis=1)
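The fit would then typically hand the combined frame to lme4 through the activated converter, roughly as in the sketch below (illustrative only, not the project's actual code; the formula is a placeholder):

import rpy2.robjects as ro
formula = ro.Formula('y ~ x + (1 | group)')  # placeholder mixed-model formula
model = rLME4.lmer(formula, data=data)       # data is converted to an R data.frame
print(ro.r['summary'](model))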

github CGATOxford / CGATPipelines / CGATPipelines / PipelineExome.py
def intersectionHeatmap(infiles, outfile):
    ''' calculate the intersection between the infiles and plot'''

    pandas2ri.activate()

    name2genes = {}
    df = pd.DataFrame(columns=["id_1", "id_2", "intersection", "perc"])

    ix = 0
    for inf in infiles:

        name = P.snip(os.path.basename(inf)).split(".")[0]
        name = name.replace(".", "_")

        with IOTools.openFile(inf, "r") as f:
            genes = set()

            for line in f:
                if line[0] == "#":
                    continue

github snowflakedb / SnowAlert / src / runners / baseline_runner.py
def query_log_source(source, time_filter, time_column):
    from rpy2.robjects import pandas2ri

    cutoff = f"DATEADD(day, -{time_filter}, CURRENT_TIMESTAMP())"
    query = f"SELECT * FROM {source} WHERE {time_column} > {cutoff};"
    try:
        data = list(db.fetch(query))
    except Exception as e:
        log.error("Failed to query log source: ", e)
    f = pack(data)
    frame = pandas.DataFrame(f)
    pandas2ri.activate()
    r_dataframe = pandas2ri.py2rpy(frame)
    return r_dataframe
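Here the conversion is requested explicitly with pandas2ri.py2rpy(), which is the rpy2 3.x name (rpy2 2.x called it py2ri). In rpy2 3.x the same result can also be obtained without the global activate() by scoping the converter, roughly as follows (a sketch based on the rpy2 documentation, not SnowAlert code):

import pandas
import rpy2.robjects as ro
from rpy2.robjects import pandas2ri
from rpy2.robjects.conversion import localconverter

frame = pandas.DataFrame({"EVENT_TIME": ["2019-01-01"], "COUNT": [3]})
with localconverter(ro.default_converter + pandas2ri.converter):
    r_dataframe = ro.conversion.py2rpy(frame)  # conversion applies only inside this block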

github ustunb / dcptree / dcptree / data_io.py
    except:

        from dcptree.debug import ipsh
        ipsh()

    r_assign(cvindices, "cvindices")

    # feature matrix
    var_type_to_col_type = {'boolean': 'bool',
                            'categorical': 'str',
                            'numeric': 'float',
                            'ordinal': 'str',
                            }
    col_types = {n: var_type_to_col_type[data['variable_types'][n]] for n in data['variable_names']}

    pandas2ri.activate()

    X_df = pd.DataFrame(data = data['X'])
    X_df.columns = data['variable_names']
    X_df = X_df.astype(col_types)
    rn.r.assign('X', X_df)

    # test set
    has_test_set = ('X_test' in data) and ('Y_test' in data) and ('sample_weights_test' in data)
    if has_test_set:
        X_test_df = pd.DataFrame(data = data['X_test'])
        X_test_df.columns = data['variable_names']
        X_test_df = X_test_df.astype(col_types)
        rn.r.assign('X_test', pandas2ri.py2ri(X_test_df))
        r_assign(data['Y_test'], 'Y_test')
        r_assign(data['sample_weights_test'], 'sample_weights_test')
    else:
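Note that pandas2ri.py2ri() used for X_test above is the rpy2 2.x spelling; rpy2 3.x renamed it to py2rpy(). A small compatibility shim (a hypothetical helper, not part of dcptree):

try:
    _df_to_r = pandas2ri.py2rpy  # rpy2 >= 3.0
except AttributeError:
    _df_to_r = pandas2ri.py2ri   # rpy2 2.x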

github stephenslab / dsc / src / dsc_io.py
def load_rds(filename, types=None):
    import os
    import pandas as pd, numpy as np
    import rpy2.robjects as RO
    import rpy2.robjects.vectors as RV
    import rpy2.rinterface as RI
    from rpy2.robjects import numpy2ri
    numpy2ri.activate()
    from rpy2.robjects import pandas2ri
    pandas2ri.activate()

    def load(data, types, rpy2_version=3):
        if types is not None and not isinstance(data, types):
            return np.array([])
        # FIXME: I'm not sure if I should keep two versions here
        # rpy2_version 2.9.X is more tedious but it handles BoolVector better
        # rpy2 version 3.0.1 converts bool to integer directly without dealing with
        # NA properly. It gives something like (0,1,-234235).
        # Possibly the best thing to do is to open an issue for it to the developers.
        if rpy2_version == 2:
            # below works for rpy2 version 2.9.X
            if isinstance(data, RI.RNULLType):
                res = None
            elif isinstance(data, RV.BoolVector):
                data = RO.r['as.integer'](data)
                res = np.array(data, dtype=int)
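The rpy2_version switch in load() has to be supplied by the caller; one way a caller might derive it from the installed package (an assumption, not shown in the excerpt):

import rpy2
rpy2_major = int(rpy2.__version__.split('.')[0])  # e.g. 2 or 3
# result = load(data, types, rpy2_version=rpy2_major)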

github afrendeiro / toolkit / ngs_toolkit / general.py
def deseq_analysis(
        count_matrix, experiment_matrix, comparison_table, formula,
        output_dir, output_prefix,
        overwrite=True, alpha=0.05):
    """
    Perform differential comparisons with DESeq2.
    """
    import pandas as pd
    from tqdm import tqdm
    from rpy2.robjects import numpy2ri, pandas2ri
    import rpy2.robjects as robjects
    numpy2ri.activate()
    pandas2ri.activate()

    def r2pandas_df(r_df):
        import numpy as np
        df = pd.DataFrame(np.asarray(r_df)).T
        df.columns = [str(x) for x in r_df.colnames]
        df.index = [str(x) for x in r_df.rownames]
        return df

    robjects.r('require("DESeq2")')
    _as_formula = robjects.r('as.formula')
    _DESeqDataSetFromMatrix = robjects.r('DESeqDataSetFromMatrix')
    _DESeq = robjects.r('DESeq')
    _results = robjects.r('results')
    _as_data_frame = robjects.r('as.data.frame')

    # order experiment and count matrices in same way
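The helper r2pandas_df rebuilds a pandas DataFrame from an R data.frame by hand. With the converters activated, rpy2 can usually perform that conversion itself; a rough, version-hedged equivalent (not part of ngs_toolkit):

def r_df_to_pandas(r_df):
    # rely on rpy2's own pandas converter; the function name differs by version
    try:
        return pandas2ri.rpy2py(r_df)  # rpy2 >= 3.0
    except AttributeError:
        return pandas2ri.ri2py(r_df)   # rpy2 2.x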

github edouardberthe / ADPPortfolioSelection / adp / generator.py
    def generate(self, S: int) -> np.ndarray:
        from rpy2.robjects import pandas2ri, r as R
        pandas2ri.activate()
        R.assign('Data', self.Data)
        R.assign('N', N)
        R("""
library(rmgarch)

github picrust / picrust2 / scripts / per_sample_functions.py
def main():

    args = parser.parse_args()

    pandas2ri.activate()

    predict_func_probs = read_in_rds(args.rds)

    func_names = predict_func_probs.names

    # Read in and convert input biom table to pandas dataframe.
    # (Based on James Morton's blog post here:
    # http://mortonjt.blogspot.ca/2016/07/behind-scenes-with-biom-tables.html)
    study_seq_counts = biom_to_pandas_df(biom.load_table(args.input))

    exp_marker_copy = pd.read_table(filepath_or_buffer=args.marker,
                                    sep="\t",
                                    index_col="sequence")

    study_seq_counts = norm_by_marker_copies(study_seq_counts,
                                             exp_marker_copy,