How to use the rpy2.robjects.DataFrame class in rpy2

To help you get started, we’ve selected a few rpy2 examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github mad-lab / transit / src / pytransit / analysis / corrplot.py View on Github external
# ZINB header line has names of conditions, organized as 3+4*n+3 (4 groups X n conditions)
              if self.filetype=="anova": n = int((len(w)-6)/2) 
              elif self.filetype=="zinb": n = int((len(headers)-6)/4) 
              headers = headers[3:3+n]
              headers = [x.replace("Mean_","") for x in headers]
            vals = [float(x) for x in w[3:3+n]] # take just the columns of means
            qval = float(w[-2])
            if qval<0.05: data.append(w); means.append(vals)
        else: print("filetype not recognized: %s" % self.filetype); sys.exit(-1)
        print("correlations based on %s genes" % len(means))

        genenames = ["%s/%s" % (w[0],w[1]) for w in data]
        hash = {}
        headers = [h.replace("Mean_","") for h in headers]
        for i,col in enumerate(headers): hash[col] = FloatVector([x[i] for x in means])
        df = DataFrame(hash) # can't figure out how to set rownames

        corrplotFunc = self.make_corrplotFunc()
        corrplotFunc(df,StrVector(headers),StrVector(genenames),self.outfile) # pass headers to put cols in order, since df comes from dict

        self.finish()
        self.transit_message("Finished Corrplot")
github catmaid / CATMAID / django / applications / catmaid / control / nat.py View on Github external
val = rn[n]
                        if val is None:
                                val = kv[2]
                        connectors[n][1].append(val)
        r_connectors = [(kv[0], connector_cols[n][1](kv[1]))
                for n, kv in enumerate(connectors)]

        # Tags in Rpy2 format
        r_tags = {}
        for tag, node_ids in raw_tags.items():
               r_tags[tag] = rinterface.IntSexpVector(node_ids)

        # Construct output similar to rcatmaid's request response parsing function.
        skeleton_data = robjects.ListVector({
                'nodes': robjects.DataFrame(rlc.OrdDict(r_nodes)),
                'connectors': robjects.DataFrame(rlc.OrdDict(r_connectors)),
                'tags': robjects.ListVector(r_tags),
        })

        skeleton_envelope = {}
        skeleton_envelope[str(skeleton_id)] = skeleton_data
        cs_r[str(skeleton_id)] = read_neuron_local(str(skeleton_id),
                robjects.ListVector(skeleton_envelope), project_id)

        # Make sure all temporary R values are garbage collected. With many
        # skeletons, this can otherwise become a memory problem quickly (the
        # Python GC doesn't know about the R memory).
        del r_nodes, r_connectors, r_tags, skeleton_data

        # Explicitly garbage collect after each skeleton is loaded.
        gc.collect()
github cstoeckert / iterativeWGCNA / iwgcna / kme.py View on Github external
def write(iteration, kME):
    '''
    Save the eigengene connectivity (kME) dictionary to file.

    The dictionary is wrapped in an R data frame, the frame's row names
    are set from `iteration`, and the result is passed to
    `write_data_frame` together with the output file name and the
    'Iteration' label.
    '''
    output_file = 'eigengene-connectivity.txt'
    kme_frame = ro.DataFrame(kME)
    # Row names are taken from the iteration value supplied by the caller.
    kme_frame.rownames = iteration
    write_data_frame(kme_frame, output_file, 'Iteration')
github nanoporetech / tombo / tombo / _plot_commands.py View on Github external
if zero_start:
                        BaseStart.append(unicode(i))
                    else:
                        BaseStart.append(unicode(i + reg_data.start))
                else:
                    if zero_start:
                        BaseStart.append(unicode(
                            reg_data.end - reg_data.start - i - 1))
                    else:
                        BaseStart.append(unicode(
                            reg_data.end - i - 1))
                Bases.append(base)
                BaseRegion.append(reg_data.reg_id)
                BaseStrand.append(REV_STRAND)

    return r.DataFrame({
        'Position':r.FloatVector(BaseStart),
        'Base':r.StrVector(Bases),
        'Region':r.StrVector(BaseRegion),
        'Strand':r.FactorVector(
            r.StrVector(BaseStrand),
            ordered=True, levels=r.StrVector((FWD_STRAND, REV_STRAND)))})
github bird-house / flyingpigeon / flyingpigeon / sdm.py View on Github external
# vals[vals > 1000] = 0
            vals[isnan(PAmask)] = nan
            indice = '%s_%s' % (var, agg)
            data[str(indice)] = ro.FloatVector(ravel(vals))
            if i == 0:
                form = form + 's(%s, k=3)' % indice
            else:
                form = form + ' + s(%s, k=3)' % indice
        LOGGER.info('form string generated for gam model')
    except:
        msg = 'form string generation for gam failed'
        LOGGER.exception(msg)
        # raise Exception

    try:
        dataf = ro.DataFrame(data)
        eq = ro.Formula(str(form))
        gam_model = mgcv.gam(base.eval(eq),
                             data=dataf,
                             family=stats.binomial(),
                             scale=-1,
                             na_action=stats.na_exclude)
        LOGGER.info('GAM model trained')
    except:
        msg = 'failed to train the GAM model'
        LOGGER.exception(msg)

    # ####################
    # plot response curves
    # ####################

    try:
github cs224 / pybnl / pybnl / bn.py View on Github external
cat_type = ldf[colname].dtype
        levels = rpy2.robjects.StrVector(list(cat_type.categories))
        ordered = cat_type.ordered
        # if colname == 'Bsmt_Full_Bath':
        #     print('col: {}, levels: {}, ordered: {}'.format(colname, levels, ordered))

        lds = ldf[colname]
        factorized_column = None
        if colname in latent:
            factorized_column =  rpy2.robjects.vectors.FactorVector(rpy2.robjects.StrVector(NA_lds), levels=levels, ordered=ordered)
        else:
            factorized_column =  rpy2.robjects.vectors.FactorVector(rpy2.robjects.StrVector(lds), levels=levels, ordered=ordered)
        cols += [factorized_column]

    od = rpy2.rlike.container.OrdDict([(colnames[i], col) for i,col in enumerate(cols)])
    r_df = rpy2.robjects.DataFrame(od)

    return r_df
github nanoporetech / tombo / tombo / _plot_commands.py View on Github external
read_id for _ in range(len(running_diffs))])}
    # add region if applicable
    if region_name is not None:
        old_dat['Region'] = r.StrVector([
            region_name for _ in range(len(old_bases))])
        new_dat['Region'] = r.StrVector([
            region_name for _ in range(len(new_bases))])
        sig_dat['Region'] = r.StrVector([
            region_name for _ in range(len(norm_reg_signal))])
        diff_dat['Region'] = r.StrVector([
            region_name for _ in range(len(running_diffs))])

    old_dat = r.DataFrame(old_dat)
    new_dat = r.DataFrame(new_dat)
    sig_dat = r.DataFrame(sig_dat)
    diff_dat = r.DataFrame(diff_dat)

    return old_dat, new_dat, sig_dat, diff_dat
github nanoporetech / tombo / tombo / _plot_commands.py View on Github external
unicode(r_cov[3]) + r_ovp[3] + " -")
            else:
                if include_chrm:
                    reg_title += ":" + int_i.strand
                reg_title += '  ' + int_i.reg_text
                if include_cov:
                    cov_str = (
                        'Sample (Red): ' + unicode(r_cov[0]) + r_ovp[0] +
                        '; Control (Black): ' + unicode(r_cov[2]) + r_ovp[2]
                    ) if int_i.strand == '+' else (
                        'Sample (Red): ' + unicode(r_cov[1]) + r_ovp[1] +
                        '; Control (Black): ' + unicode(r_cov[3]) + r_ovp[3])
                    reg_title += "  Coverage: " + cov_str
        titles.append(reg_title)

    Titles = r.DataFrame({
        'Title':r.StrVector(titles),
        'Region':r.StrVector([int_i.reg_id for int_i in all_reg_data])})

    return Titles, plot_types