How to use the pandas.read_table function in pandas

To help you get started, we’ve selected a few pandas.read_table examples based on popular ways it is used in public projects.

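pandas.read_table reads a delimited text file into a DataFrame; by default it behaves like pandas.read_csv with sep='\t', which is why it shows up so often for .tsv files in the projects below. A minimal sketch (the file names here are placeholders, not files from any of these projects):

import pandas as pd

# tab-separated file; use the first column as the DataFrame index
df = pd.read_table("scores.tsv", index_col=0)

# with an explicit separator, read_table is equivalent to read_csv
df_csv = pd.read_table("data.csv", sep=",")

print(df.shape)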

github vanheeringen-lab / gimmemotifs / test / test_maelstrom.py
    def test1_maelstrom(self):
        """ Test Motif Activity by Ensemble Learning (maelstrom) """

        run_maelstrom(
            self.clusters,
            "mm10",
            self.outdir,
            score_table=self.score_table,
            count_table=self.count_table,
            plot=False,
        )
        df = pd.read_table(self.outfile, index_col=0, comment="#")
        print(df.shape)
        self.assertEqual((623, 4), df.shape)

        for fname in glob(os.path.join(self.outdir, "activity*")):
            os.unlink(fname)
        for fname in glob(os.path.join(self.outdir, "gimme.verte*")):
            os.unlink(fname)
        os.unlink(self.outfile)
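Here read_table loads the maelstrom output with index_col=0 (first column as the index) and comment="#" (everything after a # on a line is ignored), so comment lines in the results file do not end up as data rows.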
github rraadd88 / beditor / beditor / lib / plot_res.py
            plot_dist_dguides(dstep,dpam,plotpf)
        else:
            logging.warning(f'not found: {dstepp}')

    # make plot_dna_features_view
    stepi=3
    plotd=f"{datad}/plot_d{cfg[stepi].replace('/','').split('_')[-1]}_dna_features_view"
    plotps=glob(plotd+'/*')
    if len(plotps)==0 or cfg['force']:
        dguidesp=f"{cfg[stepi]}/d{cfg[stepi].replace('/','').split('_')[-1]}.tsv"
        dsequencesp=f"{cfg[stepi-2]}/d{cfg[stepi-2].replace('/','').split('_')[-1]}.tsv"
        if exists(dguidesp):
            logging.info('plot_dna_features_view')
            plot_dna_features_view(cfg,
                                   dsequences=del_Unnamed(pd.read_table(dsequencesp,keep_default_na=False)).drop_duplicates(),
                                   dguides=del_Unnamed(pd.read_table(dguidesp,keep_default_na=False)).drop_duplicates(),
                                   plotd=plotd,more=False)
        else:
            logging.warning(f'not found: {dguidesp}')
            
#     # step2 # make submap #FIXME get all the columns used for plotting in the dguides.
#     stepi=3
#     plotp=f"{datad}/plot_d{cfg[stepi].replace('/','').split('_')[-1]}_submap_used_for_mutagenesis"
#     plotps=glob(plotp+'*')
#     if len(plotps)==0 or cfg['force']:
#         plotpf=plotp+"_{mutation_type}.png"
#         dstepp=f"{cfg[stepi]}/d{cfg[stepi].replace('/','').split('_')[-1]}.tsv"
#         dstep=del_Unnamed(pd.read_table(dstepp)).drop_duplicates()
#         logging.info('plot_submap_possibilities')
#         plot_submap_possibilities(dmutagenesis=dstep,
#                                   plotpf=plotpf,test=False)
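Both read_table calls here pass keep_default_na=False, so empty fields and strings such as "NA" are kept as literal strings rather than converted to NaN, which matters when a column holds sequence identifiers.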
github poldracklab / fitlins / fitlins / interfaces / visualizations.py
    def _load_data(self, fname):
        _, _, ext = split_filename(fname)
        if ext == '.tsv':
            return pd.read_table(fname, index_col=0)
        elif ext in ('.nii', '.nii.gz', '.gii'):
            return nb.load(fname)
        raise ValueError("Unknown file type!")
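The helper dispatches on file extension: .tsv files go to read_table, whose default sep='\t' matches the format, while NIfTI/GIfTI images are loaded with nibabel (nb).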
github iseekwonderful / PyPathway / pypathway / analysis / network / __init__.py
    def check_for_job_done(mission_id):
        while True:
            r = requests.get('http://www.enrichnet.org/filecreated.php?temp={}'.format(mission_id))
            if len(r.text) < len("Success"):
                time.sleep(20)
                continue
            if r.status_code == 404:
                time.sleep(20)
                continue
            elif r.status_code == 200:
                break
            else:
                raise Exception("Unclear status code while query the server")
        r = requests.get("http://www.enrichnet.org/file2.php?filen=C:/xampp/htdocs/enrichnet/pages/"
                         "tmp/{}/enrichnet_ranking_table.txt".format(mission_id))
        return pd.read_table(StringIO(r.text))
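Because read_table accepts any file-like object, wrapping the downloaded response text in StringIO lets the ranking table be parsed directly from memory, without writing a temporary file.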
github BayAreaMetro / travel-model-one / utilities / PBA40 / metrics / countTrips.py
    active_adult_trips_df.loc[active_adult_trips_df['trip_mode_str']=='wlk_lrf_drv', 'active_mode'] = 'wTrnD'
    active_adult_trips_df.loc[active_adult_trips_df['trip_mode_str']=='wlk_exp_drv', 'active_mode'] = 'wTrnD'
    active_adult_trips_df.loc[active_adult_trips_df['trip_mode_str']=='wlk_hvy_drv', 'active_mode'] = 'wTrnD'
    active_adult_trips_df.loc[active_adult_trips_df['trip_mode_str']=='wlk_com_drv', 'active_mode'] = 'wTrnD'

    active_adult_trips_df.loc[:,'active_minutes'] = 0.0

    # print active_adult_trips_df['active_mode'].value_counts()
    # print active_adult_trips_df['time_period'].value_counts()
    active_adult_trips_df_len = len(active_adult_trips_df)

    # figure out how many minutes of activity per trip: join with activeTimeSkims
    for time_period in ['EA','AM','MD','PM','EV']:
        filename = os.path.join("database", "ActiveTimeSkimsDatabase%s.csv" % time_period)
        print "%s Reading %s" % (datetime.datetime.now().strftime("%x %X"), filename)
        skim_df  = pandas.read_table(filename, sep=",")
        skim_df.loc[:, 'time_period'] = time_period

        for active_mode in ['walk','bike','wTrnW','dTrnW','wTrnD']:
            # get the skim for this mode
            skim_tp_df = skim_df.loc[:,['orig','dest','time_period',active_mode]]
            skim_tp_df.loc[:,'active_mode']=active_mode
            skim_tp_df.rename(columns={'orig':'orig_taz','dest':'dest_taz'}, inplace=True)

            # join it, adding active_mode-named column
            active_adult_trips_df = pandas.merge(left    =active_adult_trips_df,
                                                 right   =skim_tp_df,
                                                 on      =['orig_taz','dest_taz','time_period','active_mode'],
                                                 how     ='left')
            # set those minutes
            active_adult_trips_df.loc[active_adult_trips_df[active_mode].notnull(), 'active_minutes'] = active_adult_trips_df[active_mode]
            # drop the new column
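This is a Python 2 script (note the print statements), and read_table is called with sep=",", which makes it behave exactly like pandas.read_csv for these comma-separated skim files.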
github cangermueller / deepcpg / scripts / tomtom_format.py
        logging.basicConfig(filename=opts.log_file,
                            format='%(levelname)s (%(asctime)s): %(message)s')
        log = logging.getLogger(name)
        if opts.verbose:
            log.setLevel(logging.DEBUG)
        else:
            log.setLevel(logging.INFO)
        log.debug(opts)

        motifs = dict()
        for meme_file in opts.meme_db:
            log.info('Parse %s' % (meme_file))
            motifs.update(parse_meme(meme_file))

        log.info('Enrich %s' % (opts.tomtom_file))
        tom = pd.read_table(opts.tomtom_file)
        tom.rename(columns={'#Query ID': 'Query ID'}, inplace=True)
        tom['Target name'] = ''
        tom['URL'] = ''
        for i in tom.index:
            target = tom.loc[i, 'Target ID']
            if target in motifs:
                tom.loc[i, 'Target name'] = motifs[target][0]
                tom.loc[i, 'URL'] = motifs[target][1]

        t = tom.to_csv(opts.out_file, sep='\t', index=False)
        if t is not None:
            print(t, end='')

        log.info('Done!')
        return 0
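Here read_table is called with its defaults, so the tab-separated TomTom output is parsed with its first row as the header; the awkward '#Query ID' column name is then renamed before the enriched table is written back out with to_csv(sep='\t').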
github CGATOxford / CGATPipelines / CGATPipelines / PipelineRrbs.py
    def readAndMerge(df, cpg_annotations_inf):
        tmp_df = pd.read_table(cpg_annotations_inf)
        tmp_df.set_index(["contig", "position"], inplace=True)
        tmp_df = tmp_df.join(df, how="inner")
        tmp_df = pd.melt(tmp_df, id_vars=["feature", "CpG_density"])
        return tmp_df
github avallonking / ForestQC / ForestQC / classification.py
def execute_classification(good_variants, bad_variants, grey_variants, model, output_handle, user_features, threshold):
    dir = os.path.dirname(os.path.realpath(good_variants))
    print('Loading data...')
    good = pd.read_table(good_variants)
    bad = pd.read_table(bad_variants)
    grey = pd.read_table(grey_variants)

    pred, prob = classification(good, bad, grey, model, user_features, threshold)
    grey['Probability'] = prob
    grey['Good'] = pred

    predicted_good = grey[grey['Good'] == 1]
    predicted_bad = grey[grey['Good'] == 0]

    print('Number of predicted good variants: ' + str(predicted_good.shape[0]))
    print('Number of predicted bad variants: ' + str(predicted_bad.shape[0]))

    print('\nWriting data...')
    predicted_good.to_csv(dir + '/' + 'predicted_good.' + output_handle, header=None, index = False, sep = '\t', na_rep='NA')
    predicted_bad.to_csv(dir + '/' + 'predicted_bad.' + output_handle, header=None, index = False, sep = '\t', na_rep='NA')
    print('Done.')
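All three variant tables are read with read_table defaults (tab-separated, header inferred from the first row), and the predictions are written back out as tab-separated files via to_csv(..., sep='\t').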
github duolinwang / MusiteDeep / MusiteDeep / methods / DProcess.py
def convertSampleToVector2DList(sampleSeq3DArr, nb_windows, refMatrFileName):
	"""
	Convert the raw data to a probability matrix
	PARAMETER
	---------
	sampleSeq3DArr: 3D List
		List -  numpy matrix(3D)
	Sample List: List (nb_windows, nb_samples, SEQLen/nb_windows , 100)
	"""
	
	rawDataFrame = pd.read_table(refMatrFileName, sep='\t', header=None)
	
	raw_data_seq_index_df = pd.DataFrame({'seq' : rawDataFrame[0] , 'indexing':rawDataFrame.index})
	raw_data_seq_df_index_dict = raw_data_seq_index_df.set_index('seq')['indexing'].to_dict()

	
	nb_raw_data_frame_column = len(rawDataFrame.columns)
	
	nb_sample = sampleSeq3DArr.shape[0]
	len_seq = len(sampleSeq3DArr[1]) 
	re_statement =  ".{%d}" % (nb_windows)
	
	
	probMatr_list = []
	for tmp_idx in range(nb_windows):
		probMatr_list.append( np.zeros((nb_sample, int((len_seq - tmp_idx)/nb_windows) , 100)) )
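The reference matrix is read with header=None, so the file is assumed to have no header row and the columns get integer labels, which is why the sequence column is addressed as rawDataFrame[0].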
github buaazhangfan / CS294-112-Deep-Reinforcement-Learning / hw5 / meta / plot.py
def get_datasets(fpath, condition=None):
    unit = 0
    datasets = []
    for root, dir, files in os.walk(fpath):
        if 'log.txt' in files:
            param_path = open(os.path.join(root,'params.json'))
            params = json.load(param_path)
            exp_name = params['exp_name']
            
            log_path = os.path.join(root,'log.txt')
            experiment_data = pd.read_table(log_path)

            experiment_data.insert(
                len(experiment_data.columns),
                'Unit',
                unit
                )        
            experiment_data.insert(
                len(experiment_data.columns),
                'Condition',
                condition or exp_name
                )

            datasets.append(experiment_data)
            unit += 1

    return datasets
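read_table with no explicit sep expects log.txt to be tab-delimited; for a whitespace-aligned log you would pass something like sep=r'\s+' instead.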