How to use the pandas.read_csv function in pandas

To help you get started, we've selected a few pandas.read_csv examples, based on popular ways it is used in public projects.


github antoinecarme / pyaf / tests / HeartRateTimeSeries / HeartRateTimeSeries_series3.py View on Github
import pandas as pd
import numpy as np
import pyaf.ForecastEngine as autof
import datetime

#get_ipython().magic('matplotlib inline')

trainfile = "https://raw.githubusercontent.com/antoinecarme/TimeSeriesData/master/HeartRateTimeSeries/hr.207"
df = pd.read_csv(trainfile, sep=r',', engine='python', skiprows=0)
df.columns = ['HeartRate']
df['Date'] = range(df.shape[0])  # synthetic integer time index
print(df.head())

lDateVar = 'Date'
lSignalVar = 'HeartRate'

lEngine = autof.cForecastEngine()

H = 10  # forecast horizon

# lEngine.mOptions.enable_slow_mode()
lEngine.mOptions.mDebugPerformance = True
lEngine.train(df, lDateVar, lSignalVar, H)
lEngine.getModelInfo()
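
read_csv accepts a URL directly, and engine='python' lets the separator be a regular expression (a plain ',' works with the default C engine as well). A minimal sketch of the same loading pattern on a hypothetical local file (data/hr.csv is an assumed path):

import pandas as pd

# Hypothetical single-column, headerless CSV.
df = pd.read_csv("data/hr.csv", header=None, names=["HeartRate"])
df["Date"] = range(df.shape[0])  # synthetic integer time index
print(df.head())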
github rconcep / snl-quest / es_gui / tools / valuation / utilities.py View on Github
                    rtCAP = np.append(rtCAP, df_file_rtCAP.values)
                    # NYCA Regulation Movement ($/MW)
                    df_file_rtMOV = df_file.loc[df_file['PTID'] == zoneid, ['NYCA Regulation Movement ($/MW)']]
                    rtMOV = np.append(rtMOV, df_file_rtMOV.values)
                elif (year>=2001 and month>=10 and day_x>=0) or (year>=2001 and month>=11) or \
                     (year>=2002 and not ((year>=2016 and month>=6 and day_x>=23) or (year>=2016 and month>=7) or (year>=2017))):
                    df_file_rtCAP = df_file['East Regulation ($/MWHr)']
                    rtCAP = np.append(rtCAP, df_file_rtCAP.values)
                    df_file_rtMOV = df_file[' NYCA Regulation Movement ($/MW)']
                    rtMOV = np.append(rtMOV, df_file_rtMOV.values)
                    # RT ancillary services data for NYISO starts in July 2004


            if RT_DAM == "DAM" or RT_DAM == "both":
                try:
                    df_file = pd.read_csv(fname_path_ASP_DA, index_col=False)
                except FileNotFoundError:
                    daCAP = np.empty([0])
                    logging.warning('read_nyiso_data: DA ASP file missing, returning empty array.')
                    break

                if (year >= 2016 and month >= 6 and day_x >= 23) or (year >= 2016 and month >= 7) or (year >= 2017):
                    df_file_daCAP = df_file.loc[df_file['PTID'] == zoneid, ['NYCA Regulation Capacity ($/MWHr)']]
                    daCAP = np.append(daCAP, df_file_daCAP.values)
                elif (year >= 2001 and month >= 10 and day_x >= 0) or (year >= 2001 and month >= 11) or \
                     (year >= 2002 and not ((year >= 2016 and month >= 6 and day_x >= 23) or (year >= 2016 and month >= 7) or (year >= 2017))):
                    df_file_daCAP = df_file['East Regulation ($/MWHr)']
                    daCAP = np.append(daCAP, df_file_daCAP.values)
                else:
                    df_file_daCAP = df_file['Regulation ($/MWHr)']
                    daCAP = np.append(daCAP, df_file_daCAP.values)
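
pd.read_csv raises FileNotFoundError for a missing local path, which is why the DAM branch above wraps the call and falls back to an empty array. A minimal sketch of that pattern, with a hypothetical file name and the column name taken from the snippet:

import logging
import numpy as np
import pandas as pd

try:
    df_file = pd.read_csv("asp_da.csv", index_col=False)  # hypothetical path
except FileNotFoundError:
    daCAP = np.empty([0])
    logging.warning("DA ASP file missing, returning empty array.")
else:
    daCAP = df_file["East Regulation ($/MWHr)"].values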
github IssamLaradji / BlockCoordinateDescent / base / utils.py View on Github
def read_csv(path):
    csv = pd.read_csv(path + ".csv")
    return csv
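
pd.read_csv returns a DataFrame, so this thin wrapper simply hides the '.csv' suffix. A hypothetical call:

df = read_csv("results/experiment")  # reads results/experiment.csv into a DataFrame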
github nict-csl / exist / scripts / insert2db / reputation / plugins / cins.py View on Github
    def makeDataframe(self):
        df = pd.DataFrame()
        newline = ''
        try:
            res = requests.get(self.URL)
            if res.status_code != 200:
                return df
            newline = self.cmpFiles(self.DataFilePath, res.text)
        except Exception as e:
            logger.error(e)
        if newline != '':
            with open(self.DataFilePath, 'w') as f:
                f.write(res.text)
            df = pd.read_csv(StringIO(newline), names=self.header)
        return df
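
The first argument of read_csv only needs to be a path or a file-like object, so wrapping downloaded text in io.StringIO parses it without touching disk. A minimal sketch with made-up feed content and column names:

from io import StringIO

import pandas as pd

text = "1.2.3.4,2024-01-01\n5.6.7.8,2024-01-02\n"  # hypothetical feed lines
df = pd.read_csv(StringIO(text), names=["ip", "last_seen"])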
github Kirubaharan / hydrology / tank_lake_water_balance / tmg_hadonahalli / tmg_daily_wb.py View on Github
"""
Overflow
"""
water_balance_df.loc[:, 'overflow(cu.m)'] = 0.000
for index, row in water_balance_df.iterrows():
    obs_volume = row['volume (cu.m)']
    if obs_volume > full_volume:
        overflow_volume = obs_volume - full_volume
        water_balance_df.loc[index.strftime(date_format), 'overflow(cu.m)'] = overflow_volume

print(water_balance_df['overflow(cu.m)'].sum())

"""
Stage vs area linear relationship
"""
stage_area_df = pd.read_csv('/media/kiruba/New Volume/milli_watershed/tmg_lake_bathymetry/stage_volume_area/stage_area_tmg.csv',
                            sep=',', header=0, names=['stage_ft', 'area_sq_ft', 'stage_m', 'total_area_sq_m'])
stage_area_df.drop(['stage_ft', 'area_sq_ft'], inplace=True, axis=1)
# set stage as index
stage_area_df.set_index(stage_area_df['stage_m'], inplace=True)
# create empty column
water_balance_df.loc[:, 'ws_area(sq.m)'] = 0.000
for index, row in water_balance_df.iterrows():
    obs_stage = row['stage(m)']  # observed stage
    if obs_stage >= stage_cutoff:
        x1, x2 = cd.find_range(stage_area_df['stage_m'].tolist(), obs_stage)
        x_diff = x2 - x1
        y1 = stage_area_df.loc[x1, 'total_area_sq_m']
        y2 = stage_area_df.loc[x2, 'total_area_sq_m']
        y_diff = y2 - y1
        slope = y_diff / x_diff
        y_intercept = y2 - (slope * x2)
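
Passing header=0 together with names tells read_csv to discard the file's own header row and use the supplied names instead, which is how the stage-area table above gets its clean column labels. A minimal sketch (the path is hypothetical; the names come from the snippet):

import pandas as pd

stage_area_df = pd.read_csv(
    "stage_area.csv",  # hypothetical path
    sep=",",
    header=0,  # drop the header row stored in the file...
    names=["stage_ft", "area_sq_ft", "stage_m", "total_area_sq_m"],  # ...and use these names
)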
github BladeCoda / Tencent2018_Final_Phrase_Presto / preProcessing.py View on Github
def merge(dtype='train'):
    if dtype == 'train':
        df_join = pd.read_csv('data/origin/train.csv')
        out_path = 'data/merge/merge_train.csv'
    elif dtype == 'test1':
        df_join = pd.read_csv('data/origin/test1.csv')
        out_path = 'data/merge/merge_test1.csv'
    elif dtype == 'test2':
        df_join = pd.read_csv('data/origin/test2.csv')
        out_path = 'data/merge/merge_test2.csv'
    else:
        print('error: unknown dtype')
        return

    print('Loading feature tables')

    df_ad = pd.read_csv('data/origin/adFeature.csv')
    df_user = pd.read_csv('data/origin/userFeature.csv')

    # Merge in the feature info
    print('Merging ad features for %s' % dtype)
    df_join = pd.merge(df_join, df_ad, how='left', on='aid')  # attach ad features
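
The merge step relies on pd.merge with how='left', which keeps every row of the left frame and fills unmatched rows with NaN. A toy sketch of the same join (all values are made up):

import pandas as pd

df_join = pd.DataFrame({"aid": [1, 2, 3], "uid": [10, 20, 30]})
df_ad = pd.DataFrame({"aid": [1, 2], "advertiserId": [7, 8]})
df_join = pd.merge(df_join, df_ad, how="left", on="aid")  # aid 3 gets NaN for advertiserId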
github NifTK / NiftyNet / niftynet / contrib / csv_reader / csv_reader.py View on Github
    def initialise(self, path_to_csv):
        label_df = pd.read_csv(path_to_csv, header=None, names=['subject_ids', 'labels'])
        self._paths = label_df['subject_ids'].values
        self.label_names = list(label_df['labels'].unique())
        self._df = label_df
        self.dims = len(self.label_names)
        
        self._labels = self.to_ohe(label_df['labels'].values)
        return self
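
The to_ohe helper is not shown in this excerpt; pandas' own get_dummies produces the same kind of one-hot matrix from the label column. A minimal sketch (the label file path is hypothetical):

import pandas as pd

label_df = pd.read_csv("labels.csv", header=None, names=["subject_ids", "labels"])
onehot = pd.get_dummies(label_df["labels"])  # one column per distinct label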
github dennycn / python_practice_of_data_analysis_and_mining / chapter7 / demo / code / 7-2_data_clean.py View on Github
# -*- coding: utf-8 -*-
# Data cleaning: filter out records that do not satisfy the rules

import pandas as pd

datafile = '../data/air_data.csv'  # raw airline data; the first row holds the attribute labels
cleanedfile = '../tmp/data_cleaned.csv'  # file for the cleaned data
cleanedfile2 = '../tmp/data_cleaned.xls'

# Read the raw data as UTF-8 (convert the file to UTF-8 with a text editor first)
data = pd.read_csv(datafile, encoding='utf-8')

# NOTE: element-wise multiplication (*) would behave the same as & here
data = data[data['SUM_YR_1'].notnull() & data['SUM_YR_2'].notnull()]  # keep only records where both fares are non-null

# Keep records whose fares are non-zero, or whose average discount and total kilometres flown are both zero.
index1 = data['SUM_YR_1'] != 0
index2 = data['SUM_YR_2'] != 0
index3 = (data['SEG_KM_SUM'] == 0) & (data['avg_discount'] == 0)  # this rule is an AND
data = data[index1 | index2 | index3]  # this rule is an OR

# Export the results
data.to_csv(cleanedfile, encoding='utf-8')  # cleaned CSV
data.to_excel(cleanedfile2)
print('END')
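
Boolean indexing in pandas uses the element-wise operators & and | rather than Python's and/or, and each comparison needs its own parentheses. A tiny self-contained version of the filtering rule above (data is made up):

import pandas as pd

df = pd.DataFrame({"SUM_YR_1": [0, 100, None], "SUM_YR_2": [0, 50, 20]})
mask = df["SUM_YR_1"].notnull() & df["SUM_YR_2"].notnull()  # element-wise AND
df = df[mask]  # drops the row with the missing fare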
github spatialucr / geosnap / geosnap / io / util.py View on Github
    Returns
    -------
    pandas.DataFrame
        a pandas DataFrame with columns representing census blocks, indexed on
        the block FIPS code.

    """
    lodes_vars = pd.read_csv(
        os.path.join(os.path.dirname(os.path.abspath(__file__)), "lodes.csv"))
    renamer = dict(
        zip(lodes_vars["variable"].tolist(), lodes_vars["name"].tolist()))

    state = state.lower()
    url = "https://lehd.ces.census.gov/data/lodes/LODES7/{state}/{dataset}/{state}_{dataset}_S000_JT00_{year}.csv.gz".format(
        dataset=dataset, state=state, year=year)
    try:
        df = pd.read_csv(url, converters={"w_geocode": str, "h_geocode": str})
    except HTTPError:
        raise ValueError(
            "Unable to retrieve LEHD data. Check your internet connection "
            "and that the state/year combination you specified is available")
    df = df.rename({"w_geocode": "geoid", "h_geocode": "geoid"}, axis=1)
    df.rename(renamer, axis="columns", inplace=True)
    df = df.set_index("geoid")

    return df
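
Two details of that read_csv call are worth noting: converters={"w_geocode": str, ...} keeps the geocodes as strings, preserving the leading zeros that a numeric parse would drop (dtype={"w_geocode": str} works as well), and read_csv decompresses a .csv.gz path or URL transparently. A minimal sketch with a hypothetical file:

import pandas as pd

df = pd.read_csv("blocks.csv.gz", converters={"w_geocode": str, "h_geocode": str})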
github deyachatterjee / ml-andrewng-python / ex1 / ex1.py View on Github
def featureNormalize(X):
    # Scale each column of X to zero mean and unit standard deviation.
    return np.divide((X - np.mean(X, axis=0)), np.std(X, axis=0))


def computeCost(X, y, theta):
    m = len(y)
    J = (np.sum((np.dot(X,theta) - y)**2))/(2*m)
    return J

print('Running warmUpExercise ... \n')
print('5x5 Identity Matrix: \n')

print(warmUpExercise()) 
input('Program paused. Press enter to continue.\n')

print('Plotting Data ...\n')
data = pd.read_csv("ex1data1.txt",names=["X","y"])
x = np.array(data.X)[:, None]  # population of a city, in 10,000s
y = np.array(data.y) # profit for a food truck
m = len(y) 
fig = plotData(x,y)
fig.show()
input('Program paused. Press enter to continue.\n')
print('Running Gradient Descent ...\n')
ones = np.ones_like(x)  # an array of ones with the same shape as x
X = np.hstack((ones, x))  # add a column of ones to x (hstack stacks arrays column-wise)
theta = np.zeros(2)  # initialize fitting parameters
iterations = 1500
alpha = 0.01
computeCost(X, y, theta)
theta, hist = gradientDescent(X, y, theta, alpha, iterations)
print('Theta found by gradient descent: ')
print(theta[0],"\n", theta[1])
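
For headerless numeric files like ex1data1.txt, names= assigns column labels at read time, and the columns convert cleanly to NumPy arrays. A minimal sketch of that loading step (to_numpy() is the modern spelling of the np.array(...) conversions above):

import pandas as pd

data = pd.read_csv("ex1data1.txt", names=["X", "y"])  # two headerless columns
x = data["X"].to_numpy()[:, None]  # column vector, shape (m, 1)
y = data["y"].to_numpy()           # shape (m,)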