import pandas as pd
import numpy as np
import pyaf.ForecastEngine as autof
import datetime
#get_ipython().magic('matplotlib inline')
trainfile = "https://raw.githubusercontent.com/antoinecarme/TimeSeriesData/master/HeartRateTimeSeries/hr.207"
df = pd.read_csv(trainfile, sep=r',', engine='python', skiprows=0)
df.columns = ['HeartRate']
df['Date'] = range(df.shape[0])
print(df.head())
lDateVar = 'Date'
lSignalVar = 'HeartRate'
lEngine = autof.cForecastEngine()
H = 10  # forecast horizon (number of future points)
#lEngine.mOptions.enable_slow_mode();
lEngine.mOptions.mDebugPerformance = True
lEngine.train(df, lDateVar, lSignalVar, H)
lEngine.getModelInfo()  # print the selected model report
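# The trained engine can then be asked for the actual forecast; a short sketch
# of the usual next step (the 'HeartRate_Forecast' column name follows pyaf's
# common '<signal>_Forecast' convention and is an assumption here):
df_forecast = lEngine.forecast(df, H)
print(df_forecast[['Date', 'HeartRate', 'HeartRate_Forecast']].tail(H))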
    rtCAP = np.append(rtCAP, df_file_rtCAP.values)
    # NYCA Regulation Movement ($/MW)
    df_file_rtMOV = df_file.loc[df_file['PTID'] == zoneid, ['NYCA Regulation Movement ($/MW)']]
    rtMOV = np.append(rtMOV, df_file_rtMOV.values)
elif (year >= 2001 and month >= 10 and day_x >= 0) or (year >= 2001 and month >= 11) or \
        (year >= 2002 and not ((year >= 2016 and month >= 6 and day_x >= 23) or (year >= 2016 and month >= 7) or (year >= 2017))):
    df_file_rtCAP = df_file['East Regulation ($/MWHr)']
    rtCAP = np.append(rtCAP, df_file_rtCAP.values)
    df_file_rtMOV = df_file[' NYCA Regulation Movement ($/MW)']
    rtMOV = np.append(rtMOV, df_file_rtMOV.values)
# RT ancillary services for NYISO start on July 2004
if RT_DAM == "DAM" or RT_DAM == "both":
    try:
        df_file = pd.read_csv(fname_path_ASP_DA, index_col=False)
    except FileNotFoundError:
        daCAP = np.empty([0])
        logging.warning('read_nyiso_data: DA ASP file missing, returning empty array.')
        break
    if (year >= 2016 and month >= 6 and day_x >= 23) or (year >= 2016 and month >= 7) or (year >= 2017):
        df_file_daCAP = df_file.loc[df_file['PTID'] == zoneid, ['NYCA Regulation Capacity ($/MWHr)']]
        daCAP = np.append(daCAP, df_file_daCAP.values)
    elif (year >= 2001 and month >= 10 and day_x >= 0) or (year >= 2001 and month >= 11) or \
            (year >= 2002 and not ((year >= 2016 and month >= 6 and day_x >= 23) or (year >= 2016 and month >= 7) or (year >= 2017))):
        df_file_daCAP = df_file['East Regulation ($/MWHr)']
        daCAP = np.append(daCAP, df_file_daCAP.values)
    else:
        df_file_daCAP = df_file['Regulation ($/MWHr)']
        daCAP = np.append(daCAP, df_file_daCAP.values)
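# The chained year/month/day comparisons above act as date thresholds for when
# each price column appears in the NYISO files. A sketch of the same checks with
# datetime.date (assumes year, month and day_x can form a valid calendar date;
# the variable names here are illustrative, not part of the original):
import datetime
nyca_start = datetime.date(2016, 6, 23)  # NYCA regulation columns from this date on
east_start = datetime.date(2001, 10, 1)  # East Regulation column from this date on
file_date = datetime.date(year, month, max(day_x, 1))  # guard against a 0-based day index
if file_date >= nyca_start:
    pass  # use the 'NYCA Regulation Capacity ($/MWHr)' column
elif file_date >= east_start:
    pass  # use the 'East Regulation ($/MWHr)' column
else:
    pass  # use the plain 'Regulation ($/MWHr)' column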
def read_csv(path):
    # append the .csv extension and load the file into a DataFrame
    csv = pd.read_csv(path + ".csv")
    return csv
def makeDataframe(self):
    # requires: import requests; from io import StringIO
    df = pd.DataFrame()
    newline = ''
    try:
        res = requests.get(self.URL)
        if res.status_code != 200:
            return df
        # cmpFiles presumably diffs the cached file against res.text and returns the new lines
        newline = self.cmpFiles(self.DataFilePath, res.text)
    except Exception as e:
        logger.error(e)
    if newline != '':
        with open(self.DataFilePath, 'w') as f:
            f.write(res.text)
        df = pd.read_csv(StringIO(newline), names=self.header)
    return df
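# self.cmpFiles is not defined in this excerpt; a minimal sketch of what such a
# helper might do, assuming it returns the lines of the freshly downloaded text
# that are not already in the cached file:
def cmpFiles(self, cached_path, new_text):
    try:
        with open(cached_path) as f:
            old_lines = set(f.read().splitlines())
    except FileNotFoundError:
        old_lines = set()
    new_lines = [line for line in new_text.splitlines() if line not in old_lines]
    return '\n'.join(new_lines)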
"""
Overflow
"""
water_balance_df.loc[:, 'overflow(cu.m)'] = 0.000
for index, row in water_balance_df.iterrows():
    obs_volume = row['volume (cu.m)']
    if obs_volume > full_volume:
        overflow_volume = obs_volume - full_volume
        water_balance_df.loc[index.strftime(date_format), 'overflow(cu.m)'] = overflow_volume
print(water_balance_df['overflow(cu.m)'].sum())
"""
Stage vs area linear relationship
"""
stage_area_df = pd.read_csv('/media/kiruba/New Volume/milli_watershed/tmg_lake_bathymetry/stage_volume_area/stage_area_tmg.csv',
                            sep=',', header=0, names=['stage_ft', 'area_sq_ft', 'stage_m', 'total_area_sq_m'])
stage_area_df.drop(['stage_ft', 'area_sq_ft'], inplace=True, axis=1)
# set stage as index
stage_area_df.set_index(stage_area_df['stage_m'], inplace=True)
# create empty column
water_balance_df.loc[:, 'ws_area(sq.m)'] = 0.000
for index, row in water_balance_df.iterrows():
    obs_stage = row['stage(m)']  # observed stage
    if obs_stage >= stage_cutoff:
        x1, x2 = cd.find_range(stage_area_df['stage_m'].tolist(), obs_stage)
        x_diff = x2 - x1
        y1 = stage_area_df.loc[x1, 'total_area_sq_m']
        y2 = stage_area_df.loc[x2, 'total_area_sq_m']
        y_diff = y2 - y1
        slope = y_diff / x_diff
        y_intercept = y2 - (slope * x2)
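        # The loop derives the interpolation line but stops before applying it;
        # a sketch of the missing assignment, mirroring the overflow loop above
        # (an assumption about the intended next step):
        water_balance_df.loc[index.strftime(date_format), 'ws_area(sq.m)'] = (slope * obs_stage) + y_intercept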
def merge(dtype='train'):
    if dtype == 'train':
        df_join = pd.read_csv('data/origin/train.csv')
        out_path = 'data/merge/merge_train.csv'
    elif dtype == 'test1':
        df_join = pd.read_csv('data/origin/test1.csv')
        out_path = 'data/merge/merge_test1.csv'
    elif dtype == 'test2':
        df_join = pd.read_csv('data/origin/test2.csv')
        out_path = 'data/merge/merge_test2.csv'
    else:
        print('unknown dtype')
        return
    print('loading feature tables')
    df_ad = pd.read_csv('data/origin/adFeature.csv')
    df_user = pd.read_csv('data/origin/userFeature.csv')
    # join the feature tables
    print('merging ad information for %s' % dtype)
    df_join = pd.merge(df_join, df_ad, how='left', on='aid')  # next: join the user information
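    # A sketch of the remaining steps the trailing comment points at (the join
    # key 'uid' and the output call are assumptions, not part of the original):
    df_join = pd.merge(df_join, df_user, how='left', on='uid')
    df_join.to_csv(out_path, index=False)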
def initialise(self, path_to_csv):
    # CSV has no header: column 0 holds the subject ids/paths, column 1 the labels
    label_df = pd.read_csv(path_to_csv, header=None, names=['subject_ids', 'labels'])
    self._paths = label_df['subject_ids'].values
    self.label_names = list(label_df['labels'].unique())
    self._df = label_df
    self.dims = len(self.label_names)
    self._labels = self.to_ohe(label_df['labels'].values)  # one-hot encode the labels
    return self
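# self.to_ohe is not shown in this excerpt; a minimal one-hot encoding sketch
# consistent with how it is used above (assumes numpy is available as np):
def to_ohe(self, labels):
    ohe = np.zeros((len(labels), self.dims), dtype=np.float32)
    for i, label in enumerate(labels):
        ohe[i, self.label_names.index(label)] = 1.0
    return ohe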
# -*- coding: utf-8 -*-
# Data cleaning: filter out records that do not satisfy the rules
import pandas as pd
datafile = '../data/air_data.csv'  # raw airline data; the first row holds the attribute labels
cleanedfile = '../tmp/data_cleaned.csv'  # file for the cleaned data
cleanedfile2 = '../tmp/data_cleaned.xls'
# Read the raw data as UTF-8 (convert the file to UTF-8 with a text editor first if needed)
data = pd.read_csv(datafile, encoding='utf-8')
# NOTE: * instead of & (elementwise multiplication also works as a boolean AND)
data = data[data['SUM_YR_1'].notnull() & data['SUM_YR_2'].notnull()]  # keep rows where both fares are non-null
# Keep records whose fare is non-zero, or where the average discount rate and
# the total flown kilometres are both zero.
index1 = data['SUM_YR_1'] != 0
index2 = data['SUM_YR_2'] != 0
index3 = (data['SEG_KM_SUM'] == 0) & (data['avg_discount'] == 0)  # this rule is an AND
data = data[index1 | index2 | index3]  # this rule is an OR
# Export the results
data.to_csv(cleanedfile, encoding='utf-8')
data.to_excel(cleanedfile2)
print('END')
Returns
-------
pandas.DataFrame
    a pandas DataFrame with columns representing census blocks, indexed on
    the block FIPS code.
"""
lodes_vars = pd.read_csv(
    os.path.join(os.path.dirname(os.path.abspath(__file__)), "lodes.csv"))
renamer = dict(
    zip(lodes_vars["variable"].tolist(), lodes_vars["name"].tolist()))
state = state.lower()
url = "https://lehd.ces.census.gov/data/lodes/LODES7/{state}/{dataset}/{state}_{dataset}_S000_JT00_{year}.csv.gz".format(
    dataset=dataset, state=state, year=year)
try:
    df = pd.read_csv(url, converters={"w_geocode": str, "h_geocode": str})
except HTTPError:
    raise ValueError(
        "Unable to retrieve LEHD data. Check your internet connection "
        "and that the state/year combination you specified is available")
df = df.rename({"w_geocode": "geoid", "h_geocode": "geoid"}, axis=1)
df.rename(renamer, axis="columns", inplace=True)
df = df.set_index("geoid")
return df
return np.divide((X - np.mean(X, axis=0)), np.std(X, axis=0))  # z-score normalise each column
def computeCost(X, y, theta):
    # squared-error cost J(theta) = (1 / 2m) * sum((X.theta - y)^2)
    m = len(y)
    J = (np.sum((np.dot(X, theta) - y) ** 2)) / (2 * m)
    return J
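# warmUpExercise, plotData and gradientDescent are called below but their
# definitions are not part of this excerpt; minimal sketches consistent with
# how they are used (returning the cost history as the second value of
# gradientDescent is an assumption based on the call further down):
def warmUpExercise():
    return np.eye(5)  # 5x5 identity matrix

def plotData(x, y):
    import matplotlib.pyplot as plt
    fig, ax = plt.subplots()
    ax.plot(x, y, 'rx')  # scatter of population vs profit
    ax.set_xlabel('population')
    ax.set_ylabel('profit')
    return fig

def gradientDescent(X, y, theta, alpha, num_iters):
    m = len(y)
    hist = np.zeros(num_iters)  # cost after each iteration
    for i in range(num_iters):
        theta = theta - (alpha / m) * np.dot(X.T, np.dot(X, theta) - y)
        hist[i] = computeCost(X, y, theta)
    return theta, hist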
print('Running warmUpExercise ... \n')
print('5x5 Identity Matrix: \n')
print(warmUpExercise())
input('Program paused. Press enter to continue.\n')
print('Plotting Data ...\n')
data = pd.read_csv("ex1data1.txt",names=["X","y"])
x = np.array(data.X)[:,None] # population in units of 10,000
y = np.array(data.y) # profit for a food truck
m = len(y)
fig = plotData(x,y)
fig.show()
input('Program paused. Press enter to continue.\n')
print('Running Gradient Descent ...\n')
ones = np.ones_like(x)  # a column of ones with the same shape as x
X = np.hstack((ones, x))  # add an intercept column to x (hstack stacks column-wise)
theta = np.zeros(2) # initialize
iterations = 1500
alpha = 0.01
computeCost(X, y, theta)
theta, hist = gradientDescent(X, y, theta, alpha, iterations)
print('Theta found by gradient descent: ')
print(theta[0],"\n", theta[1])
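# A hypothetical follow-up showing how the fitted parameters would be used for
# prediction (the population values are illustrative, in units of 10,000):
predict1 = np.dot([1, 3.5], theta)
predict2 = np.dot([1, 7.0], theta)
print('Predicted profit for a population of 35,000:', predict1)
print('Predicted profit for a population of 70,000:', predict2)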