import datetime

import pandas as pd


def get_snapshot(data,
                 file_name,
                 evaluation_point_loc):
    """Main function wrapper to assemble snapshot dataframes"""
    # Start with the 48-hour window (± 24 hours around the evaluation point)
    evaluation_index = data.index[data.id == evaluation_point_loc]
    data["rounded_local_time"] = \
        pd.to_datetime(data["est.localTime"],
                       utc=True).dt.ceil(freq="5min")
    evaluation_time = \
        pd.to_datetime(data.loc[evaluation_index,
                                'rounded_local_time'].values[0],
                       utc=True)
    df_misc = get_time_to_calculate_at(evaluation_time)
    start_time = evaluation_time - datetime.timedelta(days=1)
    end_time = evaluation_time + datetime.timedelta(days=1)
    snapshot_df = data[(data['rounded_local_time'] >= start_time) &
                       (data['rounded_local_time'] <= end_time)]
    # Get pumpSettings list of active schedules
    active_schedule = get_active_schedule(data,
                                          snapshot_df,
                                          file_name,
                                          evaluation_point_loc)
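# A minimal sketch of the windowing step above in isolation (the ±24-hour slice
# around a rounded evaluation time); the frame and values here are illustrative:
import datetime

import pandas as pd

times = pd.date_range("2023-01-01", periods=9, freq="12h", tz="UTC")
frame = pd.DataFrame({"rounded_local_time": times, "value": range(9)})
eval_time = times[4]
window = frame[
    (frame["rounded_local_time"] >= eval_time - datetime.timedelta(days=1)) &
    (frame["rounded_local_time"] <= eval_time + datetime.timedelta(days=1))
]
print(len(window))  # 5 rows: the evaluation row plus two rows on each side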
def prof_data_creation(self, all_data):
    """
    Creates a dataset called prof_data comprising the professionals who
    answered at least one question
    """
    # Select only professionals who answered at least one question
    active_professionals = pd.DataFrame({'professionals_id': all_data.professionals_id.unique()})
    prof_data = self.professionals.merge(active_professionals, how='right', on='professionals_id')
    # Extract the state or country from the location string
    prof_data['professionals_state'] = prof_data['professionals_location'].apply(lambda loc: str(loc).split(', ')[-1])
    # Transform dates from string representation to datetime objects
    prof_data.professionals_date_joined = pd.to_datetime(prof_data.professionals_date_joined)
    # Count the number of questions answered by each professional
    number_answered = all_data[['questions_id', 'professionals_id']].groupby('professionals_id').count()
    number_answered = number_answered.rename({'questions_id': 'professionals_questions_answered'}, axis=1)
    # Add the professionals_questions_answered feature to prof_data
    prof_data = prof_data.merge(number_answered, left_on='professionals_id', right_index=True)
    # Get the average question age for every professional among the questions they answered
    average_question_age = (
        all_data.groupby('professionals_id')
        .questions_age.mean(numeric_only=False)
    )
    average_question_age = pd.DataFrame({'professionals_average_question_age': average_question_age})
    # Add the professionals_average_question_age feature to prof_data
    prof_data = prof_data.merge(average_question_age, left_on='professionals_id', right_index=True)
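# A toy check of the count-then-merge pattern used above; the column names
# mirror the snippet but the data is made up:
import pandas as pd

answers = pd.DataFrame({
    'professionals_id': ['p1', 'p1', 'p2'],
    'questions_id': ['q1', 'q2', 'q3'],
})
number_answered = (answers.groupby('professionals_id').count()
                   .rename({'questions_id': 'professionals_questions_answered'}, axis=1))
profs = pd.DataFrame({'professionals_id': ['p1', 'p2']})
profs = profs.merge(number_answered, left_on='professionals_id', right_index=True)
print(profs['professionals_questions_answered'].tolist())  # [2, 1]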
import numpy as np
import pandas as pd


def read_observations(data_path, col_names, col_units):
    # NOTE: the opening of this excerpt was truncated; the function name,
    # signature, and read call below are assumed reconstructions.
    df = pd.read_csv(data_path, names=col_names, sep=r'\s+')
    df['time'] = pd.to_datetime(df[['year', 'month', 'day', 'hour', 'minute']], utc=True)
    # Event times are HHMM-encoded integers; a component of 99 marks a missing value
    df['hours'] = np.floor(df['hourly_low_pressure_time'] / 100)
    df['minutes'] = df['hourly_low_pressure_time'] - df['hours'] * 100
    df['hours'] = df['hours'].replace(99, np.nan)
    df['minutes'] = df['minutes'].replace(99, np.nan)
    df['hourly_low_pressure_time'] = pd.to_datetime(df[['year', 'month', 'day', 'hours',
                                                        'minutes']], utc=True)
    df['hours'] = np.floor(df['hourly_high_wind_time'] / 100)
    df['minutes'] = df['hourly_high_wind_time'] - df['hours'] * 100
    df['hours'] = df['hours'].replace(99, np.nan)
    df['minutes'] = df['minutes'].replace(99, np.nan)
    df['hourly_high_wind_time'] = pd.to_datetime(df[['year', 'month', 'day',
                                                     'hours', 'minutes']], utc=True)
    df = df.drop(columns=['year', 'month', 'day', 'hour', 'minute', 'hours', 'minutes'])
    df.units = col_units  # stored as a plain attribute; pandas does not propagate it
    return df
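# The HHMM decoding above in miniature, with toy values (9999 encodes missing,
# so both components come out as 99 and are replaced with NaN):
import numpy as np
import pandas as pd

hhmm = pd.Series([130, 1245, 9999])
hours = np.floor(hhmm / 100).replace(99, np.nan)
minutes = (hhmm - hours * 100).replace(99, np.nan)
print(hours.tolist(), minutes.tolist())  # [1.0, 12.0, nan] [30.0, 45.0, nan]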
def clean_df(df, tech_analysis):
    """Keep date/price/signal and name the signal column after the strategy."""
    out = df[['date', 'price', 'signal']].copy()
    col_name = '{}_signal'.format(tech_analysis)
    out.columns = ['date', 'price', col_name]
    out['date'] = pd.to_datetime(out['date'])
    return out
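# A usage sketch for clean_df on a made-up frame:
import pandas as pd

raw = pd.DataFrame({
    'date': ['2020-01-02', '2020-01-03'],
    'price': [100.0, 101.5],
    'signal': [1, -1],
})
macd = clean_df(raw, 'macd')
print(list(macd.columns))  # ['date', 'price', 'macd_signal']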
#df1 = pd.get_dummies(df,columns=['lineID'])
# Truncate timestamps to 10-minute buckets (keep the minute's tens digit and
# append '0:00') and swap the date substring time_str for '-01-29' so rows can
# be aggregated and joined onto the test grid, then map the date back afterwards.
df['startTime'] = df['time'].apply(lambda x: x[:15].replace(time_str, '-01-29') + '0:00')
df = df.groupby(['startTime', 'stationID']).status.agg(['count', 'sum']).reset_index()
df = test.merge(df, 'left', ['stationID', 'startTime'])
df['time'] = df['startTime'].apply(lambda x: x[:15].replace('-01-29', time_str) + '0:00')
del df['startTime'], df['endTime']
# basic time features extracted from the timestamp string
df['day'] = df['time'].apply(lambda x: int(x[8:10]))
df['week'] = pd.to_datetime(df['time']).dt.dayofweek + 1
#df['weekend'] = (pd.to_datetime(df.time).dt.weekday >=5).astype(int)
df['hour'] = df['time'].apply(lambda x: int(x[11:13]))
df['minute'] = df['time'].apply(lambda x: int(x[14:15]+'0'))
result = df.copy()
# in/out passenger counts: status appears to be 1 for an entry and 0 for an
# exit, so 'sum' is the number of entries and 'count' - 'sum' the number of exits
result['inNums'] = result['sum']
result['outNums'] = result['count'] - result['sum']

result['day_since_first'] = result['day'] - 1
### The rank feature gave a score boost in the semi-final; it went unused in the
### preliminary round because we were applying it the wrong way at the time.
#result['rank'] = (result['stationID']+1)*(result['day']*144+result['hour']*6+result['minute'])
result.fillna(0, inplace=True)
del result['sum'], result['count']
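# The count/sum aggregation above on a tiny example: four records at one
# station and time slot, three of them entries, give inNums 3 and outNums 1.
import pandas as pd

toy = pd.DataFrame({
    'startTime': ['2019-01-29 08:00:00'] * 4,
    'stationID': [1, 1, 1, 1],
    'status': [1, 1, 0, 1],
})
agg = toy.groupby(['startTime', 'stationID']).status.agg(['count', 'sum']).reset_index()
print(agg['sum'][0], agg['count'][0] - agg['sum'][0])  # 3 1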
vols, spot, mktData, mktFlds, optionData, optionFlds, optionids = wind_data.get_wind_data(evalDate)
rf_container = svi_data.calculate_PCParity_riskFreeRate(evalDate, daycounter, calendar)
dividend_ts = ql.YieldTermStructureHandle(ql.FlatForward(evalDate, 0.0, daycounter))
month_indexs = wind_data.get_contract_months(evalDate)
sse = 0
print("-" * 80)
print("SVI In Sample Performance:")
print("=" * 80)
print(" %15s %25s %25s " % ("market price", "model price", "squared error (*1e-4)"))
for idx, optionid in enumerate(optionids):
    optionDataIdx = optionData[optionFlds.index('wind_code')].index(optionid)
    mdate = pd.to_datetime(optionData[optionFlds.index('exercise_date')][optionDataIdx])
    maturitydt = ql.Date(mdate.day, mdate.month, mdate.year)
    mktindex = mktData[mktFlds.index('option_code')].index(optionid)
    strike = optionData[optionFlds.index('exercise_price')][optionDataIdx]
    close = mktData[mktFlds.index('close')][mktindex]
    ttm = daycounter.yearFraction(evalDate, maturitydt)
    nbr_month = maturitydt.month()
    # Pick the SVI parameters and risk-free rate calibrated for this maturity month
    if nbr_month == month_indexs[0]:
        a, b, rho, m, sigma = params_month0
        rf = min(0.0002, rf_container.get(0).get(strike))
    elif nbr_month == month_indexs[1]:
        a, b, rho, m, sigma = params_month1
        rf = min(0.0002, rf_container.get(1).get(strike))
    elif nbr_month == month_indexs[2]:
        a, b, rho, m, sigma = params_month2
        rf = min(0.0002, rf_container.get(2).get(strike))
    else:
        continue  # assumed: skip maturities outside the calibrated months (excerpt is truncated here)
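# Side check: ttm above is just a day-count year fraction; a tiny
# QuantLib-Python illustration (the day counter chosen here is illustrative):
import QuantLib as ql

dc = ql.Actual365Fixed()
print(round(dc.yearFraction(ql.Date(1, 1, 2021), ql.Date(1, 7, 2021)), 3))  # 0.496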
def block_arrival_time(self, block_hash):
    """Return the block's received time as a timestamp, or None if unknown."""
    data = self.block_data(block_hash)
    if "received_time" in data:
        # received_time is a Unix epoch in seconds
        return pd.to_datetime(data["received_time"], unit='s')
    else:
        return None
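# Quick check of the unit='s' conversion used above:
import pandas as pd

print(pd.to_datetime(1609459200, unit='s'))  # 2021-01-01 00:00:00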
def preprocess_RAW_TD(self, Raw_TD):
    """Convert the raw frame's string index to a DatetimeIndex in place."""
    Raw_TD.index = pd.to_datetime(Raw_TD.index)
    return Raw_TD
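# The same index conversion on a toy frame:
import pandas as pd

raw = pd.DataFrame({'v': [1, 2]}, index=['2022-01-01', '2022-01-02'])
raw.index = pd.to_datetime(raw.index)
print(raw.index.dtype)  # datetime64[ns]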
# grab the annual table select
plants_eia860_tbl = pt['plants_eia860']
plants_eia860_select = sa.sql.select([plants_eia860_tbl])
if start_date is not None:
start_date = pd.to_datetime(start_date)
plants_eia860_select = plants_eia860_select.where(
plants_eia860_tbl.c.report_date >= start_date
)
if end_date is not None:
end_date = pd.to_datetime(end_date)
plants_eia860_select = plants_eia860_select.where(
plants_eia860_tbl.c.report_date <= end_date
)
plants_eia860_df = pd.read_sql(plants_eia860_select, pudl_engine)
plants_eia860_df['report_date'] = \
pd.to_datetime(plants_eia860_df['report_date'])
# plant glue table
plants_g_eia_tbl = pt['plants_eia']
plants_g_eia_select = sa.sql.select([
plants_g_eia_tbl.c.plant_id_eia,
plants_g_eia_tbl.c.plant_id_pudl,
])
plants_g_eia_df = pd.read_sql(plants_g_eia_select, pudl_engine)
# plants_eia_df is assumed to come from earlier in the original function
out_df = pd.merge(plants_eia_df, plants_eia860_df,
                  how='left', on=['plant_id_eia'])
out_df = pd.merge(out_df, plants_g_eia_df,
                  how='left', on=['plant_id_eia'])
utils_eia_tbl = pt['utilities_eia']
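# The date-bounding pattern above, reduced to a self-contained sketch against an
# in-memory SQLite database (table and column names here are made up; the select
# uses the SQLAlchemy 1.4+ style rather than the legacy list form above):
import pandas as pd
import sqlalchemy as sa

engine = sa.create_engine('sqlite://')
with engine.begin() as conn:
    conn.execute(sa.text("CREATE TABLE plants (plant_id INTEGER, report_date TEXT)"))
    conn.execute(sa.text("INSERT INTO plants VALUES (1, '2019-01-01'), (2, '2021-01-01')"))

plants = sa.Table('plants', sa.MetaData(), autoload_with=engine)
select = sa.select(plants).where(plants.c.report_date >= '2020-01-01')
df = pd.read_sql(select, engine)
df['report_date'] = pd.to_datetime(df['report_date'])
print(len(df))  # 1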
#empty_subproduct = []
for (state, city, product, subproduct, country, freq), group in \
        df.groupby(['state', 'city', 'product', 'subproduct', 'country', 'freq']):
    # missing_dates = list(set(all_dates) - set(group['date']))
    # # print missing_dates
    # # print len(all_dates), len(group['date']), len(missing_dates)
    # # break
    # missing_piece = get_piece(missing_dates, group.iloc[0,])
    # Blank out known-bad observations
    if (state, city, product, subproduct, freq) == \
            ('West Bengal', 'Kolkata', 'Urad Dal', 'None', 'day'):
        print(state, city, product, subproduct, freq)
        print(group[group['date'] == pd.to_datetime('2011-02-03')])
        group[group['date'] == pd.to_datetime('2011-02-03')] = np.nan
    if (state, city, product, subproduct, freq) == \
            ('Rajasthan', 'Jaipur', 'Salt Pack (Iodised)', 'None', 'day'):
        print(state, city, product, subproduct, freq)
        print(group[group['date'] == pd.to_datetime('2010-05-05')])
        group[group['date'] == pd.to_datetime('2010-05-05')] = np.nan
        group[group['date'] == pd.to_datetime('2010-05-06')] = np.nan
        group[group['date'] == pd.to_datetime('2010-05-07')] = np.nan
    # Common part: skip products not in filter_lst
    if len(filter_lst) != 0 and product not in filter_lst:
        continue
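# Blanking bad rows with a boolean date mask, as above, on a toy group:
import numpy as np
import pandas as pd

g = pd.DataFrame({
    'date': pd.to_datetime(['2011-02-02', '2011-02-03']),
    'price': [10.0, 9999.0],
})
g[g['date'] == pd.to_datetime('2011-02-03')] = np.nan
print(g['price'].tolist())  # [10.0, nan]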