How to use the pandas.to_datetime function in pandas

To help you get started, we’ve selected a few pandas.to_datetime examples based on popular ways it is used in public projects.

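Before the project examples below, here is a minimal, self-contained sketch (with made-up column names and values) of the most common pd.to_datetime patterns: parsing a string column, coercing unparseable entries to NaT, and converting to UTC.

import pandas as pd

# Hypothetical frame with ISO-8601 date strings and one malformed value
df = pd.DataFrame({"event_date": ["2021-01-05", "2021-02-17", "not a date"]})

# errors="coerce" turns unparseable entries into NaT instead of raising
df["event_date"] = pd.to_datetime(df["event_date"], errors="coerce", utc=True)

print(df["event_date"].dtype)  # datetime64[ns, UTC]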

github tidepool-org / data-analytics / projects / iCGM-test-matrix / snapshot_processor.py View on Github external
def get_snapshot(data,
                 file_name,
                 evaluation_point_loc):
    """Main function wrapper to assemble snapshot dataframes"""

    # Start by getting the 48-hour window ± 24hrs around the evaluation point
    evaluation_index = data.index[data.id == evaluation_point_loc]

    data["rounded_local_time"] = \
        pd.to_datetime(data["est.localTime"],
                       utc=True).dt.ceil(freq="5min")

    evaluation_time = \
        pd.to_datetime(data.loc[evaluation_index,
                                'rounded_local_time'].values[0],
                       utc=True)

    df_misc = get_time_to_calculate_at(evaluation_time)

    start_time = evaluation_time - datetime.timedelta(days=1)
    end_time = evaluation_time + datetime.timedelta(days=1)

    snapshot_df = data[(data['rounded_local_time'] >= start_time) &
                       (data['rounded_local_time'] <= end_time)]

    # Get pumpSettings list of active schedules
    active_schedule = get_active_schedule(data,
                                          snapshot_df,
                                          file_name,
                                          evaluation_point_loc,
github dataroot / Kaggle-CV / kostya / dataset_creator.py View on Github external
def prof_data_creation(self, all_data):
        """
        Creates a dataset called prof_data comprising data of professionals who answered at least one question
        """
        # Select only professionals who answered at least one question
        active_professionals = pd.DataFrame({'professionals_id': all_data.professionals_id.unique()})
        prof_data = self.professionals.merge(active_professionals, how='right', on='professionals_id')
        
        # Extract state or country from location
        prof_data['professionals_state'] = prof_data['professionals_location'].apply(lambda loc: str(loc).split(', ')[-1])
        
        # Transform dates from string representation to datetime object
        prof_data.professionals_date_joined = pd.to_datetime(prof_data.professionals_date_joined)
        
        # Count the number of answered questions by each professional
        number_answered = all_data[['questions_id', 'professionals_id']].groupby('professionals_id').count()
        number_answered = number_answered.rename({'questions_id': 'professionals_questions_answered'}, axis=1)
        
        # Add professionals_questions_answered feature to prof_data
        prof_data = prof_data.merge(number_answered, left_on='professionals_id', right_index=True)
        
        # Get average question age for every professional among questions he answered
        average_question_age = (
            all_data.groupby('professionals_id')
            .questions_age.mean(numeric_only=False)
        )
        average_question_age = pd.DataFrame({'professionals_average_question_age': average_question_age})
        
        # Add professionals_average_question_age feature to prof_data
github Unidata / siphon / siphon / simplewebservice / ndbc.py View on Github external
names=col_names, sep=r'\s+')

        df['time'] = pd.to_datetime(df[['year', 'month', 'day', 'hour', 'minute']], utc=True)

        df['hours'] = np.floor(df['hourly_low_pressure_time'] / 100)
        df['minutes'] = df['hourly_low_pressure_time'] - df['hours'] * 100
        df['hours'] = df['hours'].replace(99, np.nan)
        df['minutes'] = df['minutes'].replace(99, np.nan)
        df['hourly_low_pressure_time'] = pd.to_datetime(df[['year', 'month', 'day', 'hours',
                                                            'minutes']], utc=True)

        df['hours'] = np.floor(df['hourly_high_wind_time'] / 100)
        df['minutes'] = df['hourly_high_wind_time'] - df['hours'] * 100
        df['hours'] = df['hours'].replace(99, np.nan)
        df['minutes'] = df['minutes'].replace(99, np.nan)
        df['hourly_high_wind_time'] = pd.to_datetime(df[['year', 'month', 'day',
                                                         'hours', 'minutes']], utc=True)
        df = df.drop(columns=['year', 'month', 'day', 'hour', 'minute', 'hours', 'minutes'])
        df.units = col_units
        return df
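As the snippet above shows, pd.to_datetime also accepts a DataFrame whose column names identify datetime components (year, month, day, hour, minute); a minimal sketch of that pattern with made-up values:

import pandas as pd

parts = pd.DataFrame({"year": [2020, 2020],
                      "month": [6, 6],
                      "day": [1, 2],
                      "hour": [12, 18],
                      "minute": [30, 0]})
# Column names tell to_datetime which component each column holds
stamps = pd.to_datetime(parts, utc=True)
print(stamps)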
github andrebrener / crypto_predictor / model.py View on Github external
def clean_df(df, tech_analysis):
    clean_df = df[['date', 'price', 'signal']].copy()
    col_name = '{}_signal'.format(tech_analysis)
    clean_df.columns = ['date', 'price', col_name]
    clean_df['date'] = pd.to_datetime(clean_df['date'])
    return clean_df
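When the incoming format is known, a call like the one above can also pass format= (and errors="coerce" for dirty inputs) to make parsing faster and more predictable; a small illustrative sketch with made-up data:

import pandas as pd

prices = pd.DataFrame({"date": ["2018-03-01", "2018-03-02", "bad value"],
                       "price": [1050.2, 1071.9, 1068.4]})
# An explicit format skips per-row inference; unparseable rows become NaT
prices["date"] = pd.to_datetime(prices["date"], format="%Y-%m-%d", errors="coerce")
print(prices)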
github Justdcy / subway_traffic_forecast-tianchi / main.py View on Github external
#df1 = pd.get_dummies(df,columns=['lineID'])
    df['startTime'] = df['time'].apply(lambda x: x[:15].replace(time_str, '-01-29') + '0:00')

    
    df = df.groupby(['startTime','stationID']).status.agg(['count', 'sum']).reset_index()
    df = test.merge(df, 'left', ['stationID','startTime'])


    df['time'] = df['startTime'].apply(lambda x: x[:15].replace('-01-29', time_str) + '0:00')

    del df['startTime'],df['endTime']
    
    # base time
    df['day']     = df['time'].apply(lambda x: int(x[8:10]))
    
    df['week']    = pd.to_datetime(df['time']).dt.dayofweek + 1
    #df['weekend'] = (pd.to_datetime(df.time).dt.weekday >=5).astype(int)
    df['hour']    = df['time'].apply(lambda x: int(x[11:13]))
    df['minute']  = df['time'].apply(lambda x: int(x[14:15]+'0'))    

    result = df.copy()    
  
    # in,out
    result['inNums']  = result['sum']
    result['outNums'] = result['count'] - result['sum']
    #
    result['day_since_first'] = result['day'] - 1 
    
    ### rank: recall this gave a score boost in the second round; it wasn't used in the preliminary round because it wasn't applied the right way at the time
    #result['rank'] = (result['stationID']+1)*(result['day']*144+result['hour']*6+result['minute'])
    result.fillna(0, inplace=True)
    del result['sum'],result['count']
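The week feature above comes from the .dt accessor, which exposes components such as dayofweek and hour once a column has been converted; a minimal sketch with illustrative timestamps:

import pandas as pd

trips = pd.DataFrame({"time": ["2019-01-29 08:10:00", "2019-01-29 18:40:00"]})
t = pd.to_datetime(trips["time"])
trips["week"] = t.dt.dayofweek + 1   # Monday=1 ... Sunday=7
trips["hour"] = t.dt.hour
print(trips)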
github SarahWang0102 / OptionPricing / svi_performance_insample.py View on Github external
vols, spot, mktData, mktFlds, optionData, optionFlds, optionids = wind_data.get_wind_data(evalDate)

rf_container = svi_data.calculate_PCParity_riskFreeRate(evalDate, daycounter, calendar)
dividend_ts  = ql.YieldTermStructureHandle(ql.FlatForward(evalDate, 0.0, daycounter))

month_indexs = wind_data.get_contract_months(evalDate)
sse = 0
print("-" * 80)
print("SVI In Sample Performance:")
print("=" * 80)
print(" %15s %25s %25s " % ("market price","model price", "square error(* e-4)"))

for idx, optionid in enumerate(optionids):
    optionDataIdx = optionData[optionFlds.index('wind_code')].index(optionid)
    mdate = pd.to_datetime(optionData[optionFlds.index('exercise_date')][optionDataIdx])
    maturitydt = ql.Date(mdate.day, mdate.month, mdate.year)
    mktindex = mktData[mktFlds.index('option_code')].index(optionid)
    strike = optionData[optionFlds.index('exercise_price')][optionDataIdx]
    close = mktData[mktFlds.index('close')][mktindex]
    ttm = daycounter.yearFraction(evalDate, maturitydt)
    nbr_month = maturitydt.month()
    if nbr_month == month_indexs[0]:
        a, b, rho, m, sigma = params_month0
        rf = min(0.0002, rf_container.get(0).get(strike))
    elif nbr_month == month_indexs[1]:
        a, b, rho, m, sigma = params_month1
        rf = min(0.0002, rf_container.get(1).get(strike))
    elif nbr_month == month_indexs[2]:
        a, b, rho, m, sigma = params_month2
        rf = min(0.0002, rf_container.get(2).get(strike))
    else:
github citp / BlockSci / pyblocksci / blocksci / blockchain_info.py View on Github external
def block_arrival_time(self, block_hash):
        data = self.block_data(block_hash)
        
        if "received_time" in data:
            return pd.to_datetime(data["received_time"], unit='s')
        else:
            return None
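The unit="s" argument used above tells pd.to_datetime to read the input as seconds since the Unix epoch; a minimal sketch:

import pandas as pd

# 1609459200 seconds after the epoch is 2021-01-01 00:00:00 UTC
arrival = pd.to_datetime(1609459200, unit="s")
print(arrival)  # 2021-01-01 00:00:00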
github manuwhs / Trapyng / libs / InformationClasses / CTimeData / TimeData_DDBB.py View on Github external
def preprocess_RAW_TD(self, Raw_TD):
    processed_dates = pd.to_datetime(Raw_TD.index)
    Raw_TD.index = processed_dates
    return Raw_TD
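pd.to_datetime also works on an Index, returning a DatetimeIndex, which is what the helper above assigns back onto the raw frame; a minimal sketch with made-up rows:

import pandas as pd

raw = pd.DataFrame({"close": [10.1, 10.4]}, index=["2019-05-01", "2019-05-02"])
raw.index = pd.to_datetime(raw.index)  # plain Index of strings -> DatetimeIndex
print(raw.index)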
github catalyst-cooperative / pudl / src / pudl / output / eia860.py View on Github external
# grab the annual table select
    plants_eia860_tbl = pt['plants_eia860']
    plants_eia860_select = sa.sql.select([plants_eia860_tbl])
    if start_date is not None:
        start_date = pd.to_datetime(start_date)
        plants_eia860_select = plants_eia860_select.where(
            plants_eia860_tbl.c.report_date >= start_date
        )
    if end_date is not None:
        end_date = pd.to_datetime(end_date)
        plants_eia860_select = plants_eia860_select.where(
            plants_eia860_tbl.c.report_date <= end_date
        )
    plants_eia860_df = pd.read_sql(plants_eia860_select, pudl_engine)
    plants_eia860_df['report_date'] = \
        pd.to_datetime(plants_eia860_df['report_date'])

    # plant glue table
    plants_g_eia_tbl = pt['plants_eia']
    plants_g_eia_select = sa.sql.select([
        plants_g_eia_tbl.c.plant_id_eia,
        plants_g_eia_tbl.c.plant_id_pudl,
    ])
    plants_g_eia_df = pd.read_sql(plants_g_eia_select, pudl_engine)

    out_df = pd.merge(plants_eia_df, plants_eia860_df,
                      how='left', on=['plant_id_eia', ])

    out_df = pd.merge(out_df, plants_g_eia_df,
                      how='left', on=['plant_id_eia', ])

    utils_eia_tbl = pt['utilities_eia']
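Passing a single date string, as the start_date and end_date handling above does, returns a pandas Timestamp that compares directly against a datetime column; a minimal sketch:

import pandas as pd

start_date = pd.to_datetime("2015-01-01")                        # scalar -> Timestamp
report_date = pd.to_datetime(pd.Series(["2014-06-30", "2016-06-30"]))
print(report_date >= start_date)                                 # element-wise boolean mask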
github f4bD3v / humanitas / analysis / preproc / df_build_func.py View on Github external
#empty_subproduct = []

    for (state, city, product, subproduct, country, freq), group in \
            df.groupby(['state', 'city','product','subproduct', 'country', 'freq']):

        # missing_dates = list(set(all_dates) - set(group['date']))
        # # print missing_dates
        # # print len(all_dates), len(group['date']), len(missing_dates)
        # # break
        # missing_piece = get_piece(missing_dates, group.iloc[0,])

        #wrong data
        if (state, city, product, subproduct, freq) == \
            ('West Bengal', 'Kolkata', 'Urad Dal', 'None','day'):
            print (state, city, product, subproduct, freq)
            print(group[group['date'] == pd.to_datetime('2011-02-03')])
            group[group['date'] == pd.to_datetime('2011-02-03')] = np.nan

        if (state, city, product, subproduct, freq) == \
            ('Rajasthan', 'Jaipur', 'Salt Pack (Iodised)', 'None', 'day'):
            print (state, city, product, subproduct, freq)
            print(group[group['date'] == pd.to_datetime('2010-05-05')])
            group[group['date'] == pd.to_datetime('2010-05-05')] = np.nan
            group[group['date'] == pd.to_datetime('2010-05-06')] = np.nan
            group[group['date'] == pd.to_datetime('2010-05-07')] = np.nan

        #common part

        #filter out things not in filter_lst
        if len(filter_lst) != 0 and product not in filter_lst:
            continue