How to use the pandas.read_sql function in pandas

To help you get started, we’ve selected a few pandas.read_sql examples, based on popular ways it is used in public projects.

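As a quick orientation before the project snippets below, here is a minimal, self-contained sketch of the basic call; the table and column names are made up for illustration.

import sqlite3

import pandas as pd

# Throwaway in-memory database so the example is self-contained.
conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE users (id INTEGER, name TEXT, signup TEXT)")
conn.executemany("INSERT INTO users VALUES (?, ?, ?)",
                 [(1, "alice", "2021-01-05"), (2, "bob", "2021-02-11")])

# read_sql accepts a SQL string (or SQLAlchemy selectable) plus a
# DBAPI connection or SQLAlchemy engine/connection.
df = pd.read_sql("SELECT * FROM users", conn,
                 index_col="id", parse_dates=["signup"])
print(df)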

github kayak / fireant / fireant / database / base.py
    def fetch_dataframes(self, *queries, **kwargs):
        connection = kwargs.get("connection")
        dataframes = []
        for query in queries:
            dataframes.append(
                pd.read_sql(query, connection, coerce_float=True, parse_dates=True)
            )
        return dataframes

github nilmtk / nilmtk / nilmtk / dataset_converters / dataport / download_dataport.py
    for f in os.listdir(metadata_dir):
        if re.search('^building', f):
            os.remove(join(metadata_dir, f))

    """
    TODO:
    The section below can be altered or removed,
    since the restructured Dataport
    now has only one electricity_egauge_minutes table.
    """
    # get tables in database schema
    sql_query = ("SELECT table_name" +
                 " FROM information_schema.views" +
                 " WHERE table_schema ='" + database_schema + "'" +
                 " ORDER BY table_name")
    database_tables = pd.read_sql(sql_query, conn)['table_name'].tolist()
    database_tables = [t for t in database_tables if user_selected_table in t]
    # if user has specified buildings
    if periods_to_load:
        buildings_to_load = list(periods_to_load.keys())
    else:
        # get buildings present in all tables
        sql_query = ''
        for table in database_tables:
            sql_query = (sql_query + '(SELECT DISTINCT dataid' +
                         ' FROM "' + database_schema + '".' + table +
                         ') UNION ')
        sql_query = sql_query[:-7]  # strip the trailing ' UNION '
        sql_query = (sql_query + ' ORDER BY dataid')
        buildings_to_load = pd.read_sql(sql_query, conn)['dataid'].tolist()

    # for each user-specified building, or all buildings in the database

github catalyst-cooperative / pudl / pudl / outputs.py
    Returns:
        bf_df: a pandas dataframe.

    """
    pudl_engine = init.connect_db(testing=testing)
    bf_eia923_tbl = pt['boiler_fuel_eia923']
    bf_eia923_select = sa.sql.select([bf_eia923_tbl, ])
    if start_date is not None:
        bf_eia923_select = bf_eia923_select.where(
            bf_eia923_tbl.c.report_date >= start_date
        )
    if end_date is not None:
        bf_eia923_select = bf_eia923_select.where(
            bf_eia923_tbl.c.report_date <= end_date
        )
    bf_df = pd.read_sql(bf_eia923_select, pudl_engine)

    # The total heat content is also useful in its own right, and we'll keep it
    # around.  Also needed to calculate average heat content per unit of fuel.
    bf_df['total_heat_content_mmbtu'] = bf_df['fuel_qty_consumed'] * \
        bf_df['fuel_mmbtu_per_unit']

    # Create a date index for grouping based on freq
    by = ['plant_id_eia', 'boiler_id', 'fuel_type_pudl']
    if freq is not None:
        # In order to calculate the weighted average sulfur
        # content and ash content we need to calculate these totals.
        bf_df['total_sulfur_content'] = bf_df['fuel_qty_consumed'] * \
            bf_df['sulfur_content_pct']
        bf_df['total_ash_content'] = bf_df['fuel_qty_consumed'] * \
            bf_df['ash_content_pct']
        bf_df = bf_df.set_index(pd.DatetimeIndex(bf_df.report_date))

github catalyst-cooperative / pudl / src / pudl / output / eia923.py
    Returns:
        pandas.DataFrame: A DataFrame containing all records from the EIA 923
        Boiler Fuel table.

    """
    bf_eia923_tbl = pt['boiler_fuel_eia923']
    bf_eia923_select = sa.sql.select([bf_eia923_tbl, ])
    if start_date is not None:
        bf_eia923_select = bf_eia923_select.where(
            bf_eia923_tbl.c.report_date >= start_date
        )
    if end_date is not None:
        bf_eia923_select = bf_eia923_select.where(
            bf_eia923_tbl.c.report_date <= end_date
        )
    bf_df = pd.read_sql(bf_eia923_select, pudl_engine)

    # The total heat content is also useful in its own right, and we'll keep it
    # around.  Also needed to calculate average heat content per unit of fuel.
    bf_df['total_heat_content_mmbtu'] = bf_df['fuel_consumed_units'] * \
        bf_df['fuel_mmbtu_per_unit']

    # Create a date index for grouping based on freq
    by = ['plant_id_eia', 'boiler_id', 'fuel_type_code_pudl']
    if freq is not None:
        # In order to calculate the weighted average sulfur
        # content and ash content we need to calculate these totals.
        bf_df['total_sulfur_content'] = bf_df['fuel_consumed_units'] * \
            bf_df['sulfur_content_pct']
        bf_df['total_ash_content'] = bf_df['fuel_consumed_units'] * \
            bf_df['ash_content_pct']
        bf_df = bf_df.set_index(pd.DatetimeIndex(bf_df.report_date))

github catalyst-cooperative / pudl / pudl / outputs.py
"""Pull the more complete PUDL/EIA boiler generator associations."""
    pudl_engine = init.connect_db(testing=testing)
    bga_eia_tbl = pt['boiler_generator_assn_eia']
    bga_eia_select = sa.sql.select([bga_eia_tbl])

    if start_date is not None:
        start_date = pd.to_datetime(start_date)
        bga_eia_select = bga_eia_select.where(
            bga_eia_tbl.c.report_date >= start_date
        )
    if end_date is not None:
        end_date = pd.to_datetime(end_date)
        bga_eia_select = bga_eia_select.where(
            bga_eia_tbl.c.report_date <= end_date
        )
    bga_eia_df = pd.read_sql(bga_eia_select, pudl_engine)
    out_df = extend_annual(bga_eia_df,
                           start_date=start_date, end_date=end_date)
    return out_df

github draperjames / qtpandas / qtpandas / models / DataFrameModel.py
    pandas.read_sql and get a DataFrameModel.

    NOTE: The chunksize option is overridden to None always (for now).

    Reference:
    http://pandas.pydata.org/pandas-docs/version/0.18.1/generated/pandas.read_sql.html
    pandas.read_sql(sql, con, index_col=None, coerce_float=True,
                    params=None, parse_dates=None, columns=None, chunksize=None)

    :return: DataFrameModel
    """

    # TODO: Decide if chunksize is worth keeping and how to handle?
    df = pandas.read_sql(sql, con, index_col, coerce_float,
                         params, parse_dates, columns, chunksize=None)
    return DataFrameModel(df, filePath=filePath)
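Since the wrapper above pins chunksize to None, it is worth noting what the parameter does when allowed through: with chunksize set, pandas.read_sql returns an iterator of DataFrames rather than a single frame. A minimal sketch, assuming con is an open connection, events is an existing table, and handle is a hypothetical per-chunk function:

# Stream the result set in pieces instead of materializing it at once.
for chunk in pandas.read_sql("SELECT * FROM events", con, chunksize=10000):
    handle(chunk)  # hypothetical per-chunk processing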

github dssg / cincinnati / model / features / util.py
def population_in_tracts():
    engine = create_engine(uri)
    populations = ("SELECT tract, sum(\"P0010001\") AS population "
                   "FROM shape_files.census_pop_housing "
                   "GROUP BY tract")
    populations = pd.read_sql(populations, con=engine)
    populations = populations.set_index("tract")
    return populations
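One aside on the snippet above: the follow-up set_index call can be folded into the read itself via read_sql's index_col parameter. A minimal sketch assuming the same engine and query:

populations = pd.read_sql(
    'SELECT tract, sum("P0010001") AS population '
    'FROM shape_files.census_pop_housing GROUP BY tract',
    con=engine,
    index_col="tract",  # index by tract directly, no set_index needed
)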

github cbbing / stock / trade_process / strategy / stop_loss.py
    Fetch the adjusted (后复权) closing price.
    Parameters
    ----------
    symbol
    trade_date

    Returns
    -------
    float : the first adjusted close on or after trade_date, or -1 if no data
    """
    # Both branches of the if/else built the identical query, so a single
    # statement suffices.
    sql = ("SELECT close FROM hq_db.stock_kline_fq where code='{}' "
           "and date>='{}' order by date asc").format(symbol, trade_date)
    df = pd.read_sql(sql, engine)
    closes = df['close'].to_numpy()
    if len(closes) == 0:
        # raise Exception("get code close_price error!")
        return -1

    return closes[0]
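Note that the query above splices symbol and trade_date into the SQL string with format. read_sql's params argument can pass those values through the driver instead; a minimal sketch, assuming the same engine and a driver that uses the %s paramstyle (as mysqlclient and PyMySQL do):

sql = ("SELECT close FROM hq_db.stock_kline_fq "
       "WHERE code = %s AND date >= %s ORDER BY date ASC")
df = pd.read_sql(sql, engine, params=(symbol, trade_date))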

github simoncos / zhihu-analysis-python / zhihu_analysis.py
def plot_user_distribution():

    conn = sqlite3.connect("zhihu.db")
    user_data = pd.read_sql('select * from User', conn)  # database rows -> pandas.DataFrame
    conn.close()
    
    feature_list = ['followee_num', 'follower_num', 'answer_num', 'agree_num', 'thanks_num']   
    
    for feature in feature_list:    
        pylab.figure(feature)
        pylab.title("Distribution of " + feature + ' to Individual User')
        pylab.xlabel("Individual User(sum=26161)")
        pylab.ylabel(feature + " of user")
        user = list(range(len(user_data)))
        feature_value = sorted(list(user_data[feature]), reverse=True)  # DataFrame -> list, sorted descending
        pylab.scatter(user, feature_value)
        print('mean of', feature, np.mean(list(user_data[feature])))
        print('median of', feature, np.median(list(user_data[feature])))
        print('standard deviation of', feature, np.std(list(user_data[feature])), '\n')

github Morgan-Stanley / treadmill / lib / python / treadmill / cli / admin / checkout / __init__.py
def _print_query(conn, sql, index_col=None):
    """Print query results."""
    row_factory = conn.row_factory
    try:
        conn.row_factory = None
        frame = pd.read_sql(
            sql, conn, index_col=index_col
        )
        columns = {
            col: col.replace('_', '-')
            for col in frame.columns
        }
        frame.rename(columns=columns, inplace=True)
        if not frame.empty:
            pd.set_option('max_rows', None)
            pd.set_option('expand_frame_repr', False)
            print('---')
            print(frame)
            print('')

    finally:
        conn.row_factory = row_factory