Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def fetch_dataframes(self, *queries, **kwargs):
    """Execute each SQL query and return the results as DataFrames.

    Parameters
    ----------
    *queries : str
        SQL statements to run, in order.
    **kwargs
        connection : DB-API connection or SQLAlchemy connectable handed
        to ``pandas.read_sql`` (``None`` when not supplied).

    Returns
    -------
    list of pandas.DataFrame
        One DataFrame per query, in the same order as ``queries``.
    """
    conn = kwargs.get("connection")
    # Floats are coerced and date-like columns parsed, matching the
    # defaults this project expects from read_sql.
    return [
        pd.read_sql(query, conn, coerce_float=True, parse_dates=True)
        for query in queries
    ]
# Remove stale per-building metadata files: anything in metadata_dir
# whose filename begins with "building".
for fname in os.listdir(metadata_dir):
    if fname.startswith('building'):
        os.remove(join(metadata_dir, fname))
"""
TODO:
The section below can be altered or removed,
since the restructured Dataport
now has only one electricity_egauge_minutes table.
"""
# get tables in database schema
sql_query = ("SELECT table_name" +
" FROM information_schema.views" +
" WHERE table_schema ='" + database_schema + "'" +
" ORDER BY table_name")
database_tables = pd.read_sql(sql_query, conn)['table_name'].tolist()
database_tables = [t for t in database_tables if user_selected_table in t]
# if user has specified buildings
if periods_to_load:
buildings_to_load = list(periods_to_load.keys())
else:
# get buildings present in all tables
sql_query = ''
for table in database_tables:
sql_query = (sql_query + '(SELECT DISTINCT dataid' +
' FROM "' + database_schema + '".' + table +
') UNION ')
sql_query = sql_query[:-7]
sql_query = (sql_query + ' ORDER BY dataid')
buildings_to_load = pd.read_sql(sql_query, conn)['dataid'].tolist()
# for each user specified building or all buildings in database
Returns:
bf_df: a pandas dataframe.
"""
# Connect to the PUDL database (test vs. live DB selected by `testing`).
pudl_engine = init.connect_db(testing=testing)
# SQLAlchemy Table object for the EIA 923 boiler fuel table.
bf_eia923_tbl = pt['boiler_fuel_eia923']
bf_eia923_select = sa.sql.select([bf_eia923_tbl, ])
# Restrict the pull to the requested report_date window, when bounds given.
if start_date is not None:
bf_eia923_select = bf_eia923_select.where(
bf_eia923_tbl.c.report_date >= start_date
)
if end_date is not None:
bf_eia923_select = bf_eia923_select.where(
bf_eia923_tbl.c.report_date <= end_date
)
bf_df = pd.read_sql(bf_eia923_select, pudl_engine)
# The total heat content is also useful in its own right, and we'll keep it
# around. Also needed to calculate average heat content per unit of fuel.
bf_df['total_heat_content_mmbtu'] = bf_df['fuel_qty_consumed'] * \
bf_df['fuel_mmbtu_per_unit']
# Create a date index for grouping based on freq
# Grouping key for the (downstream, not visible here) aggregation step.
by = ['plant_id_eia', 'boiler_id', 'fuel_type_pudl']
if freq is not None:
# In order to calculate the weighted average sulfur
# content and ash content we need to calculate these totals.
bf_df['total_sulfur_content'] = bf_df['fuel_qty_consumed'] * \
bf_df['sulfur_content_pct']
bf_df['total_ash_content'] = bf_df['fuel_qty_consumed'] * \
bf_df['ash_content_pct']
# DatetimeIndex on report_date enables time-based resampling downstream.
bf_df = bf_df.set_index(pd.DatetimeIndex(bf_df.report_date))
Returns:
pandas.DataFrame: A DataFrame containing all records from the EIA 923
Boiler Fuel table.
"""
# Same pull as the older variant above but with the renamed columns
# (fuel_consumed_units / fuel_type_code_pudl).
# NOTE(review): pudl_engine is not defined in this fragment — presumably
# created earlier in the enclosing function; confirm in full source.
bf_eia923_tbl = pt['boiler_fuel_eia923']
bf_eia923_select = sa.sql.select([bf_eia923_tbl, ])
# Restrict the pull to the requested report_date window, when bounds given.
if start_date is not None:
bf_eia923_select = bf_eia923_select.where(
bf_eia923_tbl.c.report_date >= start_date
)
if end_date is not None:
bf_eia923_select = bf_eia923_select.where(
bf_eia923_tbl.c.report_date <= end_date
)
bf_df = pd.read_sql(bf_eia923_select, pudl_engine)
# The total heat content is also useful in its own right, and we'll keep it
# around. Also needed to calculate average heat content per unit of fuel.
bf_df['total_heat_content_mmbtu'] = bf_df['fuel_consumed_units'] * \
bf_df['fuel_mmbtu_per_unit']
# Create a date index for grouping based on freq
# Grouping key for the (downstream, not visible here) aggregation step.
by = ['plant_id_eia', 'boiler_id', 'fuel_type_code_pudl']
if freq is not None:
# In order to calculate the weighted average sulfur
# content and ash content we need to calculate these totals.
bf_df['total_sulfur_content'] = bf_df['fuel_consumed_units'] * \
bf_df['sulfur_content_pct']
bf_df['total_ash_content'] = bf_df['fuel_consumed_units'] * \
bf_df['ash_content_pct']
# DatetimeIndex on report_date enables time-based resampling downstream.
bf_df = bf_df.set_index(pd.DatetimeIndex(bf_df.report_date))
"""Pull the more complete PUDL/EIA boiler generator associations."""
pudl_engine = init.connect_db(testing=testing)
bga_eia_tbl = pt['boiler_generator_assn_eia']
bga_eia_select = sa.sql.select([bga_eia_tbl])
if start_date is not None:
start_date = pd.to_datetime(start_date)
bga_eia_select = bga_eia_select.where(
bga_eia_tbl.c.report_date >= start_date
)
if end_date is not None:
end_date = pd.to_datetime(end_date)
bga_eia_select = bga_eia_select.where(
bga_eia_tbl.c.report_date <= end_date
)
bga_eia_df = pd.read_sql(bga_eia_select, pudl_engine)
out_df = extend_annual(bga_eia_df,
start_date=start_date, end_date=end_date)
return out_df
pandas.read_sql and get a DataFrameModel.
NOTE: The chunksize option is overridden to None always (for now).
Reference:
http://pandas.pydata.org/pandas-docs/version/0.18.1/generated/pandas.read_sql.html
pandas.read_sql(sql, con, index_col=None, coerce_float=True,
params=None, parse_dates=None, columns=None, chunksize=None)
:return: DataFrameModel
"""
# TODO: Decide if chunksize is worth keeping and how to handle?
# Arguments are forwarded positionally in pandas.read_sql's parameter
# order; chunksize is pinned to None so a single DataFrame (not an
# iterator of chunks) is returned for the model.
df = pandas.read_sql(sql, con, index_col, coerce_float,
params, parse_dates, columns, chunksize=None)
return DataFrameModel(df, filePath=filePath)
def population_in_tracts():
    """Return total population per census tract.

    Sums the ``P0010001`` (total population) column of
    ``shape_files.census_pop_housing`` grouped by tract, using the
    module-level ``uri`` for the database connection.

    Returns:
        pandas.DataFrame indexed by ``tract`` with a ``population`` column.
    """
    engine = create_engine(uri)
    query = ("SELECT tract, sum(\"P0010001\") AS population "
             "FROM shape_files.census_pop_housing "
             "GROUP BY tract")
    result = pd.read_sql(query, con=engine)
    return result.set_index("tract")
获取后复权价格
Parameters
----------
symbol
trade_date
Returns
-------
"""
if len(symbol) == 6:
sql = "SELECT close FROM hq_db.stock_kline_fq where code='{}' and date>='{}' order by date asc".format(symbol, trade_date)
else:
sql = "SELECT close FROM hq_db.stock_kline_fq where code='{}' and date>='{}' order by date asc".format(
symbol, trade_date)
df = pd.read_sql(sql, engine)
closes = df['close'].get_values()
if len(closes) == 0:
# raise Exception("get code close_price error!")
return -1
return closes[0]
def plot_user_distribution():
    """Plot per-user feature distributions from the local zhihu.db.

    For each feature in ``feature_list``, draws a scatter of that
    feature's values sorted in descending order (one point per user) and
    prints the mean, median, and standard deviation.
    """
    conn = sqlite3.connect("zhihu.db")
    user_data = pd.read_sql('select * from User', conn)  # database data -> pandas.DataFrame
    conn.close()
    feature_list = ['followee_num', 'follower_num', 'answer_num', 'agree_num', 'thanks_num']
    for feature in feature_list:
        pylab.figure(feature)
        pylab.title("Distribution of " + feature + ' to Individual User')
        pylab.xlabel("Individual User(sum=26161)")
        pylab.ylabel(feature + " of user")
        user = list(range(len(user_data)))
        # DataFrame -> list -> sort descending so the scatter shows the long tail.
        feature_value = sorted(list(user_data[feature]), reverse=True)
        pylab.scatter(user, feature_value)
        # Python 3 print() calls — the original used Python 2 print
        # statements, which are a SyntaxError under Python 3.
        print('mean of', feature, np.mean(list(user_data[feature])))
        print('median of', feature, np.median(list(user_data[feature])))
        print('standard deviation of', np.std(list(user_data[feature])), '\n')
def _print_query(conn, sql, index_col=None):
"""Print query results."""
row_factory = conn.row_factory
try:
conn.row_factory = None
frame = pd.read_sql(
sql, conn, index_col=index_col
)
columns = {
col: col.replace('_', '-')
for col in frame.columns
}
frame.rename(columns=columns, inplace=True)
if not frame.empty:
pd.set_option('max_rows', None)
pd.set_option('expand_frame_repr', False)
print('---')
print(frame)
print('')
finally:
conn.row_factory = row_factory