Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_getters(builtin_pkg):
req = build_req_tuple(
{'int': '1', 'empty_int': '', 'str': 'hello', 'empty_str': '', 'bool': 'true', 'float': '1.1'}
)
val = utils.get_str_arg(req, 'str')
assert isinstance(val, str) and val == 'hello'
val = utils.get_str_arg(req, 'str_def', default='def')
assert val == 'def'
val = utils.get_str_arg(req, 'empty_str')
assert val is None
with mock.patch('{}.str'.format(builtin_pkg), mock.Mock(side_effect=Exception)):
val = utils.get_str_arg(req, 'str', default='def')
assert val == 'def'
val = utils.get_int_arg(req, 'int')
assert isinstance(val, int) and val == 1
val = utils.get_int_arg(req, 'int_def', default=2)
assert val == 2
val = utils.get_int_arg(req, 'empty_int')
assert val is None
with mock.patch('{}.int'.format(builtin_pkg), mock.Mock(side_effect=Exception)):
val = utils.get_int_arg(req, 'int', default=2)
def test_getters(builtin_pkg):
req = build_req_tuple(
{'int': '1', 'empty_int': '', 'str': 'hello', 'empty_str': '', 'bool': 'true', 'float': '1.1'}
)
val = utils.get_str_arg(req, 'str')
assert isinstance(val, str) and val == 'hello'
val = utils.get_str_arg(req, 'str_def', default='def')
assert val == 'def'
val = utils.get_str_arg(req, 'empty_str')
assert val is None
with mock.patch('{}.str'.format(builtin_pkg), mock.Mock(side_effect=Exception)):
val = utils.get_str_arg(req, 'str', default='def')
assert val == 'def'
val = utils.get_int_arg(req, 'int')
assert isinstance(val, int) and val == 1
val = utils.get_int_arg(req, 'int_def', default=2)
assert val == 2
val = utils.get_int_arg(req, 'empty_int')
assert val is None
with mock.patch('{}.int'.format(builtin_pkg), mock.Mock(side_effect=Exception)):
val = utils.get_int_arg(req, 'int', default=2)
assert val == 2
val = utils.get_bool_arg(req, 'bool')
assert isinstance(val, bool) and val
val = utils.get_float_arg(req, 'float')
assert isinstance(val, float) and val == 1.1
val = utils.get_float_arg(req, 'int_def', default=2.0)
:param dateCol: string from flask.request.args['dateCol'] with name of date-type column in dataframe for timeseries
:param date: string from flask.request.args['date'] date value in dateCol to filter dataframe to
:returns: JSON {
data: [{col1: 0.123, col2: 0.123, index: 1},...,{col1: 0.123, col2: 0.123, index: N}],
stats: {
correlated: 50,
only_in_s0: 1,
only_in_s1: 2,
pearson: 0.987,
spearman: 0.879,
}
x: col1,
y: col2
} or {error: 'Exception message', traceback: 'Exception stacktrace'}
"""
cols = get_str_arg(request, 'cols')
cols = cols.split(',')
query = get_str_arg(request, 'query')
date = get_str_arg(request, 'date')
date_col = get_str_arg(request, 'dateCol')
try:
data = DATA[data_id]
data = data[data[date_col] == date] if date else data
if query:
data = data.query(query)
data = data[list(set(cols))].dropna(how='any')
data[str('index')] = data.index
s0 = data[cols[0]]
s1 = data[cols[1]]
pearson = s0.corr(s1, method='pearson')
spearman = s0.corr(s1, method='spearman')
:param data_id: integer string identifier for a D-Tale process's data
:type data_id: str
:param query: string from flask.request.args['query'] which is applied to DATA using the query() function
:param cols: comma-separated string from flask.request.args['cols'] containing names of two columns in dataframe
:param dateCol: string from flask.request.args['dateCol'] with name of date-type column in dataframe for timeseries
:returns: JSON {
data: {:col1:col2: {data: [{corr: 0.99, date: 'YYYY-MM-DD'},...], max: 0.99, min: 0.99}
} or {error: 'Exception message', traceback: 'Exception stacktrace'}
"""
try:
query = get_str_arg(request, 'query')
data = DATA[data_id]
data = data.query(query) if query is not None else data
cols = get_str_arg(request, 'cols')
cols = cols.split(',')
date_col = get_str_arg(request, 'dateCol')
data = data.groupby(date_col)[list(set(cols))].corr(method='pearson')
data.index.names = ['date', 'column']
data = data.reset_index()
data = data[data.column == cols[0]][['date', cols[1]]]
data.columns = ['date', 'corr']
return jsonify(build_chart(data, 'date', 'corr'))
except BaseException as e:
return jsonify(dict(error=str(e), traceback=str(traceback.format_exc())))
:param agg: string from flask.request.args['agg'] points to a specific function that can be applied to
:func: pandas.core.groupby.DataFrameGroupBy. Possible values are: count, first, last, mean,
median, min, max, std, var, mad, prod, sum
:returns: JSON {
data: {
series1: { x: [x1, x2, ..., xN], y: [y1, y2, ..., yN] },
series2: { x: [x1, x2, ..., xN], y: [y1, y2, ..., yN] },
...,
seriesN: { x: [x1, x2, ..., xN], y: [y1, y2, ..., yN] },
},
min: minY,
max: maxY,
} or {error: 'Exception message', traceback: 'Exception stacktrace'}
"""
try:
query = get_str_arg(request, 'query')
data = DATA[data_id]
if query:
try:
data = data.query(query)
except BaseException as e:
return jsonify(dict(error='Invalid query: {}'.format(str(e))))
if not len(data):
return jsonify(dict(error='query "{}" found no data, please alter'.format(query)))
x = get_str_arg(request, 'x')
y = get_str_arg(request, 'y')
group_col = get_str_arg(request, 'group')
if group_col is not None:
group_col = group_col.split(',')
agg = get_str_arg(request, 'agg')
return jsonify(build_chart(data, x, y, group_col, agg))
except BaseException as e:
stats: {
correlated: 50,
only_in_s0: 1,
only_in_s1: 2,
pearson: 0.987,
spearman: 0.879,
}
x: col1,
y: col2
} or {error: 'Exception message', traceback: 'Exception stacktrace'}
"""
cols = get_str_arg(request, 'cols')
cols = cols.split(',')
query = get_str_arg(request, 'query')
date = get_str_arg(request, 'date')
date_col = get_str_arg(request, 'dateCol')
try:
data = DATA[data_id]
data = data[data[date_col] == date] if date else data
if query:
data = data.query(query)
data = data[list(set(cols))].dropna(how='any')
data[str('index')] = data.index
s0 = data[cols[0]]
s1 = data[cols[1]]
pearson = s0.corr(s1, method='pearson')
spearman = s0.corr(s1, method='spearman')
stats = dict(
pearson='N/A' if pd.isnull(pearson) else pearson,
spearman='N/A' if pd.isnull(spearman) else spearman,
correlated=len(data),
try:
query = get_str_arg(request, 'query')
data = DATA[data_id]
if query:
try:
data = data.query(query)
except BaseException as e:
return jsonify(dict(error='Invalid query: {}'.format(str(e))))
if not len(data):
return jsonify(dict(error='query "{}" found no data, please alter'.format(query)))
x = get_str_arg(request, 'x')
y = get_str_arg(request, 'y')
group_col = get_str_arg(request, 'group')
if group_col is not None:
group_col = group_col.split(',')
agg = get_str_arg(request, 'agg')
return jsonify(build_chart(data, x, y, group_col, agg))
except BaseException as e:
return jsonify(dict(error=str(e), traceback=str(traceback.format_exc())))
"""
:class:`flask:flask.Flask` route which gathers Pearson correlations against all combinations of columns with
numeric data using :meth:`pandas:pandas.DataFrame.corr`
On large datasets with no :attr:`numpy:numpy.nan` data this code will use :meth:`numpy:numpy.corrcoef`
for speed purposes
:param data_id: integer string identifier for a D-Tale process's data
:type data_id: str
:param query: string from flask.request.args['query'] which is applied to DATA using the query() function
:returns: JSON {
data: [{column: col1, col1: 1.0, col2: 0.99, colN: 0.45},...,{column: colN, col1: 0.34, col2: 0.88, colN: 1.0}],
} or {error: 'Exception message', traceback: 'Exception stacktrace'}
"""
try:
query = get_str_arg(request, 'query')
data = DATA[data_id]
data = data.query(query) if query is not None else data
valid_corr_cols = []
valid_date_cols = []
for col_info in DTYPES[data_id]:
name, dtype = map(col_info.get, ['name', 'dtype'])
dtype = classify_type(dtype)
if dtype in ['I', 'F']:
valid_corr_cols.append(name)
elif dtype == 'D':
# even if a datetime column exists, we need to make sure that there is enough data for a date
# to warrant a correlation, https://github.com/man-group/dtale/issues/43
date_counts = data[name].dropna().value_counts()
if len(date_counts[date_counts > 1]) > 1:
valid_date_cols.append(name)
def get_histogram(data_id):
    """
    :class:`flask:flask.Flask` route which runs :func:`numpy.histogram` over one column of the stored data
    and sends the result to the front-end as JSON

    :param data_id: integer string identifier for a D-Tale process's data
    :type data_id: str
    :param col: string from flask.request.args['col'] containing name of a column in your dataframe,
                falls back to 'values' when not supplied
    :param query: string from flask.request.args['query'] which is applied to DATA using the query() function
    :param bins: the number of bins to display in your histogram (defaults to 20), options on the front-end
                 are 5, 10, 20, 50
    :returns: JSON {
        data: [count per bin],
        labels: [bin edges formatted to one decimal place],
        desc: output of load_describe for the selected column
    } or {error: 'Exception message', traceback: 'Exception stacktrace'}
    """
    # Request arguments are read before entering the guarded section, as in the original flow.
    column_arg = get_str_arg(request, 'col', 'values')
    filter_expr = get_str_arg(request, 'query')
    bin_count = get_int_arg(request, 'bins', 20)
    try:
        frame = DATA[data_id]
        if filter_expr:
            frame = frame.query(filter_expr)

        target = find_selected_column(frame, column_arg)
        # Keep only the rows with a value in the target column, as a single-column frame.
        has_value = ~pd.isnull(frame[target])
        frame = frame[has_value][[target]]

        counts, edges = np.histogram(frame, bins=bin_count)
        summary = load_describe(frame[target])

        bin_values = [json_float(count) for count in counts]
        bin_labels = ['{0:.1f}'.format(edge) for edge in edges]
        return jsonify(data=bin_values, labels=bin_labels, desc=summary)
    except BaseException as exc:
        failure = dict(error=str(exc), traceback=str(traceback.format_exc()))
        return jsonify(failure)