Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def label_encoder(self, features=None, prefix='label_encoded_', allow_unseen=False):
    '''Requires vaex.ml: build a :class:`vaex.ml.transformations.LabelEncoder` and fit it on ``self.df``.

    :param features: List of features to encode. A falsy value (e.g. None) selects
        the names returned by ``self.df.get_column_names()``.
    :param prefix: Prefix for the names of the encoded features.
    :param allow_unseen: If True, encode unseen value as -1, otherwise an error is raised.
    :return: The fitted encoder.
    '''
    # Fall back to the DataFrame's column names when no features were given.
    if not features:
        features = self.df.get_column_names()
    # Expressions are converted to their string form before reaching the encoder.
    feature_names = _ensure_strings_from_expressions(features)
    encoder = LabelEncoder(
        features=feature_names,
        prefix=prefix,
        allow_unseen=allow_unseen,
    )
    encoder.fit(self.df)
    return encoder
def lightgbm_model(self, target, num_boost_round, features=None, copy=False, params=None,
                   prediction_name='lightgbm_prediction'):
    '''Requires vaex.ml: create a lightgbm model and train/fit it.

    :param target: The target variable to predict.
    :param num_boost_round: Number of boosting iterations.
    :param features: List of features to train on. A falsy value (e.g. None) selects
        the names returned by ``self.df.get_column_names(virtual=True)``.
    :param bool copy: Forwarded unchanged to ``LightGBMModel.fit`` as ``copy``.
        NOTE(review): the previous text mentioned "the modified xgboost library",
        which looks copied from the xgboost variant -- confirm the exact meaning
        for lightgbm.
    :param dict params: Parameters handed to ``LightGBMModel`` as ``params``.
        ``None`` (the default) means a new empty dict is created for this call.
    :param str prediction_name: Handed to ``LightGBMModel`` as ``prediction_name``.
    :return vaex.ml.lightgbm.LightGBMModel: Fitted LightGBM model.
    '''
    from .lightgbm import LightGBMModel

    # A literal ``{}`` default is created once at definition time and would be
    # shared by every call (and by every model it is passed to); build a fresh
    # dict per call instead.
    if params is None:
        params = {}

    # Fall back to the DataFrame's column names (virtual ones included) when no
    # features were given, then convert expressions to their string form.
    if not features:
        features = self.df.get_column_names(virtual=True)
    features = _ensure_strings_from_expressions(features)

    booster = LightGBMModel(prediction_name=prediction_name,
                            num_boost_round=num_boost_round,
                            features=features,
                            params=params)
    booster.fit(self.df, target, copy=copy)
    return booster
shape = (shape,) * 2
binby = []
x = _ensure_strings_from_expressions(x)
y = _ensure_strings_from_expressions(y)
for expression in [y, x]:
if expression is not None:
binby = [expression] + binby
fig = pylab.gcf()
if figsize is not None:
fig.set_size_inches(*figsize)
import re
what_units = None
whats = _ensure_list(what)
selections = _ensure_list(selection)
selections = _ensure_strings_from_expressions(selections)
if y is None:
waslist, [x, ] = vaex.utils.listify(x)
else:
waslist, [x, y] = vaex.utils.listify(x, y)
x = list(zip(x, y))
limits = [limits]
# every plot has its own vwhat for now
vwhats = _expand_limits(vwhat, len(x)) # TODO: we're abusing this function..
logger.debug("x: %s", x)
limits, shape = self.limits(x, limits, shape=shape)
shape = shape[0]
logger.debug("limits: %r", limits)
# mapping of a grid axis to a label
:param ylabel:
:param aspect:
:param tight_layout: call pylab.tight_layout or not
:param colorbar: plot a colorbar or not
:param interpolation: interpolation for imshow, possible options are: 'nearest', 'bilinear', 'bicubic', see matplotlib for more
:param return_extra:
:return:
"""
import pylab
import matplotlib
n = _parse_n(normalize)
if type(shape) == int:
shape = (shape,) * 2
binby = []
x = _ensure_strings_from_expressions(x)
y = _ensure_strings_from_expressions(y)
for expression in [y, x]:
if expression is not None:
binby = [expression] + binby
fig = pylab.gcf()
if figsize is not None:
fig.set_size_inches(*figsize)
import re
what_units = None
whats = _ensure_list(what)
selections = _ensure_list(selection)
selections = _ensure_strings_from_expressions(selections)
if y is None:
waslist, [x, ] = vaex.utils.listify(x)
else:
def _grid(self, expr, what=None, shape=64, limits=None, f='identity', n=None, selection=None, progress=None):
import re
f = _parse_f(f)
n = _parse_n(n)
# if type(shape) == int:
# shape = (shape,)
binby = []
expr = _ensure_strings_from_expressions(expr)
expr = _ensure_list(expr)
for expression in expr:
if expression is not None:
binby = [expression] + binby
limits = self.df.limits(binby, limits)
if type(shape) == int:
shape = [shape] * len(expr)
if isinstance(what, (vaex.stat.Expression)):
grid = what.calculate(self.df, binby=binby, limits=limits, shape=shape, selection=selection)
else:
what = what.strip()
groups = re.match("(.*)\\((.*)\\)", what).groups()
if groups and len(groups) == 2:
function = groups[0]
def expand(self, stop=[]):
    """Expand the expression such that no virtual columns occurs, only normal columns.

    Every name that is a key of ``self.ds.virtual_columns`` and is not listed in
    ``stop`` is handed to ``expresso.translate`` together with the expression
    stored for it; for all other names the callback returns None.

    Example:

    >>> df = vaex.example()
    >>> r = np.sqrt(df.data.x**2 + df.data.y**2)
    >>> r.expand().expression
    'sqrt(((x ** 2) + (y ** 2)))'

    :param stop: Names (or expressions, converted to strings) that must not be substituted.
    :return: A new Expression on the same ``self.ds`` built from the translated result.
    """
    excluded = _ensure_strings_from_expressions(stop)

    def substitute(name):
        # No replacement for non-virtual names or names the caller excluded.
        if name not in self.ds.virtual_columns or name in excluded:
            return None
        return self.ds.virtual_columns[name]

    translated = expresso.translate(self.ast, substitute)
    return Expression(self.ds, translated)
cbar = None
color_values = color[i]
# This builds the data needed for the tooltip display, including the template
hovertemplate = ''
if tooltip_title[i] is not None:
hover_title = self.df.evaluate(tooltip_title[i])
hovertemplate += '<b>%{hovertext}</b><br>'
else:
hover_title = None
hovertemplate += '<br>' + x[i] + '=%{x}'
hovertemplate += '<br>' + y[i] + '=%{y}'
if tooltip_data is not None:
tooltip_data = _ensure_strings_from_expressions(tooltip_data)
customdata = np.array(self.df.evaluate(', '.join(tooltip_data), selection=selection_value)).T
for j, expr in enumerate(tooltip_data):
hovertemplate += '<br>' + expr + '=%{customdata[' + str(j) + ']}'
else:
customdata = None
hovertemplate += ''
# the plotting starts here
marker = go.scatter.Marker(color=color_values, size=size_values, showscale=colorbar,
colorscale=colormap, symbol=symbol_value, colorbar=cbar)
trace = go.Scatter(x=x_values, y=y_values, error_x=xerr_object, error_y=yerr_object,
mode='markers',
marker=marker,
hovertemplate=hovertemplate,
customdata=customdata,
:param labels: Annotate the points with these text values
:param selection: Single selection expression, or None
:param length_limit: maximum number of rows it will plot
:param length_check: should we do the maximum row check or not?
:param label: label for the legend
:param xlabel: label for x axis, if None .label(x) is used
:param ylabel: label for y axis, if None .label(y) is used
:param errorbar_kwargs: extra dict with arguments passed to plt.errorbar
:param kwargs: extra arguments passed to pylab.scatter
:return:
"""
import pylab as plt
x = _ensure_strings_from_expressions(x)
y = _ensure_strings_from_expressions(y)
label = str(label or selection)
selection = _ensure_strings_from_expressions(selection)
if length_check:
count = self.count(selection=selection)
if count > length_limit:
raise ValueError("the number of rows (%d) is above the limit (%d), pass length_check=False, or increase length_limit" % (count, length_limit))
x_values = self.evaluate(x, selection=selection)
y_values = self.evaluate(y, selection=selection)
if s_expr:
kwargs["s"] = self.evaluate(s_expr, selection=selection)
if c_expr:
kwargs["c"] = self.evaluate(c_expr, selection=selection)
plt.xlabel(xlabel or self.label(x))
plt.ylabel(ylabel or self.label(y))
s = plt.scatter(x_values, y_values, label=label, **kwargs)
if labels:
label_values = self.evaluate(labels, selection=selection)
for i, label_value in enumerate(label_values):
_widget_selection_undo,
_widget_selection_redo])
# Put them together in the control-widget: this is what is contained within the navigation drawer
control_widget = vue.Layout(pa_1=True, column=True, children=[_widget_f,
_widget_selection_space,
_widget_selection,
_widget_selection_mode,
_widget_history_box])
# The output widget
_widget_output = widgets.Output()
# The widget for the temporary output of the progressbar
_widget_progress_output = widgets.Output()
if isinstance(x, list) is False:
x = [x]
x = _ensure_strings_from_expressions(x)
num_traces = len(x)
# make consistency checks
args = self._arg_len_check(num_traces, shape=shape, color=color, lw=lw, ls=ls,
label=label, selection=selection)
shape, color, lw, ls, label, selection = args
traces = []
for i in range(num_traces):
xar, counts = self._grid(expr=x[i], what=what, shape=shape[i], limits=limits,
f=_widget_f.v_model, n=n, selection=selection[i], progress=progress)
line = go.scatter.Line(color=color[i], width=lw[i], dash=ls[i])
traces.append(go.Scatter(x=xar, y=counts, mode='lines', line_shape='hv', line=line, name=label[i]))
# Append a dummy scatter to enable selection
traces.append(go.Scatter(y=[None]))