How to use the vaex.utils._ensure_strings_from_expressions function in vaex

To help you get started, we’ve selected a few vaex examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github vaexio / vaex / packages / vaex-ml / vaex / ml / __init__.py View on Github external
def label_encoder(self, features=None, prefix='label_encoded_', allow_unseen=False):
        '''Requires vaex.ml: build a :class:`vaex.ml.transformations.LabelEncoder` and fit it on ``self.df``.

        :param features: List of features to encode; when omitted (or empty), the names
            returned by ``self.df.get_column_names()`` are used.
        :param prefix: Prefix for the names of the encoded features.
        :param allow_unseen: If True, encode unseen value as -1, otherwise an error is raised.
        :return: The fitted label encoder.
        '''
        if not features:
            features = self.df.get_column_names()
        # Features may be given as expressions; the encoder is handed plain strings.
        feature_names = _ensure_strings_from_expressions(features)
        encoder = LabelEncoder(
            features=feature_names,
            prefix=prefix,
            allow_unseen=allow_unseen,
        )
        encoder.fit(self.df)
        return encoder
github vaexio / vaex / packages / vaex-ml / vaex / ml / __init__.py View on Github external
def lightgbm_model(self, target, num_boost_round, features=None, copy=False, params=None,
                    prediction_name='lightgbm_prediction'):
        '''Requires vaex.ml: create a lightgbm model and train/fit it.

        :param target: The target variable to predict.
        :param num_boost_round: Number of boosting iterations.
        :param features: List of features to train on; when omitted (or empty), the names
            returned by ``self.df.get_column_names(virtual=True)`` are used.
        :param bool copy: Forwarded unchanged to ``LightGBMModel.fit``.
        :param dict params: Parameters forwarded to ``LightGBMModel``; when omitted, a new
            empty dict is created for this call.
        :param str prediction_name: Forwarded to ``LightGBMModel`` as ``prediction_name``.
        :return vaex.ml.lightgbm.LightGBMModel: Fitted LightGBM model.
        '''
        from .lightgbm import LightGBMModel
        # A default of ``{}`` in the signature would be one dict object shared by every
        # call (and by every model it is handed to); build a fresh one per call instead.
        if params is None:
            params = {}
        dataframe = self.df
        features = features or dataframe.get_column_names(virtual=True)
        # Features may be given as expressions; the model is handed plain strings.
        features = _ensure_strings_from_expressions(features)

        booster = LightGBMModel(prediction_name=prediction_name,
                                num_boost_round=num_boost_round,
                                features=features,
                                params=params)
        booster.fit(dataframe, target, copy=copy)
        return booster
github vaexio / vaex / packages / vaex-viz / vaex / viz / mpl.py View on Github external
shape = (shape,) * 2
    binby = []
    x = _ensure_strings_from_expressions(x)
    y = _ensure_strings_from_expressions(y)
    for expression in [y, x]:
        if expression is not None:
            binby = [expression] + binby
    fig = pylab.gcf()
    if figsize is not None:
        fig.set_size_inches(*figsize)
    import re

    what_units = None
    whats = _ensure_list(what)
    selections = _ensure_list(selection)
    selections = _ensure_strings_from_expressions(selections)

    if y is None:
        waslist, [x, ] = vaex.utils.listify(x)
    else:
        waslist, [x, y] = vaex.utils.listify(x, y)
        x = list(zip(x, y))
        limits = [limits]

    # every plot has its own vwhat for now
    vwhats = _expand_limits(vwhat, len(x))  # TODO: we're abusing this function..
    logger.debug("x: %s", x)
    limits, shape = self.limits(x, limits, shape=shape)
    shape = shape[0]
    logger.debug("limits: %r", limits)

    # mapping of a grid axis to a label
github vaexio / vaex / packages / vaex-viz / vaex / viz / mpl.py View on Github external
:param ylabel:
    :param aspect:
    :param tight_layout: call pylab.tight_layout or not
    :param colorbar: plot a colorbar or not
    :param interpolation: interpolation for imshow, possible options are: 'nearest', 'bilinear', 'bicubic', see matplotlib for more
    :param return_extra:
    :return:
    """
    import pylab
    import matplotlib
    n = _parse_n(normalize)
    if type(shape) == int:
        shape = (shape,) * 2
    binby = []
    x = _ensure_strings_from_expressions(x)
    y = _ensure_strings_from_expressions(y)
    for expression in [y, x]:
        if expression is not None:
            binby = [expression] + binby
    fig = pylab.gcf()
    if figsize is not None:
        fig.set_size_inches(*figsize)
    import re

    what_units = None
    whats = _ensure_list(what)
    selections = _ensure_list(selection)
    selections = _ensure_strings_from_expressions(selections)

    if y is None:
        waslist, [x, ] = vaex.utils.listify(x)
    else:
github vaexio / vaex / packages / vaex-plotly / vaex / plotly / __init__.py View on Github external
def _grid(self, expr, what=None, shape=64, limits=None, f='identity', n=None, selection=None, progress=None):

        import re

        f = _parse_f(f)
        n = _parse_n(n)

        # if type(shape) == int:
        #     shape = (shape,)
        binby = []
        expr = _ensure_strings_from_expressions(expr)
        expr = _ensure_list(expr)
        for expression in expr:
            if expression is not None:
                binby = [expression] + binby
        limits = self.df.limits(binby, limits)

        if type(shape) == int:
            shape = [shape] * len(expr)

        if isinstance(what, (vaex.stat.Expression)):
            grid = what.calculate(self.df, binby=binby, limits=limits, shape=shape, selection=selection)
        else:
            what = what.strip()
            groups = re.match("(.*)\\((.*)\\)", what).groups()
            if groups and len(groups) == 2:
                function = groups[0]
github vaexio / vaex / packages / vaex-core / vaex / expression.py View on Github external
def expand(self, stop=[]):
        """Return a new expression with virtual columns replaced by their definitions.

        Virtual columns listed in ``stop`` are not replaced.

        Example:

        >>> df = vaex.example()
        >>> r = np.sqrt(df.data.x**2 + df.data.y**2)
        >>> r.expand().expression
        'sqrt(((x ** 2) + (y ** 2)))'

        """
        stop = _ensure_strings_from_expressions(stop)

        def substitute(name):
            # Only virtual columns that are not excluded via ``stop`` get a replacement;
            # everything else yields None (presumably "leave as is" for
            # expresso.translate -- confirm against that helper).
            if name not in self.ds.virtual_columns or name in stop:
                return None
            return self.ds.virtual_columns[name]

        expanded = expresso.translate(self.ast, substitute)
        return Expression(self.ds, expanded)
github vaexio / vaex / packages / vaex-plotly / vaex / plotly / __init__.py View on Github external
cbar = None
                color_values = color[i]

            # This builds the data needed for the tooltip display, including the template
            hovertemplate = ''
            if tooltip_title[i] is not None:
                hover_title = self.df.evaluate(tooltip_title[i])
                hovertemplate += '<b>%{hovertext}</b><br>'
            else:
                hover_title = None

            hovertemplate += '<br>' + x[i] + '=%{x}'
            hovertemplate += '<br>' + y[i] + '=%{y}'

            if tooltip_data is not None:
                tooltip_data = _ensure_strings_from_expressions(tooltip_data)
                customdata = np.array(self.df.evaluate(', '.join(tooltip_data), selection=selection_value)).T
                for j, expr in enumerate(tooltip_data):
                    hovertemplate += '<br>' + expr + '=%{customdata[' + str(j) + ']}'
            else:
                customdata = None
            hovertemplate += ''

            # the plotting starts here
            marker = go.scatter.Marker(color=color_values, size=size_values, showscale=colorbar,
                                       colorscale=colormap, symbol=symbol_value, colorbar=cbar)

            trace = go.Scatter(x=x_values, y=y_values, error_x=xerr_object, error_y=yerr_object,
                               mode='markers',
                               marker=marker,
                               hovertemplate=hovertemplate,
                               customdata=customdata,
github vaexio / vaex / packages / vaex-viz / vaex / viz / mpl.py View on Github external
:param labels: Annotate the points with these text values
    :param selection: Single selection expression, or None
    :param length_limit: maximum number of rows it will plot
    :param length_check: should we do the maximum row check or not?
    :param label: label for the legend
    :param xlabel: label for x axis, if None .label(x) is used
    :param ylabel: label for y axis, if None .label(y) is used
    :param errorbar_kwargs: extra dict with arguments passed to plt.errorbar
    :param kwargs: extra arguments passed to pylab.scatter
    :return:
    """
    import pylab as plt
    x = _ensure_strings_from_expressions(x)
    y = _ensure_strings_from_expressions(y)
    label = str(label or selection)
    selection = _ensure_strings_from_expressions(selection)
    if length_check:
        count = self.count(selection=selection)
        if count > length_limit:
            raise ValueError("the number of rows (%d) is above the limit (%d), pass length_check=False, or increase length_limit" % (count, length_limit))
    x_values = self.evaluate(x, selection=selection)
    y_values = self.evaluate(y, selection=selection)
    if s_expr:
        kwargs["s"] = self.evaluate(s_expr, selection=selection)
    if c_expr:
        kwargs["c"] = self.evaluate(c_expr, selection=selection)
    plt.xlabel(xlabel or self.label(x))
    plt.ylabel(ylabel or self.label(y))
    s = plt.scatter(x_values, y_values, label=label, **kwargs)
    if labels:
        label_values = self.evaluate(labels, selection=selection)
        for i, label_value in enumerate(label_values):
github vaexio / vaex / packages / vaex-plotly / vaex / plotly / __init__.py View on Github external
_widget_selection_undo,
                                                     _widget_selection_redo])
        # Put them together in the control-widget: this is what is contained within the navigation drawer
        control_widget = vue.Layout(pa_1=True, column=True, children=[_widget_f,
                                                                      _widget_selection_space,
                                                                      _widget_selection,
                                                                      _widget_selection_mode,
                                                                      _widget_history_box])
        # The output widget
        _widget_output = widgets.Output()
        # The widget for the temporary output of the progressbar
        _widget_progress_output = widgets.Output()

        if isinstance(x, list) is False:
            x = [x]
        x = _ensure_strings_from_expressions(x)
        num_traces = len(x)
        # make consistency checks
        args = self._arg_len_check(num_traces, shape=shape, color=color, lw=lw, ls=ls,
                                   label=label, selection=selection)
        shape, color, lw, ls, label, selection = args

        traces = []
        for i in range(num_traces):

            xar, counts = self._grid(expr=x[i], what=what, shape=shape[i], limits=limits,
                                     f=_widget_f.v_model, n=n, selection=selection[i], progress=progress)

            line = go.scatter.Line(color=color[i], width=lw[i], dash=ls[i])
            traces.append(go.Scatter(x=xar, y=counts, mode='lines', line_shape='hv', line=line, name=label[i]))
        # Append a dummy scatter to enable selection
        traces.append(go.Scatter(y=[None]))