How to use the vaex.column.ColumnStringArrow class in vaex

To help you get started, we’ve selected a few vaex examples that show popular ways vaex.column.ColumnStringArrow is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github vaexio / vaex / packages / vaex-arrow / vaex_arrow / convert.py View on Github external
arrow_type = arrow_array.type
    buffers = arrow_array.buffers()
    if len(buffers) == 2:
        return numpy_array_from_arrow_array(arrow_array)
    elif len(buffers) == 3 and  isinstance(arrow_array.type, type(pyarrow.string())):
        bitmap_buffer, offsets, string_bytes = arrow_array.buffers()
        if arrow_array.null_count == 0:
            null_bitmap = None  # we drop any null_bitmap when there are no null counts
        else:
            null_bitmap = np.frombuffer(bitmap_buffer, 'uint8', len(bitmap_buffer))
        offsets = np.frombuffer(offsets, np.int32, len(offsets)//4)
        if string_bytes is None:
            string_bytes = np.array([], dtype='S1')
        else:
            string_bytes = np.frombuffer(string_bytes, 'S1', len(string_bytes))
        column = ColumnStringArrow(offsets, string_bytes, len(arrow_array), null_bitmap=null_bitmap)
        return column
    else:
        raise TypeError('type unsupported: %r' % arrow_type)
github vaexio / vaex / packages / vaex-core / vaex / functions.py View on Github external
2  is coming
      3  our
      4  way.

    >>> df.text.str.lower()
    Expression = str_lower(text)
    Length: 5 dtype: str (expression)
    ---------------------------------
    0    something
    1  very pretty
    2    is coming
    3          our
    4         way.
    """
    sl = _to_string_sequence(x).lower()
    return column.ColumnStringArrow(sl.bytes, sl.indices, sl.length, sl.offset, string_sequence=sl)
github vaexio / vaex / packages / vaex-core / vaex / functions.py View on Github external
2  is coming
      3  our
      4  way.

    >>> df.text.str.replace(pat='et', repl='__')
    Expression = str_replace(text, pat='et', repl='__')
    Length: 5 dtype: str (expression)
    ---------------------------------
    0    Som__hing
    1  very pr__ty
    2    is coming
    3          our
    4         way.
    """
    sl = _to_string_sequence(x).replace(pat, repl, n, flags, regex)
    return column.ColumnStringArrow(sl.bytes, sl.indices, sl.length, sl.offset, string_sequence=sl)
github vaexio / vaex / packages / vaex-core / vaex / functions.py View on Github external
3  our
      4  way.

    >>> df.text.str.rstrip(to_strip='ing')
    Expression = str_rstrip(text, to_strip='ing')
    Length: 5 dtype: str (expression)
    ---------------------------------
    0       Someth
    1  very pretty
    2       is com
    3          our
    4         way.
    """
    # in c++ we give empty string the same meaning as None
    sl = _to_string_sequence(x).rstrip('' if to_strip is None else to_strip) if to_strip != '' else x
    return column.ColumnStringArrow(sl.bytes, sl.indices, sl.length, sl.offset, string_sequence=sl)
github vaexio / vaex / packages / vaex-core / vaex / functions.py View on Github external
3  our
      4  way.

    >>> df.text.str.lstrip(to_strip='very ')
    Expression = str_lstrip(text, to_strip='very ')
    Length: 5 dtype: str (expression)
    ---------------------------------
    0  Something
    1     pretty
    2  is coming
    3        our
    4       way.
    """
    # in c++ we give empty string the same meaning as None
    sl = _to_string_sequence(x).lstrip('' if to_strip is None else to_strip) if to_strip != '' else x
    return column.ColumnStringArrow(sl.bytes, sl.indices, sl.length, sl.offset, string_sequence=sl)
github vaexio / vaex / packages / vaex-core / vaex / functions.py View on Github external
4  way.


    >>> df.text.str.upper()
    Expression = str_upper(text)
    Length: 5 dtype: str (expression)
    ---------------------------------
    0    SOMETHING
    1  VERY PRETTY
    2    IS COMING
    3          OUR
    4         WAY.

    """
    sl = _to_string_sequence(x).upper()
    return column.ColumnStringArrow(sl.bytes, sl.indices, sl.length, sl.offset, string_sequence=sl)
github vaexio / vaex / packages / vaex-core / vaex / export.py View on Github external
to_offset = 0  # we need this for selections
            to_offset_unselected = 0 # we need this for filtering
            count = len(dataset_input)# if not selection else dataset_input.length_unfiltered()
            is_string = dtype == str_type
            # TODO: if no filter, selection or mask, we can choose the quick path for str
            string_byte_offset = 0

            for i1, i2 in vaex.utils.subdivide(count, max_length=max_length):
                logger.debug("from %d to %d (total length: %d, output length: %d)", i1, i2, len(dataset_input), N)
                values = dataset_input.evaluate(column_name, i1=i1, i2=i2, filtered=True, parallel=False, selection=selection)
                no_values = len(values)
                if no_values:
                    if is_string:
                        # for strings, we don't take sorting/shuffling into account when building the structure
                        to_column = to_array
                        assert isinstance(to_column, ColumnStringArrow)
                        from_sequence = _to_string_sequence(values)
                        to_sequence = to_column.string_sequence.slice(to_offset, to_offset+no_values, string_byte_offset)
                        string_byte_offset += to_sequence.fill_from(from_sequence)
                        to_offset += no_values
                    else:
                        fill_value = np.nan if dtype.kind == "f" else None
                        # assert np.ma.isMaskedArray(to_array) == np.ma.isMaskedArray(values), "to (%s) and from (%s) array are not of both masked or unmasked (%s)" %\
                        # (np.ma.isMaskedArray(to_array), np.ma.isMaskedArray(values), column_name)
                        if shuffle or sort:
                            target_set_item = order_array[i1:i2]
                        else:
                            target_set_item = slice(to_offset, to_offset + no_values)
                        if dtype.type == np.datetime64:
                            values = values.view(np.int64)
                        if np.ma.isMaskedArray(to_array) and np.ma.isMaskedArray(values):
                            to_array.data[target_set_item] = values.filled(fill_value)
github vaexio / vaex / packages / vaex-core / vaex / functions.py View on Github external
2  is coming
      3  our
      4  way.

    >>> df.text.str.ljust(width=10, fillchar='!')
    Expression = str_ljust(text, width=10, fillchar='!')
    Length: 5 dtype: str (expression)
    ---------------------------------
    0   Something!
    1  very pretty
    2   is coming!
    3   our!!!!!!!
    4   way.!!!!!!
    """
    sl = _to_string_sequence(x).pad(width, fillchar, False, True)
    return column.ColumnStringArrow(sl.bytes, sl.indices, sl.length, sl.offset, string_sequence=sl)
github vaexio / vaex / packages / vaex-core / vaex / functions.py View on Github external
2  is coming
      3  our
      4  way.

    >>> df.text.str.pad(width=10, side='left', fillchar='!')
    Expression = str_pad(text, width=10, side='left', fillchar='!')
    Length: 5 dtype: str (expression)
    ---------------------------------
    0   !Something
    1  very pretty
    2   !is coming
    3   !!!!!!!our
    4   !!!!!!way.
    """
    sl = _to_string_sequence(x).pad(width, fillchar, side in ['left', 'both'], side in ['right', 'both'])
    return column.ColumnStringArrow(sl.bytes, sl.indices, sl.length, sl.offset, string_sequence=sl)
github vaexio / vaex / packages / vaex-core / vaex / functions.py View on Github external
2  is coming
      3  our
      4  way.

    >>> df.text.str.repeat(3)
    Expression = str_repeat(text, 3)
    Length: 5 dtype: str (expression)
    ---------------------------------
    0        SomethingSomethingSomething
    1  very prettyvery prettyvery pretty
    2        is comingis comingis coming
    3                          ourourour
    4                       way.way.way.
    """
    sl = _to_string_sequence(x).repeat(repeats)
    return column.ColumnStringArrow(sl.bytes, sl.indices, sl.length, sl.offset, string_sequence=sl)