Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_count_1d_strings():
    """Check AggCount_string on a single-binner grid when one string is None.

    Six floats drive the binner; the same six values, converted to strings
    (with the entry at index 2 replaced by None), are the data being counted.
    """
    values = np.array([-1, -2, 0.5, 1.5, 4.5, 5], dtype='f8')

    # String form of the same values in an object array, with one missing entry.
    labels = values.astype(str).astype('O')
    labels[2] = None
    label_sequence = vaex.column._to_string_sequence(labels)

    n_bins = 5
    x_binner = vaex.superagg.BinnerScalar_float64('x', 0, 5, n_bins)
    x_binner.set_data(values)
    count_grid = vaex.superagg.Grid([x_binner])

    counter = vaex.superagg.AggCount_string(count_grid)
    counter.set_data(label_sequence, 0)

    # Wrap the aggregator as an array *before* binning; the assertion below
    # reads this same array after grid.bin() has run.
    counts = np.asarray(counter)
    count_grid.bin([counter])

    # 8 cells for 5 bins: the extra cells presumably hold missing/underflow/
    # overflow counts -- TODO confirm against the binner implementation.
    expected = [0, 2, 0, 1, 0, 0, 1, 1]
    assert counts.tolist() == expected
1 very pretty
2 is coming
3 our
4 6
>>> df.text.str.isdigit()
Expression = str_isdigit(text)
Length: 5 dtype: bool (expression)
----------------------------------
0 False
1 False
2 False
3 False
4 True
"""
return _to_string_sequence(x).isdigit()
1 very pretty
2 is coming
3 our
4 way.
>>> df.text.str.title()
Expression = str_title(text)
Length: 5 dtype: str (expression)
---------------------------------
0 Something
1 Very Pretty
2 Is Coming
3 Our
4 Way.
"""
sl = _to_string_sequence(x).title()
return column.ColumnStringArrow(sl.bytes, sl.indices, sl.length, sl.offset, string_sequence=sl)
1 very pretty
2 is coming
3 our
4 way.
>>> df.text.str.byte_length()
Expression = str_byte_length(text)
Length: 5 dtype: int64 (expression)
-----------------------------------
0 9
1 11
2 9
3 3
4 4
"""
return _to_string_sequence(x).byte_length()
def check_array(x, dtype):
    """Convert ``x`` into the form handed on to the caller's consumer.

    :param x: input data; in the non-string branch it must expose ``.dtype``
        and ``.view`` (presumably a numpy array -- confirm against callers).
    :param dtype: compared against ``str_type`` to select the string branch.
    :return: a string sequence when ``dtype == str_type``; otherwise the
        contiguous version of ``x``, viewed as ``'uint64'`` when its dtype
        kind is ``'m'`` or ``'M'``.
    """
    if dtype == str_type:
        return vaex.column._to_string_sequence(x)

    contiguous = vaex.utils.as_contiguous(x)
    # we pass datetime as int
    is_time_like = contiguous.dtype.kind in "mM"
    return contiguous.view('uint64') if is_time_like else contiguous
# Pair each expression in self.expressions_all with the block at the same
# position in blocks, so a block can be looked up by its expression.
block_map = {expr: block for expr, block in zip(self.expressions_all, blocks)}
1 very pretty
2 is coming
3 our
4 way.
>>> df.text.str.islower()
Expression = str_islower(text)
Length: 5 dtype: bool (expression)
----------------------------------
0 False
1 True
2 True
3 True
4 True
"""
return _to_string_sequence(x).islower()
1 very pretty
2 is coming
3 our
4 way.
>>> df.text.str.isalpha()
Expression = str_isalpha(text)
Length: 5 dtype: bool (expression)
----------------------------------
0 True
1 False
2 False
3 True
4 False
"""
return _to_string_sequence(x).isalpha()
1 very pretty
2 is coming
3 our
4 way.
>>> df.text.str.count(pat="et", regex=False)
Expression = str_count(text, pat='et', regex=False)
Length: 5 dtype: int64 (expression)
-----------------------------------
0 1
1 1
2 0
3 0
4 0
"""
return _to_string_sequence(x).count(pat, regex)
4 True
>>> df.text.str.equals('our')
Expression = str_equals(text, 'our')
Length: 5 dtype: bool (expression)
----------------------------------
0 False
1 False
2 False
3 True
4 False
"""
xmask = None
ymask = None
if not isinstance(x, six.string_types):
x = _to_string_sequence(x)
if not isinstance(y, six.string_types):
y = _to_string_sequence(y)
equals_mask = x.equals(y)
return equals_mask
2 is coming
3 our
4 way.
>>> df.text.str.rstrip(to_strip='ing')
Expression = str_rstrip(text, to_strip='ing')
Length: 5 dtype: str (expression)
---------------------------------
0 Someth
1 very pretty
2 is com
3 our
4 way.
"""
# in c++ we give empty string the same meaning as None
sl = _to_string_sequence(x).rstrip('' if to_strip is None else to_strip) if to_strip != '' else x
return column.ColumnStringArrow(sl.bytes, sl.indices, sl.length, sl.offset, string_sequence=sl)