Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def instr(str, substr):
"""
Locate the position of the first occurrence of substr column in the given string.
Returns null if either of the arguments are null.
.. note:: The position is not zero based, but 1 based index. Returns 0 if substr
could not be found in str.
>>> df = spark.createDataFrame([('abcd',)], ['s',])
>>> df.select(instr(df.s, 'b').alias('s')).collect()
[Row(s=2)]
"""
sc = SparkContext._active_spark_context
return Column(sc._jvm.functions.instr(_to_java_column(str), substr))
def smvDayOfMonth(self):
"""Extract day of month component from a timestamp
Example:
>>> df.select(col("dob").smvDayOfMonth())
Returns:
(Column): IntegerType. Day of month component as integer (range 1-31), or null if input column is null
"""
jc = self._jColumnHelper.smvDayOfMonth()
return Column(jc)
def expr(str):
"""Parses the expression string into the column that it represents
>>> df.select(expr("length(name)")).collect()
[Row(length(name)=5), Row(length(name)=3)]
"""
sc = SparkContext._active_spark_context
return Column(sc._jvm.functions.expr(str))
def to_utc_timestamp(timestamp, tz):
"""
Given a timestamp, which corresponds to a certain time of day in the given timezone, returns
another timestamp that corresponds to the same time of day in UTC.
>>> df = spark.createDataFrame([('1997-02-28 10:30:00',)], ['t'])
>>> df.select(to_utc_timestamp(df.t, "PST").alias('t')).collect()
[Row(t=datetime.datetime(1997, 2, 28, 18, 30))]
"""
sc = SparkContext._active_spark_context
return Column(sc._jvm.functions.to_utc_timestamp(_to_java_column(timestamp), tz))
def _(col1, col2):
sc = SparkContext._active_spark_context
# users might write ints for simplicity. This would throw an error on the JVM side.
jc = getattr(sc._jvm.functions, name)(col1._jc if isinstance(col1, Column) else float(col1),
col2._jc if isinstance(col2, Column) else float(col2))
return Column(jc)
_.__name__ = name
def quarter(col):
"""
Extract the quarter of a given date as integer.
>>> df = sqlContext.createDataFrame([('2015-04-08',)], ['a'])
>>> df.select(quarter('a').alias('quarter')).collect()
[Row(quarter=2)]
"""
sc = SparkContext._active_spark_context
return Column(sc._jvm.functions.quarter(_to_java_column(col)))
def minute(col):
"""
Extract the minutes of a given date as integer.
>>> df = sqlContext.createDataFrame([('2015-04-08 13:08:15',)], ['a'])
>>> df.select(minute('a').alias('minute')).collect()
[Row(minute=8)]
"""
sc = SparkContext._active_spark_context
return Column(sc._jvm.functions.minute(_to_java_column(col)))
def concat_ws(sep, *cols):
"""
Concatenates multiple input string columns together into a single string column,
using the given separator.
>>> df = spark.createDataFrame([('abcd','123')], ['s', 'd'])
>>> df.select(concat_ws('-', df.s, df.d).alias('s')).collect()
[Row(s=u'abcd-123')]
"""
sc = SparkContext._active_spark_context
return Column(sc._jvm.functions.concat_ws(sep, _to_seq(sc, cols, _to_java_column)))
def struct(*cols):
"""Creates a new struct column.
:param cols: list of column names (string) or list of :class:`Column` expressions
>>> df.select(struct('age', 'name').alias("struct")).collect()
[Row(struct=Row(age=2, name=u'Alice')), Row(struct=Row(age=5, name=u'Bob'))]
>>> df.select(struct([df.age, df.name]).alias("struct")).collect()
[Row(struct=Row(age=2, name=u'Alice')), Row(struct=Row(age=5, name=u'Bob'))]
"""
sc = SparkContext._active_spark_context
if len(cols) == 1 and isinstance(cols[0], (list, set)):
cols = cols[0]
jc = sc._jvm.functions.struct(_to_seq(sc, cols, _to_java_column))
return Column(jc)
Column.smvPlusWeeks = lambda c, delta: Column(colhelper(c).smvPlusWeeks(delta))
Column.smvPlusMonths = lambda c, delta: Column(colhelper(c).smvPlusMonths(delta))