How to use the pyspark.sql.column.Column class in pyspark

To help you get started, we’ve selected a few pyspark examples that show popular ways the Column class is used in public projects.


github qubole / spark-on-lambda / python / pyspark / sql / functions.py
def instr(str, substr):
    """
    Locate the position of the first occurrence of substr in the given string column.
    Returns null if either of the arguments is null.

    .. note:: The position is not zero based, but 1 based index. Returns 0 if substr
        could not be found in str.

    >>> df = spark.createDataFrame([('abcd',)], ['s',])
    >>> df.select(instr(df.s, 'b').alias('s')).collect()
    [Row(s=2)]
    """
    sc = SparkContext._active_spark_context
    return Column(sc._jvm.functions.instr(_to_java_column(str), substr))
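The snippet above is pyspark's own wrapper, so it only runs inside the pyspark source tree. As a minimal, self-contained sketch of calling it (assuming a local Spark install; the session settings are illustrative):

from pyspark.sql import SparkSession
from pyspark.sql.functions import instr

spark = SparkSession.builder.master("local[1]").appName("instr-demo").getOrCreate()
df = spark.createDataFrame([('abcd',)], ['s'])
# instr is 1-based: 'b' is the second character of 'abcd', so the result is 2.
df.select(instr(df.s, 'b').alias('pos')).collect()
# [Row(pos=2)]
spark.stop()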
github TresAmigosSD / SMV / src / main / python / smv / helpers.py
def smvDayOfMonth(self):
    """Extract day of month component from a timestamp

        Example:
            >>> df.select(col("dob").smvDayOfMonth())

        Returns:
            (Column): IntegerType. Day of month component as an integer (range 1-31), or null if the input column is null
    """
    jc = self._jColumnHelper.smvDayOfMonth()
    return Column(jc)
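SMV monkey-patches smvDayOfMonth onto Column through a JVM-side column helper, so this snippet only works with SMV on the classpath. In stock PySpark, the equivalent extraction is pyspark.sql.functions.dayofmonth; a minimal sketch with an illustrative date:

from pyspark.sql import SparkSession
from pyspark.sql.functions import dayofmonth, to_date

spark = SparkSession.builder.master("local[1]").appName("dom-demo").getOrCreate()
df = spark.createDataFrame([('1976-01-08',)], ['dob'])
# dayofmonth returns an IntegerType in 1-31, or null for a null input,
# matching the contract documented above.
df.select(dayofmonth(to_date(df.dob)).alias('dom')).collect()
# [Row(dom=8)]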
github qubole / spark-on-lambda / python / pyspark / sql / functions.py
def expr(str):
    """Parses the expression string into the column that it represents

    >>> df.select(expr("length(name)")).collect()
    [Row(length(name)=5), Row(length(name)=3)]
    """
    sc = SparkContext._active_spark_context
    return Column(sc._jvm.functions.expr(str))
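A quick way to see expr at work is the setup the docstring assumes: a DataFrame with a string column called name (the data below is illustrative):

from pyspark.sql import SparkSession
from pyspark.sql.functions import expr

spark = SparkSession.builder.master("local[1]").appName("expr-demo").getOrCreate()
df = spark.createDataFrame([('Alice',), ('Bob',)], ['name'])
# The SQL fragment is parsed on the JVM side and becomes an ordinary Column.
df.select(expr("length(name)")).collect()
# [Row(length(name)=5), Row(length(name)=3)]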
github qubole / spark-on-lambda / python / pyspark / sql / functions.py
def to_utc_timestamp(timestamp, tz):
    """
    Given a timestamp, which corresponds to a certain time of day in the given timezone, returns
    another timestamp that corresponds to the same time of day in UTC.

    >>> df = spark.createDataFrame([('1997-02-28 10:30:00',)], ['t'])
    >>> df.select(to_utc_timestamp(df.t, "PST").alias('t')).collect()
    [Row(t=datetime.datetime(1997, 2, 28, 18, 30))]
    """
    sc = SparkContext._active_spark_context
    return Column(sc._jvm.functions.to_utc_timestamp(_to_java_column(timestamp), tz))
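A sketch reproducing the doctest end to end. Short zone names like "PST" are resolved through the JVM's timezone tables and are deprecated in newer Spark releases, so the region-ID form is used here:

from pyspark.sql import SparkSession
from pyspark.sql.functions import to_utc_timestamp

spark = SparkSession.builder.master("local[1]").appName("utc-demo").getOrCreate()
df = spark.createDataFrame([('1997-02-28 10:30:00',)], ['t'])
# 10:30 in US Pacific time (UTC-8 in February) is 18:30 UTC.
df.select(to_utc_timestamp(df.t, "America/Los_Angeles").alias('t')).collect()
# [Row(t=datetime.datetime(1997, 2, 28, 18, 30))]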
github UCLA-VAST / blaze / spark-1.5.1 / python / pyspark / sql / functions.py
def _(col1, col2):
    sc = SparkContext._active_spark_context
    # Users might pass plain ints for simplicity; the JVM side would reject
    # them, so any argument that is not already a Column is coerced to float.
    jc = getattr(sc._jvm.functions, name)(col1._jc if isinstance(col1, Column) else float(col1),
                                          col2._jc if isinstance(col2, Column) else float(col2))
    return Column(jc)
_.__name__ = name
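This fragment is the inner closure of a factory that stamps out two-argument math wrappers; name is bound by the enclosing function, which the excerpt omits. A sketch of the same pattern using Python's math module instead of the JVM bridge (the factory name below is invented for illustration):

import math

def _make_binary_mathfunction(name):
    def _(v1, v2):
        # Mirror the snippet's defensive casts: accept plain numbers
        # and coerce them to float before delegating.
        return getattr(math, name)(float(v1), float(v2))
    _.__name__ = name
    return _

atan2 = _make_binary_mathfunction("atan2")
hypot = _make_binary_mathfunction("hypot")
print(hypot(3, 4))  # 5.0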
github UCLA-VAST / blaze / spark-1.5.1 / python / pyspark / sql / functions.py
def quarter(col):
    """
    Extract the quarter of a given date as an integer.

    >>> df = sqlContext.createDataFrame([('2015-04-08',)], ['a'])
    >>> df.select(quarter('a').alias('quarter')).collect()
    [Row(quarter=2)]
    """
    sc = SparkContext._active_spark_context
    return Column(sc._jvm.functions.quarter(_to_java_column(col)))
github UCLA-VAST / blaze / spark-1.5.1 / python / pyspark / sql / functions.py
def minute(col):
    """
    Extract the minute of a given timestamp as an integer.

    >>> df = sqlContext.createDataFrame([('2015-04-08 13:08:15',)], ['a'])
    >>> df.select(minute('a').alias('minute')).collect()
    [Row(minute=8)]
    """
    sc = SparkContext._active_spark_context
    return Column(sc._jvm.functions.minute(_to_java_column(col)))
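quarter and minute follow the same template, so one session exercises both. sqlContext in the doctests is the Spark 1.x entry point; the sketch below substitutes the modern SparkSession:

from pyspark.sql import SparkSession
from pyspark.sql.functions import quarter, minute

spark = SparkSession.builder.master("local[1]").appName("parts-demo").getOrCreate()
df = spark.createDataFrame([('2015-04-08 13:08:15',)], ['a'])
# The string column is implicitly cast to a timestamp, as in the doctests.
df.select(quarter('a').alias('quarter'), minute('a').alias('minute')).collect()
# [Row(quarter=2, minute=8)]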
github qubole / spark-on-lambda / python / pyspark / sql / functions.py
def concat_ws(sep, *cols):
    """
    Concatenates multiple input string columns together into a single string column,
    using the given separator.

    >>> df = spark.createDataFrame([('abcd','123')], ['s', 'd'])
    >>> df.select(concat_ws('-', df.s, df.d).alias('s')).collect()
    [Row(s=u'abcd-123')]
    """
    sc = SparkContext._active_spark_context
    return Column(sc._jvm.functions.concat_ws(sep, _to_seq(sc, cols, _to_java_column)))
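One property the docstring leaves implicit is worth showing: concat_ws skips null operands rather than nulling out the whole result. A sketch (the explicit schema string avoids type inference failing on the None value):

from pyspark.sql import SparkSession
from pyspark.sql.functions import concat_ws

spark = SparkSession.builder.master("local[1]").appName("concat-demo").getOrCreate()
df = spark.createDataFrame([('abcd', None, '123')], 's string, m string, d string')
# The null middle column is dropped from the join, not rendered as 'null'.
df.select(concat_ws('-', df.s, df.m, df.d).alias('s')).collect()
# [Row(s='abcd-123')]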
github UCLA-VAST / blaze / spark-1.5.1 / python / pyspark / sql / functions.py
def struct(*cols):
    """Creates a new struct column.

    :param cols: list of column names (string) or list of :class:`Column` expressions

    >>> df.select(struct('age', 'name').alias("struct")).collect()
    [Row(struct=Row(age=2, name=u'Alice')), Row(struct=Row(age=5, name=u'Bob'))]
    >>> df.select(struct([df.age, df.name]).alias("struct")).collect()
    [Row(struct=Row(age=2, name=u'Alice')), Row(struct=Row(age=5, name=u'Bob'))]
    """
    sc = SparkContext._active_spark_context
    if len(cols) == 1 and isinstance(cols[0], (list, set)):
        cols = cols[0]
    jc = sc._jvm.functions.struct(_to_seq(sc, cols, _to_java_column))
    return Column(jc)
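Struct columns are mainly useful because the packed fields stay addressable. A short sketch with the two-row age/name frame the doctest output implies:

from pyspark.sql import SparkSession
from pyspark.sql.functions import struct

spark = SparkSession.builder.master("local[1]").appName("struct-demo").getOrCreate()
df = spark.createDataFrame([(2, 'Alice'), (5, 'Bob')], ['age', 'name'])
packed = df.select(struct('age', 'name').alias('struct'))
# Nested fields can be pulled back out with dot notation.
packed.select('struct.name').collect()
# [Row(name='Alice'), Row(name='Bob')]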
github TresAmigosSD / SMV / server / smv.py
Column.smvPlusWeeks  = lambda c, delta: Column(colhelper(c).smvPlusWeeks(delta))
Column.smvPlusMonths = lambda c, delta: Column(colhelper(c).smvPlusMonths(delta))
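These two lines show SMV's extension pattern: new methods are bolted onto Column at import time, each delegating to a JVM helper through colhelper (defined elsewhere in smv.py). The same pattern works with pure-Python helpers built from public functions; a sketch, with the method name invented for illustration:

from pyspark.sql import Column
from pyspark.sql import functions as F

def _smv_plus_days(c, delta):
    # date_add shifts a date/timestamp column by a number of days.
    return F.date_add(c, delta)

# After this assignment the helper reads like a built-in method:
#     df.select(F.col("dob").smvPlusDays(7))
Column.smvPlusDays = _smv_plus_days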