How to use the pyspark.since function in pyspark

To help you get started, we’ve selected a few examples of pyspark.since, drawn from popular ways it is used in public projects.

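Before working through the examples, it helps to know what since actually does: it is a small decorator, defined in pyspark/__init__.py, that appends a Sphinx `.. versionadded::` directive to the decorated function's docstring, recording the Spark version in which the API appeared. A minimal sketch of the effect (the real implementation also matches the docstring's existing indentation):

from pyspark import since

@since("2.0.0")
def add_one(x):
    """Return x + 1."""  # since() requires a docstring to annotate
    return x + 1

print(add_one.__doc__)
# Return x + 1.
#
# .. versionadded:: 2.0.0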

From qubole/spark-on-lambda: python/pyspark/ml/classification.py (view on GitHub)
    @since("2.0.0")
    def featuresCol(self):
        """
        Field in "predictions" which gives the features of each instance
        as a vector.
        """
        return self._call_java("featuresCol")

From qubole/spark-on-lambda: python/pyspark/sql/functions.py (view on GitHub)
@since(1.5)
@ignore_unicode_prefix
def substring_index(str, delim, count):
    """
    Returns the substring from string str before count occurrences of the delimiter delim.
    If count is positive, everything to the left of the final delimiter (counting from the
    left) is returned. If count is negative, everything to the right of the final delimiter
    (counting from the right) is returned. substring_index performs a case-sensitive match
    when searching for delim.

    >>> df = spark.createDataFrame([('a.b.c.d',)], ['s'])
    >>> df.select(substring_index(df.s, '.', 2).alias('s')).collect()
    [Row(s=u'a.b')]
    >>> df.select(substring_index(df.s, '.', -3).alias('s')).collect()
    [Row(s=u'b.c.d')]
    """
    sc = SparkContext._active_spark_context
    return Column(sc._jvm.functions.substring_index(_to_java_column(str), delim, count))

From qubole/spark-on-lambda: python/pyspark/ml/regression.py (view on GitHub)
    @since("1.4.0")
    def getFeatureSubsetStrategy(self):
        """
        Gets the value of featureSubsetStrategy or its default value.
        """
        return self.getOrDefault(self.featureSubsetStrategy)

From qubole/spark-on-lambda: python/pyspark/sql/functions.py (view on GitHub)
@since(2.0)
@ignore_unicode_prefix
def window(timeColumn, windowDuration, slideDuration=None, startTime=None):
    """Bucketize rows into one or more time windows given a timestamp specifying column. Window
    starts are inclusive but the window ends are exclusive, e.g. 12:05 will be in the window
    [12:05,12:10) but not in [12:00,12:05). Windows can support microsecond precision. Windows in
    the order of months are not supported.

    The time column must be of :class:`pyspark.sql.types.TimestampType`.

    Durations are provided as strings, e.g. '1 second', '1 day 12 hours', '2 minutes'. Valid
    interval strings are 'week', 'day', 'hour', 'minute', 'second', 'millisecond', 'microsecond'.
    If the ``slideDuration`` is not provided, the windows will be tumbling windows.

    The startTime is the offset with respect to 1970-01-01 00:00:00 UTC with which to start
    window intervals. For example, in order to have hourly tumbling windows that start 15 minutes
    past the hour, e.g. 12:15-13:15, 13:15-14:15..., provide `startTime` as `15 minutes`.
    """
    sc = SparkContext._active_spark_context
    time_col = _to_java_column(timeColumn)
    if slideDuration and startTime:
        res = sc._jvm.functions.window(time_col, windowDuration, slideDuration, startTime)
    elif slideDuration:
        res = sc._jvm.functions.window(time_col, windowDuration, slideDuration)
    elif startTime:
        # a tumbling window with an offset: the slide equals the window duration
        res = sc._jvm.functions.window(time_col, windowDuration, windowDuration, startTime)
    else:
        res = sc._jvm.functions.window(time_col, windowDuration)
    return Column(res)
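
A minimal usage sketch for window, assuming an active SparkSession named spark (the column names and values are illustrative):

df = spark.createDataFrame([("2016-03-11 09:00:07", 1)], ["ts", "val"])
grouped = df.groupBy(window(df.ts.cast("timestamp"), "5 seconds")).sum("val")
# the grouping column is a struct named "window" with "start" and "end" fields
grouped.select(grouped.window.start, grouped.window.end, "sum(val)").show(truncate=False)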

From qubole/spark-on-lambda: python/pyspark/sql/functions.py (view on GitHub)
@since(1.5)
def log2(col):
    """Returns the base-2 logarithm of the argument.

    >>> spark.createDataFrame([(4,)], ['a']).select(log2('a').alias('log2')).collect()
    [Row(log2=2.0)]
    """
    sc = SparkContext._active_spark_context
    return Column(sc._jvm.functions.log2(_to_java_column(col)))

From qubole/spark-on-lambda: python/pyspark/ml/feature.py (view on GitHub)
    @since("1.6.0")
    def getMin(self):
        """
        Gets the value of min or its default value.
        """
        return self.getOrDefault(self.min)

From qubole/spark-on-lambda: python/pyspark/ml/clustering.py (view on GitHub)
    @since("2.0.0")
    def getTopicConcentration(self):
        """
        Gets the value of :py:attr:`topicConcentration` or its default value.
        """
        return self.getOrDefault(self.topicConcentration)

From qubole/spark-on-lambda: python/pyspark/mllib/recommendation.py (view on GitHub)
    @since("1.4.0")
    def recommendUsers(self, product, num):
        """
        Recommends the top "num" number of users for a given product and
        returns a list of Rating objects sorted by the predicted rating in
        descending order.
        """
        return list(self.call("recommendUsers", product, num))
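
A minimal usage sketch, assuming an active SparkContext named sc (the ratings and ids are illustrative):

from pyspark.mllib.recommendation import ALS, Rating

ratings = sc.parallelize([Rating(1, 10, 5.0), Rating(2, 10, 3.0), Rating(2, 11, 4.0)])
model = ALS.train(ratings, rank=5, iterations=5, seed=42)
# the two users predicted to rate product 10 the highest
for r in model.recommendUsers(10, 2):
    print(r.user, r.rating)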

From qubole/spark-on-lambda: python/pyspark/ml/feature.py (view on GitHub)
    @since("1.6.0")
    def getMinDF(self):
        """
        Gets the value of minDF or its default value.
        """
        return self.getOrDefault(self.minDF)
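
These Params getters all follow the same pattern: return the explicitly set value, falling back to the default. For example, with CountVectorizer (a minimal sketch; the column names are illustrative):

from pyspark.ml.feature import CountVectorizer

cv = CountVectorizer(inputCol="words", outputCol="features", minDF=2.0)
print(cv.getMinDF())                 # 2.0, the value set above
print(CountVectorizer().getMinDF())  # 1.0, the default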

From qubole/spark-on-lambda: python/pyspark/ml/classification.py (view on GitHub)
    @since("2.0.0")
    def evaluate(self, dataset):
        """
        Evaluates the model on a test dataset.

        :param dataset:
          Test dataset to evaluate model on, where dataset is an
          instance of :py:class:`pyspark.sql.DataFrame`
        """
        if not isinstance(dataset, DataFrame):
            raise ValueError("dataset must be a DataFrame but got %s." % type(dataset))
        java_blr_summary = self._call_java("evaluate", dataset)
        return BinaryLogisticRegressionSummary(java_blr_summary)
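
A minimal usage sketch, assuming df is a DataFrame with "label" and "features" columns:

from pyspark.ml.classification import LogisticRegression

train, test = df.randomSplit([0.8, 0.2], seed=42)
model = LogisticRegression(maxIter=10).fit(train)
summary = model.evaluate(test)  # a BinaryLogisticRegressionSummary
print(summary.areaUnderROC)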