How to use the streamz.collection.Streaming class in streamz

To help you get started, we’ve selected a few streamz examples based on popular ways the library is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github python-streamz / streamz / streamz / dataframe / core.py View on Github external
window = self.n
        elif self.value is not None:
            diff = aggregations.diff_loc
            window = self.value

        outstream = stream.accumulate(aggregations.windowed_groupby_accumulator,
                                      agg=agg,
                                      start=None,
                                      returns_state=True,
                                      diff=diff,
                                      window=window)

        for typ, s_type in _stream_types[stream_type]:
            if isinstance(example, typ):
                return s_type(outstream, example)
        return Streaming(outstream, example, stream_type=stream_type)
github python-streamz / streamz / streamz / dataframe / core.py View on Github external
def _accumulate(self, Agg, **kwargs):
        stream_type = 'updating'

        if isinstance(self.grouper, Streaming):
            stream = self.root.stream.zip(self.grouper.stream)
            grouper_example = self.grouper.example
            agg = Agg(self.index, grouper=None, **kwargs)
        else:
            stream = self.root.stream
            grouper_example = self.grouper
            agg = Agg(self.index, grouper=self.grouper, **kwargs)

        # Compute example
        state = agg.initial(self.root.example, grouper=grouper_example)
        if hasattr(grouper_example, 'iloc'):
            grouper_example = grouper_example.iloc[:0]
        elif isinstance(grouper_example, (np.ndarray, pd.Index)):
            grouper_example = grouper_example[:0]
        _, example = agg.on_new(state,
                                self.root.example.iloc[:0],
github python-streamz / streamz / streamz / collection.py View on Github external
else:
        s = streams[0]

        if isinstance(args[0], Streaming):
            stream = s.stream.map(func, *args[1:], **kwargs)
        else:
            other = [(i, arg) for i, arg in enumerate(args)
                     if not isinstance(arg, Streaming)]
            stream = s.stream.map(partial_by_order, function=func, other=other,
                                  **kwargs)

    for typ, s_type in _stream_types[stream_type]:
        if isinstance(example, typ):
            return s_type(stream, example)
    return Streaming(stream, example, stream_type=stream_type)
github python-streamz / streamz / streamz / dataframe / core.py View on Github external
grouper_example = grouper_example.iloc[:0]
        elif isinstance(grouper_example, (np.ndarray, pd.Index)):
            grouper_example = grouper_example[:0]
        _, example = agg.on_new(state,
                                self.root.example.iloc[:0],
                                grouper=grouper_example)

        outstream = stream.accumulate(aggregations.groupby_accumulator,
                                      agg=agg,
                                      start=None,
                                      returns_state=True)

        for typ, s_type in _stream_types[stream_type]:
            if isinstance(example, typ):
                return s_type(outstream, example)
        return Streaming(outstream, example, stream_type=stream_type)
github python-streamz / streamz / streamz / dataframe / core.py View on Github external
import operator
from time import time

import numpy as np
import pandas as pd
from tornado.ioloop import IOLoop
from tornado import gen

from ..collection import Streaming, _stream_types, OperatorMixin
from ..sources import Source
from ..utils import M
from . import aggregations


class BaseFrame(Streaming):
    """ Streaming collection with frame-style convenience methods

    Each method defined here forwards to ``self.map_partitions`` with the
    matching attribute of ``M`` (``M.round``, ``M.reset_index``,
    ``M.set_index``, ``M.tail``) and returns whatever that call returns.

    NOTE(review): ``M`` is imported from ``..utils``; it presumably builds a
    callable that invokes the named method on each partition -- confirm there.
    """

    def round(self, decimals=0):
        """ Round elements in frame

        ``decimals`` (default 0) is forwarded as a keyword argument to
        ``M.round`` through ``map_partitions``.
        """
        return self.map_partitions(M.round, self, decimals=decimals)

    def reset_index(self):
        """ Reset Index

        Forwards ``M.reset_index`` through ``map_partitions`` with no extra
        arguments.
        """
        return self.map_partitions(M.reset_index, self)

    def set_index(self, index, **kwargs):
        """ Set Index

        ``index`` is forwarded positionally and ``**kwargs`` as keyword
        arguments to ``M.set_index`` through ``map_partitions``.
        """
        return self.map_partitions(M.set_index, self, index, **kwargs)

    def tail(self, n=5):
        """ Apply ``tail`` to the frame, keeping ``n`` entries (default 5)

        ``n`` is forwarded as a keyword argument to ``M.tail`` through
        ``map_partitions``.
        NOTE(review): presumably this yields the last ``n`` rows of each
        partition rather than of the whole stream -- confirm.
        """
        return self.map_partitions(M.tail, self, n=n)
github python-streamz / streamz / streamz / batch.py View on Github external
from .collection import Streaming, _stream_types
import toolz
import toolz.curried


class Batch(Streaming):
    """ A Stream of tuples or lists

    This streaming collection manages batches of Python objects such as lists
    of text or dictionaries.  By batching many elements together we reduce
    overhead from Python.

    This library is typically used at the early stages of data ingestion before
    handing off to streaming dataframes

    Examples
    --------
    >>> text = Streaming.from_file(myfile)  # doctest: +SKIP
    >>> b = text.partition(100).map(json.loads)  # doctest: +SKIP
    """
    def __init__(self, stream=None, example=None):
        if example is None:
github python-streamz / streamz / streamz / collection.py View on Github external
def map_partitions(func, *args, **kwargs):
    """ Map a function across all batch elements of this stream

    The output stream type will be determined by the action of that
    function on the example

    See Also
    --------
    Streaming.accumulate_partitions
    """
    example = kwargs.pop('example', None)
    if example is None:
        example = func(*[getattr(arg, 'example', arg) for arg in args], **kwargs)

    streams = [arg for arg in args if isinstance(arg, Streaming)]
    if 'stream_type' in kwargs:
        stream_type = kwargs['stream_type']
    else:
        stream_type = ('streaming'
                       if any(s._stream_type == 'streaming' for s in streams)
                       else 'updating')

    if len(streams) > 1:
        stream = type(streams[0].stream).zip(*[getattr(arg, 'stream', arg) for arg in args])
        stream = stream.map(apply_args, func, kwargs)

    else:
        s = streams[0]

        if isinstance(args[0], Streaming):
            stream = s.stream.map(func, *args[1:], **kwargs)