How to use the streamz.dataframe.core.DataFrame class in streamz

To help you get started, we’ve selected a few streamz examples based on popular ways the library is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github python-streamz / streamz / streamz / dataframe / core.py View on Github external
def stop(self):
        """ Clear the run flag held in ``self.continue_``

        Sets the first element of ``self.continue_`` to ``False``.  Any other
        elements are left untouched.
        """
        # NOTE(review): presumably this is the same list that the polling
        # callback checks before each sleep -- confirm against the caller.
        run_flag = self.continue_
        run_flag[0] = False

    @staticmethod
    @gen.coroutine
    def _cb(interval, freq, source, continue_):
        """ Periodically emit time-window tuples into ``source``

        After every ``interval`` sleep, emits ``(last, now, freq)`` through
        ``source._emit``, where ``last`` is the end of the previous window and
        ``now`` is the current clock reading.

        The flag ``continue_[0]`` is only inspected before each sleep, so a
        window that is already sleeping still emits once after the flag is
        cleared.

        Parameters
        ----------
        interval:
            Passed unchanged to ``gen.sleep`` between emissions
        freq:
            Forwarded untouched as the third element of every emitted tuple
        source:
            Object exposing ``_emit``; its result is yielded before continuing
        continue_:
            Indexable whose first element keeps the loop running while truthy
        """
        previous = time()
        while True:
            if not continue_[0]:
                break
            yield gen.sleep(interval)
            current = time()
            yield source._emit((previous, current, freq))
            # The next window starts where this one ended.
            previous = current


# Register (pandas type, collection class) pairs in ``_stream_types``.
# Each pair is appended to the container stored under either the
# 'streaming' or the 'updating' key.
# NOTE(review): presumably these entries are consulted elsewhere to choose a
# collection class from the type of an example value -- confirm against the
# code that reads ``_stream_types``.
_stream_types['streaming'].append((pd.DataFrame, DataFrame))
_stream_types['streaming'].append((pd.Index, Index))
_stream_types['streaming'].append((pd.Series, Series))
# The 'updating' key only receives DataFrame and Series pairs here.
_stream_types['updating'].append((pd.DataFrame, DataFrames))
_stream_types['updating'].append((pd.Series, Seriess))
github python-streamz / streamz / streamz / dataframe / core.py View on Github external
def __setitem__(self, key, value):
        """ Assign one or more columns in place

        Three kinds of ``value`` are handled:

        - a ``Series``: assigned under the single name ``key``
        - a ``DataFrame``: its columns are paired, in order, with the names
          in ``key`` and assigned together
        - anything else: assigned on every partition through
          ``pd.DataFrame.assign``

        The resulting stream and example replace the ones on ``self``.
        """
        if isinstance(value, Series):
            updated = self.assign(**{key: value})
        elif isinstance(value, DataFrame):
            new_columns = {}
            for target_name, source_name in zip(key, value.columns):
                new_columns[target_name] = value[source_name]
            updated = self.assign(**new_columns)
        else:
            # The modified copy is not used afterwards.
            # NOTE(review): presumably kept so that an invalid assignment
            # fails early on the example -- confirm before removing.
            probe = self.example.copy()
            probe[key] = value
            updated = self.map_partitions(pd.DataFrame.assign, self,
                                          **{key: value})

        # Adopt the new pipeline so the change is visible on this object.
        self.stream = updated.stream
        self.example = updated.example
        return self
github python-streamz / streamz / streamz / dataframe / core.py View on Github external
def __init__(self, *args, **kwargs):
        """ Create a streaming dataframe

        Accepts either a single dict mapping column names to streaming
        columns, for example ``{'x': sdf.x + 1, 'y': sdf.y - 1}``, or any
        other arguments, which are forwarded to the parent initializer.
        """
        built_from_dict = (len(args) == 1 and not kwargs
                           and isinstance(args[0], dict))
        if not built_from_dict:
            # Everything except the single-dict form goes to the parent.
            return super(DataFrame, self).__init__(*args, **kwargs)

        def concat(parts, columns=None):
            # Place the zipped pieces side by side and label the columns.
            frame = pd.concat(parts, axis=1)
            frame.columns = columns
            return frame

        mapping = args[0]
        names, pieces = zip(*mapping.items())

        # Zip the underlying streams, then merge each zipped tuple into
        # one frame.
        zipped = type(pieces[0].stream).zip(*[piece.stream
                                              for piece in pieces])
        stream = zipped.map(concat, columns=list(names))

        example_columns = {}
        for name, piece in mapping.items():
            # Values without an ``example`` attribute are used as they are.
            example_columns[name] = getattr(piece, 'example', piece)
        example = pd.DataFrame(example_columns)

        DataFrame.__init__(self, stream, example)
github python-streamz / streamz / streamz / dataframe / core.py View on Github external
def __init__(self, *args, **kwargs):
        """ Create a streaming dataframe

        Accepts either a single dict mapping column names to streaming
        columns, for example ``{'x': sdf.x + 1, 'y': sdf.y - 1}``, or any
        other arguments, which are forwarded to the parent initializer.
        """
        built_from_dict = (len(args) == 1 and not kwargs
                           and isinstance(args[0], dict))
        if not built_from_dict:
            # Everything except the single-dict form goes to the parent.
            return super(DataFrame, self).__init__(*args, **kwargs)

        def concat(parts, columns=None):
            # Place the zipped pieces side by side and label the columns.
            frame = pd.concat(parts, axis=1)
            frame.columns = columns
            return frame

        mapping = args[0]
        names, pieces = zip(*mapping.items())

        # Zip the underlying streams, then merge each zipped tuple into
        # one frame.
        zipped = type(pieces[0].stream).zip(*[piece.stream
                                              for piece in pieces])
        stream = zipped.map(concat, columns=list(names))

        example_columns = {}
        for name, piece in mapping.items():
            # Values without an ``example`` attribute are used as they are.
            example_columns[name] = getattr(piece, 'example', piece)
        example = pd.DataFrame(example_columns)

        DataFrame.__init__(self, stream, example)
github python-streamz / streamz / streamz / dataframe / core.py View on Github external
def _random_df(tup):
    last, now, freq = tup
    index = pd.DatetimeIndex(start=(last + freq.total_seconds()) * 1e9,
                             end=now * 1e9,
                             freq=freq)

    df = pd.DataFrame({'x': np.random.random(len(index)),
                       'y': np.random.poisson(size=len(index)),
                       'z': np.random.normal(0, 1, size=len(index))},
                       index=index)
    return df


class Random(DataFrame):
    """ A streaming dataframe of random data

    The x column is uniformly distributed.
    The y column is poisson distributed.
    The z column is normally distributed.

    This class is experimental and will likely be removed in the future

    Parameters
    ----------
    freq: timedelta
        The time interval between records
    interval: timedelta
        The time interval between new dataframes, should be significantly
        larger than freq