How to use the streamz.sources.FromKafkaBatched class in streamz

To help you get started, we've selected a few streamz examples based on popular ways the library is used in public projects.

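FromKafkaBatched is the source class behind Stream.from_kafka_batched: it polls a Kafka topic on an interval and emits the messages in batches, one event per batch. As a minimal sketch of the typical entry point (the broker address and topic name are placeholders for your own setup):

from streamz import Stream

source = Stream.from_kafka_batched(
    'mytopic',                                # topic to consume (placeholder)
    {'bootstrap.servers': 'localhost:9092',   # confluent-kafka consumer config
     'group.id': 'streamz'},
    npartitions=4,          # poll the topic's partitions in parallel
    poll_interval='1s',     # check for new messages once per second
)
source.map(len).sink(print)   # e.g. print the size of each incoming batch
source.start()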

From streamz/sources.py (python-streamz/streamz on GitHub): FromKafkaBatched.__init__
def __init__(self, topic, consumer_params, poll_interval='1s', npartitions=1,
             ensure_io_loop=True, **kwargs):
    self.consumer_params = consumer_params
    self.topic = topic
    self.npartitions = npartitions
    self.positions = [0] * npartitions   # last-seen offset per partition
    self.poll_interval = convert_interval(poll_interval)   # e.g. '1s' -> 1.0 seconds
    self.stopped = True

    super(FromKafkaBatched, self).__init__(ensure_io_loop=True, **kwargs)
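In practice you reach this constructor through the Stream.from_kafka_batched helper shown below rather than directly; still, a direct instantiation under the same assumptions would look like this (the raw source emits batch descriptors, which the helper later expands into message lists via get_message_batch):

from streamz.sources import FromKafkaBatched

raw = FromKafkaBatched(
    'mytopic',
    {'bootstrap.servers': 'localhost:9092', 'group.id': 'streamz'},
    poll_interval='2s',   # converted to seconds by convert_interval
    npartitions=4,        # one polling position is tracked per partition
)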
From streamz/sources.py (python-streamz/streamz on GitHub): the tail of the from_kafka_batched docstring and its body
npartitions: int
        Number of partitions in the topic
    start: bool (False)
        Whether to start polling upon instantiation

    Example
    -------

    >>> source = Stream.from_kafka_batched('mytopic',
    ...           {'bootstrap.servers': 'localhost:9092',
    ...            'group.id': 'streamz'}, npartitions=4)  # doctest: +SKIP
    """
if dask:
    from distributed.client import default_client
    # Run the source on the Dask client's event loop
    kwargs['loop'] = default_client().loop
source = FromKafkaBatched(topic, consumer_params,
                          poll_interval=poll_interval,
                          npartitions=npartitions, **kwargs)
if dask:
    # Scatter each batch descriptor to the Dask cluster
    source = source.scatter()

if start:
    source.start()

# The raw source emits batch descriptors; get_message_batch expands
# each one into the actual list of messages
return source.starmap(get_message_batch)
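Note that the dask branch above fetches the default distributed client, so a client must exist before the stream is created. A hedged sketch of the Dask variant (broker and topic are placeholders):

from dask.distributed import Client
from streamz import Stream

client = Client()   # must be created first, so default_client() can find it

source = Stream.from_kafka_batched(
    'mytopic',
    {'bootstrap.servers': 'localhost:9092', 'group.id': 'streamz'},
    npartitions=4,
    dask=True,        # batches are scattered to the cluster
)
source.map(len).gather().sink(print)   # process remotely, gather results locally
source.start()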
From streamz/sources.py (python-streamz/streamz on GitHub): a later FromKafkaBatched.__init__ with batch-size, key, and engine options
def __init__(self, topic, consumer_params, poll_interval='1s', npartitions=1,
             max_batch_size=10000, keys=False, engine=None, **kwargs):
    self.consumer_params = consumer_params
    # Override the auto-commit config to enforce custom streamz checkpointing
    self.consumer_params['enable.auto.commit'] = 'false'
    # Default to reading from the earliest offset unless the caller set one
    if 'auto.offset.reset' not in self.consumer_params:
        self.consumer_params['auto.offset.reset'] = 'earliest'
    self.topic = topic
    self.npartitions = npartitions
    self.positions = [0] * npartitions
    self.poll_interval = convert_interval(poll_interval)
    self.max_batch_size = max_batch_size
    self.keys = keys
    self.engine = engine
    self.stopped = True

    super(FromKafkaBatched, self).__init__(ensure_io_loop=True, **kwargs)
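Since self.consumer_params is the same dict object the caller passed in, the two config lines above mutate it in place, which you can observe directly. A small sketch (placeholder broker address):

from streamz.sources import FromKafkaBatched

params = {'bootstrap.servers': 'localhost:9092', 'group.id': 'streamz'}
FromKafkaBatched('mytopic', params, npartitions=2)

print(params['enable.auto.commit'])   # 'false', always forced off
print(params['auto.offset.reset'])    # 'earliest', filled in as the default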
From streamz/sources.py (python-streamz/streamz on GitHub): the matching from_kafka_batched body
    If 'auto.offset.reset': 'latest' is set in the consumer configs, the stream starts
    reading messages from the latest offset; if it is set to 'earliest', it reads from
    the start offset.

    Examples
    --------

    >>> source = Stream.from_kafka_batched('mytopic',
    ...           {'bootstrap.servers': 'localhost:9092',
    ...            'group.id': 'streamz'}, npartitions=4)  # doctest: +SKIP

    """
    if dask:
        from distributed.client import default_client
        kwargs['loop'] = default_client().loop
    source = FromKafkaBatched(topic, consumer_params,
                              poll_interval=poll_interval,
                              npartitions=npartitions,
                              max_batch_size=max_batch_size,
                              keys=keys,
                              engine=engine,
                              **kwargs)
    if dask:
        source = source.scatter()

    if start:
        source.start()

if engine == "cudf":  # pragma: no cover
    # GPU path: read each batch into a cudf DataFrame
    return source.starmap(get_message_batch_cudf)
else:
    return source.starmap(get_message_batch)
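Putting the extended options together, a hedged sketch of a GPU-backed stream (assumes a RAPIDS environment with cudf and custreamz available; broker and topic are placeholders):

from streamz import Stream

source = Stream.from_kafka_batched(
    'mytopic',
    {'bootstrap.servers': 'localhost:9092', 'group.id': 'streamz'},
    npartitions=4,
    max_batch_size=1000,   # cap on messages consumed per batch
    keys=True,             # include message keys along with the values
    engine='cudf',         # deliver each batch as a cudf DataFrame
)
source.map(len).sink(print)   # e.g. print the row count of each DataFrame
source.start()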