def __init__(self, topic, consumer_params, poll_interval='1s',
             npartitions=1, **kwargs):
    self.consumer_params = consumer_params
    self.topic = topic
    self.npartitions = npartitions
    self.positions = [0] * npartitions  # per-partition read offsets used for checkpointing
    self.poll_interval = convert_interval(poll_interval)
    self.stopped = True  # stays idle until start() is called
    super(FromKafkaBatched, self).__init__(ensure_io_loop=True, **kwargs)
npartitions: int
    Number of partitions in the topic
start: bool (False)
    Whether to start polling upon instantiation

Example
-------

>>> source = Stream.from_kafka_batched('mytopic',
...     {'bootstrap.servers': 'localhost:9092',
...      'group.id': 'streamz'}, npartitions=4)  # doctest: +SKIP
"""
if dask:
    from distributed.client import default_client
    kwargs['loop'] = default_client().loop  # poll on the Dask client's event loop
source = FromKafkaBatched(topic, consumer_params,
                          poll_interval=poll_interval,
                          npartitions=npartitions, **kwargs)
if dask:
    source = source.scatter()  # emit each element to the Dask workers as a future

if start:
    source.start()

return source.starmap(get_message_batch)  # unpack each emitted batch descriptor into get_message_batch
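Downstream usage of the returned stream is ordinary streamz composition. A minimal sketch, reusing the broker and topic from the docstring example above and assuming each emitted element is one batch (a list of raw message payloads) built by get_message_batch; start=False is the documented default, so nothing is polled until start() is called:

from streamz import Stream

source = Stream.from_kafka_batched('mytopic',
                                   {'bootstrap.servers': 'localhost:9092',
                                    'group.id': 'streamz'},
                                   npartitions=4, poll_interval='1s')

# Each element is one batch, so map(len) reports the per-poll batch size.
source.map(len).sink(print)

source.start()   # begin polling; batches flow to the sink from here on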
def __init__(self, topic, consumer_params, poll_interval='1s', npartitions=1,
             max_batch_size=10000, keys=False, engine=None, **kwargs):
    self.consumer_params = consumer_params
    # Override the auto-commit config to enforce custom streamz checkpointing
    self.consumer_params['enable.auto.commit'] = 'false'
    if 'auto.offset.reset' not in self.consumer_params:
        self.consumer_params['auto.offset.reset'] = 'earliest'
    self.topic = topic
    self.npartitions = npartitions
    self.positions = [0] * npartitions  # per-partition read offsets used for checkpointing
    self.poll_interval = convert_interval(poll_interval)
    self.max_batch_size = max_batch_size
    self.keys = keys
    self.engine = engine
    self.stopped = True  # stays idle until start() is called
    super(FromKafkaBatched, self).__init__(ensure_io_loop=True, **kwargs)
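Because self.consumer_params aliases the dictionary the caller passed in, the two overrides above are visible to the caller as well. A small sketch of the resulting configuration, reproducing those overrides on a placeholder config:

consumer_params = {'bootstrap.servers': 'localhost:9092',
                   'group.id': 'streamz',
                   'enable.auto.commit': 'true'}   # caller's value, about to be overridden

# The same mutations performed by __init__ above:
consumer_params['enable.auto.commit'] = 'false'              # streamz manages commits itself
consumer_params.setdefault('auto.offset.reset', 'earliest')  # applied only if the key is absent

print(consumer_params['enable.auto.commit'])   # 'false'
print(consumer_params['auto.offset.reset'])    # 'earliest'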
If 'auto.offset.reset': 'latest' is set in the consumer configs, the stream starts
reading messages from the latest offset. Otherwise, if it is set to 'earliest', it
reads from the beginning of the topic; when the key is absent, this source defaults
it to 'earliest'.
Examples
--------

>>> source = Stream.from_kafka_batched('mytopic',
...     {'bootstrap.servers': 'localhost:9092',
...      'group.id': 'streamz'}, npartitions=4)  # doctest: +SKIP
"""
if dask:
    from distributed.client import default_client
    kwargs['loop'] = default_client().loop  # poll on the Dask client's event loop
source = FromKafkaBatched(topic, consumer_params,
                          poll_interval=poll_interval,
                          npartitions=npartitions,
                          max_batch_size=max_batch_size,
                          keys=keys,
                          engine=engine,
                          **kwargs)
if dask:
    source = source.scatter()  # emit each element to the Dask workers as a future

if start:
    source.start()

if engine == "cudf":  # pragma: no cover
    return source.starmap(get_message_batch_cudf)  # GPU path: batches built with cuDF
else:
    return source.starmap(get_message_batch)
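The engine argument only changes which batch builder is star-mapped onto the emitted descriptors: the default path goes through get_message_batch, while engine='cudf' routes batches through get_message_batch_cudf, assumed here to produce one GPU dataframe per batch and to require a cudf installation. A hedged sketch with a placeholder broker and topic, setting 'auto.offset.reset': 'latest' as described in the note above:

from streamz import Stream

consumer_conf = {'bootstrap.servers': 'localhost:9092',
                 'group.id': 'streamz',
                 'auto.offset.reset': 'latest'}   # start from the latest offset, per the note above

# Default engine: batches arrive via get_message_batch (lists of message payloads).
batches = Stream.from_kafka_batched('mytopic', consumer_conf, npartitions=4)
batches.map(len).sink(print)

# cudf engine: batches arrive via get_message_batch_cudf instead
# (assumed to be GPU dataframes; requires cudf to be installed).
gpu_batches = Stream.from_kafka_batched('mytopic', consumer_conf,
                                        npartitions=4, engine='cudf')
gpu_batches.sink(lambda df: print(len(df)))

batches.start()
gpu_batches.start()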