    file = open(filename, mode=mode)

    def write(text):
        # wrap each element with the configured prefix/suffix before writing
        file.write(prefix + text + suffix)
        if flush:
            file.flush()

    # attach the writer as a sink so every element flowing through
    # ``upstream`` is written to the file
    upstream.sink(write)
    return file
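
# A minimal usage sketch of the file-writing sink pattern above (assumption:
# the fragment is the body of a helper that attaches a write callback to
# ``upstream``; the Stream import and the "events.log" path are illustrative,
# not taken from the snippet).
from streamz import Stream

source = Stream()
log_file = open("events.log", mode="a")

def write_line(text):
    # mirror the fragment: decorate the element and flush immediately
    log_file.write(text + "\n")
    log_file.flush()

source.sink(write_line)

source.emit("first event")    # appended to events.log
source.emit("second event")
log_file.close()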


class Source(Stream):
    _graphviz_shape = 'doubleoctagon'


@Stream.register_api(staticmethod)
class from_textfile(Source):
    """ Stream data from a text file

    Parameters
    ----------
    f: file or string
    poll_interval: Number
        Interval to poll file for new data in seconds
    delimiter: str ("\n")
        Character(s) to use to split the data into parts
    start: bool (False)
        Whether to start running immediately; otherwise call stream.start()
        explicitly.

    Example
    -------
    """


        stream_name = kwargs.pop("stream_name", None)
        self.kwargs = kwargs
        self.args = args

        Stream.__init__(self, upstream, stream_name=stream_name)
        _global_sinks.add(self)

    def update(self, x, who=None):
        result = self.func(x, *self.args, **self.kwargs)
        if gen.isawaitable(result):
            return result
        else:
            return []
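
# A short usage sketch for the sink node above (assumption: this is the
# streamz Stream.sink node; the fragment itself shows that extra *args and
# **kwargs given to .sink() are forwarded to the callback after each element).
from streamz import Stream

source = Stream()
results = []
source.sink(results.append)          # plain function: update() returns []
source.sink(print, "was emitted")    # called as print(x, "was emitted")
source.emit(1)
source.emit(2)
assert results == [1, 2]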


@Stream.register_api()
class map(Stream):
    """ Apply a function to every element in the stream

    Parameters
    ----------
    func: callable
    *args :
        The arguments to pass to the function.
    **kwargs:
        Keyword arguments to pass to func

    Examples
    --------
    >>> source = Stream()
    >>> source.map(lambda x: 2*x).sink(print)
    >>> for i in range(5):
    ...     source.emit(i)
    0
    2
    4
    6
    8
    """
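
# Hedged follow-up sketch: per the Parameters above, extra positional and
# keyword arguments passed to .map() are forwarded to ``func`` after the
# element itself, i.e. func(x, *args, **kwargs).
from streamz import Stream

source = Stream()
source.map(pow, 2).sink(print)   # each element x becomes pow(x, 2)
for i in range(3):
    source.emit(i)               # prints 0, 1, 4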


        self.server = EmitServer(**self.server_kwargs)
        self.server.listen(self.port)

    def start(self):
        if self.stopped:
            self.loop.add_callback(self._start_server)
            self.stopped = False

    def stop(self):
        if not self.stopped:
            self.server.stop()
            self.server = None
            self.stopped = True


@Stream.register_api(staticmethod)
class from_http_server(Source):
    """Listen for HTTP POSTs on given port

    Each connection will emit one event, containing the body data of
    the request

    Parameters
    ----------
    port : int
        The port to listen on
    path : str
        Specific path to listen on. Can be regex, but content is not used.
    start : bool
        Whether to immediately start up the server. Usually you want to connect
        downstream nodes first, and then call ``.start()``.
    server_kwargs : dict or None
    """
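
# A usage sketch for from_http_server (assumptions: port 9999 and the
# ``requests`` client shown in the comment are illustrative; each POST body
# arrives downstream as one bytes event, per the docstring above).
from streamz import Stream

source = Stream.from_http_server(9999)
source.map(bytes.decode).sink(print)   # decode and print each request body
source.start()                         # start only after wiring downstream

# From another process or shell, something like:
#   import requests
#   requests.post('http://localhost:9999/', data=b'{"x": 1}')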


class Source(Stream):
    _graphviz_shape = 'doubleoctagon'

    def __init__(self, **kwargs):
        self.stopped = True
        super(Source, self).__init__(**kwargs)

    def stop(self):  # pragma: no cover
        # fallback stop method - for poll functions with while not self.stopped
        if not self.stopped:
            self.stopped = True
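
# A hedged sketch of a custom polling source built on the base class above:
# the poll loop checks ``self.stopped`` so the fallback stop() can end it.
# The class name, payload, and interval are illustrative and not part of
# streamz; ``self._emit`` (push a value to downstream nodes), ``self.loop``,
# and tornado's ``gen`` are assumptions about internals mirrored from how the
# other snippets in this file use them.
from tornado import gen


@Stream.register_api(staticmethod)
class from_ticks(Source):
    """ Emit an increasing integer roughly every ``interval`` seconds """

    def __init__(self, interval=1.0, start=False, **kwargs):
        self.interval = interval
        super(from_ticks, self).__init__(**kwargs)
        if start:
            self.start()

    @gen.coroutine
    def do_poll(self):
        i = 0
        while not self.stopped:
            yield self._emit(i)            # push the value downstream
            i += 1
            yield gen.sleep(self.interval)

    def start(self):
        if self.stopped:
            self.stopped = False
            self.loop.add_callback(self.do_poll)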


@Stream.register_api(staticmethod)
class from_textfile(Source):
    """ Stream data from a text file

    Parameters
    ----------
    f: file or string
        Source of the data. If string, will be opened.
    poll_interval: Number
        Interval to poll file for new data in seconds
    delimiter: str
        Character(s) to use to split the data into parts
    start: bool
        Whether to start running immediately; otherwise call stream.start()
        explicitly.
    from_end: bool
        Whether to begin streaming from the end of the file (i.e., only emit
        new lines).
    """
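
# A usage sketch for from_textfile (assumption: 'server.log' is a placeholder
# path; wiring the sink before calling start() follows the docstring above).
from streamz import Stream

source = Stream.from_textfile('server.log', poll_interval=0.5)
source.sink(print)   # print each new line as it appears
source.start()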


@Stream.register_api(staticmethod)
def from_kafka_batched(topic, consumer_params, poll_interval='1s',
                       npartitions=1, start=False, dask=False,
                       max_batch_size=10000, keys=False,
                       engine=None, **kwargs):
    """ Get messages and keys (optional) from Kafka in batches

    Uses the confluent-kafka library,
    https://docs.confluent.io/current/clients/confluent-kafka-python/

    This source will emit lists of messages for each partition of a single given
    topic per time interval, if there is new data. If using dask, one future
    will be produced per partition per time-step, if there is data.

    Checkpointing is achieved through the use of reference counting. A reference
    counter is emitted downstream for each batch of data. A callback is
    triggered when the reference count reaches zero and the offsets are
    committed back to Kafka.
    """
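
# A usage sketch for from_kafka_batched (assumptions: the broker address,
# group id, topic name, and partition count below are placeholders, and
# confluent-kafka must be installed for this source to run).
from streamz import Stream

consumer_params = {
    'bootstrap.servers': 'localhost:9092',   # Kafka broker(s)
    'group.id': 'streamz-example',           # consumer group for offset commits
}
source = Stream.from_kafka_batched('mytopic', consumer_params,
                                   poll_interval='1s', npartitions=4)
source.map(len).sink(print)   # print the size of each batch of messages
source.start()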


@Stream.register_api(staticmethod)
def from_kafka_batched(topic, consumer_params, poll_interval='1s',
                       npartitions=1, start=False, dask=False, **kwargs):
    """ Get messages from Kafka in batches

    Uses the confluent-kafka library,
    https://docs.confluent.io/current/clients/confluent-kafka-python/

    This source will emit lists of messages for each partition of a single given
    topic per time interval, if there is new data. If using dask, one future
    will be produced per partition per time-step, if there is data.

    Parameters
    ----------
    topic: str
        Kafka topic to consume from
    consumer_params: dict