# Extraction artifact (advertisement banner), not part of the module:
# "Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately."
line = self.file.read()
if line:
buffer = buffer + line
if self.delimiter in buffer:
parts = buffer.split(self.delimiter)
buffer = parts.pop(-1)
for part in parts:
yield self._emit(part + self.delimiter)
else:
yield gen.sleep(self.poll_interval)
if self.stopped:
break
@Stream.register_api(staticmethod)
class filenames(Source):
""" Stream over filenames in a directory
Parameters
----------
path: string
Directory path or globstring over which to search for files
poll_interval: Number
Seconds between checking path
start: bool (False)
Whether to start running immediately; otherwise call stream.start()
explicitly.
Examples
--------
>>> source = Stream.filenames('path/to/dir') # doctest: +SKIP
>>> source = Stream.filenames('path/to/*.csv', poll_interval=0.500) # doctest: +SKIP
while not self.stopped:
self.started = True
line = self.file.read()
if line:
buffer = buffer + line
if self.delimiter in buffer:
parts = buffer.split(self.delimiter)
buffer = parts.pop(-1)
for part in parts:
yield self._emit(part + self.delimiter)
else:
yield gen.sleep(self.poll_interval)
@Stream.register_api(staticmethod)
class filenames(Source):
""" Stream over filenames in a directory
Parameters
----------
path: string
Directory path or globstring over which to search for files
poll_interval: Number
Seconds between checking path
start: bool (False)
Whether to start running immediately; otherwise call stream.start()
explicitly.
Examples
--------
>>> source = Stream.filenames('path/to/dir') # doctest: +SKIP
>>> source = Stream.filenames('path/to/*.csv', poll_interval=0.500) # doctest: +SKIP
@gen.coroutine
def do_poll(self):
    """Poll ``self.path`` forever, emitting each not-yet-seen filename.

    Filenames are emitted in sorted order and remembered in ``self.seen``
    so each file is emitted at most once.  The loop exits only when
    ``self.stopped`` is set; the flag is checked after each sleep, so one
    final poll may happen after ``stop()`` is called.
    """
    while True:
        # Everything the glob matches now, minus what was already emitted.
        unseen = sorted(set(glob(self.path)) - self.seen)
        for filename in unseen:
            self.seen.add(filename)
            yield self._emit(filename)
        yield gen.sleep(self.poll_interval)  # TODO: remove poll if delayed
        if self.stopped:
            break
@Stream.register_api(staticmethod)
class from_kafka(Source):
""" Accepts messages from Kafka
Uses the confluent-kafka library,
https://docs.confluent.io/current/clients/confluent-kafka-python/
Parameters
----------
topics: list of str
Labels of Kafka topics to consume from
consumer_params: dict
Settings to set up the stream, see
https://docs.confluent.io/current/clients/confluent-kafka-python/#configuration
https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
Examples:
bootstrap.servers: Connection string(s) (host:port) by which to reach Kafka
# Tornado coroutine: repeatedly glob ``self.path`` and emit filenames that
# have not been emitted before.  ``self.seen`` is the memory of already
# emitted paths.  NOTE(review): indentation has been stripped in this chunk;
# logically everything below ``while True:`` is nested inside the loop, and
# the two lines after ``for fn in sorted(new):`` form the loop body.
@gen.coroutine
def do_poll(self):
while True:
# Current matches on disk.
filenames = set(glob(self.path))
# Only paths we have not yet emitted.
new = filenames - self.seen
for fn in sorted(new):
self.seen.add(fn)
yield self._emit(fn)
# Sleep between polls; the stop flag is only honoured after the sleep,
# so one extra poll can occur after ``stop()``.
yield gen.sleep(self.poll_interval) # TODO: remove poll if delayed
if self.stopped:
break
@Stream.register_api(staticmethod)
class from_tcp(Source):
"""
Creates events by reading from a socket using tornado TCPServer
The stream of incoming bytes is split on a given delimiter, and the parts
become the emitted events.
Parameters
----------
port : int
The port to open and listen on. It only gets opened when the source
is started, and closed upon ``stop()``
delimiter : bytes
The incoming data will be split on this value. The resulting events
will still have the delimiter at the end.
start : bool
Whether to immediately initiate the source. You probably want to
process.proc.terminate()
def start(self):
    """Start the external process.

    No-op if the source is already running; otherwise schedules the
    process launch on the IOLoop and clears the stopped flag.
    """
    if not self.stopped:
        return
    self.loop.add_callback(self._start_process)
    self.stopped = False
def stop(self):
    """Shutdown external process.

    Sets the stopped flag so the polling coroutine can exit.  Calling it
    on an already-stopped source does nothing.
    """
    if self.stopped:
        return
    self.stopped = True
@Stream.register_api(staticmethod)
class from_kafka(Source):
""" Accepts messages from Kafka
Uses the confluent-kafka library,
https://docs.confluent.io/current/clients/confluent-kafka-python/
Parameters
----------
topics: list of str
Labels of Kafka topics to consume from
consumer_params: dict
Settings to set up the stream, see
https://docs.confluent.io/current/clients/confluent-kafka-python/#configuration
https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
Examples:
bootstrap.servers, Connection string(s) (host:port) by which to reach
def start(self):
    """Start HTTP server and listen.

    Does nothing when already running; otherwise schedules server
    start-up on the IOLoop and marks the source as running.
    """
    if not self.stopped:
        return
    self.loop.add_callback(self._start_server)
    self.stopped = False
def stop(self):
    """Shutdown HTTP server.

    Stops the listening server, drops the reference to it, and marks the
    source stopped.  Safe to call more than once.
    """
    if self.stopped:
        return
    self.server.stop()
    self.server = None
    self.stopped = True
@Stream.register_api(staticmethod)
class from_process(Source):
"""Messages from a running external process
This doesn't work on Windows
Parameters
----------
cmd : list of str or str
Command to run: program name, followed by arguments
open_kwargs : dict
To pass on the the process open function, see ``subprocess.Popen``.
with_stderr : bool
Whether to include the process STDERR in the stream
start : bool
Whether to immediately startup the process. Usually you want to connect
downstream nodes first, and then call ``.start()``.
# Base class for push-based sources.  Maintains a ``stopped`` flag that the
# polling coroutines of subclasses consult to know when to exit.
# NOTE(review): indentation has been stripped in this chunk of the file; the
# two ``def``s below are methods of this class.
class Source(Stream):
_graphviz_shape = 'doubleoctagon'
def __init__(self, **kwargs):
# Sources start in the stopped state; ``start()`` clears this flag.
self.stopped = True
super(Source, self).__init__(**kwargs)
def stop(self): # pragma: no cover
# fallback stop method - for poll functions with while not self.stopped
if not self.stopped:
self.stopped = True
@Stream.register_api(staticmethod)
class from_textfile(Source):
""" Stream data from a text file
Parameters
----------
f: file or string
Source of the data. If string, will be opened.
poll_interval: Number
Interval to poll file for new data in seconds
delimiter: str
Character(s) to use to split the data into parts
start: bool
Whether to start running immediately; otherwise call stream.start()
explicitly.
from_end: bool
Whether to begin streaming from the end of the file (i.e., only emit
lines appended after the stream starts).
def write(text):
file.write(prefix + text + suffix)
if flush:
file.flush()
upstream.sink(write)
return file
# NOTE(review): duplicate header of the ``Source`` base class that already
# appears earlier in this chunk — the file looks corrupted / mis-merged here;
# verify against the upstream source before editing.
class Source(Stream):
_graphviz_shape = 'doubleoctagon'
@Stream.register_api(staticmethod)
class from_textfile(Source):
""" Stream data from a text file
Parameters
----------
f: file or string
poll_interval: Number
Interval to poll file for new data in seconds
delimiter: str ("\n")
Character(s) to use to split the data into parts
start: bool (False)
Whether to start running immediately; otherwise call stream.start()
explicitly.
Example
-------
>>> source = Stream.from_textfile('myfile.json') # doctest: +SKIP
def __init__(self, **kwargs):
# Sources begin in the stopped state until ``start()`` is called.
self.stopped = True
super(Source, self).__init__(**kwargs)
# NOTE(review): this ``listen`` call belongs to the HTTP server start-up
# path (a ``_start_server`` method), not to ``__init__`` — this chunk
# appears corrupted/mis-merged; confirm against the upstream source.
self.server.listen(self.port)
def start(self):
    """Schedule server start-up on the IOLoop unless already running."""
    if not self.stopped:
        return
    self.loop.add_callback(self._start_server)
    self.stopped = False
def stop(self):
    """Stop the server, release it, and mark the source stopped."""
    if self.stopped:
        return
    self.server.stop()
    self.server = None
    self.stopped = True
@Stream.register_api(staticmethod)
class from_http_server(Source):
"""Listen for HTTP POSTs on given port
Each connection will emit one event, containing the body data of
the request
Parameters
----------
port : int
The port to listen on
path : str
Specific path to listen on. Can be regex, but content is not used.
start : bool
Whether to immediately startup the server. Usually you want to connect
downstream nodes first, and then call ``.start()``.
server_kwargs : dict or None
If given, set of further parameters to pass on to HTTPServer