Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# repartition does not have any state
pass
def initialize(self, config, context):
    """Log the component-specific config at debug level and zero the counters.

    ``context`` is accepted but not used here.
    """
    config_text = str(config)
    self.logger.debug("RepartitionBolt's Component-specific config: \n%s" % config_text)
    # Chained assignment binds left to right: processed first, then emitted.
    self.processed = self.emitted = 0
def process(self, tup):
    """Re-emit the tuple's first value on the 'output' stream, then ack it.

    Both the ``processed`` and ``emitted`` counters grow by one per tuple.
    """
    payload = tup.values[0]
    outgoing = [payload]
    self.emit(outgoing, stream='output')
    self.processed += 1
    self.emitted += 1
    self.ack(tup)
# pylint: disable=protected-access,deprecated-method
class RepartitionStreamlet(Streamlet):
"""RepartitionStreamlet"""
def __init__(self, num_partitions, repartition_function, parent):
    """Validate the arguments and store them on the streamlet.

    Args:
        num_partitions: value passed on to ``set_num_partitions``.
        repartition_function: callable that must accept exactly two arguments.
        parent: upstream ``Streamlet`` whose output feeds this one.

    Raises:
        RuntimeError: if ``repartition_function`` is not callable or does not
            take two arguments, or if ``parent`` is not a ``Streamlet``.
    """
    super(RepartitionStreamlet, self).__init__()
    if not callable(repartition_function):
        raise RuntimeError("Repartition function has to be callable")
    # The old check measured len() of the ArgSpec tuple itself, which is
    # always 4, so every function was rejected. Count the parameters instead.
    if hasattr(inspect, "signature"):
        arg_count = len(inspect.signature(repartition_function).parameters)
    else:
        # Interpreters without inspect.signature (Python 2).
        arg_count = len(inspect.getargspec(repartition_function).args)
    if arg_count != 2:
        raise RuntimeError("Repartition function should take 2 arguments")
    if not isinstance(parent, Streamlet):
        # Message previously named "FlatMap" (copy-paste slip).
        raise RuntimeError("Parent of Repartition Streamlet has to be a Streamlet")
    self._parent = parent
    self._repartition_function = repartition_function
    self.set_num_partitions(num_partitions)
# pylint: disable=no-self-use
def _calculate_inputs(self):
return {GlobalStreamId(self._parent.get_name(), self._parent._output) :
class ReduceGrouping(ICustomGrouping):
    """Custom grouping that picks one target task from the hash of a pair's first item."""

    def prepare(self, context, component, stream, target_tasks):
        """Store the task ids that ``choose_tasks`` selects from."""
        self.target_tasks = target_tasks

    def choose_tasks(self, values):
        """Return the single task that should receive ``values``.

        Args:
            values: one-element list whose only item is an iterable of
                length 2; its item 0 is hashed to select the task.

        Returns:
            A one-element list holding the chosen entry of ``self.target_tasks``.

        Raises:
            RuntimeError: if the payload is not an iterable of length 2.
        """
        # collections.Iterable was removed in Python 3.10; the ABC lives in
        # collections.abc. Fall back for interpreters that lack that module.
        try:
            from collections.abc import Iterable
        except ImportError:
            from collections import Iterable

        assert isinstance(values, list) and len(values) == 1
        userdata = values[0]
        if not isinstance(userdata, Iterable) or len(userdata) != 2:
            raise RuntimeError("Tuples going to reduce must be iterable of length 2")
        # Equal first items hash equally, so they land on the same task.
        # NOTE(review): str hashes are salted per process on Python 3 unless
        # PYTHONHASHSEED is pinned - confirm all emitters agree on the hash.
        hashvalue = hash(userdata[0])
        target_index = hashvalue % len(self.target_tasks)
        return [self.target_tasks[target_index]]
# pylint: disable=protected-access
class ReduceByWindowStreamlet(Streamlet):
"""ReduceByWindowStreamlet"""
def __init__(self, window_config, reduce_function, parent):
    """Validate the arguments and store them on the streamlet.

    Args:
        window_config: must be a ``WindowConfig`` instance.
        reduce_function: must be callable.
        parent: upstream ``Streamlet``; its partition count is adopted.

    Raises:
        RuntimeError: if any argument fails the checks above.
    """
    super(ReduceByWindowStreamlet, self).__init__()
    if not isinstance(window_config, WindowConfig):
        raise RuntimeError("window config has to be of type WindowConfig")
    if not callable(reduce_function):
        raise RuntimeError("ReduceByWindow function has to be callable")
    if not isinstance(parent, Streamlet):
        # Message previously named "FlatMap" (copy-paste slip).
        raise RuntimeError("Parent of ReduceByWindow Streamlet has to be a Streamlet")
    self._parent = parent
    self._window_config = window_config
    self._reduce_function = reduce_function
    self.set_num_partitions(parent.get_num_partitions())
def _calculate_inputs(self):
return {GlobalStreamId(self._parent.get_name(), self._parent._output) :
def pre_save(self, checkpoint_id):
    """Do nothing: there is no state to save for ``checkpoint_id``."""
    return None
def initialize(self, config, context):
    """Log the component-specific config at debug level and zero ``processed``.

    ``context`` is accepted but not used here.
    """
    config_text = str(config)
    message = "LogBolt's Component-specific config: \n%s" % config_text
    self.logger.debug(message)
    self.processed = 0
def process(self, tup):
    """Log the tuple's first value at info level, count it, and ack the tuple."""
    first_value = tup.values[0]
    rendered = str(first_value)
    self.logger.info(rendered)
    self.processed += 1
    self.ack(tup)
# pylint: disable=protected-access
class LogStreamlet(Streamlet):
"""LogStreamlet"""
def __init__(self, parent):
    """Attach this streamlet to ``parent`` and adopt its partition count.

    Raises:
        RuntimeError: if ``parent`` is not a ``Streamlet``.
    """
    super(LogStreamlet, self).__init__()
    parent_is_streamlet = isinstance(parent, Streamlet)
    if not parent_is_streamlet:
        raise RuntimeError("Parent of Log Streamlet has to be a Streamlet")
    self._parent = parent
    partition_count = parent.get_num_partitions()
    self.set_num_partitions(partition_count)
def _calculate_inputs(self):
    """Return a one-entry dict mapping the parent's output stream to shuffle grouping."""
    upstream = self._parent
    stream_id = GlobalStreamId(upstream.get_name(), upstream._output)
    return {stream_id: Grouping.SHUFFLE}
def _build_this(self, builder, stage_names):
if not self.get_name():
self.set_name(self._default_stage_name_calculator("log", stage_names))
if self.get_name() in stage_names:
def __init__(self, parent):
    """Attach this streamlet to ``parent`` and adopt its partition count.

    Raises:
        RuntimeError: if ``parent`` is not a ``Streamlet``.
    """
    super(ConsumeStreamlet, self).__init__()
    parent_is_streamlet = isinstance(parent, Streamlet)
    if not parent_is_streamlet:
        raise RuntimeError("Parent of Consume Streamlet has to be a Streamlet")
    self._parent = parent
    partition_count = parent.get_num_partitions()
    self.set_num_partitions(partition_count)
class ReduceGrouping(ICustomGrouping):
    """Custom grouping that picks one target task from the hash of a pair's first item."""

    def prepare(self, context, component, stream, target_tasks):
        """Store the task ids that ``choose_tasks`` selects from."""
        self.target_tasks = target_tasks

    def choose_tasks(self, values):
        """Return the single task that should receive ``values``.

        Args:
            values: one-element list whose only item is an iterable of
                length 2; its item 0 is hashed to select the task.

        Returns:
            A one-element list holding the chosen entry of ``self.target_tasks``.

        Raises:
            RuntimeError: if the payload is not an iterable of length 2.
        """
        # collections.Iterable was removed in Python 3.10; the ABC lives in
        # collections.abc. Fall back for interpreters that lack that module.
        try:
            from collections.abc import Iterable
        except ImportError:
            from collections import Iterable

        assert isinstance(values, list) and len(values) == 1
        userdata = values[0]
        if not isinstance(userdata, Iterable) or len(userdata) != 2:
            raise RuntimeError("Tuples going to reduce must be iterable of length 2")
        # Equal first items hash equally, so they land on the same task.
        # NOTE(review): str hashes are salted per process on Python 3 unless
        # PYTHONHASHSEED is pinned - confirm all emitters agree on the hash.
        hashvalue = hash(userdata[0])
        target_index = hashvalue % len(self.target_tasks)
        return [self.target_tasks[target_index]]
# pylint: disable=protected-access
class ReduceByKeyAndWindowStreamlet(Streamlet):
    """ReduceByKeyAndWindowStreamlet

    Holds a window config and a reduce function, and takes its input from
    ``parent`` through a custom grouping.
    """

    def __init__(self, window_config, reduce_function, parent):
        """Validate the arguments and store them on the streamlet.

        Args:
            window_config: must be a ``WindowConfig`` instance.
            reduce_function: must be callable.
            parent: upstream ``Streamlet``.

        Raises:
            RuntimeError: if any argument fails the checks above.
        """
        super(ReduceByKeyAndWindowStreamlet, self).__init__()
        if not isinstance(window_config, WindowConfig):
            raise RuntimeError("window config has to be a WindowConfig")
        if not callable(reduce_function):
            raise RuntimeError("ReduceByKeyAndWindow function has to be callable")
        if not isinstance(parent, Streamlet):
            # Message previously named "Filter" (copy-paste slip).
            raise RuntimeError("Parent of ReduceByKeyAndWindow Streamlet has to be a Streamlet")
        self._window_config = window_config
        self._reduce_function = reduce_function
        self._parent = parent
        # NOTE(review): no set_num_partitions call is made here - confirm
        # that leaving the partition count untouched is intentional.

    def _calculate_inputs(self):
        """Return a one-entry dict mapping the parent's output stream to the custom grouping."""
        return {GlobalStreamId(self._parent.get_name(), self._parent._output) :
                Grouping.custom("heronpy.streamlet.impl.reducebykeyandwindowbolt.ReduceGrouping")}
def __init__(self, transform_operator, parent):
    """Store the operator and parent, adopting the parent's partition count.

    Raises:
        RuntimeError: if ``transform_operator`` is not a ``TransformOperator``
            or ``parent`` is not a ``Streamlet``.
    """
    super(TransformStreamlet, self).__init__()
    operator_ok = isinstance(transform_operator, TransformOperator)
    if not operator_ok:
        raise RuntimeError("Transform Operator has to be a TransformOperator")
    parent_ok = isinstance(parent, Streamlet)
    if not parent_ok:
        raise RuntimeError("parent of Transform Streamlet has to be a Streamlet")
    self._transform_operator = transform_operator
    self._parent = parent
    partition_count = parent.get_num_partitions()
    self.set_num_partitions(partition_count)
def __init__(self, parent):
    """Attach this streamlet to ``parent`` and adopt its partition count.

    Raises:
        RuntimeError: if ``parent`` is not a ``Streamlet``.
    """
    super(LogStreamlet, self).__init__()
    parent_is_streamlet = isinstance(parent, Streamlet)
    if not parent_is_streamlet:
        raise RuntimeError("Parent of Log Streamlet has to be a Streamlet")
    self._parent = parent
    partition_count = parent.get_num_partitions()
    self.set_num_partitions(partition_count)
def initialize(self, config, context):
    """Log the config, zero ``processed``, and pick up the consume function.

    The function is read from ``config`` under ``ConsumeBolt.CONSUMEFUNCTION``.
    ``context`` is accepted but not used here.

    Raises:
        RuntimeError: if ``config`` has no consume-function entry.
    """
    config_text = str(config)
    self.logger.debug("ConsumeBolt's Component-specific config: \n%s" % config_text)
    self.processed = 0
    # Guard clause: bail out first when the required entry is missing.
    if ConsumeBolt.CONSUMEFUNCTION not in config:
        raise RuntimeError("ConsumeBolt needs to be passed consume function")
    self._consume_function = config[ConsumeBolt.CONSUMEFUNCTION]
def process(self, tup):
    """Pass the tuple's first value to the consume function, count it, and ack."""
    first_value = tup.values[0]
    self._consume_function(first_value)
    self.processed += 1
    self.ack(tup)
# pylint: disable=protected-access
class ConsumeStreamlet(Streamlet):
"""ConsumeStreamlet"""
def __init__(self, parent):
    """Attach this streamlet to ``parent`` and adopt its partition count.

    Raises:
        RuntimeError: if ``parent`` is not a ``Streamlet``.
    """
    super(ConsumeStreamlet, self).__init__()
    parent_is_streamlet = isinstance(parent, Streamlet)
    if not parent_is_streamlet:
        raise RuntimeError("Parent of Consume Streamlet has to be a Streamlet")
    self._parent = parent
    partition_count = parent.get_num_partitions()
    self.set_num_partitions(partition_count)
def _calculate_inputs(self):
    """Return a one-entry dict mapping the parent's output stream to shuffle grouping."""
    upstream = self._parent
    stream_id = GlobalStreamId(upstream.get_name(), upstream._output)
    return {stream_id: Grouping.SHUFFLE}
def _build_this(self, builder, stage_names):
if not self.get_name():
self.set_name(self._default_stage_name_calculator("consume", stage_names))
if self.get_name() in stage_names:
def __init__(self, filter_function, parent):
    """Store the filter function and parent, adopting the parent's partition count.

    Raises:
        RuntimeError: if ``filter_function`` is not callable or ``parent``
            is not a ``Streamlet``.
    """
    super(FilterStreamlet, self).__init__()
    function_ok = callable(filter_function)
    if not function_ok:
        raise RuntimeError("Filter function has to be callable")
    parent_ok = isinstance(parent, Streamlet)
    if not parent_ok:
        raise RuntimeError("Parent of Filter Streamlet has to be a Streamlet")
    self._parent = parent
    self._filter_function = filter_function
    partition_count = parent.get_num_partitions()
    self.set_num_partitions(partition_count)
class JoinGrouping(ICustomGrouping):
    """Custom grouping that picks one target task from the hash of a pair's first item."""

    def prepare(self, context, component, stream, target_tasks):
        """Store the task ids that ``choose_tasks`` selects from."""
        self.target_tasks = target_tasks

    def choose_tasks(self, values):
        """Return the single task that should receive ``values``.

        Args:
            values: one-element list whose only item is an iterable of
                length 2; its item 0 is hashed to select the task.

        Returns:
            A one-element list holding the chosen entry of ``self.target_tasks``.

        Raises:
            RuntimeError: if the payload is not an iterable of length 2.
        """
        # collections.Iterable was removed in Python 3.10; the ABC lives in
        # collections.abc. Fall back for interpreters that lack that module.
        try:
            from collections.abc import Iterable
        except ImportError:
            from collections import Iterable

        assert isinstance(values, list) and len(values) == 1
        userdata = values[0]
        if not isinstance(userdata, Iterable) or len(userdata) != 2:
            raise RuntimeError("Tuples going to join must be iterable of length 2")
        # Equal first items hash equally, so they land on the same task.
        # NOTE(review): str hashes are salted per process on Python 3 unless
        # PYTHONHASHSEED is pinned - confirm all emitters agree on the hash.
        hashvalue = hash(userdata[0])
        target_index = hashvalue % len(self.target_tasks)
        return [self.target_tasks[target_index]]
# pylint: disable=protected-access
class JoinStreamlet(Streamlet):
"""JoinStreamlet"""
def __init__(self, join_type, window_config, join_function, left, right):
super(JoinStreamlet, self).__init__()
if not join_type in [JoinBolt.INNER, JoinBolt.OUTER_RIGHT, JoinBolt.OUTER_LEFT]:
raise RuntimeError("join type has to be of one of inner, outer, left")
if not isinstance(window_config, WindowConfig):
raise RuntimeError("window config has to be of type WindowConfig")
if not callable(join_function):
raise RuntimeError("Join function has to be callable")
if not isinstance(left, Streamlet):
raise RuntimeError("Parent of Join has to be a Streamlet")
if not isinstance(right, Streamlet):
raise RuntimeError("Parent of Join has to be a Streamlet")
self._join_type = join_type
self._window_config = window_config
self._join_function = join_function