if self.memory_limit and (
    self.memory_target_fraction or self.memory_spill_fraction
):
try:
from zict import Buffer, File, Func
except ImportError:
raise ImportError(
"Please `python -m pip install zict` for spill-to-disk workers"
)
path = os.path.join(self.local_directory, "storage")
storage = Func(
partial(serialize_bytelist, on_error="raise"),
deserialize_bytes,
File(path),
)
target = int(float(self.memory_limit) * self.memory_target_fraction)
self.data = Buffer({}, storage, target, weight)
self.data.memory = self.data.fast
self.data.disk = self.data.slow
else:
self.data = dict()
self.actors = {}
self.loop = loop or IOLoop.current()
self.reconnect = reconnect
self.executor = executor or ThreadPoolExecutor(
        self.nthreads, thread_name_prefix="Dask-Worker-Threads"
)
self.actor_executor = ThreadPoolExecutor(
1, thread_name_prefix="Dask-Actor-Threads"
)
self.batched_stream = BatchedSend(interval="2ms", loop=self.loop)
self.name = name
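
# A minimal, self-contained sketch of the same spill pattern, assuming only
# zict is installed; pickle stands in for dask's serialize_bytelist /
# deserialize_bytes, and the 100-byte target is arbitrary.
import pickle
import tempfile

from zict import Buffer, File, Func

storage = Func(pickle.dumps, pickle.loads, File(tempfile.mkdtemp()))
data = Buffer({}, storage, n=100, weight=lambda k, v: len(v))

data["small"] = b"x" * 10    # under the target: stays in the fast mapping
data["large"] = b"y" * 500   # over the target: serialized and spilled to disk
assert list(data.fast) == ["small"]
assert list(data.slow) == ["large"]
assert data["large"][:3] == b"yyy"  # read back transparently through the chain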
def __init__(self, worker, target_memory, storage_path):
    # LocalStore, safe_sizeof, and the REDIS_* settings come from the
    # surrounding module; zict, functools.partial, protocol, and redis
    # must be importable at module level.
    self.worker = worker
    # Build the same chain that dask already provides:
    # fill local memory up to `target_memory`, then spill to disk.
    file = zict.File(storage_path)
    self.slow = zict.Func(
        partial(protocol.serialize_bytelist, on_error="raise"),
        protocol.deserialize_bytes,
        file,
    )
    self.fast = LocalStore()
    self.l1 = zict.Buffer(
        self.fast, self.slow, target_memory, weight=lambda k, v: safe_sizeof(v)
    )
    logger.info(
        f"Setting fast store memory limit {target_memory} bytes "
        f"and disk spill path {storage_path}"
    )
    # Second, shared tier backed by Redis.
    self.l2 = redis.client.Redis(REDIS_HOST, int(REDIS_PORT), int(REDIS_DB))
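
# Hypothetical read path over the two tiers built above: the zict Buffer
# (memory plus spill files) is consulted first, then the shared Redis tier.
# `store` is an instance of the class above; the function name is illustrative.
def cache_get(store, key):
    try:
        return store.l1[key]       # fast store, falling back to spill files
    except KeyError:
        value = store.l2.get(key)  # shared Redis tier; returns None on a miss
        if value is None:
            raise
        return value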
def __init__(self, device_memory_limit, memory_limit, local_directory):
    # Func, File, and Buffer come from zict; serialize_bytelist,
    # deserialize_bytes, device_to_host, host_to_device, and the `weight`
    # sizing function are module-level helpers in the surrounding code.
    os.makedirs(local_directory, exist_ok=True)  # exist_ok makes a separate existence check redundant
    local_directory = os.path.join(local_directory, "dask-worker-space")
    self.disk_func_path = os.path.join(local_directory, "storage")
self.host_func = dict()
self.disk_func = Func(
functools.partial(serialize_bytelist, on_error="raise"),
deserialize_bytes,
File(self.disk_func_path),
)
if memory_limit == 0:
self.host_buffer = self.host_func
else:
self.host_buffer = Buffer(
self.host_func, self.disk_func, memory_limit, weight=weight
)
self.device_keys = set()
self.device_func = dict()
self.device_host_func = Func(device_to_host, host_to_device, self.host_buffer)
self.device_buffer = Buffer(
self.device_func, self.device_host_func, device_memory_limit, weight=weight
)
self.device = self.device_buffer.fast.d
self.host = self.host_buffer if memory_limit == 0 else self.host_buffer.fast.d
self.disk = None if memory_limit == 0 else self.host_buffer.slow.d
# For Worker compatibility only, where `fast` is host memory buffer
self.fast = self.host_buffer if memory_limit == 0 else self.host_buffer.fast
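
# A toy version of the same device -> host -> disk layering, assuming only
# zict; the device_to_host / host_to_device pair is replaced by identity
# functions so the data movement is observable without a GPU.
import pickle
import tempfile

from zict import Buffer, File, Func

disk = Func(pickle.dumps, pickle.loads, File(tempfile.mkdtemp()))
host_buffer = Buffer({}, disk, n=1000, weight=lambda k, v: len(v))

# Stands in for Func(device_to_host, host_to_device, host_buffer).
device_host = Func(lambda v: v, lambda v: v, host_buffer)
device_buffer = Buffer({}, device_host, n=100, weight=lambda k, v: len(v))

device_buffer["a"] = b"x" * 50     # fits the "device" tier
device_buffer["b"] = b"y" * 500    # spills to host memory
device_buffer["c"] = b"z" * 5000   # spills through host straight to disk

assert list(device_buffer.fast) == ["a"]
assert list(host_buffer.fast) == ["b"]
assert list(host_buffer.slow) == ["c"]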
def setup(self, worker):
    # Three-tier cache: raw numpy arrays in memory (up to _maxmem bytes),
    # then blosc-compressed blocks (up to _maxcompressed bytes), then files
    # under the worker's local directory (up to _maxdisk bytes).
    # Buffer, Func, File, and LRU come from zict; blosc provides the
    # pack_array/unpack_array pair.
    self.cache = Buffer(
fast={},
slow=Func(
dump=blosc.pack_array,
load=blosc.unpack_array,
d=Buffer(
fast={},
slow=LRU(
n=self._maxdisk,
d=File(os.path.join(worker.local_directory, 'cache')),
weight=lambda k, v: len(v),
),
n=self._maxcompressed,
weight=lambda k, v: len(v),
),
),
n=self._maxmem,
weight=lambda k, v: v.nbytes,
)
self.lock = Lock()
self.hits = 0
self.misses = 0
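
# Hypothetical use of the plugin above once registered on a worker: arrays
# are weighted by nbytes, blosc-compressed when the in-memory tier overflows,
# and written to files past the compressed tier's limit. `plugin` is an
# instance of the class above; the key name is illustrative.
import numpy as np

arr = np.arange(1_000_000)
plugin.cache["lhs"] = arr       # may land in any of the three tiers
restored = plugin.cache["lhs"]  # decompressed / reloaded transparently
assert (restored == arr).all()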