Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def recursive_hash(coll, prev_hash=None):
    """Recursively hash a (possibly nested) collection of objects.

    Lists, tuples, sets and dicts are traversed; every other object is
    treated as a leaf and hashed with ``joblib_hash``. Callable leaves
    are hashed through the first item returned by ``joblib_getsource``
    (presumably their source code) instead of through the object itself.

    Parameters
    ----------
    coll
        The object or collection to hash.
    prev_hash : list, optional
        Accumulator the leaf hashes are appended to. A fresh list is
        created when omitted, so nothing is shared between calls.

    Returns
    -------
    list
        ``prev_hash`` (the same list object that was passed in, if any)
        extended with one hash per leaf encountered.
    """
    if prev_hash is None:
        prev_hash = []

    if isinstance(coll, dict):
        # Special case for dicts: inspect both keys and values.
        for key, val in coll.items():
            recursive_hash(key, prev_hash)
            recursive_hash(val, prev_hash)
    elif isinstance(coll, set):
        # Set iteration order is not stable between interpreter runs
        # (string hashing is randomized), so hash each member on its own
        # and append the results in sorted order. Equal sets then always
        # contribute the same sequence of hashes.
        for member_hashes in sorted(recursive_hash(member) for member in coll):
            prev_hash.extend(member_hashes)
    elif isinstance(coll, (list, tuple)):
        for val in coll:
            recursive_hash(val, prev_hash)
    elif callable(coll):
        prev_hash.append(joblib_hash(joblib_getsource(coll)[0]))
    else:
        prev_hash.append(joblib_hash(coll))
    return prev_hash
def clear(self, warn=True):
    """Empty the function's cache.

    The cache directory reported by ``self._get_func_dir(mkdir=False)``
    is removed if it exists and then recreated, after which the current
    code of ``self.func`` is written to ``func_code.py`` inside it.

    Parameters
    ----------
    warn : bool, optional
        When true and ``self._verbose`` is truthy, a warning naming the
        directory is issued through ``self.warn`` before anything is
        removed.
    """
    cache_dir = self._get_func_dir(mkdir=False)
    if self._verbose and warn:
        self.warn("Clearing cache %s" % cache_dir)

    # Wipe any existing directory; removal errors are deliberately ignored.
    if os.path.exists(cache_dir):
        shutil.rmtree(cache_dir, ignore_errors=True)
    mkdirp(cache_dir)

    # Store the current source again in the freshly created directory.
    source, _, lineno = get_func_code(self.func)
    self._write_func_code(
        os.path.join(cache_dir, 'func_code.py'), source, lineno)
def clear(self, warn=True):
    """Empty the function's cache.

    The store backend is asked to clear the path made of the function
    identifier, then the current code of ``self.func`` is written back
    through ``self._write_func_code``.

    Parameters
    ----------
    warn : bool, optional
        When true and ``self._verbose`` is greater than zero, a warning
        naming the function identifier is issued through ``self.warn``
        before the cache is cleared.
    """
    identifier = _build_func_identifier(self.func)

    if self._verbose > 0 and warn:
        self.warn("Clearing function cache identified by %s" % identifier)

    self.store_backend.clear_path([identifier])

    # Store the current source again now that the cache entry is empty.
    source, _, lineno = get_func_code(self.func)
    self._write_func_code(source, lineno)
Calculates and returns the hash corresponding to a dask task
``task`` using the hashes of its dependencies, input arguments
and source code of the function associated to the task. Any
available hashes are passed in ``keyhashmap``.
"""
# assert task is not None
fnhash_list = []
arghash_list = []
dephash_list = []
if isinstance(task, tuple):
# A tuple would correspond to a delayed function
for taskelem in task:
if callable(taskelem):
# function
sourcecode = joblib_getsource(taskelem)[0]
fnhash_list.append(joblib_hash(sourcecode))
else:
try:
# Assume a dask graph key.
dephash_list.append(keyhashmap[taskelem])
except Exception:
# Else hash the object.
arghash_list.extend(recursive_hash(taskelem))
else:
try:
# Assume a dask graph key.
dephash_list.append(keyhashmap[task])
except Exception:
# Else hash the object.
arghash_list.extend(recursive_hash(task))
def _subs_tasks_with_src(self, computation: Any) -> Any:
    """Replace task functions by their source code.

    A plain ``list`` is processed element by element. A dask task has
    its leading element swapped for the first item returned by
    ``joblib.func_inspect.get_func_code``; the remaining elements are
    kept as they are. Anything else is returned unchanged.
    """
    # Exact type check on purpose: only genuine lists are treated as a
    # list of computations.
    if type(computation) is list:
        return [self._subs_tasks_with_src(item) for item in computation]

    if dask.core.istask(computation):
        source = joblib.func_inspect.get_func_code(computation[0])[0]
        return (source,) + computation[1:]

    return computation
try:
if self.func in _FUNCTION_HASHES:
# We use as an identifier the id of the function and its
# hash. This is more likely to falsely change than have hash
# collisions, thus we are on the safe side.
func_hash = self._hash_func()
if func_hash == _FUNCTION_HASHES[self.func]:
return True
except TypeError:
# Some callables are not hashable
pass
# Here, we go through some effort to be robust to dynamically
# changing code and collision. We cannot inspect.getsource
# because it is not reliable when using IPython's magic "%run".
func_code, source_file, first_line = get_func_code(self.func)
func_id = _build_func_identifier(self.func)
try:
old_func_code, old_first_line =\
extract_first_line(
self.store_backend.get_cached_func_code([func_id]))
except (IOError, OSError): # some backend can also raise OSError
self._write_func_code(func_code, first_line)
return False
if old_func_code == func_code:
return True
# We have differing code, is this because we are referring to
# different functions, or because the function we are referring to has
# changed?
def _check_previous_func_code(self, stacklevel=2):
"""
stacklevel is the depth at which this function is called, to
issue useful warnings to the user.
"""
# Here, we go through some effort to be robust to dynamically
# changing code and collision. We cannot inspect.getsource
# because it is not reliable when using IPython's magic "%run".
func_code, source_file, first_line = get_func_code(self.func)
func_dir = self._get_func_dir()
func_code_file = os.path.join(func_dir, 'func_code.py')
try:
with open(func_code_file) as infile:
old_func_code, old_first_line = \
extract_first_line(infile.read())
except IOError:
self._write_func_code(func_code_file, func_code, first_line)
return False
if old_func_code == func_code:
return True
# We have differing code, is this because we are referring to
# different functions, or because the function we are referring to has
# changed?