How to use the joblib.hash function in joblib

To help you get started, we’ve selected a few joblib.hash examples, based on popular ways it is used in public projects.

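Before the project excerpts, here is a minimal, self-contained sketch of what joblib.hash does (assuming joblib and numpy are installed): it returns a deterministic hex digest for arbitrary picklable Python objects, including nested containers and numpy arrays, and accepts a hash_name argument ('md5' by default, or 'sha1').

import copy
import joblib
import numpy as np

params = {"learning_rate": 0.01, "layers": [64, 64], "weights": np.arange(5)}

digest = joblib.hash(params)                        # default hash_name is 'md5'
digest_sha1 = joblib.hash(params, hash_name="sha1")

# The digest depends only on content, so an equal (deep) copy hashes the same.
assert digest == joblib.hash(copy.deepcopy(params))
print(digest, digest_sha1)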

github VinF / deer / examples / test_CRAR / run_simple_maze.py (View on GitHub)
internal_dim=2)
    
    test_policy = EpsilonGreedyPolicy(learning_algo, env.nActions(), rng, 1.)

    # --- Instantiate agent ---
    agent = NeuralAgent(
        env,
        learning_algo,
        parameters.replay_memory_size,
        max(env.inputDimensions()[i][0] for i in range(len(env.inputDimensions()))),
        parameters.batch_size,
        rng,
        test_policy=test_policy)

    # --- Create unique filename for FindBestController ---
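    # (`hash` here is presumably joblib.hash imported into the module namespace,
    # as suggested by the hash_name keyword, rather than the builtin hash().)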
    h = hash(vars(parameters), hash_name="sha1")
    fname = "test_" + h
    print("The parameters hash is: {}".format(h))
    print("The parameters are: {}".format(parameters))

    # As with the discount factor and the learning rate, the epsilon parameter of the agent's epsilon-greedy policy
    # can be updated periodically. This controller has a few more capabilities, as it allows one to choose more
    # precisely when to update epsilon: after every X actions, episodes or epochs. The parameter can also be reset
    # every episode or epoch (or never, hence reset_every='none').
    agent.attach(bc.EpsilonController(
        initial_e=parameters.epsilon_start,
        e_decays=parameters.epsilon_decay,
        e_min=parameters.epsilon_min,
        evaluate_on='action',
        periodicity=1,
        reset_every='none'))
github scikit-learn / scikit-learn / sklearn / utils / estimator_checks.py (View on GitHub)
    # Fit the model
    estimator.fit(X, y)

    # Compare the state of the model parameters with the original parameters
    new_params = estimator.get_params()
    for param_name, original_value in original_params.items():
        new_value = new_params[param_name]

        # We should never change or mutate the internal state of input
        # parameters by default. To check this we use the joblib.hash function
        # that introspects recursively any subobjects to compute a checksum.
        # The only exception to this rule of immutable constructor parameters
        # is possible RandomState instance but in this check we explicitly
        # fixed the random_state params recursively to be integer seeds.
        assert joblib.hash(new_value) == joblib.hash(original_value), (
            "Estimator %s should not change or mutate "
            " the parameter %s from %s to %s during fit."
            % (name, param_name, original_value, new_value))
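The comment above explains why joblib.hash suits this check: it recursively introspects nested objects, so any in-place mutation of a parameter changes the digest. A minimal sketch of the same idea outside scikit-learn (the ToyEstimator class here is hypothetical):

import joblib

class ToyEstimator:
    """Hypothetical estimator that (incorrectly) mutates its input parameter."""
    def __init__(self, options):
        self.options = options

    def fit(self):
        self.options["fitted"] = True   # mutates the caller's dict in place
        return self

options = {"alpha": 1.0}
before = joblib.hash(options)
ToyEstimator(options).fit()
after = joblib.hash(options)

assert before != after   # the digest exposes the in-place mutation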
github m2dsupsdlclass / lectures-labs / labs / 03_neural_recsys / movielens_paramsearch.py (View on GitHub)
def _evaluate_one(**kwargs):
    params = DEFAULT_PARAMS.copy()
    params.update(kwargs)
    params_digest = joblib.hash(params)

    results = params.copy()
    results['digest'] = params_digest
    results_folder = Path('results')
    results_folder.mkdir(exist_ok=True)
    folder = results_folder.joinpath(params_digest)
    folder.mkdir(exist_ok=True)
    if len(list(folder.glob("*/results.json"))) == 4:
        print('Skipping')

    split_idx = params.get('split_idx', 0)
    print("Evaluating model on split #%d:" % split_idx)
    pprint(params)

    ratings_train, ratings_test = train_test_split(
        all_ratings, test_size=0.2, random_state=split_idx)
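A key reason to use joblib.hash for folder names like this, rather than Python's builtin hash(), is that the digest is a deterministic hex string for a given content, whereas hash() of strings is randomized between interpreter runs. A small sketch of the same caching pattern, with hypothetical hyperparameter names:

from pathlib import Path
import json
import joblib

params = {"embedding_dim": 32, "lr": 0.001, "split_idx": 0}   # hypothetical hyperparameters
digest = joblib.hash(params)   # same params -> same folder name, run after run

folder = Path("results") / digest
folder.mkdir(parents=True, exist_ok=True)
(folder / "params.json").write_text(json.dumps(params, indent=2))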
github hackalog / cookiecutter-easydata / {{ cookiecutter.repo_name }} / {{ cookiecutter.module_name }} / data / datasets.py (View on GitHub)
        exclude_list: list or None
            List of attributes to skip.
            if None, skips ['metadata']

        hash_type: {'sha1', 'md5'}
            Algorithm to use for hashing. Must be a valid joblib hash type.
        """
        if exclude_list is None:
            exclude_list = ['metadata']

        ret = {}
        hashes = {}
        for key, value in self.items():
            if key in exclude_list:
                continue
            data_hash = joblib.hash(value, hash_name=hash_type)
            hashes[key] = f"{hash_type}:{data_hash}"
        ret["hashes"] = hashes
        return ret
github radix-ai / graphchain / graphchain / funcutils.py (View on GitHub)
                    dephash_list.append(keyhashmap[taskelem])
                except Exception:
                    # Else hash the object.
                    arghash_list.extend(recursive_hash(taskelem))
    else:
        try:
            # Assume a dask graph key.
            dephash_list.append(keyhashmap[task])
        except Exception:
            # Else hash the object.
            arghash_list.extend(recursive_hash(task))

    # Calculate subhashes
    src_hash = joblib_hash("".join(fnhash_list))
    arg_hash = joblib_hash("".join(arghash_list))
    dep_hash = joblib_hash("".join(dephash_list))

    subhashes = {"src": src_hash, "arg": arg_hash, "dep": dep_hash}
    objhash = joblib_hash(src_hash + arg_hash + dep_hash)
    return objhash, subhashes
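Since joblib.hash also accepts plain strings, sub-digests can be concatenated and re-hashed into a single composite key, as above. A stripped-down sketch of that pattern (the hash lists below are placeholders, not graphchain's real inputs):

from joblib import hash as joblib_hash

# placeholder sub-digests for source code, arguments and dependencies
fnhash_list = [joblib_hash("def inc(x): return x + 1")]
arghash_list = [joblib_hash(41)]
dephash_list = []   # no upstream tasks in this toy example

src_hash = joblib_hash("".join(fnhash_list))
arg_hash = joblib_hash("".join(arghash_list))
dep_hash = joblib_hash("".join(dephash_list))

objhash = joblib_hash(src_hash + arg_hash + dep_hash)
print(objhash)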
github hackalog / cookiecutter-easydata / {{ cookiecutter.repo_name }} / {{ cookiecutter.module_name }} / data / dset.py (View on GitHub)
        exclude_list: list or None
            List of attributes to skip.
            if None, skips ['metadata']

        hash_type: {'sha1', 'md5'}
            Algorithm to use for hashing. Must be a valid joblib hash type.
        """
        if exclude_list is None:
            exclude_list = ['metadata']

        ret = {'hash_type': hash_type}
        for key, value in self.items():
            if key in exclude_list:
                continue
            ret[f"{key}_hash"] = joblib.hash(value, hash_name=hash_type)
        return ret
github NeuralEnsemble / python-neo / neo / rawio / baserawio.py (View on GitHub)
            else:
                dirname = os.path.expanduser('~/.config/neo_rawio_cache')
            dirname = os.path.join(dirname, self.__class__.__name__)

            if not os.path.exists(dirname):
                os.makedirs(dirname)
        elif cache_path == 'same_as_resource':
            dirname = os.path.dirname(ressource_name)
        else:
            assert os.path.exists(cache_path), \
                'cache_path does not exist; use "home" or "same_as_resource" to make this automatic'

        # the hash of the resource (dir or file) is computed from the filename + datetime
        # TODO make something more sophisticated when rawmode='one-dir' that uses all filenames and datetimes
        d = dict(ressource_name=ressource_name, mtime=os.path.getmtime(ressource_name))
        hash = joblib.hash(d, hash_name='md5')

        # the cache file name is composed of the real name and the hash
        name = '{}_{}'.format(os.path.basename(ressource_name), hash)
        self.cache_filename = os.path.join(dirname, name)

        if os.path.exists(self.cache_filename):
            self.logger.warning('Use existing cache file {}'.format(self.cache_filename))
            self._cache = joblib.load(self.cache_filename)
        else:
            self.logger.warning('Create cache file {}'.format(self.cache_filename))
            self._cache = {}
            self.dump_cache()
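The snippet above builds a cache key from the file path plus its modification time, so the cache is invalidated whenever the file changes. A self-contained sketch of that idea (the data file here is hypothetical and created only so the sketch runs end to end):

import os
import tempfile
import joblib

# hypothetical data file
resource = os.path.join(tempfile.gettempdir(), "recording.dat")
open(resource, "a").close()

# the cache key changes whenever the file is replaced or touched
d = dict(resource_name=resource, mtime=os.path.getmtime(resource))
key = joblib.hash(d, hash_name="md5")

cache_filename = "{}_{}".format(os.path.basename(resource), key)
print(cache_filename)   # e.g. 'recording.dat_5f3a...'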
github hackalog / cookiecutter-easydata / {{ cookiecutter.repo_name }} / {{ cookiecutter.module_name }} / models / predict.py (View on GitHub)
    output_dataset = f'{model_name}_exp_{dataset_name}_{run_number}'

    os.makedirs(output_path, exist_ok=True)

    dataset = Dataset.load(dataset_name)

    model, model_meta = load_model(model_name)

    # add experiment metadata
    experiment = {
        'model_name': model_name,
        'dataset_name': dataset_name,
        'run_number': run_number,
        'hash_type': hash_type,
        'input_data_hash': joblib.hash(dataset.data, hash_name=hash_type),
        'input_target_hash': joblib.hash(dataset.target, hash_name=hash_type),
        'model_hash': joblib.hash(model, hash_name=hash_type),
    }
    logger.debug(f"Predict: Applying {model_name} to {dataset_name}")
    metadata_fq = output_path / f'{output_dataset}.metadata'

    if metadata_fq.exists() and force is False:
        cached_metadata = Dataset.load(output_dataset, data_path=output_path,
                                       metadata_only=True)
        if experiment.items() <= cached_metadata['experiment'].items():
            logger.info("Experiment has already been run. Returning Cached Result")
            return Dataset.load(output_dataset, data_path=output_path)
        else:
            raise Exception(f'An Experiment with name {output_dataset} exists already, '
                            'but metadata has changed. '
                            'Use `force=True` to overwrite, or change one of '
                            '`run_number` or `output_dataset`')
github sdvillal / jagged / jagged / benchmarks / storage_bench.py (View on GitHub)
    # pandify
    start = time()
    columns = pd.Index('%d' % col for col in range(jagged.shape[1]))
    roundtripped = [pd.DataFrame(data, copy=False, columns=columns) for data in roundtripped]
    measurements['pandify_time'] = (time() - start)

    # sum (gives a rough idea of the overhead of laziness / mmap)
    measurements['before_sum_mem'] = available_ram()
    start = time()
    measurements['suma'] = float(np.sum([np.nansum(df['6']) for df in roundtripped]))
    measurements['sum_time'] = time() - start
    measurements['after_sum_mem'] = available_ram()

    # get a checksum from the whole collection
    start = time()
    measurements['checksum'] = joblib.hash(tuple(joblib.hash(df) for df in roundtripped))
    measurements['checksum_time'] = time() - start

    return measurements
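joblib.hash handles numpy arrays (and therefore pandas objects) by content, which is why it works well as a whole-collection checksum here. A short sketch of that pattern, assuming numpy and pandas are installed:

import joblib
import numpy as np
import pandas as pd

frames = [pd.DataFrame(np.arange(12).reshape(3, 4)) for _ in range(3)]

# hash each frame, then hash the tuple of digests to get one collection checksum
checksum = joblib.hash(tuple(joblib.hash(df) for df in frames))
print(checksum)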
github hackalog / cookiecutter-easydata / {{ cookiecutter.repo_name }} / {{ cookiecutter.module_name }} / data / datasets.py (View on GitHub)
        adding or removing keys as specified.

        hash_type: {'md5', 'sha1'}
            Hash algorithm to use
        ignore: list
            list of keys to ignore
        kwargs:
            key/value pairs to add before hashing
        """
        if ignore is None:
            ignore = ['download_dir']
        my_dict = {**self.to_dict(), **kwargs}
        for key in ignore:
            my_dict.pop(key, None)

        return joblib.hash(my_dict, hash_name=hash_type)