How to use the joblib.Memory class in joblib

To help you get started, we’ve selected a few joblib.Memory examples based on popular ways it is used in public projects.

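Before looking at the project excerpts below, here is a minimal, self-contained sketch of the basic joblib.Memory pattern they all build on (the cache directory name is arbitrary):

from joblib import Memory

# any writable directory can serve as the on-disk cache location
memory = Memory('./joblib_cache', verbose=0)

@memory.cache
def square(x):
    print('computing...')  # only printed on a cache miss
    return x * x

square(3)  # computed and written to disk
square(3)  # served from the cache, no recomputation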

github arthurmensch / modl / examples / TSP / update_dict_reduction_same_batch.py
from os.path import expanduser
from time import time

import numpy as np
from joblib import Memory
from scipy import misc


def run(n_jobs=1, n_epochs=10):
    # Exp def
    redundancies = [25]
    global_exp = dict(n_components=100, alpha=1,
                      l1_ratio=0,
                      pen_l1_ratio=.9,
                      learning_rate=0.9,
                      Dx_agg='average',
                      G_agg='average',
                      AB_agg='full')
    ref_batch_size = 200
    exps = [dict(batch_size=int(ref_batch_size),
                 reduction=reduction)
            for reduction in np.linspace(5, 5, 1)]

    mem = Memory(cachedir=expanduser('~/cache'))
    face = misc.face(gray=True)

    # Convert from uint8 representation with values between 0 and 255 to
    # a floating point representation with values between 0 and 1.
    face = face / 255

    height, width = face.shape

    # Distort the right half of the image
    print('Distorting image...')
    distorted = face.copy()
    # distorted[:, width // 2:] += 0.075 * np.random.randn(height, width // 2)

    # Extract all reference patches from the left half of the image
    print('Extracting reference patches...')
    t0 = time()
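Note that this excerpt (and several others below) passes the cache directory through the legacy cachedir= keyword. cachedir was deprecated in joblib 0.12 and removed in later releases; current versions take the directory as the first argument, location. A hedged equivalent of the call above:

from os.path import expanduser

from joblib import Memory

# same cache directory, expressed with the current `location` argument
mem = Memory(location=expanduser('~/cache'), verbose=0)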
github arthurmensch / cogspaces / exps / scripts / find_init.py
import numpy as np
from joblib import Memory
from nilearn.input_data import NiftiMasker
from numpy.linalg import lstsq
from os.path import expanduser, join

from cogspaces.datasets.dictionaries import fetch_atlas_modl
from cogspaces.datasets.utils import fetch_mask, get_output_dir

modl_atlas = fetch_atlas_modl()
mask = fetch_mask()['hcp']
dict_512 = modl_atlas['components512']
dict_128 = modl_atlas['components128']

mem = Memory(cachedir=expanduser('~/cache'))
masker = NiftiMasker(mask_img=mask, memory=mem).fit()
dict_512 = masker.transform(dict_512)

keep = np.load('keep.npy')
dict_128 = masker.transform(dict_128)

dict_512 = dict_512[keep]

loadings, _, _, _ = lstsq(dict_512.T, dict_128.T)
loadings = loadings.T
loadings_ = np.zeros((128, 512))
loadings_[:, keep] = loadings
loadings = loadings_
output_dir = get_output_dir()
np.save(join(modl_atlas['data_dir'], 'loadings_128_gm_masked.npy'), loadings)
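The excerpt above hands the Memory instance to a nilearn estimator through its memory= parameter; the same instance can also cache your own helpers. A small sketch (the helper below is illustrative, not from the project):

from os.path import expanduser

import numpy as np
from joblib import Memory

mem = Memory(expanduser('~/cache'), verbose=0)

def normalize_rows(x):
    # hypothetical helper cached alongside the library calls above
    return x / np.linalg.norm(x, axis=1, keepdims=True)

normalize_rows_cached = mem.cache(normalize_rows)
result = normalize_rows_cached(np.random.rand(10, 5))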
github edraizen / molmimic / molmimic / parsers / APBS.py
import os, sys
from tempfile import mkdtemp
import subprocess

# decode the bytes returned by check_output so the path join works under Python 3
pdb2pqr_src = os.path.join(
    os.path.dirname(subprocess.check_output(["which", "pdb2pqr"]).decode().strip()),
    "src")
sys.path.append(pdb2pqr_src)

from psize import Psize
from joblib import Memory

from molmimic.util import silence_stdout, silence_stderr

cachedir = mkdtemp()
memory = Memory(cachedir=cachedir, verbose=0)

@memory.cache
def run_apbs(pqr_file):
	"""Run APBS. Calculates correct size using Psize and defualt from Chimera
	"""
	file_prefix = os.path.splitext(pqr_file)[0]
	input_file = "{}.apbs_input".format(file_prefix)
	output_prefix = "{}.apbs_output".format(file_prefix)

	ps = Psize()
	ps.runPsize(pqr_file)
	cglen = "{:.2f} {:.2f} {:.2f}".format(*ps.getCoarseGridDims())
	fglen = "{:.2f} {:.2f} {:.2f}".format(*ps.getFineGridDims())
	dime = "{:d} {:d} {:d}".format(*ps.getFineGridPoints())

	with open(input_file, "w") as f:
github arthurmensch / modl / examples / images / data.py
def load_data(source, scale, gray):
    return load_images(source, scale=scale,
                       gray=gray, memory=Memory(cachedir='None'))
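One detail worth flagging in this excerpt: Memory treats any string as a directory path, so cachedir='None' creates and caches into a folder literally named None. Passing the Python value None disables caching entirely, as in this sketch:

from joblib import Memory

mem_disabled = Memory(None, verbose=0)   # no-op: calls run uncached
mem_on_disk = Memory('None', verbose=0)  # caches into a directory called "None"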
github neurospin / pypreprocess / pypreprocess / io_utils.py
-------
    returns nifti image object

    """

    if isinstance(threeD_img_filenames, str):
        return nibabel.load(threeD_img_filenames)

    if output_dir is None:
        output_dir = tempfile.mkdtemp()

    # prepare for smart caching
    merge_cache_dir = os.path.join(output_dir, "merge")
    if not os.path.exists(merge_cache_dir):
        os.makedirs(merge_cache_dir)
    merge_mem = joblib.Memory(cachedir=merge_cache_dir, verbose=5)

    # merging proper
    fourD_img = merge_mem.cache(nibabel.concat_images)(threeD_img_filenames,
                                                       check_affines=False
                                                       )

    # sanity
    if len(fourD_img.shape) == 5:
        fourD_img = nibabel.Nifti1Image(
            fourD_img.get_data()[:, :, :, 0, :],  # an index may contain only one Ellipsis
            fourD_img.get_affine())

    # save image to disk
    if output_filename is not None:
        merge_mem.cache(nibabel.save)(fourD_img, output_filename)
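The merge_mem.cache(nibabel.concat_images)(...) form above caches a third-party function at the call site, without decorating its definition. A small sketch of the same idea with a hypothetical helper (the file name is illustrative):

import json

from joblib import Memory

mem = Memory('./cachedir', verbose=0)

def load_config(path):
    with open(path) as f:
        return json.load(f)

# wrap at the call site; the wrapped callable can be rebuilt on the fly
config = mem.cache(load_config)('settings.json')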
github joblib / joblib / examples / nested_parallel_memory.py
results = [data_processing_mean(data, col) for col in range(data.shape[1])]
stop = time.time()

print('\nSequential processing')
print('Elapsed time for the entire processing: {:.2f} s'
      .format(stop - start))

###############################################################################
# ``costly_compute`` is expensive to compute and it is used as an intermediate
# step in ``data_processing_mean``. Therefore, it is worthwhile to store the
# intermediate results from ``costly_compute`` using :class:`joblib.Memory`.

from joblib import Memory

location = './cachedir'
memory = Memory(location, verbose=0)
costly_compute_cached = memory.cache(costly_compute)


###############################################################################
# Now, we define ``data_processing_mean_using_cache`` which benefits from the
# cache by calling ``costly_compute_cached``

def data_processing_mean_using_cache(data, column):
    """Compute the mean of a column."""
    return costly_compute_cached(data, column).mean()


###############################################################################
# Then, we execute the same processing in parallel, caching the intermediate
# results.
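The excerpt stops just before the parallel run itself. As a hedged sketch (not the exact code of the joblib example), a parallel pass over a cached function typically looks like this:

import numpy as np
from joblib import Memory, Parallel, delayed

memory = Memory('./cachedir', verbose=0)

@memory.cache
def costly_column_sum(data, column):
    # stand-in for the expensive intermediate step
    return data[:, column].sum()

data = np.random.randn(1000, 4)
results = Parallel(n_jobs=2)(
    delayed(costly_column_sum)(data, col) for col in range(data.shape[1]))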
github mne-tools / mne-python / doc / sphinxext / cited_mne.py
import os
import time
import random
import requests

import numpy as np
from joblib import Memory
from BeautifulSoup import BeautifulSoup

from mne.externals.tempita import Template
from mne.commands.utils import get_optparser

# cache to avoid making too many calls to Google Scholar
cachedir = 'cachedir'
if not os.path.exists(cachedir):
    os.mkdir(cachedir)
mem = Memory(cachedir=cachedir, verbose=2)

UA = ('Mozilla/5.0 (X11; U; FreeBSD i386; en-US; rv:1.9.2.9) '
      'Gecko/20100913 Firefox/3.6.9')

# ##### Templates for citations #####
html = (u""":orphan:

.. _cited:

Publications by users
=====================

Papers citing MNE (%d) as extracted from Google Scholar (on %s).

""")
github neurospin / pypreprocess / pypreprocess / subject_data.py
def _niigz2nii(self):
        """
        Convert .nii.gz to .nii (crucial for SPM).

        """
        if self.scratch is None:
            self.scratch = self.output_dir
        if self.caching:
            cache_dir = os.path.join(self.scratch, 'cache_dir')
            mem = Memory(cache_dir, verbose=100)
        else:
            mem = Memory(None, verbose=0)
        self._sanitize_session_output_dirs()
        self._sanitize_session_scratch_dirs()
        if None not in [self.func, self.n_sessions,
                        self.session_scratch_dirs]:
            self.func = [mem.cache(do_niigz2nii)(
                self.func[sess], output_dir=self.session_scratch_dirs[sess])
                for sess in range(self.n_sessions)]
        if self.anat is not None:
            self.anat = mem.cache(do_niigz2nii)(
                self.anat, output_dir=self.anat_scratch_dir)
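The method above uses a convenient toggle: when caching is off it still builds Memory(None, ...), which is a transparent no-op, so the rest of the code can call mem.cache(...) unconditionally. A condensed sketch of that pattern:

from joblib import Memory

def make_memory(caching, cache_dir='./cache_dir'):
    # Memory(None) runs everything uncached, so callers never need an if/else
    return Memory(cache_dir if caching else None, verbose=0)

mem = make_memory(caching=False)
result = mem.cache(sum)([1, 2, 3])   # executes normally, nothing written to disk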
github PaulEmmanuelSotir / NYC_TaxiTripDuration / feature_engineering.py
import os
import shutil

import numpy as np
import pandas as pd
from joblib import Memory


def load_data(datadir, trainset, testset, valid_size, output_size, embed_discrete_features=False, max_distinct_values=None, cache_read_only=False):
    if cache_read_only:
        dest = '/output/cache'
        shutil.copytree(datadir, dest)
        datadir = dest
    memory = Memory(cachedir=os.path.join(datadir, 'cache'))

    @memory.cache(ignore=['datadir'])
    def _cached_light_load_data(datadir, trainset, testset, valid_size, embed_discrete_features, max_distinct_values):
        features = ['vendor_id', 'passenger_count', 'pickup_latitude', 'pickup_longitude', 'dropoff_latitude', 'dropoff_longitude']
        df_all = pd.concat((pd.read_csv(os.path.join(datadir, trainset)), pd.read_csv(os.path.join(datadir, testset))))
        df_all['pickup_datetime'] = df_all['pickup_datetime'].apply(pd.Timestamp)
        df_all['dropoff_datetime'] = df_all['dropoff_datetime'].apply(pd.Timestamp)
        df_all['trip_duration_log'] = np.log(df_all['trip_duration'] + 1)

        def _add_feature(name, value):
            features.append(name)
            df_all[name] = value

        _, indices = np.unique(df_all['id'], return_inverse=True)
        _add_feature('id_idx', indices)
        # Date time features
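The ignore=['datadir'] argument above is worth calling out: arguments listed in ignore are left out of the cache key, so moving the data directory does not invalidate previously cached results. A small sketch of the same idea (names are illustrative):

from joblib import Memory

memory = Memory('./cache', verbose=0)

@memory.cache(ignore=['verbose'])
def train(seed, verbose=False):
    # `verbose` only affects logging, so it should not be part of the cache key
    if verbose:
        print('training with seed', seed)
    return seed * 2

train(1, verbose=True)   # computed once
train(1, verbose=False)  # cache hit: the ignored argument does not matter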
github dblalock / bolt / experiments / python / product_quantize.py
#!/usr/bin/env python

import time
import numpy as np

from .utils import kmeans, orthonormalize_rows, random_rotation

from joblib import Memory
_memory = Memory('.', verbose=0)


# ================================================================ PQ

@_memory.cache
def learn_pq(X, ncentroids, nsubvects, subvect_len, max_kmeans_iters=16):
    codebooks = np.empty((ncentroids, nsubvects, subvect_len))
    assignments = np.empty((X.shape[0], nsubvects), dtype=int)  # the np.int alias was removed in NumPy 1.24

    # print "codebooks shape: ", codebooks.shape

    for i in range(nsubvects):
        start_col = i * subvect_len
        end_col = start_col + subvect_len
        X_in = X[:, start_col:end_col]
        centroids, labels = kmeans(X_in, ncentroids, max_iter=max_kmeans_iters)
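A closing note on the module-level _memory = Memory('.', verbose=0) pattern used here: the cache lives next to the code (under ./joblib/), and joblib invalidates a function's cached entries when its source code changes, so editing learn_pq triggers recomputation. To drop everything explicitly, call Memory.clear(); a short sketch:

from joblib import Memory

_memory = Memory('.', verbose=0)

@_memory.cache
def expensive(x):
    return x ** 2

expensive(4)
_memory.clear(warn=False)   # removes everything cached under ./joblib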