How to use the pytools.memoize function in pytools

To help you get started, we’ve selected a few pytools examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github inducer / pycuda / pycuda / gpuarray.py View on Github external
@memoize
def _splay_backend(n, dev):
    """Derive a block count and threads-per-block from ``n`` and ``dev``.

    The limits used come from ``DeviceData(dev)`` (warp size and thread
    blocks per multiprocessor) and from the device's multiprocessor count.
    """
    # heavily modified from cublas
    from pycuda.tools import DeviceData
    devdata = DeviceData(dev)

    min_threads = devdata.warp_size
    max_threads = 128
    max_blocks = 4 * devdata.thread_blocks_per_mp \
            * dev.get_attribute(drv.device_attribute.MULTIPROCESSOR_COUNT)

    if n < min_threads:
        # n is below min_threads: use one block of min_threads threads.
        block_count = 1
        threads_per_block = min_threads
    elif n < (max_blocks * min_threads):
        # Ceiling division: number of min_threads-sized blocks covering n.
        block_count = (n + min_threads - 1) // min_threads
        threads_per_block = min_threads
github inducer / pycuda / examples / transpose.py View on Github external
@memoize
def _get_transpose_kernel():
    mod = SourceModule("""
      #define BLOCK_SIZE %(block_size)d
      #define A_BLOCK_STRIDE (BLOCK_SIZE*a_width)
      #define A_T_BLOCK_STRIDE (BLOCK_SIZE*a_height)

      __global__ void transpose(float *A_t, float *A, int a_width, int a_height)
      {
        // Base indices in A and A_t
        int base_idx_a   = blockIdx.x*BLOCK_SIZE + blockIdx.y*A_BLOCK_STRIDE;
        int base_idx_a_t = blockIdx.y*BLOCK_SIZE + blockIdx.x*A_T_BLOCK_STRIDE;

        // Global indices in A and A_t
        int glob_idx_a   = base_idx_a + threadIdx.x + a_width*threadIdx.y;
        int glob_idx_a_t = base_idx_a_t + threadIdx.x + a_height*threadIdx.y;
github inducer / loopy / loopy / isl_helpers.py View on Github external
    @memoize
    def is_bounded(set):
        """Return whether ``set`` is bounded once its parameters are set dims.

        ``set`` must have zero set dimensions. All of its parameter
        dimensions are moved into set-dimension slots starting at position 0,
        and ``is_bounded()`` is evaluated on the resulting object.
        """
        assert set.dim(dim_type.set) == 0

        # Move every parameter dimension over so that boundedness is checked
        # with respect to the former parameters.
        param_count = set.dim(dim_type.param)
        params_as_set_dims = set.move_dims(
            dim_type.set, 0, dim_type.param, 0, param_count)
        return params_as_set_dims.is_bounded()
github pypr / pysph / pysph / base / tree / point_octree.py View on Github external
@memoize
def _get_unique_cids_kernel(ctx):
    return GenericScanKernel(
        ctx, np.int32, neutral="0",
        arguments=r"""int *cids, int *unique_cids_map,
                int *unique_cids, int *unique_cids_count""",
        input_expr="(i == 0 || cids[i] != cids[i-1])",
        scan_expr="a + b",
        output_statement=r"""
            if (item != prev_item) {
github inducer / pymbolic / pymbolic / interop / maxima.py View on Github external
@pytools.memoize
def _cached_eval_expr_with_setup(assignments, expr):
    """Forward to ``clean_eval_expr_with_setup`` on the shared kernel.

    The module-level ``_kernel_instance`` is created on first use as a
    ``MaximaKernel`` and reused by every later call.
    """
    global _kernel_instance

    kernel = _kernel_instance
    if kernel is None:
        # First call: create the shared kernel and remember it globally.
        kernel = _kernel_instance = MaximaKernel()

    return kernel.clean_eval_expr_with_setup(assignments, expr)
github laanwj / dwarf_to_c / src / pytools / debug.py View on Github external
@memoize
def get_run_debug_directory():
    """Return the result of creating a unique "run-debug" directory.

    The directory itself is made by a small callback handed to
    ``make_unique_filesystem_object``; the callback creates the directory
    with ``os.mkdir`` and returns the name it was given.
    """
    import os

    def _make_directory(name):
        os.mkdir(name)
        return name

    return make_unique_filesystem_object("run-debug", creator=_make_directory)
github lebedov / scikit-cuda / skcuda / misc.py View on Github external
@memoize
def select_block_grid_sizes(dev, data_shape, threads_per_block=None):
    """
    Determine CUDA block and grid dimensions given device constraints.

    Determine the CUDA block and grid dimensions allowed by a GPU
    device that are sufficient for processing every element of an
    array in a separate thread.

    Parameters
    ----------
    dev : pycuda.driver.Device
        Device object to be used.
    data_shape : tuple
        Shape of input data array. Must be of length 2.
    threads_per_block : int, optional
        Number of threads to execute in each block. If this is None,
github pypr / pysph / pysph / cpy / jit.py View on Github external
    @memoize(key=kernel_cache_key_args)
    def _generate_kernel(self, *args):
        """Annotate ``self.func`` from ``args`` (if set) and generate.

        When ``self.func`` is not ``None``, type information derived from
        ``args`` is fed to an ``AnnotationHelper``, and ``self.func`` is
        replaced by the helper's annotated function. In every case the
        result of ``self._generate()`` is returned.
        """
        if self.func is None:
            # Nothing to annotate; generate directly.
            return self._generate()

        type_info = self.get_type_info_from_args(*args)
        annotator = AnnotationHelper(self.func, type_info)
        annotator.annotate()
        self.func = annotator.func
        return self._generate()
github pypr / pysph / pysph / base / gpu_helper_kernels.py View on Github external
@memoize(key=lambda *args: tuple(args))
def get_scan(inp_f, out_f, dtype, backend):
    """Construct a ``Scan`` from the given functions, dtype and backend.

    The memoization key is the tuple of positional arguments.
    """
    scan_options = {
        "input": inp_f,
        "output": out_f,
        "dtype": dtype,
        "backend": backend,
    }
    return Scan(**scan_options)
github pypr / pysph / pysph / cpy / jit.py View on Github external
    @memoize(key=kernel_cache_key_kwargs, use_kwargs=True)
    def _generate_kernel(self, **kwargs):
        """Annotate the input and output functions using ``kwargs``.

        Each of ``self.input_func`` and ``self.output_func`` that is not
        ``None`` is run through an ``AnnotationHelper`` with type information
        derived from ``kwargs``, and is then replaced by the helper's
        annotated function.
        """
        if self.input_func is not None:
            arg_types = self.get_type_info_from_kwargs(
                self.input_func, **kwargs)
            # The input function's return type is taken from self.dtype.
            arg_types['return_'] = dtype_to_knowntype(self.dtype)
            helper = AnnotationHelper(self.input_func, arg_types)
            helper.annotate()
            self.input_func = helper.func

        if self.output_func is not None:
            # Unlike the input function, no 'return_' entry is added here.
            arg_types = self.get_type_info_from_kwargs(
                self.output_func, **kwargs)
            helper = AnnotationHelper(self.output_func, arg_types)
            helper.annotate()
            self.output_func = helper.func