How to use the pytools.product function in pytools

To help you get started, we’ve selected a few pytools.product examples, based on popular ways it is used in public projects.

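In every snippet below, pytools.product simply multiplies the elements of an iterable together, much like the built-in sum() but multiplicative; the projects apply it to array shapes to get element counts, strides and buffer sizes. A minimal illustration (the shape value is just an example):

from pytools import product

assert product([2, 3, 4]) == 24    # multiply the elements of any iterable
shape = (4, 4, 2)
assert product(shape) == 32        # total number of elements implied by a shape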

github quantumsim / quantumsim / quantumsim / pauli_vectors / cuda.py View on Github
        dint = 1
        for i in sorted(rest_shape):
            if i * dint > 256 // (dim0_out * dim1_out):
                break
            else:
                dint *= i

        # dim_a_out, dim_b_out, d_internal (arbitrary)
        block = (dim0_out, dim1_out, dint)
        blocksize = dim1_out * dim0_out * dint
        sh_mem_size = dint * dim1_in * dim0_in  # + ptm.size
        grid_size = max(1, (new_size - 1) // blocksize + 1)
        grid = (grid_size, 1, 1)

        dim_z = pytools.product(self._data.shape[qubit1 + 1:])
        dim_y = pytools.product(self._data.shape[qubit0 + 1:qubit1])
        dim_rho = new_size  # self.data.size

        _two_qubit_general_ptm.prepared_call(
            grid,
            block,
            self._data.gpudata,
            self._work_data.gpudata,
            ptm_gpu.gpudata,
            dim0_in, dim1_in,
            dim_z,
            dim_y,
            dim_rho,
            shared_size=8 * sh_mem_size)

        self._data, self._work_data = self._work_data, self._data
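
In this kernel-launch helper, pytools.product turns slices of the Pauli-vector shape into index strides: dim_z counts the elements of every axis after qubit1, and dim_y those strictly between qubit0 and qubit1. A small sketch with made-up values (the shape and qubit indices are assumptions, not quantumsim data):

import pytools

shape = (4, 4, 4, 4)                                # one Pauli dimension per qubit (assumed)
qubit0, qubit1 = 1, 2

dim_z = pytools.product(shape[qubit1 + 1:])         # 4: axes after qubit1
dim_y = pytools.product(shape[qubit0 + 1:qubit1])   # 1: empty slice, product is 1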
github quantumsim / quantumsim / quantumsim / pauli_vectors / numpy.py View on Github
    def diagonal(self, *, get_data=True):
        no_trace_tensors = [basis.computational_basis_vectors
                            for basis in self.bases]

        trace_argument = []
        n_qubits = self.n_qubits
        for i, ntt in enumerate(no_trace_tensors):
            trace_argument.append(ntt)
            trace_argument.append([i + n_qubits, i])

        indices = list(range(n_qubits))
        out_indices = list(range(n_qubits, 2 * n_qubits))
        complex_dm_dimension = pytools.product(self.dim_hilbert)
        return np.einsum(self._data, indices, *trace_argument, out_indices,
                         optimize=True).real.reshape(complex_dm_dimension)
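
Here product collapses the per-subsystem Hilbert-space dimensions into the dimension of the composite space, which is the length of the flattened diagonal the einsum produces. Roughly (the dim_hilbert value below is an assumption):

from pytools import product

dim_hilbert = (2, 2, 3)                      # e.g. two qubits and a qutrit
complex_dm_dimension = product(dim_hilbert)  # 12 diagonal entries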
github quantumsim / quantumsim / quantumsim / pauli_vectors / cuda.py View on Github
        if len(ptm.shape) != 4:
            raise ValueError(
                "`ptm` must be a 4D array, got {}D".format(len(ptm.shape)))

        # bit0 must be the more significant bit (bit 0 is msb)
        if qubit0 > qubit1:
            qubit0, qubit1 = qubit1, qubit0
            ptm = np.einsum("abcd -> badc", ptm)

        new_shape = list(self._data.shape)
        dim0_out, dim1_out, dim0_in, dim1_in = ptm.shape
        assert new_shape[qubit1] == dim1_in
        assert new_shape[qubit0] == dim0_in
        new_shape[qubit1] = dim1_out
        new_shape[qubit0] = dim0_out
        new_size = pytools.product(new_shape)
        new_size_bytes = new_size * 8

        if self._work_data.gpudata.size < new_size_bytes:
            # reallocate
            self._work_data.gpudata.free()
            self._work_data = ga.empty(new_shape, np.float64)
            self._work_data.gpudata.size = self._work_data.nbytes
        else:
            # reallocation not required,
            # reshape but reuse allocation
            self._work_data = ga.GPUArray(
                shape=new_shape,
                dtype=np.float64,
                gpudata=self._work_data.gpudata,
            )
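
product(new_shape) gives the number of float64 entries the resized Pauli vector needs; multiplying by 8 bytes per entry yields the size used to decide whether the scratch buffer must be reallocated. In plain numbers, with an assumed shape:

import numpy as np
import pytools

new_shape = [4, 4, 4]
new_size = pytools.product(new_shape)                       # 64 elements
new_size_bytes = new_size * np.dtype(np.float64).itemsize   # 512 bytes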
github inducer / boxtree / boxtree / pyfmmlib_integration.py View on Github
                lambda nterms: product(self.expansion_shape(nterms)))
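
The lambda maps an expansion order to its coefficient count by multiplying out the expansion's shape. Conceptually it behaves like the sketch below, where expansion_shape is a hypothetical stand-in rather than the actual boxtree helper:

from pytools import product

def expansion_shape(nterms):                          # hypothetical stand-in
    return (nterms + 1, 2 * nterms + 1)

coefficient_count = lambda nterms: product(expansion_shape(nterms))
assert coefficient_count(3) == 28                     # 4 * 7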
github inducer / boxtree / boxtree / distributed / calculation.py View on Github
        if len(slice_indices) == 0:
            return

        level_start_slice_indices = np.searchsorted(
            slice_indices, self.tree.level_start_box_nrs)
        mpole_updates_start = 0

        for ilevel in range(self.tree.nlevels):
            start, stop = level_start_slice_indices[ilevel:ilevel+2]
            if stop > start:
                level_start_box_idx, mpoles_current_level = \
                    self.multipole_expansions_view(mpoles, ilevel)
                mpoles_shape = (stop - start,) + mpoles_current_level.shape[1:]

                from pytools import product
                mpole_updates_end = mpole_updates_start + product(mpoles_shape)

                mpoles_current_level[
                    slice_indices[start:stop] - level_start_box_idx
                ] += mpole_updates[
                    mpole_updates_start:mpole_updates_end
                ].reshape(mpoles_shape)

                mpole_updates_start = mpole_updates_end
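
product(mpoles_shape) counts how many multipole coefficients the current level's update slab holds, so the matching flat slice of mpole_updates can be cut out and reshaped. A small sketch with assumed sizes:

from pytools import product

mpoles_shape = (5, 16)            # 5 boxes on this level, 16 coefficients each (assumed)
ncoeffs = product(mpoles_shape)   # 80 flat entries consumed from mpole_updates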
github inducer / loopy / loopy / kernel.py View on Github
    @property
    def nbytes(self):
        from pytools import product
        return product(si for si in self.shape)*self.dtype.itemsize
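
Because product runs over a generator of the shape entries, nbytes stays symbolic when the shape depends on kernel parameters; for a purely numeric shape it reduces to the usual size-in-bytes computation, e.g.:

import numpy as np
from pytools import product

shape = (128, 64)
itemsize = np.dtype(np.float32).itemsize
nbytes = product(si for si in shape) * itemsize   # 128 * 64 * 4 = 32768 bytes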
github inducer / loopy / loopy / target / pyopencl.py View on Github
    from pymbolic.mapper.evaluator import UnknownVariableError
    try:
        glens = evaluate(glens, parameters)
        llens = evaluate(llens, parameters)
    except UnknownVariableError as name:
        from warnings import warn
        warn("could not check axis bounds because no value "
                "for variable '%s' was passed to check_kernels()"
                % name, LoopyAdvisory)
    else:
        for i in range(len(llens)):
            if llens[i] > device.max_work_item_sizes[i]:
                raise LoopyError("group axis %d too big" % i)

        from pytools import product
        if product(llens) > device.max_work_group_size:
            raise LoopyError("work group too big")

    local_mem_use = kernel.local_mem_use()

    from pyopencl.characterize import usable_local_mem_size
    import numbers
    if isinstance(local_mem_use, numbers.Integral):
        if local_mem_use > usable_local_mem_size(device):
            raise LoopyError("using too much local memory")
    else:
        warn_with_kernel(kernel, "non_constant_local_mem",
                "The amount of local memory used by the kernel "
                "is not a constant. This will likely cause problems.")

    from loopy.kernel.data import ConstantArg
    const_arg_count = sum(
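
In this check, product(llens) is the total number of work-items in one work-group (the local sizes multiplied across all axes), which is what the device's max_work_group_size limits. As a plain-number sketch (the sizes and limit are assumptions, and ValueError stands in for LoopyError):

from pytools import product

llens = (16, 16, 1)               # local work-group size per axis (assumed)
max_work_group_size = 256         # a typical device limit (assumed)
if product(llens) > max_work_group_size:
    raise ValueError("work group too big")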
github inducer / loopy / loopy / target / cuda.py View on Github
        if self.target.extern_c:
            from cgen import Extern
            fdecl = Extern("C", fdecl)

        from loopy.schedule import get_insn_ids_for_block_at
        _, local_grid_size = \
                codegen_state.kernel.get_grid_sizes_for_insn_ids_as_exprs(
                        get_insn_ids_for_block_at(
                            codegen_state.kernel.schedule, schedule_index))

        from loopy.symbolic import get_dependencies
        if not get_dependencies(local_grid_size):
            # Sizes can't have parameter dependencies if they are
            # to be used in static thread block size.
            from pytools import product
            nthreads = product(local_grid_size)

            fdecl = CudaLaunchBounds(nthreads, fdecl)

        return FunctionDeclarationWrapper(fdecl)
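
When the local grid size has no symbolic dependencies, product(local_grid_size) collapses it to a fixed thread count per block, which is what CUDA's __launch_bounds__ qualifier needs. Stripped down (the sizes below are assumed):

from pytools import product

local_grid_size = (32, 4, 1)          # threads per block along x, y, z (assumed)
nthreads = product(local_grid_size)   # 128, used for __launch_bounds__(128)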
github quantumsim / quantumsim / quantumsim / dm10_general.py View on Github
                    gpudata=self._work_data.gpudata,
                    )

        ptm_gpu = self._cached_gpuarray(ptm)

        print(int(ptm_gpu.gpudata))

        # dim_a_out, dim_b_out, d_internal (arbitrary)
        dint = min(16, self.data.size//(dim1_out*dim0_out))
        block = (dim1_out, dim0_out, dint)
        blocksize = dim0_out*dim1_out*dint
        gridsize = max(1, (new_size-1)//blocksize+1)
        grid = (gridsize, 1, 1)

        dim_z = pytools.product(self.data.shape[bit0+1:])
        dim_y = pytools.product(self.data.shape[bit1+1:bit0])
        dim_rho = self.data.size

        print(bit0, bit1)
        print(dim_z, dim_y, dim_rho)
        print(block, grid)

        _two_qubit_general_ptm.prepared_call(grid,
                                     block,
                                     self.data.gpudata,
                                     self._work_data.gpudata,
                                     ptm_gpu.gpudata,
                                     dim1_in, dim0_in,
                                     dim_z,
                                     dim_y,
                                     dim_rho,
                                     shared_size=8 * (ptm.size + blocksize))
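
Beyond the two pytools.product stride computations (which mirror the first example above), the shared-memory request covers the PTM itself plus one block's worth of intermediate values, both stored as 8-byte doubles. In plain numbers, for an assumed 4x4x4x4 PTM and assumed block dimensions:

import numpy as np

ptm = np.zeros((4, 4, 4, 4))               # assumed two-qubit PTM
dim0_out, dim1_out, dint = 4, 4, 16        # assumed block dimensions
blocksize = dim0_out * dim1_out * dint     # 256 threads per block
shared_size = 8 * (ptm.size + blocksize)   # 8 * (256 + 256) = 4096 bytes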