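# Launch configuration for the two-qubit general PTM CUDA kernel: grow the
# internal block dimension `dint` over the remaining axes while the total
# block size (dim0_out * dim1_out * dint) stays within 256 threads.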
dint = 1
for i in sorted(rest_shape):
if i * dint > 256 // (dim0_out * dim1_out):
break
else:
dint *= i
# dim_a_out, dim_b_out, d_internal (arbitrary)
block = (dim0_out, dim1_out, dint)
blocksize = dim1_out * dim0_out * dint
sh_mem_size = dint * dim1_in * dim0_in # + ptm.size
grid_size = max(1, (new_size - 1) // blocksize + 1)
grid = (grid_size, 1, 1)
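# axis strides locating the two target-qubit axes in the flattened array:
# dim_z spans everything after qubit1, dim_y everything between qubit0 and qubit1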
dim_z = pytools.product(self._data.shape[qubit1 + 1:])
dim_y = pytools.product(self._data.shape[qubit0 + 1:qubit1])
dim_rho = new_size # self.data.size
_two_qubit_general_ptm.prepared_call(
grid,
block,
self._data.gpudata,
self._work_data.gpudata,
ptm_gpu.gpudata,
dim0_in, dim1_in,
dim_z,
dim_y,
dim_rho,
shared_size=8 * sh_mem_size)
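# the kernel writes to _work_data; swap buffers so _data holds the result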
self._data, self._work_data = self._work_data, self._data
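# Compute the diagonal of the density matrix: contract each axis of the
# Pauli vector with the computational-basis vectors of its basis, then
# flatten to a real vector of length prod(dim_hilbert).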
def diagonal(self, *, get_data=True):
no_trace_tensors = [basis.computational_basis_vectors
for basis in self.bases]
trace_argument = []
n_qubits = self.n_qubits
for i, ntt in enumerate(no_trace_tensors):
trace_argument.append(ntt)
trace_argument.append([i + n_qubits, i])
indices = list(range(n_qubits))
out_indices = list(range(n_qubits, 2 * n_qubits))
complex_dm_dimension = pytools.product(self.dim_hilbert)
return np.einsum(self._data, indices, *trace_argument, out_indices,
optimize=True).real.reshape(complex_dm_dimension)
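# Two-qubit PTM application: validate the PTM shape, order the qubits, and
# resize the scratch buffer to hold the transformed Pauli vector.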
if len(ptm.shape) != 4:
raise ValueError(
"`ptm` must be a 4D array, got {}D".format(len(ptm.shape)))
# qubit0 must be the more significant qubit (axis 0 is the most
# significant); swap the qubits and permute the PTM axes to match
if qubit0 > qubit1:
qubit0, qubit1 = qubit1, qubit0
ptm = np.einsum("abcd -> badc", ptm)
new_shape = list(self._data.shape)
dim0_out, dim1_out, dim0_in, dim1_in = ptm.shape
assert new_shape[qubit1] == dim1_in
assert new_shape[qubit0] == dim0_in
new_shape[qubit1] = dim1_out
new_shape[qubit0] = dim0_out
new_size = pytools.product(new_shape)
new_size_bytes = new_size * 8
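# grow the scratch buffer if it is too small; otherwise reinterpret the
# existing allocation with the new shape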
if self._work_data.gpudata.size < new_size_bytes:
# reallocate
self._work_data.gpudata.free()
self._work_data = ga.empty(new_shape, np.float64)
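# record the logical size on the allocation so the size check above
# remains valid when the buffer is reused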
self._work_data.gpudata.size = self._work_data.nbytes
else:
# reallocation not required,
# reshape but reuse allocation
self._work_data = ga.GPUArray(
shape=new_shape,
dtype=np.float64,
gpudata=self._work_data.gpudata,
)
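# Scatter per-box multipole-expansion updates back into the level-wise
# expansion arrays of an FMM tree, one level at a time.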
if len(slice_indices) == 0:
return
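# find, for each tree level, the range within slice_indices that belongs to it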
level_start_slice_indices = np.searchsorted(
slice_indices, self.tree.level_start_box_nrs)
mpole_updates_start = 0
for ilevel in range(self.tree.nlevels):
start, stop = level_start_slice_indices[ilevel:ilevel+2]
if stop > start:
level_start_box_idx, mpoles_current_level = \
self.multipole_expansions_view(mpoles, ilevel)
mpoles_shape = (stop - start,) + mpoles_current_level.shape[1:]
from pytools import product
mpole_updates_end = mpole_updates_start + product(mpoles_shape)
mpoles_current_level[
slice_indices[start:stop] - level_start_box_idx
] += mpole_updates[
mpole_updates_start:mpole_updates_end
].reshape(mpoles_shape)
mpole_updates_start = mpole_updates_end
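# Size of the whole array in bytes: number of elements times the dtype's itemsize.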
@property
def nbytes(self):
from pytools import product
return product(self.shape) * self.dtype.itemsize
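# Check a kernel's launch configuration against device limits: evaluate the
# symbolic grid/group sizes, then verify per-axis work-item sizes, total
# work-group size, local memory use, and constant-argument count.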
from pymbolic.mapper.evaluator import UnknownVariableError
try:
glens = evaluate(glens, parameters)
llens = evaluate(llens, parameters)
except UnknownVariableError as name:
from warnings import warn
warn("could not check axis bounds because no value "
"for variable '%s' was passed to check_kernels()"
% name, LoopyAdvisory)
else:
for i in range(len(llens)):
if llens[i] > device.max_work_item_sizes[i]:
raise LoopyError("group axis %d too big" % i)
from pytools import product
if product(llens) > device.max_work_group_size:
raise LoopyError("work group too big")
local_mem_use = kernel.local_mem_use()
from pyopencl.characterize import usable_local_mem_size
import numbers
if isinstance(local_mem_use, numbers.Integral):
if local_mem_use > usable_local_mem_size(device):
raise LoopyError("using too much local memory")
else:
warn_with_kernel(kernel, "non_constant_local_mem",
"The amount of local memory used by the kernel "
"is not a constant. This will likely cause problems.")
from loopy.kernel.data import ConstantArg
const_arg_count = sum(
    1 for arg in kernel.args
    if isinstance(arg, ConstantArg))
if const_arg_count > device.max_constant_args:
    raise LoopyError("too many constant arguments")
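# CUDA code generation: optionally wrap the declaration in extern "C", and
# emit __launch_bounds__ when the thread-block size is known at compile time.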
if self.target.extern_c:
from cgen import Extern
fdecl = Extern("C", fdecl)
from loopy.schedule import get_insn_ids_for_block_at
_, local_grid_size = \
codegen_state.kernel.get_grid_sizes_for_insn_ids_as_exprs(
get_insn_ids_for_block_at(
codegen_state.kernel.schedule, schedule_index))
from loopy.symbolic import get_dependencies
if not get_dependencies(local_grid_size):
# Sizes can't have parameter dependencies if they are
# to be used in static thread block size.
from pytools import product
nthreads = product(local_grid_size)
fdecl = CudaLaunchBounds(nthreads, fdecl)
return FunctionDeclarationWrapper(fdecl)
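# Older variant of the two-qubit PTM launch: the internal block dimension is
# simply capped at 16, and the PTM itself is staged through shared memory
# (hence ptm.size in the shared-memory allocation).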
ptm_gpu = self._cached_gpuarray(ptm)
# dim_a_out, dim_b_out, d_internal (arbitrary)
dint = min(16, self.data.size // (dim1_out * dim0_out))
block = (dim1_out, dim0_out, dint)
blocksize = dim0_out * dim1_out * dint
gridsize = max(1, (new_size - 1) // blocksize + 1)
grid = (gridsize, 1, 1)
dim_z = pytools.product(self.data.shape[bit0 + 1:])
dim_y = pytools.product(self.data.shape[bit1 + 1:bit0])
dim_rho = self.data.size
_two_qubit_general_ptm.prepared_call(grid,
block,
self.data.gpudata,
self._work_data.gpudata,
ptm_gpu.gpudata,
dim1_in, dim0_in,
dim_z,
dim_y,
dim_rho,
shared_size=8 * (ptm.size + blocksize))