Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
result, stdout, stderr = call_capture_output(
[self.cc]
+ ["-M"]
+ ["-D%s" % define for define in self.defines]
+ ["-U%s" % undefine for undefine in self.undefines]
+ ["-I%s" % idir for idir in self.include_dirs]
+ self.cflags
+ source_files
)
if result != 0:
raise CompileError("getting dependencies failed: "+stderr)
lines = join_continued_lines(stdout.split("\n"))
from pytools import flatten
return set(flatten(
line.split()[2:] for line in lines))
def combine(self, values):
    """Merge per-source-file dependency lists into one set.

    :arg values: an iterable of iterables of file names, one inner
        iterable per source file.
    :returns: a :class:`set` containing every file name that occurs in
        any of the inner iterables.
    """
    from itertools import chain
    # chain.from_iterable is the stdlib equivalent of pytools.flatten
    # for the single level of nesting that occurs here.
    return set(chain.from_iterable(values))
shape=new_shape,
dtype=np.float64,
gpudata=self._work_data.gpudata,
)
idx = []
# todo: can be built more efficiently
for i, pb in enumerate(self.bases):
if i == bit:
idx.append([pb.comp_basis_indices[state]])
else:
idx.append(list(range(pb.dim_pauli)))
idx_j = np.array(list(pytools.flatten(idx))).astype(np.uint32)
idx_i = np.cumsum([0]+[len(i) for i in idx][:-1]).astype(np.uint32)
xshape = np.array(self.data.shape, np.uint32)
yshape = np.array(new_shape, np.uint32)
xshape_gpu = self._cached_gpuarray(xshape)
yshape_gpu = self._cached_gpuarray(yshape)
idx_i_gpu = self._cached_gpuarray(idx_i)
idx_j_gpu = self._cached_gpuarray(idx_j)
block = (2**8, 1, 1)
grid = (max(1, (self._work_data.size-1)//2**8 + 1), 1, 1)
_multitake.prepared_call(
grid,
if target_gpu_array is None:
if self._work_data.gpudata.size < diag_size*8:
self._work_data.gpudata.free()
self._work_data = ga.empty(diag_shape, np.float64)
self._work_data.gpudata.size = self._work_data.nbytes
target_gpu_array = self._work_data
else:
assert target_gpu_array.size >= diag_size
idx = [[pb.comp_basis_indices[i]
for i in range(pb.dim_hilbert)
if pb.comp_basis_indices[i] is not None]
for pb in self.bases
]
idx_j = np.array(list(pytools.flatten(idx))).astype(np.uint32)
idx_i = np.cumsum([0]+[len(i) for i in idx][:-1]).astype(np.uint32)
xshape = np.array(self.data.shape, np.uint32)
yshape = np.array(diag_shape, np.uint32)
xshape_gpu = self._cached_gpuarray(xshape)
yshape_gpu = self._cached_gpuarray(yshape)
idx_i_gpu = self._cached_gpuarray(idx_i)
idx_j_gpu = self._cached_gpuarray(idx_j)
block = (2**8, 1, 1)
grid = (max(1, (diag_size-1)//2**8 + 1), 1, 1)
if len(yshape) == 0:
# brain-dead case, but should be handled according to exp.
def index_list_backend(self, ilists):
    """Flatten the index lists *ilists* and upload them to the GPU.

    :arg ilists: an iterable of equal-length index lists; the common
        length must equal ``self.plan.dofs_per_face``.
    :returns: a :class:`GPUIndexLists` holding the flattened lists in
        the narrowest unsigned integer dtype that can represent every
        index, plus a typedef for use in generated kernel code.
    """
    from pytools import single_valued
    ilist_length = single_valued(len(il) for il in ilists)
    assert ilist_length == self.plan.dofs_per_face

    from cgen import Typedef, POD
    from pytools import flatten
    flat_ilists_uncast = numpy.array(list(flatten(ilists)))

    # Pick the smallest unsigned dtype that holds the largest index.
    # The previous version only knew uint8/uint16 and relied on the
    # assert below to blow up for indices >= 2**16; uint32 now covers
    # that case instead of failing.
    max_index = numpy.max(flat_ilists_uncast)
    if max_index >= 2**16:
        tp = numpy.uint32
    elif max_index >= 256:
        tp = numpy.uint16
    else:
        tp = numpy.uint8

    flat_ilists = numpy.asarray(flat_ilists_uncast, dtype=tp)
    # Guard against silent truncation during the dtype cast.
    assert (flat_ilists == flat_ilists_uncast).all()

    return GPUIndexLists(
            type=tp,
            code=[Typedef(POD(tp, "index_list_entry_t"))],
            device_memory=cuda.to_device(flat_ilists),
            bytes=flat_ilists.size*flat_ilists.itemsize,
            )
def make_linear_comb_kernel_with_result_dtype(
        result_dtype, scalar_dtypes, vector_dtypes):
    """Build an elementwise kernel evaluating
    ``result[i] = a0_fac*a0[i] + a1_fac*a1[i] + ...``,
    one (scalar factor, vector) argument pair per component.
    """
    args = [VectorArg(result_dtype, "result")]
    terms = []
    for k in range(len(vector_dtypes)):
        args.append(ScalarArg(scalar_dtypes[k], "a%d_fac" % k))
        args.append(VectorArg(vector_dtypes[k], "a%d" % k))
        terms.append("a%d_fac*a%d[i]" % (k, k))

    return ElementwiseKernel(args, "result[i] = " + " + ".join(terms))
def index_list_backend(self, ilists):
    """Flatten the index lists *ilists* and upload them to the GPU.

    :arg ilists: an iterable of equal-length index lists; the common
        length must equal ``self.plan.given.dofs_per_face()``.
    :returns: a :class:`GPUIndexLists` holding the flattened lists in
        the narrowest unsigned integer dtype that can represent every
        index, plus a typedef for use in generated kernel code.
    """
    from pytools import single_valued
    ilist_length = single_valued(len(il) for il in ilists)
    assert ilist_length == self.plan.given.dofs_per_face()

    from codepy.cgen import Typedef, POD
    from pytools import flatten
    flat_ilists_uncast = numpy.array(list(flatten(ilists)))

    # Pick the smallest unsigned dtype that holds the largest index.
    # The previous version only knew uint8/uint16 and relied on the
    # assert below to blow up for indices >= 2**16; uint32 now covers
    # that case instead of failing.
    max_index = numpy.max(flat_ilists_uncast)
    if max_index >= 2**16:
        tp = numpy.uint32
    elif max_index >= 256:
        tp = numpy.uint16
    else:
        tp = numpy.uint8

    flat_ilists = numpy.asarray(flat_ilists_uncast, dtype=tp)
    # Guard against silent truncation during the dtype cast.
    assert (flat_ilists == flat_ilists_uncast).all()

    return GPUIndexLists(
            type=tp,
            code=[Typedef(POD(tp, "index_list_entry_t"))],
            device_memory=cuda.to_device(flat_ilists),
            bytes=flat_ilists.size*flat_ilists.itemsize,
            )
@memoize_method
def get_next_step(self, available_names, done_insns):
    """Choose the next instruction to execute.

    :arg available_names: the set of variable names already computed.
    :arg done_insns: the set of instructions already executed.

    Raises :exc:`self.NoInstructionAvailable` when no remaining
    instruction has all of its dependencies available.

    NOTE(review): this definition is truncated in the visible chunk --
    the body of the nested ``remove_result_variable`` (and whatever
    follows it) continues beyond this excerpt.
    """
    from pytools import all, argmax2
    # An instruction is runnable once every variable it depends on has
    # already been computed.
    available_insns = [
            (insn, insn.priority) for insn in self.instructions
            if insn not in done_insns
            and all(dep.name in available_names
                for dep in insn.get_dependencies())]

    if not available_insns:
        raise self.NoInstructionAvailable

    from pytools import flatten
    # A variable may be discarded once no not-yet-executed instruction
    # still depends on it.
    discardable_vars = set(available_names) - set(flatten(
        [dep.name for dep in insn.get_dependencies()]
        for insn in self.instructions
        if insn not in done_insns))

    # {{{ make sure results do not get discarded
    from hedge.tools import with_object_array_or_scalar
    from hedge.optemplate.mappers import DependencyMapper
    dm = DependencyMapper(composite_leaves=False)

    def remove_result_variable(result_expr):
        # The extra dependency mapper run is necessary
        # because, for instance, subscripts can make it
        # into the result expression, which then does
        # not consist of just variables.
@memoize
def make_linear_comb_kernel_with_result_dtype(
        result_dtype, scalar_dtypes, vector_dtypes):
    """Build (and memoize) an elementwise kernel evaluating
    ``result[i] = a0_fac*a0[i] + a1_fac*a1[i] + ...``,
    one (scalar factor, vector) argument pair per component.
    """
    arguments = [VectorArg(result_dtype, "result")]
    summands = []
    for comp in range(len(vector_dtypes)):
        arguments.append(ScalarArg(scalar_dtypes[comp], "a%d_fac" % comp))
        arguments.append(VectorArg(vector_dtypes[comp], "a%d" % comp))
        summands.append("a%d_fac*a%d[i]" % (comp, comp))

    return ElementwiseKernel(
            arguments, "result[i] = " + " + ".join(summands))