Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# NOTE(review): this is a truncated fragment of a matmul benchmark script.
# The statement that constructs the parser and binds `p` is not visible above
# this point, and the indentation of the timing loop below has been lost.
# A complete copy of the same script follows this fragment.

# Optional positional integer argument `Nrun` (default 10); it is used below
# as the number of timed NumPy repetitions.
p.add_argument('Nrun', nargs='?', default=10, type=int)
# From here on `p` is the result of parse_args(), no longer the parser itself.
p = p.parse_args()
N = p.N
Nrun = p.Nrun
test_pycuda() # not necessary, just FYI
# Two N x N float32 operands converted to Fortran (column-major) order.
# `rand` is defined elsewhere -- presumably numpy.random.rand; TODO confirm.
A = np.asfortranarray(rand(N, N).astype(np.float32))
B = np.asfortranarray(rand(N, N).astype(np.float32))
# Zero-filled output array with the same shape and dtype as A, Fortran order.
D = np.asfortranarray(np.zeros_like(A, order='F'))
# Time the three to_device calls together and print the elapsed value.
s = timer()
dA = cuda.cuda.to_device(A) # alloc and copy input data
dB = cuda.cuda.to_device(B)
dD = cuda.cuda.to_device(D, copy=False) # alloc only
print(timer() - s)
# NumPy
# Keep the smallest single-run time of A.dot(B) over Nrun repetitions.
# 1000000 is a "larger than any real timing" starting value; it is what gets
# printed if the loop never runs (Nrun <= 0).
numpy_time = 1000000
# NOTE(review): indentation is missing -- presumably the statements from
# `start = timer()` through `numpy_time = T` form the loop body, with
# `numpy_time = T` nested under the `if`; TODO confirm against the original.
for _ in range(Nrun):
start = timer()
E = A.dot(B)
T = timer() - start
if T < numpy_time:
numpy_time = T
print("Numpy took %f seconds" % numpy_time)
# `gemm` is defined elsewhere; it is called with the three device handles
# (inputs dA, dB and the allocated-only dD) -- presumably writing the product
# into dD; TODO confirm.
gemm(dA, dB, dD)
# Matmul benchmark script: builds two N x N float32 matrices, stages them on
# the GPU, reports the best-of-Nrun host-side NumPy `A.dot(B)` time, then
# invokes `gemm` on the device handles.
#
# Command line (both positional and optional):
#   N     matrix dimension (int, default 1000)
#   Nrun  number of timed NumPy repetitions (int, default 10, must be >= 1)
parser = ArgumentParser(description='Matmul benchmark')
parser.add_argument('N', nargs='?', default=1000, type=int)
parser.add_argument('Nrun', nargs='?', default=10, type=int)
p = parser.parse_args()  # `p` stays bound to the parsed namespace
N = p.N
Nrun = p.Nrun
if Nrun < 1:
    # With no repetitions there is no timing to report; fail up front rather
    # than print a placeholder value as if it had been measured.
    parser.error('Nrun must be at least 1')

test_pycuda()  # not necessary, just FYI

# Operands in Fortran (column-major) order, float32.
# `rand` is defined elsewhere -- presumably numpy.random.rand; TODO confirm.
A = np.asfortranarray(rand(N, N).astype(np.float32))
B = np.asfortranarray(rand(N, N).astype(np.float32))
# order='F' already yields a Fortran-ordered zero array shaped like A.
D = np.zeros_like(A, order='F')

# Time the host-to-device staging of all three arrays and print the elapsed
# value.
s = timer()
dA = cuda.cuda.to_device(A)  # alloc and copy input data
dB = cuda.cuda.to_device(B)
dD = cuda.cuda.to_device(D, copy=False)  # alloc only
print(timer() - s)

# NumPy: keep the fastest of Nrun runs.
numpy_time = float('inf')
for _ in range(Nrun):
    start = timer()
    E = A.dot(B)  # result kept in `E`; code outside this view may use it
    numpy_time = min(numpy_time, timer() - start)
print("Numpy took %f seconds" % numpy_time)

# `gemm` is defined elsewhere; it receives the two input handles and the
# allocated-only output handle -- presumably writing the product into dD;
# TODO confirm.
gemm(dA, dB, dD)