How to use the cupy.cuda module in cupy

To help you get started, we’ve selected a few cupy.cuda examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github chainer / chainer / chainermn / communicators / _memory_utility.py View on Github external
def _batched_unpack_params(params_data, buffer, dtype, stream=None):
    """Launch the batched-unpack kernel over ``buffer``.

    Args:
        params_data: Object exposing ``n_params``, ``n_elems``, ``dptr``,
            ``dtype`` and ``size_csum``; all five are forwarded to the kernel.
        buffer: Object whose ``memory.ptr`` is passed as the kernel's first
            argument.
        dtype: Converted with ``_communication_utility._get_nccl_type_id``
            and passed as the buffer's type id.
        stream: Stream used as a context manager around the launch. When
            ``None``, ``cp.cuda.get_current_stream()`` is used.

    NOTE(review): the name suggests the kernel copies packed buffer contents
    back into the individual parameters -- confirm against the kernel source.
    """
    param_count = params_data.n_params
    elem_count = params_data.n_elems
    dptr = params_data.dptr
    param_dtype = params_data.dtype
    size_csum = params_data.size_csum
    buffer_type_id = _communication_utility._get_nccl_type_id(dtype)

    threads_per_block = 128
    # Ceiling division: one extra block when the elements do not fill the
    # last block exactly.
    block_count, leftover = divmod(elem_count, threads_per_block)
    if leftover:
        block_count += 1

    launch_stream = cp.cuda.get_current_stream() if stream is None else stream
    with launch_stream:
        kernel = _cupy_batched_unpack_params()
        kernel(
            (block_count, ),
            (threads_per_block, ),
            (buffer.memory.ptr, buffer_type_id, elem_count,
             dptr, param_dtype, size_csum, param_count))
github chainer / chainer / tests / cupy_tests / core_tests / test_scan.py View on Github external
def test_multi_gpu(self):
        """Run ``scan`` on a fresh zero array under device 0, then device 1."""
        for device_id in (0, 1):
            with cuda.Device(device_id):
                # The array is created inside the device context.
                a = cupy.zeros((10,))
                cupy.core.core.scan(a)
github bwohlberg / sporco / tests / cupy / admm / test_cbpdn.py View on Github external
from __future__ import division
from builtins import object

import pickle
import pytest
# Skip this whole test module when cupy cannot be used: either the package
# is missing (outer ImportError) or probing device 0 fails (inner
# CUDARuntimeError).
try:
    import cupy as cp
    try:
        # Attribute access used purely as a probe; the value is discarded.
        cp.cuda.Device(0).compute_capability
    except cp.cuda.runtime.CUDARuntimeError:
        pytest.skip("GPU device inaccessible", allow_module_level=True)
except ImportError:
    pytest.skip("cupy not installed", allow_module_level=True)


from sporco.cupy.admm import cbpdn
import sporco.cupy.linalg as sl
from sporco.cupy.util import list2array



class TestSet01(object):

    def setup_method(self, method):
        cp.random.seed(12345)
github Santosh-Gupta / SpeedTorch / SpeedTorch / CPUCupyPinned.py View on Github external
def optInit(self):
        """Load the memory-mapped array for every optimizer variable.

        Calls ``self._preInit()`` and then fills ``self.CUPYmemmap`` with one
        ``cupy.load(..., mmap_mode='r+')`` result per entry of
        ``self.optVarList``, reading from
        ``self.fileName + optVar + '.cpy.npy'``.

        When ``self.CPUPinn`` is set, ``my_pinned_allocator`` is installed as
        the CuPy allocator for the duration of the loading and the allocator
        is reset with ``cupy.cuda.set_allocator(None)`` afterwards. The reset
        now happens in a ``finally`` clause, so a failure in ``_preInit`` or
        ``cupy.load`` no longer leaves the process-wide allocator pointing at
        the pinned allocator.
        """
        # Evaluated once so the reset below mirrors exactly what was installed
        # here. Kept as an equality test (not plain truthiness) so flag values
        # are interpreted the same way as before.
        use_pinned = self.CPUPinn == True  # noqa: E712
        if use_pinned:
            cupy.cuda.set_allocator(my_pinned_allocator)

        try:
            self._preInit()
            self.CUPYmemmap = [
                cupy.load(self.fileName + optVar + '.cpy.npy', mmap_mode='r+')
                for optVar in self.optVarList
            ]
        finally:
            if use_pinned:
                cupy.cuda.set_allocator(None)
github musyoku / chainer-speech-recognition / run / gram_ctc / cnn / train.py View on Github external
loader.save_stats(stats_directory)

	# Adjust the batch size
	print("Searching for the best batch size ...")
	batch_iter_train = loader.get_training_batch_iterator(batchsizes_train, augmentation=augmentation, gpu=using_gpu)
	for _ in range(30):
		for x_batch, x_length_batch, t_batch, t_length_batch, bigram_batch, bucket_id in batch_iter_train:
			try:
				with chainer.using_config("train", True):
					y_batch = model(x_batch)
					loss = gram_ctc(y_batch, t_batch, bigram_batch, ID_BLANK, x_length_batch, t_length_batch)
					if args.joint_training:
						loss += F.connectionist_temporal_classification(y_batch, t_batch, ID_BLANK, x_length_batch, t_length_batch)
					loss.backward()
			except Exception as e:
				if isinstance(e, cupy.cuda.runtime.CUDARuntimeError):
					batchsizes_train[bucket_id] = max(batchsizes_train[bucket_id] - 16, 4)
					print("new batchsize {} for bucket {}".format(batchsizes_train[bucket_id], bucket_id + 1))
			break
	batchsizes_dev = [size * 3 for size in batchsizes_train]

	# Training
	printb("[Training]")
	epochs = Iteration(args.epochs)
	report = Report(log_filename)
	
	for epoch in epochs:
		sum_loss = 0

		# Update the parameters
		batch_iter_train = loader.get_training_batch_iterator(batchsizes_train, augmentation=augmentation, gpu=using_gpu)
		total_iterations_train = batch_iter_train.get_total_iterations()
github michaelnowotny / cocos / cocos / numerics / numerical_package_bundle.py View on Github external
def synchronize(cls):
        """Call ``synchronize()`` on CuPy's null stream.

        ``cupy`` is imported inside the function, so importing the enclosing
        module does not itself require cupy.
        """
        import cupy
        null_stream = cupy.cuda.Stream.null
        null_stream.synchronize()
github chainer / chainer / cupy / random / generator.py View on Github external
def set_stream(self, stream=None):
        """Attach ``stream`` to this object's cuRAND generator.

        Args:
            stream: Stream whose ``ptr`` is handed to ``curand.setStream``.
                When ``None``, a new ``cuda.Stream()`` is created and used.
        """
        target = cuda.Stream() if stream is None else stream
        curand.setStream(self._generator, target.ptr)
github pstuvwx / Deep_VoiceChanger / convertoer.py View on Github external
def main():
    """Interactively convert a wave file with a trained ``Unet`` generator.

    Prompts for the input wave path and the network weights path, runs the
    generator on GPU 0 with the ``train`` config disabled, and writes the
    input side back out as ``a.wav``.

    NOTE(review): ``side``, ``pow_scale`` and ``fft_resca`` are not defined in
    this function; presumably they are module-level names -- confirm. The
    visible body may also be cut short by the excerpt.
    """
    path = input("wave path...")

    bps, wave = waver.load(path)

    # Build the model, make device 0 current, then move the model to the GPU.
    generator_ab = Unet()
    cp.cuda.Device(0).use()
    generator_ab.to_gpu()

    # Weights are loaded after the model has been moved to the GPU.
    netpath = input("net path...")
    chainer.serializers.load_npz(netpath, generator_ab)

    # Inference only: run with the 'train' config flag set to False.
    with chainer.using_config('train', False):
        batch_a = load_comp(wave, 32)
        x_a = convert.concat_examples(batch_a, 0)
        x_a = chainer.Variable(x_a)

        x_ab = generator_ab(x_a)

        # Bring both the input and the generated output back as NumPy arrays.
        x_a = cp.asnumpy(x_a.data)
        x_ab = cp.asnumpy(x_ab.data)

        save_comp('a.wav',  bps, x_a,  side, pow_scale, fft_resca)
github chainer / chainer / chainer / backends / cuda.py View on Github external
def __eq__(self, other):
        """Return True exactly when ``other`` is a ``DummyDeviceType``."""
        same_kind = isinstance(other, DummyDeviceType)
        return same_kind

    def __ne__(self, other):
        """Return the boolean negation of ``self == other``."""
        is_equal = self == other
        return not is_equal


# Module-level instance of DummyDeviceType, created once at import time.
DummyDevice = DummyDeviceType()


# ------------------------------------------------------------------------------
# Global states
# ------------------------------------------------------------------------------
# Only executed when the ``available`` flag is true.
if available:
    # Create a memory pool and install its ``malloc`` as the allocator.
    memory_pool = cuda.MemoryPool()
    cuda.set_allocator(memory_pool.malloc)
    # Do the same for pinned memory with its own pool.
    pinned_memory_pool = cuda.PinnedMemoryPool()
    cuda.set_pinned_memory_allocator(pinned_memory_pool.malloc)


# Tuple of types treated as integers: six's integer types plus numpy.integer.
_integer_types = six.integer_types + (numpy.integer,)
if six.PY2:
    # On Python 2, also accept ``newint`` from the optional ``future``
    # package; when it is not installed the tuple is left unchanged.
    try:
        from future.types.newint import newint as _newint
    except ImportError:
        pass
    else:
        _integer_types += (_newint,)


# ------------------------------------------------------------------------------
# Global states
# ------------------------------------------------------------------------------
github ken-nakanishi / qupy / qupy / operator.py View on Github external
from __future__ import division
import numpy as np
import os
import math
import cmath

# Element type for all operators below: the NumPy attribute named by the
# QUPY_DTYPE environment variable ('complex128' when unset).
dtype = getattr(np, os.environ.get('QUPY_DTYPE', 'complex128'))
# GPU id taken from the QUPY_GPU environment variable; -1 when unset.
device = int(os.environ.get('QUPY_GPU', -1))

# ``xp`` is the array module used below: cupy (with the requested device made
# current) for a non-negative device id, NumPy otherwise. cupy is only
# imported on the GPU path.
if device >= 0:
    import cupy
    cupy.cuda.Device(device).use()
    xp = cupy
else:
    xp = np


# 2x2 operator matrices, built with ``xp`` and the configured ``dtype``:
#   I                identity
#   X, Y, Z          Pauli matrices
#   H                Hadamard: [[1, 1], [1, -1]] / sqrt(2)
#   S, T             diag(1, 1j) and diag(1, (1 + 1j) / sqrt(2))
#   Sdag, Tdag       complex conjugates of S and T
#   sqrt_not         matrix whose square equals X
I = xp.array([[1, 0], [0, 1]], dtype=dtype)
X = xp.array([[0, 1], [1, 0]], dtype=dtype)
Y = xp.array([[0, -1j], [1j, 0]], dtype=dtype)
Z = xp.array([[1, 0], [0, -1]], dtype=dtype)
H = xp.array([[1, 1], [1, -1]], dtype=dtype) / math.sqrt(2)
S = xp.array([[1, 0], [0, 1j]], dtype=dtype)
T = xp.array([[1, 0], [0, (1 + 1j) / math.sqrt(2)]], dtype=dtype)
Sdag = xp.array([[1, 0], [0, -1j]], dtype=dtype)
Tdag = xp.array([[1, 0], [0, (1 - 1j) / math.sqrt(2)]], dtype=dtype)
sqrt_not = xp.array([[1 + 1j, 1 - 1j], [1 - 1j, 1 + 1j]], dtype=dtype) / 2