How to use the cupy.cuda.Device class in cupy

To help you get started, we’ve selected a few cupy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github bwohlberg / sporco / tests / cupy / admm / test_tvl1.py View on Github external
from __future__ import division
from builtins import object
from past.utils import old_div

import pytest
import numpy as np
# Skip this whole test module unless CuPy can be imported and GPU 0 responds.
try:
    import cupy as cp
    try:
        # Reading compute_capability serves as a probe of device 0; a CUDA
        # runtime error raised here is treated as "no usable GPU".
        cp.cuda.Device(0).compute_capability
    except cp.cuda.runtime.CUDARuntimeError:
        pytest.skip("GPU device inaccessible", allow_module_level=True)
except ImportError:
    pytest.skip("cupy not installed", allow_module_level=True)


from sporco.cupy.admm import tvl1
import sporco.cupy.metric as sm


class TestSet01(object):

    def setup_method(self, method):
        cp.random.seed(12345)
        self.D = cp.random.randn(16, 15)
github espnet / espnet / test / test_e2e_mt.py View on Github external
ngpu = 2
    device_ids = list(range(ngpu))
    args = make_arg()
    model = m.E2E(6, 5, args)
    if "pytorch" in module:
        model = torch.nn.DataParallel(model, device_ids)
        batch = prepare_inputs("pytorch", is_cuda=True)
        model.cuda()
        loss = 1. / ngpu * model(*batch)
        loss.backward(loss.new_ones(ngpu))  # trainable
    else:
        import copy
        import cupy
        losses = []
        for device in device_ids:
            with cupy.cuda.Device(device):
                batch = prepare_inputs("chainer", is_cuda=True)
                _model = copy.deepcopy(model)  # Transcribed from training.updaters.ParallelUpdater
                _model.to_gpu()
                loss = 1. / ngpu * _model(*batch)
                losses.append(loss)

        for loss in losses:
            loss.backward()  # trainable
github chainer / chainer / tests / cupy_tests / cuda_tests / test_memory.py View on Github external
def test_double_free(self):
        """Free the same pool-allocated memory object twice on device 0.

        There is no explicit assertion: the test passes as long as neither
        ``free()`` call raises.
        """
        with cupy.cuda.Device(0):
            mem = self.pool.malloc(1).mem
            mem.free()
            # Second free of the same object is the behaviour under test.
            mem.free()
github chainer / chainer / cupy / util.py View on Github external
def ret(*args, **kwargs):
            """Return the memoized result of ``f(*args, **kwargs)``.

            The cache key is built from the positional arguments and the
            keyword arguments (as a frozenset of items), so every argument
            must be hashable. When ``for_each_device`` is true the id of the
            current CUDA device is added to the key, giving each device its
            own cache entries.

            The cache lives on the wrapped function as ``f._cupy_memo``; the
            first time it is created, ``f`` is also registered in
            ``_memoized_funcs``.
            """
            global _memoized_funcs

            arg_key = (args, frozenset(kwargs.items()))
            if for_each_device:
                arg_key = (cuda.Device().id, arg_key)

            memo = getattr(f, '_cupy_memo', None)
            if memo is None:
                memo = f._cupy_memo = {}
                _memoized_funcs.append(f)

            # Decide "cached or not" by key presence rather than by comparing
            # the stored value with None; otherwise a function that returns
            # None would be re-executed on every call.
            try:
                return memo[arg_key]
            except KeyError:
                pass

            # Call f outside the except clause so that an error raised by f
            # is not chained to the cache-miss KeyError.
            result = memo[arg_key] = f(*args, **kwargs)
            return result
github L1aoXingyu / faster_rcnn.pytorch / models / faster_rcnn.py View on Github external
import cupy as cp
import numpy as np
import torch
import torch.nn.functional as F
from mxtorch.vision.bbox_tools import loc2bbox
from torch import nn
from torch.autograd import Variable

from config import opt
from data.utils import preprocess
from .utils.nms import non_maximum_suppression

# Set the default CUDA device for both PyTorch and CuPy to ``opt.ctx``.
# These are module-level statements, so they run when this module is imported.
torch.cuda.set_device(opt.ctx)
cp.cuda.Device(opt.ctx).use()


class FasterRCNN(nn.Module):
    """Base class for Faster R-CNN.

    This is a base class for Faster R-CNN links supporting object detection
    API [#]_. The following three stages constitute Faster R-CNN.

    1. **Feature extraction**: Images are taken and their \
        feature maps are calculated.
    2. **Region Proposal Networks**: Given the feature maps calculated in \
        the previous stage, produce set of RoIs around objects.
    3. **Localization and Classification Heads**: Using feature maps that \
        belong to the proposed RoIs, classify the categories of the objects \
        in the RoIs and improve localizations.
github Santosh-Gupta / SpeedTorch / SpeedTorch / CUPYLive.py View on Github external
def getData(self, indexes):
        """Return ``self.CUPYcorpus[indexes]`` converted through DLPack.

        Both the PyTorch CUDA context and the current CuPy device are
        synchronized before the corpus is read.
        """
        torch.cuda.synchronize()
        cupy.cuda.Device().synchronize()

        selected = self.CUPYcorpus[indexes]
        capsule = selected.toDlpack()
        return from_dlpack(capsule)
github Santosh-Gupta / SpeedTorch / SpeedTorch / CUPYLive.py View on Github external
def afterOptimizerStep(self, retrievedPosIndexes, retrievedNegIndexes=None):
        """Copy the optimizer's state back into the CuPy-side buffers.

        After synchronizing PyTorch and the current CuPy device, each variable
        named in ``self.optVarList`` is read from the optimizer state under
        ``self.optimizerKey`` and written, via DLPack, into the matching entry
        of ``self.CUPYcorpi`` at the positions produced by
        ``self._getReshapedRetrieval``.
        """
        torch.cuda.synchronize()
        cupy.cuda.Device().synchronize()

        target = self._getReshapedRetrieval(retrievedPosIndexes, retrievedNegIndexes)

        for slot, var_name in enumerate(self.optVarList):
            # state_dict() is queried again on every iteration, exactly as
            # before, rather than being hoisted out of the loop.
            state = self.given_optimizer.state_dict()['state']
            tensor = state[self.optimizerKey][var_name]
            self.CUPYcorpi[slot][target] = cupy.fromDlpack(to_dlpack(tensor))
github mne-tools / mne-python / mne / cuda.py View on Github external
def _set_cuda_device(device_id, verbose=None):
    """Make ``device_id`` the current CuPy CUDA device and log the switch."""
    # CuPy is imported inside the function, so it is only needed when called.
    import cupy

    device = cupy.cuda.Device(device_id)
    device.use()

    message = 'Now using CUDA device {}'.format(device_id)
    logger.info(message)
github anoopkunchukuttan / geomm / geomm_multi.py View on Github external
translation = collections.defaultdict(int)
                translation5 = collections.defaultdict(list)
                translation10 = collections.defaultdict(list)

                t=time.time()
                nbrhood_x=np.zeros(xw.shape[0])
                nbrhood_z=np.zeros(zw.shape[0])
                nbrhood_z2=cp.zeros(zw.shape[0])
                for i in range(0, len(src), BATCH_SIZE):
                    j = min(i + BATCH_SIZE, len(src))
                    similarities = xw[src[i:j]].dot(zw.T)
                    similarities_x = -1*np.partition(-1*similarities,args.csls_neighbourhood-1 ,axis=1)
                    nbrhood_x[src[i:j]]=np.mean(similarities_x[:,:args.csls_neighbourhood],axis=1)

                batch_num=1
                with cp.cuda.Device(1):
                    for i in range(0, zw.shape[0], BATCH_SIZE):
                        j = min(i + BATCH_SIZE, zw.shape[0])
                        similarities = -1*cp.partition(-1*cp.dot(cp.asarray(zw[i:j]),cp.transpose(cp.asarray(xw))),args.csls_neighbourhood-1 ,axis=1)[:,:args.csls_neighbourhood]
                        nbrhood_z2[i:j]=(cp.mean(similarities[:,:args.csls_neighbourhood],axis=1))
                        batch_num+=1
                    nbrhood_z=cp.asnumpy(nbrhood_z2)

                for i in range(0, len(src), BATCH_SIZE):
                    j = min(i + BATCH_SIZE, len(src))
                    similarities = xw[src[i:j]].dot(zw.T)
                    similarities = np.transpose(np.transpose(2*similarities) - nbrhood_x[src[i:j]])- nbrhood_z
                    nn = similarities.argmax(axis=1).tolist()
                    similarities = np.argsort((similarities),axis=1)

                    nn5 = (similarities[:,-5:])
                    nn10 = (similarities[:,-10:])
github StrangerZhang / pyECO / eco / tracker.py View on Github external
def __init__(self, is_color):
        self._is_color = is_color
        self._frame_num = 0
        self._frames_since_last_train = 0
        if config.use_gpu:
            cp.cuda.Device(config.gpu_id).use()