How to use the torch.no_grad function in torch

To help you get started, we've selected a few torch.no_grad examples based on popular ways it is used in public projects. torch.no_grad is a context manager (also usable as a decorator) that disables gradient tracking inside its scope: no autograd graph is built and tensors computed there have requires_grad=False, which saves memory and time. That is why it shows up around evaluation, inference, distillation targets, and in-place parameter updates, as the examples below illustrate.

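As a quick orientation before the project examples, here is a minimal sketch of the two common forms; the model and tensors are illustrative:

import torch

model = torch.nn.Linear(4, 2)  # illustrative model
x = torch.randn(8, 4)

# As a context manager: no graph is recorded, outputs do not require grad
with torch.no_grad():
    y = model(x)
assert not y.requires_grad

# As a decorator: the whole function body runs without gradient tracking
@torch.no_grad()
def predict(batch):
    return model(batch)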

github espnet / espnet / espnet / nets / pytorch_backend / e2e_tts_fastspeech.py
def _forward(self, xs, ilens, ys=None, olens=None, spembs=None, is_inference=False):
        # forward encoder
        x_masks = self._source_mask(ilens)
        hs, _ = self.encoder(xs, x_masks)  # (B, Tmax, adim)

        # integrate speaker embedding
        if self.spk_embed_dim is not None:
            hs = self._integrate_with_spk_embed(hs, spembs)

        # forward duration predictor and length regulator
        d_masks = make_pad_mask(ilens).to(xs.device)
        if is_inference:
            d_outs = self.duration_predictor.inference(hs, d_masks)  # (B, Tmax)
            hs = self.length_regulator(hs, d_outs, ilens)  # (B, Lmax, adim)
        else:
            with torch.no_grad():
                ds = self.duration_calculator(xs, ilens, ys, olens, spembs)  # (B, Tmax)
            d_outs = self.duration_predictor(hs, d_masks)  # (B, Tmax)
            hs = self.length_regulator(hs, ds, ilens)  # (B, Lmax, adim)

        # forward decoder
        if olens is not None:
            if self.reduction_factor > 1:
                olens_in = olens.new([olen // self.reduction_factor for olen in olens])
            else:
                olens_in = olens
            h_masks = self._source_mask(olens_in)
        else:
            h_masks = None
        zs, _ = self.decoder(hs, h_masks)  # (B, Lmax, adim)
        before_outs = self.feat_out(zs).view(zs.size(0), -1, self.odim)  # (B, Lmax, odim)
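In this FastSpeech training branch, the ground-truth durations ds come from a separate duration calculator, and wrapping that call in torch.no_grad keeps the autograd graph small and ensures no gradients flow back into the calculator. A minimal sketch of the same pattern, with hypothetical teacher and predictor modules:

import torch
import torch.nn.functional as F

def train_step(predictor, teacher, optimizer, xs):
    # Targets come from a frozen helper module: no graph is recorded here.
    with torch.no_grad():
        targets = teacher(xs)
    preds = predictor(xs)
    loss = F.mse_loss(preds, targets)
    optimizer.zero_grad()
    loss.backward()  # gradients reach the predictor only
    optimizer.step()
    return loss.item()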
github emedinac / S3Pool / main.py
def test(epoch):
    global best_acc, all_weights
    net.eval()
    with torch.no_grad():
        te_loss, correct, total = 0, 0, 0
        for batch_idx, (inputs, targets) in enumerate(testloader):
            if use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()
            inputs, targets = Variable(inputs), Variable(targets)  # Variable is a no-op since PyTorch 0.4; plain tensors work here
            outputs = net(inputs, training=False)
            loss = criterion(outputs, targets)

            te_loss += loss.data.item()
            _, predicted = torch.max(outputs.data, 1)
            total += targets.size(0)
            correct += predicted.eq(targets.data).sum().cpu()

            progress_bar(batch_idx, len(testloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                % (te_loss/(batch_idx+1), 100.*correct/total, correct, total))
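Two things happen here that are easy to conflate: net.eval() switches layers such as dropout and batch norm into inference behavior, while torch.no_grad() disables autograd bookkeeping; evaluation loops need both. A compact modern version of the same loop, with illustrative names:

import torch

@torch.no_grad()
def evaluate(net, loader, criterion, device):
    net.eval()  # inference behavior for dropout/batch norm
    total_loss, correct, total = 0.0, 0, 0
    for inputs, targets in loader:
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = net(inputs)
        total_loss += criterion(outputs, targets).item()
        correct += (outputs.argmax(dim=1) == targets).sum().item()
        total += targets.size(0)
    return total_loss / len(loader), 100.0 * correct / total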
github facebookresearch / PoseWarper / lib / core / function_PoseAgg.py
filenames_counter = 0
imgnums = []
idx = 0

use_warping = config['MODEL']['USE_WARPING_TEST']

# optionally save keypoint predictions to disk
preds_output_dir = config.OUTPUT_DIR + 'keypoint_preds/'
if config.SAVE_PREDS:
    output_filenames_map_file = preds_output_dir + 'filenames_map.npy'
    if not os.path.exists(preds_output_dir):
        os.makedirs(preds_output_dir)

with torch.no_grad():
    end = time.time()
    if not use_warping:
        for i, (input, target, target_weight, meta) in enumerate(val_loader):

            # map each image filename to the indices of its predictions
            for ff in range(len(meta['image'])):
                cur_nm = meta['image'][ff]
                if cur_nm not in filenames_map:
                    filenames_map[cur_nm] = [filenames_counter]
                else:
                    filenames_map[cur_nm].append(filenames_counter)
                filenames_counter += 1

            # compute output
            outputs = model(input)
            if isinstance(outputs, list):
                output = outputs[-1]
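Because the entire validation pass sits under one torch.no_grad() block, no batch pays autograd overhead. On PyTorch 1.9 and later, torch.inference_mode() is a stricter alternative for this situation, worth considering when no tensor from the block will ever be used in autograd; a brief sketch with an illustrative model:

import torch

model = torch.nn.Linear(10, 3)  # illustrative
batch = torch.randn(2, 10)

# inference_mode is a stricter no_grad: tensors created inside can never
# be rewired into autograd later, which lets PyTorch skip extra tracking.
with torch.inference_mode():
    out = model(batch)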
github zju-vipa / KamalEngine / kamal / slim / distillation / data_free / zskt.py
def step(self):
        start_time = time.perf_counter()
        
        # Adversarial step: update the generator to maximize teacher-student disagreement
        z = torch.randn( self.z_dim ).to(self.device)
        fake = self.generator( z )
        self.optim_g.zero_grad()
        t_out = self.teacher( fake )
        s_out = self.student( fake )
        loss_g = -kldiv( s_out, t_out )
        loss_g.backward()
        self.optim_g.step()

        # Regenerate the batch and cache the teacher's output once, untracked,
        # then reuse both for the student updates below.
        with torch.no_grad():
            fake = self.generator( z )
            t_out = self.teacher( fake.detach() )
        for _ in range(10):
            self.optim_s.zero_grad()
            s_out = self.student( fake.detach() )
            loss_s = kldiv( s_out, t_out )
            loss_s.backward()
            self.optim_s.step()
        
        loss_dict = {
            'loss_g': loss_g,
            'loss_s': loss_s,
        }

        step_time = time.perf_counter() - start_time
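This is the standard knowledge-distillation shape: the teacher only supplies targets, so its forward pass runs under torch.no_grad (and the generator output is detached), leaving the student as the only module that receives gradients. A minimal sketch with illustrative models and a conventional temperature-scaled KD loss:

import torch
import torch.nn.functional as F

def distill_step(student, teacher, optimizer, inputs, T=4.0):
    with torch.no_grad():
        t_logits = teacher(inputs)  # teacher forward leaves no graph
    s_logits = student(inputs)
    loss = F.kl_div(F.log_softmax(s_logits / T, dim=1),
                    F.softmax(t_logits / T, dim=1),
                    reduction='batchmean') * (T * T)
    optimizer.zero_grad()
    loss.backward()  # only the student receives gradients
    optimizer.step()
    return loss.item()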
github quanpn90 / NMTGMinor / onmt / legacy / ParallelTransformer / Models.py
def forward_grow(self, input):
        """
        Inputs Shapes: 
            input: batch_size x len_src (to be transposed)
        
        Outputs Shapes:
            out: batch_size x len_src x d_model
            mask_src 
            
        """
        
        with torch.no_grad():
            """ Embedding: batch_size x len_src x d_model """
            emb = embedded_dropout(self.word_lut, input, dropout=self.word_dropout if self.training else 0)
            """ Scale the emb by sqrt(d_model) """
            
            if self.time == 'positional_encoding':
                emb = emb * math.sqrt(self.model_size)
            """ Adding positional encoding """
            emb = self.time_transformer(emb)
            if isinstance(emb, tuple):
                emb = emb[0]
            emb = self.preprocess_layer(emb)
            
            mask_src = input.data.eq(onmt.constants.PAD).unsqueeze(1) # batch_size x 1 x len_src for broadcasting
            
            pad_mask = torch.autograd.Variable(input.data.ne(onmt.constants.PAD)) # batch_size x len_src
            #~ pad_mask = None
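Here torch.no_grad freezes an entire embedding-plus-positional-encoding stage while the rest of the network trains: the stage runs forward-only and its weights receive no gradient. A hedged sketch of the two common ways to get that effect, with illustrative modules:

import torch

embed = torch.nn.Embedding(1000, 64)  # illustrative frozen stage
head = torch.nn.Linear(64, 10)        # illustrative trainable stage
tokens = torch.randint(0, 1000, (4, 7))

# Option 1: run the frozen stage under no_grad; backward stops at `features`.
with torch.no_grad():
    features = embed(tokens)
logits = head(features)

# Option 2: freeze the stage's parameters themselves.
for p in embed.parameters():
    p.requires_grad_(False)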
github NVIDIA / DeepLearningExamples / PyTorch / LanguageModeling / Transformer-XL / pytorch / eval.py
def evaluate(eval_iter, model, meters, max_size=None, repeat=1):
    total_len, total_loss = 0, 0.
    torch.cuda.synchronize()
    start_time = time.time()
    with torch.no_grad():
        mems = None
        for _ in range(repeat):
            for idx, (data, target, seq_len) in enumerate(eval_iter):
                if max_size and idx >= max_size:
                    break
                torch.cuda.synchronize()
                start_iter = time.time()
                ret = model(data, target, mems)
                torch.cuda.synchronize()
                elapsed = time.time() - start_iter
                loss, mems = ret[0], ret[1:]
                loss = loss.mean()
                total_loss += seq_len * loss.item()
                total_len += seq_len
                meters['eval_latency'].update(elapsed)
                target_tokens = target.numel()
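When benchmarking inference under torch.no_grad, the torch.cuda.synchronize() calls are essential: CUDA kernels launch asynchronously, so without a synchronization point the timer would measure launch overhead rather than execution time. A minimal latency-measurement sketch (model and batch are illustrative):

import time
import torch

def measure_latency(model, batch, n_iters=100):
    model.eval()
    latencies = []
    with torch.no_grad():
        for _ in range(n_iters):
            if torch.cuda.is_available():
                torch.cuda.synchronize()  # wait for pending kernels
            start = time.time()
            model(batch)
            if torch.cuda.is_available():
                torch.cuda.synchronize()  # wait for this forward to finish
            latencies.append(time.time() - start)
    return sum(latencies) / len(latencies)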
github nanoporetech / taiyaki / taiyaki / layers.py
def init_(param, value):
    """Set parameter value (inplace) from tensor, numpy array, list or tuple"""
    value_as_tensor = torch.tensor(value, dtype=param.data.dtype)
    with torch.no_grad():
        param.set_(value_as_tensor)
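In-place mutation of parameters is the other major use of torch.no_grad: a leaf tensor with requires_grad=True cannot be modified in place while autograd is watching, so initialization and manual weight updates go inside the context. A minimal sketch:

import torch

layer = torch.nn.Linear(3, 3)

# Outside no_grad this raises: "a leaf Variable that requires grad
# is being used in an in-place operation."
with torch.no_grad():
    layer.weight.fill_(0.0)
    layer.bias.copy_(torch.ones(3))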
github CCQC / PES-Learn / peslearn / ml / neural_network.py
failures = 0
decay_attempts = 0
prev_best = None
decay_start = False

for epoch in range(1, maxit):
    def closure():
        optimizer.zero_grad()
        y_pred = model(self.Xtr)
        loss = torch.sqrt(metric(y_pred, self.ytr))  # passing RMSE instead of MSE improves precision IMMENSELY
        loss.backward()
        return loss
    optimizer.step(closure)
    # validate
    if epoch % val_freq == 0:
        with torch.no_grad():
            tmp_pred = model(self.Xvalid)
            tmp_loss = metric(tmp_pred, self.yvalid)
            # loss_descaler converts MSE in the scaled data domain to MSE in the unscaled data domain
            val_error_rmse = np.sqrt(tmp_loss.item() * loss_descaler) * hartree2cm
            if best_val_error:
                if val_error_rmse < best_val_error:
                    prev_best = best_val_error * 1.0
                    best_val_error = val_error_rmse * 1.0
            else:
                record = True
                best_val_error = val_error_rmse * 1.0
                prev_best = best_val_error
            if verbose:
                print("Epoch {} Validation RMSE (cm-1): {:5.3f}".format(epoch, val_error_rmse))
            if decay_start:
                scheduler.step(val_error_rmse)
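The skeleton is worth isolating: gradient tracking stays on for the optimizer.step(closure) call, and only the periodic validation forward pass runs under torch.no_grad. A compact sketch with illustrative names:

import torch

def fit(model, optimizer, loss_fn, Xtr, ytr, Xval, yval,
        epochs=100, val_freq=10):
    best = float('inf')
    for epoch in range(1, epochs + 1):
        optimizer.zero_grad()
        loss = loss_fn(model(Xtr), ytr)
        loss.backward()
        optimizer.step()
        if epoch % val_freq == 0:
            with torch.no_grad():  # validation builds no graph
                val_loss = loss_fn(model(Xval), yval).item()
            best = min(best, val_loss)
    return best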
github hav4ik / Hydra / src / applications / trainers / mgda.py
for task_idx, task_id in enumerate(self.task_ids):
    data, target = next(loader_iterators[task_id])  # Python 3: next(), not .next()
    data, target = data.to(self.device), target.to(self.device)

    # prepare grads
    self.model.zero_grad()

    # do inference with backward
    output = self.model(data, task_id)
    loss = self.losses[task_id](output, target)
    loss.backward()
    if self.normalize is not None:
        loss_log[task_idx] = loss

    # save the body grads to temp_body_grad
    with torch.no_grad():
        for ctrl, block in self.model.control_blocks(task_id):
            if self.temp_grad[ctrl.index] is None:
                self.temp_grad[ctrl.index] = []
                for p in block.parameters():
                    self.temp_grad[ctrl.index].append(torch.empty(
                        len(ctrl.serving_tasks), p.grad.numel(),
                        device=self.device))

            for i, p in enumerate(block.parameters()):
                grad_idx = ctrl.serving_tasks[task_id]
                self.temp_grad[ctrl.index][i][grad_idx] = \
                    p.grad.view(p.grad.numel())

    # calculate training metrics
    with torch.no_grad():
        train_losses_ts[task_id] += loss.sum()
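Copying .grad buffers around, as this MGDA trainer does, is conventionally wrapped in torch.no_grad as a guard: it guarantees nothing inside the block is recorded, even if some tensor involved happens to require grad. A minimal sketch of stashing flattened per-parameter gradients:

import torch

def snapshot_grads(model):
    # Flatten and copy every parameter gradient without recording anything.
    with torch.no_grad():
        return [p.grad.detach().clone().view(-1)
                for p in model.parameters() if p.grad is not None]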
github zju-vipa / KamalEngine / kamal / amalgamation / layerwise_amalgamation.py
def step_fn(self, engine, batch):
        start_time = time.perf_counter()
        batch = move_to_device(batch, self._device)
        data = batch[0]
        s_out = self.student( data )
        with torch.no_grad():
            t_out = [ teacher( data ) for teacher in self.teachers ]
        loss_amal = 0
        loss_recons = 0
        for amal_block, hooks, C in self._amal_blocks:
            features = [ h.feat_out for h in hooks ]
            fs, fts = features[0], features[1:]
            rep, _fs, _fts = amal_block( fs, fts )
            loss_amal += F.mse_loss( _fs, rep.detach() )
            loss_recons += sum( [ F.mse_loss( _ft, ft ) for (_ft, ft) in zip( _fts, fts ) ] )
        loss_kd = tasks.loss.kldiv( s_out, torch.cat( t_out, dim=1 ) )
        #loss_kd = F.mse_loss( s_out, torch.cat( t_out, dim=1 ) )
        loss_dict = { "loss_kd":      self._weights[0] * loss_kd,
                      "loss_amal":    self._weights[1] * loss_amal,
                      "loss_recons":  self._weights[2] * loss_recons }
        loss = sum(loss_dict.values())
        self.optimizer.zero_grad()
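With several teachers, a single torch.no_grad block around all of their forward passes keeps activation memory flat no matter how many teachers are amalgamated; only the student's pass builds a graph. A minimal multi-teacher sketch with illustrative models and a plain MSE stand-in for the project's KD loss:

import torch
import torch.nn.functional as F

def amalgamate_step(student, teachers, optimizer, data):
    s_out = student(data)
    with torch.no_grad():
        # One untracked forward pass per teacher.
        t_out = torch.cat([t(data) for t in teachers], dim=1)
    loss = F.mse_loss(s_out, t_out)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.item()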