How to use the fastai.basic_data.DatasetType enum in fastai

To help you get started, we’ve selected a few fastai examples that show popular ways DatasetType is used in public projects.
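
DatasetType is an enum whose members (Train, Valid, Test, Single, Fix) tell fastai v1 which DataLoader an operation should run on. As a minimal sketch, assuming a trained fastai v1 Learner named learn:

from fastai.basic_data import DatasetType

# Pick the split to predict on by passing a DatasetType member.
val_preds, val_targets = learn.get_preds(ds_type=DatasetType.Valid)
test_preds, _ = learn.get_preds(ds_type=DatasetType.Test)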


github microsoft / computervision-recipes / utils_cv / classification / model.py
# Signature reconstructed from the Args list below; the original snippet is
# truncated above this point, so treat the def line as an assumption.
def get_preds(learn, dl, with_loss=False, n_batch=None, pbar=None):
    """Return predictions (and optionally per-sample losses) on `dl`.

    Args:
        learn: Learner object that will be used for prediction
        dl: DataLoader the model will use to load samples
        with_loss: If True, also return the loss for each prediction
        n_batch: Number of batches to predict. If not specified, predictions
            run over n batches, where n = sample size // BATCH_SIZE
        pbar: ProgressBar object
    """

    # Note: in fastai, for DatasetType.Train only the output of complete minibatches is computed, i.e. if
    # one has 101 images and uses a minibatch size of 16, then len(feats) is 96 and not 101. For
    # DatasetType.Valid this is not the case, and len(feats) is 101 as expected. The workaround is to use
    # DatasetType.Fix instead when referring to the training set.
    # See e.g. https://forums.fast.ai/t/get-preds-returning-less-results-than-length-of-original-dataset/34148
    if dl == DatasetType.Train:
        dl = DatasetType.Fix

    lf = learn.loss_func if with_loss else None
    return fastai.basic_train.get_preds(
        learn.model,
        dl,
        cb_handler=CallbackHandler(learn.callbacks),
        activ=_loss_func2activ(learn.loss_func),
        loss_func=lf,
        n_batch=n_batch,
        pbar=pbar,
    )
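
The Train/Fix distinction above matters whenever you need exactly one prediction per training sample. A minimal sketch of the difference, assuming a fastai v1 Learner whose training set size is not a multiple of the batch size:

# DatasetType.Train shuffles and drops the last incomplete minibatch,
# so it can return fewer predictions than there are training samples.
train_preds, _ = learn.get_preds(ds_type=DatasetType.Train)

# DatasetType.Fix iterates the training data in order, keeping the last
# partial batch, so every sample receives exactly one prediction.
fixed_preds, _ = learn.get_preds(ds_type=DatasetType.Fix)
assert len(fixed_preds) == len(learn.data.train_ds)
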
github robinniesert / kaggle-champs / train.py
callback_fns = [  # list opening reconstructed; the snippet is truncated above
    partial(SaveModelCallback, every='improvement', mode='min',
            monitor='group_mean_log_mae', name=model_str)
]
learn = Learner(db, model, metrics=[rmse, mae], callback_fns=callback_fns,
                wd=args.wd, loss_func=contribs_rmse_loss)
if args.start_epoch > 0:
    learn.load(model_str)
    torch.cuda.empty_cache()
if distributed_train: learn = learn.to_distributed(args.local_rank)

learn.fit_one_cycle(args.epochs, max_lr=args.lr, start_epoch=args.start_epoch)


# make predictions
val_contrib_preds = learn.get_preds(DatasetType.Valid)
test_contrib_preds = learn.get_preds(DatasetType.Test)
val_preds = val_contrib_preds[0][:,-1].detach().numpy() * C.SC_STD + C.SC_MEAN
test_preds = test_contrib_preds[0][:,-1].detach().numpy() * C.SC_STD + C.SC_MEAN


# store results
store_submit(test_preds, model_str, print_head=True)
store_oof(val_preds, model_str, print_head=True)
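
Note that get_preds returns a (predictions, targets) tuple, which is why the snippet indexes [0] before slicing out the last column. A hedged sketch of the same un-scaling step, where SC_STD and SC_MEAN stand in for C.SC_STD and C.SC_MEAN from the project's constants module:

preds, _ = learn.get_preds(DatasetType.Valid)
# The model predicts in normalized units; invert the scaling applied
# during preprocessing to recover the original target scale.
val_preds = preds[:, -1].detach().numpy() * SC_STD + SC_MEAN
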
github robinniesert / kaggle-champs / snapshot_train.py
else: learn.load(model_str)
torch.cuda.empty_cache()
if distributed_train: learn = learn.to_distributed(args.local_rank)

learn.fit(args.epochs)


# make predictions
n_val = len(train_df[train_df['molecule_id'].isin(val_mol_ids)])
val_preds = np.zeros((n_val, args.epochs))
test_preds = np.zeros((len(test_df), args.epochs))
for m in range(args.epochs):
    print(f'Predicting for model {m}')
    learn.load(model_se_str+f'_{m}')
    val_contrib_preds = learn.get_preds(DatasetType.Valid)
    test_contrib_preds = learn.get_preds(DatasetType.Test)
    val_preds[:,m] = val_contrib_preds[0][:,-1].detach().numpy()
    test_preds[:,m] = test_contrib_preds[0][:,-1].detach().numpy()
val_preds = val_preds * C.SC_STD + C.SC_MEAN
test_preds = test_preds * C.SC_STD + C.SC_MEAN


# store results
store_submit(pd.DataFrame(test_preds), snapshots_str, print_head=True)
store_oof(pd.DataFrame(val_preds), snapshots_str, print_head=True)
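
Each snapshot contributes one column to val_preds and test_preds, so a natural follow-up (not shown in the excerpt) is to average the columns into an ensemble prediction, for example:

# Average the per-snapshot predictions into a single ensemble estimate.
val_ensemble = val_preds.mean(axis=1)
test_ensemble = test_preds.mean(axis=1)
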
github robinniesert / kaggle-champs / predict.py
callback_fns = [  # list opening reconstructed; the snippet is truncated above
    partial(GradientClipping, clip=10), GroupMeanLogMAE,
    partial(SaveModelCallback, every='improvement', mode='min',
            monitor='group_mean_log_mae', name=model_str)
]
learn = Learner(db, model, metrics=[rmse, mae], callback_fns=callback_fns, 
                wd=wd, loss_func=contribs_rmse_loss)
learn.load(model_str)


# check if validation metrics are correct
print(learn.validate())


# make predictions
val_contrib_preds = learn.get_preds(DatasetType.Valid)
test_contrib_preds = learn.get_preds(DatasetType.Test)
val_preds = val_contrib_preds[0][:,-1].detach().numpy() * C.SC_STD + C.SC_MEAN
test_preds = test_contrib_preds[0][:,-1].detach().numpy() * C.SC_STD + C.SC_MEAN


# store results
store_submit(test_preds, model_str, print_head=True)
store_oof(val_preds, model_str, print_head=True)
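
In fastai v1, learn.validate() runs the model over the validation DataLoader and returns the loss followed by each metric in the order they were passed to the Learner, so the list printed above should read [loss, rmse, mae]. A sketch of unpacking it:

# Order matches metrics=[rmse, mae] in the Learner constructor above.
val_loss, val_rmse, val_mae = learn.validate()
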
github fastai / fastai / fastai / callbacks / tensorboard.py
def _update_batches_if_needed(self)->None:
        "one_batch function is extremely slow with large datasets.  This is caching the result as an optimization."
        if self.learn.data.valid_dl is None: return # Running learning rate finder, so return
        update_batches = self.data is not self.learn.data
        if not update_batches: return
        self.data = self.learn.data
        self.trn_batch = self._get_new_batch(ds_type=DatasetType.Train)
        self.val_batch = self._get_new_batch(ds_type=DatasetType.Valid)
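
_get_new_batch is a thin wrapper in the same callback; the underlying call is DataBunch.one_batch, which you can also use directly. A minimal sketch, assuming a fastai v1 DataBunch named data:

# Fetch a single (x, y) minibatch from the chosen split without training.
x, y = data.one_batch(ds_type=DatasetType.Train, detach=True, denorm=False, cpu=True)
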
github fastai / fastai / fastai / vision / tta.py
def _tta_only(learn:Learner, ds_type:DatasetType=DatasetType.Valid, activ:nn.Module=None, scale:float=1.35) -> Iterator[List[Tensor]]:
    "Computes the outputs for several augmented inputs for TTA"
    dl = learn.dl(ds_type)
    ds = dl.dataset
    old = ds.tfms
    activ = ifnone(activ, _loss_func2activ(learn.loss_func))
    augm_tfm = [o for o in learn.data.train_ds.tfms if o.tfm not in
               (crop_pad, flip_lr, dihedral, zoom)]
    try:
        pbar = master_bar(range(8))
        for i in pbar:
            row = 1 if i&1 else 0
            col = 1 if i&2 else 0
            flip = i&4
            d = {'row_pct':row, 'col_pct':col, 'is_random':False}
            tfm = [*augm_tfm, zoom(scale=scale, **d), crop_pad(**d)]
            if flip: tfm.append(flip_lr(p=1.))
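
The excerpt is truncated inside the try block; in the full source each augmented configuration is run through get_preds and the dataset's original transforms are restored afterwards. Rather than calling this private helper, the same functionality is exposed through the public API. A sketch, assuming a trained fastai v1 vision Learner:

# Combine regular and augmented predictions over the validation set.
preds, targets = learn.TTA(ds_type=DatasetType.Valid)
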
github jantic / DeOldify / fasterai / tensorboard.py
def write(self, learn:Learner, trn_batch:Tuple, val_batch:Tuple, iteration:int, tbwriter:SummaryWriter):
        self._write_for_dstype(learn=learn, batch=val_batch, iteration=iteration,
                             tbwriter=tbwriter, ds_type=DatasetType.Valid)
        self._write_for_dstype(learn=learn, batch=trn_batch, iteration=iteration,
                             tbwriter=tbwriter, ds_type=DatasetType.Train)
github jantic / DeOldify / fasterai / tensorboard.py
def _update_batches_if_needed(self):
        # The one_batch function is extremely slow with large datasets, so this
        # caches the result. Note also that we always want to show the same
        # batches so that changes are visible in TensorBoard.
        update_batches = self.data is not self.learn.data

        if update_batches:
            self.data = self.learn.data
            self.trn_batch = self.learn.data.one_batch(
                ds_type=DatasetType.Train, detach=True, denorm=False, cpu=False)
            self.val_batch = self.learn.data.one_batch(
                ds_type=DatasetType.Valid, detach=True, denorm=False, cpu=False)
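
The `self.data is not self.learn.data` identity check means the cached batches are refreshed only when the Learner's DataBunch is replaced, not on every call. A standalone sketch of the same caching pattern, assuming a fastai v1 Learner:

class BatchCache:
    "Cache one train and one valid batch, refreshed only when learn.data is swapped."
    def __init__(self): self.data = None
    def update(self, learn):
        if self.data is learn.data: return  # same DataBunch object: keep the cache
        self.data = learn.data
        self.trn_batch = learn.data.one_batch(
            ds_type=DatasetType.Train, detach=True, denorm=False, cpu=False)
        self.val_batch = learn.data.one_batch(
            ds_type=DatasetType.Valid, detach=True, denorm=False, cpu=False)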