How to use the dataset.Dictionary.load_from_file function in dataset

To help you get started, we’ve selected a few dataset examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github jnhwkim / ban-vqa / dataset.py View on Github external
vals = normalize(inds, vals)

    tfidf = torch.sparse.FloatTensor(torch.LongTensor(inds), torch.FloatTensor(vals))
    tfidf = tfidf.coalesce()

    # Latent word embeddings
    emb_dim = 300
    glove_file = 'data/glove/glove.6B.%dd.txt' % emb_dim
    weights, word2emb = utils.create_glove_embedding_init(dictionary.idx2word[N:], glove_file)
    print('tf-idf stochastic matrix (%d x %d) is generated.' % (tfidf.size(0), tfidf.size(1)))

    return tfidf, weights


if __name__ == '__main__':
    # Script entry point: load the pickled dictionary and build the tf-idf
    # matrix plus the embedding weights over the three question splits.
    dictionary = Dictionary.load_from_file('data/dictionary.pkl')
    tfidf, weights = tfidf_from_questions(
        ['train', 'val', 'test2015'],
        dictionary,
    )

# NOTE(review): this guard compares against '__main2__'. A module run as a
# script gets the name '__main__', so the body below does not execute on a
# normal run. Presumably a parked manual check of the data pipeline -- confirm
# before re-enabling or deleting.
if __name__=='__main2__':
    from torch.utils.data import DataLoader

    dictionary = Dictionary.load_from_file('data/dictionary.pkl')
    # The dataset is built for the 'val' split even though the variable is
    # called train_dset.
    train_dset = VQAFeatureDataset('val', dictionary, adaptive=True)
    # name = 'train'
    # eval_dset = VQAFeatureDataset(name, dictionary)
    # vg_dset = VisualGenomeFeatureDataset(name, eval_dset.features, eval_dset.spatials, dictionary)

    # train_loader = DataLoader(vg_dset, 10, shuffle=True, num_workers=1)

    # Batches of 10, shuffled, one worker process; batches are assembled by
    # utils.trim_collate instead of the default collate function.
    loader = DataLoader(train_dset, 10, shuffle=True, num_workers=1, collate_fn=utils.trim_collate)
    # Each batch unpacks into four items; only the size of the first one is
    # printed, the others (and the index) are unused here.
    for i, (v, b, q, a) in enumerate(loader):
        print(v.size())
github jnhwkim / ban-vqa / tools / create_dictionary.py View on Github external
if word not in word2emb:
            continue
        weights[idx] = word2emb[word]
    return weights, word2emb


if __name__ == '__main__':
    # Script entry point: build the dictionary for the selected task, persist
    # it, then create and save the matching GloVe initialisation matrix.
    args = parse_args()

    # The 'vqa' task works directly under data/; any other task value uses the
    # flickr30k sub-directory.
    if args.task == 'vqa':
        dataroot = 'data'
    else:
        dataroot = 'data/flickr30k'
    dictionary_path = os.path.join(dataroot, 'dictionary.pkl')

    create_dictionary(dataroot, args.task).dump_to_file(dictionary_path)

    # Reload from disk so the embeddings are built from the saved dictionary.
    d = Dictionary.load_from_file(dictionary_path)

    emb_dim = 300
    glove_file = 'data/glove/glove.6B.%dd.txt' % emb_dim
    weights, word2emb = create_glove_embedding_init(d.idx2word, glove_file)

    weights_path = os.path.join(dataroot, 'glove6b_init_%dd.npy' % emb_dim)
    np.save(weights_path, weights)
github linjieli222 / VQA_ReGAT / tools / create_dictionary.py View on Github external
vals = entry.split(' ')
        word = vals[0]
        vals = list(map(float, vals[1:]))
        word2emb[word] = np.array(vals)
    for idx, word in enumerate(idx2word):
        if word not in word2emb:
            continue
        weights[idx] = word2emb[word]
    return weights, word2emb


if __name__ == '__main__':
    # Script entry point: build the dictionary, persist it, then create and
    # save the GloVe initialisation matrix for its vocabulary.

    # Single source of truth for the dictionary location. The original dumped
    # to 'data/glove/dictionary.pkl' but reloaded 'data/dictionary.pkl', so the
    # reload read a different (possibly stale or missing) file.
    dictionary_path = 'data/glove/dictionary.pkl'

    d = create_dictionary('data')
    d.dump_to_file(dictionary_path)

    # Reload the file that was just written so the embeddings match it.
    d = Dictionary.load_from_file(dictionary_path)
    emb_dim = 300
    glove_file = 'data/glove/glove.6B.%dd.txt' % emb_dim
    weights, word2emb = create_glove_embedding_init(d.idx2word, glove_file)
    # NOTE(review): the original saved to '/data/glove/...' (filesystem root)
    # while every other path here is relative to the working directory. Made
    # relative for consistency -- confirm no consumer reads the absolute path.
    np.save('data/glove/glove6b_init_%dd.npy' % emb_dim, weights)
github jnhwkim / ban-vqa / main.py View on Github external
torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.backends.cudnn.benchmark = True

    if args.task == 'vqa':
        from train import train
        dict_path = 'data/dictionary.pkl'
        dictionary = Dictionary.load_from_file(dict_path)
        train_dset = VQAFeatureDataset('train', dictionary, adaptive=True)
        val_dset = VQAFeatureDataset('val', dictionary, adaptive=True)
        w_emb_path = 'data/glove6b_init_300d.npy'

    elif args.task == 'flickr':
        from train_flickr import train
        dict_path = 'data/flickr30k/dictionary.pkl'
        dictionary = Dictionary.load_from_file(dict_path)
        train_dset = Flickr30kFeatureDataset('train', dictionary)
        val_dset = Flickr30kFeatureDataset('val', dictionary)
        w_emb_path = 'data/flickr30k/glove6b_init_300d.npy'
        args.op = ''
        args.gamma = 1
        args.tfidf = False

    utils.create_dir(args.output)
    logger = utils.Logger(os.path.join(args.output, 'args.txt'))
    logger.write(args.__repr__())

    batch_size = args.batch_size

    constructor = 'build_%s' % args.model
    model = getattr(base_model, constructor)(train_dset, args.num_hid, args.op, args.gamma, args.task).cuda()
github SinghJasdeep / Attention-on-Attention-for-VQA / main.py View on Github external
seed = random.randint(1, 10000)
        random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed(args.seed)
    else:
        seed = args.seed
        torch.manual_seed(args.seed)
        torch.cuda.manual_seed(args.seed)
    torch.backends.cudnn.benchmark = True

    output = args.output + args.model + '_' + str(args.num_hid) + '_' + args.activation + '_' + args.optimizer +\
            '_D' + str(args.dropout) + '_DL' + str(args.dropout_L) + '_DG' + str(args.dropout_G) + '_DW' + str(args.dropout_W) \
            + '_DC' + str(args.dropout_C) + '_w' + str(args.weight_decay) + '_SD' + str(seed) \
            + '_initializer_' + args.initializer

    dictionary = Dictionary.load_from_file('data/dictionary.pkl')
    train_dset = VQAFeatureDataset('train', dictionary)

    if args.model == 'baseline':
        model = build_baseline(train_dset, num_hid=args.num_hid, dropout= args.dropout, norm=args.norm,\
                               activation=args.activation, dropL=args.dropout_L, dropG=args.dropout_G,\
                               dropW=args.dropout_W, dropC=args.dropout_C)
    elif args.model == 'A1':
        model = build_model_A1(train_dset, num_hid=args.num_hid, dropout= args.dropout, norm=args.norm,\
                               activation=args.activation, dropL=args.dropout_L, dropG=args.dropout_G,\
                               dropW=args.dropout_W, dropC=args.dropout_C)
    elif args.model == 'A2':
        model = build_model_A2(train_dset, num_hid=args.num_hid, dropout= args.dropout, norm=args.norm,\
                               activation=args.activation, dropL=args.dropout_L, dropG=args.dropout_G,\
                               dropW=args.dropout_W, dropC=args.dropout_C)
    elif args.model == 'A3':
        model = build_model_A3(train_dset, num_hid=args.num_hid, dropout= args.dropout, norm=args.norm,\
github linjieli222 / VQA_ReGAT / main.py View on Github external
if args.seed != -1:
        print("Predefined randam seed %d" % args.seed)
    else:
        # fix seed
        args.seed = random.randint(1, 10000)
        print("Choose random seed %d" % args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    if "ban" == args.fusion:
        fusion_methods = args.fusion+"_"+str(args.ban_gamma)
    else:
        fusion_methods = args.fusion

    dictionary = Dictionary.load_from_file(
                    join(args.data_folder, 'glove/dictionary.pkl'))
    if args.dataset == "vqa_cp":
        coco_train_features = Image_Feature_Loader(
                            'train', args.relation_type,
                            adaptive=args.adaptive, dataroot=args.data_folder)
        coco_val_features = Image_Feature_Loader(
                            'val', args.relation_type,
                            adaptive=args.adaptive, dataroot=args.data_folder)
        val_dset = VQA_cp_Dataset(
                    'test', dictionary, coco_train_features, coco_val_features,
                    adaptive=args.adaptive, pos_emb_dim=args.imp_pos_emb_dim,
                    dataroot=args.data_folder)
        train_dset = VQA_cp_Dataset(
                    'train', dictionary, coco_train_features,
                    coco_val_features, adaptive=args.adaptive,
                    pos_emb_dim=args.imp_pos_emb_dim,
github hengyuan-hu / bottom-up-attention-vqa / main.py View on Github external
parser.add_argument('--model', type=str, default='baseline0_newatt')
    parser.add_argument('--output', type=str, default='saved_models/exp0')
    parser.add_argument('--batch_size', type=int, default=512)
    parser.add_argument('--seed', type=int, default=1111, help='random seed')
    args = parser.parse_args()
    return args


if __name__ == '__main__':
    # Script entry point: seed the RNGs, load the data, build the model named
    # on the command line, and hand everything to train().
    args = parse_args()

    # Seed the CPU generator first, then the CUDA one, with the same value.
    for seed_fn in (torch.manual_seed, torch.cuda.manual_seed):
        seed_fn(args.seed)
    torch.backends.cudnn.benchmark = True

    dictionary = Dictionary.load_from_file('data/dictionary.pkl')
    train_dset = VQAFeatureDataset('train', dictionary)
    eval_dset = VQAFeatureDataset('val', dictionary)
    batch_size = args.batch_size

    # The builder is looked up on base_model by name: 'build_' + args.model.
    constructor = 'build_%s' % args.model
    build_fn = getattr(base_model, constructor)
    model = build_fn(train_dset, args.num_hid).cuda()
    # Must run before the DataParallel wrap below, while w_emb is still a
    # direct attribute of the model.
    model.w_emb.init_embedding('data/glove6b_init_300d.npy')

    model = nn.DataParallel(model).cuda()

    # Both loaders use the same settings, including shuffling.
    train_loader = DataLoader(
        train_dset, batch_size, shuffle=True, num_workers=1)
    eval_loader = DataLoader(
        eval_dset, batch_size, shuffle=True, num_workers=1)
    train(model, train_loader, eval_loader, args.epochs, args.output)