How to use the dataset.load_data function in dataset

To help you get started, we’ve selected a few dataset.load_data examples, based on popular ways the function is used in public projects.
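Signatures differ from project to project, as the examples below show, but the common shape is a single call that returns data ready for the rest of the pipeline. A minimal sketch, assuming your project exposes a dataset module (the argument here is illustrative):

from dataset import load_data

data = load_data('train')  # returns the dataset/loader the rest of the pipeline consumes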


github THUDM / KOBE / core / api.py View on GitHub
def __init__(self, config, **opt):
        # Load config used for training and merge with testing options
        with open(config, "r") as f:
            self.config = yaml.safe_load(f)  # safe_load: closes the file and avoids constructing arbitrary objects
        self.config = Namespace(**{**self.config, **opt})

        # Load training data.pkl for src and tgt vocabs
        self.data = load_data(self.config)

        # Load trained model checkpoints
        device, devices_ids = misc_utils.set_cuda(self.config)
        self.model, _ = build_model(None, self.config, device)
        self.model.eval()
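The config-merge idiom above is worth a note: the YAML dict is merged with keyword overrides ({**base, **overrides}, later keys win) and wrapped in a Namespace for attribute access. A standalone sketch of the same pattern, with invented keys standing in for the config file and **opt:

import yaml
from argparse import Namespace

base = yaml.safe_load("beam_size: 5\nmax_len: 50\n")  # stands in for the config file
overrides = {"beam_size": 10}                          # plays the role of **opt
config = Namespace(**{**base, **overrides})            # later keys win on collision
print(config.beam_size, config.max_len)                # -> 10 50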
github microsoft / MSMARCO-Question-Answering / Baseline / scripts / predict.py View on GitHub
def reload_state(checkpoint, config, args):
    """
    Reload state before predicting.
    """
    print('Loading Model...')
    model, id_to_token, id_to_char = BidafModel.from_checkpoint(
        config['bidaf'], checkpoint)

    token_to_id = {tok: id_ for id_, tok in id_to_token.items()}
    char_to_id = {char: id_ for id_, char in id_to_char.items()}

    len_tok_voc = len(token_to_id)
    len_char_voc = len(char_to_id)

    with open(args.data) as f_o:
        data, _ = load_data(json.load(f_o), span_only=True, answered_only=True)
    data = tokenize_data(data, token_to_id, char_to_id)

    id_to_token = {id_: tok for tok, id_ in token_to_id.items()}
    id_to_char = {id_: char for char, id_ in char_to_id.items()}

    data = get_loader(data, args)

    if len_tok_voc != len(token_to_id):
        need = set(tok for id_, tok in id_to_token.items()
                   if id_ >= len_tok_voc)

        if args.word_rep:
            with open(args.word_rep) as f_o:
                pre_trained = SymbolEmbSourceText(
                    f_o, need)
        else:
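The excerpt cuts off inside the vocabulary-extension branch: tokenize_data can add tokens the checkpoint has no embeddings for, and the ids at or above len_tok_voc are the ones that need new vectors. A rough sketch of that idea with made-up names, not the repository's actual code:

import numpy as np

def extend_embeddings(old_emb, num_new, scale=0.1):
    # Append randomly initialised rows for tokens added after training.
    dim = old_emb.shape[1]
    new_rows = np.random.normal(scale=scale, size=(num_new, dim))
    return np.vstack([old_emb, new_rows])

emb = extend_embeddings(np.zeros((100, 300)), num_new=7)  # now 107 x 300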
github MathsXDC / DaSiamRPNWithOfflineTraining / code / train.py View on GitHub
if __name__ == '__main__':
    torch.backends.cudnn.enabled = False  # temporarily work around the "illegal access to memory" issue

    args = parse_args()
    gpu_id = args.gpu_id
    if gpu_id is None:
        DEVICE = torch.device('cpu')
    else:
        DEVICE = torch.device(f'cuda:{gpu_id}')

    z_size = (127, 127)
    x_size = (255, 255)
    batch_size = num_domains = 50
    num_epochs = 100

    loader = dataset.load_data(batch_size, z_size, x_size)['train']

    net = SiamRPNBIG()
    net.train().to(DEVICE)
    # load_pretrained_weights(net, "./SiamRPNBIG.model")
    optimizer = torch.optim.Adam(net.parameters(), weight_decay=0.001, lr=0.001)

    for i_ep in range(num_epochs):
        for i_iter, sample in tqdm(enumerate(loader), total=len(loader)):
            zs = sample['template'].to(DEVICE)
            xs = sample['search_region'].to(DEVICE)
            gt_boxes = sample['gt_box'] #.to(DEVICE)

            optimizer.zero_grad()

            net.template(zs)
            reg_output, cls_output, _ = net.forward(xs) # of shape (50, 4*5, 17, 17), (50, 2*5, 17, 17)
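The snippet ends right after the forward pass. In a standard PyTorch loop the steps that follow are loss computation, backprop, and an optimizer step; a generic, self-contained sketch of that tail (the loss below is a placeholder, not the repository's SiamRPN cls/reg losses):

import torch
import torch.nn.functional as F

def training_step(model, optimizer, xs, targets):
    # One generic step: forward, loss, backward, update.
    optimizer.zero_grad()
    preds = model(xs)
    loss = F.mse_loss(preds, targets)  # placeholder; SiamRPN trains cls + reg heads
    loss.backward()                    # accumulate gradients
    optimizer.step()                   # update parameters
    return loss.item()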
github dmlc / dgl / examples / pytorch / gcn.py View on GitHub
def main(args):
    # load and preprocess dataset
    adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data(args.dataset)
    features = preprocess_features(features)

    # initialize graph
    g = DGLGraph(adj)

    # create GCN model
    model = GCN(features.shape[1],
                args.num_hidden,
                y_train.shape[1],
                args.num_layers,
                F.relu,
                args.dropout)

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
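The excerpt stops once the optimizer is built; the usual continuation in these semi-supervised node-classification examples is a masked cross-entropy loop. A generic sketch, under the assumptions that labels holds class indices and the model takes (g, features) -- both vary across DGL versions:

for epoch in range(args.epochs):
    model.train()
    logits = model(g, features)                 # full-graph forward pass
    loss = F.cross_entropy(logits[train_mask], labels[train_mask])
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()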
github spacemanidol / MSMARCO / Q+A / Baseline / scripts / predict.py View on GitHub
(Same reload_state snippet as the microsoft / MSMARCO-Question-Answering example above.)
github dmlc / dgl / examples / pytorch / geniepath.py View on GitHub
def main(args):
    # dropout parameters
    input_dropout = args.idrop
    attention_dropout = args.adrop

    # load and preprocess dataset
    adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data(args.dataset)
    features = preprocess_features(features)

    # initialize graph
    g = DGLGraph(adj)

    # create model
    model = GeniePath(args.num_layers,
                features.shape[1],
                args.num_hidden,
                y_train.shape[1],
                args.num_heads,
                F.elu,
                input_dropout,
                attention_dropout,
                args.residual)
    model.train(g, features, y_train, epochs=args.epochs)
github milkpku / BetaElephant / policy_experiment / policy.add-all / trainer.py View on GitHub
def train(args):

    device = args.device
    load_path = args.load_path
    # load data
    train_data = load_data('train')
    val_data = load_data('validation')

    # load model
    with tf.device('/gpu:%d' % device):
        model = get_model('train')

    # trainer init
    optimizer = Config.optimizer
    train_step = optimizer.minimize(model.loss)

    # init session and server
    sess = tf.InteractiveSession()
    saver = tf.train.Saver()
    if load_path is None:
        sess.run(tf.global_variables_initializer())  # tf.initialize_all_variables() is long deprecated
    else:
        saver.restore(sess, load_path)
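The excerpt ends once the session is restored or initialised; a TF1-style continuation feeds batches through sess.run. A minimal sketch of that pattern (the batching helper and tensor names here are invented, not BetaElephant's):

    for step in range(10000):                       # step count is illustrative
        batch_x, batch_y = train_data.next_batch()  # hypothetical batching helper
        _, loss_val = sess.run(
            [train_step, model.loss],
            feed_dict={model.x: batch_x, model.y: batch_y})
        if step % 100 == 0:
            print(step, loss_val)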
github PetraVidnerova / GAKeras / fitness.py View on GitHub
def __init__(self, train_name):
        
        # load train data 
        self.X, self.y = load_data(train_name)
github dmlc / dgl / examples / pytorch / gat.py View on GitHub
def main(args):
    # dropout parameters
    input_dropout = 0.2
    attention_dropout = 0.2

    # load and preprocess dataset
    adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data(args.dataset)
    features = preprocess_features(features)

    # initialize graph
    g = DGLGraph(adj)

    # create model
    model = GAT(args.num_layers,
                features.shape[1],
                args.num_hidden,
                y_train.shape[1],
                args.num_heads,
                F.elu,
                input_dropout,
                attention_dropout,
                args.residual)
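The excerpt stops after the model is constructed; judging from the geniepath example above, training in these early DGL examples is then a single call (the exact API may differ for GAT):

    model.train(g, features, y_train, epochs=args.epochs)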