How to use the dgl.contrib.sampling module in dgl

To help you get started, we’ve selected a few dgl.contrib.sampling examples, based on popular ways the module is used in public projects.

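All of these snippets revolve around the same contrib API: NeighborSampler turns a read-only DGLGraph into an iterator of NodeFlow batches. Below is a minimal, self-contained sketch of that pattern, assuming the DGL 0.4-era contrib API these examples target; the random graph and the batch/expand sizes are hypothetical.

import dgl
import scipy.sparse as sp

# NeighborSampler requires an immutable graph, hence readonly=True.
g = dgl.DGLGraph(sp.random(100, 100, density=0.1), readonly=True)

# Sample 1-hop in-neighborhoods around batches of 16 seed nodes, expanding
# up to 4 neighbors per node; each iteration yields a NodeFlow.
for nf in dgl.contrib.sampling.NeighborSampler(
        g, 16, 4, neighbor_type='in', num_hops=1, shuffle=True):
    print(nf.layer_size(-1))  # number of seed nodes in this batch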

github dmlc / dgl / tests / compute / test_dis_sampler.py
def start_sampler():
    # Build a random 100-node graph and stream sampled subgraphs to the
    # receiver registered under ID 0 in the namebook.
    g = generate_rand_graph(100)
    namebook = {0: '127.0.0.1:50051'}
    sender = dgl.contrib.sampling.SamplerSender(namebook)
    for i, subg in enumerate(dgl.contrib.sampling.NeighborSampler(
            g, 1, 100, neighbor_type='in', num_workers=4)):
        sender.send(subg, 0)
    # Tell receiver 0 that this sender is done.
    sender.signal(0)
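The test pairs this sender with a separate receiver process. A minimal sketch of that side, assuming the same address and a single sender (the SamplerReceiver constructor arguments appear verbatim in the dis_gcn_cv_sc.py example further down):

def start_receiver():
    # Iterate over the NodeFlows streamed by start_sampler(); iteration
    # ends once every registered sender has signalled completion.
    g = generate_rand_graph(100)
    receiver = dgl.contrib.sampling.SamplerReceiver(
        graph=g, addr='127.0.0.1:50051', num_sender=1)
    for subg in receiver:
        print(subg.layer_size(-1))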
github dmlc / dgl / tests / compute / test_randomwalk.py
def test_random_walk_with_restart():
    edge_list = [(0, 1), (1, 2), (2, 3), (3, 4),
                 (4, 3), (3, 2), (2, 1), (1, 0)]
    seeds = [0, 1]
    max_nodes = 10

    # A bidirectional path graph 0-1-2-3-4, so consecutive nodes in any
    # walk differ by exactly 1 in node ID.
    g = dgl.DGLGraph(edge_list)

    # test normal RWR: restart probability 0.2, visiting at least
    # max_nodes nodes per seed
    traces = dgl.contrib.sampling.random_walk_with_restart(g, seeds, 0.2, max_nodes)
    assert len(traces) == len(seeds)
    for traces_per_seed in traces:
        total_nodes = 0
        for t in traces_per_seed:
            total_nodes += len(t)
            trace_diff = np.diff(F.zerocopy_to_numpy(t), axis=-1)
            assert (np.abs(trace_diff) == 1).all()
        assert total_nodes >= max_nodes

    # test RWR with early stopping
    traces = dgl.contrib.sampling.random_walk_with_restart(
            g, seeds, 1, 100, max_nodes, 1)
    assert len(traces) == len(seeds)
    for traces_per_seed in traces:
        assert sum(len(t) for t in traces_per_seed) < 100
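Outside the test harness the call works the same way. A minimal standalone sketch, reusing the path graph above (the 0.5 restart probability and the 8-node budget are arbitrary choices):

import dgl

g = dgl.DGLGraph([(0, 1), (1, 2), (2, 3), (3, 4),
                  (4, 3), (3, 2), (2, 1), (1, 0)])
# One list of trace tensors per seed; the walk restarts from the seed
# with probability 0.5 and stops once at least 8 nodes were visited.
traces = dgl.contrib.sampling.random_walk_with_restart(g, [0], 0.5, 8)
for t in traces[0]:
    print(t)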
github dmlc / dgl / tests / compute / test_randomwalk.py
def test_metapath_random_walk():
    # Two relations between node types 'a' and 'b'; the metapath
    # ['ab', 'ba'] * 4 alternates between them for 8 hops.
    g1 = dgl.bipartite(([0, 1, 2, 3], [0, 1, 2, 3]), 'a', 'ab', 'b')
    g2 = dgl.bipartite(([0, 0, 1, 1, 2, 2, 3, 3], [1, 3, 2, 0, 3, 1, 0, 2]), 'b', 'ba', 'a')
    G = dgl.hetero_from_relations([g1, g2])
    seeds = [0, 1]
    # 3 walks per seed, each following the 8-edge metapath.
    traces = dgl.contrib.sampling.metapath_random_walk(G, ['ab', 'ba'] * 4, seeds, 3)
    for seed, traces_per_seed in zip(seeds, traces):
        assert len(traces_per_seed) == 3
        for trace in traces_per_seed:
            assert len(trace) == 8
            trace = np.insert(F.asnumpy(trace), 0, seed)
            for i in range(4):
                assert g1.has_edge_between(trace[2 * i], trace[2 * i + 1])
                assert g2.has_edge_between(trace[2 * i + 1], trace[2 * i + 2])
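Note the return shape the assertions rely on: one list per seed, each holding the requested number of traces, and each trace has one node per metapath step (the seed itself is not included, hence the np.insert above). A minimal sketch of consuming a single short walk, assuming the heterograph G from the test:

traces = dgl.contrib.sampling.metapath_random_walk(G, ['ab', 'ba'], [0], 1)
print(F.asnumpy(traces[0][0]))  # 2 node IDs, one per metapath step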
github dmlc / dgl / tutorials / models / 1_gnn / 8_sse_mx.py
def train_on_subgraphs(g, label_nodes, batch_size,
                       steady_state_operator, predictor, trainer):
    # To train SSE, we create one subgraph sampler per training
    # phase with the `NeighborSampler` API.
 
    # The first phase samples from all vertices in the graph.
    sampler = dgl.contrib.sampling.NeighborSampler(
            g, batch_size, g.number_of_nodes(), num_hops=1)
    sampler_iter = iter(sampler)
 
    # The second phase only samples from labeled vertices.
    sampler_train = dgl.contrib.sampling.NeighborSampler(
            g, batch_size, g.number_of_nodes(), seed_nodes=label_nodes, num_hops=1)
    sampler_train_iter = iter(sampler_train)

    for i in range(n_embedding_updates):
        subg = next(sampler_iter)
        # Currently, subgraphing does not copy or share features
        # automatically.  Therefore, we need to copy the node
        # embeddings of the subgraph from the parent graph with
        # `copy_from_parent()` before computing...
        subg.copy_from_parent()
        update_embeddings_subgraph(subg, steady_state_operator)
        # ... and copy them back to the parent graph.
        g.ndata['h'][subg.layer_parent_nid(-1)] = subg.layers[-1].data['h']
    for i in range(n_parameter_updates):
        try:
            subg = next(sampler_train_iter)
        except StopIteration:
            break
        # The training subgraph likewise needs its features copied in
        # before the parameter update.
        subg.copy_from_parent()
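The copy-in / compute / copy-out round trip shown here is the general pattern for sampled subgraphs in this API. A condensed sketch, assuming a read-only graph g whose nodes carry an 'h' feature; the doubling step is a placeholder for real computation:

sampler = dgl.contrib.sampling.NeighborSampler(
        g, 32, g.number_of_nodes(), num_hops=1)
for subg in sampler:
    subg.copy_from_parent()                    # pull 'h' into the subgraph
    h = subg.layers[-1].data['h']              # features of the last layer
    subg.layers[-1].data['h'] = h * 2.0        # ...compute something...
    # write the updated embeddings back to the parent graph
    g.ndata['h'][subg.layer_parent_nid(-1)] = subg.layers[-1].data['h']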
github dmlc / dgl / examples / mxnet / sampling / dis_sampling / dis_gcn_cv_sc.py
    infer_model = GCNInfer(in_feats,
                           args.n_hidden,
                           n_classes,
                           n_layers,
                           mx.nd.relu,
                           prefix='GCN')

    infer_model.initialize(ctx=ctx)

    # set up the optimizer (distributed KVStore when running with multiple workers)
    print(model.collect_params())
    kv_type = 'dist_sync' if distributed else 'local'
    trainer = gluon.Trainer(model.collect_params(), 'adam',
                            {'learning_rate': args.lr, 'wd': args.weight_decay},
                            kvstore=mx.kv.create(kv_type))

    # Create sampler receiver
    sampler = dgl.contrib.sampling.SamplerReceiver(graph=g, addr=args.ip, num_sender=args.num_sampler)

    # initialize graph
    dur = []
    adj = g.adjacency_matrix().as_in_context(g_ctx)
    for epoch in range(args.n_epochs):
        start = time.time()
        if distributed:
            msg_head = "Worker {:d}, epoch {:d}".format(g.worker_id, epoch)
        else:
            msg_head = "epoch {:d}".format(epoch)
        for nf in sampler:
            for i in range(n_layers):
                agg_history_str = 'agg_h_{}'.format(i)
                dests = nf.layer_parent_nid(i+1).as_in_context(g_ctx)
                # TODO we could use DGLGraph.pull to implement this, but the current
                # implementation of pull is very slow. Let's manually do it for now.
github dmlc / dgl / examples / mxnet / sse / sse_batch.py
            elif i % args.gpu == 0:
                trainer.step(len(seeds) * len(losses))
                for loss in losses:
                    train_loss += loss.asnumpy()[0]
                losses = []

            if i % args.num_parallel_subgraphs == 0:
                end1 = time.time()
                print("process " + str(args.num_parallel_subgraphs)
                        + " subgraphs takes " + str(end1 - start1))
                start1 = end1

        if args.cache_subgraph:
            sampler.restart()
        else:
            sampler = dgl.contrib.sampling.NeighborSampler(g, args.batch_size, neigh_expand,
                                                           neighbor_type='in',
                                                           num_workers=args.num_parallel_subgraphs,
                                                           seed_nodes=train_vs, shuffle=True)

        # test set accuracy
        logits = model_infer(g, eval_vs)
        y_bar = mx.nd.argmax(logits, axis=1)
        y = eval_labels
        accuracy = mx.nd.sum(y_bar == y) / len(y)
        accuracy = accuracy.asnumpy()[0]

        # update the inference model.
        infer_params = model_infer.collect_params()
        for key in infer_params:
            idx = trainer._param2idx[key]
            trainer._kvstore.pull(idx, out=infer_params[key].data())
github dmlc / dgl / examples / mxnet / sampling / dis_sampling / dis_graphsage_cv.py
        nf.copy_to_parent(node_embed_names=node_embed_names)
        mx.nd.waitall()
        print(msg_head + ': training takes ' + str(time.time() - start))

        infer_params = infer_model.collect_params()

        for key in infer_params:
            idx = trainer._param2idx[key]
            trainer._kvstore.pull(idx, out=infer_params[key].data())

        num_acc = 0.
        num_tests = 0

        if not distributed or g.worker_id == 0:
            for nf in dgl.contrib.sampling.NeighborSampler(g, args.test_batch_size,
                                                           g.number_of_nodes(),
                                                           neighbor_type='in',
                                                           num_hops=n_layers,
                                                           seed_nodes=test_nid,
                                                           add_self_loop=True):
                node_embed_names = [['preprocess', 'features']]
                for i in range(n_layers):
                    node_embed_names.append(['norm', 'subg_norm'])
                nf.copy_from_parent(node_embed_names=node_embed_names, ctx=ctx)

                pred = infer_model(nf)
                batch_nids = nf.layer_parent_nid(-1)
                batch_labels = g.nodes[batch_nids].data['labels'].as_in_context(ctx)
                num_acc += (pred.argmax(axis=1) == batch_labels).sum().asscalar()
                num_tests += nf.layer_size(-1)
                if distributed:
                    g._sync_barrier()
github dmlc / dgl / examples / mxnet / sampling / dis_sampling / dis_gcn_cv_sc.py
        nf.copy_to_parent(node_embed_names=node_embed_names)
        mx.nd.waitall()
        print(msg_head + ': training takes ' + str(time.time() - start))

        infer_params = infer_model.collect_params()

        for key in infer_params:
            idx = trainer._param2idx[key]
            trainer._kvstore.pull(idx, out=infer_params[key].data())

        num_acc = 0.
        num_tests = 0

        if not distributed or g.worker_id == 0:
            for nf in dgl.contrib.sampling.NeighborSampler(g, args.test_batch_size,
                                                           g.number_of_nodes(),
                                                           neighbor_type='in',
                                                           num_hops=n_layers,
                                                           seed_nodes=test_nid):
                node_embed_names = [['preprocess']]
                for i in range(n_layers):
                    node_embed_names.append(['norm'])

                nf.copy_from_parent(node_embed_names=node_embed_names, ctx=ctx)
                pred = infer_model(nf)
                batch_nids = nf.layer_parent_nid(-1)
                batch_labels = g.nodes[batch_nids].data['labels'].as_in_context(ctx)
                num_acc += (pred.argmax(axis=1) == batch_labels).sum().asscalar()
                num_tests += nf.layer_size(-1)
                if distributed:
                    g._sync_barrier()
github dmlc / dgl / examples / pytorch / sampling / gcn_cv_sc.py
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            node_embed_names = [['h_{}'.format(i)] for i in range(n_layers)]
            node_embed_names.append([])
            nf.copy_to_parent(node_embed_names=node_embed_names)


        for infer_param, param in zip(infer_model.parameters(), model.parameters()):
            infer_param.data.copy_(param.data)


        num_acc = 0.

        for nf in dgl.contrib.sampling.NeighborSampler(g, args.test_batch_size,
                                                       g.number_of_nodes(),
                                                       neighbor_type='in',
                                                       num_workers=32,
                                                       num_hops=n_layers,
                                                       seed_nodes=test_nid):
            node_embed_names = [['preprocess']]
            for i in range(n_layers):
                node_embed_names.append(['norm'])
            nf.copy_from_parent(node_embed_names=node_embed_names)

            infer_model.eval()
            with torch.no_grad():
                pred = infer_model(nf)
                batch_nids = nf.layer_parent_nid(-1).to(device=pred.device).long()
                batch_labels = labels[batch_nids]
                num_acc += (pred.argmax(dim=1) == batch_labels).sum().cpu().item()
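The excerpt stops before reporting the result; a hypothetical wrap-up, assuming each test node is evaluated exactly once:

        print('Test accuracy {:.4f}'.format(num_acc / len(test_nid)))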
github dmlc / dgl / examples / mxnet / sampling / gcn_cv_sc.py
        nf.copy_to_parent(node_embed_names=node_embed_names)
        mx.nd.waitall()
        print(msg_head + ': training takes ' + str(time.time() - start))

        infer_params = infer_model.collect_params()

        for key in infer_params:
            idx = trainer._param2idx[key]
            trainer._kvstore.pull(idx, out=infer_params[key].data())

        num_acc = 0.
        num_tests = 0

        if not distributed or g.worker_id == 0:
            for nf in dgl.contrib.sampling.NeighborSampler(g, args.test_batch_size,
                                                           g.number_of_nodes(),
                                                           neighbor_type='in',
                                                           num_hops=n_layers,
                                                           seed_nodes=test_nid):
                node_embed_names = [['preprocess']]
                for i in range(n_layers):
                    node_embed_names.append(['norm'])

                nf.copy_from_parent(node_embed_names=node_embed_names, ctx=ctx)
                pred = infer_model(nf)
                batch_nids = nf.layer_parent_nid(-1)
                batch_labels = g.nodes[batch_nids].data['labels'].as_in_context(ctx)
                num_acc += (pred.argmax(axis=1) == batch_labels).sum().asscalar()
                num_tests += nf.layer_size(-1)
                if distributed:
                    g._sync_barrier()