Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
import networkx as nx
from node2vec import Node2Vec
# FILES
# Destination paths used by the two save calls at the end of this script.
EMBEDDING_FILENAME = './embeddings.emb'
EMBEDDING_MODEL_FILENAME = './embeddings.model'
# Create a graph
# (random graph with 100 nodes; each possible edge is included with probability 0.5)
graph = nx.fast_gnp_random_graph(n=100, p=0.5)
# Precompute probabilities and generate walks
node2vec = Node2Vec(graph, dimensions=64, walk_length=30, num_walks=200, workers=4)
# Embed
model = node2vec.fit(window=10, min_count=1, batch_words=4) # Any keywords accepted by gensim.Word2Vec can be passed; `dimensions` and `workers` are passed automatically (from the Node2Vec constructor)
# Look for most similar nodes
# NOTE: the result is not stored or printed here; this line only demonstrates the call.
model.wv.most_similar('2') # Output node names are always strings
# Save embeddings for later use
model.wv.save_word2vec_format(EMBEDDING_FILENAME)
# Save model for later use
model.save(EMBEDDING_MODEL_FILENAME)
def _generate_walks(self) -> list:
    """
    Build the random-walk corpus used as the skip-gram input.

    The ``num_walks`` walk indices are partitioned into ``workers`` chunks;
    each chunk is handed to ``parallel_generate_walks`` as its own job and
    the per-job results are concatenated into a single list.

    :return: List of walks. Each walk is a list of nodes.
    """
    # One chunk of walk indices per worker; only the chunk length is passed on.
    chunks = np.array_split(range(self.num_walks), self.workers)

    runner = Parallel(n_jobs=self.workers, temp_folder=self.temp_folder, require=self.require)
    jobs = (
        delayed(parallel_generate_walks)(
            self.d_graph,
            self.walk_length,
            len(chunk),
            worker_id,
            self.sampling_strategy,
            self.NUM_WALKS_KEY,
            self.WALK_LENGTH_KEY,
            self.NEIGHBORS_KEY,
            self.PROBABILITIES_KEY,
            self.FIRST_TRAVEL_KEY,
            self.quiet,
        )
        # Worker ids start at 1.
        for worker_id, chunk in enumerate(chunks, 1)
    )
    per_worker_walks = runner(jobs)

    # Flatten the list of per-worker walk lists into one list of walks.
    return [walk for batch in per_worker_walks for walk in batch]
# Report the size of the held-out test split.
print('Number of positive testing samples: ', len(test_pos))
print('Number of negative testing samples: ', len(test_neg))

# Load the training graph from the path configured in config.train_graph.
train_graph = read_graph(
    file=config.train_graph,
    get_connected_graph=False,
    remove_selfloops=False,
    get_directed=False)
print(
    'Train graph created: {} nodes, {} edges'.format(train_graph.number_of_nodes(), train_graph.number_of_edges()))
print('Number of connected components: ', nx.number_connected_components(train_graph))

# NOTE(review): the source of this snippet lost its indentation; the nesting
# below is reconstructed from the if/else keywords -- confirm against the
# original file.
if config.train:
    if config.resume_training:
        # Resuming: no freshly simulated walks are passed in.
        _ = learn_embeddings(walks=None)
    else:
        G = node2vec.Graph(train_graph, args.directed, args.p, args.q)
        G.preprocess_transition_probs()
        walks = G.simulate_walks(args.num_walks, args.walk_length)
        # learn the embeddings
        _ = learn_embeddings(walks)

embeddings_file = None
checkpoint_file = None
if config.evaluate:
    # Compare by value. The previous `is not 'rnn'` tested object identity,
    # which is not guaranteed to match for equal strings (and raises a
    # SyntaxWarning on Python 3.8+).
    if config.model != 'rnn':
        embeddings_file = config.embeddings_dir + config.output_file
    else:
        checkpoint_file = config.checkpoint_dir + config.checkpoint_name
        print(checkpoint_file)
# evaluate embeddings in link prediction
_ = learn_embeddings(walks=None,
def generate_node2vec_embeddings(A, emd_size=128, negative_injection=False, train_neg=None):
    """
    Learn node2vec embeddings for the graph whose adjacency matrix is ``A``.

    :param A: Sparse adjacency matrix; it is converted with
        ``nx.from_scipy_sparse_matrix``. One embedding row is produced per
        row of ``A``.
    :param emd_size: Dimensionality of each embedding vector.
    :param negative_injection: When true, the pairs in ``train_neg`` are added
        to a copy of ``A`` as (symmetric) edges before walking.
    :param train_neg: ``(row, col)`` index pair used only when
        ``negative_injection`` is true.
    """
    if negative_injection:
        row, col = train_neg
        A = A.copy()  # do not mutate the caller's matrix
        A[row, col] = 1  # inject negative train
        A[col, row] = 1  # inject negative train
    nx_G = nx.from_scipy_sparse_matrix(A)
    G = node2vec.Graph(nx_G, is_directed=False, p=1, q=1)
    G.preprocess_transition_probs()
    walks = G.simulate_walks(num_walks=10, walk_length=80)
    # Materialise each walk as a list of strings. On Python 3 `map` returns a
    # one-shot iterator, which would be exhausted after Word2Vec's first pass
    # over the corpus and leave nothing to train on.
    walks = [[str(node) for node in walk] for walk in walks]
    model = Word2Vec(walks, size=emd_size, window=10, min_count=0, sg=1,
                     workers=8, iter=1)
    wv = model.wv
    embeddings = np.zeros([A.shape[0], emd_size], dtype='float32')
    sum_embeddings = 0
    empty_list = []  # indices of nodes that have no learned vector
    for i in range(A.shape[0]):
        if str(i) in wv:
            embeddings[i] = wv.word_vec(str(i))
            sum_embeddings += embeddings[i]
        else:
            empty_list.append(i)
    # Mean over the nodes that did receive a vector.
    # NOTE(review): the visible snippet ends here without using
    # `mean_embedding` or returning `embeddings`; presumably the remainder
    # was cut off -- confirm against the full source.
    mean_embedding = sum_embeddings / (A.shape[0] - len(empty_list))
# Echo the walk parameters taken from the parsed command-line arguments.
print('Parameters:\n')
print('p = %f\n' %args.p)
print('q = %f\n' %args.q)
print('num walks = %d\n' %args.num_walks)
nx_G = read_graph()
print('read graph')
# Earlier variant, kept for reference: Graph constructed without the preprocessing argument.
#G = node2vec.Graph(nx_G, args.directed, args.p, args.q)
#print 'defined G'
G = node2vec.Graph(nx_G, args.directed, args.p, args.q, args.preprocessing)
print('defined G')
#print(G.preprocessing)
# Transition probabilities are precomputed only when G.preprocessing is truthy.
# NOTE(review): indentation was lost in this snippet, so it is unclear whether
# the 'preprocessed' message belongs inside the `if` -- confirm against the original.
if G.preprocessing:
G.preprocess_transition_probs()
print('preprocessed')
# Earlier variant, kept for reference: unconditional preprocessing.
#G.preprocess_transition_probs()
#print 'preprocessed'
# The return value is not captured here; args.output is passed to simulate_walks.
G.simulate_walks(args.num_walks, args.walk_length, args.output, args.p, args.q)
print('defined walk')
# NOTE(review): start_time is not defined in the visible snippet; presumably set earlier via time.time().
print("--- %s seconds ---" % (time.time() - start_time))
def main(args):
    """
    Pipeline for representational learning for all nodes in a graph.

    Reads the graph, builds the node2vec walker, precomputes transition
    probabilities, simulates walks and learns embeddings from them, printing
    a progress message after each stage and the total elapsed time at the end.

    :param args: Parsed arguments providing ``directed``, ``p``, ``q``,
        ``num_walks`` and ``walk_length``.
    """
    start_time = time.time()
    nx_G = read_graph()
    # print() calls instead of Python 2 print statements: the statements are a
    # SyntaxError on Python 3, while a single-argument call prints the same
    # text on both versions.
    print('read graph')
    G = node2vec.Graph(nx_G, args.directed, args.p, args.q)
    print('defined G')
    G.preprocess_transition_probs()
    print('preprocessed')
    walks = G.simulate_walks(args.num_walks, args.walk_length)
    print('defined walk')
    learn_embeddings(walks)
    print('learned embeddings')
    print("--- %s seconds ---" % (time.time() - start_time))
# Interactive hyper-parameters for the two-part embedding run below.
walk_length = int(input("Enter length of each walk: "))
D = int(input("Enter dimensionality of the feature vectors: "))
W = int(input("Enter window size: "))
epoch = int(input("Enter number of iterations: "))
output = input("Enter output file: ")

# First model: p = 0.001, q = 1. It receives half of the D dimensions.
P = 0.001
Q = 1
graph = node2vec.Graph(nx_graph, is_directed=nx.is_directed(nx_graph), p=P, q=Q)
graph.preprocess_transition_probs()
walks = graph.simulate_walks(num_walks=num_walks, walk_length=walk_length)
# Floor division keeps `dim` an int; `D/2` is a float on Python 3.
node_model1 = learn_node_features_2(walks=walks, dim=D // 2, window=W, epoch=epoch)

# Second model: p = 1, q = 0.001. It receives the other half of the dimensions.
P = 1
Q = 0.001
graph = node2vec.Graph(nx_graph, is_directed=nx.is_directed(nx_graph), p=P, q=Q)
graph.preprocess_transition_probs()
walks = graph.simulate_walks(num_walks=num_walks, walk_length=walk_length)
node_model2 = learn_node_features_2(walks=walks, dim=D // 2, window=W, epoch=epoch)

# Both models are handed to save_node_features together with the full dimensionality D.
save_node_features(nm1=node_model1, nm2=node_model2, nodes=nx.nodes(nx_graph), dim=D, output=output)
print(" LINE ===> Success")
def run():
    """
    Interactively sample walks with p = q = 1 and learn node features.

    Prompts, in order, for the number of walks per node, the walk length,
    the feature dimensionality, the window size, the number of iterations
    and the output file, then passes everything to ``learn_node_features``.
    """
    # Both walk parameters are fixed to 1 for this run.
    walker = node2vec.Graph(nx_graph, is_directed=nx.is_directed(nx_graph), p=1, q=1)
    walker.preprocess_transition_probs()

    # Walk sampling settings are requested only after preprocessing.
    walks_per_node = int(input("Enter no. of walks to sample for each node: "))
    steps_per_walk = int(input("Enter length of each walk: "))
    sampled_walks = walker.simulate_walks(num_walks=walks_per_node, walk_length=steps_per_walk)

    # Training settings.
    feature_dim = int(input("Enter dimensionality of the feature vectors: "))
    context_window = int(input("Enter window size: "))
    n_epochs = int(input("Enter number of iterations: "))
    out_path = input("Enter output file: ")

    learn_node_features(
        walks=sampled_walks,
        dim=feature_dim,
        window=context_window,
        epoch=n_epochs,
        output=out_path,
    )
    print(" DeepWalk ===> Success")
# Entry point: reads settings from TensorFlow flags, simulates node2vec walks,
# loads node features and builds graph-context pairs and target neighbours.
# NOTE(review): the visible body ends after X_target is built; the rest of the
# function appears to be cut off.
def main():
FLAGS = tf.app.flags.FLAGS
# Copy the flag values used below into locals.
inputEdgeFile = FLAGS.inputEdgeFile
inputFeatureFile = FLAGS.inputFeatureFile
inputLabelFile = FLAGS.inputLabelFile
outputEmbedFile = FLAGS.outputEmbedFile
window_size = FLAGS.window_size
# Read graph
nx_G = read_graph(FLAGS, inputEdgeFile)
# Perform random walks to generate graph context
G = node2vec.Graph(nx_G, FLAGS.directed, FLAGS.p, FLAGS.q)
G.preprocess_transition_probs()
walks = G.simulate_walks(FLAGS.num_walks, FLAGS.walk_length)
# Read features
print('reading features...')
X = read_feature(inputFeatureFile)
print('generating graph context pairs...')
# Time only the context-pair construction.
start_time = time.time()
all_pairs = generate_graph_context_all_pairs(walks, window_size)
end_time = time.time()
print('time consumed for constructing graph context: %.2f' % (end_time - start_time))
# NOTE(review): `nodes`, `inputLabelFile` and `outputEmbedFile` are not used in the visible part.
nodes = nx_G.nodes()
X_target = construct_traget_neighbors(nx_G, X, FLAGS, mode='WAN')
# Load the negative test samples. The context manager closes the file handle,
# which the previous bare `open(...)` never did.
# NOTE: pickle can execute arbitrary code while loading; config.test_neg must
# only ever point at trusted files.
with open(config.test_neg, 'rb') as test_neg_file:
    test_neg = pickle.load(test_neg_file)
# train_pos, train_neg, test_pos, test_neg = create_train_test_splits(0.5, 0.5, nx_G)
# train_neg, test_neg = create_train_test_splits(0.5, 0.5, nx_G)
print('Number of positive training samples: ', len(train_pos))
print('Number of negative training samples: ', len(train_neg))
print('Number of positive testing samples: ', len(test_pos))
print('Number of negative testing samples: ', len(test_neg))

# Load the training graph from the path configured in config.train_graph.
train_graph = read_graph(
    file=config.train_graph,
    get_connected_graph=False,
    remove_selfloops=False)
print(
    'Train graph created: {} nodes, {} edges'.format(train_graph.number_of_nodes(), train_graph.number_of_edges()))
print('Number of connected components: ', nx.number_connected_components(train_graph))

# Simulate node2vec walks on the training graph and learn node embeddings from them.
G = node2vec.Graph(train_graph, args.directed, args.p, args.q)
G.preprocess_transition_probs()
walks = G.simulate_walks(args.num_walks, args.walk_length)
# Hard-coded sample walks, kept for reference:
# walks = [['1', '2345', '3356', '4446', '5354', '6124', '7457', '8445', '9790', '1022', '1133'],
# ['6914', '1022', '9780', '8445', '7457', '6123', '5354', '4446', '3356', '2345', '1'],
# ['6914', '1022', '9790', '8445', '7457', '6123', '5354', '4446', '3356', '2345', '1'],
# ['6914', '1022', '9790', '8445', '7457', '6123', '5354', '4446', '3356', '2345', '1'],
# ['6914', '1022', '9790', '8445', '7457', '6123', '5354', '4446', '3356', '2345', '1', '9999', '5000', '2000', '1000']]
node_embeddings = learn_embeddings(walks)

# for training
train_pos_edge_embs = get_edge_embeddings(train_pos, node_embeddings)
train_neg_edge_embs = get_edge_embeddings(train_neg, node_embeddings)
# Positive edge embeddings come first, negatives after.
train_set = np.concatenate([train_pos_edge_embs, train_neg_edge_embs])
# labels: 1-> link exists, 0-> false edge
# All labels start at 0 here.
train_labels = np.zeros(len(train_set))