Source code for gemben.evaluation.evaluate_link_prediction

try: import cPickle as pickle
except: import pickle
from gemben.evaluation import metrics
from gemben.utils import evaluation_util, graph_util
import numpy as np
import networkx as nx
import pdb
from time import time

import sys
# sys.path.insert(0, './')
from gemben.utils import embed_util


[docs]def evaluateStaticLinkPrediction(train_digraph, test_digraph, graph_embedding, X, node_l=None, sample_ratio_e=None, is_undirected=True, store_predictions=1): """This function evaluates the static link prediction accuracy of the embedding algorithms. Args: train_digraph (Object): directed networkx graph object used for training the algorithm. test_digraph (Object): directed networkx graph object to be used for testing the algorithm. graph_embedding (object): Object of the embedding algorithm class defined in gemben/embedding. X (Vector): Embedding of the the nodes of the graph. node_l (Int): Number of nodes in the graph. sample_ratio_e (Float): The ratio used to sample the original graph for evaluation purpose. is_undirected (bool): Boolean flag to denote whether the graph is directed or not. store_prediction (Int): Stores the predicted values. Returns: Numpy Array: Consiting of Mean average precision and the precision curve values. """ node_num = train_digraph.number_of_nodes() # evaluation if sample_ratio_e: eval_edge_pairs = evaluation_util.getRandomEdgePairs( node_num, sample_ratio_e, is_undirected ) else: eval_edge_pairs = None if X is None: # If not an embedding approach, store the new subgraph graph_embedding.learn_embedding(train_digraph) estimated_adj = graph_embedding.get_reconstructed_adj(X, node_l) predicted_edge_list = evaluation_util.getEdgeListFromAdjMtx( estimated_adj, is_undirected=is_undirected, edge_pairs=eval_edge_pairs ) filtered_edge_list = [e for e in predicted_edge_list if not train_digraph.has_edge(e[0], e[1])] if 'partition' in train_digraph.node[0]: filtered_edge_list = [e for e in predicted_edge_list if train_digraph.node[e[0]]['partition'] != train_digraph.node[e[1]]['partition']] pickle.dump(filtered_edge_list, open('gemben/nodeListMap/preds.pickle', 'wb')) pickle.dump(test_digraph, open('gemben/nodeListMap/test_graph.pickle', 'wb')) t1 = time() MAP = metrics.computeMAP(filtered_edge_list, test_digraph) t2 = time() prec_curv, _ = metrics.computePrecisionCurve( filtered_edge_list, test_digraph ) t3 = time() print('MAP computation time: %f sec, prec: %f sec' % (t2 - t1, t3 - t2)) return (MAP, prec_curv)
[docs]def expLPT(digraph, graph_embedding, res_pre, m_summ, K=100000, is_undirected=True): """This function is used to experiment graph reconstruction for temporally varying graphs. Args: digraph (Object): directed networkx graph object. graph_embedding (object): Object of the embedding algorithm class defined in gemben/embedding. res_pre (Str): Prefix to be used to save the result. m_summ (Str): String to denote the name of the summary file. K (Int): The maximum value to be use to get the precision curves. is_undirected (bool): Boolean flag to denote whether the graph is directed or not. """ print('\tLink Prediction Temporal') t1 = time() # learn graph embedding on whole graph X, _ = graph_embedding.learn_embedding(graph=digraph) t2 = time() print('\t\tTime taken to learn the embedding: %f sec' % (t2 - t1)) estimated_adj = graph_embedding.get_reconstructed_adj(X) predicted_edge_list = evaluation_util.getEdgeListFromAdjMtx( estimated_adj, is_undirected=is_undirected ) filtered_edge_list = [e for e in predicted_edge_list if not digraph.has_edge(e[0], e[1])] if 'partition' in digraph.node[0]: filtered_edge_list = [e for e in predicted_edge_list if digraph.node[e[0]]['partition'] != digraph.node[e[1]]['partition']] sorted_edges = sorted(predicted_edge_list, key=lambda x: x[2], reverse=True) print('\t\tPredicted edge list computed in %f sec. Saving edge list.' % (time() - t2)) pickle.dump( sorted_edges[:K], open('%s_%s_predEdgeList.pickle' % (res_pre, m_summ), 'wb') ) print('\t\tSaved edge list.')
[docs]def expLP(digraph, graph_embedding, n_sample_nodes_l, rounds, res_pre, m_summ, train_ratio=0.8, no_python=True, K=32768, is_undirected=True, sampling_scheme="u_rand"): """This function is used to experiment link prediction. Args: digraph (Object): directed networkx graph object. graph_embedding (object): Object of the embedding algorithm class defined in gemben/embedding. n_sampled_node_l (Int): Number of nodes in the graph. rounds (Int): The number of times the graph reconstruction is performed. res_pre (Str): Prefix to be used to save the result. train_ratio (Float): The split used for dividing the traing and testing data. no_python (Bool): Flag to denote if python is used. m_summ (Str): String to denote the name of the summary file. K (Int): The maximum value to be use to get the precision curves. sampling_scheme (Str): Sampling schme used to sample nodes to be reconstructed. is_undirected (bool): Boolean flag to denote whether the graph is directed or not. Returns: Numpy Array: Consisting of Mean average precision. """ print('\tLink Prediction') MAP = {} prec_curv = {} n_sample_nodes_l = [min(int(n), digraph.number_of_nodes()) for n in n_sample_nodes_l] # Randomly hide (1-train_ratio)*100% of links node_num = digraph.number_of_nodes() train_digraph, test_digraph = evaluation_util.splitDiGraphToTrainTest( digraph, train_ratio=train_ratio, is_undirected=is_undirected ) # Ensure the resulting train subgraph is connected if not nx.is_connected(train_digraph.to_undirected()): train_digraph = max( nx.weakly_connected_component_subgraphs(train_digraph), key=len ) tdl_nodes = train_digraph.nodes() nodeListMap = dict(zip(tdl_nodes, range(len(tdl_nodes)))) train_digraph = nx.relabel_nodes(train_digraph, nodeListMap, copy=True) test_digraph = test_digraph.subgraph(tdl_nodes) ### unfroze the graph test_digraph = nx.Graph(test_digraph) ####nx.relabel_nodes(test_digraph, nodeListMap, copy=False) test_digraph = nx.relabel_nodes(test_digraph, nodeListMap, copy=True) pickle.dump(nodeListMap, open('gemben/nodeListMap/lp_lcc.pickle', 'wb')) t1 = time() # learn graph embedding on train subgraph print( 'Link Prediction train graph n_nodes: %d, n_edges: %d' % ( train_digraph.number_of_nodes(), train_digraph.number_of_edges()) ) X, _ = graph_embedding.learn_embedding( graph=train_digraph, no_python=no_python ) if X is not None and X.shape[0] != train_digraph.number_of_nodes(): pdb.set_trace() print('Time taken to learn the embedding: %f sec' % (time() - t1)) # sample test graph for evaluation and store results node_l = None if not n_sample_nodes_l: n_sample_nodes_l = [node_num] summ_file = open('%s_%s_%s.lpsumm' % (res_pre, m_summ, sampling_scheme), 'w') summ_file.write('Method\t%s\n' % metrics.getMetricsHeader()) for n_s in n_sample_nodes_l: n_s = int(n_s) n_s = min(n_s, train_digraph.number_of_nodes()) MAP[n_s] = [None] * rounds prec_curv[n_s] = [None] * rounds for round_id in range(rounds): if sampling_scheme == "u_rand": train_digraph_s, node_l = graph_util.sample_graph( train_digraph, n_s ) else: train_digraph_s, node_l = graph_util.sample_graph_rw( train_digraph, n_s ) if X is not None: X_sub = X[node_l] else: X_sub = None test_digraph_s = test_digraph.subgraph(node_l) nodeListMap = dict(zip(node_l, range(len(node_l)))) pickle.dump(nodeListMap, open('gemben/nodeListMap/lp_lcc_samp.pickle', 'wb')) test_digraph_s = nx.relabel_nodes(test_digraph_s, nodeListMap, copy=True) MAP[n_s][round_id], prec_curv[n_s][round_id] = \ evaluateStaticLinkPrediction(train_digraph_s, test_digraph_s, graph_embedding, X_sub, node_l=node_l, is_undirected=is_undirected) prec_curv[n_s][round_id] = prec_curv[n_s][round_id][:K] summ_file.write('\tn_s:%d, %f/%f\t%s\n' % ( n_s, np.mean(MAP[n_s]), np.std(MAP[n_s]), metrics.getPrecisionReport( prec_curv[n_s][0], len(prec_curv[n_s][0]) ) )) summ_file.close() #if len(prec_curv[-1][0]) < 100: #pdb.set_trace() pickle.dump([MAP, prec_curv, n_sample_nodes_l], open('%s_%s_%s_%s.lp' % (res_pre, m_summ, sampling_scheme, str(train_ratio)), 'wb')) print('Link prediction evaluation complete. Time: %f sec' % (time() - t1)) # prec_curv2 = [p[4096] for p in prec_curv[prec_curv.keys()[0]]] return MAP[list(MAP.keys())[0]] # prec_curv2