Source code for gemben.evaluation.evaluate_graph_reconstruction

try: import cPickle as pickle
except: import pickle
from gemben.evaluation import metrics
from gemben.utils import evaluation_util, graph_util
import networkx as nx
import numpy as np


def evaluateStaticGraphReconstruction(digraph, graph_embedding,
                                      X_stat, node_l=None, file_suffix=None,
                                      sample_ratio_e=None, is_undirected=True,
                                      is_weighted=False):
    """Evaluate how well an embedding reconstructs the edges of a graph.

    The embedding's reconstructed adjacency matrix is converted into a
    predicted edge list, which is then scored against ``digraph`` with
    mean average precision (MAP) and a precision curve.

    Args:
        digraph (Object): networkx graph to evaluate against.
        graph_embedding (object): Embedding object exposing
            ``get_reconstructed_adj``.
        X_stat (Vector): Embedding of the nodes of the graph.
        node_l: Passed through to ``get_reconstructed_adj`` unchanged.
        file_suffix (Str): When given, forwarded to
            ``get_reconstructed_adj`` as its second positional argument.
        sample_ratio_e (Float): When truthy, only a random sample of node
            pairs (drawn with this ratio) is evaluated; otherwise all
            pairs are considered.
        is_undirected (bool): Forwarded to the edge-pair sampler and the
            edge-list extraction.
        is_weighted (bool): When True, additionally compute the norm of
            the weight reconstruction error on observed edges.

    Returns:
        tuple: ``(MAP, prec_curv, err, err_baseline)``. ``err`` and
        ``err_baseline`` are ``None`` unless ``is_weighted`` is True.
    """
    node_num = digraph.number_of_nodes()

    # Optionally restrict the evaluation to a random sample of node pairs.
    if sample_ratio_e:
        eval_edge_pairs = evaluation_util.getRandomEdgePairs(
            node_num,
            sample_ratio_e,
            is_undirected
        )
    else:
        eval_edge_pairs = None

    if file_suffix is None:
        estimated_adj = graph_embedding.get_reconstructed_adj(X_stat, node_l)
    else:
        estimated_adj = graph_embedding.get_reconstructed_adj(
            X_stat,
            file_suffix,
            node_l
        )

    predicted_edge_list = evaluation_util.getEdgeListFromAdjMtx(
        estimated_adj,
        is_undirected=is_undirected,
        edge_pairs=eval_edge_pairs
    )

    # ``Graph.node`` was removed in networkx 2.4; fall back to ``Graph.nodes``
    # there while still supporting older releases where ``node`` exists.
    node_attrs = getattr(digraph, 'node', None)
    if node_attrs is None:
        node_attrs = digraph.nodes

    # Node 0 is probed for a 'partition' attribute. Guard the lookup so an
    # empty graph, or one without a node labelled 0, no longer raises
    # KeyError. When the attribute is present, only predicted edges whose
    # endpoints lie in different partitions are kept.
    if 0 in node_attrs and 'partition' in node_attrs[0]:
        predicted_edge_list = [
            e for e in predicted_edge_list
            if node_attrs[e[0]]['partition'] != node_attrs[e[1]]['partition']
        ]

    MAP = metrics.computeMAP(predicted_edge_list, digraph)
    prec_curv, _ = metrics.computePrecisionCurve(predicted_edge_list, digraph)

    # If weighted, compute the error in reconstructed weights of observed
    # edges only.
    if is_weighted:
        # ``to_numpy_matrix`` was removed in networkx 3.0; prefer
        # ``to_numpy_array`` when the installed version provides it.
        if hasattr(nx, 'to_numpy_array'):
            digraph_adj = nx.to_numpy_array(digraph)
        else:
            digraph_adj = np.asarray(nx.to_numpy_matrix(digraph))
        # Build a masked copy instead of zeroing ``estimated_adj`` in place,
        # so the matrix owned by the embedding object is left untouched.
        masked_adj = np.where(digraph_adj == 0, 0, np.asarray(estimated_adj))
        err = np.linalg.norm(digraph_adj - masked_adj)
        err_baseline = np.linalg.norm(digraph_adj)
    else:
        err = None
        err_baseline = None

    return (MAP, prec_curv, err, err_baseline)
def expGR(digraph, graph_embedding,
          X, n_sampled_nodes_l, rounds,
          res_pre, m_summ,
          K=10000,
          is_undirected=True,
          sampling_scheme="u_rand"):
    """Run repeated graph-reconstruction experiments on sampled subgraphs.

    For every requested sample size, ``rounds`` subgraphs are sampled from
    ``digraph`` and scored with ``evaluateStaticGraphReconstruction``. A
    text summary is written to ``<res_pre>_<m_summ>_<sampling_scheme>.grsumm``
    and the raw results are pickled to
    ``<res_pre>_<m_summ>_<sampling_scheme>.gr``.

    Args:
        digraph (Object): networkx graph to sample from.
        graph_embedding (object): Embedding object passed through to
            ``evaluateStaticGraphReconstruction``.
        X (Vector): Embedding of the nodes; indexed with the node list
            returned by the sampler.
        n_sampled_nodes_l (list): Requested subgraph sizes. Each entry is
            cast to ``int`` and clamped to the number of nodes in
            ``digraph``; duplicates after clamping are dropped. ``None`` or
            an empty list means "use the whole graph size".
        rounds (Int): Number of sampled subgraphs evaluated per size.
        res_pre (Str): Prefix of the output file names.
        m_summ (Str): Method/summary name used in the output file names.
        K (Int): Precision curves are truncated to their first ``K``
            entries.
        is_undirected (bool): Forwarded to
            ``evaluateStaticGraphReconstruction``.
        sampling_scheme (Str): ``"u_rand"`` selects
            ``graph_util.sample_graph``; any other value selects
            ``graph_util.sample_graph_rw``.

    Returns:
        list: The per-round MAP values for the first evaluated sample size.
    """
    print('\tGraph Reconstruction')

    # Normalise the requested sizes. The previous code read the undefined
    # names ``n_sample_nodes_l`` and ``node_num`` here and always crashed.
    total_nodes = digraph.number_of_nodes()
    requested = [] if n_sampled_nodes_l is None else n_sampled_nodes_l
    sample_sizes = []
    for n in requested:
        n_s = min(int(n), total_nodes)
        # Two requests clamped to the same size would share one result
        # slot, so run that size only once.
        if n_s not in sample_sizes:
            sample_sizes.append(n_s)
    if not sample_sizes:
        sample_sizes = [total_nodes]

    MAP = {}
    prec_curv = {}
    err = {}
    err_b = {}
    n_nodes = {}
    n_edges = {}

    summ_path = '%s_%s_%s.grsumm' % (res_pre, m_summ, sampling_scheme)
    # The context manager closes the summary file even if a round raises.
    with open(summ_path, 'w') as summ_file:
        summ_file.write('Method\t%s\n' % metrics.getMetricsHeader())
        for n_s in sample_sizes:
            MAP[n_s] = [None] * rounds
            prec_curv[n_s] = [None] * rounds
            err[n_s] = [None] * rounds
            err_b[n_s] = [None] * rounds
            n_nodes[n_s] = [None] * rounds
            n_edges[n_s] = [None] * rounds
            for rid in range(rounds):
                if sampling_scheme == "u_rand":
                    sampled_digraph, node_l = graph_util.sample_graph(
                        digraph,
                        n_sampled_nodes=n_s
                    )
                else:
                    sampled_digraph, node_l = graph_util.sample_graph_rw(
                        digraph,
                        n_sampled_nodes=n_s
                    )
                n_nodes[n_s][rid] = sampled_digraph.number_of_nodes()
                n_edges[n_s][rid] = sampled_digraph.number_of_edges()
                print('\t\tRound: %d/%d, n_nodes: %d, n_edges:%d\n'
                      % (rid, rounds, n_nodes[n_s][rid], n_edges[n_s][rid]))
                # Restrict the embedding to the sampled nodes.
                sampled_X = X[node_l]
                (MAP[n_s][rid], prec_curv[n_s][rid],
                 err[n_s][rid], err_b[n_s][rid]) = \
                    evaluateStaticGraphReconstruction(
                        sampled_digraph,
                        graph_embedding,
                        sampled_X,
                        node_l,
                        is_undirected=is_undirected
                    )
                prec_curv[n_s][rid] = prec_curv[n_s][rid][:K]

            summ_file.write('n_s:%d' % n_s)
            # Weight errors are None when the evaluation was not weighted;
            # only report them when every round produced a number.
            has_errors = (
                all(e is not None for e in err[n_s])
                and all(e is not None for e in err_b[n_s])
            )
            if has_errors:
                summ_file.write('\tErr: %f/%f\n' % (np.mean(err[n_s]),
                                                    np.std(err[n_s])))
                summ_file.write('\tErr_b: %f/%f\n' % (np.mean(err_b[n_s]),
                                                      np.std(err_b[n_s])))
            summ_file.write(
                '\t%f/%f\t%s\n' % (
                    np.mean(MAP[n_s]),
                    np.std(MAP[n_s]),
                    metrics.getPrecisionReport(prec_curv[n_s][0],
                                               n_edges[n_s][0])
                )
            )

    # Persist the raw results. The last element is the list of sizes that
    # were actually evaluated, i.e. the keys of the dictionaries above.
    result_path = '%s_%s_%s.gr' % (res_pre, m_summ, sampling_scheme)
    with open(result_path, 'wb') as result_file:
        pickle.dump(
            [n_nodes, n_edges, MAP, prec_curv, err, err_b, sample_sizes],
            result_file
        )

    return MAP[sample_sizes[0]]