# Source code for gemben.evaluation.evaluate_graph_reconstruction

try: import cPickle as pickle
except: import pickle
from gemben.evaluation import metrics
from gemben.utils import evaluation_util, graph_util
import networkx as nx
import numpy as np


def evaluateStaticGraphReconstruction(digraph, graph_embedding,
                                      X_stat, node_l=None, file_suffix=None,
                                      sample_ratio_e=None, is_undirected=True,
                                      is_weighted=False):
    """Evaluate the graph reconstruction accuracy of an embedding.

    The adjacency matrix is reconstructed from the embedding, converted to a
    list of predicted edges and scored against ``digraph``.

        Args:
            digraph (Object): networkx graph to score against. Node ``0`` is
                inspected for a ``'partition'`` attribute and the endpoints of
                predicted edges are used as node keys, so nodes are presumably
                labelled ``0..n-1`` (verify against the caller).
            graph_embedding (object): Object of the embedding algorithm class
                defined in gemben/embedding; must provide
                ``get_reconstructed_adj``.
            X_stat (Vector): Embedding of the nodes of the graph.
            node_l: Forwarded unchanged to ``get_reconstructed_adj``;
                presumably the list of node indices the rows of ``X_stat``
                correspond to (confirm against the embedding class).
            file_suffix (Str): The name of the algorithm and dataset used to
                save the embedding. When given, it is forwarded to
                ``get_reconstructed_adj`` as its second positional argument.
            sample_ratio_e (Float): The ratio used to sample node pairs for
                evaluation. A falsy value evaluates on all pairs.
            is_undirected (bool): Boolean flag to denote whether the graph is
                undirected.
            is_weighted (bool): Boolean flag to denote whether the edges of
                the graph are weighted.

        Returns:
            tuple: ``(MAP, prec_curv, err, err_baseline)`` -- the mean average
            precision, the precision curve, the norm of the weight
            reconstruction error on observed edges and the norm of the true
            adjacency matrix. ``err`` and ``err_baseline`` are ``None`` unless
            ``is_weighted`` is true.
    """
    node_num = digraph.number_of_nodes()

    # Optionally restrict the evaluation to a random sample of node pairs.
    if sample_ratio_e:
        eval_edge_pairs = evaluation_util.getRandomEdgePairs(
            node_num,
            sample_ratio_e,
            is_undirected
        )
    else:
        eval_edge_pairs = None

    if file_suffix is None:
        estimated_adj = graph_embedding.get_reconstructed_adj(X_stat, node_l)
    else:
        estimated_adj = graph_embedding.get_reconstructed_adj(
            X_stat,
            file_suffix,
            node_l
        )

    predicted_edge_list = evaluation_util.getEdgeListFromAdjMtx(
        estimated_adj,
        is_undirected=is_undirected,
        edge_pairs=eval_edge_pairs
    )

    # ``Graph.node`` was removed in networkx 2.4 in favour of ``Graph.nodes``.
    # Keep using the old attribute where it exists so behaviour is unchanged
    # on older releases, and fall back to the new one otherwise.
    node_attrs = digraph.node if hasattr(digraph, 'node') else digraph.nodes
    if 'partition' in node_attrs[0]:
        # Nodes carry a partition label: keep only predicted edges whose
        # endpoints lie in different partitions.
        predicted_edge_list = [
            e for e in predicted_edge_list
            if node_attrs[e[0]]['partition'] != node_attrs[e[1]]['partition']
        ]

    MAP = metrics.computeMAP(predicted_edge_list, digraph)
    prec_curv, _ = metrics.computePrecisionCurve(predicted_edge_list, digraph)

    # If weighted, compute the error in reconstructed weights of observed edges
    if is_weighted:
        # ``to_numpy_matrix`` was removed in networkx 3.0; ``to_numpy_array``
        # is its replacement. Prefer the original call where it still exists.
        to_dense = getattr(nx, 'to_numpy_matrix', None) or nx.to_numpy_array
        digraph_adj = to_dense(digraph)
        # Zero the estimate wherever the true adjacency is zero so that only
        # observed edges contribute to the error.
        # NOTE(review): this masks ``estimated_adj`` in place; if
        # ``get_reconstructed_adj`` hands back a cached array, that cache is
        # modified too -- confirm.
        estimated_adj[digraph_adj == 0] = 0
        err = np.linalg.norm(digraph_adj - estimated_adj)
        err_baseline = np.linalg.norm(digraph_adj)
    else:
        err = None
        err_baseline = None
    return (MAP, prec_curv, err, err_baseline)


def expGR(digraph, graph_embedding,
          X, n_sampled_nodes_l, rounds,
          res_pre, m_summ,
          K=10000,
          is_undirected=True,
          sampling_scheme="u_rand"):
    """Run the graph reconstruction experiment over sampled subgraphs.

    For every requested sample size, ``rounds`` subgraphs are sampled from
    ``digraph`` and the embedding is scored on each with
    ``evaluateStaticGraphReconstruction``. A text summary is written to
    ``<res_pre>_<m_summ>_<sampling_scheme>.grsumm`` and the raw results are
    pickled to ``<res_pre>_<m_summ>_<sampling_scheme>.gr``.

        Args:
            digraph (Object): directed networkx graph object.
            graph_embedding (object): Object of the embedding algorithm class
                defined in gemben/embedding.
            X (Vector): Embedding of the nodes of the graph. It is indexed
                with the sampled node list (``X[node_l]``), so it is
                presumably a NumPy array with one row per node.
            n_sampled_nodes_l (list): Sample sizes (numbers of nodes) to
                evaluate. Each entry is converted to ``int`` and capped at the
                number of nodes in ``digraph``; ``None`` or an empty list
                means a single sample size equal to the whole graph.
            rounds (Int): The number of times the graph reconstruction is
                performed per sample size.
            res_pre (Str): Prefix to be used to save the result.
            m_summ (Str): String to denote the name of the summary file.
            K (Int): The maximum number of entries kept from each precision
                curve.
            is_undirected (bool): Boolean flag to denote whether the graph is
                undirected.
            sampling_scheme (Str): Sampling scheme used to sample the nodes to
                be reconstructed. ``"u_rand"`` uses
                ``graph_util.sample_graph``; any other value uses
                ``graph_util.sample_graph_rw``.

        Returns:
            list: The mean average precision of every round for the first
            sample size.
    """
    print('\tGraph Reconstruction')

    # Normalise the requested sample sizes. (The original code read an
    # undefined name here and therefore failed on every call.)
    total_nodes = digraph.number_of_nodes()
    if n_sampled_nodes_l is None:
        n_sampled_nodes_l = []
    n_sampled_nodes_l = [min(int(n), total_nodes) for n in n_sampled_nodes_l]
    if not n_sampled_nodes_l:
        n_sampled_nodes_l = [total_nodes]

    if sampling_scheme == "u_rand":
        sample_fn = graph_util.sample_graph
    else:
        sample_fn = graph_util.sample_graph_rw

    MAP = {}
    prec_curv = {}
    err = {}
    err_b = {}
    n_nodes = {}
    n_edges = {}

    out_stem = '%s_%s_%s' % (res_pre, m_summ, sampling_scheme)
    # Context managers make sure both output files are flushed and closed.
    with open(out_stem + '.grsumm', 'w') as summ_file:
        summ_file.write('Method\t%s\n' % metrics.getMetricsHeader())
        for n_s in n_sampled_nodes_l:
            MAP[n_s] = [None] * rounds
            prec_curv[n_s] = [None] * rounds
            err[n_s] = [None] * rounds
            err_b[n_s] = [None] * rounds
            n_nodes[n_s] = [None] * rounds
            n_edges[n_s] = [None] * rounds
            for rid in range(rounds):
                sampled_digraph, node_l = sample_fn(
                    digraph,
                    n_sampled_nodes=n_s
                )
                n_nodes[n_s][rid] = sampled_digraph.number_of_nodes()
                n_edges[n_s][rid] = sampled_digraph.number_of_edges()
                print('\t\tRound: %d/%d, n_nodes: %d, n_edges:%d\n' % (rid,
                                                                       rounds,
                                                                       n_nodes[n_s][rid],
                                                                       n_edges[n_s][rid]))
                # Restrict the embedding to the sampled nodes.
                sampled_X = X[node_l]
                MAP[n_s][rid], prec_curv[n_s][rid], err[n_s][rid], err_b[n_s][rid] = \
                    evaluateStaticGraphReconstruction(sampled_digraph, graph_embedding,
                                                      sampled_X, node_l,
                                                      is_undirected=is_undirected)
                # Keep only the first K points of the precision curve.
                prec_curv[n_s][rid] = prec_curv[n_s][rid][:K]

            summ_file.write('n_s:%d' % n_s)
            # The weight errors are None when the evaluation ran with
            # is_weighted=False (the default, which is what is used above);
            # report them only when they were actually computed.
            if None not in err[n_s] and None not in err_b[n_s]:
                summ_file.write('\tErr: %f/%f\n' % (np.mean(err[n_s]), np.std(err[n_s])))
                summ_file.write('\tErr_b: %f/%f\n' % (np.mean(err_b[n_s]), np.std(err_b[n_s])))
            summ_file.write('\t%f/%f\t%s\n' % (np.mean(MAP[n_s]), np.std(MAP[n_s]),
                                               metrics.getPrecisionReport(prec_curv[n_s][0],
                                                                          n_edges[n_s][0])))

    with open(out_stem + '.gr', 'wb') as res_file:
        pickle.dump([n_nodes,
                     n_edges,
                     MAP,
                     prec_curv,
                     err,
                     err_b,
                     n_sampled_nodes_l],
                    res_file)

    # Look the first sample size up explicitly instead of relying on dict
    # key order, which is arbitrary on older interpreters.
    return MAP[n_sampled_nodes_l[0]]