Experiment with BenchmarkΒΆ

Example to run the benchmark across all the baseline embedding algorithms.

from subprocess import call
import itertools
try: import cPickle as pickle
except: import pickle
import json
from argparse import ArgumentParser
import networkx as nx
import pandas as pd
import pdb
import os
import sys
from time import time
# sys.path.insert(0, './')
from gemben.utils import graph_gens

methClassMap = {"gf": "GraphFactorization",
                "hope": "HOPE",
                "lap": "LaplacianEigenmaps",
                "node2vec": "node2vec",
                "sdne": "SDNE",
                "pa": "PreferentialAttachment",
                "rand": "RandomEmb",
                "cn": "CommonNeighbors",
                "aa": "AdamicAdar",
                "jc": "JaccardCoefficient"}

if __name__ == "__main__":
    ''' Sample usage
    python experiments/exp_synthetic.py -syn_names all -plot_hyp_data 1 -meths all
    '''
    t1 = time()
    parser = ArgumentParser(description='Graph Embedding Benchmark Experiments')
    parser.add_argument('-data', '--data_sets',
                        help='dataset names (default: barabasi_albert_graph)')
    parser.add_argument('-dims', '--dimensions',
                        help='embedding dimensions list(default: 128)')
    parser.add_argument('-meth', '--methods',
                        help='method list (default: all methods)')
    parser.add_argument('-plot_hyp_data', '--plot_hyp_data',
                        help='plot the hyperparameter results (default: False)')
    parser.add_argument('-rounds', '--rounds',
                        help='number of rounds (default: 20)')
    parser.add_argument('-s_sch', '--samp_scheme',
                        help='sampling scheme (default: rw)')
    parser.add_argument('-lexp', '--lexp',
                        help='load experiment (default: False)')
    params = json.load(
        open('gemben/experiments/config/params_benchmark.conf', 'r')
    )
    args = vars(parser.parse_args())
    print (args)
    syn_hyps = json.load(
        open('gemben/experiments/config/syn_hypRange.conf', 'r')
    )
    for k, v in args.items():
        if v is not None:
            params[k] = v
    params["rounds"] = int(params["rounds"])
    if params["data_sets"] == "all":
        params["data_sets"] = syn_hyps.keys()
    else:
        params["data_sets"] = params["data_sets"].split(',')
    params["lexp"] = bool(int(params["lexp"]))
    params["plot_hyp_data"] = bool(int(params["plot_hyp_data"]))
    if params["methods"] == "all":
        params["methods"] = methClassMap.keys()
    else:
        params["methods"] = params["methods"].split(',')
    params["dimensions"] = params["dimensions"].split(',')
    samp_scheme = params["samp_scheme"]
    for syn_data in params["data_sets"]:
        syn_hyp_range = syn_hyps[syn_data]
        hyp_keys = list(syn_hyp_range.keys())
        if syn_data == "binary_community_graph":
            graphClass = getattr(graph_gens, syn_data)
        else:
            graphClass = getattr(nx, syn_data)
        ev_cols = ["GR MAP", "LP MAP", "LP P@100", "NC F1 score"]
        for dim in params["dimensions"]:
            dim = int(dim)
            for meth in params["methods"]:
                if not params["lexp"]:
                    hyp_df = pd.DataFrame(
                        columns=hyp_keys + ev_cols + ["Round Id"]
                    )
                    hyp_r_idx = 0
                    for hyp in itertools.product(*syn_hyp_range.values()):
                        hyp_dict = dict(zip(hyp_keys, hyp))
                        hyp_str = '_'.join(
                            "%s=%r" % (key, val) for (key, val) in hyp_dict.items()
                        )
                        syn_data_folder = 'benchmark_%s_%s' % (syn_data, hyp_str)
                        hyp_df_row = dict(zip(hyp_keys, hyp))
                        for r_id in range(params["rounds"]):
                            G = graphClass(**hyp_dict)
                            if not os.path.exists("gemben/data/%s" % syn_data_folder):
                                os.makedirs("gemben/data/%s" % syn_data_folder)
                            nx.write_gpickle(
                                G, 'gemben/data/%s/graph.gpickle' % syn_data_folder
                            )
                            os.system(
                                "python gem/experiments/exp.py -data %s -meth %s -dim %d -rounds 1 -s_sch %s -exp lp" % (syn_data_folder, meth, dim, samp_scheme)
                            )
                            MAP, prec, n_samps = pickle.load(
                                open('gemben/results/%s_%s_%d_%s.lp' % (syn_data_folder, meth, dim, samp_scheme), 'rb')
                            )
                            hyp_df.loc[hyp_r_idx, hyp_keys] = \
                                pd.Series(hyp_df_row)
                            prec_100 = prec[int(n_samps[0])][0][100]
                            hyp_df.loc[hyp_r_idx, ev_cols + ["Round Id"]] = \
                                [0, MAP[int(n_samps[0])][0], prec_100, 0, r_id]
                            hyp_r_idx += 1
                    hyp_df.to_hdf(
                        "gemben/intermediate/%s_%s_lp_%s_dim_%d_data_hyp.h5" % (syn_data, meth, samp_scheme, dim),
                        "df"
                    )
            if params["plot_hyp_data"]:
                from gem.utils import plot_util
                plot_util.plot_hyp_data2(
                    hyp_keys, ["lp"], params["methods"], syn_data, samp_scheme, dim
                )
    print('Total time taken: %f sec' % (time() - t1))

Total running time of the script: ( 0 minutes 0.000 seconds)

Gallery generated by Sphinx-Gallery