Module to run one or more examples

Module to run the benchmark across all the baseline embedding algorithms.

from subprocess import call
import itertools
try: import cPickle as pickle
except: import pickle
import json
import networkx as nx
import pandas as pd
import pdb
import os
import sys
from time import time
from gemben.utils import graph_gens

# Short method key -> class name for that method (as the map's name
# suggests; the classes themselves are not referenced in this module).
# Insertion order is the order used when every method is requested.
methClassMap = dict(
    gf="GraphFactorization",
    hope="HOPE",
    lap="LaplacianEigenmaps",
    node2vec="node2vec",
    sdne="SDNE",
    pa="PreferentialAttachment",
    rand="RandomEmb",
    cn="CommonNeighbors",
    aa="AdamicAdar",
    jc="JaccardCoefficient",
)

class exp:
    """Run the link-prediction benchmark over synthetic graphs.

    For every (method, graph generator) pair the runner sweeps each graph
    hyper-parameter listed in ``graph_hyp_range.conf`` one at a time, keeping
    the remaining hyper-parameters at the defaults from
    ``def_graph_hyps.conf``.  For each setting and round it makes sure the
    synthetic graph is cached on disk, launches
    ``gemben/experiments/exp.py`` in a subprocess and reads back the pickled
    ``(MAP, prec, n_samps)`` result that script is expected to leave in
    ``gemben/results``.

    All paths are relative to the current working directory.

    Args:
        domain: Key into ``domain_graph_map.conf`` selecting the graph
            generators to benchmark.
        method: ``"all"`` for every key of ``methClassMap``, a
            comma-separated string of method keys, or an iterable of keys.
        rounds: Number of repetitions per hyper-parameter setting.
        lexp: When true, reuse an existing result file instead of re-running
            the experiment (the experiment still runs if the file cannot be
            loaded).
        samp_scheme: Sampling scheme forwarded to ``exp.py`` via ``-s_sch``
            and embedded in result file names.
        plot_hyp_data: Stored in ``params["plot_hyp_data"]``; not used by
            this class itself.

    Raises:
        KeyError: If ``domain`` is missing from ``domain_graph_map.conf``.
        ValueError: If ``method`` contains no method key.
    """

    def __init__(self, domain="social", method="sdne", rounds=1, lexp=False,
                 samp_scheme='rw', plot_hyp_data=False):
        self.params = self._load_conf('params_benchmark.conf')
        self.domain_graph_map = self._load_conf('domain_graph_map.conf')
        # graph_hyp_range: {N: [128, 256, 512, 1024], deg: [4, 6,8, 10, 12]}
        self.graph_hyp_range = self._load_conf('graph_hyp_range.conf')
        # def_graph_hyps: {N: 1024, deg: 8, dia: None, dim: 128}
        self.def_graph_hyps = self._load_conf('def_graph_hyps.conf')

        self.params["rounds"] = rounds
        self.params["graphs"] = self.domain_graph_map[domain]
        self.params["lexp"] = lexp
        self.params["plot_hyp_data"] = plot_hyp_data
        # run() reads params["domain_name"]; fall back to the requested
        # domain when the config file does not define it (a value coming
        # from the config file is left untouched).
        self.params.setdefault("domain_name", domain)
        self.params["methods"] = self._parse_methods(method)
        self.samp_scheme = samp_scheme

    @staticmethod
    def _load_conf(file_name):
        """Return the parsed JSON config ``file_name`` from the config dir."""
        with open('gemben/experiments/config/%s' % file_name, 'r') as conf:
            return json.load(conf)

    @staticmethod
    def _parse_methods(method):
        """Normalise the ``method`` argument into a list of method keys."""
        if method == "all":
            return list(methClassMap)
        if isinstance(method, str):
            names = method.split(',')
        else:
            names = [str(name) for name in method]
        names = [name.strip() for name in names if name.strip()]
        if not names:
            raise ValueError("no embedding method given: %r" % (method,))
        return names

    @staticmethod
    def _load_lp_results(path):
        """Unpickle a link-prediction result file and return its content.

        NOTE: unpickling executes code embedded in the file, so the results
        directory must only contain trusted files.
        """
        with open(path, 'rb') as res_file:
            return pickle.load(res_file)

    @staticmethod
    def _ensure_graph(graph, curr_hyps, syn_data_folder):
        """Generate and cache the synthetic graph unless a cache is readable.

        The ``.gpickle`` file is a plain pickle (the format used by
        ``networkx.write_gpickle``), so it is read and written with
        ``pickle`` directly; those NetworkX helpers were removed in
        NetworkX 3.0.
        """
        graph_dir = "gemben/data/%s" % syn_data_folder
        graph_path = '%s/graph.gpickle' % graph_dir
        try:
            with open(graph_path, 'rb') as graph_file:
                pickle.load(graph_file)
        except Exception:
            # Missing or unreadable cache: fall through and regenerate.
            cached = False
        else:
            cached = True
        if cached:
            return

        graph_class = getattr(graph_gens, graph)
        # Keep sampling until the node labels end at number_of_nodes - 1.
        # NOTE(review): there is no retry limit, so a generator that never
        # satisfies this keeps the loop running forever.
        while True:
            print("Graph is generating...")
            G = graph_class(**curr_hyps)[0]
            if (len(set(G.nodes())) == G.number_of_nodes()
                    and list(G.nodes())[-1] == G.number_of_nodes() - 1):
                break
        if G:
            os.makedirs(graph_dir, exist_ok=True)
            with open(graph_path, 'wb') as graph_file:
                pickle.dump(G, graph_file, pickle.HIGHEST_PROTOCOL)

    def run(self):
        """Execute the benchmark and store one HDF5 table per method/graph.

        Writes ``gemben/intermediate/<domain>_<graph>_<meth>_lp_<scheme>_
        data_hyp.h5`` (key ``"df"``) and appends one text line per run to the
        matching file under ``gemben/temp``.
        """
        for folder in ("gemben/intermediate", "gemben/results", "gemben/temp"):
            os.makedirs(folder, exist_ok=True)

        graph_hyp_keys = list(self.graph_hyp_range.keys())
        ev_cols = ["LP MAP", "LP P@100"]
        domain_name = self.params["domain_name"]
        for meth, graph in itertools.product(self.params["methods"],
                                             self.params["graphs"]):
            hyp_df = pd.DataFrame(
                columns=graph_hyp_keys + ev_cols + ["Round Id"]
            )
            hyp_r_idx = 0
            out_stem = "%s_%s_%s_lp_%s_data_hyp" % (
                domain_name, graph, meth, self.samp_scheme)
            for hyp_key in graph_hyp_keys:
                for hyp_val, r_id in itertools.product(
                        self.graph_hyp_range[hyp_key],
                        range(self.params["rounds"])):
                    # Only the first round asks exp.py to search for the
                    # best hyper-parameters (-find_hyp 1).
                    f_hyp = 1 if r_id == 0 else 0

                    curr_hyps = self.def_graph_hyps.copy()
                    curr_hyps[hyp_key] = hyp_val
                    curr_hyps["domain"] = domain_name
                    hyp_str = '_'.join(
                        "%s=%s" % (key, str(val).strip("'"))
                        for key, val in curr_hyps.items()
                    )
                    # 'dim' is left out of the folder name, so every
                    # embedding dimension shares the same cached graph.
                    hyp_str_graph_name = '_'.join(
                        "%s=%s" % (key, str(val).strip("'"))
                        for key, val in curr_hyps.items() if key != 'dim'
                    )
                    syn_data_folder = 'benchmark_%s_%s_%s' % (
                        graph, hyp_str_graph_name, r_id)

                    self._ensure_graph(graph, curr_hyps, syn_data_folder)

                    results_path = 'gemben/results/%s_%s_%d_%s.lp' % (
                        syn_data_folder, meth,
                        curr_hyps["dim"], self.samp_scheme)
                    have_results = False
                    if self.params["lexp"]:
                        try:
                            MAP, prec, _ = self._load_lp_results(results_path)
                            have_results = True
                        except Exception:
                            # No usable stored result: run the experiment.
                            have_results = False
                    if not have_results:
                        # Argument list instead of a shell string, so values
                        # are passed to exp.py verbatim.
                        call([
                            "python3", "gemben/experiments/exp.py",
                            "-data", syn_data_folder,
                            "-meth", meth,
                            "-dim", "%d" % curr_hyps["dim"],
                            "-rounds", "1",
                            "-find_hyp", "%d" % f_hyp,
                            "-s_sch", str(self.samp_scheme),
                            "-exp", "lp",
                        ])
                        MAP, prec, _ = self._load_lp_results(results_path)

                    hyp_df.loc[hyp_r_idx, graph_hyp_keys] = \
                        pd.Series(curr_hyps)
                    map_val = list(MAP.values())[0][0]
                    try:
                        prec_100 = list(prec.values())[0][0][100]
                    except (IndexError, KeyError, TypeError):
                        # The precision curve has no entry at index 100;
                        # record NaN rather than stopping the whole sweep.
                        print("precision@100 unavailable for %s" % hyp_str)
                        prec_100 = float('nan')
                    with open("gemben/temp/%s.txt" % out_stem, 'a') as f_temp:
                        # One record per line; the file is opened in append
                        # mode on every run.
                        f_temp.write(
                            '%s: round: %d, MAP: %f, prec_100: %f\n' % (
                                hyp_str, r_id, map_val, prec_100))
                    hyp_df.loc[hyp_r_idx, ev_cols + ["Round Id"]] = \
                        [map_val, prec_100, r_id]
                    hyp_r_idx += 1

            hyp_df.to_hdf(
                "gemben/intermediate/%s.h5" % out_stem,
                key="df"
            )
            print('Experiments done for %s, %s' % (graph, meth))

Total running time of the script: ( 0 minutes 0.000 seconds)

Gallery generated by Sphinx-Gallery