Source code for gemben.utils.plot_util

try: import cPickle as pickle
except: import pickle
from os import environ
import matplotlib
import os
if os.name == 'posix' and 'DISPLAY' not in os.environ:
    disp_avlbl = False
    import matplotlib
    matplotlib.use('Agg')
import matplotlib.pyplot as plt
import itertools
from matplotlib import rc
import random
import seaborn
import numpy as np
import pandas as pd
import pdb

# Module-wide matplotlib defaults. These statements run once at import time
# and affect every figure created afterwards in the same process.
# Order matters: later calls can override values set by earlier ones.
font = {'family': 'serif', 'serif': ['computer modern roman']}
# usetex=True asks matplotlib to typeset all text through LaTeX.
# NOTE(review): this presumably requires a working LaTeX installation on the
# machine producing the plots -- confirm for headless/CI environments.
rc('text', usetex=True)
rc('font', weight='bold')
rc('font', size=20)
rc('lines', markersize=10)
rc('xtick', labelsize=20)
rc('ytick', labelsize=20)
rc('axes', labelsize='x-large')
rc('axes', labelweight='bold')
rc('axes', titlesize='x-large')
rc('axes', linewidth=3)
# Apply the serif font family defined above.
plt.rc('font', **font)
# NOTE(review): set_style is called last, so it may replace some of the rc
# values configured above -- verify against the installed seaborn version.
seaborn.set_style("darkgrid")

# Figure size passed to plt.subplots(figsize=...), keyed by the number of
# subplot columns in the figure.
figsize_d = dict([(2, (5, 2)), (4, (9, 2))])

# Display names used in plots and legends, keyed by the internal method key.
m_name_l = dict(
    gf="GF",
    hope="HOPE",
    lap="LE",
    lle="LLE",
    node2vec="node2vec",
    sdne="SDNE",
    pa="PA",
    rand="Rand",
    cn="CN",
    jc="JC",
    aa="AA",
)

# Result-column name of the primary metric for each experiment key.
expMap = dict(gr="GR MAP", lp="LP MAP", nc="NC F1 score")
# Result-column name of the secondary metric; it differs from expMap only
# for the "lp" experiment.
expMap2 = dict(expMap, lp="LP P@100")


def get_node_color(node_community):
    """Map every node's community index to a named matplotlib color.

    Args:
        node_community: Iterable of integer community indices, one per node.

    Returns:
        list: One color name (a key of ``matplotlib.colors.cnames``) per
        entry of ``node_community``, in the same order.
    """
    cnames = list(matplotlib.colors.cnames)
    n_colors = len(cnames)
    # Wrap around instead of raising IndexError when there are more
    # communities than named colors; in-range indices map exactly as before.
    return [cnames[c % n_colors] for c in node_community]
def plot(x_s, y_s, fig_n, x_lab, y_lab, file_save_path, title,
         legendLabels=None, show=False):
    """Draw several line series on one figure and save it to disk.

    Args:
        x_s: Sequence of x-value sequences, one per series.
        y_s: Sequence of y-value sequences, indexed in parallel with ``x_s``.
        fig_n: Figure identifier passed to ``plt.figure``.
        x_lab (str): Label of the x axis.
        y_lab (str): Label of the y axis.
        file_save_path (str): Path handed to ``plt.savefig``.
        title (str): Figure title.
        legendLabels: Optional sequence of legend labels, one per series.
        show (bool): When true, ``plt.show()`` is called after saving.

    Returns:
        None. The figure is written to ``file_save_path``.
    """
    plt.rcParams.update({'font.size': 16, 'font.weight': 'bold'})
    markers = ['o', '*', 'v', 'D', '<', 's', '+', '^', '>']
    colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k']
    plt.figure(fig_n)
    series = []
    for i, x in enumerate(x_s):
        y = y_s[i]
        # Cycle through the palettes so that more than len(colors) series
        # no longer raise IndexError.
        series.append(plt.plot(x, y,
                               color=colors[i % len(colors)],
                               linewidth=2,
                               marker=markers[i % len(markers)],
                               markersize=8))
    plt.xlabel(x_lab, fontsize=16, fontweight='bold')
    plt.ylabel(y_lab, fontsize=16, fontweight='bold')
    plt.title(title, fontsize=16, fontweight='bold')
    if legendLabels:
        # plt.plot returns a list of lines; the first one represents the series.
        plt.legend([s[0] for s in series], legendLabels)
    plt.savefig(file_save_path)
    if show:
        plt.show()
def plot_ts(ts_df, plot_title, eventDates, eventLabels=None,
            save_file_name=None, xLabel=None, yLabel=None, show=False):
    """Plot a time series and mark events with dashed vertical lines.

    Args:
        ts_df: Object exposing a pandas-style ``.plot(...)`` method that
            returns a matplotlib axes.
        plot_title (str): Title of the plot.
        eventDates: Iterable of x positions at which to draw event markers.
        eventLabels: Optional sequence of labels indexed in parallel with
            ``eventDates``. When given, each event gets its own color and a
            legend entry; otherwise every event is drawn in red.
        save_file_name (str): Optional output path; nothing is saved if falsy.
        xLabel (str): Optional x-axis label.
        yLabel (str): Optional y-axis label.
        show (bool): When true, ``fig.show()`` is called at the end.

    Returns:
        None.
    """
    ax = ts_df.plot(title=plot_title, marker='*', markerfacecolor='red',
                    markersize=10, linestyle='solid')
    colors = ['r', 'g', 'c', 'm', 'y', 'b', 'k']
    if not eventLabels:
        for eventDate in eventDates:
            # Unlabelled events are all shown as red vertical lines.
            ax.axvline(eventDate, color='r', linestyle='--', lw=2)
    else:
        for idx, eventDate in enumerate(eventDates):
            # Cycle the palette so more than len(colors) events do not
            # raise IndexError.
            ax.axvline(eventDate, color=colors[idx % len(colors)],
                       linestyle='--', lw=2, label=eventLabels[idx])
        ax.legend()
    if xLabel:
        ax.set_xlabel(xLabel, fontweight='bold')
    if yLabel:
        ax.set_ylabel(yLabel, fontweight='bold')
    fig = ax.get_figure()
    if save_file_name:
        fig.savefig(save_file_name, bbox_inches='tight')
    if show:
        fig.show()
def turn_latex(key_str):
    """Convert a hyper-parameter name into a LaTeX math-mode string.

    Args:
        key_str (str): Hyper-parameter name.

    Returns:
        str: ``$\\<name>$`` when the name is one of the recognised Greek
        letters (mu, rho, beta, alpha, gamma), otherwise the upper-cased name
        wrapped in ``$...$``.
    """
    if key_str in ('mu', 'rho', 'beta', 'alpha', 'gamma'):
        # The backslash is escaped explicitly; the previous '\%' relied on an
        # invalid escape sequence that newer Python versions warn about.
        return '$\\%s$' % key_str
    return '$%s$' % key_str.upper()
def plot_hyp_data2(hyp_keys, exp_param, meths, data, s_sch="u_rand", dim=2):
    """Plot hyper-parameter sweeps as a row of subplots per swept parameter.

    For every experiment in ``exp_param`` the per-method result frames are
    read from
    ``gemben/intermediate/<data>_<meth>_<exp>_<s_sch>_dim_<dim>_data_hyp.h5``
    and stacked. For every key in ``hyp_keys`` two figures are produced, one
    for the ``expMap[exp]`` column and one for the ``expMap2[exp]`` column.
    Each figure holds one subplot per combination of the values of the
    remaining hyper-parameters and is saved under ``gemben/plots/data_hyp/``.

    Note that this function overwrites the global matplotlib rc settings with
    smaller fonts and line widths.

    Args:
        hyp_keys (list): Column names of the hyper-parameters to sweep over.
        exp_param: Iterable of experiment keys (keys of ``expMap``).
        meths: Iterable of method keys (keys of ``m_name_l``).
        data (str): Data set name used in input and output file names.
        s_sch (str): Sampling scheme tag used in the file names.
        dim (int): Embedding dimension used in the file names.

    Returns:
        None.
    """
    font = {'family': 'serif', 'serif': ['computer modern roman']}
    rc('text', usetex=True)
    rc('font', weight='bold')
    rc('font', size=8)
    rc('lines', markersize=2.5)
    rc('lines', linewidth=0.5)
    rc('xtick', labelsize=6)
    rc('ytick', labelsize=6)
    rc('axes', labelsize='small')
    rc('axes', labelweight='bold')
    rc('axes', titlesize='small')
    rc('axes', linewidth=1)
    plt.rc('font', **font)
    seaborn.set_style("darkgrid")
    marker = ["o", "s", "D", "^", "v", "8", "*", "p", "1", "h"]
    for exp in exp_param:
        frames = []
        n_meths = 0
        for meth in meths:
            try:
                df = pd.read_hdf(
                    "gemben/intermediate/%s_%s_%s_%s_dim_%d_data_hyp.h5"
                    % (data, meth, exp, s_sch, dim),
                    "df"
                )
                n_meths += 1
            except Exception:
                # Missing or unreadable result files are skipped on purpose.
                print('%s_%s_%s_%s_dim_%d_data_hyp.h5 not found. Ignoring data set'
                      % (data, meth, exp, s_sch, dim))
                continue
            # Check if experiment is in the dataframe
            if expMap[exp] not in df:
                continue
            df["Method"] = m_name_l[meth]
            frames.append(df)
        if not frames:
            continue
        # A single concat replaces the repeated DataFrame.append calls.
        df = pd.concat(frames, ignore_index=True)
        if df.empty:
            continue
        # Escape underscores for LaTeX rendering of the column names.
        # NOTE(review): hyp_keys are used un-renamed below, so keys containing
        # '_' will not match the renamed columns -- confirm with callers.
        df.rename(columns={col: col.replace('_', '\\ ') for col in df.columns},
                  inplace=True)
        metrics = (expMap[exp], expMap2[exp])
        for hyp_key in hyp_keys:
            df_grouped = df[hyp_keys + ["Round Id", expMap[exp], expMap2[exp], "Method"]]
            rem_hyp_keys = list(set(hyp_keys) - {hyp_key})
            val_lists = [df_grouped[r_k].unique() for r_k in rem_hyp_keys]
            hyp_combos = list(itertools.product(*val_lists))
            n_cols = len(hyp_combos)
            if len(df_grouped[hyp_key].unique()) < 3:
                continue
            plot_shape = (1, n_cols)
            # Known layouts keep their tuned size; other column counts fall
            # back to the same width-per-column rule instead of a KeyError.
            figsize = figsize_d.get(n_cols, (2 * n_cols + 1, 2))
            fin1, axarray1 = plt.subplots(1, n_cols, figsize=figsize)
            fin2, axarray2 = plt.subplots(1, n_cols, figsize=figsize)
            for plt_idx, hyp_vals in enumerate(hyp_combos):
                plot_idx = np.unravel_index(plt_idx, plot_shape)
                hyp_dict = dict(zip(rem_hyp_keys, hyp_vals))
                # Only parameters that actually vary are shown in the title.
                hyp_str = ', '.join(
                    "%s:%r" % (turn_latex(key), val)
                    for (key, val) in hyp_dict.items()
                    if len(df_grouped[key].unique()) > 1
                )
                df_temp = df_grouped
                for r_key, hyp_val in zip(rem_hyp_keys, hyp_vals):
                    df_temp = df_temp[df_temp[r_key] == hyp_val]
                if len(df_temp[hyp_key].unique()) < 3:
                    continue
                print('Plotting %s: %s' % (exp, hyp_key))
                # Ratio of the last to the first step between swept values;
                # a ratio above 1 is treated as a geometric grid (log axis).
                hyp_values = df_grouped[hyp_key].unique()
                l_diff = hyp_values[-1] - hyp_values[-2]
                f_diff = hyp_values[1] - hyp_values[0]
                l_f_diff_r = l_diff / f_diff
                for metric, axarray in zip(metrics, (axarray1, axarray2)):
                    try:
                        ax = seaborn.tsplot(time=hyp_key, value=metric,
                                            unit="Round Id", condition="Method",
                                            data=df_temp,
                                            ax=axarray[plot_idx[0], plot_idx[1]])
                        if plot_idx[1]:
                            ax.set_ylabel('')
                        if not plot_idx[0]:
                            ax.set_xlabel('')
                    except IndexError:
                        # A single-row subplot grid is a 1-D array of axes.
                        ax = seaborn.tsplot(time=hyp_key, value=metric,
                                            unit="Round Id", condition="Method",
                                            data=df_temp, ax=axarray[plt_idx])
                        if plt_idx:
                            ax.set_ylabel('')
                    ax.set_title(hyp_str)
                    if l_f_diff_r > 1:
                        log_base = pow(l_f_diff_r, 1.0 / (len(hyp_values) - 2))
                        ax.set_xscale('log', basex=round(log_base))
                    for line_i, line in enumerate(ax.lines):
                        line.set_marker(marker[line_i % len(marker)])
                    # A shared figure-level legend is added below instead.
                    ax.legend_.remove()
            # Shrink every subplot to leave room for the legend underneath.
            for col_idx in range(axarray1.shape[0]):
                for axarray in (axarray1, axarray2):
                    box = axarray[col_idx].get_position()
                    axarray[col_idx].set_position(
                        [box.x0, box.y0 + box.height * 0.1, box.width, box.height * 0.9]
                    )
            fin1.legend(loc='lower center', bbox_to_anchor=(0.45, -0.01),
                        ncol=n_meths, fancybox=True, shadow=True)
            fin2.legend(loc='lower center', bbox_to_anchor=(0.45, -0.01),
                        ncol=n_meths, fancybox=True, shadow=True)
            fin1.savefig(
                'gemben/plots/data_hyp/%s_%s_%s_%d_%s.pdf' % (data, exp, s_sch, dim, hyp_key),
                dpi=300, format='pdf', bbox_inches='tight'
            )
            fin2.savefig(
                'gemben/plots/data_hyp/%s_%s_%s_%d_%s_p100.pdf' % (data, exp, s_sch, dim, hyp_key),
                dpi=300, format='pdf', bbox_inches='tight'
            )
            fin1.clf()
            fin2.clf()
def plot_hyp_data(hyp_keys, exp_param, meths, data, s_sch="u_rand", dim=2):
    """Plot hyper-parameter sweeps, one figure per parameter combination.

    For every experiment in ``exp_param`` the per-method result frames are
    read from
    ``gemben/intermediate/<data>_<meth>_<exp>_<s_sch>_dim_<dim>_data_hyp.h5``
    and stacked. For every key in ``hyp_keys`` and every combination of the
    values of the remaining hyper-parameters, two PDFs are written under
    ``gemben/plots/data_hyp/``: one for the ``expMap[exp]`` column and one
    (suffix ``_p_100``) for the ``expMap2[exp]`` column.

    Args:
        hyp_keys (list): Column names of the hyper-parameters to sweep over.
        exp_param: Iterable of experiment keys (keys of ``expMap``).
        meths: Iterable of method keys (keys of ``m_name_l``).
        data (str): Data set name used in input and output file names.
        s_sch (str): Sampling scheme tag used in the file names.
        dim (int): Embedding dimension used in the file names.

    Returns:
        None.
    """
    marker = ["o", "s", "D", "^", "v", "8", "*", "p", "1", "h"]
    for exp in exp_param:
        frames = []
        for meth in meths:
            try:
                df = pd.read_hdf(
                    "gemben/intermediate/%s_%s_%s_%s_dim_%d_data_hyp.h5"
                    % (data, meth, exp, s_sch, dim),
                    "df"
                )
            except Exception:
                # Missing or unreadable result files are skipped on purpose.
                print('%s_%s_%s_%s_dim_%d_data_hyp.h5 not found. Ignoring data set'
                      % (data, meth, exp, s_sch, dim))
                continue
            # Check if experiment is in the dataframe
            if expMap[exp] not in df:
                continue
            df["Method"] = m_name_l[meth]
            frames.append(df)
        if not frames:
            continue
        # A single concat replaces the repeated DataFrame.append calls.
        df = pd.concat(frames, ignore_index=True)
        if df.empty:
            continue
        # Escape underscores for LaTeX rendering of the column names.
        # NOTE(review): hyp_keys are used un-renamed below, so keys containing
        # '_' will not match the renamed columns -- confirm with callers.
        df.rename(columns={col: col.replace('_', '\\ ') for col in df.columns},
                  inplace=True)
        # (metric column, output path template) for the two plots per combo.
        outputs = (
            (expMap[exp], 'gemben/plots/data_hyp/%s_%s_%s_%d_%s.pdf'),
            (expMap2[exp], 'gemben/plots/data_hyp/%s_%s_%s_%d_%s_p_100.pdf'),
        )
        for hyp_key in hyp_keys:
            df_grouped = df[hyp_keys + ["Round Id", expMap[exp], expMap2[exp], "Method"]]
            rem_hyp_keys = list(set(hyp_keys) - {hyp_key})
            val_lists = [df_grouped[r_k].unique() for r_k in rem_hyp_keys]
            for hyp_vals in itertools.product(*val_lists):
                hyp_dict = dict(zip(rem_hyp_keys, hyp_vals))
                hyp_str = '_'.join("%s=%r" % (key, val)
                                   for (key, val) in hyp_dict.items())
                df_temp = df_grouped
                for r_key, hyp_val in zip(rem_hyp_keys, hyp_vals):
                    df_temp = df_temp[df_temp[r_key] == hyp_val]
                if len(df_temp[hyp_key].unique()) < 3:
                    continue
                print('Plotting %s: %s' % (exp, hyp_key))
                for metric, path_fmt in outputs:
                    ax = seaborn.tsplot(time=hyp_key, value=metric,
                                        unit="Round Id", condition="Method",
                                        data=df_temp)
                    # Ratio of the last to the first step between swept
                    # values; above 1 is treated as a geometric grid.
                    hyp_values = df_grouped[hyp_key].unique()
                    l_diff = hyp_values[-1] - hyp_values[-2]
                    f_diff = hyp_values[1] - hyp_values[0]
                    l_f_diff_r = l_diff / f_diff
                    if l_f_diff_r > 1:
                        log_base = pow(l_f_diff_r, 1.0 / (len(hyp_values) - 2))
                        ax.set_xscale('log', basex=round(log_base))
                    for line_i, line in enumerate(ax.lines):
                        line.set_marker(marker[line_i % len(marker)])
                    ax.legend()
                    plt.savefig(
                        path_fmt % (data, exp, s_sch, dim, hyp_str),
                        dpi=300, format='pdf', bbox_inches='tight'
                    )
                    plt.clf()
def plot_hyp(hyp_keys, exp_param, meth, data, s_sch="u_rand"):
    """Plot the effect of each hyper-parameter for one method and data set.

    For every experiment in ``exp_param`` the results are read from
    ``gemben/intermediate/<data>_<meth>_<exp>_<s_sch>_hyp.h5``. For every key
    in ``hyp_keys`` with at least three distinct values, the ``expMap[exp]``
    column is plotted against that key and saved to
    ``gemben/plots/hyp/<data>_<meth>_<exp>_<s_sch>_<hyp_key>.pdf``.

    Args:
        hyp_keys: Iterable of hyper-parameter column names (before the
            underscore escaping applied to the frame's columns).
        exp_param: Iterable of experiment keys (keys of ``expMap``).
        meth (str): Method key used in the file names.
        data (str): Data set name used in the file names.
        s_sch (str): Sampling scheme tag used in the file names.

    Returns:
        None.
    """
    marker = ["o", "s", "D", "^", "v", "8", "*", "p", "1", "h"]
    for exp in exp_param:
        df = pd.read_hdf(
            "gemben/intermediate/%s_%s_%s_%s_hyp.h5" % (data, meth, exp, s_sch),
            "df"
        )
        # Escape underscores for LaTeX rendering of the column names.
        df.rename(columns={col: col.replace('_', '\\ ') for col in df.columns},
                  inplace=True)
        for hyp_key in hyp_keys:
            hyp_key_ren = hyp_key.replace('_', '\\ ')
            group_cols = [hyp_key_ren, "Round Id"]
            df_trun = df[[hyp_key_ren, "Round Id", expMap[exp]]]
            try:
                df_grouped = df_trun.groupby(group_cols).max().reset_index()
            except TypeError:
                # Values that cannot be grouped as-is are grouped by their
                # string form. Copy first so the assignment does not write
                # into a slice of ``df``.
                df_trun = df_trun.copy()
                df_trun[hyp_key_ren] = df_trun[hyp_key_ren].apply(str)
                df_grouped = df_trun.groupby(group_cols).max().reset_index()
            if len(df_grouped[hyp_key_ren].unique()) < 3:
                continue
            try:
                print('Plotting %s: %s' % (exp, hyp_key))
                ax = seaborn.tsplot(time=hyp_key_ren, value=expMap[exp],
                                    unit="Round Id", data=df_grouped)
                # Ratio of the last to the first step between swept values;
                # above 1 is treated as a geometric grid (log axis).
                hyp_values = df_grouped[hyp_key_ren].unique()
                l_diff = hyp_values[-1] - hyp_values[-2]
                f_diff = hyp_values[1] - hyp_values[0]
                l_f_diff_r = l_diff / f_diff
                if l_f_diff_r > 1:
                    log_base = pow(l_f_diff_r, 1.0 / (len(hyp_values) - 2))
                    ax.set_xscale('log', basex=round(log_base))
                for line_i, line in enumerate(ax.lines):
                    line.set_marker(marker[line_i % len(marker)])
                ax.legend()
            except ValueError:
                # Fall back to a bar plot when the line plot cannot be drawn.
                ax = seaborn.barplot(x=hyp_key_ren, y=expMap[exp], data=df_grouped)
            except ZeroDivisionError:
                print('Only 2 points provided to plot hyperparameters')
                continue
            plt.savefig(
                'gemben/plots/hyp/%s_%s_%s_%s_%s.pdf' % (data, meth, exp, s_sch, hyp_key),
                dpi=300, format='pdf', bbox_inches='tight'
            )
            plt.clf()
def plot_hyp_all(hyp_keys, exp_param, meth, data_sets, s_sch="u_rand"):
    """Plot the effect of each hyper-parameter for one method on all data sets.

    For every experiment in ``exp_param`` the results of every data set are
    read from ``gemben/intermediate/<data>_<meth>_<exp>_<s_sch>_hyp.h5`` and
    stacked with a ``Data`` column. For every key in ``hyp_keys`` the
    ``expMap[exp]`` column is plotted against that key with one line per data
    set and saved to
    ``gemben/plots/hyp/<meth>_<exp>_<s_sch>_<hyp_key>.pdf``.

    Args:
        hyp_keys: Iterable of hyper-parameter column names (before the
            underscore escaping applied to the frame's columns).
        exp_param: Iterable of experiment keys (keys of ``expMap``).
        meth (str): Method key (key of ``m_name_l``) used in file names and
            as the plotted value's display name.
        data_sets (list): Data set names; the first one decides whether a
            parameter has enough distinct values to be plotted.
        s_sch (str): Sampling scheme tag used in the file names.

    Returns:
        None.
    """
    marker = ["o", "s", "D", "^", "v", "8", "*", "p", "1", "h"]
    for exp in exp_param:
        frames = []
        for data in data_sets:
            try:
                df = pd.read_hdf(
                    "gemben/intermediate/%s_%s_%s_%s_hyp.h5" % (data, meth, exp, s_sch),
                    "df"
                )
            except Exception:
                # Missing or unreadable result files are skipped on purpose.
                print('%s_%s_%s_%s_hyp.h5 not found. Ignoring data set'
                      % (data, meth, exp, s_sch))
                continue
            # Check if experiment is in the dataframe
            if expMap[exp] not in df:
                continue
            df["Data"] = data
            frames.append(df)
        if not frames:
            continue
        # A single concat replaces the repeated DataFrame.append calls.
        df_all = pd.concat(frames, ignore_index=True)
        if df_all.empty:
            continue
        # Escape underscores for LaTeX rendering of the column names.
        df_all.rename(
            columns={col: col.replace('_', '\\ ') for col in df_all.columns},
            inplace=True
        )
        for hyp_key in hyp_keys:
            hyp_key_ren = hyp_key.replace('_', '\\ ')
            group_cols = [hyp_key_ren, "Round Id", "Data"]
            df_trun = df_all[[hyp_key_ren, "Round Id", expMap[exp], "Data"]]
            try:
                df_grouped = df_trun.groupby(group_cols).max().reset_index()
            except TypeError:
                # Values that cannot be grouped as-is are grouped by their
                # string form. Copy first so the assignment does not write
                # into a slice of ``df_all``.
                df_trun = df_trun.copy()
                df_trun[hyp_key_ren] = df_trun[hyp_key_ren].apply(str)
                df_grouped = df_trun.groupby(group_cols).max().reset_index()
            first_data = df_grouped[df_grouped['Data'] == data_sets[0]]
            if len(first_data[hyp_key_ren].unique()) < 3:
                continue
            try:
                print('Plotting %s: %s' % (exp, hyp_key))
                # These two parameters are shown under short axis names.
                if hyp_key_ren == 'inout\\ p':
                    hyp_key_ren = 'q'
                elif hyp_key_ren == 'ret\\ p':
                    hyp_key_ren = 'p'
                # rename() ignores labels that are absent, so a single call
                # needs no guarding.
                df_grouped.rename(
                    columns={expMap[exp]: m_name_l[meth],
                             'inout\\ p': 'q',
                             'ret\\ p': 'p'},
                    inplace=True
                )
                ax = seaborn.tsplot(time=hyp_key_ren, value=m_name_l[meth],
                                    unit="Round Id", condition="Data",
                                    data=df_grouped)
                # Ratio of the last to the first step between swept values;
                # above 1 is treated as a geometric grid (log axis).
                hyp_values = df_grouped[hyp_key_ren].unique()
                l_diff = hyp_values[-1] - hyp_values[-2]
                f_diff = hyp_values[1] - hyp_values[0]
                l_f_diff_r = l_diff / f_diff
                if l_f_diff_r > 1:
                    log_base = pow(l_f_diff_r, 1.0 / (len(hyp_values) - 2))
                    ax.set_xscale('log', basex=round(log_base))
                for line_i, line in enumerate(ax.lines):
                    line.set_marker(marker[line_i % len(marker)])
                ax.legend()
            except ValueError:
                # Fall back to a grouped bar plot when the line plot fails.
                ax = seaborn.barplot(x="Data", y=m_name_l[meth],
                                     hue=hyp_key_ren, data=df_grouped)
            except ZeroDivisionError:
                print('Only 2 points provided to plot hyperparameters')
                continue
            # Any other error now propagates with its traceback instead of
            # dropping into an interactive debugger.
            plt.savefig(
                'gemben/plots/hyp/%s_%s_%s_%s.pdf' % (meth, exp, s_sch, hyp_key),
                dpi=300, format='pdf', bbox_inches='tight'
            )
            plt.clf()
def plot_p_at_k(res_pre, res_suffix, exp_type, m_names_f, m_names, d_arr,
                n_rounds, save_fig_name, K=1024, plot_d=False,
                plot_ratio=0.8, s_sch="u_rand"):
    """Plot precision@k, MAP and P@100 from pickled experiment results.

    Result files are named
    ``<res_pre>_<method>_<d>_<s_sch>_<plot_ratio><res_suffix>``; missing
    files are reported and skipped. For ``d == 128`` a precision@k curve and
    a MAP bar plot are saved. When ``plot_d`` is true and more than one
    dimension is given, MAP and P@100 are additionally plotted against ``d``.

    Args:
        res_pre (str): Prefix of the result file names.
        res_suffix (str): Suffix (extension) of the result file names.
        exp_type (str): ``'gr'`` selects the 7-element pickle layout; any
            other value selects the 3-element layout.
        m_names_f: Method keys as used in the file names.
        m_names: Display names, indexed in parallel with ``m_names_f``.
        d_arr: Embedding dimensions to process.
        n_rounds (int): Maximum number of rounds read per file.
        save_fig_name (str): Prefix of the output figure paths.
        K (int): Largest rank considered on the precision curve.
        plot_d (bool): Whether to plot the metrics against the dimension.
        plot_ratio: Value used in the input and output file names.
        s_sch (str): Sampling scheme tag used in the input file names.

    Returns:
        list: ``MAP[d_idx][method_idx]`` holding the MAP object loaded from
        each result file, or ``None`` where the file was missing.
    """
    marker = ["o", "s", "D", "^", "v", "8", "*", "p", "1", "h"]
    log_K = int(np.log2(K)) + 1
    num_k = log_K - 3
    # Ranks 8, 16, ..., sampled from the curve; rank k lives at index k - 1.
    k_range = [2**i for i in range(3, log_K)]
    p_at_k_ind = [k - 1 for k in k_range]
    n_glob_rows = n_rounds * len(m_names) * len(d_arr)
    df_map = pd.DataFrame(np.zeros((n_glob_rows, 4)),
                          columns=['d', 'Method', 'Round id', 'MAP'])
    df_p_100 = pd.DataFrame(np.zeros((n_glob_rows, 4)),
                            columns=['d', 'Method', 'Round id', 'P@100'])
    df_p_100_idx = 0
    df_map_idx = 0
    MAP = [None] * len(d_arr)
    for d_idx, d in enumerate(d_arr):
        d = int(d)
        df_prec = pd.DataFrame(
            np.zeros((n_rounds * len(m_names) * num_k, 4)),
            columns=['k', 'Method', 'Round id', 'precision@k']
        )
        df_idx = 0
        MAP[d_idx] = [None] * len(m_names_f)
        for idx, method in enumerate(m_names_f):
            try:
                with open('%s_%s_%d_%s_%s%s'
                          % (res_pre, method, d, s_sch, str(plot_ratio), res_suffix),
                          'rb') as f:
                    # NOTE: pickle.load executes arbitrary code from the file;
                    # only point this at trusted result files.
                    if exp_type == 'gr':
                        [_, _, MAP[d_idx][idx], prec_curv, _, _, n_s] = \
                            pickle.load(f)
                    else:
                        [MAP[d_idx][idx], prec_curv, n_s] = pickle.load(f)
            except IOError:
                print('File %s_%s_%d_%s_%s%s not found. Ignoring it for p@k plot'
                      % (res_pre, method, d, s_sch, str(plot_ratio), res_suffix))
                continue
            # Both the curves and the MAP values are dicts; only the first
            # entry is used.
            prec_curv = list(prec_curv.values())[0]
            for round_id in range(min(n_rounds, len(prec_curv))):
                p_at_k = np.array(prec_curv[round_id][:K])
                if p_at_k.shape[0] == 0:
                    print('%s_%s_%d%s: Encountered missing precision curve'
                          % (res_pre, method, d, res_suffix))
                    continue
                df_map.loc[df_map_idx, 'd'] = d
                df_map.loc[df_map_idx, 'MAP'] = \
                    list(MAP[d_idx][idx].values())[0][round_id]
                df_map.loc[df_map_idx, 'Method'] = m_names[idx]
                df_map.loc[df_map_idx, 'Round id'] = round_id
                df_map_idx += 1
                df_p_100.loc[df_p_100_idx, 'd'] = d
                # Precision at rank 100 is stored at index 99, consistent
                # with p_at_k_ind above (index 100 would be rank 101).
                df_p_100.loc[df_p_100_idx, 'P@100'] = p_at_k[99]
                df_p_100.loc[df_p_100_idx, 'Method'] = m_names[idx]
                df_p_100.loc[df_p_100_idx, 'Round id'] = round_id
                df_p_100_idx += 1
                # .loc slices are inclusive, hence the "- 1".
                df_prec.loc[df_idx:df_idx + num_k - 1, 'k'] = k_range
                df_prec.loc[df_idx:df_idx + num_k - 1, 'precision@k'] = \
                    p_at_k[p_at_k_ind]
                df_prec.loc[df_idx:df_idx + num_k - 1, 'Method'] = \
                    m_names[idx]
                df_prec.loc[df_idx:df_idx + num_k - 1, 'Round id'] = \
                    round_id
                df_idx += num_k
        # Per-dimension figures are only produced for d == 128.
        if d == 128:
            df_prec = df_prec[:df_idx]
            ax = seaborn.tsplot(time='k', value='precision@k',
                                unit='Round id', condition='Method',
                                data=df_prec)
            ax.set_xscale('log', basex=2)
            for line_i, line in enumerate(ax.lines):
                line.set_marker(marker[line_i % len(marker)])
            plt.savefig('%s_d_%d_plot_ratio_%s.pdf'
                        % (save_fig_name, d, str(plot_ratio)),
                        dpi=300, format='pdf', bbox_inches='tight')
            plt.clf()
            df_map = df_map[:df_map_idx]
            ax = seaborn.barplot(x="Method", y="MAP", data=df_map)
            plt.savefig('%s_d_%d_plot_ratio_%s_map.pdf'
                        % (save_fig_name, d, str(plot_ratio)),
                        dpi=300, format='pdf', bbox_inches='tight')
            plt.savefig('%s_d_%d_%s_map.png'
                        % (save_fig_name, d, str(plot_ratio)),
                        dpi=300, bbox_inches='tight')
            plt.clf()
    if plot_d and len(d_arr) > 1:
        df_map = df_map[:df_map_idx]
        ax = seaborn.tsplot(time='d', value='MAP',
                            unit='Round id', condition='Method',
                            data=df_map)
        ax.set_xscale('log', basex=2)
        for line_i, line in enumerate(ax.lines):
            line.set_marker(marker[line_i % len(marker)])
        ax.legend()
        plt.savefig('%s_%s_map.pdf' % (save_fig_name, str(plot_ratio)),
                    dpi=300, format='pdf', bbox_inches='tight')
        plt.savefig('%s_%s_map.png' % (save_fig_name, str(plot_ratio)),
                    dpi=300, bbox_inches='tight')
        plt.clf()
        df_p_100 = df_p_100[:df_p_100_idx]
        ax = seaborn.tsplot(time='d', value='P@100',
                            unit='Round id', condition='Method',
                            data=df_p_100)
        ax.set_xscale('log', basex=2)
        for line_i, line in enumerate(ax.lines):
            line.set_marker(marker[line_i % len(marker)])
        ax.legend()
        plt.savefig('%s_%s_p_100.pdf' % (save_fig_name, str(plot_ratio)),
                    dpi=300, format='pdf', bbox_inches='tight')
        plt.savefig('%s_%s_p_100.png' % (save_fig_name, str(plot_ratio)),
                    dpi=300, bbox_inches='tight')
        plt.clf()
    return MAP
def plot_F1(res_pre, res_suffix, exp_type, m_names_f, m_names, d_arr, n_rounds, save_fig_name, K=1024, plot_d=False):
    """Plot micro and macro F1 scores from pickled experiment results.

    Result files are named ``<res_pre>_<method>_<d><res_suffix>`` and are
    unpickled into ``[test_ratio_arr, micro, macro]``; missing files are
    reported and skipped. For ``d == 128`` the scores are plotted against the
    train ratio. When ``plot_d`` is true and more than one dimension is given,
    the scores at the middle test ratio are plotted against ``d``.

    Args:
        res_pre: Prefix of the result file names.
        res_suffix: Suffix (extension) of the result file names.
        exp_type: Not used inside this function.
        m_names_f: Method keys as used in the file names.
        m_names: Display names, indexed in parallel with ``m_names_f``.
        d_arr: Embedding dimensions to process.
        n_rounds: Maximum number of rounds read per file.
        save_fig_name: Prefix of the output figure paths.
        K: Number of rows reserved per (round, method) in the per-dimension
            frame.
        plot_d: Whether to plot the scores against the dimension.

    Returns:
        None.
    """
    # One row per (dimension, method, round); filled incrementally and
    # truncated to the rows actually written before plotting.
    df_f1_glob = pd.DataFrame(
        np.zeros((n_rounds * len(m_names) * len(d_arr), 5)),
        columns=['d', 'Method', 'Round id', 'Micro-F1 score', 'Macro-F1 score']
    )
    df_f1_glob_idx = 0
    for d in d_arr:
        d = int(d)
        # Per-dimension frame: one row per (method, round, train ratio).
        df = pd.DataFrame(np.zeros((n_rounds * len(m_names) * K, 5)),
                          columns=['Train ratio', 'Method', 'Round id', 'Micro-F1 score', 'Macro-F1 score'])
        df_idx = 0
        for idx, method in enumerate(m_names_f):
            try:
                with open('%s_%s_%d%s' % (res_pre, method, d, res_suffix), 'rb') as f:
                    # NOTE(review): pickle.load runs arbitrary code from the
                    # file -- result files are assumed to be trusted.
                    [test_ratio_arr, micro, macro] = pickle.load(f)
                n_xlabels = len(test_ratio_arr)
                for round_id in range(min(n_rounds, len(micro))):
                    microF1 = micro[round_id]
                    macroF1 = macro[round_id]
                    df_f1_glob.loc[df_f1_glob_idx, 'd'] = d
                    # The score at the middle test ratio represents this
                    # dimension in the score-vs-d plots.
                    df_f1_glob.loc[df_f1_glob_idx, 'Micro-F1 score'] = \
                        microF1[len(test_ratio_arr) // 2]
                    df_f1_glob.loc[df_f1_glob_idx, 'Macro-F1 score'] = \
                        macroF1[len(test_ratio_arr) // 2]
                    df_f1_glob.loc[df_f1_glob_idx, 'Method'] = m_names[idx]
                    df_f1_glob.loc[df_f1_glob_idx, 'Round id'] = round_id
                    df_f1_glob_idx += 1
                    # .loc slices are inclusive, hence the "- 1".
                    df.loc[df_idx:df_idx + n_xlabels - 1, 'Train ratio'] = \
                        [(1.0 - test_r) for test_r in test_ratio_arr]
                    df.loc[df_idx:df_idx + n_xlabels - 1, 'Micro-F1 score'] = \
                        microF1
                    df.loc[df_idx:df_idx + n_xlabels - 1, 'Macro-F1 score'] = \
                        macroF1
                    df.loc[df_idx:df_idx + n_xlabels - 1, 'Method'] = \
                        m_names[idx]
                    df.loc[df_idx:df_idx + n_xlabels - 1, 'Round id'] = round_id
                    df_idx += n_xlabels
            except IOError:
                print('File %s_%s_%d%s not found. Ignoring it for NC plot' \
                    % (res_pre, method, d, res_suffix))
                continue
        # Per-train-ratio figures are only produced for d == 128.
        if d == 128:
            df = df[:df_idx]
            ax = seaborn.tsplot(time='Train ratio', value='Micro-F1 score',
                                unit='Round id', condition='Method', data=df)
            # One marker per plotted line; raises IndexError beyond 10 lines.
            marker = ["o", "s", "D", "^", "v", "8", "*", "p", "1", "h"]
            for line_i in range(len(ax.lines)):
                ax.lines[line_i].set_marker(marker[line_i])
            # ax.grid()
            ax.legend_.remove()
            plt.savefig('%s_d_%d_micro.pdf' % (save_fig_name, d),
                        dpi=300, format='pdf', bbox_inches='tight')
            plt.clf()
            ax = seaborn.tsplot(time='Train ratio', value='Macro-F1 score',
                                unit='Round id', condition='Method', data=df)
            for line_i in range(len(ax.lines)):
                ax.lines[line_i].set_marker(marker[line_i])
            # ax.grid()
            ax.legend_.remove()
            plt.savefig('%s_d_%d_macro.pdf' % (save_fig_name, d),
                        dpi=300, format='pdf', bbox_inches='tight')
            plt.clf()
    if plot_d and len(d_arr) > 1:
        # Keep only the rows that were actually filled in above.
        df_f1_glob = df_f1_glob[:df_f1_glob_idx]
        ax = seaborn.tsplot(time='d', value='Micro-F1 score',
                            unit='Round id', condition='Method', data=df_f1_glob)
        ax.set_xscale('log', basex=2)
        marker = ["o", "s", "D", "^", "v", "8", "*", "p", "1", "h"]
        for line_i in range(len(ax.lines)):
            ax.lines[line_i].set_marker(marker[line_i])
        # ax.grid()
        ax.legend_.remove()
        plt.savefig('%s_micro.pdf' % (save_fig_name),
                    dpi=300, format='pdf', bbox_inches='tight')
        plt.clf()
        ax = seaborn.tsplot(time='d', value='Macro-F1 score',
                            unit='Round id', condition='Method', data=df_f1_glob)
        ax.set_xscale('log', basex=2)
        for line_i in range(len(ax.lines)):
            ax.lines[line_i].set_marker(marker[line_i])
        # ax.grid()
        ax.legend_.remove()
        plt.savefig('%s_macro.pdf' % (save_fig_name),
                    dpi=300, format='pdf', bbox_inches='tight')
        plt.clf()
def plotExpRes(res_pre, methods, exp, d_arr, save_fig_pre, n_rounds, plot_d,
               plot_ratio=0.8, samp_scheme="u_rand", K=1024):
    """Dispatch the plotting routines for the requested experiments.

    Args:
        res_pre: Prefix of the result file names.
        methods: Method keys (keys of ``m_name_l``).
        exp: Container of experiment keys; ``"gr"``, ``"lp"`` and ``"nc"``
            each trigger their own plots.
        d_arr: Embedding dimensions to process.
        save_fig_pre: Prefix of the output figure paths.
        n_rounds: Maximum number of rounds read per result file.
        plot_d: Whether to also plot the metrics against the dimension.
        plot_ratio: Forwarded to the ``"lp"`` plots only.
        samp_scheme: Sampling scheme tag forwarded to the p@k plots.
        K: Forwarded to every plotting routine.

    Returns:
        tuple: ``(map_gr, map_lp)`` as returned by ``plot_p_at_k``; an entry
        is ``None`` when the corresponding experiment was not requested.
    """
    display_names = [m_name_l[key] for key in methods]
    common = (methods, display_names, d_arr, n_rounds)
    maps = {"gr": None, "lp": None}

    if "gr" in exp:
        print('GR')
        maps["gr"] = plot_p_at_k(
            res_pre, '.gr', 'gr', *common,
            save_fig_name='%s_gr' % save_fig_pre,
            K=K, plot_d=plot_d, s_sch=samp_scheme
        )

    if "lp" in exp:
        print('LP')
        maps["lp"] = plot_p_at_k(
            res_pre, '.lp', 'lp', *common,
            save_fig_name='%s_lp' % save_fig_pre,
            K=K, plot_d=plot_d, plot_ratio=plot_ratio, s_sch=samp_scheme
        )

    if "nc" in exp:
        print('NC')
        plot_F1(
            res_pre, '.nc', 'nc', *common,
            save_fig_name='%s_nc' % save_fig_pre,
            K=K, plot_d=plot_d
        )

    return maps["gr"], maps["lp"]