Source code for gemben.embedding.gf

#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import networkx as nx
import numpy as np
import scipy.io as sio
import pdb
import matplotlib.pyplot as plt

from subprocess import call

from .static_graph_embedding import StaticGraphEmbedding
from gemben.utils import graph_util, plot_util
from gemben.evaluation import visualize_embedding as viz
from time import time


[docs]class GraphFactorization(StaticGraphEmbedding): """`Graph Factorization`_. Graph Factorization factorizes the adjacency matrix with regularization. Args: hyper_dict (object): Hyper parameters. kwargs (dict): keyword arguments, form updating the parameters Examples: >>> from gemben.embedding.gf import GraphFactorization >>> edge_f = 'data/karate.edgelist' >>> G = graph_util.loadGraphFromEdgeListTxt(edge_f, directed=False) >>> G = G.to_directed() >>> res_pre = 'results/testKarate' >>> graph_util.print_graph_stats(G) >>> t1 = time() >>> embedding = GraphFactorization(2, 100000, 1 * 10**-4, 1.0) >>> embedding.learn_embedding(graph=G, edge_f=None, is_weighted=True, no_python=True) >>> print ('Graph Factorization:Training time: %f' % (time() - t1)) >>> viz.plot_embedding2D(embedding.get_embedding(), di_graph=G, node_colors=None) >>> plt.show() .. _Graph Factorization: https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/40839.pdf """ def __init__(self, *hyper_dict, **kwargs): ''' Initialize the GraphFactorization class Args: d: dimension of the embedding eta: learning rate of sgd regu: regularization coefficient of magnitude of weights max_iter: max iterations in sgd print_step: #iterations to log the prgoress (step%print_step) ''' hyper_params = { 'print_step': 10000, 'method_name': 'graph_factor_sgd' } hyper_params.update(kwargs) for key in hyper_params.keys(): self.__setattr__('_%s' % key, hyper_params[key]) for dictionary in hyper_dict: for key in dictionary: self.__setattr__('_%s' % key, dictionary[key])
[docs] def get_method_name(self): return self._method_name
[docs] def get_method_summary(self): return '%s_%d' % (self._method_name, self._d)
def _get_f_value(self, graph): f1 = 0 for i, j, w in graph.edges(data='weight', default=1): f1 += (w - np.dot(self._X[i, :], self._X[j, :]))**2 f2 = self._regu * (np.linalg.norm(self._X)**2) return [f1, f2, f1 + f2]
[docs] def learn_embedding(self, graph=None, edge_f=None, is_weighted=False, no_python=True): c_flag = True if not graph and not edge_f: raise Exception('graph/edge_f needed') if no_python: if sys.platform[0] == "w": args = ["gem/c_exe/gf.exe"] else: args = ["gem/c_exe/gf"] if not graph and not edge_f: raise Exception('graph/edge_f needed') if edge_f: graph = graph_util.loadGraphFromEdgeListTxt(edge_f) graphFileName = 'gem/intermediate/%s_gf.graph' % self._data_set embFileName = 'gem/intermediate/%s_%d_gf.emb' % (self._data_set, self._d) # try: # f = open(graphFileName, 'r') # f.close() # except IOError: graph_util.saveGraphToEdgeListTxt(graph, graphFileName) args.append(graphFileName) args.append(embFileName) args.append("1") # Verbose args.append("1") # Weighted args.append("%d" % self._d) args.append("%f" % self._eta) args.append("%f" % self._regu) args.append("%d" % self._max_iter) args.append("%d" % self._print_step) t1 = time() try: call(args) except Exception as e: print(str(e)) c_flag = False print('./gf not found. Reverting to Python implementation. Please compile gf, place node2vec in the path and grant executable permission') if c_flag: try: self._X = graph_util.loadEmbedding(embFileName) except FileNotFoundError: self._X = np.random.randn(graph.number_of_nodes(), self._d) t2 = time() try: call(["rm", embFileName]) except: pass return self._X, (t2 - t1) if not graph: graph = graph_util.loadGraphFromEdgeListTxt(edge_f) t1 = time() self._node_num = graph.number_of_nodes() self._X = 0.01 * np.random.randn(self._node_num, self._d) for iter_id in range(self._max_iter): if not iter_id % self._print_step: [f1, f2, f] = self._get_f_value(graph) print('\t\tIter id: %d, Objective: %g, f1: %g, f2: %g' % ( iter_id, f, f1, f2 )) for i, j, w in graph.edges(data='weight', default=1): if j <= i: continue term1 = -(w - np.dot(self._X[i, :], self._X[j, :])) * self._X[j, :] term2 = self._regu * self._X[i, :] delPhi = term1 + term2 self._X[i, :] -= self._eta * delPhi t2 = time() return self._X, (t2 - t1)
[docs] def get_embedding(self): return self._X
[docs] def get_edge_weight(self, i, j): return np.dot(self._X[i, :], self._X[j, :])
[docs] def get_reconstructed_adj(self, X=None, node_l=None): if X is not None: node_num = X.shape[0] self._X = X else: node_num = self._node_num adj_mtx_r = np.zeros((node_num, node_num)) for v_i in range(node_num): for v_j in range(node_num): if v_i == v_j: continue adj_mtx_r[v_i, v_j] = self.get_edge_weight(v_i, v_j) return adj_mtx_r
if __name__ == '__main__': # load Zachary's Karate graph edge_f = 'data/karate.edgelist' G = graph_util.loadGraphFromEdgeListTxt(edge_f, directed=False) G = G.to_directed() res_pre = 'results/testKarate' graph_util.print_graph_stats(G) t1 = time() embedding = GraphFactorization(2, 100000, 1 * 10**-4, 1.0) embedding.learn_embedding(graph=G, edge_f=None, is_weighted=True, no_python=True) print ('Graph Factorization:\n\tTraining time: %f' % (time() - t1)) viz.plot_embedding2D(embedding.get_embedding(), di_graph=G, node_colors=None) plt.show()