#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import networkx as nx
import numpy as np
import scipy.io as sio
import pdb
import matplotlib.pyplot as plt
from subprocess import call
from .static_graph_embedding import StaticGraphEmbedding
from gemben.utils import graph_util, plot_util
from gemben.evaluation import visualize_embedding as viz
from time import time
[docs]class GraphFactorization(StaticGraphEmbedding):
"""`Graph Factorization`_.
Graph Factorization factorizes the adjacency matrix with regularization.
Args:
hyper_dict (object): Hyper parameters.
kwargs (dict): keyword arguments, form updating the parameters
Examples:
>>> from gemben.embedding.gf import GraphFactorization
>>> edge_f = 'data/karate.edgelist'
>>> G = graph_util.loadGraphFromEdgeListTxt(edge_f, directed=False)
>>> G = G.to_directed()
>>> res_pre = 'results/testKarate'
>>> graph_util.print_graph_stats(G)
>>> t1 = time()
>>> embedding = GraphFactorization(2, 100000, 1 * 10**-4, 1.0)
>>> embedding.learn_embedding(graph=G, edge_f=None,
is_weighted=True, no_python=True)
>>> print ('Graph Factorization:Training time: %f' % (time() - t1))
>>> viz.plot_embedding2D(embedding.get_embedding(),
di_graph=G, node_colors=None)
>>> plt.show()
.. _Graph Factorization:
https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/40839.pdf
"""
def __init__(self, *hyper_dict, **kwargs):
''' Initialize the GraphFactorization class
Args:
d: dimension of the embedding
eta: learning rate of sgd
regu: regularization coefficient of magnitude of weights
max_iter: max iterations in sgd
print_step: #iterations to log the prgoress (step%print_step)
'''
hyper_params = {
'print_step': 10000,
'method_name': 'graph_factor_sgd'
}
hyper_params.update(kwargs)
for key in hyper_params.keys():
self.__setattr__('_%s' % key, hyper_params[key])
for dictionary in hyper_dict:
for key in dictionary:
self.__setattr__('_%s' % key, dictionary[key])
[docs] def get_method_name(self):
return self._method_name
[docs] def get_method_summary(self):
return '%s_%d' % (self._method_name, self._d)
def _get_f_value(self, graph):
f1 = 0
for i, j, w in graph.edges(data='weight', default=1):
f1 += (w - np.dot(self._X[i, :], self._X[j, :]))**2
f2 = self._regu * (np.linalg.norm(self._X)**2)
return [f1, f2, f1 + f2]
[docs] def learn_embedding(self, graph=None, edge_f=None,
is_weighted=False, no_python=True):
c_flag = True
if not graph and not edge_f:
raise Exception('graph/edge_f needed')
if no_python:
if sys.platform[0] == "w":
args = ["gem/c_exe/gf.exe"]
else:
args = ["gem/c_exe/gf"]
if not graph and not edge_f:
raise Exception('graph/edge_f needed')
if edge_f:
graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
graphFileName = 'gem/intermediate/%s_gf.graph' % self._data_set
embFileName = 'gem/intermediate/%s_%d_gf.emb' % (self._data_set, self._d)
# try:
# f = open(graphFileName, 'r')
# f.close()
# except IOError:
graph_util.saveGraphToEdgeListTxt(graph, graphFileName)
args.append(graphFileName)
args.append(embFileName)
args.append("1") # Verbose
args.append("1") # Weighted
args.append("%d" % self._d)
args.append("%f" % self._eta)
args.append("%f" % self._regu)
args.append("%d" % self._max_iter)
args.append("%d" % self._print_step)
t1 = time()
try:
call(args)
except Exception as e:
print(str(e))
c_flag = False
print('./gf not found. Reverting to Python implementation. Please compile gf, place node2vec in the path and grant executable permission')
if c_flag:
try:
self._X = graph_util.loadEmbedding(embFileName)
except FileNotFoundError:
self._X = np.random.randn(graph.number_of_nodes(), self._d)
t2 = time()
try:
call(["rm", embFileName])
except:
pass
return self._X, (t2 - t1)
if not graph:
graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
t1 = time()
self._node_num = graph.number_of_nodes()
self._X = 0.01 * np.random.randn(self._node_num, self._d)
for iter_id in range(self._max_iter):
if not iter_id % self._print_step:
[f1, f2, f] = self._get_f_value(graph)
print('\t\tIter id: %d, Objective: %g, f1: %g, f2: %g' % (
iter_id,
f,
f1,
f2
))
for i, j, w in graph.edges(data='weight', default=1):
if j <= i:
continue
term1 = -(w - np.dot(self._X[i, :], self._X[j, :])) * self._X[j, :]
term2 = self._regu * self._X[i, :]
delPhi = term1 + term2
self._X[i, :] -= self._eta * delPhi
t2 = time()
return self._X, (t2 - t1)
[docs] def get_embedding(self):
return self._X
[docs] def get_edge_weight(self, i, j):
return np.dot(self._X[i, :], self._X[j, :])
[docs] def get_reconstructed_adj(self, X=None, node_l=None):
if X is not None:
node_num = X.shape[0]
self._X = X
else:
node_num = self._node_num
adj_mtx_r = np.zeros((node_num, node_num))
for v_i in range(node_num):
for v_j in range(node_num):
if v_i == v_j:
continue
adj_mtx_r[v_i, v_j] = self.get_edge_weight(v_i, v_j)
return adj_mtx_r
if __name__ == '__main__':
# load Zachary's Karate graph
edge_f = 'data/karate.edgelist'
G = graph_util.loadGraphFromEdgeListTxt(edge_f, directed=False)
G = G.to_directed()
res_pre = 'results/testKarate'
graph_util.print_graph_stats(G)
t1 = time()
embedding = GraphFactorization(2, 100000, 1 * 10**-4, 1.0)
embedding.learn_embedding(graph=G, edge_f=None,
is_weighted=True, no_python=True)
print ('Graph Factorization:\n\tTraining time: %f' % (time() - t1))
viz.plot_embedding2D(embedding.get_embedding(),
di_graph=G, node_colors=None)
plt.show()