# Blind extraction of equitable partitions from graph signals
The project code is available in this repository.
The extended paper is available here and on arXiv.
## Code Installation
Simply run `pip install -r code/requirements.txt` to install the required Python packages, or install them manually.
## Code Usage
To replicate the results of our paper, use `Experiments.ipynb`, which will guide you through the process that led to the figures displayed in the paper.
import numpy as np
import networkx as nx
import copy
from sklearn.mixture import BayesianGaussianMixture, GaussianMixture
from sklearn.cluster import KMeans

# Hash function used in the WL computation - essentially a lookup table
# backed by a dict that is indexed by arbitrary Python hashable values.
class HashFunction:
    def __init__(self):
        self.reset()

    # apply returns the hash for the given value; if the value is not in the
    # dict, a new hash is generated, stored in the dict and returned
    def apply(self, value):
        if value not in self.hash_dict:
            self.hash_dict[value] = self.hash_counter
            self.hash_counter += 1
        return self.hash_dict[value]

    # reset the hash lookup table
    def reset(self):
        self.hash_dict = {}
        self.hash_counter = 2

wl_hash = HashFunction()
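
# A minimal usage sketch (added for illustration, not part of the original
# pipeline): identical inputs map to the same integer, unseen inputs get
# fresh integers starting from 2.
def _demo_hash_function():
    h = HashFunction()
    assert h.apply(("a", (1, 2))) == 2  # first unseen value is assigned 2
    assert h.apply(("b", (1,))) == 3    # next unseen value is assigned 3
    assert h.apply(("a", (1, 2))) == 2  # repeated value returns the stored hash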
# Weisfeiler-Lehman algorithm
# Input:
#   graph1: the graph to color,
#   iterations: number of iterations (-1 means one iteration per node),
#   early_stopping: stop early in case a stable partition is reached before
#     the number of iterations is up,
#   hash: a HashFunction - in case you want to share it between multiple runs
#     of the WL algorithm
def weisfeiler_lehman(graph1: nx.Graph, iterations=-1, early_stopping=True, hash=wl_hash):
    if iterations == -1:
        iterations = len(graph1)
    Gamma1 = np.ones(len(graph1), dtype=int)
    set_colors_by_iteration = []
    colors_by_iteration = []
    # for the given number of iterations
    for t in range(iterations):
        # copying is necessary, as WL has a synchronous update
        tmp_Gamma1 = np.copy(Gamma1)
        # store the coloring so it can be returned with the output
        colors_by_iteration.append(copy.deepcopy(Gamma1))
        set_colors_by_iteration.append(set(Gamma1))
        # update the color of each node
        for node in range(len(graph1)):
            Gamma1[node] = hash.apply((Gamma1[node], tuple(sorted([tmp_Gamma1[n] for n in graph1[node]]))))
        # if the coloring is equivalent to the previous iteration, stop early
        if is_equivalent(Gamma1, tmp_Gamma1) and early_stopping:
            return tmp_Gamma1, t, set_colors_by_iteration, colors_by_iteration
    colors_by_iteration.append(copy.deepcopy(Gamma1))
    set_colors_by_iteration.append(set(Gamma1))
    return Gamma1, iterations, set_colors_by_iteration, colors_by_iteration
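
# A minimal usage sketch (hypothetical, wrapped in a function so importing
# this module has no side effects; weisfeiler_lehman depends on
# is_equivalent, which is defined further below):
def _demo_weisfeiler_lehman():
    G = nx.cycle_graph(6)  # vertex-transitive, so WL stabilises immediately
    colors, t, _, _ = weisfeiler_lehman(G)
    print(colors, "stable after", t, "iterations")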
# If the input is faulty - i.e. the constraints cannot be met - the behaviour
# of the function is undetermined.
# Input:
#   nodes_per_group: array with the size of each class,
#   links_to_other_group: matrix that gives the number of links between each
#     class (this is A^\pi),
#   multi_graph_okay: flag whether a non-simple graph may be returned -
#     default is False, meaning the returned graph is simple
def sample_csEP(nodes_per_group, links_to_other_group, multi_graph_okay=False):
    group_offset = []
    num_nodes = 0
    links_needed = np.zeros((np.sum(nodes_per_group), len(nodes_per_group)), dtype=int)
    # initialize links_needed - this is essentially AH and determines for each
    # node how many neighbours it has in each class
    for i in range(len(nodes_per_group)):
        for node in range(nodes_per_group[i]):
            links_needed[node + num_nodes, :] = links_to_other_group[i]
        group_offset.append(num_nodes)
        num_nodes += nodes_per_group[i]
    # initialize the adjacency matrix
    adjacency_matrix = np.zeros((np.sum(nodes_per_group), np.sum(nodes_per_group)), dtype=int)
    # add as many edges as necessary for each node and class
    for group in range(len(nodes_per_group)):
        for node in range(group_offset[group], group_offset[group] + nodes_per_group[group]):
            for other_group in range(len(links_to_other_group[group])):
                chosen_links = np.random.permutation([(node, i) for i in range(group_offset[other_group], group_offset[other_group] + nodes_per_group[other_group]) if links_needed[i, group] > 0 and node != i])
                for link in list(chosen_links[:links_needed[node, other_group]]):
                    adjacency_matrix[link[0], link[1]] = 1
                    adjacency_matrix[link[1], link[0]] = 1
                    links_needed[node, other_group] -= 1
                    links_needed[link[1], group] -= 1
                # For small graphs it is possible to have traversed all
                # candidates before all constraints are met. Thus the
                # procedure is repeated until the required number of links
                # is reached.
                while links_needed[node, other_group] > 0:
                    chosen_links = np.random.permutation([(node, i) for i in range(group_offset[other_group], group_offset[other_group] + nodes_per_group[other_group]) if links_needed[i, group] > 0])
                    link = chosen_links[0]
                    adjacency_matrix[link[0], link[1]] += 1
                    adjacency_matrix[link[1], link[0]] += 1 if link[0] != link[1] else 0
                    links_needed[node, other_group] -= 1
                    links_needed[link[1], group] -= 1 if node != link[1] else 0
    group_offset.append(np.sum(nodes_per_group))
    links_to_other_group_per_node = []
    for i, g in enumerate(nodes_per_group):
        links_to_other_group_per_node.extend([links_to_other_group[i]] * g)
    assert np.all([[np.sum([adjacency_matrix[i, j] for j in range(group_offset[group], group_offset[group + 1])]) for group in range(len(nodes_per_group))] for i in range(len(adjacency_matrix))] == links_to_other_group_per_node)
    # The adjacency matrix may describe a non-simple graph at this point;
    # unless a multigraph is acceptable, we try to make it simple.
    if not multi_graph_okay:
        adjacency_matrix = make_simple_graph(adjacency_matrix, nodes_per_group, links_to_other_group, group_offset)
        assert np.all([[np.sum([adjacency_matrix[i, j] for j in range(group_offset[group], group_offset[group + 1])]) for group in range(len(nodes_per_group))] for i in range(len(adjacency_matrix))] == links_to_other_group_per_node)
        assert len([(i, j) for i in range(len(adjacency_matrix)) for j in range(len(adjacency_matrix)) if adjacency_matrix[i, j] > 1 or (i == j and adjacency_matrix[i, j] > 0)]) == 0
    return adjacency_matrix
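
# A minimal usage sketch (hypothetical; the quotient matrix below is an
# illustrative choice, wrapped in a function since sample_csEP calls
# make_simple_graph, which is defined further below):
def _demo_sample_csEP():
    # two classes of 6 nodes; each node has 2 neighbours in its own class
    # and 1 in the other, i.e. A^\pi = [[2, 1], [1, 2]]
    A = sample_csEP([6, 6], [[2, 1], [1, 2]])
    print(A.sum(axis=1))  # every node has degree 3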
# function to rewire multi-edges and self-loops, as these are undesirable but
# do tend to occur in the sampling process.
# Input:
#   adjacency_matrix: preliminary output of sample_csEP (possibly non-simple
#     random graph with EP),
#   nodes_per_group: array with the size of each class,
#   links_to_other_group: matrix that gives the number of links between each
#     class (this is A^\pi),
#   group_offset: array with the first node of class i at the i-th index
def make_simple_graph(adjacency_matrix, nodes_per_group, links_to_other_group, group_offset):
    # first fix self-loops (rewiring them to edges within a class)
    diagonal_entries_to_fix = [(i, i) for i in range(len(adjacency_matrix)) if adjacency_matrix[i, i] > 0]
    for edge in diagonal_entries_to_fix:
        for i in range(adjacency_matrix[edge[0], edge[1]]):
            # if the number of self-loops is greater than or equal to 2, we
            # have more possibilities to swap edges
            if adjacency_matrix[edge[0], edge[1]] >= 2:
                group_x = [g for g in range(len(nodes_per_group)) if edge[0] >= group_offset[g] and edge[0] < group_offset[g + 1]][0]
                group_y = [g for g in range(len(nodes_per_group)) if edge[1] >= group_offset[g] and edge[1] < group_offset[g + 1]][0]
                candidates_x = [n for n in range(group_offset[group_x], group_offset[group_x + 1]) if n != edge[1] and adjacency_matrix[edge[1], n] < 1]
                candidates_y = [n for n in range(group_offset[group_y], group_offset[group_y + 1]) if n != edge[0] and adjacency_matrix[n, edge[0]] < 1]
                choice = np.random.permutation([(x, y) for x in candidates_x for y in candidates_y if adjacency_matrix[x, y] >= 1 and x != y])[0]
                adjacency_matrix[edge[0], choice[1]] = 1
                adjacency_matrix[choice[1], edge[0]] = 1
                adjacency_matrix[choice[0], edge[1]] = 1
                adjacency_matrix[edge[1], choice[0]] = 1
                adjacency_matrix[edge[0], edge[1]] -= 1
                adjacency_matrix[edge[1], edge[0]] -= 1
                adjacency_matrix[choice[0], choice[1]] -= 1
                adjacency_matrix[choice[1], choice[0]] -= 1
            # if there is only one self-loop, there must be some other node in
            # the class that also has one self-loop, and we connect the two
            elif adjacency_matrix[edge[0], edge[1]] == 1:
                group = [g for g in range(len(nodes_per_group)) if edge[0] >= group_offset[g] and edge[0] < group_offset[g + 1]][0]
                candidates = [n for n in range(group_offset[group], group_offset[group + 1]) if n != edge[0]]
                choice = np.random.permutation([(x, x) for x in candidates if adjacency_matrix[x, x] >= 1])[0]
                adjacency_matrix[edge[0], choice[1]] += 1
                adjacency_matrix[choice[1], edge[0]] += 1
                adjacency_matrix[edge[1], edge[0]] -= 1
                adjacency_matrix[choice[0], choice[1]] -= 1
            else:
                continue
    # fix multi-edges
    edges_to_fix = [(i, j) for i in range(len(adjacency_matrix)) for j in range(i + 1, len(adjacency_matrix)) if adjacency_matrix[i, j] > 1 and i != j]
    # similar procedure to above, but fixing edges that were chosen multiple times
    for edge in edges_to_fix:
        for i in range(adjacency_matrix[edge[0], edge[1]] - 1):
            group_x = [g for g in range(len(nodes_per_group)) if edge[0] >= group_offset[g] and edge[0] < group_offset[g + 1]][0]
            group_y = [g for g in range(len(nodes_per_group)) if edge[1] >= group_offset[g] and edge[1] < group_offset[g + 1]][0]
            candidates_x = [n for n in range(group_offset[group_x], group_offset[group_x + 1]) if n != edge[1] and adjacency_matrix[edge[1], n] < 1]
            candidates_y = [n for n in range(group_offset[group_y], group_offset[group_y + 1]) if n != edge[0] and adjacency_matrix[n, edge[0]] < 1]
            choice = np.random.permutation([(x, y) for x in candidates_x for y in candidates_y if adjacency_matrix[x, y] >= 1 and x != y])[0]
            adjacency_matrix[edge[0], choice[1]] = 1
            adjacency_matrix[choice[1], edge[0]] = 1
            adjacency_matrix[choice[0], edge[1]] = 1
            adjacency_matrix[edge[1], choice[0]] = 1
            adjacency_matrix[edge[0], edge[1]] -= 1
            adjacency_matrix[edge[1], edge[0]] -= 1
            adjacency_matrix[choice[0], choice[1]] -= 1
            adjacency_matrix[choice[1], choice[0]] -= 1
            # if the swapped edge was itself a multi-edge, restart the repair
            if adjacency_matrix[choice[0], choice[1]] > 0:
                return make_simple_graph(adjacency_matrix, nodes_per_group, links_to_other_group, group_offset)
    return adjacency_matrix
# blind WL algorithm
# Input:
#   Sigma: the approximate oracle,
#   max_num_components: maximum number of clusters the Gaussian mixtures can find,
#   n_init: number of initialisations for the Gaussian mixtures,
#   method: either "bgm" (BayesianGaussianMixture) or "gm" (GaussianMixture),
#   H: initial coloring (one color value per node); converted internally to a
#     partition indicator matrix,
#   verbose: 0 silent, 1 some intermediate output, 2 full output,
#   output_H: flag whether the output should be the partition indicator matrix
#     or an array with the colors
def blind_WL_from_outputs(Sigma, max_num_components=10, n_init=2, method="bgm", H=[], verbose=0, output_H=False):
    # Sigma is the approximate oracle.
    # We can incorporate previous knowledge of the classes by passing H;
    # if H is empty, the all-ones vector (a single class) is used.
    if len(H) == 0:
        H = np.ones((Sigma.shape[0], 1))
        dim_B = 1
    else:
        B = np.array([[1 if np.isclose(i, H[j], atol=0.00001) else 0 for j in range(len(H))] for i in np.unique(np.array(H).round(decimals=5))])
        H = B.transpose()
        dim_B = H.shape[1]
    # we normalize Sigma for better behaviour of the clustering algorithms
    A = Sigma / np.linalg.norm(Sigma, 2)
    if verbose == 2: print("A", A)
    # for a maximum of |V| iterations we compute the blind WL update
    for i in range(Sigma.shape[1]):
        if verbose == 2: print("H", H.shape, H)
        H = H @ np.diag([1 / sum(H[:, x]) for x in range(H.shape[1])]) * Sigma.shape[0]
        X = A @ H
        if verbose == 2: print("H", H.shape, H)
        if verbose == 1: print("X ", X.shape, X)
        # fit a BayesianGaussianMixture to X = AH
        if method == "bgm":
            BGM = BayesianGaussianMixture(n_components=max_num_components, mean_precision_prior=0.01, weight_concentration_prior_type="dirichlet_distribution", n_init=n_init, max_iter=1000, covariance_type='tied')
            colors = BGM.fit_predict(X)
            # if some component weight is very small, i.e. not really relevant, we fit a simpler model
            if min(BGM.weights_) < 1 / (2 * max_num_components):
                BGM = BayesianGaussianMixture(n_components=sum(BGM.weights_ >= 1 / (2 * max_num_components)), mean_precision_prior=0.01, weight_concentration_prior_type="dirichlet_distribution", n_init=n_init, max_iter=1000, covariance_type='tied')
                colors = BGM.fit_predict(X)
        # fit a GaussianMixture to X = AH
        elif method == "gm":
            BGM = GaussianMixture(n_components=max_num_components)
            colors = BGM.fit_predict(X)
            # if some component weight is very small, i.e. not really relevant, we fit a simpler model
            if min(BGM.weights_) < 1 / (2 * max_num_components):
                BGM = GaussianMixture(n_components=sum(BGM.weights_ >= 1 / (2 * max_num_components)))
                colors = BGM.fit_predict(X)
        # compute the new partition indicator matrix B
        if verbose == 1: print("colors ", colors)
        B = [[1 if i == colors[j] else 0 for j in range(len(A))] for i in np.unique(colors)]
        if verbose == 2: print("B ", np.array(B).transpose())
        # if the number of clusters has not changed from the previous update, we stop early
        if len(B) == dim_B:
            # output either the partition indicator matrix or an array assigning colors
            if output_H:
                return np.array(B).transpose()
            else:
                return colors
        dim_B = len(B)
        B = np.array(B).transpose()
        H = B
    # output either the partition indicator matrix or an array assigning colors
    # (B already holds the node-by-class indicator here)
    if output_H:
        return B
    else:
        return colors
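
# An end-to-end sketch (hypothetical: here the approximate oracle Sigma is
# taken to be the sample covariance of white noise passed through a
# polynomial graph filter; the filter below is an illustrative choice, not
# necessarily the one used in the paper):
def _demo_blind_wl():
    A = sample_csEP([6, 6], [[2, 1], [1, 2]])
    n = len(A)
    X = np.random.randn(n, 5000)            # white-noise inputs
    Y = (np.eye(n) + A + 0.5 * A @ A) @ X   # filtered graph signals
    Sigma = Y @ Y.T / X.shape[1]            # sample covariance
    print(blind_WL_from_outputs(Sigma, max_num_components=4))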
# Spectral clustering algorithm
# Input:
#   Sigma: sample covariance matrix,
#   max_num_components: maximum number of clusters the Gaussian mixtures can
#     find, or the number of clusters for KMeans,
#   n_init: number of initialisations for the Gaussian mixtures and KMeans,
#   num_top_vectors: number of top eigenvectors to consider in the clustering;
#     defaults to the number of clusters,
#   method: either "bgm" (BayesianGaussianMixture) or "kmeans" (KMeans)
def EP_from_outputs(Sigma, max_num_components=10, n_init=2, num_top_vectors=0, method="bgm"):
    if num_top_vectors == 0:
        num_top_vectors = max_num_components
    # get the eigenvectors
    U, E, V = np.linalg.svd(Sigma)
    U = U[:, :num_top_vectors]
    best_score = -np.inf
    best_predict = None
    for i in range(1, max_num_components):
        # fit the models to the first i eigenvectors and pick the best fit as the prediction
        if method == "bgm":
            BGM = BayesianGaussianMixture(n_components=i, mean_precision_prior=0.01, weight_concentration_prior_type="dirichlet_distribution", n_init=n_init)
            BGM_predict = BGM.fit_predict(U[:, :i])
            score = BGM.score(U[:, :i])
            if min(BGM.weights_) < 1 / max_num_components:
                return best_predict
            else:
                if score > best_score:
                    best_score = score
                    best_predict = BGM_predict
        elif method == "kmeans":
            KM = KMeans(n_clusters=max_num_components, n_init=n_init)
            best_predict = KM.fit_predict(U[:, :max_num_components])
    return best_predict
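
# A usage sketch (hypothetical, same illustrative signal model as above):
def _demo_EP_from_outputs():
    A = sample_csEP([6, 6], [[2, 1], [1, 2]])
    n = len(A)
    Y = (np.eye(n) + A) @ np.random.randn(n, 5000)  # filtered graph signals
    Sigma = Y @ Y.T / Y.shape[1]                    # sample covariance
    print(EP_from_outputs(Sigma, max_num_components=4, method="bgm"))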
# Subroutine of is_equivalent; checks whether coloring c1 refines coloring c2
# Input:
#   c1: first coloring,
#   c2: second coloring
def is_equiv_subroutine(c1, c2):
    color_map = {}
    # for each color of c1, store the color the corresponding nodes have in c2
    for i in range(len(c1)):
        if c1[i] not in color_map:
            color_map[c1[i]] = c2[i]
        # if the color has already been assigned, check that it maps to the same color
        else:
            # if not, c1 does not refine c2
            if color_map[c1[i]] != c2[i]:
                return False
    return True

# Checks whether coloring c1 is equivalent to coloring c2
# Input:
#   c1: first coloring,
#   c2: second coloring
def is_equivalent(c1, c2):
    return is_equiv_subroutine(c1, c2) and is_equiv_subroutine(c2, c1)
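
# A small illustration (added for clarity): two colorings are equivalent iff
# they induce the same partition, regardless of the label values used.
def _demo_is_equivalent():
    assert is_equivalent([0, 0, 1], [5, 5, 7])      # same partition, relabelled
    assert not is_equivalent([0, 0, 1], [5, 6, 7])  # second coloring is strictly finer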
numpy
networkx
scikit-learn
matplotlib