Reference: Juliette Stehlé, Nicolas Voirin, Alain Barrat, Ciro Cattuto, Lorenzo Isella, Jean-François Pinton, Marco Quaggiotto, Wouter Van den Broeck, Corinne Régis, Bruno Lina and Philippe Vanhems. "High-Resolution Measurements of Face-to-Face Contact Patterns in a Primary School". PLOS ONE 6(8): e23176 (2011). doi:10.1371/journal.pone.0023176
Original data: http://www.sociopatterns.org/datasets/primary-school-cumulative-networks/
%matplotlib inline
from __future__ import division
import numpy as np
import pandas as pd
pd.set_option('display.precision', 2)
# Network
import networkx as nx
import community # louvain
from fa2 import ForceAtlas2
from distanceclosure.utils import _dist2prox as dist2prox
# Matplotlib
import matplotlib as mpl
import matplotlib.style
mpl.style.use('default')
mpl.rcParams['mathtext.fontset'] = 'cm'
mpl.rcParams['mathtext.rm'] = 'serif'
import matplotlib.pyplot as plt
# Others
import math
import random
from IPython.core.display import display, Math
from collections import OrderedDict
from scipy.stats import entropy
# clusim
from clusim.clustering import Clustering
from clusim.clusimelement import element_sim, element_sim_elscore
# SocioPatterns Network
project = 'primary-school'
# The normalization used when converting contacts to proximity weights
normalization = 'social' # [options:] 'time', 'time_all' or 'social'
# Network File
rGpickle = 'results/%s/%s-%s.gpickle' % (project, project, normalization)
# The Networkx node attribute containing the Meta labels
module_attribute = 'class'
# Metadata nodecolors
dict_meta_color = {
'Teachers':'#4c4c4c', #gray
'1A':'#009999', # Cyan
'1B':'#66ffff',
'2A':'#004c00', #Green
'2B':'#66b266',
'3B':'#996300', # Orange
'3A':'#ffc966',
'4A':'#000099', # Blue
'4B':'#6666ff',
'5A':'#990000', # Red
'5B':'#ff6666'
}
# Title for plots
project_title = 'Primary school'
Both the metric and the ultrametric backbones have been computed with the distanceclosure package. Below we simply load a NetworkX graph that contains the metric and semi-metric information for each edge.
G = nx.read_gpickle(rGpickle)
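As a quick sanity check (a sketch, not part of the original analysis), an edge is flagged as metric exactly when its direct distance equals the shortest-path distance between its endpoints; the snippet below compares the two for one original edge, assuming the 'distance', 'metric' and 'original' edge attributes loaded above.
# Sketch: pick one original edge and compare its direct distance with the shortest-path distance.
i, j, d = next((i, j, d) for i, j, d in G.edges(data=True) if d.get('original'))
d_shortest = nx.dijkstra_path_length(G, i, j, weight='distance')
print 'edge ({},{}): distance={:.4f}, shortest-path={:.4f}, metric={}'.format(i, j, d.get('distance'), d_shortest, d.get('metric'))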
In the Primary School data set, teachers originally form their own module; we manually reassign each teacher to the module of the class they teach. Variables with a trailing 'p' denote the corresponding ' (prime) versions.
Gp = G.copy()
Gp.node[1753]['class'] = '1A'
Gp.node[1745]['class'] = '1B'
Gp.node[1852]['class'] = '2A'
Gp.node[1650]['class'] = '2B'
Gp.node[1746]['class'] = '3A'
Gp.node[1709]['class'] = '3B'
Gp.node[1653]['class'] = '4A'
Gp.node[1521]['class'] = '4B'
Gp.node[1668]['class'] = '5A'
Gp.node[1824]['class'] = '5B'
These helper functions retrieve information from the NetworkX graph objects.
def get_graph_variables(G, *arg, **kwargs):
dM = nx.get_node_attributes(G, *arg)
s = set(dM.values())
n = len(s)
sM = { m : set([k for k,v in dM.items() if v==m]) for m in s }
#
return n, s, sM, dM
# Strip G to the original Graph, without additional metric closure edges.
def generate_original_graph(G):
GO = G.copy()
edges2remove = [(i,j) for i,j,d in G.edges(data=True) if 'original' not in d]
GO.remove_edges_from(edges2remove)
return GO
GO = generate_original_graph(G)
GOp = generate_original_graph(Gp)
# Metric Graph, only metric edges
def generate_metric_graph(G):
GM = G.copy()
edges2remove = [(i,j) for i,j,d in G.edges(data=True) if d['metric']==False]
GM.remove_edges_from(edges2remove)
return GM
GM = generate_metric_graph(G)
GMp = generate_metric_graph(Gp)
Compute and display some basic statistics for comparison.
nO, sO, sMO, dMO = get_graph_variables(GO, module_attribute)
nOp, sOp, sMOp, dMOp = get_graph_variables(GOp, module_attribute)
n = G.number_of_nodes()
isolates = len( list( nx.isolates(G) ) )
isolates_percent = isolates/n
e_possible = int(( (n*n) - n) / 2)
e_total = G.number_of_edges()
original_components = nx.number_connected_components(GO)
n_metalabels = len(np.unique(nx.get_node_attributes(GO, module_attribute).values()))
n_metalabels_exception = len(np.unique(nx.get_node_attributes(GOp, module_attribute).values()))
e_original = 0
e_metric = 0
e_ultrametric = 0
e_semimetric = 0
e_s_gt_1 = 0
e_s_gt_1_original = 0
e_d_eq_infty = 0
e_bij_gt_1 = 0
e_bji_gt_1 = 0
distortion = 0
for eid,(i,j,d) in enumerate( G.edges(data=True), start=0):
# Original Edges
if (d.get('original')==True):
e_original += 1
# Metric Edges
if (d.get('metric')==True):
e_metric += 1
# UltraMetric Edges
if (d.get('ultrametric')==True):
e_ultrametric += 1
# Semi-metric edges
if (d.get('metric')==False):
e_semimetric += 1
# S values
if (d.get('s_value'))>1.0:
e_s_gt_1 += 1
if (d.get('s_value'))>1.0 and (d.get('original')==True):
e_s_gt_1_original += 1
if (d.get('distance')==np.inf):
e_d_eq_infty += 1
# B_ij values
if (d.get('b_ij_value'))>1.0:
e_bij_gt_1 += 1
# B_ji values
if (d.get('b_ji_value'))>1.0:
e_bji_gt_1 += 1
# Distortion
distortion += abs(dist2prox(d['distance_metric_closure']) - d.get('proximity'))
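# Normalize the accumulated distortion by the number of node pairs, n*(n-1)/2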
distortion_norm = (2 * distortion) / (n * (n - 1))
e_original_percent = e_original/e_total
e_metric_percent = e_metric/e_original
e_semimetric_percent = e_semimetric/e_total
e_s_gt_1_percent = e_s_gt_1/e_total
e_s_gt_1_original_percent = e_s_gt_1_original/e_original
e_d_eq_infty_percent = e_d_eq_infty/e_total
e_bij_gt_1_percent = e_bij_gt_1/e_total
e_bji_gt_1_percent = e_bji_gt_1/e_total
print 'Meta-labels: {:,d}'.format(n_metalabels)
# EXCEPTION
print 'Meta-labels (exception): {:,d}'.format(n_metalabels_exception)
display(Math('D_w'))
print 'Nodes: {:,d}'.format(n)
print 'Possible Edges: {:,d}'.format(e_possible)
print 'Edges: {:,d} ({:.2%} of possible edges)'.format(e_original , e_original_percent)
print 'Connected Components: {:,d}'.format(original_components)
print 'Isolates: {:,d} ({:.2%})'.format(isolates, isolates_percent)
if (original_components == 1) and (e_possible != e_total):
raise Exception('Since the original graph has a single connected component, the closure graph must be complete ({:d} != {:d})'.format(e_possible, e_total))
display(Math('B_w'))
print 'Metric Edges: {:,d} ({:.2%} of the original {:,d} edges)'.format(e_metric, e_metric_percent, e_original)
display(Math('D^C_w'))
print 'Semi-Metric edges: {:,d} ({:.2%})'.format(e_semimetric, e_semimetric_percent)
print 'S>1: {:,d} ({:.2%} of original edges; {:.2%} of the total edges)'.format(e_s_gt_1, e_s_gt_1_original_percent, e_s_gt_1_percent)
print 'B_ij>1 , B_ji>1: {:,d} ({:.2%} of total) , {:,d} ({:.2%} of total)'.format(e_bij_gt_1, e_bij_gt_1_percent , e_bji_gt_1, e_bji_gt_1_percent)
print 'Edges where D=\infty: {:,d} ({:.2%} of the total {:,d} edges)'.format(e_d_eq_infty, e_d_eq_infty_percent, e_total)
print 'Distortion \delta (\Delta): {:.4f} ({:.2f})'.format(distortion_norm, distortion)
The threshold backbone is a null model in which the edges with the smallest proximity are removed until the network has the same number of edges as the metric backbone.
# Remove the lowest-weight edges so that the network retains the same number of edges as the metric backbone (e_metric).
def generate_threshold_graph(G, edges_to_keep=0):
GT = G.copy()
edges2remove = sorted(GT.edges(data=True), key=lambda x: x[2]['weight'])[ : -edges_to_keep ]
GT.remove_edges_from(edges2remove)
return GT
GT = generate_threshold_graph(GO, e_metric)
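As a quick check (a sketch, not from the original notebook), the threshold null model should retain exactly as many edges as the metric backbone.
# Sketch: the threshold graph keeps exactly e_metric edges by construction.
print 'Threshold edges: {:,d}  Metric edges: {:,d}'.format(GT.number_of_edges(), e_metric)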
The random backbone is a null model in which edges are removed uniformly at random until the network has the same number of edges as the metric backbone.
# Randomly remove a given number of edges from a copy of the graph
def generate_random_graph(G, n_edges_to_remove=0):
    GR = G.copy()
    edges2remove = random.sample(list(GR.edges(data=True)), n_edges_to_remove)
    GR.remove_edges_from(edges2remove)
    return GR
# A generator function that yields n random graphs
def generate_n_random_graphs(G, n=1, *arg, **kwargs):
for i in xrange(n):
yield generate_random_graph(G, *arg)
GR = generate_random_graph(GO, e_original-e_metric)
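Since each random draw yields a different graph, the generator can be used to inspect a few realizations; this is a usage sketch only (the same generator feeds the 100-sample comparisons further below), and every draw should retain e_metric edges.
# Sketch: draw a few random backbones and report their edge counts.
for _g in generate_n_random_graphs(GO, 3, e_original - e_metric):
    print 'Random draw: {:,d} edges (target: {:,d})'.format(_g.number_of_edges(), e_metric)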
def compute_louvain(G):
dM = community.best_partition(G)
nx.set_node_attributes(G, name='module-louvain', values=dM)
# Original
compute_louvain(GO)
n, s, sM, dM = get_graph_variables(GO, 'module-louvain')
print "G Original Louvain : {:d}".format(n)
# Metric
compute_louvain(GM)
n, s, sM, dM = get_graph_variables(GM, 'module-louvain')
print "G Metric Louvain : {:d}".format(n)
# Threshold
compute_louvain(GT)
n, s, sM, dM = get_graph_variables(GT, 'module-louvain')
print "G Threshold Louvain: {:d}".format(n)
# Random
compute_louvain(GR)
ns = []
for i in xrange(100):
_GR = generate_random_graph(GO, e_original-e_metric)
compute_louvain(_GR)
n, s, sM, dM = get_graph_variables(_GR, 'module-louvain')
ns.append(n)
print "G Random Louvain : {:.2f}±{:.2f}".format(np.mean(ns), np.std(ns))
Node layout is computed with a Python implementation of Gephi's ForceAtlas2 algorithm.
forceatlas2 = ForceAtlas2(outboundAttractionDistribution=False, linLogMode=False, adjustSizes=False,
edgeWeightInfluence=1.0, jitterTolerance=1.0, barnesHutOptimize=False, barnesHutTheta=1.2,
multiThreaded=False, scalingRatio=1.2, strongGravityMode=False, gravity=1.0, verbose=True)
# Node position for NetworkX
pos = forceatlas2.forceatlas2_networkx_layout(GO, pos=None, iterations=2000)
fig, ax = plt.subplots(1,1,figsize=(7,6), facecolor='w')
node_color = [dict_meta_color[d[module_attribute]] for n,d in GO.nodes(data=True)]
nx.draw_networkx_nodes(GO, ax=ax, pos=pos, cmap=plt.get_cmap('jet'), node_size=150, node_color=node_color, edgecolors='#b2b2b2')
nx.draw_networkx_edges(GO, pos=pos, ax=ax, edge_color='k', alpha=0.1, style='solid')
ax.set_title('{:s} - Original Graph - Meta-label modules'.format(project_title))
ax.axes.get_xaxis().set_visible(False); ax.axes.get_yaxis().set_visible(False)
plt.tight_layout()
#plt.show()
plt.savefig('images/{:s}/{:s}/graph_original_metalabels.png'.format(project,normalization), dpi=150)
fig,axes = plt.subplots(1,4,figsize=(10,3), facecolor='w')
((ax1,ax2,ax3,ax4)) = axes
node_size = 30
cmap = 'nipy_spectral'
# Original (Louvain)
ax = ax1
node_color = [d['module-louvain'] for n,d in GO.nodes(data=True)]
nx.draw_networkx_nodes(GO, pos=pos, ax=ax, cmap=plt.get_cmap(cmap), node_size=node_size, node_color=node_color, edgecolors='#b2b2b2')
edge_color = [d['weight'] for i,j,d in GO.edges(data=True)]
nx.draw_networkx_edges(GO, pos=pos, ax=ax, edge_color='k', alpha=0.1, style='solid')
# Metric (Louvain)
ax = ax2
node_color = [d['module-louvain'] for n,d in GM.nodes(data=True)]
nx.draw_networkx_nodes(GM, pos=pos, ax=ax, cmap=plt.get_cmap(cmap), node_size=node_size, node_color=node_color, edgecolors='#b2b2b2')
edge_color = [d['weight'] for i,j,d in GM.edges(data=True)]
nx.draw_networkx_edges(GM, pos=pos, ax=ax, edge_color='k', alpha=0.3, style='solid')
# Threshold (Louvain)
ax = ax3
node_color = [d['module-louvain'] for n,d in GT.nodes(data=True)]
nx.draw_networkx_nodes(GT, pos=pos, ax=ax, cmap=plt.get_cmap(cmap), node_size=node_size, node_color=node_color, edgecolors='#b2b2b2')
edge_color = [d['weight'] for i,j,d in GT.edges(data=True)]
nx.draw_networkx_edges(GT, pos=pos, ax=ax, edge_color='k', alpha=0.3, style='solid')
# Random (Louvain)
ax = ax4
node_color = [d['module-louvain'] for n,d in GR.nodes(data=True)]
nx.draw_networkx_nodes(GR, pos=pos, ax=ax, cmap=plt.get_cmap(cmap), node_size=node_size, node_color=node_color, edgecolors='#b2b2b2')
edge_color = [d['weight'] for i,j,d in GR.edges(data=True)]
nx.draw_networkx_edges(GR, pos=pos, ax=ax, edge_color='k', alpha=0.3, style='solid')
# Draw
for ax in axes.flatten():
ax.tick_params(axis='both', which='both', bottom='off', top='off', left='off', right='off', labelbottom='off', labelleft='off')
ax1.set_title('Original')
ax2.set_title('Metric')
ax3.set_title('Threshold')
ax4.set_title('Random')
ax1.set_ylabel('Louvain', rotation=90, ha='center')
plt.tight_layout()
#plt.show()
plt.savefig('images/{:s}/{:s}/graph_comparison.png'.format(project,normalization), dpi=150)
fig,axes = plt.subplots(1,4,figsize=(10,3), facecolor='w')
((ax1,ax2,ax3,ax4)) = axes
node_size = 30
cmap = 'nipy_spectral'
forceatlas2 = ForceAtlas2(outboundAttractionDistribution=False, linLogMode=False, adjustSizes=False,
edgeWeightInfluence=1.0, jitterTolerance=1.0, barnesHutOptimize=False, barnesHutTheta=1.2,
multiThreaded=False, scalingRatio=1.2, strongGravityMode=False, gravity=1.0, verbose=False)
posm = forceatlas2.forceatlas2_networkx_layout(GM, pos=pos, iterations=2000)
post = forceatlas2.forceatlas2_networkx_layout(GT, pos=pos, iterations=2000)
posr = forceatlas2.forceatlas2_networkx_layout(GR, pos=pos, iterations=2000)
# Original (Louvain)
ax = ax1
node_color = [d['module-louvain'] for n,d in GO.nodes(data=True)]
nx.draw_networkx_nodes(GO, pos=pos, ax=ax, cmap=plt.get_cmap(cmap), node_size=node_size, node_color=node_color, edgecolors='#b2b2b2')
edge_color = [d['weight'] for i,j,d in GO.edges(data=True)]
nx.draw_networkx_edges(GO, pos=pos, ax=ax, edge_color='k', alpha=0.1, style='solid')
# Metric (Louvain)
ax = ax2
node_color = [d['module-louvain'] for n,d in GM.nodes(data=True)]
nx.draw_networkx_nodes(GM, pos=posm, ax=ax, cmap=plt.get_cmap(cmap), node_size=node_size, node_color=node_color, edgecolors='#b2b2b2')
edge_color = [d['weight'] for i,j,d in GM.edges(data=True)]
nx.draw_networkx_edges(GM, pos=posm, ax=ax, edge_color='k', alpha=0.3, style='solid')
# Threshold (Louvain)
ax = ax3
node_color = [d['module-louvain'] for n,d in GT.nodes(data=True)]
nx.draw_networkx_nodes(GT, pos=post, ax=ax, cmap=plt.get_cmap(cmap), node_size=node_size, node_color=node_color, edgecolors='#b2b2b2')
edge_color = [d['weight'] for i,j,d in GT.edges(data=True)]
nx.draw_networkx_edges(GT, pos=post, ax=ax, edge_color='k', alpha=0.3, style='solid')
# Random (Louvain)
ax = ax4
node_color = [d['module-louvain'] for n,d in GR.nodes(data=True)]
nx.draw_networkx_nodes(GR, pos=posr, ax=ax, cmap=plt.get_cmap(cmap), node_size=node_size, node_color=node_color, edgecolors='#b2b2b2')
edge_color = [d['weight'] for i,j,d in GR.edges(data=True)]
nx.draw_networkx_edges(GR, pos=posr, ax=ax, edge_color='k', alpha=0.3, style='solid')
# Draw
for ax in axes.flatten():
ax.tick_params(axis='both', which='both', bottom='off', top='off', left='off', right='off', labelbottom='off', labelleft='off')
ax1.set_title('Original')
ax2.set_title('Metric')
ax3.set_title('Threshold')
ax4.set_title('Random')
ax1.set_ylabel('Louvain', rotation=90, ha='center')
plt.tight_layout()
#plt.show()
plt.savefig('images/{:s}/{:s}/graph_comparison_layout.png'.format(project,normalization), dpi=150)
Please refer to the index page for a detailed description of each formula.
\begin{equation} h_{A \to B} = \frac{ \sum_{i}^{m_A} H(\mathcal{A}_i) }{ m_A \cdot \log_2(m_B) } \quad , \quad h_{B \to A} = \frac{ \sum_{j}^{m_B} H(\mathcal{B}_j) }{ m_B \cdot \log_2(m_A) } \quad . \end{equation}
# Toy clusterings used to debug the similarity measures below (Luis Rocha)
A = {
'A1':set([4,5,6,7]),
'A2':set([8,9,10,11]),
'A3':set([1,2,3,12])
}
B = {
'B1':set([4,6,7]),
'B2':set([2,8,5,10,11]),
'B3':set([1,3,12,9])
}
C = {
'C1':set([]),
'C2':set([]),
'C3':set([]),
'C4':set([1,2,3,4,5,6,7,8,9,10,11])
}
def calculate_h(A,B):
# Intersection
df_I = pd.DataFrame.from_dict( OrderedDict([( i , OrderedDict([(j,iv.intersection(jv)) for j,jv in B.items()]) ) for i,iv in A.items()]), orient='index')
df_Ic = df_I.applymap(len) # size of sets
ma, mb = df_I.shape
#to == 'B'
sA = pd.Series(A, name='A')
sAc = sA.apply(len)
df_BA = df_Ic.divide(sAc, axis='index')  # align on module labels to be robust to ordering
sH = df_BA.apply(entropy, axis=1, base=2)
h_A2B = (1/(ma * math.log(mb,2))) * sH.sum()
#to =='A'
sB = pd.Series(B, name='B')
sBc = sB.apply(len)
df_AB = df_Ic.divide(sBc, axis='columns')  # align on module labels to be robust to ordering
sH = df_AB.apply(entropy, axis=0, base=2)
h_B2A = (1/(mb * math.log(ma,2))) * sH.sum()
#
return h_A2B, h_B2A
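A usage sketch (not part of the original analysis) on the toy clusterings A and B defined above; it simply exercises calculate_h and prints the two directed values.
# Sketch: entropy-based measure on the toy clusterings A and B.
hA2B, hB2A = calculate_h(A, B)
print 'toy h_(A->B): {:.3f}  h_(B->A): {:.3f}'.format(hA2B, hB2A)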
\begin{equation} y_{AB} = \frac{ \sum_{i}^{m_A} \sum_{j}^{m_B} P(A_i,B_j) }{ \sqrt{ m_A \cdot m_B} } \quad \text{where} \quad P(A_i,B_j) = \frac{ |A_i \cap B_j| }{ |A_i \cup B_j| } \quad . \end{equation}
def calculate_y(A,B):
df_I = pd.DataFrame.from_dict( OrderedDict([( i , OrderedDict([(j,iv.intersection(jv)) for j,jv in B.items()]) ) for i,iv in A.items()]), orient='index')
df_U = pd.DataFrame.from_dict( OrderedDict([( i , OrderedDict([(j,iv.union(jv)) for j,jv in B.items()]) ) for i,iv in A.items()]), orient='index')
ma, mb = df_I.shape
df_Ic = df_I.applymap(len)
df_Uc = df_U.applymap(len)
df_S = df_Ic.divide( df_Uc )
return df_S.sum().sum() / math.sqrt(ma * mb)
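A corresponding usage sketch for the symmetric measure, again on the toy clusterings; C (defined above, with empty modules) gives a degenerate comparison.
# Sketch: symmetric Jaccard-style measure on the toy clusterings.
print 'toy y_(AB): {:.3f}  y_(AC): {:.3f}'.format(calculate_y(A, B), calculate_y(A, C))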
\begin{equation} J_{A \to B} = \frac{ \sum_i^{m_A} \max_j^{m_B} P(A_i,B_j) }{ m_A } \quad , \quad J_{B \to A}= \frac{ \sum_j^{m_B} \max_i^{m_A} P(A_i,B_j) }{ m_B } \end{equation}
def calculate_j(A,B):
df_I = pd.DataFrame.from_dict( OrderedDict([( i , OrderedDict([(j,iv.intersection(jv)) for j,jv in B.items()]) ) for i,iv in A.items()]), orient='index')
df_U = pd.DataFrame.from_dict( OrderedDict([( i , OrderedDict([(j,iv.union(jv)) for j,jv in B.items()]) ) for i,iv in A.items()]), orient='index')
ma, mb = df_I.shape
df_Ic = df_I.applymap(len)
df_Uc = df_U.applymap(len)
df_S = df_Ic.divide( df_Uc )
sMaxA, sMaxB = df_S.max(axis=0), df_S.max(axis=1)
jA2B = sMaxB.sum() / ma
jB2A = sMaxA.sum() / mb
return jA2B, jB2A
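And a usage sketch for the best-match Jaccard measure on the same toy clusterings.
# Sketch: best-match Jaccard measure on the toy clusterings A and B.
jA2B, jB2A = calculate_j(A, B)
print 'toy J_(A->B): {:.3f}  J_(B->A): {:.3f}'.format(jA2B, jB2A)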
print 'A = Meta labels'
print 'B = Original proximity'
GOp = generate_original_graph(Gp)
compute_louvain(GOp)
_, _, A, _ = get_graph_variables(GOp, module_attribute)
_, _, B, _ = get_graph_variables(GOp, 'module-louvain')
hA2B, hB2A = calculate_h(A,B)
print "h_(A->B): {:.3f} h_(B->A): {:.3f}".format(hA2B,hB2A)
print 'A = Meta labels'
print 'B = Metric Backbone'
GOp = generate_original_graph(Gp)
GMp = generate_metric_graph(GOp)
compute_louvain(GOp)
compute_louvain(GMp)
_, _, A, _ = get_graph_variables(GOp, module_attribute)
_, _, B, _ = get_graph_variables(GMp, 'module-louvain')
hA2B, hB2A = calculate_h(A,B)
print "h_(A->B): {:.2f} h_(B->A): {:.2f}".format(hA2B,hB2A)
print
print 'A = Original proximity'
print 'B = Metric Backbone'
_, _, A, _ = get_graph_variables(GOp, 'module-louvain')
_, _, B, _ = get_graph_variables(GMp, 'module-louvain')
hA2B, hB2A = calculate_h(A,B)
print "h_(A->B): {:.2f} h_(B->A): {:.2f}".format(hA2B,hB2A)
print 'A = Meta labels'
print 'B = Threshold Backbone'
GOp = generate_original_graph(Gp)
GTp = generate_threshold_graph(GOp, e_metric)
compute_louvain(GOp)
compute_louvain(GTp)
_, _, A, _ = get_graph_variables(GOp, module_attribute)
_, _, B, _ = get_graph_variables(GTp, 'module-louvain')
hA2B, hB2A = calculate_h(A,B)
print "h_(A->B): {:.2f} h_(B->A): {:.2f}".format(hA2B,hB2A)
print
print 'A = Original proximity'
print 'B = Threshold (metric) Backbone'
_, _, A, _ = get_graph_variables(GOp, 'module-louvain')
_, _, B, _ = get_graph_variables(GTp, 'module-louvain')
hA2B, hB2A = calculate_h(A,B)
print "h_(A->B): {:.2f} h_(B->A): {:.2f}".format(hA2B,hB2A)
print 'A = Meta labels / Original proximity'
print 'B = Random Backbone'
d = {
('Meta labels','h_(A->B)'):[],
('Meta labels','h_(B->A)'):[],
('Original proximity','h_(A->B)'):[],
('Original proximity','h_(B->A)'):[],
}
GOp = generate_original_graph(Gp)
compute_louvain(GOp)
for GRp in generate_n_random_graphs(GOp,100,e_original-e_metric):
compute_louvain(GRp)
_, _, A, _ = get_graph_variables(GOp, module_attribute)
_, _, B, _ = get_graph_variables(GRp, 'module-louvain')
hA2B, hB2A = calculate_h(A,B)
d[('Meta labels','h_(A->B)')].append( hA2B )
d[('Meta labels','h_(B->A)')].append( hB2A )
_, _, A, _ = get_graph_variables(GOp, 'module-louvain')
_, _, B, _ = get_graph_variables(GRp, 'module-louvain')
hA2B, hB2A = calculate_h(A,B)
d[('Original proximity','h_(A->B)')].append( hA2B )
d[('Original proximity','h_(B->A)')].append( hB2A )
df = pd.DataFrame.from_dict(d)
#print df
df = df.apply(['mean','std'], axis=0).T
display(df)
print 'A = Meta labels'
print 'B = Original proximity'
GOp = generate_original_graph(Gp)
compute_louvain(GOp)
_, _, A, _ = get_graph_variables(GOp, module_attribute)
_, _, B, _ = get_graph_variables(GOp, 'module-louvain')
print "y_(AB): {:.3f}".format(calculate_y(A,B))
GOp = generate_original_graph(Gp)
GMp = generate_metric_graph(GOp)
compute_louvain(GOp)
compute_louvain(GMp)
print 'A = Meta labels'
print 'B = Metric Backbone'
_, _, A, _ = get_graph_variables(GOp, module_attribute)
_, _, B, _ = get_graph_variables(GMp, 'module-louvain')
print "y_(AB): {:.3f}".format(calculate_y(A,B))
print
print 'A = Original proximity'
print 'B = Metric Backbone'
_, _, A, _ = get_graph_variables(GOp, 'module-louvain')
_, _, B, _ = get_graph_variables(GMp, 'module-louvain')
print "y_(AB): {:.3f}".format(calculate_y(A,B))
GOp = generate_original_graph(Gp)
GTp = generate_threshold_graph(GOp, e_metric)
compute_louvain(GOp)
compute_louvain(GTp)
print 'A = Meta labels'
print 'B = Threshold (metric) Backbone'
_, _, A, _ = get_graph_variables(GOp, module_attribute)
_, _, B, _ = get_graph_variables(GTp, 'module-louvain')
print "y_(AB): {:.3f}".format(calculate_y(A,B))
print
print 'A = Original proximity'
print 'B = Threshold Backbone'
_, _, A, _ = get_graph_variables(GOp, 'module-louvain')
_, _, B, _ = get_graph_variables(GTp, 'module-louvain')
print "y_(AB): {:.3f}".format(calculate_y(A,B))
print 'A = Meta labels / Original proximity'
print 'B = Random Backbone'
d = {
('Meta label','y_(AB)'):[],
('Original proximity','y_(AB)'):[],
}
GOp = generate_original_graph(Gp)
compute_louvain(GOp)
for GRp in generate_n_random_graphs(GOp,100,e_original-e_metric):
compute_louvain(GRp)
_, _, A, _ = get_graph_variables(GOp, module_attribute)
_, _, B, _ = get_graph_variables(GRp, 'module-louvain')
d[('Meta label','y_(AB)')].append( calculate_y(A,B) )
_, _, A, _ = get_graph_variables(GOp, 'module-louvain')
_, _, B, _ = get_graph_variables(GRp, 'module-louvain')
d[('Original proximity','y_(AB)')].append( calculate_y(A,B) )
df = pd.DataFrame.from_dict(d)
#print df
df = df.apply(['mean','std'], axis=0).T
display(df)
print 'A = Meta labels'
print 'B = Original proximity'
GOp = generate_original_graph(Gp)
compute_louvain(GOp)
_, _, A, _ = get_graph_variables(GOp, module_attribute)
_, _, B, _ = get_graph_variables(GOp, 'module-louvain')
jA2B, jB2A = calculate_j(A,B)
print "J_(A->B): {:.2f} J_(B->A): {:.2f}".format(jA2B,jB2A)
print 'A = Meta labels'
print 'B = Metric Backbone'
GOp = generate_original_graph(Gp)
GMp = generate_metric_graph(GOp)
compute_louvain(GOp)
compute_louvain(GMp)
_, _, A, _ = get_graph_variables(GOp, module_attribute)
_, _, B, _ = get_graph_variables(GMp, 'module-louvain')
jA2B, jB2A = calculate_j(A,B)
print "J_(A->B): {:.2f} J_(B->A): {:.2f}".format(jA2B,jB2A)
print
print 'A = Original proximity'
print 'B = Metric Backbone'
_, _, A, _ = get_graph_variables(GOp, 'module-louvain')
_, _, B, _ = get_graph_variables(GMp, 'module-louvain')
jA2B, jB2A = calculate_j(A,B)
print "J_(A->B): {:.2f} J_(B->A): {:.2f}".format(jA2B,jB2A)
print 'A = Meta labels'
print 'B = Threshold Backbone'
GOp = generate_original_graph(Gp)
GTp = generate_threshold_graph(GOp, e_metric)
compute_louvain(GOp)
compute_louvain(GTp)
_, _, A, _ = get_graph_variables(GOp, module_attribute)
_, _, B, _ = get_graph_variables(GTp, 'module-louvain')
jA2B, jB2A = calculate_j(A,B)
print "J_(A->B): {:.2f} J_(B->A): {:.2f}".format(jA2B,jB2A)
print
print 'A = Original proximity'
print 'B = Threshold Backbone'
_, _, A, _ = get_graph_variables(GOp, 'module-louvain')
_, _, B, _ = get_graph_variables(GTp, 'module-louvain')
jA2B, jB2A = calculate_j(A,B)
print "J_(A->B): {:.2f} J_(B->A): {:.2f}".format(jA2B,jB2A)
print 'A = Meta labels'
print 'B = Random Backbone'
d = {
('Meta label','J_(A->B)'):[],
('Meta label','J_(B->A)'):[],
#
('Original proximity','J_(A->B)'):[],
('Original proximity','J_(B->A)'):[],
}
GOp = generate_original_graph(Gp)
compute_louvain(GOp)
for GRp in generate_n_random_graphs(GOp,100,e_original-e_metric):
compute_louvain(GRp)
_, _, A, _ = get_graph_variables(GOp, module_attribute)
_, _, B, _ = get_graph_variables(GRp, 'module-louvain')
jA2B, jB2A = calculate_j(A,B)
d[('Meta label','J_(A->B)')].append( jA2B )
d[('Meta label','J_(B->A)')].append( jB2A )
_, _, A, _ = get_graph_variables(GOp, 'module-louvain')
_, _, B, _ = get_graph_variables(GRp, 'module-louvain')
jA2B, jB2A = calculate_j(A,B)
d[('Original proximity','J_(A->B)')].append( jA2B )
d[('Original proximity','J_(B->A)')].append( jB2A )
df = pd.DataFrame.from_dict(d)
#print df
df = df.apply(['mean','std'], axis=0).T
display(df)
Paper: A.J. Gates, I.B. Wood, W.P. Hetrick, Y. Ahn. (2017). On comparing clusterings: an element-centric framework unifies overlaps and hierarchy. arXiv preprint arXiv:1706.06136
print 'A = Meta labels'
print 'B = Original Proximity'
GOp = generate_original_graph(Gp)
compute_louvain(GOp)
_, _, A, _ = get_graph_variables(GOp, module_attribute)
_, _, B, _ = get_graph_variables(GOp, 'module-louvain')
CA = Clustering(clus2elm_dict=A)
CB = Clustering(clus2elm_dict=B)
print "Clusim: {:.2f}".format( element_sim(CA,CB) )
print 'A = Meta labels'
print 'B = Metric Backbone'
GOp = generate_original_graph(Gp)
GMp = generate_metric_graph(GOp)
compute_louvain(GOp)
compute_louvain(GMp)
_, _, A, _ = get_graph_variables(GOp, module_attribute)
_, _, B, _ = get_graph_variables(GMp, 'module-louvain')
CA = Clustering(clus2elm_dict=A)
CB = Clustering(clus2elm_dict=B)
print "Clusim: {:.2f}".format( element_sim(CA,CB) )
print 'A = Original Proximity'
print 'B = Metric Backbone'
_, _, A, _ = get_graph_variables(GOp, 'module-louvain')
_, _, B, _ = get_graph_variables(GMp, 'module-louvain')
CA = Clustering(clus2elm_dict=A)
CB = Clustering(clus2elm_dict=B)
print "Clusim: {:.2f}".format( element_sim(CA,CB) )
print 'A = Meta labels'
print 'B = Threshold Backbone'
GOp = generate_original_graph(Gp)
GTp = generate_threshold_graph(GOp, e_metric)
compute_louvain(GOp)
compute_louvain(GTp)
_, _, A, _ = get_graph_variables(GOp, module_attribute)
_, _, B, _ = get_graph_variables(GTp, 'module-louvain')
CA = Clustering(clus2elm_dict=A)
CB = Clustering(clus2elm_dict=B)
print "Clusim: {:.2f}".format( element_sim(CA,CB) )
print 'A = Original Proximity'
print 'B = Threshold Backbone'
_, _, A, _ = get_graph_variables(GOp, 'module-louvain')
_, _, B, _ = get_graph_variables(GTp, 'module-louvain')
CA = Clustering(clus2elm_dict=A)
CB = Clustering(clus2elm_dict=B)
print "Clusim: {:.2f}".format( element_sim(CA,CB) )
print 'A = Meta labels'
print 'B = Random Backbone'
d = {
('Meta label','AB'):[],
('Original proximity','AB'):[],
}
GOp = generate_original_graph(Gp)
compute_louvain(GOp)
for GRp in generate_n_random_graphs(GOp,100,e_original-e_metric):
compute_louvain(GRp)
_, _, A, _ = get_graph_variables(GOp, module_attribute)
_, _, B, _ = get_graph_variables(GRp, 'module-louvain')
CA = Clustering(clus2elm_dict=A)
CB = Clustering(clus2elm_dict=B)
d[('Meta label','AB')].append( element_sim(CA,CB) )
_, _, A, _ = get_graph_variables(GOp, 'module-louvain')
_, _, B, _ = get_graph_variables(GRp, 'module-louvain')
CA = Clustering(clus2elm_dict=A)
CB = Clustering(clus2elm_dict=B)
d[('Original proximity','AB')].append( element_sim(CA,CB) )
df = pd.DataFrame.from_dict(d)
#print df
df = df.apply(['mean','std'], axis=0).T
display(df)