Network & Metric Backbone comparison

Network: Primary school


Reference: Juliette Stehlé, Nicolas Voirin, Alain Barrat, Ciro Cattuto, Lorenzo Isella, Jean-François Pinton, Marco Quaggiotto, Wouter Van den Broeck, Corinne Régis, Bruno Lina and Philippe Vanhems. High-Resolution Measurements of Face-to-Face Contact Patterns in a Primary School. PLOS ONE 6(8): e23176 (2011). doi:10.1371/journal.pone.0023176

Original data: http://www.sociopatterns.org/datasets/primary-school-cumulative-networks/

In [1]:
%matplotlib inline
from __future__ import division
import numpy as np
import pandas as pd
pd.set_option('display.precision', 2)
# Network
import networkx as nx
import community # louvain
from fa2 import ForceAtlas2
from distanceclosure.utils import _dist2prox as dist2prox
# Matplotlib
import matplotlib as mpl
import matplotlib.style
mpl.style.use('default')
mpl.rcParams['mathtext.fontset'] = 'cm'
mpl.rcParams['mathtext.rm'] = 'serif'
import matplotlib.pyplot as plt
# Others
import math
import random
from IPython.core.display import display, Math
from collections import OrderedDict
from scipy.stats import entropy
# clusim
from clusim.clustering import Clustering
from clusim.clusimelement import element_sim, element_sim_elscore

Init parameters

In [2]:
# SocioPatterns Network
project = 'primary-school'
# Normalization used to build the proximity weights
normalization = 'social' # [options:] 'time', 'time_all' or 'social'
# Network File
rGpickle  = 'results/%s/%s-%s.gpickle' % (project, project, normalization)
# The Networkx node attribute containing the Meta labels
module_attribute = 'class'
# Metadata node colors
dict_meta_color = {
    'Teachers':'#4c4c4c', #gray
    '1A':'#009999', # Cyan
    '1B':'#66ffff',
    '2A':'#004c00', #Green
    '2B':'#66b266',
    '3B':'#996300', # Orange
    '3A':'#ffc966',
    '4A':'#000099', # Blue
    '4B':'#6666ff',
    '5A':'#990000', # Red
    '5B':'#ff6666'
}
# Title for plots 
project_title = 'Primary school'

Loading Network

Both the metric and the ultrametric backbones have been computed with the distanceclosure package. Below we simply load a NetworkX graph that contains metric and semi-metric information for each edge.

In [3]:
G = nx.read_gpickle(rGpickle)
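For intuition, each edge in G carries a boolean 'metric' flag: an edge is metric when its direct distance is already a shortest path between its endpoints (it survives the metric closure), and semi-metric when some indirect path is shorter. The sketch below (not an executed cell) shows how this flag could be double-checked from the loaded graph; it assumes the 'original' and 'distance' edge attributes used later in this notebook.

# Sanity-check sketch: recompute the metric flag from the original edges only.
def recompute_metric_flags(G):
    H = nx.Graph()
    # keep only the original edges, with their direct 'distance' values
    H.add_edges_from((i, j, d) for i, j, d in G.edges(data=True) if d.get('original'))
    # shortest-path (metric closure) distances over the original graph
    spl = dict(nx.all_pairs_dijkstra_path_length(H, weight='distance'))
    for i, j, d in H.edges(data=True):
        # metric edge: the direct distance equals the shortest-path distance
        d['metric_check'] = bool(np.isclose(d['distance'], spl[i][j]))
    return H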

Exception rules

In the Primary School data set, we manually reassign the teachers, who originally form their own module, to the module of their respective class. Variables suffixed with 'p' denote the corresponding ' (prime) variant.

In [4]:
Gp = G.copy()
Gp.node[1753]['class'] = '1A'
Gp.node[1745]['class'] = '1B'
Gp.node[1852]['class'] = '2A'
Gp.node[1650]['class'] = '2B'
Gp.node[1746]['class'] = '3A'
Gp.node[1709]['class'] = '3B'
Gp.node[1653]['class'] = '4A'
Gp.node[1521]['class'] = '4B'
Gp.node[1668]['class'] = '5A'
Gp.node[1824]['class'] = '5B'

Helper functions

These helper functions retrieve information from the NetworkX Graph object.

In [5]:
def get_graph_variables(G, *arg, **kwargs):
    dM = nx.get_node_attributes(G, *arg)
    s = set(dM.values())
    n = len(s)
    sM = { m : set([k for k,v in dM.items() if v==m]) for m in s }
    #
    return n, s, sM, dM
In [6]:
# Strip G to the original Graph, without additional metric closure edges.
def generate_original_graph(G):
    GO = G.copy()
    edges2remove = [(i,j) for i,j,d in G.edges(data=True) if 'original' not in d]
    GO.remove_edges_from(edges2remove)
    return GO

GO = generate_original_graph(G)
GOp = generate_original_graph(Gp)
In [7]:
# Metric Graph, only metric edges
def generate_metric_graph(G):
    GM = G.copy()
    edges2remove = [(i,j) for i,j,d in G.edges(data=True) if d['metric']==False]
    GM.remove_edges_from(edges2remove)
    return GM

GM = generate_metric_graph(G)
GMp = generate_metric_graph(Gp)

Graph Statistics

Displays some basic statistics for comparison.

In [8]:
nO, sO, sMO, dMO = get_graph_variables(GO, module_attribute)  
nOp, sOp, sMOp, dMOp = get_graph_variables(GOp, module_attribute)
In [9]:
n = G.number_of_nodes()
isolates = len( list( nx.isolates(G) ) )
isolates_percent = isolates/n

e_possible = int(( (n*n) - n) / 2)
e_total = G.number_of_edges()

original_components = nx.number_connected_components(GO)

n_metalabels = len(np.unique(nx.get_node_attributes(GO, module_attribute).values()))
n_metalabels_exception = len(np.unique(nx.get_node_attributes(GOp, module_attribute).values()))

e_original = 0
e_metric = 0
e_ultrametric = 0
e_semimetric = 0
e_s_gt_1 = 0
e_s_gt_1_original = 0
e_d_eq_infty = 0
e_bij_gt_1 = 0
e_bji_gt_1 = 0
distortion = 0

for eid,(i,j,d) in enumerate( G.edges(data=True), start=0):

    # Original Edges
    if (d.get('original')==True):
        e_original += 1
    # Metric Edges
    if (d.get('metric')==True):
        e_metric += 1 
    # UltraMetric Edges
    if (d.get('ultrametric')==True):
        e_ultrametric += 1

    # Semi-metric edges
    if (d.get('metric')==False):
        e_semimetric += 1

    # S values
    if (d.get('s_value'))>1.0:
        e_s_gt_1 += 1
    if (d.get('s_value'))>1.0 and (d.get('original')==True):
        e_s_gt_1_original += 1

    if (d.get('distance')==np.inf):
        e_d_eq_infty += 1

    # B_ij values
    if (d.get('b_ij_value'))>1.0:
        e_bij_gt_1 += 1

    # B_ji values
    if (d.get('b_ji_value'))>1.0:
        e_bji_gt_1 += 1

    # Distortion

    distortion += abs(dist2prox(d['distance_metric_closure']) - d.get('proximity'))
    

distortion_norm = (2 * distortion) / (n * (n - 1))
e_original_percent = e_original/e_total
e_metric_percent = e_metric/e_original
e_semimetric_percent = e_semimetric/e_total
e_s_gt_1_percent = e_s_gt_1/e_total
e_s_gt_1_original_percent = e_s_gt_1_original/e_original
e_d_eq_infty_percent = e_d_eq_infty/e_total
e_bij_gt_1_percent = e_bij_gt_1/e_total
e_bji_gt_1_percent = e_bji_gt_1/e_total

print 'Meta-labels: {:,d}'.format(n_metalabels) 
# EXCEPTION
print 'Meta-labels (exception): {:,d}'.format(n_metalabels_exception)

display(Math('D_w'))
print 'Nodes: {:,d}'.format(n)
print 'Possible Edges: {:,d}'.format(e_possible)
print 'Edges: {:,d} ({:.2%} of possible edges)'.format(e_original , e_original_percent)
print 'Connected Components: {:,d}'.format(original_components)
print 'Isolates: {:,d} ({:.2%})'.format(isolates, isolates_percent)
if (original_components == 1) and (e_possible != e_total):
    raise Exception('After the closure, the graph must be fully connected ({:d} != {:d}), since the original graph has a single connected component'.format(e_possible, e_total))

display(Math('B_w'))
print 'Metric Edges: {:,d} ({:.2%} of the original {:,d} edges)'.format(e_metric, e_metric_percent, e_original)

display(Math('D^C_w'))
print 'Semi-Metric edges: {:,d} ({:.2%})'.format(e_semimetric, e_semimetric_percent)
print 'S>1: {:,d} ({:.2%} of original edges; {:.2%} of the total edges)'.format(e_s_gt_1, e_s_gt_1_original_percent, e_s_gt_1_percent)
print 'B_ij>1 , B_ji>1: {:,d} ({:.2%} of total) , {:,d} ({:.2%} of total)'.format(e_bij_gt_1, e_bij_gt_1_percent , e_bji_gt_1, e_bji_gt_1_percent)
print 'Edges where D=\infty: {:,d} ({:.2%} of the total {:,d} edges)'.format(e_d_eq_infty, e_d_eq_infty_percent, e_total)

print 'Distortion \delta (\Delta): {:.4f} ({:.2f})'.format(distortion_norm, distortion)
Meta-labels: 11
Meta-labels (exception): 10
$$D_w$$
Nodes: 242
Possible Edges: 29,161
Edges: 8,317 (28.52% of possible edges)
Connected Components: 1
Isolates: 0 (0.00%)
$$B_w$$
Metric Edges: 790 (9.50% of the original 8,317 edges)
$$D^C_w$$
Semi-Metric edges: 28,371 (97.29%)
S>1: 7,527 (90.50% of original edges; 25.81% of the total edges)
B_ij>1 , B_ji>1: 20,844 (71.48% of total) , 20,844 (71.48% of total)
Edges where D=\infty: 20,844 (71.48% of the total 29,161 edges)
Distortion \delta (\Delta): 0.0082 (238.17)
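For reference, the distortion reported above is computed directly from the edge attributes: $\Delta$ accumulates, over all node pairs, the absolute difference between the proximity implied by the metric-closure distance and the original proximity, and $\delta$ normalizes it by the number of node pairs:

\begin{equation} \Delta = \sum_{i<j} \left| p\left(d^{C}_{ij}\right) - p_{ij} \right| \quad , \quad \delta = \frac{2 \Delta}{n(n-1)} \quad , \end{equation}

where $p(\cdot)$ is the distance-to-proximity conversion implemented by dist2prox (assumed here to be $p = 1/(d+1)$).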

(Null Model) Threshold Backbone

The threshold backbone is a null model where edges with the smallest proximity are removed until the same number of edges as the metric backbone is achieved.

In [10]:
# Keep only the `edges_to_keep` largest-weight edges, so that the network ends up with the same number of edges as the metric backbone (e_metric).
def generate_threshold_graph(G, edges_to_keep=0):
    GT = G.copy()
    edges2remove = sorted(GT.edges(data=True), key=lambda x: x[2]['weight'])[ : -edges_to_keep ]
    GT.remove_edges_from(edges2remove)
    return GT

GT = generate_threshold_graph(GO, e_metric)

(Null Model) Random Backbone

The random backbone is a null model where edges are removed at random until the same number of edges as the metric backbone network is achieved.

In [11]:
# Copy the graph and remove `edges_to_remove` edges chosen uniformly at random
def generate_random_graph(G, edges_to_remove=0):
    GR = G.copy()
    edges2remove = random.sample(GR.edges(data=True), edges_to_remove)
    GR.remove_edges_from(edges2remove)
    return GR

# A generative function to yield 'n' random graphs
def generate_n_random_graphs(G, n=1, *arg, **kwargs):
    for i in xrange(n):
        yield generate_random_graph(G, *arg)

GR = generate_random_graph(GO, e_original-e_metric)

Modularity Algorithms

Louvain Modularity

In [12]:
def compute_louvain(G):
    dM = community.best_partition(G)
    nx.set_node_attributes(G, name='module-louvain', values=dM)
In [13]:
# Original
compute_louvain(GO)
n, s, sM, dM = get_graph_variables(GO, 'module-louvain')
print "G Original Louvain : {:d}".format(n)

# Metric
compute_louvain(GM)
n, s, sM, dM = get_graph_variables(GM, 'module-louvain')
print "G Metric Louvain   : {:d}".format(n)

# Threshold
compute_louvain(GT)
n, s, sM, dM = get_graph_variables(GT, 'module-louvain')
print "G Threshold Louvain: {:d}".format(n)

# Random
compute_louvain(GR)
ns = []
for i in xrange(100):
    _GR = generate_random_graph(GO, e_original-e_metric)
    compute_louvain(_GR)
    n, s, sM, dM = get_graph_variables(_GR, 'module-louvain')
    ns.append(n)
print "G Random Louvain   : {:.2f}±{:.2f}".format(np.mean(ns), np.std(ns))
G Original Louvain : 8
G Metric Louvain   : 9
G Threshold Louvain: 10
G Random Louvain   : 21.14±2.64

Visualizing Networks

Node layout is computed with a Python implementation of Gephi's ForceAtlas2 algorithm.

In [14]:
forceatlas2 = ForceAtlas2(outboundAttractionDistribution=False, linLogMode=False, adjustSizes=False,
    edgeWeightInfluence=1.0, jitterTolerance=1.0, barnesHutOptimize=False, barnesHutTheta=1.2,
    multiThreaded=False, scalingRatio=1.2, strongGravityMode=False, gravity=1.0, verbose=True)
# Node position for NetworkX
pos = forceatlas2.forceatlas2_networkx_layout(GO, pos=None, iterations=2000)
100%|██████████| 2000/2000 [00:01<00:00, 1023.46it/s]
('Repulsion forces', ' took ', '0.66', ' seconds')
('Gravitational forces', ' took ', '0.06', ' seconds')
('Attraction forces', ' took ', '0.39', ' seconds')
('AdjustSpeedAndApplyForces step', ' took ', '0.29', ' seconds')

Original Network

In [15]:
fig, ax = plt.subplots(1,1,figsize=(7,6), facecolor='w')

node_color = [dict_meta_color[d[module_attribute]] for n,d in GO.nodes(data=True)]
nx.draw_networkx_nodes(GO, ax=ax, pos=pos, cmap=plt.get_cmap('jet'), node_size=150, node_color=node_color, edgecolors='#b2b2b2')
nx.draw_networkx_edges(GO, pos=pos, ax=ax, edge_color='k', alpha=0.1, style='solid')

ax.set_title('{:s} - Original Graph - Meta-label modules'.format(project_title))
ax.axes.get_xaxis().set_visible(False); ax.axes.get_yaxis().set_visible(False)

plt.tight_layout()
#plt.show()
plt.savefig('images/{:s}/{:s}/graph_original_metalabels.png'.format(project,normalization), dpi=150)
In [16]:
fig,axes = plt.subplots(1,4,figsize=(10,3), facecolor='w')
((ax1,ax2,ax3,ax4)) = axes

node_size = 30
cmap = 'nipy_spectral'

# Original (Louvain)
ax = ax1
node_color = [d['module-louvain'] for n,d in GO.nodes(data=True)]
nx.draw_networkx_nodes(GO, pos=pos, ax=ax, cmap=plt.get_cmap(cmap), node_size=node_size, node_color=node_color, edgecolors='#b2b2b2')
edge_color = [d['weight'] for i,j,d in GO.edges(data=True)]
nx.draw_networkx_edges(GO, pos=pos, ax=ax, edge_color='k', alpha=0.1, style='solid')

# Metric (Louvain)
ax = ax2
node_color = [d['module-louvain'] for n,d in GM.nodes(data=True)]
nx.draw_networkx_nodes(GM, pos=pos, ax=ax, cmap=plt.get_cmap(cmap), node_size=node_size, node_color=node_color, edgecolors='#b2b2b2')
edge_color = [d['weight'] for i,j,d in GM.edges(data=True)]
nx.draw_networkx_edges(GM, pos=pos, ax=ax, edge_color='k', alpha=0.3, style='solid')

# Threshold (Louvain)
ax = ax3
node_color = [d['module-louvain'] for n,d in GT.nodes(data=True)]
nx.draw_networkx_nodes(GT, pos=pos, ax=ax, cmap=plt.get_cmap(cmap), node_size=node_size, node_color=node_color, edgecolors='#b2b2b2')
edge_color = [d['weight'] for i,j,d in GT.edges(data=True)]
nx.draw_networkx_edges(GT, pos=pos, ax=ax, edge_color='k', alpha=0.3, style='solid')

# Random (Louvain)
ax = ax4
node_color = [d['module-louvain'] for n,d in GR.nodes(data=True)]
nx.draw_networkx_nodes(GR, pos=pos, ax=ax, cmap=plt.get_cmap(cmap), node_size=node_size, node_color=node_color, edgecolors='#b2b2b2')
edge_color = [d['weight'] for i,j,d in GR.edges(data=True)]
nx.draw_networkx_edges(GR, pos=pos, ax=ax, edge_color='k', alpha=0.3, style='solid')


# Draw
for ax in axes.flatten():
    ax.tick_params(axis='both', which='both', bottom='off', top='off', left='off', right='off', labelbottom='off', labelleft='off')

ax1.set_title('Original')
ax2.set_title('Metric')
ax3.set_title('Threshold')
ax4.set_title('Random')

ax1.set_ylabel('Louvain', rotation=90, ha='center')

plt.tight_layout()
#plt.show()
plt.savefig('images/{:s}/{:s}/graph_comparison.png'.format(project,normalization), dpi=150)
In [17]:
fig,axes = plt.subplots(1,4,figsize=(10,3), facecolor='w')
((ax1,ax2,ax3,ax4)) = axes

node_size = 30
cmap = 'nipy_spectral'

forceatlas2 = ForceAtlas2(outboundAttractionDistribution=False, linLogMode=False, adjustSizes=False,
    edgeWeightInfluence=1.0, jitterTolerance=1.0, barnesHutOptimize=False, barnesHutTheta=1.2,
    multiThreaded=False, scalingRatio=1.2, strongGravityMode=False, gravity=1.0, verbose=False)
posm = forceatlas2.forceatlas2_networkx_layout(GM, pos=pos, iterations=2000)
post = forceatlas2.forceatlas2_networkx_layout(GT, pos=pos, iterations=2000)
posr = forceatlas2.forceatlas2_networkx_layout(GR, pos=pos, iterations=2000)

# Original (Louvain)
ax = ax1
node_color = [d['module-louvain'] for n,d in GO.nodes(data=True)]
nx.draw_networkx_nodes(GO, pos=pos, ax=ax, cmap=plt.get_cmap(cmap), node_size=node_size, node_color=node_color, edgecolors='#b2b2b2')
edge_color = [d['weight'] for i,j,d in GO.edges(data=True)]
nx.draw_networkx_edges(GO, pos=pos, ax=ax, edge_color='k', alpha=0.1, style='solid')

# Metric (Louvain)
ax = ax2
node_color = [d['module-louvain'] for n,d in GM.nodes(data=True)]
nx.draw_networkx_nodes(GM, pos=posm, ax=ax, cmap=plt.get_cmap(cmap), node_size=node_size, node_color=node_color, edgecolors='#b2b2b2')
edge_color = [d['weight'] for i,j,d in GM.edges(data=True)]
nx.draw_networkx_edges(GM, pos=posm, ax=ax, edge_color='k', alpha=0.3, style='solid')

# Threshold (Louvain)
ax = ax3
node_color = [d['module-louvain'] for n,d in GT.nodes(data=True)]
nx.draw_networkx_nodes(GT, pos=post, ax=ax, cmap=plt.get_cmap(cmap), node_size=node_size, node_color=node_color, edgecolors='#b2b2b2')
edge_color = [d['weight'] for i,j,d in GT.edges(data=True)]
nx.draw_networkx_edges(GT, pos=post, ax=ax, edge_color='k', alpha=0.3, style='solid')

# Random (Louvain)
ax = ax4
node_color = [d['module-louvain'] for n,d in GR.nodes(data=True)]
nx.draw_networkx_nodes(GR, pos=posr, ax=ax, cmap=plt.get_cmap(cmap), node_size=node_size, node_color=node_color, edgecolors='#b2b2b2')
edge_color = [d['weight'] for i,j,d in GR.edges(data=True)]
nx.draw_networkx_edges(GR, pos=posr, ax=ax, edge_color='k', alpha=0.3, style='solid')


# Draw
for ax in axes.flatten():
    ax.tick_params(axis='both', which='both', bottom='off', top='off', left='off', right='off', labelbottom='off', labelleft='off')

ax1.set_title('Original')
ax2.set_title('Metric')
ax3.set_title('Threshold')
ax4.set_title('Random')

ax1.set_ylabel('Louvain', rotation=90, ha='center')

plt.tight_layout()
#plt.show()
plt.savefig('images/{:s}/{:s}/graph_comparison_layout.png'.format(project,normalization), dpi=150)

Quantifying module similarity between Backbone and Original Network

Please refer to the index page for a detailed description of each formula.

\begin{equation} h_{A \to B} = \frac{ \sum_{i}^{m_A} H(\mathcal{A}_i) }{ m_A \cdot \log_2(m_B) } \quad , \quad h_{B \to A} = \frac{ \sum_{j}^{m_B} H(\mathcal{B}_j) }{ m_B \cdot \log_2(m_A) } \quad . \end{equation}

In [18]:
# Toy clusterings (debug example) used to sanity-check the similarity measures below
A = {
    'A1':set([4,5,6,7]),
    'A2':set([8,9,10,11]),
    'A3':set([1,2,3,12])
}
B = {
    'B1':set([4,6,7]),
    'B2':set([2,8,5,10,11]),
    'B3':set([1,3,12,9])
}
C = {
    'C1':set([]),
    'C2':set([]),
    'C3':set([]),
    'C4':set([1,2,3,4,5,6,7,8,9,10,11])
}
In [19]:
def calculate_h(A,B):
    # Pairwise intersections between the modules of A (rows) and B (columns)
    df_I = pd.DataFrame.from_dict( OrderedDict([( i , OrderedDict([(j,iv.intersection(jv)) for j,jv in B.items()]) ) for i,iv in A.items()]), orient='index')
    df_Ic = df_I.applymap(len) # size of the intersection sets
    ma, mb = df_I.shape
    # h_{A -> B}: normalized entropy of how each A-module is spread over the B-modules
    sA = pd.Series(A, name='A')
    sAc = sA.apply(len)
    df_BA = df_Ic.divide(sAc.values, axis='index')
    sH = df_BA.apply(entropy, axis=1, base=2)
    h_A2B = (1/(ma * math.log(mb,2))) * sH.sum()
    # h_{B -> A}: normalized entropy of how each B-module is spread over the A-modules
    sB = pd.Series(B, name='B')
    sBc = sB.apply(len)
    df_AB = df_Ic.divide(sBc.values, axis='columns')
    sH = df_AB.apply(entropy, axis=0, base=2)
    h_B2A = (1/(mb * math.log(ma,2))) * sH.sum()
    #
    return h_A2B, h_B2A

\begin{equation} y_{AB} = \frac{ \sum_{i}^{m_A} \sum_{j}^{m_B} P(A_i,B_j) }{ \sqrt{ m_A \cdot m_B} } \quad \text{where} \quad P(A_i,B_j) = \frac{ |A_i \cap B_j| }{ |A_i \cup B_j| } \quad . \end{equation}

In [20]:
def calculate_y(A,B):
    df_I = pd.DataFrame.from_dict( OrderedDict([( i , OrderedDict([(j,iv.intersection(jv)) for j,jv in B.items()]) ) for i,iv in A.items()]), orient='index')
    df_U = pd.DataFrame.from_dict( OrderedDict([( i , OrderedDict([(j,iv.union(jv)) for j,jv in B.items()]) ) for i,iv in A.items()]), orient='index')
    ma, mb = df_I.shape
    df_Ic = df_I.applymap(len)
    df_Uc = df_U.applymap(len)
    df_S = df_Ic.divide( df_Uc )
    return df_S.sum().sum() / math.sqrt(ma * mb)

\begin{equation} J_{A \to B} = \frac{ \sum_i^{m_A} \max_j^{m_B} P(A_i,B_j) }{ m_A } \quad , \quad J_{B \to A}= \frac{ \sum_j^{m_B} \max_i^{m_A} P(A_i,B_j) }{ m_B } \end{equation}

In [21]:
def calculate_j(A,B):
    df_I = pd.DataFrame.from_dict( OrderedDict([( i , OrderedDict([(j,iv.intersection(jv)) for j,jv in B.items()]) ) for i,iv in A.items()]), orient='index')
    df_U = pd.DataFrame.from_dict( OrderedDict([( i , OrderedDict([(j,iv.union(jv)) for j,jv in B.items()]) ) for i,iv in A.items()]), orient='index')
    ma, mb = df_I.shape
    df_Ic = df_I.applymap(len)
    df_Uc = df_U.applymap(len)
    df_S = df_Ic.divide( df_Uc )
    sMaxA, sMaxB = df_S.max(axis=0), df_S.max(axis=1)
    jA2B = sMaxB.sum() / ma
    jB2A = sMaxA.sum() / mb
    return jA2B, jB2A
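As a quick sanity check, the three measures can be applied to the toy clusterings A and B defined above (a sketch, not an executed cell; outputs omitted):

hA2B, hB2A = calculate_h(A, B)
yAB = calculate_y(A, B)
jA2B, jB2A = calculate_j(A, B)
print "toy h_(A->B): {:.2f}  h_(B->A): {:.2f}".format(hA2B, hB2A)
print "toy y_(AB): {:.2f}".format(yAB)
print "toy J_(A->B): {:.2f}  J_(B->A): {:.2f}".format(jA2B, jB2A)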

$h_{A \to B}$ and $h_{B \to A}$

In [22]:
print 'A = Meta labels'
print 'B = Original proximity'

GOp = generate_original_graph(Gp)
compute_louvain(GOp)

_, _, A, _ = get_graph_variables(GOp, module_attribute)
_, _, B, _ = get_graph_variables(GOp, 'module-louvain')
hA2B, hB2A = calculate_h(A,B)
print "h_(A->B): {:.3f} h_(B->A): {:.3f}".format(hA2B,hB2A)
A = Meta labels
B = Original proximity
h_(A->B): 0.022 h_(B->A): 0.095
In [23]:
print 'A = Meta labels'
print 'B = Metric Backbone'

GOp = generate_original_graph(Gp)
GMp = generate_metric_graph(GOp)

compute_louvain(GOp)
compute_louvain(GMp)

_, _, A, _ = get_graph_variables(GOp, module_attribute)
_, _, B, _ = get_graph_variables(GMp, 'module-louvain')
hA2B, hB2A = calculate_h(A,B)
print "h_(A->B): {:.2f} h_(B->A): {:.2f}".format(hA2B,hB2A)
print
print 'A = Original proximity'
print 'B = Metric Backbone'

_, _, A, _ = get_graph_variables(GOp, 'module-louvain')
_, _, B, _ = get_graph_variables(GMp, 'module-louvain')
hA2B, hB2A = calculate_h(A,B)
print "h_(A->B): {:.2f} h_(B->A): {:.2f}".format(hA2B,hB2A)
A = Meta labels
B = Metric Backbone
h_(A->B): 0.02 h_(B->A): 0.05

A = Original proximity
B = Metric Backbone
h_(A->B): 0.10 h_(B->A): 0.07
In [24]:
print 'A = Meta labels'
print 'B = Threshold Backbone'

GOp = generate_original_graph(Gp)
GTp = generate_threshold_graph(GOp, e_metric)

compute_louvain(GOp)
compute_louvain(GTp)

_, _, A, _ = get_graph_variables(GOp, module_attribute)
_, _, B, _ = get_graph_variables(GTp, 'module-louvain')
hA2B, hB2A = calculate_h(A,B)
print "h_(A->B): {:.2f} h_(B->A): {:.2f}".format(hA2B,hB2A)
print
print 'A = Original proximity'
print 'B = Threshold (metric) Backbone'

_, _, A, _ = get_graph_variables(GOp, 'module-louvain')
_, _, B, _ = get_graph_variables(GTp, 'module-louvain')
hA2B, hB2A = calculate_h(A,B)
print "h_(A->B): {:.2f} h_(B->A): {:.2f}".format(hA2B,hB2A)
A = Meta labels
B = Threshold Backbone
h_(A->B): 0.09 h_(B->A): 0.09

A = Original proximity
B = Threshold (metric) Backbone
h_(A->B): 0.14 h_(B->A): 0.07
In [25]:
print 'A = Meta labels / Original proximity'
print 'B = Random Backbone'

d = {
    ('Meta labels','h_(A->B)'):[],
    ('Meta labels','h_(B->A)'):[],
    ('Original proximity','h_(A->B)'):[],
    ('Original proximity','h_(B->A)'):[],
}

GOp = generate_original_graph(Gp)
compute_louvain(GOp)

for GRp in generate_n_random_graphs(GOp,100,e_original-e_metric):
    compute_louvain(GRp)

    _, _, A, _ = get_graph_variables(GOp, module_attribute)
    _, _, B, _ = get_graph_variables(GRp, 'module-louvain')
    hA2B, hB2A = calculate_h(A,B)
    d[('Meta labels','h_(A->B)')].append( hA2B )
    d[('Meta labels','h_(B->A)')].append( hB2A )
    
    _, _, A, _ = get_graph_variables(GOp, 'module-louvain')
    _, _, B, _ = get_graph_variables(GRp, 'module-louvain')
    hA2B, hB2A = calculate_h(A,B)
    d[('Original proximity','h_(A->B)')].append( hA2B )
    d[('Original proximity','h_(B->A)')].append( hB2A )
    
df = pd.DataFrame.from_dict(d)
#print df
df = df.apply(['mean','std'], axis=0).T
display(df)
A = Meta labels / Original proximity
B = Random Backbone
                             mean   std
Meta labels        h_(A->B)  0.42  0.03
                   h_(B->A)  0.25  0.04
Original proximity h_(A->B)  0.45  0.03
                   h_(B->A)  0.22  0.04

$y_{AB}$ values

Metric / Ultrametric Backbone

In [26]:
print 'A = Meta labels'
print 'B = Original proximity'

GOp = generate_original_graph(Gp)
compute_louvain(GOp)

_, _, A, _ = get_graph_variables(GOp, module_attribute)
_, _, B, _ = get_graph_variables(GOp, 'module-louvain')
print "y_(AB): {:.3f}".format(calculate_y(A,B))
A = Meta labels
B = Original proximity
y_(AB): 0.875
In [27]:
GOp = generate_original_graph(Gp)
GMp = generate_metric_graph(GOp)

compute_louvain(GOp)
compute_louvain(GMp)

print 'A = Meta labels'
print 'B = Metric Backbone'

_, _, A, _ = get_graph_variables(GOp, module_attribute)
_, _, B, _ = get_graph_variables(GMp, 'module-louvain')
print "y_(AB): {:.3f}".format(calculate_y(A,B))
print
print 'A = Original Graph'
print 'B = Metric Backbone'

_, _, A, _ = get_graph_variables(GOp, 'module-louvain')
_, _, B, _ = get_graph_variables(GMp, 'module-louvain')
print "y_(AB): {:.3f}".format(calculate_y(A,B))
A = Meta labels
B = Metric Backbone
y_(AB): 0.936

A = Original Graph
B = Metric Backbone
y_(AB): 0.800
In [28]:
GOp = generate_original_graph(Gp)
GTp = generate_threshold_graph(GOp, e_metric)

compute_louvain(GOp)
compute_louvain(GTp)

print 'A = Meta labels'
print 'B = Threshold (metric) Backbone'

_, _, A, _ = get_graph_variables(GOp, module_attribute)
_, _, B, _ = get_graph_variables(GTp, 'module-louvain')
print "y_(AB): {:.3f}".format(calculate_y(A,B))
print
print 'A = Original proximity'
print 'B = Threshold Backbone'

_, _, A, _ = get_graph_variables(GOp, 'module-louvain')
_, _, B, _ = get_graph_variables(GTp, 'module-louvain')
print "y_(AB): {:.3f}".format(calculate_y(A,B))
A = Meta labels
B = Threshold (metric) Backbone
y_(AB): 0.856

A = Original proximity
B = Threshold Backbone
y_(AB): 0.789
In [29]:
print 'A = Meta labels / Original proximity'
print 'B = Random Backbone'

d = {
    ('Meta label','y_(AB)'):[],
    ('Original proximity','y_(AB)'):[],
}

GOp = generate_original_graph(Gp)
compute_louvain(GOp)

for GRp in generate_n_random_graphs(GOp,100,e_original-e_metric):
    compute_louvain(GRp)
    
    _, _, A, _ = get_graph_variables(GOp, module_attribute)
    _, _, B, _ = get_graph_variables(GRp, 'module-louvain')
    d[('Meta label','y_(AB)')].append( calculate_y(A,B) )
    
    _, _, A, _ = get_graph_variables(GOp, 'module-louvain')
    _, _, B, _ = get_graph_variables(GRp, 'module-louvain')
    d[('Original proximity','y_(AB)')].append( calculate_y(A,B) )
    
df = pd.DataFrame.from_dict(d)
#print df
df = df.apply(['mean','std'], axis=0).T
display(df)
A = Meta labels / Original proximity
B = Random Backbone
                            mean   std
Meta label          y_(AB)  0.53  0.02
Original proximity  y_(AB)  0.51  0.03

$J_{A \to B}$ and $J_{B \to A}$ values

In [30]:
print 'A = Meta labels'
print 'B = Original proximity'

GOp = generate_original_graph(Gp)

compute_louvain(GOp)

_, _, A, _ = get_graph_variables(GOp, module_attribute)
_, _, B, _ = get_graph_variables(GOp, 'module-louvain')
jA2B, jB2A = calculate_j(A,B)
print "J_(A->B): {:.2f} J_(B->A): {:.2f}".format(jA2B,jB2A)
A = Meta labels
B = Original proximity
J_(A->B): 0.78 J_(B->A): 0.85
In [31]:
print 'A = Meta labels'
print 'B = Metric Backbone'

GOp = generate_original_graph(Gp)
GMp = generate_metric_graph(GOp)

compute_louvain(GOp)
compute_louvain(GMp)

_, _, A, _ = get_graph_variables(GOp, module_attribute)
_, _, B, _ = get_graph_variables(GMp, 'module-louvain')
jA2B, jB2A = calculate_j(A,B)
print "J_(A->B): {:.2f} J_(B->A): {:.2f}".format(jA2B,jB2A)
print
print 'A = Original proximity'
print 'B = Metric Backbone'

_, _, A, _ = get_graph_variables(GOp, 'module-louvain')
_, _, B, _ = get_graph_variables(GMp, 'module-louvain')
jA2B, jB2A = calculate_j(A,B)
print "J_(A->B): {:.2f} J_(B->A): {:.2f}".format(jA2B,jB2A)
A = Meta labels
B = Metric Backbone
J_(A->B): 0.88 J_(B->A): 0.93

A = Original proximity
B = Metric Backbone
J_(A->B): 0.72 J_(B->A): 0.69
In [32]:
print 'A = Meta labels'
print 'B = Threshold Backbone'

GOp = generate_original_graph(Gp)
GTp = generate_threshold_graph(GOp, e_metric)

compute_louvain(GOp)
compute_louvain(GTp)

_, _, A, _ = get_graph_variables(GOp, module_attribute)
_, _, B, _ = get_graph_variables(GTp, 'module-louvain')
jA2B, jB2A = calculate_j(A,B)
print "J_(A->B): {:.2f} J_(B->A): {:.2f}".format(jA2B,jB2A)
print 
print 'A = Original proximity'
print 'B = Threshold Backbone'

_, _, A, _ = get_graph_variables(GOp, 'module-louvain')
_, _, B, _ = get_graph_variables(GTp, 'module-louvain')
jA2B, jB2A = calculate_j(A,B)
print "J_(A->B): {:.2f} J_(B->A): {:.2f}".format(jA2B,jB2A)
A = Meta labels
B = Threshold Backbone
J_(A->B): 0.79 J_(B->A): 0.79

A = Original proximity
B = Threshold Backbone
J_(A->B): 0.73 J_(B->A): 0.66
In [33]:
print 'A = Meta labels'
print 'B = Random Backbone'

d = {
    ('Meta label','J_(A->B)'):[],
    ('Meta label','J_(B->A)'):[],
    #
    ('Original proximity','J_(A->B)'):[],
    ('Original proximity','J_(B->A)'):[],
}

GOp = generate_original_graph(Gp)

compute_louvain(GOp)

for GRp in generate_n_random_graphs(GOp,100,e_original-e_metric):
    compute_louvain(GRp)

    _, _, A, _ = get_graph_variables(GOp, module_attribute)
    _, _, B, _ = get_graph_variables(GRp, 'module-louvain')
    jA2B, jB2A = calculate_j(A,B)
    d[('Meta label','J_(A->B)')].append( jA2B )
    d[('Meta label','J_(B->A)')].append( jB2A )
    
    _, _, A, _ = get_graph_variables(GOp, 'module-louvain')
    _, _, B, _ = get_graph_variables(GRp, 'module-louvain')
    jA2B, jB2A = calculate_j(A,B)
    d[('Original proximity','J_(A->B)')].append( jA2B )
    d[('Original proximity','J_(B->A)')].append( jB2A )
    
df = pd.DataFrame.from_dict(d)
#print df
df = df.apply(['mean','std'], axis=0).T
display(df)
A = Meta labels
B = Random Backbone
                              mean   std
Meta label          J_(A->B)  0.41  0.04
                    J_(B->A)  0.27  0.03
Original proximity  J_(A->B)  0.42  0.05
                    J_(B->A)  0.25  0.03

clusim

Paper: A.J. Gates, I.B. Wood, W.P. Hetrick, Y. Ahn. (2017). On comparing clusterings: an element-centric framework unifies overlaps and hierarchy. arXiv preprint arXiv:1706.06136
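Before applying it to the graph partitions, here is a minimal usage sketch of the element-centric similarity on two small hand-made clusterings (toyA and toyB are hypothetical, chosen only for illustration; not an executed cell):

# Hypothetical toy clusterings expressed as cluster-to-element dictionaries
toyA = {'a1': set([1, 2, 3]), 'a2': set([4, 5, 6])}
toyB = {'b1': set([1, 2]), 'b2': set([3, 4, 5, 6])}
CA = Clustering(clus2elm_dict=toyA)
CB = Clustering(clus2elm_dict=toyB)
print "element_sim(toyA, toyB): {:.2f}".format(element_sim(CA, CB))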

In [34]:
print 'A = Meta labels'
print 'B = Original Proximity'

GOp = generate_original_graph(Gp)
compute_louvain(GOp)

_, _, A, _ = get_graph_variables(GOp, module_attribute)
_, _, B, _ = get_graph_variables(GOp, 'module-louvain')
CA = Clustering(clus2elm_dict=A)
CB = Clustering(clus2elm_dict=B)
print "Clusim: {:.2f}".format( element_sim(CA,CB) )
A = Meta labels
B = Original Proximity
Clusim: 0.77
In [35]:
print 'A = Meta labels'
print 'B = Metric Backbone'

GOp = generate_original_graph(Gp)
GMp = generate_metric_graph(GOp)

compute_louvain(GOp)
compute_louvain(GMp)

_, _, A, _ = get_graph_variables(GOp, module_attribute)
_, _, B, _ = get_graph_variables(GMp, 'module-louvain')
CA = Clustering(clus2elm_dict=A)
CB = Clustering(clus2elm_dict=B)
print "Clusim: {:.2f}".format( element_sim(CA,CB) )

print 'A = Original Proximity'
print 'B = Metric Backbone'

_, _, A, _ = get_graph_variables(GOp, 'module-louvain')
_, _, B, _ = get_graph_variables(GMp, 'module-louvain')
CA = Clustering(clus2elm_dict=A)
CB = Clustering(clus2elm_dict=B)
print "Clusim: {:.2f}".format( element_sim(CA,CB) )
A = Meta labels
B = Metric Backbone
Clusim: 0.88
A = Original Proximity
B = Metric Backbone
Clusim: 0.66
In [36]:
print 'A = Meta labels'
print 'B = Threshold Backbone'

GOp = generate_original_graph(Gp)
GTp = generate_threshold_graph(GOp, e_metric)

compute_louvain(GOp)
compute_louvain(GTp)

_, _, A, _ = get_graph_variables(GOp, module_attribute)
_, _, B, _ = get_graph_variables(GTp, 'module-louvain')
CA = Clustering(clus2elm_dict=A)
CB = Clustering(clus2elm_dict=B)
print "Clusim: {:.2f}".format( element_sim(CA,CB) )

print 'A = Original Proximity'
print 'B = Threshold Backbone'

_, _, A, _ = get_graph_variables(GOp, 'module-louvain')
_, _, B, _ = get_graph_variables(GTp, 'module-louvain')
CA = Clustering(clus2elm_dict=A)
CB = Clustering(clus2elm_dict=B)
print "Clusim: {:.2f}".format( element_sim(CA,CB) )
A = Meta labels
B = Threshold Backbone
Clusim: 0.78
A = Original Proximity
B = Threshold Backbone
Clusim: 0.67
In [37]:
print 'A = Meta labels'
print 'B = Random Backbone'

d = {
    ('Meta label','AB'):[],
    ('Original proximity','AB'):[],
}

GOp = generate_original_graph(Gp)
compute_louvain(GOp)

for GRp in generate_n_random_graphs(GOp,100,e_original-e_metric):
    compute_louvain(GRp)

    _, _, A, _ = get_graph_variables(GOp, module_attribute)
    _, _, B, _ = get_graph_variables(GRp, 'module-louvain')
    CA = Clustering(clus2elm_dict=A)
    CB = Clustering(clus2elm_dict=B)
    d[('Meta label','AB')].append( element_sim(CA,CB) )
    
    _, _, A, _ = get_graph_variables(GOp, 'module-louvain')
    _, _, B, _ = get_graph_variables(GRp, 'module-louvain')
    CA = Clustering(clus2elm_dict=A)
    CB = Clustering(clus2elm_dict=B)
    d[('Original proximity','AB')].append( element_sim(CA,CB) )
    
df = pd.DataFrame.from_dict(d)
#print df
df = df.apply(['mean','std'], axis=0).T
display(df)
A = Meta labels
B = Random Backbone
                        mean   std
Meta label          AB  0.35  0.04
Original proximity  AB  0.33  0.04