Network & Metric Backbone comparison

Network: Workplace


Reference: Data on face-to-face contacts in an office building suggest a low-cost vaccination strategy based on community linkers", M. Génois et al., Network Science 3, 326 (2015).

Original data: http://www.sociopatterns.org/publications/data-on-face-to-face-contacts-in-an-office-building-suggest-a-low-cost-vaccination-strategy-based-on-community-linkers/

In [1]:
%matplotlib inline
from __future__ import division
import numpy as np
import pandas as pd
pd.set_option('display.precision', 2)
# Network
import networkx as nx
import community # louvain
from fa2 import ForceAtlas2
from distanceclosure.utils import _dist2prox as dist2prox
# Matplotlib
import matplotlib as mpl
import matplotlib.style
mpl.style.use('default')
mpl.rcParams['mathtext.fontset'] = 'cm'
mpl.rcParams['mathtext.rm'] = 'serif'
import matplotlib.pyplot as plt
# Others
import math
import random
from IPython.core.display import display, Math
from collections import OrderedDict
from scipy.stats import entropy
# clusim
from clusim.clustering import Clustering
from clusim.clusimelement import element_sim, element_sim_elscore

Init parameters

In [2]:
# SocioPatterns Network
source = 'sociopatterns'
project = 'workplace'
# The normalization convertion from 
normalization = 'social' # [options:] 'time', 'time_all' or 'social'
# Network File
rGpickle  = 'results/%s/%s/graph-%s.gpickle' % (source, project, normalization)
# The Networkx node attribute containing the Meta labels
module_attribute = 'class'
# Metadata nodecolors
dict_meta_color = {
    'DMI' : '#FFA500', #Orange
    'DSE' : '#228B22', #ForestGreen
    'DMCT': '#4B0082', #Indigo
    'DST' : '#A0522D', #Siena -> Brown(ish)
    'DISQ': '#6b8e23', #OliveDrab
    'SFLE': '#DC143C', #Crimson
    'DCAR': '#C71585', #MediumVioletRed
    'SSI' : '#32CD32', #LimeGreen
    'SRH' : '#00008B', #DarkBlue
    'SCOM': '#7FFFD4', #AquaMarine
    'SDOC': '#FFA07A', #LightSalmon
    'DG'  : '#FFFF00', #Yellow
}
# Title for plots 
project_title = 'Workplace'

Loading Network

Both metric and the ultrametric backbones have been computed with an the distanceclosure package. Below we simply load a NetworkX graph that contains metric and semi-metric information for each edge.

In [3]:
G = nx.read_gpickle(rGpickle)

Exception rules

In the Workplace data set, we remove isolated nodes. All variabels with an appending 'p' are denoting a ' (prime) variable.

In [4]:
Gp = G.copy()
Gp.remove_nodes_from(list(nx.isolates(Gp)))

Helper functions

These helper functions retrive information from the NetworkX Graph object

In [5]:
def get_graph_variables(G, *arg, **kwargs):
    dM = nx.get_node_attributes(G, *arg)
    s = set(dM.values())
    n = len(s)
    sM = { m : set([k for k,v in dM.items() if v==m]) for m in s }
    #
    return n, s, sM, dM
In [6]:
# Strip G to the original Graph, without additional metric closure edges.
def generate_original_graph(G):
    GO = G.copy()
    edges2remove = [(i,j) for i,j,d in G.edges(data=True) if 'original' not in d]
    GO.remove_edges_from(edges2remove)
    return GO

GO = generate_original_graph(Gp)
In [7]:
# Metric Graph, only metric edges
def generate_metric_graph(G):
    GM = G.copy()
    edges2remove = [(i,j) for i,j,d in G.edges(data=True) if d['metric']==False]
    GM.remove_edges_from(edges2remove)
    return GM

GM = generate_metric_graph(Gp)

Graph Statistics

Displays some basic statistics for comparison.

In [8]:
nO, sO, sMO, dMO = get_graph_variables(GO, module_attribute)  
In [9]:
n = G.number_of_nodes()
isolates = len( list( nx.isolates(G) ) )
isolates_percent = isolates/n

e_possible = int(( (n*n) - n) / 2)
e_total = G.number_of_edges()

original_components = nx.number_connected_components(GO)

n_metalabels = len(np.unique(nx.get_node_attributes(GO, module_attribute).values()))

e_original = 0
e_metric = 0
e_ultrametric = 0
e_semimetric = 0
e_s_gt_1 = 0
e_s_gt_1_original = 0
e_d_eq_infty = 0
e_bij_gt_1 = 0
e_bji_gt_1 = 0
distortion = 0

for eid,(i,j,d) in enumerate( G.edges(data=True), start=0):

    # Original Edges
    if (d.get('original')==True):
        e_original += 1
    # Metric Edges
    if (d.get('metric')==True):
        e_metric += 1 
    # UltraMetric Edges
    if (d.get('ultrametric')==True):
        e_ultrametric += 1

    # Semi-metric edges
    if (d.get('metric')==False):
        e_semimetric += 1

    # S values
    if (d.get('s_value'))>1.0:
        e_s_gt_1 += 1
    if (d.get('s_value'))>1.0 and (d.get('original')==True):
        e_s_gt_1_original += 1

    if (d.get('distance')==np.inf):
        e_d_eq_infty += 1

    # B_ij values
    if (d.get('b_ij_value'))>1.0:
        e_bij_gt_1 += 1

    # B_ji values
    if (d.get('b_ji_value'))>1.0:
        e_bji_gt_1 += 1

    # Distortion

    distortion += abs(dist2prox(d['distance_metric_closure']) - d.get('proximity'))
    

distortion_norm = (2 * distortion) / (n * (n - 1))
e_original_percent = e_original/e_total
e_metric_percent = e_metric/e_original
e_semimetric_percent = e_semimetric/e_total
e_s_gt_1_percent = e_s_gt_1/e_total
e_s_gt_1_original_percent = e_s_gt_1_original/e_original
e_d_eq_infty_percent = e_d_eq_infty/e_total
e_bij_gt_1_percent = e_bij_gt_1/e_total
e_bji_gt_1_percent = e_bji_gt_1/e_total

print 'Meta-labels: {:,d}'.format(n_metalabels) 

display(Math('D_w'))
print 'Nodes: {:,d}'.format(n)
print 'Possible Edges: {:,d}'.format(e_possible)
print 'Edges: {:,d} ({:.2%} of possible edges)'.format(e_original , e_original_percent)
print 'Connected Components: {:,d}'.format(original_components)
print 'Isolates: {:,d} ({:.2%})'.format(isolates, isolates_percent)

display(Math('B_w'))
print 'Metric Edges: {:,d} ({:.2%} of the original {:,d} edges)'.format(e_metric, e_metric_percent, e_original)

display(Math('D^C_w'))
print 'Semi-Metric edges: {:,d} ({:.2%})'.format(e_semimetric, e_semimetric_percent)
print 'S>1: {:,d} ({:.2%} of original edges; {:.2%} of the total edges)'.format(e_s_gt_1, e_s_gt_1_original_percent, e_s_gt_1_percent)
print 'B_ij>1 , B_ji>1: {:,d} ({:.2%} of total) , {:,d} ({:.2%} of total)'.format(e_bij_gt_1, e_bij_gt_1_percent , e_bji_gt_1, e_bji_gt_1_percent)
print 'Edges where D=\infty: {:,d} ({:.2%} of the total {:,d} edges)'.format(e_d_eq_infty, e_d_eq_infty_percent, e_total)

print 'Distortion \delta (\Delta): {:.4f} ({:.2f})'.format(distortion_norm, distortion)
Meta-labels: 12
$$D_w$$
Nodes: 232
Possible Edges: 26,796
Edges: 4,274 (18.24% of possible edges)
Connected Components: 1
Isolates: 15 (6.47%)
$$B_w$$
Metric Edges: 745 (17.43% of the original 4,274 edges)
$$D^C_w$$
Semi-Metric edges: 22,691 (96.82%)
S>1: 3,529 (82.57% of original edges; 15.06% of the total edges)
B_ij>1 , B_ji>1: 18,386 (78.45% of total) , 18,648 (79.57% of total)
Edges where D=\infty: 19,162 (81.76% of the total 23,436 edges)
Distortion \delta (\Delta): 0.0086 (230.75)

(Null Model) Threshold Backbone

The threshold backbone is a null model where edges with the smallest proximity are removed until the same number of edges as the metric backbone is achieved.

In [10]:
# Remove the lowest number of edges that keeps the network with the same number of edges as the G_metric (e_metric).
def generate_threshold_graph(G, edges_to_keep=0):
    GT = G.copy()
    edges2remove = sorted(GT.edges(data=True), key=lambda x: x[2]['weight'])[ : -edges_to_keep ]
    GT.remove_edges_from(edges2remove)
    return GT

GT = generate_threshold_graph(GO, e_metric)

(Null Model) Random Backbone

The random backbone is a null model where edges are removed at random until the same number of edges as the metric backbone network is achieved.

In [11]:
# Generate a random graph and remove n number of edges
def generate_random_graph(G, edges_to_keep=0):
    GR = G.copy()
    edges2remove = random.sample(GR.edges(data=True), edges_to_keep)
    GR.remove_edges_from(edges2remove)
    return GR

# A generative function to yield 'n' random graphs
def generate_n_random_graphs(G, n=1, *arg, **kwargs):
    for i in xrange(n):
        yield generate_random_graph(G, *arg)

GR = generate_random_graph(GO, e_original-e_metric)

Modularity Algorithms

Louvain Modularity

In [12]:
def compute_louvain(G):
    dM = community.best_partition(G)
    nx.set_node_attributes(G, name='module-louvain', values=dM)
In [29]:
# Original
compute_louvain(GO)
n, s, sM, dM = get_graph_variables(GO, 'module-louvain')
print "G Original Louvain : {:d}".format(n)

# Metric
compute_louvain(GM)
n, s, sM, dM = get_graph_variables(GM, 'module-louvain')
print "G Metric Louvain   : {:d}".format(n)

# Threshold
compute_louvain(GT)
n, s, sM, dM = get_graph_variables(GT, 'module-louvain')
print "G Threshold Louvain: {:d}".format(n)

# Random
compute_louvain(GR)
ns = []
for i in xrange(100):
    _GR = generate_random_graph(GO, e_original-e_metric)
    compute_louvain(_GR)
    n, s, sM, dM = get_graph_variables(_GR, 'module-louvain')
    ns.append(n)
print "G Random Louvain   : {:.2f}(±{:.2f})".format(np.mean(ns), np.std(ns))
G Original Louvain : 25
G Metric Louvain   : 13
G Threshold Louvain: 17
G Random Louvain   : 35.16(±2.07)

Visualizing Networks

Node layout is defined by a python implementation of Gephi's ForceAtlas2 algorithm.

In [14]:
forceatlas2 = ForceAtlas2(outboundAttractionDistribution=False, linLogMode=False, adjustSizes=False,
    edgeWeightInfluence=1.0, jitterTolerance=1.0, barnesHutOptimize=False, barnesHutTheta=1.2,
    multiThreaded=False, scalingRatio=1.2, strongGravityMode=False, gravity=1.0, verbose=True)
# Node position for NetworkX
pos = forceatlas2.forceatlas2_networkx_layout(GO, pos=None, iterations=2000)
100%|██████████| 2000/2000 [00:00<00:00, 2281.12it/s]
('Repulsion forces', ' took ', '0.29', ' seconds')
('Gravitational forces', ' took ', '0.03', ' seconds')
('Attraction forces', ' took ', '0.12', ' seconds')
('AdjustSpeedAndApplyForces step', ' took ', '0.13', ' seconds')

Original Network

In [15]:
fig, ax = plt.subplots(1,1,figsize=(7,6), facecolor='w')

node_color = [dict_meta_color[d[module_attribute]] for n,d in GO.nodes(data=True)]
nx.draw_networkx_nodes(GO, ax=ax, pos=pos, cmap=plt.get_cmap('jet'), node_size=150, node_color=node_color, edgecolors='#b2b2b2')
nx.draw_networkx_edges(GO, pos=pos, ax=ax, edge_color='k', alpha=0.5, style='solid')

ax.set_title('{:s} - Original Graph - Meta-label modules'.format(project_title))
ax.axes.get_xaxis().set_visible(False); ax.axes.get_yaxis().set_visible(False)

plt.tight_layout()
#plt.show()
plt.savefig('images/{:s}/{:s}/{:s}/graph_original_metalabels.png'.format(source,project,normalization), dpi=150)
In [57]:
fig,axes = plt.subplots(1,4,figsize=(11,3), facecolor='w')
((ax1,ax2,ax3,ax4)) = axes

node_size = 30
cmap = 'nipy_spectral'

# Original (Louvain)
ax = ax1
node_color = [d['module-louvain'] for n,d in GO.nodes(data=True)]
nx.draw_networkx_nodes(GO, ax=ax, cmap=plt.get_cmap(cmap), pos=pos, node_size=node_size, node_color=node_color, edgecolors='#b2b2b2')
edge_color = [d['weight'] for i,j,d in GO.edges(data=True)]
nx.draw_networkx_edges(GO, pos=pos, ax=ax, edge_color='k', alpha=0.3, style='solid')

# Metric (Louvain)
ax = ax2
node_color = [d['module-louvain'] for n,d in GM.nodes(data=True)]
nx.draw_networkx_nodes(GM, pos=pos, ax=ax, cmap=plt.get_cmap(cmap), node_size=node_size, node_color=node_color, edgecolors='#b2b2b2')
edge_color = [d['weight'] for i,j,d in GM.edges(data=True)]
nx.draw_networkx_edges(GM, pos=pos, ax=ax, edge_color='k', alpha=0.5, style='solid')

# Threshold (Louvain)
ax = ax3
node_color = [d['module-louvain'] for n,d in GT.nodes(data=True)]
nx.draw_networkx_nodes(GT, pos=pos, ax=ax, cmap=plt.get_cmap(cmap), node_size=node_size, node_color=node_color, edgecolors='#b2b2b2')
edge_color = [d['weight'] for i,j,d in GT.edges(data=True)]
nx.draw_networkx_edges(GT, pos=pos, ax=ax, edge_color='k', alpha=0.5, style='solid')

# Random (Louvain)
ax = ax4
node_color = [d['module-louvain'] for n,d in GR.nodes(data=True)]
nx.draw_networkx_nodes(GR, pos=pos, ax=ax, cmap=plt.get_cmap(cmap), node_size=node_size, node_color=node_color, edgecolors='#b2b2b2')
edge_color = [d['weight'] for i,j,d in GR.edges(data=True)]
nx.draw_networkx_edges(GR, pos=pos, ax=ax, edge_color='k', alpha=0.5, style='solid')


# Draw
for ax in axes.flatten():
    ax.tick_params(axis='both', which='both', bottom='off', top='off', left='off', right='off', labelbottom='off', labelleft='off')

ax1.set_title('Original')
ax2.set_title('Metric')
ax3.set_title('Threshold')
ax4.set_title('Random')

ax1.set_ylabel('Louvain', rotation=90, ha='center')

plt.tight_layout()
#plt.show()
plt.savefig('images/{:s}/{:s}/{:s}/graph_comparison.png'.format(source,project,normalization), dpi=150)
In [17]:
fig,axes = plt.subplots(1,4,figsize=(11,3), facecolor='w')
((ax1,ax2,ax3,ax4)) = axes

node_size = 30
cmap = 'nipy_spectral'

forceatlas2 = ForceAtlas2(outboundAttractionDistribution=False, linLogMode=False, adjustSizes=False,
    edgeWeightInfluence=1.0, jitterTolerance=1.0, barnesHutOptimize=False, barnesHutTheta=1.2,
    multiThreaded=False, scalingRatio=1.2, strongGravityMode=False, gravity=1.0, verbose=False)
posm = forceatlas2.forceatlas2_networkx_layout(GM, pos=pos, iterations=2000)
post = forceatlas2.forceatlas2_networkx_layout(GT, pos=pos, iterations=2000)
posr = forceatlas2.forceatlas2_networkx_layout(GR, pos=pos, iterations=2000)

# Original (Louvain)
ax = ax1
node_color = [d['module-louvain'] for n,d in GO.nodes(data=True)]
nx.draw_networkx_nodes(GO, pos=pos, ax=ax, cmap=plt.get_cmap(cmap), node_size=node_size, node_color=node_color, edgecolors='#b2b2b2')
edge_color = [d['weight'] for i,j,d in GO.edges(data=True)]
nx.draw_networkx_edges(GO, pos=pos, ax=ax, edge_color='k', alpha=0.3, style='solid')

# Metric (Louvain)
ax = ax2
node_color = [d['module-louvain'] for n,d in GM.nodes(data=True)]
nx.draw_networkx_nodes(GM, pos=posm, ax=ax, cmap=plt.get_cmap(cmap), node_size=node_size, node_color=node_color, edgecolors='#b2b2b2')
edge_color = [d['weight'] for i,j,d in GM.edges(data=True)]
nx.draw_networkx_edges(GM, pos=posm, ax=ax, edge_color='k', alpha=0.5, style='solid')

# Threshold (Louvain)
ax = ax3
node_color = [d['module-louvain'] for n,d in GT.nodes(data=True)]
nx.draw_networkx_nodes(GT, pos=post, ax=ax, cmap=plt.get_cmap(cmap), node_size=node_size, node_color=node_color, edgecolors='#b2b2b2')
edge_color = [d['weight'] for i,j,d in GT.edges(data=True)]
nx.draw_networkx_edges(GT, pos=post, ax=ax, edge_color='k', alpha=0.5, style='solid')

# Random (Louvain)
ax = ax4
node_color = [d['module-louvain'] for n,d in GR.nodes(data=True)]
nx.draw_networkx_nodes(GR, pos=posr, ax=ax, cmap=plt.get_cmap(cmap), node_size=node_size, node_color=node_color, edgecolors='#b2b2b2')
edge_color = [d['weight'] for i,j,d in GR.edges(data=True)]
nx.draw_networkx_edges(GR, pos=posr, ax=ax, edge_color='k', alpha=0.5, style='solid')


# Draw
for ax in axes.flatten():
    ax.tick_params(axis='both', which='both', bottom='off', top='off', left='off', right='off', labelbottom='off', labelleft='off')

ax1.set_title('Original')
ax2.set_title('Metric')
ax3.set_title('Threshold')
ax4.set_title('Random')

ax1.set_ylabel('Louvain', rotation=90, ha='center')

plt.tight_layout()
#plt.show()
plt.savefig('images/{:s}/{:s}/{:s}/graph_comparison_layout.png'.format(source,project,normalization), dpi=150)

Quantifying module similarity between Backbone and Original Network

Please refer to the index page for a descriptive detail over each formula.

\begin{equation} h_{A \to B} = \frac{ \sum_{i}^{m_A} H(\mathcal{A}_i) }{ m_A \cdot \log_2(m_B) } \quad , \quad h_{B \to A} = \frac{ \sum_{j}^{m_B} H(\mathcal{B}_j) }{ m_B \cdot \log_2(m_A) } \quad . \end{equation}

In [18]:
def calculate_h(A,B):
    # Intersection
    df_I = pd.DataFrame.from_dict( OrderedDict([( i , OrderedDict([(j,iv.intersection(jv)) for j,jv in B.items()]) ) for i,iv in A.items()]), orient='index')
    df_Ic = df_I.applymap(len) # size of sets
    ma, mb = df_I.shape
    #to == 'B'
    sA = pd.Series(A, name='A')
    sAc = sA.apply(len)
    df_BA = df_Ic.divide(sAc.values, axis='index')
    sH = df_BA.apply(entropy, axis=1, base=2)
    h_A2B = (1/(ma * math.log(mb,2))) * sH.sum()
    #to =='A'
    sB = pd.Series(B, name='B')
    sBc = sB.apply(len)
    df_AB = df_Ic.divide(sBc.values, axis='columns')
    sH = df_AB.apply(entropy, axis=0, base=2)
    h_B2A = (1/(mb * math.log(ma,2))) * sH.sum()
    return h_A2B, h_B2A

\begin{equation} y_{AB} = \frac{ \sum_{i}^{m_A} \sum_{j}^{m_B} P(A_i,B_j) }{ \sqrt{ m_A \cdot m_B} } \quad \text{where} \quad P(A_i,B_j) = \frac{ |A_i \cap B_j| }{ |A_i \cup B_j| } \quad . \end{equation}

In [19]:
def calculate_y(A,B):
    df_I = pd.DataFrame.from_dict( OrderedDict([( i , OrderedDict([(j,iv.intersection(jv)) for j,jv in B.items()]) ) for i,iv in A.items()]), orient='index')
    df_U = pd.DataFrame.from_dict( OrderedDict([( i , OrderedDict([(j,iv.union(jv)) for j,jv in B.items()]) ) for i,iv in A.items()]), orient='index')
    ma, mb = df_I.shape
    df_Ic = df_I.applymap(len)
    df_Uc = df_U.applymap(len)
    df_S = df_Ic.divide( df_Uc )
    return df_S.sum().sum() / math.sqrt(ma * mb)

\begin{equation} J_{A \to B} = \frac{ \sum_i^{m_A} \max_j^{m_B} P(A_i,B_j) }{ m_A } \quad , \quad J_{B \to A}= \frac{ \sum_j^{m_B} \max_i^{m_A} P(A_i,B_j) }{ m_B } \end{equation}

In [20]:
def calculate_j(A,B):
    df_I = pd.DataFrame.from_dict( OrderedDict([( i , OrderedDict([(j,iv.intersection(jv)) for j,jv in B.items()]) ) for i,iv in A.items()]), orient='index')
    df_U = pd.DataFrame.from_dict( OrderedDict([( i , OrderedDict([(j,iv.union(jv)) for j,jv in B.items()]) ) for i,iv in A.items()]), orient='index')
    ma, mb = df_I.shape
    df_Ic = df_I.applymap(len)
    df_Uc = df_U.applymap(len)
    df_S = df_Ic.divide( df_Uc )
    sMaxA, sMaxB = df_S.max(axis=0), df_S.max(axis=1)
    jA2B = sMaxB.sum() / ma
    jB2A = sMaxA.sum() / mb
    return jA2B, jB2A

$h_{A \to B}$ and $h_{B \to A}$

In [40]:
print 'A = Meta labels'
print 'B = Original proximity'
GO = generate_original_graph(Gp)

compute_louvain(GO)

_, _, A, _ = get_graph_variables(GO, module_attribute)
_, _, B, _ = get_graph_variables(GO, 'module-louvain')
hA2B, hB2A = calculate_h(A,B)
print "h_(A->B): {:.2f} h_(B->A): {:.2f}".format(hA2B,hB2A)
A = Meta labels
B = Original proximity
h_(A->B): 0.09 h_(B->A): 0.09
In [41]:
print 'A = Meta labels'
print 'B = Metric Backbone'

GO = generate_original_graph(Gp)
GM = generate_metric_graph(GO)

compute_louvain(GO)
compute_louvain(GM)

_, _, A, _ = get_graph_variables(GO, module_attribute)
_, _, B, _ = get_graph_variables(GM, 'module-louvain')
hA2B, hB2A = calculate_h(A,B)
print "h_(A->B): {:.2f} h_(B->A): {:.2f}".format(hA2B,hB2A)
print
print 'A = Original proximity'
print 'B = Metric Backbone'

_, _, A, _ = get_graph_variables(GO, 'module-louvain')
_, _, B, _ = get_graph_variables(GM, 'module-louvain')
hA2B, hB2A = calculate_h(A,B)
print "h_(A->B): {:.2f} h_(B->A): {:.2f}".format(hA2B,hB2A)
A = Meta labels
B = Metric Backbone
h_(A->B): 0.15 h_(B->A): 0.11

A = Original proximity
B = Metric Backbone
h_(A->B): 0.14 h_(B->A): 0.08
In [32]:
print 'A = Meta labels'
print 'B = Threshold Backbone'

GO = generate_original_graph(Gp)
GT = generate_threshold_graph(GO, e_metric)

compute_louvain(GO)
compute_louvain(GT)

_, _, A, _ = get_graph_variables(GO, module_attribute)
_, _, B, _ = get_graph_variables(GT, 'module-louvain')
hA2B, hB2A = calculate_h(A,B)
print "h_(A->B): {:.2f} h_(B->A): {:.2f}".format(hA2B,hB2A)
print
print 'A = Original proximity'
print 'B = Threshold (metric) Backbone'

_, _, A, _ = get_graph_variables(GO, 'module-louvain')
_, _, B, _ = get_graph_variables(GT, 'module-louvain')
hA2B, hB2A = calculate_h(A,B)
print "h_(A->B): {:.2f} h_(B->A): {:.2f}".format(hA2B,hB2A)
A = Meta labels
B = Threshold Backbone
h_(A->B): 0.17 h_(B->A): 0.10

A = Original proximity
B = Threshold (metric) Backbone
h_(A->B): 0.16 h_(B->A): 0.07
In [33]:
print 'A = Meta labels / Original proximity'
print 'B = Random Backbone'

d = {
    ('Meta labels','h_(A->B)'):[],
    ('Meta labels','h_(B->A)'):[],
    ('Original proximity','h_(A->B)'):[],
    ('Original proximity','h_(B->A)'):[],
}

GO = generate_original_graph(Gp)
compute_louvain(GO)

for GRp in generate_n_random_graphs(GO,100,e_original-e_metric):
    compute_louvain(GR)

    _, _, A, _ = get_graph_variables(GO, module_attribute)
    _, _, B, _ = get_graph_variables(GR, 'module-louvain')
    hA2B, hB2A = calculate_h(A,B)
    d[('Meta labels','h_(A->B)')].append( hA2B )
    d[('Meta labels','h_(B->A)')].append( hB2A )
    
    _, _, A, _ = get_graph_variables(GO, 'module-louvain')
    _, _, B, _ = get_graph_variables(GR, 'module-louvain')
    hA2B, hB2A = calculate_h(A,B)
    d[('Original proximity','h_(A->B)')].append( hA2B )
    d[('Original proximity','h_(B->A)')].append( hB2A )
    
df = pd.DataFrame.from_dict(d)
#print df
df = df.apply(['mean','std'], axis=0).T
display(df)
A = Meta labels / Original proximity
B = Random Backbone
mean std
Meta labels h_(A->B) 0.40 6.69e-16
h_(B->A) 0.27 5.58e-17
Original proximity h_(A->B) 0.19 1.39e-16
h_(B->A) 0.21 2.51e-16

$y_{AB}$ values

Metric / Ultrametric Backbone

In [43]:
print 'A = Meta labels'
print 'B = Original proximity'

GO = generate_original_graph(Gp)
compute_louvain(GO)

_, _, A, _ = get_graph_variables(GO, module_attribute)
_, _, B, _ = get_graph_variables(GO, 'module-louvain')
print "y_(AB): {:.2f}".format(calculate_y(A,B))
A = Meta labels
B = Original proximity
y_(AB): 0.86
In [44]:
GO = generate_original_graph(Gp)
GM = generate_metric_graph(GO)

compute_louvain(GO)
compute_louvain(GM)

print 'A = Meta labels'
print 'B = Metric Backbone'

_, _, A, _ = get_graph_variables(GO, module_attribute)
_, _, B, _ = get_graph_variables(GM, 'module-louvain')
print "y_(AB): {:.2f}".format(calculate_y(A,B))
print
print 'A = Original Graph'
print 'B = Metric Backbone'

_, _, A, _ = get_graph_variables(GO, 'module-louvain')
_, _, B, _ = get_graph_variables(GM, 'module-louvain')
print "y_(AB): {:.2f}".format(calculate_y(A,B))
A = Meta labels
B = Metric Backbone
y_(AB): 0.80

A = Original Graph
B = Metric Backbone
y_(AB): 0.81
In [45]:
GO = generate_original_graph(Gp)
GT = generate_threshold_graph(GO, e_metric)

compute_louvain(GO)
compute_louvain(GT)

print 'A = Meta labels'
print 'B = Threshold (metric) Backbone'

_, _, A, _ = get_graph_variables(GO, module_attribute)
_, _, B, _ = get_graph_variables(GT, 'module-louvain')
print "y_(AB): {:.2f}".format(calculate_y(A,B))
print
print 'A = Original proximity'
print 'B = Threshold Backbone'

_, _, A, _ = get_graph_variables(GO, 'module-louvain')
_, _, B, _ = get_graph_variables(GT, 'module-louvain')
print "y_(AB): {:.2f}".format(calculate_y(A,B))
A = Meta labels
B = Threshold (metric) Backbone
y_(AB): 0.60

A = Original proximity
B = Threshold Backbone
y_(AB): 0.66
In [46]:
print 'A = Meta labels / Original proximity'
print 'B = Random Backbone'

d = {
    ('Meta label','y_(AB)'):[],
    ('Original proximity','y_(AB)'):[],
}

GO = generate_original_graph(Gp)
compute_louvain(GO)

for GRp in generate_n_random_graphs(GO,100,e_original-e_metric):
    compute_louvain(GR)
    
    _, _, A, _ = get_graph_variables(GO, module_attribute)
    _, _, B, _ = get_graph_variables(GR, 'module-louvain')
    d[('Meta label','y_(AB)')].append( calculate_y(A,B) )
    
    _, _, A, _ = get_graph_variables(GO, 'module-louvain')
    _, _, B, _ = get_graph_variables(GR, 'module-louvain')
    d[('Original proximity','y_(AB)')].append( calculate_y(A,B) )
    
df = pd.DataFrame.from_dict(d)
#print df
df = df.apply(['mean','std'], axis=0).T
display(df)
A = Meta labels / Original proximity
B = Random Backbone
mean std
Meta label y_(AB) 0.45 9.48e-16
Original proximity y_(AB) 0.47 5.02e-16

$J_{A \to B}$ and $J_{B \to A}$ values

In [47]:
print 'A = Meta labels'
print 'B = Original proximity'

GO = generate_original_graph(Gp)

compute_louvain(GO)

_, _, A, _ = get_graph_variables(GO, module_attribute)
_, _, B, _ = get_graph_variables(GO, 'module-louvain')
jA2B, jB2A = calculate_j(A,B)
print "J_(A->B): {:.2f} J_(B->A): {:.2f}".format(jA2B,jB2A)
A = Meta labels
B = Original proximity
J_(A->B): 0.77 J_(B->A): 0.90
In [48]:
print 'A = Meta labels'
print 'B = Metric Backbone'

GO = generate_original_graph(Gp)
GM = generate_metric_graph(GO)

compute_louvain(GO)
compute_louvain(GM)

_, _, A, _ = get_graph_variables(GO, module_attribute)
_, _, B, _ = get_graph_variables(GM, 'module-louvain')
jA2B, jB2A = calculate_j(A,B)
print "J_(A->B): {:.2f} J_(B->A): {:.2f}".format(jA2B,jB2A)
print
print 'A = Original proximity'
print 'B = Metric Backbone'

_, _, A, _ = get_graph_variables(GO, 'module-louvain')
_, _, B, _ = get_graph_variables(GM, 'module-louvain')
jA2B, jB2A = calculate_j(A,B)
print "J_(A->B): {:.2f} J_(B->A): {:.2f}".format(jA2B,jB2A)
A = Meta labels
B = Metric Backbone
J_(A->B): 0.78 J_(B->A): 0.74

A = Original proximity
B = Metric Backbone
J_(A->B): 0.84 J_(B->A): 0.69
In [49]:
print 'A = Meta labels'
print 'B = Threshold Backbone'

GO = generate_original_graph(Gp)
GT = generate_threshold_graph(GO, e_metric)

compute_louvain(GO)
compute_louvain(GT)

_, _, A, _ = get_graph_variables(GO, module_attribute)
_, _, B, _ = get_graph_variables(GT, 'module-louvain')
jA2B, jB2A = calculate_j(A,B)
print "J_(A->B): {:.2f} J_(B->A): {:.2f}".format(jA2B,jB2A)
print 
print 'A = Original proximity'
print 'B = Threshold Backbone'

_, _, A, _ = get_graph_variables(GO, 'module-louvain')
_, _, B, _ = get_graph_variables(GT, 'module-louvain')
jA2B, jB2A = calculate_j(A,B)
print "J_(A->B): {:.2f} J_(B->A): {:.2f}".format(jA2B,jB2A)
A = Meta labels
B = Threshold Backbone
J_(A->B): 0.59 J_(B->A): 0.45

A = Original proximity
B = Threshold Backbone
J_(A->B): 0.72 J_(B->A): 0.47
In [50]:
print 'A = Meta labels -> Louvain, InfoMap'
print 'B = Random (metric) Proximity'

d = {
    ('Meta label','J_(A->B)'):[],
    ('Meta label','J_(B->A)'):[],
    #
    ('Original proximity','J_(A->B)'):[],
    ('Original proximity','J_(B->A)'):[],
}

GO = generate_original_graph(Gp)

compute_louvain(GO)

for GR in generate_n_random_graphs(GO,100,e_original-e_metric):
    compute_louvain(GR)

    _, _, A, _ = get_graph_variables(GO, module_attribute)
    _, _, B, _ = get_graph_variables(GR, 'module-louvain')
    jA2B, jB2A = calculate_j(A,B)
    d[('Meta label','J_(A->B)')].append( jA2B )
    d[('Meta label','J_(B->A)')].append( jB2A )
    
    _, _, A, _ = get_graph_variables(GO, 'module-louvain')
    _, _, B, _ = get_graph_variables(GR, 'module-louvain')
    jA2B, jB2A = calculate_j(A,B)
    d[('Original proximity','J_(A->B)')].append( jA2B )
    d[('Original proximity','J_(B->A)')].append( jB2A )
    
df = pd.DataFrame.from_dict(d)
#print df
df = df.apply(['mean','std'], axis=0).T
display(df)
A = Meta labels -> Louvain, InfoMap
B = Random (metric) Proximity
mean std
Meta label J_(A->B) 0.32 0.04
J_(B->A) 0.23 0.03
Original proximity J_(A->B) 0.34 0.04
J_(B->A) 0.22 0.03

Clusim

Paper: A.J. Gates, I.B. Wood, W.P. Hetrick, Y. Ahn. (2017). On comparing clusterings: an element-centric framework unifies overlaps and hierarchy. arXiv preprint arXiv:1706.06136

In [51]:
print 'A = Meta labels'
print 'B = Original Proximity'

GO = generate_original_graph(Gp)
compute_louvain(GO)

_, _, A, _ = get_graph_variables(GO, module_attribute)
_, _, B, _ = get_graph_variables(GO, 'module-louvain')
CA = Clustering(clus2elm_dict=A)
CB = Clustering(clus2elm_dict=B)
print "Clusim: {:.2f}".format( element_sim(CA,CB) )
A = Meta labels
B = Original Proximity
Clusim: 0.84
In [52]:
print 'A = Meta labels'
print 'B = Metric Backbone'

GO = generate_original_graph(Gp)
GM = generate_metric_graph(GO)

compute_louvain(GO)
compute_louvain(GM)

_, _, A, _ = get_graph_variables(GO, module_attribute)
_, _, B, _ = get_graph_variables(GM, 'module-louvain')
CA = Clustering(clus2elm_dict=A)
CB = Clustering(clus2elm_dict=B)
print "Clusim: {:.2f}".format( element_sim(CA,CB) )

print 'A = Original Proximity'
print 'B = Metric Backbone'

_, _, A, _ = get_graph_variables(GO, 'module-louvain')
_, _, B, _ = get_graph_variables(GM, 'module-louvain')
CA = Clustering(clus2elm_dict=A)
CB = Clustering(clus2elm_dict=B)
print "Clusim: {:.2f}".format( element_sim(CA,CB) )
A = Meta labels
B = Metric Backbone
Clusim: 0.68
A = Original Proximity
B = Metric Backbone
Clusim: 0.68
In [54]:
print 'A = Meta labels'
print 'B = Threshold Backbone'

GO = generate_original_graph(Gp)
GT = generate_threshold_graph(GO, e_metric)

compute_louvain(GO)
compute_louvain(GT)

_, _, A, _ = get_graph_variables(GO, module_attribute)
_, _, B, _ = get_graph_variables(GT, 'module-louvain')
CA = Clustering(clus2elm_dict=A)
CB = Clustering(clus2elm_dict=B)
print "Louvain: {:.2f}".format( element_sim(CA,CB) )

print 'A = Original Proximity'
print 'B = Threshold Backbone'

_, _, A, _ = get_graph_variables(GO, 'module-louvain')
_, _, B, _ = get_graph_variables(GT, 'module-louvain')
CA = Clustering(clus2elm_dict=A)
CB = Clustering(clus2elm_dict=B)
print "Louvain: {:.2f}".format( element_sim(CA,CB) )
A = Meta labels
B = Threshold Backbone
Louvain: 0.57
A = Original Proximity
B = Threshold Backbone
Louvain: 0.63
In [56]:
print 'A = Meta labels'
print 'B = Random Backbone'

d = {
    ('Meta label','AB'):[],
    ('Original proximity','AB'):[],
}

GO = generate_original_graph(Gp)
compute_louvain(GO)

for GR in generate_n_random_graphs(GO,100,e_original-e_metric):
    compute_louvain(GR)

    _, _, A, _ = get_graph_variables(GO, module_attribute)
    _, _, B, _ = get_graph_variables(GR, 'module-louvain')
    CA = Clustering(clus2elm_dict=A)
    CB = Clustering(clus2elm_dict=B)
    d[('Meta label','AB')].append( element_sim(CA,CB) )
    
    _, _, A, _ = get_graph_variables(GO, 'module-louvain')
    _, _, B, _ = get_graph_variables(GR, 'module-louvain')
    CA = Clustering(clus2elm_dict=A)
    CB = Clustering(clus2elm_dict=B)
    d[('Original proximity','AB')].append( element_sim(CA,CB) )
    
df = pd.DataFrame.from_dict(d)
#print df
df = df.apply(['mean','std'], axis=0).T
display(df)
A = Meta labels
B = Random Backbone
mean std
Meta label AB 0.29 0.04
Original proximity AB 0.28 0.04