Network & Metric Backbone comparison¶

Network: High school¶

Reference: R. Mastrandrea, J. Fournet, A. Barrat, Contact patterns in a high school: a comparison between data collected using wearable sensors, contact diaries and friendship surveys. PLoS ONE 10(9): e0136497 (2015).

Original data: http://www.sociopatterns.org/datasets/high-school-contact-and-friendship-networks/

%matplotlib inline
from __future__ import division
import numpy as np
import pandas as pd
pd.set_option('display.precision', 2)
# Network
import networkx as nx
import community # louvain
from fa2 import ForceAtlas2
from distanceclosure.utils import _dist2prox as dist2prox
# Matplotlib
import matplotlib as mpl
import matplotlib.style
mpl.style.use('default')
mpl.rcParams['mathtext.fontset'] = 'cm'
mpl.rcParams['mathtext.rm'] = 'serif'
import matplotlib.pyplot as plt
# Others
import math
import random
from IPython.core.display import display, Math
from collections import OrderedDict
from scipy.stats import entropy
# clusim
from clusim.clustering import Clustering
from clusim.clusimelement import element_sim, element_sim_elscore

Init parameters¶

# SocioPatterns Network
project = 'high-school'
# The normalization convertion from 
normalization = 'social' # [options:] 'time', 'time_all' or 'social'
# Network File
rGpickle  = 'results/%s/%s-%s.gpickle' % (project, project, normalization)
# The Networkx node attribute containing the Meta labels
module_attribute = 'class'
# Metadata nodecolors
dict_meta_color = {
    '2BIO1':'#ff0000', #red
    '2BIO2':'#ff6666',
    '2BIO3':'#990000',
    'MP'   :'#0000ff', #Blue
    'MP*1' :'#00007f',
    'MP*2' :'#7f7fff',
    'PC*'  :'#004c00', #Green
    'PC'   :'#66b266',
    'PSI*' :'#ffa500'
}
# Title for plots 
project_title = 'High school'

Loading Network¶

Both metric and the ultrametric backbones have been computed with an the distanceclosure package. Below we simply load a NetworkX graph that contains metric and semi-metric information for each edge.

G = nx.read_gpickle(rGpickle)

Helper functions¶

These helper functions retrive information from the NetworkX Graph object

def get_graph_variables(G, *arg, **kwargs):
    dM = nx.get_node_attributes(G, *arg)
    s = set(dM.values())
    n = len(s)
    sM = { m : set([k for k,v in dM.items() if v==m]) for m in s }
    #
    return n, s, sM, dM

# Strip G to the original Graph, without additional metric closure edges.
def generate_original_graph(G):
    GO = G.copy()
    edges2remove = [(i,j) for i,j,d in G.edges(data=True) if 'original' not in d]
    GO.remove_edges_from(edges2remove)
    return GO

GO = generate_original_graph(G)

# Metric Graph, only metric edges
def generate_metric_graph(G):
    GM = G.copy()
    edges2remove = [(i,j) for i,j,d in G.edges(data=True) if d['metric']==False]
    GM.remove_edges_from(edges2remove)
    return GM

GM = generate_metric_graph(G)

Graph Statistics¶

Displays some basic statistics for comparison.

nO, sO, sMO, dMO = get_graph_variables(GO, module_attribute)

n = G.number_of_nodes()
isolates = len( list( nx.isolates(G) ) )
isolates_percent = isolates/n

e_possible = int(( (n*n) - n) / 2)
e_total = G.number_of_edges()

original_components = nx.number_connected_components(GO)

n_metalabels = len(np.unique(nx.get_node_attributes(GO, module_attribute).values()))

e_original = 0
e_metric = 0
e_ultrametric = 0
e_semimetric = 0
e_s_gt_1 = 0
e_s_gt_1_original = 0
e_d_eq_infty = 0
e_bij_gt_1 = 0
e_bji_gt_1 = 0
distortion = 0

for eid,(i,j,d) in enumerate( G.edges(data=True), start=0):

    # Original Edges
    if (d.get('original')==True):
        e_original += 1
    # Metric Edges
    if (d.get('metric')==True):
        e_metric += 1 
    # UltraMetric Edges
    if (d.get('ultrametric')==True):
        e_ultrametric += 1

    # Semi-metric edges
    if (d.get('metric')==False):
        e_semimetric += 1

    # S values
    if (d.get('s_value'))>1.0:
        e_s_gt_1 += 1
    if (d.get('s_value'))>1.0 and (d.get('original')==True):
        e_s_gt_1_original += 1

    if (d.get('distance')==np.inf):
        e_d_eq_infty += 1

    # B_ij values
    if (d.get('b_ij_value'))>1.0:
        e_bij_gt_1 += 1

    # B_ji values
    if (d.get('b_ji_value'))>1.0:
        e_bji_gt_1 += 1

    # Distortion

    distortion += abs(dist2prox(d['distance_metric_closure']) - d.get('proximity'))
    

distortion_norm = (2 * distortion) / (n * (n - 1))
e_original_percent = e_original/e_total
e_metric_percent = e_metric/e_original
e_semimetric_percent = e_semimetric/e_total
e_s_gt_1_percent = e_s_gt_1/e_total
e_s_gt_1_original_percent = e_s_gt_1_original/e_original
e_d_eq_infty_percent = e_d_eq_infty/e_total
e_bij_gt_1_percent = e_bij_gt_1/e_total
e_bji_gt_1_percent = e_bji_gt_1/e_total

print 'Meta-labels: {:,d}'.format(n_metalabels) 

display(Math('D_w'))
print 'Nodes: {:,d}'.format(n)
print 'Possible Edges: {:,d}'.format(e_possible)
print 'Edges: {:,d} ({:.2%} of possible edges)'.format(e_original , e_original_percent)
print 'Connected Components: {:,d}'.format(original_components)
print 'Isolates: {:,d} ({:.2%})'.format(isolates, isolates_percent)
if (original_components == 1) and (e_possible != e_total):
    raise Exception('After Closure, the graph must be fully connected ({:d} != {:d}), given there was only one connected component'.format(e_possible, e_total))

display(Math('B_w'))
print 'Metric Edges: {:,d} ({:.2%} of the original {:,d} edges)'.format(e_metric, e_metric_percent, e_original)

display(Math('D^C_w'))
print 'Semi-Metric edges: {:,d} ({:.2%})'.format(e_semimetric, e_semimetric_percent)
print 'S>1: {:,d} ({:.2%} of original edges; {:.2%} of the total edges)'.format(e_s_gt_1, e_s_gt_1_original_percent, e_s_gt_1_percent)
print 'B_ij>1 , B_ji>1: {:,d} ({:.2%} of total) , {:,d} ({:.2%} of total)'.format(e_bij_gt_1, e_bij_gt_1_percent , e_bji_gt_1, e_bji_gt_1_percent)
print 'Edges where D=\infty: {:,d} ({:.2%} of the total {:,d} edges)'.format(e_d_eq_infty, e_d_eq_infty_percent, e_total)

print 'Distortion \delta (\Delta): {:.4f} ({:.2f})'.format(distortion_norm, distortion)

Meta-labels: 9

Nodes: 327
Possible Edges: 53,301
Edges: 5,818 (10.92% of possible edges)
Connected Components: 1
Isolates: 0 (0.00%)

Metric Edges: 603 (10.36% of the original 5,818 edges)

Semi-Metric edges: 52,698 (98.87%)
S>1: 5,215 (89.64% of original edges; 9.78% of the total edges)
B_ij>1 , B_ji>1: 47,482 (89.08% of total) , 47,483 (89.08% of total)
Edges where D=\infty: 47,483 (89.08% of the total 53,301 edges)
Distortion \delta (\Delta): 0.0094 (501.18)

(Null Model) Threshold Backbone¶

The threshold backbone is a null model where edges with the smallest proximity are removed until the same number of edges as the metric backbone is achieved.

# Remove the lowest number of edges that keeps the network with the same number of edges as the G_metric (e_metric).
def generate_threshold_graph(G, edges_to_keep=0):
    GT = G.copy()
    edges2remove = sorted(GT.edges(data=True), key=lambda x: x[2]['weight'])[ : -edges_to_keep ]
    GT.remove_edges_from(edges2remove)
    return GT

GT = generate_threshold_graph(GO, e_metric)

(Null Model) Random Backbone¶

The random backbone is a null model where edges are removed at random until the same number of edges as the metric backbone network is achieved.

# Generate a random graph and remove n number of edges
def generate_random_graph(G, edges_to_keep=0):
    GR = G.copy()
    edges2remove = random.sample(GR.edges(data=True), edges_to_keep)
    GR.remove_edges_from(edges2remove)
    return GR

# A generative function to yield 'n' random graphs
def generate_n_random_graphs(G, n=1, *arg, **kwargs):
    for i in xrange(n):
        yield generate_random_graph(G, *arg)

GR = generate_random_graph(GO, e_original-e_metric)

Modularity Algorithms¶

Louvain Modularity¶

def compute_louvain(G):
    dM = community.best_partition(G)
    nx.set_node_attributes(G, name='module-louvain', values=dM)

# Original
compute_louvain(GO)
n, s, sM, dM = get_graph_variables(GO, 'module-louvain')
print "G Original Louvain : {:d}".format(n)

# Metric
compute_louvain(GM)
n, s, sM, dM = get_graph_variables(GM, 'module-louvain')
print "G Metric Louvain   : {:d}".format(n)

# Threshold
compute_louvain(GT)
n, s, sM, dM = get_graph_variables(GT, 'module-louvain')
print "G Threshold Louvain: {:d}".format(n)

# Random
compute_louvain(GR)
ns = []
for i in xrange(100):
    _GR = generate_random_graph(GO, e_original-e_metric)
    compute_louvain(_GR)
    n, s, sM, dM = get_graph_variables(_GR, 'module-louvain')
    ns.append(n)
print "G Random Louvain   : {:.2f}±{:.2f}".format(np.mean(ns), np.std(ns))

G Original Louvain : 10
G Metric Louvain   : 17
G Threshold Louvain: 28
G Random Louvain   : 47.88±3.89

Visualizing Networks¶

Node layout is defined by a python implementation of Gephi's ForceAtlas2 algorithm.

forceatlas2 = ForceAtlas2(outboundAttractionDistribution=False, linLogMode=False, adjustSizes=False,
    edgeWeightInfluence=1.0, jitterTolerance=1.0, barnesHutOptimize=False, barnesHutTheta=1.2,
    multiThreaded=False, scalingRatio=1.2, strongGravityMode=False, gravity=1.0, verbose=True)
# Node position for NetworkX
pos = forceatlas2.forceatlas2_networkx_layout(GO, pos=None, iterations=2000)

100%|██████████| 2000/2000 [00:03<00:00, 658.74it/s]

('Repulsion forces', ' took ', '1.37', ' seconds')
('Gravitational forces', ' took ', '0.09', ' seconds')
('Attraction forces', ' took ', '0.37', ' seconds')
('AdjustSpeedAndApplyForces step', ' took ', '0.47', ' seconds')

Original Network¶

fig, ax = plt.subplots(1,1,figsize=(7,6), facecolor='w')

node_color = [dict_meta_color[d[module_attribute]] for n,d in GO.nodes(data=True)]
nx.draw_networkx_nodes(GO, ax=ax, pos=pos, cmap=plt.get_cmap('jet'), node_size=150, node_color=node_color, edgecolors='#b2b2b2')
nx.draw_networkx_edges(GO, pos=pos, ax=ax, edge_color='k', alpha=0.1, style='solid')

ax.set_title('{:s} - Original Graph - Meta-label modules'.format(project_title))
ax.axes.get_xaxis().set_visible(False); ax.axes.get_yaxis().set_visible(False)

plt.tight_layout()
#plt.show()
plt.savefig('images/{:s}/{:s}/graph_original_metalabels.png'.format(project,normalization), dpi=150)

fig,axes = plt.subplots(1,4,figsize=(10,3), facecolor='w')
((ax1,ax2,ax3,ax4)) = axes

node_size = 30
cmap = 'nipy_spectral'

# Original (Louvain)
ax = ax1
node_color = [d['module-louvain'] for n,d in GO.nodes(data=True)]
nx.draw_networkx_nodes(GO, ax=ax, cmap=plt.get_cmap(cmap), pos=pos, node_size=node_size, node_color=node_color, edgecolors='#b2b2b2')
edge_color = [d['weight'] for i,j,d in GO.edges(data=True)]
nx.draw_networkx_edges(GO, pos=pos, ax=ax, edge_color='k', alpha=0.1, style='solid')

# Metric (Louvain)
ax = ax2
node_color = [d['module-louvain'] for n,d in GM.nodes(data=True)]
nx.draw_networkx_nodes(GM, pos=pos, ax=ax, cmap=plt.get_cmap(cmap), node_size=node_size, node_color=node_color, edgecolors='#b2b2b2')
edge_color = [d['weight'] for i,j,d in GM.edges(data=True)]
nx.draw_networkx_edges(GM, pos=pos, ax=ax, edge_color='k', alpha=0.3, style='solid')

# Threshold (Louvain)
ax = ax3
node_color = [d['module-louvain'] for n,d in GT.nodes(data=True)]
nx.draw_networkx_nodes(GT, pos=pos, ax=ax, cmap=plt.get_cmap(cmap), node_size=node_size, node_color=node_color, edgecolors='#b2b2b2')
edge_color = [d['weight'] for i,j,d in GT.edges(data=True)]
nx.draw_networkx_edges(GT, pos=pos, ax=ax, edge_color='k', alpha=0.3, style='solid')

# Random (Louvain)
ax = ax4
node_color = [d['module-louvain'] for n,d in GR.nodes(data=True)]
nx.draw_networkx_nodes(GR, pos=pos, ax=ax, cmap=plt.get_cmap(cmap), node_size=node_size, node_color=node_color, edgecolors='#b2b2b2')
edge_color = [d['weight'] for i,j,d in GR.edges(data=True)]
nx.draw_networkx_edges(GR, pos=pos, ax=ax, edge_color='k', alpha=0.3, style='solid')


# Draw
for ax in axes.flatten():
    ax.tick_params(axis='both', which='both', bottom='off', top='off', left='off', right='off', labelbottom='off', labelleft='off')

ax1.set_title('Original')
ax2.set_title('Metric')
ax3.set_title('Threshold')
ax4.set_title('Random')

ax1.set_ylabel('Louvain', rotation=90, ha='center')

plt.tight_layout()
#plt.show()
plt.savefig('images/{:s}/{:s}/graph_comparison.png'.format(project,normalization), dpi=150)

fig,axes = plt.subplots(1,4,figsize=(10,3), facecolor='w')
((ax1,ax2,ax3,ax4)) = axes

node_size = 30
cmap = 'nipy_spectral'

forceatlas2 = ForceAtlas2(outboundAttractionDistribution=False, linLogMode=False, adjustSizes=False,
    edgeWeightInfluence=1.0, jitterTolerance=1.0, barnesHutOptimize=False, barnesHutTheta=1.2,
    multiThreaded=False, scalingRatio=1.2, strongGravityMode=False, gravity=1.0, verbose=False)
posm = forceatlas2.forceatlas2_networkx_layout(GM, pos=pos, iterations=2000)
post = forceatlas2.forceatlas2_networkx_layout(GT, pos=pos, iterations=2000)
posr = forceatlas2.forceatlas2_networkx_layout(GR, pos=pos, iterations=2000)

# Original (Louvain)
ax = ax1
node_color = [d['module-louvain'] for n,d in GO.nodes(data=True)]
nx.draw_networkx_nodes(GO, pos=pos, ax=ax, cmap=plt.get_cmap(cmap), node_size=node_size, node_color=node_color, edgecolors='#b2b2b2')
edge_color = [d['weight'] for i,j,d in GO.edges(data=True)]
nx.draw_networkx_edges(GO, pos=pos, ax=ax, edge_color='k', alpha=0.1, style='solid')

# Metric (Louvain)
ax = ax2
node_color = [d['module-louvain'] for n,d in GM.nodes(data=True)]
nx.draw_networkx_nodes(GM, pos=posm, ax=ax, cmap=plt.get_cmap(cmap), node_size=node_size, node_color=node_color, edgecolors='#b2b2b2')
edge_color = [d['weight'] for i,j,d in GM.edges(data=True)]
nx.draw_networkx_edges(GM, pos=posm, ax=ax, edge_color='k', alpha=0.3, style='solid')

# Threshold (Louvain)
ax = ax3
node_color = [d['module-louvain'] for n,d in GT.nodes(data=True)]
nx.draw_networkx_nodes(GT, pos=post, ax=ax, cmap=plt.get_cmap(cmap), node_size=node_size, node_color=node_color, edgecolors='#b2b2b2')
edge_color = [d['weight'] for i,j,d in GT.edges(data=True)]
nx.draw_networkx_edges(GT, pos=post, ax=ax, edge_color='k', alpha=0.3, style='solid')

# Random (Louvain)
ax = ax4
node_color = [d['module-louvain'] for n,d in GR.nodes(data=True)]
nx.draw_networkx_nodes(GR, pos=posr, ax=ax, cmap=plt.get_cmap(cmap), node_size=node_size, node_color=node_color, edgecolors='#b2b2b2')
edge_color = [d['weight'] for i,j,d in GR.edges(data=True)]
nx.draw_networkx_edges(GR, pos=posr, ax=ax, edge_color='k', alpha=0.3, style='solid')


# Draw
for ax in axes.flatten():
    ax.tick_params(axis='both', which='both', bottom='off', top='off', left='off', right='off', labelbottom='off', labelleft='off')

ax1.set_title('Original')
ax2.set_title('Metric')
ax3.set_title('Threshold')
ax4.set_title('Random')

ax1.set_ylabel('Louvain', rotation=90, ha='center')

plt.tight_layout()
#plt.show()
plt.savefig('images/{:s}/{:s}/graph_comparison_layout.png'.format(project,normalization), dpi=150)

Quantifying module similarity between Backbone and Original Network¶

Please refer to the index page for a descriptive detail over each formula.

\begin{equation} h_{A \to B} = \frac{ \sum_{i}^{m_A} H(\mathcal{A}_i) }{ m_A \cdot \log_2(m_B) } \quad , \quad h_{B \to A} = \frac{ \sum_{j}^{m_B} H(\mathcal{B}_j) }{ m_B \cdot \log_2(m_A) } \quad . \end{equation}

def calculate_h(A,B):
    # Intersection
    df_I = pd.DataFrame.from_dict( OrderedDict([( i , OrderedDict([(j,iv.intersection(jv)) for j,jv in B.items()]) ) for i,iv in A.items()]), orient='index')
    df_Ic = df_I.applymap(len) # size of sets
    ma, mb = df_I.shape
    #to == 'B'
    sA = pd.Series(A, name='A')
    sAc = sA.apply(len)
    df_BA = df_Ic.divide(sAc.values, axis='index')
    sH = df_BA.apply(entropy, axis=1)
    h_A2B = (1/(ma * math.log(mb,2))) * sH.sum()
    #to =='A'
    sB = pd.Series(B, name='B')
    sBc = sB.apply(len)
    df_AB = df_Ic.divide(sBc.values, axis='columns')
    sH = df_AB.apply(entropy, axis=0)
    h_B2A = (1/(mb * math.log(ma,2))) * sH.sum()
    return h_A2B, h_B2A

\begin{equation} y_{AB} = \frac{ \sum_{i}^{m_A} \sum_{j}^{m_B} P(A_i,B_j) }{ \sqrt{ m_A \cdot m_B} } \quad \text{where} \quad P(A_i,B_j) = \frac{ |A_i \cap B_j| }{ |A_i \cup B_j| } \quad . \end{equation}

def calculate_y(A,B):
    df_I = pd.DataFrame.from_dict( OrderedDict([( i , OrderedDict([(j,iv.intersection(jv)) for j,jv in B.items()]) ) for i,iv in A.items()]), orient='index')
    df_U = pd.DataFrame.from_dict( OrderedDict([( i , OrderedDict([(j,iv.union(jv)) for j,jv in B.items()]) ) for i,iv in A.items()]), orient='index')
    ma, mb = df_I.shape
    df_Ic = df_I.applymap(len)
    df_Uc = df_U.applymap(len)
    df_S = df_Ic.divide( df_Uc )
    return df_S.sum().sum() / math.sqrt(ma * mb)

\begin{equation} J_{A \to B} = \frac{ \sum_i^{m_A} \max_j^{m_B} P(A_i,B_j) }{ m_A } \quad , \quad J_{B \to A}= \frac{ \sum_j^{m_B} \max_i^{m_A} P(A_i,B_j) }{ m_B } \end{equation}

def calculate_j(A,B):
    df_I = pd.DataFrame.from_dict( OrderedDict([( i , OrderedDict([(j,iv.intersection(jv)) for j,jv in B.items()]) ) for i,iv in A.items()]), orient='index')
    df_U = pd.DataFrame.from_dict( OrderedDict([( i , OrderedDict([(j,iv.union(jv)) for j,jv in B.items()]) ) for i,iv in A.items()]), orient='index')
    ma, mb = df_I.shape
    df_Ic = df_I.applymap(len)
    df_Uc = df_U.applymap(len)
    df_S = df_Ic.divide( df_Uc )
    sMaxA, sMaxB = df_S.max(axis=0), df_S.max(axis=1)
    jA2B = sMaxB.sum() / ma
    jB2A = sMaxA.sum() / mb
    return jA2B, jB2A

$h_{A \to B}$ and $h_{B \to A}$¶

print 'A = Meta labels'
print 'B = Original proximity'

GO = generate_original_graph(G)

compute_louvain(GO)

_, _, A, _ = get_graph_variables(GO, module_attribute)
_, _, B, _ = get_graph_variables(GO, 'module-louvain')
hA2B, hB2A = calculate_h(A,B)
print "h_(A->B): {:.2f} h_(B->A): {:.2f}".format(hA2B,hB2A)

A = Meta labels
B = Original proximity
h_(A->B): 0.07 h_(B->A): 0.06

print 'A = Meta labels'
print 'B = Metric Backbone'

GO = generate_original_graph(G)
GM = generate_metric_graph(GO)

compute_louvain(GO)
compute_louvain(GM)

_, _, A, _ = get_graph_variables(GO, module_attribute)
_, _, B, _ = get_graph_variables(GM, 'module-louvain')
hA2B, hB2A = calculate_h(A,B)
print "h_(A->B): {:.2f} h_(B->A): {:.2f}".format(hA2B,hB2A)
print
print 'A = Original proximity'
print 'B = Metric Backbone'

_, _, A, _ = get_graph_variables(GO, 'module-louvain')
_, _, B, _ = get_graph_variables(GM, 'module-louvain')
hA2B, hB2A = calculate_h(A,B)
print "h_(A->B): {:.2f} h_(B->A): {:.2f}".format(hA2B,hB2A)

A = Meta labels
B = Metric Backbone
h_(A->B): 0.15 h_(B->A): 0.08

A = Original proximity
B = Metric Backbone
h_(A->B): 0.10 h_(B->A): 0.03

print 'A = Meta labels'
print 'B = Threshold Backbone'

GO = generate_original_graph(G)
GT = generate_threshold_graph(GO, e_metric)

compute_louvain(GO)
compute_louvain(GT)

_, _, A, _ = get_graph_variables(GO, module_attribute)
_, _, B, _ = get_graph_variables(GT, 'module-louvain')
hA2B, hB2A = calculate_h(A,B)
print "h_(A->B): {:.2f} h_(B->A): {:.2f}".format(hA2B,hB2A)
print
print 'A = Original proximity'
print 'B = Threshold (metric) Backbone'

_, _, A, _ = get_graph_variables(GO, 'module-louvain')
_, _, B, _ = get_graph_variables(GT, 'module-louvain')
hA2B, hB2A = calculate_h(A,B)
print "h_(A->B): {:.2f} h_(B->A): {:.2f}".format(hA2B,hB2A)

A = Meta labels
B = Threshold Backbone
h_(A->B): 0.18 h_(B->A): 0.04

A = Original proximity
B = Threshold (metric) Backbone
h_(A->B): 0.13 h_(B->A): 0.01

print 'A = Meta labels / Original proximity'
print 'B = Random Backbone'

d = {
    ('Meta labels','h_(A->B)'):[],
    ('Meta labels','h_(B->A)'):[],
    ('Original proximity','h_(A->B)'):[],
    ('Original proximity','h_(B->A)'):[],
}

GO = generate_original_graph(G)
compute_louvain(GO)

for GRp in generate_n_random_graphs(GO,100,e_original-e_metric):
    compute_louvain(GRp)

    _, _, A, _ = get_graph_variables(GO, module_attribute)
    _, _, B, _ = get_graph_variables(GR, 'module-louvain')
    hA2B, hB2A = calculate_h(A,B)
    d[('Meta labels','h_(A->B)')].append( hA2B )
    d[('Meta labels','h_(B->A)')].append( hB2A )
    
    _, _, A, _ = get_graph_variables(GO, 'module-louvain')
    _, _, B, _ = get_graph_variables(GR, 'module-louvain')
    hA2B, hB2A = calculate_h(A,B)
    d[('Original proximity','h_(A->B)')].append( hA2B )
    d[('Original proximity','h_(B->A)')].append( hB2A )
    
df = pd.DataFrame.from_dict(d)
#print df
df = df.apply(['mean','std'], axis=0).T
display(df)

A = Meta labels / Original proximity
B = Random Backbone

$y_{AB}$ values¶

Metric / Ultrametric Backbone¶

print 'A = Meta labels'
print 'B = Original proximity'

GOp = generate_original_graph(G)
compute_louvain(GOp)

_, _, A, _ = get_graph_variables(GOp, module_attribute)
_, _, B, _ = get_graph_variables(GOp, 'module-louvain')
print "y_(AB): {:.3f}".format(calculate_y(A,B))

A = Meta labels
B = Original proximity
y_(AB): 0.880

GO = generate_original_graph(G)
GM = generate_metric_graph(GO)

compute_louvain(GO)
compute_louvain(GM)

print 'A = Meta labels'
print 'B = Metric Backbone'

_, _, A, _ = get_graph_variables(GO, module_attribute)
_, _, B, _ = get_graph_variables(GM, 'module-louvain')
print "y_(AB): {:.3f}".format(calculate_y(A,B))
print
print 'A = Original Graph'
print 'B = Metric Backbone'

_, _, A, _ = get_graph_variables(GO, 'module-louvain')
_, _, B, _ = get_graph_variables(GM, 'module-louvain')
print "y_(AB): {:.3f}".format(calculate_y(A,B))

A = Meta labels
B = Metric Backbone
y_(AB): 0.691

A = Original Graph
B = Metric Backbone
y_(AB): 0.754

GO = generate_original_graph(G)
GT = generate_threshold_graph(GO, e_metric)

compute_louvain(GO)
compute_louvain(GT)

print 'A = Meta labels'
print 'B = Threshold (metric) Backbone'

_, _, A, _ = get_graph_variables(GO, module_attribute)
_, _, B, _ = get_graph_variables(GT, 'module-louvain')
print "y_(AB): {:.3f}".format(calculate_y(A,B))
print
print 'A = Original proximity'
print 'B = Threshold Backbone'

_, _, A, _ = get_graph_variables(GO, 'module-louvain')
_, _, B, _ = get_graph_variables(GT, 'module-louvain')
print "y_(AB): {:.3f}".format(calculate_y(A,B))

A = Meta labels
B = Threshold (metric) Backbone
y_(AB): 0.544

A = Original proximity
B = Threshold Backbone
y_(AB): 0.595

print 'A = Meta labels / Original proximity'
print 'B = Random Backbone'

d = {
    ('Meta label','y_(AB)'):[],
    ('Original proximity','y_(AB)'):[],
}

GO = generate_original_graph(G)
compute_louvain(GO)

for GRp in generate_n_random_graphs(GO,100,e_original-e_metric):
    compute_louvain(GRp)
    
    _, _, A, _ = get_graph_variables(GO, module_attribute)
    _, _, B, _ = get_graph_variables(GR, 'module-louvain')
    d[('Meta label','y_(AB)')].append( calculate_y(A,B) )
    
    _, _, A, _ = get_graph_variables(GO, 'module-louvain')
    _, _, B, _ = get_graph_variables(GR, 'module-louvain')
    d[('Original proximity','y_(AB)')].append( calculate_y(A,B) )
    
df = pd.DataFrame.from_dict(d)
#print df
df = df.apply(['mean','std'], axis=0).T
display(df)

A = Meta labels / Original proximity
B = Random Backbone

$J_{A \to B}$ and $J_{B \to A}$ values¶

print 'A = Meta labels'
print 'B = Original proximity'

GO = generate_original_graph(G)

compute_louvain(GO)

_, _, A, _ = get_graph_variables(GO, module_attribute)
_, _, B, _ = get_graph_variables(GO, 'module-louvain')
jA2B, jB2A = calculate_j(A,B)
print "J_(A->B): {:.2f} J_(B->A): {:.2f}".format(jA2B,jB2A)

A = Meta labels
B = Original proximity
J_(A->B): 0.89 J_(B->A): 0.81

print 'A = Meta labels'
print 'B = Metric Backbone'

GO = generate_original_graph(G)
GM = generate_metric_graph(GO)

compute_louvain(GO)
compute_louvain(GM)

_, _, A, _ = get_graph_variables(GO, module_attribute)
_, _, B, _ = get_graph_variables(GM, 'module-louvain')
jA2B, jB2A = calculate_j(A,B)
print "J_(A->B): {:.2f} J_(B->A): {:.2f}".format(jA2B,jB2A)
print
print 'A = Original proximity'
print 'B = Metric Backbone'

_, _, A, _ = get_graph_variables(GO, 'module-louvain')
_, _, B, _ = get_graph_variables(GM, 'module-louvain')
jA2B, jB2A = calculate_j(A,B)
print "J_(A->B): {:.2f} J_(B->A): {:.2f}".format(jA2B,jB2A)

A = Meta labels
B = Metric Backbone
J_(A->B): 0.75 J_(B->A): 0.47

A = Original proximity
B = Metric Backbone
J_(A->B): 0.82 J_(B->A): 0.56

print 'A = Meta labels'
print 'B = Threshold Backbone'

GO = generate_original_graph(G)
GT = generate_threshold_graph(GO, e_metric)

compute_louvain(GO)
compute_louvain(GT)

_, _, A, _ = get_graph_variables(GO, module_attribute)
_, _, B, _ = get_graph_variables(GT, 'module-louvain')
jA2B, jB2A = calculate_j(A,B)
print "J_(A->B): {:.2f} J_(B->A): {:.2f}".format(jA2B,jB2A)
print 
print 'A = Original proximity'
print 'B = Threshold Backbone'

_, _, A, _ = get_graph_variables(GO, 'module-louvain')
_, _, B, _ = get_graph_variables(GT, 'module-louvain')
jA2B, jB2A = calculate_j(A,B)
print "J_(A->B): {:.2f} J_(B->A): {:.2f}".format(jA2B,jB2A)

A = Meta labels
B = Threshold Backbone
J_(A->B): 0.67 J_(B->A): 0.29

A = Original proximity
B = Threshold Backbone
J_(A->B): 0.75 J_(B->A): 0.35

print 'A = Meta labels -> Louvain, InfoMap'
print 'B = Random (metric) Proximity'

d = {
    ('Meta label','J_(A->B)'):[],
    ('Meta label','J_(B->A)'):[],
    #
    ('Original proximity','J_(A->B)'):[],
    ('Original proximity','J_(B->A)'):[],
}

GO = generate_original_graph(G)

compute_louvain(GO)

for GRp in generate_n_random_graphs(GO,100,e_original-e_metric):
    compute_louvain(GR)

    _, _, A, _ = get_graph_variables(GO, module_attribute)
    _, _, B, _ = get_graph_variables(GR, 'module-louvain')
    jA2B, jB2A = calculate_j(A,B)
    d[('Meta label','J_(A->B)')].append( jA2B )
    d[('Meta label','J_(B->A)')].append( jB2A )
    
    _, _, A, _ = get_graph_variables(GO, 'module-louvain')
    _, _, B, _ = get_graph_variables(GR, 'module-louvain')
    jA2B, jB2A = calculate_j(A,B)
    d[('Original proximity','J_(A->B)')].append( jA2B )
    d[('Original proximity','J_(B->A)')].append( jB2A )
    
df = pd.DataFrame.from_dict(d)
#print df
df = df.apply(['mean','std'], axis=0).T
display(df)

A = Meta labels -> Louvain, InfoMap
B = Random (metric) Proximity

Clusim¶

Paper: A.J. Gates, I.B. Wood, W.P. Hetrick, Y. Ahn. (2017). On comparing clusterings: an element-centric framework unifies overlaps and hierarchy. arXiv preprint arXiv:1706.06136

print 'A = Meta labels'
print 'B = Original Proximity'

GO = generate_original_graph(G)
compute_louvain(GO)

_, _, A, _ = get_graph_variables(GO, module_attribute)
_, _, B, _ = get_graph_variables(GO, 'module-louvain')
CA = Clustering(clus2elm_dict=A)
CB = Clustering(clus2elm_dict=B)
print "Clusim: {:.2f}".format( element_sim(CA,CB) )

A = Meta labels
B = Original Proximity
Clusim: 0.87

print 'A = Meta labels'
print 'B = Metric Backbone'

GO = generate_original_graph(G)
GM = generate_metric_graph(GO)

compute_louvain(GO)
compute_louvain(GM)

_, _, A, _ = get_graph_variables(GO, module_attribute)
_, _, B, _ = get_graph_variables(GM, 'module-louvain')
CA = Clustering(clus2elm_dict=A)
CB = Clustering(clus2elm_dict=B)
print "Clusim: {:.2f}".format( element_sim(CA,CB) )

print 'A = Original Proximity'
print 'B = Metric Backbone'

_, _, A, _ = get_graph_variables(GO, 'module-louvain')
_, _, B, _ = get_graph_variables(GM, 'module-louvain')
CA = Clustering(clus2elm_dict=A)
CB = Clustering(clus2elm_dict=B)
print "Clusim: {:.2f}".format( element_sim(CA,CB) )

A = Meta labels
B = Metric Backbone
Clusim: 0.66
A = Original Proximity
B = Metric Backbone
Clusim: 0.72

print 'A = Meta labels'
print 'B = Threshold Backbone'

GO = generate_original_graph(G)
GT = generate_threshold_graph(GO, e_metric)

compute_louvain(GO)
compute_louvain(GT)

_, _, A, _ = get_graph_variables(GO, module_attribute)
_, _, B, _ = get_graph_variables(GT, 'module-louvain')
CA = Clustering(clus2elm_dict=A)
CB = Clustering(clus2elm_dict=B)
print "Louvain: {:.2f}".format( element_sim(CA,CB) )

print 'A = Original Proximity'
print 'B = Threshold Backbone'

_, _, A, _ = get_graph_variables(GO, 'module-louvain')
_, _, B, _ = get_graph_variables(GT, 'module-louvain')
CA = Clustering(clus2elm_dict=A)
CB = Clustering(clus2elm_dict=B)
print "Louvain: {:.2f}".format( element_sim(CA,CB) )

A = Meta labels
B = Threshold Backbone
Louvain: 0.56
A = Original Proximity
B = Threshold Backbone
Louvain: 0.62

print 'A = Meta labels'
print 'B = Random Backbone'

d = {
    ('Meta label','AB'):[],
    ('Original proximity','AB'):[],
}

GO = generate_original_graph(G)
compute_louvain(GO)

for GR in generate_n_random_graphs(GO,100,e_original-e_metric):
    compute_louvain(GR)

    _, _, A, _ = get_graph_variables(GO, module_attribute)
    _, _, B, _ = get_graph_variables(GR, 'module-louvain')
    CA = Clustering(clus2elm_dict=A)
    CB = Clustering(clus2elm_dict=B)
    d[('Meta label','AB')].append( element_sim(CA,CB) )
    
    _, _, A, _ = get_graph_variables(GO, 'module-louvain')
    _, _, B, _ = get_graph_variables(GR, 'module-louvain')
    CA = Clustering(clus2elm_dict=A)
    CB = Clustering(clus2elm_dict=B)
    d[('Original proximity','AB')].append( element_sim(CA,CB) )
    
df = pd.DataFrame.from_dict(d)
#print df
df = df.apply(['mean','std'], axis=0).T
display(df)

A = Meta labels
B = Random Backbone

		mean	std
Meta labels	h_(A->B)	0.31	1.12e-16
Meta labels	h_(B->A)	0.07	8.37e-17
Original proximity	h_(A->B)	0.29	3.35e-16
Original proximity	h_(B->A)	0.08	1.95e-16

		mean	std
Meta label	y_(AB)	0.38	5.58e-16
Original proximity	y_(AB)	0.37	3.35e-16

		mean	std
Meta label	J_(A->B)	0.36	9.48e-16
Meta label	J_(B->A)	0.14	2.23e-16
Original proximity	J_(A->B)	0.33	2.23e-16
Original proximity	J_(B->A)	0.13	1.12e-16