summary refs log tree commit diff
path: root/scripts/compute_stats.py
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/compute_stats.py')
-rw-r--r-- scripts/compute_stats.py 30
1 files changed, 29 insertions, 1 deletions
diff --git a/scripts/compute_stats.py b/scripts/compute_stats.py
index 2e88252..6f21f63 100644
--- a/scripts/compute_stats.py
+++ b/scripts/compute_stats.py
@@ -8,6 +8,7 @@ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
import numpy as np
import pandas as pd
+import igraph as ig
from config import NETWORKS, METHODS, RESULTS_DIR
from load_data import load_edge_list, load_communities
@@ -50,7 +51,8 @@ def compute_cluster_stats(network_name, method_name, com_path):
# Per-cluster stats
cluster_details = []
- for com_id, nodes in sorted(non_singleton_coms.items()):
+ total_clusters = len(non_singleton_coms)
+ for ci, (com_id, nodes) in enumerate(sorted(non_singleton_coms.items())):
n = len(nodes)
# Internal edges
m_internal = 0
@@ -67,6 +69,21 @@ def compute_cluster_stats(network_name, method_name, com_path):
degree_density = m_internal / n if n > 0 else 0.0
conductance = c_boundary / (2 * m_internal + c_boundary) if (2 * m_internal + c_boundary) > 0 else 0.0
+ # Minimum edge cut via igraph
+ mincut = 0
+ if n >= 2 and m_internal >= 1:
+ node_list = sorted(nodes)
+ local_map = {nd: i for i, nd in enumerate(node_list)}
+ edges = []
+ for nd in node_list:
+ for nbr in neighbors.get(nd, set()):
+ if nbr in nodes and local_map[nd] < local_map[nbr]:
+ edges.append((local_map[nd], local_map[nbr]))
+ sg = ig.Graph(n=n, edges=edges, directed=False)
+ mincut = sg.mincut().value
+
+ mincut_over_log10n = mincut / np.log10(n) if n > 1 else 0.0
+
cluster_details.append({
"com_id": com_id,
"n": n,
@@ -75,8 +92,13 @@ def compute_cluster_stats(network_name, method_name, com_path):
"edge_density": edge_density,
"degree_density": degree_density,
"conductance": conductance,
+ "mincut": int(mincut),
+ "mincut_over_log10n": mincut_over_log10n,
})
+ if (ci + 1) % 500 == 0:
+ print(f" ... {ci+1}/{total_clusters} clusters processed")
+
# Per-node mixing parameter
mixing_params = []
for node in all_nodes:
@@ -107,6 +129,12 @@ def compute_cluster_stats(network_name, method_name, com_path):
"median_edge_density": np.median([d["edge_density"] for d in cluster_details]) if cluster_details else 0,
"mean_conductance": np.mean([d["conductance"] for d in cluster_details]) if cluster_details else 0,
"mean_degree_density": np.mean([d["degree_density"] for d in cluster_details]) if cluster_details else 0,
+ "mean_mincut": np.mean([d["mincut"] for d in cluster_details]) if cluster_details else 0,
+ "median_mincut": np.median([d["mincut"] for d in cluster_details]) if cluster_details else 0,
+ "mean_mincut_over_log10n": np.mean([d["mincut_over_log10n"] for d in cluster_details]) if cluster_details else 0,
+ "n_connected": sum(1 for d in cluster_details if d["mincut"] > 0),
+ "n_disconnected": sum(1 for d in cluster_details if d["mincut"] == 0),
+ "n_wellconnected": sum(1 for d in cluster_details if d["mincut"] > np.log10(d["n"])),
}
return summary, cluster_details, mixing_params