From bffb5a6c064c49a87f83435368a4f8f891b4e46e Mon Sep 17 00:00:00 2001
From: Yuren Hao
Date: Tue, 24 Feb 2026 08:53:02 +0000
Subject: Add edge connectivity analysis, use official network_evaluation scripts

- Switch accuracy computation to official network_evaluation scripts
  (clustering_accuracy with graph-tool NMI/AMI and sklearn ARI)
- Add minimum edge cut / log10(n) and well-connectedness stats
- Add edge connectivity boxplots and well-connected fraction bar chart
- Add "What I Learned and Open Questions" section to discussion
- Fix author name and minor LaTeX issues
---
 scripts/compute_stats.py | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

(limited to 'scripts/compute_stats.py')

diff --git a/scripts/compute_stats.py b/scripts/compute_stats.py
index 2e88252..6f21f63 100644
--- a/scripts/compute_stats.py
+++ b/scripts/compute_stats.py
@@ -8,6 +8,7 @@ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
 
 import numpy as np
 import pandas as pd
+import igraph as ig
 
 from config import NETWORKS, METHODS, RESULTS_DIR
 from load_data import load_edge_list, load_communities
@@ -50,7 +51,8 @@ def compute_cluster_stats(network_name, method_name, com_path):
 
     # Per-cluster stats
     cluster_details = []
-    for com_id, nodes in sorted(non_singleton_coms.items()):
+    total_clusters = len(non_singleton_coms)
+    for ci, (com_id, nodes) in enumerate(sorted(non_singleton_coms.items())):
         n = len(nodes)
         # Internal edges
         m_internal = 0
@@ -67,6 +69,21 @@ def compute_cluster_stats(network_name, method_name, com_path):
         degree_density = m_internal / n if n > 0 else 0.0
         conductance = c_boundary / (2 * m_internal + c_boundary) if (2 * m_internal + c_boundary) > 0 else 0.0
 
+        # Minimum edge cut via igraph
+        mincut = 0
+        if n >= 2 and m_internal >= 1:
+            node_list = sorted(nodes)
+            local_map = {nd: i for i, nd in enumerate(node_list)}
+            edges = []
+            for nd in node_list:
+                for nbr in neighbors.get(nd, set()):
+                    if nbr in nodes and local_map[nd] < local_map[nbr]:
+                        edges.append((local_map[nd], local_map[nbr]))
+            sg = ig.Graph(n=n, edges=edges, directed=False)
+            mincut = sg.mincut().value
+
+        mincut_over_log10n = mincut / np.log10(n) if n > 1 else 0.0
+
         cluster_details.append({
             "com_id": com_id,
             "n": n,
@@ -75,8 +92,13 @@ def compute_cluster_stats(network_name, method_name, com_path):
             "edge_density": edge_density,
             "degree_density": degree_density,
             "conductance": conductance,
+            "mincut": int(mincut),
+            "mincut_over_log10n": mincut_over_log10n,
         })
 
+        if (ci + 1) % 500 == 0:
+            print(f" ... {ci+1}/{total_clusters} clusters processed")
+
     # Per-node mixing parameter
     mixing_params = []
     for node in all_nodes:
@@ -107,6 +129,12 @@ def compute_cluster_stats(network_name, method_name, com_path):
         "median_edge_density": np.median([d["edge_density"] for d in cluster_details]) if cluster_details else 0,
         "mean_conductance": np.mean([d["conductance"] for d in cluster_details]) if cluster_details else 0,
         "mean_degree_density": np.mean([d["degree_density"] for d in cluster_details]) if cluster_details else 0,
+        "mean_mincut": np.mean([d["mincut"] for d in cluster_details]) if cluster_details else 0,
+        "median_mincut": np.median([d["mincut"] for d in cluster_details]) if cluster_details else 0,
+        "mean_mincut_over_log10n": np.mean([d["mincut_over_log10n"] for d in cluster_details]) if cluster_details else 0,
+        "n_connected": sum(1 for d in cluster_details if d["mincut"] > 0),
+        "n_disconnected": sum(1 for d in cluster_details if d["mincut"] == 0),
+        "n_wellconnected": sum(1 for d in cluster_details if d["mincut"] > np.log10(d["n"])),
     }
 
     return summary, cluster_details, mixing_params
-- 
cgit v1.2.3