diff options
| author | Yuren Hao <blackhao0426@gmail.com> | 2026-02-24 08:53:02 +0000 |
|---|---|---|
| committer | YurenHao0426 <blackhao0426@gmail.com> | 2026-02-24 08:53:02 +0000 |
| commit | bffb5a6c064c49a87f83435368a4f8f891b4e46e (patch) | |
| tree | 90c6ce4dd373a91331d32c585e26e08bd2d016c9 /scripts/compute_stats.py | |
| parent | 8f63cf9f41bbdb8d55cd4679872d2b4ae2129324 (diff) | |
- Switch accuracy computation to official network_evaluation scripts
(clustering_accuracy with graph-tool NMI/AMI and sklearn ARI)
- Add minimum edge cut / log10(n) and well-connectedness stats
- Add edge connectivity boxplots and well-connected fraction bar chart
- Add "What I Learned and Open Questions" section to discussion
- Fix author name and minor LaTeX issues
Diffstat (limited to 'scripts/compute_stats.py')
| -rw-r--r-- | scripts/compute_stats.py | 30 |
1 file changed, 29 insertions, 1 deletion
diff --git a/scripts/compute_stats.py b/scripts/compute_stats.py index 2e88252..6f21f63 100644 --- a/scripts/compute_stats.py +++ b/scripts/compute_stats.py @@ -8,6 +8,7 @@ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) import numpy as np import pandas as pd +import igraph as ig from config import NETWORKS, METHODS, RESULTS_DIR from load_data import load_edge_list, load_communities @@ -50,7 +51,8 @@ def compute_cluster_stats(network_name, method_name, com_path): # Per-cluster stats cluster_details = [] - for com_id, nodes in sorted(non_singleton_coms.items()): + total_clusters = len(non_singleton_coms) + for ci, (com_id, nodes) in enumerate(sorted(non_singleton_coms.items())): n = len(nodes) # Internal edges m_internal = 0 @@ -67,6 +69,21 @@ def compute_cluster_stats(network_name, method_name, com_path): degree_density = m_internal / n if n > 0 else 0.0 conductance = c_boundary / (2 * m_internal + c_boundary) if (2 * m_internal + c_boundary) > 0 else 0.0 + # Minimum edge cut via igraph + mincut = 0 + if n >= 2 and m_internal >= 1: + node_list = sorted(nodes) + local_map = {nd: i for i, nd in enumerate(node_list)} + edges = [] + for nd in node_list: + for nbr in neighbors.get(nd, set()): + if nbr in nodes and local_map[nd] < local_map[nbr]: + edges.append((local_map[nd], local_map[nbr])) + sg = ig.Graph(n=n, edges=edges, directed=False) + mincut = sg.mincut().value + + mincut_over_log10n = mincut / np.log10(n) if n > 1 else 0.0 + cluster_details.append({ "com_id": com_id, "n": n, @@ -75,8 +92,13 @@ def compute_cluster_stats(network_name, method_name, com_path): "edge_density": edge_density, "degree_density": degree_density, "conductance": conductance, + "mincut": int(mincut), + "mincut_over_log10n": mincut_over_log10n, }) + if (ci + 1) % 500 == 0: + print(f" ... 
{ci+1}/{total_clusters} clusters processed") + # Per-node mixing parameter mixing_params = [] for node in all_nodes: @@ -107,6 +129,12 @@ def compute_cluster_stats(network_name, method_name, com_path): "median_edge_density": np.median([d["edge_density"] for d in cluster_details]) if cluster_details else 0, "mean_conductance": np.mean([d["conductance"] for d in cluster_details]) if cluster_details else 0, "mean_degree_density": np.mean([d["degree_density"] for d in cluster_details]) if cluster_details else 0, + "mean_mincut": np.mean([d["mincut"] for d in cluster_details]) if cluster_details else 0, + "median_mincut": np.median([d["mincut"] for d in cluster_details]) if cluster_details else 0, + "mean_mincut_over_log10n": np.mean([d["mincut_over_log10n"] for d in cluster_details]) if cluster_details else 0, + "n_connected": sum(1 for d in cluster_details if d["mincut"] > 0), + "n_disconnected": sum(1 for d in cluster_details if d["mincut"] == 0), + "n_wellconnected": sum(1 for d in cluster_details if d["mincut"] > np.log10(d["n"])), } return summary, cluster_details, mixing_params |
