diff options
| author | YurenHao0426 <blackhao0426@gmail.com> | 2026-02-24 08:40:49 +0000 |
|---|---|---|
| committer | YurenHao0426 <blackhao0426@gmail.com> | 2026-02-24 08:40:49 +0000 |
| commit | 8f63cf9f41bbdb8d55cd4679872d2b4ae2129324 (patch) | |
| tree | ab5c95888849e854f2346db856c7edece7c8b8a7 /scripts/run_leiden.py | |
EC-SBM community detection analysis: full pipeline and writeup
Implement community detection on 3 EC-SBM networks (polblogs, topology,
internet_as) using 5 methods (Leiden-Mod, Leiden-CPM at 0.1 and 0.01,
Infomap, graph-tool SBM). Compute AMI/ARI/NMI accuracy, cluster statistics,
and generate figures and LaTeX report.
Diffstat (limited to 'scripts/run_leiden.py')
| -rw-r--r-- | scripts/run_leiden.py | 62 |
1 files changed, 62 insertions, 0 deletions
# File: scripts/run_leiden.py (new in this commit; mode 100644, blob 375b6bb)
"""Run Leiden community detection (modularity or CPM)."""

import argparse
import sys
import os

# Make the sibling modules (config, load_data) importable when this file is
# executed directly as a script from any working directory.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

import igraph as ig
import leidenalg

from config import NETWORKS, RESULTS_DIR, SEED
from load_data import load_edge_list, build_igraph, save_communities

# CLI method name -> keyword arguments forwarded to run_leiden().
# Insertion order is also the order shown in the --method help text.
METHOD_CONFIGS = {
    "leiden_mod": {"quality": "modularity"},
    "leiden_cpm_01": {"quality": "cpm", "resolution": 0.1},
    "leiden_cpm_001": {"quality": "cpm", "resolution": 0.01},
}

_QUALITIES = ("modularity", "cpm")


def run_leiden(network_name, method_name, quality, resolution=None):
    """Partition one network with Leiden and save the node-to-community map.

    Args:
        network_name: Key into ``NETWORKS``; the entry's ``"edge_tsv"`` value
            is passed to ``load_edge_list``.
        method_name: Label used only as the output sub-directory name.
        quality: ``"modularity"`` or ``"cpm"``.
        resolution: Resolution parameter for CPM. Required when ``quality`` is
            ``"cpm"``; ignored for ``"modularity"``.

    Returns:
        A dict mapping each node name (as given by ``build_igraph``'s
        index-to-name mapping) to its community id, a string of the
        community's position in the partition.

    Raises:
        KeyError: If ``network_name`` is not in ``NETWORKS``.
        ValueError: If ``quality`` is unknown, or if ``quality`` is ``"cpm"``
            and no ``resolution`` was given.
    """
    net = NETWORKS[network_name]

    # Validate the cheap arguments before loading the graph so a bad call
    # fails immediately instead of after the edge list has been read.
    if quality not in _QUALITIES:
        raise ValueError(f"Unknown quality function: {quality}")
    if quality == "cpm" and resolution is None:
        # Forwarding None as resolution_parameter would otherwise surface as
        # an opaque error from inside leidenalg.
        raise ValueError("quality='cpm' requires a numeric resolution")

    edge_df = load_edge_list(net["edge_tsv"])
    g, name_to_idx, idx_to_name = build_igraph(edge_df)

    print(f"  Graph: {g.vcount()} nodes, {g.ecount()} edges")

    if quality == "modularity":
        partition = leidenalg.find_partition(
            g, leidenalg.ModularityVertexPartition, seed=SEED
        )
    else:  # quality == "cpm", guaranteed by the validation above
        partition = leidenalg.find_partition(
            g, leidenalg.CPMVertexPartition,
            resolution_parameter=resolution, seed=SEED
        )

    # The partition's modularity is printed for both quality functions.
    print(f"  Found {len(partition)} communities, modularity={partition.modularity:.4f}")

    # Each element of the partition is the list of vertex indices in one
    # community; translate indices back to the original node names.
    node2com = {
        idx_to_name[idx]: str(comm_id)
        for comm_id, members in enumerate(partition)
        for idx in members
    }

    out_path = os.path.join(RESULTS_DIR, network_name, method_name, "com.tsv")
    save_communities(node2com, out_path)
    print(f"  Saved to {out_path}")
    return node2com


def main():
    """Parse the command line and run the selected Leiden configuration."""
    parser = argparse.ArgumentParser(description=__doc__)
    # Restricting to the configured networks turns a typo into a usage error
    # rather than a KeyError traceback.
    parser.add_argument("--network", required=True, choices=list(NETWORKS))
    parser.add_argument("--method", required=True,
                        choices=list(METHOD_CONFIGS))
    args = parser.parse_args()

    cfg = METHOD_CONFIGS[args.method]
    print(f"Running {args.method} on {args.network}...")
    run_leiden(args.network, args.method, **cfg)


if __name__ == "__main__":
    main()
