diff options
| author | YurenHao0426 <blackhao0426@gmail.com> | 2026-02-24 08:40:49 +0000 |
|---|---|---|
| committer | YurenHao0426 <blackhao0426@gmail.com> | 2026-02-24 08:40:49 +0000 |
| commit | 8f63cf9f41bbdb8d55cd4679872d2b4ae2129324 (patch) | |
| tree | ab5c95888849e854f2346db856c7edece7c8b8a7 /scripts/run_infomap.py | |
EC-SBM community detection analysis: full pipeline and writeup
Implement community detection on 3 EC-SBM networks (polblogs, topology,
internet_as) using 5 methods (Leiden-Mod, Leiden-CPM at 0.1 and 0.01,
Infomap, graph-tool SBM). Compute AMI/ARI/NMI accuracy, cluster statistics,
and generate figures and LaTeX report.
Diffstat (limited to 'scripts/run_infomap.py')
| -rw-r--r-- | scripts/run_infomap.py | 61 |
1 files changed, 61 insertions, 0 deletions
"""Run Infomap community detection."""

import argparse
import os
import sys

# Make the sibling modules (config, load_data) importable when this file is
# executed directly as a script.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

import infomap
import pandas as pd

from config import NETWORKS, RESULTS_DIR, SEED
from load_data import load_edge_list, save_communities


def run_infomap(network_name):
    """Cluster one configured network with two-level Infomap and save the result.

    The edge list is read via ``load_edge_list`` from the ``"edge_tsv"`` entry
    of ``NETWORKS[network_name]``. Node names found in the ``src`` / ``tgt``
    columns are mapped to consecutive integer ids (in order of first
    appearance), the links are fed to Infomap (undirected flow model, seeded
    with ``SEED``), and the resulting leaf-node module ids are mapped back to
    the original node names.

    Args:
        network_name: Key into ``NETWORKS`` selecting the network to process.

    Returns:
        dict mapping each node name to its community label (module id
        rendered as a string).

    Raises:
        KeyError: If ``network_name`` is not a key of ``NETWORKS``.
        ValueError: If the edge list contains no nodes.
    """
    net = NETWORKS[network_name]
    edge_df = load_edge_list(net["edge_tsv"])

    # Build node mapping: name -> dense integer id and back.
    all_nodes = pd.unique(edge_df[["src", "tgt"]].values.ravel("K"))
    name_to_idx = {name: i for i, name in enumerate(all_nodes)}
    idx_to_name = {i: name for name, i in name_to_idx.items()}

    n = len(all_nodes)
    print(f"  Network: {n} nodes, {len(edge_df)} edges")

    # Fail early with a clear message instead of the opaque
    # "max() arg is an empty sequence" error further down.
    if n == 0:
        raise ValueError(
            f"Network {network_name!r} has an empty edge list; nothing to cluster"
        )

    im = infomap.Infomap(f"--two-level --flow-model undirected --seed {SEED}")

    # Iterate the two columns directly; iterrows() would build a Series per
    # row only to read two fields from it.
    for src, tgt in zip(edge_df["src"], edge_df["tgt"]):
        im.add_link(name_to_idx[src], name_to_idx[tgt])

    im.run()

    print(f"  Found {im.num_top_modules} top modules, codelength={im.codelength:.4f}")

    # Extract communities from the leaf nodes of the Infomap tree.
    node2com = {
        idx_to_name[node.node_id]: str(node.module_id)
        for node in im.tree
        if node.is_leaf
    }

    # Assign singleton communities to any nodes not in infomap output.
    # default=-1 makes the first fresh label 0 if Infomap returned no leaves.
    next_com = max((int(c) for c in node2com.values()), default=-1) + 1
    for name in all_nodes:
        if name not in node2com:
            node2com[name] = str(next_com)
            next_com += 1

    out_path = os.path.join(RESULTS_DIR, network_name, "infomap", "com.tsv")
    save_communities(node2com, out_path)
    print(f"  Saved to {out_path}")
    return node2com


def main():
    """Parse ``--network`` from the command line and run Infomap on it."""
    parser = argparse.ArgumentParser()
    # Restrict to configured names so a typo yields a usage error rather
    # than a KeyError traceback (assumes NETWORKS iterates over its names).
    parser.add_argument("--network", required=True, choices=sorted(NETWORKS))
    args = parser.parse_args()
    print(f"Running Infomap on {args.network}...")
    run_infomap(args.network)


if __name__ == "__main__":
    main()
