diff options
Diffstat (limited to 'scripts/run_infomap.py')
| -rw-r--r-- | scripts/run_infomap.py | 61 |
1 files changed, 61 insertions, 0 deletions
"""Run Infomap community detection."""

import argparse
import sys
import os
# Make the sibling modules (config, load_data) importable when this file is
# executed directly as a script.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

import infomap
import pandas as pd

from config import NETWORKS, RESULTS_DIR, SEED
from load_data import load_edge_list, save_communities


def run_infomap(network_name: str) -> dict:
    """Detect communities in one configured network with two-level Infomap.

    The edge list referenced by ``NETWORKS[network_name]["edge_tsv"]`` is
    loaded, every distinct value in its ``src``/``tgt`` columns is mapped to
    a dense integer id, and Infomap is run on the resulting undirected
    links with the seed from ``config.SEED``. The node -> module assignment
    is handed to ``save_communities`` with the target path
    ``RESULTS_DIR/<network_name>/infomap/com.tsv``.

    Args:
        network_name: Key into ``NETWORKS``.

    Returns:
        Dict mapping each node name (as it appears in the edge list) to its
        community id, stored as a string.

    Raises:
        KeyError: If ``network_name`` is not a key of ``NETWORKS``.
        ValueError: If the edge list contains no edges.
    """
    net = NETWORKS[network_name]
    edge_df = load_edge_list(net["edge_tsv"])

    # Infomap links are added by integer id, so assign every distinct
    # endpoint a dense index in order of first appearance.
    all_nodes = pd.unique(edge_df[["src", "tgt"]].values.ravel("K"))
    idx_to_name = dict(enumerate(all_nodes))
    name_to_idx = {name: i for i, name in idx_to_name.items()}

    n = len(idx_to_name)
    print(f"  Network: {n} nodes, {len(edge_df)} edges")

    if n == 0:
        # Fail early with a clear message instead of the opaque
        # "max() arg is an empty sequence" further down.
        raise ValueError(
            f"Network {network_name!r} has no edges; nothing to cluster"
        )

    im = infomap.Infomap(
        f"--two-level --flow-model undirected --seed {SEED}"
    )

    # Iterate the two columns directly; iterrows() would build a Series per
    # edge just to read two fields.
    for src, tgt in zip(edge_df["src"], edge_df["tgt"]):
        im.add_link(name_to_idx[src], name_to_idx[tgt])

    im.run()

    print(f"  Found {im.num_top_modules} top modules, codelength={im.codelength:.4f}")

    # Extract communities: only leaf entries of the tree are translated
    # back to node names.
    node2com = {}
    for node in im.tree:
        if node.is_leaf:
            node2com[idx_to_name[node.node_id]] = str(node.module_id)

    # Assign singleton communities to any nodes not in infomap output,
    # numbering them after the largest module id already in use.
    next_com = max((int(c) for c in node2com.values()), default=0) + 1
    for name in idx_to_name.values():
        if name not in node2com:
            node2com[name] = str(next_com)
            next_com += 1

    out_path = os.path.join(RESULTS_DIR, network_name, "infomap", "com.tsv")
    save_communities(node2com, out_path)
    print(f"  Saved to {out_path}")
    return node2com


def main() -> None:
    """Parse ``--network`` from the command line and run Infomap on it."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--network", required=True)
    args = parser.parse_args()
    print(f"Running Infomap on {args.network}...")
    run_infomap(args.network)


if __name__ == "__main__":
    main()
