1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
|
"""Run Infomap community detection."""
import argparse
import sys
import os
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
import infomap
from config import NETWORKS, RESULTS_DIR, SEED
from load_data import load_edge_list, save_communities
def run_infomap(network_name):
    """Detect communities in a configured network with Infomap and save them.

    The edge list named by ``NETWORKS[network_name]["edge_tsv"]`` is loaded,
    the labels found in its ``src``/``tgt`` columns are mapped to consecutive
    integer ids, and a two-level, undirected Infomap run (seeded with
    ``SEED``) is performed on the resulting links.  Links are added without
    weights; only the ``src`` and ``tgt`` columns are read.  The resulting
    assignment is handed to ``save_communities`` together with the path
    ``RESULTS_DIR/<network_name>/infomap/com.tsv``.

    Args:
        network_name: Key into ``NETWORKS`` selecting the network to process.

    Returns:
        dict: Maps each node label to its module id, stored as a string.
        Nodes that do not appear among Infomap's leaf nodes each receive a
        fresh, otherwise unused module id.

    Raises:
        KeyError: If ``network_name`` is not a key of ``NETWORKS``.
    """
    import pandas as pd

    try:
        net = NETWORKS[network_name]
    except KeyError:
        # Same exception type as before, but tell the user what is valid.
        known = ", ".join(map(str, NETWORKS))
        raise KeyError(
            f"Unknown network {network_name!r}; available: {known}"
        ) from None

    edge_df = load_edge_list(net["edge_tsv"])

    # Build node mapping: Infomap works on integer ids, so give every label
    # (in order of first appearance) a consecutive index and keep the inverse.
    all_nodes = pd.unique(edge_df[["src", "tgt"]].values.ravel("K"))
    name_to_idx = {name: i for i, name in enumerate(all_nodes)}
    idx_to_name = dict(enumerate(all_nodes))
    n = len(all_nodes)
    print(f" Network: {n} nodes, {len(edge_df)} edges")

    im = infomap.Infomap(f"--two-level --flow-model undirected --seed {SEED}")
    # Iterate the two columns directly: iterrows() allocates a Series per
    # edge and may upcast the labels when other columns have another dtype.
    for src, tgt in zip(edge_df["src"], edge_df["tgt"]):
        im.add_link(name_to_idx[src], name_to_idx[tgt])
    im.run()
    print(f" Found {im.num_top_modules} top modules, codelength={im.codelength:.4f}")

    # Extract communities: only leaf nodes of the tree are actual graph nodes.
    node2com = {}
    for tree_node in im.tree:
        if tree_node.is_leaf:
            node2com[idx_to_name[tree_node.node_id]] = str(tree_node.module_id)

    # Assign singleton communities to any nodes not in infomap output.
    # default=0 keeps this from raising on an empty assignment (e.g. an
    # empty edge list), in which case numbering simply starts at 1.
    next_com = max((int(c) for c in node2com.values()), default=0) + 1
    for name in all_nodes:
        if name not in node2com:
            node2com[name] = str(next_com)
            next_com += 1

    out_path = os.path.join(RESULTS_DIR, network_name, "infomap", "com.tsv")
    save_communities(node2com, out_path)
    print(f" Saved to {out_path}")
    return node2com
if __name__ == "__main__":
    # Command-line entry point: --network is mandatory and is passed
    # unchanged to run_infomap as the network name.
    cli = argparse.ArgumentParser()
    cli.add_argument("--network", required=True)
    args = cli.parse_args()

    print(f"Running Infomap on {args.network}...")
    run_infomap(args.network)
|