summaryrefslogtreecommitdiff
path: root/experiments/run_wikics_paper_setup.py
diff options
context:
space:
mode:
authorYurenHao0426 <blackhao0426@gmail.com>2026-05-04 23:05:16 -0500
committerYurenHao0426 <blackhao0426@gmail.com>2026-05-04 23:05:16 -0500
commitbd9333eda60a9029a198acaeacb1eca4312bd1e8 (patch)
tree7544c347b7ac4e8629fa1cc0fcf341d48cb69e2e /experiments/run_wikics_paper_setup.py
Initial release: GRAFT (KAFT) — NeurIPS 2026 submission code
Topology-factorized Jacobian-aligned feedback for deep GNNs. Includes: - src/: GraphGrAPETrainer (KAFT) + BP / DFA / DFA-GNN / VanillaGrAPE baselines + multi-probe alignment estimator + dataset / sparse-mm utilities. - experiments/: 19 runners reproducing every figure / table in the paper. - figures/: 4 generators + the 4 PDFs cited in the report. - paper/: NeurIPS .tex and consolidated experiments_master notes. Smoke test: 50-epoch Cora GCN L=4 gives BP 77.3% / KAFT 79.0%.
Diffstat (limited to 'experiments/run_wikics_paper_setup.py')
-rw-r--r--experiments/run_wikics_paper_setup.py146
1 files changed, 146 insertions, 0 deletions
diff --git a/experiments/run_wikics_paper_setup.py b/experiments/run_wikics_paper_setup.py
new file mode 100644
index 0000000..a2bf879
--- /dev/null
+++ b/experiments/run_wikics_paper_setup.py
@@ -0,0 +1,146 @@
+#!/usr/bin/env python3
+"""H15 WikiCS paper-setup depth sweep — Wikipedia academic articles.
+~11.7K nodes, avg deg ~4.1, 10-class, undirected. Sparse + few-class
+fits GRAFT's regime profile. Test BP vs GRAFT at L ∈ {3,5,10,14,20} × 5 seeds.
+"""
+import sys, time
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch_geometric.datasets import WikiCS
+from torch_geometric.nn import GCNConv
+from torch_geometric.utils import add_self_loops, degree, to_undirected
+
+sys.path.insert(0, '/home/yurenh2/graph-grape')
+from src.trainers import GraphGrAPETrainer
+
+device = torch.device('cuda:0') # CUDA_VISIBLE_DEVICES=2 maps cuda:0 → physical GPU 2
+
+
def build_A_hat(edge_index, N):
    """Build the symmetric GCN propagation matrix D^{-1/2}(A+I)D^{-1/2}.

    Returns a coalesced (N, N) sparse COO tensor on the same device as
    `edge_index`.
    """
    ei, _ = add_self_loops(edge_index, num_nodes=N)
    src, dst = ei
    deg = degree(src, num_nodes=N, dtype=torch.float)
    inv_sqrt = deg.pow(-0.5)
    # isolated nodes would give inf; zero them out (after self-loops deg >= 1,
    # so this is a belt-and-braces guard)
    inv_sqrt[inv_sqrt == float('inf')] = 0
    vals = inv_sqrt[src] * inv_sqrt[dst]
    return torch.sparse_coo_tensor(ei, vals, (N, N)).coalesce()
+
+
def build_row_norm(edge_index, N):
    """Build the row-normalized adjacency D^{-1}(A+I) and its transpose.

    Both are returned as coalesced (N, N) sparse COO tensors; the transpose
    is built explicitly by flipping the edge index.
    """
    ei, _ = add_self_loops(edge_index, num_nodes=N)
    src, dst = ei
    deg = degree(src, num_nodes=N, dtype=torch.float).clamp(min=1)
    fwd = torch.sparse_coo_tensor(ei, 1.0 / deg[src], (N, N)).coalesce()
    bwd = torch.sparse_coo_tensor(ei.flip(0), 1.0 / deg[dst], (N, N)).coalesce()
    return fwd, bwd
+
+
def paper_split(N, y, seed, train_frac=0.05, n_val=500):
    """Stratified random split in the standard paper setup.

    Takes `train_frac` of each class (at least 1 node per class) for training,
    then `n_val` random non-train nodes for validation; everything left over
    becomes the test set. A seeded torch.Generator makes the split
    reproducible per seed.

    Returns (train_mask, val_mask, test_mask) as boolean tensors of length N.
    """
    rng = torch.Generator().manual_seed(seed)
    train_mask = torch.zeros(N, dtype=torch.bool)
    val_mask = torch.zeros(N, dtype=torch.bool)
    test_mask = torch.zeros(N, dtype=torch.bool)
    num_classes = int(y.max()) + 1
    for cls in range(num_classes):
        members = (y == cls).nonzero().flatten()
        shuffled = members[torch.randperm(members.size(0), generator=rng)]
        take = max(1, int(round(train_frac * members.size(0))))
        train_mask[shuffled[:take]] = True
    rest = (~train_mask).nonzero().flatten()
    rest = rest[torch.randperm(rest.size(0), generator=rng)]
    val_mask[rest[:n_val]] = True
    test_mask[rest[n_val:]] = True
    return train_mask, val_mask, test_mask
+
+
class GCN(nn.Module):
    """Plain L-layer GCN backbone.

    Layer widths are in_dim -> hidden -> ... -> hidden -> out_dim, with ReLU
    between layers and no activation after the final (logit) layer.
    """

    def __init__(self, in_dim, hidden, out_dim, L):
        super().__init__()
        dims = [in_dim] + [hidden] * (L - 1) + [out_dim]
        self.convs = nn.ModuleList(
            GCNConv(dims[i], dims[i + 1]) for i in range(L))

    def forward(self, x, ei):
        last = len(self.convs) - 1
        for idx, conv in enumerate(self.convs):
            x = conv(x, ei)
            if idx != last:
                x = F.relu(x)
        return x
+
+
def bp_one(L, seed, d, tm, vm, tem, epochs=200, lr=0.01, hidden=64):
    """Run one backprop GCN training on data `d` with the given masks.

    Full-batch Adam training with weight decay 5e-4; validation accuracy is
    checked every 5 epochs and the test accuracy at the best-validation
    checkpoint is returned.
    """
    torch.manual_seed(seed)
    np.random.seed(seed)
    torch.cuda.manual_seed_all(seed)
    model = GCN(d.x.shape[1], hidden, int(d.y.max()) + 1, L).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=5e-4)

    @torch.no_grad()
    def accuracy(mask):
        model.eval()
        logits = model(d.x.float(), d.edge_index)
        return (logits[mask].argmax(1) == d.y[mask]).float().mean().item()

    best_val = best_test = 0
    for epoch in range(epochs):
        model.train()
        logits = model(d.x.float(), d.edge_index)
        loss = F.cross_entropy(logits[tm], d.y[tm])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if epoch % 5 == 0:
            val = accuracy(vm)
            if val > best_val:
                best_val, best_test = val, accuracy(tem)
    return best_test
+
+
def graft_one(L, seed, d, A_hat, A_row, A_row_T, tm, vm, tem,
              epochs=200, lr=0.01, hidden=64):
    """Run one GRAFT (GraphGrAPETrainer) training with the paper settings.

    Mirrors `bp_one`'s protocol: validation is evaluated every 5 epochs and
    the test accuracy at the best-validation checkpoint is returned.
    """
    torch.manual_seed(seed)
    np.random.seed(seed)
    torch.cuda.manual_seed_all(seed)
    # Payload schema expected by GraphGrAPETrainer — keys must match exactly.
    payload = {
        'X': d.x.float(),
        'A_hat': A_hat,
        'A_row': A_row,
        'A_row_T': A_row_T,
        'y': d.y,
        'train_mask': tm,
        'val_mask': vm,
        'test_mask': tem,
        'num_features': d.x.shape[1],
        'num_classes': int(d.y.max()) + 1,
        'num_nodes': d.num_nodes,
        'traces': {},
    }
    trainer = GraphGrAPETrainer(
        data=payload, hidden_dim=hidden, lr=lr, weight_decay=5e-4,
        lr_feedback=0.5, num_probes=64, topo_mode='fixed_A', max_topo_power=3,
        diffusion_alpha=0.5, diffusion_iters=10,
        num_layers=L, residual_alpha=0.0, backbone='gcn',
        use_batchnorm=False, dropout=0.0,
    )
    trainer.align_mode = 'chain_norm'
    best_val = best_test = 0
    for epoch in range(epochs):
        trainer.train_step()
        if epoch % 5 == 0:
            val = trainer.evaluate('val_mask')
            if val > best_val:
                best_val, best_test = val, trainer.evaluate('test_mask')
    return best_test
+
+
def main():
    """Depth sweep on WikiCS: BP vs GRAFT at L in {3, 5, 10, 14, 20} x 5 seeds."""
    d = WikiCS(root='/home/yurenh2/graph-grape/data/WikiCS')[0].to(device)
    # WikiCS edges already undirected; ensure undirected just in case
    d.edge_index = to_undirected(d.edge_index, num_nodes=d.num_nodes)
    N = d.num_nodes
    A_hat = build_A_hat(d.edge_index, N)
    A_row, A_row_T = build_row_norm(d.edge_index, N)
    print(f'WikiCS: N={N}, deg={d.edge_index.shape[1]/N:.2f}, C={int(d.y.max())+1}, F={d.x.shape[1]}', flush=True)

    for L in (3, 5, 10, 14, 20):
        bp_scores, gf_scores = [], []
        for s in (0, 1, 2, 3, 4):
            masks = paper_split(N, d.y.cpu(), s)
            tm, vm, tem = (m.to(device) for m in masks)
            t0 = time.time()
            bp = bp_one(L, s, d, tm, vm, tem)
            t1 = time.time()
            gf = graft_one(L, s, d, A_hat, A_row, A_row_T, tm, vm, tem)
            t2 = time.time()
            bp_scores.append(bp)
            gf_scores.append(gf)
            print(f' L={L} s={s}: BP={bp:.4f}({t1-t0:.0f}s) GRAFT={gf:.4f}({t2-t1:.0f}s)', flush=True)
        bp_mean, bp_std = np.mean(bp_scores), np.std(bp_scores)
        gf_mean, gf_std = np.mean(gf_scores), np.std(gf_scores)
        print(f'>>> L={L}: BP {bp_mean:.4f}±{bp_std:.4f} GRAFT {gf_mean:.4f}±{gf_std:.4f} Δ={gf_mean-bp_mean:+.3f}', flush=True)
+
+
+if __name__ == '__main__':
+ main()