Diffstat (limited to 'experiments/run_cs_full.py')

 experiments/run_cs_full.py | 147 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 147 insertions(+), 0 deletions(-)
diff --git a/experiments/run_cs_full.py b/experiments/run_cs_full.py
new file mode 100644
index 0000000..d66fc59
--- /dev/null
+++ b/experiments/run_cs_full.py
@@ -0,0 +1,147 @@
+#!/usr/bin/env python3
+"""H19 CitationFull-CiteSeer (4.2K nodes, avg deg 2.5, 6 classes) — same regime as Planetoid CiteSeer.
+Quick BP + GRAFT depth sweep to confirm/extend the 'GRAFT wins on real sparse citation graphs' story."""
+
+import torch, sys, numpy as np, time
+import torch.nn as nn, torch.nn.functional as F
+from torch_geometric.datasets import CitationFull
+from torch_geometric.nn import GCNConv
+from torch_geometric.utils import add_self_loops, degree
+
+sys.path.insert(0, '/home/yurenh2/graph-grape')
+from src.trainers import GraphGrAPETrainer
+
+DATA_ROOT = '/home/yurenh2/graph-grape/data/CFull'
+device = torch.device('cuda:0')
+
+
+def build_A_hat(edge_index, N):
+    """Symmetric GCN normalization D^{-1/2} (A + I) D^{-1/2}, kept sparse."""
+    edge_index, _ = add_self_loops(edge_index, num_nodes=N)
+    row, col = edge_index
+    deg = degree(row, num_nodes=N, dtype=torch.float)
+    dis = deg.pow(-0.5); dis[dis == float('inf')] = 0  # guard against isolated nodes
+    return torch.sparse_coo_tensor(edge_index, dis[row]*dis[col], (N, N)).coalesce()
+
+
+def build_row_norm(edge_index, N):
+    """Row-stochastic D^{-1} (A + I), plus the same operator built on the reversed
+    edges; on an undirected graph the two coincide numerically."""
+    ei, _ = add_self_loops(edge_index, num_nodes=N)
+    row, col = ei
+    deg = degree(row, num_nodes=N, dtype=torch.float).clamp(min=1)
+    A_row = torch.sparse_coo_tensor(ei, 1.0/deg[row], (N, N)).coalesce()
+    A_row_T = torch.sparse_coo_tensor(ei.flip(0), 1.0/deg[col], (N, N)).coalesce()
+    return A_row, A_row_T
+
+
+def make_split(N, seed, y, n_per_class=20, n_val=500):
+    """Planetoid-style split: n_per_class train nodes per class, n_val val, rest test."""
+    g = torch.Generator().manual_seed(seed)
+    train_mask = torch.zeros(N, dtype=torch.bool)
+    val_mask = torch.zeros(N, dtype=torch.bool)
+    test_mask = torch.zeros(N, dtype=torch.bool)
+    C = int(y.max()) + 1
+    for c in range(C):
+        idx = (y == c).nonzero().flatten()
+        idx = idx[torch.randperm(idx.size(0), generator=g)]
+        train_mask[idx[:n_per_class]] = True
+    remaining = (~train_mask).nonzero().flatten()
+    remaining = remaining[torch.randperm(remaining.size(0), generator=g)]
+    val_mask[remaining[:n_val]] = True
+    test_mask[remaining[n_val:]] = True
+    return train_mask, val_mask, test_mask
+
+
+class GCN(nn.Module):
+    def __init__(self, in_dim, hidden, out_dim, L, dropout=0.1):
+        super().__init__()
+        self.convs = nn.ModuleList([GCNConv(in_dim if i == 0 else hidden,
+                                            hidden if i < L-1 else out_dim) for i in range(L)])
+        self.dropout = dropout
+
+    def forward(self, x, ei):
+        for l, c in enumerate(self.convs):
+            x = c(x, ei)
+            if l < len(self.convs)-1:  # ReLU + dropout between layers only
+                x = F.relu(x)
+                if self.dropout > 0: x = F.dropout(x, self.dropout, self.training)
+        return x
+
+
+def bp_one(L, seed, d, train_mask, val_mask, test_mask, epochs=200, lr=5e-3, hidden=128, dropout=0.1):
+    """Train one backprop GCN; return test accuracy at the best validation checkpoint."""
+    torch.manual_seed(seed); np.random.seed(seed); torch.cuda.manual_seed_all(seed)
+    model = GCN(d.x.shape[1], hidden, int(d.y.max())+1, L, dropout=dropout).to(device)
+    opt = torch.optim.AdamW(model.parameters(), lr=lr)
+    sched = torch.optim.lr_scheduler.CosineAnnealingLR(opt, T_max=epochs)
+
+    @torch.no_grad()
+    def ev(m):
+        model.eval()
+        out = model(d.x.float(), d.edge_index)
+        return (out[m].argmax(1) == d.y[m]).float().mean().item()
+
+    bv, bt = 0, 0
+    for ep in range(epochs):
+        model.train()
+        out = model(d.x.float(), d.edge_index)
+        loss = F.cross_entropy(out[train_mask], d.y[train_mask])
+        opt.zero_grad(); loss.backward(); opt.step()
+        sched.step()
+        if ep % 5 == 0:
+            v = ev(val_mask)
+            if v > bv: bv = v; bt = ev(test_mask)
+    return bt
+
+
+def graft_one(L, seed, d, A_hat, A_row, A_row_T, train_mask, val_mask, test_mask,
+              epochs=200, lr=5e-3, hidden=128):
+    """Train one GRAFT model via GraphGrAPETrainer; same model selection rule as bp_one."""
+    torch.manual_seed(seed); np.random.seed(seed); torch.cuda.manual_seed_all(seed)
+    data = {
+        'X': d.x.float(), 'A_hat': A_hat, 'A_row': A_row, 'A_row_T': A_row_T,
+        'y': d.y, 'train_mask': train_mask, 'val_mask': val_mask, 'test_mask': test_mask,
+        'num_features': d.x.shape[1], 'num_classes': int(d.y.max())+1,
+        'num_nodes': d.num_nodes, 'traces': {},
+    }
+    trainer = GraphGrAPETrainer(
+        data=data, hidden_dim=hidden, lr=lr, weight_decay=0.0,
+        lr_feedback=0.5, num_probes=64, topo_mode='fixed_A', max_topo_power=3,
+        diffusion_alpha=0.5, diffusion_iters=10,
+        num_layers=L, residual_alpha=0.0, backbone='gcn',
+        use_batchnorm=False, dropout=0.0,
+    )
+    trainer.align_mode = 'chain_norm'
+    bv, bt = 0, 0
+    for ep in range(epochs):
+        trainer.train_step()
+        if ep % 5 == 0:
+            v = trainer.evaluate('val_mask')
+            if v > bv: bv = v; bt = trainer.evaluate('test_mask')
+    return bt
+
+
+def main():
+    d = CitationFull(root=DATA_ROOT, name='CiteSeer')[0].to(device)
+    N = d.num_nodes
+    A_hat = build_A_hat(d.edge_index, N)
+    A_row, A_row_T = build_row_norm(d.edge_index, N)
+
+    print(f'CitationFull-CiteSeer: N={N}, deg={d.edge_index.shape[1]/N:.1f}, C={int(d.y.max())+1}', flush=True)
+
+    bp_res, gf_res = {}, {}
+    for L in [3, 5, 10, 16]:
+        bp_accs, gf_accs = [], []
+        for s in [0, 1, 2]:
+            tm, vm, tem = make_split(N, s, d.y.cpu())
+            tm, vm, tem = tm.to(device), vm.to(device), tem.to(device)
+            t0 = time.time()
+            bp = bp_one(L, s, d, tm, vm, tem)
+            t1 = time.time()
+            gf = graft_one(L, s, d, A_hat, A_row, A_row_T, tm, vm, tem)
+            t2 = time.time()
+            bp_accs.append(bp); gf_accs.append(gf)
+            print(f'  L={L} s={s}: BP={bp:.4f} ({t1-t0:.0f}s)  GRAFT={gf:.4f} ({t2-t1:.0f}s)', flush=True)
+        bp_m, bp_sd = np.mean(bp_accs), np.std(bp_accs)
+        gf_m, gf_sd = np.mean(gf_accs), np.std(gf_accs)
+        bp_res[L] = (bp_m, bp_sd); gf_res[L] = (gf_m, gf_sd)
+        print(f'>>> L={L}: BP {bp_m:.4f}±{bp_sd:.4f}  GRAFT {gf_m:.4f}±{gf_sd:.4f}  Δ={gf_m-bp_m:+.3f}', flush=True)
+
+
+if __name__ == '__main__':
+    main()
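As a sanity check on the normalization that build_A_hat implements, the snippet below (not part of the commit; the 3-node path graph is a made-up toy example) compares the sparse construction against a dense D^{-1/2} (A + I) D^{-1/2} reference. It assumes torch and torch_geometric are installed.

    import torch
    from torch_geometric.utils import add_self_loops, degree

    # Toy undirected 3-node path graph; PyG stores each edge in both directions.
    edge_index = torch.tensor([[0, 1, 1, 2],
                               [1, 0, 2, 1]])
    N = 3

    # Same construction as build_A_hat above.
    ei, _ = add_self_loops(edge_index, num_nodes=N)
    row, col = ei
    deg = degree(row, num_nodes=N, dtype=torch.float)
    dis = deg.pow(-0.5)
    A_hat = torch.sparse_coo_tensor(ei, dis[row] * dis[col], (N, N)).coalesce()

    # Dense reference: D^{-1/2} (A + I) D^{-1/2}.
    A = torch.zeros(N, N)
    A[edge_index[0], edge_index[1]] = 1.0
    A = A + torch.eye(N)
    D_inv_sqrt = torch.diag(A.sum(1).pow(-0.5))
    assert torch.allclose(A_hat.to_dense(), D_inv_sqrt @ A @ D_inv_sqrt)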
