#!/usr/bin/env python3
"""CitationFull-DBLP experiment plus supplementary depth-sweep runs (L=2-6)."""
import torch
import numpy as np
import json
import os
import time

from torch_geometric.datasets import CitationFull

from src.data import build_normalized_adj, build_row_normalized_adj
from src.trainers import BPTrainer, DFATrainer, KAFTTrainer

device = 'cuda:0'
SEEDS = [0, 1, 2, 3, 4]
EPOCHS = 200
OUT_DIR = 'results/dblp_depth'


def load_dblp():
    ds = CitationFull(root='./data', name='DBLP')
    data = ds[0]
    N, C = data.num_nodes, ds.num_classes

    # Random per-class split: 5% train / 10% val / rest test (Planetoid-style).
    rng = torch.Generator().manual_seed(42)
    train_mask = torch.zeros(N, dtype=torch.bool)
    val_mask = torch.zeros(N, dtype=torch.bool)
    test_mask = torch.zeros(N, dtype=torch.bool)
    for c in range(C):
        idx = (data.y == c).nonzero(as_tuple=True)[0]
        perm = torch.randperm(len(idx), generator=rng)
        n_tr = max(1, int(0.05 * len(idx)))  # 5% train (like Planetoid)
        n_va = max(1, int(0.1 * len(idx)))   # 10% val
        train_mask[idx[perm[:n_tr]]] = True
        val_mask[idx[perm[n_tr:n_tr + n_va]]] = True
        test_mask[idx[perm[n_tr + n_va:]]] = True

    A_hat = build_normalized_adj(data.edge_index, N)
    A_row, A_row_T = build_row_normalized_adj(data.edge_index, N)
    traces = {k: torch.tensor(0.0) for k in range(5)}  # skip expensive trace computation on this large graph

    return {
        'X': data.x.to(device),
        'y': data.y.to(device),
        'A_hat': A_hat.to(device),
        'A_row': A_row.to(device),
        'A_row_T': A_row_T.to(device),
        'train_mask': train_mask.to(device),
        'val_mask': val_mask.to(device),
        'test_mask': test_mask.to(device),
        'num_nodes': N,
        'num_features': data.x.shape[1],
        'num_classes': C,
        'traces': {k: v.to(device) for k, v in traces.items()},
    }


def train_one(cls, common, extra, seed):
    torch.manual_seed(seed)
    np.random.seed(seed)
    torch.cuda.manual_seed_all(seed)
    t = cls(**common, **extra)
    if hasattr(t, 'align_mode'):
        t.align_mode = 'chain_norm'
    bv, bt = 0, 0  # best val acc, test acc at best val
    for ep in range(EPOCHS):
        t.train_step()
        if ep % 5 == 0:
            v, te = t.evaluate('val_mask'), t.evaluate('test_mask')
            if v > bv:
                bv, bt = v, te
    del t
    torch.cuda.empty_cache()
    return bt


def time_method(cls, common, extra, n_warmup=10, n_steps=200):
    torch.manual_seed(0)
    t = cls(**common, **extra)
    if hasattr(t, 'align_mode'):
        t.align_mode = 'chain_norm'
    for _ in range(n_warmup):
        t.train_step()
    torch.cuda.synchronize()
    times = []
    for _ in range(n_steps):
        torch.cuda.synchronize()
        t0 = time.perf_counter()
        t.train_step()
        torch.cuda.synchronize()
        times.append(time.perf_counter() - t0)
    del t
    torch.cuda.empty_cache()
    return float(np.median(times) * 1000)  # median step time in ms


def main():
    os.makedirs(OUT_DIR, exist_ok=True)
    results = {}
    grape_extra = dict(diffusion_alpha=0.5, diffusion_iters=10, lr_feedback=0.5,
                       num_probes=64, topo_mode='fixed_A')

    # ======== Part 1: DBLP full sweep ========
    print("=" * 60)
    print("Part 1: CitationFull-DBLP")
    print("=" * 60)
    dblp = load_dblp()
    print(f"DBLP: N={dblp['num_nodes']}, F={dblp['num_features']}, C={dblp['num_classes']}, "
          f"train={dblp['train_mask'].sum().item()}", flush=True)

    for bb in ['gcn', 'sage', 'gin', 'appnp']:
        for L in [5, 6]:
            for lr in [0.001, 0.005, 0.01]:
                common = dict(data=dblp, hidden_dim=64, lr=lr, weight_decay=5e-4,
                              num_layers=L, residual_alpha=0.0, backbone=bb)
                key = f"DBLP|{bb}|L={L}|lr={lr}"
                row = {}
                for mname, cls, extra in [
                        ('BP', BPTrainer, {}),
                        ('DFA', DFATrainer, dict(diffusion_alpha=0.5, diffusion_iters=10)),
                        ('GrAPE', KAFTTrainer, grape_extra)]:
                    accs = [train_one(cls, common, extra, s) for s in SEEDS]
                    row[mname] = {'mean': float(np.mean(accs)), 'std': float(np.std(accs))}
                results[key] = row
                bp, dfa, gr = row['BP']['mean'] * 100, row['DFA']['mean'] * 100, row['GrAPE']['mean'] * 100
                print(f"  {bb:>6} L={L} lr={lr:.3f} | BP {bp:.1f} DFA {dfa:.1f} GrAPE {gr:.1f} | "
                      f"Δ(BP) {gr-bp:+.1f} Δ(DFA) {gr-dfa:+.1f}", flush=True)

    # DBLP efficiency
    print("\nDBLP Efficiency:")
    for bb in ['gcn', 'sage', 'gin', 'appnp']:
        for L in [5, 6]:
            common = dict(data=dblp, hidden_dim=64, lr=0.01, weight_decay=5e-4,
                          num_layers=L, residual_alpha=0.0, backbone=bb)
            bp_ms = time_method(BPTrainer, common, {})
            # GrAPE-Efficient timing. The original call was truncated; the trainer
            # class is assumed to be KAFTTrainer (the only GrAPE trainer imported),
            # run with the efficiency settings below.
            eff_ms = time_method(KAFTTrainer, common,
                                 dict(lr_feedback=0.5, num_probes=64, max_topo_power=3,
                                      diff_alpha=0.5, align_every=10))
            key = f"DBLP_eff|{bb}|L={L}"
            results[key] = {'BP_ms': bp_ms, 'GrAPE_Eff_ms': eff_ms, 'speedup': bp_ms / eff_ms}
            print(f"  {bb:>6} L={L} | BP {bp_ms:.2f}ms GrAPE-Eff {eff_ms:.2f}ms | "
                  f"speedup {bp_ms/eff_ms:.2f}x", flush=True)

    # ======== Part 2: Depth sweep L=2-4 supplementary runs (Planetoid × GCN/SAGE/APPNP) ========
    print("\n" + "=" * 60)
    print("Part 2: Depth sweep L=2,3,4 supplementary runs")
    print("=" * 60)
    from src.data import load_dataset
    for ds_name in ['Cora', 'CiteSeer', 'PubMed']:
        data = load_dataset(ds_name, device=device)
        for bb in ['gcn', 'sage', 'appnp']:
            for L in [2, 3, 4]:
                common = dict(data=data, hidden_dim=64, lr=0.01, weight_decay=5e-4,
                              num_layers=L, residual_alpha=0.0, backbone=bb)
                key = f"{ds_name}|{bb}|L={L}|lr=0.01"
                row = {}
                for mname, cls, extra in [('BP', BPTrainer, {}),
                                          ('GrAPE', KAFTTrainer, grape_extra)]:
                    accs = [train_one(cls, common, extra, s) for s in SEEDS]
                    row[mname] = {'mean': float(np.mean(accs)), 'std': float(np.std(accs))}
                results[key] = row
                bp, gr = row['BP']['mean'] * 100, row['GrAPE']['mean'] * 100
                print(f"  {ds_name:>10} {bb:>6} L={L} | BP {bp:.1f} GrAPE {gr:.1f} | Δ {gr-bp:+.1f}",
                      flush=True)

    with open(os.path.join(OUT_DIR, 'results.json'), 'w') as f:
        json.dump(results, f, indent=2)
    print(f"\nSaved to {OUT_DIR}/results.json")


if __name__ == '__main__':
    main()
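
# ---------------------------------------------------------------------------
# Post-hoc inspection: a minimal sketch (not part of the experiment itself),
# assuming a completed run has written results/dblp_depth/results.json with
# the structure produced above. Accuracy rows carry 'BP'/'GrAPE' entries with
# 'mean'/'std'; the DBLP_eff rows carry only timing keys, so they are skipped.
#
#   import json
#   with open('results/dblp_depth/results.json') as f:
#       res = json.load(f)
#   for key, row in res.items():
#       if 'BP' in row and 'GrAPE' in row:  # skip timing-only DBLP_eff rows
#           delta = (row['GrAPE']['mean'] - row['BP']['mean']) * 100
#           print(f"{key}: Δ(GrAPE-BP) {delta:+.1f}")
# ---------------------------------------------------------------------------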