From bd9333eda60a9029a198acaeacb1eca4312bd1e8 Mon Sep 17 00:00:00 2001
From: YurenHao0426
Date: Mon, 4 May 2026 23:05:16 -0500
Subject: =?UTF-8?q?Initial=20release:=20GRAFT=20(KAFT)=20=E2=80=94=20NeurI?=
 =?UTF-8?q?PS=202026=20submission=20code?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Topology-factorized Jacobian-aligned feedback for deep GNNs. Includes:

- src/: GraphGrAPETrainer (KAFT) + BP / DFA / DFA-GNN / VanillaGrAPE baselines
  + multi-probe alignment estimator + dataset / sparse-mm utilities.
- experiments/: 19 runners reproducing every figure / table in the paper.
- figures/: 4 generators + the 4 PDFs cited in the report.
- paper/: NeurIPS .tex and consolidated experiments_master notes.

Smoke test: 50-epoch Cora GCN L=4 gives BP 77.3% / KAFT 79.0%.
---
 experiments/run_diag_section23_v2.py | 172 +++++++++++++++++++++++++++++++++++
 1 file changed, 172 insertions(+)
 create mode 100644 experiments/run_diag_section23_v2.py

diff --git a/experiments/run_diag_section23_v2.py b/experiments/run_diag_section23_v2.py
new file mode 100644
index 0000000..f583031
--- /dev/null
+++ b/experiments/run_diag_section23_v2.py
@@ -0,0 +1,172 @@
+#!/usr/bin/env python3
+"""§2.3 diagnostic v2 — faithful reproduction of the original methodology.
+
+Uses src.trainers.BPTrainer (the actual training stack used in the paper),
+matching results/gradient_reach_20seeds/per_seed_data.json, which shows
+GCN L=10 weight-grad norms of 0.0 for all 20 seeds × 10 layers.
+
+Adds beyond the original:
+  - pre-activation grad G_Z[l] = ||dL/dZ_l||_F and an RMS-normed variant
+  - forward magnitudes M[l] = ||H_l||_F and RMS-normed
+  - centered dispersion D[l] = ||H_l - mean||_F / D_0
+  - frozen linear probe probe_acc[l] on H_l
+
+Backbone: GCN. Cora. 100 epochs (matches the original). 20 seeds.
+Depths {6, 10, 20}.
+Output: results/diag_section23/diag_data_v2.json
+"""
+import json
+import os
+import sys
+
+import numpy as np
+import torch
+import torch.nn.functional as F
+from sklearn.linear_model import LogisticRegression
+from sklearn.preprocessing import StandardScaler
+
+sys.path.insert(0, '/home/yurenh2/graph-grape')
+from src.data import load_dataset
+from src.trainers import BPTrainer
+
+DEVICE = 'cuda:0'  # CUDA_VISIBLE_DEVICES=2 → cuda:0
+HIDDEN = 64
+LR = 0.01
+WD = 5e-4
+EPOCHS = 100
+SEEDS = list(range(20))
+OUT_DIR = '/home/yurenh2/graph-grape/results/diag_section23'
+os.makedirs(OUT_DIR, exist_ok=True)
+
+
+def forward_with_intermediates(bp, capture_for_grad=False):
+    """Re-implement BPTrainer.forward(), capturing per-layer Z (pre-act) and H (post-act).
+
+    H[0] = X (input features). For l = 1..L: H[l] = relu(Z[l-1]) (or Z[l-1]
+    for the last layer). Z[0..L-1] are the pre-activation outputs of each conv.
+    """
+    X = bp.data['X']
+    H_list = [X]
+    Z_list = []
+    H = X
+    H0 = None
+    for l in range(bp.num_layers):
+        if 0 < l < bp.num_layers - 1 and bp.residual_alpha > 0 and H0 is not None:
+            H = (1 - bp.residual_alpha) * H + bp.residual_alpha * H0
+        Z = bp._graph_conv(H, bp.weights[l], l)
+        if capture_for_grad:
+            Z.retain_grad()  # keep dL/dZ_l on this non-leaf tensor after backward()
+        Z_list.append(Z)
+        if l < bp.num_layers - 1:
+            H = F.relu(Z)
+            if l == 0:
+                H0 = H
+        else:
+            H = Z  # final logits, no relu
+        H_list.append(H)
+    return H_list[-1], Z_list, H_list  # logits, Z's, H's
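
# A minimal sketch of the retain_grad() mechanism that forward_with_intermediates
# relies on, using toy tensors only (not part of the original experiment, never
# called by main()): autograd frees gradients of non-leaf tensors by default, so
# z.grad would be None after backward() without the retain_grad() call.
def _sketch_retain_grad():
    x = torch.randn(4, 3, requires_grad=True)
    w = torch.randn(3, 5)
    z = x @ w               # non-leaf intermediate, analogous to Z_l above
    z.retain_grad()         # ask autograd to keep dL/dZ on the intermediate
    loss = F.relu(z).sum()
    loss.backward()
    assert z.grad is not None and z.grad.shape == z.shape  # per-element dL/dZ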
+
+
+def diagnose(seed, L, data):
+    torch.manual_seed(seed)
+    np.random.seed(seed)
+    torch.cuda.manual_seed_all(seed)
+    bp = BPTrainer(data=data, hidden_dim=HIDDEN, lr=LR, weight_decay=WD,
+                   num_layers=L, residual_alpha=0.0, backbone='gcn')
+
+    for _ in range(EPOCHS):
+        bp.train_step()
+
+    # Diagnostic forward at epoch 100
+    bp.optimizer.zero_grad()
+    logits, Zs, Hs = forward_with_intermediates(bp, capture_for_grad=True)
+    mask = data['train_mask']
+    loss = F.cross_entropy(logits[mask], data['y'][mask])
+    loss.backward()
+
+    # Weight gradients (original methodology)
+    W_grads_F = [float(w.grad.detach().norm().item()) for w in bp.weights]
+    W_grads_rms = [g / np.sqrt(w.numel()) for g, w in zip(W_grads_F, bp.weights)]
+
+    # Pre-activation gradients on Z_l (l = 0..L-1)
+    Z_grads_F, Z_grads_rms = [], []
+    for z in Zs:
+        if z.grad is None:
+            Z_grads_F.append(0.0)
+            Z_grads_rms.append(0.0)
+            continue
+        N, d_ = z.shape
+        gf = float(z.grad.detach().norm().item())
+        Z_grads_F.append(gf)
+        Z_grads_rms.append(gf / np.sqrt(N * d_))
+
+    # Forward state metrics on H_l (l = 0..L)
+    M_F, M_rms, D_raw = [], [], []
+    for H in Hs:
+        N, d_ = H.shape
+        mf = float(H.detach().norm().item())
+        M_F.append(mf)
+        M_rms.append(mf / np.sqrt(N * d_))
+        mu = H.detach().mean(0, keepdim=True)
+        D_raw.append(float((H.detach() - mu).norm().item()))
+    D0 = D_raw[0] if D_raw[0] > 0 else 1.0
+    D_norm = [d / D0 for d in D_raw]
+
+    # Frozen linear probe on each H_l
+    probe_acc = []
+    ytr = data['y'][data['train_mask']].cpu().numpy()
+    yte = data['y'][data['test_mask']].cpu().numpy()
+    train_mask_b = data['train_mask']
+    test_mask_b = data['test_mask']
+    for H in Hs:
+        Xtr = H.detach()[train_mask_b].cpu().numpy()
+        Xte = H.detach()[test_mask_b].cpu().numpy()
+        try:
+            sc = StandardScaler().fit(Xtr)
+            Xtr_s = sc.transform(Xtr)
+            Xte_s = sc.transform(Xte)
+            clf = LogisticRegression(max_iter=2000, C=1.0).fit(Xtr_s, ytr)
+            acc = float(clf.score(Xte_s, yte))
+        except Exception:
+            acc = float('nan')  # degenerate features can break the probe; record NaN
+        probe_acc.append(acc)
+
+    bp_acc = bp.evaluate('test_mask')
+
+    del bp
+    torch.cuda.empty_cache()
+    return dict(L=L, seed=seed, bp_acc=bp_acc,
+                W_grads_F=W_grads_F, W_grads_rms=W_grads_rms,
+                Z_grads_F=Z_grads_F, Z_grads_rms=Z_grads_rms,
+                M_F=M_F, M_rms=M_rms, D_raw=D_raw, D_norm=D_norm,
+                probe_acc=probe_acc)
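
# A minimal consumer sketch for the JSON written by main() below (not invoked
# by this script): field names follow the dict returned by diagnose(); the
# aggregation shown is illustrative, not the paper's plotting code.
def _sketch_load_results(path=os.path.join(OUT_DIR, 'diag_data_v2.json')):
    with open(path) as f:
        results = json.load(f)  # {'L=20': [row, ...], 'L=10': [...], 'L=6': [...]}
    for key, rows in results.items():
        Wg = np.array([r['W_grads_F'] for r in rows])   # (n_seeds, L)
        pa = np.array([r['probe_acc'] for r in rows])   # (n_seeds, L+1)
        print(key,
              'median W grad per layer:', np.median(Wg, axis=0),
              'mean probe acc per layer:', np.nanmean(pa, axis=0))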
+
+
+def main():
+    data = load_dataset('Cora', device=DEVICE)
+    print(f"Cora: N={data['X'].shape[0]}, F={data['X'].shape[1]}, "
+          f"C={data['num_classes']}", flush=True)
+
+    all_results = {}
+    for L in [20, 10, 6]:
+        print(f'\n=== L={L} ===', flush=True)
+        rows = []
+        for s in SEEDS:
+            r = diagnose(s, L, data)
+            rows.append(r)
+            wg = r['W_grads_F']
+            print(f"  L={L} s={s:2d} acc={r['bp_acc']:.4f} "
+                  f"W_grads[0,mid,-1]=[{wg[0]:.2e}, {wg[len(wg)//2]:.2e}, {wg[-1]:.2e}] "
+                  f"Z_grad[out]={r['Z_grads_F'][-1]:.2e}", flush=True)
+        all_results[f'L={L}'] = rows
+
+    out_path = os.path.join(OUT_DIR, 'diag_data_v2.json')
+    with open(out_path, 'w') as f:
+        json.dump(all_results, f, indent=2)
+    print(f'\nSaved {out_path}')
+
+    print('\n=== summary ===')
+    for k, rows in all_results.items():
+        Wg = np.array([r['W_grads_F'] for r in rows])
+        n_under = int((Wg < 1e-38).sum())  # below float32's smallest normal (~1.18e-38)
+        n_total = Wg.size
+        accs = np.array([r['bp_acc'] for r in rows])
+        print(f'  {k}: BP acc {accs.mean():.4f}±{accs.std():.4f} '
+              f'W_grads_F median={np.median(Wg):.3e} '
+              f'<1e-38: {n_under}/{n_total} cells')
+
+
+if __name__ == '__main__':
+    main()
--
cgit v1.2.3