| author | YurenHao0426 <blackhao0426@gmail.com> | 2026-05-04 23:12:29 -0500 |
|---|---|---|
| committer | YurenHao0426 <blackhao0426@gmail.com> | 2026-05-04 23:12:29 -0500 |
| commit | 174083f4e340afc192d57aacc6422c99530e9e59 (patch) | |
| tree | 28932cc5d17ed2869dfe33b470f30cd602555edf /src/trainers.py | |
| parent | ba6ead6d7a41b7ed78bb228181b7262d0c75d2eb (diff) | |
- src/trainers.py: removed the VanillaGrAPETrainer class (~30 lines) and
  cleaned up the module-level "Methods compared" docstring.
- experiments/run_ablation_20seeds.py: dropped the VanillaGrAPE row from the
  ablation grid; the sweep is now the 4-method BP → DFA → DFA-GNN → KAFT.
Smoke test: BPTrainer / DFATrainer / DFAGNNTrainer / KAFTTrainer all train
cleanly (GCN, L=4, Cora, 50 epochs; test acc 77.3 / 76.6 / 78.4 / 79.0).
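For orientation, here is a minimal sketch of what the trimmed sweep could look like. The `METHODS` mapping and loop structure are illustrative assumptions, not the actual contents of experiments/run_ablation_20seeds.py:

```python
# Hypothetical sketch of the 4-method grid after dropping VanillaGrAPE;
# the real run_ablation_20seeds.py may be organized differently.
from src.trainers import BPTrainer, DFATrainer, DFAGNNTrainer, KAFTTrainer

METHODS = {
    'BP':      BPTrainer,      # standard backprop baseline
    'DFA':     DFATrainer,     # fixed random R, P=I
    'DFA-GNN': DFAGNNTrainer,  # fixed random R, P=Â^{L-l}
    'KAFT':    KAFTTrainer,    # learned R + topology P=Â^{min(L-1-l, K)}
}

for seed in range(20):                       # 20-seed sweep, per the filename
    for name, trainer_cls in METHODS.items():
        # build the data object, construct trainer_cls(...), train,
        # and record test accuracy for the (method, seed) cell
        ...
```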
Diffstat (limited to 'src/trainers.py')
| -rw-r--r-- | src/trainers.py | 42 |
1 file changed, 4 insertions(+), 38 deletions(-)
```diff
diff --git a/src/trainers.py b/src/trainers.py
index 7589fc9..9da6a6a 100644
--- a/src/trainers.py
+++ b/src/trainers.py
@@ -3,11 +3,10 @@
 Training methods for KAFT experiments. Generalized to L-layer residual GCN.
 
 Methods compared:
-    BP           — Standard backprop GCN
-    DFA          — Fixed random R, P=I
-    DFA-GNN      — Fixed random R, P=Â^{L-l}
-    VanillaGrAPE — Aligned R (per layer), P=I
-    KAFT         — Aligned R (per layer) + topology P=Â^{L-l}
+    BP      — Standard backprop GCN
+    DFA     — Fixed random R, P=I
+    DFA-GNN — Fixed random R, P=Â^{L-l}
+    KAFT    — Learned R (per layer) + topology P=Â^{min(L-1-l, K)}
 """
 
 import torch
@@ -575,39 +574,6 @@ class DFAGNNTrainer(_FeedbackTrainerBase):
 
 
 # ---------------------------------------------------------------------------
-# Vanilla GrAPE Trainer
-# ---------------------------------------------------------------------------
-
-class VanillaGrAPETrainer(_FeedbackTrainerBase):
-    """Aligned R per layer, no topology (P=I)."""
-
-    def __init__(self, data, hidden_dim, lr, weight_decay,
-                 lr_feedback=0.5, num_probes=64,
-                 diffusion_alpha=0.5, diffusion_iters=10,
-                 num_layers=2, residual_alpha=0.0, backbone='gcn', **_kw):
-        super().__init__(data, hidden_dim, lr, weight_decay,
-                         diffusion_alpha, diffusion_iters,
-                         num_layers, residual_alpha, backbone,
-                         _kw.get('use_batchnorm', False), _kw.get('dropout', 0.0))
-        self.lr_fb = lr_feedback
-        self.num_probes = num_probes
-        # One R per hidden layer
-        self.Rs = [torch.randn(self.d_out, hidden_dim, device=self.device) * 0.01
-                   for _ in range(num_layers - 1)]
-
-    def _alignment_step(self, inter):
-        metrics = {}
-        for l in range(self.num_layers - 1):
-            cos = _align_R_layer(self, l)
-            metrics[f'cos_feat_L{l}'] = cos
-        metrics['cos_feat'] = sum(metrics.values()) / len(metrics)
-        return metrics
-
-    def _compute_hidden_feedback(self, l, inter, E_bar):
-        return E_bar @ self.Rs[l]
-
-
-# ---------------------------------------------------------------------------
 # KAFT Trainer
 # ---------------------------------------------------------------------------
```
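The removed class makes the docstring's contrast concrete: VanillaGrAPE projected the output error through a per-layer R but applied no topology (P=I), which is exactly the factor DFA-GNN and KAFT add. Below is a minimal sketch of the two feedback paths, assuming `E_bar` is the [N, d_out] output-error matrix, `R` a [d_out, hidden] feedback matrix, and `A_hat` a sparse normalized adjacency; the helper names are illustrative, not functions from src/trainers.py:

```python
import torch

def feedback_no_topology(E_bar, R):
    """P=I path (DFA, and the removed VanillaGrAPE): project the error per node."""
    return E_bar @ R                     # [N, d_out] @ [d_out, hidden] -> [N, hidden]

def feedback_with_topology(E_bar, R, A_hat, num_hops):
    """P=Â^hops path (DFA-GNN uses hops=L-l; KAFT caps it at min(L-1-l, K))."""
    fb = E_bar @ R                       # same per-node projection as above
    for _ in range(num_hops):
        fb = torch.sparse.mm(A_hat, fb)  # diffuse the teaching signal over edges
    return fb
```

Read as a sketch of the docstring's notation rather than the trainers' actual implementation, the Â^{...} factor gives the hidden-layer teaching signal the same receptive field as the forward pass at that layer, instead of keeping it node-local as under P=I.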
