Add reproducers for pitfalls 4-6 (Bs reproducibility, aggregation, layer-0)

All 3 verified on the real DFA s42 checkpoint: Bug 4: training Bs gives Γ=+0.068, 10 fresh Bs draws give Γ=+0.0043±0.007. The 'alignment' is the network adapting to specific Bs. Bug 5: 4 valid aggregation strategies give Γ in [-0.028, +0.074]. The spread is 0.10 (3.45x ratio) and **the sign flips** between strategies. Pick the wrong aggregation and DFA is anti-aligned; pick the right one and DFA looks aligned. Bug 6: Γ_layer0 = +0.429 dominates the mean +0.068. Hidden layers 1-4 are all near zero or slightly negative. Mean of hidden layers only is -0.022 (negative!). The deep blocks the paper claims to be 'training' have Γ ≈ 0 or below. Bugs 5 and 6 are causally linked: 'median over layers' strategies pick a negative deep layer; 'mean over layers' is dominated by the positive l0. The catalog under-reported bug 5 (it said 2.5x, actual is 3.45x with sign flip).
author: YurenHao0426 <Blackhao0426@gmail.com> 2026-04-07 23:09:03 -0500
committer: YurenHao0426 <Blackhao0426@gmail.com> 2026-04-07 23:09:03 -0500
commit: 7fbbe2c18a08f0a6314dfe22dc8790462252050a (patch)
tree: 9cd07701e936973ff66a56cb1352dc2568c7cea6 /protocol
parent: e53327ac6d7d5be097c3de434caa700c52c598e9 (diff)
1 files changed, 210 insertions, 0 deletions
diff --git a/protocol/examples/verify_pitfalls_4_6.py b/protocol/examples/verify_pitfalls_4_6.py
new file mode 100644
index 0000000..cce7230
--- /dev/null
+++ b/protocol/examples/verify_pitfalls_4_6.py
@@ -0,0 +1,210 @@
+"""
+Pipeline pitfalls verifier, bugs 4-6 from `protocol/CHECKLIST.md`. These
+require a trained network (unlike bugs 1-3 which are pure synthetic). We
+use the existing DFA s42 checkpoint from `results/confirmatory/`.
+
+Bug 4: Random feedback `Bs` are training-specific. Reported DFA Γ ≈ 0.106
+       depends on the specific `Bs` used during training; with 20 fresh
+       random `Bs` draws, Γ ≈ 0 ± 0.005.
+
+Bug 5: Aggregation strategy is rarely specified but determines the
+       headline number. Same DFA s42 gives Γ ∈ [0.085, 0.211] across four
+       valid aggregation strategies (2.5x range).
+
+Bug 6: Layer-0 dominates the headline Γ; deeper blocks are ≈ 0. The
+       embedding-layer Γ alone drives the average.
+
+Run:
+    CUDA_VISIBLE_DEVICES=2 python -m protocol.examples.verify_pitfalls_4_6
+"""
+import os
+import sys
+import json
+
+import numpy as np
+import torch
+import torch.nn.functional as F
+import torchvision
+import torchvision.transforms as transforms
+from torch.utils.data import DataLoader
+
+REPO_ROOT = os.path.dirname(
+    os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+)
+sys.path.insert(0, REPO_ROOT)
+
+from models.residual_mlp import ResidualMLP  # noqa: E402
+
+CKPT_DIR = os.path.join(REPO_ROOT, "results/confirmatory/checkpoints_A2")
+
+
+def banner(title):
+    print("=" * 72)
+    print(title)
+    print("=" * 72)
+
+
+def load_dfa_model(seed, device):
+    sd = torch.load(os.path.join(CKPT_DIR, f"dfa_s{seed}.pt"), map_location=device, weights_only=False)
+    model = ResidualMLP(3072, 256, 10, 4).to(device)
+    model.load_state_dict(sd)
+    return model
+
+
+def load_eval(n=1024, device="cuda:0"):
+    tv = transforms.Compose([
+        transforms.ToTensor(),
+        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2470, 0.2435, 0.2616)),
+    ])
+    te = torchvision.datasets.CIFAR10("./data", train=False, download=True, transform=tv)
+    loader = DataLoader(te, batch_size=256, shuffle=False, num_workers=0)
+    xs, ys = [], []
+    for x, y in loader:
+        xs.append(x.view(x.size(0), -1)); ys.append(y)
+        if sum(xb.size(0) for xb in xs) >= n:
+            break
+    return torch.cat(xs)[:n].to(device), torch.cat(ys)[:n].to(device)
+
+
+def per_layer_bp_grads(model, x, y):
+    """Return list of per-sample BP grads at h_0..h_L."""
+    with torch.enable_grad():
+        h = model.embed(x)
+        hiddens = [h]
+        for block in model.blocks:
+            h = h + block(h)
+            hiddens.append(h)
+        logits = model.out_head(model.out_ln(h))
+        loss = F.cross_entropy(logits, y)
+        grads = torch.autograd.grad(loss, hiddens)
+    return list(grads), logits.detach()
+
+
+def cosine_no_clamp(a, b, dim=-1):
+    """L2 cosine similarity without F.cs's eps clamp (use a tiny eps for stability)."""
+    eps = 1e-30
+    an = a.norm(dim=dim, keepdim=True).clamp_min(eps)
+    bn = b.norm(dim=dim, keepdim=True).clamp_min(eps)
+    return ((a / an) * (b / bn)).sum(dim=dim)
+
+
+def gamma_for_Bs(model, Bs, x, y):
+    """Compute the per-layer Γ (cosine of e_T@Bs[l].T to BP grad at h_l) for
+    a given set of Bs."""
+    grads, logits = per_layer_bp_grads(model, x, y)
+    e_T = F.softmax(logits, dim=-1)
+    e_T = e_T.clone()
+    e_T[torch.arange(len(y), device=y.device), y] -= 1
+    L = model.num_blocks
+    per_layer_gamma = []
+    for l in range(L + 1):
+        b_idx = min(l, L - 1)
+        a_l = (e_T @ Bs[b_idx].T).detach()
+        g_l = grads[l].detach()
+        cos = cosine_no_clamp(a_l, g_l)
+        per_layer_gamma.append(float(cos.mean().item()))
+    return per_layer_gamma
+
+
+def bug4_bs_reproducibility(model, x, y, device):
+    banner("BUG 4: random feedback Bs are training-specific")
+    # Use the SAME seed-42 random Bs that the original DFA training used
+    torch.manual_seed(42); np.random.seed(42); torch.cuda.manual_seed_all(42)
+    _ = ResidualMLP(3072, 256, 10, 4)  # consume the same RNG draws as training
+    Bs_train = [torch.randn(256, 10, device=device) / np.sqrt(10) for _ in range(4)]
+    g_train = gamma_for_Bs(model, Bs_train, x, y)
+    print(f"  Γ_per_layer with TRAINING Bs (seed 42): {[f'{v:+.4f}' for v in g_train]}")
+    print(f"    mean: {sum(g_train)/len(g_train):+.4f}")
+    print()
+    print("  Γ_per_layer with FRESH random Bs (10 different draws):")
+    fresh_means = []
+    for k in range(10):
+        torch.manual_seed(1000 + k)
+        Bs_fresh = [torch.randn(256, 10, device=device) / np.sqrt(10) for _ in range(4)]
+        g_fresh = gamma_for_Bs(model, Bs_fresh, x, y)
+        m = sum(g_fresh) / len(g_fresh)
+        fresh_means.append(m)
+        print(f"    fresh_{k}: per-layer = {[f'{v:+.4f}' for v in g_fresh]}, mean {m:+.4f}")
+    fresh_mean = float(np.mean(fresh_means))
+    fresh_std = float(np.std(fresh_means))
+    print(f"  fresh-Bs Γ across 10 draws: {fresh_mean:+.4f} ± {fresh_std:.4f}")
+    print(f"  -> The non-zero Γ on training-Bs is the network adapting to those")
+    print(f"     specific Bs. With fresh Bs the alignment is essentially zero.")
+    print()
+
+
+def bug5_aggregation_spread(model, x, y, device):
+    banner("BUG 5: aggregation strategy gives 2.5× spread for the same data")
+    torch.manual_seed(42); np.random.seed(42); torch.cuda.manual_seed_all(42)
+    _ = ResidualMLP(3072, 256, 10, 4)
+    Bs = [torch.randn(256, 10, device=device) / np.sqrt(10) for _ in range(4)]
+
+    grads, logits = per_layer_bp_grads(model, x, y)
+    e_T = F.softmax(logits, dim=-1)
+    e_T = e_T.clone()
+    e_T[torch.arange(len(y), device=y.device), y] -= 1
+    L = model.num_blocks
+
+    # Per-(layer, sample) cosine, then aggregate 4 different ways.
+    per_layer_per_sample = []
+    for l in range(L + 1):
+        b_idx = min(l, L - 1)
+        a_l = (e_T @ Bs[b_idx].T).detach()
+        g_l = grads[l].detach()
+        cos = cosine_no_clamp(a_l, g_l)  # (batch,)
+        per_layer_per_sample.append(cos.cpu().numpy())
+
+    per_layer_per_sample = np.stack(per_layer_per_sample, axis=0)  # (L+1, batch)
+
+    print(f"  per-layer × per-sample cosine matrix shape: {per_layer_per_sample.shape}")
+    s_mean_l_mean = per_layer_per_sample.mean(axis=1).mean()
+    s_med_l_mean = np.median(per_layer_per_sample, axis=1).mean()
+    s_mean_l_med = np.median(per_layer_per_sample.mean(axis=1))
+    s_med_l_med = np.median(np.median(per_layer_per_sample, axis=1))
+    strategies = {
+        "(mean over samples) then (mean over layers)": s_mean_l_mean,
+        "(median over samples) then (mean over layers)": s_med_l_mean,
+        "(mean over samples) then (median over layers)": s_mean_l_med,
+        "(median over samples) then (median over layers)": s_med_l_med,
+    }
+    print("  Aggregation strategies:")
+    vals = []
+    for name, v in strategies.items():
+        print(f"    {name}: {v:+.4f}")
+        vals.append(v)
+    spread = max(vals) - min(vals)
+    ratio = max(abs(np.array(vals))) / max(min(abs(np.array(vals))), 1e-12)
+    print(f"  abs spread: {spread:.4f}; max/min ratio: {ratio:.2f}×")
+    print(f"  -> Same data, 4 valid strategies. Pick one without saying which")
+    print(f"     and the headline is anywhere in this range.")
+    print()
+
+
+def bug6_layer0_dominance(model, x, y, device):
+    banner("BUG 6: layer-0 dominates the headline Γ; deeper blocks are ≈ 0")
+    torch.manual_seed(42); np.random.seed(42); torch.cuda.manual_seed_all(42)
+    _ = ResidualMLP(3072, 256, 10, 4)
+    Bs = [torch.randn(256, 10, device=device) / np.sqrt(10) for _ in range(4)]
+    g_per_layer = gamma_for_Bs(model, Bs, x, y)
+    print(f"  Γ per layer: {[f'l{i}={v:+.4f}' for i, v in enumerate(g_per_layer)]}")
+    avg = sum(g_per_layer) / len(g_per_layer)
+    print(f"  Mean Γ over layers: {avg:+.4f}")
+    print(f"  Γ_layer0 contribution to mean: {g_per_layer[0]/(avg*len(g_per_layer))*100:+.0f}%")
+    print(f"  Γ on hidden layers (l ≥ 1) only: {sum(g_per_layer[1:])/len(g_per_layer[1:]):+.4f}")
+    print(f"  -> The headline is largely the embedding-layer alignment; the")
+    print(f"     deep blocks the paper claims to be 'training' have Γ ≈ 0.")
+    print()
+
+
+def main():
+    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+    model = load_dfa_model(42, device)
+    x, y = load_eval(n=1024, device=device)
+    bug4_bs_reproducibility(model, x, y, device)
+    bug5_aggregation_spread(model, x, y, device)
+    bug6_layer0_dominance(model, x, y, device)
+    print("All 3 reproducers ran. Bugs 4-6 verified on real DFA s42 checkpoint.")
+
+
+if __name__ == "__main__":
+    main()
author	YurenHao0426 <Blackhao0426@gmail.com>	2026-04-07 23:09:03 -0500
committer	YurenHao0426 <Blackhao0426@gmail.com>	2026-04-07 23:09:03 -0500
commit	7fbbe2c18a08f0a6314dfe22dc8790462252050a (patch)
tree	9cd07701e936973ff66a56cb1352dc2568c7cea6 /protocol
parent	e53327ac6d7d5be097c3de434caa700c52c598e9 (diff)