From 4172195ca318387e20e3576ab40187d4d2f08ebe Mon Sep 17 00:00:00 2001 From: YurenHao0426 Date: Tue, 7 Apr 2026 23:03:05 -0500 Subject: Cross-architecture temporal validation: 3 archs x 3 seeds x 2 methods MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ResMLP (4-block d=256, with out_ln, CIFAR-10): s42: DFA (a) ep 8, (b) ep 4, acc 0.308 s123: DFA (a) ep 11, (b) ep 4, acc 0.320 s456: DFA (a) ep 8, (b) ep 3, acc 0.300 ViT-Mini (4-block d=128, cls token + terminal LN, CIFAR-10): s42: DFA (a) ep 1, (b) ep 3, acc 0.256 s123: DFA (a) ep 1, (b) ep 2, acc 0.202 s456: DFA (a) ep 1, (b) ep 3, acc 0.253 StudentNet (4-block d=128, NO terminal LN, synthetic alpha=1.0): s42: DFA (a) ep 18, (b) NEVER, acc 0.332 s123: DFA (a) ep 14, (b) NEVER, acc 0.314 s456: DFA (a) ep 25, (b) NEVER, acc 0.336 BP: never fires on any seed x any architecture (9/9 sanity passes). Key cross-architecture finding: diagnostic (b) is specifically the LN-driven failure mode. Without out_ln, the BP grad never crosses the 1e-7 floor, even though (a) still fires (the residual stream still grows, just without the LN-cancellation pathology that drives the BP grad to the floor). This is the causal architectural control: (b) specifically tests 'is terminal-LN gradient cancellation active?' and (a) tests 'is the residual stream growing without bound?'. They are linked but separable. This is the §3 cross-architecture validation evidence. 
--- protocol/examples/temporal_diagnostic_evolution.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'protocol') diff --git a/protocol/examples/temporal_diagnostic_evolution.py b/protocol/examples/temporal_diagnostic_evolution.py index 35cf720..af0da84 100644 --- a/protocol/examples/temporal_diagnostic_evolution.py +++ b/protocol/examples/temporal_diagnostic_evolution.py @@ -64,7 +64,7 @@ def main(): import argparse p = argparse.ArgumentParser() p.add_argument("--seed", type=int, default=42) - p.add_argument("--arch", type=str, default="resmlp", choices=["resmlp", "vit"]) + p.add_argument("--arch", type=str, default="resmlp", choices=["resmlp", "vit", "no_outln"]) args = p.parse_args() if args.arch == "resmlp": snapshot_path = os.path.join( @@ -72,12 +72,18 @@ def main(): ) h_key = "hidden_norms" g_key = "bp_grad_norms_per_sample_med" - else: + elif args.arch == "vit": snapshot_path = os.path.join( REPO_ROOT, f"results/snapshot_vit_v1/snapshot_vit_s{args.seed}.json" ) h_key = "hidden_norms_cls" g_key = "bp_grad_per_sample_l2_med" + else: # no_outln (synthetic studentnet without terminal LN) + snapshot_path = os.path.join( + REPO_ROOT, f"results/snapshot_no_outln_v1/snapshot_noLN_s{args.seed}.json" + ) + h_key = "hidden_norms" + g_key = "bp_grad_per_sample_l2_med" if not os.path.exists(snapshot_path): print(f"snapshot not found: {snapshot_path}") return -- cgit v1.2.3