summaryrefslogtreecommitdiff
path: root/results
diff options
context:
space:
mode:
authorYurenHao0426 <Blackhao0426@gmail.com>2026-04-08 01:28:10 -0500
committerYurenHao0426 <Blackhao0426@gmail.com>2026-04-08 01:28:10 -0500
commit3b3557286af7fe9f7376e1e6122169ce8b0a17b1 (patch)
tree5ec2d1a6ab55984bea5c5aa3007e9c87709c1115 /results
parent41ace4c1d99a7a8436e42710135d44b925920850 (diff)
DISAMBIGUATION: vanilla DFA early-epoch checkpoints + cos measurement
Round 19's #3 critical experiment. Trained vanilla DFA s42 for 5 epochs, saved a checkpoint at each epoch, then measured per-layer cos(e_T B^T, BP grad).

Key trajectory of ||g_l|| during vanilla DFA training:
  ep 0: ~1e-3 (random init, healthy)
  ep 1: ~1.4e-6 (drop of ~3 orders of magnitude, STILL above the 1e-7 floor)
  ep 2: ~3e-7 (above floor)
  ep 3: ~1.3e-7 (above floor, barely)
  ep 4: ~7e-8 (BELOW floor)
  ep 5: ~4e-8 (well below floor)

So the ep 1, 2, 3 vanilla checkpoints are in the MEANINGFUL ||g|| regime. Cos measurement on those:
  ep 1: l0=+0.42, l1=+0.005, l2=-0.028, l3=-0.039, l4=-0.038
  ep 2: l0=+0.44, l1=-0.002, l2=-0.040, l3=-0.055, l4=-0.054
  ep 3: l0=+0.43, l1=+0.007, l2=-0.039, l3=-0.054, l4=-0.054

DEEP-LAYER COSINES ARE ESSENTIALLY ZERO AT EVERY VANILLA EPOCH, even when ||g|| is in the meaningful regime (ep 1: ||g||=6.7e-7). Compare to penalized DFA s42 at 30 ep: deep cos = +0.17.

Hypothesis B confirmed: the penalty CREATED the deep-layer alignment. It is a training outcome of the regularization, not a measurement-regime revelation.

Paper implications: there are two distinct failure modes after all, but they are not 'scale + direction'. They are:
  (1) Measurement degeneracy via terminal LN gradient cancellation (caught by diagnostic (b))
  (2) Low intrinsic credit quality of random feedback even in the meaningful regime (caught by direct cos measurement)

The penalty partially alleviates BOTH (residual stream contained AND deep alignment improved from ~0 to +0.17), but fully resolves neither.
Diffstat (limited to 'results')
-rw-r--r--results/vanilla_dfa_early_ckpts/vanilla_dfa_s42_log.json120
-rw-r--r--results/vanilla_dfa_early_run.log13
2 files changed, 133 insertions, 0 deletions
diff --git a/results/vanilla_dfa_early_ckpts/vanilla_dfa_s42_log.json b/results/vanilla_dfa_early_ckpts/vanilla_dfa_s42_log.json
new file mode 100644
index 0000000..bb211d4
--- /dev/null
+++ b/results/vanilla_dfa_early_ckpts/vanilla_dfa_s42_log.json
@@ -0,0 +1,120 @@
+{
+ "config": {
+ "seed": 42,
+ "max_epoch": 5,
+ "lr": 0.001,
+ "wd": 0.01,
+ "save_at": [
+ 1,
+ 2,
+ 3,
+ 4,
+ 5
+ ],
+ "output_dir": "results/vanilla_dfa_early_ckpts"
+ },
+ "log": [
+ {
+ "epoch": 0,
+ "h_norms": [
+ 8.718070030212402,
+ 8.780964851379395,
+ 8.812002182006836,
+ 8.828720092773438,
+ 8.893179893493652
+ ],
+ "g_norms": [
+ 0.0010068593546748161,
+ 0.000997236929833889,
+ 0.0009934091940522194,
+ 0.0009870644425973296,
+ 0.0009829498594626784
+ ]
+ },
+ {
+ "epoch": 1,
+ "h_norms": [
+ 802.1658935546875,
+ 3058.3173828125,
+ 4910.8408203125,
+ 6491.31494140625,
+ 7304.1533203125
+ ],
+ "g_norms": [
+ 2.526000798752648e-06,
+ 1.3625027577290894e-06,
+ 1.3747999219049234e-06,
+ 1.378283172925876e-06,
+ 1.3869492931917193e-06
+ ]
+ },
+ {
+ "epoch": 2,
+ "h_norms": [
+ 1580.3233642578125,
+ 13323.4609375,
+ 22799.7890625,
+ 32449.083984375,
+ 37769.375
+ ],
+ "g_norms": [
+ 1.142504174822534e-06,
+ 3.0893900770934124e-07,
+ 3.109949489044084e-07,
+ 3.1369876296594157e-07,
+ 3.1714918691250205e-07
+ ]
+ },
+ {
+ "epoch": 3,
+ "h_norms": [
+ 2344.037841796875,
+ 32468.109375,
+ 58174.1328125,
+ 87268.390625,
+ 104689.5234375
+ ],
+ "g_norms": [
+ 8.221624625548429e-07,
+ 1.3046170010966307e-07,
+ 1.2744629884764436e-07,
+ 1.2940536464611796e-07,
+ 1.2926365400289797e-07
+ ]
+ },
+ {
+ "epoch": 4,
+ "h_norms": [
+ 3132.316162109375,
+ 62912.65234375,
+ 117831.9375,
+ 183012.40625,
+ 223974.890625
+ ],
+ "g_norms": [
+ 6.481430432359048e-07,
+ 7.028656057173066e-08,
+ 6.718681078154987e-08,
+ 6.803762175877637e-08,
+ 6.805611008076085e-08
+ ]
+ },
+ {
+ "epoch": 5,
+ "h_norms": [
+ 3947.822998046875,
+ 109389.84375,
+ 203877.265625,
+ 330289.25,
+ 405665.21875
+ ],
+ "g_norms": [
+ 5.139339123161335e-07,
+ 4.3603474608744364e-08,
+ 4.2005297018476995e-08,
+ 4.2576637326874334e-08,
+ 4.249179852422458e-08
+ ]
+ }
+ ]
+} \ No newline at end of file
diff --git a/results/vanilla_dfa_early_run.log b/results/vanilla_dfa_early_run.log
new file mode 100644
index 0000000..660eca5
--- /dev/null
+++ b/results/vanilla_dfa_early_run.log
@@ -0,0 +1,13 @@
+Vanilla DFA early-epoch checkpoint sweep: seed=42, max_epoch=5
+ ep 0: h_norms=['8.72e+00', '8.78e+00', '8.81e+00', '8.83e+00', '8.89e+00'], g_norms=['1.01e-03', '9.97e-04', '9.93e-04', '9.87e-04', '9.83e-04']
+ ep 1: h_norms=['8.02e+02', '3.06e+03', '4.91e+03', '6.49e+03', '7.30e+03'], g_norms=['2.53e-06', '1.36e-06', '1.37e-06', '1.38e-06', '1.39e-06']
+ saved results/vanilla_dfa_early_ckpts/vanilla_dfa_s42_ep1.pt
+ ep 2: h_norms=['1.58e+03', '1.33e+04', '2.28e+04', '3.24e+04', '3.78e+04'], g_norms=['1.14e-06', '3.09e-07', '3.11e-07', '3.14e-07', '3.17e-07']
+ saved results/vanilla_dfa_early_ckpts/vanilla_dfa_s42_ep2.pt
+ ep 3: h_norms=['2.34e+03', '3.25e+04', '5.82e+04', '8.73e+04', '1.05e+05'], g_norms=['8.22e-07', '1.30e-07', '1.27e-07', '1.29e-07', '1.29e-07']
+ saved results/vanilla_dfa_early_ckpts/vanilla_dfa_s42_ep3.pt
+ ep 4: h_norms=['3.13e+03', '6.29e+04', '1.18e+05', '1.83e+05', '2.24e+05'], g_norms=['6.48e-07', '7.03e-08', '6.72e-08', '6.80e-08', '6.81e-08']
+ saved results/vanilla_dfa_early_ckpts/vanilla_dfa_s42_ep4.pt
+ ep 5: h_norms=['3.95e+03', '1.09e+05', '2.04e+05', '3.30e+05', '4.06e+05'], g_norms=['5.14e-07', '4.36e-08', '4.20e-08', '4.26e-08', '4.25e-08']
+ saved results/vanilla_dfa_early_ckpts/vanilla_dfa_s42_ep5.pt
+Saved results/vanilla_dfa_early_ckpts/vanilla_dfa_s42_log.json