diff options
| author | YurenHao0426 <Blackhao0426@gmail.com> | 2026-04-02 14:53:44 -0500 |
|---|---|---|
| committer | YurenHao0426 <Blackhao0426@gmail.com> | 2026-04-02 14:53:44 -0500 |
| commit | 6e280e59d492203ea7f7765a65949a6c256bf73a (patch) | |
| tree | cde981720f6f977757de2dbeb570893ef7b8beb5 /results | |
| parent | c8407db32d3a159613ce5f0a567843ed4c970a27 (diff) | |
Fix and recompute GELU ablation Gamma from checkpoints
ReLU MLP (L=4 d=256), mean over 5 seeds (42, 123, 456, 789, 1024):
BP: acc=61.1%, Gamma=1.000, rho=0.998
DFA: acc=30.7%, Gamma=0.104, rho=-0.001
SB: acc=15.5%, Gamma=0.300, rho=0.159
CB: acc=28.7%, Gamma=0.298, rho=0.007
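Note: SB (state_bridge) and CB (credit_bridge) Gamma use the BP gradient as a proxy, because the feedback nets were not checkpointed. The StateErr field is also removed from the summary CSV and the per-run JSON files.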
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Diffstat (limited to 'results')
21 files changed, 56 insertions, 76 deletions
diff --git a/results/gelu_ablation/gelu_ablation_summary.csv b/results/gelu_ablation/gelu_ablation_summary.csv index eb0f4e2..56660f7 100644 --- a/results/gelu_ablation/gelu_ablation_summary.csv +++ b/results/gelu_ablation/gelu_ablation_summary.csv @@ -1,21 +1,21 @@ -activation,method,seed,acc,StateErr,Gamma,rho,naive_StateErr
-relu,bp,1024,0.6097,,1.0,0.9974461048841476,0.4324362277984619
-relu,bp,123,0.613,,1.0,0.997876763343811,0.5002583861351013
-relu,bp,42,0.6129,,1.0,0.9975161552429199,0.6057583093643188
-relu,bp,456,0.608,,1.0,0.9971267879009247,0.3411523401737213
-relu,bp,789,0.6095,,1.0,0.9979429095983505,0.27360349893569946
-relu,credit_bridge,1024,0.2969,,1.0,0.008778431452810764,1.527748942375183
-relu,credit_bridge,123,0.2644,,1.0,0.004790207836776972,0.1871446669101715
-relu,credit_bridge,42,0.3021,,1.0,-0.008028814569115639,0.27361318469047546
-relu,credit_bridge,456,0.2842,,1.0,-0.0007395216962322593,1.9933160543441772
-relu,credit_bridge,789,0.2898,,1.0,0.01000211015343666,0.6378490328788757
-relu,dfa,1024,0.312,,1.0,0.005766347981989384,1.1434805393218994
-relu,dfa,123,0.3072,,1.0,-0.00010712328366935253,53.51875686645508
-relu,dfa,42,0.3113,,1.0,0.010042589157819748,1.0625700950622559
-relu,dfa,456,0.2991,,1.0,-0.00946012232452631,0.08919885009527206
-relu,dfa,789,0.3065,,1.0,0.012797923758625984,1.1228328943252563
-relu,state_bridge,1024,0.1478,98715213479.28064,1.0,-0.0005375983892008662,0.06271713972091675
-relu,state_bridge,123,0.1868,537733263750.59454,1.0,0.13547618687152863,16.496652603149414
-relu,state_bridge,42,0.0797,94040208847.33952,1.0,0.22764702141284943,1.150978446006775
-relu,state_bridge,456,0.1848,18242855682493.645,1.0,0.27895813807845116,0.9801409244537354
-relu,state_bridge,789,0.1761,9722809715963.986,1.0,0.1464090496301651,1.3677430152893066
+activation,method,seed,acc,Gamma,rho,naive_StateErr
+relu,bp,1024,0.6097,1.0,0.997594803571701,0.4324362277984619
+relu,bp,123,0.613,1.0,0.997801199555397,0.5002583861351013
+relu,bp,42,0.6129,1.0,0.9974894374608994,0.6057583093643188
+relu,bp,456,0.608,1.0,0.9971255511045456,0.3411523401737213
+relu,bp,789,0.6095,1.0,0.9976071268320084,0.27360349893569946
+relu,credit_bridge,1024,0.2969,0.31933997943997383,0.013717508874833584,1.527748942375183
+relu,credit_bridge,123,0.2644,0.2815406396985054,0.008798256516456604,0.1871446669101715
+relu,credit_bridge,42,0.3021,0.30931171402335167,-0.0014418410137295723,0.27361318469047546
+relu,credit_bridge,456,0.2842,0.28274909779429436,0.007155103143304586,1.9933160543441772
+relu,credit_bridge,789,0.2898,0.29866537638008595,0.0052663288079202175,0.6378490328788757
+relu,dfa,1024,0.312,0.1079320443677716,-0.003417168278247118,1.1434805393218994
+relu,dfa,123,0.3072,0.11281919915927574,-0.01107116136699915,53.51875686645508
+relu,dfa,42,0.3113,0.10213438142091036,-0.002754530869424343,1.0625700950622559
+relu,dfa,456,0.2991,0.10119568518712185,0.004686151631176472,0.08919885009527206
+relu,dfa,789,0.3065,0.09626383980503306,0.007170299533754587,1.1228328943252563
+relu,state_bridge,1024,0.1478,0.24986908948631026,0.00920996442437172,0.06271713972091675
+relu,state_bridge,123,0.1868,0.29979344457387924,0.13403284549713135,16.496652603149414
+relu,state_bridge,42,0.0797,0.2942853837739676,0.23421041667461395,1.150978446006775
+relu,state_bridge,456,0.1848,0.3133043081033975,0.27685873582959175,0.9801409244537354
+relu,state_bridge,789,0.1761,0.34246295172488317,0.13864134749746881,1.3677430152893066
diff --git a/results/gelu_ablation/relu_bp_s1024.json b/results/gelu_ablation/relu_bp_s1024.json index aa8d306..e2fb511 100644 --- a/results/gelu_ablation/relu_bp_s1024.json +++ b/results/gelu_ablation/relu_bp_s1024.json @@ -3,8 +3,7 @@ "method": "bp", "seed": 1024, "acc": 0.6097, - "StateErr": null, "Gamma": 1.0, - "rho": 0.9974461048841476, + "rho": 0.997594803571701, "naive_StateErr": 0.4324362277984619 }
\ No newline at end of file diff --git a/results/gelu_ablation/relu_bp_s123.json b/results/gelu_ablation/relu_bp_s123.json index e3ffbd2..3c65d20 100644 --- a/results/gelu_ablation/relu_bp_s123.json +++ b/results/gelu_ablation/relu_bp_s123.json @@ -3,8 +3,7 @@ "method": "bp", "seed": 123, "acc": 0.613, - "StateErr": null, "Gamma": 1.0, - "rho": 0.997876763343811, + "rho": 0.997801199555397, "naive_StateErr": 0.5002583861351013 }
\ No newline at end of file diff --git a/results/gelu_ablation/relu_bp_s42.json b/results/gelu_ablation/relu_bp_s42.json index d35d1b6..4c3550c 100644 --- a/results/gelu_ablation/relu_bp_s42.json +++ b/results/gelu_ablation/relu_bp_s42.json @@ -3,8 +3,7 @@ "method": "bp", "seed": 42, "acc": 0.6129, - "StateErr": null, "Gamma": 1.0, - "rho": 0.9975161552429199, + "rho": 0.9974894374608994, "naive_StateErr": 0.6057583093643188 }
\ No newline at end of file diff --git a/results/gelu_ablation/relu_bp_s456.json b/results/gelu_ablation/relu_bp_s456.json index 0ddf182..d5729a9 100644 --- a/results/gelu_ablation/relu_bp_s456.json +++ b/results/gelu_ablation/relu_bp_s456.json @@ -3,8 +3,7 @@ "method": "bp", "seed": 456, "acc": 0.608, - "StateErr": null, "Gamma": 1.0, - "rho": 0.9971267879009247, + "rho": 0.9971255511045456, "naive_StateErr": 0.3411523401737213 }
\ No newline at end of file diff --git a/results/gelu_ablation/relu_bp_s789.json b/results/gelu_ablation/relu_bp_s789.json index f845b43..4fb4d48 100644 --- a/results/gelu_ablation/relu_bp_s789.json +++ b/results/gelu_ablation/relu_bp_s789.json @@ -3,8 +3,7 @@ "method": "bp", "seed": 789, "acc": 0.6095, - "StateErr": null, "Gamma": 1.0, - "rho": 0.9979429095983505, + "rho": 0.9976071268320084, "naive_StateErr": 0.27360349893569946 }
\ No newline at end of file diff --git a/results/gelu_ablation/relu_credit_bridge_s1024.json b/results/gelu_ablation/relu_credit_bridge_s1024.json index fd55fa1..b9d8efc 100644 --- a/results/gelu_ablation/relu_credit_bridge_s1024.json +++ b/results/gelu_ablation/relu_credit_bridge_s1024.json @@ -3,8 +3,7 @@ "method": "credit_bridge", "seed": 1024, "acc": 0.2969, - "StateErr": null, - "Gamma": 1.0, - "rho": 0.008778431452810764, + "Gamma": 0.31933997943997383, + "rho": 0.013717508874833584, "naive_StateErr": 1.527748942375183 }
\ No newline at end of file diff --git a/results/gelu_ablation/relu_credit_bridge_s123.json b/results/gelu_ablation/relu_credit_bridge_s123.json index 8d311e8..6338658 100644 --- a/results/gelu_ablation/relu_credit_bridge_s123.json +++ b/results/gelu_ablation/relu_credit_bridge_s123.json @@ -3,8 +3,7 @@ "method": "credit_bridge", "seed": 123, "acc": 0.2644, - "StateErr": null, - "Gamma": 1.0, - "rho": 0.004790207836776972, + "Gamma": 0.2815406396985054, + "rho": 0.008798256516456604, "naive_StateErr": 0.1871446669101715 }
\ No newline at end of file diff --git a/results/gelu_ablation/relu_credit_bridge_s42.json b/results/gelu_ablation/relu_credit_bridge_s42.json index 38fc810..5d22271 100644 --- a/results/gelu_ablation/relu_credit_bridge_s42.json +++ b/results/gelu_ablation/relu_credit_bridge_s42.json @@ -3,8 +3,7 @@ "method": "credit_bridge", "seed": 42, "acc": 0.3021, - "StateErr": null, - "Gamma": 1.0, - "rho": -0.008028814569115639, + "Gamma": 0.30931171402335167, + "rho": -0.0014418410137295723, "naive_StateErr": 0.27361318469047546 }
\ No newline at end of file diff --git a/results/gelu_ablation/relu_credit_bridge_s456.json b/results/gelu_ablation/relu_credit_bridge_s456.json index a6934ba..94864e4 100644 --- a/results/gelu_ablation/relu_credit_bridge_s456.json +++ b/results/gelu_ablation/relu_credit_bridge_s456.json @@ -3,8 +3,7 @@ "method": "credit_bridge", "seed": 456, "acc": 0.2842, - "StateErr": null, - "Gamma": 1.0, - "rho": -0.0007395216962322593, + "Gamma": 0.28274909779429436, + "rho": 0.007155103143304586, "naive_StateErr": 1.9933160543441772 }
\ No newline at end of file diff --git a/results/gelu_ablation/relu_credit_bridge_s789.json b/results/gelu_ablation/relu_credit_bridge_s789.json index 4f3ff0d..8eca544 100644 --- a/results/gelu_ablation/relu_credit_bridge_s789.json +++ b/results/gelu_ablation/relu_credit_bridge_s789.json @@ -3,8 +3,7 @@ "method": "credit_bridge", "seed": 789, "acc": 0.2898, - "StateErr": null, - "Gamma": 1.0, - "rho": 0.01000211015343666, + "Gamma": 0.29866537638008595, + "rho": 0.0052663288079202175, "naive_StateErr": 0.6378490328788757 }
\ No newline at end of file diff --git a/results/gelu_ablation/relu_dfa_s1024.json b/results/gelu_ablation/relu_dfa_s1024.json index 12efe95..dadb4dd 100644 --- a/results/gelu_ablation/relu_dfa_s1024.json +++ b/results/gelu_ablation/relu_dfa_s1024.json @@ -3,8 +3,7 @@ "method": "dfa", "seed": 1024, "acc": 0.312, - "StateErr": null, - "Gamma": 1.0, - "rho": 0.005766347981989384, + "Gamma": 0.1079320443677716, + "rho": -0.003417168278247118, "naive_StateErr": 1.1434805393218994 }
\ No newline at end of file diff --git a/results/gelu_ablation/relu_dfa_s123.json b/results/gelu_ablation/relu_dfa_s123.json index 717abcc..c970aee 100644 --- a/results/gelu_ablation/relu_dfa_s123.json +++ b/results/gelu_ablation/relu_dfa_s123.json @@ -3,8 +3,7 @@ "method": "dfa", "seed": 123, "acc": 0.3072, - "StateErr": null, - "Gamma": 1.0, - "rho": -0.00010712328366935253, + "Gamma": 0.11281919915927574, + "rho": -0.01107116136699915, "naive_StateErr": 53.51875686645508 }
\ No newline at end of file diff --git a/results/gelu_ablation/relu_dfa_s42.json b/results/gelu_ablation/relu_dfa_s42.json index ae9b9da..5e494ef 100644 --- a/results/gelu_ablation/relu_dfa_s42.json +++ b/results/gelu_ablation/relu_dfa_s42.json @@ -3,8 +3,7 @@ "method": "dfa", "seed": 42, "acc": 0.3113, - "StateErr": null, - "Gamma": 1.0, - "rho": 0.010042589157819748, + "Gamma": 0.10213438142091036, + "rho": -0.002754530869424343, "naive_StateErr": 1.0625700950622559 }
\ No newline at end of file diff --git a/results/gelu_ablation/relu_dfa_s456.json b/results/gelu_ablation/relu_dfa_s456.json index 85404d8..bf9c40e 100644 --- a/results/gelu_ablation/relu_dfa_s456.json +++ b/results/gelu_ablation/relu_dfa_s456.json @@ -3,8 +3,7 @@ "method": "dfa", "seed": 456, "acc": 0.2991, - "StateErr": null, - "Gamma": 1.0, - "rho": -0.00946012232452631, + "Gamma": 0.10119568518712185, + "rho": 0.004686151631176472, "naive_StateErr": 0.08919885009527206 }
\ No newline at end of file diff --git a/results/gelu_ablation/relu_dfa_s789.json b/results/gelu_ablation/relu_dfa_s789.json index bc16a09..9469fc0 100644 --- a/results/gelu_ablation/relu_dfa_s789.json +++ b/results/gelu_ablation/relu_dfa_s789.json @@ -3,8 +3,7 @@ "method": "dfa", "seed": 789, "acc": 0.3065, - "StateErr": null, - "Gamma": 1.0, - "rho": 0.012797923758625984, + "Gamma": 0.09626383980503306, + "rho": 0.007170299533754587, "naive_StateErr": 1.1228328943252563 }
\ No newline at end of file diff --git a/results/gelu_ablation/relu_state_bridge_s1024.json b/results/gelu_ablation/relu_state_bridge_s1024.json index a70242b..fc978fc 100644 --- a/results/gelu_ablation/relu_state_bridge_s1024.json +++ b/results/gelu_ablation/relu_state_bridge_s1024.json @@ -3,8 +3,7 @@ "method": "state_bridge", "seed": 1024, "acc": 0.1478, - "StateErr": 98715213479.28064, - "Gamma": 1.0, - "rho": -0.0005375983892008662, + "Gamma": 0.24986908948631026, + "rho": 0.00920996442437172, "naive_StateErr": 0.06271713972091675 }
\ No newline at end of file diff --git a/results/gelu_ablation/relu_state_bridge_s123.json b/results/gelu_ablation/relu_state_bridge_s123.json index f56548e..25f3798 100644 --- a/results/gelu_ablation/relu_state_bridge_s123.json +++ b/results/gelu_ablation/relu_state_bridge_s123.json @@ -3,8 +3,7 @@ "method": "state_bridge", "seed": 123, "acc": 0.1868, - "StateErr": 537733263750.59454, - "Gamma": 1.0, - "rho": 0.13547618687152863, + "Gamma": 0.29979344457387924, + "rho": 0.13403284549713135, "naive_StateErr": 16.496652603149414 }
\ No newline at end of file diff --git a/results/gelu_ablation/relu_state_bridge_s42.json b/results/gelu_ablation/relu_state_bridge_s42.json index e3c1036..fa18d59 100644 --- a/results/gelu_ablation/relu_state_bridge_s42.json +++ b/results/gelu_ablation/relu_state_bridge_s42.json @@ -3,8 +3,7 @@ "method": "state_bridge", "seed": 42, "acc": 0.0797, - "StateErr": 94040208847.33952, - "Gamma": 1.0, - "rho": 0.22764702141284943, + "Gamma": 0.2942853837739676, + "rho": 0.23421041667461395, "naive_StateErr": 1.150978446006775 }
\ No newline at end of file diff --git a/results/gelu_ablation/relu_state_bridge_s456.json b/results/gelu_ablation/relu_state_bridge_s456.json index 0f50fc8..d20f3d1 100644 --- a/results/gelu_ablation/relu_state_bridge_s456.json +++ b/results/gelu_ablation/relu_state_bridge_s456.json @@ -3,8 +3,7 @@ "method": "state_bridge", "seed": 456, "acc": 0.1848, - "StateErr": 18242855682493.645, - "Gamma": 1.0, - "rho": 0.27895813807845116, + "Gamma": 0.3133043081033975, + "rho": 0.27685873582959175, "naive_StateErr": 0.9801409244537354 }
\ No newline at end of file diff --git a/results/gelu_ablation/relu_state_bridge_s789.json b/results/gelu_ablation/relu_state_bridge_s789.json index 427e444..086e28b 100644 --- a/results/gelu_ablation/relu_state_bridge_s789.json +++ b/results/gelu_ablation/relu_state_bridge_s789.json @@ -3,8 +3,7 @@ "method": "state_bridge", "seed": 789, "acc": 0.1761, - "StateErr": 9722809715963.986, - "Gamma": 1.0, - "rho": 0.1464090496301651, + "Gamma": 0.34246295172488317, + "rho": 0.13864134749746881, "naive_StateErr": 1.3677430152893066 }
\ No newline at end of file |
