summaryrefslogtreecommitdiff
path: root/results
diff options
context:
space:
mode:
authorYurenHao0426 <Blackhao0426@gmail.com>2026-04-02 14:53:44 -0500
committerYurenHao0426 <Blackhao0426@gmail.com>2026-04-02 14:53:44 -0500
commit6e280e59d492203ea7f7765a65949a6c256bf73a (patch)
treecde981720f6f977757de2dbeb570893ef7b8beb5 /results
parentc8407db32d3a159613ce5f0a567843ed4c970a27 (diff)
Fix and recompute GELU ablation Gamma from checkpoints
ReLU MLP (L=4 d=256): BP: acc=61.1%, Gamma=1.000, rho=0.998 DFA: acc=30.7%, Gamma=0.104, rho=-0.001 SB: acc=15.5%, Gamma=0.300, rho=0.159 CB: acc=28.7%, Gamma=0.298, rho=0.007 Note: SB/CB Gamma uses BP gradient as proxy (feedback nets not checkpointed). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Diffstat (limited to 'results')
-rw-r--r--results/gelu_ablation/gelu_ablation_summary.csv42
-rw-r--r--results/gelu_ablation/relu_bp_s1024.json3
-rw-r--r--results/gelu_ablation/relu_bp_s123.json3
-rw-r--r--results/gelu_ablation/relu_bp_s42.json3
-rw-r--r--results/gelu_ablation/relu_bp_s456.json3
-rw-r--r--results/gelu_ablation/relu_bp_s789.json3
-rw-r--r--results/gelu_ablation/relu_credit_bridge_s1024.json5
-rw-r--r--results/gelu_ablation/relu_credit_bridge_s123.json5
-rw-r--r--results/gelu_ablation/relu_credit_bridge_s42.json5
-rw-r--r--results/gelu_ablation/relu_credit_bridge_s456.json5
-rw-r--r--results/gelu_ablation/relu_credit_bridge_s789.json5
-rw-r--r--results/gelu_ablation/relu_dfa_s1024.json5
-rw-r--r--results/gelu_ablation/relu_dfa_s123.json5
-rw-r--r--results/gelu_ablation/relu_dfa_s42.json5
-rw-r--r--results/gelu_ablation/relu_dfa_s456.json5
-rw-r--r--results/gelu_ablation/relu_dfa_s789.json5
-rw-r--r--results/gelu_ablation/relu_state_bridge_s1024.json5
-rw-r--r--results/gelu_ablation/relu_state_bridge_s123.json5
-rw-r--r--results/gelu_ablation/relu_state_bridge_s42.json5
-rw-r--r--results/gelu_ablation/relu_state_bridge_s456.json5
-rw-r--r--results/gelu_ablation/relu_state_bridge_s789.json5
21 files changed, 56 insertions, 76 deletions
diff --git a/results/gelu_ablation/gelu_ablation_summary.csv b/results/gelu_ablation/gelu_ablation_summary.csv
index eb0f4e2..56660f7 100644
--- a/results/gelu_ablation/gelu_ablation_summary.csv
+++ b/results/gelu_ablation/gelu_ablation_summary.csv
@@ -1,21 +1,21 @@
-activation,method,seed,acc,StateErr,Gamma,rho,naive_StateErr
-relu,bp,1024,0.6097,,1.0,0.9974461048841476,0.4324362277984619
-relu,bp,123,0.613,,1.0,0.997876763343811,0.5002583861351013
-relu,bp,42,0.6129,,1.0,0.9975161552429199,0.6057583093643188
-relu,bp,456,0.608,,1.0,0.9971267879009247,0.3411523401737213
-relu,bp,789,0.6095,,1.0,0.9979429095983505,0.27360349893569946
-relu,credit_bridge,1024,0.2969,,1.0,0.008778431452810764,1.527748942375183
-relu,credit_bridge,123,0.2644,,1.0,0.004790207836776972,0.1871446669101715
-relu,credit_bridge,42,0.3021,,1.0,-0.008028814569115639,0.27361318469047546
-relu,credit_bridge,456,0.2842,,1.0,-0.0007395216962322593,1.9933160543441772
-relu,credit_bridge,789,0.2898,,1.0,0.01000211015343666,0.6378490328788757
-relu,dfa,1024,0.312,,1.0,0.005766347981989384,1.1434805393218994
-relu,dfa,123,0.3072,,1.0,-0.00010712328366935253,53.51875686645508
-relu,dfa,42,0.3113,,1.0,0.010042589157819748,1.0625700950622559
-relu,dfa,456,0.2991,,1.0,-0.00946012232452631,0.08919885009527206
-relu,dfa,789,0.3065,,1.0,0.012797923758625984,1.1228328943252563
-relu,state_bridge,1024,0.1478,98715213479.28064,1.0,-0.0005375983892008662,0.06271713972091675
-relu,state_bridge,123,0.1868,537733263750.59454,1.0,0.13547618687152863,16.496652603149414
-relu,state_bridge,42,0.0797,94040208847.33952,1.0,0.22764702141284943,1.150978446006775
-relu,state_bridge,456,0.1848,18242855682493.645,1.0,0.27895813807845116,0.9801409244537354
-relu,state_bridge,789,0.1761,9722809715963.986,1.0,0.1464090496301651,1.3677430152893066
+activation,method,seed,acc,Gamma,rho,naive_StateErr
+relu,bp,1024,0.6097,1.0,0.997594803571701,0.4324362277984619
+relu,bp,123,0.613,1.0,0.997801199555397,0.5002583861351013
+relu,bp,42,0.6129,1.0,0.9974894374608994,0.6057583093643188
+relu,bp,456,0.608,1.0,0.9971255511045456,0.3411523401737213
+relu,bp,789,0.6095,1.0,0.9976071268320084,0.27360349893569946
+relu,credit_bridge,1024,0.2969,0.31933997943997383,0.013717508874833584,1.527748942375183
+relu,credit_bridge,123,0.2644,0.2815406396985054,0.008798256516456604,0.1871446669101715
+relu,credit_bridge,42,0.3021,0.30931171402335167,-0.0014418410137295723,0.27361318469047546
+relu,credit_bridge,456,0.2842,0.28274909779429436,0.007155103143304586,1.9933160543441772
+relu,credit_bridge,789,0.2898,0.29866537638008595,0.0052663288079202175,0.6378490328788757
+relu,dfa,1024,0.312,0.1079320443677716,-0.003417168278247118,1.1434805393218994
+relu,dfa,123,0.3072,0.11281919915927574,-0.01107116136699915,53.51875686645508
+relu,dfa,42,0.3113,0.10213438142091036,-0.002754530869424343,1.0625700950622559
+relu,dfa,456,0.2991,0.10119568518712185,0.004686151631176472,0.08919885009527206
+relu,dfa,789,0.3065,0.09626383980503306,0.007170299533754587,1.1228328943252563
+relu,state_bridge,1024,0.1478,0.24986908948631026,0.00920996442437172,0.06271713972091675
+relu,state_bridge,123,0.1868,0.29979344457387924,0.13403284549713135,16.496652603149414
+relu,state_bridge,42,0.0797,0.2942853837739676,0.23421041667461395,1.150978446006775
+relu,state_bridge,456,0.1848,0.3133043081033975,0.27685873582959175,0.9801409244537354
+relu,state_bridge,789,0.1761,0.34246295172488317,0.13864134749746881,1.3677430152893066
diff --git a/results/gelu_ablation/relu_bp_s1024.json b/results/gelu_ablation/relu_bp_s1024.json
index aa8d306..e2fb511 100644
--- a/results/gelu_ablation/relu_bp_s1024.json
+++ b/results/gelu_ablation/relu_bp_s1024.json
@@ -3,8 +3,7 @@
"method": "bp",
"seed": 1024,
"acc": 0.6097,
- "StateErr": null,
"Gamma": 1.0,
- "rho": 0.9974461048841476,
+ "rho": 0.997594803571701,
"naive_StateErr": 0.4324362277984619
} \ No newline at end of file
diff --git a/results/gelu_ablation/relu_bp_s123.json b/results/gelu_ablation/relu_bp_s123.json
index e3ffbd2..3c65d20 100644
--- a/results/gelu_ablation/relu_bp_s123.json
+++ b/results/gelu_ablation/relu_bp_s123.json
@@ -3,8 +3,7 @@
"method": "bp",
"seed": 123,
"acc": 0.613,
- "StateErr": null,
"Gamma": 1.0,
- "rho": 0.997876763343811,
+ "rho": 0.997801199555397,
"naive_StateErr": 0.5002583861351013
} \ No newline at end of file
diff --git a/results/gelu_ablation/relu_bp_s42.json b/results/gelu_ablation/relu_bp_s42.json
index d35d1b6..4c3550c 100644
--- a/results/gelu_ablation/relu_bp_s42.json
+++ b/results/gelu_ablation/relu_bp_s42.json
@@ -3,8 +3,7 @@
"method": "bp",
"seed": 42,
"acc": 0.6129,
- "StateErr": null,
"Gamma": 1.0,
- "rho": 0.9975161552429199,
+ "rho": 0.9974894374608994,
"naive_StateErr": 0.6057583093643188
} \ No newline at end of file
diff --git a/results/gelu_ablation/relu_bp_s456.json b/results/gelu_ablation/relu_bp_s456.json
index 0ddf182..d5729a9 100644
--- a/results/gelu_ablation/relu_bp_s456.json
+++ b/results/gelu_ablation/relu_bp_s456.json
@@ -3,8 +3,7 @@
"method": "bp",
"seed": 456,
"acc": 0.608,
- "StateErr": null,
"Gamma": 1.0,
- "rho": 0.9971267879009247,
+ "rho": 0.9971255511045456,
"naive_StateErr": 0.3411523401737213
} \ No newline at end of file
diff --git a/results/gelu_ablation/relu_bp_s789.json b/results/gelu_ablation/relu_bp_s789.json
index f845b43..4fb4d48 100644
--- a/results/gelu_ablation/relu_bp_s789.json
+++ b/results/gelu_ablation/relu_bp_s789.json
@@ -3,8 +3,7 @@
"method": "bp",
"seed": 789,
"acc": 0.6095,
- "StateErr": null,
"Gamma": 1.0,
- "rho": 0.9979429095983505,
+ "rho": 0.9976071268320084,
"naive_StateErr": 0.27360349893569946
} \ No newline at end of file
diff --git a/results/gelu_ablation/relu_credit_bridge_s1024.json b/results/gelu_ablation/relu_credit_bridge_s1024.json
index fd55fa1..b9d8efc 100644
--- a/results/gelu_ablation/relu_credit_bridge_s1024.json
+++ b/results/gelu_ablation/relu_credit_bridge_s1024.json
@@ -3,8 +3,7 @@
"method": "credit_bridge",
"seed": 1024,
"acc": 0.2969,
- "StateErr": null,
- "Gamma": 1.0,
- "rho": 0.008778431452810764,
+ "Gamma": 0.31933997943997383,
+ "rho": 0.013717508874833584,
"naive_StateErr": 1.527748942375183
} \ No newline at end of file
diff --git a/results/gelu_ablation/relu_credit_bridge_s123.json b/results/gelu_ablation/relu_credit_bridge_s123.json
index 8d311e8..6338658 100644
--- a/results/gelu_ablation/relu_credit_bridge_s123.json
+++ b/results/gelu_ablation/relu_credit_bridge_s123.json
@@ -3,8 +3,7 @@
"method": "credit_bridge",
"seed": 123,
"acc": 0.2644,
- "StateErr": null,
- "Gamma": 1.0,
- "rho": 0.004790207836776972,
+ "Gamma": 0.2815406396985054,
+ "rho": 0.008798256516456604,
"naive_StateErr": 0.1871446669101715
} \ No newline at end of file
diff --git a/results/gelu_ablation/relu_credit_bridge_s42.json b/results/gelu_ablation/relu_credit_bridge_s42.json
index 38fc810..5d22271 100644
--- a/results/gelu_ablation/relu_credit_bridge_s42.json
+++ b/results/gelu_ablation/relu_credit_bridge_s42.json
@@ -3,8 +3,7 @@
"method": "credit_bridge",
"seed": 42,
"acc": 0.3021,
- "StateErr": null,
- "Gamma": 1.0,
- "rho": -0.008028814569115639,
+ "Gamma": 0.30931171402335167,
+ "rho": -0.0014418410137295723,
"naive_StateErr": 0.27361318469047546
} \ No newline at end of file
diff --git a/results/gelu_ablation/relu_credit_bridge_s456.json b/results/gelu_ablation/relu_credit_bridge_s456.json
index a6934ba..94864e4 100644
--- a/results/gelu_ablation/relu_credit_bridge_s456.json
+++ b/results/gelu_ablation/relu_credit_bridge_s456.json
@@ -3,8 +3,7 @@
"method": "credit_bridge",
"seed": 456,
"acc": 0.2842,
- "StateErr": null,
- "Gamma": 1.0,
- "rho": -0.0007395216962322593,
+ "Gamma": 0.28274909779429436,
+ "rho": 0.007155103143304586,
"naive_StateErr": 1.9933160543441772
} \ No newline at end of file
diff --git a/results/gelu_ablation/relu_credit_bridge_s789.json b/results/gelu_ablation/relu_credit_bridge_s789.json
index 4f3ff0d..8eca544 100644
--- a/results/gelu_ablation/relu_credit_bridge_s789.json
+++ b/results/gelu_ablation/relu_credit_bridge_s789.json
@@ -3,8 +3,7 @@
"method": "credit_bridge",
"seed": 789,
"acc": 0.2898,
- "StateErr": null,
- "Gamma": 1.0,
- "rho": 0.01000211015343666,
+ "Gamma": 0.29866537638008595,
+ "rho": 0.0052663288079202175,
"naive_StateErr": 0.6378490328788757
} \ No newline at end of file
diff --git a/results/gelu_ablation/relu_dfa_s1024.json b/results/gelu_ablation/relu_dfa_s1024.json
index 12efe95..dadb4dd 100644
--- a/results/gelu_ablation/relu_dfa_s1024.json
+++ b/results/gelu_ablation/relu_dfa_s1024.json
@@ -3,8 +3,7 @@
"method": "dfa",
"seed": 1024,
"acc": 0.312,
- "StateErr": null,
- "Gamma": 1.0,
- "rho": 0.005766347981989384,
+ "Gamma": 0.1079320443677716,
+ "rho": -0.003417168278247118,
"naive_StateErr": 1.1434805393218994
} \ No newline at end of file
diff --git a/results/gelu_ablation/relu_dfa_s123.json b/results/gelu_ablation/relu_dfa_s123.json
index 717abcc..c970aee 100644
--- a/results/gelu_ablation/relu_dfa_s123.json
+++ b/results/gelu_ablation/relu_dfa_s123.json
@@ -3,8 +3,7 @@
"method": "dfa",
"seed": 123,
"acc": 0.3072,
- "StateErr": null,
- "Gamma": 1.0,
- "rho": -0.00010712328366935253,
+ "Gamma": 0.11281919915927574,
+ "rho": -0.01107116136699915,
"naive_StateErr": 53.51875686645508
} \ No newline at end of file
diff --git a/results/gelu_ablation/relu_dfa_s42.json b/results/gelu_ablation/relu_dfa_s42.json
index ae9b9da..5e494ef 100644
--- a/results/gelu_ablation/relu_dfa_s42.json
+++ b/results/gelu_ablation/relu_dfa_s42.json
@@ -3,8 +3,7 @@
"method": "dfa",
"seed": 42,
"acc": 0.3113,
- "StateErr": null,
- "Gamma": 1.0,
- "rho": 0.010042589157819748,
+ "Gamma": 0.10213438142091036,
+ "rho": -0.002754530869424343,
"naive_StateErr": 1.0625700950622559
} \ No newline at end of file
diff --git a/results/gelu_ablation/relu_dfa_s456.json b/results/gelu_ablation/relu_dfa_s456.json
index 85404d8..bf9c40e 100644
--- a/results/gelu_ablation/relu_dfa_s456.json
+++ b/results/gelu_ablation/relu_dfa_s456.json
@@ -3,8 +3,7 @@
"method": "dfa",
"seed": 456,
"acc": 0.2991,
- "StateErr": null,
- "Gamma": 1.0,
- "rho": -0.00946012232452631,
+ "Gamma": 0.10119568518712185,
+ "rho": 0.004686151631176472,
"naive_StateErr": 0.08919885009527206
} \ No newline at end of file
diff --git a/results/gelu_ablation/relu_dfa_s789.json b/results/gelu_ablation/relu_dfa_s789.json
index bc16a09..9469fc0 100644
--- a/results/gelu_ablation/relu_dfa_s789.json
+++ b/results/gelu_ablation/relu_dfa_s789.json
@@ -3,8 +3,7 @@
"method": "dfa",
"seed": 789,
"acc": 0.3065,
- "StateErr": null,
- "Gamma": 1.0,
- "rho": 0.012797923758625984,
+ "Gamma": 0.09626383980503306,
+ "rho": 0.007170299533754587,
"naive_StateErr": 1.1228328943252563
} \ No newline at end of file
diff --git a/results/gelu_ablation/relu_state_bridge_s1024.json b/results/gelu_ablation/relu_state_bridge_s1024.json
index a70242b..fc978fc 100644
--- a/results/gelu_ablation/relu_state_bridge_s1024.json
+++ b/results/gelu_ablation/relu_state_bridge_s1024.json
@@ -3,8 +3,7 @@
"method": "state_bridge",
"seed": 1024,
"acc": 0.1478,
- "StateErr": 98715213479.28064,
- "Gamma": 1.0,
- "rho": -0.0005375983892008662,
+ "Gamma": 0.24986908948631026,
+ "rho": 0.00920996442437172,
"naive_StateErr": 0.06271713972091675
} \ No newline at end of file
diff --git a/results/gelu_ablation/relu_state_bridge_s123.json b/results/gelu_ablation/relu_state_bridge_s123.json
index f56548e..25f3798 100644
--- a/results/gelu_ablation/relu_state_bridge_s123.json
+++ b/results/gelu_ablation/relu_state_bridge_s123.json
@@ -3,8 +3,7 @@
"method": "state_bridge",
"seed": 123,
"acc": 0.1868,
- "StateErr": 537733263750.59454,
- "Gamma": 1.0,
- "rho": 0.13547618687152863,
+ "Gamma": 0.29979344457387924,
+ "rho": 0.13403284549713135,
"naive_StateErr": 16.496652603149414
} \ No newline at end of file
diff --git a/results/gelu_ablation/relu_state_bridge_s42.json b/results/gelu_ablation/relu_state_bridge_s42.json
index e3c1036..fa18d59 100644
--- a/results/gelu_ablation/relu_state_bridge_s42.json
+++ b/results/gelu_ablation/relu_state_bridge_s42.json
@@ -3,8 +3,7 @@
"method": "state_bridge",
"seed": 42,
"acc": 0.0797,
- "StateErr": 94040208847.33952,
- "Gamma": 1.0,
- "rho": 0.22764702141284943,
+ "Gamma": 0.2942853837739676,
+ "rho": 0.23421041667461395,
"naive_StateErr": 1.150978446006775
} \ No newline at end of file
diff --git a/results/gelu_ablation/relu_state_bridge_s456.json b/results/gelu_ablation/relu_state_bridge_s456.json
index 0f50fc8..d20f3d1 100644
--- a/results/gelu_ablation/relu_state_bridge_s456.json
+++ b/results/gelu_ablation/relu_state_bridge_s456.json
@@ -3,8 +3,7 @@
"method": "state_bridge",
"seed": 456,
"acc": 0.1848,
- "StateErr": 18242855682493.645,
- "Gamma": 1.0,
- "rho": 0.27895813807845116,
+ "Gamma": 0.3133043081033975,
+ "rho": 0.27685873582959175,
"naive_StateErr": 0.9801409244537354
} \ No newline at end of file
diff --git a/results/gelu_ablation/relu_state_bridge_s789.json b/results/gelu_ablation/relu_state_bridge_s789.json
index 427e444..086e28b 100644
--- a/results/gelu_ablation/relu_state_bridge_s789.json
+++ b/results/gelu_ablation/relu_state_bridge_s789.json
@@ -3,8 +3,7 @@
"method": "state_bridge",
"seed": 789,
"acc": 0.1761,
- "StateErr": 9722809715963.986,
- "Gamma": 1.0,
- "rho": 0.1464090496301651,
+ "Gamma": 0.34246295172488317,
+ "rho": 0.13864134749746881,
"naive_StateErr": 1.3677430152893066
} \ No newline at end of file