summaryrefslogtreecommitdiff
path: root/results
diff options
context:
space:
mode:
authorYurenHao0426 <Blackhao0426@gmail.com>2026-04-07 23:26:32 -0500
committerYurenHao0426 <Blackhao0426@gmail.com>2026-04-07 23:26:32 -0500
commit665c9bb4ab3a5126c6fc191eecf42be7b703eb0c (patch)
tree9b1863521c05573925de77eff57eb2a4d2dee9ff /results
parent8f67bdeebac543961871b9896a62cd07b7a5be26 (diff)
Add d=512 ResMLP audit table (3 seeds): cross-width validation
Same protocol applied to the 4-block d=512 ResMLP variant (vs the d=256 default).

4 methods × 3 seeds = 12 conditions:
- BP @ d=512: trustworthy on all 3 seeds (acc 0.60-0.61)
- DFA @ d=512: walked back on all 3 seeds via (a)+(b), with drift sub-mode also flagged on s123 (stability 0.362)
- State Bridge @ d=512: walked back on all 3 seeds via (a)+(b), with drift sub-mode on s123 (stability 0.879)
- Credit Bridge @ d=512: walked back on all 3 seeds via (a)+(b), with drift sub-mode also flagged on s123 (stability 0.417)

Width effect: max-per-block growth is HIGHER at d=512 (~3e3-3e4 across the non-BP rows of this table) than at d=256 (~1e3). Larger width amplifies the explosion. The protocol verdicts are robust to this — same binary outcome, more extreme quantitative numbers.

This is the cross-width validation: the protocol's findings are not d=256-specific. The §3 audit results generalize across the width dimension.
Diffstat (limited to 'results')
-rw-r--r--results/protocol_audit/audit_d512_3seed.json122
1 files changed, 122 insertions, 0 deletions
diff --git a/results/protocol_audit/audit_d512_3seed.json b/results/protocol_audit/audit_d512_3seed.json
new file mode 100644
index 0000000..4b5f6fb
--- /dev/null
+++ b/results/protocol_audit/audit_d512_3seed.json
@@ -0,0 +1,122 @@
+[
+ {
+ "method": "bp",
+ "seed": 42,
+ "acc": 0.6019,
+ "h_L": 389.4543151855469,
+ "g_L": 0.000248963973717764,
+ "stability": 0.20287561358677017,
+ "max_per_block": 1.00832723741786,
+ "verdict": "trustworthy"
+ },
+ {
+ "method": "dfa",
+ "seed": 42,
+ "acc": 0.3155,
+ "h_L": 2242272512.0,
+ "g_L": 6.923160378313753e-10,
+ "stability": 0.12479152232408523,
+ "max_per_block": 7788.42805432434,
+ "verdict": "needs walk-back: residual stream exploded; BP grad at numerical floor"
+ },
+ {
+ "method": "state_bridge",
+ "seed": 42,
+ "acc": 0.19,
+ "h_L": 158286368.0,
+ "g_L": 1.5313229573266085e-09,
+ "stability": 0.17412672605779436,
+ "max_per_block": 25812.527378026225,
+ "verdict": "needs walk-back: residual stream exploded; BP grad at numerical floor"
+ },
+ {
+ "method": "credit_bridge",
+ "seed": 42,
+ "acc": 0.2934,
+ "h_L": 1518098688.0,
+ "g_L": 4.899390892987299e-10,
+ "stability": 0.22219661739137436,
+ "max_per_block": 3210.4938156669996,
+ "verdict": "needs walk-back: residual stream exploded; BP grad at numerical floor"
+ },
+ {
+ "method": "bp",
+ "seed": 123,
+ "acc": 0.6078,
+ "h_L": 392.1978759765625,
+ "g_L": 0.00024487689370289445,
+ "stability": 0.13251967918541696,
+ "max_per_block": 1.0077101345860964,
+ "verdict": "trustworthy"
+ },
+ {
+ "method": "dfa",
+ "seed": 123,
+ "acc": 0.3087,
+ "h_L": 1905751040.0,
+ "g_L": 6.39826636117391e-10,
+ "stability": 0.3624845700131522,
+ "max_per_block": 6397.1934401457265,
+ "verdict": "needs walk-back: residual stream exploded; BP grad at numerical floor; BP grad direction is drift-dominated"
+ },
+ {
+ "method": "state_bridge",
+ "seed": 123,
+ "acc": 0.2194,
+ "h_L": 148185072.0,
+ "g_L": 3.956185157250047e-09,
+ "stability": 0.8785928308963775,
+ "max_per_block": 29735.815245576912,
+ "verdict": "needs walk-back: residual stream exploded; BP grad at numerical floor; BP grad direction is drift-dominated"
+ },
+ {
+ "method": "credit_bridge",
+ "seed": 123,
+ "acc": 0.2964,
+ "h_L": 1807671936.0,
+ "g_L": 3.837697937214557e-10,
+ "stability": 0.41688133887946605,
+ "max_per_block": 3153.5449714728416,
+ "verdict": "needs walk-back: residual stream exploded; BP grad at numerical floor; BP grad direction is drift-dominated"
+ },
+ {
+ "method": "bp",
+ "seed": 456,
+ "acc": 0.5999,
+ "h_L": 399.59320068359375,
+ "g_L": 0.00025932700373232365,
+ "stability": 0.16695057782861922,
+ "max_per_block": 1.00708821368371,
+ "verdict": "trustworthy"
+ },
+ {
+ "method": "dfa",
+ "seed": 456,
+ "acc": 0.3089,
+ "h_L": 2366543872.0,
+ "g_L": 7.316194317041891e-10,
+ "stability": 0.0037845497330029807,
+ "max_per_block": 7689.343169756613,
+ "verdict": "needs walk-back: residual stream exploded; BP grad at numerical floor"
+ },
+ {
+ "method": "state_bridge",
+ "seed": 456,
+ "acc": 0.1897,
+ "h_L": 3785490944.0,
+ "g_L": 1.2351282496769755e-10,
+ "stability": 0.24546371698379515,
+ "max_per_block": 7539.8150194888,
+ "verdict": "needs walk-back: residual stream exploded; BP grad at numerical floor"
+ },
+ {
+ "method": "credit_bridge",
+ "seed": 456,
+ "acc": 0.3069,
+ "h_L": 1116727552.0,
+ "g_L": 6.332067647996098e-10,
+ "stability": 0.21998102739453315,
+ "max_per_block": 2883.5489333321184,
+ "verdict": "needs walk-back: residual stream exploded; BP grad at numerical floor"
+ }
+] \ No newline at end of file