{ "reports": { "bp_s42": { "method_name": "BP", "notes": "4-block d=256 ResMLP, CIFAR-10, seed 42", "residual_norms": [ 251.83087158203125, 226.57342529296875, 212.16461181640625, 205.60723876953125, 205.75946044921875 ], "bp_grad_norms": [ 0.0004396044823806733, 0.0004709330096375197, 0.0004792391264345497, 0.00045345001854002476, 0.0003701267414726317 ], "stability_layer": 2, "cross_batch_stability": 0.09898398886952135, "headline_acc": 0.6149, "frozen_baseline_acc": null, "verdict": "trustworthy", "thresholds": { "g_norm_floor": 1e-07, "h_norm_explosion_ratio": 50.0, "stability_drift_ceiling": 0.3, "frozen_acc_margin_pp": 2.0 } }, "dfa_s42": { "method_name": "DFA", "notes": "4-block d=256 ResMLP, CIFAR-10, seed 42", "residual_norms": [ 35824.796875, 73202040.0, 174312304.0, 339040960.0, 435299520.0 ], "bp_grad_norms": [ 4.39066155877299e-07, 4.1912620041273385e-09, 4.183721813433294e-09, 4.174094847542165e-09, 4.174704582027289e-09 ], "stability_layer": 2, "cross_batch_stability": 0.047060725092887876, "headline_acc": 0.3107, "frozen_baseline_acc": 0.349, "verdict": "needs walk-back: residual stream exploded; BP grad at numerical floor; deep blocks fail to beat frozen-random baseline", "thresholds": { "g_norm_floor": 1e-07, "h_norm_explosion_ratio": 50.0, "stability_drift_ceiling": 0.3, "frozen_acc_margin_pp": 2.0 } }, "state_bridge_s42": { "method_name": "STATE_BRIDGE", "notes": "4-block d=256 ResMLP, CIFAR-10, seed 42", "residual_norms": [ 906.3201293945312, 11583499.0, 34872504.0, 208111168.0, 228665568.0 ], "bp_grad_norms": [ 8.369566785404459e-06, 1.996277365634569e-09, 1.9812380624983916e-09, 1.8405569290891322e-09, 1.8411722146893794e-09 ], "stability_layer": 2, "cross_batch_stability": 0.99180050028695, "headline_acc": 0.1695, "frozen_baseline_acc": 0.349, "verdict": "needs walk-back: residual stream exploded; BP grad at numerical floor; BP grad direction is drift-dominated; deep blocks fail to beat frozen-random baseline", "thresholds": { "g_norm_floor": 1e-07, "h_norm_explosion_ratio": 50.0, "stability_drift_ceiling": 0.3, "frozen_acc_margin_pp": 2.0 } }, "credit_bridge_s42": { "method_name": "CREDIT_BRIDGE", "notes": "4-block d=256 ResMLP, CIFAR-10, seed 42", "residual_norms": [ 13249.662109375, 24119914.0, 554824896.0, 548816832.0, 606231552.0 ], "bp_grad_norms": [ 7.185065555859182e-07, 1.1024462454045647e-09, 9.061909000962487e-10, 9.013046420314197e-10, 9.011226209665324e-10 ], "stability_layer": 2, "cross_batch_stability": 0.3516695586343606, "headline_acc": 0.2562, "frozen_baseline_acc": 0.349, "verdict": "needs walk-back: residual stream exploded; BP grad at numerical floor; BP grad direction is drift-dominated; deep blocks fail to beat frozen-random baseline", "thresholds": { "g_norm_floor": 1e-07, "h_norm_explosion_ratio": 50.0, "stability_drift_ceiling": 0.3, "frozen_acc_margin_pp": 2.0 } }, "ep_s42": { "method_name": "EP", "notes": "4-block d=256 ResMLP, CIFAR-10, seed 42", "residual_norms": [ 518.3867797851562, 579.6542358398438, 680.764892578125, 1145.8692626953125, 3286.841064453125 ], "bp_grad_norms": [ 0.00022257285309024155, 0.00022327345504891127, 0.00021209640544839203, 0.00021204684162512422, 0.00016422539192717522 ], "stability_layer": 2, "cross_batch_stability": -0.03589460700750351, "headline_acc": 0.359, "frozen_baseline_acc": null, "verdict": "trustworthy", "thresholds": { "g_norm_floor": 1e-07, "h_norm_explosion_ratio": 50.0, "stability_drift_ceiling": 0.3, "frozen_acc_margin_pp": 2.0 } }, "bp_s123": { "method_name": "BP", "notes": "4-block d=256 ResMLP, CIFAR-10, seed 123", "residual_norms": [ 253.8892822265625, 231.17062377929688, 215.71670532226562, 206.64784240722656, 198.23153686523438 ], "bp_grad_norms": [ 0.00040613432065583766, 0.0004288216005079448, 0.0004308059287723154, 0.000438842544099316, 0.0003089293895754963 ], "stability_layer": 2, "cross_batch_stability": 0.0870692086716493, "headline_acc": 0.6106, "frozen_baseline_acc": null, "verdict": "trustworthy", "thresholds": { "g_norm_floor": 1e-07, "h_norm_explosion_ratio": 50.0, "stability_drift_ceiling": 0.3, "frozen_acc_margin_pp": 2.0 } }, "dfa_s123": { "method_name": "DFA", "notes": "4-block d=256 ResMLP, CIFAR-10, seed 123", "residual_norms": [ 37112.44921875, 36312720.0, 230315440.0, 380822560.0, 391999424.0 ], "bp_grad_norms": [ 4.7457731966460415e-07, 2.89021429011882e-09, 2.8457014522587087e-09, 2.8453024381036585e-09, 2.8448703393024743e-09 ], "stability_layer": 2, "cross_batch_stability": 0.43620635635322996, "headline_acc": 0.3097, "frozen_baseline_acc": 0.349, "verdict": "needs walk-back: residual stream exploded; BP grad at numerical floor; BP grad direction is drift-dominated; deep blocks fail to beat frozen-random baseline", "thresholds": { "g_norm_floor": 1e-07, "h_norm_explosion_ratio": 50.0, "stability_drift_ceiling": 0.3, "frozen_acc_margin_pp": 2.0 } }, "state_bridge_s123": { "method_name": "STATE_BRIDGE", "notes": "4-block d=256 ResMLP, CIFAR-10, seed 123", "residual_norms": [ 667.2492065429688, 16098149.0, 20763102.0, 62817444.0, 71032832.0 ], "bp_grad_norms": [ 1.7076288713724352e-05, 2.25562746258845e-09, 2.259183506936324e-09, 2.2485666661964387e-09, 2.2383817022131325e-09 ], "stability_layer": 2, "cross_batch_stability": 0.5607693533102671, "headline_acc": 0.2471, "frozen_baseline_acc": 0.349, "verdict": "needs walk-back: residual stream exploded; BP grad at numerical floor; BP grad direction is drift-dominated; deep blocks fail to beat frozen-random baseline", "thresholds": { "g_norm_floor": 1e-07, "h_norm_explosion_ratio": 50.0, "stability_drift_ceiling": 0.3, "frozen_acc_margin_pp": 2.0 } }, "credit_bridge_s123": { "method_name": "CREDIT_BRIDGE", "notes": "4-block d=256 ResMLP, CIFAR-10, seed 123", "residual_norms": [ 15215.8994140625, 10566796.0, 97756936.0, 100126632.0, 103484824.0 ], "bp_grad_norms": [ 9.286936801800039e-07, 4.287085797471946e-09, 4.199999015241929e-09, 4.17046486234085e-09, 4.180252588525946e-09 ], "stability_layer": 2, "cross_batch_stability": 0.2499493021931913, "headline_acc": 0.3183, "frozen_baseline_acc": 0.349, "verdict": "needs walk-back: residual stream exploded; BP grad at numerical floor; deep blocks fail to beat frozen-random baseline", "thresholds": { "g_norm_floor": 1e-07, "h_norm_explosion_ratio": 50.0, "stability_drift_ceiling": 0.3, "frozen_acc_margin_pp": 2.0 } }, "ep_s123": { "method_name": "EP", "notes": "4-block d=256 ResMLP, CIFAR-10, seed 123", "residual_norms": [ 419.5404052734375, 463.3779602050781, 514.620361328125, 734.177734375, 8047.3076171875 ], "bp_grad_norms": [ 0.00014359146007336676, 0.00014363412628881633, 0.00013971966109238565, 0.0001398065942339599, 0.00010174066119361669 ], "stability_layer": 2, "cross_batch_stability": 0.11950824088934395, "headline_acc": 0.2924, "frozen_baseline_acc": null, "verdict": "trustworthy", "thresholds": { "g_norm_floor": 1e-07, "h_norm_explosion_ratio": 50.0, "stability_drift_ceiling": 0.3, "frozen_acc_margin_pp": 2.0 } }, "bp_s456": { "method_name": "BP", "notes": "4-block d=256 ResMLP, CIFAR-10, seed 456", "residual_norms": [ 252.45571899414062, 229.38861083984375, 214.40420532226562, 204.4502410888672, 196.94155883789062 ], "bp_grad_norms": [ 0.00044390829862095416, 0.00045150972437113523, 0.00048011011676862836, 0.00046427102643065155, 0.00040247690049000084 ], "stability_layer": 2, "cross_batch_stability": 0.11430172568394079, "headline_acc": 0.6186, "frozen_baseline_acc": null, "verdict": "trustworthy", "thresholds": { "g_norm_floor": 1e-07, "h_norm_explosion_ratio": 50.0, "stability_drift_ceiling": 0.3, "frozen_acc_margin_pp": 2.0 } }, "dfa_s456": { "method_name": "DFA", "notes": "4-block d=256 ResMLP, CIFAR-10, seed 456", "residual_norms": [ 37338.05078125, 95027664.0, 318074752.0, 585016960.0, 727085888.0 ], "bp_grad_norms": [ 4.006844847026514e-07, 1.9245969262726703e-09, 1.8939176893439935e-09, 1.892455525620562e-09, 1.8981318739008657e-09 ], "stability_layer": 2, "cross_batch_stability": -0.004968741205003527, "headline_acc": 0.2968, "frozen_baseline_acc": 0.349, "verdict": "needs walk-back: residual stream exploded; BP grad at numerical floor; deep blocks fail to beat frozen-random baseline", "thresholds": { "g_norm_floor": 1e-07, "h_norm_explosion_ratio": 50.0, "stability_drift_ceiling": 0.3, "frozen_acc_margin_pp": 2.0 } }, "state_bridge_s456": { "method_name": "STATE_BRIDGE", "notes": "4-block d=256 ResMLP, CIFAR-10, seed 456", "residual_norms": [ 573.7620849609375, 6005607.5, 181160848.0, 173630560.0, 161221984.0 ], "bp_grad_norms": [ 1.0508579180168454e-05, 2.864970927163313e-09, 2.424410006085509e-09, 2.4270003784465644e-09, 2.3956163719418555e-09 ], "stability_layer": 2, "cross_batch_stability": 0.034691754976908365, "headline_acc": 0.1991, "frozen_baseline_acc": 0.349, "verdict": "needs walk-back: residual stream exploded; BP grad at numerical floor; deep blocks fail to beat frozen-random baseline", "thresholds": { "g_norm_floor": 1e-07, "h_norm_explosion_ratio": 50.0, "stability_drift_ceiling": 0.3, "frozen_acc_margin_pp": 2.0 } }, "credit_bridge_s456": { "method_name": "CREDIT_BRIDGE", "notes": "4-block d=256 ResMLP, CIFAR-10, seed 456", "residual_norms": [ 16486.287109375, 17050302.0, 259499056.0, 255497504.0, 255497520.0 ], "bp_grad_norms": [ 5.92324568060576e-07, 2.4490782735142602e-09, 2.3854451747240546e-09, 2.3789590297695895e-09, 2.3789590297695895e-09 ], "stability_layer": 2, "cross_batch_stability": 0.5178194606055816, "headline_acc": 0.2927, "frozen_baseline_acc": 0.349, "verdict": "needs walk-back: residual stream exploded; BP grad at numerical floor; BP grad direction is drift-dominated; deep blocks fail to beat frozen-random baseline", "thresholds": { "g_norm_floor": 1e-07, "h_norm_explosion_ratio": 50.0, "stability_drift_ceiling": 0.3, "frozen_acc_margin_pp": 2.0 } }, "ep_s456": { "method_name": "EP", "notes": "4-block d=256 ResMLP, CIFAR-10, seed 456", "residual_norms": [ 238.81057739257812, 249.46739196777344, 302.7917175292969, 651.8840942382812, 3978.53466796875 ], "bp_grad_norms": [ 0.00029272810206748545, 0.0002923177380580455, 0.00028323367587290704, 0.00029317670851014555, 0.00011587901099119335 ], "stability_layer": 2, "cross_batch_stability": -0.024357840418815613, "headline_acc": 0.2976, "frozen_baseline_acc": null, "verdict": "trustworthy", "thresholds": { "g_norm_floor": 1e-07, "h_norm_explosion_ratio": 50.0, "stability_drift_ceiling": 0.3, "frozen_acc_margin_pp": 2.0 } } }, "summary": [ { "method": "bp", "seed": 42, "acc": 0.6149, "h_L": 205.75946044921875, "g_L": 0.0003701267414726317, "stability": 0.09898398886952135, "frozen_acc": null, "verdict": "trustworthy" }, { "method": "dfa", "seed": 42, "acc": 0.3107, "h_L": 435299520.0, "g_L": 4.174704582027289e-09, "stability": 0.047060725092887876, "frozen_acc": 0.349, "verdict": "needs walk-back: residual stream exploded; BP grad at numerical floor; deep blocks fail to beat frozen-random baseline" }, { "method": "state_bridge", "seed": 42, "acc": 0.1695, "h_L": 228665568.0, "g_L": 1.8411722146893794e-09, "stability": 0.99180050028695, "frozen_acc": 0.349, "verdict": "needs walk-back: residual stream exploded; BP grad at numerical floor; BP grad direction is drift-dominated; deep blocks fail to beat frozen-random baseline" }, { "method": "credit_bridge", "seed": 42, "acc": 0.2562, "h_L": 606231552.0, "g_L": 9.011226209665324e-10, "stability": 0.3516695586343606, "frozen_acc": 0.349, "verdict": "needs walk-back: residual stream exploded; BP grad at numerical floor; BP grad direction is drift-dominated; deep blocks fail to beat frozen-random baseline" }, { "method": "ep", "seed": 42, "acc": 0.359, "h_L": 3286.841064453125, "g_L": 0.00016422539192717522, "stability": -0.03589460700750351, "frozen_acc": null, "verdict": "trustworthy" }, { "method": "bp", "seed": 123, "acc": 0.6106, "h_L": 198.23153686523438, "g_L": 0.0003089293895754963, "stability": 0.0870692086716493, "frozen_acc": null, "verdict": "trustworthy" }, { "method": "dfa", "seed": 123, "acc": 0.3097, "h_L": 391999424.0, "g_L": 2.8448703393024743e-09, "stability": 0.43620635635322996, "frozen_acc": 0.349, "verdict": "needs walk-back: residual stream exploded; BP grad at numerical floor; BP grad direction is drift-dominated; deep blocks fail to beat frozen-random baseline" }, { "method": "state_bridge", "seed": 123, "acc": 0.2471, "h_L": 71032832.0, "g_L": 2.2383817022131325e-09, "stability": 0.5607693533102671, "frozen_acc": 0.349, "verdict": "needs walk-back: residual stream exploded; BP grad at numerical floor; BP grad direction is drift-dominated; deep blocks fail to beat frozen-random baseline" }, { "method": "credit_bridge", "seed": 123, "acc": 0.3183, "h_L": 103484824.0, "g_L": 4.180252588525946e-09, "stability": 0.2499493021931913, "frozen_acc": 0.349, "verdict": "needs walk-back: residual stream exploded; BP grad at numerical floor; deep blocks fail to beat frozen-random baseline" }, { "method": "ep", "seed": 123, "acc": 0.2924, "h_L": 8047.3076171875, "g_L": 0.00010174066119361669, "stability": 0.11950824088934395, "frozen_acc": null, "verdict": "trustworthy" }, { "method": "bp", "seed": 456, "acc": 0.6186, "h_L": 196.94155883789062, "g_L": 0.00040247690049000084, "stability": 0.11430172568394079, "frozen_acc": null, "verdict": "trustworthy" }, { "method": "dfa", "seed": 456, "acc": 0.2968, "h_L": 727085888.0, "g_L": 1.8981318739008657e-09, "stability": -0.004968741205003527, "frozen_acc": 0.349, "verdict": "needs walk-back: residual stream exploded; BP grad at numerical floor; deep blocks fail to beat frozen-random baseline" }, { "method": "state_bridge", "seed": 456, "acc": 0.1991, "h_L": 161221984.0, "g_L": 2.3956163719418555e-09, "stability": 0.034691754976908365, "frozen_acc": 0.349, "verdict": "needs walk-back: residual stream exploded; BP grad at numerical floor; deep blocks fail to beat frozen-random baseline" }, { "method": "credit_bridge", "seed": 456, "acc": 0.2927, "h_L": 255497520.0, "g_L": 2.3789590297695895e-09, "stability": 0.5178194606055816, "frozen_acc": 0.349, "verdict": "needs walk-back: residual stream exploded; BP grad at numerical floor; BP grad direction is drift-dominated; deep blocks fail to beat frozen-random baseline" }, { "method": "ep", "seed": 456, "acc": 0.2976, "h_L": 3978.53466796875, "g_L": 0.00011587901099119335, "stability": -0.024357840418815613, "frozen_acc": null, "verdict": "trustworthy" } ] }