summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- protocol/examples/audit_table.py | 71
-rw-r--r-- results/protocol_audit/audit_table_s42_s123_s456.json | 591
2 files changed, 633 insertions, 29 deletions
diff --git a/protocol/examples/audit_table.py b/protocol/examples/audit_table.py
index 1a75d96..da0caa9 100644
--- a/protocol/examples/audit_table.py
+++ b/protocol/examples/audit_table.py
@@ -100,6 +100,11 @@ FROZEN_BASELINE_ACC = {
def main():
+ import argparse
+ p = argparse.ArgumentParser()
+ p.add_argument("--seeds", type=int, nargs="+", default=[42])
+ args = p.parse_args()
+
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Device: {device}")
eval_batches = load_eval_batches(n_batches=10, batch_size=128, device=device)
@@ -107,44 +112,51 @@ def main():
methods = ["bp", "dfa", "state_bridge", "credit_bridge", "ep"]
rows = []
reports = {}
- for method in methods:
- print(f"\n### {method.upper()} (seed 42)")
- model = load_model(method, 42, device)
- acc = evaluate(model, device)
- report = diagnose(
- model=model,
- eval_batches=eval_batches,
- headline_acc=acc,
- frozen_baseline_acc=FROZEN_BASELINE_ACC.get(method),
- method_name=method.upper(),
- notes="4-block d=256 ResMLP, CIFAR-10, seed 42",
- )
- print(report)
- reports[method] = report.to_dict()
- rows.append({
- "method": method,
- "acc": acc,
- "h_L": report.residual_norms[-1],
- "g_L": report.bp_grad_norms[-1],
- "stability": report.cross_batch_stability,
- "frozen_acc": report.frozen_baseline_acc,
- "verdict": report.verdict,
- })
+ for seed in args.seeds:
+ for method in methods:
+ print(f"\n### {method.upper()} (seed {seed})")
+ try:
+ model = load_model(method, seed, device)
+ except FileNotFoundError as e:
+ print(f" SKIPPED: checkpoint not found ({e})")
+ continue
+ acc = evaluate(model, device)
+ report = diagnose(
+ model=model,
+ eval_batches=eval_batches,
+ headline_acc=acc,
+ frozen_baseline_acc=FROZEN_BASELINE_ACC.get(method),
+ method_name=method.upper(),
+ notes=f"4-block d=256 ResMLP, CIFAR-10, seed {seed}",
+ )
+ print(report)
+ reports[f"{method}_s{seed}"] = report.to_dict()
+ rows.append({
+ "method": method,
+ "seed": seed,
+ "acc": acc,
+ "h_L": report.residual_norms[-1],
+ "g_L": report.bp_grad_norms[-1],
+ "stability": report.cross_batch_stability,
+ "frozen_acc": report.frozen_baseline_acc,
+ "verdict": report.verdict,
+ })
# Compact summary table
- print("\n\n" + "=" * 100)
- print("AUDIT SUMMARY (single seed 42, 4-block d=256 ResMLP, CIFAR-10)")
- print("=" * 100)
+ print("\n\n" + "=" * 110)
+ print(f"AUDIT SUMMARY (seeds={args.seeds}, 4-block d=256 ResMLP, CIFAR-10)")
+ print("=" * 110)
header = (
- f"{'method':<16}{'acc':>8}{'||h_L||':>14}{'||g_L||':>14}"
+ f"{'method':<16}{'seed':>6}{'acc':>8}{'||h_L||':>14}{'||g_L||':>14}"
f"{'stab(L/2)':>12}{'frozen':>10} verdict"
)
print(header)
- print("-" * 100)
+ print("-" * 110)
for r in rows:
frozen = "n/a" if r["frozen_acc"] is None else f"{r['frozen_acc']:.4f}"
print(
f"{r['method']:<16}"
+ f"{r['seed']:>6}"
f"{r['acc']:>8.4f}"
f"{r['h_L']:>14.3e}"
f"{r['g_L']:>14.3e}"
@@ -152,7 +164,8 @@ def main():
f"{frozen:>10} {r['verdict']}"
)
- out_path = os.path.join(OUT_DIR, "audit_table_s42.json")
+ seeds_tag = "_".join(f"s{s}" for s in args.seeds)
+ out_path = os.path.join(OUT_DIR, f"audit_table_{seeds_tag}.json")
with open(out_path, "w") as f:
json.dump({"reports": reports, "summary": rows}, f, indent=2)
print(f"\nSaved {out_path}")
diff --git a/results/protocol_audit/audit_table_s42_s123_s456.json b/results/protocol_audit/audit_table_s42_s123_s456.json
new file mode 100644
index 0000000..9dc1599
--- /dev/null
+++ b/results/protocol_audit/audit_table_s42_s123_s456.json
@@ -0,0 +1,591 @@
+{
+ "reports": {
+ "bp_s42": {
+ "method_name": "BP",
+ "notes": "4-block d=256 ResMLP, CIFAR-10, seed 42",
+ "residual_norms": [
+ 251.83087158203125,
+ 226.57342529296875,
+ 212.16461181640625,
+ 205.60723876953125,
+ 205.75946044921875
+ ],
+ "bp_grad_norms": [
+ 0.0004396044823806733,
+ 0.0004709330096375197,
+ 0.0004792391264345497,
+ 0.00045345001854002476,
+ 0.0003701267414726317
+ ],
+ "stability_layer": 2,
+ "cross_batch_stability": 0.09898398886952135,
+ "headline_acc": 0.6149,
+ "frozen_baseline_acc": null,
+ "verdict": "trustworthy",
+ "thresholds": {
+ "g_norm_floor": 1e-07,
+ "h_norm_explosion_ratio": 50.0,
+ "stability_drift_ceiling": 0.3,
+ "frozen_acc_margin_pp": 2.0
+ }
+ },
+ "dfa_s42": {
+ "method_name": "DFA",
+ "notes": "4-block d=256 ResMLP, CIFAR-10, seed 42",
+ "residual_norms": [
+ 35824.796875,
+ 73202040.0,
+ 174312304.0,
+ 339040960.0,
+ 435299520.0
+ ],
+ "bp_grad_norms": [
+ 4.39066155877299e-07,
+ 4.1912620041273385e-09,
+ 4.183721813433294e-09,
+ 4.174094847542165e-09,
+ 4.174704582027289e-09
+ ],
+ "stability_layer": 2,
+ "cross_batch_stability": 0.047060725092887876,
+ "headline_acc": 0.3107,
+ "frozen_baseline_acc": 0.349,
+ "verdict": "needs walk-back: residual stream exploded; BP grad at numerical floor; deep blocks fail to beat frozen-random baseline",
+ "thresholds": {
+ "g_norm_floor": 1e-07,
+ "h_norm_explosion_ratio": 50.0,
+ "stability_drift_ceiling": 0.3,
+ "frozen_acc_margin_pp": 2.0
+ }
+ },
+ "state_bridge_s42": {
+ "method_name": "STATE_BRIDGE",
+ "notes": "4-block d=256 ResMLP, CIFAR-10, seed 42",
+ "residual_norms": [
+ 906.3201293945312,
+ 11583499.0,
+ 34872504.0,
+ 208111168.0,
+ 228665568.0
+ ],
+ "bp_grad_norms": [
+ 8.369566785404459e-06,
+ 1.996277365634569e-09,
+ 1.9812380624983916e-09,
+ 1.8405569290891322e-09,
+ 1.8411722146893794e-09
+ ],
+ "stability_layer": 2,
+ "cross_batch_stability": 0.99180050028695,
+ "headline_acc": 0.1695,
+ "frozen_baseline_acc": 0.349,
+ "verdict": "needs walk-back: residual stream exploded; BP grad at numerical floor; BP grad direction is drift-dominated; deep blocks fail to beat frozen-random baseline",
+ "thresholds": {
+ "g_norm_floor": 1e-07,
+ "h_norm_explosion_ratio": 50.0,
+ "stability_drift_ceiling": 0.3,
+ "frozen_acc_margin_pp": 2.0
+ }
+ },
+ "credit_bridge_s42": {
+ "method_name": "CREDIT_BRIDGE",
+ "notes": "4-block d=256 ResMLP, CIFAR-10, seed 42",
+ "residual_norms": [
+ 13249.662109375,
+ 24119914.0,
+ 554824896.0,
+ 548816832.0,
+ 606231552.0
+ ],
+ "bp_grad_norms": [
+ 7.185065555859182e-07,
+ 1.1024462454045647e-09,
+ 9.061909000962487e-10,
+ 9.013046420314197e-10,
+ 9.011226209665324e-10
+ ],
+ "stability_layer": 2,
+ "cross_batch_stability": 0.3516695586343606,
+ "headline_acc": 0.2562,
+ "frozen_baseline_acc": 0.349,
+ "verdict": "needs walk-back: residual stream exploded; BP grad at numerical floor; BP grad direction is drift-dominated; deep blocks fail to beat frozen-random baseline",
+ "thresholds": {
+ "g_norm_floor": 1e-07,
+ "h_norm_explosion_ratio": 50.0,
+ "stability_drift_ceiling": 0.3,
+ "frozen_acc_margin_pp": 2.0
+ }
+ },
+ "ep_s42": {
+ "method_name": "EP",
+ "notes": "4-block d=256 ResMLP, CIFAR-10, seed 42",
+ "residual_norms": [
+ 518.3867797851562,
+ 579.6542358398438,
+ 680.764892578125,
+ 1145.8692626953125,
+ 3286.841064453125
+ ],
+ "bp_grad_norms": [
+ 0.00022257285309024155,
+ 0.00022327345504891127,
+ 0.00021209640544839203,
+ 0.00021204684162512422,
+ 0.00016422539192717522
+ ],
+ "stability_layer": 2,
+ "cross_batch_stability": -0.03589460700750351,
+ "headline_acc": 0.359,
+ "frozen_baseline_acc": null,
+ "verdict": "trustworthy",
+ "thresholds": {
+ "g_norm_floor": 1e-07,
+ "h_norm_explosion_ratio": 50.0,
+ "stability_drift_ceiling": 0.3,
+ "frozen_acc_margin_pp": 2.0
+ }
+ },
+ "bp_s123": {
+ "method_name": "BP",
+ "notes": "4-block d=256 ResMLP, CIFAR-10, seed 123",
+ "residual_norms": [
+ 253.8892822265625,
+ 231.17062377929688,
+ 215.71670532226562,
+ 206.64784240722656,
+ 198.23153686523438
+ ],
+ "bp_grad_norms": [
+ 0.00040613432065583766,
+ 0.0004288216005079448,
+ 0.0004308059287723154,
+ 0.000438842544099316,
+ 0.0003089293895754963
+ ],
+ "stability_layer": 2,
+ "cross_batch_stability": 0.0870692086716493,
+ "headline_acc": 0.6106,
+ "frozen_baseline_acc": null,
+ "verdict": "trustworthy",
+ "thresholds": {
+ "g_norm_floor": 1e-07,
+ "h_norm_explosion_ratio": 50.0,
+ "stability_drift_ceiling": 0.3,
+ "frozen_acc_margin_pp": 2.0
+ }
+ },
+ "dfa_s123": {
+ "method_name": "DFA",
+ "notes": "4-block d=256 ResMLP, CIFAR-10, seed 123",
+ "residual_norms": [
+ 37112.44921875,
+ 36312720.0,
+ 230315440.0,
+ 380822560.0,
+ 391999424.0
+ ],
+ "bp_grad_norms": [
+ 4.7457731966460415e-07,
+ 2.89021429011882e-09,
+ 2.8457014522587087e-09,
+ 2.8453024381036585e-09,
+ 2.8448703393024743e-09
+ ],
+ "stability_layer": 2,
+ "cross_batch_stability": 0.43620635635322996,
+ "headline_acc": 0.3097,
+ "frozen_baseline_acc": 0.349,
+ "verdict": "needs walk-back: residual stream exploded; BP grad at numerical floor; BP grad direction is drift-dominated; deep blocks fail to beat frozen-random baseline",
+ "thresholds": {
+ "g_norm_floor": 1e-07,
+ "h_norm_explosion_ratio": 50.0,
+ "stability_drift_ceiling": 0.3,
+ "frozen_acc_margin_pp": 2.0
+ }
+ },
+ "state_bridge_s123": {
+ "method_name": "STATE_BRIDGE",
+ "notes": "4-block d=256 ResMLP, CIFAR-10, seed 123",
+ "residual_norms": [
+ 667.2492065429688,
+ 16098149.0,
+ 20763102.0,
+ 62817444.0,
+ 71032832.0
+ ],
+ "bp_grad_norms": [
+ 1.7076288713724352e-05,
+ 2.25562746258845e-09,
+ 2.259183506936324e-09,
+ 2.2485666661964387e-09,
+ 2.2383817022131325e-09
+ ],
+ "stability_layer": 2,
+ "cross_batch_stability": 0.5607693533102671,
+ "headline_acc": 0.2471,
+ "frozen_baseline_acc": 0.349,
+ "verdict": "needs walk-back: residual stream exploded; BP grad at numerical floor; BP grad direction is drift-dominated; deep blocks fail to beat frozen-random baseline",
+ "thresholds": {
+ "g_norm_floor": 1e-07,
+ "h_norm_explosion_ratio": 50.0,
+ "stability_drift_ceiling": 0.3,
+ "frozen_acc_margin_pp": 2.0
+ }
+ },
+ "credit_bridge_s123": {
+ "method_name": "CREDIT_BRIDGE",
+ "notes": "4-block d=256 ResMLP, CIFAR-10, seed 123",
+ "residual_norms": [
+ 15215.8994140625,
+ 10566796.0,
+ 97756936.0,
+ 100126632.0,
+ 103484824.0
+ ],
+ "bp_grad_norms": [
+ 9.286936801800039e-07,
+ 4.287085797471946e-09,
+ 4.199999015241929e-09,
+ 4.17046486234085e-09,
+ 4.180252588525946e-09
+ ],
+ "stability_layer": 2,
+ "cross_batch_stability": 0.2499493021931913,
+ "headline_acc": 0.3183,
+ "frozen_baseline_acc": 0.349,
+ "verdict": "needs walk-back: residual stream exploded; BP grad at numerical floor; deep blocks fail to beat frozen-random baseline",
+ "thresholds": {
+ "g_norm_floor": 1e-07,
+ "h_norm_explosion_ratio": 50.0,
+ "stability_drift_ceiling": 0.3,
+ "frozen_acc_margin_pp": 2.0
+ }
+ },
+ "ep_s123": {
+ "method_name": "EP",
+ "notes": "4-block d=256 ResMLP, CIFAR-10, seed 123",
+ "residual_norms": [
+ 419.5404052734375,
+ 463.3779602050781,
+ 514.620361328125,
+ 734.177734375,
+ 8047.3076171875
+ ],
+ "bp_grad_norms": [
+ 0.00014359146007336676,
+ 0.00014363412628881633,
+ 0.00013971966109238565,
+ 0.0001398065942339599,
+ 0.00010174066119361669
+ ],
+ "stability_layer": 2,
+ "cross_batch_stability": 0.11950824088934395,
+ "headline_acc": 0.2924,
+ "frozen_baseline_acc": null,
+ "verdict": "trustworthy",
+ "thresholds": {
+ "g_norm_floor": 1e-07,
+ "h_norm_explosion_ratio": 50.0,
+ "stability_drift_ceiling": 0.3,
+ "frozen_acc_margin_pp": 2.0
+ }
+ },
+ "bp_s456": {
+ "method_name": "BP",
+ "notes": "4-block d=256 ResMLP, CIFAR-10, seed 456",
+ "residual_norms": [
+ 252.45571899414062,
+ 229.38861083984375,
+ 214.40420532226562,
+ 204.4502410888672,
+ 196.94155883789062
+ ],
+ "bp_grad_norms": [
+ 0.00044390829862095416,
+ 0.00045150972437113523,
+ 0.00048011011676862836,
+ 0.00046427102643065155,
+ 0.00040247690049000084
+ ],
+ "stability_layer": 2,
+ "cross_batch_stability": 0.11430172568394079,
+ "headline_acc": 0.6186,
+ "frozen_baseline_acc": null,
+ "verdict": "trustworthy",
+ "thresholds": {
+ "g_norm_floor": 1e-07,
+ "h_norm_explosion_ratio": 50.0,
+ "stability_drift_ceiling": 0.3,
+ "frozen_acc_margin_pp": 2.0
+ }
+ },
+ "dfa_s456": {
+ "method_name": "DFA",
+ "notes": "4-block d=256 ResMLP, CIFAR-10, seed 456",
+ "residual_norms": [
+ 37338.05078125,
+ 95027664.0,
+ 318074752.0,
+ 585016960.0,
+ 727085888.0
+ ],
+ "bp_grad_norms": [
+ 4.006844847026514e-07,
+ 1.9245969262726703e-09,
+ 1.8939176893439935e-09,
+ 1.892455525620562e-09,
+ 1.8981318739008657e-09
+ ],
+ "stability_layer": 2,
+ "cross_batch_stability": -0.004968741205003527,
+ "headline_acc": 0.2968,
+ "frozen_baseline_acc": 0.349,
+ "verdict": "needs walk-back: residual stream exploded; BP grad at numerical floor; deep blocks fail to beat frozen-random baseline",
+ "thresholds": {
+ "g_norm_floor": 1e-07,
+ "h_norm_explosion_ratio": 50.0,
+ "stability_drift_ceiling": 0.3,
+ "frozen_acc_margin_pp": 2.0
+ }
+ },
+ "state_bridge_s456": {
+ "method_name": "STATE_BRIDGE",
+ "notes": "4-block d=256 ResMLP, CIFAR-10, seed 456",
+ "residual_norms": [
+ 573.7620849609375,
+ 6005607.5,
+ 181160848.0,
+ 173630560.0,
+ 161221984.0
+ ],
+ "bp_grad_norms": [
+ 1.0508579180168454e-05,
+ 2.864970927163313e-09,
+ 2.424410006085509e-09,
+ 2.4270003784465644e-09,
+ 2.3956163719418555e-09
+ ],
+ "stability_layer": 2,
+ "cross_batch_stability": 0.034691754976908365,
+ "headline_acc": 0.1991,
+ "frozen_baseline_acc": 0.349,
+ "verdict": "needs walk-back: residual stream exploded; BP grad at numerical floor; deep blocks fail to beat frozen-random baseline",
+ "thresholds": {
+ "g_norm_floor": 1e-07,
+ "h_norm_explosion_ratio": 50.0,
+ "stability_drift_ceiling": 0.3,
+ "frozen_acc_margin_pp": 2.0
+ }
+ },
+ "credit_bridge_s456": {
+ "method_name": "CREDIT_BRIDGE",
+ "notes": "4-block d=256 ResMLP, CIFAR-10, seed 456",
+ "residual_norms": [
+ 16486.287109375,
+ 17050302.0,
+ 259499056.0,
+ 255497504.0,
+ 255497520.0
+ ],
+ "bp_grad_norms": [
+ 5.92324568060576e-07,
+ 2.4490782735142602e-09,
+ 2.3854451747240546e-09,
+ 2.3789590297695895e-09,
+ 2.3789590297695895e-09
+ ],
+ "stability_layer": 2,
+ "cross_batch_stability": 0.5178194606055816,
+ "headline_acc": 0.2927,
+ "frozen_baseline_acc": 0.349,
+ "verdict": "needs walk-back: residual stream exploded; BP grad at numerical floor; BP grad direction is drift-dominated; deep blocks fail to beat frozen-random baseline",
+ "thresholds": {
+ "g_norm_floor": 1e-07,
+ "h_norm_explosion_ratio": 50.0,
+ "stability_drift_ceiling": 0.3,
+ "frozen_acc_margin_pp": 2.0
+ }
+ },
+ "ep_s456": {
+ "method_name": "EP",
+ "notes": "4-block d=256 ResMLP, CIFAR-10, seed 456",
+ "residual_norms": [
+ 238.81057739257812,
+ 249.46739196777344,
+ 302.7917175292969,
+ 651.8840942382812,
+ 3978.53466796875
+ ],
+ "bp_grad_norms": [
+ 0.00029272810206748545,
+ 0.0002923177380580455,
+ 0.00028323367587290704,
+ 0.00029317670851014555,
+ 0.00011587901099119335
+ ],
+ "stability_layer": 2,
+ "cross_batch_stability": -0.024357840418815613,
+ "headline_acc": 0.2976,
+ "frozen_baseline_acc": null,
+ "verdict": "trustworthy",
+ "thresholds": {
+ "g_norm_floor": 1e-07,
+ "h_norm_explosion_ratio": 50.0,
+ "stability_drift_ceiling": 0.3,
+ "frozen_acc_margin_pp": 2.0
+ }
+ }
+ },
+ "summary": [
+ {
+ "method": "bp",
+ "seed": 42,
+ "acc": 0.6149,
+ "h_L": 205.75946044921875,
+ "g_L": 0.0003701267414726317,
+ "stability": 0.09898398886952135,
+ "frozen_acc": null,
+ "verdict": "trustworthy"
+ },
+ {
+ "method": "dfa",
+ "seed": 42,
+ "acc": 0.3107,
+ "h_L": 435299520.0,
+ "g_L": 4.174704582027289e-09,
+ "stability": 0.047060725092887876,
+ "frozen_acc": 0.349,
+ "verdict": "needs walk-back: residual stream exploded; BP grad at numerical floor; deep blocks fail to beat frozen-random baseline"
+ },
+ {
+ "method": "state_bridge",
+ "seed": 42,
+ "acc": 0.1695,
+ "h_L": 228665568.0,
+ "g_L": 1.8411722146893794e-09,
+ "stability": 0.99180050028695,
+ "frozen_acc": 0.349,
+ "verdict": "needs walk-back: residual stream exploded; BP grad at numerical floor; BP grad direction is drift-dominated; deep blocks fail to beat frozen-random baseline"
+ },
+ {
+ "method": "credit_bridge",
+ "seed": 42,
+ "acc": 0.2562,
+ "h_L": 606231552.0,
+ "g_L": 9.011226209665324e-10,
+ "stability": 0.3516695586343606,
+ "frozen_acc": 0.349,
+ "verdict": "needs walk-back: residual stream exploded; BP grad at numerical floor; BP grad direction is drift-dominated; deep blocks fail to beat frozen-random baseline"
+ },
+ {
+ "method": "ep",
+ "seed": 42,
+ "acc": 0.359,
+ "h_L": 3286.841064453125,
+ "g_L": 0.00016422539192717522,
+ "stability": -0.03589460700750351,
+ "frozen_acc": null,
+ "verdict": "trustworthy"
+ },
+ {
+ "method": "bp",
+ "seed": 123,
+ "acc": 0.6106,
+ "h_L": 198.23153686523438,
+ "g_L": 0.0003089293895754963,
+ "stability": 0.0870692086716493,
+ "frozen_acc": null,
+ "verdict": "trustworthy"
+ },
+ {
+ "method": "dfa",
+ "seed": 123,
+ "acc": 0.3097,
+ "h_L": 391999424.0,
+ "g_L": 2.8448703393024743e-09,
+ "stability": 0.43620635635322996,
+ "frozen_acc": 0.349,
+ "verdict": "needs walk-back: residual stream exploded; BP grad at numerical floor; BP grad direction is drift-dominated; deep blocks fail to beat frozen-random baseline"
+ },
+ {
+ "method": "state_bridge",
+ "seed": 123,
+ "acc": 0.2471,
+ "h_L": 71032832.0,
+ "g_L": 2.2383817022131325e-09,
+ "stability": 0.5607693533102671,
+ "frozen_acc": 0.349,
+ "verdict": "needs walk-back: residual stream exploded; BP grad at numerical floor; BP grad direction is drift-dominated; deep blocks fail to beat frozen-random baseline"
+ },
+ {
+ "method": "credit_bridge",
+ "seed": 123,
+ "acc": 0.3183,
+ "h_L": 103484824.0,
+ "g_L": 4.180252588525946e-09,
+ "stability": 0.2499493021931913,
+ "frozen_acc": 0.349,
+ "verdict": "needs walk-back: residual stream exploded; BP grad at numerical floor; deep blocks fail to beat frozen-random baseline"
+ },
+ {
+ "method": "ep",
+ "seed": 123,
+ "acc": 0.2924,
+ "h_L": 8047.3076171875,
+ "g_L": 0.00010174066119361669,
+ "stability": 0.11950824088934395,
+ "frozen_acc": null,
+ "verdict": "trustworthy"
+ },
+ {
+ "method": "bp",
+ "seed": 456,
+ "acc": 0.6186,
+ "h_L": 196.94155883789062,
+ "g_L": 0.00040247690049000084,
+ "stability": 0.11430172568394079,
+ "frozen_acc": null,
+ "verdict": "trustworthy"
+ },
+ {
+ "method": "dfa",
+ "seed": 456,
+ "acc": 0.2968,
+ "h_L": 727085888.0,
+ "g_L": 1.8981318739008657e-09,
+ "stability": -0.004968741205003527,
+ "frozen_acc": 0.349,
+ "verdict": "needs walk-back: residual stream exploded; BP grad at numerical floor; deep blocks fail to beat frozen-random baseline"
+ },
+ {
+ "method": "state_bridge",
+ "seed": 456,
+ "acc": 0.1991,
+ "h_L": 161221984.0,
+ "g_L": 2.3956163719418555e-09,
+ "stability": 0.034691754976908365,
+ "frozen_acc": 0.349,
+ "verdict": "needs walk-back: residual stream exploded; BP grad at numerical floor; deep blocks fail to beat frozen-random baseline"
+ },
+ {
+ "method": "credit_bridge",
+ "seed": 456,
+ "acc": 0.2927,
+ "h_L": 255497520.0,
+ "g_L": 2.3789590297695895e-09,
+ "stability": 0.5178194606055816,
+ "frozen_acc": 0.349,
+ "verdict": "needs walk-back: residual stream exploded; BP grad at numerical floor; BP grad direction is drift-dominated; deep blocks fail to beat frozen-random baseline"
+ },
+ {
+ "method": "ep",
+ "seed": 456,
+ "acc": 0.2976,
+ "h_L": 3978.53466796875,
+ "g_L": 0.00011587901099119335,
+ "stability": -0.024357840418815613,
+ "frozen_acc": null,
+ "verdict": "trustworthy"
+ }
+ ]
+}
\ No newline at end of file