1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
|
{
"matched_30ep_control": {
"config": {
"arch": "4-block d=256 pre-LN ResMLP",
"dataset": "CIFAR-10",
"epochs": 30,
"batch": 128,
"optimizer": "AdamW lr=1e-3 wd=0.01",
"schedule": "cosine",
"penalty": "lam*mean(||f_l(h_l)||^2) on residual branches"
},
"bp_no_pen_3seed": {
"per_seed": {
"42": 0.5851,
"123": 0.5845,
"456": 0.5863
},
"mean": 0.5853,
"std_ddof0": 0.00075,
"std_ddof1": 0.00092,
"source_files": [
"results/bp_no_penalty_30ep/bp_pen_lam0.0_s{42,123,456}.json"
]
},
"bp_with_pen_s42": {
"acc": 0.5303,
"lam": 0.01,
"source": "results/bp_with_penalty/bp_pen_lam0.01_s42.json",
"note": "single-seed (s42 only); v2.30 polish identified the matched 30-ep no-pen 3-seed control was missing"
},
"dfa_no_pen_3seed": {
"per_seed": {
"42": 0.307,
"123": 0.2985,
"456": 0.2966
},
"mean": 0.3007,
"std_ddof0": 0.0045,
"std_ddof1": 0.0055,
"source_files": [
"results/dfa_no_penalty_30ep/results_cifar10.json"
]
},
"dfa_with_pen_3seed": {
"per_seed": {
"42": 0.3593,
"123": 0.361,
"456": 0.3604
},
"mean": 0.3602,
"std_ddof0": 0.0007,
"std_ddof1": 0.0009,
"source_files": [
"results/dfa_pen_short/dfa_pen_lam0.01_s{42,123,456}.json"
],
"lam": 0.01
},
"penalty_costs": {
"bp_pen_cost_pp": 5.5,
"dfa_pen_rescue_pp": 5.9
},
"margins_vs_frozen_0.349": {
"bp_no_pen_pp": 23.6,
"bp_with_pen_pp": 18.1,
"dfa_no_pen_pp": -4.8,
"dfa_with_pen_pp": 1.1,
"sb_with_pen_pp": 10.4,
"cb_with_pen_pp": 1.1
}
}
}
|