summaryrefslogtreecommitdiff
path: root/results/nudging_test_3seed_summary.json
blob: b970c59accfb46257bf5a376af96f43d407f787b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
{
  "description": "Nudging test 3-seed values for \u00a74 \u00b64 cross-method functional dissociation. Each value is the test-loss change after a single step of size eta=0.01 along the per-layer credit direction at the converged checkpoint.",
  "eta": 0.01,
  "epochs": 30,
  "arch": "4-block d=256 pre-LN ResMLP",
  "penalty_lam": 0.01,
  "methods": {
    "state_bridge": {
      "per_seed": {
        "42": {
          "per_block": [
            -0.0035966814029961824,
            -0.0023783869110047817,
            -0.0017011994495987892,
            -0.0012592736165970564
          ],
          "deep_mean": -0.0017796199924002092,
          "note": "deep = blocks l1, l2, l3 (excluding l0)"
        },
        "123": {
          "per_block": [
            -0.003914414905011654,
            -0.002520129084587097,
            -0.0018878313712775707,
            -0.0014582867734134197
          ],
          "deep_mean": -0.0019554157430926957,
          "note": "deep = blocks l1, l2, l3 (excluding l0)"
        },
        "456": {
          "per_block": [
            -0.004026009701192379,
            -0.0028109778650105,
            -0.001904117758385837,
            -0.0014447440626099706
          ],
          "deep_mean": -0.002053279895335436,
          "note": "deep = blocks l1, l2, l3 (excluding l0)"
        }
      },
      "three_seed_deep_mean": -0.0019294385436094469,
      "three_seed_deep_std_ddof0": 0.00011322116053216024
    },
    "credit_bridge": {
      "per_seed": {
        "42": {
          "per_block": [
            -0.0005118446424603462,
            -0.0004658599500544369,
            -0.00044331286335363984,
            -0.00042594311526045203
          ],
          "deep_mean": -0.0004450386428895096,
          "note": "deep = blocks l1, l2, l3 (excluding l0)"
        },
        "123": {
          "per_block": [
            -0.00045391899766400456,
            -0.00041642854921519756,
            -0.00038977732765488327,
            -0.00037192515446804464
          ],
          "deep_mean": -0.00039271034377937514,
          "note": "deep = blocks l1, l2, l3 (excluding l0)"
        },
        "456": {
          "per_block": [
            -0.00048537791008129716,
            -0.00046242878306657076,
            -0.00043993344297632575,
            -0.00042223266791552305
          ],
          "deep_mean": -0.0004415316313194732,
          "note": "deep = blocks l1, l2, l3 (excluding l0)"
        }
      },
      "three_seed_deep_mean": -0.000426426872662786,
      "three_seed_deep_std_ddof0": 2.3884137309066322e-05
    },
    "dfa": {
      "per_seed": {
        "42": {
          "per_block": [
            -0.00013115769252181053,
            -5.455967038869858e-05,
            -5.524198058992624e-05,
            -5.596294067800045e-05
          ],
          "deep_mean": -5.525486388554176e-05,
          "note": "deep = blocks l1, l2, l3 (excluding l0)"
        },
        "123": {
          "per_block": [
            -0.00010294892126694322,
            -3.057112917304039e-05,
            -4.6447094064205885e-05,
            -5.68098621442914e-05
          ],
          "deep_mean": -4.460936179384589e-05,
          "note": "deep = blocks l1, l2, l3 (excluding l0)"
        },
        "456": {
          "per_block": [
            -0.00014482333790510893,
            -4.394981078803539e-05,
            -4.4448592234402895e-05,
            -5.99866034463048e-05
          ],
          "deep_mean": -4.946166882291436e-05,
          "note": "deep = blocks l1, l2, l3 (excluding l0)"
        }
      },
      "three_seed_deep_mean": -4.9775298167434004e-05,
      "three_seed_deep_std_ddof0": 4.3516626110321815e-06
    }
  },
  "ratios_3seed_means": {
    "SB / CB": 4.5246645258569975,
    "SB / DFA": 38.76297309398745,
    "CB / DFA": 8.56703803618358
  }
}