summaryrefslogtreecommitdiff
path: root/results/fa_early_ckpts/results_cifar10.json
blob: aa046e348bfb4aa54dcabfc4c98c854edacbd1a8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
{
  "42": {
    "fa": {
      "log": {
        "train_loss": [
          2.049124941177368,
          1.9718926105117798,
          1.9489588039398194,
          1.934002057762146,
          1.9262304685211182
        ],
        "train_acc": [
          0.2436,
          0.2745,
          0.29034,
          0.29818,
          0.301
        ],
        "test_acc": [
          0.2789,
          0.3111,
          0.3091,
          0.3094,
          0.3219
        ]
      },
      "diagnostics": {
        "bp_cosine": [
          0.0644938200712204,
          0.00024983659386634827,
          -0.006781474687159061,
          0.9952471852302551
        ],
        "perturbation_rho": [
          0.06215526908636093,
          -0.005929573439061642,
          0.029685823246836662,
          0.183500275015831
        ],
        "nudging": {
          "0.001": [
            -2.8510112315416336e-06,
            -6.868503987789154e-08,
            -1.4668330550193787e-08,
            -6.758375093340874e-06
          ],
          "0.003": [
            -8.508563041687012e-06,
            -1.6507692635059357e-07,
            -4.912726581096649e-08,
            -2.0218081772327423e-05
          ],
          "0.01": [
            -2.8386013582348824e-05,
            -6.488990038633347e-07,
            -1.6367994248867035e-07,
            -6.742705591022968e-05
          ]
        },
        "hidden_norms_per_layer": [
          960.3140869140625,
          11076.888671875,
          31092.83984375,
          34876.390625,
          29288.8984375
        ],
        "bp_grad_norms_per_layer": [
          2.04460939130513e-05,
          3.4170184335380327e-06,
          3.3133326269307872e-06,
          3.321988742754911e-06,
          3.197354772055405e-06
        ]
      },
      "drift": {
        "embed.weight": 9.203150246245913,
        "embed.bias": 7.782631309228306,
        "blocks.0.ln.weight": 0.5169422030448914,
        "blocks.0.w1.weight": 7.173286797050866,
        "blocks.0.w1.bias": 7.530177507927273,
        "blocks.0.w2.weight": 20.753186824233353,
        "blocks.1.ln.weight": 0.4266158640384674,
        "blocks.1.w1.weight": 6.365489317176397,
        "blocks.1.w1.bias": 7.642448312201989,
        "blocks.1.w2.weight": 16.266082583917918,
        "blocks.2.ln.weight": 0.366255521774292,
        "blocks.2.w1.weight": 5.12781119461704,
        "blocks.2.w1.bias": 5.784653325984481,
        "blocks.2.w2.weight": 14.283055474835482,
        "blocks.3.ln.weight": 0.3213387429714203,
        "blocks.3.w1.weight": 4.3690188550450015,
        "blocks.3.w1.bias": 4.601659627893413,
        "blocks.3.w2.weight": 12.971053381902397,
        "out_ln.weight": 0.05851004645228386,
        "out_head.weight": 1.1675271811094783,
        "out_head.bias": 0.45402486694117133
      }
    }
  },
  "123": {
    "fa": {
      "log": {
        "train_loss": [
          2.039442294845581,
          1.953437792930603,
          1.9257947838592528,
          1.9107846630096434,
          1.9030635565567016
        ],
        "train_acc": [
          0.25042,
          0.29012,
          0.30112,
          0.30812,
          0.31226
        ],
        "test_acc": [
          0.2905,
          0.3307,
          0.3419,
          0.3476,
          0.3478
        ]
      },
      "diagnostics": {
        "bp_cosine": [
          0.06648196280002594,
          -0.010407494381070137,
          -0.06906092911958694,
          0.9917651414871216
        ],
        "perturbation_rho": [
          0.031062623485922813,
          -0.02341378480195999,
          -0.033602140843868256,
          0.2669849991798401
        ],
        "nudging": {
          "0.001": [
            -3.1692907214164734e-06,
            -1.126900315284729e-07,
            7.380731403827667e-07,
            -1.1092517524957657e-05
          ],
          "0.003": [
            -9.447336196899414e-06,
            -3.655441105365753e-07,
            2.216547727584839e-06,
            -3.331666812300682e-05
          ],
          "0.01": [
            -3.144703805446625e-05,
            -1.210719347000122e-06,
            7.366761565208435e-06,
            -0.00011098524555563927
          ]
        },
        "hidden_norms_per_layer": [
          863.883056640625,
          6307.2734375,
          12256.30859375,
          20627.75390625,
          15648.552734375
        ],
        "bp_grad_norms_per_layer": [
          3.119367829640396e-05,
          5.960608177701943e-06,
          5.3465173550648615e-06,
          5.451070592243923e-06,
          5.225469521974446e-06
        ]
      },
      "drift": {
        "embed.weight": 8.803298126163122,
        "embed.bias": 6.680663743131999,
        "blocks.0.ln.weight": 0.4969744086265564,
        "blocks.0.w1.weight": 6.118721027737535,
        "blocks.0.w1.bias": 5.453192795258035,
        "blocks.0.w2.weight": 17.832094218833557,
        "blocks.1.ln.weight": 0.4233635365962982,
        "blocks.1.w1.weight": 5.605093419712821,
        "blocks.1.w1.bias": 6.1996663705372885,
        "blocks.1.w2.weight": 16.130887571112158,
        "blocks.2.ln.weight": 0.38245585560798645,
        "blocks.2.w1.weight": 5.4055954853998145,
        "blocks.2.w1.bias": 6.487736894934294,
        "blocks.2.w2.weight": 15.370923217597205,
        "blocks.3.ln.weight": 0.39037927985191345,
        "blocks.3.w1.weight": 5.164380800730237,
        "blocks.3.w1.bias": 5.6053151435754565,
        "blocks.3.w2.weight": 13.468106289535303,
        "out_ln.weight": 0.04700668901205063,
        "out_head.weight": 1.1282362053366835,
        "out_head.bias": 1.0759554656539119
      }
    }
  },
  "456": {
    "fa": {
      "log": {
        "train_loss": [
          2.064060781517029,
          1.9901417289733887,
          1.9581991654205322,
          1.9443307501983642,
          1.935314490966797
        ],
        "train_acc": [
          0.24014,
          0.268,
          0.28416,
          0.29098,
          0.29644
        ],
        "test_acc": [
          0.2713,
          0.3048,
          0.313,
          0.3135,
          0.3242
        ]
      },
      "diagnostics": {
        "bp_cosine": [
          0.06508420407772064,
          -0.013459235429763794,
          -0.00898228120058775,
          0.9861425161361694
        ],
        "perturbation_rho": [
          0.0332966186106205,
          -0.03956230729818344,
          0.008942835032939911,
          0.18084916472434998
        ],
        "nudging": {
          "0.001": [
            -2.798624336719513e-06,
            1.7695128917694092e-08,
            -1.3969838619232178e-08,
            -6.395392119884491e-06
          ],
          "0.003": [
            -8.38935375213623e-06,
            7.916241884231567e-09,
            -2.60770320892334e-08,
            -1.91426370292902e-05
          ],
          "0.01": [
            -2.7919188141822815e-05,
            9.639188647270203e-08,
            -4.912726581096649e-08,
            -6.371899507939816e-05
          ]
        },
        "hidden_norms_per_layer": [
          1010.2516479492188,
          13113.5537109375,
          22355.0546875,
          36300.3671875,
          30980.419921875
        ],
        "bp_grad_norms_per_layer": [
          1.7613443560549058e-05,
          3.159134394081775e-06,
          3.121002691841568e-06,
          3.1120944186113775e-06,
          2.9537256978073856e-06
        ]
      },
      "drift": {
        "embed.weight": 9.613138255724964,
        "embed.bias": 6.926131704202163,
        "blocks.0.ln.weight": 0.548354983329773,
        "blocks.0.w1.weight": 7.294897696552802,
        "blocks.0.w1.bias": 5.312217412593903,
        "blocks.0.w2.weight": 20.619584988017134,
        "blocks.1.ln.weight": 0.3894560933113098,
        "blocks.1.w1.weight": 5.488544569987622,
        "blocks.1.w1.bias": 4.831823702146538,
        "blocks.1.w2.weight": 15.40878297211816,
        "blocks.2.ln.weight": 0.37565672397613525,
        "blocks.2.w1.weight": 5.8266288518010825,
        "blocks.2.w1.bias": 6.58294580181279,
        "blocks.2.w2.weight": 15.720187648044627,
        "blocks.3.ln.weight": 0.33931589126586914,
        "blocks.3.w1.weight": 4.6498033455621615,
        "blocks.3.w1.bias": 3.4624320285535566,
        "blocks.3.w2.weight": 14.935221420670851,
        "out_ln.weight": 0.04563162103295326,
        "out_head.weight": 1.066836036120676,
        "out_head.bias": 0.553680529428652
      }
    }
  },
  "config": {
    "dataset": "cifar10",
    "d_hidden": 256,
    "num_blocks": 4,
    "batch_size": 128,
    "epochs": 5,
    "lr": 0.001,
    "lr_fb": 0.001,
    "wd": 0.01,
    "lam": 0.1,
    "K": 4,
    "sigma_bridge": 0.05,
    "ema_momentum": 0.995,
    "term_grad_weight": 1.0,
    "seeds": [
      42,
      123,
      456
    ],
    "gpu": 0,
    "output_dir": "results/fa_early_ckpts",
    "methods": [
      "fa"
    ],
    "random_targets": false,
    "penalty_lam": 0.0,
    "num_classes": 10
  }
}