summaryrefslogtreecommitdiff
path: root/results/optionSBCB_smoke/results_cifar10.json
blob: c2ab728d451ea54a7b00a8f5b346e4a314ec9119 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
{
  "42": {
    "state_bridge": {
      "log": {
        "train_loss": [
          2.32298729019165,
          2.3135665228271485,
          2.3065665995025633
        ],
        "train_acc": [
          0.09906,
          0.09972,
          0.09984
        ],
        "test_acc": [
          0.1088,
          0.0958,
          0.1037
        ],
        "state_pred_error": [
          0.13290744504570962,
          0.04644830721378326,
          0.02964444874405861
        ]
      },
      "diagnostics": {
        "bp_cosine": [
          0.02890743687748909,
          0.029086019843816757,
          0.02471546083688736,
          0.02299082651734352
        ],
        "perturbation_rho": [
          0.06398507952690125,
          0.03261129930615425,
          -0.0327291414141655,
          0.03132244944572449
        ],
        "nudging": {
          "0.001": [
            -8.400529623031616e-07,
            -5.029141902923584e-07,
            -2.8870999813079834e-07,
            -1.3969838619232178e-07
          ],
          "0.003": [
            -2.5480985641479492e-06,
            -1.559033989906311e-06,
            -7.82310962677002e-07,
            -4.7497451305389404e-07
          ],
          "0.01": [
            -8.374452590942383e-06,
            -5.166977643966675e-06,
            -2.5797635316848755e-06,
            -1.5459954738616943e-06
          ]
        },
        "hidden_norms_per_layer": [
          382.91375732421875,
          1350.580810546875,
          2999.959716796875,
          4744.67626953125,
          6224.6923828125
        ],
        "bp_grad_norms_per_layer": [
          1.2822482858609874e-05,
          1.0445839507156052e-05,
          1.0443974133522715e-05,
          1.04420678326278e-05,
          1.0443762221257202e-05
        ]
      },
      "drift": {
        "embed.weight": 3.211158219942791,
        "embed.bias": 7.062613177208634,
        "blocks.0.ln.weight": 0.21760645508766174,
        "blocks.0.w1.weight": 2.8678529976663016,
        "blocks.0.w1.bias": 3.956961082186717,
        "blocks.0.w2.weight": 9.394435771614729,
        "blocks.1.ln.weight": 0.18258990347385406,
        "blocks.1.w1.weight": 2.21922766779391,
        "blocks.1.w1.bias": 2.8283530013194156,
        "blocks.1.w2.weight": 7.529057036338004,
        "blocks.2.ln.weight": 0.1940218061208725,
        "blocks.2.w1.weight": 2.6529854316910684,
        "blocks.2.w1.bias": 3.025569884204286,
        "blocks.2.w2.weight": 8.47631449731586,
        "blocks.3.ln.weight": 0.1730765700340271,
        "blocks.3.w1.weight": 2.41631876651548,
        "blocks.3.w1.bias": 2.7588911555760287,
        "blocks.3.w2.weight": 7.962227392442747,
        "out_ln.weight": 0.03852882981300354,
        "out_head.weight": 0.2597139888044846,
        "out_head.bias": 0.22334529177207701
      }
    },
    "credit_bridge": {
      "log": {
        "train_loss": [
          2.320697575531006,
          2.3160406340026856,
          2.307044482879639
        ],
        "train_acc": [
          0.09946,
          0.09836,
          0.10142
        ],
        "test_acc": [
          0.0998,
          0.1055,
          0.0925
        ],
        "value_loss": [
          0.36024896956682206,
          0.6458554600691795,
          0.018822102856636047
        ]
      },
      "diagnostics": {
        "bp_cosine": [
          -0.0007630798500031233,
          -0.009494196623563766,
          -0.009180156514048576,
          -0.007028202060610056
        ],
        "perturbation_rho": [
          -0.014483902603387833,
          0.01178690791130066,
          -0.011402066797018051,
          0.02756977453827858
        ],
        "nudging": {
          "0.001": [
            -4.284083843231201e-08,
            6.146728992462158e-08,
            4.842877388000488e-08,
            3.91155481338501e-08
          ],
          "0.003": [
            -1.4528632164001465e-07,
            1.5459954738616943e-07,
            1.471489667892456e-07,
            6.891787052154541e-08
          ],
          "0.01": [
            -4.637986421585083e-07,
            6.239861249923706e-07,
            4.6193599700927734e-07,
            2.998858690261841e-07
          ]
        },
        "hidden_norms_per_layer": [
          1157.2740478515625,
          10477.9150390625,
          16051.2392578125,
          19347.453125,
          19974.22265625
        ],
        "bp_grad_norms_per_layer": [
          4.064333097630879e-06,
          3.1959411899151746e-06,
          3.1822878554521594e-06,
          3.1774463877809467e-06,
          3.1793581456440734e-06
        ]
      },
      "drift": {
        "embed.weight": 7.825915877663139,
        "embed.bias": 10.392833847164916,
        "blocks.0.ln.weight": 0.32047849893569946,
        "blocks.0.w1.weight": 6.762743850808533,
        "blocks.0.w1.bias": 8.151214078637885,
        "blocks.0.w2.weight": 17.774887825977164,
        "blocks.1.ln.weight": 0.28780925273895264,
        "blocks.1.w1.weight": 6.646412153864217,
        "blocks.1.w1.bias": 7.28996940865764,
        "blocks.1.w2.weight": 16.59156331214822,
        "blocks.2.ln.weight": 0.30320411920547485,
        "blocks.2.w1.weight": 6.002501250935489,
        "blocks.2.w1.bias": 5.318388908944272,
        "blocks.2.w2.weight": 16.00213789556674,
        "blocks.3.ln.weight": 0.24688510596752167,
        "blocks.3.w1.weight": 3.6772500170447437,
        "blocks.3.w1.bias": 4.008142009024597,
        "blocks.3.w2.weight": 13.91740354386689,
        "out_ln.weight": 0.03830864652991295,
        "out_head.weight": 0.3261551698718769,
        "out_head.bias": 0.2861540512185052
      }
    }
  },
  "config": {
    "dataset": "cifar10",
    "d_hidden": 256,
    "num_blocks": 4,
    "batch_size": 128,
    "epochs": 3,
    "lr": 0.001,
    "lr_fb": 0.001,
    "wd": 0.01,
    "lam": 0.1,
    "K": 4,
    "sigma_bridge": 0.05,
    "ema_momentum": 0.995,
    "term_grad_weight": 1.0,
    "seeds": [
      42
    ],
    "gpu": 0,
    "output_dir": "results/optionSBCB_smoke",
    "methods": [
      "state_bridge",
      "credit_bridge"
    ],
    "random_targets": true,
    "num_classes": 10
  }
}