1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
|
{
"42": {
"state_bridge": {
"log": {
"train_loss": [
2.32298729019165,
2.3135665228271485,
2.3065665995025633
],
"train_acc": [
0.09906,
0.09972,
0.09984
],
"test_acc": [
0.1088,
0.0958,
0.1037
],
"state_pred_error": [
0.13290744504570962,
0.04644830721378326,
0.02964444874405861
]
},
"diagnostics": {
"bp_cosine": [
0.02890743687748909,
0.029086019843816757,
0.02471546083688736,
0.02299082651734352
],
"perturbation_rho": [
0.06398507952690125,
0.03261129930615425,
-0.0327291414141655,
0.03132244944572449
],
"nudging": {
"0.001": [
-8.400529623031616e-07,
-5.029141902923584e-07,
-2.8870999813079834e-07,
-1.3969838619232178e-07
],
"0.003": [
-2.5480985641479492e-06,
-1.559033989906311e-06,
-7.82310962677002e-07,
-4.7497451305389404e-07
],
"0.01": [
-8.374452590942383e-06,
-5.166977643966675e-06,
-2.5797635316848755e-06,
-1.5459954738616943e-06
]
},
"hidden_norms_per_layer": [
382.91375732421875,
1350.580810546875,
2999.959716796875,
4744.67626953125,
6224.6923828125
],
"bp_grad_norms_per_layer": [
1.2822482858609874e-05,
1.0445839507156052e-05,
1.0443974133522715e-05,
1.04420678326278e-05,
1.0443762221257202e-05
]
},
"drift": {
"embed.weight": 3.211158219942791,
"embed.bias": 7.062613177208634,
"blocks.0.ln.weight": 0.21760645508766174,
"blocks.0.w1.weight": 2.8678529976663016,
"blocks.0.w1.bias": 3.956961082186717,
"blocks.0.w2.weight": 9.394435771614729,
"blocks.1.ln.weight": 0.18258990347385406,
"blocks.1.w1.weight": 2.21922766779391,
"blocks.1.w1.bias": 2.8283530013194156,
"blocks.1.w2.weight": 7.529057036338004,
"blocks.2.ln.weight": 0.1940218061208725,
"blocks.2.w1.weight": 2.6529854316910684,
"blocks.2.w1.bias": 3.025569884204286,
"blocks.2.w2.weight": 8.47631449731586,
"blocks.3.ln.weight": 0.1730765700340271,
"blocks.3.w1.weight": 2.41631876651548,
"blocks.3.w1.bias": 2.7588911555760287,
"blocks.3.w2.weight": 7.962227392442747,
"out_ln.weight": 0.03852882981300354,
"out_head.weight": 0.2597139888044846,
"out_head.bias": 0.22334529177207701
}
},
"credit_bridge": {
"log": {
"train_loss": [
2.320697575531006,
2.3160406340026856,
2.307044482879639
],
"train_acc": [
0.09946,
0.09836,
0.10142
],
"test_acc": [
0.0998,
0.1055,
0.0925
],
"value_loss": [
0.36024896956682206,
0.6458554600691795,
0.018822102856636047
]
},
"diagnostics": {
"bp_cosine": [
-0.0007630798500031233,
-0.009494196623563766,
-0.009180156514048576,
-0.007028202060610056
],
"perturbation_rho": [
-0.014483902603387833,
0.01178690791130066,
-0.011402066797018051,
0.02756977453827858
],
"nudging": {
"0.001": [
-4.284083843231201e-08,
6.146728992462158e-08,
4.842877388000488e-08,
3.91155481338501e-08
],
"0.003": [
-1.4528632164001465e-07,
1.5459954738616943e-07,
1.471489667892456e-07,
6.891787052154541e-08
],
"0.01": [
-4.637986421585083e-07,
6.239861249923706e-07,
4.6193599700927734e-07,
2.998858690261841e-07
]
},
"hidden_norms_per_layer": [
1157.2740478515625,
10477.9150390625,
16051.2392578125,
19347.453125,
19974.22265625
],
"bp_grad_norms_per_layer": [
4.064333097630879e-06,
3.1959411899151746e-06,
3.1822878554521594e-06,
3.1774463877809467e-06,
3.1793581456440734e-06
]
},
"drift": {
"embed.weight": 7.825915877663139,
"embed.bias": 10.392833847164916,
"blocks.0.ln.weight": 0.32047849893569946,
"blocks.0.w1.weight": 6.762743850808533,
"blocks.0.w1.bias": 8.151214078637885,
"blocks.0.w2.weight": 17.774887825977164,
"blocks.1.ln.weight": 0.28780925273895264,
"blocks.1.w1.weight": 6.646412153864217,
"blocks.1.w1.bias": 7.28996940865764,
"blocks.1.w2.weight": 16.59156331214822,
"blocks.2.ln.weight": 0.30320411920547485,
"blocks.2.w1.weight": 6.002501250935489,
"blocks.2.w1.bias": 5.318388908944272,
"blocks.2.w2.weight": 16.00213789556674,
"blocks.3.ln.weight": 0.24688510596752167,
"blocks.3.w1.weight": 3.6772500170447437,
"blocks.3.w1.bias": 4.008142009024597,
"blocks.3.w2.weight": 13.91740354386689,
"out_ln.weight": 0.03830864652991295,
"out_head.weight": 0.3261551698718769,
"out_head.bias": 0.2861540512185052
}
}
},
"config": {
"dataset": "cifar10",
"d_hidden": 256,
"num_blocks": 4,
"batch_size": 128,
"epochs": 3,
"lr": 0.001,
"lr_fb": 0.001,
"wd": 0.01,
"lam": 0.1,
"K": 4,
"sigma_bridge": 0.05,
"ema_momentum": 0.995,
"term_grad_weight": 1.0,
"seeds": [
42
],
"gpu": 0,
"output_dir": "results/optionSBCB_smoke",
"methods": [
"state_bridge",
"credit_bridge"
],
"random_targets": true,
"num_classes": 10
}
}
|