1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
|
{
"42": {
"fa": {
"log": {
"train_loss": [
2.049124941177368,
1.9718045804214477,
1.9505524127578735
],
"train_acc": [
0.2436,
0.27518,
0.2894
],
"test_acc": [
0.2789,
0.3087,
0.3122
]
},
"diagnostics": {
"bp_cosine": [
0.07161466032266617,
-0.008746136911213398,
-0.016568297520279884,
0.9941877722740173
],
"perturbation_rho": [
0.041674911975860596,
0.0022312882356345654,
-0.008362723514437675,
0.2924357056617737
],
"nudging": {
"0.001": [
-3.0086375772953033e-06,
5.8673322200775146e-08,
7.078051567077637e-08,
-9.350478649139404e-06
],
"0.003": [
-8.966773748397827e-06,
1.1548399925231934e-07,
1.7695128917694092e-07,
-2.79964879155159e-05
],
"0.01": [
-2.9983464628458023e-05,
3.6461278796195984e-07,
5.96512109041214e-07,
-9.332690387964249e-05
]
},
"hidden_norms_per_layer": [
828.2960815429688,
8464.98046875,
19738.599609375,
21368.41796875,
19217.69140625
],
"bp_grad_norms_per_layer": [
1.9956107280449942e-05,
4.579987034958322e-06,
4.49442450189963e-06,
4.525154963630484e-06,
4.308007646613987e-06
]
},
"drift": {
"embed.weight": 7.785380853670397,
"embed.bias": 6.819826161992119,
"blocks.0.ln.weight": 0.42689943313598633,
"blocks.0.w1.weight": 6.155192994774994,
"blocks.0.w1.bias": 6.637409518031749,
"blocks.0.w2.weight": 17.833861612088054,
"blocks.1.ln.weight": 0.352157860994339,
"blocks.1.w1.weight": 5.231153715209586,
"blocks.1.w1.bias": 6.610786582785788,
"blocks.1.w2.weight": 13.997490142949763,
"blocks.2.ln.weight": 0.3031489849090576,
"blocks.2.w1.weight": 4.218717880513288,
"blocks.2.w1.bias": 4.851412113278458,
"blocks.2.w2.weight": 12.191107541748561,
"blocks.3.ln.weight": 0.260187029838562,
"blocks.3.w1.weight": 3.545270787599183,
"blocks.3.w1.bias": 3.6891700957298967,
"blocks.3.w2.weight": 10.80288177600079,
"out_ln.weight": 0.04471425712108612,
"out_head.weight": 0.9764490646917799,
"out_head.bias": 0.41938112118479187
}
}
},
"config": {
"dataset": "cifar10",
"d_hidden": 256,
"num_blocks": 4,
"batch_size": 128,
"epochs": 3,
"lr": 0.001,
"lr_fb": 0.001,
"wd": 0.01,
"lam": 0.1,
"K": 4,
"sigma_bridge": 0.05,
"ema_momentum": 0.995,
"term_grad_weight": 1.0,
"seeds": [
42
],
"gpu": 0,
"output_dir": "results/fa_smoke_test",
"methods": [
"fa"
],
"random_targets": false,
"penalty_lam": 0.0,
"num_classes": 10
}
}
|