1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
|
[
{
"method": "dfa",
"L": 4,
"d_hidden": 256,
"seed": 123,
"warmup_ratio": 0.2,
"term_grad_weight": 1.0,
"test_acc": 0.3106,
"mean_gamma": 0.11738517042249441,
"mean_rho": 0.005885639227926731,
"mean_nudge": -4.444736987352371e-07,
"per_layer_gamma": [
0.4517223834991455,
0.012923447415232658,
0.004009386524558067,
0.0008854642510414124
],
"per_layer_rho": [
0.023542556911706924,
0.0,
0.0,
0.0
],
"per_layer_nudge": [
-1.7816200852394104e-06,
3.725290298461914e-09,
0.0,
0.0
]
},
{
"method": "cb_eT",
"L": 4,
"d_hidden": 256,
"seed": 123,
"warmup_ratio": 0.2,
"term_grad_weight": 1.0,
"test_acc": 0.2753,
"mean_gamma": 0.17617796920239925,
"mean_rho": 0.0019302130676805973,
"mean_nudge": -7.422640919685364e-07,
"per_layer_gamma": [
0.5222728252410889,
0.043591007590293884,
0.0659613385796547,
0.07288670539855957
],
"per_layer_rho": [
0.022736016660928726,
0.0,
-0.015015164390206337,
0.0
],
"per_layer_nudge": [
-2.9690563678741455e-06,
0.0,
0.0,
0.0
]
}
]
|