1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
|
[
{
"method": "dfa",
"L": 4,
"d": 256,
"seed": 42,
"warmup_ratio": 0.2,
"term_weight": 1.0,
"M": 4,
"test_acc": 0.3116,
"mean_gamma": 0.10076353600015864,
"mean_rho": -0.00476757250726223,
"mean_nudge": -3.260793164372444e-07,
"per_layer_gamma": [
0.42987197637557983,
0.002452872460708022,
-0.014306485652923584,
-0.014964219182729721
],
"per_layer_rho": [
-0.01907029002904892,
0.0,
0.0,
0.0
]
},
{
"method": "vec_eT_M4",
"L": 4,
"d": 256,
"seed": 42,
"warmup_ratio": 0.0,
"term_weight": 1.0,
"M": 4,
"test_acc": 0.1585,
"mean_gamma": 0.0070959172576294804,
"mean_rho": 0.004696090705692768,
"mean_nudge": 1.6298145055770874e-09,
"per_layer_gamma": [
0.02838953770697117,
-1.1787886251113378e-05,
1.5722671378171071e-06,
4.346942660049535e-06
],
"per_layer_rho": [
0.018784362822771072,
0.0,
0.0,
0.0
]
},
{
"method": "vec_eT_M4",
"L": 4,
"d": 256,
"seed": 42,
"warmup_ratio": 0.0,
"term_weight": 4.0,
"M": 4,
"test_acc": 0.1547,
"mean_gamma": -0.004175424227696567,
"mean_rho": 0.007184227928519249,
"mean_nudge": -3.725290298461914e-09,
"per_layer_gamma": [
-0.016694847494363785,
-1.839158903749194e-05,
5.305797913024435e-06,
6.2363747019844595e-06
],
"per_layer_rho": [
0.028736911714076996,
0.0,
0.0,
0.0
]
},
{
"method": "vec_eT_M4",
"L": 4,
"d": 256,
"seed": 42,
"warmup_ratio": 0.05,
"term_weight": 1.0,
"M": 4,
"test_acc": 0.1301,
"mean_gamma": 0.0003392250334854907,
"mean_rho": -0.006982688792049885,
"mean_nudge": 2.561137080192566e-09,
"per_layer_gamma": [
0.0014179275603964925,
2.4803875930956565e-06,
-5.080455230199732e-05,
-1.270326174562797e-05
],
"per_layer_rho": [
-0.02793075516819954,
0.0,
0.0,
0.0
]
},
{
"method": "vec_eT_M4",
"L": 4,
"d": 256,
"seed": 42,
"warmup_ratio": 0.05,
"term_weight": 4.0,
"M": 4,
"test_acc": 0.1585,
"mean_gamma": 0.001455232677017193,
"mean_rho": -0.004349564202129841,
"mean_nudge": 2.3283064365386963e-10,
"per_layer_gamma": [
0.005935228429734707,
-4.2289950215490535e-05,
-2.683553066162858e-05,
-4.517224078881554e-05
],
"per_layer_rho": [
-0.017398256808519363,
0.0,
0.0,
0.0
]
},
{
"method": "vec_eT_M4",
"L": 4,
"d": 256,
"seed": 42,
"warmup_ratio": 0.2,
"term_weight": 1.0,
"M": 4,
"test_acc": 0.2425,
"mean_gamma": 0.0011495156340970425,
"mean_rho": 0.00014320318587124348,
"mean_nudge": -8.847564458847046e-09,
"per_layer_gamma": [
0.004161187447607517,
8.946975140133873e-05,
0.00017050666792783886,
0.00017689866945147514
],
"per_layer_rho": [
0.0005728127434849739,
0.0,
0.0,
0.0
]
},
{
"method": "vec_eT_M4",
"L": 4,
"d": 256,
"seed": 42,
"warmup_ratio": 0.2,
"term_weight": 4.0,
"M": 4,
"test_acc": 0.1985,
"mean_gamma": 0.0039006864826660603,
"mean_rho": 0.0011303124483674765,
"mean_nudge": -1.234002411365509e-08,
"per_layer_gamma": [
0.014948057942092419,
0.0001860432676039636,
0.00021447567269206047,
0.00025416904827579856
],
"per_layer_rho": [
0.004521249793469906,
0.0,
0.0,
0.0
]
}
]
|