1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
|
{
"s_eT_tgw0.0_wr0.2": {
"test_acc": 0.532,
"mean_bp_cosine": 0.11989971622824669,
"mean_rho": 0.16079141944646835,
"mean_nudge": -0.010690351715311408,
"bp_cosine_per_layer": [
0.03718709945678711,
0.1347142904996872,
0.15199805796146393,
0.15569941699504852
],
"rho_per_layer": [
0.08883650600910187,
0.1502864807844162,
0.20964613556861877,
0.19439655542373657
],
"final_value_loss": 0.07598549057245255,
"s_type": "eT",
"term_grad_weight": 0.0,
"warmup_ratio": 0.2
},
"s_eT_tgw0.25_wr0.2": {
"test_acc": 0.558,
"mean_bp_cosine": 0.22698336280882359,
"mean_rho": 0.2679573856294155,
"mean_nudge": -0.0202574310824275,
"bp_cosine_per_layer": [
0.09379873424768448,
0.2528688311576843,
0.2722504138946533,
0.2890154719352722
],
"rho_per_layer": [
0.13511960208415985,
0.289243221282959,
0.33815300464630127,
0.30931371450424194
],
"final_value_loss": 0.16253001551628113,
"s_type": "eT",
"term_grad_weight": 0.25,
"warmup_ratio": 0.2
},
"s_eT_tgw1.0_wr0.2": {
"test_acc": 0.558,
"mean_bp_cosine": 0.45794273912906647,
"mean_rho": 0.5322257168591022,
"mean_nudge": -0.038075629621744156,
"bp_cosine_per_layer": [
0.17809242010116577,
0.5349531173706055,
0.5522423982620239,
0.5664830207824707
],
"rho_per_layer": [
0.22843755781650543,
0.5910820364952087,
0.6707864999771118,
0.638596773147583
],
"final_value_loss": 0.38718592133522034,
"s_type": "eT",
"term_grad_weight": 1.0,
"warmup_ratio": 0.2
},
"s_eT_tgw4.0_wr0.2": {
"test_acc": 0.3935,
"mean_bp_cosine": 0.5738203078508377,
"mean_rho": 0.5946865696460009,
"mean_nudge": -0.04504407802596688,
"bp_cosine_per_layer": [
0.07111608982086182,
0.6408071517944336,
0.787927508354187,
0.7954304814338684
],
"rho_per_layer": [
0.10197217017412186,
0.6885993480682373,
0.7956527471542358,
0.7925220131874084
],
"final_value_loss": 1.2652584648132323,
"s_type": "eT",
"term_grad_weight": 4.0,
"warmup_ratio": 0.2
}
}
|