[ { "method": "dfa", "L": 4, "d_hidden": 256, "seed": 42, "warmup_ratio": 0.2, "term_grad_weight": 1.0, "test_acc": 0.3116, "mean_gamma": 0.10076353600015864, "mean_rho": -0.00476757250726223, "mean_nudge": -3.260793164372444e-07, "per_layer_gamma": [ 0.42987197637557983, 0.002452872460708022, -0.014306485652923584, -0.014964219182729721 ], "per_layer_rho": [ -0.01907029002904892, 0.0, 0.0, 0.0 ], "per_layer_nudge": [ -1.3024546205997467e-06, -1.862645149230957e-09, 0.0, 0.0 ] }, { "method": "sb_eT", "L": 4, "d_hidden": 256, "seed": 42, "warmup_ratio": 0.2, "term_grad_weight": 1.0, "test_acc": 0.1737, "mean_gamma": 0.02469697833294049, "mean_rho": -0.012663604691624641, "mean_nudge": -1.6051344573497772e-06, "per_layer_gamma": [ 0.02607825957238674, 0.049343302845954895, 0.002323275664821267, 0.021043075248599052 ], "per_layer_rho": [ -0.050654418766498566, 0.0, 0.0, 0.0 ], "per_layer_nudge": [ -6.420537829399109e-06, 0.0, 0.0, 0.0 ] }, { "method": "cb_eT", "L": 4, "d_hidden": 256, "seed": 42, "warmup_ratio": 0.0, "term_grad_weight": 1.0, "test_acc": 0.1546, "mean_gamma": 0.010205775421354701, "mean_rho": 0.0012211860157549381, "mean_nudge": -5.587935447692871e-09, "per_layer_gamma": [ 0.028749946504831314, 0.012085458263754845, 5.072341991763096e-06, -1.7375425159116276e-05 ], "per_layer_rho": [ 0.0048847440630197525, 0.0, 0.0, 0.0 ], "per_layer_nudge": [ -2.2351741790771484e-08, 0.0, 0.0, 0.0 ] }, { "method": "cb_eT", "L": 4, "d_hidden": 256, "seed": 42, "warmup_ratio": 0.0, "term_grad_weight": 4.0, "test_acc": 0.1169, "mean_gamma": 0.004216111148707569, "mean_rho": 0.00741030735662207, "mean_nudge": -7.916241884231567e-09, "per_layer_gamma": [ 0.0018346477299928665, 0.019228298217058182, -0.002127251587808132, -0.0020712497644126415 ], "per_layer_rho": [ 0.03154653310775757, -0.001905303681269288, 0.0, 0.0 ], "per_layer_nudge": [ -2.60770320892334e-08, -5.587935447692871e-09, 0.0, 0.0 ] }, { "method": "cb_eT", "L": 4, "d_hidden": 256, "seed": 42, "warmup_ratio": 0.05, "term_grad_weight": 1.0, "test_acc": 0.1049, "mean_gamma": -0.0009609744556655642, "mean_rho": 0.0024358099326491356, "mean_nudge": 3.259629011154175e-09, "per_layer_gamma": [ -0.004597642458975315, -3.2639800338074565e-06, 0.0004314985126256943, 0.0003255101037211716 ], "per_layer_rho": [ 0.009743239730596542, 0.0, 0.0, 0.0 ], "per_layer_nudge": [ 1.30385160446167e-08, 0.0, 0.0, 0.0 ] }, { "method": "cb_eT", "L": 4, "d_hidden": 256, "seed": 42, "warmup_ratio": 0.05, "term_grad_weight": 4.0, "test_acc": 0.1798, "mean_gamma": 0.06056667093071155, "mean_rho": 0.03483579680323601, "mean_nudge": -1.0756775736808777e-06, "per_layer_gamma": [ 0.2081003189086914, 0.033366840332746506, 0.0005531693459488451, 0.00024635513545945287 ], "per_layer_rho": [ 0.13934318721294403, 0.0, 0.0, 0.0 ], "per_layer_nudge": [ -4.302710294723511e-06, 0.0, 0.0, 0.0 ] }, { "method": "cb_eT", "L": 4, "d_hidden": 256, "seed": 42, "warmup_ratio": 0.2, "term_grad_weight": 1.0, "test_acc": 0.283, "mean_gamma": 0.17921950668096542, "mean_rho": 0.008699589408934116, "mean_nudge": -8.21426510810852e-07, "per_layer_gamma": [ 0.5069771409034729, 0.05348020792007446, 0.07490736246109009, 0.08151331543922424 ], "per_layer_rho": [ 0.034798357635736465, 0.0, 0.0, 0.0 ], "per_layer_nudge": [ -3.285706043243408e-06, 0.0, 0.0, 0.0 ] }, { "method": "cb_eT", "L": 4, "d_hidden": 256, "seed": 42, "warmup_ratio": 0.2, "term_grad_weight": 4.0, "test_acc": 0.2854, "mean_gamma": 0.18741484452039003, "mean_rho": 0.0021140535827726126, "mean_nudge": -8.436618372797966e-07, "per_layer_gamma": [ 0.5276610851287842, 0.046022433787584305, 0.08646765351295471, 0.08950820565223694 ], "per_layer_rho": [ 0.00845621433109045, 0.0, 0.0, 0.0 ], "per_layer_nudge": [ -3.3746473491191864e-06, 0.0, 0.0, 0.0 ] }, { "method": "cb_deltaL", "L": 4, "d_hidden": 256, "seed": 42, "warmup_ratio": 0.0, "term_grad_weight": 1.0, "test_acc": 0.0953, "mean_gamma": 0.00023498532198695798, "mean_rho": -0.002640543971210718, "mean_nudge": 4.190951585769653e-09, "per_layer_gamma": [ 0.0009388166945427656, 1.1305664884275757e-06, -2.059467707482554e-09, -3.91361565377224e-09 ], "per_layer_rho": [ -0.010562175884842873, 0.0, 0.0, 0.0 ], "per_layer_nudge": [ 1.6763806343078613e-08, 0.0, 0.0, 0.0 ] }, { "method": "cb_deltaL", "L": 4, "d_hidden": 256, "seed": 42, "warmup_ratio": 0.0, "term_grad_weight": 4.0, "test_acc": 0.1084, "mean_gamma": -0.000962152166505803, "mean_rho": -0.006210822146385908, "mean_nudge": 9.313225746154785e-10, "per_layer_gamma": [ -0.003848549909889698, -5.8880857523035957e-08, 4.6535258979574223e-10, -3.4062858089711767e-10 ], "per_layer_rho": [ -0.024843288585543633, 0.0, 0.0, 0.0 ], "per_layer_nudge": [ 3.725290298461914e-09, 0.0, 0.0, 0.0 ] }, { "method": "cb_deltaL", "L": 4, "d_hidden": 256, "seed": 42, "warmup_ratio": 0.05, "term_grad_weight": 1.0, "test_acc": 0.1092, "mean_gamma": 0.009919490943730125, "mean_rho": -0.008342609740793705, "mean_nudge": -2.3283064365386963e-10, "per_layer_gamma": [ 0.03979068249464035, -0.0001326934725511819, 2.563164889579639e-05, -5.6568960644654e-06 ], "per_layer_rho": [ -0.03337043896317482, 0.0, 0.0, 0.0 ], "per_layer_nudge": [ -9.313225746154785e-10, 0.0, 0.0, 0.0 ] }, { "method": "cb_deltaL", "L": 4, "d_hidden": 256, "seed": 42, "warmup_ratio": 0.05, "term_grad_weight": 4.0, "test_acc": 0.1155, "mean_gamma": -0.0010879231473768236, "mean_rho": -0.0036583333276212215, "mean_nudge": 3.725290298461914e-09, "per_layer_gamma": [ -0.004351496696472168, -1.6072939956757182e-07, -2.6652518414493898e-08, -8.51111714439412e-09 ], "per_layer_rho": [ -0.014633333310484886, 0.0, 0.0, 0.0 ], "per_layer_nudge": [ 1.4901161193847656e-08, 0.0, 0.0, 0.0 ] }, { "method": "cb_deltaL", "L": 4, "d_hidden": 256, "seed": 42, "warmup_ratio": 0.2, "term_grad_weight": 1.0, "test_acc": 0.1851, "mean_gamma": 0.0015729351434856653, "mean_rho": 0.0014943215064704418, "mean_nudge": -7.450580596923828e-09, "per_layer_gamma": [ -0.0012846197932958603, 0.005665094591677189, 0.004892412573099136, -0.0029811467975378036 ], "per_layer_rho": [ 0.005977286025881767, 0.0, 0.0, 0.0 ], "per_layer_nudge": [ -2.9802322387695312e-08, 0.0, 0.0, 0.0 ] }, { "method": "cb_deltaL", "L": 4, "d_hidden": 256, "seed": 42, "warmup_ratio": 0.2, "term_grad_weight": 4.0, "test_acc": 0.1706, "mean_gamma": 0.007908080180641264, "mean_rho": -0.014252320863306522, "mean_nudge": -6.565824151039124e-08, "per_layer_gamma": [ 0.020493682473897934, -0.0016250908374786377, 0.002295387675985694, 0.010468341410160065 ], "per_layer_rho": [ -0.01426580548286438, -0.04274347797036171, 0.0, 0.0 ], "per_layer_nudge": [ -2.6263296604156494e-07, 0.0, 0.0, 0.0 ] } ]