{ "42": { "fa": { "log": { "train_loss": [ 2.001119369430542, 1.9333542645263673, 1.9240801361083983, 1.9216663201141357, 1.918965964050293, 1.9119908702850341, 1.9097738315582276, 1.9049057181167603, 1.9038385245513916, 1.901488224105835, 1.8975307934951782, 1.8936708712768555, 1.8894188814544677, 1.8864306787872314, 1.8895101571655273, 1.8835669344329835, 1.8832266155624389, 1.8791197660064698, 1.8797098063659667, 1.8761708308410645, 1.875095842552185, 1.8748220810317993, 1.8743982402801513, 1.8738871404266357, 1.8735411545562743, 1.8693548073577881, 1.8659294427871704, 1.8685178174591064, 1.8670922566986083, 1.8683236782455444 ], "train_acc": [ 0.27484, 0.30662, 0.31156, 0.3128, 0.31684, 0.3173, 0.32102, 0.32226, 0.3248, 0.32594, 0.32582, 0.33134, 0.33202, 0.33344, 0.33274, 0.33626, 0.33556, 0.33774, 0.33918, 0.33762, 0.34082, 0.34052, 0.34092, 0.34432, 0.34294, 0.3436, 0.34578, 0.34654, 0.34636, 0.3472 ], "test_acc": [ 0.3237, 0.3505, 0.3372, 0.3327, 0.3511, 0.3455, 0.3449, 0.344, 0.3413, 0.3462, 0.35, 0.3511, 0.3604, 0.3569, 0.3565, 0.3584, 0.3634, 0.3651, 0.3684, 0.362, 0.3674, 0.3586, 0.3704, 0.3688, 0.3729, 0.3715, 0.3702, 0.3713, 0.3715, 0.3713 ] }, "diagnostics": { "bp_cosine": [ 0.154661625623703, 0.148232564330101, 0.11714619398117065, 0.9808143377304077 ], "perturbation_rho": [ 0.2043638527393341, 0.03451352193951607, 0.20099492371082306, 0.6849091649055481 ], "nudging": { "0.001": [ -7.456343155354261e-06, -7.0015667006373405e-06, -6.1238533817231655e-06, -4.935957258567214e-05 ], "0.003": [ -2.2464897483587265e-05, -2.095731906592846e-05, -1.8515565898269415e-05, -0.00014814879978075624 ], "0.01": [ -7.48702441342175e-05, -6.984942592680454e-05, -6.168842082843184e-05, -0.0004938761703670025 ] }, "hidden_norms_per_layer": [ 12005.7001953125, 12007.416015625, 12011.4248046875, 12003.9306640625, 12006.5830078125 ], "bp_grad_norms_per_layer": [ 1.7161242794827558e-05, 1.712461198621895e-05, 1.7262802430195734e-05, 1.6996016711345874e-05, 1.602559132152237e-05 ] }, "drift": { "embed.weight": 78.51231332812849, "embed.bias": 102.12148052673881, "blocks.0.ln.weight": 0.2991768419742584, "blocks.0.w1.weight": 3.288291856838975, "blocks.0.w1.bias": 5.396373593768907, "blocks.0.w2.weight": 18.07176198506393, "blocks.1.ln.weight": 0.30484291911125183, "blocks.1.w1.weight": 3.363888651672944, "blocks.1.w1.bias": 5.6524749125654825, "blocks.1.w2.weight": 19.430774882635927, "blocks.2.ln.weight": 0.3102824091911316, "blocks.2.w1.weight": 3.4139876185805202, "blocks.2.w1.bias": 5.452178486528747, "blocks.2.w2.weight": 20.125989565290535, "blocks.3.ln.weight": 0.3048917055130005, "blocks.3.w1.weight": 3.480676314799551, "blocks.3.w1.bias": 5.215887111238424, "blocks.3.w2.weight": 18.601435796851646, "out_ln.weight": 0.20746967196464539, "out_head.weight": 2.830159264066497, "out_head.bias": 1.3353853268796754 } } }, "123": { "fa": { "log": { "train_loss": [ 1.9927266687774658, 1.9300689645767213, 1.92596540309906, 1.9193654559326172, 1.9174729382324218, 1.914334167137146, 1.9108343372344971, 1.9098404777526856, 1.9070983071517944, 1.9046820580291748, 1.9035546282958984, 1.902319673461914, 1.9007709796524048, 1.8978776998901368, 1.896000676651001, 1.8922497060394288, 1.889219735031128, 1.8912379706573486, 1.8899934759902954, 1.886697685585022, 1.8858332043457031, 1.8851475219726563, 1.8850083080673217, 1.8836419734191894, 1.8818570189666748, 1.880578341293335, 1.8828362002563477, 1.8791149730682373, 1.881042135925293, 1.8783651885986328 ], "train_acc": [ 0.28066, 0.30852, 0.31142, 0.3131, 0.31726, 0.31894, 0.31884, 0.3222, 0.32346, 0.325, 0.32962, 0.32534, 0.32718, 0.32974, 0.33498, 0.33296, 0.33498, 0.33642, 0.33698, 0.33676, 0.3368, 0.33892, 0.33858, 0.34048, 0.34248, 0.34264, 0.34458, 0.34442, 0.34196, 0.3428 ], "test_acc": [ 0.3339, 0.344, 0.3447, 0.3537, 0.3523, 0.3448, 0.3569, 0.3548, 0.3564, 0.3513, 0.3558, 0.3555, 0.3578, 0.3565, 0.3511, 0.3617, 0.3621, 0.3603, 0.3619, 0.36, 0.3663, 0.3679, 0.3665, 0.3645, 0.363, 0.3632, 0.3656, 0.3666, 0.3666, 0.366 ] }, "diagnostics": { "bp_cosine": [ 0.10381253063678741, 0.11509518325328827, 0.17561045289039612, 0.9849518537521362 ], "perturbation_rho": [ -0.013579179532825947, 0.098294198513031, 0.1665695607662201, 0.7168847322463989 ], "nudging": { "0.001": [ -3.704102709889412e-06, -5.607143975794315e-06, -8.841510862112045e-06, -5.108967889100313e-05 ], "0.003": [ -1.0923598892986774e-05, -1.6769510693848133e-05, -2.6412191800773144e-05, -0.00015316426288336515 ], "0.01": [ -3.6393641494214535e-05, -5.586724728345871e-05, -8.803850505501032e-05, -0.0005103998119011521 ] }, "hidden_norms_per_layer": [ 12513.1455078125, 12514.5771484375, 12517.3583984375, 12520.568359375, 12517.57421875 ], "bp_grad_norms_per_layer": [ 1.9153141693095677e-05, 1.9135366528644226e-05, 1.8911112420028076e-05, 1.892356522148475e-05, 1.7506923541077413e-05 ] }, "drift": { "embed.weight": 89.53228672658916, "embed.bias": 152.8584432062178, "blocks.0.ln.weight": 0.2654326856136322, "blocks.0.w1.weight": 3.3634291127986065, "blocks.0.w1.bias": 5.280393391584056, "blocks.0.w2.weight": 19.28885452796699, "blocks.1.ln.weight": 0.2610284984111786, "blocks.1.w1.weight": 3.3605846097214807, "blocks.1.w1.bias": 4.889820303164699, "blocks.1.w2.weight": 20.281310673104024, "blocks.2.ln.weight": 0.2743469476699829, "blocks.2.w1.weight": 3.391098035259505, "blocks.2.w1.bias": 4.733926864945133, "blocks.2.w2.weight": 20.84238400209855, "blocks.3.ln.weight": 0.28759220242500305, "blocks.3.w1.weight": 3.4831818014183473, "blocks.3.w1.bias": 4.539792496168549, "blocks.3.w2.weight": 17.922103982945213, "out_ln.weight": 0.23056229948997498, "out_head.weight": 2.947940047368198, "out_head.bias": 0.999532168893943 } } }, "456": { "fa": { "log": { "train_loss": [ 2.007471420669556, 1.9475741510772706, 1.935076278114319, 1.9286792455673218, 1.9233443587493897, 1.921987562637329, 1.9133965182113648, 1.9098651378631593, 1.9032709611511232, 1.9005253164291382, 1.8951593602752685, 1.8959063681793213, 1.8941862697982788, 1.8903014281845092, 1.887538351173401, 1.8868630823516845, 1.8855092962646485, 1.882849036026001, 1.88272844581604, 1.880844307899475, 1.8779692416000366, 1.8777563387298584, 1.8754584911346435, 1.8720009698104858, 1.8718394606018067, 1.871922555580139, 1.8742619204330444, 1.871095755996704, 1.8686067379379272, 1.8697797116470336 ], "train_acc": [ 0.27346, 0.30166, 0.30968, 0.31384, 0.3171, 0.31864, 0.32234, 0.32546, 0.32768, 0.32608, 0.33286, 0.33212, 0.33646, 0.33228, 0.33466, 0.33496, 0.33834, 0.34088, 0.33898, 0.34018, 0.3417, 0.34182, 0.3446, 0.34544, 0.34776, 0.34688, 0.3461, 0.34764, 0.3487, 0.34986 ], "test_acc": [ 0.3312, 0.3461, 0.3375, 0.3554, 0.3417, 0.349, 0.3492, 0.3315, 0.3524, 0.3561, 0.3452, 0.3612, 0.3675, 0.361, 0.3588, 0.3671, 0.3652, 0.3621, 0.3608, 0.3682, 0.3607, 0.3563, 0.3698, 0.3701, 0.3677, 0.373, 0.3704, 0.3674, 0.37, 0.3695 ] }, "diagnostics": { "bp_cosine": [ 0.1374516785144806, 0.16781684756278992, 0.13626089692115784, 0.9774131774902344 ], "perturbation_rho": [ 0.0486995093524456, 0.06349372863769531, 0.007340744137763977, 0.6342363357543945 ], "nudging": { "0.001": [ -5.1066745072603226e-06, -7.127760909497738e-06, -6.226240657269955e-06, -4.646868910640478e-05 ], "0.003": [ -1.5374505892395973e-05, -2.130062784999609e-05, -1.874461304396391e-05, -0.0001394655555486679 ], "0.01": [ -5.139666609466076e-05, -7.112661842256784e-05, -6.259605288505554e-05, -0.0004648104077205062 ] }, "hidden_norms_per_layer": [ 12142.6181640625, 12149.4814453125, 12152.4228515625, 12154.3623046875, 12153.6318359375 ], "bp_grad_norms_per_layer": [ 1.7370659406878985e-05, 1.7375436073052697e-05, 1.7465752534917556e-05, 1.755404082359746e-05, 1.6847530787345022e-05 ] }, "drift": { "embed.weight": 81.50708429789378, "embed.bias": 90.80998898870095, "blocks.0.ln.weight": 0.30019262433052063, "blocks.0.w1.weight": 3.2393405099782573, "blocks.0.w1.bias": 5.11706466812565, "blocks.0.w2.weight": 17.26999421209171, "blocks.1.ln.weight": 0.29391026496887207, "blocks.1.w1.weight": 3.2524768916503883, "blocks.1.w1.bias": 5.375142102957966, "blocks.1.w2.weight": 17.822565033735142, "blocks.2.ln.weight": 0.29711613059043884, "blocks.2.w1.weight": 3.347203060867532, "blocks.2.w1.bias": 5.458224100319586, "blocks.2.w2.weight": 19.038612136675116, "blocks.3.ln.weight": 0.3008574843406677, "blocks.3.w1.weight": 3.407494069111075, "blocks.3.w1.bias": 5.482950458082574, "blocks.3.w2.weight": 17.98442064571867, "out_ln.weight": 0.21060113608837128, "out_head.weight": 2.7473203281973158, "out_head.bias": 0.9871044007414218 } } }, "config": { "dataset": "cifar10", "d_hidden": 256, "num_blocks": 4, "batch_size": 128, "epochs": 30, "lr": 0.001, "lr_fb": 0.001, "wd": 0.01, "lam": 0.1, "K": 4, "sigma_bridge": 0.05, "ema_momentum": 0.995, "term_grad_weight": 1.0, "seeds": [ 42, 123, 456 ], "gpu": 0, "output_dir": "results/fa_penalty_30ep", "methods": [ "fa" ], "random_targets": false, "penalty_lam": 0.01, "num_classes": 10 } }