{ "42": { "fa": { "log": { "train_loss": [ 2.049124941177368, 1.9718926105117798, 1.9489588039398194, 1.934002057762146, 1.9262304685211182 ], "train_acc": [ 0.2436, 0.2745, 0.29034, 0.29818, 0.301 ], "test_acc": [ 0.2789, 0.3111, 0.3091, 0.3094, 0.3219 ] }, "diagnostics": { "bp_cosine": [ 0.0644938200712204, 0.00024983659386634827, -0.006781474687159061, 0.9952471852302551 ], "perturbation_rho": [ 0.06215526908636093, -0.005929573439061642, 0.029685823246836662, 0.183500275015831 ], "nudging": { "0.001": [ -2.8510112315416336e-06, -6.868503987789154e-08, -1.4668330550193787e-08, -6.758375093340874e-06 ], "0.003": [ -8.508563041687012e-06, -1.6507692635059357e-07, -4.912726581096649e-08, -2.0218081772327423e-05 ], "0.01": [ -2.8386013582348824e-05, -6.488990038633347e-07, -1.6367994248867035e-07, -6.742705591022968e-05 ] }, "hidden_norms_per_layer": [ 960.3140869140625, 11076.888671875, 31092.83984375, 34876.390625, 29288.8984375 ], "bp_grad_norms_per_layer": [ 2.04460939130513e-05, 3.4170184335380327e-06, 3.3133326269307872e-06, 3.321988742754911e-06, 3.197354772055405e-06 ] }, "drift": { "embed.weight": 9.203150246245913, "embed.bias": 7.782631309228306, "blocks.0.ln.weight": 0.5169422030448914, "blocks.0.w1.weight": 7.173286797050866, "blocks.0.w1.bias": 7.530177507927273, "blocks.0.w2.weight": 20.753186824233353, "blocks.1.ln.weight": 0.4266158640384674, "blocks.1.w1.weight": 6.365489317176397, "blocks.1.w1.bias": 7.642448312201989, "blocks.1.w2.weight": 16.266082583917918, "blocks.2.ln.weight": 0.366255521774292, "blocks.2.w1.weight": 5.12781119461704, "blocks.2.w1.bias": 5.784653325984481, "blocks.2.w2.weight": 14.283055474835482, "blocks.3.ln.weight": 0.3213387429714203, "blocks.3.w1.weight": 4.3690188550450015, "blocks.3.w1.bias": 4.601659627893413, "blocks.3.w2.weight": 12.971053381902397, "out_ln.weight": 0.05851004645228386, "out_head.weight": 1.1675271811094783, "out_head.bias": 0.45402486694117133 } } }, "123": { "fa": { "log": { "train_loss": [ 2.039442294845581, 1.953437792930603, 1.9257947838592528, 1.9107846630096434, 1.9030635565567016 ], "train_acc": [ 0.25042, 0.29012, 0.30112, 0.30812, 0.31226 ], "test_acc": [ 0.2905, 0.3307, 0.3419, 0.3476, 0.3478 ] }, "diagnostics": { "bp_cosine": [ 0.06648196280002594, -0.010407494381070137, -0.06906092911958694, 0.9917651414871216 ], "perturbation_rho": [ 0.031062623485922813, -0.02341378480195999, -0.033602140843868256, 0.2669849991798401 ], "nudging": { "0.001": [ -3.1692907214164734e-06, -1.126900315284729e-07, 7.380731403827667e-07, -1.1092517524957657e-05 ], "0.003": [ -9.447336196899414e-06, -3.655441105365753e-07, 2.216547727584839e-06, -3.331666812300682e-05 ], "0.01": [ -3.144703805446625e-05, -1.210719347000122e-06, 7.366761565208435e-06, -0.00011098524555563927 ] }, "hidden_norms_per_layer": [ 863.883056640625, 6307.2734375, 12256.30859375, 20627.75390625, 15648.552734375 ], "bp_grad_norms_per_layer": [ 3.119367829640396e-05, 5.960608177701943e-06, 5.3465173550648615e-06, 5.451070592243923e-06, 5.225469521974446e-06 ] }, "drift": { "embed.weight": 8.803298126163122, "embed.bias": 6.680663743131999, "blocks.0.ln.weight": 0.4969744086265564, "blocks.0.w1.weight": 6.118721027737535, "blocks.0.w1.bias": 5.453192795258035, "blocks.0.w2.weight": 17.832094218833557, "blocks.1.ln.weight": 0.4233635365962982, "blocks.1.w1.weight": 5.605093419712821, "blocks.1.w1.bias": 6.1996663705372885, "blocks.1.w2.weight": 16.130887571112158, "blocks.2.ln.weight": 0.38245585560798645, "blocks.2.w1.weight": 5.4055954853998145, "blocks.2.w1.bias": 6.487736894934294, "blocks.2.w2.weight": 15.370923217597205, "blocks.3.ln.weight": 0.39037927985191345, "blocks.3.w1.weight": 5.164380800730237, "blocks.3.w1.bias": 5.6053151435754565, "blocks.3.w2.weight": 13.468106289535303, "out_ln.weight": 0.04700668901205063, "out_head.weight": 1.1282362053366835, "out_head.bias": 1.0759554656539119 } } }, "456": { "fa": { "log": { "train_loss": [ 2.064060781517029, 1.9901417289733887, 1.9581991654205322, 1.9443307501983642, 1.935314490966797 ], "train_acc": [ 0.24014, 0.268, 0.28416, 0.29098, 0.29644 ], "test_acc": [ 0.2713, 0.3048, 0.313, 0.3135, 0.3242 ] }, "diagnostics": { "bp_cosine": [ 0.06508420407772064, -0.013459235429763794, -0.00898228120058775, 0.9861425161361694 ], "perturbation_rho": [ 0.0332966186106205, -0.03956230729818344, 0.008942835032939911, 0.18084916472434998 ], "nudging": { "0.001": [ -2.798624336719513e-06, 1.7695128917694092e-08, -1.3969838619232178e-08, -6.395392119884491e-06 ], "0.003": [ -8.38935375213623e-06, 7.916241884231567e-09, -2.60770320892334e-08, -1.91426370292902e-05 ], "0.01": [ -2.7919188141822815e-05, 9.639188647270203e-08, -4.912726581096649e-08, -6.371899507939816e-05 ] }, "hidden_norms_per_layer": [ 1010.2516479492188, 13113.5537109375, 22355.0546875, 36300.3671875, 30980.419921875 ], "bp_grad_norms_per_layer": [ 1.7613443560549058e-05, 3.159134394081775e-06, 3.121002691841568e-06, 3.1120944186113775e-06, 2.9537256978073856e-06 ] }, "drift": { "embed.weight": 9.613138255724964, "embed.bias": 6.926131704202163, "blocks.0.ln.weight": 0.548354983329773, "blocks.0.w1.weight": 7.294897696552802, "blocks.0.w1.bias": 5.312217412593903, "blocks.0.w2.weight": 20.619584988017134, "blocks.1.ln.weight": 0.3894560933113098, "blocks.1.w1.weight": 5.488544569987622, "blocks.1.w1.bias": 4.831823702146538, "blocks.1.w2.weight": 15.40878297211816, "blocks.2.ln.weight": 0.37565672397613525, "blocks.2.w1.weight": 5.8266288518010825, "blocks.2.w1.bias": 6.58294580181279, "blocks.2.w2.weight": 15.720187648044627, "blocks.3.ln.weight": 0.33931589126586914, "blocks.3.w1.weight": 4.6498033455621615, "blocks.3.w1.bias": 3.4624320285535566, "blocks.3.w2.weight": 14.935221420670851, "out_ln.weight": 0.04563162103295326, "out_head.weight": 1.066836036120676, "out_head.bias": 0.553680529428652 } } }, "config": { "dataset": "cifar10", "d_hidden": 256, "num_blocks": 4, "batch_size": 128, "epochs": 5, "lr": 0.001, "lr_fb": 0.001, "wd": 0.01, "lam": 0.1, "K": 4, "sigma_bridge": 0.05, "ema_momentum": 0.995, "term_grad_weight": 1.0, "seeds": [ 42, 123, 456 ], "gpu": 0, "output_dir": "results/fa_early_ckpts", "methods": [ "fa" ], "random_targets": false, "penalty_lam": 0.0, "num_classes": 10 } }