diff options
Diffstat (limited to 'results/fa_early_ckpts/results_cifar10.json')
| -rw-r--r-- | results/fa_early_ckpts/results_cifar10.json | 324 |
1 files changed, 324 insertions, 0 deletions
diff --git a/results/fa_early_ckpts/results_cifar10.json b/results/fa_early_ckpts/results_cifar10.json new file mode 100644 index 0000000..aa046e3 --- /dev/null +++ b/results/fa_early_ckpts/results_cifar10.json @@ -0,0 +1,324 @@ +{ + "42": { + "fa": { + "log": { + "train_loss": [ + 2.049124941177368, + 1.9718926105117798, + 1.9489588039398194, + 1.934002057762146, + 1.9262304685211182 + ], + "train_acc": [ + 0.2436, + 0.2745, + 0.29034, + 0.29818, + 0.301 + ], + "test_acc": [ + 0.2789, + 0.3111, + 0.3091, + 0.3094, + 0.3219 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.0644938200712204, + 0.00024983659386634827, + -0.006781474687159061, + 0.9952471852302551 + ], + "perturbation_rho": [ + 0.06215526908636093, + -0.005929573439061642, + 0.029685823246836662, + 0.183500275015831 + ], + "nudging": { + "0.001": [ + -2.8510112315416336e-06, + -6.868503987789154e-08, + -1.4668330550193787e-08, + -6.758375093340874e-06 + ], + "0.003": [ + -8.508563041687012e-06, + -1.6507692635059357e-07, + -4.912726581096649e-08, + -2.0218081772327423e-05 + ], + "0.01": [ + -2.8386013582348824e-05, + -6.488990038633347e-07, + -1.6367994248867035e-07, + -6.742705591022968e-05 + ] + }, + "hidden_norms_per_layer": [ + 960.3140869140625, + 11076.888671875, + 31092.83984375, + 34876.390625, + 29288.8984375 + ], + "bp_grad_norms_per_layer": [ + 2.04460939130513e-05, + 3.4170184335380327e-06, + 3.3133326269307872e-06, + 3.321988742754911e-06, + 3.197354772055405e-06 + ] + }, + "drift": { + "embed.weight": 9.203150246245913, + "embed.bias": 7.782631309228306, + "blocks.0.ln.weight": 0.5169422030448914, + "blocks.0.w1.weight": 7.173286797050866, + "blocks.0.w1.bias": 7.530177507927273, + "blocks.0.w2.weight": 20.753186824233353, + "blocks.1.ln.weight": 0.4266158640384674, + "blocks.1.w1.weight": 6.365489317176397, + "blocks.1.w1.bias": 7.642448312201989, + "blocks.1.w2.weight": 16.266082583917918, + "blocks.2.ln.weight": 0.366255521774292, + "blocks.2.w1.weight": 5.12781119461704, + "blocks.2.w1.bias": 5.784653325984481, + "blocks.2.w2.weight": 14.283055474835482, + "blocks.3.ln.weight": 0.3213387429714203, + "blocks.3.w1.weight": 4.3690188550450015, + "blocks.3.w1.bias": 4.601659627893413, + "blocks.3.w2.weight": 12.971053381902397, + "out_ln.weight": 0.05851004645228386, + "out_head.weight": 1.1675271811094783, + "out_head.bias": 0.45402486694117133 + } + } + }, + "123": { + "fa": { + "log": { + "train_loss": [ + 2.039442294845581, + 1.953437792930603, + 1.9257947838592528, + 1.9107846630096434, + 1.9030635565567016 + ], + "train_acc": [ + 0.25042, + 0.29012, + 0.30112, + 0.30812, + 0.31226 + ], + "test_acc": [ + 0.2905, + 0.3307, + 0.3419, + 0.3476, + 0.3478 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.06648196280002594, + -0.010407494381070137, + -0.06906092911958694, + 0.9917651414871216 + ], + "perturbation_rho": [ + 0.031062623485922813, + -0.02341378480195999, + -0.033602140843868256, + 0.2669849991798401 + ], + "nudging": { + "0.001": [ + -3.1692907214164734e-06, + -1.126900315284729e-07, + 7.380731403827667e-07, + -1.1092517524957657e-05 + ], + "0.003": [ + -9.447336196899414e-06, + -3.655441105365753e-07, + 2.216547727584839e-06, + -3.331666812300682e-05 + ], + "0.01": [ + -3.144703805446625e-05, + -1.210719347000122e-06, + 7.366761565208435e-06, + -0.00011098524555563927 + ] + }, + "hidden_norms_per_layer": [ + 863.883056640625, + 6307.2734375, + 12256.30859375, + 20627.75390625, + 15648.552734375 + ], + "bp_grad_norms_per_layer": [ + 3.119367829640396e-05, + 5.960608177701943e-06, + 5.3465173550648615e-06, + 5.451070592243923e-06, + 5.225469521974446e-06 + ] + }, + "drift": { + "embed.weight": 8.803298126163122, + "embed.bias": 6.680663743131999, + "blocks.0.ln.weight": 0.4969744086265564, + "blocks.0.w1.weight": 6.118721027737535, + "blocks.0.w1.bias": 5.453192795258035, + "blocks.0.w2.weight": 17.832094218833557, + "blocks.1.ln.weight": 0.4233635365962982, + "blocks.1.w1.weight": 5.605093419712821, + "blocks.1.w1.bias": 6.1996663705372885, + "blocks.1.w2.weight": 16.130887571112158, + "blocks.2.ln.weight": 0.38245585560798645, + "blocks.2.w1.weight": 5.4055954853998145, + "blocks.2.w1.bias": 6.487736894934294, + "blocks.2.w2.weight": 15.370923217597205, + "blocks.3.ln.weight": 0.39037927985191345, + "blocks.3.w1.weight": 5.164380800730237, + "blocks.3.w1.bias": 5.6053151435754565, + "blocks.3.w2.weight": 13.468106289535303, + "out_ln.weight": 0.04700668901205063, + "out_head.weight": 1.1282362053366835, + "out_head.bias": 1.0759554656539119 + } + } + }, + "456": { + "fa": { + "log": { + "train_loss": [ + 2.064060781517029, + 1.9901417289733887, + 1.9581991654205322, + 1.9443307501983642, + 1.935314490966797 + ], + "train_acc": [ + 0.24014, + 0.268, + 0.28416, + 0.29098, + 0.29644 + ], + "test_acc": [ + 0.2713, + 0.3048, + 0.313, + 0.3135, + 0.3242 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.06508420407772064, + -0.013459235429763794, + -0.00898228120058775, + 0.9861425161361694 + ], + "perturbation_rho": [ + 0.0332966186106205, + -0.03956230729818344, + 0.008942835032939911, + 0.18084916472434998 + ], + "nudging": { + "0.001": [ + -2.798624336719513e-06, + 1.7695128917694092e-08, + -1.3969838619232178e-08, + -6.395392119884491e-06 + ], + "0.003": [ + -8.38935375213623e-06, + 7.916241884231567e-09, + -2.60770320892334e-08, + -1.91426370292902e-05 + ], + "0.01": [ + -2.7919188141822815e-05, + 9.639188647270203e-08, + -4.912726581096649e-08, + -6.371899507939816e-05 + ] + }, + "hidden_norms_per_layer": [ + 1010.2516479492188, + 13113.5537109375, + 22355.0546875, + 36300.3671875, + 30980.419921875 + ], + "bp_grad_norms_per_layer": [ + 1.7613443560549058e-05, + 3.159134394081775e-06, + 3.121002691841568e-06, + 3.1120944186113775e-06, + 2.9537256978073856e-06 + ] + }, + "drift": { + "embed.weight": 9.613138255724964, + "embed.bias": 6.926131704202163, + "blocks.0.ln.weight": 0.548354983329773, + "blocks.0.w1.weight": 7.294897696552802, + "blocks.0.w1.bias": 5.312217412593903, + "blocks.0.w2.weight": 20.619584988017134, + "blocks.1.ln.weight": 0.3894560933113098, + "blocks.1.w1.weight": 5.488544569987622, + "blocks.1.w1.bias": 4.831823702146538, + "blocks.1.w2.weight": 15.40878297211816, + "blocks.2.ln.weight": 0.37565672397613525, + "blocks.2.w1.weight": 5.8266288518010825, + "blocks.2.w1.bias": 6.58294580181279, + "blocks.2.w2.weight": 15.720187648044627, + "blocks.3.ln.weight": 0.33931589126586914, + "blocks.3.w1.weight": 4.6498033455621615, + "blocks.3.w1.bias": 3.4624320285535566, + "blocks.3.w2.weight": 14.935221420670851, + "out_ln.weight": 0.04563162103295326, + "out_head.weight": 1.066836036120676, + "out_head.bias": 0.553680529428652 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 256, + "num_blocks": 4, + "batch_size": 128, + "epochs": 5, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 42, + 123, + 456 + ], + "gpu": 0, + "output_dir": "results/fa_early_ckpts", + "methods": [ + "fa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file |
