summaryrefslogtreecommitdiff
path: root/results/fa_early_ckpts/results_cifar10.json
diff options
context:
space:
mode:
Diffstat (limited to 'results/fa_early_ckpts/results_cifar10.json')
-rw-r--r--results/fa_early_ckpts/results_cifar10.json324
1 files changed, 324 insertions, 0 deletions
diff --git a/results/fa_early_ckpts/results_cifar10.json b/results/fa_early_ckpts/results_cifar10.json
new file mode 100644
index 0000000..aa046e3
--- /dev/null
+++ b/results/fa_early_ckpts/results_cifar10.json
@@ -0,0 +1,324 @@
+{
+ "42": {
+ "fa": {
+ "log": {
+ "train_loss": [
+ 2.049124941177368,
+ 1.9718926105117798,
+ 1.9489588039398194,
+ 1.934002057762146,
+ 1.9262304685211182
+ ],
+ "train_acc": [
+ 0.2436,
+ 0.2745,
+ 0.29034,
+ 0.29818,
+ 0.301
+ ],
+ "test_acc": [
+ 0.2789,
+ 0.3111,
+ 0.3091,
+ 0.3094,
+ 0.3219
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.0644938200712204,
+ 0.00024983659386634827,
+ -0.006781474687159061,
+ 0.9952471852302551
+ ],
+ "perturbation_rho": [
+ 0.06215526908636093,
+ -0.005929573439061642,
+ 0.029685823246836662,
+ 0.183500275015831
+ ],
+ "nudging": {
+ "0.001": [
+ -2.8510112315416336e-06,
+ -6.868503987789154e-08,
+ -1.4668330550193787e-08,
+ -6.758375093340874e-06
+ ],
+ "0.003": [
+ -8.508563041687012e-06,
+ -1.6507692635059357e-07,
+ -4.912726581096649e-08,
+ -2.0218081772327423e-05
+ ],
+ "0.01": [
+ -2.8386013582348824e-05,
+ -6.488990038633347e-07,
+ -1.6367994248867035e-07,
+ -6.742705591022968e-05
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 960.3140869140625,
+ 11076.888671875,
+ 31092.83984375,
+ 34876.390625,
+ 29288.8984375
+ ],
+ "bp_grad_norms_per_layer": [
+ 2.04460939130513e-05,
+ 3.4170184335380327e-06,
+ 3.3133326269307872e-06,
+ 3.321988742754911e-06,
+ 3.197354772055405e-06
+ ]
+ },
+ "drift": {
+ "embed.weight": 9.203150246245913,
+ "embed.bias": 7.782631309228306,
+ "blocks.0.ln.weight": 0.5169422030448914,
+ "blocks.0.w1.weight": 7.173286797050866,
+ "blocks.0.w1.bias": 7.530177507927273,
+ "blocks.0.w2.weight": 20.753186824233353,
+ "blocks.1.ln.weight": 0.4266158640384674,
+ "blocks.1.w1.weight": 6.365489317176397,
+ "blocks.1.w1.bias": 7.642448312201989,
+ "blocks.1.w2.weight": 16.266082583917918,
+ "blocks.2.ln.weight": 0.366255521774292,
+ "blocks.2.w1.weight": 5.12781119461704,
+ "blocks.2.w1.bias": 5.784653325984481,
+ "blocks.2.w2.weight": 14.283055474835482,
+ "blocks.3.ln.weight": 0.3213387429714203,
+ "blocks.3.w1.weight": 4.3690188550450015,
+ "blocks.3.w1.bias": 4.601659627893413,
+ "blocks.3.w2.weight": 12.971053381902397,
+ "out_ln.weight": 0.05851004645228386,
+ "out_head.weight": 1.1675271811094783,
+ "out_head.bias": 0.45402486694117133
+ }
+ }
+ },
+ "123": {
+ "fa": {
+ "log": {
+ "train_loss": [
+ 2.039442294845581,
+ 1.953437792930603,
+ 1.9257947838592528,
+ 1.9107846630096434,
+ 1.9030635565567016
+ ],
+ "train_acc": [
+ 0.25042,
+ 0.29012,
+ 0.30112,
+ 0.30812,
+ 0.31226
+ ],
+ "test_acc": [
+ 0.2905,
+ 0.3307,
+ 0.3419,
+ 0.3476,
+ 0.3478
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.06648196280002594,
+ -0.010407494381070137,
+ -0.06906092911958694,
+ 0.9917651414871216
+ ],
+ "perturbation_rho": [
+ 0.031062623485922813,
+ -0.02341378480195999,
+ -0.033602140843868256,
+ 0.2669849991798401
+ ],
+ "nudging": {
+ "0.001": [
+ -3.1692907214164734e-06,
+ -1.126900315284729e-07,
+ 7.380731403827667e-07,
+ -1.1092517524957657e-05
+ ],
+ "0.003": [
+ -9.447336196899414e-06,
+ -3.655441105365753e-07,
+ 2.216547727584839e-06,
+ -3.331666812300682e-05
+ ],
+ "0.01": [
+ -3.144703805446625e-05,
+ -1.210719347000122e-06,
+ 7.366761565208435e-06,
+ -0.00011098524555563927
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 863.883056640625,
+ 6307.2734375,
+ 12256.30859375,
+ 20627.75390625,
+ 15648.552734375
+ ],
+ "bp_grad_norms_per_layer": [
+ 3.119367829640396e-05,
+ 5.960608177701943e-06,
+ 5.3465173550648615e-06,
+ 5.451070592243923e-06,
+ 5.225469521974446e-06
+ ]
+ },
+ "drift": {
+ "embed.weight": 8.803298126163122,
+ "embed.bias": 6.680663743131999,
+ "blocks.0.ln.weight": 0.4969744086265564,
+ "blocks.0.w1.weight": 6.118721027737535,
+ "blocks.0.w1.bias": 5.453192795258035,
+ "blocks.0.w2.weight": 17.832094218833557,
+ "blocks.1.ln.weight": 0.4233635365962982,
+ "blocks.1.w1.weight": 5.605093419712821,
+ "blocks.1.w1.bias": 6.1996663705372885,
+ "blocks.1.w2.weight": 16.130887571112158,
+ "blocks.2.ln.weight": 0.38245585560798645,
+ "blocks.2.w1.weight": 5.4055954853998145,
+ "blocks.2.w1.bias": 6.487736894934294,
+ "blocks.2.w2.weight": 15.370923217597205,
+ "blocks.3.ln.weight": 0.39037927985191345,
+ "blocks.3.w1.weight": 5.164380800730237,
+ "blocks.3.w1.bias": 5.6053151435754565,
+ "blocks.3.w2.weight": 13.468106289535303,
+ "out_ln.weight": 0.04700668901205063,
+ "out_head.weight": 1.1282362053366835,
+ "out_head.bias": 1.0759554656539119
+ }
+ }
+ },
+ "456": {
+ "fa": {
+ "log": {
+ "train_loss": [
+ 2.064060781517029,
+ 1.9901417289733887,
+ 1.9581991654205322,
+ 1.9443307501983642,
+ 1.935314490966797
+ ],
+ "train_acc": [
+ 0.24014,
+ 0.268,
+ 0.28416,
+ 0.29098,
+ 0.29644
+ ],
+ "test_acc": [
+ 0.2713,
+ 0.3048,
+ 0.313,
+ 0.3135,
+ 0.3242
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.06508420407772064,
+ -0.013459235429763794,
+ -0.00898228120058775,
+ 0.9861425161361694
+ ],
+ "perturbation_rho": [
+ 0.0332966186106205,
+ -0.03956230729818344,
+ 0.008942835032939911,
+ 0.18084916472434998
+ ],
+ "nudging": {
+ "0.001": [
+ -2.798624336719513e-06,
+ 1.7695128917694092e-08,
+ -1.3969838619232178e-08,
+ -6.395392119884491e-06
+ ],
+ "0.003": [
+ -8.38935375213623e-06,
+ 7.916241884231567e-09,
+ -2.60770320892334e-08,
+ -1.91426370292902e-05
+ ],
+ "0.01": [
+ -2.7919188141822815e-05,
+ 9.639188647270203e-08,
+ -4.912726581096649e-08,
+ -6.371899507939816e-05
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 1010.2516479492188,
+ 13113.5537109375,
+ 22355.0546875,
+ 36300.3671875,
+ 30980.419921875
+ ],
+ "bp_grad_norms_per_layer": [
+ 1.7613443560549058e-05,
+ 3.159134394081775e-06,
+ 3.121002691841568e-06,
+ 3.1120944186113775e-06,
+ 2.9537256978073856e-06
+ ]
+ },
+ "drift": {
+ "embed.weight": 9.613138255724964,
+ "embed.bias": 6.926131704202163,
+ "blocks.0.ln.weight": 0.548354983329773,
+ "blocks.0.w1.weight": 7.294897696552802,
+ "blocks.0.w1.bias": 5.312217412593903,
+ "blocks.0.w2.weight": 20.619584988017134,
+ "blocks.1.ln.weight": 0.3894560933113098,
+ "blocks.1.w1.weight": 5.488544569987622,
+ "blocks.1.w1.bias": 4.831823702146538,
+ "blocks.1.w2.weight": 15.40878297211816,
+ "blocks.2.ln.weight": 0.37565672397613525,
+ "blocks.2.w1.weight": 5.8266288518010825,
+ "blocks.2.w1.bias": 6.58294580181279,
+ "blocks.2.w2.weight": 15.720187648044627,
+ "blocks.3.ln.weight": 0.33931589126586914,
+ "blocks.3.w1.weight": 4.6498033455621615,
+ "blocks.3.w1.bias": 3.4624320285535566,
+ "blocks.3.w2.weight": 14.935221420670851,
+ "out_ln.weight": 0.04563162103295326,
+ "out_head.weight": 1.066836036120676,
+ "out_head.bias": 0.553680529428652
+ }
+ }
+ },
+ "config": {
+ "dataset": "cifar10",
+ "d_hidden": 256,
+ "num_blocks": 4,
+ "batch_size": 128,
+ "epochs": 5,
+ "lr": 0.001,
+ "lr_fb": 0.001,
+ "wd": 0.01,
+ "lam": 0.1,
+ "K": 4,
+ "sigma_bridge": 0.05,
+ "ema_momentum": 0.995,
+ "term_grad_weight": 1.0,
+ "seeds": [
+ 42,
+ 123,
+ 456
+ ],
+ "gpu": 0,
+ "output_dir": "results/fa_early_ckpts",
+ "methods": [
+ "fa"
+ ],
+ "random_targets": false,
+ "penalty_lam": 0.0,
+ "num_classes": 10
+ }
+} \ No newline at end of file