diff options
Diffstat (limited to 'results/fa_penalty_30ep/results_cifar10.json')
| -rw-r--r-- | results/fa_penalty_30ep/results_cifar10.json | 549 |
1 files changed, 549 insertions, 0 deletions
diff --git a/results/fa_penalty_30ep/results_cifar10.json b/results/fa_penalty_30ep/results_cifar10.json new file mode 100644 index 0000000..bd94af9 --- /dev/null +++ b/results/fa_penalty_30ep/results_cifar10.json @@ -0,0 +1,549 @@ +{ + "42": { + "fa": { + "log": { + "train_loss": [ + 2.001119369430542, + 1.9333542645263673, + 1.9240801361083983, + 1.9216663201141357, + 1.918965964050293, + 1.9119908702850341, + 1.9097738315582276, + 1.9049057181167603, + 1.9038385245513916, + 1.901488224105835, + 1.8975307934951782, + 1.8936708712768555, + 1.8894188814544677, + 1.8864306787872314, + 1.8895101571655273, + 1.8835669344329835, + 1.8832266155624389, + 1.8791197660064698, + 1.8797098063659667, + 1.8761708308410645, + 1.875095842552185, + 1.8748220810317993, + 1.8743982402801513, + 1.8738871404266357, + 1.8735411545562743, + 1.8693548073577881, + 1.8659294427871704, + 1.8685178174591064, + 1.8670922566986083, + 1.8683236782455444 + ], + "train_acc": [ + 0.27484, + 0.30662, + 0.31156, + 0.3128, + 0.31684, + 0.3173, + 0.32102, + 0.32226, + 0.3248, + 0.32594, + 0.32582, + 0.33134, + 0.33202, + 0.33344, + 0.33274, + 0.33626, + 0.33556, + 0.33774, + 0.33918, + 0.33762, + 0.34082, + 0.34052, + 0.34092, + 0.34432, + 0.34294, + 0.3436, + 0.34578, + 0.34654, + 0.34636, + 0.3472 + ], + "test_acc": [ + 0.3237, + 0.3505, + 0.3372, + 0.3327, + 0.3511, + 0.3455, + 0.3449, + 0.344, + 0.3413, + 0.3462, + 0.35, + 0.3511, + 0.3604, + 0.3569, + 0.3565, + 0.3584, + 0.3634, + 0.3651, + 0.3684, + 0.362, + 0.3674, + 0.3586, + 0.3704, + 0.3688, + 0.3729, + 0.3715, + 0.3702, + 0.3713, + 0.3715, + 0.3713 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.154661625623703, + 0.148232564330101, + 0.11714619398117065, + 0.9808143377304077 + ], + "perturbation_rho": [ + 0.2043638527393341, + 0.03451352193951607, + 0.20099492371082306, + 0.6849091649055481 + ], + "nudging": { + "0.001": [ + -7.456343155354261e-06, + -7.0015667006373405e-06, + -6.1238533817231655e-06, + -4.935957258567214e-05 + ], + "0.003": [ + -2.2464897483587265e-05, + -2.095731906592846e-05, + -1.8515565898269415e-05, + -0.00014814879978075624 + ], + "0.01": [ + -7.48702441342175e-05, + -6.984942592680454e-05, + -6.168842082843184e-05, + -0.0004938761703670025 + ] + }, + "hidden_norms_per_layer": [ + 12005.7001953125, + 12007.416015625, + 12011.4248046875, + 12003.9306640625, + 12006.5830078125 + ], + "bp_grad_norms_per_layer": [ + 1.7161242794827558e-05, + 1.712461198621895e-05, + 1.7262802430195734e-05, + 1.6996016711345874e-05, + 1.602559132152237e-05 + ] + }, + "drift": { + "embed.weight": 78.51231332812849, + "embed.bias": 102.12148052673881, + "blocks.0.ln.weight": 0.2991768419742584, + "blocks.0.w1.weight": 3.288291856838975, + "blocks.0.w1.bias": 5.396373593768907, + "blocks.0.w2.weight": 18.07176198506393, + "blocks.1.ln.weight": 0.30484291911125183, + "blocks.1.w1.weight": 3.363888651672944, + "blocks.1.w1.bias": 5.6524749125654825, + "blocks.1.w2.weight": 19.430774882635927, + "blocks.2.ln.weight": 0.3102824091911316, + "blocks.2.w1.weight": 3.4139876185805202, + "blocks.2.w1.bias": 5.452178486528747, + "blocks.2.w2.weight": 20.125989565290535, + "blocks.3.ln.weight": 0.3048917055130005, + "blocks.3.w1.weight": 3.480676314799551, + "blocks.3.w1.bias": 5.215887111238424, + "blocks.3.w2.weight": 18.601435796851646, + "out_ln.weight": 0.20746967196464539, + "out_head.weight": 2.830159264066497, + "out_head.bias": 1.3353853268796754 + } + } + }, + "123": { + "fa": { + "log": { + "train_loss": [ + 1.9927266687774658, + 1.9300689645767213, + 1.92596540309906, + 1.9193654559326172, + 1.9174729382324218, + 1.914334167137146, + 1.9108343372344971, + 1.9098404777526856, + 1.9070983071517944, + 1.9046820580291748, + 1.9035546282958984, + 1.902319673461914, + 1.9007709796524048, + 1.8978776998901368, + 1.896000676651001, + 1.8922497060394288, + 1.889219735031128, + 1.8912379706573486, + 1.8899934759902954, + 1.886697685585022, + 1.8858332043457031, + 1.8851475219726563, + 1.8850083080673217, + 1.8836419734191894, + 1.8818570189666748, + 1.880578341293335, + 1.8828362002563477, + 1.8791149730682373, + 1.881042135925293, + 1.8783651885986328 + ], + "train_acc": [ + 0.28066, + 0.30852, + 0.31142, + 0.3131, + 0.31726, + 0.31894, + 0.31884, + 0.3222, + 0.32346, + 0.325, + 0.32962, + 0.32534, + 0.32718, + 0.32974, + 0.33498, + 0.33296, + 0.33498, + 0.33642, + 0.33698, + 0.33676, + 0.3368, + 0.33892, + 0.33858, + 0.34048, + 0.34248, + 0.34264, + 0.34458, + 0.34442, + 0.34196, + 0.3428 + ], + "test_acc": [ + 0.3339, + 0.344, + 0.3447, + 0.3537, + 0.3523, + 0.3448, + 0.3569, + 0.3548, + 0.3564, + 0.3513, + 0.3558, + 0.3555, + 0.3578, + 0.3565, + 0.3511, + 0.3617, + 0.3621, + 0.3603, + 0.3619, + 0.36, + 0.3663, + 0.3679, + 0.3665, + 0.3645, + 0.363, + 0.3632, + 0.3656, + 0.3666, + 0.3666, + 0.366 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.10381253063678741, + 0.11509518325328827, + 0.17561045289039612, + 0.9849518537521362 + ], + "perturbation_rho": [ + -0.013579179532825947, + 0.098294198513031, + 0.1665695607662201, + 0.7168847322463989 + ], + "nudging": { + "0.001": [ + -3.704102709889412e-06, + -5.607143975794315e-06, + -8.841510862112045e-06, + -5.108967889100313e-05 + ], + "0.003": [ + -1.0923598892986774e-05, + -1.6769510693848133e-05, + -2.6412191800773144e-05, + -0.00015316426288336515 + ], + "0.01": [ + -3.6393641494214535e-05, + -5.586724728345871e-05, + -8.803850505501032e-05, + -0.0005103998119011521 + ] + }, + "hidden_norms_per_layer": [ + 12513.1455078125, + 12514.5771484375, + 12517.3583984375, + 12520.568359375, + 12517.57421875 + ], + "bp_grad_norms_per_layer": [ + 1.9153141693095677e-05, + 1.9135366528644226e-05, + 1.8911112420028076e-05, + 1.892356522148475e-05, + 1.7506923541077413e-05 + ] + }, + "drift": { + "embed.weight": 89.53228672658916, + "embed.bias": 152.8584432062178, + "blocks.0.ln.weight": 0.2654326856136322, + "blocks.0.w1.weight": 3.3634291127986065, + "blocks.0.w1.bias": 5.280393391584056, + "blocks.0.w2.weight": 19.28885452796699, + "blocks.1.ln.weight": 0.2610284984111786, + "blocks.1.w1.weight": 3.3605846097214807, + "blocks.1.w1.bias": 4.889820303164699, + "blocks.1.w2.weight": 20.281310673104024, + "blocks.2.ln.weight": 0.2743469476699829, + "blocks.2.w1.weight": 3.391098035259505, + "blocks.2.w1.bias": 4.733926864945133, + "blocks.2.w2.weight": 20.84238400209855, + "blocks.3.ln.weight": 0.28759220242500305, + "blocks.3.w1.weight": 3.4831818014183473, + "blocks.3.w1.bias": 4.539792496168549, + "blocks.3.w2.weight": 17.922103982945213, + "out_ln.weight": 0.23056229948997498, + "out_head.weight": 2.947940047368198, + "out_head.bias": 0.999532168893943 + } + } + }, + "456": { + "fa": { + "log": { + "train_loss": [ + 2.007471420669556, + 1.9475741510772706, + 1.935076278114319, + 1.9286792455673218, + 1.9233443587493897, + 1.921987562637329, + 1.9133965182113648, + 1.9098651378631593, + 1.9032709611511232, + 1.9005253164291382, + 1.8951593602752685, + 1.8959063681793213, + 1.8941862697982788, + 1.8903014281845092, + 1.887538351173401, + 1.8868630823516845, + 1.8855092962646485, + 1.882849036026001, + 1.88272844581604, + 1.880844307899475, + 1.8779692416000366, + 1.8777563387298584, + 1.8754584911346435, + 1.8720009698104858, + 1.8718394606018067, + 1.871922555580139, + 1.8742619204330444, + 1.871095755996704, + 1.8686067379379272, + 1.8697797116470336 + ], + "train_acc": [ + 0.27346, + 0.30166, + 0.30968, + 0.31384, + 0.3171, + 0.31864, + 0.32234, + 0.32546, + 0.32768, + 0.32608, + 0.33286, + 0.33212, + 0.33646, + 0.33228, + 0.33466, + 0.33496, + 0.33834, + 0.34088, + 0.33898, + 0.34018, + 0.3417, + 0.34182, + 0.3446, + 0.34544, + 0.34776, + 0.34688, + 0.3461, + 0.34764, + 0.3487, + 0.34986 + ], + "test_acc": [ + 0.3312, + 0.3461, + 0.3375, + 0.3554, + 0.3417, + 0.349, + 0.3492, + 0.3315, + 0.3524, + 0.3561, + 0.3452, + 0.3612, + 0.3675, + 0.361, + 0.3588, + 0.3671, + 0.3652, + 0.3621, + 0.3608, + 0.3682, + 0.3607, + 0.3563, + 0.3698, + 0.3701, + 0.3677, + 0.373, + 0.3704, + 0.3674, + 0.37, + 0.3695 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.1374516785144806, + 0.16781684756278992, + 0.13626089692115784, + 0.9774131774902344 + ], + "perturbation_rho": [ + 0.0486995093524456, + 0.06349372863769531, + 0.007340744137763977, + 0.6342363357543945 + ], + "nudging": { + "0.001": [ + -5.1066745072603226e-06, + -7.127760909497738e-06, + -6.226240657269955e-06, + -4.646868910640478e-05 + ], + "0.003": [ + -1.5374505892395973e-05, + -2.130062784999609e-05, + -1.874461304396391e-05, + -0.0001394655555486679 + ], + "0.01": [ + -5.139666609466076e-05, + -7.112661842256784e-05, + -6.259605288505554e-05, + -0.0004648104077205062 + ] + }, + "hidden_norms_per_layer": [ + 12142.6181640625, + 12149.4814453125, + 12152.4228515625, + 12154.3623046875, + 12153.6318359375 + ], + "bp_grad_norms_per_layer": [ + 1.7370659406878985e-05, + 1.7375436073052697e-05, + 1.7465752534917556e-05, + 1.755404082359746e-05, + 1.6847530787345022e-05 + ] + }, + "drift": { + "embed.weight": 81.50708429789378, + "embed.bias": 90.80998898870095, + "blocks.0.ln.weight": 0.30019262433052063, + "blocks.0.w1.weight": 3.2393405099782573, + "blocks.0.w1.bias": 5.11706466812565, + "blocks.0.w2.weight": 17.26999421209171, + "blocks.1.ln.weight": 0.29391026496887207, + "blocks.1.w1.weight": 3.2524768916503883, + "blocks.1.w1.bias": 5.375142102957966, + "blocks.1.w2.weight": 17.822565033735142, + "blocks.2.ln.weight": 0.29711613059043884, + "blocks.2.w1.weight": 3.347203060867532, + "blocks.2.w1.bias": 5.458224100319586, + "blocks.2.w2.weight": 19.038612136675116, + "blocks.3.ln.weight": 0.3008574843406677, + "blocks.3.w1.weight": 3.407494069111075, + "blocks.3.w1.bias": 5.482950458082574, + "blocks.3.w2.weight": 17.98442064571867, + "out_ln.weight": 0.21060113608837128, + "out_head.weight": 2.7473203281973158, + "out_head.bias": 0.9871044007414218 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 256, + "num_blocks": 4, + "batch_size": 128, + "epochs": 30, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 42, + 123, + 456 + ], + "gpu": 0, + "output_dir": "results/fa_penalty_30ep", + "methods": [ + "fa" + ], + "random_targets": false, + "penalty_lam": 0.01, + "num_classes": 10 + } +}
\ No newline at end of file |
