diff options
Diffstat (limited to 'results/fa_dfa_d512_L8_seed5/results_cifar10.json')
| -rw-r--r-- | results/fa_dfa_d512_L8_seed5/results_cifar10.json | 881 |
1 files changed, 881 insertions, 0 deletions
diff --git a/results/fa_dfa_d512_L8_seed5/results_cifar10.json b/results/fa_dfa_d512_L8_seed5/results_cifar10.json new file mode 100644 index 0000000..e2ec1f0 --- /dev/null +++ b/results/fa_dfa_d512_L8_seed5/results_cifar10.json @@ -0,0 +1,881 @@ +{ + "5": { + "dfa": { + "log": { + "train_loss": [ + 2.0787187157440186, + 2.046413578338623, + 2.043807250671387, + 2.0408004566955564, + 2.034887484970093, + 2.03150455116272, + 2.0333121584320066, + 2.030504135971069, + 2.0268674066925048, + 2.026082299346924, + 2.0260079474639894, + 2.025023748931885, + 2.0206153302001955, + 2.019373860206604, + 2.019724616012573, + 2.0159671471405027, + 2.0133701262664796, + 2.013524337539673, + 2.01570996799469, + 2.0106682904815676, + 2.011510214996338, + 2.0109367053222655, + 2.0096939754486085, + 2.0094474443054198, + 2.0071240294647215, + 2.009241544647217, + 2.0069837924957277, + 2.006896128387451, + 2.0078313034439086, + 2.0056955588531493, + 2.0059358129119875, + 2.005650292816162, + 2.0021781530380247, + 2.0034559716033935, + 2.0014139038848877, + 2.0027021182250975, + 2.002708815307617, + 2.0024726174926757, + 2.0010244245910647, + 2.0033527210235595, + 2.0017753661727906, + 2.0007596390151976, + 2.0027179621124267, + 2.000149852218628, + 2.000393838119507, + 1.9997536290740967, + 1.998189719390869, + 2.0013531698226927, + 1.9993105652236938, + 2.002620627365112, + 2.000056958694458, + 1.998213128967285, + 1.9998394576263427, + 1.9988756994628907, + 2.000797734146118, + 1.9982664052581787, + 1.9998580239105224, + 1.9990567274475097, + 2.0013695433807372, + 2.000919705581665, + 2.00011759311676, + 1.998085468597412, + 1.9998952405548096, + 1.9984590853881836, + 2.0001076091766357, + 1.9984168599700927, + 1.998167473297119, + 1.9996371339416503, + 1.998458631324768, + 1.9980697260284423, + 1.99891614402771, + 1.9961408318328857, + 1.9967646357727051, + 1.9982722332000733, + 1.9978370711517335, + 1.9981338930892945, + 1.9954986195373534, + 1.998370800704956, + 1.9966814602661134, + 1.9975791915893555, + 1.9963215632629394, + 1.9960799507141114, + 1.9947424390029906, + 1.995765002822876, + 1.9970874053955079, + 1.9967461769866943, + 1.9952786428070068, + 1.9961291760253905, + 1.9962149816894532, + 1.9956222714614869, + 1.9963563278579712, + 1.9963637481307983, + 1.995376283531189, + 1.9958238674545288, + 1.996031088180542, + 1.9979531326675415, + 1.9962067671966552, + 1.9966400229644776, + 1.9965521046447754, + 1.9952978050994874 + ], + "train_acc": [ + 0.2307, + 0.2439, + 0.241, + 0.24388, + 0.24862, + 0.25128, + 0.2482, + 0.25152, + 0.25226, + 0.25402, + 0.25412, + 0.25508, + 0.25796, + 0.25802, + 0.25836, + 0.26098, + 0.26092, + 0.2638, + 0.2603, + 0.2651, + 0.26208, + 0.26628, + 0.2672, + 0.26436, + 0.26628, + 0.264, + 0.26676, + 0.26936, + 0.26734, + 0.26656, + 0.268, + 0.27012, + 0.26954, + 0.2697, + 0.2733, + 0.27156, + 0.2711, + 0.27136, + 0.27432, + 0.27026, + 0.2729, + 0.27246, + 0.27116, + 0.27356, + 0.27474, + 0.27462, + 0.27356, + 0.27106, + 0.27586, + 0.2728, + 0.2735, + 0.27494, + 0.27414, + 0.27566, + 0.2745, + 0.275, + 0.27516, + 0.27464, + 0.27214, + 0.27562, + 0.27386, + 0.27616, + 0.27478, + 0.27452, + 0.27438, + 0.27868, + 0.27604, + 0.27778, + 0.2761, + 0.2758, + 0.27738, + 0.27662, + 0.27798, + 0.2747, + 0.27758, + 0.27598, + 0.27646, + 0.27686, + 0.27664, + 0.2775, + 0.27674, + 0.27976, + 0.27856, + 0.27662, + 0.27848, + 0.2782, + 0.27792, + 0.27528, + 0.28002, + 0.27594, + 0.27606, + 0.27738, + 0.27828, + 0.2771, + 0.27888, + 0.2776, + 0.27728, + 0.27756, + 0.27762, + 0.27778 + ], + "test_acc": [ + 0.2557, + 0.2621, + 0.2511, + 0.2468, + 0.2652, + 0.2808, + 0.268, + 0.268, + 0.2707, + 0.2646, + 0.2731, + 0.2756, + 0.2675, + 0.2771, + 0.2753, + 0.2594, + 0.2718, + 0.2876, + 0.2822, + 0.29, + 0.2898, + 0.2714, + 0.2849, + 0.2855, + 0.2765, + 0.281, + 0.2888, + 0.2819, + 0.2876, + 0.2929, + 0.278, + 0.2912, + 0.2877, + 0.2921, + 0.2827, + 0.2961, + 0.2852, + 0.2869, + 0.2852, + 0.2944, + 0.2755, + 0.2905, + 0.2862, + 0.2873, + 0.2769, + 0.2929, + 0.2913, + 0.2949, + 0.291, + 0.2811, + 0.2879, + 0.2854, + 0.282, + 0.2891, + 0.2937, + 0.2909, + 0.2957, + 0.289, + 0.292, + 0.2901, + 0.2975, + 0.2944, + 0.2887, + 0.2984, + 0.2938, + 0.295, + 0.2949, + 0.2936, + 0.2911, + 0.2947, + 0.2942, + 0.2841, + 0.2989, + 0.2916, + 0.2949, + 0.2871, + 0.2863, + 0.2936, + 0.2936, + 0.294, + 0.2902, + 0.2943, + 0.2913, + 0.2937, + 0.2942, + 0.294, + 0.2913, + 0.2949, + 0.2934, + 0.294, + 0.293, + 0.2946, + 0.2939, + 0.2929, + 0.2932, + 0.2933, + 0.2937, + 0.2937, + 0.2939, + 0.2938 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.38104039430618286, + 0.0003684713738039136, + -0.00036610430106520653, + 7.536964403698221e-06, + -7.538420322816819e-05, + 0.00021099840523675084, + -0.00022237180382944643, + -0.0001967815769603476 + ], + "perturbation_rho": [ + -0.022990737110376358, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -3.380700945854187e-07, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -1.1343508958816528e-06, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.01": [ + -3.5706907510757446e-06, + 0.0, + 1.862645149230957e-09, + 0.0, + 1.862645149230957e-09, + 0.0, + 1.862645149230957e-09, + 0.0 + ] + }, + "hidden_norms_per_layer": [ + 53305.52734375, + 1104618240.0, + 1890710528.0, + 2939089664.0, + 4109807360.0, + 7493927424.0, + 8776990720.0, + 9704909824.0, + 9853197312.0 + ], + "bp_grad_norms_per_layer": [ + 2.442081949993735e-07, + 1.9960764985338386e-10, + 1.943529920334086e-10, + 1.947161321069757e-10, + 1.9485994762202807e-10, + 1.949839456560909e-10, + 1.9488108349285937e-10, + 1.9481991020420253e-10, + 1.9481959101508295e-10 + ] + }, + "drift": { + "embed.weight": 332.9361658873903, + "embed.bias": 249.6745044186471, + "blocks.0.ln.weight": 9.964419461290774, + "blocks.0.w1.weight": 277.0099379887824, + "blocks.0.w1.bias": 243.60625234836664, + "blocks.0.w2.weight": 475.0412234341198, + "blocks.1.ln.weight": 7.805044859841917, + "blocks.1.w1.weight": 276.45375034840094, + "blocks.1.w1.bias": 248.51195524041702, + "blocks.1.w2.weight": 286.4144000014896, + "blocks.2.ln.weight": 8.343399040552265, + "blocks.2.w1.weight": 312.7393216527087, + "blocks.2.w1.bias": 278.64438111710933, + "blocks.2.w2.weight": 319.41728918519345, + "blocks.3.ln.weight": 8.2818066466087, + "blocks.3.w1.weight": 331.8178756864101, + "blocks.3.w1.bias": 324.7690510217321, + "blocks.3.w2.weight": 327.59347009101197, + "blocks.4.ln.weight": 10.593671245978031, + "blocks.4.w1.weight": 439.88191861683595, + "blocks.4.w1.bias": 415.51184102478516, + "blocks.4.w2.weight": 435.66114149915, + "blocks.5.ln.weight": 10.04837328756298, + "blocks.5.w1.weight": 400.6014717531882, + "blocks.5.w1.bias": 372.89187324310467, + "blocks.5.w2.weight": 389.61950905043466, + "blocks.6.ln.weight": 9.160507087287726, + "blocks.6.w1.weight": 366.7858107672568, + "blocks.6.w1.bias": 337.0123511781895, + "blocks.6.w2.weight": 345.65905679108874, + "blocks.7.ln.weight": 8.151913429022978, + "blocks.7.w1.weight": 320.48348360714385, + "blocks.7.w1.bias": 294.0973824112796, + "blocks.7.w2.weight": 295.11322720454183, + "out_ln.weight": 0.6460140923890967, + "out_head.weight": 9.185934457998872, + "out_head.bias": 0.6276268606859465 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.038202630004883, + 1.9484270775604249, + 1.9142503852081298, + 1.8978817923355102, + 1.8827772109603882, + 1.8714566381073, + 1.866606608543396, + 1.8608839841079712, + 1.851916226119995, + 1.8518026924133302, + 1.8441847302246093, + 1.8414982928466797, + 1.832841236038208, + 1.8305379946517943, + 1.824911393432617, + 1.8228179180908204, + 1.815796693687439, + 1.8136043480682373, + 1.8111609268569946, + 1.8098963827133179, + 1.8101768481063842, + 1.8083936654663086, + 1.806380335998535, + 1.8081299662017822, + 1.8065022414398193, + 1.8069096044158937, + 1.808437180557251, + 1.8035286350250244, + 1.803345340270996, + 1.7989226748657225, + 1.8019704568481445, + 1.799195940284729, + 1.7924841125106812, + 1.7943892532348633, + 1.7934645865249634, + 1.7921251168823242, + 1.789522707901001, + 1.7859628525161744, + 1.7830697146606445, + 1.7806333806991577, + 1.7807968478775025, + 1.7813195696258546, + 1.7742907537460326, + 1.7763463638687134, + 1.7789305100250243, + 1.7736491188812256, + 1.768717915878296, + 1.7715998685073853, + 1.7694499214935302, + 1.7765150137329102, + 1.7702046938323974, + 1.7672565545272827, + 1.7681742614746094, + 1.7645444506835937, + 1.7642444360351563, + 1.7655976934814452, + 1.7601742944717407, + 1.7602788580703734, + 1.7589675115585328, + 1.7637487964630127, + 1.7622764212036133, + 1.7553310860443114, + 1.7552539752197265, + 1.7542964382553101, + 1.7557507891464232, + 1.7533866836929322, + 1.752781385498047, + 1.7534792293930053, + 1.751649098548889, + 1.7528042163848876, + 1.7522574480438233, + 1.748868000869751, + 1.745579310951233, + 1.748803058128357, + 1.7481703802490234, + 1.7455506887817382, + 1.7438273191070557, + 1.7489568131256104, + 1.7463487658691406, + 1.7455516606903076, + 1.7449686437225342, + 1.7442902671051026, + 1.7408798983383178, + 1.7403113724136352, + 1.7415612424087525, + 1.7422372088623046, + 1.7399942685317993, + 1.7430598278045655, + 1.7437233737182618, + 1.7388530453872681, + 1.742972678489685, + 1.7391708444213867, + 1.7380083013153076, + 1.7414154996109008, + 1.7416090111541749, + 1.7426657040405273, + 1.7410819094085694, + 1.7388902407455444, + 1.7386211107635499, + 1.7390259966278077 + ], + "train_acc": [ + 0.24962, + 0.29246, + 0.30748, + 0.31596, + 0.32232, + 0.32566, + 0.32894, + 0.33218, + 0.33564, + 0.33338, + 0.33816, + 0.33802, + 0.34154, + 0.34408, + 0.3454, + 0.3464, + 0.35, + 0.35064, + 0.34942, + 0.35102, + 0.3499, + 0.35208, + 0.354, + 0.35186, + 0.35288, + 0.35062, + 0.35306, + 0.35098, + 0.35422, + 0.35166, + 0.35522, + 0.35382, + 0.35856, + 0.35798, + 0.35914, + 0.35776, + 0.36182, + 0.36158, + 0.36558, + 0.36348, + 0.36282, + 0.36244, + 0.36576, + 0.36514, + 0.3636, + 0.36724, + 0.36794, + 0.36594, + 0.36652, + 0.36342, + 0.36684, + 0.37162, + 0.36754, + 0.36882, + 0.36786, + 0.36904, + 0.36942, + 0.37216, + 0.37156, + 0.37028, + 0.37302, + 0.37194, + 0.37354, + 0.37204, + 0.37152, + 0.37402, + 0.3749, + 0.37556, + 0.37472, + 0.37472, + 0.3748, + 0.37612, + 0.37782, + 0.37806, + 0.37734, + 0.37744, + 0.37922, + 0.37866, + 0.37972, + 0.37874, + 0.37706, + 0.37788, + 0.37796, + 0.38026, + 0.37858, + 0.37822, + 0.37768, + 0.3786, + 0.37856, + 0.3791, + 0.37826, + 0.3811, + 0.38088, + 0.37906, + 0.3784, + 0.37828, + 0.37966, + 0.38174, + 0.38126, + 0.38014 + ], + "test_acc": [ + 0.3045, + 0.3393, + 0.3346, + 0.3359, + 0.3456, + 0.3582, + 0.3589, + 0.3566, + 0.3662, + 0.3668, + 0.3696, + 0.3757, + 0.3629, + 0.3738, + 0.3716, + 0.3674, + 0.3792, + 0.3837, + 0.3815, + 0.3839, + 0.3912, + 0.3753, + 0.3781, + 0.3804, + 0.3753, + 0.3771, + 0.3802, + 0.3792, + 0.3805, + 0.3876, + 0.3826, + 0.3862, + 0.3827, + 0.3927, + 0.382, + 0.393, + 0.3874, + 0.3935, + 0.381, + 0.3979, + 0.3786, + 0.3946, + 0.3908, + 0.3995, + 0.3955, + 0.3971, + 0.4012, + 0.399, + 0.3975, + 0.3922, + 0.3985, + 0.3901, + 0.3931, + 0.3976, + 0.3921, + 0.3985, + 0.3962, + 0.4034, + 0.3942, + 0.4, + 0.4027, + 0.4062, + 0.4005, + 0.4042, + 0.4061, + 0.4041, + 0.4029, + 0.4026, + 0.4045, + 0.4, + 0.4023, + 0.4018, + 0.4061, + 0.4049, + 0.4069, + 0.4042, + 0.4047, + 0.4007, + 0.4039, + 0.4028, + 0.4057, + 0.4022, + 0.4017, + 0.4046, + 0.4012, + 0.4026, + 0.4039, + 0.4032, + 0.4027, + 0.4051, + 0.4043, + 0.4052, + 0.4041, + 0.4048, + 0.4033, + 0.4037, + 0.4032, + 0.4021, + 0.4023, + 0.4025 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.014374179765582085, + 0.09000293165445328, + -0.03108237311244011, + -0.02239440567791462, + -0.02732698619365692, + -0.04657082259654999, + -0.06352561712265015, + 0.9887357950210571 + ], + "perturbation_rho": [ + 0.03167568892240524, + -0.015739459544420242, + 0.0075595797970891, + 0.03960222005844116, + 0.0035968590527772903, + 0.03037603199481964, + 0.029943909496068954, + 0.005924902856349945 + ], + "nudging": { + "0.001": [ + 5.2677933126688e-07, + -4.189787432551384e-07, + 9.185168892145157e-08, + -3.841705620288849e-09, + -3.655441105365753e-08, + 2.3981556296348572e-08, + 2.3981556296348572e-08, + -1.473352313041687e-06 + ], + "0.003": [ + 1.9101426005363464e-06, + -1.223525032401085e-06, + 1.5366822481155396e-07, + -2.537854015827179e-08, + 1.5622936189174652e-07, + 9.872019290924072e-08, + 1.771841198205948e-07, + -4.843459464609623e-06 + ], + "0.01": [ + 6.739639502484351e-06, + -3.795139491558075e-06, + 4.704343155026436e-07, + 2.0337756723165512e-07, + 3.941822797060013e-07, + 6.516929715871811e-07, + 1.0146759450435638e-06, + -1.683842856436968e-05 + ] + }, + "hidden_norms_per_layer": [ + 7750.18701171875, + 163374.5, + 1020835.0, + 1417354.375, + 1653246.5, + 1891510.125, + 2137834.5, + 2238698.25, + 1372594.75 + ], + "bp_grad_norms_per_layer": [ + 2.8991271392442286e-05, + 1.4679561672892305e-06, + 6.962879979255376e-07, + 6.957282039365964e-07, + 6.946868325030664e-07, + 6.940763341845013e-07, + 6.940714456504793e-07, + 6.966275805098121e-07, + 6.714612368341477e-07 + ] + }, + "drift": { + "embed.weight": 55.14922199302306, + "embed.bias": 15.427529434718334, + "blocks.0.ln.weight": 1.3139840801235205, + "blocks.0.w1.weight": 18.28738161809814, + "blocks.0.w1.bias": 13.519470867121564, + "blocks.0.w2.weight": 65.29381196065084, + "blocks.1.ln.weight": 1.2381725193607873, + "blocks.1.w1.weight": 24.224592130996264, + "blocks.1.w1.bias": 19.040373052112855, + "blocks.1.w2.weight": 51.77163849918677, + "blocks.2.ln.weight": 0.7724310378501466, + "blocks.2.w1.weight": 20.96874606199096, + "blocks.2.w1.bias": 19.420703681847986, + "blocks.2.w2.weight": 49.62414572233535, + "blocks.3.ln.weight": 0.6858974872301369, + "blocks.3.w1.weight": 19.410601791558882, + "blocks.3.w1.bias": 18.939055453950672, + "blocks.3.w2.weight": 45.97967492141307, + "blocks.4.ln.weight": 0.6186727990894257, + "blocks.4.w1.weight": 19.69072198327253, + "blocks.4.w1.bias": 19.749605410301402, + "blocks.4.w2.weight": 41.99820530820194, + "blocks.5.ln.weight": 0.6406844547162872, + "blocks.5.w1.weight": 20.510058050945528, + "blocks.5.w1.bias": 20.988299652643033, + "blocks.5.w2.weight": 34.25267498831366, + "blocks.6.ln.weight": 0.6310276000328071, + "blocks.6.w1.weight": 18.188089381701026, + "blocks.6.w1.bias": 17.13681902701148, + "blocks.6.w2.weight": 39.811714763649384, + "blocks.7.ln.weight": 0.7307433363934756, + "blocks.7.w1.weight": 19.56130269038455, + "blocks.7.w1.bias": 16.899275966635617, + "blocks.7.w2.weight": 58.89283532862689, + "out_ln.weight": 0.430748638467109, + "out_head.weight": 7.024186774080115, + "out_head.bias": 0.6948984479282404 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 512, + "num_blocks": 8, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 5 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d512_L8_seed5", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file |
