diff options
Diffstat (limited to 'results/fa_dfa_d512_L8_seed6/results_cifar10.json')
| -rw-r--r-- | results/fa_dfa_d512_L8_seed6/results_cifar10.json | 881 |
1 files changed, 881 insertions, 0 deletions
diff --git a/results/fa_dfa_d512_L8_seed6/results_cifar10.json b/results/fa_dfa_d512_L8_seed6/results_cifar10.json new file mode 100644 index 0000000..996693e --- /dev/null +++ b/results/fa_dfa_d512_L8_seed6/results_cifar10.json @@ -0,0 +1,881 @@ +{ + "6": { + "dfa": { + "log": { + "train_loss": [ + 2.048823083343506, + 2.036137353439331, + 2.0293412883758544, + 2.0249873413085937, + 2.022372166824341, + 2.0224948783874512, + 2.0168244441223147, + 2.016312102279663, + 2.0174264653015137, + 2.0148667056655882, + 2.016306040496826, + 2.012973408279419, + 2.009305914993286, + 2.0127887310409545, + 2.010646263999939, + 2.0090836833953856, + 2.010158688583374, + 2.0119627042007444, + 2.0057619748306275, + 2.00755533531189, + 2.001894345550537, + 2.004210508117676, + 2.002892424316406, + 2.0056438186264036, + 2.006109398841858, + 2.0008294903945925, + 2.004116688156128, + 2.003745227279663, + 2.0009986640930175, + 2.0015462801361084, + 2.001444738845825, + 1.9975958602142334, + 1.9996052980804444, + 1.999814730834961, + 2.0010388219451904, + 1.9962302626800537, + 1.9987594539642335, + 1.9968312421417236, + 1.9977108895111084, + 1.997722573852539, + 1.9974460966491698, + 1.9960338827514648, + 1.997143808517456, + 1.9972000302505493, + 1.9973672123718262, + 1.9960454425811767, + 1.9950894018936158, + 1.9955249545669556, + 1.9957580101776122, + 1.9944090689086913, + 1.9936514922332764, + 1.99337042137146, + 1.9966205854034424, + 1.9951700267791748, + 1.995979843597412, + 1.9958187718200684, + 1.9929929906463624, + 1.996409147720337, + 1.992997929534912, + 1.9945113285827636, + 1.9946366765975951, + 1.9921776248550416, + 1.9923859210586548, + 1.9960651620864869, + 1.992535831222534, + 1.9948228299331665, + 1.992319468231201, + 1.991410069732666, + 1.990960274810791, + 1.9932797208023072, + 1.9914824683380128, + 1.9920508996963502, + 1.9927404278564453, + 1.9916280765533447, + 1.9920522421646119, + 1.9932328916931152, + 1.9915541756439208, + 1.992103966407776, + 1.9913361803817748, + 1.9909088207626342, + 1.9888611060333252, + 1.9917893444061279, + 1.990510139694214, + 1.9895336349105834, + 1.9925078125, + 1.9905930145263673, + 1.9911131750488282, + 1.9901078464126587, + 1.9898306075286865, + 1.9894448529052735, + 1.9916437425231934, + 1.9892843076324462, + 1.9908900201797486, + 1.991130443496704, + 1.9902202153778077, + 1.9898065716934203, + 1.9898762433242798, + 1.989882501220703, + 1.9897464168548584, + 1.991097922897339 + ], + "train_acc": [ + 0.24736, + 0.24926, + 0.25592, + 0.25554, + 0.25688, + 0.25748, + 0.25952, + 0.25872, + 0.26084, + 0.26068, + 0.25918, + 0.26338, + 0.2641, + 0.2628, + 0.2662, + 0.26518, + 0.26584, + 0.26602, + 0.26744, + 0.26454, + 0.2692, + 0.26846, + 0.27026, + 0.26784, + 0.26908, + 0.27298, + 0.27076, + 0.27126, + 0.26854, + 0.27136, + 0.27188, + 0.27314, + 0.27166, + 0.27166, + 0.27, + 0.27358, + 0.27228, + 0.27568, + 0.27252, + 0.27382, + 0.275, + 0.27676, + 0.27532, + 0.27448, + 0.2742, + 0.27754, + 0.27586, + 0.27378, + 0.27572, + 0.27606, + 0.27584, + 0.27508, + 0.2744, + 0.27512, + 0.27518, + 0.27536, + 0.2768, + 0.27628, + 0.2759, + 0.27418, + 0.27668, + 0.27618, + 0.27754, + 0.2755, + 0.2797, + 0.27622, + 0.27722, + 0.2781, + 0.27882, + 0.27704, + 0.27728, + 0.27598, + 0.27658, + 0.2793, + 0.2785, + 0.27512, + 0.27576, + 0.27754, + 0.2809, + 0.27864, + 0.27994, + 0.27934, + 0.278, + 0.2789, + 0.27944, + 0.2777, + 0.2785, + 0.28082, + 0.27722, + 0.27896, + 0.27776, + 0.27826, + 0.27702, + 0.2777, + 0.28018, + 0.27868, + 0.27942, + 0.277, + 0.2778, + 0.2796 + ], + "test_acc": [ + 0.2851, + 0.2882, + 0.2834, + 0.2969, + 0.2682, + 0.2744, + 0.2751, + 0.2761, + 0.2883, + 0.2869, + 0.2764, + 0.2839, + 0.2682, + 0.2743, + 0.2888, + 0.2735, + 0.2847, + 0.2852, + 0.2974, + 0.2908, + 0.2957, + 0.3032, + 0.2908, + 0.272, + 0.2959, + 0.2858, + 0.2995, + 0.2807, + 0.292, + 0.302, + 0.2824, + 0.2802, + 0.2916, + 0.2913, + 0.3048, + 0.2908, + 0.2934, + 0.284, + 0.3083, + 0.302, + 0.3039, + 0.2868, + 0.3047, + 0.2997, + 0.2914, + 0.2968, + 0.2848, + 0.3035, + 0.2934, + 0.2966, + 0.2973, + 0.2946, + 0.2953, + 0.296, + 0.2932, + 0.2959, + 0.2864, + 0.2976, + 0.293, + 0.2961, + 0.2957, + 0.3012, + 0.2948, + 0.2926, + 0.2969, + 0.3043, + 0.3005, + 0.2957, + 0.2956, + 0.2994, + 0.2919, + 0.2945, + 0.2915, + 0.2903, + 0.2934, + 0.2967, + 0.2953, + 0.3, + 0.3006, + 0.2939, + 0.2995, + 0.2965, + 0.292, + 0.294, + 0.3006, + 0.3013, + 0.2978, + 0.2961, + 0.3011, + 0.2982, + 0.2984, + 0.2974, + 0.2977, + 0.297, + 0.2967, + 0.2972, + 0.2972, + 0.2962, + 0.2961, + 0.296 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.385863333940506, + 0.0001165914727607742, + 0.00017788054537959397, + 0.00018135752179659903, + -0.00028566765831783414, + -0.0002498100802768022, + 0.0006119838217273355, + -0.00013276470417622477 + ], + "perturbation_rho": [ + -0.01099667139351368, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -3.4226104617118835e-07, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -1.0388903319835663e-06, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.01": [ + -3.7979334592819214e-06, + 0.0, + -9.313225746154785e-10, + 0.0, + 3.725290298461914e-09, + 0.0, + 0.0, + 0.0 + ] + }, + "hidden_norms_per_layer": [ + 54199.1640625, + 1216095872.0, + 2414873600.0, + 3109203712.0, + 3571607296.0, + 6284516352.0, + 6465921536.0, + 6651301376.0, + 8724734976.0 + ], + "bp_grad_norms_per_layer": [ + 2.6299613864466664e-07, + 2.240632540617682e-10, + 2.239412544291497e-10, + 2.2397983467925542e-10, + 2.240085755778054e-10, + 2.2390612974820812e-10, + 2.2391909160202061e-10, + 2.2392485088396086e-10, + 2.239693430716727e-10 + ] + }, + "drift": { + "embed.weight": 328.7167633164963, + "embed.bias": 238.81345295557173, + "blocks.0.ln.weight": 10.101694109463628, + "blocks.0.w1.weight": 287.42152472298335, + "blocks.0.w1.bias": 258.1218430687453, + "blocks.0.w2.weight": 482.380198439405, + "blocks.1.ln.weight": 8.527375095072756, + "blocks.1.w1.weight": 281.10785120372356, + "blocks.1.w1.bias": 271.84962810099614, + "blocks.1.w2.weight": 308.13809976512954, + "blocks.2.ln.weight": 8.093445971068885, + "blocks.2.w1.weight": 291.96840313708185, + "blocks.2.w1.bias": 267.27308802490285, + "blocks.2.w2.weight": 296.34890032183836, + "blocks.3.ln.weight": 7.507548795725433, + "blocks.3.w1.weight": 290.43783218773393, + "blocks.3.w1.bias": 271.71149018462404, + "blocks.3.w2.weight": 280.50007531299553, + "blocks.4.ln.weight": 10.4676649794159, + "blocks.4.w1.weight": 425.7246219342384, + "blocks.4.w1.bias": 395.9832771459978, + "blocks.4.w2.weight": 387.2972211221749, + "blocks.5.ln.weight": 7.145103425417736, + "blocks.5.w1.weight": 272.3772890298663, + "blocks.5.w1.bias": 261.31341956376576, + "blocks.5.w2.weight": 248.44511726184933, + "blocks.6.ln.weight": 7.2864592372954435, + "blocks.6.w1.weight": 277.1704677507615, + "blocks.6.w1.bias": 265.20680207190264, + "blocks.6.w2.weight": 260.3666734377331, + "blocks.7.ln.weight": 10.55594745569165, + "blocks.7.w1.weight": 426.2374140444505, + "blocks.7.w1.bias": 401.49349715227146, + "blocks.7.w2.weight": 419.0556854355582, + "out_ln.weight": 0.5880689181906897, + "out_head.weight": 8.642576606816835, + "out_head.bias": 0.6087240911810886 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.0266723140716554, + 1.9453410259246826, + 1.9245470600128174, + 1.9017182848739624, + 1.889633105506897, + 1.8787115466308595, + 1.869741219520569, + 1.859507017288208, + 1.8565170881652833, + 1.8455627252197266, + 1.8404858825683594, + 1.8341104727172852, + 1.8273077993774414, + 1.821364828453064, + 1.8166592828369141, + 1.8075398138046264, + 1.8040798845672608, + 1.7960231017303467, + 1.785357122154236, + 1.7876761278915405, + 1.77504403591156, + 1.7683619609832764, + 1.769164297103882, + 1.7614449658966065, + 1.7590879248428344, + 1.7571340868759155, + 1.7514765051651, + 1.7532490229034423, + 1.7487156938934325, + 1.7469101626205443, + 1.7441981435394287, + 1.7379362894058228, + 1.7393075399017335, + 1.7378688484954834, + 1.7407434060668945, + 1.7369408278656007, + 1.7355171717453004, + 1.7350507015228271, + 1.733909783859253, + 1.7398296880722046, + 1.7312470238876343, + 1.7290490047836304, + 1.7353802160644531, + 1.7334120412445069, + 1.7291723877334595, + 1.726144882774353, + 1.7297367659759522, + 1.72646170627594, + 1.724361591567993, + 1.7226831272125245, + 1.7208573818969726, + 1.7199689587402345, + 1.7188016274261475, + 1.7211026276016235, + 1.7158281423568726, + 1.7192893993759155, + 1.711656453590393, + 1.717024673728943, + 1.710442032470703, + 1.7105829236602783, + 1.711408941345215, + 1.706308694000244, + 1.7078606945419312, + 1.7072623968887328, + 1.703292763900757, + 1.7017784185028075, + 1.7011818041992188, + 1.7006354034805298, + 1.6977454791259765, + 1.6999914080429077, + 1.6981370053863525, + 1.699045623397827, + 1.7020124697494508, + 1.6962461737823487, + 1.6977318247222901, + 1.696797931213379, + 1.697660752220154, + 1.695864578933716, + 1.6946413637542725, + 1.6933105139923095, + 1.6918804026031493, + 1.69379876953125, + 1.692444557876587, + 1.692386579284668, + 1.6933874633026123, + 1.6912192764282226, + 1.6896385999298096, + 1.6908740300750733, + 1.6846979732894898, + 1.6878035149765014, + 1.6874957360839844, + 1.6844608339691163, + 1.687544917602539, + 1.6878573336791993, + 1.6888155934906006, + 1.6884653591537475, + 1.684945572128296, + 1.6854769310760498, + 1.689350791053772, + 1.684672553062439 + ], + "train_acc": [ + 0.25516, + 0.2934, + 0.30626, + 0.31242, + 0.32024, + 0.32414, + 0.32588, + 0.32934, + 0.33358, + 0.3364, + 0.33836, + 0.34152, + 0.34374, + 0.34868, + 0.34688, + 0.35322, + 0.35536, + 0.35502, + 0.3615, + 0.35864, + 0.365, + 0.36708, + 0.36492, + 0.36656, + 0.36836, + 0.36936, + 0.3728, + 0.37342, + 0.37252, + 0.37256, + 0.37424, + 0.3751, + 0.37706, + 0.3752, + 0.37348, + 0.37624, + 0.37684, + 0.37782, + 0.37856, + 0.3766, + 0.37936, + 0.38126, + 0.37778, + 0.37912, + 0.38126, + 0.38394, + 0.37856, + 0.38026, + 0.38072, + 0.38172, + 0.3842, + 0.38356, + 0.38258, + 0.38382, + 0.38378, + 0.38544, + 0.38808, + 0.38518, + 0.3869, + 0.386, + 0.3856, + 0.38972, + 0.38756, + 0.38892, + 0.39094, + 0.38878, + 0.39068, + 0.3887, + 0.39396, + 0.39364, + 0.39368, + 0.39064, + 0.39046, + 0.39298, + 0.39274, + 0.39592, + 0.39298, + 0.39386, + 0.39488, + 0.39222, + 0.39268, + 0.39366, + 0.39478, + 0.39506, + 0.39528, + 0.39594, + 0.39594, + 0.39416, + 0.39786, + 0.39686, + 0.39512, + 0.39482, + 0.39516, + 0.3969, + 0.39614, + 0.39604, + 0.39616, + 0.3983, + 0.39634, + 0.39874 + ], + "test_acc": [ + 0.3093, + 0.3342, + 0.3399, + 0.3542, + 0.3538, + 0.3515, + 0.3467, + 0.3593, + 0.3629, + 0.3586, + 0.3658, + 0.3651, + 0.3634, + 0.368, + 0.3662, + 0.3772, + 0.3706, + 0.3806, + 0.3767, + 0.3884, + 0.3928, + 0.3913, + 0.3908, + 0.3845, + 0.394, + 0.3977, + 0.3963, + 0.3919, + 0.3927, + 0.4001, + 0.3845, + 0.3968, + 0.3923, + 0.3972, + 0.4001, + 0.3927, + 0.3949, + 0.392, + 0.4012, + 0.3949, + 0.4069, + 0.3964, + 0.4031, + 0.3987, + 0.3974, + 0.4014, + 0.3977, + 0.4103, + 0.3991, + 0.3969, + 0.3986, + 0.4054, + 0.4046, + 0.4081, + 0.4036, + 0.4082, + 0.4012, + 0.4036, + 0.4027, + 0.4102, + 0.4079, + 0.4038, + 0.4044, + 0.4118, + 0.4118, + 0.4102, + 0.4106, + 0.4039, + 0.4082, + 0.4046, + 0.4096, + 0.4129, + 0.408, + 0.4041, + 0.4099, + 0.4103, + 0.4081, + 0.4101, + 0.4125, + 0.4133, + 0.4071, + 0.4109, + 0.4092, + 0.412, + 0.4117, + 0.4117, + 0.4081, + 0.412, + 0.412, + 0.41, + 0.4116, + 0.4107, + 0.409, + 0.4105, + 0.4105, + 0.4117, + 0.411, + 0.4107, + 0.4109, + 0.4109 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.036382660269737244, + 0.07720986753702164, + 0.015243301168084145, + -0.03982359915971756, + -0.08008211106061935, + -0.07353264093399048, + 0.004661812447011471, + 0.9986914992332458 + ], + "perturbation_rho": [ + 0.046086542308330536, + -0.012133870273828506, + 0.0135452039539814, + 0.027485966682434082, + -0.0412953719496727, + -0.01517622172832489, + 0.015256978571414948, + -0.014813247136771679 + ], + "nudging": { + "0.001": [ + -2.9372749850153923e-06, + -4.57162968814373e-07, + 5.78584149479866e-08, + 1.1955853551626205e-07, + 9.022187441587448e-08, + 7.35744833946228e-08, + 2.5029294192790985e-08, + -1.23586505651474e-06 + ], + "0.003": [ + -9.18388832360506e-06, + -1.2764940038323402e-06, + -1.8684659153223038e-07, + 2.1711457520723343e-07, + 3.7939753383398056e-07, + 3.0547380447387695e-07, + 1.57160684466362e-08, + -4.404224455356598e-06 + ], + "0.01": [ + -3.0463445000350475e-05, + -4.363246262073517e-06, + -3.3760443329811096e-07, + 6.594927981495857e-07, + 1.332256942987442e-06, + 1.2486707419157028e-06, + -1.0861549526453018e-07, + -1.546763814985752e-05 + ] + }, + "hidden_norms_per_layer": [ + 6046.84130859375, + 74194.140625, + 477900.9375, + 689443.0, + 1004220.0625, + 1523434.625, + 1680752.625, + 1682025.375, + 898568.625 + ], + "bp_grad_norms_per_layer": [ + 3.200672654202208e-05, + 2.357817038500798e-06, + 7.007323006291699e-07, + 6.70125302804081e-07, + 6.57040743590187e-07, + 6.679950388388534e-07, + 6.679492230432515e-07, + 6.673712391602749e-07, + 6.578414968316793e-07 + ] + }, + "drift": { + "embed.weight": 41.37337372663241, + "embed.bias": 21.739351895524546, + "blocks.0.ln.weight": 1.096869475338319, + "blocks.0.w1.weight": 14.853427419399033, + "blocks.0.w1.bias": 14.626173478331832, + "blocks.0.w2.weight": 52.544426520863894, + "blocks.1.ln.weight": 0.989631116445591, + "blocks.1.w1.weight": 18.73171250459504, + "blocks.1.w1.bias": 13.329767291528706, + "blocks.1.w2.weight": 51.37681586378203, + "blocks.2.ln.weight": 0.7265939790865649, + "blocks.2.w1.weight": 18.824924434649834, + "blocks.2.w1.bias": 13.21966047090143, + "blocks.2.w2.weight": 36.1324487703447, + "blocks.3.ln.weight": 0.6455349248601757, + "blocks.3.w1.weight": 19.270974488434813, + "blocks.3.w1.bias": 16.976194451895758, + "blocks.3.w2.weight": 28.417177242878225, + "blocks.4.ln.weight": 0.6212274460619375, + "blocks.4.w1.weight": 20.86703490367653, + "blocks.4.w1.bias": 22.972850990124385, + "blocks.4.w2.weight": 29.740214683313905, + "blocks.5.ln.weight": 0.6222289394933932, + "blocks.5.w1.weight": 18.57775599009712, + "blocks.5.w1.bias": 20.08003721644836, + "blocks.5.w2.weight": 40.53068019154897, + "blocks.6.ln.weight": 0.5581383332084382, + "blocks.6.w1.weight": 15.461885552051882, + "blocks.6.w1.bias": 11.684922950989124, + "blocks.6.w2.weight": 53.18350858546716, + "blocks.7.ln.weight": 0.6558470787555182, + "blocks.7.w1.weight": 20.23429444317466, + "blocks.7.w1.bias": 21.51332878594272, + "blocks.7.w2.weight": 38.38263927504239, + "out_ln.weight": 0.3257959664285173, + "out_head.weight": 5.900526651995236, + "out_head.bias": 0.9300218587303252 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 512, + "num_blocks": 8, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 6 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d512_L8_seed6", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file |
