diff options
Diffstat (limited to 'results/fa_dfa_d512_L8_seed2/results_cifar10.json')
| -rw-r--r-- | results/fa_dfa_d512_L8_seed2/results_cifar10.json | 881 |
1 files changed, 881 insertions, 0 deletions
diff --git a/results/fa_dfa_d512_L8_seed2/results_cifar10.json b/results/fa_dfa_d512_L8_seed2/results_cifar10.json new file mode 100644 index 0000000..5a01422 --- /dev/null +++ b/results/fa_dfa_d512_L8_seed2/results_cifar10.json @@ -0,0 +1,881 @@ +{ + "2": { + "dfa": { + "log": { + "train_loss": [ + 2.0644361302948, + 2.046419815826416, + 2.038333583984375, + 2.038490813598633, + 2.039234817237854, + 2.036761814575195, + 2.0334856452941894, + 2.034733783721924, + 2.029182948684692, + 2.0294196990585327, + 2.0262659832000733, + 2.026484178543091, + 2.025475791244507, + 2.0258196518707274, + 2.0231178736114503, + 2.0239224526977537, + 2.0206442892456056, + 2.020261905860901, + 2.0215636526489256, + 2.0221778102874755, + 2.019456211929321, + 2.0221493022155763, + 2.0212540914916994, + 2.0227064220809936, + 2.019565500793457, + 2.0192780477905274, + 2.022175419692993, + 2.0185312504577637, + 2.0168647090911866, + 2.018404118041992, + 2.0194629988098143, + 2.019041424560547, + 2.0179552308654785, + 2.0175965952301027, + 2.019192621688843, + 2.0174668720245363, + 2.0164142420196534, + 2.015060181007385, + 2.0180635737609864, + 2.0148490827941896, + 2.017637328643799, + 2.0145212058258055, + 2.0134287954711914, + 2.016226960449219, + 2.017133801612854, + 2.015110319671631, + 2.0140358378601073, + 2.0136930332946776, + 2.013491507110596, + 2.0132310498046877, + 2.0138910511779784, + 2.0127566445159912, + 2.014643051528931, + 2.013907117614746, + 2.013747940750122, + 2.013379987182617, + 2.014386905593872, + 2.0132828955841062, + 2.012256466674805, + 2.0110202868652345, + 2.011350602493286, + 2.0109373377990725, + 2.0134030670928955, + 2.013690048866272, + 2.010493644256592, + 2.0127015099716186, + 2.014034610977173, + 2.0104107666015625, + 2.009855454788208, + 2.0113264336395265, + 2.0088184381866454, + 2.009741211090088, + 2.0078707803726195, + 2.0089734397888184, + 2.010708229217529, + 2.0102198361206054, + 2.0088808136367797, + 2.010095508155823, + 2.0100783850097654, + 2.0090800459289553, + 2.0093112369155883, + 2.0093213819122315, + 2.0104708361053465, + 2.0093392966079713, + 2.007901397628784, + 2.0074678485870363, + 2.0079102828216553, + 2.0076543350982665, + 2.008981750946045, + 2.0082327671051026, + 2.0088174480819703, + 2.009487823638916, + 2.0088063831329346, + 2.008092264633179, + 2.009373797225952, + 2.0082830452728273, + 2.009356221847534, + 2.0067892125701903, + 2.0079211888122557, + 2.009279239501953 + ], + "train_acc": [ + 0.23872, + 0.2456, + 0.24996, + 0.2495, + 0.24748, + 0.2497, + 0.25566, + 0.25088, + 0.25386, + 0.2538, + 0.25732, + 0.25644, + 0.2601, + 0.25942, + 0.2592, + 0.25872, + 0.26188, + 0.26174, + 0.26336, + 0.2605, + 0.2632, + 0.2612, + 0.2603, + 0.2612, + 0.26016, + 0.2645, + 0.26032, + 0.26266, + 0.26196, + 0.26384, + 0.26492, + 0.26296, + 0.26584, + 0.26768, + 0.26482, + 0.2648, + 0.26722, + 0.26534, + 0.26276, + 0.2668, + 0.26668, + 0.26698, + 0.2691, + 0.26718, + 0.26602, + 0.26738, + 0.26584, + 0.26838, + 0.2676, + 0.26986, + 0.268, + 0.2675, + 0.26946, + 0.2669, + 0.2694, + 0.26946, + 0.26752, + 0.26804, + 0.27002, + 0.26888, + 0.27004, + 0.27052, + 0.2715, + 0.26972, + 0.2714, + 0.26728, + 0.26934, + 0.27246, + 0.27128, + 0.27064, + 0.27078, + 0.27072, + 0.27496, + 0.2735, + 0.27134, + 0.27228, + 0.2706, + 0.27152, + 0.27204, + 0.27414, + 0.27122, + 0.27298, + 0.27146, + 0.27378, + 0.27276, + 0.2735, + 0.27246, + 0.2746, + 0.2735, + 0.27246, + 0.27278, + 0.27358, + 0.27442, + 0.27286, + 0.2721, + 0.27286, + 0.27204, + 0.27472, + 0.27256, + 0.27478 + ], + "test_acc": [ + 0.2603, + 0.2353, + 0.2607, + 0.265, + 0.2673, + 0.2661, + 0.2701, + 0.2571, + 0.2897, + 0.288, + 0.2911, + 0.2945, + 0.265, + 0.2754, + 0.2857, + 0.2812, + 0.2695, + 0.2887, + 0.2854, + 0.288, + 0.2916, + 0.2906, + 0.2793, + 0.2867, + 0.2901, + 0.3, + 0.2805, + 0.2864, + 0.2983, + 0.2846, + 0.2886, + 0.2877, + 0.2925, + 0.2778, + 0.2959, + 0.2912, + 0.2908, + 0.2908, + 0.2801, + 0.2988, + 0.2829, + 0.2906, + 0.3007, + 0.2911, + 0.2975, + 0.2959, + 0.3009, + 0.2959, + 0.2901, + 0.2841, + 0.2989, + 0.2867, + 0.2927, + 0.2886, + 0.2802, + 0.281, + 0.3007, + 0.2873, + 0.2891, + 0.3031, + 0.2983, + 0.2973, + 0.2972, + 0.2921, + 0.2927, + 0.2971, + 0.2975, + 0.2988, + 0.3026, + 0.2926, + 0.2941, + 0.2971, + 0.2967, + 0.2972, + 0.2957, + 0.2998, + 0.2961, + 0.2934, + 0.2937, + 0.2949, + 0.2969, + 0.2975, + 0.2933, + 0.2965, + 0.2925, + 0.299, + 0.2975, + 0.2993, + 0.2935, + 0.2975, + 0.2962, + 0.2959, + 0.2958, + 0.2974, + 0.2975, + 0.2967, + 0.2967, + 0.2967, + 0.2967, + 0.2966 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.3420855402946472, + -0.0001374588318867609, + -0.0004025904054287821, + -0.0002873847261071205, + -0.0005695301806554198, + 0.00037305167643353343, + 0.0003001938748639077, + 6.27083791187033e-05 + ], + "perturbation_rho": [ + 0.05308223515748978, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -3.8510188460350037e-07, + 0.0, + 0.0, + 0.0, + -3.725290298461914e-09, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -1.0007061064243317e-06, + 0.0, + 0.0, + 0.0, + -3.725290298461914e-09, + 0.0, + 0.0, + 0.0 + ], + "0.01": [ + -3.0365772545337677e-06, + 0.0, + 0.0, + 0.0, + -3.725290298461914e-09, + 0.0, + 0.0, + 0.0 + ] + }, + "hidden_norms_per_layer": [ + 53974.390625, + 1994035456.0, + 4991160320.0, + 6590162432.0, + 6723744768.0, + 9356510208.0, + 9466019840.0, + 10094161920.0, + 10161934336.0 + ], + "bp_grad_norms_per_layer": [ + 2.1625525903345988e-07, + 2.0878114515010537e-10, + 2.0495614927451555e-10, + 2.0494976549212396e-10, + 2.0494639318968666e-10, + 2.0494643482305008e-10, + 2.0495241614959525e-10, + 2.0495422026201027e-10, + 2.0496979113993063e-10 + ] + }, + "drift": { + "embed.weight": 347.8854750017766, + "embed.bias": 315.64569824736526, + "blocks.0.ln.weight": 10.235928115345473, + "blocks.0.w1.weight": 318.5495193049707, + "blocks.0.w1.bias": 347.2457558883921, + "blocks.0.w2.weight": 498.50725738614966, + "blocks.1.ln.weight": 9.559401727161022, + "blocks.1.w1.weight": 396.35172799806554, + "blocks.1.w1.bias": 380.45360123690216, + "blocks.1.w2.weight": 397.06562678988604, + "blocks.2.ln.weight": 9.795519840502836, + "blocks.2.w1.weight": 400.5282974464668, + "blocks.2.w1.bias": 367.0528578182135, + "blocks.2.w2.weight": 385.20621819830745, + "blocks.3.ln.weight": 7.79115393268457, + "blocks.3.w1.weight": 267.38195610990635, + "blocks.3.w1.bias": 241.5060314212707, + "blocks.3.w2.weight": 261.60447846418674, + "blocks.4.ln.weight": 10.663570278143073, + "blocks.4.w1.weight": 440.19027772109945, + "blocks.4.w1.bias": 406.52995649086876, + "blocks.4.w2.weight": 429.9566257006414, + "blocks.5.ln.weight": 7.336168242804289, + "blocks.5.w1.weight": 278.99456283486023, + "blocks.5.w1.bias": 254.26825064498672, + "blocks.5.w2.weight": 255.6469209546951, + "blocks.6.ln.weight": 8.774665158034248, + "blocks.6.w1.weight": 349.580580396389, + "blocks.6.w1.bias": 332.01322875711713, + "blocks.6.w2.weight": 335.2183411011385, + "blocks.7.ln.weight": 6.103809206945497, + "blocks.7.w1.weight": 223.73607116076752, + "blocks.7.w1.bias": 204.11858072638188, + "blocks.7.w2.weight": 212.2909923599191, + "out_ln.weight": 0.5939581817276044, + "out_head.weight": 9.736202389552009, + "out_head.bias": 0.7653619259023434 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.0312406940460206, + 1.9362668811798096, + 1.8983942519378663, + 1.881630844039917, + 1.8686952627563476, + 1.8598585787200927, + 1.8500165731430054, + 1.8523509564208984, + 1.841215087814331, + 1.8375128533935547, + 1.82443065284729, + 1.82105125, + 1.8107502059555054, + 1.8082057354736327, + 1.8022028189468384, + 1.7978119366836547, + 1.7920289902496338, + 1.7873877236938476, + 1.7848837859725952, + 1.7817217052459717, + 1.7746633721542358, + 1.7798608938598632, + 1.7749403537750243, + 1.7722899240112304, + 1.7705043783187866, + 1.765470712814331, + 1.7667155236434937, + 1.7605387261199952, + 1.7600090964126587, + 1.7570182471466065, + 1.7548303343963623, + 1.753013416786194, + 1.7530071124267579, + 1.752319090499878, + 1.7505883599090577, + 1.7475732799530028, + 1.7479963735961914, + 1.7440779452514648, + 1.7415211089706422, + 1.7423380712127685, + 1.7422383227157592, + 1.73590397315979, + 1.7367827083587646, + 1.7335861996078492, + 1.732941851272583, + 1.726208869934082, + 1.7261467092895508, + 1.731659492225647, + 1.7264601552963257, + 1.7286788833618163, + 1.722340958251953, + 1.7210906581497192, + 1.7243633469390869, + 1.7231188234710693, + 1.7203610498809814, + 1.7219875589752198, + 1.719448472518921, + 1.7194024643707275, + 1.7188453897857665, + 1.715792247581482, + 1.7124444417572022, + 1.71695018119812, + 1.7176160364532471, + 1.7146047933578492, + 1.7122148095321654, + 1.7136172246551513, + 1.7148900774765015, + 1.7101783513641358, + 1.7065966332244873, + 1.7109612771606446, + 1.7086562002182006, + 1.708388620033264, + 1.7043865182113647, + 1.7084569076919556, + 1.7085095125961303, + 1.7060216832733155, + 1.7013830905532836, + 1.706795428390503, + 1.7063351504898072, + 1.70022035030365, + 1.7059846701049806, + 1.706136099281311, + 1.7015950707626344, + 1.7021336114120484, + 1.6970344146347045, + 1.7002056908416747, + 1.6996512441635132, + 1.701994123878479, + 1.6985623202896118, + 1.7015460013580321, + 1.7006773757171632, + 1.7014044579696654, + 1.7009879675674437, + 1.699486920852661, + 1.6986795735931397, + 1.6978497576904297, + 1.7015439191436768, + 1.699127428817749, + 1.6978760889434814, + 1.69874500705719 + ], + "train_acc": [ + 0.25366, + 0.295, + 0.31024, + 0.322, + 0.3275, + 0.3335, + 0.33654, + 0.33282, + 0.33834, + 0.3379, + 0.34392, + 0.3437, + 0.34912, + 0.35138, + 0.35342, + 0.3525, + 0.35808, + 0.36144, + 0.36196, + 0.36088, + 0.3609, + 0.3617, + 0.36304, + 0.36386, + 0.36488, + 0.36634, + 0.3662, + 0.37146, + 0.36906, + 0.37174, + 0.37052, + 0.37144, + 0.37146, + 0.37096, + 0.37404, + 0.37546, + 0.37712, + 0.37372, + 0.37698, + 0.37738, + 0.37506, + 0.38058, + 0.379, + 0.37758, + 0.38042, + 0.38246, + 0.38102, + 0.38084, + 0.38438, + 0.38092, + 0.38366, + 0.38522, + 0.38544, + 0.38474, + 0.38626, + 0.385, + 0.38362, + 0.38682, + 0.38782, + 0.3882, + 0.38862, + 0.38608, + 0.38642, + 0.3857, + 0.38758, + 0.38714, + 0.38588, + 0.39092, + 0.38846, + 0.39068, + 0.38904, + 0.39098, + 0.39194, + 0.39138, + 0.38832, + 0.39186, + 0.39308, + 0.3901, + 0.39034, + 0.3932, + 0.39182, + 0.39044, + 0.39078, + 0.3919, + 0.39418, + 0.39302, + 0.39488, + 0.39178, + 0.39388, + 0.39272, + 0.39092, + 0.39096, + 0.3937, + 0.39176, + 0.39522, + 0.39246, + 0.39252, + 0.39446, + 0.3936, + 0.39362 + ], + "test_acc": [ + 0.2876, + 0.3293, + 0.3436, + 0.3566, + 0.3569, + 0.3491, + 0.3632, + 0.36, + 0.3748, + 0.3697, + 0.3686, + 0.3785, + 0.3636, + 0.3798, + 0.3817, + 0.3661, + 0.3711, + 0.386, + 0.3761, + 0.3774, + 0.3908, + 0.3855, + 0.3821, + 0.3861, + 0.3921, + 0.398, + 0.3973, + 0.3814, + 0.3875, + 0.3951, + 0.3951, + 0.3873, + 0.399, + 0.3912, + 0.4011, + 0.3901, + 0.4079, + 0.3961, + 0.397, + 0.4004, + 0.4022, + 0.4, + 0.4042, + 0.3988, + 0.4114, + 0.4031, + 0.4057, + 0.4077, + 0.4027, + 0.4028, + 0.4083, + 0.4001, + 0.4068, + 0.411, + 0.4046, + 0.4106, + 0.4098, + 0.4054, + 0.4117, + 0.4084, + 0.407, + 0.4054, + 0.4116, + 0.4124, + 0.4119, + 0.412, + 0.4066, + 0.4118, + 0.4119, + 0.4107, + 0.413, + 0.4056, + 0.4103, + 0.4076, + 0.4136, + 0.4117, + 0.4165, + 0.4112, + 0.4129, + 0.4125, + 0.413, + 0.4136, + 0.4145, + 0.4119, + 0.4112, + 0.4123, + 0.4126, + 0.4137, + 0.4137, + 0.4146, + 0.4117, + 0.4117, + 0.4125, + 0.4122, + 0.4119, + 0.4125, + 0.4126, + 0.4123, + 0.4122, + 0.412 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.018279677256941795, + 0.05121004581451416, + -0.0035886913537979126, + -0.08221495151519775, + -0.07729659974575043, + -0.03265474736690521, + -0.04502299427986145, + 0.9937314391136169 + ], + "perturbation_rho": [ + -0.0045250579714775085, + -0.0025405026972293854, + 0.02932029776275158, + -0.04425422102212906, + -0.024078164249658585, + 0.036674171686172485, + 0.03741011023521423, + -0.027221273630857468 + ], + "nudging": { + "0.001": [ + -1.8192222341895103e-06, + -1.5320256352424622e-07, + -4.225876182317734e-08, + 1.200241968035698e-07, + 7.438939064741135e-08, + 2.7241185307502747e-08, + 2.759043127298355e-08, + -1.1578667908906937e-06 + ], + "0.003": [ + -5.577923730015755e-06, + -8.606584742665291e-07, + -8.975621312856674e-08, + 4.239846020936966e-07, + 4.441244527697563e-07, + 1.2828968465328217e-07, + 2.665910869836807e-07, + -4.686298780143261e-06 + ], + "0.01": [ + -1.855997834354639e-05, + -2.620276063680649e-06, + -2.4156179279088974e-07, + 1.257285475730896e-06, + 1.3473909348249435e-06, + 5.852198228240013e-07, + 7.433118298649788e-07, + -1.6578123904764652e-05 + ] + }, + "hidden_norms_per_layer": [ + 4948.6533203125, + 63535.5390625, + 485750.46875, + 729670.4375, + 1132140.625, + 1303753.625, + 1400111.125, + 1525175.75, + 735722.25 + ], + "bp_grad_norms_per_layer": [ + 3.38389327225741e-05, + 2.3188813429442234e-06, + 7.230223104670586e-07, + 6.714369078508753e-07, + 6.712992899338133e-07, + 6.754108312634344e-07, + 6.760963060514769e-07, + 6.705485020574997e-07, + 6.541473567267531e-07 + ] + }, + "drift": { + "embed.weight": 38.30467980586698, + "embed.bias": 18.474777878278143, + "blocks.0.ln.weight": 1.0140204865718874, + "blocks.0.w1.weight": 14.619711688794071, + "blocks.0.w1.bias": 11.460165338986966, + "blocks.0.w2.weight": 49.811132056190225, + "blocks.1.ln.weight": 0.943966438295369, + "blocks.1.w1.weight": 19.03349845703188, + "blocks.1.w1.bias": 16.732407132612977, + "blocks.1.w2.weight": 43.9070524360131, + "blocks.2.ln.weight": 0.6544196492657218, + "blocks.2.w1.weight": 18.12360528225708, + "blocks.2.w1.bias": 17.453784588368148, + "blocks.2.w2.weight": 43.32806943352846, + "blocks.3.ln.weight": 0.5531295594988335, + "blocks.3.w1.weight": 18.884523077230778, + "blocks.3.w1.bias": 20.07589187473831, + "blocks.3.w2.weight": 35.34492100476457, + "blocks.4.ln.weight": 0.5269231122615889, + "blocks.4.w1.weight": 17.103043932812323, + "blocks.4.w1.bias": 18.102481284342456, + "blocks.4.w2.weight": 42.32555419027007, + "blocks.5.ln.weight": 0.5628789686632275, + "blocks.5.w1.weight": 16.33193833700236, + "blocks.5.w1.bias": 15.909636919669264, + "blocks.5.w2.weight": 48.27717558625265, + "blocks.6.ln.weight": 0.6467844103525138, + "blocks.6.w1.weight": 16.86172393719663, + "blocks.6.w1.bias": 16.921984405373085, + "blocks.6.w2.weight": 56.44865850476068, + "blocks.7.ln.weight": 0.6477260974049089, + "blocks.7.w1.weight": 19.61181966070048, + "blocks.7.w1.bias": 20.268823478099563, + "blocks.7.w2.weight": 50.05420170873082, + "out_ln.weight": 0.28931782627119323, + "out_head.weight": 5.596138162617972, + "out_head.bias": 1.6180902727109185 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 512, + "num_blocks": 8, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 2 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d512_L8_seed2", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file |
