diff options
Diffstat (limited to 'results/fa_dfa_d512_L6_seed0/results_cifar10.json')
| -rw-r--r-- | results/fa_dfa_d512_L6_seed0/results_cifar10.json | 837 |
1 files changed, 837 insertions, 0 deletions
diff --git a/results/fa_dfa_d512_L6_seed0/results_cifar10.json b/results/fa_dfa_d512_L6_seed0/results_cifar10.json new file mode 100644 index 0000000..821b0e0 --- /dev/null +++ b/results/fa_dfa_d512_L6_seed0/results_cifar10.json @@ -0,0 +1,837 @@ +{ + "0": { + "dfa": { + "log": { + "train_loss": [ + 2.0753555574798583, + 2.0437081289672854, + 2.031278556365967, + 2.027659920578003, + 2.0243590501403808, + 2.0202790953826906, + 2.0175082940673827, + 2.015957561340332, + 2.016036532974243, + 2.013858056335449, + 2.0126232041931154, + 2.006905050048828, + 2.0110664825439453, + 2.0065614931488036, + 2.007456555709839, + 2.0070649071502684, + 2.0075456425476075, + 2.004424341583252, + 2.005271594772339, + 2.0039944921875, + 2.0024667238998415, + 2.000112854309082, + 1.9993625988769532, + 2.002483249664307, + 2.0039204889678954, + 1.9985878760528564, + 2.0006166967010497, + 2.0003138301086425, + 1.9996023558807372, + 1.9986100010681151, + 1.99738998298645, + 1.9980814916229248, + 1.996693759994507, + 2.0003671773529055, + 1.9974340956878662, + 1.9980345281982421, + 1.9958612198638916, + 1.9961950925445557, + 1.9993210222625732, + 1.9953513426971436, + 1.9962950857925414, + 1.9969137393951415, + 1.9964947815704346, + 1.9980549166870116, + 1.9973483322525025, + 1.9953596134185791, + 1.995900859451294, + 1.9963403786849976, + 1.9947577867126465, + 1.996345650177002, + 1.9976896784210205, + 1.9959460092163086, + 1.9935208990478515, + 1.9943153414154053, + 1.9918208218765259, + 1.9932028875350951, + 1.9944638661956786, + 1.995019360809326, + 1.9929348825073243, + 1.9962419706726073, + 1.9949486227035522, + 1.991672697906494, + 1.9935600751495361, + 1.9919422644424438, + 1.994082801437378, + 1.9919944891738892, + 1.9940126430511476, + 1.9923382126617433, + 1.9926940571594238, + 1.9947953038024901, + 1.993876279335022, + 1.992678325843811, + 1.992137566757202, + 1.9900086630630494, + 1.9903694836044312, + 1.9928425230407716, + 1.9906801569366455, + 1.9912908507919311, + 1.9918341095352172, + 1.9906631398773194, + 1.9910551013183593, + 1.9912693448257446, + 1.9916500932312011, + 1.992011311416626, + 1.9930540170669555, + 1.9900453987121582, + 1.9914034769058226, + 1.9921153451156617, + 1.9884680337142944, + 1.9897785013198852, + 1.9896507069396974, + 1.9915314351654052, + 1.9887127932357789, + 1.9907834010314942, + 1.990130773010254, + 1.9915763851547241, + 1.9891119548797607, + 1.9894035186386108, + 1.9906070321273803, + 1.990175763015747 + ], + "train_acc": [ + 0.23202, + 0.24612, + 0.2534, + 0.25296, + 0.25586, + 0.25736, + 0.26078, + 0.25968, + 0.26376, + 0.26194, + 0.26464, + 0.26644, + 0.26328, + 0.26692, + 0.26742, + 0.26744, + 0.26808, + 0.2689, + 0.26662, + 0.26898, + 0.27116, + 0.27066, + 0.27046, + 0.2737, + 0.26932, + 0.27098, + 0.27496, + 0.2742, + 0.27396, + 0.27362, + 0.27356, + 0.27542, + 0.2752, + 0.27348, + 0.27492, + 0.2737, + 0.27614, + 0.27596, + 0.27528, + 0.27582, + 0.27624, + 0.27556, + 0.27432, + 0.27686, + 0.27726, + 0.278, + 0.27874, + 0.27706, + 0.27906, + 0.27922, + 0.27678, + 0.27996, + 0.27996, + 0.27956, + 0.27936, + 0.27786, + 0.27958, + 0.27888, + 0.2801, + 0.2786, + 0.28002, + 0.281, + 0.28102, + 0.27928, + 0.27852, + 0.2814, + 0.28026, + 0.28168, + 0.28254, + 0.27894, + 0.28102, + 0.28146, + 0.27998, + 0.28272, + 0.28324, + 0.28034, + 0.28126, + 0.28156, + 0.28228, + 0.28094, + 0.28084, + 0.28196, + 0.28344, + 0.28236, + 0.28084, + 0.28244, + 0.2821, + 0.28254, + 0.28398, + 0.2826, + 0.28278, + 0.28214, + 0.28376, + 0.28172, + 0.28234, + 0.28182, + 0.28146, + 0.28132, + 0.2826, + 0.28366 + ], + "test_acc": [ + 0.2527, + 0.2731, + 0.2652, + 0.2735, + 0.2758, + 0.2862, + 0.2692, + 0.2901, + 0.2963, + 0.2897, + 0.2793, + 0.2945, + 0.2734, + 0.3044, + 0.2915, + 0.2965, + 0.2975, + 0.2951, + 0.3018, + 0.2954, + 0.2868, + 0.2943, + 0.3068, + 0.2639, + 0.3007, + 0.303, + 0.3009, + 0.3059, + 0.3024, + 0.2876, + 0.2984, + 0.2926, + 0.3024, + 0.3034, + 0.2906, + 0.3054, + 0.2978, + 0.2872, + 0.3036, + 0.3052, + 0.2874, + 0.3045, + 0.3028, + 0.2949, + 0.2981, + 0.3047, + 0.3073, + 0.2937, + 0.3078, + 0.2894, + 0.3098, + 0.2913, + 0.2897, + 0.306, + 0.3075, + 0.302, + 0.3025, + 0.2982, + 0.2983, + 0.31, + 0.3071, + 0.3013, + 0.309, + 0.3042, + 0.3009, + 0.2958, + 0.3024, + 0.3013, + 0.3055, + 0.3016, + 0.3041, + 0.3077, + 0.3023, + 0.3111, + 0.308, + 0.3005, + 0.3051, + 0.3101, + 0.3103, + 0.3006, + 0.3025, + 0.3034, + 0.3043, + 0.3082, + 0.3011, + 0.3069, + 0.3051, + 0.3023, + 0.3033, + 0.3051, + 0.3041, + 0.3044, + 0.3029, + 0.3047, + 0.3047, + 0.3048, + 0.3056, + 0.3056, + 0.3053, + 0.3053 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.3933757543563843, + -0.00033686644746921957, + -0.00029227498453110456, + 0.00023268867516890168, + -0.00033252357388846576, + -0.0006308911251835525 + ], + "perturbation_rho": [ + -0.005786933470517397, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -4.791654646396637e-07, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -1.2507662177085876e-06, + 0.0, + 0.0, + -3.725290298461914e-09, + 0.0, + 0.0 + ], + "0.01": [ + -4.01865690946579e-06, + 0.0, + 1.862645149230957e-09, + -3.725290298461914e-09, + 0.0, + 0.0 + ] + }, + "hidden_norms_per_layer": [ + 52006.18359375, + 1299919104.0, + 4471346176.0, + 5541193216.0, + 6115124736.0, + 6220777984.0, + 7776075776.0 + ], + "bp_grad_norms_per_layer": [ + 2.7690890647136257e-07, + 2.9755062302960766e-10, + 2.9703242643286387e-10, + 2.9693505987360425e-10, + 2.969274826014612e-10, + 2.96939195454371e-10, + 2.9694927072831945e-10 + ] + }, + "drift": { + "embed.weight": 330.18669911453446, + "embed.bias": 275.0701726252062, + "blocks.0.ln.weight": 9.87127425013801, + "blocks.0.w1.weight": 288.2752398777733, + "blocks.0.w1.bias": 276.1693244742186, + "blocks.0.w2.weight": 476.3482702241823, + "blocks.1.ln.weight": 9.208131003871356, + "blocks.1.w1.weight": 379.93945965650295, + "blocks.1.w1.bias": 370.165908360661, + "blocks.1.w2.weight": 391.3759720182112, + "blocks.2.ln.weight": 9.291829925999405, + "blocks.2.w1.weight": 372.47037498699524, + "blocks.2.w1.bias": 340.94957924841253, + "blocks.2.w2.weight": 343.62414266271355, + "blocks.3.ln.weight": 9.704848817987399, + "blocks.3.w1.weight": 334.76567440579134, + "blocks.3.w1.bias": 307.03384934243365, + "blocks.3.w2.weight": 310.86785550010995, + "blocks.4.ln.weight": 6.86934636896043, + "blocks.4.w1.weight": 263.30065941816594, + "blocks.4.w1.bias": 243.63618296008045, + "blocks.4.w2.weight": 243.60310044137827, + "blocks.5.ln.weight": 10.060319390509356, + "blocks.5.w1.weight": 402.62899842011797, + "blocks.5.w1.bias": 374.4871410963097, + "blocks.5.w2.weight": 379.02482222844293, + "out_ln.weight": 0.614063493216844, + "out_head.weight": 8.991195841592733, + "out_head.bias": 0.8181543795651063 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.0268119436264036, + 1.9347526413726808, + 1.9036040418243407, + 1.8875383782958985, + 1.8758800409317016, + 1.8637015169525146, + 1.8550788135147094, + 1.8481883679580688, + 1.8380642176055908, + 1.831563844680786, + 1.82749318901062, + 1.814368692855835, + 1.817901651649475, + 1.8037349676895142, + 1.8005951739883423, + 1.7895659107208253, + 1.7900659426879884, + 1.781074461402893, + 1.7845501969146729, + 1.776343214149475, + 1.7684208422470094, + 1.7643969812774658, + 1.7617348025894164, + 1.7578030713653565, + 1.7565854375457763, + 1.7562964432525634, + 1.7528118307113647, + 1.7484837558364867, + 1.743814429550171, + 1.7366932116699219, + 1.7326225136566162, + 1.728937064590454, + 1.7266727477645873, + 1.7240253580474854, + 1.7245794469833373, + 1.7228408333969116, + 1.7203827739715576, + 1.711478896446228, + 1.7089925986099244, + 1.7088872208023071, + 1.7068994373321533, + 1.7094496100234986, + 1.7018463577270508, + 1.7043216220855713, + 1.6993152593231202, + 1.6993111734771729, + 1.6920886862182618, + 1.6940990381622314, + 1.6973865487670898, + 1.693602544631958, + 1.6929154531478883, + 1.6890407666397094, + 1.685359003982544, + 1.684633869857788, + 1.6784778961181641, + 1.6846174112701415, + 1.6849634936904907, + 1.6800030670928956, + 1.6778543420410157, + 1.6783534344482423, + 1.6748548553848266, + 1.675967246170044, + 1.674832339515686, + 1.6731126987075806, + 1.6737006020355225, + 1.670804006958008, + 1.6716182923126222, + 1.6689927773284912, + 1.6716524493026734, + 1.6704032614517212, + 1.6676370931625366, + 1.6686026779174805, + 1.6649452212142943, + 1.6682810161972046, + 1.6657524144744873, + 1.666575444908142, + 1.6638936541748046, + 1.6632415933609008, + 1.6630835901641845, + 1.6650469580078124, + 1.661507155380249, + 1.6635552446746826, + 1.6633718565368651, + 1.6654531225204467, + 1.660396284713745, + 1.6597816809463501, + 1.6641134811019898, + 1.6597988692855834, + 1.6597074938583374, + 1.6637430121612549, + 1.657964035987854, + 1.6597277429580688, + 1.65512205078125, + 1.66050142578125, + 1.6606662057495116, + 1.6582156159210204, + 1.657457571105957, + 1.6597890829086304, + 1.6602586605834961, + 1.6589343154525757 + ], + "train_acc": [ + 0.2612, + 0.30246, + 0.31784, + 0.3241, + 0.32576, + 0.33054, + 0.33502, + 0.33912, + 0.34148, + 0.34222, + 0.34698, + 0.34792, + 0.34712, + 0.35504, + 0.35516, + 0.35796, + 0.35952, + 0.36286, + 0.36188, + 0.36396, + 0.36778, + 0.36878, + 0.3676, + 0.36954, + 0.37154, + 0.36936, + 0.37584, + 0.3732, + 0.37446, + 0.37702, + 0.37832, + 0.38196, + 0.37868, + 0.3824, + 0.38276, + 0.38352, + 0.38208, + 0.3852, + 0.3863, + 0.38682, + 0.38838, + 0.38784, + 0.3886, + 0.38966, + 0.38966, + 0.39112, + 0.39534, + 0.39184, + 0.39354, + 0.39498, + 0.39364, + 0.39514, + 0.3952, + 0.39942, + 0.40118, + 0.39886, + 0.3962, + 0.39814, + 0.39946, + 0.40066, + 0.39902, + 0.40112, + 0.40356, + 0.40358, + 0.40162, + 0.40426, + 0.40428, + 0.4024, + 0.40284, + 0.40386, + 0.4073, + 0.4049, + 0.40354, + 0.40386, + 0.40588, + 0.4046, + 0.40704, + 0.40718, + 0.40888, + 0.405, + 0.41078, + 0.40724, + 0.40814, + 0.40858, + 0.40918, + 0.40666, + 0.40436, + 0.41216, + 0.40734, + 0.40822, + 0.40996, + 0.40958, + 0.411, + 0.40964, + 0.40704, + 0.40942, + 0.41122, + 0.40922, + 0.40942, + 0.40982 + ], + "test_acc": [ + 0.3205, + 0.3401, + 0.3549, + 0.3534, + 0.3599, + 0.3678, + 0.3633, + 0.3657, + 0.3754, + 0.3736, + 0.3895, + 0.3818, + 0.3742, + 0.3765, + 0.383, + 0.3823, + 0.3883, + 0.4012, + 0.3985, + 0.3927, + 0.3954, + 0.3928, + 0.3965, + 0.3899, + 0.4035, + 0.4001, + 0.4034, + 0.4104, + 0.4022, + 0.4106, + 0.3951, + 0.4038, + 0.4094, + 0.4181, + 0.4124, + 0.4094, + 0.4171, + 0.417, + 0.4163, + 0.418, + 0.4157, + 0.4251, + 0.4269, + 0.4201, + 0.4151, + 0.4222, + 0.4264, + 0.4171, + 0.4209, + 0.4171, + 0.4187, + 0.4239, + 0.4214, + 0.4154, + 0.4241, + 0.4166, + 0.424, + 0.4208, + 0.4265, + 0.427, + 0.4223, + 0.4248, + 0.4301, + 0.4312, + 0.4273, + 0.4313, + 0.4304, + 0.4296, + 0.426, + 0.4255, + 0.4306, + 0.4314, + 0.4315, + 0.4326, + 0.4331, + 0.432, + 0.432, + 0.4294, + 0.4335, + 0.433, + 0.437, + 0.4319, + 0.4304, + 0.4327, + 0.4321, + 0.4319, + 0.4333, + 0.4357, + 0.4315, + 0.4337, + 0.434, + 0.4346, + 0.4342, + 0.4334, + 0.4336, + 0.4328, + 0.434, + 0.4344, + 0.434, + 0.434 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.020984500646591187, + 0.06538723409175873, + -0.02862982451915741, + -0.14415404200553894, + -0.12824469804763794, + 0.9942382574081421 + ], + "perturbation_rho": [ + 0.04626474529504776, + 0.00916149839758873, + 0.02606651559472084, + -0.0706678107380867, + 0.0447549931704998, + -0.07931974530220032 + ], + "nudging": { + "0.001": [ + -2.3448956198990345e-06, + -6.940681487321854e-07, + 9.400537237524986e-08, + 4.384201020002365e-07, + 2.918532118201256e-07, + -2.525397576391697e-06 + ], + "0.003": [ + -7.02321995049715e-06, + -2.0685838535428047e-06, + 3.417953848838806e-07, + 1.5673576854169369e-06, + 1.2195087037980556e-06, + -9.568408131599426e-06 + ], + "0.01": [ + -2.358935307711363e-05, + -7.018155883997679e-06, + 9.208451956510544e-07, + 4.6023051254451275e-06, + 4.1344319470226765e-06, + -3.3148215152323246e-05 + ] + }, + "hidden_norms_per_layer": [ + 3922.13720703125, + 49122.51171875, + 264047.5, + 465089.875, + 831614.3125, + 1037417.9375, + 282876.625 + ], + "bp_grad_norms_per_layer": [ + 3.9657443267060444e-05, + 4.083395651832689e-06, + 1.3594802794614225e-06, + 1.2777863958035596e-06, + 1.2765268593284418e-06, + 1.2795584325431264e-06, + 1.2760270919898176e-06 + ] + }, + "drift": { + "embed.weight": 31.47039846350604, + "embed.bias": 18.02217544364072, + "blocks.0.ln.weight": 0.9789876040215849, + "blocks.0.w1.weight": 13.428105491977634, + "blocks.0.w1.bias": 11.014509469722494, + "blocks.0.w2.weight": 44.375832792200086, + "blocks.1.ln.weight": 0.9256838094480255, + "blocks.1.w1.weight": 16.049693958454462, + "blocks.1.w1.bias": 12.671067703031834, + "blocks.1.w2.weight": 46.940642430560914, + "blocks.2.ln.weight": 0.768237594938939, + "blocks.2.w1.weight": 16.878210463024953, + "blocks.2.w1.bias": 14.246556953188842, + "blocks.2.w2.weight": 45.47089120587146, + "blocks.3.ln.weight": 0.6449156612044069, + "blocks.3.w1.weight": 17.300866761556396, + "blocks.3.w1.bias": 17.793175424183687, + "blocks.3.w2.weight": 34.146538375073234, + "blocks.4.ln.weight": 0.536074289047981, + "blocks.4.w1.weight": 15.870552255415648, + "blocks.4.w1.bias": 17.679587984495768, + "blocks.4.w2.weight": 27.222411899497516, + "blocks.5.ln.weight": 0.5977975068115631, + "blocks.5.w1.weight": 18.813068681696937, + "blocks.5.w1.bias": 22.173876530236612, + "blocks.5.w2.weight": 27.925157214653215, + "out_ln.weight": 0.29706709348499905, + "out_head.weight": 4.788666797102501, + "out_head.bias": 2.266760510860064 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 512, + "num_blocks": 6, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 0 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d512_L6_seed0", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file |
