diff options
Diffstat (limited to 'results/fa_dfa_d256_L8_seed0/results_cifar10.json')
| -rw-r--r-- | results/fa_dfa_d256_L8_seed0/results_cifar10.json | 881 |
1 files changed, 881 insertions, 0 deletions
diff --git a/results/fa_dfa_d256_L8_seed0/results_cifar10.json b/results/fa_dfa_d256_L8_seed0/results_cifar10.json new file mode 100644 index 0000000..2b2a9de --- /dev/null +++ b/results/fa_dfa_d256_L8_seed0/results_cifar10.json @@ -0,0 +1,881 @@ +{ + "0": { + "dfa": { + "log": { + "train_loss": [ + 2.088111647720337, + 2.0657437747192384, + 2.060557443847656, + 2.054936630554199, + 2.0502566048431397, + 2.050682597808838, + 2.0456129079437257, + 2.0430596421813965, + 2.0408702920913697, + 2.0377991006469727, + 2.0389654118347167, + 2.0398873892593383, + 2.0346363846588136, + 2.033068787918091, + 2.034021237792969, + 2.0311379262542726, + 2.027860624542236, + 2.028515229873657, + 2.0275459106063844, + 2.0274439572143557, + 2.027140206680298, + 2.0251305793762207, + 2.0244256614685057, + 2.0226621907043456, + 2.024185022583008, + 2.0238217923736572, + 2.0209359241485596, + 2.021500840301514, + 2.02022657661438, + 2.0203428337097167, + 2.019736885719299, + 2.0196730166625976, + 2.019779687271118, + 2.0194025174331665, + 2.018459284744263, + 2.0198610536193846, + 2.0196038846588134, + 2.0184577390289307, + 2.0181497329711915, + 2.0173270440292357, + 2.015399761352539, + 2.017926648864746, + 2.0153606281280516, + 2.0176641104507445, + 2.0169564332580565, + 2.0160911991119383, + 2.013510660934448, + 2.0139562338256836, + 2.0157755657196046, + 2.016321997756958, + 2.013838740501404, + 2.0138234704589846, + 2.014709682159424, + 2.0130411010742186, + 2.013964093017578, + 2.014050467605591, + 2.010925949783325, + 2.0116148568344117, + 2.01314746383667, + 2.0142779718780517, + 2.0111367880249023, + 2.0113327654266357, + 2.012854333572388, + 2.0125641105651857, + 2.0123231687927245, + 2.0098405307006835, + 2.014055180015564, + 2.0095224347686766, + 2.0092305796813963, + 2.0110340044403077, + 2.011887366104126, + 2.0092078099823, + 2.0123240684509276, + 2.0100160660171507, + 2.0095057219696044, + 2.0116754064559936, + 2.010024980697632, + 2.0104703690338135, + 2.010904312362671, + 2.009006942138672, + 2.00869906539917, + 2.011290683441162, + 2.007994732589722, + 2.010743540802002, + 2.0077583860778807, + 2.010294546661377, + 2.008463388710022, + 2.009644501991272, + 2.008040932922363, + 2.006879263572693, + 2.0084022887420656, + 2.0101632648468017, + 2.0085393448638915, + 2.0086699520492552, + 2.0095553454971316, + 2.0090075594329835, + 2.0092781968688964, + 2.0092810621643067, + 2.008181187438965, + 2.008701148452759 + ], + "train_acc": [ + 0.23208, + 0.23714, + 0.23872, + 0.24144, + 0.24046, + 0.24026, + 0.24398, + 0.24838, + 0.25002, + 0.25004, + 0.24848, + 0.25134, + 0.25284, + 0.25182, + 0.25088, + 0.25354, + 0.2558, + 0.25726, + 0.25768, + 0.2579, + 0.25742, + 0.25918, + 0.26104, + 0.26256, + 0.26114, + 0.26408, + 0.26146, + 0.26318, + 0.26164, + 0.26194, + 0.2647, + 0.26364, + 0.26356, + 0.26318, + 0.26682, + 0.26424, + 0.26142, + 0.26382, + 0.26662, + 0.26438, + 0.26712, + 0.26532, + 0.26594, + 0.26736, + 0.2675, + 0.26522, + 0.2692, + 0.269, + 0.26666, + 0.26752, + 0.2675, + 0.26926, + 0.26798, + 0.27058, + 0.26658, + 0.26826, + 0.26928, + 0.26994, + 0.2699, + 0.26602, + 0.26866, + 0.26892, + 0.2706, + 0.27134, + 0.2703, + 0.27044, + 0.27026, + 0.26908, + 0.27206, + 0.27296, + 0.26956, + 0.27134, + 0.27088, + 0.27122, + 0.27302, + 0.26934, + 0.27088, + 0.27084, + 0.26988, + 0.27138, + 0.27164, + 0.27134, + 0.27262, + 0.27112, + 0.2701, + 0.27036, + 0.27144, + 0.27058, + 0.27068, + 0.27358, + 0.27096, + 0.2722, + 0.27196, + 0.27312, + 0.27102, + 0.2715, + 0.2707, + 0.27166, + 0.27168, + 0.27296 + ], + "test_acc": [ + 0.2527, + 0.2369, + 0.2462, + 0.2468, + 0.2408, + 0.2746, + 0.2703, + 0.2742, + 0.2599, + 0.2725, + 0.2825, + 0.2652, + 0.274, + 0.2721, + 0.2844, + 0.284, + 0.2791, + 0.2452, + 0.2835, + 0.2744, + 0.2688, + 0.2771, + 0.2812, + 0.2906, + 0.2721, + 0.2732, + 0.2831, + 0.2869, + 0.2833, + 0.2796, + 0.2921, + 0.2921, + 0.2884, + 0.2765, + 0.2871, + 0.2752, + 0.2937, + 0.2931, + 0.2944, + 0.2846, + 0.2897, + 0.279, + 0.2762, + 0.2923, + 0.2886, + 0.2881, + 0.2945, + 0.2907, + 0.2894, + 0.2989, + 0.2886, + 0.2845, + 0.2971, + 0.2895, + 0.2979, + 0.2831, + 0.2946, + 0.2923, + 0.2835, + 0.2971, + 0.2911, + 0.2881, + 0.2926, + 0.2971, + 0.2955, + 0.29, + 0.2908, + 0.2875, + 0.2985, + 0.2949, + 0.2881, + 0.2949, + 0.2848, + 0.2922, + 0.2967, + 0.2925, + 0.2881, + 0.2987, + 0.2907, + 0.2969, + 0.294, + 0.2943, + 0.2935, + 0.2909, + 0.2896, + 0.295, + 0.2953, + 0.2912, + 0.2923, + 0.2936, + 0.2948, + 0.2928, + 0.2943, + 0.2948, + 0.2929, + 0.2931, + 0.293, + 0.293, + 0.293, + 0.2932 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.3739473223686218, + 0.0001710604556137696, + -0.00010273385123582557, + 0.0015739527298137546, + -0.0010641771368682384, + -0.00083403434837237, + -0.0002269457036163658, + 0.00010728999768616632 + ], + "perturbation_rho": [ + -0.012351546436548233, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -3.0873343348503113e-07, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 9.313225746154785e-10, + 0.0 + ], + "0.003": [ + -8.507631719112396e-07, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 2.7939677238464355e-09, + 0.0 + ], + "0.01": [ + -2.818182110786438e-06, + 1.862645149230957e-09, + 0.0, + -5.587935447692871e-09, + 0.0, + -5.587935447692871e-09, + 2.7939677238464355e-09, + 0.0 + ] + }, + "hidden_norms_per_layer": [ + 39928.015625, + 244928880.0, + 1463614080.0, + 1492651648.0, + 2068440832.0, + 2077208832.0, + 2699950336.0, + 3167998976.0, + 3559058432.0 + ], + "bp_grad_norms_per_layer": [ + 2.7729842599910626e-07, + 3.92615467914581e-10, + 3.8261058210586896e-10, + 3.827221872754194e-10, + 3.839246975889665e-10, + 3.838843132264458e-10, + 3.8309594385665946e-10, + 3.836255757505569e-10, + 3.848031893127768e-10 + ] + }, + "drift": { + "embed.weight": 355.03769309377134, + "embed.bias": 275.50497763057285, + "blocks.0.ln.weight": 11.560415267944336, + "blocks.0.w1.weight": 217.0016785191733, + "blocks.0.w1.bias": 194.48945621979058, + "blocks.0.w2.weight": 512.6024444572738, + "blocks.1.ln.weight": 11.132081031799316, + "blocks.1.w1.weight": 325.51184428406805, + "blocks.1.w1.bias": 320.01996630850886, + "blocks.1.w2.weight": 501.7286978738936, + "blocks.2.ln.weight": 8.083645820617676, + "blocks.2.w1.weight": 207.09861044191072, + "blocks.2.w1.bias": 194.5111269286842, + "blocks.2.w2.weight": 289.82283688588427, + "blocks.3.ln.weight": 11.859946250915527, + "blocks.3.w1.weight": 335.0771544598146, + "blocks.3.w1.bias": 320.92428204403967, + "blocks.3.w2.weight": 448.5070845635642, + "blocks.4.ln.weight": 7.86250114440918, + "blocks.4.w1.weight": 209.85336811707023, + "blocks.4.w1.bias": 197.98745993011175, + "blocks.4.w2.weight": 286.43335604265127, + "blocks.5.ln.weight": 11.988594055175781, + "blocks.5.w1.weight": 340.3597967146831, + "blocks.5.w1.bias": 320.4706779192804, + "blocks.5.w2.weight": 499.7442316358972, + "blocks.6.ln.weight": 13.202707290649414, + "blocks.6.w1.weight": 376.2778384149751, + "blocks.6.w1.bias": 364.2875039001891, + "blocks.6.w2.weight": 540.0867846092441, + "blocks.7.ln.weight": 11.965360641479492, + "blocks.7.w1.weight": 335.1786572574944, + "blocks.7.w1.bias": 321.09352323787965, + "blocks.7.w2.weight": 471.5612368801969, + "out_ln.weight": 0.7452844381332397, + "out_head.weight": 8.201945333242003, + "out_head.bias": 0.3773728853334405 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.054669055023193, + 1.991719011001587, + 1.9774182040405273, + 1.9712983444976806, + 1.9631762102508545, + 1.959303684387207, + 1.9460533142852783, + 1.9368816702270508, + 1.9278904293060304, + 1.9154337232208252, + 1.912510458755493, + 1.908025641517639, + 1.8988712897491455, + 1.8962811010742187, + 1.8920437637329102, + 1.8893064358520508, + 1.8833487548446655, + 1.882022890625, + 1.8800041366958617, + 1.8740528832626342, + 1.8704966924285888, + 1.8655656155776978, + 1.860756079750061, + 1.8544422786712647, + 1.851064347305298, + 1.8484665994262695, + 1.841072390975952, + 1.8392392129898072, + 1.8378569499969482, + 1.8346193454742432, + 1.82977863861084, + 1.8301198516082764, + 1.8263721365356445, + 1.8249066296768188, + 1.8208455069732665, + 1.8226783791351318, + 1.8220160707855224, + 1.8191678134155274, + 1.81650698387146, + 1.8159678020858765, + 1.810044068222046, + 1.810330848388672, + 1.8103008676910401, + 1.8088883145523071, + 1.8075827156829833, + 1.806718647994995, + 1.8019089365005494, + 1.8028275075531006, + 1.8002472192764283, + 1.8013759677886962, + 1.7999095865631103, + 1.7962470398712158, + 1.7984889847564698, + 1.7963330797958375, + 1.7923067237091064, + 1.793927209777832, + 1.7897032806015014, + 1.7885324808120728, + 1.7894811852645873, + 1.7884446933364868, + 1.7875976447296142, + 1.78652889251709, + 1.7869972435760497, + 1.7855236776351928, + 1.7861184851074219, + 1.781340026550293, + 1.7873938134002685, + 1.7781473917388917, + 1.7804266613006592, + 1.781290369796753, + 1.7821873314666747, + 1.7779191442489624, + 1.780537197303772, + 1.772837181777954, + 1.7783816800689698, + 1.7780200393676757, + 1.7768787881851196, + 1.7751888537216187, + 1.7765182915496827, + 1.774619035987854, + 1.7722581442260743, + 1.7778081979751588, + 1.7721048584365844, + 1.773922477798462, + 1.7717991860198974, + 1.7731413928604125, + 1.7743992209243775, + 1.7737348601913452, + 1.7736994393920897, + 1.7697040644073487, + 1.77587783908844, + 1.770644986305237, + 1.7736803650283814, + 1.7717780599975586, + 1.773167521095276, + 1.7733910553359986, + 1.7736449721527099, + 1.772689087753296, + 1.7709570954132081, + 1.7717201139068603 + ], + "train_acc": [ + 0.2363, + 0.25884, + 0.26796, + 0.2734, + 0.27986, + 0.28188, + 0.2881, + 0.2929, + 0.30118, + 0.3028, + 0.30632, + 0.3073, + 0.31098, + 0.31382, + 0.3148, + 0.31472, + 0.31668, + 0.32106, + 0.32314, + 0.32434, + 0.32476, + 0.32848, + 0.33132, + 0.33338, + 0.33446, + 0.33834, + 0.33702, + 0.33846, + 0.34004, + 0.34296, + 0.34586, + 0.34382, + 0.34452, + 0.34658, + 0.3491, + 0.348, + 0.34758, + 0.35232, + 0.34918, + 0.34832, + 0.35334, + 0.35362, + 0.35472, + 0.35388, + 0.35442, + 0.35508, + 0.35912, + 0.35806, + 0.3582, + 0.3571, + 0.35762, + 0.35974, + 0.35776, + 0.35856, + 0.36102, + 0.36058, + 0.35984, + 0.3606, + 0.36244, + 0.36282, + 0.35994, + 0.36342, + 0.3626, + 0.36326, + 0.36462, + 0.3634, + 0.36056, + 0.36662, + 0.3661, + 0.3627, + 0.36522, + 0.36638, + 0.36216, + 0.36748, + 0.3647, + 0.36344, + 0.36558, + 0.36694, + 0.36642, + 0.36922, + 0.36934, + 0.3653, + 0.36824, + 0.36604, + 0.36928, + 0.3695, + 0.36792, + 0.36662, + 0.3691, + 0.37096, + 0.3664, + 0.36928, + 0.36774, + 0.36802, + 0.36876, + 0.37062, + 0.36878, + 0.36726, + 0.36896, + 0.3694 + ], + "test_acc": [ + 0.2656, + 0.267, + 0.2815, + 0.2894, + 0.2983, + 0.305, + 0.3102, + 0.3197, + 0.3109, + 0.3358, + 0.3417, + 0.3398, + 0.3428, + 0.3516, + 0.3564, + 0.3525, + 0.36, + 0.3373, + 0.3487, + 0.3577, + 0.3604, + 0.3636, + 0.3618, + 0.3746, + 0.3691, + 0.3657, + 0.3717, + 0.376, + 0.3682, + 0.3703, + 0.3784, + 0.3781, + 0.3758, + 0.3784, + 0.3811, + 0.3762, + 0.3767, + 0.3788, + 0.3817, + 0.3799, + 0.3798, + 0.3727, + 0.3786, + 0.3853, + 0.3824, + 0.3821, + 0.3829, + 0.3834, + 0.3843, + 0.3837, + 0.3867, + 0.3812, + 0.3878, + 0.3848, + 0.3881, + 0.385, + 0.3903, + 0.3885, + 0.384, + 0.3856, + 0.3901, + 0.3889, + 0.3863, + 0.3912, + 0.3866, + 0.3914, + 0.3872, + 0.3883, + 0.3914, + 0.3871, + 0.3877, + 0.39, + 0.3872, + 0.3867, + 0.3929, + 0.3915, + 0.3904, + 0.3913, + 0.3909, + 0.3953, + 0.3943, + 0.3937, + 0.3928, + 0.391, + 0.3907, + 0.3928, + 0.3939, + 0.3922, + 0.3922, + 0.3933, + 0.3934, + 0.3924, + 0.3934, + 0.3921, + 0.3934, + 0.3923, + 0.3926, + 0.3927, + 0.3927, + 0.3926 + ] + }, + "diagnostics": { + "bp_cosine": [ + -0.0004061758518218994, + 0.059665147215127945, + -0.00023904931731522083, + 0.04721151292324066, + -0.0009337564697489142, + -0.028840316459536552, + -0.06716784834861755, + 0.9960443377494812 + ], + "perturbation_rho": [ + 0.010079916566610336, + -0.012312020175158978, + 0.0198403000831604, + 0.01036103069782257, + -0.03565298020839691, + 0.012246890924870968, + 0.005117389373481274, + -0.0020000701770186424 + ], + "nudging": { + "0.001": [ + 3.466848284006119e-07, + -1.7741695046424866e-07, + 2.514570951461792e-08, + -2.8405338525772095e-08, + 1.83936208486557e-08, + -2.3283064365386963e-08, + 6.658956408500671e-08, + -1.2598466128110886e-06 + ], + "0.003": [ + 7.851049304008484e-07, + -5.613546818494797e-07, + 2.7706846594810486e-08, + -2.3329630494117737e-07, + -2.3283064365386963e-09, + 5.634501576423645e-08, + 3.5390257835388184e-07, + -4.384666681289673e-06 + ], + "0.01": [ + 2.975575625896454e-06, + -2.0687002688646317e-06, + -4.6100467443466187e-08, + -7.050111889839172e-07, + -2.468004822731018e-08, + 3.227032721042633e-07, + 9.997747838497162e-07, + -1.5316996723413467e-05 + ] + }, + "hidden_norms_per_layer": [ + 7347.82373046875, + 68318.359375, + 457658.875, + 591822.25, + 709413.875, + 821043.4375, + 991908.875, + 1041150.375, + 579088.875 + ], + "bp_grad_norms_per_layer": [ + 1.785214408300817e-05, + 1.5171449376794044e-06, + 8.703580078872619e-07, + 8.675402227709128e-07, + 8.419559094363649e-07, + 8.420378776463622e-07, + 8.421089319199382e-07, + 8.421496886512614e-07, + 8.267679163509456e-07 + ] + }, + "drift": { + "embed.weight": 70.02122616764989, + "embed.bias": 26.83916405091808, + "blocks.0.ln.weight": 1.552919864654541, + "blocks.0.w1.weight": 18.50821863761758, + "blocks.0.w1.bias": 13.116624873818962, + "blocks.0.w2.weight": 79.63032020884965, + "blocks.1.ln.weight": 1.213975429534912, + "blocks.1.w1.weight": 22.51105074783387, + "blocks.1.w1.bias": 18.50130957549983, + "blocks.1.w2.weight": 54.64548642342237, + "blocks.2.ln.weight": 0.9077885150909424, + "blocks.2.w1.weight": 19.292010431221883, + "blocks.2.w1.bias": 19.155835150578653, + "blocks.2.w2.weight": 42.4050815881541, + "blocks.3.ln.weight": 1.022047758102417, + "blocks.3.w1.weight": 21.369964293701976, + "blocks.3.w1.bias": 20.66189871439757, + "blocks.3.w2.weight": 58.43715847809599, + "blocks.4.ln.weight": 1.0435632467269897, + "blocks.4.w1.weight": 22.336963383887138, + "blocks.4.w1.bias": 21.287544536829213, + "blocks.4.w2.weight": 50.161958879048186, + "blocks.5.ln.weight": 0.9771403670310974, + "blocks.5.w1.weight": 24.321956602631435, + "blocks.5.w1.bias": 24.827000479032748, + "blocks.5.w2.weight": 55.476803346488026, + "blocks.6.ln.weight": 1.0375924110412598, + "blocks.6.w1.weight": 22.401649066561536, + "blocks.6.w1.bias": 23.5963789264557, + "blocks.6.w2.weight": 53.39931725577594, + "blocks.7.ln.weight": 1.1293227672576904, + "blocks.7.w1.weight": 24.029515618281994, + "blocks.7.w1.bias": 23.963806041807718, + "blocks.7.w2.weight": 67.90721756504135, + "out_ln.weight": 0.33976390957832336, + "out_head.weight": 5.645579973389043, + "out_head.bias": 1.2526826889976983 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 256, + "num_blocks": 8, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 0 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d256_L8_seed0", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file |
