{ "0": { "dfa": { "log": { "train_loss": [ 2.088111647720337, 2.0657437747192384, 2.060557443847656, 2.054936630554199, 2.0502566048431397, 2.050682597808838, 2.0456129079437257, 2.0430596421813965, 2.0408702920913697, 2.0377991006469727, 2.0389654118347167, 2.0398873892593383, 2.0346363846588136, 2.033068787918091, 2.034021237792969, 2.0311379262542726, 2.027860624542236, 2.028515229873657, 2.0275459106063844, 2.0274439572143557, 2.027140206680298, 2.0251305793762207, 2.0244256614685057, 2.0226621907043456, 2.024185022583008, 2.0238217923736572, 2.0209359241485596, 2.021500840301514, 2.02022657661438, 2.0203428337097167, 2.019736885719299, 2.0196730166625976, 2.019779687271118, 2.0194025174331665, 2.018459284744263, 2.0198610536193846, 2.0196038846588134, 2.0184577390289307, 2.0181497329711915, 2.0173270440292357, 2.015399761352539, 2.017926648864746, 2.0153606281280516, 2.0176641104507445, 2.0169564332580565, 2.0160911991119383, 2.013510660934448, 2.0139562338256836, 2.0157755657196046, 2.016321997756958, 2.013838740501404, 2.0138234704589846, 2.014709682159424, 2.0130411010742186, 2.013964093017578, 2.014050467605591, 2.010925949783325, 2.0116148568344117, 2.01314746383667, 2.0142779718780517, 2.0111367880249023, 2.0113327654266357, 2.012854333572388, 2.0125641105651857, 2.0123231687927245, 2.0098405307006835, 2.014055180015564, 2.0095224347686766, 2.0092305796813963, 2.0110340044403077, 2.011887366104126, 2.0092078099823, 2.0123240684509276, 2.0100160660171507, 2.0095057219696044, 2.0116754064559936, 2.010024980697632, 2.0104703690338135, 2.010904312362671, 2.009006942138672, 2.00869906539917, 2.011290683441162, 2.007994732589722, 2.010743540802002, 2.0077583860778807, 2.010294546661377, 2.008463388710022, 2.009644501991272, 2.008040932922363, 2.006879263572693, 2.0084022887420656, 2.0101632648468017, 2.0085393448638915, 2.0086699520492552, 2.0095553454971316, 2.0090075594329835, 2.0092781968688964, 2.0092810621643067, 2.008181187438965, 2.008701148452759 ], "train_acc": [ 0.23208, 0.23714, 0.23872, 0.24144, 0.24046, 0.24026, 0.24398, 0.24838, 0.25002, 0.25004, 0.24848, 0.25134, 0.25284, 0.25182, 0.25088, 0.25354, 0.2558, 0.25726, 0.25768, 0.2579, 0.25742, 0.25918, 0.26104, 0.26256, 0.26114, 0.26408, 0.26146, 0.26318, 0.26164, 0.26194, 0.2647, 0.26364, 0.26356, 0.26318, 0.26682, 0.26424, 0.26142, 0.26382, 0.26662, 0.26438, 0.26712, 0.26532, 0.26594, 0.26736, 0.2675, 0.26522, 0.2692, 0.269, 0.26666, 0.26752, 0.2675, 0.26926, 0.26798, 0.27058, 0.26658, 0.26826, 0.26928, 0.26994, 0.2699, 0.26602, 0.26866, 0.26892, 0.2706, 0.27134, 0.2703, 0.27044, 0.27026, 0.26908, 0.27206, 0.27296, 0.26956, 0.27134, 0.27088, 0.27122, 0.27302, 0.26934, 0.27088, 0.27084, 0.26988, 0.27138, 0.27164, 0.27134, 0.27262, 0.27112, 0.2701, 0.27036, 0.27144, 0.27058, 0.27068, 0.27358, 0.27096, 0.2722, 0.27196, 0.27312, 0.27102, 0.2715, 0.2707, 0.27166, 0.27168, 0.27296 ], "test_acc": [ 0.2527, 0.2369, 0.2462, 0.2468, 0.2408, 0.2746, 0.2703, 0.2742, 0.2599, 0.2725, 0.2825, 0.2652, 0.274, 0.2721, 0.2844, 0.284, 0.2791, 0.2452, 0.2835, 0.2744, 0.2688, 0.2771, 0.2812, 0.2906, 0.2721, 0.2732, 0.2831, 0.2869, 0.2833, 0.2796, 0.2921, 0.2921, 0.2884, 0.2765, 0.2871, 0.2752, 0.2937, 0.2931, 0.2944, 0.2846, 0.2897, 0.279, 0.2762, 0.2923, 0.2886, 0.2881, 0.2945, 0.2907, 0.2894, 0.2989, 0.2886, 0.2845, 0.2971, 0.2895, 0.2979, 0.2831, 0.2946, 0.2923, 0.2835, 0.2971, 0.2911, 0.2881, 0.2926, 0.2971, 0.2955, 0.29, 0.2908, 0.2875, 0.2985, 0.2949, 0.2881, 0.2949, 0.2848, 0.2922, 0.2967, 0.2925, 0.2881, 0.2987, 0.2907, 0.2969, 0.294, 0.2943, 0.2935, 0.2909, 0.2896, 0.295, 0.2953, 0.2912, 0.2923, 0.2936, 0.2948, 0.2928, 0.2943, 0.2948, 0.2929, 0.2931, 0.293, 0.293, 0.293, 0.2932 ] }, "diagnostics": { "bp_cosine": [ 0.3739473223686218, 0.0001710604556137696, -0.00010273385123582557, 0.0015739527298137546, -0.0010641771368682384, -0.00083403434837237, -0.0002269457036163658, 0.00010728999768616632 ], "perturbation_rho": [ -0.012351546436548233, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ], "nudging": { "0.001": [ -3.0873343348503113e-07, 0.0, 0.0, 0.0, 0.0, 0.0, 9.313225746154785e-10, 0.0 ], "0.003": [ -8.507631719112396e-07, 0.0, 0.0, 0.0, 0.0, 0.0, 2.7939677238464355e-09, 0.0 ], "0.01": [ -2.818182110786438e-06, 1.862645149230957e-09, 0.0, -5.587935447692871e-09, 0.0, -5.587935447692871e-09, 2.7939677238464355e-09, 0.0 ] }, "hidden_norms_per_layer": [ 39928.015625, 244928880.0, 1463614080.0, 1492651648.0, 2068440832.0, 2077208832.0, 2699950336.0, 3167998976.0, 3559058432.0 ], "bp_grad_norms_per_layer": [ 2.7729842599910626e-07, 3.92615467914581e-10, 3.8261058210586896e-10, 3.827221872754194e-10, 3.839246975889665e-10, 3.838843132264458e-10, 3.8309594385665946e-10, 3.836255757505569e-10, 3.848031893127768e-10 ] }, "drift": { "embed.weight": 355.03769309377134, "embed.bias": 275.50497763057285, "blocks.0.ln.weight": 11.560415267944336, "blocks.0.w1.weight": 217.0016785191733, "blocks.0.w1.bias": 194.48945621979058, "blocks.0.w2.weight": 512.6024444572738, "blocks.1.ln.weight": 11.132081031799316, "blocks.1.w1.weight": 325.51184428406805, "blocks.1.w1.bias": 320.01996630850886, "blocks.1.w2.weight": 501.7286978738936, "blocks.2.ln.weight": 8.083645820617676, "blocks.2.w1.weight": 207.09861044191072, "blocks.2.w1.bias": 194.5111269286842, "blocks.2.w2.weight": 289.82283688588427, "blocks.3.ln.weight": 11.859946250915527, "blocks.3.w1.weight": 335.0771544598146, "blocks.3.w1.bias": 320.92428204403967, "blocks.3.w2.weight": 448.5070845635642, "blocks.4.ln.weight": 7.86250114440918, "blocks.4.w1.weight": 209.85336811707023, "blocks.4.w1.bias": 197.98745993011175, "blocks.4.w2.weight": 286.43335604265127, "blocks.5.ln.weight": 11.988594055175781, "blocks.5.w1.weight": 340.3597967146831, "blocks.5.w1.bias": 320.4706779192804, "blocks.5.w2.weight": 499.7442316358972, "blocks.6.ln.weight": 13.202707290649414, "blocks.6.w1.weight": 376.2778384149751, "blocks.6.w1.bias": 364.2875039001891, "blocks.6.w2.weight": 540.0867846092441, "blocks.7.ln.weight": 11.965360641479492, "blocks.7.w1.weight": 335.1786572574944, "blocks.7.w1.bias": 321.09352323787965, "blocks.7.w2.weight": 471.5612368801969, "out_ln.weight": 0.7452844381332397, "out_head.weight": 8.201945333242003, "out_head.bias": 0.3773728853334405 } }, "fa": { "log": { "train_loss": [ 2.054669055023193, 1.991719011001587, 1.9774182040405273, 1.9712983444976806, 1.9631762102508545, 1.959303684387207, 1.9460533142852783, 1.9368816702270508, 1.9278904293060304, 1.9154337232208252, 1.912510458755493, 1.908025641517639, 1.8988712897491455, 1.8962811010742187, 1.8920437637329102, 1.8893064358520508, 1.8833487548446655, 1.882022890625, 1.8800041366958617, 1.8740528832626342, 1.8704966924285888, 1.8655656155776978, 1.860756079750061, 1.8544422786712647, 1.851064347305298, 1.8484665994262695, 1.841072390975952, 1.8392392129898072, 1.8378569499969482, 1.8346193454742432, 1.82977863861084, 1.8301198516082764, 1.8263721365356445, 1.8249066296768188, 1.8208455069732665, 1.8226783791351318, 1.8220160707855224, 1.8191678134155274, 1.81650698387146, 1.8159678020858765, 1.810044068222046, 1.810330848388672, 1.8103008676910401, 1.8088883145523071, 1.8075827156829833, 1.806718647994995, 1.8019089365005494, 1.8028275075531006, 1.8002472192764283, 1.8013759677886962, 1.7999095865631103, 1.7962470398712158, 1.7984889847564698, 1.7963330797958375, 1.7923067237091064, 1.793927209777832, 1.7897032806015014, 1.7885324808120728, 1.7894811852645873, 1.7884446933364868, 1.7875976447296142, 1.78652889251709, 1.7869972435760497, 1.7855236776351928, 1.7861184851074219, 1.781340026550293, 1.7873938134002685, 1.7781473917388917, 1.7804266613006592, 1.781290369796753, 1.7821873314666747, 1.7779191442489624, 1.780537197303772, 1.772837181777954, 1.7783816800689698, 1.7780200393676757, 1.7768787881851196, 1.7751888537216187, 1.7765182915496827, 1.774619035987854, 1.7722581442260743, 1.7778081979751588, 1.7721048584365844, 1.773922477798462, 1.7717991860198974, 1.7731413928604125, 1.7743992209243775, 1.7737348601913452, 1.7736994393920897, 1.7697040644073487, 1.77587783908844, 1.770644986305237, 1.7736803650283814, 1.7717780599975586, 1.773167521095276, 1.7733910553359986, 1.7736449721527099, 1.772689087753296, 1.7709570954132081, 1.7717201139068603 ], "train_acc": [ 0.2363, 0.25884, 0.26796, 0.2734, 0.27986, 0.28188, 0.2881, 0.2929, 0.30118, 0.3028, 0.30632, 0.3073, 0.31098, 0.31382, 0.3148, 0.31472, 0.31668, 0.32106, 0.32314, 0.32434, 0.32476, 0.32848, 0.33132, 0.33338, 0.33446, 0.33834, 0.33702, 0.33846, 0.34004, 0.34296, 0.34586, 0.34382, 0.34452, 0.34658, 0.3491, 0.348, 0.34758, 0.35232, 0.34918, 0.34832, 0.35334, 0.35362, 0.35472, 0.35388, 0.35442, 0.35508, 0.35912, 0.35806, 0.3582, 0.3571, 0.35762, 0.35974, 0.35776, 0.35856, 0.36102, 0.36058, 0.35984, 0.3606, 0.36244, 0.36282, 0.35994, 0.36342, 0.3626, 0.36326, 0.36462, 0.3634, 0.36056, 0.36662, 0.3661, 0.3627, 0.36522, 0.36638, 0.36216, 0.36748, 0.3647, 0.36344, 0.36558, 0.36694, 0.36642, 0.36922, 0.36934, 0.3653, 0.36824, 0.36604, 0.36928, 0.3695, 0.36792, 0.36662, 0.3691, 0.37096, 0.3664, 0.36928, 0.36774, 0.36802, 0.36876, 0.37062, 0.36878, 0.36726, 0.36896, 0.3694 ], "test_acc": [ 0.2656, 0.267, 0.2815, 0.2894, 0.2983, 0.305, 0.3102, 0.3197, 0.3109, 0.3358, 0.3417, 0.3398, 0.3428, 0.3516, 0.3564, 0.3525, 0.36, 0.3373, 0.3487, 0.3577, 0.3604, 0.3636, 0.3618, 0.3746, 0.3691, 0.3657, 0.3717, 0.376, 0.3682, 0.3703, 0.3784, 0.3781, 0.3758, 0.3784, 0.3811, 0.3762, 0.3767, 0.3788, 0.3817, 0.3799, 0.3798, 0.3727, 0.3786, 0.3853, 0.3824, 0.3821, 0.3829, 0.3834, 0.3843, 0.3837, 0.3867, 0.3812, 0.3878, 0.3848, 0.3881, 0.385, 0.3903, 0.3885, 0.384, 0.3856, 0.3901, 0.3889, 0.3863, 0.3912, 0.3866, 0.3914, 0.3872, 0.3883, 0.3914, 0.3871, 0.3877, 0.39, 0.3872, 0.3867, 0.3929, 0.3915, 0.3904, 0.3913, 0.3909, 0.3953, 0.3943, 0.3937, 0.3928, 0.391, 0.3907, 0.3928, 0.3939, 0.3922, 0.3922, 0.3933, 0.3934, 0.3924, 0.3934, 0.3921, 0.3934, 0.3923, 0.3926, 0.3927, 0.3927, 0.3926 ] }, "diagnostics": { "bp_cosine": [ -0.0004061758518218994, 0.059665147215127945, -0.00023904931731522083, 0.04721151292324066, -0.0009337564697489142, -0.028840316459536552, -0.06716784834861755, 0.9960443377494812 ], "perturbation_rho": [ 0.010079916566610336, -0.012312020175158978, 0.0198403000831604, 0.01036103069782257, -0.03565298020839691, 0.012246890924870968, 0.005117389373481274, -0.0020000701770186424 ], "nudging": { "0.001": [ 3.466848284006119e-07, -1.7741695046424866e-07, 2.514570951461792e-08, -2.8405338525772095e-08, 1.83936208486557e-08, -2.3283064365386963e-08, 6.658956408500671e-08, -1.2598466128110886e-06 ], "0.003": [ 7.851049304008484e-07, -5.613546818494797e-07, 2.7706846594810486e-08, -2.3329630494117737e-07, -2.3283064365386963e-09, 5.634501576423645e-08, 3.5390257835388184e-07, -4.384666681289673e-06 ], "0.01": [ 2.975575625896454e-06, -2.0687002688646317e-06, -4.6100467443466187e-08, -7.050111889839172e-07, -2.468004822731018e-08, 3.227032721042633e-07, 9.997747838497162e-07, -1.5316996723413467e-05 ] }, "hidden_norms_per_layer": [ 7347.82373046875, 68318.359375, 457658.875, 591822.25, 709413.875, 821043.4375, 991908.875, 1041150.375, 579088.875 ], "bp_grad_norms_per_layer": [ 1.785214408300817e-05, 1.5171449376794044e-06, 8.703580078872619e-07, 8.675402227709128e-07, 8.419559094363649e-07, 8.420378776463622e-07, 8.421089319199382e-07, 8.421496886512614e-07, 8.267679163509456e-07 ] }, "drift": { "embed.weight": 70.02122616764989, "embed.bias": 26.83916405091808, "blocks.0.ln.weight": 1.552919864654541, "blocks.0.w1.weight": 18.50821863761758, "blocks.0.w1.bias": 13.116624873818962, "blocks.0.w2.weight": 79.63032020884965, "blocks.1.ln.weight": 1.213975429534912, "blocks.1.w1.weight": 22.51105074783387, "blocks.1.w1.bias": 18.50130957549983, "blocks.1.w2.weight": 54.64548642342237, "blocks.2.ln.weight": 0.9077885150909424, "blocks.2.w1.weight": 19.292010431221883, "blocks.2.w1.bias": 19.155835150578653, "blocks.2.w2.weight": 42.4050815881541, "blocks.3.ln.weight": 1.022047758102417, "blocks.3.w1.weight": 21.369964293701976, "blocks.3.w1.bias": 20.66189871439757, "blocks.3.w2.weight": 58.43715847809599, "blocks.4.ln.weight": 1.0435632467269897, "blocks.4.w1.weight": 22.336963383887138, "blocks.4.w1.bias": 21.287544536829213, "blocks.4.w2.weight": 50.161958879048186, "blocks.5.ln.weight": 0.9771403670310974, "blocks.5.w1.weight": 24.321956602631435, "blocks.5.w1.bias": 24.827000479032748, "blocks.5.w2.weight": 55.476803346488026, "blocks.6.ln.weight": 1.0375924110412598, "blocks.6.w1.weight": 22.401649066561536, "blocks.6.w1.bias": 23.5963789264557, "blocks.6.w2.weight": 53.39931725577594, "blocks.7.ln.weight": 1.1293227672576904, "blocks.7.w1.weight": 24.029515618281994, "blocks.7.w1.bias": 23.963806041807718, "blocks.7.w2.weight": 67.90721756504135, "out_ln.weight": 0.33976390957832336, "out_head.weight": 5.645579973389043, "out_head.bias": 1.2526826889976983 } } }, "config": { "dataset": "cifar10", "d_hidden": 256, "num_blocks": 8, "batch_size": 128, "epochs": 100, "lr": 0.001, "lr_fb": 0.001, "wd": 0.01, "lam": 0.1, "K": 4, "sigma_bridge": 0.05, "ema_momentum": 0.995, "term_grad_weight": 1.0, "seeds": [ 0 ], "gpu": 0, "output_dir": "results/fa_dfa_d256_L8_seed0", "methods": [ "fa", "dfa" ], "random_targets": false, "penalty_lam": 0.0, "num_classes": 10 } }