From aa73718eb6427d7da3b9cb416275802d90c4b2ed Mon Sep 17 00:00:00 2001 From: YurenHao0426 Date: Sun, 14 Jun 2026 04:06:32 -0500 Subject: Add new experiment scripts, figures, and paper assets; untrack pyc/build artifacts Co-Authored-By: Claude Opus 4.8 (1M context) --- .../cifar_depth_scan_multiseed/d512_L4_s123.json | 838 +++++++++++++++++++++ .../cifar_depth_scan_multiseed/d512_L4_s456.json | 838 +++++++++++++++++++++ results/cifar_depth_scan_multiseed/summary.json | 78 ++ 3 files changed, 1754 insertions(+) create mode 100644 results/cifar_depth_scan_multiseed/d512_L4_s123.json create mode 100644 results/cifar_depth_scan_multiseed/d512_L4_s456.json create mode 100644 results/cifar_depth_scan_multiseed/summary.json (limited to 'results/cifar_depth_scan_multiseed') diff --git a/results/cifar_depth_scan_multiseed/d512_L4_s123.json b/results/cifar_depth_scan_multiseed/d512_L4_s123.json new file mode 100644 index 0000000..65a54a8 --- /dev/null +++ b/results/cifar_depth_scan_multiseed/d512_L4_s123.json @@ -0,0 +1,838 @@ +{ + "dfa": { + "log": { + "train_loss": [ + 2.0624400663757325, + 2.0397126093292237, + 2.030224827308655, + 2.019537830963135, + 2.0098417961883546, + 2.0099143604278566, + 2.001345584373474, + 1.997260925064087, + 1.9985963092803956, + 1.993651236228943, + 1.9917502028656007, + 1.9918365036010741, + 1.9975997705841064, + 1.9904954579925538, + 1.9898213328552246, + 1.9904397552490234, + 1.9868707651519775, + 1.9902398801422119, + 1.9854657730484009, + 1.9914187986755372, + 1.985543049468994, + 1.988907001800537, + 1.9854262001800538, + 1.985992236480713, + 1.986677961502075, + 1.9851684880065918, + 1.9878835204315186, + 1.9870538790893555, + 1.984294542236328, + 1.9862867044067383, + 1.9819232828140259, + 1.98281076171875, + 1.9850609212493897, + 1.9814563494873048, + 1.9814497323989868, + 1.985917557067871, + 1.9854490676498413, + 1.9831627449798583, + 1.9817213195037842, + 1.9821227896118163, + 1.983226878051758, + 1.9835711795043944, + 1.9833458575057983, + 1.9817056420516967, + 1.9815308977890014, + 1.9825032497406005, + 1.9806674496459962, + 1.9796993829345704, + 1.9813827806091309, + 1.9796657073974608, + 1.9811469369506836, + 1.9779993580627442, + 1.9808807947540283, + 1.9770961724853515, + 1.9780911437225341, + 1.977968225440979, + 1.9786042345428467, + 1.9761781659698487, + 1.9768042601013183, + 1.9778708429718017, + 1.9789468997192383, + 1.977910756225586, + 1.9747010653686523, + 1.9750921910858155, + 1.978596909751892, + 1.9746475157928467, + 1.9765909009552003, + 1.9770333652114869, + 1.973500870285034, + 1.9746166478729248, + 1.9764599881362914, + 1.9757590785217285, + 1.9747783449554444, + 1.9717445582580566, + 1.9745237543487548, + 1.9730215407562255, + 1.973571043701172, + 1.9736142820739746, + 1.9728294008636476, + 1.974929390296936, + 1.9747015952301026, + 1.9722450610733033, + 1.9724271701049805, + 1.9719529797363282, + 1.9742696553039552, + 1.9725008160018922, + 1.9723811784362792, + 1.9741300776672364, + 1.973212038269043, + 1.971276873435974, + 1.9703804751205445, + 1.970821933631897, + 1.9726865364074706, + 1.9739636318588256, + 1.9740656425476075, + 1.9727160183334351, + 1.9740542842102051, + 1.971614676208496, + 1.971007470932007, + 1.9721037426757813 + ], + "train_acc": [ + 0.23992, + 0.24876, + 0.25324, + 0.25864, + 0.26564, + 0.2615, + 0.26848, + 0.27098, + 0.26694, + 0.26916, + 0.27382, + 0.27204, + 0.27188, + 0.2717, + 0.2728, + 0.27254, + 0.27468, + 0.27354, + 0.27584, + 0.27626, + 0.27632, + 0.2744, + 0.27728, + 0.2785, + 0.27958, + 0.27762, + 0.27576, + 0.27726, + 0.27874, + 0.27928, + 0.28054, + 0.27888, + 0.27832, + 0.27914, + 0.27884, + 0.27718, + 0.27824, + 0.2804, + 0.27976, + 0.27946, + 0.28172, + 0.2795, + 0.28108, + 0.28218, + 0.27982, + 0.27774, + 0.28186, + 0.28412, + 0.28204, + 0.2815, + 0.28198, + 0.28554, + 0.2811, + 0.28404, + 0.28402, + 0.28164, + 0.28256, + 0.2828, + 0.28234, + 0.28364, + 0.28262, + 0.28518, + 0.28496, + 0.28426, + 0.28172, + 0.28612, + 0.28492, + 0.28286, + 0.28482, + 0.28532, + 0.28536, + 0.28336, + 0.28372, + 0.28758, + 0.28676, + 0.28552, + 0.28688, + 0.28368, + 0.28704, + 0.28502, + 0.28718, + 0.2875, + 0.28606, + 0.2879, + 0.28476, + 0.28654, + 0.28584, + 0.28448, + 0.2866, + 0.285, + 0.28856, + 0.28764, + 0.28518, + 0.28624, + 0.28576, + 0.28584, + 0.28608, + 0.28572, + 0.28552, + 0.28808 + ], + "test_acc": [ + 0.278, + 0.2691, + 0.2704, + 0.2679, + 0.2967, + 0.277, + 0.2844, + 0.2727, + 0.2956, + 0.2896, + 0.2868, + 0.3071, + 0.2921, + 0.2959, + 0.2702, + 0.2866, + 0.2803, + 0.3031, + 0.2824, + 0.2907, + 0.3033, + 0.2871, + 0.299, + 0.2951, + 0.2985, + 0.286, + 0.2974, + 0.2932, + 0.2974, + 0.3103, + 0.289, + 0.3118, + 0.3113, + 0.3008, + 0.2943, + 0.2927, + 0.3043, + 0.3027, + 0.303, + 0.2838, + 0.291, + 0.3025, + 0.3031, + 0.3022, + 0.3058, + 0.3064, + 0.3029, + 0.2923, + 0.2905, + 0.3044, + 0.3046, + 0.3085, + 0.3098, + 0.3073, + 0.2895, + 0.3096, + 0.3094, + 0.2988, + 0.2985, + 0.3078, + 0.3008, + 0.3075, + 0.3094, + 0.3081, + 0.305, + 0.2999, + 0.311, + 0.3039, + 0.305, + 0.3045, + 0.3106, + 0.3084, + 0.3096, + 0.3111, + 0.3133, + 0.3093, + 0.3035, + 0.3052, + 0.3052, + 0.3127, + 0.3085, + 0.3042, + 0.3093, + 0.3092, + 0.3111, + 0.3122, + 0.3093, + 0.3087, + 0.308, + 0.31, + 0.3088, + 0.3101, + 0.3083, + 0.3106, + 0.3082, + 0.3098, + 0.3092, + 0.309, + 0.3094, + 0.3094 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.4263063073158264, + 0.0001657301909290254, + 0.0015438150148838758, + -0.00019233977945987135 + ], + "perturbation_rho": [ + 0.009619573131203651, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -5.541369318962097e-07, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -1.4677643775939941e-06, + 1.862645149230957e-09, + 0.0, + 0.0 + ], + "0.01": [ + -4.9551017582416534e-06, + 5.587935447692871e-09, + 0.0, + -9.313225746154785e-10 + ] + } + }, + "drift": { + "embed.weight": 309.23071938279907, + "embed.bias": 237.22614756651578, + "blocks.0.ln.weight": 8.168762727821633, + "blocks.0.w1.weight": 195.29175323586153, + "blocks.0.w1.bias": 144.58261416253544, + "blocks.0.w2.weight": 309.9917799247131, + "blocks.1.ln.weight": 7.339870422040846, + "blocks.1.w1.weight": 295.9775177425126, + "blocks.1.w1.bias": 264.8927251390922, + "blocks.1.w2.weight": 245.06528152841986, + "blocks.2.ln.weight": 5.568128485175647, + "blocks.2.w1.weight": 190.7912001656625, + "blocks.2.w1.bias": 151.63622211328425, + "blocks.2.w2.weight": 148.49851386877077, + "blocks.3.ln.weight": 6.179513040361593, + "blocks.3.w1.weight": 205.73090318196944, + "blocks.3.w1.bias": 165.1061911212938, + "blocks.3.w2.weight": 158.15418176798514, + "out_ln.weight": 0.501473453849158, + "out_head.weight": 8.012264954942612, + "out_head.bias": 1.9789555595618953 + } + }, + "credit_bridge": { + "log": { + "train_loss": [ + 2.047448694076538, + 2.0313234769439696, + 2.020185037612915, + 2.010881477203369, + 2.0009254308319093, + 1.996725665550232, + 1.9918568433380126, + 1.9913989479064942, + 1.9935748266220092, + 1.9885124765396118, + 1.9861768546295167, + 1.9848651294326782, + 1.9840049396514892, + 1.983628706588745, + 1.9854573066711425, + 1.9872153559875487, + 1.9828620668029786, + 1.9831350064849853, + 1.9787096114730836, + 1.9825948291397095, + 1.982805096092224, + 1.9789331387710571, + 1.9828052577972413, + 1.9828253454589844, + 1.9806131177520752, + 1.9788864892578124, + 1.9819215051651, + 1.980471068572998, + 1.9807338523101807, + 1.9813471894073487, + 1.9765760861968995, + 1.9785934501647948, + 1.9759084671783447, + 1.9761677234649657, + 1.9766588953399657, + 1.9750333673095704, + 1.9727988452911378, + 1.976060122756958, + 1.9735999352264404, + 1.9706104679870606, + 1.9753494220733643, + 1.9759558032989502, + 1.9787567910766601, + 1.9837647161483765, + 1.9871420777893067, + 1.987115750579834, + 1.9864891412734986, + 1.9884751352310182, + 1.9899889309310914, + 1.9925686660385131, + 1.9907978774261474, + 1.9877149448013305, + 1.9817013835906983, + 1.980732074661255, + 1.9820370742034912, + 1.9818651236724854, + 1.9819183142089845, + 1.983378551673889, + 1.9774607040023804, + 1.9741114377593993, + 1.9718304233551025, + 1.9683720745849609, + 1.9687809296417236, + 1.9646731536102295, + 1.966298240814209, + 1.9667856047821044, + 1.9649899264526367, + 1.9666670780181885, + 1.967674755706787, + 1.9645581008148194, + 1.9643389911651612, + 1.9642895327377319, + 1.9622753365707397, + 1.9615250595855713, + 1.958518320388794, + 1.959536176071167, + 1.956633321876526, + 1.955765027809143, + 1.955853212966919, + 1.9559229345321656, + 1.9547552616882324, + 1.9537322317886352, + 1.955186512069702, + 1.9555984322357178, + 1.9526400441741942, + 1.9539651677703858, + 1.9541091006469726, + 1.9521852050018311, + 1.9506305475616454, + 1.9516619800567627, + 1.9523800668716431, + 1.9526127795410155, + 1.9500486698150634, + 1.9506466804504394, + 1.950422335205078, + 1.9492678187561034, + 1.951801809387207, + 1.9492061517333985, + 1.9487847713470459, + 1.9479388536834716 + ], + "train_acc": [ + 0.24982, + 0.25432, + 0.25988, + 0.26398, + 0.26826, + 0.27072, + 0.2736, + 0.27368, + 0.27026, + 0.2744, + 0.27562, + 0.2762, + 0.27898, + 0.27752, + 0.27714, + 0.27558, + 0.27794, + 0.27886, + 0.27944, + 0.27866, + 0.27782, + 0.2802, + 0.27774, + 0.2782, + 0.27834, + 0.28176, + 0.27964, + 0.28122, + 0.28302, + 0.2803, + 0.28168, + 0.28492, + 0.28662, + 0.28498, + 0.28398, + 0.28418, + 0.28436, + 0.28406, + 0.285, + 0.28506, + 0.2853, + 0.28632, + 0.2816, + 0.27674, + 0.27452, + 0.2762, + 0.27286, + 0.27104, + 0.26842, + 0.26674, + 0.2682, + 0.27086, + 0.2721, + 0.27334, + 0.26858, + 0.26754, + 0.2682, + 0.26736, + 0.27326, + 0.2725, + 0.27848, + 0.2799, + 0.2766, + 0.27852, + 0.27936, + 0.27878, + 0.27724, + 0.27918, + 0.27932, + 0.28118, + 0.28126, + 0.28392, + 0.2803, + 0.28266, + 0.286, + 0.28438, + 0.28684, + 0.28472, + 0.28482, + 0.28542, + 0.2862, + 0.2881, + 0.28612, + 0.2846, + 0.28796, + 0.28742, + 0.2881, + 0.28934, + 0.28792, + 0.2901, + 0.28916, + 0.28972, + 0.28884, + 0.28898, + 0.29152, + 0.2882, + 0.28858, + 0.28908, + 0.28988, + 0.2907 + ], + "test_acc": [ + 0.2708, + 0.2877, + 0.2844, + 0.3019, + 0.2856, + 0.2682, + 0.2969, + 0.2967, + 0.3046, + 0.2904, + 0.3, + 0.301, + 0.3127, + 0.3024, + 0.3016, + 0.2968, + 0.2932, + 0.2853, + 0.2994, + 0.3163, + 0.3043, + 0.3064, + 0.3019, + 0.3067, + 0.3108, + 0.2873, + 0.313, + 0.294, + 0.2992, + 0.2957, + 0.3138, + 0.2825, + 0.3008, + 0.313, + 0.2963, + 0.2933, + 0.2959, + 0.302, + 0.3025, + 0.2988, + 0.2862, + 0.2898, + 0.288, + 0.2864, + 0.2839, + 0.2921, + 0.2735, + 0.2758, + 0.2837, + 0.2809, + 0.2859, + 0.2985, + 0.2934, + 0.2953, + 0.2901, + 0.2936, + 0.2736, + 0.2867, + 0.2944, + 0.2772, + 0.2934, + 0.2907, + 0.2657, + 0.2904, + 0.2948, + 0.2986, + 0.295, + 0.2922, + 0.2922, + 0.2917, + 0.3014, + 0.2951, + 0.3037, + 0.2992, + 0.2986, + 0.3016, + 0.2992, + 0.2954, + 0.3074, + 0.3062, + 0.3064, + 0.3057, + 0.303, + 0.3132, + 0.3005, + 0.3005, + 0.3063, + 0.3046, + 0.3109, + 0.3151, + 0.3084, + 0.3104, + 0.3085, + 0.3095, + 0.3074, + 0.3073, + 0.3089, + 0.3099, + 0.3091, + 0.3091 + ], + "value_loss": [ + 0.5707502987289429, + 0.21804388338804245, + 0.15581646837711335, + 0.12560613882541657, + 0.09998264230251312, + 0.0933005502653122, + 0.07152107969522477, + 0.0807156308054924, + 0.0758870889377594, + 0.0725011654818058, + 0.06433650768041611, + 0.0583218602848053, + 0.05447098765850067, + 0.05356317266821861, + 0.051104292290210726, + 0.04830431792974472, + 0.04666164525568485, + 0.04492894763946533, + 0.03697128227233887, + 0.044636598999500274, + 0.038840959733724596, + 0.03668111073076725, + 0.04674931382536888, + 0.03399278607428074, + 0.03293712543725968, + 0.031108056619167327, + 0.0359199301469326, + 0.032191682063341144, + 0.030914719166755675, + 0.034918713810443876, + 0.027279326229095457, + 0.028772687133550644, + 0.026532491153478624, + 0.03022626201868057, + 0.02425762990474701, + 0.021495298286676408, + 0.02340421486377716, + 0.02494628700852394, + 0.02157571573317051, + 0.02458729513168335, + 0.030457366015315054, + 0.028464497868418693, + 0.03454268118262291, + 0.033964355611801146, + 0.03478494201660156, + 0.03390908203601837, + 0.03329608275532722, + 0.02784320188641548, + 0.02793454117655754, + 0.023065205497145653, + 0.029398028488755227, + 0.024307609224021434, + 0.021599815479516983, + 0.022070472690463066, + 0.022135936530828477, + 0.02179758728504181, + 0.022954062497019766, + 0.022859104943275452, + 0.024180895167589186, + 0.019018265507221223, + 0.01852666168630123, + 0.015004176079034806, + 0.017163365394473076, + 0.0168545845746994, + 0.01648236249387264, + 0.016145528084635735, + 0.015413791033029556, + 0.015835543455481528, + 0.013550898374915124, + 0.011481919240653515, + 0.011319914956986903, + 0.011722228462398052, + 0.010268688924312592, + 0.01009241207242012, + 0.008388303059637546, + 0.00844380562722683, + 0.006402937684953213, + 0.006658818064033985, + 0.005926967875659466, + 0.005063385500609875, + 0.0050483594045042995, + 0.004696262401491403, + 0.0043795296615362165, + 0.003987229224145412, + 0.0035997102437913416, + 0.0031672756604850293, + 0.002853435079678893, + 0.0022479154869914056, + 0.0024048341078311206, + 0.002326442019492388, + 0.0018791391399502755, + 0.0019763806109130383, + 0.001660491597019136, + 0.001620488502755761, + 0.0015598841778188945, + 0.0014834263022989035, + 0.0016181138510629534, + 0.001651853753849864, + 0.0013820433163642884, + 0.0014498794532939792 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.4222239851951599, + 0.024459093809127808, + 0.03995171934366226, + 0.04084722697734833 + ], + "perturbation_rho": [ + 0.0041291676461696625, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -9.620562195777893e-07, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -2.819579094648361e-06, + 0.0, + 0.0, + 0.0 + ], + "0.01": [ + -9.313225746154785e-06, + 0.0, + 0.0, + 0.0 + ] + } + }, + "drift": { + "embed.weight": 169.77199532845293, + "embed.bias": 94.39731873768694, + "blocks.0.ln.weight": 7.473003215045953, + "blocks.0.w1.weight": 145.00989153500845, + "blocks.0.w1.bias": 73.71991188904822, + "blocks.0.w2.weight": 216.26672683471344, + "blocks.1.ln.weight": 5.6302704740332, + "blocks.1.w1.weight": 287.6493822307464, + "blocks.1.w1.bias": 275.5944088792273, + "blocks.1.w2.weight": 219.66609078475946, + "blocks.2.ln.weight": 5.395047896943873, + "blocks.2.w1.weight": 191.14272489250746, + "blocks.2.w1.bias": 161.75927742876627, + "blocks.2.w2.weight": 145.48860169566208, + "blocks.3.ln.weight": 5.19878957620753, + "blocks.3.w1.weight": 200.1583978400427, + "blocks.3.w1.bias": 170.38103589953525, + "blocks.3.w2.weight": 151.26043219994418, + "out_ln.weight": 0.33097813227586326, + "out_head.weight": 5.511603241177177, + "out_head.bias": 1.1054650448531782 + } + } +} \ No newline at end of file diff --git a/results/cifar_depth_scan_multiseed/d512_L4_s456.json b/results/cifar_depth_scan_multiseed/d512_L4_s456.json new file mode 100644 index 0000000..d65e1d7 --- /dev/null +++ b/results/cifar_depth_scan_multiseed/d512_L4_s456.json @@ -0,0 +1,838 @@ +{ + "dfa": { + "log": { + "train_loss": [ + 2.036063042526245, + 2.021697966270447, + 2.0175282815551756, + 2.0099474266815185, + 2.0023637646484373, + 1.9997982411193849, + 1.994500837135315, + 1.9948944988250732, + 1.9934745358657837, + 1.9940558145904541, + 1.989395669708252, + 1.9907533937835693, + 1.9909110592269899, + 1.9884686621856689, + 1.9896867315673827, + 1.9894394902038575, + 1.9898611724472046, + 1.9885188024139404, + 1.987230980796814, + 1.9882823168945312, + 1.989858097000122, + 1.9871241819763183, + 1.9898512621307374, + 1.9869138607788086, + 1.989206803970337, + 1.988134487876892, + 1.986097745704651, + 1.9872820987701416, + 1.9868804365539552, + 1.9866938884735108, + 1.985588796081543, + 1.986480817642212, + 1.9833618145370484, + 1.9839750741958617, + 1.9829646868133546, + 1.9850209189605712, + 1.9842300912857056, + 1.9849878060150146, + 1.9847687102508544, + 1.9829170959854125, + 1.984829327354431, + 1.9833095259094238, + 1.9831390253067016, + 1.9819875910186768, + 1.9806521664810182, + 1.9821762627410888, + 1.9833445807647705, + 1.9825785246276855, + 1.9821733226013183, + 1.9822086193847657, + 1.9808910798645019, + 1.9806908596801758, + 1.9812886644744874, + 1.9800793671417236, + 1.9819620014953614, + 1.9800292537689208, + 1.979716037750244, + 1.978658859024048, + 1.9786357926940918, + 1.9784277129745484, + 1.9790855308532715, + 1.9804114780426025, + 1.9788293676757813, + 1.9797553412628175, + 1.9789648551940917, + 1.9809454851913453, + 1.9784064822006227, + 1.9783699047851562, + 1.9753275060272217, + 1.980608378677368, + 1.9775778076553345, + 1.9774704048919678, + 1.9763223318481444, + 1.978011087913513, + 1.97850968624115, + 1.9783587040710449, + 1.9769906464385987, + 1.9791353679656982, + 1.97542318939209, + 1.9777735634613036, + 1.975693401260376, + 1.9773611065673828, + 1.9757630486679076, + 1.9765492203521728, + 1.975723260231018, + 1.9762512990951537, + 1.9754029594421387, + 1.9746607822418214, + 1.9762510384368897, + 1.9770903870391845, + 1.9748862505340576, + 1.976749499130249, + 1.976892409286499, + 1.9757273367309571, + 1.972305432357788, + 1.9770755380630494, + 1.9764292287445069, + 1.9747362934875488, + 1.975049077911377, + 1.976587531967163 + ], + "train_acc": [ + 0.25522, + 0.26076, + 0.26296, + 0.26702, + 0.2714, + 0.27334, + 0.27538, + 0.27482, + 0.27362, + 0.27556, + 0.2766, + 0.27682, + 0.27788, + 0.27572, + 0.2761, + 0.28036, + 0.2788, + 0.2774, + 0.27756, + 0.27672, + 0.2781, + 0.27916, + 0.27804, + 0.2809, + 0.27756, + 0.28, + 0.28036, + 0.2806, + 0.27974, + 0.27884, + 0.28096, + 0.28062, + 0.28074, + 0.2811, + 0.27964, + 0.28086, + 0.2816, + 0.27984, + 0.28018, + 0.28336, + 0.28218, + 0.28254, + 0.28162, + 0.2826, + 0.28312, + 0.2808, + 0.28174, + 0.28084, + 0.28134, + 0.28002, + 0.28486, + 0.28302, + 0.28208, + 0.28486, + 0.28308, + 0.2868, + 0.28404, + 0.28324, + 0.28522, + 0.28274, + 0.28296, + 0.2849, + 0.28348, + 0.28502, + 0.2858, + 0.28408, + 0.28474, + 0.28406, + 0.28494, + 0.28688, + 0.28452, + 0.2866, + 0.28644, + 0.28752, + 0.2848, + 0.2847, + 0.28528, + 0.28742, + 0.28474, + 0.28446, + 0.28554, + 0.28544, + 0.2885, + 0.28666, + 0.28764, + 0.28658, + 0.28774, + 0.2877, + 0.2864, + 0.28752, + 0.28678, + 0.28682, + 0.28516, + 0.28676, + 0.28974, + 0.2863, + 0.28726, + 0.28506, + 0.2869, + 0.28632 + ], + "test_acc": [ + 0.2764, + 0.2831, + 0.2553, + 0.2822, + 0.2726, + 0.3049, + 0.2889, + 0.2915, + 0.2919, + 0.29, + 0.2917, + 0.3012, + 0.3024, + 0.2861, + 0.2951, + 0.2962, + 0.2897, + 0.3072, + 0.3078, + 0.2958, + 0.3093, + 0.3003, + 0.3061, + 0.2877, + 0.2989, + 0.3088, + 0.2985, + 0.3014, + 0.2986, + 0.3011, + 0.2905, + 0.2903, + 0.3073, + 0.3026, + 0.2959, + 0.3046, + 0.2985, + 0.2826, + 0.3078, + 0.3067, + 0.311, + 0.2923, + 0.3024, + 0.2948, + 0.3099, + 0.3032, + 0.3032, + 0.2891, + 0.3125, + 0.2991, + 0.3019, + 0.3096, + 0.3008, + 0.3084, + 0.3091, + 0.3074, + 0.3035, + 0.3102, + 0.3007, + 0.2979, + 0.306, + 0.309, + 0.3073, + 0.2987, + 0.3052, + 0.2988, + 0.306, + 0.2997, + 0.3018, + 0.3041, + 0.3108, + 0.3085, + 0.3074, + 0.2964, + 0.3099, + 0.3028, + 0.3144, + 0.3079, + 0.3112, + 0.3078, + 0.307, + 0.3066, + 0.3091, + 0.312, + 0.3103, + 0.3094, + 0.3098, + 0.3109, + 0.3078, + 0.311, + 0.3066, + 0.3075, + 0.31, + 0.3083, + 0.3101, + 0.3094, + 0.3091, + 0.3094, + 0.3093, + 0.3092 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.4088655710220337, + -0.002644116524606943, + -0.0004242559662088752, + -0.0010030616540461779 + ], + "perturbation_rho": [ + 0.017800072208046913, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -5.755573511123657e-07, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -1.5613622963428497e-06, + 0.0, + 3.725290298461914e-09, + 0.0 + ], + "0.01": [ + -4.863366484642029e-06, + 8.381903171539307e-09, + 3.725290298461914e-09, + 0.0 + ] + } + }, + "drift": { + "embed.weight": 315.7518315228289, + "embed.bias": 267.4648316812159, + "blocks.0.ln.weight": 8.951174139569966, + "blocks.0.w1.weight": 192.5650439107619, + "blocks.0.w1.bias": 171.4174148721031, + "blocks.0.w2.weight": 313.89070882314047, + "blocks.1.ln.weight": 6.8925622022167055, + "blocks.1.w1.weight": 194.8736554676167, + "blocks.1.w1.bias": 185.61167242903338, + "blocks.1.w2.weight": 199.17952398047407, + "blocks.2.ln.weight": 8.397539871551576, + "blocks.2.w1.weight": 312.5130188038433, + "blocks.2.w1.bias": 283.112828695419, + "blocks.2.w2.weight": 249.32467631496087, + "blocks.3.ln.weight": 6.76367039376874, + "blocks.3.w1.weight": 241.46568621944522, + "blocks.3.w1.bias": 203.66496160187438, + "blocks.3.w2.weight": 186.01939293630215, + "out_ln.weight": 0.5298360495994237, + "out_head.weight": 8.435630315194661, + "out_head.bias": 0.7630893123248244 + } + }, + "credit_bridge": { + "log": { + "train_loss": [ + 2.047214571609497, + 2.0258394496154786, + 2.014380089035034, + 2.0146764693450927, + 2.002668328704834, + 2.000372503089905, + 2.001418789329529, + 1.998143268814087, + 1.9978233530426026, + 1.9975765690612792, + 1.9943444046783447, + 1.9969068594741821, + 1.9981716201019286, + 1.9975472425842284, + 1.9952556930541991, + 1.995059373474121, + 1.995745087661743, + 1.9953922092437744, + 1.9958506728363037, + 1.9966995294189454, + 1.9936217088317871, + 1.9977384044647217, + 1.9900378979492188, + 1.995351834487915, + 1.9918289903259276, + 1.9904872145843506, + 1.9942275651550292, + 1.989791995162964, + 1.9887557349014282, + 1.988693539352417, + 1.9892886767959594, + 1.9865470782089234, + 1.9907586919784546, + 1.9870898510742188, + 1.9892321113586426, + 1.98550331451416, + 1.9859091914367675, + 1.9834753859710694, + 1.98342726020813, + 1.9825064794158935, + 1.975549033241272, + 1.9788008226776124, + 1.9745526809692382, + 1.9800522961425782, + 1.9824256409454346, + 1.9816255992126466, + 1.978651096534729, + 1.9725130395507813, + 1.9663972332000732, + 1.9632064117050172, + 1.9650584650421143, + 1.9675109244537354, + 1.9645280550765991, + 1.9664162315368652, + 1.9687347400665283, + 1.9710959251403808, + 1.9732261672210694, + 1.974084882888794, + 1.9758672645568847, + 1.9739485864257813, + 1.9705300562286376, + 1.9686042825317382, + 1.965795273361206, + 1.9660617874908448, + 1.9628826525115968, + 1.9620544732666017, + 1.9629843045043944, + 1.9600788848114015, + 1.9614591297912598, + 1.959416093711853, + 1.9595416427993775, + 1.9580717747497558, + 1.9569868729400635, + 1.9550456240081786, + 1.9526267935180663, + 1.9543473984146118, + 1.9556241884994507, + 1.953918418045044, + 1.9537964162445067, + 1.9556760054779052, + 1.954758342666626, + 1.954609064102173, + 1.9536275988006593, + 1.9515601125717164, + 1.9516277051544189, + 1.9505773259735106, + 1.9528203700256348, + 1.9523416848754882, + 1.9502265241241454, + 1.9548965677642822, + 1.9504040404510499, + 1.9516542065048217, + 1.9515247675323486, + 1.9528300396347047, + 1.9510270280456543, + 1.9527411913299562, + 1.9511454486465454, + 1.9502003860092163, + 1.9522995095825195, + 1.9499988179779053 + ], + "train_acc": [ + 0.2499, + 0.25788, + 0.26494, + 0.26084, + 0.26878, + 0.2712, + 0.27118, + 0.27266, + 0.27206, + 0.27154, + 0.27462, + 0.27238, + 0.27458, + 0.27032, + 0.27216, + 0.27462, + 0.27516, + 0.27298, + 0.27476, + 0.27324, + 0.27506, + 0.274, + 0.27674, + 0.27228, + 0.27442, + 0.27654, + 0.27692, + 0.27454, + 0.2804, + 0.27958, + 0.27752, + 0.27736, + 0.27586, + 0.2782, + 0.27726, + 0.28116, + 0.2789, + 0.27918, + 0.27974, + 0.27888, + 0.28152, + 0.28328, + 0.28688, + 0.28546, + 0.28702, + 0.28784, + 0.28384, + 0.2859, + 0.28622, + 0.28602, + 0.28538, + 0.28386, + 0.2856, + 0.28078, + 0.28262, + 0.28218, + 0.27848, + 0.27654, + 0.27756, + 0.2757, + 0.2768, + 0.27826, + 0.27982, + 0.2823, + 0.2833, + 0.28266, + 0.28444, + 0.28494, + 0.28888, + 0.28544, + 0.28652, + 0.28844, + 0.28944, + 0.2881, + 0.28956, + 0.2906, + 0.29176, + 0.28834, + 0.2891, + 0.28948, + 0.29038, + 0.29102, + 0.29088, + 0.2909, + 0.29376, + 0.29154, + 0.29132, + 0.29376, + 0.29256, + 0.29118, + 0.29196, + 0.2941, + 0.29126, + 0.29016, + 0.2928, + 0.29118, + 0.29464, + 0.29172, + 0.29392, + 0.29224 + ], + "test_acc": [ + 0.268, + 0.2814, + 0.2568, + 0.292, + 0.2886, + 0.2937, + 0.2938, + 0.2863, + 0.2814, + 0.2905, + 0.275, + 0.3008, + 0.2934, + 0.2729, + 0.2989, + 0.2943, + 0.3016, + 0.3044, + 0.2873, + 0.2852, + 0.2866, + 0.2992, + 0.2724, + 0.2684, + 0.2827, + 0.304, + 0.2923, + 0.2958, + 0.2981, + 0.2918, + 0.2992, + 0.2875, + 0.2948, + 0.2875, + 0.3111, + 0.2978, + 0.3045, + 0.2924, + 0.2877, + 0.3019, + 0.2913, + 0.2895, + 0.2916, + 0.3021, + 0.3003, + 0.3099, + 0.3144, + 0.3095, + 0.3134, + 0.3026, + 0.3103, + 0.313, + 0.3017, + 0.2915, + 0.3025, + 0.3131, + 0.3051, + 0.2965, + 0.2797, + 0.2977, + 0.297, + 0.2923, + 0.3018, + 0.3011, + 0.2965, + 0.2952, + 0.3104, + 0.3054, + 0.3027, + 0.2976, + 0.3061, + 0.3103, + 0.2906, + 0.3087, + 0.3045, + 0.3048, + 0.3059, + 0.3052, + 0.3091, + 0.3066, + 0.3069, + 0.302, + 0.3095, + 0.3084, + 0.3047, + 0.3052, + 0.3046, + 0.312, + 0.3046, + 0.3087, + 0.3074, + 0.3071, + 0.3084, + 0.3055, + 0.3087, + 0.306, + 0.3077, + 0.3083, + 0.3083, + 0.3082 + ], + "value_loss": [ + 0.4913383872461319, + 0.22707418837547302, + 0.13916742013454436, + 0.13212530642986298, + 0.10880221369981766, + 0.08938120811700821, + 0.08285075999736786, + 0.07436370565414428, + 0.0646062517285347, + 0.06708085874557496, + 0.06273384610176086, + 0.064812075984478, + 0.049855572044849396, + 0.05398712279319763, + 0.052991126668453216, + 0.050567634534835815, + 0.04312473271250725, + 0.04782974468231201, + 0.04543443771123886, + 0.04484746047854424, + 0.035203484983444215, + 0.04272815336585045, + 0.03375269600868225, + 0.03881277943253517, + 0.034244361296892165, + 0.027195509301424028, + 0.030852211108207704, + 0.028148308331370355, + 0.02536116129040718, + 0.02516651110470295, + 0.025302239354252815, + 0.024717796894311906, + 0.026331115382909775, + 0.02268559746146202, + 0.021856163937449456, + 0.02109427482008934, + 0.01978621674835682, + 0.020399272651076317, + 0.019002752946615218, + 0.023610892441272735, + 0.023741913932561875, + 0.027797962927818297, + 0.027950212181806565, + 0.03767011716604233, + 0.03873864560246468, + 0.04052356805562973, + 0.037897691878676414, + 0.03509879319190979, + 0.0298798540186882, + 0.02826528010547161, + 0.025150670776367186, + 0.023189803918004036, + 0.02258694800913334, + 0.0227879896363616, + 0.018950610594153403, + 0.02115175976037979, + 0.018137221140265464, + 0.019522321037650107, + 0.020110222578048707, + 0.021634354048967363, + 0.015847989951372148, + 0.015064587514996529, + 0.015048437801599503, + 0.016566245526373386, + 0.012935848025679588, + 0.013470480437874794, + 0.012650928798913956, + 0.012831412743330002, + 0.011072860435843468, + 0.011155749862790108, + 0.010086000168919564, + 0.009089389600753783, + 0.008566578444838524, + 0.008092092015594243, + 0.006333343644291163, + 0.006346363427191973, + 0.006376809701025486, + 0.005647175543010235, + 0.0053406619898974895, + 0.0047307779046893116, + 0.00445646613240242, + 0.004058126023709774, + 0.0036959357082098723, + 0.003506302860453725, + 0.0027707107151299713, + 0.002242106978483498, + 0.0019543161161243916, + 0.0018740397913753987, + 0.0018142502733692527, + 0.0016265417851135135, + 0.001198948476240039, + 0.0011920882830768825, + 0.0015300761497579515, + 0.0010724398448318244, + 0.0010545289165526629, + 0.001005428223758936, + 0.000948510251417756, + 0.0011458344962261618, + 0.0009775474201142787, + 0.0009718504769355059 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.4191405475139618, + 0.03466985002160072, + 0.062135644257068634, + 0.06281301379203796 + ], + "perturbation_rho": [ + 0.030575327575206757, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -8.246861398220062e-07, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -2.357177436351776e-06, + 0.0, + 0.0, + 0.0 + ], + "0.01": [ + -7.648952305316925e-06, + 0.0, + 0.0, + 0.0 + ] + } + }, + "drift": { + "embed.weight": 175.57738575537255, + "embed.bias": 86.00449885806607, + "blocks.0.ln.weight": 7.688942765674561, + "blocks.0.w1.weight": 144.41266674735652, + "blocks.0.w1.bias": 74.01994211256735, + "blocks.0.w2.weight": 210.53383192058863, + "blocks.1.ln.weight": 4.915474746745905, + "blocks.1.w1.weight": 180.17962097603296, + "blocks.1.w1.bias": 190.87101630029008, + "blocks.1.w2.weight": 184.55972367569038, + "blocks.2.ln.weight": 6.248608922167667, + "blocks.2.w1.weight": 243.67136564327112, + "blocks.2.w1.bias": 181.912146435998, + "blocks.2.w2.weight": 169.06723472537846, + "blocks.3.ln.weight": 5.300882057690101, + "blocks.3.w1.weight": 226.17127443307106, + "blocks.3.w1.bias": 188.5014277153452, + "blocks.3.w2.weight": 160.55519875984788, + "out_ln.weight": 0.3450086076510187, + "out_head.weight": 5.754048046839396, + "out_head.bias": 2.9997445042071984 + } + } +} \ No newline at end of file diff --git a/results/cifar_depth_scan_multiseed/summary.json b/results/cifar_depth_scan_multiseed/summary.json new file mode 100644 index 0000000..0bbb673 --- /dev/null +++ b/results/cifar_depth_scan_multiseed/summary.json @@ -0,0 +1,78 @@ +{ + "d512_L4_s123": { + "dfa": { + "test_acc": 0.3094, + "mean_bp_cosine": 0.10695587818554486, + "mean_rho": 0.002404893282800913, + "mean_nudge_01": -1.237611286342144e-06, + "bp_cosine_per_layer": [ + 0.4263063073158264, + 0.0001657301909290254, + 0.0015438150148838758, + -0.00019233977945987135 + ], + "rho_per_layer": [ + 0.009619573131203651, + 0.0, + 0.0, + 0.0 + ] + }, + "credit_bridge": { + "test_acc": 0.3091, + "mean_bp_cosine": 0.13187050633132458, + "mean_rho": 0.0010322919115424156, + "mean_nudge_01": -2.3283064365386963e-06, + "bp_cosine_per_layer": [ + 0.4222239851951599, + 0.024459093809127808, + 0.03995171934366226, + 0.04084722697734833 + ], + "rho_per_layer": [ + 0.0041291676461696625, + 0.0, + 0.0, + 0.0 + ] + } + }, + "d512_L4_s456": { + "dfa": { + "test_acc": 0.3092, + "mean_bp_cosine": 0.10119853421929292, + "mean_rho": 0.004450018052011728, + "mean_nudge_01": -1.212814822793007e-06, + "bp_cosine_per_layer": [ + 0.4088655710220337, + -0.002644116524606943, + -0.0004242559662088752, + -0.0010030616540461779 + ], + "rho_per_layer": [ + 0.017800072208046913, + 0.0, + 0.0, + 0.0 + ] + }, + "credit_bridge": { + "test_acc": 0.3082, + "mean_bp_cosine": 0.14468976389616728, + "mean_rho": 0.007643831893801689, + "mean_nudge_01": -1.9122380763292313e-06, + "bp_cosine_per_layer": [ + 0.4191405475139618, + 0.03466985002160072, + 0.062135644257068634, + 0.06281301379203796 + ], + "rho_per_layer": [ + 0.030575327575206757, + 0.0, + 0.0, + 0.0 + ] + } + } +} \ No newline at end of file -- cgit v1.2.3