diff options
Diffstat (limited to 'results/cifar_depth_scan_s42/d512_L8_s42.json')
| -rw-r--r-- | results/cifar_depth_scan_s42/d512_L8_s42.json | 1313 |
1 files changed, 1313 insertions, 0 deletions
diff --git a/results/cifar_depth_scan_s42/d512_L8_s42.json b/results/cifar_depth_scan_s42/d512_L8_s42.json new file mode 100644 index 0000000..1b9e3ac --- /dev/null +++ b/results/cifar_depth_scan_s42/d512_L8_s42.json @@ -0,0 +1,1313 @@ +{ + "bp": { + "log": { + "train_loss": [ + 1.915883038673401, + 1.6826532165908814, + 1.5945291757965088, + 1.5274945846557617, + 1.4820034421539308, + 1.442767191696167, + 1.4153068727493285, + 1.3821979986190795, + 1.3577272303390502, + 1.3394956853485107, + 1.317076534767151, + 1.2955091687774658, + 1.2746722560501098, + 1.2573306383514404, + 1.2403777645874023, + 1.2230817644500733, + 1.2058218476486207, + 1.1939524814605713, + 1.1745232657241822, + 1.157467629776001, + 1.1439746014022827, + 1.1305697510528565, + 1.1148247271347045, + 1.1023933839797975, + 1.086283505592346, + 1.073430366706848, + 1.0622342100524902, + 1.0457359760665894, + 1.0301126390266417, + 1.0175300416183473, + 1.0012673997306825, + 0.9858699301910401, + 0.978185770187378, + 0.9613556467437744, + 0.945394755973816, + 0.9297811899185181, + 0.9155407496261597, + 0.8999193870353699, + 0.883978879852295, + 0.8744391927337647, + 0.8499023978042602, + 0.8385310165596008, + 0.8242085730552673, + 0.8106124278068543, + 0.7989885342788696, + 0.7789599366378784, + 0.7652538424110412, + 0.7498692364501953, + 0.7314479119682312, + 0.7191568143844604, + 0.7038094125175476, + 0.6889407563781739, + 0.6683795366287232, + 0.6543509451675416, + 0.6412768173599244, + 0.6269769005203247, + 0.6097362549209595, + 0.5970014540195465, + 0.5797126161766052, + 0.5652852652168274, + 0.5507066846847534, + 0.5374195718574524, + 0.5179209604072571, + 0.5085140320968627, + 0.49228226098060607, + 0.47772936537742616, + 0.46773862747192385, + 0.4499609982967377, + 0.437783351650238, + 0.4272708469581604, + 0.414694590549469, + 0.40083733788490294, + 0.3944969776344299, + 0.3800938123130798, + 0.3698486288642883, + 0.359144784078598, + 0.35212729773521423, + 0.3423941581821442, + 0.33239298017501834, + 0.32468383754730223, + 0.3140012940311432, + 0.30757560186386107, + 0.306006680727005, + 0.2992121668767929, + 0.2903935454463959, + 0.2856505290937424, + 0.28447353170394896, + 0.27721282821655274, + 0.27670302483081816, + 0.2717075553417206, + 0.26587641248703003, + 0.2625578575849533, + 0.25637605533599855, + 0.26298797123908996, + 0.25818991552352905, + 0.25911176864147184, + 0.2525204331064224, + 0.25532632726669313, + 0.2519308549976349, + 0.25220084325790404 + ], + "train_acc": [ + 0.31264, + 0.39142, + 0.4222, + 0.44868, + 0.46492, + 0.47794, + 0.48996, + 0.50224, + 0.51198, + 0.51518, + 0.52792, + 0.5311, + 0.53912, + 0.54762, + 0.55402, + 0.56126, + 0.56546, + 0.56916, + 0.58018, + 0.58254, + 0.59062, + 0.59414, + 0.59952, + 0.60316, + 0.60978, + 0.6147, + 0.61912, + 0.62562, + 0.63032, + 0.63576, + 0.64308, + 0.64636, + 0.64882, + 0.65384, + 0.6618, + 0.66354, + 0.67342, + 0.67882, + 0.68372, + 0.68614, + 0.69758, + 0.69736, + 0.70374, + 0.70786, + 0.7118, + 0.7184, + 0.72552, + 0.72828, + 0.73742, + 0.74336, + 0.74852, + 0.75294, + 0.76092, + 0.76556, + 0.76962, + 0.77602, + 0.7833, + 0.78558, + 0.7929, + 0.79948, + 0.80264, + 0.80768, + 0.8158, + 0.81734, + 0.82548, + 0.8299, + 0.834, + 0.83984, + 0.8439, + 0.84742, + 0.85312, + 0.85774, + 0.85856, + 0.8659, + 0.8691, + 0.87372, + 0.87712, + 0.88168, + 0.88188, + 0.885, + 0.88966, + 0.89152, + 0.89464, + 0.89494, + 0.8985, + 0.90096, + 0.90124, + 0.9037, + 0.90518, + 0.90646, + 0.9091, + 0.90814, + 0.91108, + 0.91036, + 0.91186, + 0.91076, + 0.91388, + 0.91184, + 0.91348, + 0.91236 + ], + "test_acc": [ + 0.3856, + 0.4346, + 0.4501, + 0.478, + 0.4875, + 0.5022, + 0.5082, + 0.5204, + 0.5286, + 0.526, + 0.5408, + 0.5448, + 0.5497, + 0.554, + 0.5586, + 0.5582, + 0.5704, + 0.5716, + 0.5706, + 0.5786, + 0.5784, + 0.5793, + 0.5749, + 0.5803, + 0.5861, + 0.5853, + 0.5914, + 0.5853, + 0.5857, + 0.5931, + 0.5927, + 0.5896, + 0.5884, + 0.5893, + 0.592, + 0.5967, + 0.596, + 0.5986, + 0.5954, + 0.5947, + 0.5926, + 0.5925, + 0.5975, + 0.5989, + 0.596, + 0.5905, + 0.5967, + 0.5981, + 0.6016, + 0.5987, + 0.5932, + 0.5941, + 0.6009, + 0.5969, + 0.6018, + 0.5988, + 0.5972, + 0.5994, + 0.5961, + 0.6002, + 0.5958, + 0.5927, + 0.5963, + 0.5966, + 0.5983, + 0.5938, + 0.5983, + 0.5933, + 0.5971, + 0.5994, + 0.5915, + 0.5951, + 0.5928, + 0.5914, + 0.595, + 0.5951, + 0.5918, + 0.5922, + 0.594, + 0.5895, + 0.5919, + 0.5959, + 0.5924, + 0.59, + 0.5911, + 0.5889, + 0.5895, + 0.5893, + 0.5895, + 0.5895, + 0.5888, + 0.5889, + 0.5883, + 0.5884, + 0.589, + 0.5886, + 0.5887, + 0.5891, + 0.5891, + 0.5893 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.9652402400970459, + 0.9650944471359253, + 0.9653107523918152, + 0.9652330875396729, + 0.9652847051620483, + 0.9657888412475586, + 0.9645384550094604, + 0.9626421928405762 + ], + "perturbation_rho": [ + 0.988792896270752, + 0.9915838837623596, + 0.9924975037574768, + 0.9927228689193726, + 0.9931389093399048, + 0.9920060038566589, + 0.9927037358283997, + 0.9916384816169739 + ], + "nudging": { + "0.001": [ + -0.002859140280634165, + -0.0028454181738197803, + -0.0028910627588629723, + -0.002915592398494482, + -0.002882363274693489, + -0.0027308787684887648, + -0.0025113134179264307, + -0.002093898830935359 + ], + "0.003": [ + -0.008569219149649143, + -0.008527351543307304, + -0.00866447202861309, + -0.008737519383430481, + -0.00863889791071415, + -0.00818516593426466, + -0.007527736481279135, + -0.00627755094319582 + ], + "0.01": [ + -0.028463756665587425, + -0.02832203544676304, + -0.028776202350854874, + -0.02901865355670452, + -0.028697077184915543, + -0.02719692885875702, + -0.025021735578775406, + -0.02087603136897087 + ] + } + }, + "drift": { + "embed.weight": 8.768116164246894, + "embed.bias": 14.842866852450323, + "blocks.0.ln.weight": 0.3243736999277519, + "blocks.0.w1.weight": 4.248609055700286, + "blocks.0.w1.bias": 6.812297519559175, + "blocks.0.w2.weight": 10.91013034026142, + "blocks.1.ln.weight": 0.2264375721290641, + "blocks.1.w1.weight": 4.530696239577078, + "blocks.1.w1.bias": 4.691704909019707, + "blocks.1.w2.weight": 12.033360841473895, + "blocks.2.ln.weight": 0.19514233538894757, + "blocks.2.w1.weight": 4.641536271142397, + "blocks.2.w1.bias": 3.9592492063653415, + "blocks.2.w2.weight": 12.825064196706213, + "blocks.3.ln.weight": 0.20608382603702532, + "blocks.3.w1.weight": 4.675452058260372, + "blocks.3.w1.bias": 3.8959342701768565, + "blocks.3.w2.weight": 13.295810386821595, + "blocks.4.ln.weight": 0.21010986166308263, + "blocks.4.w1.weight": 4.643995186005312, + "blocks.4.w1.bias": 3.768514281224811, + "blocks.4.w2.weight": 13.646370291700444, + "blocks.5.ln.weight": 0.23069735418096324, + "blocks.5.w1.weight": 4.626200002743113, + "blocks.5.w1.bias": 3.8236708931328205, + "blocks.5.w2.weight": 14.086269621793239, + "blocks.6.ln.weight": 0.2603177383528657, + "blocks.6.w1.weight": 4.602646477526439, + "blocks.6.w1.bias": 3.7825134675181187, + "blocks.6.w2.weight": 14.472250927890926, + "blocks.7.ln.weight": 0.261911500383157, + "blocks.7.w1.weight": 4.5487829119704335, + "blocks.7.w1.bias": 3.638463299582741, + "blocks.7.w2.weight": 15.316485617563446, + "out_ln.weight": 0.33173738669135683, + "out_head.weight": 3.2824882859100533, + "out_head.bias": 1.1419967578910684 + } + }, + "dfa": { + "log": { + "train_loss": [ + 2.0608128107070924, + 2.0412321961975097, + 2.0255805563354494, + 2.0184878905487063, + 2.01624364112854, + 2.012751336364746, + 2.013942890319824, + 2.013219351119995, + 2.011828511428833, + 2.0141313094329836, + 2.0115797592163087, + 2.0122245865631103, + 2.0109272706604004, + 2.0090200772476194, + 2.009412646636963, + 2.008880652618408, + 2.0089728942489624, + 2.0055529666900633, + 2.0086945219421386, + 2.0047824951171873, + 2.0031694742202757, + 2.005182927017212, + 2.0044863636779784, + 2.004345258331299, + 2.0049652186584472, + 2.000570617828369, + 2.004866870994568, + 2.0028822119522096, + 2.003401609535217, + 2.001731799697876, + 2.0005709008789063, + 1.9988768460464477, + 1.9997380918884278, + 2.002846463394165, + 1.9990734589385986, + 1.9994710061264038, + 1.9974897908782958, + 1.9986941064453125, + 1.997244268875122, + 1.9990774517822265, + 1.9992041358184816, + 1.998780822906494, + 1.9990862586975098, + 1.9967566333770752, + 1.9988449997329711, + 1.995755309448242, + 1.996486500854492, + 1.9974545507049561, + 1.9962934982681275, + 1.995275443725586, + 1.9972379402923583, + 1.9948255879974366, + 1.994635205116272, + 1.995422209739685, + 1.9959965203857422, + 1.9941707052993773, + 1.9957991414642333, + 1.995743550643921, + 1.9927717569732666, + 1.9928340990829467, + 1.9935791075134277, + 1.9936188889312745, + 1.9914473522949219, + 1.99154779296875, + 1.993619135131836, + 1.9917610000228883, + 1.9916434611511231, + 1.993833284072876, + 1.9928128030395509, + 1.991187938156128, + 1.9918081129455567, + 1.9942408345794678, + 1.9907841648101807, + 1.9906754878234862, + 1.9921169677352906, + 1.990103793334961, + 1.9908813669204712, + 1.9915815322113037, + 1.991861714782715, + 1.9920157837295531, + 1.9913273377227783, + 1.9915946127319335, + 1.9907670964050292, + 1.991461629562378, + 1.9904960403442382, + 1.9905955597686769, + 1.9918609113311767, + 1.991680837097168, + 1.9905535266494752, + 1.9902639037322998, + 1.991429512939453, + 1.9898409436798095, + 1.9906655899047851, + 1.9915429690170288, + 1.98768330619812, + 1.988915832824707, + 1.9896351364135743, + 1.990216370239258, + 1.9885352130889893, + 1.9870271291351318 + ], + "train_acc": [ + 0.243, + 0.25202, + 0.25564, + 0.26482, + 0.26238, + 0.26154, + 0.26364, + 0.26322, + 0.26436, + 0.26482, + 0.2633, + 0.26298, + 0.26476, + 0.2654, + 0.26356, + 0.26538, + 0.26644, + 0.26838, + 0.26656, + 0.26864, + 0.27314, + 0.27042, + 0.26898, + 0.26672, + 0.26908, + 0.2701, + 0.26998, + 0.27056, + 0.26842, + 0.27026, + 0.27222, + 0.27208, + 0.27172, + 0.27086, + 0.27158, + 0.2727, + 0.27244, + 0.27268, + 0.27344, + 0.27336, + 0.27228, + 0.27516, + 0.27298, + 0.2762, + 0.27606, + 0.27508, + 0.27436, + 0.27444, + 0.27638, + 0.27438, + 0.27472, + 0.27788, + 0.27694, + 0.27622, + 0.27612, + 0.2758, + 0.2748, + 0.2753, + 0.27684, + 0.27852, + 0.2789, + 0.27824, + 0.27708, + 0.27816, + 0.27774, + 0.2779, + 0.27866, + 0.27744, + 0.28004, + 0.2772, + 0.27834, + 0.27546, + 0.279, + 0.27854, + 0.27808, + 0.28114, + 0.27966, + 0.27976, + 0.27822, + 0.2767, + 0.28038, + 0.27794, + 0.2817, + 0.27958, + 0.27992, + 0.27988, + 0.28026, + 0.28032, + 0.28058, + 0.27946, + 0.2797, + 0.2796, + 0.28088, + 0.27574, + 0.28058, + 0.27854, + 0.27872, + 0.28038, + 0.28194, + 0.28066 + ], + "test_acc": [ + 0.2682, + 0.2742, + 0.2881, + 0.2861, + 0.2979, + 0.294, + 0.291, + 0.2966, + 0.2988, + 0.2988, + 0.2844, + 0.2888, + 0.2806, + 0.2961, + 0.2761, + 0.2908, + 0.2853, + 0.2763, + 0.2759, + 0.2962, + 0.3049, + 0.2905, + 0.2945, + 0.2803, + 0.2978, + 0.2973, + 0.3048, + 0.2627, + 0.2941, + 0.2926, + 0.3019, + 0.2993, + 0.2753, + 0.2948, + 0.3083, + 0.3031, + 0.2891, + 0.3018, + 0.3049, + 0.2925, + 0.3133, + 0.2912, + 0.3036, + 0.2953, + 0.3011, + 0.2927, + 0.2895, + 0.3024, + 0.3009, + 0.3024, + 0.3012, + 0.3091, + 0.2932, + 0.2974, + 0.2911, + 0.2949, + 0.2978, + 0.3017, + 0.3025, + 0.3032, + 0.3051, + 0.3055, + 0.3024, + 0.3093, + 0.2987, + 0.3004, + 0.2907, + 0.3094, + 0.296, + 0.3036, + 0.302, + 0.3117, + 0.3083, + 0.3012, + 0.3019, + 0.3025, + 0.2994, + 0.3071, + 0.3043, + 0.3032, + 0.3018, + 0.3078, + 0.3044, + 0.3039, + 0.3067, + 0.3035, + 0.3002, + 0.3073, + 0.3035, + 0.3045, + 0.3054, + 0.3075, + 0.3032, + 0.3055, + 0.305, + 0.3059, + 0.3053, + 0.3057, + 0.3058, + 0.3057 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.37686973810195923, + 0.0015315066557377577, + -0.0013667643070220947, + 7.698773697484285e-05, + 5.11965190526098e-05, + -0.0007562801474705338, + -0.00026300083845853806, + -4.065445682499558e-05 + ], + "perturbation_rho": [ + 0.015484973788261414, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -3.7671998143196106e-07, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -1.1455267667770386e-06, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.01": [ + -3.912951797246933e-06, + -4.6566128730773926e-09, + 0.0, + 0.0, + 1.862645149230957e-09, + 0.0, + 0.0, + 0.0 + ] + } + }, + "drift": { + "embed.weight": 322.7518389055776, + "embed.bias": 247.2993592394181, + "blocks.0.ln.weight": 7.905005047928133, + "blocks.0.w1.weight": 212.9905007399479, + "blocks.0.w1.bias": 182.14933416642216, + "blocks.0.w2.weight": 330.2619044995618, + "blocks.1.ln.weight": 6.755700255991533, + "blocks.1.w1.weight": 232.57652888590908, + "blocks.1.w1.bias": 216.016264679947, + "blocks.1.w2.weight": 201.84872258219994, + "blocks.2.ln.weight": 8.01709273899982, + "blocks.2.w1.weight": 320.8453952706143, + "blocks.2.w1.bias": 276.695608451841, + "blocks.2.w2.weight": 249.60378814337417, + "blocks.3.ln.weight": 5.7943043253709785, + "blocks.3.w1.weight": 199.17683771092914, + "blocks.3.w1.bias": 166.1495411478002, + "blocks.3.w2.weight": 154.7202840885994, + "blocks.4.ln.weight": 6.436414607996792, + "blocks.4.w1.weight": 234.06786452519447, + "blocks.4.w1.bias": 198.33331425257262, + "blocks.4.w2.weight": 172.58039651693562, + "blocks.5.ln.weight": 5.904929352186912, + "blocks.5.w1.weight": 213.62873358910434, + "blocks.5.w1.bias": 184.85933031281215, + "blocks.5.w2.weight": 162.5444110371982, + "blocks.6.ln.weight": 5.922534596873395, + "blocks.6.w1.weight": 212.35774255360528, + "blocks.6.w1.bias": 187.6428413347025, + "blocks.6.w2.weight": 168.7920563845852, + "blocks.7.ln.weight": 6.9752563434590575, + "blocks.7.w1.weight": 251.09533847711697, + "blocks.7.w1.bias": 212.42015960200564, + "blocks.7.w2.weight": 182.17183040015334, + "out_ln.weight": 0.5380156572173149, + "out_head.weight": 8.131814182552638, + "out_head.bias": 0.8699895784813219 + } + }, + "credit_bridge": { + "log": { + "train_loss": [ + 2.0511436333847044, + 2.0277864767456055, + 2.02313751663208, + 2.0225361193847657, + 2.0088740489196777, + 2.0053706857681273, + 2.0050094537353518, + 2.0043996282196046, + 1.9971280477905273, + 2.0012310653686525, + 1.9964948630142212, + 1.994698377227783, + 1.9948096728134155, + 1.9947182064437867, + 1.9971965953826905, + 1.9937935817718506, + 1.9970728733825684, + 1.9935891329956055, + 1.9934549239730834, + 1.9943741856384278, + 1.9916830521392823, + 1.987718446044922, + 1.9891010809326173, + 1.9910999655532837, + 1.9870087446594238, + 1.9892022339248656, + 1.9874243151855469, + 1.9917996590423583, + 1.9928570490264892, + 1.9904958669281005, + 1.9926761375427247, + 1.9937843286895751, + 1.9929048935317992, + 1.9935734990692138, + 1.9881847104263306, + 1.9887749886322021, + 1.9857899047851562, + 1.9843308640289308, + 1.9787338375091552, + 1.9824666664886474, + 1.978441342086792, + 1.9784807219696046, + 1.975589619178772, + 1.9833358307647706, + 1.9823905195617675, + 2.005378373565674, + 2.044235708847046, + 2.053414641113281, + 2.062485707550049, + 2.082823034057617, + 2.0909555229187013, + 2.080506026992798, + 2.068187072105408, + 2.0602312271881105, + 2.051138618621826, + 2.048516896209717, + 2.0438889092254637, + 2.0391429753875734, + 2.030038472366333, + 2.025220480957031, + 2.0230712211608886, + 2.0209190673828124, + 2.0193369206237795, + 2.016298021621704, + 2.0145631092071534, + 2.007977565994263, + 2.007634442100525, + 2.00878977973938, + 2.0094445184707643, + 2.01266563041687, + 2.017705497894287, + 2.022621645774841, + 2.032514253387451, + 2.0355931142425536, + 2.0368993505859376, + 2.0395946674346925, + 2.0404893283081056, + 2.0423217945861816, + 2.0412329326629637, + 2.037104062347412, + 2.0325731353759764, + 2.0280113526916503, + 2.0226307469177245, + 2.0187665083312987, + 2.0180757246398926, + 2.016418752593994, + 2.013713173828125, + 2.013864770889282, + 2.0139804064941407, + 2.012384439239502, + 2.0120543003082276, + 2.013385525588989, + 2.0126590882873536, + 2.0125275871276855, + 2.0113051068878174, + 2.012384732666016, + 2.0117678832626344, + 2.0116403619766237, + 2.0123612815093996, + 2.0132470719146727 + ], + "train_acc": [ + 0.24252, + 0.25588, + 0.261, + 0.25814, + 0.26604, + 0.26736, + 0.26858, + 0.26616, + 0.27332, + 0.2707, + 0.27226, + 0.27346, + 0.27344, + 0.271, + 0.2724, + 0.27562, + 0.2716, + 0.27578, + 0.27566, + 0.27588, + 0.2756, + 0.2763, + 0.27646, + 0.2747, + 0.2769, + 0.27584, + 0.27532, + 0.27358, + 0.27436, + 0.27466, + 0.2748, + 0.27282, + 0.27324, + 0.2744, + 0.27718, + 0.27714, + 0.27682, + 0.2795, + 0.28304, + 0.28044, + 0.28062, + 0.28496, + 0.2849, + 0.28414, + 0.28304, + 0.27624, + 0.2686, + 0.26552, + 0.26574, + 0.2669, + 0.2667, + 0.26856, + 0.26866, + 0.27098, + 0.2677, + 0.26784, + 0.26828, + 0.2664, + 0.27158, + 0.27212, + 0.26908, + 0.27222, + 0.2712, + 0.27086, + 0.2689, + 0.27072, + 0.26998, + 0.27202, + 0.26646, + 0.26438, + 0.26242, + 0.26064, + 0.25896, + 0.25936, + 0.25754, + 0.26352, + 0.26094, + 0.263, + 0.26368, + 0.26518, + 0.2691, + 0.2693, + 0.26912, + 0.27422, + 0.2738, + 0.27242, + 0.27296, + 0.27486, + 0.27162, + 0.27384, + 0.27526, + 0.27172, + 0.27436, + 0.27512, + 0.27536, + 0.2737, + 0.27316, + 0.27306, + 0.27564, + 0.27502 + ], + "test_acc": [ + 0.2611, + 0.2728, + 0.2907, + 0.2809, + 0.2926, + 0.308, + 0.3005, + 0.3036, + 0.3052, + 0.2847, + 0.2973, + 0.2946, + 0.3093, + 0.2907, + 0.2893, + 0.3111, + 0.3023, + 0.2891, + 0.2992, + 0.295, + 0.2972, + 0.2999, + 0.3136, + 0.305, + 0.2994, + 0.3022, + 0.308, + 0.2936, + 0.2988, + 0.3132, + 0.299, + 0.2989, + 0.2874, + 0.2961, + 0.3114, + 0.3062, + 0.2921, + 0.3129, + 0.2986, + 0.2975, + 0.3083, + 0.2973, + 0.2912, + 0.3022, + 0.294, + 0.2862, + 0.264, + 0.2576, + 0.2455, + 0.2604, + 0.2623, + 0.2694, + 0.2661, + 0.2696, + 0.2726, + 0.265, + 0.2603, + 0.2717, + 0.2617, + 0.2692, + 0.2806, + 0.2848, + 0.2879, + 0.271, + 0.2743, + 0.2673, + 0.279, + 0.2598, + 0.2612, + 0.2694, + 0.2531, + 0.2553, + 0.254, + 0.2426, + 0.2479, + 0.2594, + 0.2546, + 0.2605, + 0.2661, + 0.27, + 0.2717, + 0.2842, + 0.2891, + 0.2864, + 0.2804, + 0.2904, + 0.2905, + 0.2865, + 0.2837, + 0.2868, + 0.2888, + 0.2865, + 0.2922, + 0.2874, + 0.2855, + 0.2892, + 0.2892, + 0.2887, + 0.2881, + 0.2881 + ], + "value_loss": [ + 0.42954374360084535, + 0.15756836537361146, + 0.12134378553867341, + 0.11469457547187806, + 0.09599290944099426, + 0.0877463513469696, + 0.0803228804397583, + 0.0667632784330845, + 0.0672521750831604, + 0.0677217069363594, + 0.06062764830827713, + 0.05060176089763641, + 0.05358793372392654, + 0.0549625644493103, + 0.052779753321409224, + 0.046068096545934675, + 0.060794142736196516, + 0.045549960198402406, + 0.04104518217980862, + 0.046112847526073454, + 0.04252848902463913, + 0.03344858367443085, + 0.03625167763471603, + 0.04116482120513916, + 0.03612018641352654, + 0.03742522090911865, + 0.03000010380625725, + 0.03407002721071243, + 0.03310543386161327, + 0.02845939307689667, + 0.02669600613832474, + 0.029440363923311234, + 0.025257505105137825, + 0.02826717380940914, + 0.024276180317401887, + 0.021678789145350458, + 0.02275293359041214, + 0.024361893134117126, + 0.019806503029465676, + 0.028984218854904174, + 0.025155036890506743, + 0.028825275114774705, + 0.035153553171157835, + 0.04898144502878189, + 0.053181294503211976, + 0.25244187658309936, + 9.008439293670655, + 244.4939577636719, + 1492.590131953125, + 5003.36131859375, + 15302.2068553125, + 42179.784040625, + 40569.28429, + 55602.10701875, + 58168.276421875, + 47172.94552875, + 32400.684385, + 36457.8510815625, + 55178.61985875, + 33309.7381096875, + 11320.94740828125, + 20943.124659375, + 19742.98079, + 7133.2663896875, + 7511.98685734375, + 4198.7234078125, + 5113.87740875, + 3748.421473125, + 2814.479872578125, + 2090.86994609375, + 1224.1438057421874, + 568.31011984375, + 272.7561458203125, + 120.44688937988282, + 92.17080795898437, + 58.42213998718262, + 22.001411871643068, + 13.230551771850585, + 7.299799581985473, + 1.8038615107917786, + 0.6261436622428894, + 0.23487731760025024, + 0.09752666207790375, + 0.05221867031812668, + 0.03414971536874771, + 0.02229098296046257, + 0.014439315105676651, + 0.01196531643152237, + 0.009077809438109399, + 0.008066386908292771, + 0.006000318608433008, + 0.006079038715660572, + 0.0053833009558916095, + 0.004241169492304325, + 0.003964086800217629, + 0.0037191870298981665, + 0.004121905582398176, + 0.0030519997741281987, + 0.002969844557568431, + 0.00373484493046999 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.20516762137413025, + 0.010197397321462631, + 0.012989584356546402, + 0.01799590140581131, + 0.026415985077619553, + 0.028529509902000427, + 0.02881331369280815, + 0.029378943145275116 + ], + "perturbation_rho": [ + 0.03650364279747009, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -5.299225449562073e-07, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -1.5022233128547668e-06, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.01": [ + -5.0407834351062775e-06, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ] + } + }, + "drift": { + "embed.weight": 144.386565398537, + "embed.bias": 134.32612651237073, + "blocks.0.ln.weight": 6.748346473737498, + "blocks.0.w1.weight": 132.91428698293475, + "blocks.0.w1.bias": 119.60864597492228, + "blocks.0.w2.weight": 212.526020150235, + "blocks.1.ln.weight": 4.414642051445624, + "blocks.1.w1.weight": 225.12165033097793, + "blocks.1.w1.bias": 234.43494513588323, + "blocks.1.w2.weight": 178.536762788961, + "blocks.2.ln.weight": 5.970302149919209, + "blocks.2.w1.weight": 271.1166277423572, + "blocks.2.w1.bias": 227.3762025819469, + "blocks.2.w2.weight": 203.14523981621292, + "blocks.3.ln.weight": 5.796867528153885, + "blocks.3.w1.weight": 255.0825523445021, + "blocks.3.w1.bias": 231.33899213301072, + "blocks.3.w2.weight": 158.37683433796386, + "blocks.4.ln.weight": 4.529609889606443, + "blocks.4.w1.weight": 162.41528397286507, + "blocks.4.w1.bias": 142.635739824408, + "blocks.4.w2.weight": 120.99022470237554, + "blocks.5.ln.weight": 4.160579158399913, + "blocks.5.w1.weight": 154.8997138115475, + "blocks.5.w1.bias": 138.47252168248815, + "blocks.5.w2.weight": 128.9164645106335, + "blocks.6.ln.weight": 3.6901613153795623, + "blocks.6.w1.weight": 111.81360738270082, + "blocks.6.w1.bias": 84.19462418565291, + "blocks.6.w2.weight": 97.14752351362607, + "blocks.7.ln.weight": 4.210487097037135, + "blocks.7.w1.weight": 93.35554428980572, + "blocks.7.w1.bias": 43.21054718035769, + "blocks.7.w2.weight": 87.7426729890803, + "out_ln.weight": 0.3258527803812823, + "out_head.weight": 5.969889930683836, + "out_head.bias": 3.3113123218582157 + } + } +}
\ No newline at end of file |
