diff options
Diffstat (limited to 'results/fa_dfa_d512_L8_seed3/results_cifar10.json')
| -rw-r--r-- | results/fa_dfa_d512_L8_seed3/results_cifar10.json | 881 |
1 files changed, 881 insertions, 0 deletions
diff --git a/results/fa_dfa_d512_L8_seed3/results_cifar10.json b/results/fa_dfa_d512_L8_seed3/results_cifar10.json new file mode 100644 index 0000000..4a81c5e --- /dev/null +++ b/results/fa_dfa_d512_L8_seed3/results_cifar10.json @@ -0,0 +1,881 @@ +{ + "3": { + "dfa": { + "log": { + "train_loss": [ + 2.067654705734253, + 2.0439451234436037, + 2.0388121842193603, + 2.0374467311859132, + 2.032723134689331, + 2.024335552711487, + 2.0244412200927733, + 2.020215712852478, + 2.02228947303772, + 2.0196216506958007, + 2.018127802886963, + 2.0154445655822752, + 2.014929994125366, + 2.0130414514160155, + 2.010970745010376, + 2.009714426345825, + 2.008820608253479, + 2.0076065615081786, + 2.0063576023864744, + 2.0080780532455442, + 2.0033639809799193, + 2.0052446017456056, + 2.003892343444824, + 2.0035709592437745, + 2.004005387611389, + 2.003134113922119, + 2.002949758758545, + 2.003510911102295, + 2.0014034066009523, + 2.0013524534606932, + 2.001380777282715, + 2.0003092810058596, + 2.001631733970642, + 1.9991252640533448, + 2.001369930343628, + 2.0026023275375366, + 1.9995970624542236, + 2.001532023696899, + 1.9995446050643921, + 1.9996646937561036, + 2.0011661471176145, + 2.001093216934204, + 1.9989905255126954, + 1.9983071016693115, + 2.0015053617095946, + 2.0005502851867676, + 1.9974795049667358, + 2.001641445236206, + 1.9987062954711914, + 1.9984554448699952, + 2.000685492324829, + 1.9992525644683838, + 2.0004551610565184, + 1.9981906829452514, + 1.998261117515564, + 1.9979981622314453, + 1.999091849937439, + 1.999358012046814, + 1.9977104736709594, + 1.998026010055542, + 1.9991155099487306, + 1.998898390197754, + 1.997283667640686, + 1.9974989557266236, + 1.9966122649765015, + 1.99706481754303, + 1.9956817004394531, + 1.9957520567321778, + 1.9964876544570922, + 1.9971850046539306, + 1.9973575018310548, + 1.9963464212417603, + 1.995423618736267, + 1.9970853455352784, + 1.9958270948028565, + 1.997421721458435, + 1.9960898080825806, + 1.996168349533081, + 1.9963192137145995, + 1.997360442123413, + 1.9975355083465576, + 1.996066244506836, + 1.9953448879241944, + 1.9958293856430054, + 1.9962540533828734, + 1.9953726630401611, + 1.9940088095092774, + 1.9959244760894774, + 1.9942969249725342, + 1.9947067263793945, + 1.9935712906646728, + 1.994222310256958, + 1.9953250341796875, + 1.995568331222534, + 1.9945212261199952, + 1.9952520877075195, + 1.9954078897857666, + 1.9954364456176759, + 1.9938959969329835, + 1.9937770639038086 + ], + "train_acc": [ + 0.23262, + 0.24192, + 0.24632, + 0.24536, + 0.2464, + 0.2547, + 0.25628, + 0.25992, + 0.25686, + 0.25908, + 0.25972, + 0.2614, + 0.26032, + 0.26244, + 0.26444, + 0.26376, + 0.26774, + 0.26756, + 0.26628, + 0.2646, + 0.26774, + 0.26684, + 0.26744, + 0.2703, + 0.26722, + 0.2705, + 0.26978, + 0.26872, + 0.2695, + 0.27244, + 0.27246, + 0.27274, + 0.26828, + 0.27612, + 0.27204, + 0.27338, + 0.27352, + 0.26948, + 0.27384, + 0.27384, + 0.27366, + 0.27284, + 0.27418, + 0.27444, + 0.2734, + 0.27474, + 0.27514, + 0.27438, + 0.274, + 0.27478, + 0.2748, + 0.27494, + 0.27578, + 0.27282, + 0.27458, + 0.27628, + 0.27448, + 0.27492, + 0.27672, + 0.27708, + 0.2768, + 0.27398, + 0.27716, + 0.27682, + 0.27596, + 0.27722, + 0.2767, + 0.27738, + 0.27778, + 0.27676, + 0.27824, + 0.27798, + 0.27824, + 0.27748, + 0.27828, + 0.2766, + 0.2783, + 0.27724, + 0.27768, + 0.27614, + 0.27838, + 0.27748, + 0.27688, + 0.27912, + 0.27784, + 0.27692, + 0.27876, + 0.27918, + 0.27702, + 0.27866, + 0.2807, + 0.27914, + 0.27646, + 0.27638, + 0.27878, + 0.27844, + 0.27988, + 0.27484, + 0.27874, + 0.27812 + ], + "test_acc": [ + 0.2578, + 0.2681, + 0.2679, + 0.2839, + 0.2546, + 0.2725, + 0.2785, + 0.2856, + 0.2934, + 0.2638, + 0.2995, + 0.288, + 0.2773, + 0.2846, + 0.3003, + 0.2936, + 0.2826, + 0.2985, + 0.2836, + 0.2804, + 0.282, + 0.2777, + 0.2854, + 0.2789, + 0.3056, + 0.296, + 0.2922, + 0.2934, + 0.2854, + 0.2952, + 0.3047, + 0.2955, + 0.2902, + 0.2858, + 0.3016, + 0.2931, + 0.2948, + 0.2922, + 0.2924, + 0.2932, + 0.2975, + 0.2989, + 0.2968, + 0.3084, + 0.295, + 0.2818, + 0.2943, + 0.2957, + 0.2895, + 0.2846, + 0.2928, + 0.2932, + 0.2923, + 0.2923, + 0.2948, + 0.2839, + 0.2885, + 0.2986, + 0.2955, + 0.292, + 0.3058, + 0.2945, + 0.302, + 0.2895, + 0.294, + 0.2968, + 0.293, + 0.3034, + 0.2963, + 0.2881, + 0.295, + 0.2915, + 0.2951, + 0.2916, + 0.3019, + 0.2971, + 0.2965, + 0.2974, + 0.297, + 0.2973, + 0.2983, + 0.3006, + 0.2984, + 0.2948, + 0.2929, + 0.2974, + 0.2957, + 0.2979, + 0.2947, + 0.2944, + 0.2955, + 0.2957, + 0.2938, + 0.296, + 0.2973, + 0.2969, + 0.2962, + 0.2965, + 0.2967, + 0.2967 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.3848226070404053, + 0.0005948130274191499, + 0.0008828549180179834, + -0.0005318043986335397, + -0.0011384200770407915, + 0.001081241061910987, + -0.0007273855153471231, + -9.393676009494811e-05 + ], + "perturbation_rho": [ + 0.02956267260015011, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -4.009343683719635e-07, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -1.071486622095108e-06, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.01": [ + -3.686174750328064e-06, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ] + }, + "hidden_norms_per_layer": [ + 53208.375, + 1418977024.0, + 1839794304.0, + 3366365440.0, + 5876559872.0, + 6415245312.0, + 6484437504.0, + 6990735872.0, + 8058666496.0 + ], + "bp_grad_norms_per_layer": [ + 2.507335352675e-07, + 3.0165914211011113e-10, + 3.015434568709452e-10, + 3.022892214321615e-10, + 3.0200991707474145e-10, + 3.020122762986688e-10, + 3.0200916767419983e-10, + 3.020158290123476e-10, + 3.0227947922512044e-10 + ] + }, + "drift": { + "embed.weight": 327.65976028345034, + "embed.bias": 222.60798330036593, + "blocks.0.ln.weight": 9.50619149859276, + "blocks.0.w1.weight": 302.2719637669162, + "blocks.0.w1.bias": 270.6809804281448, + "blocks.0.w2.weight": 471.5962929911042, + "blocks.1.ln.weight": 7.339419282166665, + "blocks.1.w1.weight": 236.07033236306975, + "blocks.1.w1.bias": 214.61371141440935, + "blocks.1.w2.weight": 257.6350959469773, + "blocks.2.ln.weight": 8.814285893710872, + "blocks.2.w1.weight": 346.78316046198574, + "blocks.2.w1.bias": 305.8043449728941, + "blocks.2.w2.weight": 337.46416422582774, + "blocks.3.ln.weight": 9.745238986543606, + "blocks.3.w1.weight": 405.8018654296332, + "blocks.3.w1.bias": 374.1566493279044, + "blocks.3.w2.weight": 402.68490503996316, + "blocks.4.ln.weight": 8.109738257108088, + "blocks.4.w1.weight": 332.27298936873524, + "blocks.4.w1.bias": 309.4368090711017, + "blocks.4.w2.weight": 317.97600700647837, + "blocks.5.ln.weight": 6.209367170959338, + "blocks.5.w1.weight": 230.00050402864986, + "blocks.5.w1.bias": 215.5148975730586, + "blocks.5.w2.weight": 220.6587410113886, + "blocks.6.ln.weight": 8.869002619258092, + "blocks.6.w1.weight": 341.5561743855726, + "blocks.6.w1.bias": 310.4841079276583, + "blocks.6.w2.weight": 307.6831168444481, + "blocks.7.ln.weight": 10.348632836289921, + "blocks.7.w1.weight": 389.83507647770097, + "blocks.7.w1.bias": 362.80530475665813, + "blocks.7.w2.weight": 372.02137428373555, + "out_ln.weight": 0.6700943575231241, + "out_head.weight": 9.265882212393693, + "out_head.bias": 0.5155313240128788 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.0319221758270265, + 1.9485809626770019, + 1.9236068560409545, + 1.9108332887268067, + 1.8946271410751343, + 1.8748885510635376, + 1.8679874765777589, + 1.8528113153839112, + 1.854252926864624, + 1.8431586810302734, + 1.835074407119751, + 1.830763704071045, + 1.8243982820892335, + 1.8189660712051392, + 1.8128780780792237, + 1.8087021814346314, + 1.804141354446411, + 1.8017899124526977, + 1.7928460983657837, + 1.7929035697174072, + 1.7876431453704833, + 1.7875543393325806, + 1.7795426641845704, + 1.775789865951538, + 1.7698374728012085, + 1.768277823562622, + 1.7634769374847412, + 1.7641023599624635, + 1.757076796989441, + 1.7488349988174439, + 1.7501668548965454, + 1.7444925582122803, + 1.7474665740585327, + 1.7382987225723268, + 1.734984835205078, + 1.7357169647979735, + 1.7307214682388306, + 1.7250416897201537, + 1.7257672270965576, + 1.7227689398574828, + 1.7207815472030639, + 1.7202168838882446, + 1.7142020806503295, + 1.714960380783081, + 1.71727346408844, + 1.712963445739746, + 1.7071009867095948, + 1.7091050637435914, + 1.7093897729492187, + 1.7002552864837646, + 1.7015939654922485, + 1.7008016619873048, + 1.6995257330703735, + 1.697141215133667, + 1.6948078282928467, + 1.6939659241485596, + 1.69270690864563, + 1.6911025055313111, + 1.6915674340820313, + 1.6882078439712525, + 1.6849368871307373, + 1.6877099935913087, + 1.6871322372817994, + 1.683017247543335, + 1.6802406833648682, + 1.6818945336532594, + 1.6764708988189698, + 1.6813461883163452, + 1.679410824356079, + 1.6801742096328736, + 1.6759367601776123, + 1.6751640189361572, + 1.6755147110366821, + 1.6753325751113892, + 1.669490467529297, + 1.6727212616348266, + 1.6710585062026977, + 1.6708744039535524, + 1.6716560121536255, + 1.6683519116210936, + 1.669690319480896, + 1.6661786761474608, + 1.6632559734725951, + 1.6698375412750244, + 1.6648968856430053, + 1.6646921353912354, + 1.6623535321044922, + 1.6634717670440673, + 1.662731397628784, + 1.6652179148101807, + 1.664047002029419, + 1.6637420905303955, + 1.6616315328979492, + 1.6613892125320435, + 1.6618764827728272, + 1.6608539768218995, + 1.662214938583374, + 1.6588589643859863, + 1.6624627486419679, + 1.6634692792129517 + ], + "train_acc": [ + 0.25286, + 0.28656, + 0.29892, + 0.30622, + 0.31524, + 0.32466, + 0.33028, + 0.33336, + 0.33322, + 0.33718, + 0.34028, + 0.34314, + 0.3488, + 0.34888, + 0.3513, + 0.35072, + 0.35386, + 0.35666, + 0.35802, + 0.35736, + 0.36152, + 0.36052, + 0.36226, + 0.36666, + 0.3689, + 0.3657, + 0.36826, + 0.36686, + 0.36968, + 0.37314, + 0.37168, + 0.37526, + 0.3725, + 0.37586, + 0.37854, + 0.38008, + 0.38192, + 0.38086, + 0.38108, + 0.38154, + 0.38472, + 0.38546, + 0.38504, + 0.3864, + 0.3857, + 0.3858, + 0.38726, + 0.38836, + 0.38902, + 0.39314, + 0.39324, + 0.39162, + 0.39194, + 0.3928, + 0.39618, + 0.39266, + 0.39604, + 0.39298, + 0.39408, + 0.39738, + 0.3988, + 0.39496, + 0.39628, + 0.39638, + 0.3968, + 0.40014, + 0.40092, + 0.3965, + 0.39798, + 0.39902, + 0.40112, + 0.40126, + 0.39922, + 0.40196, + 0.40252, + 0.40146, + 0.40286, + 0.4025, + 0.40382, + 0.40394, + 0.40502, + 0.40518, + 0.40502, + 0.4029, + 0.40602, + 0.40654, + 0.40788, + 0.40838, + 0.40608, + 0.40626, + 0.40682, + 0.4054, + 0.40548, + 0.40818, + 0.4063, + 0.40556, + 0.40728, + 0.40878, + 0.41036, + 0.40504 + ], + "test_acc": [ + 0.3107, + 0.3197, + 0.3396, + 0.3501, + 0.3445, + 0.3533, + 0.3594, + 0.3665, + 0.3638, + 0.3697, + 0.3705, + 0.3731, + 0.3731, + 0.3779, + 0.3879, + 0.3797, + 0.3855, + 0.3895, + 0.389, + 0.3836, + 0.3898, + 0.3899, + 0.3957, + 0.3975, + 0.3903, + 0.3904, + 0.3925, + 0.4026, + 0.3948, + 0.404, + 0.407, + 0.4052, + 0.4016, + 0.4038, + 0.4077, + 0.4055, + 0.4139, + 0.4125, + 0.4071, + 0.41, + 0.4157, + 0.4116, + 0.4057, + 0.4093, + 0.4107, + 0.4121, + 0.4178, + 0.4151, + 0.4102, + 0.415, + 0.4171, + 0.4152, + 0.4107, + 0.4169, + 0.4165, + 0.4128, + 0.4211, + 0.4239, + 0.419, + 0.4169, + 0.4175, + 0.4204, + 0.4172, + 0.42, + 0.4223, + 0.4193, + 0.4249, + 0.4239, + 0.4247, + 0.4237, + 0.4241, + 0.4249, + 0.4246, + 0.4236, + 0.4222, + 0.4256, + 0.4236, + 0.4254, + 0.4218, + 0.4219, + 0.4253, + 0.4236, + 0.4242, + 0.4266, + 0.4265, + 0.4244, + 0.4236, + 0.4244, + 0.4257, + 0.426, + 0.4278, + 0.4252, + 0.4256, + 0.4257, + 0.425, + 0.4251, + 0.4257, + 0.4253, + 0.4252, + 0.4251 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.011332567781209946, + 0.06513819098472595, + 0.0006735082715749741, + 0.01789894700050354, + -0.04844595864415169, + -0.0789085328578949, + -0.08352568745613098, + 0.9912235736846924 + ], + "perturbation_rho": [ + -0.019055387005209923, + 0.000641997903585434, + 0.007458926644176245, + -0.0017425119876861572, + 0.01768876612186432, + -0.035874560475349426, + -0.009836452081799507, + -0.0037038950249552727 + ], + "nudging": { + "0.001": [ + 3.5937409847974777e-07, + -8.093193173408508e-07, + 5.8673322200775146e-08, + -9.988434612751007e-08, + 1.5122350305318832e-07, + 2.3562461137771606e-07, + 1.7601996660232544e-07, + -2.587912604212761e-06 + ], + "0.003": [ + 4.564644768834114e-07, + -2.398388460278511e-06, + -8.882489055395126e-08, + -2.555316314101219e-07, + 4.91039827466011e-07, + 6.976770237088203e-07, + 7.48317688703537e-07, + -8.602859452366829e-06 + ], + "0.01": [ + 2.159271389245987e-06, + -7.903669029474258e-06, + -2.2118911147117615e-09, + -8.208444342017174e-07, + 1.3509998098015785e-06, + 2.2408785298466682e-06, + 2.4311011657118797e-06, + -2.9218033887445927e-05 + ] + }, + "hidden_norms_per_layer": [ + 4438.09326171875, + 51045.953125, + 224884.15625, + 281807.5625, + 313976.5, + 377254.8125, + 619965.6875, + 803209.875, + 395948.40625 + ], + "bp_grad_norms_per_layer": [ + 5.1603383326437324e-05, + 4.058777449245099e-06, + 1.2677732001975528e-06, + 1.2549784287330112e-06, + 1.1549120699783089e-06, + 1.1174195151397726e-06, + 1.1277491012151586e-06, + 1.1274379403403145e-06, + 1.1047595762647688e-06 + ] + }, + "drift": { + "embed.weight": 35.94724430826174, + "embed.bias": 12.93641303808505, + "blocks.0.ln.weight": 1.0316107269800443, + "blocks.0.w1.weight": 14.343462705354373, + "blocks.0.w1.bias": 10.48509752557022, + "blocks.0.w2.weight": 50.82449203404063, + "blocks.1.ln.weight": 0.8954057658407036, + "blocks.1.w1.weight": 17.084567030271085, + "blocks.1.w1.bias": 8.321442582132649, + "blocks.1.w2.weight": 43.15792770243562, + "blocks.2.ln.weight": 0.50963325244791, + "blocks.2.w1.weight": 13.992497779476563, + "blocks.2.w1.bias": 8.388377688031984, + "blocks.2.w2.weight": 36.31652372933758, + "blocks.3.ln.weight": 0.5126350353439263, + "blocks.3.w1.weight": 13.376631353463463, + "blocks.3.w1.bias": 7.3308712021855715, + "blocks.3.w2.weight": 38.8692971792842, + "blocks.4.ln.weight": 0.44132703400905127, + "blocks.4.w1.weight": 13.832310366505041, + "blocks.4.w1.bias": 8.752010456823355, + "blocks.4.w2.weight": 29.67447860418571, + "blocks.5.ln.weight": 0.44899228147891274, + "blocks.5.w1.weight": 15.279703613622448, + "blocks.5.w1.bias": 14.678313736857362, + "blocks.5.w2.weight": 23.989192930697847, + "blocks.6.ln.weight": 0.48252983596530574, + "blocks.6.w1.weight": 15.592232386492284, + "blocks.6.w1.bias": 15.403926884582539, + "blocks.6.w2.weight": 26.0662939832218, + "blocks.7.ln.weight": 0.5873549927115453, + "blocks.7.w1.weight": 16.643576121850806, + "blocks.7.w1.bias": 15.88707616632278, + "blocks.7.w2.weight": 36.93738990528072, + "out_ln.weight": 0.2817508020334675, + "out_head.weight": 5.01870628445436, + "out_head.bias": 1.5909890644945488 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 512, + "num_blocks": 8, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 3 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d512_L8_seed3", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file |
