diff options
Diffstat (limited to 'results/fa_dfa_d512_L6_seed8/results_cifar10.json')
| -rw-r--r-- | results/fa_dfa_d512_L6_seed8/results_cifar10.json | 837 |
1 files changed, 837 insertions, 0 deletions
diff --git a/results/fa_dfa_d512_L6_seed8/results_cifar10.json b/results/fa_dfa_d512_L6_seed8/results_cifar10.json new file mode 100644 index 0000000..120e901 --- /dev/null +++ b/results/fa_dfa_d512_L6_seed8/results_cifar10.json @@ -0,0 +1,837 @@ +{ + "8": { + "dfa": { + "log": { + "train_loss": [ + 2.080832463722229, + 2.048995506820679, + 2.0368573234558105, + 2.0319985338974, + 2.0285650724029543, + 2.0264119197082517, + 2.0246589180755614, + 2.0194223052597047, + 2.023226359863281, + 2.0190831980895996, + 2.0185574532699584, + 2.0163141152572632, + 2.0120892976379396, + 2.0131693548965455, + 2.0107243951416014, + 2.0126246429443357, + 2.0099691207504273, + 2.007364001235962, + 2.009500975265503, + 2.005168029327393, + 2.0078421032333376, + 2.005056644668579, + 2.003016718902588, + 2.002305891647339, + 2.004946018447876, + 2.0044735367202757, + 2.0007521640014647, + 2.0023752281188965, + 2.002063782119751, + 1.9980181777954102, + 1.999666137161255, + 2.0014348561096194, + 1.9975938603973389, + 1.997772049255371, + 1.9968431232452393, + 1.9957593490600587, + 1.9965335370254516, + 1.9956621952819824, + 1.9998830209732055, + 1.9981224115371705, + 1.9981820908355712, + 1.9963909757995606, + 1.9932935749816894, + 1.9949091632843017, + 1.9948957403564453, + 1.9962669714736938, + 1.9956311498641968, + 1.993200020980835, + 1.9959270191192626, + 1.9959917138671874, + 1.99261492729187, + 1.9928764194107056, + 1.9886184701538085, + 1.9945403707504272, + 1.9950171528625489, + 1.9918433042144776, + 1.9921051956939697, + 1.9939103940582275, + 1.9926260836791991, + 1.9888539260864257, + 1.9864782820129394, + 1.9928740283203126, + 1.9936177950286864, + 1.9908017050933837, + 1.9916553665924073, + 1.991268727722168, + 1.9904707485198974, + 1.990502048187256, + 1.9922669997406006, + 1.990579651412964, + 1.989777621154785, + 1.9894770226669312, + 1.9896350140762329, + 1.9901242208480836, + 1.9889081002426148, + 1.9884048733520507, + 1.991174902420044, + 1.9889308002090453, + 1.9896702439880372, + 1.9888165955352783, + 1.9881730316925048, + 1.9870906281280518, + 1.9875952992248536, + 1.987178568344116, + 1.98706350440979, + 1.987743454208374, + 1.9882577419281007, + 1.98791315574646, + 1.9878161795806886, + 1.9868540209197998, + 1.9870264992523194, + 1.9860009889984132, + 1.9875505159759521, + 1.9869904022979736, + 1.9855010274887086, + 1.9865515293884277, + 1.9860705053710936, + 1.9874219760131835, + 1.9881664807128907, + 1.9873311661148072 + ], + "train_acc": [ + 0.22856, + 0.2384, + 0.24456, + 0.24494, + 0.24732, + 0.25098, + 0.24904, + 0.2523, + 0.25028, + 0.2514, + 0.25612, + 0.25722, + 0.25858, + 0.25916, + 0.26012, + 0.26044, + 0.26232, + 0.26452, + 0.26066, + 0.26248, + 0.2637, + 0.26254, + 0.26474, + 0.26712, + 0.2677, + 0.26322, + 0.26788, + 0.26452, + 0.26682, + 0.26908, + 0.2686, + 0.26734, + 0.27066, + 0.26914, + 0.26942, + 0.27018, + 0.26882, + 0.2706, + 0.26786, + 0.26876, + 0.27074, + 0.27, + 0.27148, + 0.27242, + 0.27346, + 0.27144, + 0.27246, + 0.27478, + 0.2713, + 0.27238, + 0.2732, + 0.27598, + 0.27788, + 0.27462, + 0.27216, + 0.27474, + 0.27372, + 0.27488, + 0.2751, + 0.27356, + 0.27528, + 0.2739, + 0.2744, + 0.27592, + 0.27666, + 0.27542, + 0.2749, + 0.27564, + 0.27486, + 0.27662, + 0.27766, + 0.27678, + 0.27682, + 0.27618, + 0.27544, + 0.2767, + 0.27538, + 0.27816, + 0.27422, + 0.27562, + 0.2772, + 0.27786, + 0.27924, + 0.2775, + 0.2789, + 0.27834, + 0.27724, + 0.27724, + 0.27694, + 0.27716, + 0.27862, + 0.27786, + 0.27676, + 0.2771, + 0.278, + 0.27708, + 0.27678, + 0.27732, + 0.27858, + 0.27724 + ], + "test_acc": [ + 0.25, + 0.2553, + 0.2484, + 0.2586, + 0.2481, + 0.2619, + 0.2688, + 0.2577, + 0.2807, + 0.2806, + 0.2671, + 0.2735, + 0.2663, + 0.272, + 0.2824, + 0.2808, + 0.2859, + 0.2837, + 0.282, + 0.2895, + 0.2788, + 0.2787, + 0.275, + 0.2726, + 0.283, + 0.2729, + 0.2866, + 0.2909, + 0.2806, + 0.2892, + 0.2818, + 0.2915, + 0.288, + 0.2928, + 0.2895, + 0.3015, + 0.2906, + 0.2869, + 0.2936, + 0.2929, + 0.2996, + 0.2852, + 0.295, + 0.284, + 0.2772, + 0.302, + 0.278, + 0.2875, + 0.2899, + 0.3026, + 0.2806, + 0.2945, + 0.2936, + 0.298, + 0.2945, + 0.2987, + 0.2926, + 0.2891, + 0.2897, + 0.2928, + 0.2947, + 0.2867, + 0.2913, + 0.2912, + 0.2907, + 0.2955, + 0.2948, + 0.2955, + 0.2934, + 0.2874, + 0.2917, + 0.2904, + 0.2907, + 0.2893, + 0.2937, + 0.2975, + 0.2948, + 0.2938, + 0.2921, + 0.2903, + 0.2921, + 0.2932, + 0.2969, + 0.2915, + 0.294, + 0.2947, + 0.2891, + 0.2975, + 0.2948, + 0.2956, + 0.2919, + 0.2948, + 0.2936, + 0.2931, + 0.293, + 0.2934, + 0.2934, + 0.294, + 0.2937, + 0.2937 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.37266281247138977, + -0.00045336512266658247, + -0.00016533058078493923, + -0.00022915060981176794, + -9.622798825148493e-05, + -0.0005005986895412207 + ], + "perturbation_rho": [ + -0.022135162726044655, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -4.153698682785034e-07, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -1.184176653623581e-06, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.01": [ + -4.217028617858887e-06, + 9.313225746154785e-10, + 0.0, + 0.0, + -4.6566128730773926e-09, + 0.0 + ] + }, + "hidden_norms_per_layer": [ + 50847.30078125, + 848742848.0, + 4660130304.0, + 4956724224.0, + 5474357248.0, + 5779449344.0, + 6897274368.0 + ], + "bp_grad_norms_per_layer": [ + 3.144729134874069e-07, + 2.3805521442987754e-10, + 2.377963659316862e-10, + 2.3782173452779887e-10, + 2.382432029435222e-10, + 2.381181640753738e-10, + 2.3811083660341126e-10 + ] + }, + "drift": { + "embed.weight": 319.35155723550326, + "embed.bias": 248.60053068216845, + "blocks.0.ln.weight": 9.427908278003652, + "blocks.0.w1.weight": 267.21029834357404, + "blocks.0.w1.bias": 240.44244412845688, + "blocks.0.w2.weight": 467.22266822391686, + "blocks.1.ln.weight": 9.176869505684051, + "blocks.1.w1.weight": 396.8672687385826, + "blocks.1.w1.bias": 384.3213865917358, + "blocks.1.w2.weight": 392.8080177067534, + "blocks.2.ln.weight": 6.748169794145516, + "blocks.2.w1.weight": 260.5475963025436, + "blocks.2.w1.bias": 233.50388419740455, + "blocks.2.w2.weight": 243.9744268156159, + "blocks.3.ln.weight": 8.622995843562022, + "blocks.3.w1.weight": 331.24292230819924, + "blocks.3.w1.bias": 315.5877913098073, + "blocks.3.w2.weight": 298.8313159517988, + "blocks.4.ln.weight": 8.025346777953455, + "blocks.4.w1.weight": 289.7124732778298, + "blocks.4.w1.bias": 265.5485861970709, + "blocks.4.w2.weight": 262.14080086870655, + "blocks.5.ln.weight": 9.46176669645325, + "blocks.5.w1.weight": 364.3981229248546, + "blocks.5.w1.bias": 349.70853696261963, + "blocks.5.w2.weight": 357.5708723169573, + "out_ln.weight": 0.5682786122526117, + "out_head.weight": 8.418164907479396, + "out_head.bias": 0.662517650973914 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.0414198583984375, + 1.9522941863250733, + 1.912906623916626, + 1.8914309909057616, + 1.8828500344848633, + 1.8722443673706055, + 1.867602448654175, + 1.856888031578064, + 1.8552497367095948, + 1.843837871170044, + 1.8456551111602784, + 1.8392053551864624, + 1.832319408721924, + 1.8315165106582643, + 1.8281906174468994, + 1.8275285326766968, + 1.823442045288086, + 1.8223411087799073, + 1.8216783404922485, + 1.8156820178604125, + 1.8176694821166992, + 1.8109132452392578, + 1.8122728036880493, + 1.8086374733734132, + 1.8135607794189452, + 1.807762541885376, + 1.8036900131225586, + 1.8017769944000244, + 1.8010728991699219, + 1.7973168952178955, + 1.792228649635315, + 1.7974130847549439, + 1.7889769022369384, + 1.78288419921875, + 1.7853075231552125, + 1.7797698288726806, + 1.7783561060333253, + 1.7755371138763427, + 1.7767201525115968, + 1.7773720627593994, + 1.7749587761688232, + 1.7682355377960206, + 1.7642274634552002, + 1.7686031677627563, + 1.763687038230896, + 1.7665823318862914, + 1.7619021569824218, + 1.7553145666885377, + 1.757571950340271, + 1.7612224188995362, + 1.7522613665390014, + 1.7504500290298461, + 1.7475368619537353, + 1.7495313736343383, + 1.7496113208389281, + 1.7413074100494386, + 1.7437634180450439, + 1.7434646717071534, + 1.746056427268982, + 1.740490657081604, + 1.7333746536254884, + 1.7381776495742798, + 1.73842948387146, + 1.7355398804092408, + 1.7360767101287842, + 1.7332445461273194, + 1.732193847579956, + 1.730364631652832, + 1.732538067970276, + 1.7276911224746705, + 1.7270672924804686, + 1.7267360440826416, + 1.725644203414917, + 1.7283870937347412, + 1.72088895652771, + 1.7216768536376954, + 1.723486647377014, + 1.7211247494125366, + 1.7208690827178954, + 1.7219977252578735, + 1.7204493698120118, + 1.7233893868255614, + 1.7198170611190795, + 1.7190866617202758, + 1.7203575790405274, + 1.7177474556350707, + 1.7174966396713256, + 1.7169412566375732, + 1.714385998878479, + 1.7148360108184815, + 1.7158790673446656, + 1.7134223749542237, + 1.714623501663208, + 1.7133040780639648, + 1.715691188583374, + 1.7187325539398193, + 1.7134061930084228, + 1.713903324661255, + 1.7177725345230102, + 1.7178887582397462 + ], + "train_acc": [ + 0.24766, + 0.28764, + 0.30724, + 0.31554, + 0.31774, + 0.32862, + 0.32412, + 0.32998, + 0.33022, + 0.33776, + 0.33938, + 0.33914, + 0.3422, + 0.33992, + 0.34394, + 0.34276, + 0.3413, + 0.34586, + 0.347, + 0.34834, + 0.34908, + 0.35144, + 0.35026, + 0.3513, + 0.34898, + 0.35394, + 0.35424, + 0.35282, + 0.3581, + 0.35672, + 0.35968, + 0.35698, + 0.36002, + 0.36412, + 0.36052, + 0.36534, + 0.36372, + 0.36502, + 0.36444, + 0.36414, + 0.36688, + 0.36802, + 0.37136, + 0.3681, + 0.3721, + 0.36958, + 0.37192, + 0.37408, + 0.37342, + 0.37124, + 0.37412, + 0.37666, + 0.37718, + 0.3741, + 0.37748, + 0.37834, + 0.3806, + 0.37612, + 0.3775, + 0.37774, + 0.38072, + 0.38068, + 0.38082, + 0.37998, + 0.382, + 0.38172, + 0.38358, + 0.3851, + 0.38346, + 0.38376, + 0.38542, + 0.38474, + 0.38684, + 0.38692, + 0.3899, + 0.38944, + 0.38642, + 0.38598, + 0.38606, + 0.38656, + 0.38402, + 0.38854, + 0.38994, + 0.38908, + 0.3902, + 0.38896, + 0.39004, + 0.39054, + 0.3903, + 0.38856, + 0.38994, + 0.38868, + 0.38934, + 0.39004, + 0.39128, + 0.39024, + 0.39048, + 0.3904, + 0.38948, + 0.39032 + ], + "test_acc": [ + 0.2817, + 0.3086, + 0.321, + 0.3426, + 0.3496, + 0.3478, + 0.3546, + 0.3478, + 0.3568, + 0.3627, + 0.3663, + 0.3752, + 0.3646, + 0.3581, + 0.3779, + 0.3748, + 0.3759, + 0.3762, + 0.3858, + 0.3739, + 0.3779, + 0.3667, + 0.3721, + 0.3827, + 0.369, + 0.3761, + 0.3719, + 0.3794, + 0.38, + 0.3844, + 0.3733, + 0.3869, + 0.3885, + 0.3911, + 0.387, + 0.3906, + 0.3904, + 0.395, + 0.3849, + 0.3987, + 0.3901, + 0.4009, + 0.3893, + 0.385, + 0.3918, + 0.3995, + 0.392, + 0.3934, + 0.3954, + 0.4044, + 0.3976, + 0.3997, + 0.3967, + 0.402, + 0.404, + 0.3987, + 0.3997, + 0.3959, + 0.3955, + 0.3962, + 0.4028, + 0.4067, + 0.3984, + 0.4075, + 0.4072, + 0.4011, + 0.4028, + 0.4082, + 0.4069, + 0.4041, + 0.4058, + 0.4022, + 0.4061, + 0.4029, + 0.4059, + 0.4071, + 0.4086, + 0.4069, + 0.4058, + 0.4081, + 0.4083, + 0.4114, + 0.4078, + 0.4083, + 0.4074, + 0.4097, + 0.4064, + 0.4087, + 0.4057, + 0.4068, + 0.4067, + 0.407, + 0.4072, + 0.4089, + 0.4094, + 0.4092, + 0.4088, + 0.4081, + 0.4078, + 0.408 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.016317440196871758, + 0.08700302243232727, + -0.07087832689285278, + -0.04807429015636444, + -0.011868046596646309, + 0.9980045557022095 + ], + "perturbation_rho": [ + 0.021565234288573265, + 0.02034841850399971, + 0.026769006624817848, + -0.05038269981741905, + 0.007762039080262184, + 0.04057261347770691 + ], + "nudging": { + "0.001": [ + -1.8766731955111027e-06, + -6.295740604400635e-07, + 1.050066202878952e-07, + 5.390029400587082e-08, + -8.824281394481659e-08, + -1.8120626918971539e-06 + ], + "0.003": [ + -6.020389264449477e-06, + -1.8251012079417706e-06, + 4.4528860598802567e-07, + 2.2060703486204147e-07, + 2.1478626877069473e-08, + -6.290327291935682e-06 + ], + "0.01": [ + -2.0035397028550506e-05, + -6.116693839430809e-06, + 1.5703844837844372e-06, + 9.065261110663414e-07, + 1.710723154246807e-07, + -2.1861022105440497e-05 + ] + }, + "hidden_norms_per_layer": [ + 7090.23486328125, + 97328.1015625, + 1195400.0, + 1509493.75, + 1603199.125, + 1626782.875, + 733314.5 + ], + "bp_grad_norms_per_layer": [ + 2.7028392651118338e-05, + 1.845057795435423e-06, + 8.682639531798486e-07, + 8.779788345236739e-07, + 8.714667956155608e-07, + 8.789162393441075e-07, + 8.642545594739204e-07 + ] + }, + "drift": { + "embed.weight": 49.97107099246965, + "embed.bias": 16.073336112282973, + "blocks.0.ln.weight": 1.1864794930711844, + "blocks.0.w1.weight": 16.772962175548443, + "blocks.0.w1.bias": 11.717179109423967, + "blocks.0.w2.weight": 54.31408520827723, + "blocks.1.ln.weight": 1.1429608481901679, + "blocks.1.w1.weight": 25.1727265395903, + "blocks.1.w1.bias": 23.582538699977512, + "blocks.1.w2.weight": 44.89348831927897, + "blocks.2.ln.weight": 0.6307651937494874, + "blocks.2.w1.weight": 20.089313457148293, + "blocks.2.w1.bias": 20.43961041250799, + "blocks.2.w2.weight": 36.04199442331163, + "blocks.3.ln.weight": 0.5625176858716091, + "blocks.3.w1.weight": 17.29253500275298, + "blocks.3.w1.bias": 18.353681256446446, + "blocks.3.w2.weight": 45.166175379509205, + "blocks.4.ln.weight": 0.5780873641807247, + "blocks.4.w1.weight": 15.563905559896059, + "blocks.4.w1.bias": 13.46488029874075, + "blocks.4.w2.weight": 59.00645861860142, + "blocks.5.ln.weight": 0.7521925423173332, + "blocks.5.w1.weight": 19.685866374226443, + "blocks.5.w1.bias": 20.12611595031584, + "blocks.5.w2.weight": 41.84701231716978, + "out_ln.weight": 0.3374467885491731, + "out_head.weight": 5.972151190658582, + "out_head.bias": 0.6096349208966545 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 512, + "num_blocks": 6, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 8 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d512_L6_seed8", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file |
