diff options
Diffstat (limited to 'results/dfa_canonical_lam1e-2_30ep/results_cifar10.json')
| -rw-r--r-- | results/dfa_canonical_lam1e-2_30ep/results_cifar10.json | 549 |
1 files changed, 549 insertions, 0 deletions
diff --git a/results/dfa_canonical_lam1e-2_30ep/results_cifar10.json b/results/dfa_canonical_lam1e-2_30ep/results_cifar10.json new file mode 100644 index 0000000..01a7a9f --- /dev/null +++ b/results/dfa_canonical_lam1e-2_30ep/results_cifar10.json @@ -0,0 +1,549 @@ +{ + "42": { + "dfa": { + "log": { + "train_loss": [ + 1.9962164908218383, + 1.9369539144134522, + 1.9308127733612062, + 1.9288829196548463, + 1.924676773147583, + 1.918132286529541, + 1.918223798522949, + 1.9147104105377197, + 1.9164991827011109, + 1.9150708497619628, + 1.9118981461334228, + 1.9094872266387939, + 1.905809390487671, + 1.9049867826080322, + 1.90767120262146, + 1.9046393532562256, + 1.9038504275894166, + 1.9006466190338134, + 1.8996596237182617, + 1.8986669551086426, + 1.8997121560668946, + 1.8965645993423461, + 1.8971398455047608, + 1.8969778009414673, + 1.89667788482666, + 1.8934650146484375, + 1.8900965643310548, + 1.8924740059661864, + 1.892325519180298, + 1.8923273934555054 + ], + "train_acc": [ + 0.2802, + 0.30808, + 0.31262, + 0.3156, + 0.31668, + 0.31968, + 0.3211, + 0.32238, + 0.3231, + 0.32382, + 0.32378, + 0.32668, + 0.32826, + 0.32862, + 0.32902, + 0.32944, + 0.32948, + 0.33388, + 0.33008, + 0.33162, + 0.33228, + 0.33294, + 0.33364, + 0.33536, + 0.33534, + 0.3341, + 0.33584, + 0.33624, + 0.33534, + 0.34052 + ], + "test_acc": [ + 0.3313, + 0.3463, + 0.3422, + 0.3411, + 0.3597, + 0.3549, + 0.3499, + 0.3438, + 0.3423, + 0.3476, + 0.3497, + 0.3524, + 0.3595, + 0.3521, + 0.354, + 0.3537, + 0.3588, + 0.3613, + 0.356, + 0.359, + 0.3599, + 0.3572, + 0.3615, + 0.3592, + 0.3592, + 0.3609, + 0.3578, + 0.36, + 0.3611, + 0.3607 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.33146190643310547, + 0.16347576677799225, + 0.1610197126865387, + 0.17380905151367188 + ], + "perturbation_rho": [ + 0.12955714762210846, + 0.045910485088825226, + 0.08855772018432617, + 0.06986565887928009 + ], + "nudging": { + "0.001": [ + -1.3143871910870075e-05, + -5.465932190418243e-06, + -5.55114820599556e-06, + -5.612848326563835e-06 + ], + "0.003": [ + -3.937864676117897e-05, + -1.6393139958381653e-05, + -1.6577658243477345e-05, + -1.6813864931464195e-05 + ], + "0.01": [ + -0.00013115769252181053, + -5.455967038869858e-05, + -5.524198058992624e-05, + -5.596294067800045e-05 + ] + }, + "hidden_norms_per_layer": [ + 12120.9111328125, + 12188.2958984375, + 12193.77734375, + 12198.8359375, + 12219.8369140625 + ], + "bp_grad_norms_per_layer": [ + 1.3196319741837215e-05, + 1.3040654266660567e-05, + 1.2841821444453672e-05, + 1.2863742995250504e-05, + 1.2522319593699649e-05 + ] + }, + "drift": { + "embed.weight": 92.65036012342134, + "embed.bias": 95.50366326873346, + "blocks.0.ln.weight": 0.34198617935180664, + "blocks.0.w1.weight": 3.6285842919394478, + "blocks.0.w1.bias": 6.327346232452673, + "blocks.0.w2.weight": 18.245981750960237, + "blocks.1.ln.weight": 0.3590666353702545, + "blocks.1.w1.weight": 3.60860615088841, + "blocks.1.w1.bias": 6.19502489141982, + "blocks.1.w2.weight": 17.96136875105609, + "blocks.2.ln.weight": 0.36136317253112793, + "blocks.2.w1.weight": 3.702767807151941, + "blocks.2.w1.bias": 6.508610347801663, + "blocks.2.w2.weight": 19.259072912976688, + "blocks.3.ln.weight": 0.35671475529670715, + "blocks.3.w1.weight": 3.7164720093334025, + "blocks.3.w1.bias": 6.203418611522943, + "blocks.3.w2.weight": 19.77945180777123, + "out_ln.weight": 0.16879618167877197, + "out_head.weight": 2.418863784769041, + "out_head.bias": 1.1721698518470152 + } + } + }, + "123": { + "dfa": { + "log": { + "train_loss": [ + 1.9917970180892943, + 1.9455738663482667, + 1.9412584506225585, + 1.9381972328948975, + 1.935788204345703, + 1.9355586415863038, + 1.9291299639892578, + 1.9304945249938965, + 1.9279169077301026, + 1.9247258889389038, + 1.9270652098083496, + 1.9239173442077637, + 1.920924050216675, + 1.9179519243621825, + 1.9187614895629883, + 1.9149749173736572, + 1.9101889783477783, + 1.9113776821517945, + 1.9123343227767944, + 1.9115107091522217, + 1.9105930194091796, + 1.9092110122680663, + 1.9085102457046508, + 1.9054700579071044, + 1.906116495628357, + 1.9055400652313232, + 1.9055342751312256, + 1.9067684815216064, + 1.9050640679168702, + 1.9041575216674804 + ], + "train_acc": [ + 0.2828, + 0.3048, + 0.30986, + 0.31168, + 0.31358, + 0.31522, + 0.31574, + 0.317, + 0.31914, + 0.31836, + 0.31874, + 0.31798, + 0.32148, + 0.32276, + 0.32502, + 0.3259, + 0.32756, + 0.32626, + 0.32706, + 0.32784, + 0.329, + 0.32942, + 0.32888, + 0.33042, + 0.33034, + 0.33086, + 0.33476, + 0.33214, + 0.33166, + 0.33136 + ], + "test_acc": [ + 0.3309, + 0.3491, + 0.3382, + 0.3519, + 0.3375, + 0.3487, + 0.3436, + 0.3431, + 0.3632, + 0.341, + 0.3632, + 0.3623, + 0.3531, + 0.3536, + 0.3476, + 0.3485, + 0.3662, + 0.3539, + 0.3453, + 0.36, + 0.3532, + 0.3566, + 0.3562, + 0.3639, + 0.3622, + 0.3623, + 0.3579, + 0.3541, + 0.3576, + 0.3582 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.3322567343711853, + 0.10321325808763504, + 0.15636520087718964, + 0.19004486501216888 + ], + "perturbation_rho": [ + 0.18363387882709503, + 0.04898637533187866, + 0.09784461557865143, + 0.09405896812677383 + ], + "nudging": { + "0.001": [ + -1.0297924745827913e-05, + -3.0525843612849712e-06, + -4.650210030376911e-06, + -5.671870894730091e-06 + ], + "0.003": [ + -3.089715028181672e-05, + -9.163166396319866e-06, + -1.3922981452196836e-05, + -1.7057405784726143e-05 + ], + "0.01": [ + -0.00010294892126694322, + -3.057112917304039e-05, + -4.6447094064205885e-05, + -5.68098621442914e-05 + ] + }, + "hidden_norms_per_layer": [ + 13127.3251953125, + 13128.8193359375, + 13141.9501953125, + 13135.3408203125, + 13141.1806640625 + ], + "bp_grad_norms_per_layer": [ + 1.0922197361651342e-05, + 1.074585088645108e-05, + 1.0719732017605565e-05, + 1.0678052603907418e-05, + 1.0736169315350708e-05 + ] + }, + "drift": { + "embed.weight": 95.06253847427116, + "embed.bias": 69.56343057263858, + "blocks.0.ln.weight": 0.35518038272857666, + "blocks.0.w1.weight": 3.5318213424051828, + "blocks.0.w1.bias": 6.6251757142063346, + "blocks.0.w2.weight": 18.1649292755703, + "blocks.1.ln.weight": 0.37566548585891724, + "blocks.1.w1.weight": 3.577683688515039, + "blocks.1.w1.bias": 6.4976786510039295, + "blocks.1.w2.weight": 18.132601820579644, + "blocks.2.ln.weight": 0.39002206921577454, + "blocks.2.w1.weight": 3.7319215256746268, + "blocks.2.w1.bias": 6.266003010226504, + "blocks.2.w2.weight": 18.870330386369503, + "blocks.3.ln.weight": 0.35860922932624817, + "blocks.3.w1.weight": 3.790655898639472, + "blocks.3.w1.bias": 5.892665738859735, + "blocks.3.w2.weight": 20.602751226104075, + "out_ln.weight": 0.15972787141799927, + "out_head.weight": 2.3651003853179144, + "out_head.bias": 1.3627504013501806 + } + } + }, + "456": { + "dfa": { + "log": { + "train_loss": [ + 1.9882178707504272, + 1.9317677951049805, + 1.9248256610870362, + 1.9217043152618407, + 1.917682172279358, + 1.919907767944336, + 1.9130486701202392, + 1.9160204236221314, + 1.9124437686920166, + 1.910402643661499, + 1.9057467068099976, + 1.9108504774475097, + 1.9094166152572631, + 1.9078613684844972, + 1.9042421661376954, + 1.9058136753082275, + 1.9045625204467773, + 1.9027692486953736, + 1.901805549621582, + 1.9018799402236939, + 1.8972413793945313, + 1.8987792961883545, + 1.897886729812622, + 1.894330054550171, + 1.8959275032806397, + 1.8961896368408202, + 1.8971351692962646, + 1.8951918316650391, + 1.89493817653656, + 1.8952873357391358 + ], + "train_acc": [ + 0.2782, + 0.30628, + 0.31294, + 0.31586, + 0.31784, + 0.31944, + 0.32352, + 0.32056, + 0.32366, + 0.32092, + 0.32656, + 0.3254, + 0.3272, + 0.3234, + 0.32672, + 0.32684, + 0.33252, + 0.3313, + 0.32936, + 0.33102, + 0.3319, + 0.33272, + 0.33364, + 0.3357, + 0.334, + 0.3336, + 0.33476, + 0.33474, + 0.3348, + 0.33498 + ], + "test_acc": [ + 0.3368, + 0.3538, + 0.3441, + 0.3634, + 0.343, + 0.3574, + 0.341, + 0.3403, + 0.3594, + 0.3606, + 0.3628, + 0.3674, + 0.3684, + 0.3558, + 0.3637, + 0.3632, + 0.3649, + 0.3565, + 0.3631, + 0.3629, + 0.3546, + 0.3629, + 0.3594, + 0.3629, + 0.3619, + 0.3609, + 0.3623, + 0.3609, + 0.3607, + 0.3614 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.35106080770492554, + 0.12957611680030823, + 0.12942053377628326, + 0.15940426290035248 + ], + "perturbation_rho": [ + 0.17636069655418396, + 0.08117785304784775, + 0.0472814217209816, + 0.11043912172317505 + ], + "nudging": { + "0.001": [ + -1.4475401258096099e-05, + -4.396220901980996e-06, + -4.46141348220408e-06, + -5.974114174023271e-06 + ], + "0.003": [ + -4.3450010707601905e-05, + -1.3241806300356984e-05, + -1.3367069186642766e-05, + -1.7999671399593353e-05 + ], + "0.01": [ + -0.00014482333790510893, + -4.394981078803539e-05, + -4.4448592234402895e-05, + -5.99866034463048e-05 + ] + }, + "hidden_norms_per_layer": [ + 12229.544921875, + 12266.8916015625, + 12257.5791015625, + 12255.2705078125, + 12254.3017578125 + ], + "bp_grad_norms_per_layer": [ + 1.334045191470068e-05, + 1.2721701750706416e-05, + 1.251421963388566e-05, + 1.2780437828041613e-05, + 1.2897891792817973e-05 + ] + }, + "drift": { + "embed.weight": 96.08641084786761, + "embed.bias": 127.15202633956196, + "blocks.0.ln.weight": 0.3495213985443115, + "blocks.0.w1.weight": 3.6819779928624996, + "blocks.0.w1.bias": 6.228241220813173, + "blocks.0.w2.weight": 17.84445507605505, + "blocks.1.ln.weight": 0.34699368476867676, + "blocks.1.w1.weight": 3.6269651247865355, + "blocks.1.w1.bias": 6.23888543559435, + "blocks.1.w2.weight": 17.036835876989706, + "blocks.2.ln.weight": 0.3590497076511383, + "blocks.2.w1.weight": 3.638377800312274, + "blocks.2.w1.bias": 6.075326782548754, + "blocks.2.w2.weight": 17.82232398341993, + "blocks.3.ln.weight": 0.35772791504859924, + "blocks.3.w1.weight": 3.76772753341028, + "blocks.3.w1.bias": 6.390494916053956, + "blocks.3.w2.weight": 18.92297820750716, + "out_ln.weight": 0.18465575575828552, + "out_head.weight": 2.515055641320841, + "out_head.bias": 1.320315434071626 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 256, + "num_blocks": 4, + "batch_size": 128, + "epochs": 30, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 42, + 123, + 456 + ], + "gpu": 0, + "output_dir": "results/dfa_canonical_lam1e-2_30ep", + "methods": [ + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.01, + "num_classes": 10 + } +}
\ No newline at end of file |
