{ "42": { "dfa": { "log": { "train_loss": [ 1.9962164908218383, 1.9369539144134522, 1.9308127733612062, 1.9288829196548463, 1.924676773147583, 1.918132286529541, 1.918223798522949, 1.9147104105377197, 1.9164991827011109, 1.9150708497619628, 1.9118981461334228, 1.9094872266387939, 1.905809390487671, 1.9049867826080322, 1.90767120262146, 1.9046393532562256, 1.9038504275894166, 1.9006466190338134, 1.8996596237182617, 1.8986669551086426, 1.8997121560668946, 1.8965645993423461, 1.8971398455047608, 1.8969778009414673, 1.89667788482666, 1.8934650146484375, 1.8900965643310548, 1.8924740059661864, 1.892325519180298, 1.8923273934555054 ], "train_acc": [ 0.2802, 0.30808, 0.31262, 0.3156, 0.31668, 0.31968, 0.3211, 0.32238, 0.3231, 0.32382, 0.32378, 0.32668, 0.32826, 0.32862, 0.32902, 0.32944, 0.32948, 0.33388, 0.33008, 0.33162, 0.33228, 0.33294, 0.33364, 0.33536, 0.33534, 0.3341, 0.33584, 0.33624, 0.33534, 0.34052 ], "test_acc": [ 0.3313, 0.3463, 0.3422, 0.3411, 0.3597, 0.3549, 0.3499, 0.3438, 0.3423, 0.3476, 0.3497, 0.3524, 0.3595, 0.3521, 0.354, 0.3537, 0.3588, 0.3613, 0.356, 0.359, 0.3599, 0.3572, 0.3615, 0.3592, 0.3592, 0.3609, 0.3578, 0.36, 0.3611, 0.3607 ] }, "diagnostics": { "bp_cosine": [ 0.33146190643310547, 0.16347576677799225, 0.1610197126865387, 0.17380905151367188 ], "perturbation_rho": [ 0.12955714762210846, 0.045910485088825226, 0.08855772018432617, 0.06986565887928009 ], "nudging": { "0.001": [ -1.3143871910870075e-05, -5.465932190418243e-06, -5.55114820599556e-06, -5.612848326563835e-06 ], "0.003": [ -3.937864676117897e-05, -1.6393139958381653e-05, -1.6577658243477345e-05, -1.6813864931464195e-05 ], "0.01": [ -0.00013115769252181053, -5.455967038869858e-05, -5.524198058992624e-05, -5.596294067800045e-05 ] }, "hidden_norms_per_layer": [ 12120.9111328125, 12188.2958984375, 12193.77734375, 12198.8359375, 12219.8369140625 ], "bp_grad_norms_per_layer": [ 1.3196319741837215e-05, 1.3040654266660567e-05, 1.2841821444453672e-05, 1.2863742995250504e-05, 1.2522319593699649e-05 ] }, "drift": { "embed.weight": 92.65036012342134, "embed.bias": 95.50366326873346, "blocks.0.ln.weight": 0.34198617935180664, "blocks.0.w1.weight": 3.6285842919394478, "blocks.0.w1.bias": 6.327346232452673, "blocks.0.w2.weight": 18.245981750960237, "blocks.1.ln.weight": 0.3590666353702545, "blocks.1.w1.weight": 3.60860615088841, "blocks.1.w1.bias": 6.19502489141982, "blocks.1.w2.weight": 17.96136875105609, "blocks.2.ln.weight": 0.36136317253112793, "blocks.2.w1.weight": 3.702767807151941, "blocks.2.w1.bias": 6.508610347801663, "blocks.2.w2.weight": 19.259072912976688, "blocks.3.ln.weight": 0.35671475529670715, "blocks.3.w1.weight": 3.7164720093334025, "blocks.3.w1.bias": 6.203418611522943, "blocks.3.w2.weight": 19.77945180777123, "out_ln.weight": 0.16879618167877197, "out_head.weight": 2.418863784769041, "out_head.bias": 1.1721698518470152 } } }, "123": { "dfa": { "log": { "train_loss": [ 1.9917970180892943, 1.9455738663482667, 1.9412584506225585, 1.9381972328948975, 1.935788204345703, 1.9355586415863038, 1.9291299639892578, 1.9304945249938965, 1.9279169077301026, 1.9247258889389038, 1.9270652098083496, 1.9239173442077637, 1.920924050216675, 1.9179519243621825, 1.9187614895629883, 1.9149749173736572, 1.9101889783477783, 1.9113776821517945, 1.9123343227767944, 1.9115107091522217, 1.9105930194091796, 1.9092110122680663, 1.9085102457046508, 1.9054700579071044, 1.906116495628357, 1.9055400652313232, 1.9055342751312256, 1.9067684815216064, 1.9050640679168702, 1.9041575216674804 ], "train_acc": [ 0.2828, 0.3048, 0.30986, 0.31168, 0.31358, 0.31522, 0.31574, 0.317, 0.31914, 0.31836, 0.31874, 0.31798, 0.32148, 0.32276, 0.32502, 0.3259, 0.32756, 0.32626, 0.32706, 0.32784, 0.329, 0.32942, 0.32888, 0.33042, 0.33034, 0.33086, 0.33476, 0.33214, 0.33166, 0.33136 ], "test_acc": [ 0.3309, 0.3491, 0.3382, 0.3519, 0.3375, 0.3487, 0.3436, 0.3431, 0.3632, 0.341, 0.3632, 0.3623, 0.3531, 0.3536, 0.3476, 0.3485, 0.3662, 0.3539, 0.3453, 0.36, 0.3532, 0.3566, 0.3562, 0.3639, 0.3622, 0.3623, 0.3579, 0.3541, 0.3576, 0.3582 ] }, "diagnostics": { "bp_cosine": [ 0.3322567343711853, 0.10321325808763504, 0.15636520087718964, 0.19004486501216888 ], "perturbation_rho": [ 0.18363387882709503, 0.04898637533187866, 0.09784461557865143, 0.09405896812677383 ], "nudging": { "0.001": [ -1.0297924745827913e-05, -3.0525843612849712e-06, -4.650210030376911e-06, -5.671870894730091e-06 ], "0.003": [ -3.089715028181672e-05, -9.163166396319866e-06, -1.3922981452196836e-05, -1.7057405784726143e-05 ], "0.01": [ -0.00010294892126694322, -3.057112917304039e-05, -4.6447094064205885e-05, -5.68098621442914e-05 ] }, "hidden_norms_per_layer": [ 13127.3251953125, 13128.8193359375, 13141.9501953125, 13135.3408203125, 13141.1806640625 ], "bp_grad_norms_per_layer": [ 1.0922197361651342e-05, 1.074585088645108e-05, 1.0719732017605565e-05, 1.0678052603907418e-05, 1.0736169315350708e-05 ] }, "drift": { "embed.weight": 95.06253847427116, "embed.bias": 69.56343057263858, "blocks.0.ln.weight": 0.35518038272857666, "blocks.0.w1.weight": 3.5318213424051828, "blocks.0.w1.bias": 6.6251757142063346, "blocks.0.w2.weight": 18.1649292755703, "blocks.1.ln.weight": 0.37566548585891724, "blocks.1.w1.weight": 3.577683688515039, "blocks.1.w1.bias": 6.4976786510039295, "blocks.1.w2.weight": 18.132601820579644, "blocks.2.ln.weight": 0.39002206921577454, "blocks.2.w1.weight": 3.7319215256746268, "blocks.2.w1.bias": 6.266003010226504, "blocks.2.w2.weight": 18.870330386369503, "blocks.3.ln.weight": 0.35860922932624817, "blocks.3.w1.weight": 3.790655898639472, "blocks.3.w1.bias": 5.892665738859735, "blocks.3.w2.weight": 20.602751226104075, "out_ln.weight": 0.15972787141799927, "out_head.weight": 2.3651003853179144, "out_head.bias": 1.3627504013501806 } } }, "456": { "dfa": { "log": { "train_loss": [ 1.9882178707504272, 1.9317677951049805, 1.9248256610870362, 1.9217043152618407, 1.917682172279358, 1.919907767944336, 1.9130486701202392, 1.9160204236221314, 1.9124437686920166, 1.910402643661499, 1.9057467068099976, 1.9108504774475097, 1.9094166152572631, 1.9078613684844972, 1.9042421661376954, 1.9058136753082275, 1.9045625204467773, 1.9027692486953736, 1.901805549621582, 1.9018799402236939, 1.8972413793945313, 1.8987792961883545, 1.897886729812622, 1.894330054550171, 1.8959275032806397, 1.8961896368408202, 1.8971351692962646, 1.8951918316650391, 1.89493817653656, 1.8952873357391358 ], "train_acc": [ 0.2782, 0.30628, 0.31294, 0.31586, 0.31784, 0.31944, 0.32352, 0.32056, 0.32366, 0.32092, 0.32656, 0.3254, 0.3272, 0.3234, 0.32672, 0.32684, 0.33252, 0.3313, 0.32936, 0.33102, 0.3319, 0.33272, 0.33364, 0.3357, 0.334, 0.3336, 0.33476, 0.33474, 0.3348, 0.33498 ], "test_acc": [ 0.3368, 0.3538, 0.3441, 0.3634, 0.343, 0.3574, 0.341, 0.3403, 0.3594, 0.3606, 0.3628, 0.3674, 0.3684, 0.3558, 0.3637, 0.3632, 0.3649, 0.3565, 0.3631, 0.3629, 0.3546, 0.3629, 0.3594, 0.3629, 0.3619, 0.3609, 0.3623, 0.3609, 0.3607, 0.3614 ] }, "diagnostics": { "bp_cosine": [ 0.35106080770492554, 0.12957611680030823, 0.12942053377628326, 0.15940426290035248 ], "perturbation_rho": [ 0.17636069655418396, 0.08117785304784775, 0.0472814217209816, 0.11043912172317505 ], "nudging": { "0.001": [ -1.4475401258096099e-05, -4.396220901980996e-06, -4.46141348220408e-06, -5.974114174023271e-06 ], "0.003": [ -4.3450010707601905e-05, -1.3241806300356984e-05, -1.3367069186642766e-05, -1.7999671399593353e-05 ], "0.01": [ -0.00014482333790510893, -4.394981078803539e-05, -4.4448592234402895e-05, -5.99866034463048e-05 ] }, "hidden_norms_per_layer": [ 12229.544921875, 12266.8916015625, 12257.5791015625, 12255.2705078125, 12254.3017578125 ], "bp_grad_norms_per_layer": [ 1.334045191470068e-05, 1.2721701750706416e-05, 1.251421963388566e-05, 1.2780437828041613e-05, 1.2897891792817973e-05 ] }, "drift": { "embed.weight": 96.08641084786761, "embed.bias": 127.15202633956196, "blocks.0.ln.weight": 0.3495213985443115, "blocks.0.w1.weight": 3.6819779928624996, "blocks.0.w1.bias": 6.228241220813173, "blocks.0.w2.weight": 17.84445507605505, "blocks.1.ln.weight": 0.34699368476867676, "blocks.1.w1.weight": 3.6269651247865355, "blocks.1.w1.bias": 6.23888543559435, "blocks.1.w2.weight": 17.036835876989706, "blocks.2.ln.weight": 0.3590497076511383, "blocks.2.w1.weight": 3.638377800312274, "blocks.2.w1.bias": 6.075326782548754, "blocks.2.w2.weight": 17.82232398341993, "blocks.3.ln.weight": 0.35772791504859924, "blocks.3.w1.weight": 3.76772753341028, "blocks.3.w1.bias": 6.390494916053956, "blocks.3.w2.weight": 18.92297820750716, "out_ln.weight": 0.18465575575828552, "out_head.weight": 2.515055641320841, "out_head.bias": 1.320315434071626 } } }, "config": { "dataset": "cifar10", "d_hidden": 256, "num_blocks": 4, "batch_size": 128, "epochs": 30, "lr": 0.001, "lr_fb": 0.001, "wd": 0.01, "lam": 0.1, "K": 4, "sigma_bridge": 0.05, "ema_momentum": 0.995, "term_grad_weight": 1.0, "seeds": [ 42, 123, 456 ], "gpu": 0, "output_dir": "results/dfa_canonical_lam1e-2_30ep", "methods": [ "dfa" ], "random_targets": false, "penalty_lam": 0.01, "num_classes": 10 } }