{ "42": { "dfa": { "log": { "train_loss": [ 1.9962164908218383, 1.9369539144134522, 1.9308127733612062, 1.9288829196548463, 1.924676773147583, 1.918132286529541, 1.918223798522949, 1.9147104105377197, 1.9164991827011109, 1.9150708497619628, 1.9118981461334228, 1.9094872266387939, 1.905809390487671, 1.9049867826080322, 1.90767120262146, 1.9046393532562256, 1.9038504275894166, 1.9006466190338134, 1.8996596237182617, 1.8986669551086426, 1.8997121560668946, 1.8965645993423461, 1.8971398455047608, 1.8969778009414673, 1.89667788482666, 1.8934650146484375, 1.8900965643310548, 1.8924740059661864, 1.892325519180298, 1.8923273934555054 ], "train_acc": [ 0.2802, 0.30808, 0.31262, 0.3156, 0.31668, 0.31968, 0.3211, 0.32238, 0.3231, 0.32382, 0.32378, 0.32668, 0.32826, 0.32862, 0.32902, 0.32944, 0.32948, 0.33388, 0.33008, 0.33162, 0.33228, 0.33294, 0.33364, 0.33536, 0.33534, 0.3341, 0.33584, 0.33624, 0.33534, 0.34052 ], "test_acc": [ 0.3313, 0.3463, 0.3422, 0.3411, 0.3597, 0.3549, 0.3499, 0.3438, 0.3423, 0.3476, 0.3497, 0.3524, 0.3595, 0.3521, 0.354, 0.3537, 0.3588, 0.3613, 0.356, 0.359, 0.3599, 0.3572, 0.3615, 0.3592, 0.3592, 0.3609, 0.3578, 0.36, 0.3611, 0.3607 ] }, "diagnostics": { "bp_cosine": [ 0.33146190643310547, 0.16347576677799225, 0.1610197126865387, 0.17380905151367188 ], "perturbation_rho": [ 0.12955714762210846, 0.045910485088825226, 0.08855772018432617, 0.06986565887928009 ], "nudging": { "0.001": [ -1.3143871910870075e-05, -5.465932190418243e-06, -5.55114820599556e-06, -5.612848326563835e-06 ], "0.003": [ -3.937864676117897e-05, -1.6393139958381653e-05, -1.6577658243477345e-05, -1.6813864931464195e-05 ], "0.01": [ -0.00013115769252181053, -5.455967038869858e-05, -5.524198058992624e-05, -5.596294067800045e-05 ] }, "hidden_norms_per_layer": [ 12120.9111328125, 12188.2958984375, 12193.77734375, 12198.8359375, 12219.8369140625 ], "bp_grad_norms_per_layer": [ 1.3196319741837215e-05, 1.3040654266660567e-05, 1.2841821444453672e-05, 1.2863742995250504e-05, 1.2522319593699649e-05 ] }, "drift": { "embed.weight": 92.65036012342134, "embed.bias": 95.50366326873346, "blocks.0.ln.weight": 0.34198617935180664, "blocks.0.w1.weight": 3.6285842919394478, "blocks.0.w1.bias": 6.327346232452673, "blocks.0.w2.weight": 18.245981750960237, "blocks.1.ln.weight": 0.3590666353702545, "blocks.1.w1.weight": 3.60860615088841, "blocks.1.w1.bias": 6.19502489141982, "blocks.1.w2.weight": 17.96136875105609, "blocks.2.ln.weight": 0.36136317253112793, "blocks.2.w1.weight": 3.702767807151941, "blocks.2.w1.bias": 6.508610347801663, "blocks.2.w2.weight": 19.259072912976688, "blocks.3.ln.weight": 0.35671475529670715, "blocks.3.w1.weight": 3.7164720093334025, "blocks.3.w1.bias": 6.203418611522943, "blocks.3.w2.weight": 19.77945180777123, "out_ln.weight": 0.16879618167877197, "out_head.weight": 2.418863784769041, "out_head.bias": 1.1721698518470152 } } }, "config": { "dataset": "cifar10", "d_hidden": 256, "num_blocks": 4, "batch_size": 128, "epochs": 30, "lr": 0.001, "lr_fb": 0.001, "wd": 0.01, "lam": 0.1, "K": 4, "sigma_bridge": 0.05, "ema_momentum": 0.995, "term_grad_weight": 1.0, "seeds": [ 42 ], "gpu": 0, "output_dir": "results/round41_dfa_penalty_30ep", "methods": [ "dfa" ], "random_targets": false, "penalty_lam": 0.01, "num_classes": 10 } }