{ "42": { "dfa": { "log": { "train_loss": [ 2.041171367111206, 2.01035458278656, 2.0089993319702146, 2.0087824996566774, 2.005417049255371, 2.004202005844116, 2.004135177154541, 2.0013649726104736, 2.0012059982299806, 2.002799745788574, 1.999473673324585, 1.9972123648834228, 1.9990896060180665, 1.9961500553894043, 1.9974410807800294, 1.997062333984375, 1.9967610731506347, 1.9935473257827758, 1.9938662228393556, 1.9911954043197633, 1.9918721434783935, 1.9906449029922486, 1.9902287648773194, 1.9899413529968262, 1.9901034127044677, 1.9888135684204102, 1.9867314432144165, 1.98775391166687, 1.9877222427749635, 1.986023156890869 ], "train_acc": [ 0.25212, 0.26684, 0.2673, 0.26968, 0.2702, 0.27032, 0.27152, 0.27198, 0.27294, 0.27146, 0.27256, 0.27506, 0.27462, 0.27678, 0.27724, 0.27692, 0.27528, 0.2775, 0.27616, 0.27902, 0.28, 0.28058, 0.28046, 0.28064, 0.27876, 0.27924, 0.2785, 0.28286, 0.2799, 0.28246 ], "test_acc": [ 0.2899, 0.3039, 0.297, 0.2862, 0.2876, 0.2963, 0.2947, 0.302, 0.29, 0.2862, 0.2878, 0.3067, 0.2853, 0.3071, 0.2948, 0.2992, 0.309, 0.298, 0.303, 0.3047, 0.3106, 0.3006, 0.3082, 0.2972, 0.3017, 0.3064, 0.305, 0.3064, 0.3061, 0.307 ] }, "diagnostics": { "bp_cosine": [ 0.4062161147594452, 0.02404380589723587, -0.01314563024789095, -0.02864420786499977 ], "perturbation_rho": [ 0.02938488870859146, 0.0, 0.0, -0.04040054604411125 ], "nudging": { "0.001": [ -1.2400560081005096e-06, -9.313225746154785e-10, 9.313225746154785e-10, 9.313225746154785e-10 ], "0.003": [ -3.570225089788437e-06, -4.6566128730773926e-09, 1.862645149230957e-09, 3.725290298461914e-09 ], "0.01": [ -1.1967029422521591e-05, -1.210719347000122e-08, 1.3969838619232178e-09, 2.7939677238464355e-09 ] }, "hidden_norms_per_layer": [ 12514.5302734375, 10006967.0, 24731472.0, 48831016.0, 61899120.0 ], "bp_grad_norms_per_layer": [ 1.1740979743990465e-06, 7.62120411224032e-09, 7.4135684258180845e-09, 7.434150628427005e-09, 7.419798997432281e-09 ] }, "drift": { "embed.weight": 106.47232673325593, "embed.bias": 88.63566200531763, "blocks.0.ln.weight": 4.505657196044922, "blocks.0.w1.weight": 71.1190136511018, "blocks.0.w1.bias": 65.27901182999418, "blocks.0.w2.weight": 191.81664513772796, "blocks.1.ln.weight": 3.699037790298462, "blocks.1.w1.weight": 76.41161958621439, "blocks.1.w1.bias": 75.09969389083356, "blocks.1.w2.weight": 132.93780551811736, "blocks.2.ln.weight": 3.838078737258911, "blocks.2.w1.weight": 96.89058587597209, "blocks.2.w1.bias": 96.42063073285577, "blocks.2.w2.weight": 155.6341879797901, "blocks.3.ln.weight": 3.6865062713623047, "blocks.3.w1.weight": 91.12525240918814, "blocks.3.w1.bias": 89.48750762910116, "blocks.3.w2.weight": 144.11887720845996, "out_ln.weight": 0.4002464711666107, "out_head.weight": 4.1392195557143445, "out_head.bias": 0.3521037802404755 } } }, "123": { "dfa": { "log": { "train_loss": [ 2.043456584434509, 2.015362589454651, 2.011732778701782, 2.0096331211090086, 2.008639169998169, 2.006175118484497, 2.0041568740081788, 2.0016602905273437, 1.9992998320770263, 1.9955639269638061, 1.997627447128296, 1.9954585150146484, 1.9922321643829346, 1.9908589698028565, 1.99274623046875, 1.987565859451294, 1.9855359014892577, 1.9876045575714112, 1.9856640274047852, 1.9843628873062134, 1.983776489906311, 1.9830582943344117, 1.9843373564910889, 1.9821271927642823, 1.9814285857391358, 1.9822291962051393, 1.9806529276275635, 1.9811888027191162, 1.981866326751709, 1.9813806748199463 ], "train_acc": [ 0.2482, 0.2599, 0.26138, 0.26296, 0.26512, 0.26796, 0.2672, 0.2701, 0.27262, 0.27384, 0.27248, 0.27358, 0.27466, 0.2741, 0.2742, 0.27818, 0.27998, 0.27902, 0.27798, 0.27852, 0.2805, 0.2829, 0.27792, 0.28224, 0.28406, 0.28142, 0.28238, 0.2844, 0.28192, 0.28278 ], "test_acc": [ 0.2683, 0.2831, 0.2816, 0.2732, 0.2737, 0.2935, 0.2862, 0.2764, 0.2816, 0.2943, 0.2797, 0.2884, 0.2935, 0.2935, 0.2918, 0.2941, 0.2962, 0.2928, 0.2944, 0.2986, 0.292, 0.301, 0.2926, 0.3011, 0.2988, 0.298, 0.2972, 0.2948, 0.2994, 0.2985 ] }, "diagnostics": { "bp_cosine": [ 0.4047228693962097, 0.04104449227452278, -0.011393947526812553, -0.009042292833328247 ], "perturbation_rho": [ 0.0367465503513813, 0.0, 0.0, 0.0 ], "nudging": { "0.001": [ -1.1739321053028107e-06, -9.313225746154785e-09, 0.0, 0.0 ], "0.003": [ -3.5855919122695923e-06, 1.862645149230957e-09, 0.0, -9.313225746154785e-10 ], "0.01": [ -1.1938624083995819e-05, -6.984919309616089e-09, -7.450580596923828e-09, -1.3969838619232178e-09 ] }, "hidden_norms_per_layer": [ 13284.138671875, 7033498.5, 39633128.0, 64930576.0, 66113756.0 ], "bp_grad_norms_per_layer": [ 1.1239109198868391e-06, 6.197617086201035e-09, 5.381650236557789e-09, 5.38055022758499e-09, 5.385091483844917e-09 ] }, "drift": { "embed.weight": 110.02475615987971, "embed.bias": 68.65394532089363, "blocks.0.ln.weight": 4.497658729553223, "blocks.0.w1.weight": 70.59434597015027, "blocks.0.w1.bias": 60.73836185221871, "blocks.0.w2.weight": 193.2499470830069, "blocks.1.ln.weight": 3.926071882247925, "blocks.1.w1.weight": 92.10013575370482, "blocks.1.w1.bias": 84.79144794996425, "blocks.1.w2.weight": 160.80728575641794, "blocks.2.ln.weight": 4.058661937713623, "blocks.2.w1.weight": 100.55769394683692, "blocks.2.w1.bias": 95.75447672997026, "blocks.2.w2.weight": 159.847282269534, "blocks.3.ln.weight": 2.8910417556762695, "blocks.3.w1.weight": 61.4594216054129, "blocks.3.w1.bias": 50.547957978671036, "blocks.3.w2.weight": 99.05733623792827, "out_ln.weight": 0.373835951089859, "out_head.weight": 3.990617544364531, "out_head.bias": 0.4551460205356602 } } }, "456": { "dfa": { "log": { "train_loss": [ 2.0538753495788575, 2.0328780548095704, 2.0271591313171387, 2.0249926682281494, 2.0220700995635985, 2.02148416885376, 2.018033724822998, 2.017232068099976, 2.0104196131134033, 2.011538788986206, 2.007870107727051, 2.009203465423584, 2.0076511237335204, 2.0058005546569824, 2.0033703365707396, 2.0026689519500733, 2.0036011488342287, 2.001192732009888, 1.9991068982315063, 1.9989625353240967, 1.997356651878357, 1.997875876312256, 1.9966684870147704, 1.9943733695983887, 1.9942921846008301, 1.9950503900146483, 1.995271727142334, 1.9939517930603028, 1.9949346865844726, 1.9955234482192994 ], "train_acc": [ 0.24646, 0.25364, 0.25594, 0.25786, 0.26122, 0.2595, 0.26164, 0.26184, 0.26632, 0.26368, 0.26816, 0.26846, 0.2676, 0.26972, 0.26778, 0.26982, 0.27154, 0.27196, 0.27306, 0.2728, 0.2749, 0.27498, 0.274, 0.27632, 0.27516, 0.27554, 0.274, 0.27776, 0.27618, 0.27674 ], "test_acc": [ 0.2591, 0.2786, 0.2747, 0.2853, 0.2705, 0.2732, 0.274, 0.2744, 0.2837, 0.2887, 0.2899, 0.2931, 0.2939, 0.2822, 0.2828, 0.2954, 0.2961, 0.2902, 0.2943, 0.2941, 0.2972, 0.2971, 0.2955, 0.2942, 0.2995, 0.2923, 0.3007, 0.2965, 0.2971, 0.2966 ] }, "diagnostics": { "bp_cosine": [ 0.3774259090423584, -0.00713147409260273, -0.007417748216539621, 0.0045123836025595665 ], "perturbation_rho": [ -0.001135501079261303, 0.0, 0.0, 0.0 ], "nudging": { "0.001": [ -9.578652679920197e-07, -9.313225746154785e-10, 9.313225746154785e-10, 0.0 ], "0.003": [ -2.8796494007110596e-06, -2.3283064365386963e-09, 9.313225746154785e-10, -3.725290298461914e-09 ], "0.01": [ -9.520910680294037e-06, -3.725290298461914e-09, 2.3283064365386963e-09, -9.313225746154785e-10 ] }, "hidden_norms_per_layer": [ 13071.4912109375, 11935114.0, 44992308.0, 80702592.0, 99592856.0 ], "bp_grad_norms_per_layer": [ 9.842210602073465e-07, 4.690550614583344e-09, 4.549102428086371e-09, 4.550036347694686e-09, 4.553873722557e-09 ] }, "drift": { "embed.weight": 112.63263586193521, "embed.bias": 120.50774359993062, "blocks.0.ln.weight": 4.934469223022461, "blocks.0.w1.weight": 79.98242400105649, "blocks.0.w1.bias": 71.77394207010907, "blocks.0.w2.weight": 207.96745983686054, "blocks.1.ln.weight": 3.650259494781494, "blocks.1.w1.weight": 92.54979942916204, "blocks.1.w1.bias": 86.83660111158788, "blocks.1.w2.weight": 164.60563519153237, "blocks.2.ln.weight": 4.2977728843688965, "blocks.2.w1.weight": 116.55584641362735, "blocks.2.w1.bias": 109.68765194306474, "blocks.2.w2.weight": 173.76733209355965, "blocks.3.ln.weight": 4.123578071594238, "blocks.3.w1.weight": 106.41492266556777, "blocks.3.w1.bias": 102.54546146383908, "blocks.3.w2.weight": 164.09091953653103, "out_ln.weight": 0.4199577569961548, "out_head.weight": 4.122513850074516, "out_head.bias": 0.3386189609154552 } } }, "config": { "dataset": "cifar10", "d_hidden": 256, "num_blocks": 4, "batch_size": 128, "epochs": 30, "lr": 0.001, "lr_fb": 0.001, "wd": 0.01, "lam": 0.1, "K": 4, "sigma_bridge": 0.05, "ema_momentum": 0.995, "term_grad_weight": 1.0, "seeds": [ 42, 123, 456 ], "gpu": 0, "output_dir": "results/dfa_no_penalty_30ep", "methods": [ "dfa" ], "random_targets": false, "penalty_lam": 0.0, "num_classes": 10 } }