diff options
Diffstat (limited to 'results/fa_dfa_d512_L6_seed6/results_cifar10.json')
| -rw-r--r-- | results/fa_dfa_d512_L6_seed6/results_cifar10.json | 837 |
1 files changed, 837 insertions, 0 deletions
diff --git a/results/fa_dfa_d512_L6_seed6/results_cifar10.json b/results/fa_dfa_d512_L6_seed6/results_cifar10.json new file mode 100644 index 0000000..e57be14 --- /dev/null +++ b/results/fa_dfa_d512_L6_seed6/results_cifar10.json @@ -0,0 +1,837 @@ +{ + "6": { + "dfa": { + "log": { + "train_loss": [ + 2.058094557647705, + 2.0324147889709474, + 2.0218327713394166, + 2.0282914637756346, + 2.021828155441284, + 2.0190604560852052, + 2.0181676456069946, + 2.015587470474243, + 2.0169739764022827, + 2.012877261199951, + 2.015299163284302, + 2.015902668991089, + 2.010847349205017, + 2.0069280560302736, + 2.008257807006836, + 2.0082109774017334, + 2.006445485191345, + 2.0060603087615965, + 2.0080129043579102, + 2.0026243621444704, + 2.0042781149291993, + 2.0038208263397217, + 2.00284217376709, + 2.0032863708496094, + 2.003503814620972, + 2.0014031100082397, + 2.00154658203125, + 2.0027565859985352, + 1.9988653813934327, + 2.0048675049591065, + 2.002612951965332, + 1.9998337776184083, + 2.0022170097351073, + 2.0006915353012085, + 1.9998700122451782, + 2.001532240638733, + 2.0009784855651858, + 2.001896736717224, + 1.997888755760193, + 1.9997327941894532, + 1.998836244468689, + 1.9972671249008178, + 1.9991955379867554, + 1.996907271156311, + 1.9989860800933839, + 1.9979305069732667, + 1.9979226346588135, + 1.9973365603637696, + 1.9991827986145019, + 1.9979995524597167, + 1.9976954571533203, + 1.9975597812652588, + 1.9963330318450927, + 1.9988955184173585, + 1.9985223587036134, + 1.9940103855895996, + 1.9969634999084473, + 1.9979491520690917, + 1.9940481422424317, + 1.9955107457733154, + 1.9963652723693848, + 1.9967138528060913, + 1.9949829993438721, + 1.9970767431259155, + 1.996754500579834, + 1.9966884062194825, + 1.9968673025131225, + 1.9936358183288574, + 1.994392073059082, + 1.9950825340270997, + 1.9949580252838135, + 1.9944661192321778, + 1.994148543624878, + 1.992705733795166, + 1.9950622162246705, + 1.9951953311538697, + 1.9943858066558837, + 1.9932064359283448, + 1.9938985696411133, + 1.992524094696045, + 1.9919383232116699, + 1.9938886001586915, + 1.992321823425293, + 1.993851443786621, + 1.99483976688385, + 1.991985835800171, + 1.995015989379883, + 1.9928585873413085, + 1.9930935015487672, + 1.9924035341644286, + 1.992943452758789, + 1.992181587677002, + 1.992508976135254, + 1.9930797433471679, + 1.9915329122924805, + 1.9912273866271972, + 1.9927945401000977, + 1.991996759376526, + 1.992920351409912, + 1.99362177444458 + ], + "train_acc": [ + 0.24312, + 0.25066, + 0.25642, + 0.25462, + 0.25868, + 0.25902, + 0.25752, + 0.26142, + 0.26284, + 0.26284, + 0.26276, + 0.261, + 0.26512, + 0.2669, + 0.26932, + 0.2642, + 0.26996, + 0.26718, + 0.26772, + 0.26848, + 0.2686, + 0.27064, + 0.26932, + 0.26956, + 0.2707, + 0.27038, + 0.27084, + 0.27048, + 0.27238, + 0.26878, + 0.27158, + 0.27342, + 0.27218, + 0.27104, + 0.2718, + 0.27454, + 0.27166, + 0.27142, + 0.2736, + 0.27368, + 0.27276, + 0.2748, + 0.2733, + 0.2743, + 0.27712, + 0.2743, + 0.27604, + 0.2745, + 0.27354, + 0.27452, + 0.27538, + 0.27732, + 0.27562, + 0.27438, + 0.27546, + 0.27798, + 0.27778, + 0.27474, + 0.27972, + 0.27802, + 0.27448, + 0.27894, + 0.27802, + 0.27462, + 0.27436, + 0.27914, + 0.27646, + 0.27898, + 0.27676, + 0.27804, + 0.27688, + 0.2795, + 0.27758, + 0.27814, + 0.27706, + 0.27766, + 0.27888, + 0.2781, + 0.27842, + 0.27892, + 0.2802, + 0.27858, + 0.2792, + 0.2788, + 0.27574, + 0.28088, + 0.27956, + 0.27906, + 0.27764, + 0.27988, + 0.27716, + 0.27956, + 0.2782, + 0.2787, + 0.2789, + 0.2799, + 0.27836, + 0.28092, + 0.27914, + 0.27792 + ], + "test_acc": [ + 0.2564, + 0.2576, + 0.2815, + 0.2842, + 0.2869, + 0.2771, + 0.2864, + 0.2704, + 0.2864, + 0.2827, + 0.3005, + 0.2939, + 0.2817, + 0.2771, + 0.3033, + 0.2802, + 0.2776, + 0.3002, + 0.3, + 0.2773, + 0.3036, + 0.2858, + 0.2917, + 0.2964, + 0.2956, + 0.2996, + 0.2972, + 0.2926, + 0.3109, + 0.2826, + 0.3046, + 0.3077, + 0.278, + 0.2856, + 0.2955, + 0.2948, + 0.3034, + 0.3088, + 0.2955, + 0.2871, + 0.303, + 0.3009, + 0.2893, + 0.2922, + 0.3096, + 0.3061, + 0.2974, + 0.3028, + 0.2996, + 0.2988, + 0.3022, + 0.3003, + 0.3068, + 0.2961, + 0.2908, + 0.2971, + 0.3027, + 0.2998, + 0.3008, + 0.3071, + 0.2999, + 0.2988, + 0.2959, + 0.2982, + 0.3025, + 0.295, + 0.2934, + 0.2965, + 0.3014, + 0.2975, + 0.2893, + 0.2951, + 0.3003, + 0.3061, + 0.297, + 0.2987, + 0.3, + 0.3016, + 0.2974, + 0.303, + 0.3003, + 0.3043, + 0.3029, + 0.3004, + 0.2992, + 0.2983, + 0.3017, + 0.299, + 0.3005, + 0.2977, + 0.3004, + 0.2986, + 0.3, + 0.2993, + 0.2988, + 0.2994, + 0.2998, + 0.2994, + 0.2993, + 0.2994 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.3915177881717682, + 0.00031804549507796764, + -0.0005461883265525103, + 0.00020462644170038402, + -0.0005664663622155786, + -0.0008316519670188427 + ], + "perturbation_rho": [ + -0.011084532365202904, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -4.0885061025619507e-07, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -1.2046657502651215e-06, + 0.0, + 9.313225746154785e-10, + 0.0, + 0.0, + 0.0 + ], + "0.01": [ + -4.0549784898757935e-06, + 0.0, + 9.313225746154785e-10, + 0.0, + 9.313225746154785e-10, + 0.0 + ] + }, + "hidden_norms_per_layer": [ + 52265.20703125, + 1156494592.0, + 2602998784.0, + 3437189888.0, + 3901714432.0, + 6607706112.0, + 6800756736.0 + ], + "bp_grad_norms_per_layer": [ + 2.745953793237277e-07, + 2.664374554317561e-10, + 2.662880471682172e-10, + 2.6685945120341614e-10, + 2.6688468102165075e-10, + 2.6681562514951906e-10, + 2.668691656548816e-10 + ] + }, + "drift": { + "embed.weight": 323.8438777567112, + "embed.bias": 236.32127317549046, + "blocks.0.ln.weight": 9.81678478040185, + "blocks.0.w1.weight": 283.8033448856046, + "blocks.0.w1.bias": 254.7986676902803, + "blocks.0.w2.weight": 459.2312703971922, + "blocks.1.ln.weight": 7.9902441845145225, + "blocks.1.w1.weight": 303.8032457038149, + "blocks.1.w1.bias": 290.75689909603165, + "blocks.1.w2.weight": 302.7987642593431, + "blocks.2.ln.weight": 7.8083033170497576, + "blocks.2.w1.weight": 313.18221523970163, + "blocks.2.w1.bias": 288.235969181505, + "blocks.2.w2.weight": 301.0694759577726, + "blocks.3.ln.weight": 7.457261198270327, + "blocks.3.w1.weight": 299.37309231794677, + "blocks.3.w1.bias": 276.9324398819875, + "blocks.3.w2.weight": 279.5917777002267, + "blocks.4.ln.weight": 10.517843332237796, + "blocks.4.w1.weight": 436.93554951444736, + "blocks.4.w1.bias": 404.54404533704337, + "blocks.4.w2.weight": 386.63677119691914, + "blocks.5.ln.weight": 7.187391888063363, + "blocks.5.w1.weight": 278.782404520272, + "blocks.5.w1.bias": 266.36329153767053, + "blocks.5.w2.weight": 249.56060346351742, + "out_ln.weight": 0.5731338871373126, + "out_head.weight": 8.11049488578885, + "out_head.bias": 1.1188503504473455 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.0280703507995606, + 1.9438096570587158, + 1.9075839419174194, + 1.891309684791565, + 1.8722532345199585, + 1.8634102462768554, + 1.8554693552017212, + 1.8489897765350343, + 1.846228702659607, + 1.8386287690734864, + 1.8406133193206786, + 1.8387405684661866, + 1.8276975162506104, + 1.8232349169921875, + 1.8223698708724976, + 1.821195379638672, + 1.8214911682510375, + 1.8160597175598145, + 1.8229843518066406, + 1.813365035133362, + 1.8151773623275758, + 1.8169539303207398, + 1.8105776592254639, + 1.8114565746307374, + 1.807515641746521, + 1.806776870765686, + 1.8001547024154663, + 1.8010753986358643, + 1.7956665130233764, + 1.8011931986236571, + 1.7943034625625611, + 1.791777073059082, + 1.791583038711548, + 1.7873410034179686, + 1.7855097869873047, + 1.7860526992416381, + 1.7824570555877686, + 1.7813435136795044, + 1.7782593814086913, + 1.77326356174469, + 1.7763885403060913, + 1.7732242642974854, + 1.7696506113052368, + 1.7713894509887695, + 1.7732664206695556, + 1.7675420838165283, + 1.763011986732483, + 1.7635349313354491, + 1.7613904175186157, + 1.7604380879974366, + 1.7606450772476196, + 1.7594652207183838, + 1.7573485291290283, + 1.7578170404434204, + 1.754877228012085, + 1.7504150357437134, + 1.7527595220565797, + 1.7520960317230225, + 1.7485238824081422, + 1.7481630941772461, + 1.7490973169326782, + 1.7442448379516602, + 1.7454284634399415, + 1.7441554889678954, + 1.744502698326111, + 1.7424594699478149, + 1.7411711275482178, + 1.737814305076599, + 1.7386144577789306, + 1.7368740311431885, + 1.7391422924804687, + 1.737129613571167, + 1.7340563317108155, + 1.733810231666565, + 1.7361049320220947, + 1.7318184774398804, + 1.7328989402008057, + 1.731393734779358, + 1.7313611135482787, + 1.7314666943740844, + 1.7308115518188476, + 1.7282487900543213, + 1.7280186172485352, + 1.7302542147445679, + 1.729862018966675, + 1.7284494402313233, + 1.7274188668060302, + 1.72759757938385, + 1.728316011276245, + 1.7254533053588867, + 1.7282261031341553, + 1.7274837893676758, + 1.7255484241104126, + 1.7243229049682618, + 1.7256911779022217, + 1.7249952478790282, + 1.7246788320541382, + 1.724236849632263, + 1.7243867401885986, + 1.7224459433746337 + ], + "train_acc": [ + 0.26004, + 0.2935, + 0.3105, + 0.31688, + 0.32726, + 0.3268, + 0.33294, + 0.33576, + 0.33912, + 0.34126, + 0.3408, + 0.33982, + 0.34284, + 0.34334, + 0.34554, + 0.3478, + 0.34762, + 0.34808, + 0.34944, + 0.35122, + 0.35068, + 0.35214, + 0.35338, + 0.35102, + 0.35642, + 0.35468, + 0.35298, + 0.35718, + 0.35686, + 0.3546, + 0.3602, + 0.36098, + 0.35922, + 0.35992, + 0.36128, + 0.36012, + 0.36176, + 0.3639, + 0.36318, + 0.36768, + 0.36394, + 0.36712, + 0.36688, + 0.36672, + 0.36676, + 0.36582, + 0.37102, + 0.36946, + 0.37328, + 0.37212, + 0.36806, + 0.37476, + 0.3764, + 0.37204, + 0.37148, + 0.37532, + 0.37424, + 0.37422, + 0.37674, + 0.37706, + 0.37652, + 0.37612, + 0.37884, + 0.37688, + 0.37654, + 0.37978, + 0.37676, + 0.37814, + 0.3802, + 0.38226, + 0.3787, + 0.38016, + 0.38294, + 0.38174, + 0.37992, + 0.38192, + 0.38114, + 0.38394, + 0.38458, + 0.38406, + 0.38478, + 0.38358, + 0.38324, + 0.38232, + 0.38468, + 0.3854, + 0.3863, + 0.38538, + 0.38582, + 0.38406, + 0.38552, + 0.38434, + 0.3848, + 0.38542, + 0.38594, + 0.38728, + 0.38536, + 0.3858, + 0.38608, + 0.386 + ], + "test_acc": [ + 0.3013, + 0.3142, + 0.3483, + 0.3543, + 0.3608, + 0.3458, + 0.3633, + 0.3576, + 0.3713, + 0.3632, + 0.3817, + 0.3741, + 0.3661, + 0.3795, + 0.3734, + 0.3749, + 0.3706, + 0.3813, + 0.3822, + 0.3692, + 0.3725, + 0.3749, + 0.3836, + 0.3822, + 0.389, + 0.3821, + 0.3846, + 0.3777, + 0.3861, + 0.3809, + 0.3857, + 0.3855, + 0.3765, + 0.3955, + 0.3914, + 0.3948, + 0.3997, + 0.3994, + 0.3935, + 0.3989, + 0.3987, + 0.4008, + 0.3925, + 0.3922, + 0.4023, + 0.4055, + 0.3953, + 0.4008, + 0.4003, + 0.3971, + 0.3987, + 0.3931, + 0.4076, + 0.4019, + 0.4046, + 0.3981, + 0.4081, + 0.4046, + 0.4043, + 0.406, + 0.4118, + 0.4076, + 0.4057, + 0.414, + 0.3979, + 0.4079, + 0.4044, + 0.4087, + 0.4065, + 0.4024, + 0.3997, + 0.4078, + 0.4117, + 0.4092, + 0.409, + 0.406, + 0.4132, + 0.4133, + 0.4066, + 0.4131, + 0.4083, + 0.413, + 0.4126, + 0.4117, + 0.4109, + 0.4096, + 0.4111, + 0.4099, + 0.4094, + 0.4105, + 0.4105, + 0.4081, + 0.4122, + 0.4113, + 0.412, + 0.4095, + 0.41, + 0.4114, + 0.4111, + 0.4117 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.03050209954380989, + 0.07341398298740387, + -0.030213337391614914, + -0.1065329909324646, + -0.112009696662426, + 0.9969239234924316 + ], + "perturbation_rho": [ + 0.060943812131881714, + 0.02977900393307209, + 0.030837206169962883, + 0.03361157327890396, + 0.003993029240518808, + -0.012494717724621296 + ], + "nudging": { + "0.001": [ + -2.1707965061068535e-06, + -2.555316314101219e-07, + 1.9907020032405853e-08, + 1.1094380170106888e-07, + 1.1106021702289581e-07, + -1.6409903764724731e-06 + ], + "0.003": [ + -5.929847247898579e-06, + -7.075723260641098e-07, + 1.384178176522255e-07, + 6.816117092967033e-07, + 7.138587534427643e-07, + -6.536138243973255e-06 + ], + "0.01": [ + -2.0072446204721928e-05, + -2.6080524548888206e-06, + 4.3155159801244736e-07, + 2.434244379401207e-06, + 2.515967935323715e-06, + -2.3723463527858257e-05 + ] + }, + "hidden_norms_per_layer": [ + 6712.94189453125, + 241810.65625, + 672440.6875, + 1111939.0, + 1518122.75, + 1746060.875, + 545338.8125 + ], + "bp_grad_norms_per_layer": [ + 2.4450268028886057e-05, + 1.2711601584669552e-06, + 9.635764399718028e-07, + 9.082662018045085e-07, + 9.076145488506882e-07, + 9.084363341571589e-07, + 8.959328283708601e-07 + ] + }, + "drift": { + "embed.weight": 52.75751575367878, + "embed.bias": 16.290300308868286, + "blocks.0.ln.weight": 1.3122960145180722, + "blocks.0.w1.weight": 18.35861512742778, + "blocks.0.w1.bias": 15.033623290031253, + "blocks.0.w2.weight": 59.867189750033205, + "blocks.1.ln.weight": 0.9612442862570167, + "blocks.1.w1.weight": 19.510654541967284, + "blocks.1.w1.bias": 16.988315121645034, + "blocks.1.w2.weight": 44.17488250656146, + "blocks.2.ln.weight": 0.8653865881813659, + "blocks.2.w1.weight": 21.845526237032836, + "blocks.2.w1.bias": 20.228308191109985, + "blocks.2.w2.weight": 54.8483328488635, + "blocks.3.ln.weight": 0.687489668753582, + "blocks.3.w1.weight": 21.235696780055214, + "blocks.3.w1.bias": 22.153293813124844, + "blocks.3.w2.weight": 37.488286386992314, + "blocks.4.ln.weight": 0.6826937366325999, + "blocks.4.w1.weight": 20.105546147824473, + "blocks.4.w1.bias": 21.27470328328852, + "blocks.4.w2.weight": 45.57345483069108, + "blocks.5.ln.weight": 0.7529120733274255, + "blocks.5.w1.weight": 23.26064002188114, + "blocks.5.w1.bias": 24.637626686580436, + "blocks.5.w2.weight": 40.48651407120801, + "out_ln.weight": 0.28684521814071196, + "out_head.weight": 5.505824885038432, + "out_head.bias": 1.5561206526473979 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 512, + "num_blocks": 6, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 6 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d512_L6_seed6", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file |
