diff options
Diffstat (limited to 'results/fa_dfa_d256_L8_seed4/results_cifar10.json')
| -rw-r--r-- | results/fa_dfa_d256_L8_seed4/results_cifar10.json | 881 |
1 files changed, 881 insertions, 0 deletions
diff --git a/results/fa_dfa_d256_L8_seed4/results_cifar10.json b/results/fa_dfa_d256_L8_seed4/results_cifar10.json new file mode 100644 index 0000000..18ac505 --- /dev/null +++ b/results/fa_dfa_d256_L8_seed4/results_cifar10.json @@ -0,0 +1,881 @@ +{ + "4": { + "dfa": { + "log": { + "train_loss": [ + 2.0656567663574217, + 2.0505859230041503, + 2.053203703994751, + 2.0526840745544432, + 2.0508153134155274, + 2.0500064964294435, + 2.0487480962753297, + 2.049719909591675, + 2.046275976715088, + 2.0458218814468383, + 2.0437327142333985, + 2.042165072555542, + 2.0446242738342284, + 2.0408524193572997, + 2.042093524932861, + 2.0402661250305174, + 2.0408810894012452, + 2.0373284103012086, + 2.03862116607666, + 2.035870743408203, + 2.0366722262573242, + 2.0368052173614504, + 2.0363987246322632, + 2.0385638175201417, + 2.0332528368377685, + 2.0347424139404295, + 2.0324463927841188, + 2.0332344970321654, + 2.031939734954834, + 2.0355701583862307, + 2.0314910345077513, + 2.031226405334473, + 2.0333832290649414, + 2.031931709365845, + 2.032775154876709, + 2.0315535527038575, + 2.030991368255615, + 2.031312015762329, + 2.0315300573349, + 2.0294947302246094, + 2.0329836825180054, + 2.0305798300933837, + 2.0313154915618896, + 2.030211465911865, + 2.0333448963928222, + 2.032171858444214, + 2.0326642723083497, + 2.029641508178711, + 2.030972560157776, + 2.03141214263916, + 2.0315414978790285, + 2.031778284568787, + 2.0305172948837282, + 2.029238556213379, + 2.0308371866607664, + 2.029705788726807, + 2.031071273651123, + 2.031551382446289, + 2.0320941706085205, + 2.0302660203552247, + 2.02909424407959, + 2.0317608085632326, + 2.0301849742126463, + 2.0325209705352782, + 2.0291553590393065, + 2.029445772399902, + 2.029556517715454, + 2.030448714065552, + 2.0291586400985717, + 2.030339909133911, + 2.0314291301727296, + 2.02971793800354, + 2.0307174551391602, + 2.0306736737823488, + 2.02974597366333, + 2.0310014685058593, + 2.0306331666564943, + 2.0300069621276857, + 2.0305981512069704, + 2.0304822917938234, + 2.0307031988525392, + 2.029250856361389, + 2.0301674210357668, + 2.0295850566864013, + 2.0306209880828856, + 2.029310823135376, + 2.031099863433838, + 2.029841015853882, + 2.028756529998779, + 2.028993240814209, + 2.028344483795166, + 2.0281790949249268, + 2.0280795663452147, + 2.0291675205230715, + 2.0298909065246584, + 2.029457865447998, + 2.029133120574951, + 2.0299807266998293, + 2.028713212356567, + 2.029348062210083 + ], + "train_acc": [ + 0.24136, + 0.24482, + 0.2427, + 0.24432, + 0.24608, + 0.24552, + 0.24764, + 0.2474, + 0.25004, + 0.24796, + 0.24904, + 0.25112, + 0.25108, + 0.25374, + 0.25486, + 0.25144, + 0.25464, + 0.2558, + 0.25348, + 0.25578, + 0.25414, + 0.2542, + 0.2566, + 0.25478, + 0.25908, + 0.2592, + 0.2572, + 0.25702, + 0.25768, + 0.25918, + 0.25924, + 0.25932, + 0.2585, + 0.25922, + 0.26328, + 0.25904, + 0.25988, + 0.26086, + 0.25982, + 0.26208, + 0.25958, + 0.26294, + 0.26084, + 0.26124, + 0.25758, + 0.25902, + 0.2607, + 0.25844, + 0.26254, + 0.26218, + 0.26206, + 0.25944, + 0.26196, + 0.26358, + 0.26044, + 0.26258, + 0.26298, + 0.26036, + 0.26196, + 0.26234, + 0.2623, + 0.26234, + 0.26312, + 0.262, + 0.26436, + 0.2627, + 0.2642, + 0.26248, + 0.26322, + 0.26162, + 0.26198, + 0.26202, + 0.26128, + 0.26422, + 0.26246, + 0.26314, + 0.26326, + 0.26486, + 0.26184, + 0.2653, + 0.26566, + 0.26624, + 0.26518, + 0.2621, + 0.26352, + 0.26428, + 0.26192, + 0.26372, + 0.26478, + 0.26346, + 0.26528, + 0.26426, + 0.2625, + 0.26412, + 0.26512, + 0.26276, + 0.26434, + 0.26354, + 0.2629, + 0.26342 + ], + "test_acc": [ + 0.2692, + 0.2577, + 0.2647, + 0.2579, + 0.2596, + 0.2707, + 0.2687, + 0.2731, + 0.269, + 0.2517, + 0.2616, + 0.2484, + 0.2766, + 0.2724, + 0.276, + 0.272, + 0.2751, + 0.2806, + 0.2765, + 0.279, + 0.2773, + 0.2673, + 0.2747, + 0.277, + 0.269, + 0.2777, + 0.2753, + 0.2775, + 0.2797, + 0.2744, + 0.2772, + 0.2708, + 0.2839, + 0.2778, + 0.2803, + 0.2838, + 0.2723, + 0.2851, + 0.2776, + 0.2839, + 0.2736, + 0.2716, + 0.2746, + 0.2758, + 0.2722, + 0.2781, + 0.2784, + 0.2804, + 0.2746, + 0.2794, + 0.2783, + 0.2786, + 0.2876, + 0.2727, + 0.2771, + 0.2809, + 0.2826, + 0.2797, + 0.2732, + 0.2775, + 0.2641, + 0.2775, + 0.2755, + 0.2821, + 0.2624, + 0.2773, + 0.2849, + 0.2705, + 0.2759, + 0.2828, + 0.2814, + 0.2828, + 0.2826, + 0.2761, + 0.2845, + 0.2793, + 0.2823, + 0.2763, + 0.2854, + 0.2845, + 0.2802, + 0.2804, + 0.2746, + 0.2779, + 0.2781, + 0.2789, + 0.2808, + 0.2824, + 0.2812, + 0.2792, + 0.2791, + 0.2822, + 0.2818, + 0.2815, + 0.2805, + 0.2815, + 0.2816, + 0.2813, + 0.2813, + 0.2814 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.3630167841911316, + 0.004104766063392162, + -0.0002518058172427118, + -0.0025237088557332754, + -0.003213974880054593, + 0.0022183996625244617, + 0.0008768833940848708, + -0.0017630010843276978 + ], + "perturbation_rho": [ + 0.006874069571495056, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -2.2398307919502258e-07, + 0.0, + 0.0, + -5.587935447692871e-09, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -6.924383342266083e-07, + 0.0, + 0.0, + -5.587935447692871e-09, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.01": [ + -2.5294721126556396e-06, + 0.0, + -9.313225746154785e-10, + -5.587935447692871e-09, + 0.0, + 0.0, + 0.0, + 0.0 + ] + }, + "hidden_norms_per_layer": [ + 47186.2265625, + 482479744.0, + 795204864.0, + 1384574336.0, + 2104582784.0, + 2617342720.0, + 2587617024.0, + 2589698816.0, + 2634861312.0 + ], + "bp_grad_norms_per_layer": [ + 2.55692498285498e-07, + 7.717193550860202e-10, + 7.686514758020735e-10, + 7.652692923798554e-10, + 7.633532139728061e-10, + 7.631562048970864e-10, + 7.626149711725816e-10, + 7.631405507524391e-10, + 7.631412723974051e-10 + ] + }, + "drift": { + "embed.weight": 386.6934065819407, + "embed.bias": 327.73324641624026, + "blocks.0.ln.weight": 10.576894760131836, + "blocks.0.w1.weight": 252.2875271106343, + "blocks.0.w1.bias": 255.16348096251252, + "blocks.0.w2.weight": 532.796399887916, + "blocks.1.ln.weight": 9.370871543884277, + "blocks.1.w1.weight": 248.217891631117, + "blocks.1.w1.bias": 225.83698087753675, + "blocks.1.w2.weight": 368.8833971864335, + "blocks.2.ln.weight": 10.612818717956543, + "blocks.2.w1.weight": 309.33943112065816, + "blocks.2.w1.bias": 301.26066223306907, + "blocks.2.w2.weight": 459.4422042261257, + "blocks.3.ln.weight": 11.764174461364746, + "blocks.3.w1.weight": 329.2768198307833, + "blocks.3.w1.bias": 336.9945554639267, + "blocks.3.w2.weight": 474.12186702206924, + "blocks.4.ln.weight": 12.292579650878906, + "blocks.4.w1.weight": 343.47770155099636, + "blocks.4.w1.bias": 332.7847814820698, + "blocks.4.w2.weight": 472.8538820952661, + "blocks.5.ln.weight": 8.211516380310059, + "blocks.5.w1.weight": 222.56739284033247, + "blocks.5.w1.bias": 206.0998611048773, + "blocks.5.w2.weight": 305.4202607785938, + "blocks.6.ln.weight": 9.443477630615234, + "blocks.6.w1.weight": 257.25562261225616, + "blocks.6.w1.bias": 242.26681812434325, + "blocks.6.w2.weight": 326.61842660972985, + "blocks.7.ln.weight": 8.36430549621582, + "blocks.7.w1.weight": 208.0675552186769, + "blocks.7.w1.bias": 201.05667739507527, + "blocks.7.w2.weight": 263.052196747128, + "out_ln.weight": 0.8603641390800476, + "out_head.weight": 9.058541297614818, + "out_head.bias": 0.2966235995908337 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.077447769546509, + 2.011511716918945, + 1.9823692067718506, + 1.9588359634780883, + 1.9388098348999023, + 1.9257721879577636, + 1.9099706296539307, + 1.9016640214538574, + 1.8879541109466553, + 1.8786920461654664, + 1.8665837494277955, + 1.8579063525009156, + 1.8527830486297607, + 1.846642380104065, + 1.8408584032440185, + 1.8423255667495728, + 1.842407624206543, + 1.83341695854187, + 1.8353786220550536, + 1.8293717947006225, + 1.8322698459243774, + 1.8300572503280639, + 1.8311189281845093, + 1.8299159336090087, + 1.8229221743774413, + 1.8264959131240845, + 1.8210881603622437, + 1.8220705929946899, + 1.8175409701538086, + 1.822826947631836, + 1.8118110262680054, + 1.8122388744735718, + 1.8130562100982666, + 1.806387862510681, + 1.80971314163208, + 1.804927230606079, + 1.800072886314392, + 1.803294679031372, + 1.801126385574341, + 1.8011314083862304, + 1.797285436630249, + 1.7961348217391968, + 1.797834702682495, + 1.794442939529419, + 1.7977431702041626, + 1.7928060857391357, + 1.7940230228424072, + 1.789833497238159, + 1.7892559197235107, + 1.7902562041473389, + 1.7880674313735962, + 1.7850274938201904, + 1.7843693801879883, + 1.7830010782623291, + 1.7847207107925416, + 1.784270888710022, + 1.7800133163833618, + 1.7805128693389893, + 1.7791845178604127, + 1.778016010169983, + 1.776533423500061, + 1.7764989660644532, + 1.7743354483032228, + 1.7752654462051392, + 1.7738590878677367, + 1.7712104555892945, + 1.7690131970214844, + 1.7739268627548217, + 1.7680237869644164, + 1.7693133504867553, + 1.7714519711303711, + 1.767369662475586, + 1.7680323779296876, + 1.7669272440338135, + 1.7676428430175781, + 1.7666232497787475, + 1.7640809685516357, + 1.7647571885681153, + 1.7644749606323242, + 1.763671007080078, + 1.7634928821182252, + 1.7633259762573241, + 1.7641471377563476, + 1.7631501617050171, + 1.7632150897598267, + 1.7611829147720337, + 1.7612092932891845, + 1.7640886084747314, + 1.7649920001220702, + 1.7596327584838867, + 1.7607089739227295, + 1.7609526915740967, + 1.7600691442871095, + 1.7606582910919188, + 1.7567459772491456, + 1.7617831900405885, + 1.7606757662963868, + 1.7583884005737305, + 1.755460721168518, + 1.7597045026397704 + ], + "train_acc": [ + 0.23164, + 0.25566, + 0.26534, + 0.2785, + 0.29544, + 0.30102, + 0.30646, + 0.30954, + 0.31586, + 0.3198, + 0.3232, + 0.33236, + 0.33444, + 0.3363, + 0.33972, + 0.34014, + 0.34228, + 0.3414, + 0.34292, + 0.34472, + 0.343, + 0.34502, + 0.3443, + 0.3446, + 0.34886, + 0.34656, + 0.34782, + 0.34692, + 0.34918, + 0.34708, + 0.3504, + 0.3498, + 0.35128, + 0.3542, + 0.35568, + 0.35602, + 0.3552, + 0.3547, + 0.35416, + 0.35536, + 0.3587, + 0.35762, + 0.35662, + 0.35792, + 0.3559, + 0.35938, + 0.35752, + 0.35952, + 0.3605, + 0.36166, + 0.36062, + 0.36, + 0.36316, + 0.36162, + 0.35992, + 0.36142, + 0.36284, + 0.36478, + 0.36274, + 0.36296, + 0.36524, + 0.3655, + 0.36454, + 0.36344, + 0.36574, + 0.36768, + 0.3671, + 0.36604, + 0.36738, + 0.36676, + 0.36726, + 0.36788, + 0.36766, + 0.37, + 0.36784, + 0.36824, + 0.36952, + 0.3703, + 0.37022, + 0.36936, + 0.37064, + 0.36912, + 0.37038, + 0.37162, + 0.36972, + 0.37024, + 0.37328, + 0.37162, + 0.3707, + 0.37108, + 0.37054, + 0.37196, + 0.37158, + 0.3725, + 0.37438, + 0.3719, + 0.37154, + 0.37274, + 0.3749, + 0.37412 + ], + "test_acc": [ + 0.2691, + 0.2769, + 0.2897, + 0.3114, + 0.3126, + 0.3325, + 0.3278, + 0.3359, + 0.3422, + 0.3373, + 0.3566, + 0.3503, + 0.367, + 0.3714, + 0.3746, + 0.3677, + 0.3815, + 0.3797, + 0.375, + 0.3784, + 0.3769, + 0.3722, + 0.377, + 0.3838, + 0.3768, + 0.3804, + 0.3876, + 0.3857, + 0.3861, + 0.3838, + 0.3869, + 0.3788, + 0.3858, + 0.3869, + 0.3815, + 0.3819, + 0.3849, + 0.3873, + 0.3869, + 0.3854, + 0.3836, + 0.386, + 0.3849, + 0.3932, + 0.3901, + 0.3862, + 0.391, + 0.3902, + 0.3876, + 0.3889, + 0.3858, + 0.3899, + 0.3896, + 0.3882, + 0.3857, + 0.3939, + 0.3952, + 0.3855, + 0.3793, + 0.3886, + 0.3887, + 0.3886, + 0.3887, + 0.39, + 0.3859, + 0.3877, + 0.3918, + 0.3899, + 0.3918, + 0.3949, + 0.3979, + 0.3944, + 0.3961, + 0.3959, + 0.3911, + 0.3966, + 0.3959, + 0.3961, + 0.4004, + 0.3939, + 0.3939, + 0.392, + 0.3927, + 0.3958, + 0.3961, + 0.3954, + 0.396, + 0.3944, + 0.3953, + 0.3952, + 0.395, + 0.394, + 0.3957, + 0.3955, + 0.3958, + 0.396, + 0.3956, + 0.3963, + 0.396, + 0.3962 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.04865531623363495, + 0.03906121850013733, + -0.031024880707263947, + 0.02575453743338585, + -0.06059260666370392, + -0.005358260590583086, + -0.0420413613319397, + 0.9973955154418945 + ], + "perturbation_rho": [ + 0.01942356675863266, + -0.017882898449897766, + 0.010590418241918087, + 0.01589794270694256, + 0.00590391643345356, + -0.0037763454020023346, + 0.034376855939626694, + 0.024630604311823845 + ], + "nudging": { + "0.001": [ + -2.2064195945858955e-06, + 5.948822945356369e-08, + 1.394655555486679e-07, + 5.5995769798755646e-08, + 8.498318493366241e-08, + 3.748573362827301e-08, + -1.8510036170482635e-08, + -9.316718205809593e-07 + ], + "0.003": [ + -6.74789771437645e-06, + -2.5331974029541016e-07, + 2.5168992578983307e-07, + -4.540197551250458e-08, + 2.979068085551262e-07, + 5.855690687894821e-08, + 2.4668406695127487e-07, + -3.4426338970661163e-06 + ], + "0.01": [ + -2.2898893803358078e-05, + -1.1182855814695358e-06, + 5.499459803104401e-07, + -2.2863969206809998e-07, + 8.718343451619148e-07, + 9.883660823106766e-08, + 5.047768354415894e-07, + -1.27346720546484e-05 + ] + }, + "hidden_norms_per_layer": [ + 6722.4755859375, + 51687.578125, + 543854.4375, + 751334.8125, + 919391.5625, + 1085895.375, + 1253076.875, + 1512727.75, + 818620.0625 + ], + "bp_grad_norms_per_layer": [ + 2.1660385755239986e-05, + 1.6567922784815892e-06, + 6.900805828990997e-07, + 6.877048690512311e-07, + 6.878757972117455e-07, + 6.88388126945938e-07, + 6.882298180244106e-07, + 6.877112355141435e-07, + 6.86944076733198e-07 + ] + }, + "drift": { + "embed.weight": 58.79676538348338, + "embed.bias": 28.710195472726856, + "blocks.0.ln.weight": 1.8708003759384155, + "blocks.0.w1.weight": 18.96163367141555, + "blocks.0.w1.bias": 15.000532672986424, + "blocks.0.w2.weight": 84.68199811589957, + "blocks.1.ln.weight": 1.7282594442367554, + "blocks.1.w1.weight": 26.714732007818075, + "blocks.1.w1.bias": 22.78624792045308, + "blocks.1.w2.weight": 60.87295612341078, + "blocks.2.ln.weight": 0.7759197950363159, + "blocks.2.w1.weight": 21.60045634414882, + "blocks.2.w1.bias": 24.705897270945115, + "blocks.2.w2.weight": 43.1465578222564, + "blocks.3.ln.weight": 0.9216886162757874, + "blocks.3.w1.weight": 21.587181438217584, + "blocks.3.w1.bias": 24.852801390777184, + "blocks.3.w2.weight": 60.24516088646971, + "blocks.4.ln.weight": 1.012886643409729, + "blocks.4.w1.weight": 22.29008054548115, + "blocks.4.w1.bias": 24.143128403264154, + "blocks.4.w2.weight": 62.903896132871985, + "blocks.5.ln.weight": 1.0267071723937988, + "blocks.5.w1.weight": 23.703189005498285, + "blocks.5.w1.bias": 24.657295832394876, + "blocks.5.w2.weight": 62.36992788497744, + "blocks.6.ln.weight": 1.0589414834976196, + "blocks.6.w1.weight": 24.22469961585016, + "blocks.6.w1.bias": 25.501732480992107, + "blocks.6.w2.weight": 47.57630517495101, + "blocks.7.ln.weight": 1.0684436559677124, + "blocks.7.w1.weight": 25.054823172351234, + "blocks.7.w1.bias": 28.09669994835103, + "blocks.7.w2.weight": 54.10842857089798, + "out_ln.weight": 0.41823405027389526, + "out_head.weight": 6.413607955937394, + "out_head.bias": 0.8745995262572578 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 256, + "num_blocks": 8, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 4 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d256_L8_seed4", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file |
