diff options
Diffstat (limited to 'results/fa_dfa_d512_L8_seed1/results_cifar10.json')
| -rw-r--r-- | results/fa_dfa_d512_L8_seed1/results_cifar10.json | 881 |
1 files changed, 881 insertions, 0 deletions
diff --git a/results/fa_dfa_d512_L8_seed1/results_cifar10.json b/results/fa_dfa_d512_L8_seed1/results_cifar10.json new file mode 100644 index 0000000..7ae4222 --- /dev/null +++ b/results/fa_dfa_d512_L8_seed1/results_cifar10.json @@ -0,0 +1,881 @@ +{ + "1": { + "dfa": { + "log": { + "train_loss": [ + 2.082363905029297, + 2.0549054863739014, + 2.048207590179443, + 2.040793383560181, + 2.0366951902008057, + 2.032741487388611, + 2.0325103788757324, + 2.030692584877014, + 2.0259502252197263, + 2.025390202407837, + 2.0222666513061522, + 2.0214237771606447, + 2.0199728786468505, + 2.021625373916626, + 2.017548585968018, + 2.0168718279266358, + 2.0175807970428465, + 2.0172574797058105, + 2.01521342338562, + 2.0168276077270506, + 2.012039394607544, + 2.0129323637390137, + 2.014951186904907, + 2.0105557980346678, + 2.01169651550293, + 2.0122193618011472, + 2.0088330200576783, + 2.0112308924102784, + 2.010762939796448, + 2.0086743901824953, + 2.00971054649353, + 2.010672675628662, + 2.007623846206665, + 2.0087671311187743, + 2.0092544104766845, + 2.0102373889541627, + 2.0070104084014893, + 2.00739478553772, + 2.0070192804336546, + 2.0084682094573973, + 2.00599662361145, + 2.005214340057373, + 2.0051871164703368, + 2.0050434196472167, + 2.005824287185669, + 2.004784624862671, + 2.006481015930176, + 2.004454507408142, + 2.004789038391113, + 2.004796143836975, + 2.004693070678711, + 2.0054635766601563, + 2.005378037185669, + 2.004756614151001, + 2.0019625535583496, + 2.002588889465332, + 2.0051779277038575, + 2.0044091219711304, + 2.003338354034424, + 2.0033947316741942, + 2.003234750213623, + 2.0028414460754393, + 2.0036003881072997, + 2.0034564432525634, + 2.0025813438415527, + 2.0027055737304686, + 2.003683521270752, + 2.000025806732178, + 2.000833299789429, + 2.000070794067383, + 2.0004121925354004, + 1.9998590943908692, + 2.001034607772827, + 1.9999368872451782, + 2.001410151634216, + 1.9992089236831665, + 2.000929930076599, + 2.0007802046966554, + 1.999412225341797, + 2.00004220161438, + 1.9992640100860595, + 1.9985904244995116, + 1.9986971343231201, + 1.998013607559204, + 1.9992886389160156, + 1.999632513961792, + 1.9996888436889648, + 1.997718791885376, + 1.9993746613311767, + 1.9998883880233764, + 1.9990022793960571, + 1.997506809692383, + 2.0001587979507445, + 1.9999388419342041, + 1.997678748703003, + 1.9990675243759155, + 1.9979941717910767, + 1.99825477684021, + 1.9982349634552001, + 1.9987702852630616 + ], + "train_acc": [ + 0.22766, + 0.23814, + 0.24064, + 0.24662, + 0.24896, + 0.24744, + 0.25128, + 0.25148, + 0.257, + 0.25682, + 0.25768, + 0.25754, + 0.25738, + 0.25788, + 0.25796, + 0.26228, + 0.26038, + 0.26018, + 0.2624, + 0.26194, + 0.26886, + 0.26334, + 0.26348, + 0.26562, + 0.26452, + 0.26388, + 0.26886, + 0.26554, + 0.26736, + 0.26842, + 0.26788, + 0.2677, + 0.26986, + 0.26932, + 0.2696, + 0.26988, + 0.27094, + 0.26952, + 0.27026, + 0.26998, + 0.27264, + 0.27378, + 0.27244, + 0.27328, + 0.27004, + 0.27324, + 0.27278, + 0.27568, + 0.27154, + 0.27282, + 0.27324, + 0.27192, + 0.27224, + 0.27294, + 0.27522, + 0.27348, + 0.27266, + 0.2733, + 0.27448, + 0.27446, + 0.27554, + 0.27596, + 0.27772, + 0.2751, + 0.27624, + 0.2762, + 0.27634, + 0.27822, + 0.27598, + 0.2772, + 0.2767, + 0.27624, + 0.27632, + 0.27776, + 0.27558, + 0.27776, + 0.27686, + 0.27808, + 0.27808, + 0.27784, + 0.27804, + 0.2769, + 0.27848, + 0.27982, + 0.27622, + 0.27712, + 0.27716, + 0.27708, + 0.2775, + 0.27748, + 0.27844, + 0.27692, + 0.2778, + 0.27656, + 0.27824, + 0.27638, + 0.27978, + 0.27886, + 0.27804, + 0.27882 + ], + "test_acc": [ + 0.2423, + 0.2394, + 0.2347, + 0.2736, + 0.2726, + 0.255, + 0.2481, + 0.2855, + 0.2847, + 0.266, + 0.291, + 0.2613, + 0.2775, + 0.2742, + 0.268, + 0.2726, + 0.2918, + 0.2421, + 0.2813, + 0.2608, + 0.2622, + 0.2733, + 0.2684, + 0.2941, + 0.2935, + 0.2823, + 0.2868, + 0.2952, + 0.3002, + 0.2952, + 0.3004, + 0.2797, + 0.2936, + 0.2993, + 0.2878, + 0.291, + 0.2839, + 0.2917, + 0.2933, + 0.2925, + 0.2814, + 0.2948, + 0.2987, + 0.2876, + 0.2737, + 0.2985, + 0.3022, + 0.2788, + 0.2868, + 0.2958, + 0.2886, + 0.302, + 0.2925, + 0.2965, + 0.2882, + 0.2941, + 0.3024, + 0.2895, + 0.3013, + 0.2994, + 0.2891, + 0.2866, + 0.291, + 0.2939, + 0.2834, + 0.2973, + 0.2851, + 0.2965, + 0.2951, + 0.2911, + 0.297, + 0.2923, + 0.2987, + 0.2954, + 0.2925, + 0.2892, + 0.2923, + 0.2956, + 0.2976, + 0.2978, + 0.2955, + 0.2986, + 0.2959, + 0.294, + 0.294, + 0.297, + 0.296, + 0.2964, + 0.2979, + 0.2999, + 0.2958, + 0.2963, + 0.2963, + 0.2954, + 0.2959, + 0.2953, + 0.2952, + 0.2956, + 0.2958, + 0.2958 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.396922767162323, + -0.0003933884436264634, + -8.022795373108238e-05, + -0.00015045293548610061, + 0.0005392992752604187, + -0.00036555560654960573, + 0.0002907244488596916, + -9.105022036237642e-05 + ], + "perturbation_rho": [ + -0.022967863827943802, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -3.4226104617118835e-07, + 0.0, + 0.0, + 0.0, + -1.862645149230957e-09, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -1.051928848028183e-06, + 0.0, + -1.862645149230957e-09, + 0.0, + -2.7939677238464355e-09, + 0.0, + 0.0, + 0.0 + ], + "0.01": [ + -3.5013072192668915e-06, + 0.0, + -1.862645149230957e-09, + 0.0, + -4.6566128730773926e-09, + 0.0, + 0.0, + 0.0 + ] + }, + "hidden_norms_per_layer": [ + 56414.3359375, + 1650990848.0, + 3797263872.0, + 4648061440.0, + 5705044480.0, + 6883173376.0, + 7810007552.0, + 9771259904.0, + 10072083456.0 + ], + "bp_grad_norms_per_layer": [ + 2.3933893089633784e-07, + 2.075998678519042e-10, + 2.073478749808899e-10, + 2.0727551619525997e-10, + 2.0692902946706226e-10, + 2.0691065527600472e-10, + 2.0689701341058964e-10, + 2.0701990122162783e-10, + 2.070805887877114e-10 + ] + }, + "drift": { + "embed.weight": 342.6478357645448, + "embed.bias": 262.14247134543683, + "blocks.0.ln.weight": 10.104765097486084, + "blocks.0.w1.weight": 316.5544638552012, + "blocks.0.w1.bias": 286.1293710348026, + "blocks.0.w2.weight": 488.0042854468774, + "blocks.1.ln.weight": 9.276528933854145, + "blocks.1.w1.weight": 365.09493896949925, + "blocks.1.w1.bias": 338.1199629883399, + "blocks.1.w2.weight": 335.8119384773775, + "blocks.2.ln.weight": 8.33234375341126, + "blocks.2.w1.weight": 339.8426755615624, + "blocks.2.w1.bias": 311.7235998065332, + "blocks.2.w2.weight": 316.14958807276867, + "blocks.3.ln.weight": 8.800684936876243, + "blocks.3.w1.weight": 365.78353354881364, + "blocks.3.w1.bias": 339.9117633918553, + "blocks.3.w2.weight": 349.88606585029197, + "blocks.4.ln.weight": 9.453209200185142, + "blocks.4.w1.weight": 385.95491307373965, + "blocks.4.w1.bias": 356.64512170760173, + "blocks.4.w2.weight": 352.5237601257774, + "blocks.5.ln.weight": 9.069459103352756, + "blocks.5.w1.weight": 368.17480746032606, + "blocks.5.w1.bias": 336.75699627476126, + "blocks.5.w2.weight": 341.8227235483115, + "blocks.6.ln.weight": 11.485193095368288, + "blocks.6.w1.weight": 454.812566497913, + "blocks.6.w1.bias": 427.10330552698105, + "blocks.6.w2.weight": 417.7750307221631, + "blocks.7.ln.weight": 8.765686193273819, + "blocks.7.w1.weight": 347.8255816484655, + "blocks.7.w1.bias": 334.696723010558, + "blocks.7.w2.weight": 324.7278045130555, + "out_ln.weight": 0.6473491781322253, + "out_head.weight": 9.841211699963578, + "out_head.bias": 1.0361314019488228 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.0216404482269286, + 1.9374429712677002, + 1.9162016125488281, + 1.8889766452026366, + 1.8700911392211914, + 1.8582790970611571, + 1.8527047716903686, + 1.848144301109314, + 1.8374218865966796, + 1.8318119472503662, + 1.8252220174407958, + 1.8180473458862305, + 1.8125171118927002, + 1.8029107147216796, + 1.8021541842651367, + 1.7970342992401123, + 1.7941553127288818, + 1.7909175275421143, + 1.782177930908203, + 1.78192981880188, + 1.7714600928115845, + 1.7727110540008546, + 1.7706272528076172, + 1.7628321991348266, + 1.7623195043563842, + 1.7597255520629882, + 1.7538186774063111, + 1.7597168767547609, + 1.751037622909546, + 1.7449284631729125, + 1.7485404402923583, + 1.7483091131973267, + 1.7475471432113647, + 1.7437016841888429, + 1.744876121864319, + 1.7462471084213256, + 1.739538355178833, + 1.740323678817749, + 1.7387758260345458, + 1.7394548602294921, + 1.7387311511611938, + 1.7320650988388062, + 1.7338726557159423, + 1.7331176211547852, + 1.7323205828475952, + 1.7314462683486938, + 1.7310566067886353, + 1.7290524214935303, + 1.728844566001892, + 1.7304431410980226, + 1.7348097546386718, + 1.7295909213638305, + 1.7310686675262452, + 1.7292217221069337, + 1.7266970907211303, + 1.7262331484985352, + 1.7316008935928344, + 1.732737080116272, + 1.72913513130188, + 1.7267526404190063, + 1.7301805443954468, + 1.726527798461914, + 1.7265644375991822, + 1.7238530459213257, + 1.7280543838500977, + 1.7257318978881835, + 1.726536202392578, + 1.7238884717559815, + 1.724731280784607, + 1.7219169683456421, + 1.725569347229004, + 1.7213600470352173, + 1.7229674570083617, + 1.723882184753418, + 1.7203116341781617, + 1.723835189590454, + 1.7169267540740967, + 1.7185560147094727, + 1.7191566823577882, + 1.7162863437652587, + 1.7187646448135376, + 1.7191588035202026, + 1.716515462913513, + 1.7150226160430908, + 1.7141532082748414, + 1.7141116730499268, + 1.7139857364273072, + 1.7161692111587525, + 1.7135670114135741, + 1.7172551581573485, + 1.711785320739746, + 1.710454179458618, + 1.7139234024429322, + 1.712289864768982, + 1.7146155573272706, + 1.7152564708709717, + 1.713687544631958, + 1.7137356586456298, + 1.7130619507217406, + 1.7119491666030884 + ], + "train_acc": [ + 0.25728, + 0.2904, + 0.30204, + 0.31214, + 0.32092, + 0.32608, + 0.32958, + 0.333, + 0.34152, + 0.34204, + 0.34534, + 0.34792, + 0.34984, + 0.35522, + 0.35358, + 0.35932, + 0.35884, + 0.35804, + 0.36126, + 0.3618, + 0.36572, + 0.36352, + 0.36422, + 0.36884, + 0.36892, + 0.36766, + 0.3732, + 0.36848, + 0.37482, + 0.37654, + 0.374, + 0.37336, + 0.37234, + 0.37542, + 0.37588, + 0.37886, + 0.3797, + 0.37564, + 0.37638, + 0.37848, + 0.37414, + 0.38038, + 0.38024, + 0.37908, + 0.37766, + 0.38196, + 0.38054, + 0.3817, + 0.37988, + 0.3799, + 0.37976, + 0.3801, + 0.38004, + 0.3791, + 0.38402, + 0.38122, + 0.38052, + 0.38022, + 0.37968, + 0.38174, + 0.37974, + 0.3824, + 0.38198, + 0.38646, + 0.38242, + 0.38228, + 0.37926, + 0.386, + 0.38334, + 0.3845, + 0.38592, + 0.38384, + 0.3829, + 0.38474, + 0.38294, + 0.38496, + 0.3877, + 0.3868, + 0.38626, + 0.38876, + 0.3878, + 0.38576, + 0.38496, + 0.387, + 0.38818, + 0.38784, + 0.38816, + 0.38708, + 0.38834, + 0.38604, + 0.38942, + 0.39, + 0.38986, + 0.39166, + 0.38932, + 0.3872, + 0.38822, + 0.38954, + 0.38726, + 0.39068 + ], + "test_acc": [ + 0.3016, + 0.3144, + 0.318, + 0.344, + 0.3607, + 0.3449, + 0.3552, + 0.3675, + 0.3696, + 0.3786, + 0.3818, + 0.374, + 0.3837, + 0.3739, + 0.3839, + 0.3766, + 0.3871, + 0.3763, + 0.3964, + 0.3769, + 0.375, + 0.3863, + 0.3847, + 0.4028, + 0.3957, + 0.4041, + 0.3988, + 0.3948, + 0.4082, + 0.4017, + 0.4074, + 0.4028, + 0.4108, + 0.4004, + 0.4041, + 0.4025, + 0.4049, + 0.4087, + 0.4, + 0.402, + 0.3985, + 0.4059, + 0.4004, + 0.4091, + 0.4071, + 0.4143, + 0.4132, + 0.409, + 0.4076, + 0.4057, + 0.4061, + 0.4117, + 0.41, + 0.4148, + 0.4041, + 0.4149, + 0.4053, + 0.4078, + 0.4003, + 0.4132, + 0.4143, + 0.4119, + 0.4157, + 0.4033, + 0.417, + 0.4029, + 0.4075, + 0.4054, + 0.4092, + 0.4144, + 0.4062, + 0.4099, + 0.4126, + 0.4157, + 0.4137, + 0.4152, + 0.4095, + 0.4124, + 0.4125, + 0.4084, + 0.4123, + 0.4109, + 0.4112, + 0.4101, + 0.4096, + 0.4125, + 0.4075, + 0.4126, + 0.4119, + 0.4123, + 0.412, + 0.4125, + 0.4119, + 0.4121, + 0.4112, + 0.4123, + 0.4125, + 0.4116, + 0.4123, + 0.4123 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.018269643187522888, + 0.07727976143360138, + -0.007720204535871744, + -0.03330487757921219, + -0.08965514600276947, + -0.05200214684009552, + -0.026286372914910316, + 0.9987640380859375 + ], + "perturbation_rho": [ + -0.009564901702105999, + -0.008453571237623692, + 0.004972926340997219, + -0.0027248896658420563, + -0.002009802497923374, + -0.036599986255168915, + -0.004199513234198093, + -0.03066324070096016 + ], + "nudging": { + "0.001": [ + -2.971384674310684e-06, + -4.7439243644475937e-07, + -6.693881005048752e-08, + 8.987262845039368e-08, + 6.495974957942963e-08, + 3.585591912269592e-08, + 5.820766091346741e-08, + -1.373467966914177e-06 + ], + "0.003": [ + -8.388538844883442e-06, + -1.432374119758606e-06, + 3.236345946788788e-08, + 1.6565900295972824e-07, + 4.4424086809158325e-07, + 1.909211277961731e-07, + 1.6344711184501648e-07, + -5.066161975264549e-06 + ], + "0.01": [ + -2.80656386166811e-05, + -5.379552021622658e-06, + 6.239861249923706e-08, + 5.534384399652481e-07, + 1.6264384612441063e-06, + 8.010538294911385e-07, + 5.328329280018806e-07, + -1.849012915045023e-05 + ] + }, + "hidden_norms_per_layer": [ + 6323.6376953125, + 105527.390625, + 956923.0625, + 1390351.75, + 1878290.125, + 2305144.75, + 2408578.0, + 2446730.0, + 1101165.75 + ], + "bp_grad_norms_per_layer": [ + 3.628878766903654e-05, + 2.4742682853684528e-06, + 7.756235049782845e-07, + 7.634440635229112e-07, + 7.605355563100602e-07, + 7.63955029015051e-07, + 7.672065862607269e-07, + 7.676999871364387e-07, + 7.604297138641414e-07 + ] + }, + "drift": { + "embed.weight": 41.248137439097945, + "embed.bias": 19.492810839395194, + "blocks.0.ln.weight": 1.069830164213397, + "blocks.0.w1.weight": 14.662790472591759, + "blocks.0.w1.bias": 14.121256961200007, + "blocks.0.w2.weight": 58.06593910253397, + "blocks.1.ln.weight": 1.133609473869923, + "blocks.1.w1.weight": 22.616267702800116, + "blocks.1.w1.bias": 18.62883152876521, + "blocks.1.w2.weight": 55.42300201602086, + "blocks.2.ln.weight": 0.8395001619703416, + "blocks.2.w1.weight": 23.18964915987391, + "blocks.2.w1.bias": 23.25897523020434, + "blocks.2.w2.weight": 51.12589199717472, + "blocks.3.ln.weight": 0.8079243325814617, + "blocks.3.w1.weight": 23.270223091205043, + "blocks.3.w1.bias": 23.973451384958736, + "blocks.3.w2.weight": 44.38945707970049, + "blocks.4.ln.weight": 0.7352731954552547, + "blocks.4.w1.weight": 23.461113331044203, + "blocks.4.w1.bias": 25.70390171443948, + "blocks.4.w2.weight": 35.45845934569411, + "blocks.5.ln.weight": 0.5783243137671081, + "blocks.5.w1.weight": 18.59117833907557, + "blocks.5.w1.bias": 19.752451457096242, + "blocks.5.w2.weight": 34.26869815664233, + "blocks.6.ln.weight": 0.5587106874210115, + "blocks.6.w1.weight": 16.34076044382607, + "blocks.6.w1.bias": 16.11418290554864, + "blocks.6.w2.weight": 46.04419499078452, + "blocks.7.ln.weight": 0.7410616437288059, + "blocks.7.w1.weight": 24.360145562410782, + "blocks.7.w1.bias": 27.19985092354318, + "blocks.7.w2.weight": 37.619326829386324, + "out_ln.weight": 0.35428257928264884, + "out_head.weight": 7.449367264588818, + "out_head.bias": 2.03709619180192 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 512, + "num_blocks": 8, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 1 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d512_L8_seed1", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file |
