{ "config": { "output_dir": "results/vit_test", "epochs": 2, "lr": 0.001, "wd": 0.05, "seed": 42, "depth": 4, "d_model": 128, "n_heads": 4 }, "depth": 4, "d_model": 128, "architecture": "ViTMini", "bp_log": [ { "hidden_norms_cls": [ 0.31052109599113464, 3.2536301612854004, 4.7681732177734375, 6.129599571228027, 7.124356746673584 ], "hidden_norms_avg": [ 6.008513927459717, 7.034290313720703, 7.942849636077881, 8.915729522705078, 9.635361671447754 ], "bp_grad_per_sample_l2_med": [ 0.004442029166966677, 0.0012008437188342214, 0.0009976272704079747, 0.0009104337659664452, 0.0008558662957511842 ], "bp_grad_F": [ 0.18535660207271576, 0.03968670591711998, 0.032604776322841644, 0.029427478089928627, 0.02746850810945034 ], "gamma_dfa": NaN, "gamma_dfa_per_layer": [], "acc_eval": 0.1376953125, "loss_eval": 2.35188627243042, "epoch": 0 }, { "hidden_norms_cls": [ 0.5119581818580627, 12.45346450805664, 18.367006301879883, 25.541183471679688, 31.80698585510254 ], "hidden_norms_avg": [ 4.10694694519043, 10.166566848754883, 14.66998291015625, 19.933429718017578, 25.32620620727539 ], "bp_grad_per_sample_l2_med": [ 0.001977165462449193, 0.0002232444821856916, 0.00016308759222738445, 0.00014621141599491239, 0.0001451292773708701 ], "bp_grad_F": [ 0.11066912859678268, 0.010344989597797394, 0.006372526753693819, 0.005224850494414568, 0.005017552524805069 ], "gamma_dfa": NaN, "gamma_dfa_per_layer": [], "acc_eval": 0.4501953125, "loss_eval": 1.5154452323913574, "epoch": 1 }, { "hidden_norms_cls": [ 0.5624178647994995, 10.294456481933594, 15.314032554626465, 21.36781883239746, 25.580961227416992 ], "hidden_norms_avg": [ 3.8611817359924316, 9.257936477661133, 13.537341117858887, 17.418514251708984, 20.89995002746582 ], "bp_grad_per_sample_l2_med": [ 0.0021491278894245625, 0.0003653931198641658, 0.00022418916341848671, 0.00018154713325202465, 0.00017043459229171276 ], "bp_grad_F": [ 0.10512559115886688, 0.014831307344138622, 0.008541438728570938, 0.006647223141044378, 0.005966350436210632 ], "gamma_dfa": NaN, "gamma_dfa_per_layer": [], "acc_eval": 0.52734375, "loss_eval": 1.3002638816833496, "epoch": 2 } ], "dfa_log": [ { "hidden_norms_cls": [ 0.31052109599113464, 3.2536301612854004, 4.7681732177734375, 6.129599571228027, 7.124356746673584 ], "hidden_norms_avg": [ 6.008513927459717, 7.034290313720703, 7.942849636077881, 8.915729522705078, 9.635361671447754 ], "bp_grad_per_sample_l2_med": [ 0.004442029166966677, 0.0012008437188342214, 0.0009976272704079747, 0.0009104337659664452, 0.0008558662957511842 ], "bp_grad_F": [ 0.18535660207271576, 0.03968670591711998, 0.032604776322841644, 0.029427478089928627, 0.02746850810945034 ], "gamma_dfa": 0.003905070887412876, "gamma_dfa_per_layer": [ 0.004920602310448885, 0.005249542184174061, 0.0038580193649977446, 0.0015921196900308132 ], "acc_eval": 0.1376953125, "loss_eval": 2.35188627243042, "epoch": 0 }, { "hidden_norms_cls": [ 1.9684416055679321, 2029.23681640625, 2826.160400390625, 4672.1005859375, 8737.1298828125 ], "hidden_norms_avg": [ 45.1302375793457, 3068.238037109375, 8890.5302734375, 11475.16015625, 14973.22265625 ], "bp_grad_per_sample_l2_med": [ 0.0002814636391121894, 7.465861813216179e-07, 6.924062745383708e-07, 6.833349175394687e-07, 6.810931267864362e-07 ], "bp_grad_F": [ 0.08884845674037933, 7.258133700815961e-05, 4.2221599869662896e-05, 2.3938106096466072e-05, 2.3418680939357728e-05 ], "gamma_dfa": 0.0073737858911044896, "gamma_dfa_per_layer": [ 0.0056831855326890945, 0.015363219194114208, 0.009368096478283405, -0.0009193576406687498 ], "acc_eval": 0.23046875, "loss_eval": 2.054917335510254, "epoch": 1 }, { "hidden_norms_cls": [ 3.0175483226776123, 3912.425048828125, 6017.75830078125, 8192.2646484375, 19695.205078125 ], "hidden_norms_avg": [ 70.26815032958984, 6088.02001953125, 20950.01953125, 25550.255859375, 33917.15625 ], "bp_grad_per_sample_l2_med": [ 0.000260332744801417, 3.430939727877558e-07, 3.090381710535439e-07, 3.0240653359214775e-07, 2.9979452165207476e-07 ], "bp_grad_F": [ 0.25490984320640564, 2.9280510716489516e-05, 1.6399520973209292e-05, 9.804005458136089e-06, 9.64422997640213e-06 ], "gamma_dfa": 0.007508119277190417, "gamma_dfa_per_layer": [ 0.007269312161952257, 0.013389883562922478, 0.01044462900608778, -0.0010713476222008467 ], "acc_eval": 0.2265625, "loss_eval": 2.0774288177490234, "epoch": 2 } ] }