diff options
Diffstat (limited to 'results/snapshot_vit_v1/run_s123.log')
| -rw-r--r-- | results/snapshot_vit_v1/run_s123.log | 36 |
1 files changed, 36 insertions, 0 deletions
diff --git a/results/snapshot_vit_v1/run_s123.log b/results/snapshot_vit_v1/run_s123.log new file mode 100644 index 0000000..e23f876 --- /dev/null +++ b/results/snapshot_vit_v1/run_s123.log @@ -0,0 +1,36 @@ +ViT-MINI: depth=4, d_model=128, n_heads=4, epochs=60, seed=123 + +=== BP training (ViT-Mini) === + n_params=809354 + [BP-vit] Ep 0: ||h_L_cls||=7.591e+00 ||g_2||=9.667e-04 acc=0.1250 + [BP-vit] Ep 1: ||h_L_cls||=3.242e+01 ||g_2||=1.457e-04 acc=0.4697 + [BP-vit] Ep 5: ||h_L_cls||=3.030e+01 ||g_2||=1.731e-04 acc=0.5957 + [BP-vit] Ep 10: ||h_L_cls||=2.876e+01 ||g_2||=1.742e-04 acc=0.6768 + [BP-vit] Ep 15: ||h_L_cls||=2.787e+01 ||g_2||=1.661e-04 acc=0.7129 + [BP-vit] Ep 20: ||h_L_cls||=2.435e+01 ||g_2||=1.685e-04 acc=0.7461 + [BP-vit] Ep 25: ||h_L_cls||=2.108e+01 ||g_2||=1.419e-04 acc=0.7627 + [BP-vit] Ep 30: ||h_L_cls||=1.855e+01 ||g_2||=1.543e-04 acc=0.7783 + [BP-vit] Ep 35: ||h_L_cls||=1.614e+01 ||g_2||=1.360e-04 acc=0.7939 + [BP-vit] Ep 40: ||h_L_cls||=1.470e+01 ||g_2||=1.014e-04 acc=0.7822 + [BP-vit] Ep 45: ||h_L_cls||=1.305e+01 ||g_2||=9.261e-05 acc=0.7822 + [BP-vit] Ep 50: ||h_L_cls||=1.282e+01 ||g_2||=4.534e-05 acc=0.7822 + [BP-vit] Ep 55: ||h_L_cls||=1.249e+01 ||g_2||=4.168e-05 acc=0.7910 + [BP-vit] Ep 60: ||h_L_cls||=1.250e+01 ||g_2||=3.572e-05 acc=0.7871 + +=== DFA training (ViT-Mini, block-level DFA) === + [DFA-vit] Ep 0: ||h_L_cls||=7.591e+00 ||g_2||=9.667e-04 acc=0.1250 + [DFA-vit] Ep 1: ||h_L_cls||=1.532e+04 ||g_2||=4.354e-07 acc=0.2158 γ=0.0090 + [DFA-vit] Ep 5: ||h_L_cls||=7.063e+05 ||g_2||=1.320e-08 acc=0.2197 γ=0.0009 + [DFA-vit] Ep 10: ||h_L_cls||=4.235e+06 ||g_2||=2.736e-09 acc=0.2373 γ=0.0020 + [DFA-vit] Ep 15: ||h_L_cls||=7.955e+06 ||g_2||=1.467e-09 acc=0.2266 γ=0.0020 + [DFA-vit] Ep 20: ||h_L_cls||=1.460e+07 ||g_2||=9.774e-10 acc=0.2139 γ=0.0018 + [DFA-vit] Ep 25: ||h_L_cls||=2.068e+07 ||g_2||=7.712e-10 acc=0.2080 γ=0.0013 + [DFA-vit] Ep 30: ||h_L_cls||=3.012e+07 ||g_2||=5.962e-10 acc=0.2139 γ=0.0010 + [DFA-vit] Ep 35: ||h_L_cls||=4.466e+07 ||g_2||=4.717e-10 acc=0.1396 γ=0.0040 + [DFA-vit] Ep 40: ||h_L_cls||=4.770e+07 ||g_2||=4.407e-10 acc=0.1963 γ=0.0032 + [DFA-vit] Ep 45: ||h_L_cls||=4.536e+07 ||g_2||=3.493e-10 acc=0.2031 γ=0.0021 + [DFA-vit] Ep 50: ||h_L_cls||=4.727e+07 ||g_2||=3.342e-10 acc=0.1904 γ=0.0017 + [DFA-vit] Ep 55: ||h_L_cls||=4.865e+07 ||g_2||=3.534e-10 acc=0.1982 γ=0.0019 + [DFA-vit] Ep 60: ||h_L_cls||=5.080e+07 ||g_2||=3.359e-10 acc=0.2021 γ=0.0017 + +Saved results/snapshot_vit_v1/snapshot_vit_s123.json |
