diff options
Diffstat (limited to 'results/snapshot_vit_v1/run_s42.log')
| -rw-r--r-- | results/snapshot_vit_v1/run_s42.log | 36 |
1 files changed, 36 insertions, 0 deletions
diff --git a/results/snapshot_vit_v1/run_s42.log b/results/snapshot_vit_v1/run_s42.log new file mode 100644 index 0000000..2bd76dd --- /dev/null +++ b/results/snapshot_vit_v1/run_s42.log @@ -0,0 +1,36 @@ +ViT-MINI: depth=4, d_model=128, n_heads=4, epochs=60, seed=42 + +=== BP training (ViT-Mini) === + n_params=809354 + [BP-vit] Ep 0: ||h_L_cls||=7.124e+00 ||g_2||=9.976e-04 acc=0.1377 + [BP-vit] Ep 1: ||h_L_cls||=3.181e+01 ||g_2||=1.631e-04 acc=0.4502 + [BP-vit] Ep 5: ||h_L_cls||=3.236e+01 ||g_2||=1.881e-04 acc=0.6035 + [BP-vit] Ep 10: ||h_L_cls||=3.166e+01 ||g_2||=1.970e-04 acc=0.6846 + [BP-vit] Ep 15: ||h_L_cls||=2.804e+01 ||g_2||=1.690e-04 acc=0.7334 + [BP-vit] Ep 20: ||h_L_cls||=2.521e+01 ||g_2||=1.525e-04 acc=0.7637 + [BP-vit] Ep 25: ||h_L_cls||=2.196e+01 ||g_2||=1.524e-04 acc=0.7568 + [BP-vit] Ep 30: ||h_L_cls||=1.903e+01 ||g_2||=1.451e-04 acc=0.7822 + [BP-vit] Ep 35: ||h_L_cls||=1.669e+01 ||g_2||=1.225e-04 acc=0.7852 + [BP-vit] Ep 40: ||h_L_cls||=1.509e+01 ||g_2||=9.628e-05 acc=0.7998 + [BP-vit] Ep 45: ||h_L_cls||=1.395e+01 ||g_2||=5.592e-05 acc=0.8066 + [BP-vit] Ep 50: ||h_L_cls||=1.327e+01 ||g_2||=4.421e-05 acc=0.7998 + [BP-vit] Ep 55: ||h_L_cls||=1.307e+01 ||g_2||=3.098e-05 acc=0.7959 + [BP-vit] Ep 60: ||h_L_cls||=1.297e+01 ||g_2||=3.051e-05 acc=0.7988 + +=== DFA training (ViT-Mini, block-level DFA) === + [DFA-vit] Ep 0: ||h_L_cls||=7.124e+00 ||g_2||=9.976e-04 acc=0.1377 + [DFA-vit] Ep 1: ||h_L_cls||=1.008e+04 ||g_2||=6.152e-07 acc=0.2402 γ=0.0062 + [DFA-vit] Ep 5: ||h_L_cls||=3.966e+05 ||g_2||=1.957e-08 acc=0.2266 γ=0.0066 + [DFA-vit] Ep 10: ||h_L_cls||=1.798e+06 ||g_2||=4.677e-09 acc=0.2266 γ=0.0045 + [DFA-vit] Ep 15: ||h_L_cls||=7.772e+06 ||g_2||=1.681e-09 acc=0.2334 γ=0.0003 + [DFA-vit] Ep 20: ||h_L_cls||=1.204e+07 ||g_2||=1.003e-09 acc=0.2402 γ=-0.0002 + [DFA-vit] Ep 25: ||h_L_cls||=1.835e+07 ||g_2||=6.058e-10 acc=0.2451 γ=-0.0014 + [DFA-vit] Ep 30: ||h_L_cls||=2.502e+07 ||g_2||=4.425e-10 acc=0.2266 γ=-0.0011 + [DFA-vit] Ep 35: ||h_L_cls||=3.150e+07 ||g_2||=3.412e-10 acc=0.2344 γ=-0.0003 + [DFA-vit] Ep 40: ||h_L_cls||=3.778e+07 ||g_2||=3.106e-10 acc=0.2549 γ=-0.0003 + [DFA-vit] Ep 45: ||h_L_cls||=4.166e+07 ||g_2||=2.891e-10 acc=0.2344 γ=-0.0004 + [DFA-vit] Ep 50: ||h_L_cls||=4.633e+07 ||g_2||=2.688e-10 acc=0.2549 γ=0.0004 + [DFA-vit] Ep 55: ||h_L_cls||=4.796e+07 ||g_2||=2.616e-10 acc=0.2510 γ=0.0005 + [DFA-vit] Ep 60: ||h_L_cls||=4.761e+07 ||g_2||=2.622e-10 acc=0.2559 γ=0.0007 + +Saved results/snapshot_vit_v1/snapshot_vit_s42.json |
