diff options
| -rw-r--r-- | results/d256_deep_scan.log | 368 | ||||
| -rw-r--r-- | results/fa_dfa_d256_L12_seed0/results_cifar10.json | 969 | ||||
| -rw-r--r-- | results/fa_dfa_d256_L12_seed1/results_cifar10.json | 969 | ||||
| -rw-r--r-- | results/fa_dfa_d256_L12_seed2/results_cifar10.json | 969 | ||||
| -rw-r--r-- | results/fa_dfa_d256_L12_seed3/results_cifar10.json | 969 | ||||
| -rw-r--r-- | results/fa_dfa_d256_L12_seed4/results_cifar10.json | 969 | ||||
| -rw-r--r-- | results/fa_dfa_d256_L8_seed0/results_cifar10.json | 881 | ||||
| -rw-r--r-- | results/fa_dfa_d256_L8_seed1/results_cifar10.json | 881 | ||||
| -rw-r--r-- | results/fa_dfa_d256_L8_seed2/results_cifar10.json | 881 | ||||
| -rw-r--r-- | results/fa_dfa_d256_L8_seed3/results_cifar10.json | 881 | ||||
| -rw-r--r-- | results/fa_dfa_d256_L8_seed4/results_cifar10.json | 881 |
11 files changed, 9618 insertions, 0 deletions
diff --git a/results/d256_deep_scan.log b/results/d256_deep_scan.log new file mode 100644 index 0000000..a68ca8d --- /dev/null +++ b/results/d256_deep_scan.log @@ -0,0 +1,368 @@ +=== FA+DFA at d=256 DEEP: L=8 and L=12, seeds 0-4 === +Start: Sun Apr 26 11:32:41 AM CDT 2026 + +--- d=256 L=8 --- + L=8 seed=0 (Sun Apr 26 11:32:41 AM CDT 2026) +Using device: cuda:0 + +============================================================ +Seed 0 +============================================================ + +--- DFA --- + [DFA] Epoch 1: loss=2.0881, train=0.2321, test=0.2527 + [DFA] Epoch 10: loss=2.0378, train=0.2500, test=0.2725 + [DFA] Epoch 20: loss=2.0274, train=0.2579, test=0.2744 + [DFA] Epoch 30: loss=2.0203, train=0.2619, test=0.2796 + [DFA] Epoch 40: loss=2.0173, train=0.2644, test=0.2846 + [DFA] Epoch 50: loss=2.0163, train=0.2675, test=0.2989 + [DFA] Epoch 60: loss=2.0143, train=0.2660, test=0.2971 + [DFA] Epoch 70: loss=2.0110, train=0.2730, test=0.2949 + [DFA] Epoch 80: loss=2.0090, train=0.2714, test=0.2969 + [DFA] Epoch 90: loss=2.0069, train=0.2736, test=0.2936 + [DFA] Epoch 100: loss=2.0087, train=0.2730, test=0.2932 + Final test acc: 0.2932 + +--- FA --- + [FA] Epoch 1: loss=2.0547, train=0.2363, test=0.2656 + [FA] Epoch 10: loss=1.9154, train=0.3028, test=0.3358 + [FA] Epoch 20: loss=1.8741, train=0.3243, test=0.3577 + [FA] Epoch 30: loss=1.8346, train=0.3430, test=0.3703 + [FA] Epoch 40: loss=1.8160, train=0.3483, test=0.3799 + [FA] Epoch 50: loss=1.8014, train=0.3571, test=0.3837 + [FA] Epoch 60: loss=1.7884, train=0.3628, test=0.3856 + [FA] Epoch 70: loss=1.7813, train=0.3627, test=0.3871 + [FA] Epoch 80: loss=1.7746, train=0.3692, test=0.3953 + [FA] Epoch 90: loss=1.7697, train=0.3710, test=0.3933 + [FA] Epoch 100: loss=1.7717, train=0.3694, test=0.3926 + Final test acc: 0.3926 + +All results saved to results/fa_dfa_d256_L8_seed0/results_cifar10.json + L=8 seed=1 (Sun Apr 26 11:51:23 AM CDT 2026) +Using device: cuda:0 + +============================================================ +Seed 1 +============================================================ + +--- DFA --- + [DFA] Epoch 1: loss=2.0732, train=0.2372, test=0.2766 + [DFA] Epoch 10: loss=2.0212, train=0.2592, test=0.2792 + [DFA] Epoch 20: loss=2.0138, train=0.2631, test=0.2756 + [DFA] Epoch 30: loss=2.0083, train=0.2687, test=0.2720 + [DFA] Epoch 40: loss=2.0092, train=0.2695, test=0.2793 + [DFA] Epoch 50: loss=2.0069, train=0.2718, test=0.2877 + [DFA] Epoch 60: loss=2.0048, train=0.2738, test=0.2891 + [DFA] Epoch 70: loss=2.0051, train=0.2741, test=0.2979 + [DFA] Epoch 80: loss=2.0064, train=0.2745, test=0.2871 + [DFA] Epoch 90: loss=2.0024, train=0.2754, test=0.2922 + [DFA] Epoch 100: loss=2.0042, train=0.2757, test=0.2900 + Final test acc: 0.2900 + +--- FA --- + [FA] Epoch 1: loss=2.0616, train=0.2355, test=0.2672 + [FA] Epoch 10: loss=1.9012, train=0.3065, test=0.3400 + [FA] Epoch 20: loss=1.8783, train=0.3216, test=0.3667 + [FA] Epoch 30: loss=1.8588, train=0.3327, test=0.3628 + [FA] Epoch 40: loss=1.8388, train=0.3435, test=0.3747 + [FA] Epoch 50: loss=1.8280, train=0.3484, test=0.3797 + [FA] Epoch 60: loss=1.8198, train=0.3498, test=0.3812 + [FA] Epoch 70: loss=1.8100, train=0.3566, test=0.3827 + [FA] Epoch 80: loss=1.8049, train=0.3559, test=0.3848 + [FA] Epoch 90: loss=1.7997, train=0.3611, test=0.3878 + [FA] Epoch 100: loss=1.8024, train=0.3600, test=0.3864 + Final test acc: 0.3864 + +All results saved to results/fa_dfa_d256_L8_seed1/results_cifar10.json + L=8 seed=2 (Sun Apr 26 12:10:10 PM CDT 2026) +Using device: cuda:0 + +============================================================ +Seed 2 +============================================================ + +--- DFA --- + [DFA] Epoch 1: loss=2.0887, train=0.2300, test=0.2516 + [DFA] Epoch 10: loss=2.0677, train=0.2372, test=0.2516 + [DFA] Epoch 20: loss=2.0654, train=0.2430, test=0.2406 + [DFA] Epoch 30: loss=2.0701, train=0.2433, test=0.2661 + [DFA] Epoch 40: loss=2.0737, train=0.2391, test=0.2396 + [DFA] Epoch 50: loss=2.0765, train=0.2385, test=0.2638 + [DFA] Epoch 60: loss=2.0779, train=0.2412, test=0.2491 + [DFA] Epoch 70: loss=2.0781, train=0.2412, test=0.2502 + [DFA] Epoch 80: loss=2.0787, train=0.2412, test=0.2521 + [DFA] Epoch 90: loss=2.0788, train=0.2424, test=0.2491 + [DFA] Epoch 100: loss=2.0776, train=0.2415, test=0.2533 + Final test acc: 0.2533 + +--- FA --- + [FA] Epoch 1: loss=2.0576, train=0.2397, test=0.2762 + [FA] Epoch 10: loss=1.8946, train=0.3120, test=0.3276 + [FA] Epoch 20: loss=1.8617, train=0.3295, test=0.3438 + [FA] Epoch 30: loss=1.8345, train=0.3451, test=0.3655 + [FA] Epoch 40: loss=1.8104, train=0.3528, test=0.3745 + [FA] Epoch 50: loss=1.7890, train=0.3614, test=0.3821 + [FA] Epoch 60: loss=1.7762, train=0.3651, test=0.3857 + [FA] Epoch 70: loss=1.7712, train=0.3670, test=0.3877 + [FA] Epoch 80: loss=1.7661, train=0.3691, test=0.3904 + [FA] Epoch 90: loss=1.7666, train=0.3699, test=0.3888 + [FA] Epoch 100: loss=1.7636, train=0.3707, test=0.3894 + Final test acc: 0.3894 + +All results saved to results/fa_dfa_d256_L8_seed2/results_cifar10.json + L=8 seed=3 (Sun Apr 26 12:28:00 PM CDT 2026) +Using device: cuda:0 + +============================================================ +Seed 3 +============================================================ + +--- DFA --- + [DFA] Epoch 1: loss=2.0686, train=0.2387, test=0.2605 + [DFA] Epoch 10: loss=2.0282, train=0.2591, test=0.2664 + [DFA] Epoch 20: loss=2.0167, train=0.2674, test=0.2843 + [DFA] Epoch 30: loss=2.0118, train=0.2728, test=0.2758 + [DFA] Epoch 40: loss=2.0091, train=0.2734, test=0.2883 + [DFA] Epoch 50: loss=2.0107, train=0.2759, test=0.2977 + [DFA] Epoch 60: loss=2.0120, train=0.2736, test=0.2934 + [DFA] Epoch 70: loss=2.0100, train=0.2753, test=0.2955 + [DFA] Epoch 80: loss=2.0104, train=0.2740, test=0.2975 + [DFA] Epoch 90: loss=2.0085, train=0.2752, test=0.2950 + [DFA] Epoch 100: loss=2.0069, train=0.2758, test=0.2946 + Final test acc: 0.2946 + +--- FA --- + [FA] Epoch 1: loss=2.0590, train=0.2423, test=0.2801 + [FA] Epoch 10: loss=1.8989, train=0.3115, test=0.3249 + [FA] Epoch 20: loss=1.8557, train=0.3338, test=0.3676 + [FA] Epoch 30: loss=1.8108, train=0.3517, test=0.3844 + [FA] Epoch 40: loss=1.7758, train=0.3660, test=0.3948 + [FA] Epoch 50: loss=1.7627, train=0.3707, test=0.3968 + [FA] Epoch 60: loss=1.7505, train=0.3761, test=0.3973 + [FA] Epoch 70: loss=1.7405, train=0.3772, test=0.4021 + [FA] Epoch 80: loss=1.7362, train=0.3798, test=0.4026 + [FA] Epoch 90: loss=1.7267, train=0.3841, test=0.4014 + [FA] Epoch 100: loss=1.7232, train=0.3834, test=0.4035 + Final test acc: 0.4035 + +All results saved to results/fa_dfa_d256_L8_seed3/results_cifar10.json + L=8 seed=4 (Sun Apr 26 12:45:29 PM CDT 2026) +Using device: cuda:0 + +============================================================ +Seed 4 +============================================================ + +--- DFA --- + [DFA] Epoch 1: loss=2.0657, train=0.2414, test=0.2692 + [DFA] Epoch 10: loss=2.0458, train=0.2480, test=0.2517 + [DFA] Epoch 20: loss=2.0359, train=0.2558, test=0.2790 + [DFA] Epoch 30: loss=2.0356, train=0.2592, test=0.2744 + [DFA] Epoch 40: loss=2.0295, train=0.2621, test=0.2839 + [DFA] Epoch 50: loss=2.0314, train=0.2622, test=0.2794 + [DFA] Epoch 60: loss=2.0303, train=0.2623, test=0.2775 + [DFA] Epoch 70: loss=2.0303, train=0.2616, test=0.2828 + [DFA] Epoch 80: loss=2.0305, train=0.2653, test=0.2845 + [DFA] Epoch 90: loss=2.0290, train=0.2635, test=0.2792 + [DFA] Epoch 100: loss=2.0293, train=0.2634, test=0.2814 + Final test acc: 0.2814 + +--- FA --- + [FA] Epoch 1: loss=2.0774, train=0.2316, test=0.2691 + [FA] Epoch 10: loss=1.8787, train=0.3198, test=0.3373 + [FA] Epoch 20: loss=1.8294, train=0.3447, test=0.3784 + [FA] Epoch 30: loss=1.8228, train=0.3471, test=0.3838 + [FA] Epoch 40: loss=1.8011, train=0.3554, test=0.3854 + [FA] Epoch 50: loss=1.7903, train=0.3617, test=0.3889 + [FA] Epoch 60: loss=1.7780, train=0.3630, test=0.3886 + [FA] Epoch 70: loss=1.7693, train=0.3668, test=0.3949 + [FA] Epoch 80: loss=1.7637, train=0.3694, test=0.3939 + [FA] Epoch 90: loss=1.7596, train=0.3711, test=0.3952 + [FA] Epoch 100: loss=1.7597, train=0.3741, test=0.3962 + Final test acc: 0.3962 + +All results saved to results/fa_dfa_d256_L8_seed4/results_cifar10.json + +--- d=256 L=12 --- + L=12 seed=0 (Sun Apr 26 01:02:49 PM CDT 2026) +Using device: cuda:0 + +============================================================ +Seed 0 +============================================================ + +--- DFA --- + [DFA] Epoch 1: loss=2.0789, train=0.2324, test=0.2663 + [DFA] Epoch 10: loss=2.0336, train=0.2527, test=0.2625 + [DFA] Epoch 20: loss=2.0229, train=0.2600, test=0.2770 + [DFA] Epoch 30: loss=2.0144, train=0.2656, test=0.2832 + [DFA] Epoch 40: loss=2.0092, train=0.2673, test=0.2900 + [DFA] Epoch 50: loss=2.0072, train=0.2716, test=0.2880 + [DFA] Epoch 60: loss=2.0083, train=0.2707, test=0.2909 + [DFA] Epoch 70: loss=2.0060, train=0.2721, test=0.2878 + [DFA] Epoch 80: loss=2.0036, train=0.2733, test=0.2928 + [DFA] Epoch 90: loss=2.0025, train=0.2729, test=0.2959 + [DFA] Epoch 100: loss=2.0018, train=0.2744, test=0.2956 + Final test acc: 0.2956 + +--- FA --- + [FA] Epoch 1: loss=2.0900, train=0.2220, test=0.2317 + [FA] Epoch 10: loss=1.9267, train=0.3005, test=0.3223 + [FA] Epoch 20: loss=1.8993, train=0.3140, test=0.3303 + [FA] Epoch 30: loss=1.8738, train=0.3269, test=0.3504 + [FA] Epoch 40: loss=1.8485, train=0.3353, test=0.3691 + [FA] Epoch 50: loss=1.8262, train=0.3459, test=0.3682 + [FA] Epoch 60: loss=1.8133, train=0.3499, test=0.3768 + [FA] Epoch 70: loss=1.8020, train=0.3581, test=0.3826 + [FA] Epoch 80: loss=1.7935, train=0.3569, test=0.3844 + [FA] Epoch 90: loss=1.7947, train=0.3598, test=0.3874 + [FA] Epoch 100: loss=1.7935, train=0.3598, test=0.3867 + Final test acc: 0.3867 + +All results saved to results/fa_dfa_d256_L12_seed0/results_cifar10.json + L=12 seed=1 (Sun Apr 26 01:25:07 PM CDT 2026) +Using device: cuda:0 + +============================================================ +Seed 1 +============================================================ + +--- DFA --- + [DFA] Epoch 1: loss=2.0677, train=0.2381, test=0.2661 + [DFA] Epoch 10: loss=2.0284, train=0.2529, test=0.2657 + [DFA] Epoch 20: loss=2.0180, train=0.2620, test=0.2858 + [DFA] Epoch 30: loss=2.0152, train=0.2633, test=0.2892 + [DFA] Epoch 40: loss=2.0116, train=0.2698, test=0.2752 + [DFA] Epoch 50: loss=2.0073, train=0.2713, test=0.2885 + [DFA] Epoch 60: loss=2.0060, train=0.2712, test=0.2904 + [DFA] Epoch 70: loss=2.0030, train=0.2721, test=0.2900 + [DFA] Epoch 80: loss=2.0028, train=0.2752, test=0.2851 + [DFA] Epoch 90: loss=2.0021, train=0.2734, test=0.2858 + [DFA] Epoch 100: loss=2.0036, train=0.2713, test=0.2872 + Final test acc: 0.2872 + +--- FA --- + [FA] Epoch 1: loss=2.0876, train=0.2226, test=0.2532 + [FA] Epoch 10: loss=1.8945, train=0.3173, test=0.3408 + [FA] Epoch 20: loss=1.8548, train=0.3322, test=0.3604 + [FA] Epoch 30: loss=1.8063, train=0.3535, test=0.3877 + [FA] Epoch 40: loss=1.7791, train=0.3624, test=0.3889 + [FA] Epoch 50: loss=1.7631, train=0.3704, test=0.3963 + [FA] Epoch 60: loss=1.7526, train=0.3734, test=0.4018 + [FA] Epoch 70: loss=1.7480, train=0.3753, test=0.4018 + [FA] Epoch 80: loss=1.7460, train=0.3788, test=0.4059 + [FA] Epoch 90: loss=1.7411, train=0.3778, test=0.4056 + [FA] Epoch 100: loss=1.7453, train=0.3781, test=0.4065 + Final test acc: 0.4065 + +All results saved to results/fa_dfa_d256_L12_seed1/results_cifar10.json + L=12 seed=2 (Sun Apr 26 01:47:27 PM CDT 2026) +Using device: cuda:0 + +============================================================ +Seed 2 +============================================================ + +--- DFA --- + [DFA] Epoch 1: loss=2.0928, train=0.2321, test=0.2527 + [DFA] Epoch 10: loss=2.0654, train=0.2410, test=0.2552 + [DFA] Epoch 20: loss=2.0649, train=0.2433, test=0.2619 + [DFA] Epoch 30: loss=2.0643, train=0.2466, test=0.2706 + [DFA] Epoch 40: loss=2.0687, train=0.2467, test=0.2562 + [DFA] Epoch 50: loss=2.0695, train=0.2473, test=0.2606 + [DFA] Epoch 60: loss=2.0722, train=0.2467, test=0.2661 + [DFA] Epoch 70: loss=2.0726, train=0.2479, test=0.2544 + [DFA] Epoch 80: loss=2.0731, train=0.2471, test=0.2643 + [DFA] Epoch 90: loss=2.0716, train=0.2480, test=0.2630 + [DFA] Epoch 100: loss=2.0725, train=0.2494, test=0.2652 + Final test acc: 0.2652 + +--- FA --- + [FA] Epoch 1: loss=2.0747, train=0.2315, test=0.2629 + [FA] Epoch 10: loss=1.9393, train=0.2930, test=0.3051 + [FA] Epoch 20: loss=1.9019, train=0.3114, test=0.3421 + [FA] Epoch 30: loss=1.8808, train=0.3226, test=0.3531 + [FA] Epoch 40: loss=1.8720, train=0.3293, test=0.3425 + [FA] Epoch 50: loss=1.8621, train=0.3354, test=0.3545 + [FA] Epoch 60: loss=1.8549, train=0.3362, test=0.3649 + [FA] Epoch 70: loss=1.8487, train=0.3409, test=0.3610 + [FA] Epoch 80: loss=1.8427, train=0.3422, test=0.3688 + [FA] Epoch 90: loss=1.8386, train=0.3431, test=0.3670 + [FA] Epoch 100: loss=1.8405, train=0.3444, test=0.3684 + Final test acc: 0.3684 + +All results saved to results/fa_dfa_d256_L12_seed2/results_cifar10.json + L=12 seed=3 (Sun Apr 26 02:10:00 PM CDT 2026) +Using device: cuda:0 + +============================================================ +Seed 3 +============================================================ + +--- DFA --- + [DFA] Epoch 1: loss=2.0810, train=0.2315, test=0.2643 + [DFA] Epoch 10: loss=2.0346, train=0.2553, test=0.2773 + [DFA] Epoch 20: loss=2.0237, train=0.2613, test=0.2833 + [DFA] Epoch 30: loss=2.0150, train=0.2653, test=0.2910 + [DFA] Epoch 40: loss=2.0098, train=0.2698, test=0.2927 + [DFA] Epoch 50: loss=2.0095, train=0.2726, test=0.2876 + [DFA] Epoch 60: loss=2.0064, train=0.2749, test=0.2969 + [DFA] Epoch 70: loss=2.0056, train=0.2740, test=0.2875 + [DFA] Epoch 80: loss=2.0061, train=0.2728, test=0.2935 + [DFA] Epoch 90: loss=2.0060, train=0.2748, test=0.2958 + [DFA] Epoch 100: loss=2.0064, train=0.2760, test=0.2968 + Final test acc: 0.2968 + +--- FA --- + [FA] Epoch 1: loss=2.1018, train=0.2074, test=0.2378 + [FA] Epoch 10: loss=1.9068, train=0.3096, test=0.3382 + [FA] Epoch 20: loss=1.8759, train=0.3228, test=0.3570 + [FA] Epoch 30: loss=1.8522, train=0.3362, test=0.3621 + [FA] Epoch 40: loss=1.8364, train=0.3448, test=0.3688 + [FA] Epoch 50: loss=1.8291, train=0.3455, test=0.3740 + [FA] Epoch 60: loss=1.8195, train=0.3513, test=0.3795 + [FA] Epoch 70: loss=1.8121, train=0.3524, test=0.3814 + [FA] Epoch 80: loss=1.8078, train=0.3587, test=0.3818 + [FA] Epoch 90: loss=1.8068, train=0.3560, test=0.3821 + [FA] Epoch 100: loss=1.8052, train=0.3588, test=0.3818 + Final test acc: 0.3818 + +All results saved to results/fa_dfa_d256_L12_seed3/results_cifar10.json + L=12 seed=4 (Sun Apr 26 02:32:36 PM CDT 2026) +Using device: cuda:0 + +============================================================ +Seed 4 +============================================================ + +--- DFA --- + [DFA] Epoch 1: loss=2.0826, train=0.2312, test=0.2652 + [DFA] Epoch 10: loss=2.0607, train=0.2401, test=0.2485 + [DFA] Epoch 20: loss=2.0546, train=0.2436, test=0.2572 + [DFA] Epoch 30: loss=2.0503, train=0.2453, test=0.2590 + [DFA] Epoch 40: loss=2.0507, train=0.2482, test=0.2564 + [DFA] Epoch 50: loss=2.0500, train=0.2534, test=0.2690 + [DFA] Epoch 60: loss=2.0493, train=0.2523, test=0.2734 + [DFA] Epoch 70: loss=2.0477, train=0.2527, test=0.2672 + [DFA] Epoch 80: loss=2.0458, train=0.2571, test=0.2663 + [DFA] Epoch 90: loss=2.0443, train=0.2551, test=0.2719 + [DFA] Epoch 100: loss=2.0463, train=0.2545, test=0.2695 + Final test acc: 0.2695 + +--- FA --- + [FA] Epoch 1: loss=2.0497, train=0.2416, test=0.2850 + [FA] Epoch 10: loss=1.8906, train=0.3163, test=0.3399 + [FA] Epoch 20: loss=1.8399, train=0.3365, test=0.3601 + [FA] Epoch 30: loss=1.7951, train=0.3563, test=0.3781 + [FA] Epoch 40: loss=1.7733, train=0.3692, test=0.3917 + [FA] Epoch 50: loss=1.7603, train=0.3714, test=0.3997 + [FA] Epoch 60: loss=1.7486, train=0.3756, test=0.4013 + [FA] Epoch 70: loss=1.7420, train=0.3806, test=0.4029 + [FA] Epoch 80: loss=1.7347, train=0.3832, test=0.4084 + [FA] Epoch 90: loss=1.7314, train=0.3801, test=0.4048 + [FA] Epoch 100: loss=1.7322, train=0.3812, test=0.4090 + Final test acc: 0.4090 + +All results saved to results/fa_dfa_d256_L12_seed4/results_cifar10.json + +=== d=256 DEEP DONE (Sun Apr 26 02:55:15 PM CDT 2026) === diff --git a/results/fa_dfa_d256_L12_seed0/results_cifar10.json b/results/fa_dfa_d256_L12_seed0/results_cifar10.json new file mode 100644 index 0000000..a31aa8b --- /dev/null +++ b/results/fa_dfa_d256_L12_seed0/results_cifar10.json @@ -0,0 +1,969 @@ +{ + "0": { + "dfa": { + "log": { + "train_loss": [ + 2.078884480743408, + 2.059846487350464, + 2.0553709620666503, + 2.0546888257598876, + 2.047630134963989, + 2.045008226928711, + 2.0420518775177, + 2.041442600250244, + 2.0381681137847902, + 2.0336293191146853, + 2.0332381079864503, + 2.0334933743286134, + 2.035796566848755, + 2.0294106813049315, + 2.0285469360351565, + 2.026694899978638, + 2.0260820417785643, + 2.0254271863555906, + 2.0230323413848876, + 2.0229154515075685, + 2.0223220541763305, + 2.0228745832824706, + 2.021729295196533, + 2.017538852996826, + 2.017971972579956, + 2.0173734220886232, + 2.017427021217346, + 2.016292642364502, + 2.0162722787475587, + 2.0143618490600588, + 2.013466402053833, + 2.013067155303955, + 2.0135757694244383, + 2.0145153722381592, + 2.0109932864379885, + 2.013635636329651, + 2.0102212376403807, + 2.0110452947616575, + 2.011999717025757, + 2.0092155500030517, + 2.0120949505615235, + 2.010387943572998, + 2.010760363922119, + 2.0082851540374755, + 2.010158937225342, + 2.0103255702209473, + 2.0088272121429442, + 2.0083791939544677, + 2.009366819381714, + 2.0072296501922606, + 2.0084847410583495, + 2.0094225827789307, + 2.00697819190979, + 2.0068142838287355, + 2.0079344313049314, + 2.0090717527008057, + 2.0060496669769288, + 2.008173867645264, + 2.0086808282089232, + 2.0082889751434325, + 2.004431292037964, + 2.006781820449829, + 2.0053993869018556, + 2.0058481903457643, + 2.006508606338501, + 2.0069684927368163, + 2.0063695631408693, + 2.008684331817627, + 2.0065671909332274, + 2.005968345184326, + 2.0088549881744386, + 2.0075259527969362, + 2.0050431396484374, + 2.0057015911102294, + 2.0043167867279053, + 2.005799201889038, + 2.0063393185043337, + 2.0052655740737917, + 2.0044282928848265, + 2.003640436248779, + 2.005821014328003, + 2.003054510650635, + 2.0039722763824463, + 2.004264993286133, + 2.004719021987915, + 2.004308237876892, + 2.004206368179321, + 2.003580096473694, + 2.0051046691131593, + 2.0024837198638914, + 2.004619668197632, + 2.0042623318481447, + 2.0048826543426514, + 2.003167839813232, + 2.0047615603637694, + 2.005793152923584, + 2.004860108909607, + 2.0029871907043457, + 2.0043380725097655, + 2.00175498626709 + ], + "train_acc": [ + 0.23242, + 0.23972, + 0.2412, + 0.24256, + 0.24318, + 0.2452, + 0.24798, + 0.24904, + 0.25184, + 0.25274, + 0.25106, + 0.25332, + 0.25176, + 0.25698, + 0.2543, + 0.25926, + 0.25692, + 0.25934, + 0.25892, + 0.26004, + 0.26144, + 0.26174, + 0.26074, + 0.26036, + 0.26248, + 0.2653, + 0.26038, + 0.2655, + 0.26348, + 0.26562, + 0.26624, + 0.2666, + 0.2666, + 0.26524, + 0.2674, + 0.2642, + 0.266, + 0.26908, + 0.26762, + 0.26734, + 0.26894, + 0.26826, + 0.26892, + 0.27234, + 0.26856, + 0.26836, + 0.2683, + 0.26972, + 0.2684, + 0.27156, + 0.26852, + 0.27026, + 0.27, + 0.27166, + 0.26992, + 0.27072, + 0.27186, + 0.27046, + 0.26968, + 0.27074, + 0.27308, + 0.27064, + 0.27174, + 0.26952, + 0.27196, + 0.27196, + 0.27176, + 0.27202, + 0.26996, + 0.27208, + 0.26994, + 0.27192, + 0.27492, + 0.27146, + 0.272, + 0.2721, + 0.27436, + 0.27366, + 0.27262, + 0.2733, + 0.27358, + 0.27316, + 0.27382, + 0.27264, + 0.274, + 0.27248, + 0.2722, + 0.2718, + 0.27186, + 0.27288, + 0.27412, + 0.27164, + 0.27014, + 0.27418, + 0.2732, + 0.27242, + 0.2715, + 0.27234, + 0.27324, + 0.27442 + ], + "test_acc": [ + 0.2663, + 0.2538, + 0.2393, + 0.2585, + 0.2479, + 0.2622, + 0.2687, + 0.2675, + 0.2687, + 0.2625, + 0.2707, + 0.2776, + 0.2781, + 0.2768, + 0.2674, + 0.2767, + 0.2932, + 0.2846, + 0.2656, + 0.277, + 0.2843, + 0.2856, + 0.2648, + 0.2894, + 0.2777, + 0.2864, + 0.2809, + 0.2937, + 0.2826, + 0.2832, + 0.2769, + 0.2874, + 0.2864, + 0.2842, + 0.295, + 0.2794, + 0.2872, + 0.2969, + 0.2732, + 0.29, + 0.2922, + 0.2994, + 0.2883, + 0.2934, + 0.2842, + 0.29, + 0.299, + 0.2933, + 0.2918, + 0.288, + 0.2901, + 0.2966, + 0.2922, + 0.2894, + 0.2872, + 0.292, + 0.2925, + 0.2937, + 0.2959, + 0.2909, + 0.2932, + 0.2954, + 0.2919, + 0.2989, + 0.2933, + 0.2987, + 0.2977, + 0.2861, + 0.2912, + 0.2878, + 0.2991, + 0.2888, + 0.2907, + 0.2875, + 0.2937, + 0.2968, + 0.2893, + 0.2961, + 0.2933, + 0.2928, + 0.2911, + 0.2945, + 0.2983, + 0.2959, + 0.2963, + 0.296, + 0.293, + 0.295, + 0.2947, + 0.2959, + 0.2972, + 0.2934, + 0.296, + 0.2957, + 0.2949, + 0.2955, + 0.2957, + 0.2956, + 0.2956, + 0.2956 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.38264644145965576, + 0.0008421496022492647, + -0.000935245247092098, + 0.0012684656539931893, + -7.955901673994958e-05, + 4.1166742448695004e-05, + -0.0003361787530593574, + -0.000117168077849783, + 0.00040733604691922665, + 0.0004055476747453213, + 0.00027774946647696197, + -0.00022715271916240454 + ], + "perturbation_rho": [ + 0.02277328446507454, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -2.998858690261841e-07, + 0.0, + -1.862645149230957e-09, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -9.373761713504791e-07, + 0.0, + -1.862645149230957e-09, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.01": [ + -2.9550865292549133e-06, + -3.725290298461914e-09, + -1.862645149230957e-09, + 9.313225746154785e-10, + 0.0, + 0.0, + -5.587935447692871e-09, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ] + }, + "hidden_norms_per_layer": [ + 40328.7421875, + 215651328.0, + 1357730560.0, + 1379884160.0, + 1907440512.0, + 1914356608.0, + 2525564416.0, + 3004119296.0, + 3396676352.0, + 3411036160.0, + 3436140800.0, + 3741719808.0, + 3836453888.0 + ], + "bp_grad_norms_per_layer": [ + 3.007613997851877e-07, + 3.797166470143054e-10, + 3.725074693150532e-10, + 3.724560382334374e-10, + 3.726816910631925e-10, + 3.727783637330617e-10, + 3.7286917997647606e-10, + 3.728354847076787e-10, + 3.7310107781074464e-10, + 3.731098485726392e-10, + 3.7311384537552783e-10, + 3.7324987545162003e-10, + 3.732594233696318e-10 + ] + }, + "drift": { + "embed.weight": 356.12528180806567, + "embed.bias": 271.7696079359235, + "blocks.0.ln.weight": 12.160896301269531, + "blocks.0.w1.weight": 213.2156736743355, + "blocks.0.w1.bias": 188.46956990883064, + "blocks.0.w2.weight": 511.5389504411083, + "blocks.1.ln.weight": 11.089088439941406, + "blocks.1.w1.weight": 313.55891993641393, + "blocks.1.w1.bias": 313.7326373058554, + "blocks.1.w2.weight": 494.64000104284804, + "blocks.2.ln.weight": 7.991178512573242, + "blocks.2.w1.weight": 192.53270642077123, + "blocks.2.w1.bias": 179.27909905821068, + "blocks.2.w2.weight": 281.61858273592986, + "blocks.3.ln.weight": 11.628951072692871, + "blocks.3.w1.weight": 320.27019212444, + "blocks.3.w1.bias": 308.4511088686199, + "blocks.3.w2.weight": 439.5098655875687, + "blocks.4.ln.weight": 7.837525844573975, + "blocks.4.w1.weight": 198.99888492277404, + "blocks.4.w1.bias": 188.93588246764426, + "blocks.4.w2.weight": 279.38242336071187, + "blocks.5.ln.weight": 11.894773483276367, + "blocks.5.w1.weight": 335.3636162468368, + "blocks.5.w1.bias": 316.6947924070723, + "blocks.5.w2.weight": 491.62782621790865, + "blocks.6.ln.weight": 13.031082153320312, + "blocks.6.w1.weight": 372.496461957569, + "blocks.6.w1.bias": 361.37412651682865, + "blocks.6.w2.weight": 534.5811734194972, + "blocks.7.ln.weight": 11.872922897338867, + "blocks.7.w1.weight": 332.30066015200464, + "blocks.7.w1.bias": 318.8149983218427, + "blocks.7.w2.weight": 467.51464843487275, + "blocks.8.ln.weight": 8.978517532348633, + "blocks.8.w1.weight": 249.40733753718652, + "blocks.8.w1.bias": 227.90248095685143, + "blocks.8.w2.weight": 340.61733231825633, + "blocks.9.ln.weight": 8.258359909057617, + "blocks.9.w1.weight": 227.6803182887842, + "blocks.9.w1.bias": 215.0496950812678, + "blocks.9.w2.weight": 304.7891443514947, + "blocks.10.ln.weight": 11.31588363647461, + "blocks.10.w1.weight": 320.63785926119624, + "blocks.10.w1.bias": 285.28965414218027, + "blocks.10.w2.weight": 449.4781041979696, + "blocks.11.ln.weight": 8.833503723144531, + "blocks.11.w1.weight": 246.89372906458416, + "blocks.11.w1.bias": 234.18032268874182, + "blocks.11.w2.weight": 346.3219491734876, + "out_ln.weight": 0.7750099301338196, + "out_head.weight": 8.051922392430235, + "out_head.bias": 0.5259713382162419 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.090004820022583, + 2.0399895847320555, + 2.016654765472412, + 1.993640735435486, + 1.970404408721924, + 1.9607704742050172, + 1.9497378304672242, + 1.945601725769043, + 1.9354119133758545, + 1.926653540725708, + 1.919797864074707, + 1.9190785704040527, + 1.921803589859009, + 1.9146928586196899, + 1.9141384088134765, + 1.911525807800293, + 1.9086668190765381, + 1.9076120470809936, + 1.9019774869155883, + 1.8993370356750487, + 1.8970520935440063, + 1.8962269541931152, + 1.8934082318496703, + 1.8856604388809204, + 1.8881552615356445, + 1.8857498220062257, + 1.8820669253158568, + 1.882652628211975, + 1.8782193230819701, + 1.8737722314453125, + 1.870555029067993, + 1.870324335861206, + 1.8657306994247436, + 1.866270491065979, + 1.8603372681427002, + 1.8619992153930665, + 1.8532835510253907, + 1.8544231454086304, + 1.8503647320175172, + 1.848538630065918, + 1.8463982132720946, + 1.844799047012329, + 1.843893060836792, + 1.8374801321411134, + 1.8375832195281983, + 1.836339090499878, + 1.8296538625335694, + 1.8266508029174804, + 1.8317974203109741, + 1.8261817306518555, + 1.8269930626678468, + 1.8243385958480836, + 1.8216499743270873, + 1.8209158150482179, + 1.8218781385040284, + 1.8225933773040772, + 1.8176012789916993, + 1.8158467154312135, + 1.814079772338867, + 1.8132556122207641, + 1.8133179102325439, + 1.8107317385101318, + 1.8110995058822632, + 1.809731153831482, + 1.8064914291381835, + 1.8078764828109741, + 1.8058526267242432, + 1.8063254193496705, + 1.808208419113159, + 1.801958323020935, + 1.805595525779724, + 1.8042979398345946, + 1.7999617199325562, + 1.8028947134017945, + 1.800519053878784, + 1.7981389894866944, + 1.7996525979614257, + 1.7991148620224, + 1.7987511922836303, + 1.7935400792694092, + 1.7988948291397096, + 1.7971973416900635, + 1.7980579327392578, + 1.798070336303711, + 1.7963832228088379, + 1.7970877619171142, + 1.795718716392517, + 1.794479237098694, + 1.7980125144195556, + 1.7947333910369874, + 1.798018039779663, + 1.7939606005096436, + 1.7963129815292358, + 1.7946422113418579, + 1.79368143699646, + 1.7950917078399657, + 1.794961125793457, + 1.7920723850250244, + 1.793914785118103, + 1.7935118531036376 + ], + "train_acc": [ + 0.22204, + 0.23944, + 0.25174, + 0.26568, + 0.2792, + 0.28158, + 0.29006, + 0.29422, + 0.29792, + 0.3005, + 0.30396, + 0.3033, + 0.30092, + 0.30788, + 0.30678, + 0.30974, + 0.31076, + 0.3128, + 0.31236, + 0.31404, + 0.31956, + 0.31576, + 0.31692, + 0.32034, + 0.31844, + 0.32288, + 0.32082, + 0.32326, + 0.3258, + 0.32692, + 0.32782, + 0.32868, + 0.32982, + 0.32992, + 0.33264, + 0.33086, + 0.33356, + 0.33424, + 0.33596, + 0.33532, + 0.33676, + 0.33762, + 0.33838, + 0.34134, + 0.34054, + 0.34094, + 0.34518, + 0.34504, + 0.34382, + 0.34594, + 0.34606, + 0.3472, + 0.34726, + 0.3491, + 0.34792, + 0.34796, + 0.35154, + 0.34984, + 0.34972, + 0.34986, + 0.35286, + 0.35258, + 0.3507, + 0.35242, + 0.35492, + 0.35352, + 0.35304, + 0.35276, + 0.35448, + 0.35806, + 0.3536, + 0.35518, + 0.35724, + 0.3548, + 0.35754, + 0.3575, + 0.3584, + 0.35666, + 0.358, + 0.35686, + 0.35654, + 0.35748, + 0.3573, + 0.3578, + 0.3589, + 0.35928, + 0.35822, + 0.35924, + 0.3585, + 0.3598, + 0.35814, + 0.36028, + 0.3576, + 0.3603, + 0.35812, + 0.36048, + 0.3598, + 0.35942, + 0.361, + 0.35982 + ], + "test_acc": [ + 0.2317, + 0.2482, + 0.2524, + 0.2817, + 0.2906, + 0.3053, + 0.3168, + 0.3229, + 0.3333, + 0.3223, + 0.3265, + 0.3361, + 0.3348, + 0.3282, + 0.3316, + 0.3402, + 0.3496, + 0.3424, + 0.3301, + 0.3303, + 0.3445, + 0.3441, + 0.3309, + 0.3506, + 0.3379, + 0.3507, + 0.351, + 0.3592, + 0.3551, + 0.3504, + 0.3446, + 0.3557, + 0.3632, + 0.3475, + 0.3565, + 0.3584, + 0.3592, + 0.3606, + 0.3493, + 0.3691, + 0.3623, + 0.3675, + 0.371, + 0.3667, + 0.3641, + 0.3695, + 0.3741, + 0.3705, + 0.3696, + 0.3682, + 0.3803, + 0.3754, + 0.3701, + 0.3767, + 0.3715, + 0.3816, + 0.3792, + 0.3848, + 0.3825, + 0.3768, + 0.3823, + 0.3775, + 0.3814, + 0.3805, + 0.3795, + 0.3823, + 0.3807, + 0.3802, + 0.3818, + 0.3826, + 0.3834, + 0.3871, + 0.3823, + 0.379, + 0.3835, + 0.3823, + 0.3841, + 0.3867, + 0.3848, + 0.3844, + 0.3849, + 0.3854, + 0.385, + 0.3827, + 0.3843, + 0.3859, + 0.3865, + 0.385, + 0.3868, + 0.3874, + 0.3882, + 0.3865, + 0.3878, + 0.3893, + 0.3869, + 0.3872, + 0.3872, + 0.387, + 0.3868, + 0.3867 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.005462596658617258, + 0.04850056767463684, + 0.013480322435498238, + 0.017950695008039474, + -0.06765332818031311, + -0.0009795809164643288, + -0.01368715800344944, + 0.008839694783091545, + -0.02521103248000145, + 0.02215195819735527, + 0.024440081790089607, + 0.9901714324951172 + ], + "perturbation_rho": [ + -0.018134091049432755, + -0.015751726925373077, + 0.00351056270301342, + -0.00837995670735836, + -0.011277807876467705, + 0.028536146506667137, + -0.04167690500617027, + -0.03265586867928505, + 0.04896393418312073, + 0.020356643944978714, + 0.027929214760661125, + -0.0027437973767518997 + ], + "nudging": { + "0.001": [ + 1.5739351511001587e-07, + -1.4295801520347595e-07, + 2.0023435354232788e-08, + -3.4924596548080444e-08, + 8.055940270423889e-08, + -1.6298145055770874e-08, + -3.585591912269592e-08, + -1.5832483768463135e-08, + -3.725290298461914e-08, + -3.073364496231079e-08, + -8.521601557731628e-08, + -9.82079654932022e-07 + ], + "0.003": [ + 4.880130290985107e-07, + -3.8463622331619263e-07, + -6.05359673500061e-08, + -1.1082738637924194e-07, + 2.5797635316848755e-07, + -7.776543498039246e-08, + 5.8673322200775146e-08, + -3.67872416973114e-08, + 3.771856427192688e-08, + -1.1362135410308838e-07, + -1.0477378964424133e-07, + -3.4580007195472717e-06 + ], + "0.01": [ + 1.5916302800178528e-06, + -1.2102536857128143e-06, + -2.3888424038887024e-07, + -3.0547380447387695e-07, + 9.206123650074005e-07, + -1.8766149878501892e-07, + 1.1455267667770386e-07, + -8.242204785346985e-08, + 2.086162567138672e-07, + -3.003515303134918e-07, + -3.511086106300354e-07, + -1.198984682559967e-05 + ] + }, + "hidden_norms_per_layer": [ + 8903.6103515625, + 53416.37109375, + 323086.75, + 543887.1875, + 720660.5625, + 852344.25, + 912922.875, + 915610.5, + 918604.6875, + 933013.8125, + 945300.25, + 944588.0625, + 786758.5625 + ], + "bp_grad_norms_per_layer": [ + 1.4760601516172756e-05, + 1.4644468819824397e-06, + 7.143049174374028e-07, + 6.862116492811765e-07, + 6.922383022356371e-07, + 6.88848786012386e-07, + 6.914569894433953e-07, + 6.923283422111126e-07, + 6.937441980880976e-07, + 6.82936615703511e-07, + 6.813309028075309e-07, + 6.779434329473588e-07, + 6.447000373555056e-07 + ] + }, + "drift": { + "embed.weight": 84.35971307415942, + "embed.bias": 32.19502954792936, + "blocks.0.ln.weight": 1.6290949583053589, + "blocks.0.w1.weight": 18.83059829156805, + "blocks.0.w1.bias": 11.893972638627677, + "blocks.0.w2.weight": 85.79388547124405, + "blocks.1.ln.weight": 1.249456763267517, + "blocks.1.w1.weight": 22.541099264872408, + "blocks.1.w1.bias": 19.900474950549036, + "blocks.1.w2.weight": 60.379082703434506, + "blocks.2.ln.weight": 0.8102703094482422, + "blocks.2.w1.weight": 21.89597288552171, + "blocks.2.w1.bias": 23.382511495918294, + "blocks.2.w2.weight": 50.297376870531075, + "blocks.3.ln.weight": 0.8057949542999268, + "blocks.3.w1.weight": 22.204088550957646, + "blocks.3.w1.bias": 23.964618513708253, + "blocks.3.w2.weight": 54.618428441768906, + "blocks.4.ln.weight": 0.739398181438446, + "blocks.4.w1.weight": 21.57539189625426, + "blocks.4.w1.bias": 22.672842875453526, + "blocks.4.w2.weight": 42.39506645504441, + "blocks.5.ln.weight": 0.7785037159919739, + "blocks.5.w1.weight": 21.199615780366805, + "blocks.5.w1.bias": 21.79400267554791, + "blocks.5.w2.weight": 45.035435661106206, + "blocks.6.ln.weight": 0.6276997923851013, + "blocks.6.w1.weight": 14.78204467257611, + "blocks.6.w1.bias": 13.651015110446988, + "blocks.6.w2.weight": 47.906552978017096, + "blocks.7.ln.weight": 0.6718869209289551, + "blocks.7.w1.weight": 15.301077297758928, + "blocks.7.w1.bias": 11.14864058748224, + "blocks.7.w2.weight": 81.22443944293431, + "blocks.8.ln.weight": 0.6778666377067566, + "blocks.8.w1.weight": 15.225997184691419, + "blocks.8.w1.bias": 12.413017961913502, + "blocks.8.w2.weight": 70.26476379892472, + "blocks.9.ln.weight": 0.689741849899292, + "blocks.9.w1.weight": 14.934806218542306, + "blocks.9.w1.bias": 13.186621451322553, + "blocks.9.w2.weight": 66.88878955298375, + "blocks.10.ln.weight": 0.6384391188621521, + "blocks.10.w1.weight": 14.869272775568358, + "blocks.10.w1.bias": 11.801783819091257, + "blocks.10.w2.weight": 74.67053466138057, + "blocks.11.ln.weight": 0.7496767044067383, + "blocks.11.w1.weight": 17.612627240302658, + "blocks.11.w1.bias": 15.909240129776858, + "blocks.11.w2.weight": 90.63484324360367, + "out_ln.weight": 0.389094740152359, + "out_head.weight": 5.407193127809483, + "out_head.bias": 0.7008348223133053 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 256, + "num_blocks": 12, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 0 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d256_L12_seed0", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file diff --git a/results/fa_dfa_d256_L12_seed1/results_cifar10.json b/results/fa_dfa_d256_L12_seed1/results_cifar10.json new file mode 100644 index 0000000..a5b7453 --- /dev/null +++ b/results/fa_dfa_d256_L12_seed1/results_cifar10.json @@ -0,0 +1,969 @@ +{ + "1": { + "dfa": { + "log": { + "train_loss": [ + 2.067703508453369, + 2.0509845906829836, + 2.0489810423278807, + 2.0436042738342284, + 2.0423764279174805, + 2.0355014089202883, + 2.033642913208008, + 2.031632952003479, + 2.0292669165039063, + 2.0284323851013184, + 2.0254091524505617, + 2.023782984008789, + 2.024598086013794, + 2.02109279624939, + 2.0222655739593507, + 2.017897210388184, + 2.017190932922363, + 2.0189253887939453, + 2.021160142478943, + 2.017966260147095, + 2.019014611053467, + 2.0170903842163086, + 2.0153890646743773, + 2.013819185180664, + 2.014828204040527, + 2.0162787924957275, + 2.013823607635498, + 2.0116753441619872, + 2.0123462033843995, + 2.015174687728882, + 2.012867821960449, + 2.012428879699707, + 2.0133413526153565, + 2.013093120727539, + 2.011299782371521, + 2.0090217667388917, + 2.0103736295318604, + 2.010286435699463, + 2.00893989654541, + 2.0116446880722045, + 2.0098753689193725, + 2.010866590499878, + 2.00980348903656, + 2.0109012869644167, + 2.00831172958374, + 2.00788239944458, + 2.0091643276596067, + 2.008021312484741, + 2.0090034342193603, + 2.007280367126465, + 2.0106113063812256, + 2.0081184278106687, + 2.0077548957824707, + 2.0058415910339353, + 2.007537776412964, + 2.005961977920532, + 2.006186770477295, + 2.0064545154190063, + 2.006464062461853, + 2.005965493736267, + 2.004203755187988, + 2.00689825302124, + 2.007209775085449, + 2.0072729249572756, + 2.002925030517578, + 2.005165770111084, + 2.0051349970245362, + 2.0035568313598633, + 2.004871815338135, + 2.003002490501404, + 2.0050700107574464, + 2.003749114379883, + 2.0044914908599853, + 2.006470828590393, + 2.0049134774017334, + 2.003252558746338, + 2.003931010055542, + 2.002243830718994, + 2.003598410110474, + 2.0027744925689697, + 2.0048628931427004, + 2.0027463244628905, + 2.005390119895935, + 2.0036273486328127, + 2.001874176979065, + 2.0030779739379883, + 2.002777744674683, + 2.002490601806641, + 2.0044283893585204, + 2.0020725644683837, + 2.0030053114318846, + 2.002065945777893, + 2.0020910046386717, + 2.0032121129989626, + 2.0020565942382813, + 2.001598520851135, + 2.0025369876098633, + 2.0027815823745727, + 2.002781241912842, + 2.003610602493286 + ], + "train_acc": [ + 0.23808, + 0.24056, + 0.2394, + 0.24146, + 0.24426, + 0.25002, + 0.2469, + 0.24916, + 0.25386, + 0.25294, + 0.25656, + 0.2569, + 0.25622, + 0.26112, + 0.25894, + 0.26168, + 0.26178, + 0.25928, + 0.25872, + 0.26202, + 0.26178, + 0.26178, + 0.26246, + 0.26322, + 0.26474, + 0.25964, + 0.2657, + 0.264, + 0.26538, + 0.26332, + 0.26432, + 0.2647, + 0.26426, + 0.26644, + 0.26712, + 0.2672, + 0.2667, + 0.2673, + 0.26496, + 0.2698, + 0.2697, + 0.26524, + 0.26698, + 0.26778, + 0.26776, + 0.26726, + 0.26798, + 0.26834, + 0.2708, + 0.27128, + 0.2668, + 0.26872, + 0.26908, + 0.26946, + 0.26962, + 0.26966, + 0.26966, + 0.27008, + 0.27042, + 0.2712, + 0.2724, + 0.26974, + 0.27206, + 0.2707, + 0.27216, + 0.27204, + 0.2715, + 0.27046, + 0.2716, + 0.27214, + 0.27192, + 0.27188, + 0.27328, + 0.27076, + 0.2717, + 0.27228, + 0.27288, + 0.27206, + 0.27244, + 0.27522, + 0.27138, + 0.27282, + 0.27212, + 0.27274, + 0.27264, + 0.27226, + 0.27054, + 0.27412, + 0.27054, + 0.27344, + 0.27234, + 0.27526, + 0.27328, + 0.27266, + 0.27276, + 0.27616, + 0.274, + 0.27256, + 0.27362, + 0.27134 + ], + "test_acc": [ + 0.2661, + 0.2453, + 0.247, + 0.2496, + 0.2729, + 0.2614, + 0.2432, + 0.2629, + 0.2751, + 0.2657, + 0.2483, + 0.2648, + 0.2763, + 0.2843, + 0.2744, + 0.2859, + 0.2707, + 0.271, + 0.2724, + 0.2858, + 0.2834, + 0.2832, + 0.282, + 0.2795, + 0.2799, + 0.2735, + 0.2569, + 0.2847, + 0.2724, + 0.2892, + 0.2797, + 0.2809, + 0.2752, + 0.2811, + 0.2655, + 0.281, + 0.294, + 0.2816, + 0.2759, + 0.2752, + 0.2755, + 0.285, + 0.2917, + 0.2948, + 0.2796, + 0.2865, + 0.2715, + 0.2946, + 0.2846, + 0.2885, + 0.2846, + 0.286, + 0.2885, + 0.2917, + 0.2879, + 0.2853, + 0.2897, + 0.2857, + 0.2834, + 0.2904, + 0.2719, + 0.2858, + 0.2896, + 0.2764, + 0.2892, + 0.2881, + 0.2824, + 0.2841, + 0.2824, + 0.29, + 0.2826, + 0.2896, + 0.2886, + 0.2921, + 0.2871, + 0.2847, + 0.2849, + 0.2907, + 0.2886, + 0.2851, + 0.2855, + 0.2885, + 0.2933, + 0.2877, + 0.2871, + 0.2865, + 0.2891, + 0.2887, + 0.2887, + 0.2858, + 0.2862, + 0.2881, + 0.2863, + 0.2877, + 0.2874, + 0.2879, + 0.2874, + 0.2872, + 0.2873, + 0.2872 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.3827846348285675, + 0.0010349926305934787, + 0.0028652632609009743, + -0.00039675208972766995, + -3.398433909751475e-05, + 0.0006842626025900245, + -0.0005091045168228447, + -0.0008636444108560681, + 0.0010306478943675756, + -0.0014248000225052238, + -0.0008004868868738413, + -0.001347829820588231 + ], + "perturbation_rho": [ + 0.03251604735851288, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -3.8463622331619263e-07, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -1.0239891707897186e-06, + 9.313225746154785e-10, + 0.0, + 0.0, + 3.725290298461914e-09, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.01": [ + -3.2410025596618652e-06, + 9.313225746154785e-10, + 0.0, + 1.1175870895385742e-08, + 3.725290298461914e-09, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ] + }, + "hidden_norms_per_layer": [ + 44938.71484375, + 224396224.0, + 534405824.0, + 628434240.0, + 817915136.0, + 1014971776.0, + 1195768320.0, + 1696130048.0, + 1781777664.0, + 2129590784.0, + 2360990464.0, + 2604362240.0, + 2611689216.0 + ], + "bp_grad_norms_per_layer": [ + 3.2724244647397427e-07, + 4.958744526106784e-10, + 4.933069508439303e-10, + 4.927478425287291e-10, + 4.92776874860823e-10, + 4.929293639932553e-10, + 4.929709418455275e-10, + 4.923624841168817e-10, + 4.925106433795179e-10, + 4.924368135483803e-10, + 4.92419938158406e-10, + 4.924182728238691e-10, + 4.923914054266731e-10 + ] + }, + "drift": { + "embed.weight": 366.7568618351936, + "embed.bias": 309.0473623596911, + "blocks.0.ln.weight": 11.064080238342285, + "blocks.0.w1.weight": 209.81708023371544, + "blocks.0.w1.bias": 212.90654132913863, + "blocks.0.w2.weight": 491.9328056450382, + "blocks.1.ln.weight": 9.262639999389648, + "blocks.1.w1.weight": 235.0790709887316, + "blocks.1.w1.bias": 223.63959899284364, + "blocks.1.w2.weight": 344.39905753660923, + "blocks.2.ln.weight": 8.437020301818848, + "blocks.2.w1.weight": 201.71434530769594, + "blocks.2.w1.bias": 185.02882567315115, + "blocks.2.w2.weight": 318.26352419378276, + "blocks.3.ln.weight": 8.921281814575195, + "blocks.3.w1.weight": 253.83854854822596, + "blocks.3.w1.bias": 229.6894019696064, + "blocks.3.w2.weight": 350.3011606006072, + "blocks.4.ln.weight": 9.37590503692627, + "blocks.4.w1.weight": 261.7678873002099, + "blocks.4.w1.bias": 243.5300414079033, + "blocks.4.w2.weight": 342.796891596585, + "blocks.5.ln.weight": 9.026515007019043, + "blocks.5.w1.weight": 246.11946079088935, + "blocks.5.w1.bias": 237.3468691840109, + "blocks.5.w2.weight": 337.576876810432, + "blocks.6.ln.weight": 11.87130355834961, + "blocks.6.w1.weight": 323.004633882484, + "blocks.6.w1.bias": 302.8929133787546, + "blocks.6.w2.weight": 446.231359639456, + "blocks.7.ln.weight": 8.507142066955566, + "blocks.7.w1.weight": 235.44405159319476, + "blocks.7.w1.bias": 230.35367387868078, + "blocks.7.w2.weight": 329.8095182055512, + "blocks.8.ln.weight": 10.987072944641113, + "blocks.8.w1.weight": 320.7072155931754, + "blocks.8.w1.bias": 294.7395252801994, + "blocks.8.w2.weight": 454.98281282285984, + "blocks.9.ln.weight": 10.434910774230957, + "blocks.9.w1.weight": 294.34456133171307, + "blocks.9.w1.bias": 277.98968710676087, + "blocks.9.w2.weight": 386.31162290067874, + "blocks.10.ln.weight": 10.570181846618652, + "blocks.10.w1.weight": 301.76214096118804, + "blocks.10.w1.bias": 288.8696738168496, + "blocks.10.w2.weight": 412.44020515723406, + "blocks.11.ln.weight": 6.689911842346191, + "blocks.11.w1.weight": 178.97751193731568, + "blocks.11.w1.bias": 170.64222401222574, + "blocks.11.w2.weight": 255.10714043782204, + "out_ln.weight": 0.7581733465194702, + "out_head.weight": 7.801807644921005, + "out_head.bias": 0.42687402024736276 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.0875839049530027, + 2.0147405715942384, + 1.9875884157562256, + 1.9586031070709229, + 1.9435594665908813, + 1.9274922204971314, + 1.9189438362121582, + 1.909316096420288, + 1.901387437400818, + 1.8944761938858032, + 1.8878041967773438, + 1.8841591590118407, + 1.8791934803009034, + 1.8707326766204835, + 1.8738373201751708, + 1.8677770760726928, + 1.8662226232910155, + 1.8621477996444702, + 1.8617159877777099, + 1.8547751527404785, + 1.8523059939193725, + 1.8457024936676025, + 1.8389914599609376, + 1.8356397885131837, + 1.8316144234466554, + 1.8286296670150757, + 1.8218667905044557, + 1.8144276452255248, + 1.807547642478943, + 1.8062635816192627, + 1.8020675115203857, + 1.7944365270233154, + 1.7917551394271851, + 1.792857387008667, + 1.786349270362854, + 1.7846514977645873, + 1.782985519180298, + 1.7807849361801147, + 1.776029171066284, + 1.7791108153533937, + 1.7775139881134032, + 1.774389083557129, + 1.7711814474487304, + 1.7737521209716798, + 1.7688932614135742, + 1.7682257052612305, + 1.7661459450531005, + 1.7686957946014403, + 1.764788871536255, + 1.7630911065292358, + 1.7634479265975953, + 1.7625660708999633, + 1.7607597863388063, + 1.7593693128204346, + 1.7575467151641846, + 1.7593074349975586, + 1.7534115236663819, + 1.7566318701934815, + 1.7595509714508057, + 1.7525636702728271, + 1.7542430780029297, + 1.7547497830581664, + 1.75442517868042, + 1.753666476097107, + 1.750391424331665, + 1.7509596431732177, + 1.7515560512542725, + 1.7488042358016969, + 1.750752509765625, + 1.7479970736312866, + 1.7519938071060182, + 1.747468376235962, + 1.7489017395782471, + 1.7493435995864868, + 1.7475122854232787, + 1.7483077466583252, + 1.7469019915390014, + 1.7457904458999634, + 1.7442825564956665, + 1.7459553696060182, + 1.7484514234161377, + 1.745931058731079, + 1.7454334391021729, + 1.743918479309082, + 1.745262015953064, + 1.744756409225464, + 1.7426206311035157, + 1.7429217789077758, + 1.7468717276382446, + 1.7410543600082398, + 1.7426005300140381, + 1.7455276907730102, + 1.7452946990966798, + 1.743043016319275, + 1.7401957970428468, + 1.7423173105621337, + 1.7431762436294556, + 1.7445746730804443, + 1.7446096383285523, + 1.7452752307510375 + ], + "train_acc": [ + 0.22256, + 0.25346, + 0.27028, + 0.2853, + 0.2961, + 0.30292, + 0.30572, + 0.30968, + 0.31464, + 0.31728, + 0.31962, + 0.32042, + 0.32064, + 0.32604, + 0.32566, + 0.32516, + 0.32768, + 0.32772, + 0.329, + 0.3322, + 0.33392, + 0.33814, + 0.34014, + 0.34284, + 0.3429, + 0.34292, + 0.34672, + 0.35008, + 0.35292, + 0.35346, + 0.35432, + 0.36024, + 0.35984, + 0.35798, + 0.35996, + 0.36256, + 0.36294, + 0.36324, + 0.3641, + 0.36242, + 0.36302, + 0.36536, + 0.36798, + 0.36558, + 0.36696, + 0.36854, + 0.36734, + 0.36774, + 0.36798, + 0.3704, + 0.36866, + 0.37204, + 0.3707, + 0.3689, + 0.3732, + 0.36918, + 0.373, + 0.37182, + 0.37306, + 0.3734, + 0.37252, + 0.37262, + 0.37084, + 0.37442, + 0.37388, + 0.37424, + 0.37434, + 0.37284, + 0.3746, + 0.3753, + 0.37408, + 0.37856, + 0.37516, + 0.37618, + 0.37696, + 0.377, + 0.3785, + 0.37692, + 0.37544, + 0.3788, + 0.37708, + 0.37568, + 0.37792, + 0.37742, + 0.37744, + 0.37678, + 0.37664, + 0.38078, + 0.3783, + 0.37778, + 0.38034, + 0.37784, + 0.37736, + 0.38046, + 0.37762, + 0.38042, + 0.37982, + 0.37818, + 0.37718, + 0.37814 + ], + "test_acc": [ + 0.2532, + 0.2635, + 0.3067, + 0.3126, + 0.3209, + 0.3326, + 0.3161, + 0.3304, + 0.3471, + 0.3408, + 0.3412, + 0.3556, + 0.3545, + 0.3571, + 0.3562, + 0.3655, + 0.3543, + 0.3596, + 0.3661, + 0.3604, + 0.3646, + 0.3656, + 0.3708, + 0.3724, + 0.3732, + 0.3702, + 0.3718, + 0.3739, + 0.3815, + 0.3877, + 0.3723, + 0.3882, + 0.3776, + 0.3921, + 0.3859, + 0.3901, + 0.3839, + 0.39, + 0.3898, + 0.3889, + 0.392, + 0.39, + 0.395, + 0.3926, + 0.3921, + 0.3946, + 0.3925, + 0.3904, + 0.4002, + 0.3963, + 0.3983, + 0.3951, + 0.4027, + 0.3978, + 0.4034, + 0.3987, + 0.3974, + 0.4003, + 0.3996, + 0.4018, + 0.3972, + 0.4031, + 0.4023, + 0.3997, + 0.4, + 0.4038, + 0.4005, + 0.3993, + 0.4026, + 0.4018, + 0.4028, + 0.4035, + 0.4069, + 0.4041, + 0.4043, + 0.4035, + 0.4046, + 0.4039, + 0.406, + 0.4059, + 0.4049, + 0.4046, + 0.4042, + 0.4055, + 0.4033, + 0.4054, + 0.405, + 0.4049, + 0.4048, + 0.4056, + 0.408, + 0.4068, + 0.4054, + 0.4042, + 0.404, + 0.4064, + 0.4056, + 0.4067, + 0.4065, + 0.4065 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.022406980395317078, + 0.047667622566223145, + 0.09574861824512482, + 0.035870544612407684, + -0.041190385818481445, + -0.0699436366558075, + -0.02966473624110222, + -0.039145052433013916, + -0.004860554356127977, + 0.005380071699619293, + -0.027314443141222, + 0.9947109222412109 + ], + "perturbation_rho": [ + -0.022866476327180862, + -0.018013648688793182, + 0.009727759286761284, + -0.053621724247932434, + -0.02228248305618763, + 0.0316929928958416, + 0.0015144720673561096, + 0.00410531647503376, + 0.03611733764410019, + 0.00018612481653690338, + -0.02358195371925831, + 0.011200916953384876 + ], + "nudging": { + "0.001": [ + -1.3886019587516785e-06, + -1.648440957069397e-07, + -2.230517566204071e-07, + -8.614733815193176e-08, + -2.2817403078079224e-08, + 5.2386894822120667e-08, + 1.257285475730896e-08, + -1.5366822481155396e-08, + -1.280568540096283e-08, + -1.83936208486557e-08, + 1.909211277961731e-08, + -7.841736078262329e-07 + ], + "0.003": [ + -3.930879756808281e-06, + -3.627501428127289e-07, + -4.2142346501350403e-07, + -1.5157274901866913e-07, + 3.9814040064811707e-08, + 1.9534491002559662e-07, + 3.003515303134918e-08, + 4.889443516731262e-08, + -2.1420419216156006e-08, + -6.239861249923706e-08, + 9.66247171163559e-08, + -2.8829090297222137e-06 + ], + "0.01": [ + -1.2913951650261879e-05, + -1.1620577424764633e-06, + -1.3329554349184036e-06, + -4.7474168241024017e-07, + 3.6065466701984406e-07, + 6.561167538166046e-07, + 1.7811544239521027e-07, + 4.209578037261963e-07, + 6.076879799365997e-08, + -1.2945383787155151e-07, + 2.454034984111786e-07, + -1.0041752830147743e-05 + ] + }, + "hidden_norms_per_layer": [ + 5485.41796875, + 46996.3671875, + 180271.890625, + 213572.234375, + 396517.875, + 588744.375, + 881525.8125, + 983474.1875, + 1058439.5, + 1218365.25, + 1317032.625, + 1334561.375, + 932176.0 + ], + "bp_grad_norms_per_layer": [ + 2.137463707185816e-05, + 1.5701441498094937e-06, + 7.100429115780571e-07, + 5.854868732058094e-07, + 5.86559053772362e-07, + 5.756642735832429e-07, + 5.825444304718985e-07, + 5.82348206989991e-07, + 5.825494895361771e-07, + 5.821411264150811e-07, + 5.809959020552924e-07, + 5.812668746330019e-07, + 5.751632556894037e-07 + ] + }, + "drift": { + "embed.weight": 58.38236439806917, + "embed.bias": 21.442522118983266, + "blocks.0.ln.weight": 1.5033414363861084, + "blocks.0.w1.weight": 16.882547993290178, + "blocks.0.w1.bias": 13.788910852251966, + "blocks.0.w2.weight": 79.37840244647813, + "blocks.1.ln.weight": 1.2546273469924927, + "blocks.1.w1.weight": 20.18123204529634, + "blocks.1.w1.bias": 12.541139360298107, + "blocks.1.w2.weight": 57.12310972297133, + "blocks.2.ln.weight": 1.1011525392532349, + "blocks.2.w1.weight": 18.73377758773642, + "blocks.2.w1.bias": 9.29578164795983, + "blocks.2.w2.weight": 54.28694557656785, + "blocks.3.ln.weight": 0.9108357429504395, + "blocks.3.w1.weight": 19.53507030946998, + "blocks.3.w1.bias": 19.00316399101826, + "blocks.3.w2.weight": 36.56504557755405, + "blocks.4.ln.weight": 0.9345387816429138, + "blocks.4.w1.weight": 21.803911980238134, + "blocks.4.w1.bias": 22.478543755429566, + "blocks.4.w2.weight": 34.17486626170071, + "blocks.5.ln.weight": 0.8433954119682312, + "blocks.5.w1.weight": 24.829617601702147, + "blocks.5.w1.bias": 27.438215191570773, + "blocks.5.w2.weight": 33.01528044399735, + "blocks.6.ln.weight": 0.7245873212814331, + "blocks.6.w1.weight": 20.82361019510065, + "blocks.6.w1.bias": 23.280464463082836, + "blocks.6.w2.weight": 34.072389985742944, + "blocks.7.ln.weight": 0.7635722756385803, + "blocks.7.w1.weight": 21.474198757222815, + "blocks.7.w1.bias": 23.449600463397505, + "blocks.7.w2.weight": 36.144241146743035, + "blocks.8.ln.weight": 0.9644007682800293, + "blocks.8.w1.weight": 22.392143221629144, + "blocks.8.w1.bias": 22.63072974134052, + "blocks.8.w2.weight": 52.64014420220465, + "blocks.9.ln.weight": 0.9803445339202881, + "blocks.9.w1.weight": 23.99208040262014, + "blocks.9.w1.bias": 25.016494427878733, + "blocks.9.w2.weight": 50.534240681340044, + "blocks.10.ln.weight": 0.8526968955993652, + "blocks.10.w1.weight": 19.026884395163023, + "blocks.10.w1.bias": 18.57529627609461, + "blocks.10.w2.weight": 64.66072262637215, + "blocks.11.ln.weight": 0.9996767044067383, + "blocks.11.w1.weight": 20.832721864932843, + "blocks.11.w1.bias": 20.006242016386533, + "blocks.11.w2.weight": 78.70619993314986, + "out_ln.weight": 0.4153711795806885, + "out_head.weight": 5.83941550812572, + "out_head.bias": 1.058698128024849 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 256, + "num_blocks": 12, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 1 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d256_L12_seed1", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file diff --git a/results/fa_dfa_d256_L12_seed2/results_cifar10.json b/results/fa_dfa_d256_L12_seed2/results_cifar10.json new file mode 100644 index 0000000..70776c8 --- /dev/null +++ b/results/fa_dfa_d256_L12_seed2/results_cifar10.json @@ -0,0 +1,969 @@ +{ + "2": { + "dfa": { + "log": { + "train_loss": [ + 2.092766728363037, + 2.071445608520508, + 2.0701047048187258, + 2.0665665516662597, + 2.067467219543457, + 2.066609907684326, + 2.067391862182617, + 2.0658849488830566, + 2.065659608001709, + 2.065375587425232, + 2.0646166355133055, + 2.0635058017349244, + 2.0630363352966308, + 2.0627129718780517, + 2.0648969506835937, + 2.063057222671509, + 2.062092007293701, + 2.062603257293701, + 2.0662800875091554, + 2.0649482633972167, + 2.0633998834609986, + 2.0644692920684813, + 2.0658038328552246, + 2.064523613204956, + 2.0647737124633787, + 2.063466505584717, + 2.0641796745300294, + 2.0648309812164305, + 2.06493783164978, + 2.0642703956604005, + 2.065596410675049, + 2.067113346405029, + 2.0670792920684815, + 2.0674756241607666, + 2.066420622558594, + 2.066541123046875, + 2.0665447259521486, + 2.0689855900573733, + 2.067198378753662, + 2.0686578797149657, + 2.06837053024292, + 2.0679433206939697, + 2.0698270501708986, + 2.0687507022094724, + 2.070133085708618, + 2.068685834007263, + 2.0692544694519044, + 2.069098585357666, + 2.070015923538208, + 2.0695283253479, + 2.0700033166503906, + 2.0695154525756836, + 2.0704460035705567, + 2.0702316856384275, + 2.0706022947692873, + 2.0712034313201904, + 2.070552589263916, + 2.071217707977295, + 2.0711937672424314, + 2.072246285095215, + 2.068459527130127, + 2.0709887282562254, + 2.0723592150115966, + 2.072640259552002, + 2.0726871685791015, + 2.069176601486206, + 2.071328667678833, + 2.0708692224121092, + 2.0729969272613524, + 2.0725777490997315, + 2.0719549550628664, + 2.0703202209472655, + 2.0716604712677, + 2.0724998860168458, + 2.072380848312378, + 2.0722954035949708, + 2.070641780166626, + 2.071795623474121, + 2.0716914239120485, + 2.073136379394531, + 2.0709913427734374, + 2.0712071760559083, + 2.071455654296875, + 2.0722044621276856, + 2.071295954055786, + 2.0727719202041626, + 2.0717286488342284, + 2.0731436529541014, + 2.0714281242370607, + 2.07157251701355, + 2.0711801865386965, + 2.071615061340332, + 2.0700036166381834, + 2.0721589122009276, + 2.071688577957153, + 2.0693919525909426, + 2.0720266328430177, + 2.0719666347503662, + 2.0707905860900877, + 2.072465877342224 + ], + "train_acc": [ + 0.23206, + 0.2349, + 0.23432, + 0.2351, + 0.2368, + 0.23936, + 0.23746, + 0.24068, + 0.24038, + 0.24096, + 0.2413, + 0.24336, + 0.2423, + 0.24238, + 0.24486, + 0.24402, + 0.24642, + 0.24514, + 0.24274, + 0.24326, + 0.24602, + 0.24686, + 0.24686, + 0.24712, + 0.24644, + 0.24618, + 0.24624, + 0.24394, + 0.2468, + 0.24656, + 0.24538, + 0.24386, + 0.24564, + 0.24706, + 0.24646, + 0.24484, + 0.24802, + 0.24608, + 0.24632, + 0.2467, + 0.24684, + 0.2485, + 0.24542, + 0.24522, + 0.2466, + 0.24708, + 0.24736, + 0.24888, + 0.2499, + 0.24734, + 0.24814, + 0.24822, + 0.24724, + 0.24642, + 0.24566, + 0.2475, + 0.24706, + 0.2471, + 0.24564, + 0.2467, + 0.24904, + 0.2474, + 0.24816, + 0.24836, + 0.24672, + 0.24812, + 0.24726, + 0.2477, + 0.24868, + 0.24794, + 0.2471, + 0.24962, + 0.24896, + 0.24758, + 0.2479, + 0.24738, + 0.24834, + 0.24676, + 0.2483, + 0.24708, + 0.24694, + 0.24966, + 0.25074, + 0.2455, + 0.24898, + 0.2485, + 0.2489, + 0.24698, + 0.24926, + 0.248, + 0.2504, + 0.25158, + 0.25008, + 0.24946, + 0.24912, + 0.2502, + 0.24898, + 0.25056, + 0.24956, + 0.24938 + ], + "test_acc": [ + 0.2527, + 0.2607, + 0.246, + 0.2601, + 0.2571, + 0.266, + 0.2599, + 0.2551, + 0.2513, + 0.2552, + 0.2562, + 0.2613, + 0.2675, + 0.2656, + 0.264, + 0.2367, + 0.2691, + 0.2628, + 0.2582, + 0.2619, + 0.2729, + 0.2684, + 0.2705, + 0.2654, + 0.2614, + 0.2692, + 0.2514, + 0.2663, + 0.2645, + 0.2706, + 0.2657, + 0.2484, + 0.264, + 0.2659, + 0.2636, + 0.2718, + 0.2601, + 0.2585, + 0.2664, + 0.2562, + 0.2649, + 0.2623, + 0.2622, + 0.2599, + 0.26, + 0.2643, + 0.2635, + 0.2689, + 0.2733, + 0.2606, + 0.2726, + 0.2692, + 0.2663, + 0.264, + 0.2636, + 0.2583, + 0.2676, + 0.2779, + 0.2617, + 0.2661, + 0.2716, + 0.2707, + 0.266, + 0.2684, + 0.2702, + 0.2573, + 0.2637, + 0.2675, + 0.2738, + 0.2544, + 0.2636, + 0.2666, + 0.2671, + 0.2687, + 0.2668, + 0.2666, + 0.2612, + 0.2664, + 0.2675, + 0.2643, + 0.2657, + 0.2671, + 0.2624, + 0.2684, + 0.2624, + 0.2684, + 0.2646, + 0.2645, + 0.2655, + 0.263, + 0.2648, + 0.2677, + 0.2644, + 0.2658, + 0.2661, + 0.2656, + 0.2649, + 0.2652, + 0.2652, + 0.2652 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.21441566944122314, + 0.0008995254174806178, + -0.0011490017641335726, + -0.0006673308089375496, + -0.0004349752562120557, + 0.001973408740013838, + 0.00042850073077715933, + 0.0003071604296565056, + 0.0011147534241899848, + -0.0019351006485521793, + -0.00017125890008173883, + 0.0004633249482139945 + ], + "perturbation_rho": [ + 0.03552835434675217, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -8.102506399154663e-08, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + -2.7939677238464355e-09 + ], + "0.003": [ + -3.5390257835388184e-07, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + -2.7939677238464355e-09 + ], + "0.01": [ + -1.2461096048355103e-06, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + -2.7939677238464355e-09 + ] + }, + "hidden_norms_per_layer": [ + 45168.5625, + 768591872.0, + 1624721024.0, + 1961752960.0, + 2100152448.0, + 2500510976.0, + 2508241920.0, + 2941273344.0, + 2989846784.0, + 3150896128.0, + 3321745152.0, + 3535885312.0, + 3636161792.0 + ], + "bp_grad_norms_per_layer": [ + 1.8181714267484494e-07, + 3.9466230283835557e-10, + 3.8982980732349404e-10, + 3.898551481640311e-10, + 3.8986006090091507e-10, + 3.900082479191269e-10, + 3.8990691231255425e-10, + 3.899590372835604e-10, + 3.900538503298634e-10, + 3.901238221359904e-10, + 3.9011002761490943e-10, + 3.9018840936044796e-10, + 3.9032757581658473e-10 + ] + }, + "drift": { + "embed.weight": 408.6565633657492, + "embed.bias": 423.28110485460303, + "blocks.0.ln.weight": 9.70258617401123, + "blocks.0.w1.weight": 286.6816708847807, + "blocks.0.w1.bias": 311.9898470727492, + "blocks.0.w2.weight": 590.5617039721983, + "blocks.1.ln.weight": 11.42900562286377, + "blocks.1.w1.weight": 339.1322667459177, + "blocks.1.w1.bias": 319.191979974844, + "blocks.1.w2.weight": 485.54212783414897, + "blocks.2.ln.weight": 10.751296043395996, + "blocks.2.w1.weight": 306.6921002240311, + "blocks.2.w1.bias": 286.8824089191201, + "blocks.2.w2.weight": 419.397779279592, + "blocks.3.ln.weight": 10.36766242980957, + "blocks.3.w1.weight": 282.4814974926866, + "blocks.3.w1.bias": 252.8190034898104, + "blocks.3.w2.weight": 394.269135440916, + "blocks.4.ln.weight": 11.355030059814453, + "blocks.4.w1.weight": 326.6622613339727, + "blocks.4.w1.bias": 314.3979807413481, + "blocks.4.w2.weight": 461.04714607850275, + "blocks.5.ln.weight": 8.093010902404785, + "blocks.5.w1.weight": 217.27913735619703, + "blocks.5.w1.bias": 207.68042804194928, + "blocks.5.w2.weight": 294.75329385151446, + "blocks.6.ln.weight": 11.64451789855957, + "blocks.6.w1.weight": 331.7248686164095, + "blocks.6.w1.bias": 309.00249567397475, + "blocks.6.w2.weight": 484.57904574635427, + "blocks.7.ln.weight": 8.409276008605957, + "blocks.7.w1.weight": 225.75220538114135, + "blocks.7.w1.bias": 216.53163331133942, + "blocks.7.w2.weight": 324.7052199531864, + "blocks.8.ln.weight": 11.350098609924316, + "blocks.8.w1.weight": 320.93864855988267, + "blocks.8.w1.bias": 306.19484707322147, + "blocks.8.w2.weight": 442.72910719653026, + "blocks.9.ln.weight": 10.944558143615723, + "blocks.9.w1.weight": 307.8993593428455, + "blocks.9.w1.bias": 290.93916631623483, + "blocks.9.w2.weight": 440.2539306532767, + "blocks.10.ln.weight": 11.270280838012695, + "blocks.10.w1.weight": 321.55439380753864, + "blocks.10.w1.bias": 288.9418448710429, + "blocks.10.w2.weight": 411.81523324589523, + "blocks.11.ln.weight": 10.046154022216797, + "blocks.11.w1.weight": 286.57908879426424, + "blocks.11.w1.bias": 275.6362336173934, + "blocks.11.w2.weight": 399.62017806369147, + "out_ln.weight": 0.7484539747238159, + "out_head.weight": 8.125483521933623, + "out_head.bias": 0.7273602975063448 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.074700547027588, + 2.030654427947998, + 2.0057160476684572, + 1.9860789415740967, + 1.9725520542144774, + 1.95931587890625, + 1.9556365052032472, + 1.949812787322998, + 1.9462810146331788, + 1.9393242764282226, + 1.9346750591278077, + 1.9271999840545655, + 1.9232344121932983, + 1.9189868188095094, + 1.919980531692505, + 1.9102826860046387, + 1.90958540763855, + 1.906280760154724, + 1.9068798965835572, + 1.9018908991241454, + 1.8965862115859986, + 1.894734903831482, + 1.893279719581604, + 1.8903550232315063, + 1.8864814365005493, + 1.884921064682007, + 1.8844925833129882, + 1.8856516895294189, + 1.881670213623047, + 1.880837113380432, + 1.8822157500457763, + 1.882532099876404, + 1.882689362220764, + 1.881834898109436, + 1.8771058824920654, + 1.8768038847351074, + 1.876141697921753, + 1.877026067199707, + 1.8729603647613526, + 1.8720484650039673, + 1.8726009467315674, + 1.8684697211456298, + 1.871248878250122, + 1.869133426246643, + 1.8653471280288696, + 1.8631344338226319, + 1.8654646643066406, + 1.8650356465911866, + 1.8616812967300416, + 1.8621146585464476, + 1.8621270999908448, + 1.8586566192626952, + 1.8607052938842774, + 1.8588444815063476, + 1.8568199838256836, + 1.855796114845276, + 1.8530415719604492, + 1.8559222234725952, + 1.856116162338257, + 1.8548921677017212, + 1.8508316849517823, + 1.8514919400787353, + 1.8525736228179932, + 1.8520959496307372, + 1.8482627039337158, + 1.8451711013412475, + 1.8483959712982179, + 1.8483288528442383, + 1.848829554977417, + 1.848747034034729, + 1.8459476345443726, + 1.8471334496307372, + 1.8432773351669312, + 1.8460039371490478, + 1.8457718017196656, + 1.8440695235824585, + 1.8412491543579101, + 1.8426079097747803, + 1.8468621598052979, + 1.8427482269668578, + 1.8419597222137452, + 1.841674754562378, + 1.8427531800079346, + 1.8430379685211182, + 1.8435487987899781, + 1.842195815963745, + 1.842669207496643, + 1.8409197840118408, + 1.8410899520492554, + 1.8386338932037354, + 1.8406484191894532, + 1.8388528701400757, + 1.837176209716797, + 1.8422198328399657, + 1.8413942455673218, + 1.839041402053833, + 1.8415731252288818, + 1.839198327407837, + 1.8407912665557862, + 1.8404628273773194 + ], + "train_acc": [ + 0.23152, + 0.2483, + 0.26406, + 0.27226, + 0.27666, + 0.2867, + 0.28482, + 0.2895, + 0.29026, + 0.29298, + 0.29752, + 0.30064, + 0.30484, + 0.30198, + 0.30454, + 0.30912, + 0.31148, + 0.3106, + 0.3109, + 0.31142, + 0.317, + 0.31652, + 0.31412, + 0.31586, + 0.32138, + 0.31964, + 0.32098, + 0.32112, + 0.32356, + 0.32264, + 0.32256, + 0.32364, + 0.32494, + 0.32508, + 0.32778, + 0.32594, + 0.32872, + 0.32838, + 0.32896, + 0.32934, + 0.32812, + 0.32972, + 0.33028, + 0.33102, + 0.33146, + 0.33284, + 0.33266, + 0.33498, + 0.3342, + 0.33536, + 0.33232, + 0.33478, + 0.33422, + 0.33506, + 0.33846, + 0.33662, + 0.33606, + 0.33612, + 0.33684, + 0.3362, + 0.3385, + 0.33634, + 0.33684, + 0.33968, + 0.33902, + 0.34212, + 0.34068, + 0.33904, + 0.33856, + 0.34094, + 0.34132, + 0.34188, + 0.3424, + 0.34188, + 0.34086, + 0.34386, + 0.34486, + 0.34176, + 0.3404, + 0.34224, + 0.34428, + 0.34282, + 0.34242, + 0.3411, + 0.3426, + 0.34362, + 0.34128, + 0.34484, + 0.34452, + 0.34312, + 0.34328, + 0.34296, + 0.34432, + 0.34434, + 0.34316, + 0.3433, + 0.34516, + 0.346, + 0.34178, + 0.3444 + ], + "test_acc": [ + 0.2629, + 0.2729, + 0.2809, + 0.3082, + 0.3046, + 0.3106, + 0.3059, + 0.3072, + 0.3226, + 0.3051, + 0.3139, + 0.3342, + 0.336, + 0.3323, + 0.3386, + 0.3217, + 0.3373, + 0.3327, + 0.3293, + 0.3421, + 0.346, + 0.3439, + 0.3489, + 0.3488, + 0.3561, + 0.3493, + 0.3318, + 0.3436, + 0.3441, + 0.3531, + 0.3485, + 0.3409, + 0.3518, + 0.3509, + 0.3502, + 0.36, + 0.3513, + 0.3503, + 0.3492, + 0.3425, + 0.3538, + 0.3402, + 0.3518, + 0.3541, + 0.3519, + 0.3499, + 0.3639, + 0.3504, + 0.3575, + 0.3545, + 0.3569, + 0.3559, + 0.3541, + 0.36, + 0.3586, + 0.3625, + 0.3608, + 0.3595, + 0.3629, + 0.3649, + 0.3603, + 0.3653, + 0.3651, + 0.3619, + 0.3644, + 0.3655, + 0.3616, + 0.3621, + 0.3631, + 0.361, + 0.3605, + 0.3647, + 0.3669, + 0.366, + 0.3644, + 0.3677, + 0.3653, + 0.3664, + 0.3679, + 0.3688, + 0.3658, + 0.3666, + 0.3664, + 0.368, + 0.3659, + 0.3647, + 0.3667, + 0.368, + 0.3688, + 0.367, + 0.3683, + 0.3688, + 0.3674, + 0.3685, + 0.3683, + 0.3682, + 0.3679, + 0.368, + 0.3683, + 0.3684 + ] + }, + "diagnostics": { + "bp_cosine": [ + -0.014710275456309319, + 0.02226361073553562, + 0.03605305403470993, + 0.003516306634992361, + 0.012390266172587872, + 0.058464229106903076, + -0.04347992688417435, + 0.0054677389562129974, + 0.015231117606163025, + -0.03650633990764618, + -0.04501262307167053, + 0.9956947565078735 + ], + "perturbation_rho": [ + 0.008943558670580387, + 0.00040163123048841953, + -0.040518321096897125, + -0.022643186151981354, + 0.044356103986501694, + 0.02536097541451454, + 0.022935548797249794, + 0.028215371072292328, + 0.011796066537499428, + 0.016897639259696007, + -0.004378834739327431, + -0.04016704112291336 + ], + "nudging": { + "0.001": [ + 5.587935447692871e-09, + -9.569339454174042e-08, + -4.7963112592697144e-08, + -2.2351741790771484e-08, + -4.6566128730773926e-09, + -6.28642737865448e-08, + 7.450580596923828e-09, + -3.771856427192688e-08, + -4.516914486885071e-08, + 5.587935447692871e-09, + 1.6298145055770874e-08, + -6.00004568696022e-07 + ], + "0.003": [ + 3.2922253012657166e-07, + -1.3760291039943695e-07, + -1.1292286217212677e-07, + -9.778887033462524e-09, + -7.450580596923828e-09, + -1.5995465219020844e-07, + 8.055940270423889e-08, + -1.862645149230957e-08, + -9.313225746154785e-08, + 9.778887033462524e-09, + 0.0, + -1.941109076142311e-06 + ], + "0.01": [ + 1.0672956705093384e-06, + -3.3783726394176483e-07, + -4.216562956571579e-07, + -1.5529803931713104e-07, + -9.639188647270203e-08, + -5.022156983613968e-07, + 2.952292561531067e-07, + -8.288770914077759e-08, + -1.0244548320770264e-07, + 2.5960616767406464e-07, + 3.287568688392639e-07, + -6.931368261575699e-06 + ] + }, + "hidden_norms_per_layer": [ + 11081.2958984375, + 118196.625, + 614115.75, + 828878.8125, + 921725.125, + 1069404.875, + 1237270.375, + 1402473.625, + 1720954.375, + 1997529.875, + 2216433.75, + 2325002.5, + 1993140.375 + ], + "bp_grad_norms_per_layer": [ + 1.0706523426051717e-05, + 6.52372534659662e-07, + 4.187676552191988e-07, + 4.1873394707181433e-07, + 4.1315911403216887e-07, + 4.1367323433405545e-07, + 4.1193311517417897e-07, + 4.11331342320409e-07, + 4.107659776764194e-07, + 4.1129530359285127e-07, + 4.1165918673868873e-07, + 4.1159788111144735e-07, + 4.073814920957375e-07 + ] + }, + "drift": { + "embed.weight": 98.4177462274615, + "embed.bias": 23.69940251347622, + "blocks.0.ln.weight": 2.1789700984954834, + "blocks.0.w1.weight": 24.29409235730548, + "blocks.0.w1.bias": 18.31784316907853, + "blocks.0.w2.weight": 101.8385022981204, + "blocks.1.ln.weight": 1.3870518207550049, + "blocks.1.w1.weight": 26.373394752918255, + "blocks.1.w1.bias": 22.570698680348862, + "blocks.1.w2.weight": 47.64154250315352, + "blocks.2.ln.weight": 0.9734257459640503, + "blocks.2.w1.weight": 21.148009831845542, + "blocks.2.w1.bias": 20.856615521275852, + "blocks.2.w2.weight": 53.70072333386875, + "blocks.3.ln.weight": 1.1795594692230225, + "blocks.3.w1.weight": 20.73167225109056, + "blocks.3.w1.bias": 18.579235259886254, + "blocks.3.w2.weight": 86.87336634029717, + "blocks.4.ln.weight": 0.997854471206665, + "blocks.4.w1.weight": 21.602122373726505, + "blocks.4.w1.bias": 22.47848497309394, + "blocks.4.w2.weight": 53.78872222769277, + "blocks.5.ln.weight": 0.9774206280708313, + "blocks.5.w1.weight": 22.79774822645063, + "blocks.5.w1.bias": 24.21613131547018, + "blocks.5.w2.weight": 58.26315367277549, + "blocks.6.ln.weight": 1.0200481414794922, + "blocks.6.w1.weight": 26.093657863707982, + "blocks.6.w1.bias": 26.27916630293517, + "blocks.6.w2.weight": 33.89854797735531, + "blocks.7.ln.weight": 1.0476559400558472, + "blocks.7.w1.weight": 28.70756132891856, + "blocks.7.w1.bias": 30.025930965175473, + "blocks.7.w2.weight": 39.54073820690228, + "blocks.8.ln.weight": 1.030435562133789, + "blocks.8.w1.weight": 29.48263158312378, + "blocks.8.w1.bias": 31.083890237870737, + "blocks.8.w2.weight": 37.91543643893619, + "blocks.9.ln.weight": 1.0992196798324585, + "blocks.9.w1.weight": 29.805154447874546, + "blocks.9.w1.bias": 31.101382421095806, + "blocks.9.w2.weight": 34.2461727464101, + "blocks.10.ln.weight": 1.1228426694869995, + "blocks.10.w1.weight": 25.197803524439806, + "blocks.10.w1.bias": 24.54186072317423, + "blocks.10.w2.weight": 46.988036920078784, + "blocks.11.ln.weight": 0.9333347082138062, + "blocks.11.w1.weight": 19.997632331729342, + "blocks.11.w1.bias": 16.848829693670883, + "blocks.11.w2.weight": 117.94127434264082, + "out_ln.weight": 0.5421217679977417, + "out_head.weight": 6.825100238203774, + "out_head.bias": 0.5928900621441576 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 256, + "num_blocks": 12, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 2 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d256_L12_seed2", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file diff --git a/results/fa_dfa_d256_L12_seed3/results_cifar10.json b/results/fa_dfa_d256_L12_seed3/results_cifar10.json new file mode 100644 index 0000000..b2495bc --- /dev/null +++ b/results/fa_dfa_d256_L12_seed3/results_cifar10.json @@ -0,0 +1,969 @@ +{ + "3": { + "dfa": { + "log": { + "train_loss": [ + 2.080998458023071, + 2.0535908667755125, + 2.0456236054992676, + 2.0437664754867555, + 2.04373485496521, + 2.0431359854125977, + 2.0434441787338256, + 2.0382570924377443, + 2.038818662261963, + 2.0345919523620606, + 2.03406599155426, + 2.031346089324951, + 2.0293883434677125, + 2.0278041764831545, + 2.0296552677154542, + 2.0259662225723267, + 2.0258015427017213, + 2.024105228805542, + 2.0241383403778075, + 2.0237019972991943, + 2.021524270172119, + 2.0192913208389283, + 2.020055650939941, + 2.0187213849639893, + 2.0182762395858767, + 2.018081063079834, + 2.0169390340805053, + 2.015762889480591, + 2.014286817321777, + 2.0149634075164795, + 2.012851970252991, + 2.0143834255218507, + 2.012589658126831, + 2.013470998764038, + 2.0132155655670165, + 2.0125345434570314, + 2.014651682510376, + 2.011008670730591, + 2.010692717514038, + 2.0098264505767824, + 2.010699557571411, + 2.010047173309326, + 2.0120795148086548, + 2.012862138290405, + 2.010627545700073, + 2.012053688697815, + 2.012078428192139, + 2.0087205919647215, + 2.010501960144043, + 2.0095251747131346, + 2.0100318467712404, + 2.009802308883667, + 2.008675379104614, + 2.009749626541138, + 2.0081673904037474, + 2.010975965423584, + 2.0075539178466797, + 2.007934888153076, + 2.0076504850769044, + 2.0063897485733033, + 2.007999542160034, + 2.0096467161560057, + 2.0087811013031005, + 2.0079173694229127, + 2.005330057106018, + 2.006340228881836, + 2.008018601989746, + 2.0075161946868896, + 2.0061075159454345, + 2.00557112739563, + 2.0092006335449217, + 2.005981811904907, + 2.008798143310547, + 2.006232444229126, + 2.0086249324798584, + 2.0055838273620608, + 2.0056044164276123, + 2.0061014500427246, + 2.00520422416687, + 2.0060690811538695, + 2.0042829446411132, + 2.007496082611084, + 2.006376414794922, + 2.007382208404541, + 2.0068653521347044, + 2.00715774230957, + 2.0046533392715453, + 2.0055563162994385, + 2.0043802153778074, + 2.00596639465332, + 2.0056586725234986, + 2.0065873177337648, + 2.006924060974121, + 2.007017402496338, + 2.007137279701233, + 2.007027236022949, + 2.005679884109497, + 2.00417349899292, + 2.006064287567139, + 2.0064190141296385 + ], + "train_acc": [ + 0.2315, + 0.2422, + 0.242, + 0.2424, + 0.24744, + 0.24618, + 0.24638, + 0.25122, + 0.2481, + 0.2553, + 0.25512, + 0.25802, + 0.2577, + 0.25796, + 0.2582, + 0.2618, + 0.26004, + 0.262, + 0.25824, + 0.26126, + 0.26322, + 0.26556, + 0.26414, + 0.26362, + 0.26414, + 0.2653, + 0.26648, + 0.26604, + 0.26904, + 0.26534, + 0.26784, + 0.26806, + 0.2689, + 0.26802, + 0.26812, + 0.26996, + 0.26976, + 0.26924, + 0.27148, + 0.26982, + 0.2701, + 0.27084, + 0.27122, + 0.27068, + 0.2739, + 0.26968, + 0.27164, + 0.27262, + 0.27076, + 0.2726, + 0.27058, + 0.26984, + 0.27202, + 0.27046, + 0.2735, + 0.27262, + 0.27144, + 0.27432, + 0.2739, + 0.27494, + 0.27442, + 0.2727, + 0.27466, + 0.2713, + 0.27138, + 0.27472, + 0.27394, + 0.27338, + 0.27562, + 0.274, + 0.27062, + 0.2747, + 0.27102, + 0.27396, + 0.27284, + 0.27616, + 0.27526, + 0.27456, + 0.27508, + 0.27278, + 0.27454, + 0.2754, + 0.27318, + 0.27526, + 0.27438, + 0.27674, + 0.27516, + 0.27372, + 0.27594, + 0.2748, + 0.27798, + 0.27314, + 0.27486, + 0.27464, + 0.2744, + 0.2761, + 0.27524, + 0.27572, + 0.27546, + 0.27596 + ], + "test_acc": [ + 0.2643, + 0.2485, + 0.2666, + 0.258, + 0.2678, + 0.2703, + 0.276, + 0.2618, + 0.2717, + 0.2773, + 0.2889, + 0.281, + 0.2829, + 0.2771, + 0.2647, + 0.2713, + 0.29, + 0.2779, + 0.2855, + 0.2833, + 0.2932, + 0.276, + 0.2898, + 0.2953, + 0.2799, + 0.2835, + 0.286, + 0.2849, + 0.2838, + 0.291, + 0.2948, + 0.2955, + 0.2873, + 0.2931, + 0.2908, + 0.2927, + 0.2896, + 0.2932, + 0.2863, + 0.2927, + 0.2886, + 0.2954, + 0.2871, + 0.2946, + 0.2925, + 0.2889, + 0.3005, + 0.2984, + 0.2954, + 0.2876, + 0.2852, + 0.2888, + 0.2943, + 0.2923, + 0.2934, + 0.2888, + 0.2941, + 0.2926, + 0.287, + 0.2969, + 0.2969, + 0.2895, + 0.2982, + 0.2961, + 0.2972, + 0.2998, + 0.2986, + 0.2932, + 0.2877, + 0.2875, + 0.2914, + 0.2958, + 0.2935, + 0.2963, + 0.2987, + 0.2948, + 0.2952, + 0.296, + 0.2993, + 0.2935, + 0.2962, + 0.2933, + 0.2948, + 0.2986, + 0.291, + 0.2968, + 0.2948, + 0.2976, + 0.2946, + 0.2958, + 0.2968, + 0.2977, + 0.2977, + 0.2966, + 0.2962, + 0.2963, + 0.2963, + 0.2966, + 0.2968, + 0.2968 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.384052038192749, + 0.00187331298366189, + 0.0014918153174221516, + -0.003990606404840946, + 0.0011513899080455303, + -0.0013353436952456832, + -0.0003460634616203606, + -0.0025712824426591396, + 0.0021760533563792706, + 0.0015363170532509685, + 0.0023321444168686867, + -0.0023729477543383837 + ], + "perturbation_rho": [ + 0.008436945267021656, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -2.48197466135025e-07, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -1.144595444202423e-06, + -2.7939677238464355e-09, + 3.725290298461914e-09, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.01": [ + -3.5781413316726685e-06, + -4.6566128730773926e-09, + 3.725290298461914e-09, + -2.7939677238464355e-09, + -3.725290298461914e-09, + -6.51925802230835e-09, + 9.313225746154785e-10, + 0.0, + 0.0, + 0.0, + -3.725290298461914e-09, + 0.0 + ] + }, + "hidden_norms_per_layer": [ + 40862.5703125, + 302949600.0, + 779490368.0, + 866010048.0, + 1434764928.0, + 1708853376.0, + 1753628672.0, + 2062010112.0, + 2322754048.0, + 2426205952.0, + 2646477312.0, + 3046314752.0, + 3080729344.0 + ], + "bp_grad_norms_per_layer": [ + 3.3795413401094265e-07, + 9.012455781665096e-10, + 8.881588797748918e-10, + 8.880864932336863e-10, + 8.938522144674721e-10, + 8.938976781003305e-10, + 8.937018902699378e-10, + 8.93527418721618e-10, + 8.935119866215757e-10, + 8.93440765814546e-10, + 8.933446760117647e-10, + 8.934304962515682e-10, + 8.93500051724061e-10 + ] + }, + "drift": { + "embed.weight": 357.53469530895904, + "embed.bias": 290.04318739609596, + "blocks.0.ln.weight": 10.989575386047363, + "blocks.0.w1.weight": 220.1012762362677, + "blocks.0.w1.bias": 206.37217351266216, + "blocks.0.w2.weight": 501.3829415210294, + "blocks.1.ln.weight": 9.839688301086426, + "blocks.1.w1.weight": 256.6554332372174, + "blocks.1.w1.bias": 237.5977187566281, + "blocks.1.w2.weight": 396.28129110774046, + "blocks.2.ln.weight": 8.956626892089844, + "blocks.2.w1.weight": 225.58790523471438, + "blocks.2.w1.bias": 219.9504694961951, + "blocks.2.w2.weight": 332.68534972274165, + "blocks.3.ln.weight": 10.4323091506958, + "blocks.3.w1.weight": 304.362127476672, + "blocks.3.w1.bias": 271.4581804566994, + "blocks.3.w2.weight": 456.776518831265, + "blocks.4.ln.weight": 10.220683097839355, + "blocks.4.w1.weight": 294.88770495280846, + "blocks.4.w1.bias": 278.17483493032455, + "blocks.4.w2.weight": 422.86722605618826, + "blocks.5.ln.weight": 7.415699005126953, + "blocks.5.w1.weight": 190.23919300974276, + "blocks.5.w1.bias": 170.52475346607713, + "blocks.5.w2.weight": 282.6076353329658, + "blocks.6.ln.weight": 11.005321502685547, + "blocks.6.w1.weight": 315.1699742985849, + "blocks.6.w1.bias": 293.37584502861324, + "blocks.6.w2.weight": 428.46065418838674, + "blocks.7.ln.weight": 11.400951385498047, + "blocks.7.w1.weight": 320.1547970124374, + "blocks.7.w1.bias": 312.2252164433187, + "blocks.7.w2.weight": 444.7674866393189, + "blocks.8.ln.weight": 9.238995552062988, + "blocks.8.w1.weight": 261.6975850751966, + "blocks.8.w1.bias": 251.09895252390498, + "blocks.8.w2.weight": 352.9133162081499, + "blocks.9.ln.weight": 11.197455406188965, + "blocks.9.w1.weight": 315.7224672836386, + "blocks.9.w1.bias": 295.82606170712853, + "blocks.9.w2.weight": 416.36578033635686, + "blocks.10.ln.weight": 11.847606658935547, + "blocks.10.w1.weight": 338.01615076254603, + "blocks.10.w1.bias": 307.22711286489067, + "blocks.10.w2.weight": 464.68687019974544, + "blocks.11.ln.weight": 7.684996128082275, + "blocks.11.w1.weight": 188.9801145292558, + "blocks.11.w1.bias": 178.35247968031155, + "blocks.11.w2.weight": 244.64524232865998, + "out_ln.weight": 1.0227844715118408, + "out_head.weight": 10.424635519375226, + "out_head.bias": 0.7962212896591503 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.1018029602050783, + 2.0231987742614748, + 1.9913426873779296, + 1.9757863534164428, + 1.9628748377227783, + 1.949777193145752, + 1.9388965420150757, + 1.9255553958511353, + 1.9167609970474244, + 1.9067581841278076, + 1.9039552599716187, + 1.899418589744568, + 1.8931139596176147, + 1.8900751802825928, + 1.8894958589935302, + 1.8822405550384522, + 1.8818315468597413, + 1.8777127625274659, + 1.8790236785125733, + 1.8758525806045532, + 1.8699211498641968, + 1.8684503372573853, + 1.868811384048462, + 1.8628138626098634, + 1.8617137921524047, + 1.859757089614868, + 1.8566949377059936, + 1.8563865858459472, + 1.8520149837493896, + 1.852164205932617, + 1.846489913673401, + 1.847879118347168, + 1.8489951482391358, + 1.8469820216369628, + 1.8453059420394897, + 1.8414616915893556, + 1.845604056930542, + 1.8367319815063476, + 1.838143812828064, + 1.8364125064468384, + 1.8365082810211182, + 1.8345620361328125, + 1.8362988974761962, + 1.8378243299102783, + 1.833498821182251, + 1.8334047562408446, + 1.8298064031600951, + 1.828000364303589, + 1.8265536352539062, + 1.829135093307495, + 1.8257994495391845, + 1.8226896044158936, + 1.826085818748474, + 1.8257194399261474, + 1.820882752685547, + 1.823325060119629, + 1.821622840309143, + 1.8205207012939453, + 1.8186096018218993, + 1.8195034896469116, + 1.8178193243026732, + 1.8202132829284667, + 1.8164496115112305, + 1.8147546480941772, + 1.8138668438720704, + 1.8105954584121704, + 1.8140293844604491, + 1.8142269690704347, + 1.8139710745239257, + 1.812066497116089, + 1.8160650396728515, + 1.8103228116607666, + 1.81037494846344, + 1.8109107693481445, + 1.8118848001480103, + 1.80986087184906, + 1.8117412914276123, + 1.8095279247283935, + 1.8082179739379882, + 1.8077979961395263, + 1.8045953066635132, + 1.8087207033920287, + 1.8068067630004883, + 1.8097841464996338, + 1.8086548376846314, + 1.8075193279266357, + 1.8061525051498413, + 1.8061482022476196, + 1.805157029685974, + 1.806752130088806, + 1.8043223245239257, + 1.8059532889175416, + 1.804686725730896, + 1.8053256253051757, + 1.8066633655166626, + 1.805836711769104, + 1.8080352004241944, + 1.803317499961853, + 1.801328492088318, + 1.8052216162109376 + ], + "train_acc": [ + 0.20742, + 0.24158, + 0.25614, + 0.26952, + 0.27686, + 0.28706, + 0.29562, + 0.30176, + 0.3048, + 0.30964, + 0.3102, + 0.31448, + 0.31506, + 0.31792, + 0.32218, + 0.3222, + 0.3216, + 0.3241, + 0.32336, + 0.32284, + 0.32752, + 0.3269, + 0.3251, + 0.33148, + 0.3297, + 0.32996, + 0.33296, + 0.333, + 0.3372, + 0.33618, + 0.336, + 0.33676, + 0.3361, + 0.33558, + 0.33656, + 0.34134, + 0.3399, + 0.34096, + 0.34498, + 0.34478, + 0.34308, + 0.34372, + 0.34374, + 0.34298, + 0.3446, + 0.34562, + 0.34566, + 0.34838, + 0.34956, + 0.34546, + 0.34762, + 0.34824, + 0.34838, + 0.34946, + 0.35068, + 0.35008, + 0.35006, + 0.35236, + 0.35148, + 0.35128, + 0.35232, + 0.3504, + 0.35356, + 0.3535, + 0.3542, + 0.35762, + 0.3522, + 0.35422, + 0.35474, + 0.3524, + 0.3533, + 0.3531, + 0.35642, + 0.356, + 0.35522, + 0.35664, + 0.35594, + 0.35494, + 0.3547, + 0.35866, + 0.35694, + 0.35732, + 0.35788, + 0.35534, + 0.35586, + 0.35614, + 0.35642, + 0.3569, + 0.35992, + 0.35596, + 0.35776, + 0.35608, + 0.35652, + 0.35726, + 0.35692, + 0.35758, + 0.35714, + 0.35754, + 0.35816, + 0.35878 + ], + "test_acc": [ + 0.2378, + 0.2506, + 0.2725, + 0.2937, + 0.3002, + 0.3155, + 0.3308, + 0.3192, + 0.3399, + 0.3382, + 0.3357, + 0.3429, + 0.3428, + 0.3431, + 0.3337, + 0.3418, + 0.3495, + 0.3491, + 0.3603, + 0.357, + 0.3506, + 0.3612, + 0.3593, + 0.3643, + 0.3621, + 0.3646, + 0.3621, + 0.3668, + 0.3625, + 0.3621, + 0.3598, + 0.3715, + 0.3627, + 0.37, + 0.373, + 0.3567, + 0.3681, + 0.3703, + 0.3744, + 0.3688, + 0.3656, + 0.3763, + 0.37, + 0.3705, + 0.3778, + 0.3681, + 0.3743, + 0.3804, + 0.3765, + 0.374, + 0.373, + 0.3763, + 0.3756, + 0.3773, + 0.3779, + 0.3734, + 0.3834, + 0.3806, + 0.3719, + 0.3795, + 0.3785, + 0.3767, + 0.3782, + 0.3827, + 0.3801, + 0.3815, + 0.3842, + 0.3829, + 0.38, + 0.3814, + 0.3821, + 0.3831, + 0.382, + 0.3796, + 0.3846, + 0.3786, + 0.3789, + 0.382, + 0.3786, + 0.3818, + 0.3831, + 0.3849, + 0.3807, + 0.3828, + 0.3799, + 0.382, + 0.3804, + 0.3823, + 0.3797, + 0.3821, + 0.3812, + 0.3807, + 0.3819, + 0.3817, + 0.3818, + 0.3809, + 0.3816, + 0.3816, + 0.3817, + 0.3818 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.027911242097616196, + 0.0825929045677185, + 0.0342608243227005, + -0.01674751378595829, + -0.02788889780640602, + -0.001998391468077898, + 0.007203143555670977, + -0.022960849106311798, + -0.001600538264028728, + -0.02186085283756256, + 0.013902194797992706, + 0.99517422914505 + ], + "perturbation_rho": [ + 0.024441849440336227, + 0.023016899824142456, + -0.07353583723306656, + 0.014823662117123604, + 0.003079652786254883, + -0.01875249855220318, + -0.01764325238764286, + -0.009929117746651173, + -0.04451639950275421, + 0.0320318341255188, + -0.0011818185448646545, + 0.022136203944683075 + ], + "nudging": { + "0.001": [ + -7.490161806344986e-07, + -2.367887645959854e-07, + -8.963979780673981e-08, + -9.546056389808655e-09, + -3.026798367500305e-09, + 2.7939677238464355e-08, + 2.60770320892334e-08, + 2.3283064365386963e-08, + 1.210719347000122e-08, + -1.3969838619232178e-08, + 1.862645149230957e-08, + -7.632188498973846e-07 + ], + "0.003": [ + -2.018176019191742e-06, + -6.172340363264084e-07, + -1.916196197271347e-07, + -3.3527612686157227e-08, + -1.210719347000122e-08, + -6.51925802230835e-08, + 2.514570951461792e-08, + 4.493631422519684e-08, + -4.912726581096649e-08, + 1.5366822481155396e-08, + -1.4901161193847656e-08, + -2.5206245481967926e-06 + ], + "0.01": [ + -6.553716957569122e-06, + -1.8514692783355713e-06, + -4.6426430344581604e-07, + 1.8603168427944183e-07, + 1.6205012798309326e-07, + -1.210719347000122e-08, + -1.2363307178020477e-07, + 2.6961788535118103e-07, + 1.83936208486557e-08, + 2.1792948246002197e-07, + -8.521601557731628e-08, + -9.013805538415909e-06 + ] + }, + "hidden_norms_per_layer": [ + 7627.2177734375, + 77014.5703125, + 261090.03125, + 402920.15625, + 531063.4375, + 650439.0625, + 771807.6875, + 876618.25, + 1137488.875, + 1214998.0, + 1291784.5, + 1356458.875, + 1186984.25 + ], + "bp_grad_norms_per_layer": [ + 1.7493783161626197e-05, + 1.0944853556793532e-06, + 5.515285579349438e-07, + 5.171301609152579e-07, + 5.162664820090868e-07, + 5.121610797687026e-07, + 5.100455382489599e-07, + 5.093034474157321e-07, + 5.131421971782402e-07, + 5.133904323884053e-07, + 5.137007974553853e-07, + 5.171222028366174e-07, + 5.011079906580562e-07 + ] + }, + "drift": { + "embed.weight": 71.59970957169052, + "embed.bias": 34.13643636527804, + "blocks.0.ln.weight": 1.511795163154602, + "blocks.0.w1.weight": 18.837866685893864, + "blocks.0.w1.bias": 13.70267468651505, + "blocks.0.w2.weight": 93.71863715934398, + "blocks.1.ln.weight": 1.3081063032150269, + "blocks.1.w1.weight": 21.240305602835985, + "blocks.1.w1.bias": 13.685504606064915, + "blocks.1.w2.weight": 61.028360088185984, + "blocks.2.ln.weight": 1.0054956674575806, + "blocks.2.w1.weight": 19.38818821685861, + "blocks.2.w1.bias": 16.089163443737224, + "blocks.2.w2.weight": 50.045867786081864, + "blocks.3.ln.weight": 0.7472347021102905, + "blocks.3.w1.weight": 19.199307011929896, + "blocks.3.w1.bias": 18.432771215086387, + "blocks.3.w2.weight": 37.33602068975751, + "blocks.4.ln.weight": 0.6882285475730896, + "blocks.4.w1.weight": 19.489052040551293, + "blocks.4.w1.bias": 20.122343743742668, + "blocks.4.w2.weight": 40.50107681410708, + "blocks.5.ln.weight": 0.7229293584823608, + "blocks.5.w1.weight": 19.290019148333055, + "blocks.5.w1.bias": 19.20270861199211, + "blocks.5.w2.weight": 44.78405827498682, + "blocks.6.ln.weight": 0.7572810649871826, + "blocks.6.w1.weight": 22.02089472340751, + "blocks.6.w1.bias": 22.158527807844386, + "blocks.6.w2.weight": 39.04536700764983, + "blocks.7.ln.weight": 0.7923915386199951, + "blocks.7.w1.weight": 22.987184790170787, + "blocks.7.w1.bias": 25.461720259715808, + "blocks.7.w2.weight": 43.420459766425545, + "blocks.8.ln.weight": 0.7664017081260681, + "blocks.8.w1.weight": 19.084502786198005, + "blocks.8.w1.bias": 19.896472419541702, + "blocks.8.w2.weight": 45.39367900417946, + "blocks.9.ln.weight": 0.7352473735809326, + "blocks.9.w1.weight": 18.514530575989, + "blocks.9.w1.bias": 19.155701487630136, + "blocks.9.w2.weight": 38.96532462321347, + "blocks.10.ln.weight": 0.7539297342300415, + "blocks.10.w1.weight": 18.499939513355947, + "blocks.10.w1.bias": 18.297859410049362, + "blocks.10.w2.weight": 33.573172219147224, + "blocks.11.ln.weight": 0.7084934711456299, + "blocks.11.w1.weight": 16.503202421262042, + "blocks.11.w1.bias": 12.571542518226021, + "blocks.11.w2.weight": 90.02901137589497, + "out_ln.weight": 0.44491103291511536, + "out_head.weight": 6.031580417677323, + "out_head.bias": 0.5341898346582782 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 256, + "num_blocks": 12, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 3 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d256_L12_seed3", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file diff --git a/results/fa_dfa_d256_L12_seed4/results_cifar10.json b/results/fa_dfa_d256_L12_seed4/results_cifar10.json new file mode 100644 index 0000000..6ce051d --- /dev/null +++ b/results/fa_dfa_d256_L12_seed4/results_cifar10.json @@ -0,0 +1,969 @@ +{ + "4": { + "dfa": { + "log": { + "train_loss": [ + 2.08256451877594, + 2.0663339112854002, + 2.065528285903931, + 2.0641389405822754, + 2.063766109161377, + 2.0643044331359865, + 2.061108066558838, + 2.0643930868148805, + 2.061966312408447, + 2.060680084762573, + 2.058289337158203, + 2.05898098777771, + 2.0577542728424074, + 2.0568669985198973, + 2.056451043395996, + 2.054366501312256, + 2.0567925134277343, + 2.053527456817627, + 2.05231096206665, + 2.054566929168701, + 2.0525731983947755, + 2.0498655393218996, + 2.0522367533874513, + 2.0521925598526, + 2.05059351852417, + 2.0510073263931274, + 2.0483419913482668, + 2.0495694082641602, + 2.049738536529541, + 2.0502711264038087, + 2.0500267957305907, + 2.0491143457794188, + 2.050376922607422, + 2.049438688735962, + 2.048919423675537, + 2.0511557712554933, + 2.050678148727417, + 2.051541336364746, + 2.0506073697280884, + 2.0506765252685546, + 2.0498380197906494, + 2.049806067466736, + 2.0485455421447756, + 2.0491862691497804, + 2.0492898570251463, + 2.0476144123840334, + 2.0490683988189695, + 2.0513844109344483, + 2.0487480349731446, + 2.04999566116333, + 2.047838335647583, + 2.049221919898987, + 2.0486966360092165, + 2.0475039793395995, + 2.0491980659484863, + 2.04866181350708, + 2.047751593399048, + 2.0454024687194825, + 2.0468619202423097, + 2.0493145092010496, + 2.048052859725952, + 2.04698764465332, + 2.0469833773040773, + 2.0464803858947755, + 2.0463906023406984, + 2.047911610183716, + 2.048891339149475, + 2.046190006866455, + 2.047377264175415, + 2.0476539112091063, + 2.0468983917617796, + 2.046835005149841, + 2.047859336853027, + 2.044651624069214, + 2.0467517770385744, + 2.04683432472229, + 2.0458193408966063, + 2.0458727869415285, + 2.0462171686553954, + 2.045814103355408, + 2.0447903966522216, + 2.0451703567504884, + 2.046430616149902, + 2.044895040740967, + 2.0480339848327636, + 2.044828313446045, + 2.0458129290008547, + 2.0457765758514403, + 2.0444045190811155, + 2.044279132156372, + 2.046977679824829, + 2.047140994644165, + 2.0433586880111694, + 2.045490126800537, + 2.0467801708221436, + 2.0449837641906736, + 2.0445874029541016, + 2.0448050390625, + 2.0441628652191164, + 2.0462842389678957 + ], + "train_acc": [ + 0.23122, + 0.23646, + 0.23786, + 0.2353, + 0.23812, + 0.23576, + 0.23744, + 0.23778, + 0.23848, + 0.24008, + 0.23958, + 0.24138, + 0.23976, + 0.24192, + 0.2421, + 0.24358, + 0.24318, + 0.246, + 0.2423, + 0.24362, + 0.24622, + 0.24714, + 0.24544, + 0.24618, + 0.24696, + 0.24756, + 0.2518, + 0.24818, + 0.24998, + 0.24534, + 0.24778, + 0.25146, + 0.25022, + 0.24854, + 0.25064, + 0.24894, + 0.25102, + 0.2495, + 0.25118, + 0.24816, + 0.2509, + 0.25058, + 0.25294, + 0.24896, + 0.24964, + 0.25192, + 0.25154, + 0.2498, + 0.25128, + 0.25344, + 0.25054, + 0.25238, + 0.2517, + 0.25266, + 0.2531, + 0.25136, + 0.25094, + 0.25286, + 0.25358, + 0.25226, + 0.25338, + 0.25504, + 0.2549, + 0.2549, + 0.25466, + 0.25336, + 0.25376, + 0.25556, + 0.25546, + 0.2527, + 0.25446, + 0.25494, + 0.2543, + 0.25412, + 0.25538, + 0.25468, + 0.25604, + 0.25698, + 0.2563, + 0.25708, + 0.25672, + 0.25804, + 0.2562, + 0.25584, + 0.25736, + 0.25728, + 0.25424, + 0.2564, + 0.25502, + 0.25512, + 0.25496, + 0.25608, + 0.25874, + 0.25556, + 0.25554, + 0.25712, + 0.2583, + 0.25732, + 0.25662, + 0.25448 + ], + "test_acc": [ + 0.2652, + 0.2592, + 0.2598, + 0.24, + 0.2463, + 0.2675, + 0.2477, + 0.2644, + 0.2193, + 0.2485, + 0.2463, + 0.2506, + 0.2573, + 0.261, + 0.2639, + 0.2619, + 0.2542, + 0.2491, + 0.2607, + 0.2572, + 0.2644, + 0.2546, + 0.2564, + 0.2631, + 0.2536, + 0.2618, + 0.2623, + 0.2654, + 0.2622, + 0.259, + 0.2608, + 0.2555, + 0.2803, + 0.2565, + 0.2723, + 0.2608, + 0.2661, + 0.2633, + 0.2512, + 0.2564, + 0.2686, + 0.2678, + 0.2687, + 0.266, + 0.2684, + 0.2691, + 0.2695, + 0.2661, + 0.2716, + 0.269, + 0.2681, + 0.2739, + 0.2661, + 0.2627, + 0.2673, + 0.264, + 0.2703, + 0.2719, + 0.2653, + 0.2734, + 0.2601, + 0.2622, + 0.2775, + 0.2672, + 0.2615, + 0.2804, + 0.2608, + 0.2719, + 0.2634, + 0.2672, + 0.2714, + 0.2629, + 0.2664, + 0.2664, + 0.2719, + 0.2688, + 0.2724, + 0.2737, + 0.2737, + 0.2663, + 0.2694, + 0.272, + 0.271, + 0.2699, + 0.2654, + 0.2695, + 0.2711, + 0.2699, + 0.2695, + 0.2719, + 0.2682, + 0.2674, + 0.2688, + 0.2692, + 0.2683, + 0.2681, + 0.2691, + 0.2695, + 0.2694, + 0.2695 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.3369702696800232, + 0.0017256045248359442, + 0.0003115047584287822, + 0.0012309218291193247, + 0.0004706716863438487, + -0.0003515754360705614, + 0.0010358416475355625, + 0.00042472450877539814, + -0.00047611017362214625, + -0.001211655791848898, + -0.0013567307032644749, + 0.0005671238759532571 + ], + "perturbation_rho": [ + 0.008996784687042236, + 0.0, + 0.0, + -0.004853670950978994, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -1.7974525690078735e-07, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -6.239861249923706e-07, + 0.0, + 0.0, + -1.862645149230957e-09, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.01": [ + -2.148095518350601e-06, + 7.450580596923828e-09, + -3.725290298461914e-09, + -5.587935447692871e-09, + 0.0, + 0.0, + 1.862645149230957e-09, + 0.0, + 0.0, + 0.0, + -1.862645149230957e-09, + 0.0 + ] + }, + "hidden_norms_per_layer": [ + 50053.5078125, + 506126208.0, + 839627776.0, + 1433945984.0, + 2151404288.0, + 2646801152.0, + 2618418688.0, + 2616808960.0, + 2656328704.0, + 2846460672.0, + 3113679360.0, + 3486919424.0, + 3484421376.0 + ], + "bp_grad_norms_per_layer": [ + 2.0373111908611463e-07, + 5.87830117915189e-10, + 5.875596120752391e-10, + 5.888394216668758e-10, + 5.894798538186308e-10, + 5.895269272748749e-10, + 5.894864596456273e-10, + 5.894754129265323e-10, + 5.893150412106252e-10, + 5.894901788927598e-10, + 5.895146593104528e-10, + 5.898119770364474e-10, + 5.898875832244244e-10 + ] + }, + "drift": { + "embed.weight": 406.5158433791385, + "embed.bias": 328.00460641981374, + "blocks.0.ln.weight": 10.621206283569336, + "blocks.0.w1.weight": 260.1775263539264, + "blocks.0.w1.bias": 261.5553235769218, + "blocks.0.w2.weight": 546.8623769606548, + "blocks.1.ln.weight": 9.573291778564453, + "blocks.1.w1.weight": 254.1321472468791, + "blocks.1.w1.bias": 229.33671096787808, + "blocks.1.w2.weight": 375.8430206058194, + "blocks.2.ln.weight": 10.723036766052246, + "blocks.2.w1.weight": 312.23440184847425, + "blocks.2.w1.bias": 301.75305024697, + "blocks.2.w2.weight": 465.9606227271888, + "blocks.3.ln.weight": 11.675829887390137, + "blocks.3.w1.weight": 329.12792372114694, + "blocks.3.w1.bias": 336.1713580136849, + "blocks.3.w2.weight": 479.62371125689094, + "blocks.4.ln.weight": 12.166597366333008, + "blocks.4.w1.weight": 342.5184155405496, + "blocks.4.w1.bias": 331.25442650567624, + "blocks.4.w2.weight": 473.58945710721196, + "blocks.5.ln.weight": 8.257390022277832, + "blocks.5.w1.weight": 224.17786513425023, + "blocks.5.w1.bias": 206.55166933090487, + "blocks.5.w2.weight": 310.58552361575914, + "blocks.6.ln.weight": 9.497365951538086, + "blocks.6.w1.weight": 257.28476030419915, + "blocks.6.w1.bias": 241.49147853808202, + "blocks.6.w2.weight": 326.8691329729593, + "blocks.7.ln.weight": 8.255859375, + "blocks.7.w1.weight": 200.14594495243745, + "blocks.7.w1.bias": 191.91816529358212, + "blocks.7.w2.weight": 257.1419920590999, + "blocks.8.ln.weight": 11.01574993133545, + "blocks.8.w1.weight": 307.85736409735574, + "blocks.8.w1.bias": 281.3908916662528, + "blocks.8.w2.weight": 408.1843979772796, + "blocks.9.ln.weight": 11.364969253540039, + "blocks.9.w1.weight": 305.9120817067219, + "blocks.9.w1.bias": 279.3079525981705, + "blocks.9.w2.weight": 396.62586594267685, + "blocks.10.ln.weight": 11.884243965148926, + "blocks.10.w1.weight": 334.91020121291007, + "blocks.10.w1.bias": 324.23156540936054, + "blocks.10.w2.weight": 491.4126848947247, + "blocks.11.ln.weight": 11.255571365356445, + "blocks.11.w1.weight": 308.006589638926, + "blocks.11.w1.bias": 292.5786848876635, + "blocks.11.w2.weight": 425.11018085204205, + "out_ln.weight": 0.7832708358764648, + "out_head.weight": 8.078985322489435, + "out_head.bias": 0.29867781036736046 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.049650134963989, + 1.973029051513672, + 1.9459494330596925, + 1.9263736043548585, + 1.918336088027954, + 1.9090399783706664, + 1.9018680041122435, + 1.9004452823638915, + 1.8939352771759033, + 1.8905867196655273, + 1.88599944606781, + 1.8800309131622315, + 1.8739088614273072, + 1.8683558203125, + 1.8653341689300538, + 1.8583481198120118, + 1.8549240982055664, + 1.8496921838760376, + 1.8435221142959595, + 1.8398676053619385, + 1.8327734576416015, + 1.8336225759124756, + 1.8276614254379273, + 1.8216393673324585, + 1.8191773874664308, + 1.8113161963653563, + 1.8087092595672607, + 1.8040201818084718, + 1.7976001425170898, + 1.795108130493164, + 1.7909190823745726, + 1.7871669284820557, + 1.7862169911956787, + 1.7830527084350587, + 1.7802653800201416, + 1.7863243228912353, + 1.786163843460083, + 1.7841941988754273, + 1.7805864434814453, + 1.7733078927230834, + 1.7747596154022216, + 1.772979263381958, + 1.7702470769882201, + 1.766930933494568, + 1.7668507308197021, + 1.7599455773544312, + 1.7646458086395265, + 1.7616525787734985, + 1.760625428390503, + 1.7602823944473267, + 1.7544173859024048, + 1.7526818951797485, + 1.7567130640029907, + 1.7525626901245117, + 1.7489151987075806, + 1.7496014617919922, + 1.750472166748047, + 1.7476551281738282, + 1.7476194573974608, + 1.7486230560302733, + 1.7486302239227296, + 1.7496078895950318, + 1.745147467918396, + 1.745377778968811, + 1.7445024374771119, + 1.746578332748413, + 1.7441591314697265, + 1.7423080096817016, + 1.7424391454315185, + 1.7420111348724365, + 1.7456162438964844, + 1.740698592147827, + 1.74056429561615, + 1.7388315572738648, + 1.7365661661529541, + 1.7376085388565063, + 1.7377186611557007, + 1.7358070825958252, + 1.7348824306488038, + 1.7347428383636474, + 1.734776619567871, + 1.7334602453994752, + 1.7353667811965943, + 1.7360192542266846, + 1.7331659270477295, + 1.7365653960800171, + 1.735952963256836, + 1.7346072133255004, + 1.7329498879623413, + 1.7314477652740479, + 1.736275334815979, + 1.736558514175415, + 1.7318700998306273, + 1.7343199029159546, + 1.7351305802154542, + 1.7342404050064086, + 1.7299409299087525, + 1.730475319519043, + 1.7300176064300536, + 1.7322037688064575 + ], + "train_acc": [ + 0.24164, + 0.27262, + 0.28936, + 0.2973, + 0.2997, + 0.30452, + 0.30772, + 0.30798, + 0.31336, + 0.31634, + 0.3196, + 0.31996, + 0.32114, + 0.32864, + 0.32748, + 0.3284, + 0.33336, + 0.33396, + 0.33538, + 0.33648, + 0.34296, + 0.34042, + 0.3434, + 0.34368, + 0.34544, + 0.35258, + 0.35284, + 0.35284, + 0.3565, + 0.35634, + 0.35946, + 0.35858, + 0.3638, + 0.36276, + 0.36302, + 0.36144, + 0.36354, + 0.3642, + 0.36578, + 0.36918, + 0.36566, + 0.36788, + 0.36728, + 0.36864, + 0.3674, + 0.37124, + 0.36972, + 0.37258, + 0.37202, + 0.3714, + 0.37326, + 0.37558, + 0.37402, + 0.37572, + 0.37648, + 0.3754, + 0.37748, + 0.37704, + 0.37678, + 0.3756, + 0.3764, + 0.37632, + 0.37764, + 0.37716, + 0.37722, + 0.37784, + 0.37756, + 0.37846, + 0.3795, + 0.38056, + 0.37696, + 0.378, + 0.37894, + 0.37918, + 0.38124, + 0.37876, + 0.38168, + 0.38232, + 0.3829, + 0.3832, + 0.38284, + 0.3823, + 0.3804, + 0.38132, + 0.3823, + 0.3801, + 0.3822, + 0.38338, + 0.38094, + 0.38014, + 0.38042, + 0.38178, + 0.3816, + 0.38086, + 0.38378, + 0.3806, + 0.38276, + 0.38406, + 0.38342, + 0.38124 + ], + "test_acc": [ + 0.285, + 0.3059, + 0.321, + 0.3096, + 0.3312, + 0.3302, + 0.3228, + 0.3481, + 0.3321, + 0.3399, + 0.3381, + 0.3452, + 0.3487, + 0.353, + 0.3556, + 0.3522, + 0.3601, + 0.3548, + 0.3545, + 0.3601, + 0.3639, + 0.3662, + 0.3599, + 0.3704, + 0.376, + 0.384, + 0.3756, + 0.3748, + 0.3822, + 0.3781, + 0.3928, + 0.383, + 0.3873, + 0.3871, + 0.3898, + 0.3911, + 0.3906, + 0.3919, + 0.3913, + 0.3917, + 0.3974, + 0.3933, + 0.3945, + 0.3961, + 0.4018, + 0.3979, + 0.4008, + 0.4002, + 0.3904, + 0.3997, + 0.3978, + 0.4013, + 0.4005, + 0.3988, + 0.3977, + 0.3977, + 0.3988, + 0.4007, + 0.4037, + 0.4013, + 0.3994, + 0.4027, + 0.401, + 0.4011, + 0.4005, + 0.4059, + 0.4028, + 0.4058, + 0.4074, + 0.4029, + 0.4066, + 0.4041, + 0.4077, + 0.4053, + 0.4073, + 0.4054, + 0.4047, + 0.4053, + 0.406, + 0.4084, + 0.4073, + 0.4093, + 0.4042, + 0.4078, + 0.4072, + 0.4083, + 0.4063, + 0.4072, + 0.4064, + 0.4048, + 0.4085, + 0.4079, + 0.4088, + 0.4081, + 0.4075, + 0.4085, + 0.4084, + 0.4087, + 0.4089, + 0.409 + ] + }, + "diagnostics": { + "bp_cosine": [ + -0.002191243227571249, + 0.0590691976249218, + 0.0698356181383133, + 0.13605648279190063, + 0.08923564106225967, + 0.015668585896492004, + -0.07714484632015228, + 0.01213260181248188, + -0.0025911303237080574, + -0.04502572491765022, + -0.009707875549793243, + 0.9995251893997192 + ], + "perturbation_rho": [ + -0.043511975556612015, + -0.037262365221977234, + -0.03251894563436508, + 0.004958000499755144, + -0.02477085031569004, + 0.03360602259635925, + -0.005741700530052185, + 0.060581792145967484, + -0.0035040113143622875, + 0.014635683037340641, + -0.027158895507454872, + -0.040769003331661224 + ], + "nudging": { + "0.001": [ + 3.6729034036397934e-07, + -4.2887404561042786e-07, + -1.8009450286626816e-07, + -2.9650982469320297e-07, + -1.4586839824914932e-07, + -2.0139850676059723e-08, + 1.0035000741481781e-07, + 1.641456037759781e-08, + -3.4924596548080444e-09, + -2.0954757928848267e-08, + -5.9371814131736755e-09, + -1.025269739329815e-06 + ], + "0.003": [ + 1.1167721822857857e-06, + -1.264386810362339e-06, + -6.126938387751579e-07, + -7.883645594120026e-07, + -4.4063199311494827e-07, + -3.41096892952919e-08, + 2.60770320892334e-07, + -5.6694261729717255e-08, + -5.587935447692871e-09, + 1.5133991837501526e-07, + 2.8405338525772095e-08, + -3.802357241511345e-06 + ], + "0.01": [ + 3.6997953429818153e-06, + -4.11586370319128e-06, + -1.989188604056835e-06, + -2.64833215624094e-06, + -1.3904646039009094e-06, + -2.3422762751579285e-07, + 9.041978046298027e-07, + -2.2142194211483002e-07, + 1.0477378964424133e-08, + 5.260808393359184e-07, + 3.888271749019623e-08, + -1.2977398000657558e-05 + ] + }, + "hidden_norms_per_layer": [ + 4480.74365234375, + 31138.357421875, + 92759.8359375, + 106458.953125, + 140787.828125, + 222385.875, + 450712.65625, + 609736.25, + 665552.8125, + 762367.8125, + 785119.4375, + 812034.75, + 413068.875 + ], + "bp_grad_norms_per_layer": [ + 2.8362848752294667e-05, + 2.841563627953292e-06, + 1.476502120567602e-06, + 1.0035049626822001e-06, + 8.074727020357386e-07, + 7.039782303763786e-07, + 6.961119538573257e-07, + 6.961448093534273e-07, + 6.940574621694395e-07, + 6.940469461369503e-07, + 6.940644539099594e-07, + 6.928050879650982e-07, + 6.924681770215102e-07 + ] + }, + "drift": { + "embed.weight": 49.91851185408626, + "embed.bias": 17.86594759988079, + "blocks.0.ln.weight": 1.3650614023208618, + "blocks.0.w1.weight": 15.146734124129205, + "blocks.0.w1.bias": 11.038570361897584, + "blocks.0.w2.weight": 73.17905982531522, + "blocks.1.ln.weight": 1.2106239795684814, + "blocks.1.w1.weight": 15.587515080559209, + "blocks.1.w1.bias": 8.439216877390802, + "blocks.1.w2.weight": 58.68756094487398, + "blocks.2.ln.weight": 1.2199766635894775, + "blocks.2.w1.weight": 16.452854749419053, + "blocks.2.w1.bias": 7.664067486265118, + "blocks.2.w2.weight": 57.4145143245269, + "blocks.3.ln.weight": 0.9999213218688965, + "blocks.3.w1.weight": 15.45370831650007, + "blocks.3.w1.bias": 9.701139813225756, + "blocks.3.w2.weight": 50.62047330019419, + "blocks.4.ln.weight": 0.9458633661270142, + "blocks.4.w1.weight": 17.169635878784078, + "blocks.4.w1.bias": 12.344329529016134, + "blocks.4.w2.weight": 46.31906604368206, + "blocks.5.ln.weight": 0.9830385446548462, + "blocks.5.w1.weight": 19.826223976248237, + "blocks.5.w1.bias": 17.42019988793343, + "blocks.5.w2.weight": 54.57298040844193, + "blocks.6.ln.weight": 0.7866309881210327, + "blocks.6.w1.weight": 18.567239259563237, + "blocks.6.w1.bias": 18.51093768753787, + "blocks.6.w2.weight": 37.907911398349064, + "blocks.7.ln.weight": 0.664797842502594, + "blocks.7.w1.weight": 16.223960253839458, + "blocks.7.w1.bias": 17.78980163184235, + "blocks.7.w2.weight": 33.05609365656536, + "blocks.8.ln.weight": 0.7077108025550842, + "blocks.8.w1.weight": 17.30902246074257, + "blocks.8.w1.bias": 18.027686468269295, + "blocks.8.w2.weight": 34.213298812674346, + "blocks.9.ln.weight": 0.6533149480819702, + "blocks.9.w1.weight": 15.761533820678208, + "blocks.9.w1.bias": 16.051257049533785, + "blocks.9.w2.weight": 28.711206238147582, + "blocks.10.ln.weight": 0.6785529255867004, + "blocks.10.w1.weight": 14.454064107657125, + "blocks.10.w1.bias": 14.910736898942519, + "blocks.10.w2.weight": 33.53805959707834, + "blocks.11.ln.weight": 0.790199339389801, + "blocks.11.w1.weight": 21.739355051784727, + "blocks.11.w1.bias": 26.75768188676886, + "blocks.11.w2.weight": 38.60387777810455, + "out_ln.weight": 0.26305317878723145, + "out_head.weight": 4.037201408884711, + "out_head.bias": 0.8009701595278154 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 256, + "num_blocks": 12, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 4 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d256_L12_seed4", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file diff --git a/results/fa_dfa_d256_L8_seed0/results_cifar10.json b/results/fa_dfa_d256_L8_seed0/results_cifar10.json new file mode 100644 index 0000000..2b2a9de --- /dev/null +++ b/results/fa_dfa_d256_L8_seed0/results_cifar10.json @@ -0,0 +1,881 @@ +{ + "0": { + "dfa": { + "log": { + "train_loss": [ + 2.088111647720337, + 2.0657437747192384, + 2.060557443847656, + 2.054936630554199, + 2.0502566048431397, + 2.050682597808838, + 2.0456129079437257, + 2.0430596421813965, + 2.0408702920913697, + 2.0377991006469727, + 2.0389654118347167, + 2.0398873892593383, + 2.0346363846588136, + 2.033068787918091, + 2.034021237792969, + 2.0311379262542726, + 2.027860624542236, + 2.028515229873657, + 2.0275459106063844, + 2.0274439572143557, + 2.027140206680298, + 2.0251305793762207, + 2.0244256614685057, + 2.0226621907043456, + 2.024185022583008, + 2.0238217923736572, + 2.0209359241485596, + 2.021500840301514, + 2.02022657661438, + 2.0203428337097167, + 2.019736885719299, + 2.0196730166625976, + 2.019779687271118, + 2.0194025174331665, + 2.018459284744263, + 2.0198610536193846, + 2.0196038846588134, + 2.0184577390289307, + 2.0181497329711915, + 2.0173270440292357, + 2.015399761352539, + 2.017926648864746, + 2.0153606281280516, + 2.0176641104507445, + 2.0169564332580565, + 2.0160911991119383, + 2.013510660934448, + 2.0139562338256836, + 2.0157755657196046, + 2.016321997756958, + 2.013838740501404, + 2.0138234704589846, + 2.014709682159424, + 2.0130411010742186, + 2.013964093017578, + 2.014050467605591, + 2.010925949783325, + 2.0116148568344117, + 2.01314746383667, + 2.0142779718780517, + 2.0111367880249023, + 2.0113327654266357, + 2.012854333572388, + 2.0125641105651857, + 2.0123231687927245, + 2.0098405307006835, + 2.014055180015564, + 2.0095224347686766, + 2.0092305796813963, + 2.0110340044403077, + 2.011887366104126, + 2.0092078099823, + 2.0123240684509276, + 2.0100160660171507, + 2.0095057219696044, + 2.0116754064559936, + 2.010024980697632, + 2.0104703690338135, + 2.010904312362671, + 2.009006942138672, + 2.00869906539917, + 2.011290683441162, + 2.007994732589722, + 2.010743540802002, + 2.0077583860778807, + 2.010294546661377, + 2.008463388710022, + 2.009644501991272, + 2.008040932922363, + 2.006879263572693, + 2.0084022887420656, + 2.0101632648468017, + 2.0085393448638915, + 2.0086699520492552, + 2.0095553454971316, + 2.0090075594329835, + 2.0092781968688964, + 2.0092810621643067, + 2.008181187438965, + 2.008701148452759 + ], + "train_acc": [ + 0.23208, + 0.23714, + 0.23872, + 0.24144, + 0.24046, + 0.24026, + 0.24398, + 0.24838, + 0.25002, + 0.25004, + 0.24848, + 0.25134, + 0.25284, + 0.25182, + 0.25088, + 0.25354, + 0.2558, + 0.25726, + 0.25768, + 0.2579, + 0.25742, + 0.25918, + 0.26104, + 0.26256, + 0.26114, + 0.26408, + 0.26146, + 0.26318, + 0.26164, + 0.26194, + 0.2647, + 0.26364, + 0.26356, + 0.26318, + 0.26682, + 0.26424, + 0.26142, + 0.26382, + 0.26662, + 0.26438, + 0.26712, + 0.26532, + 0.26594, + 0.26736, + 0.2675, + 0.26522, + 0.2692, + 0.269, + 0.26666, + 0.26752, + 0.2675, + 0.26926, + 0.26798, + 0.27058, + 0.26658, + 0.26826, + 0.26928, + 0.26994, + 0.2699, + 0.26602, + 0.26866, + 0.26892, + 0.2706, + 0.27134, + 0.2703, + 0.27044, + 0.27026, + 0.26908, + 0.27206, + 0.27296, + 0.26956, + 0.27134, + 0.27088, + 0.27122, + 0.27302, + 0.26934, + 0.27088, + 0.27084, + 0.26988, + 0.27138, + 0.27164, + 0.27134, + 0.27262, + 0.27112, + 0.2701, + 0.27036, + 0.27144, + 0.27058, + 0.27068, + 0.27358, + 0.27096, + 0.2722, + 0.27196, + 0.27312, + 0.27102, + 0.2715, + 0.2707, + 0.27166, + 0.27168, + 0.27296 + ], + "test_acc": [ + 0.2527, + 0.2369, + 0.2462, + 0.2468, + 0.2408, + 0.2746, + 0.2703, + 0.2742, + 0.2599, + 0.2725, + 0.2825, + 0.2652, + 0.274, + 0.2721, + 0.2844, + 0.284, + 0.2791, + 0.2452, + 0.2835, + 0.2744, + 0.2688, + 0.2771, + 0.2812, + 0.2906, + 0.2721, + 0.2732, + 0.2831, + 0.2869, + 0.2833, + 0.2796, + 0.2921, + 0.2921, + 0.2884, + 0.2765, + 0.2871, + 0.2752, + 0.2937, + 0.2931, + 0.2944, + 0.2846, + 0.2897, + 0.279, + 0.2762, + 0.2923, + 0.2886, + 0.2881, + 0.2945, + 0.2907, + 0.2894, + 0.2989, + 0.2886, + 0.2845, + 0.2971, + 0.2895, + 0.2979, + 0.2831, + 0.2946, + 0.2923, + 0.2835, + 0.2971, + 0.2911, + 0.2881, + 0.2926, + 0.2971, + 0.2955, + 0.29, + 0.2908, + 0.2875, + 0.2985, + 0.2949, + 0.2881, + 0.2949, + 0.2848, + 0.2922, + 0.2967, + 0.2925, + 0.2881, + 0.2987, + 0.2907, + 0.2969, + 0.294, + 0.2943, + 0.2935, + 0.2909, + 0.2896, + 0.295, + 0.2953, + 0.2912, + 0.2923, + 0.2936, + 0.2948, + 0.2928, + 0.2943, + 0.2948, + 0.2929, + 0.2931, + 0.293, + 0.293, + 0.293, + 0.2932 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.3739473223686218, + 0.0001710604556137696, + -0.00010273385123582557, + 0.0015739527298137546, + -0.0010641771368682384, + -0.00083403434837237, + -0.0002269457036163658, + 0.00010728999768616632 + ], + "perturbation_rho": [ + -0.012351546436548233, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -3.0873343348503113e-07, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 9.313225746154785e-10, + 0.0 + ], + "0.003": [ + -8.507631719112396e-07, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 2.7939677238464355e-09, + 0.0 + ], + "0.01": [ + -2.818182110786438e-06, + 1.862645149230957e-09, + 0.0, + -5.587935447692871e-09, + 0.0, + -5.587935447692871e-09, + 2.7939677238464355e-09, + 0.0 + ] + }, + "hidden_norms_per_layer": [ + 39928.015625, + 244928880.0, + 1463614080.0, + 1492651648.0, + 2068440832.0, + 2077208832.0, + 2699950336.0, + 3167998976.0, + 3559058432.0 + ], + "bp_grad_norms_per_layer": [ + 2.7729842599910626e-07, + 3.92615467914581e-10, + 3.8261058210586896e-10, + 3.827221872754194e-10, + 3.839246975889665e-10, + 3.838843132264458e-10, + 3.8309594385665946e-10, + 3.836255757505569e-10, + 3.848031893127768e-10 + ] + }, + "drift": { + "embed.weight": 355.03769309377134, + "embed.bias": 275.50497763057285, + "blocks.0.ln.weight": 11.560415267944336, + "blocks.0.w1.weight": 217.0016785191733, + "blocks.0.w1.bias": 194.48945621979058, + "blocks.0.w2.weight": 512.6024444572738, + "blocks.1.ln.weight": 11.132081031799316, + "blocks.1.w1.weight": 325.51184428406805, + "blocks.1.w1.bias": 320.01996630850886, + "blocks.1.w2.weight": 501.7286978738936, + "blocks.2.ln.weight": 8.083645820617676, + "blocks.2.w1.weight": 207.09861044191072, + "blocks.2.w1.bias": 194.5111269286842, + "blocks.2.w2.weight": 289.82283688588427, + "blocks.3.ln.weight": 11.859946250915527, + "blocks.3.w1.weight": 335.0771544598146, + "blocks.3.w1.bias": 320.92428204403967, + "blocks.3.w2.weight": 448.5070845635642, + "blocks.4.ln.weight": 7.86250114440918, + "blocks.4.w1.weight": 209.85336811707023, + "blocks.4.w1.bias": 197.98745993011175, + "blocks.4.w2.weight": 286.43335604265127, + "blocks.5.ln.weight": 11.988594055175781, + "blocks.5.w1.weight": 340.3597967146831, + "blocks.5.w1.bias": 320.4706779192804, + "blocks.5.w2.weight": 499.7442316358972, + "blocks.6.ln.weight": 13.202707290649414, + "blocks.6.w1.weight": 376.2778384149751, + "blocks.6.w1.bias": 364.2875039001891, + "blocks.6.w2.weight": 540.0867846092441, + "blocks.7.ln.weight": 11.965360641479492, + "blocks.7.w1.weight": 335.1786572574944, + "blocks.7.w1.bias": 321.09352323787965, + "blocks.7.w2.weight": 471.5612368801969, + "out_ln.weight": 0.7452844381332397, + "out_head.weight": 8.201945333242003, + "out_head.bias": 0.3773728853334405 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.054669055023193, + 1.991719011001587, + 1.9774182040405273, + 1.9712983444976806, + 1.9631762102508545, + 1.959303684387207, + 1.9460533142852783, + 1.9368816702270508, + 1.9278904293060304, + 1.9154337232208252, + 1.912510458755493, + 1.908025641517639, + 1.8988712897491455, + 1.8962811010742187, + 1.8920437637329102, + 1.8893064358520508, + 1.8833487548446655, + 1.882022890625, + 1.8800041366958617, + 1.8740528832626342, + 1.8704966924285888, + 1.8655656155776978, + 1.860756079750061, + 1.8544422786712647, + 1.851064347305298, + 1.8484665994262695, + 1.841072390975952, + 1.8392392129898072, + 1.8378569499969482, + 1.8346193454742432, + 1.82977863861084, + 1.8301198516082764, + 1.8263721365356445, + 1.8249066296768188, + 1.8208455069732665, + 1.8226783791351318, + 1.8220160707855224, + 1.8191678134155274, + 1.81650698387146, + 1.8159678020858765, + 1.810044068222046, + 1.810330848388672, + 1.8103008676910401, + 1.8088883145523071, + 1.8075827156829833, + 1.806718647994995, + 1.8019089365005494, + 1.8028275075531006, + 1.8002472192764283, + 1.8013759677886962, + 1.7999095865631103, + 1.7962470398712158, + 1.7984889847564698, + 1.7963330797958375, + 1.7923067237091064, + 1.793927209777832, + 1.7897032806015014, + 1.7885324808120728, + 1.7894811852645873, + 1.7884446933364868, + 1.7875976447296142, + 1.78652889251709, + 1.7869972435760497, + 1.7855236776351928, + 1.7861184851074219, + 1.781340026550293, + 1.7873938134002685, + 1.7781473917388917, + 1.7804266613006592, + 1.781290369796753, + 1.7821873314666747, + 1.7779191442489624, + 1.780537197303772, + 1.772837181777954, + 1.7783816800689698, + 1.7780200393676757, + 1.7768787881851196, + 1.7751888537216187, + 1.7765182915496827, + 1.774619035987854, + 1.7722581442260743, + 1.7778081979751588, + 1.7721048584365844, + 1.773922477798462, + 1.7717991860198974, + 1.7731413928604125, + 1.7743992209243775, + 1.7737348601913452, + 1.7736994393920897, + 1.7697040644073487, + 1.77587783908844, + 1.770644986305237, + 1.7736803650283814, + 1.7717780599975586, + 1.773167521095276, + 1.7733910553359986, + 1.7736449721527099, + 1.772689087753296, + 1.7709570954132081, + 1.7717201139068603 + ], + "train_acc": [ + 0.2363, + 0.25884, + 0.26796, + 0.2734, + 0.27986, + 0.28188, + 0.2881, + 0.2929, + 0.30118, + 0.3028, + 0.30632, + 0.3073, + 0.31098, + 0.31382, + 0.3148, + 0.31472, + 0.31668, + 0.32106, + 0.32314, + 0.32434, + 0.32476, + 0.32848, + 0.33132, + 0.33338, + 0.33446, + 0.33834, + 0.33702, + 0.33846, + 0.34004, + 0.34296, + 0.34586, + 0.34382, + 0.34452, + 0.34658, + 0.3491, + 0.348, + 0.34758, + 0.35232, + 0.34918, + 0.34832, + 0.35334, + 0.35362, + 0.35472, + 0.35388, + 0.35442, + 0.35508, + 0.35912, + 0.35806, + 0.3582, + 0.3571, + 0.35762, + 0.35974, + 0.35776, + 0.35856, + 0.36102, + 0.36058, + 0.35984, + 0.3606, + 0.36244, + 0.36282, + 0.35994, + 0.36342, + 0.3626, + 0.36326, + 0.36462, + 0.3634, + 0.36056, + 0.36662, + 0.3661, + 0.3627, + 0.36522, + 0.36638, + 0.36216, + 0.36748, + 0.3647, + 0.36344, + 0.36558, + 0.36694, + 0.36642, + 0.36922, + 0.36934, + 0.3653, + 0.36824, + 0.36604, + 0.36928, + 0.3695, + 0.36792, + 0.36662, + 0.3691, + 0.37096, + 0.3664, + 0.36928, + 0.36774, + 0.36802, + 0.36876, + 0.37062, + 0.36878, + 0.36726, + 0.36896, + 0.3694 + ], + "test_acc": [ + 0.2656, + 0.267, + 0.2815, + 0.2894, + 0.2983, + 0.305, + 0.3102, + 0.3197, + 0.3109, + 0.3358, + 0.3417, + 0.3398, + 0.3428, + 0.3516, + 0.3564, + 0.3525, + 0.36, + 0.3373, + 0.3487, + 0.3577, + 0.3604, + 0.3636, + 0.3618, + 0.3746, + 0.3691, + 0.3657, + 0.3717, + 0.376, + 0.3682, + 0.3703, + 0.3784, + 0.3781, + 0.3758, + 0.3784, + 0.3811, + 0.3762, + 0.3767, + 0.3788, + 0.3817, + 0.3799, + 0.3798, + 0.3727, + 0.3786, + 0.3853, + 0.3824, + 0.3821, + 0.3829, + 0.3834, + 0.3843, + 0.3837, + 0.3867, + 0.3812, + 0.3878, + 0.3848, + 0.3881, + 0.385, + 0.3903, + 0.3885, + 0.384, + 0.3856, + 0.3901, + 0.3889, + 0.3863, + 0.3912, + 0.3866, + 0.3914, + 0.3872, + 0.3883, + 0.3914, + 0.3871, + 0.3877, + 0.39, + 0.3872, + 0.3867, + 0.3929, + 0.3915, + 0.3904, + 0.3913, + 0.3909, + 0.3953, + 0.3943, + 0.3937, + 0.3928, + 0.391, + 0.3907, + 0.3928, + 0.3939, + 0.3922, + 0.3922, + 0.3933, + 0.3934, + 0.3924, + 0.3934, + 0.3921, + 0.3934, + 0.3923, + 0.3926, + 0.3927, + 0.3927, + 0.3926 + ] + }, + "diagnostics": { + "bp_cosine": [ + -0.0004061758518218994, + 0.059665147215127945, + -0.00023904931731522083, + 0.04721151292324066, + -0.0009337564697489142, + -0.028840316459536552, + -0.06716784834861755, + 0.9960443377494812 + ], + "perturbation_rho": [ + 0.010079916566610336, + -0.012312020175158978, + 0.0198403000831604, + 0.01036103069782257, + -0.03565298020839691, + 0.012246890924870968, + 0.005117389373481274, + -0.0020000701770186424 + ], + "nudging": { + "0.001": [ + 3.466848284006119e-07, + -1.7741695046424866e-07, + 2.514570951461792e-08, + -2.8405338525772095e-08, + 1.83936208486557e-08, + -2.3283064365386963e-08, + 6.658956408500671e-08, + -1.2598466128110886e-06 + ], + "0.003": [ + 7.851049304008484e-07, + -5.613546818494797e-07, + 2.7706846594810486e-08, + -2.3329630494117737e-07, + -2.3283064365386963e-09, + 5.634501576423645e-08, + 3.5390257835388184e-07, + -4.384666681289673e-06 + ], + "0.01": [ + 2.975575625896454e-06, + -2.0687002688646317e-06, + -4.6100467443466187e-08, + -7.050111889839172e-07, + -2.468004822731018e-08, + 3.227032721042633e-07, + 9.997747838497162e-07, + -1.5316996723413467e-05 + ] + }, + "hidden_norms_per_layer": [ + 7347.82373046875, + 68318.359375, + 457658.875, + 591822.25, + 709413.875, + 821043.4375, + 991908.875, + 1041150.375, + 579088.875 + ], + "bp_grad_norms_per_layer": [ + 1.785214408300817e-05, + 1.5171449376794044e-06, + 8.703580078872619e-07, + 8.675402227709128e-07, + 8.419559094363649e-07, + 8.420378776463622e-07, + 8.421089319199382e-07, + 8.421496886512614e-07, + 8.267679163509456e-07 + ] + }, + "drift": { + "embed.weight": 70.02122616764989, + "embed.bias": 26.83916405091808, + "blocks.0.ln.weight": 1.552919864654541, + "blocks.0.w1.weight": 18.50821863761758, + "blocks.0.w1.bias": 13.116624873818962, + "blocks.0.w2.weight": 79.63032020884965, + "blocks.1.ln.weight": 1.213975429534912, + "blocks.1.w1.weight": 22.51105074783387, + "blocks.1.w1.bias": 18.50130957549983, + "blocks.1.w2.weight": 54.64548642342237, + "blocks.2.ln.weight": 0.9077885150909424, + "blocks.2.w1.weight": 19.292010431221883, + "blocks.2.w1.bias": 19.155835150578653, + "blocks.2.w2.weight": 42.4050815881541, + "blocks.3.ln.weight": 1.022047758102417, + "blocks.3.w1.weight": 21.369964293701976, + "blocks.3.w1.bias": 20.66189871439757, + "blocks.3.w2.weight": 58.43715847809599, + "blocks.4.ln.weight": 1.0435632467269897, + "blocks.4.w1.weight": 22.336963383887138, + "blocks.4.w1.bias": 21.287544536829213, + "blocks.4.w2.weight": 50.161958879048186, + "blocks.5.ln.weight": 0.9771403670310974, + "blocks.5.w1.weight": 24.321956602631435, + "blocks.5.w1.bias": 24.827000479032748, + "blocks.5.w2.weight": 55.476803346488026, + "blocks.6.ln.weight": 1.0375924110412598, + "blocks.6.w1.weight": 22.401649066561536, + "blocks.6.w1.bias": 23.5963789264557, + "blocks.6.w2.weight": 53.39931725577594, + "blocks.7.ln.weight": 1.1293227672576904, + "blocks.7.w1.weight": 24.029515618281994, + "blocks.7.w1.bias": 23.963806041807718, + "blocks.7.w2.weight": 67.90721756504135, + "out_ln.weight": 0.33976390957832336, + "out_head.weight": 5.645579973389043, + "out_head.bias": 1.2526826889976983 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 256, + "num_blocks": 8, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 0 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d256_L8_seed0", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file diff --git a/results/fa_dfa_d256_L8_seed1/results_cifar10.json b/results/fa_dfa_d256_L8_seed1/results_cifar10.json new file mode 100644 index 0000000..cd78ab9 --- /dev/null +++ b/results/fa_dfa_d256_L8_seed1/results_cifar10.json @@ -0,0 +1,881 @@ +{ + "1": { + "dfa": { + "log": { + "train_loss": [ + 2.073248834991455, + 2.0561216505432127, + 2.0487407315063475, + 2.0397916249084473, + 2.0339059049224852, + 2.031160478439331, + 2.0272507332992555, + 2.0234306407928466, + 2.0243458992767334, + 2.0212309561920168, + 2.0213267765045164, + 2.0173781090927125, + 2.015773401260376, + 2.016544706039429, + 2.0143174066925047, + 2.015236302452087, + 2.016163635253906, + 2.0134117998504637, + 2.0144200479125978, + 2.013800316390991, + 2.0096480722045897, + 2.0113335668945314, + 2.0122015085601808, + 2.0122210752868654, + 2.0115504538345337, + 2.007763221130371, + 2.009548883666992, + 2.0078995152282717, + 2.0087636469268797, + 2.008320099105835, + 2.008820813369751, + 2.006456883468628, + 2.0074588145446777, + 2.008500497131348, + 2.0084443688964844, + 2.0064106777954103, + 2.0071660092544557, + 2.0042392598724366, + 2.0080142588043213, + 2.0092139667510986, + 2.0066707161712647, + 2.0068252431869507, + 2.0088330657958986, + 2.00828530166626, + 2.0062371633148195, + 2.0070145973205564, + 2.006389614715576, + 2.008107543182373, + 2.0061909777069094, + 2.0069403369522094, + 2.0080043979644775, + 2.0067166803359986, + 2.0072613145446776, + 2.0074930825042725, + 2.0079901724243165, + 2.00686990776062, + 2.007503282737732, + 2.0056561468124388, + 2.0081290016937254, + 2.0048272270965577, + 2.006778687286377, + 2.0057738173675537, + 2.004890469207764, + 2.0073782594299314, + 2.0066935089874267, + 2.005761955795288, + 2.006959405899048, + 2.006399262161255, + 2.006497128829956, + 2.0050615126800535, + 2.006692824935913, + 2.0056320917510986, + 2.0048789993286134, + 2.005718688735962, + 2.0067832677459716, + 2.004570386314392, + 2.004494798736572, + 2.0044968630599977, + 2.0067117002105714, + 2.0063977341461183, + 2.0049595592880247, + 2.0041789445495604, + 2.0047897937774657, + 2.0072312893676756, + 2.004594275741577, + 2.004407970199585, + 2.0041916183471677, + 2.002536773300171, + 2.0047487148284913, + 2.0024493196105957, + 2.0030294651794436, + 2.0037170224761964, + 2.0036521311187743, + 2.002812971572876, + 2.00589838760376, + 2.0043148944854736, + 2.005489154701233, + 2.0043797174072266, + 2.0031765459442137, + 2.0041999047851564 + ], + "train_acc": [ + 0.23724, + 0.24082, + 0.24538, + 0.25098, + 0.25208, + 0.25444, + 0.25704, + 0.25828, + 0.25656, + 0.25916, + 0.26014, + 0.26036, + 0.26374, + 0.25984, + 0.26378, + 0.26424, + 0.26426, + 0.26496, + 0.26426, + 0.2631, + 0.2658, + 0.26536, + 0.2682, + 0.2644, + 0.26904, + 0.27, + 0.27042, + 0.26654, + 0.27128, + 0.2687, + 0.26822, + 0.27002, + 0.26908, + 0.27106, + 0.2686, + 0.27238, + 0.27072, + 0.27068, + 0.26996, + 0.26948, + 0.27084, + 0.2695, + 0.2713, + 0.27054, + 0.27256, + 0.26982, + 0.26974, + 0.27168, + 0.27108, + 0.27182, + 0.27238, + 0.27258, + 0.27288, + 0.27236, + 0.27066, + 0.27214, + 0.27302, + 0.27214, + 0.27026, + 0.2738, + 0.27384, + 0.2733, + 0.27542, + 0.27348, + 0.27172, + 0.27406, + 0.27318, + 0.2763, + 0.27302, + 0.27414, + 0.27384, + 0.27542, + 0.27476, + 0.27312, + 0.27358, + 0.275, + 0.27514, + 0.27526, + 0.27434, + 0.2745, + 0.27498, + 0.2763, + 0.27358, + 0.27466, + 0.27588, + 0.27678, + 0.27598, + 0.2766, + 0.27502, + 0.27544, + 0.27712, + 0.27632, + 0.27598, + 0.2788, + 0.27462, + 0.27336, + 0.27738, + 0.27462, + 0.27666, + 0.27568 + ], + "test_acc": [ + 0.2766, + 0.2536, + 0.2625, + 0.2542, + 0.2703, + 0.271, + 0.2674, + 0.2674, + 0.2539, + 0.2792, + 0.2744, + 0.2832, + 0.2755, + 0.2799, + 0.2729, + 0.2704, + 0.2736, + 0.2734, + 0.2651, + 0.2756, + 0.2709, + 0.2847, + 0.2691, + 0.2717, + 0.278, + 0.2623, + 0.2801, + 0.2833, + 0.2587, + 0.272, + 0.2868, + 0.2876, + 0.2743, + 0.2912, + 0.2824, + 0.2835, + 0.2831, + 0.2874, + 0.2945, + 0.2793, + 0.2786, + 0.2921, + 0.2922, + 0.2856, + 0.2997, + 0.28, + 0.2756, + 0.2834, + 0.29, + 0.2877, + 0.2853, + 0.279, + 0.2947, + 0.2922, + 0.284, + 0.2894, + 0.2943, + 0.2887, + 0.2787, + 0.2891, + 0.29, + 0.2823, + 0.2815, + 0.2846, + 0.2876, + 0.2899, + 0.2918, + 0.2927, + 0.2944, + 0.2979, + 0.2868, + 0.286, + 0.2894, + 0.293, + 0.2877, + 0.2914, + 0.2892, + 0.2938, + 0.2884, + 0.2871, + 0.2869, + 0.2841, + 0.2897, + 0.2911, + 0.2882, + 0.2856, + 0.2901, + 0.2928, + 0.2905, + 0.2922, + 0.2897, + 0.2898, + 0.2902, + 0.2899, + 0.291, + 0.2906, + 0.2903, + 0.2901, + 0.2899, + 0.29 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.39772462844848633, + -0.002370076021179557, + 0.0021920399740338326, + -0.0014136324170976877, + 0.001076650689356029, + 0.001179476035758853, + 0.0019159603398293257, + -0.00038467111880891025 + ], + "perturbation_rho": [ + -0.014354787766933441, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -3.3620744943618774e-07, + 0.0, + 2.7939677238464355e-09, + 1.862645149230957e-09, + 0.0, + 0.0, + -1.862645149230957e-09, + 0.0 + ], + "0.003": [ + -1.0924413800239563e-06, + 9.313225746154785e-10, + 2.7939677238464355e-09, + 1.862645149230957e-09, + 0.0, + 0.0, + -3.725290298461914e-09, + 0.0 + ], + "0.01": [ + -3.4980475902557373e-06, + -6.51925802230835e-09, + 1.862645149230957e-09, + 1.862645149230957e-09, + 0.0, + 9.313225746154785e-10, + -3.725290298461914e-09, + 1.862645149230957e-09 + ] + }, + "hidden_norms_per_layer": [ + 45727.35546875, + 263312528.0, + 537438144.0, + 664858176.0, + 845261312.0, + 1070211328.0, + 1238112640.0, + 1792965120.0, + 1877736448.0 + ], + "bp_grad_norms_per_layer": [ + 3.161038364396518e-07, + 7.485224107206534e-10, + 7.498183740572983e-10, + 7.446125938059822e-10, + 7.443420324548811e-10, + 7.443580751775869e-10, + 7.442703675586415e-10, + 7.441662841500829e-10, + 7.441969818167138e-10 + ] + }, + "drift": { + "embed.weight": 357.86461919115555, + "embed.bias": 312.95737180504705, + "blocks.0.ln.weight": 10.188159942626953, + "blocks.0.w1.weight": 211.37068163903083, + "blocks.0.w1.bias": 223.0519629100973, + "blocks.0.w2.weight": 475.30658612467033, + "blocks.1.ln.weight": 9.102700233459473, + "blocks.1.w1.weight": 234.88719578297977, + "blocks.1.w1.bias": 224.13271789865033, + "blocks.1.w2.weight": 330.0486356799854, + "blocks.2.ln.weight": 8.034659385681152, + "blocks.2.w1.weight": 218.84814555155938, + "blocks.2.w1.bias": 202.24920680355018, + "blocks.2.w2.weight": 322.67560949424507, + "blocks.3.ln.weight": 8.901957511901855, + "blocks.3.w1.weight": 255.31555530937166, + "blocks.3.w1.bias": 231.65373117308073, + "blocks.3.w2.weight": 349.8458716057918, + "blocks.4.ln.weight": 9.597921371459961, + "blocks.4.w1.weight": 268.9594747130486, + "blocks.4.w1.bias": 248.71219274081335, + "blocks.4.w2.weight": 348.3370635956996, + "blocks.5.ln.weight": 8.862635612487793, + "blocks.5.w1.weight": 246.04530310618395, + "blocks.5.w1.bias": 236.82548627938067, + "blocks.5.w2.weight": 334.66771305862346, + "blocks.6.ln.weight": 11.917081832885742, + "blocks.6.w1.weight": 333.54126147765857, + "blocks.6.w1.bias": 313.5525629552238, + "blocks.6.w2.weight": 456.098261674838, + "blocks.7.ln.weight": 8.582963943481445, + "blocks.7.w1.weight": 238.44273828859957, + "blocks.7.w1.bias": 233.1627077875165, + "blocks.7.w2.weight": 333.95054465825257, + "out_ln.weight": 0.8174178600311279, + "out_head.weight": 9.070668717195536, + "out_head.bias": 0.3922775460932503 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.0616466424560547, + 2.0068159678649904, + 1.9794388083648682, + 1.9586813756561279, + 1.9426360791778565, + 1.9318589163589477, + 1.9205885326766967, + 1.9138255990600586, + 1.9087649211120605, + 1.9012188312149048, + 1.8993849408340455, + 1.889568119506836, + 1.8889932192230225, + 1.886743229675293, + 1.881684862098694, + 1.8811524909210204, + 1.8822594582366943, + 1.8778549640655517, + 1.8765599794769288, + 1.8783365441131592, + 1.8720827561950684, + 1.8732831771087646, + 1.8713359966659546, + 1.8697589547729492, + 1.8710370569610595, + 1.8677758638763429, + 1.8653087866973876, + 1.8623830352783204, + 1.860157603187561, + 1.8587954444885253, + 1.8555354092788696, + 1.851196873703003, + 1.8507367937850951, + 1.847343747253418, + 1.8439307402801515, + 1.8404593259048463, + 1.8425774618530273, + 1.834388339767456, + 1.838042152633667, + 1.8388391250610352, + 1.8344174416732788, + 1.8349179996109009, + 1.8346392430877685, + 1.834155149269104, + 1.830893579940796, + 1.83102765045166, + 1.8300759775543214, + 1.8317738976287843, + 1.8306509511947633, + 1.8280337787246703, + 1.8300961248779297, + 1.8254442840576173, + 1.827361393814087, + 1.8285963610839844, + 1.8233610538101197, + 1.8227714263534547, + 1.825165744857788, + 1.8187065335083008, + 1.8204853109359742, + 1.8197687387084962, + 1.8170435654449464, + 1.8141087036132812, + 1.8137724670410156, + 1.8160163790893555, + 1.8154231036376953, + 1.8116517877197265, + 1.81113690158844, + 1.8139338250732422, + 1.8109377099990844, + 1.8099997354125976, + 1.808261435623169, + 1.8079131549835206, + 1.8053869789886474, + 1.808599826927185, + 1.8066932723999023, + 1.8063899947738646, + 1.8004683576202392, + 1.803932279701233, + 1.8084900228500367, + 1.8048654077911377, + 1.8045777291107177, + 1.805527420272827, + 1.8058658325576782, + 1.8057460729980468, + 1.8027491494750976, + 1.7995332913589477, + 1.8038537915039063, + 1.8010695806884767, + 1.801180100402832, + 1.799683776512146, + 1.800211155319214, + 1.8025256408309938, + 1.801271418762207, + 1.8011763118743898, + 1.802209068031311, + 1.8032223471450806, + 1.8022766012191773, + 1.8044791673278808, + 1.799467188949585, + 1.8024462516021729 + ], + "train_acc": [ + 0.23552, + 0.25616, + 0.27266, + 0.2833, + 0.2882, + 0.29386, + 0.29772, + 0.30354, + 0.30432, + 0.3065, + 0.30664, + 0.31514, + 0.3148, + 0.31538, + 0.32214, + 0.31962, + 0.3197, + 0.32268, + 0.32306, + 0.32158, + 0.32436, + 0.32346, + 0.32862, + 0.32786, + 0.33004, + 0.33162, + 0.33204, + 0.33184, + 0.3347, + 0.33274, + 0.3368, + 0.33864, + 0.33874, + 0.33938, + 0.34288, + 0.34424, + 0.34244, + 0.34636, + 0.34196, + 0.34354, + 0.34362, + 0.34602, + 0.34536, + 0.3441, + 0.34752, + 0.34754, + 0.34766, + 0.34916, + 0.34726, + 0.34842, + 0.34642, + 0.34936, + 0.3475, + 0.34798, + 0.34848, + 0.35126, + 0.34852, + 0.3516, + 0.35024, + 0.34976, + 0.35364, + 0.3529, + 0.35252, + 0.35444, + 0.35268, + 0.35314, + 0.35424, + 0.3535, + 0.3522, + 0.35664, + 0.35702, + 0.35622, + 0.35608, + 0.35676, + 0.35834, + 0.35704, + 0.359, + 0.35584, + 0.35488, + 0.35586, + 0.35672, + 0.35682, + 0.3565, + 0.35698, + 0.3576, + 0.35854, + 0.35728, + 0.3583, + 0.35986, + 0.36106, + 0.35794, + 0.3601, + 0.35654, + 0.3593, + 0.35772, + 0.3568, + 0.35766, + 0.35846, + 0.3578, + 0.36002 + ], + "test_acc": [ + 0.2672, + 0.262, + 0.2993, + 0.3091, + 0.3147, + 0.3179, + 0.3358, + 0.33, + 0.3176, + 0.34, + 0.34, + 0.3502, + 0.3515, + 0.3545, + 0.3441, + 0.3566, + 0.3555, + 0.3536, + 0.3524, + 0.3667, + 0.3434, + 0.3625, + 0.3608, + 0.365, + 0.3656, + 0.3608, + 0.372, + 0.376, + 0.3609, + 0.3628, + 0.3688, + 0.3727, + 0.3679, + 0.3753, + 0.372, + 0.3693, + 0.3738, + 0.3656, + 0.3765, + 0.3747, + 0.3699, + 0.3733, + 0.3738, + 0.3738, + 0.382, + 0.3755, + 0.3781, + 0.3784, + 0.3822, + 0.3797, + 0.3736, + 0.379, + 0.3821, + 0.3765, + 0.3776, + 0.3751, + 0.3767, + 0.3844, + 0.3724, + 0.3812, + 0.3826, + 0.3806, + 0.3836, + 0.3805, + 0.3826, + 0.3838, + 0.382, + 0.3868, + 0.389, + 0.3827, + 0.3814, + 0.3811, + 0.3798, + 0.3848, + 0.3887, + 0.3859, + 0.3839, + 0.3853, + 0.3844, + 0.3848, + 0.382, + 0.3853, + 0.3875, + 0.39, + 0.3858, + 0.3851, + 0.3881, + 0.3862, + 0.3857, + 0.3878, + 0.3861, + 0.3847, + 0.386, + 0.3856, + 0.3861, + 0.3856, + 0.3853, + 0.3859, + 0.3863, + 0.3864 + ] + }, + "diagnostics": { + "bp_cosine": [ + -0.004850233439356089, + 0.02608177438378334, + -0.07089883089065552, + -0.018480747938156128, + -0.021952074021100998, + -0.0742325484752655, + 0.014702335000038147, + 0.9978023171424866 + ], + "perturbation_rho": [ + 0.006630884483456612, + 0.04272107779979706, + 0.011450880207121372, + -0.02322380244731903, + -0.023082010447978973, + 0.005496563855558634, + 0.01337945181876421, + 0.0004176963120698929 + ], + "nudging": { + "0.001": [ + 1.0048970580101013e-06, + -9.452924132347107e-08, + 7.194466888904572e-08, + 8.987262845039368e-08, + 1.6298145055770874e-09, + 1.0640360414981842e-07, + -1.5133991837501526e-08, + -1.0149087756872177e-06 + ], + "0.003": [ + 3.1320378184318542e-06, + -1.778826117515564e-07, + 2.0326115190982819e-07, + 6.426125764846802e-08, + 8.940696716308594e-08, + 2.9616057872772217e-07, + -5.820766091346741e-08, + -3.819353878498077e-06 + ], + "0.01": [ + 1.0303221642971039e-05, + -9.301584213972092e-07, + 9.851064532995224e-07, + 3.310851752758026e-07, + 2.6961788535118103e-07, + 1.0957010090351105e-06, + -2.2887252271175385e-07, + -1.3955170288681984e-05 + ] + }, + "hidden_norms_per_layer": [ + 8388.6279296875, + 78234.5703125, + 285077.09375, + 690526.125, + 807174.625, + 983118.0625, + 1164967.375, + 1221883.25, + 645722.0 + ], + "bp_grad_norms_per_layer": [ + 1.7088063032133505e-05, + 1.4101407259659027e-06, + 7.534777637374646e-07, + 7.644222819180868e-07, + 7.630256391166768e-07, + 7.645797950317501e-07, + 7.790439440213959e-07, + 7.754296120765503e-07, + 7.678858651161136e-07 + ] + }, + "drift": { + "embed.weight": 74.95033008101261, + "embed.bias": 26.33394267321031, + "blocks.0.ln.weight": 1.718515396118164, + "blocks.0.w1.weight": 19.27588868651768, + "blocks.0.w1.bias": 16.597455066774533, + "blocks.0.w2.weight": 78.15464599428759, + "blocks.1.ln.weight": 1.166561484336853, + "blocks.1.w1.weight": 22.94470041655997, + "blocks.1.w1.bias": 19.67519900380536, + "blocks.1.w2.weight": 48.26877331154166, + "blocks.2.ln.weight": 0.8080865740776062, + "blocks.2.w1.weight": 22.880080354832984, + "blocks.2.w1.bias": 24.117736850367468, + "blocks.2.w2.weight": 35.99124940422002, + "blocks.3.ln.weight": 0.7484873533248901, + "blocks.3.w1.weight": 19.064483285445874, + "blocks.3.w1.bias": 19.658006710932415, + "blocks.3.w2.weight": 33.07320462824024, + "blocks.4.ln.weight": 0.8579117059707642, + "blocks.4.w1.weight": 21.835583499123793, + "blocks.4.w1.bias": 22.344412303522038, + "blocks.4.w2.weight": 37.98679923879473, + "blocks.5.ln.weight": 0.8886668682098389, + "blocks.5.w1.weight": 21.7450742777726, + "blocks.5.w1.bias": 23.066791818306008, + "blocks.5.w2.weight": 39.601364504544236, + "blocks.6.ln.weight": 0.8415105938911438, + "blocks.6.w1.weight": 19.797554767242907, + "blocks.6.w1.bias": 20.68672501454032, + "blocks.6.w2.weight": 35.63045020577285, + "blocks.7.ln.weight": 1.1435216665267944, + "blocks.7.w1.weight": 23.68255893921491, + "blocks.7.w1.bias": 24.7022235520292, + "blocks.7.w2.weight": 50.913078873963286, + "out_ln.weight": 0.38547876477241516, + "out_head.weight": 5.732715293326757, + "out_head.bias": 0.752175082244237 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 256, + "num_blocks": 8, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 1 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d256_L8_seed1", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file diff --git a/results/fa_dfa_d256_L8_seed2/results_cifar10.json b/results/fa_dfa_d256_L8_seed2/results_cifar10.json new file mode 100644 index 0000000..d34561f --- /dev/null +++ b/results/fa_dfa_d256_L8_seed2/results_cifar10.json @@ -0,0 +1,881 @@ +{ + "2": { + "dfa": { + "log": { + "train_loss": [ + 2.0886882679748533, + 2.068717489356995, + 2.0682498848724364, + 2.069725094680786, + 2.070216057891846, + 2.0698515621948244, + 2.0687220024108885, + 2.070030203552246, + 2.0674163632965086, + 2.0676918890380858, + 2.0673313298797606, + 2.064816045455933, + 2.0655937913513185, + 2.0663441319274902, + 2.0637396141815185, + 2.062943668060303, + 2.0631902577209473, + 2.0599184128189085, + 2.061999180755615, + 2.06542762260437, + 2.0647084296417235, + 2.065485874633789, + 2.0667521308898924, + 2.0664935392761232, + 2.0673532804107664, + 2.0684957303619385, + 2.066869210281372, + 2.067966806564331, + 2.0697209989929197, + 2.070055845565796, + 2.0691994259643556, + 2.0712842699432374, + 2.0721969300079346, + 2.0708646242523194, + 2.071090523452759, + 2.0724222064208986, + 2.0733718238830567, + 2.0746167260742188, + 2.075273102645874, + 2.0737432322692873, + 2.0741693158721923, + 2.0734868059539795, + 2.074445921783447, + 2.074121329498291, + 2.07596641456604, + 2.077426067504883, + 2.076066472320557, + 2.0742275380706787, + 2.076295921859741, + 2.076496145629883, + 2.0769809690856933, + 2.077219148712158, + 2.0779221090698243, + 2.0786613733673094, + 2.0790778132629395, + 2.077897124710083, + 2.0780438188934327, + 2.077808642272949, + 2.077776354217529, + 2.0779004010772706, + 2.079731927947998, + 2.0795092359924316, + 2.077359133682251, + 2.080645820541382, + 2.0798381079101564, + 2.0776988359069826, + 2.0795934693145752, + 2.080012099685669, + 2.0793860891723632, + 2.0781388677215578, + 2.0804970370483398, + 2.078412676239014, + 2.0773618979644777, + 2.07952963760376, + 2.0799946071624755, + 2.0800561015319823, + 2.0800602492523192, + 2.0796355754089357, + 2.0790224867248535, + 2.078683383331299, + 2.080645339508057, + 2.0792727799224853, + 2.0785955645751955, + 2.0800429849243165, + 2.078200587310791, + 2.0787713687896727, + 2.079665564956665, + 2.0787830973052976, + 2.0781467710113524, + 2.0787809454345703, + 2.0801569499206543, + 2.0799850133514406, + 2.07929296005249, + 2.079213014297485, + 2.0788697573852537, + 2.0782224908828737, + 2.080356029815674, + 2.078121220932007, + 2.078316292877197, + 2.077635722503662 + ], + "train_acc": [ + 0.23002, + 0.23622, + 0.2345, + 0.2358, + 0.23586, + 0.23384, + 0.2376, + 0.23746, + 0.23776, + 0.23716, + 0.24034, + 0.2422, + 0.24098, + 0.23872, + 0.24112, + 0.2408, + 0.24232, + 0.24592, + 0.24418, + 0.24304, + 0.24244, + 0.24248, + 0.24314, + 0.24312, + 0.2393, + 0.24304, + 0.24282, + 0.2429, + 0.24196, + 0.24328, + 0.24174, + 0.24158, + 0.24166, + 0.2407, + 0.24348, + 0.24048, + 0.24018, + 0.23818, + 0.24048, + 0.23914, + 0.2415, + 0.2415, + 0.24158, + 0.24146, + 0.24178, + 0.24136, + 0.24252, + 0.2435, + 0.24262, + 0.23852, + 0.24158, + 0.23898, + 0.23892, + 0.24054, + 0.24006, + 0.23972, + 0.24122, + 0.24014, + 0.24046, + 0.24122, + 0.23972, + 0.23902, + 0.24022, + 0.23936, + 0.2416, + 0.24268, + 0.2365, + 0.23816, + 0.23982, + 0.24116, + 0.24232, + 0.24096, + 0.24274, + 0.23918, + 0.23968, + 0.24092, + 0.23912, + 0.2421, + 0.2393, + 0.24116, + 0.24206, + 0.2393, + 0.2402, + 0.23988, + 0.2409, + 0.24032, + 0.24144, + 0.2411, + 0.24234, + 0.24238, + 0.23996, + 0.2394, + 0.23952, + 0.23958, + 0.24116, + 0.2415, + 0.23976, + 0.24184, + 0.24056, + 0.24148 + ], + "test_acc": [ + 0.2516, + 0.257, + 0.242, + 0.2516, + 0.2568, + 0.2492, + 0.247, + 0.2534, + 0.2538, + 0.2516, + 0.259, + 0.2616, + 0.241, + 0.2622, + 0.2649, + 0.263, + 0.254, + 0.2557, + 0.2605, + 0.2406, + 0.2404, + 0.2602, + 0.2541, + 0.2557, + 0.2459, + 0.2537, + 0.2639, + 0.2592, + 0.2641, + 0.2661, + 0.2526, + 0.2479, + 0.2497, + 0.2529, + 0.2595, + 0.2577, + 0.2523, + 0.2523, + 0.2578, + 0.2396, + 0.2647, + 0.2567, + 0.2657, + 0.2477, + 0.259, + 0.2492, + 0.2469, + 0.246, + 0.2515, + 0.2638, + 0.2481, + 0.2607, + 0.2516, + 0.2447, + 0.2457, + 0.2547, + 0.2481, + 0.2426, + 0.2434, + 0.2491, + 0.2579, + 0.2605, + 0.2537, + 0.2654, + 0.2601, + 0.2484, + 0.249, + 0.2515, + 0.2555, + 0.2502, + 0.2538, + 0.258, + 0.2569, + 0.254, + 0.2533, + 0.2544, + 0.2589, + 0.2566, + 0.2566, + 0.2521, + 0.2485, + 0.254, + 0.2493, + 0.2517, + 0.2532, + 0.252, + 0.2492, + 0.2531, + 0.2524, + 0.2491, + 0.2538, + 0.2526, + 0.251, + 0.2548, + 0.2537, + 0.2541, + 0.2543, + 0.2532, + 0.2533, + 0.2533 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.20111382007598877, + 0.0004621353873517364, + -0.0012605632655322552, + 5.168101051822305e-05, + -0.0015679890057072043, + 0.0019785298500210047, + 0.0006557029555551708, + 0.0007036488968878984 + ], + "perturbation_rho": [ + -0.02028709650039673, + 0.0, + 0.0, + 0.0, + -0.0007397841545753181, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -1.695007085800171e-07, + 0.0, + 0.0, + 0.0, + 9.313225746154785e-10, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -3.8463622331619263e-07, + 0.0, + 0.0, + 0.0, + 9.313225746154785e-10, + 0.0, + 0.0, + 0.0 + ], + "0.01": [ + -1.1408701539039612e-06, + 0.0, + 0.0, + 0.0, + 2.7939677238464355e-09, + 0.0, + 0.0, + 0.0 + ] + }, + "hidden_norms_per_layer": [ + 45184.2421875, + 1068381440.0, + 1778018944.0, + 2116394752.0, + 2182007296.0, + 2562655488.0, + 2566907136.0, + 2985686272.0, + 3052116480.0 + ], + "bp_grad_norms_per_layer": [ + 1.4686037275168928e-07, + 6.425820986422082e-10, + 6.384120454505648e-10, + 6.389382356530859e-10, + 6.396255192164801e-10, + 6.406807306902351e-10, + 6.405915242702065e-10, + 6.406805641567814e-10, + 6.40737352064491e-10 + ] + }, + "drift": { + "embed.weight": 418.6113426367787, + "embed.bias": 437.5668398161194, + "blocks.0.ln.weight": 10.016462326049805, + "blocks.0.w1.weight": 315.59584333536606, + "blocks.0.w1.bias": 338.3304059680062, + "blocks.0.w2.weight": 607.5484294308882, + "blocks.1.ln.weight": 11.81142807006836, + "blocks.1.w1.weight": 335.3681839945845, + "blocks.1.w1.bias": 316.5131919451052, + "blocks.1.w2.weight": 481.7556503680824, + "blocks.2.ln.weight": 10.658580780029297, + "blocks.2.w1.weight": 312.4649885568403, + "blocks.2.w1.bias": 293.43148423074234, + "blocks.2.w2.weight": 428.14647887743376, + "blocks.3.ln.weight": 9.960590362548828, + "blocks.3.w1.weight": 241.4254307025639, + "blocks.3.w1.bias": 217.00406676688746, + "blocks.3.w2.weight": 345.4810531131192, + "blocks.4.ln.weight": 11.285172462463379, + "blocks.4.w1.weight": 325.03446821544236, + "blocks.4.w1.bias": 313.6381406887335, + "blocks.4.w2.weight": 455.8087708526798, + "blocks.5.ln.weight": 8.04609489440918, + "blocks.5.w1.weight": 215.75566383550878, + "blocks.5.w1.bias": 207.62392148123524, + "blocks.5.w2.weight": 294.26989490791993, + "blocks.6.ln.weight": 11.42038631439209, + "blocks.6.w1.weight": 327.5629239121232, + "blocks.6.w1.bias": 305.4508358753746, + "blocks.6.w2.weight": 477.6831296534087, + "blocks.7.ln.weight": 8.823966979980469, + "blocks.7.w1.weight": 247.08109127723426, + "blocks.7.w1.bias": 236.30426713334924, + "blocks.7.w2.weight": 344.7289213462053, + "out_ln.weight": 0.8588130474090576, + "out_head.weight": 8.666066942387875, + "out_head.bias": 1.137642620895565 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.0575581959533693, + 1.982899369468689, + 1.949610625076294, + 1.9338219098281861, + 1.9279949721527099, + 1.9190433423614501, + 1.9120057677459716, + 1.9073227895355225, + 1.9010615240478517, + 1.8946472421264648, + 1.8895224981307983, + 1.8831638266372681, + 1.8827376470184327, + 1.8782546444320678, + 1.87282203704834, + 1.8714419341278077, + 1.867955844154358, + 1.8605742862701415, + 1.8630245657348632, + 1.8617111119842529, + 1.858997180404663, + 1.8575258152770997, + 1.853901057510376, + 1.851965792312622, + 1.849754028968811, + 1.8477436004638672, + 1.8454922165679932, + 1.8445022313690185, + 1.8395921088409424, + 1.8344731448745728, + 1.8315038693237304, + 1.8307365502929687, + 1.8269501232528687, + 1.824952223854065, + 1.816781127052307, + 1.815732438735962, + 1.81667452003479, + 1.8134329107666016, + 1.8117710926055908, + 1.8103601391220092, + 1.8055513787841797, + 1.8014423720550536, + 1.8024774380111694, + 1.7986959079360962, + 1.7953778457641603, + 1.7985560263442992, + 1.7965025356674194, + 1.7882651119613648, + 1.7905249483489991, + 1.789016272354126, + 1.7867180441665649, + 1.787857604675293, + 1.7842913775253295, + 1.7844246352386475, + 1.7800830935287475, + 1.7805310357284545, + 1.7760378722763062, + 1.7760090646743774, + 1.7754922837066651, + 1.7761539294815063, + 1.7764367975234985, + 1.7729884731292724, + 1.7713473574829102, + 1.7740464125823974, + 1.775451347427368, + 1.771372494544983, + 1.7704123861312866, + 1.774370786743164, + 1.770715291481018, + 1.7711526647949218, + 1.7708271207046509, + 1.7675350022125245, + 1.7677011463165284, + 1.7680682544708253, + 1.769882447128296, + 1.7696199462127686, + 1.7676654192352295, + 1.7694056000137328, + 1.769216322402954, + 1.7660821619415283, + 1.766729666442871, + 1.7692560305786134, + 1.7672280679702759, + 1.766029937133789, + 1.7672864461517335, + 1.7673147164535523, + 1.766981872253418, + 1.7642782439804077, + 1.765021160812378, + 1.7666088375091553, + 1.7657413306045533, + 1.7673192971801759, + 1.7683989611434936, + 1.7645942099380494, + 1.7642021070098877, + 1.7653449111175537, + 1.7663517474365233, + 1.7659291017913819, + 1.7664799518203735, + 1.763619673538208 + ], + "train_acc": [ + 0.23966, + 0.27212, + 0.28776, + 0.29538, + 0.30048, + 0.30252, + 0.30566, + 0.30792, + 0.3088, + 0.31204, + 0.31266, + 0.31664, + 0.31756, + 0.31608, + 0.32266, + 0.32434, + 0.32378, + 0.3291, + 0.32652, + 0.32948, + 0.33328, + 0.33224, + 0.33312, + 0.33614, + 0.33746, + 0.33856, + 0.3381, + 0.33856, + 0.34324, + 0.3451, + 0.34418, + 0.34646, + 0.34888, + 0.34762, + 0.35072, + 0.35438, + 0.35212, + 0.35348, + 0.35384, + 0.35278, + 0.35586, + 0.35542, + 0.3554, + 0.35636, + 0.35712, + 0.35806, + 0.35824, + 0.36148, + 0.35998, + 0.36142, + 0.361, + 0.35922, + 0.36284, + 0.36258, + 0.3628, + 0.3614, + 0.36514, + 0.36556, + 0.36786, + 0.36512, + 0.36446, + 0.36606, + 0.36762, + 0.3651, + 0.3634, + 0.3675, + 0.36398, + 0.36604, + 0.36612, + 0.36696, + 0.3681, + 0.36822, + 0.37026, + 0.36726, + 0.36898, + 0.36782, + 0.36792, + 0.36732, + 0.3675, + 0.36906, + 0.3676, + 0.36604, + 0.36928, + 0.36834, + 0.36678, + 0.36774, + 0.37054, + 0.36758, + 0.36964, + 0.36992, + 0.36908, + 0.37016, + 0.36498, + 0.36648, + 0.37084, + 0.37, + 0.36922, + 0.3694, + 0.36746, + 0.37072 + ], + "test_acc": [ + 0.2762, + 0.2993, + 0.3248, + 0.3131, + 0.3217, + 0.324, + 0.3154, + 0.3369, + 0.3261, + 0.3276, + 0.3409, + 0.3459, + 0.327, + 0.3408, + 0.3397, + 0.3309, + 0.3524, + 0.3538, + 0.3522, + 0.3438, + 0.3473, + 0.3506, + 0.3502, + 0.3546, + 0.3579, + 0.3615, + 0.3654, + 0.3661, + 0.3669, + 0.3655, + 0.3694, + 0.3655, + 0.3657, + 0.3646, + 0.3701, + 0.3684, + 0.3688, + 0.3636, + 0.3718, + 0.3745, + 0.3806, + 0.3699, + 0.3824, + 0.3806, + 0.3814, + 0.3851, + 0.3805, + 0.3745, + 0.3867, + 0.3821, + 0.3766, + 0.3863, + 0.3824, + 0.38, + 0.3827, + 0.3808, + 0.3811, + 0.3814, + 0.3822, + 0.3857, + 0.3854, + 0.3832, + 0.3813, + 0.3866, + 0.3831, + 0.3794, + 0.384, + 0.3861, + 0.385, + 0.3877, + 0.386, + 0.3854, + 0.3875, + 0.3869, + 0.3868, + 0.388, + 0.3888, + 0.3899, + 0.3869, + 0.3904, + 0.3891, + 0.3886, + 0.386, + 0.39, + 0.3882, + 0.3885, + 0.3862, + 0.3886, + 0.39, + 0.3888, + 0.3891, + 0.3892, + 0.3888, + 0.3899, + 0.391, + 0.3904, + 0.3893, + 0.3894, + 0.3893, + 0.3894 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.03124001994729042, + 0.08908016234636307, + 0.04665760695934296, + -0.04777219519019127, + -0.07556367665529251, + -0.0461275577545166, + -0.03507336974143982, + 0.9990285634994507 + ], + "perturbation_rho": [ + 0.027517501264810562, + 0.010178674943745136, + -0.034707143902778625, + 0.005213148891925812, + -0.07512903958559036, + -0.00475400872528553, + 9.842216968536377e-06, + 0.02905265800654888 + ], + "nudging": { + "0.001": [ + -1.698266714811325e-06, + -2.1292362362146378e-07, + 3.958120942115784e-08, + 4.831235855817795e-08, + 8.055940270423889e-08, + 2.3748725652694702e-08, + 5.098991096019745e-08, + -9.502982720732689e-07 + ], + "0.003": [ + -5.119014531373978e-06, + -9.03732143342495e-07, + -1.7078127712011337e-07, + 2.859160304069519e-07, + 2.7811620384454727e-07, + 2.0384322851896286e-07, + 1.5203841030597687e-07, + -3.3261021599173546e-06 + ], + "0.01": [ + -1.702900044620037e-05, + -3.168359398841858e-06, + -6.603077054023743e-07, + 5.726469680666924e-07, + 8.258502930402756e-07, + 5.200272426009178e-07, + 3.9814040064811707e-07, + -1.166516449302435e-05 + ] + }, + "hidden_norms_per_layer": [ + 6265.44482421875, + 53904.69921875, + 118099.265625, + 566143.375, + 935321.5, + 1106561.75, + 1203173.875, + 1238320.5, + 740976.125 + ], + "bp_grad_norms_per_layer": [ + 2.1726980776293203e-05, + 1.636821252759546e-06, + 7.93243316365988e-07, + 6.795298759243451e-07, + 6.810526542722073e-07, + 6.809283945585776e-07, + 6.807385943830013e-07, + 6.802036978115211e-07, + 6.770658842469857e-07 + ] + }, + "drift": { + "embed.weight": 63.21845610957774, + "embed.bias": 25.90407451226601, + "blocks.0.ln.weight": 1.8451513051986694, + "blocks.0.w1.weight": 19.346207451992306, + "blocks.0.w1.bias": 14.068566093574962, + "blocks.0.w2.weight": 82.80276521804117, + "blocks.1.ln.weight": 1.3143744468688965, + "blocks.1.w1.weight": 20.043434543563325, + "blocks.1.w1.bias": 13.614899313814618, + "blocks.1.w2.weight": 70.67866920669934, + "blocks.2.ln.weight": 1.2612940073013306, + "blocks.2.w1.weight": 24.037240913049125, + "blocks.2.w1.bias": 22.307086193211962, + "blocks.2.w2.weight": 47.56871336729849, + "blocks.3.ln.weight": 1.0761433839797974, + "blocks.3.w1.weight": 25.473873360543205, + "blocks.3.w1.bias": 25.35175214203562, + "blocks.3.w2.weight": 40.85251406771216, + "blocks.4.ln.weight": 0.9428171515464783, + "blocks.4.w1.weight": 23.293363798467443, + "blocks.4.w1.bias": 24.428185562790627, + "blocks.4.w2.weight": 37.128127593959505, + "blocks.5.ln.weight": 0.8093754649162292, + "blocks.5.w1.weight": 19.89014687425912, + "blocks.5.w1.bias": 21.460830616828144, + "blocks.5.w2.weight": 34.639394288754545, + "blocks.6.ln.weight": 0.7578156590461731, + "blocks.6.w1.weight": 17.50016127400267, + "blocks.6.w1.bias": 18.172019570522167, + "blocks.6.w2.weight": 28.78778456757253, + "blocks.7.ln.weight": 0.9525559544563293, + "blocks.7.w1.weight": 21.92857716495514, + "blocks.7.w1.bias": 22.77505483247343, + "blocks.7.w2.weight": 58.30518938223301, + "out_ln.weight": 0.38212674856185913, + "out_head.weight": 5.330482873019479, + "out_head.bias": 0.8164980229115041 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 256, + "num_blocks": 8, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 2 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d256_L8_seed2", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file diff --git a/results/fa_dfa_d256_L8_seed3/results_cifar10.json b/results/fa_dfa_d256_L8_seed3/results_cifar10.json new file mode 100644 index 0000000..5c3b9f9 --- /dev/null +++ b/results/fa_dfa_d256_L8_seed3/results_cifar10.json @@ -0,0 +1,881 @@ +{ + "3": { + "dfa": { + "log": { + "train_loss": [ + 2.068615023956299, + 2.047176646156311, + 2.045687096939087, + 2.041560323257446, + 2.039340658111572, + 2.037013464202881, + 2.035925048980713, + 2.0311173275756835, + 2.032121988372803, + 2.028200259399414, + 2.0268639810943605, + 2.0260205685043333, + 2.023872131958008, + 2.022557463645935, + 2.019715512161255, + 2.0188200274276733, + 2.0172054164886473, + 2.017824410171509, + 2.017126524734497, + 2.0166522624969483, + 2.0137256134414674, + 2.0104475395965578, + 2.014379795913696, + 2.014344210357666, + 2.0130453625488283, + 2.0132340604400634, + 2.015250590057373, + 2.0126567953872683, + 2.011817781829834, + 2.011758652381897, + 2.012901442489624, + 2.010367298049927, + 2.01117289024353, + 2.0130296683502196, + 2.011817899551392, + 2.0138639054107665, + 2.0104484258270263, + 2.01166593460083, + 2.0109901863861084, + 2.009072921295166, + 2.0114363181304933, + 2.0125079582214354, + 2.0117907460784914, + 2.0110322050476075, + 2.0118529551696778, + 2.0105218141174315, + 2.0101245944213866, + 2.011210913467407, + 2.00997705657959, + 2.0106901919555664, + 2.0133068067932127, + 2.010482346534729, + 2.01014770980835, + 2.013678374786377, + 2.0105705118560793, + 2.009467424964905, + 2.0119786975097655, + 2.012015775146484, + 2.0094030991363527, + 2.012004408950806, + 2.0111507415008547, + 2.0106979748535156, + 2.008728541183472, + 2.010878988723755, + 2.009884360809326, + 2.0100272506713868, + 2.010221890258789, + 2.0115498251342774, + 2.012461339263916, + 2.010008108291626, + 2.009727705307007, + 2.0100395127105712, + 2.009380127105713, + 2.0105259854888917, + 2.0112087518310546, + 2.0097919299316405, + 2.009896908798218, + 2.0096029611968995, + 2.0109228817749023, + 2.0104407540130613, + 2.008896068954468, + 2.010570963516235, + 2.0119053901672364, + 2.010287576522827, + 2.008419459609985, + 2.0072363785171508, + 2.00801055557251, + 2.008693112564087, + 2.0080592823791505, + 2.0085273458862303, + 2.009861908569336, + 2.0077927374267577, + 2.009273677444458, + 2.008448128814697, + 2.00934712600708, + 2.0098364579772947, + 2.0096740563964843, + 2.009671881637573, + 2.0100610285949707, + 2.0068935653305053 + ], + "train_acc": [ + 0.23866, + 0.24262, + 0.24576, + 0.25012, + 0.25268, + 0.25266, + 0.25262, + 0.25804, + 0.2571, + 0.25908, + 0.26094, + 0.26028, + 0.26044, + 0.26054, + 0.262, + 0.26448, + 0.26688, + 0.26672, + 0.266, + 0.26744, + 0.26834, + 0.26974, + 0.26904, + 0.2705, + 0.26832, + 0.26774, + 0.26668, + 0.26762, + 0.267, + 0.27284, + 0.27026, + 0.27038, + 0.27058, + 0.27046, + 0.27236, + 0.26896, + 0.27174, + 0.27068, + 0.27064, + 0.2734, + 0.27182, + 0.27226, + 0.27142, + 0.2746, + 0.27316, + 0.27024, + 0.27572, + 0.27118, + 0.2743, + 0.27594, + 0.27086, + 0.27004, + 0.27266, + 0.26996, + 0.27428, + 0.27528, + 0.27046, + 0.27342, + 0.27552, + 0.27356, + 0.27532, + 0.27676, + 0.27258, + 0.27636, + 0.27536, + 0.27252, + 0.2765, + 0.27326, + 0.2722, + 0.27532, + 0.27574, + 0.2746, + 0.27538, + 0.27316, + 0.2756, + 0.27282, + 0.27362, + 0.27488, + 0.2711, + 0.27402, + 0.27544, + 0.27228, + 0.27592, + 0.2756, + 0.27716, + 0.27642, + 0.27756, + 0.27424, + 0.27854, + 0.27518, + 0.27258, + 0.2778, + 0.27602, + 0.277, + 0.27468, + 0.27454, + 0.27424, + 0.27666, + 0.27484, + 0.27578 + ], + "test_acc": [ + 0.2605, + 0.2721, + 0.2761, + 0.2552, + 0.2751, + 0.2671, + 0.2772, + 0.279, + 0.2708, + 0.2664, + 0.2807, + 0.2803, + 0.2787, + 0.2911, + 0.2852, + 0.2856, + 0.2844, + 0.2747, + 0.2802, + 0.2843, + 0.294, + 0.2912, + 0.2858, + 0.276, + 0.2842, + 0.2825, + 0.2921, + 0.2849, + 0.289, + 0.2758, + 0.2964, + 0.2852, + 0.2834, + 0.2877, + 0.2999, + 0.2849, + 0.293, + 0.2873, + 0.2934, + 0.2883, + 0.293, + 0.2895, + 0.2914, + 0.292, + 0.3002, + 0.2865, + 0.2926, + 0.2945, + 0.2989, + 0.2977, + 0.2916, + 0.2894, + 0.2977, + 0.2938, + 0.3004, + 0.2967, + 0.3009, + 0.2818, + 0.2924, + 0.2934, + 0.2852, + 0.2866, + 0.2939, + 0.2929, + 0.2948, + 0.296, + 0.2999, + 0.2936, + 0.2911, + 0.2955, + 0.2922, + 0.2948, + 0.292, + 0.2922, + 0.2954, + 0.2909, + 0.2971, + 0.2964, + 0.2964, + 0.2975, + 0.2926, + 0.2936, + 0.2895, + 0.2968, + 0.295, + 0.2928, + 0.2962, + 0.2962, + 0.2981, + 0.295, + 0.2952, + 0.2962, + 0.2954, + 0.2942, + 0.294, + 0.2946, + 0.2946, + 0.2945, + 0.2946, + 0.2946 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.39051711559295654, + 0.00047808888484723866, + 0.00286676874384284, + -0.0025043508503586054, + 0.000322053674608469, + -0.0008587267366237938, + -0.0018792838091030717, + 0.0018272181041538715 + ], + "perturbation_rho": [ + 0.005894219968467951, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -2.915039658546448e-07, + -9.313225746154785e-10, + 9.313225746154785e-10, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -1.0211952030658722e-06, + -1.862645149230957e-09, + 9.313225746154785e-10, + 0.0, + 0.0, + 0.0, + 9.313225746154785e-10, + 0.0 + ], + "0.01": [ + -3.0533410608768463e-06, + -3.725290298461914e-09, + 9.313225746154785e-10, + 3.725290298461914e-09, + 0.0, + 0.0, + -9.313225746154785e-10, + 0.0 + ] + }, + "hidden_norms_per_layer": [ + 40316.796875, + 307326208.0, + 913832192.0, + 988009024.0, + 1508025088.0, + 1806359168.0, + 1849629568.0, + 2188687616.0, + 2485296896.0 + ], + "bp_grad_norms_per_layer": [ + 3.1247913057086407e-07, + 6.341530633946491e-10, + 6.226453796998044e-10, + 6.2297295100322e-10, + 6.209277536584068e-10, + 6.210325587119314e-10, + 6.210361669367614e-10, + 6.206388181162481e-10, + 6.205727043351317e-10 + ] + }, + "drift": { + "embed.weight": 346.8648177248426, + "embed.bias": 294.36548366042695, + "blocks.0.ln.weight": 11.012950897216797, + "blocks.0.w1.weight": 218.82511475631335, + "blocks.0.w1.bias": 208.27393479202811, + "blocks.0.w2.weight": 491.86978296944665, + "blocks.1.ln.weight": 10.164929389953613, + "blocks.1.w1.weight": 277.9108744662706, + "blocks.1.w1.bias": 255.17942982496433, + "blocks.1.w2.weight": 416.2935804854679, + "blocks.2.ln.weight": 9.155071258544922, + "blocks.2.w1.weight": 230.32353546806806, + "blocks.2.w1.bias": 225.3918473151377, + "blocks.2.w2.weight": 335.1338975810203, + "blocks.3.ln.weight": 10.646790504455566, + "blocks.3.w1.weight": 305.38387277965444, + "blocks.3.w1.bias": 272.0924461450336, + "blocks.3.w2.weight": 449.71231243467605, + "blocks.4.ln.weight": 10.511990547180176, + "blocks.4.w1.weight": 301.02475276314414, + "blocks.4.w1.bias": 283.9163308436812, + "blocks.4.w2.weight": 425.48006497712345, + "blocks.5.ln.weight": 7.359297275543213, + "blocks.5.w1.weight": 197.05490869212008, + "blocks.5.w1.bias": 175.3607301882127, + "blocks.5.w2.weight": 279.18290459751876, + "blocks.6.ln.weight": 11.279300689697266, + "blocks.6.w1.weight": 321.6075481759436, + "blocks.6.w1.bias": 299.55963820199565, + "blocks.6.w2.weight": 437.1964645534032, + "blocks.7.ln.weight": 11.766033172607422, + "blocks.7.w1.weight": 330.68186483881703, + "blocks.7.w1.bias": 322.85133764510664, + "blocks.7.w2.weight": 455.9640356105679, + "out_ln.weight": 0.8627486824989319, + "out_head.weight": 8.827849511384864, + "out_head.bias": 1.0810052204688796 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.058981375579834, + 1.9834817496490478, + 1.9641429858398438, + 1.9436818535614013, + 1.9326764168930053, + 1.9252216508483886, + 1.9228894757080077, + 1.9135075526428222, + 1.908241348800659, + 1.898921445579529, + 1.8944130271148683, + 1.8912477577972413, + 1.8830120736312865, + 1.881072061805725, + 1.8768733514022826, + 1.8708937075805665, + 1.8682633953094483, + 1.867644786605835, + 1.862151336288452, + 1.8556909922027587, + 1.8531654512786866, + 1.8516737128448486, + 1.856813112487793, + 1.8525701689147949, + 1.845026340560913, + 1.839306372718811, + 1.8322341234970092, + 1.8263341422271728, + 1.8188115433502197, + 1.8107766916275025, + 1.8092199557113648, + 1.8012385620880127, + 1.7975863833999635, + 1.7922526489257813, + 1.7880666454315186, + 1.7886801773071288, + 1.7807807125091553, + 1.779724262084961, + 1.775158731918335, + 1.7758396688842772, + 1.7778481522369385, + 1.774957112350464, + 1.7728896438598634, + 1.7728447552490234, + 1.7720583478546144, + 1.7720315837860108, + 1.7645318777084351, + 1.7661468465423584, + 1.7653337018203736, + 1.7627277339935303, + 1.7612493091583252, + 1.7608684145736695, + 1.7618030507659912, + 1.7599208697128297, + 1.7579380053710938, + 1.7533478388214112, + 1.7546381701278686, + 1.7539206622314454, + 1.74741821723938, + 1.750472043838501, + 1.7494391228866577, + 1.748632820663452, + 1.7438091558837892, + 1.7478703462982177, + 1.7456060763549806, + 1.7437335445785522, + 1.7426370120239258, + 1.7418463362884522, + 1.7448427515029907, + 1.7404597217559814, + 1.743501665992737, + 1.7376666475677491, + 1.7360430716705322, + 1.7362179106903075, + 1.7341359579086304, + 1.733851330947876, + 1.7334849172210693, + 1.7326775600814819, + 1.7341204782104491, + 1.7362167639541626, + 1.7309519817352295, + 1.7323176555633546, + 1.733476494140625, + 1.7358508117294311, + 1.7305281336212157, + 1.7303411197662353, + 1.73070291847229, + 1.7317729217147828, + 1.727743486557007, + 1.7267417626571655, + 1.7302013341522218, + 1.7299649307250977, + 1.7291040048217774, + 1.7297229943847656, + 1.7304346767807006, + 1.7294611694335937, + 1.731980471458435, + 1.7267779105377197, + 1.728856870651245, + 1.7232196910858155 + ], + "train_acc": [ + 0.24234, + 0.2742, + 0.29098, + 0.29598, + 0.29902, + 0.29986, + 0.3002, + 0.30472, + 0.30692, + 0.31152, + 0.31082, + 0.31398, + 0.3191, + 0.31702, + 0.32336, + 0.32422, + 0.32658, + 0.32714, + 0.32938, + 0.33382, + 0.33342, + 0.33472, + 0.33338, + 0.33292, + 0.33888, + 0.3423, + 0.34322, + 0.3461, + 0.34668, + 0.35166, + 0.35128, + 0.35728, + 0.3546, + 0.35978, + 0.361, + 0.3614, + 0.36304, + 0.36228, + 0.36516, + 0.36602, + 0.36758, + 0.36578, + 0.36746, + 0.3672, + 0.36552, + 0.3666, + 0.37044, + 0.3676, + 0.37066, + 0.37068, + 0.37034, + 0.37252, + 0.37222, + 0.37374, + 0.372, + 0.37456, + 0.37316, + 0.37352, + 0.37646, + 0.37608, + 0.37806, + 0.37702, + 0.37736, + 0.3746, + 0.37672, + 0.37986, + 0.37954, + 0.38086, + 0.3791, + 0.37724, + 0.3785, + 0.38128, + 0.37962, + 0.3815, + 0.38328, + 0.38234, + 0.38302, + 0.38246, + 0.38054, + 0.37982, + 0.38474, + 0.38062, + 0.382, + 0.3824, + 0.38212, + 0.38174, + 0.38348, + 0.38414, + 0.38298, + 0.38414, + 0.38316, + 0.38198, + 0.38374, + 0.3837, + 0.38268, + 0.38232, + 0.38248, + 0.38382, + 0.38298, + 0.38342 + ], + "test_acc": [ + 0.2801, + 0.3073, + 0.3193, + 0.3203, + 0.3257, + 0.3225, + 0.3347, + 0.3351, + 0.3319, + 0.3249, + 0.337, + 0.3477, + 0.3485, + 0.3531, + 0.3504, + 0.3551, + 0.3542, + 0.3603, + 0.3587, + 0.3676, + 0.3669, + 0.3616, + 0.3612, + 0.3648, + 0.3724, + 0.3675, + 0.3764, + 0.3806, + 0.3759, + 0.3844, + 0.38, + 0.3841, + 0.3848, + 0.3885, + 0.3898, + 0.3917, + 0.3918, + 0.3932, + 0.3872, + 0.3948, + 0.3953, + 0.3905, + 0.3913, + 0.3936, + 0.3956, + 0.3946, + 0.3982, + 0.3893, + 0.4001, + 0.3968, + 0.3968, + 0.3963, + 0.3964, + 0.4, + 0.3958, + 0.3986, + 0.397, + 0.398, + 0.3948, + 0.3973, + 0.3975, + 0.401, + 0.4026, + 0.3986, + 0.3986, + 0.3997, + 0.4009, + 0.3998, + 0.3987, + 0.4021, + 0.4003, + 0.4008, + 0.4017, + 0.4014, + 0.4009, + 0.4024, + 0.3995, + 0.4047, + 0.4028, + 0.4026, + 0.4041, + 0.4028, + 0.4013, + 0.4016, + 0.4038, + 0.404, + 0.404, + 0.405, + 0.4042, + 0.4014, + 0.4029, + 0.404, + 0.4044, + 0.4035, + 0.4035, + 0.4036, + 0.4035, + 0.4036, + 0.4036, + 0.4035 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.015765948221087456, + 0.07965049147605896, + 0.01825195923447609, + -0.06194503605365753, + -0.06945411115884781, + -0.027306973934173584, + -0.06518878042697906, + 0.9981832504272461 + ], + "perturbation_rho": [ + -0.003869679756462574, + 0.028149917721748352, + -0.03722488507628441, + 0.010893862694501877, + -0.053424231708049774, + 0.0007988980505615473, + 0.014475717209279537, + 0.039686620235443115 + ], + "nudging": { + "0.001": [ + 1.698499545454979e-07, + -3.166496753692627e-07, + -3.213062882423401e-08, + 1.387670636177063e-07, + 1.8265563994646072e-07, + 7.741618901491165e-08, + 1.4121178537607193e-07, + -1.2140953913331032e-06 + ], + "0.003": [ + 3.164168447256088e-07, + -1.0326039046049118e-06, + -5.611218512058258e-08, + 2.748565748333931e-07, + 3.5634730011224747e-07, + 1.3550743460655212e-07, + 3.384193405508995e-07, + -4.2710453271865845e-06 + ], + "0.01": [ + 1.0926742106676102e-06, + -3.4980475902557373e-06, + -2.4691689759492874e-07, + 1.008971594274044e-06, + 1.0706717148423195e-06, + 4.578614607453346e-07, + 1.044594682753086e-06, + -1.4978926628828049e-05 + ] + }, + "hidden_norms_per_layer": [ + 5578.46875, + 33612.796875, + 156752.953125, + 338066.0625, + 628331.5, + 779574.8125, + 844256.6875, + 888649.0, + 425876.65625 + ], + "bp_grad_norms_per_layer": [ + 2.167309503420256e-05, + 2.129869471900747e-06, + 8.520624419361411e-07, + 8.023235977816512e-07, + 8.024675253182068e-07, + 8.062054348556558e-07, + 8.076636390796921e-07, + 8.067011663115409e-07, + 8.030539788705937e-07 + ] + }, + "drift": { + "embed.weight": 54.49055679435881, + "embed.bias": 20.29269603704931, + "blocks.0.ln.weight": 1.6682279109954834, + "blocks.0.w1.weight": 17.062151194987237, + "blocks.0.w1.bias": 13.499766617268852, + "blocks.0.w2.weight": 71.40829154639394, + "blocks.1.ln.weight": 1.4543473720550537, + "blocks.1.w1.weight": 19.719236452544127, + "blocks.1.w1.bias": 9.828062345144708, + "blocks.1.w2.weight": 58.72293185943862, + "blocks.2.ln.weight": 1.0721585750579834, + "blocks.2.w1.weight": 18.58227228825852, + "blocks.2.w1.bias": 15.746003179119816, + "blocks.2.w2.weight": 41.71235474869442, + "blocks.3.ln.weight": 0.8726892471313477, + "blocks.3.w1.weight": 20.471342330584488, + "blocks.3.w1.bias": 19.512499778298427, + "blocks.3.w2.weight": 36.10169832252701, + "blocks.4.ln.weight": 0.7409772276878357, + "blocks.4.w1.weight": 17.51931629450496, + "blocks.4.w1.bias": 18.723236583560368, + "blocks.4.w2.weight": 30.24901961387597, + "blocks.5.ln.weight": 0.7194885015487671, + "blocks.5.w1.weight": 16.539786302289123, + "blocks.5.w1.bias": 16.75240225769585, + "blocks.5.w2.weight": 28.9854050706238, + "blocks.6.ln.weight": 0.7770423293113708, + "blocks.6.w1.weight": 17.2275004107886, + "blocks.6.w1.bias": 18.38493950420895, + "blocks.6.w2.weight": 32.31440391348023, + "blocks.7.ln.weight": 0.9231076240539551, + "blocks.7.w1.weight": 19.50195743824471, + "blocks.7.w1.bias": 21.20774557625887, + "blocks.7.w2.weight": 46.39053908141515, + "out_ln.weight": 0.307411789894104, + "out_head.weight": 4.604781316777015, + "out_head.bias": 1.258175899516633 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 256, + "num_blocks": 8, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 3 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d256_L8_seed3", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file diff --git a/results/fa_dfa_d256_L8_seed4/results_cifar10.json b/results/fa_dfa_d256_L8_seed4/results_cifar10.json new file mode 100644 index 0000000..18ac505 --- /dev/null +++ b/results/fa_dfa_d256_L8_seed4/results_cifar10.json @@ -0,0 +1,881 @@ +{ + "4": { + "dfa": { + "log": { + "train_loss": [ + 2.0656567663574217, + 2.0505859230041503, + 2.053203703994751, + 2.0526840745544432, + 2.0508153134155274, + 2.0500064964294435, + 2.0487480962753297, + 2.049719909591675, + 2.046275976715088, + 2.0458218814468383, + 2.0437327142333985, + 2.042165072555542, + 2.0446242738342284, + 2.0408524193572997, + 2.042093524932861, + 2.0402661250305174, + 2.0408810894012452, + 2.0373284103012086, + 2.03862116607666, + 2.035870743408203, + 2.0366722262573242, + 2.0368052173614504, + 2.0363987246322632, + 2.0385638175201417, + 2.0332528368377685, + 2.0347424139404295, + 2.0324463927841188, + 2.0332344970321654, + 2.031939734954834, + 2.0355701583862307, + 2.0314910345077513, + 2.031226405334473, + 2.0333832290649414, + 2.031931709365845, + 2.032775154876709, + 2.0315535527038575, + 2.030991368255615, + 2.031312015762329, + 2.0315300573349, + 2.0294947302246094, + 2.0329836825180054, + 2.0305798300933837, + 2.0313154915618896, + 2.030211465911865, + 2.0333448963928222, + 2.032171858444214, + 2.0326642723083497, + 2.029641508178711, + 2.030972560157776, + 2.03141214263916, + 2.0315414978790285, + 2.031778284568787, + 2.0305172948837282, + 2.029238556213379, + 2.0308371866607664, + 2.029705788726807, + 2.031071273651123, + 2.031551382446289, + 2.0320941706085205, + 2.0302660203552247, + 2.02909424407959, + 2.0317608085632326, + 2.0301849742126463, + 2.0325209705352782, + 2.0291553590393065, + 2.029445772399902, + 2.029556517715454, + 2.030448714065552, + 2.0291586400985717, + 2.030339909133911, + 2.0314291301727296, + 2.02971793800354, + 2.0307174551391602, + 2.0306736737823488, + 2.02974597366333, + 2.0310014685058593, + 2.0306331666564943, + 2.0300069621276857, + 2.0305981512069704, + 2.0304822917938234, + 2.0307031988525392, + 2.029250856361389, + 2.0301674210357668, + 2.0295850566864013, + 2.0306209880828856, + 2.029310823135376, + 2.031099863433838, + 2.029841015853882, + 2.028756529998779, + 2.028993240814209, + 2.028344483795166, + 2.0281790949249268, + 2.0280795663452147, + 2.0291675205230715, + 2.0298909065246584, + 2.029457865447998, + 2.029133120574951, + 2.0299807266998293, + 2.028713212356567, + 2.029348062210083 + ], + "train_acc": [ + 0.24136, + 0.24482, + 0.2427, + 0.24432, + 0.24608, + 0.24552, + 0.24764, + 0.2474, + 0.25004, + 0.24796, + 0.24904, + 0.25112, + 0.25108, + 0.25374, + 0.25486, + 0.25144, + 0.25464, + 0.2558, + 0.25348, + 0.25578, + 0.25414, + 0.2542, + 0.2566, + 0.25478, + 0.25908, + 0.2592, + 0.2572, + 0.25702, + 0.25768, + 0.25918, + 0.25924, + 0.25932, + 0.2585, + 0.25922, + 0.26328, + 0.25904, + 0.25988, + 0.26086, + 0.25982, + 0.26208, + 0.25958, + 0.26294, + 0.26084, + 0.26124, + 0.25758, + 0.25902, + 0.2607, + 0.25844, + 0.26254, + 0.26218, + 0.26206, + 0.25944, + 0.26196, + 0.26358, + 0.26044, + 0.26258, + 0.26298, + 0.26036, + 0.26196, + 0.26234, + 0.2623, + 0.26234, + 0.26312, + 0.262, + 0.26436, + 0.2627, + 0.2642, + 0.26248, + 0.26322, + 0.26162, + 0.26198, + 0.26202, + 0.26128, + 0.26422, + 0.26246, + 0.26314, + 0.26326, + 0.26486, + 0.26184, + 0.2653, + 0.26566, + 0.26624, + 0.26518, + 0.2621, + 0.26352, + 0.26428, + 0.26192, + 0.26372, + 0.26478, + 0.26346, + 0.26528, + 0.26426, + 0.2625, + 0.26412, + 0.26512, + 0.26276, + 0.26434, + 0.26354, + 0.2629, + 0.26342 + ], + "test_acc": [ + 0.2692, + 0.2577, + 0.2647, + 0.2579, + 0.2596, + 0.2707, + 0.2687, + 0.2731, + 0.269, + 0.2517, + 0.2616, + 0.2484, + 0.2766, + 0.2724, + 0.276, + 0.272, + 0.2751, + 0.2806, + 0.2765, + 0.279, + 0.2773, + 0.2673, + 0.2747, + 0.277, + 0.269, + 0.2777, + 0.2753, + 0.2775, + 0.2797, + 0.2744, + 0.2772, + 0.2708, + 0.2839, + 0.2778, + 0.2803, + 0.2838, + 0.2723, + 0.2851, + 0.2776, + 0.2839, + 0.2736, + 0.2716, + 0.2746, + 0.2758, + 0.2722, + 0.2781, + 0.2784, + 0.2804, + 0.2746, + 0.2794, + 0.2783, + 0.2786, + 0.2876, + 0.2727, + 0.2771, + 0.2809, + 0.2826, + 0.2797, + 0.2732, + 0.2775, + 0.2641, + 0.2775, + 0.2755, + 0.2821, + 0.2624, + 0.2773, + 0.2849, + 0.2705, + 0.2759, + 0.2828, + 0.2814, + 0.2828, + 0.2826, + 0.2761, + 0.2845, + 0.2793, + 0.2823, + 0.2763, + 0.2854, + 0.2845, + 0.2802, + 0.2804, + 0.2746, + 0.2779, + 0.2781, + 0.2789, + 0.2808, + 0.2824, + 0.2812, + 0.2792, + 0.2791, + 0.2822, + 0.2818, + 0.2815, + 0.2805, + 0.2815, + 0.2816, + 0.2813, + 0.2813, + 0.2814 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.3630167841911316, + 0.004104766063392162, + -0.0002518058172427118, + -0.0025237088557332754, + -0.003213974880054593, + 0.0022183996625244617, + 0.0008768833940848708, + -0.0017630010843276978 + ], + "perturbation_rho": [ + 0.006874069571495056, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -2.2398307919502258e-07, + 0.0, + 0.0, + -5.587935447692871e-09, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -6.924383342266083e-07, + 0.0, + 0.0, + -5.587935447692871e-09, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.01": [ + -2.5294721126556396e-06, + 0.0, + -9.313225746154785e-10, + -5.587935447692871e-09, + 0.0, + 0.0, + 0.0, + 0.0 + ] + }, + "hidden_norms_per_layer": [ + 47186.2265625, + 482479744.0, + 795204864.0, + 1384574336.0, + 2104582784.0, + 2617342720.0, + 2587617024.0, + 2589698816.0, + 2634861312.0 + ], + "bp_grad_norms_per_layer": [ + 2.55692498285498e-07, + 7.717193550860202e-10, + 7.686514758020735e-10, + 7.652692923798554e-10, + 7.633532139728061e-10, + 7.631562048970864e-10, + 7.626149711725816e-10, + 7.631405507524391e-10, + 7.631412723974051e-10 + ] + }, + "drift": { + "embed.weight": 386.6934065819407, + "embed.bias": 327.73324641624026, + "blocks.0.ln.weight": 10.576894760131836, + "blocks.0.w1.weight": 252.2875271106343, + "blocks.0.w1.bias": 255.16348096251252, + "blocks.0.w2.weight": 532.796399887916, + "blocks.1.ln.weight": 9.370871543884277, + "blocks.1.w1.weight": 248.217891631117, + "blocks.1.w1.bias": 225.83698087753675, + "blocks.1.w2.weight": 368.8833971864335, + "blocks.2.ln.weight": 10.612818717956543, + "blocks.2.w1.weight": 309.33943112065816, + "blocks.2.w1.bias": 301.26066223306907, + "blocks.2.w2.weight": 459.4422042261257, + "blocks.3.ln.weight": 11.764174461364746, + "blocks.3.w1.weight": 329.2768198307833, + "blocks.3.w1.bias": 336.9945554639267, + "blocks.3.w2.weight": 474.12186702206924, + "blocks.4.ln.weight": 12.292579650878906, + "blocks.4.w1.weight": 343.47770155099636, + "blocks.4.w1.bias": 332.7847814820698, + "blocks.4.w2.weight": 472.8538820952661, + "blocks.5.ln.weight": 8.211516380310059, + "blocks.5.w1.weight": 222.56739284033247, + "blocks.5.w1.bias": 206.0998611048773, + "blocks.5.w2.weight": 305.4202607785938, + "blocks.6.ln.weight": 9.443477630615234, + "blocks.6.w1.weight": 257.25562261225616, + "blocks.6.w1.bias": 242.26681812434325, + "blocks.6.w2.weight": 326.61842660972985, + "blocks.7.ln.weight": 8.36430549621582, + "blocks.7.w1.weight": 208.0675552186769, + "blocks.7.w1.bias": 201.05667739507527, + "blocks.7.w2.weight": 263.052196747128, + "out_ln.weight": 0.8603641390800476, + "out_head.weight": 9.058541297614818, + "out_head.bias": 0.2966235995908337 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.077447769546509, + 2.011511716918945, + 1.9823692067718506, + 1.9588359634780883, + 1.9388098348999023, + 1.9257721879577636, + 1.9099706296539307, + 1.9016640214538574, + 1.8879541109466553, + 1.8786920461654664, + 1.8665837494277955, + 1.8579063525009156, + 1.8527830486297607, + 1.846642380104065, + 1.8408584032440185, + 1.8423255667495728, + 1.842407624206543, + 1.83341695854187, + 1.8353786220550536, + 1.8293717947006225, + 1.8322698459243774, + 1.8300572503280639, + 1.8311189281845093, + 1.8299159336090087, + 1.8229221743774413, + 1.8264959131240845, + 1.8210881603622437, + 1.8220705929946899, + 1.8175409701538086, + 1.822826947631836, + 1.8118110262680054, + 1.8122388744735718, + 1.8130562100982666, + 1.806387862510681, + 1.80971314163208, + 1.804927230606079, + 1.800072886314392, + 1.803294679031372, + 1.801126385574341, + 1.8011314083862304, + 1.797285436630249, + 1.7961348217391968, + 1.797834702682495, + 1.794442939529419, + 1.7977431702041626, + 1.7928060857391357, + 1.7940230228424072, + 1.789833497238159, + 1.7892559197235107, + 1.7902562041473389, + 1.7880674313735962, + 1.7850274938201904, + 1.7843693801879883, + 1.7830010782623291, + 1.7847207107925416, + 1.784270888710022, + 1.7800133163833618, + 1.7805128693389893, + 1.7791845178604127, + 1.778016010169983, + 1.776533423500061, + 1.7764989660644532, + 1.7743354483032228, + 1.7752654462051392, + 1.7738590878677367, + 1.7712104555892945, + 1.7690131970214844, + 1.7739268627548217, + 1.7680237869644164, + 1.7693133504867553, + 1.7714519711303711, + 1.767369662475586, + 1.7680323779296876, + 1.7669272440338135, + 1.7676428430175781, + 1.7666232497787475, + 1.7640809685516357, + 1.7647571885681153, + 1.7644749606323242, + 1.763671007080078, + 1.7634928821182252, + 1.7633259762573241, + 1.7641471377563476, + 1.7631501617050171, + 1.7632150897598267, + 1.7611829147720337, + 1.7612092932891845, + 1.7640886084747314, + 1.7649920001220702, + 1.7596327584838867, + 1.7607089739227295, + 1.7609526915740967, + 1.7600691442871095, + 1.7606582910919188, + 1.7567459772491456, + 1.7617831900405885, + 1.7606757662963868, + 1.7583884005737305, + 1.755460721168518, + 1.7597045026397704 + ], + "train_acc": [ + 0.23164, + 0.25566, + 0.26534, + 0.2785, + 0.29544, + 0.30102, + 0.30646, + 0.30954, + 0.31586, + 0.3198, + 0.3232, + 0.33236, + 0.33444, + 0.3363, + 0.33972, + 0.34014, + 0.34228, + 0.3414, + 0.34292, + 0.34472, + 0.343, + 0.34502, + 0.3443, + 0.3446, + 0.34886, + 0.34656, + 0.34782, + 0.34692, + 0.34918, + 0.34708, + 0.3504, + 0.3498, + 0.35128, + 0.3542, + 0.35568, + 0.35602, + 0.3552, + 0.3547, + 0.35416, + 0.35536, + 0.3587, + 0.35762, + 0.35662, + 0.35792, + 0.3559, + 0.35938, + 0.35752, + 0.35952, + 0.3605, + 0.36166, + 0.36062, + 0.36, + 0.36316, + 0.36162, + 0.35992, + 0.36142, + 0.36284, + 0.36478, + 0.36274, + 0.36296, + 0.36524, + 0.3655, + 0.36454, + 0.36344, + 0.36574, + 0.36768, + 0.3671, + 0.36604, + 0.36738, + 0.36676, + 0.36726, + 0.36788, + 0.36766, + 0.37, + 0.36784, + 0.36824, + 0.36952, + 0.3703, + 0.37022, + 0.36936, + 0.37064, + 0.36912, + 0.37038, + 0.37162, + 0.36972, + 0.37024, + 0.37328, + 0.37162, + 0.3707, + 0.37108, + 0.37054, + 0.37196, + 0.37158, + 0.3725, + 0.37438, + 0.3719, + 0.37154, + 0.37274, + 0.3749, + 0.37412 + ], + "test_acc": [ + 0.2691, + 0.2769, + 0.2897, + 0.3114, + 0.3126, + 0.3325, + 0.3278, + 0.3359, + 0.3422, + 0.3373, + 0.3566, + 0.3503, + 0.367, + 0.3714, + 0.3746, + 0.3677, + 0.3815, + 0.3797, + 0.375, + 0.3784, + 0.3769, + 0.3722, + 0.377, + 0.3838, + 0.3768, + 0.3804, + 0.3876, + 0.3857, + 0.3861, + 0.3838, + 0.3869, + 0.3788, + 0.3858, + 0.3869, + 0.3815, + 0.3819, + 0.3849, + 0.3873, + 0.3869, + 0.3854, + 0.3836, + 0.386, + 0.3849, + 0.3932, + 0.3901, + 0.3862, + 0.391, + 0.3902, + 0.3876, + 0.3889, + 0.3858, + 0.3899, + 0.3896, + 0.3882, + 0.3857, + 0.3939, + 0.3952, + 0.3855, + 0.3793, + 0.3886, + 0.3887, + 0.3886, + 0.3887, + 0.39, + 0.3859, + 0.3877, + 0.3918, + 0.3899, + 0.3918, + 0.3949, + 0.3979, + 0.3944, + 0.3961, + 0.3959, + 0.3911, + 0.3966, + 0.3959, + 0.3961, + 0.4004, + 0.3939, + 0.3939, + 0.392, + 0.3927, + 0.3958, + 0.3961, + 0.3954, + 0.396, + 0.3944, + 0.3953, + 0.3952, + 0.395, + 0.394, + 0.3957, + 0.3955, + 0.3958, + 0.396, + 0.3956, + 0.3963, + 0.396, + 0.3962 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.04865531623363495, + 0.03906121850013733, + -0.031024880707263947, + 0.02575453743338585, + -0.06059260666370392, + -0.005358260590583086, + -0.0420413613319397, + 0.9973955154418945 + ], + "perturbation_rho": [ + 0.01942356675863266, + -0.017882898449897766, + 0.010590418241918087, + 0.01589794270694256, + 0.00590391643345356, + -0.0037763454020023346, + 0.034376855939626694, + 0.024630604311823845 + ], + "nudging": { + "0.001": [ + -2.2064195945858955e-06, + 5.948822945356369e-08, + 1.394655555486679e-07, + 5.5995769798755646e-08, + 8.498318493366241e-08, + 3.748573362827301e-08, + -1.8510036170482635e-08, + -9.316718205809593e-07 + ], + "0.003": [ + -6.74789771437645e-06, + -2.5331974029541016e-07, + 2.5168992578983307e-07, + -4.540197551250458e-08, + 2.979068085551262e-07, + 5.855690687894821e-08, + 2.4668406695127487e-07, + -3.4426338970661163e-06 + ], + "0.01": [ + -2.2898893803358078e-05, + -1.1182855814695358e-06, + 5.499459803104401e-07, + -2.2863969206809998e-07, + 8.718343451619148e-07, + 9.883660823106766e-08, + 5.047768354415894e-07, + -1.27346720546484e-05 + ] + }, + "hidden_norms_per_layer": [ + 6722.4755859375, + 51687.578125, + 543854.4375, + 751334.8125, + 919391.5625, + 1085895.375, + 1253076.875, + 1512727.75, + 818620.0625 + ], + "bp_grad_norms_per_layer": [ + 2.1660385755239986e-05, + 1.6567922784815892e-06, + 6.900805828990997e-07, + 6.877048690512311e-07, + 6.878757972117455e-07, + 6.88388126945938e-07, + 6.882298180244106e-07, + 6.877112355141435e-07, + 6.86944076733198e-07 + ] + }, + "drift": { + "embed.weight": 58.79676538348338, + "embed.bias": 28.710195472726856, + "blocks.0.ln.weight": 1.8708003759384155, + "blocks.0.w1.weight": 18.96163367141555, + "blocks.0.w1.bias": 15.000532672986424, + "blocks.0.w2.weight": 84.68199811589957, + "blocks.1.ln.weight": 1.7282594442367554, + "blocks.1.w1.weight": 26.714732007818075, + "blocks.1.w1.bias": 22.78624792045308, + "blocks.1.w2.weight": 60.87295612341078, + "blocks.2.ln.weight": 0.7759197950363159, + "blocks.2.w1.weight": 21.60045634414882, + "blocks.2.w1.bias": 24.705897270945115, + "blocks.2.w2.weight": 43.1465578222564, + "blocks.3.ln.weight": 0.9216886162757874, + "blocks.3.w1.weight": 21.587181438217584, + "blocks.3.w1.bias": 24.852801390777184, + "blocks.3.w2.weight": 60.24516088646971, + "blocks.4.ln.weight": 1.012886643409729, + "blocks.4.w1.weight": 22.29008054548115, + "blocks.4.w1.bias": 24.143128403264154, + "blocks.4.w2.weight": 62.903896132871985, + "blocks.5.ln.weight": 1.0267071723937988, + "blocks.5.w1.weight": 23.703189005498285, + "blocks.5.w1.bias": 24.657295832394876, + "blocks.5.w2.weight": 62.36992788497744, + "blocks.6.ln.weight": 1.0589414834976196, + "blocks.6.w1.weight": 24.22469961585016, + "blocks.6.w1.bias": 25.501732480992107, + "blocks.6.w2.weight": 47.57630517495101, + "blocks.7.ln.weight": 1.0684436559677124, + "blocks.7.w1.weight": 25.054823172351234, + "blocks.7.w1.bias": 28.09669994835103, + "blocks.7.w2.weight": 54.10842857089798, + "out_ln.weight": 0.41823405027389526, + "out_head.weight": 6.413607955937394, + "out_head.bias": 0.8745995262572578 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 256, + "num_blocks": 8, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 4 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d256_L8_seed4", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file |
