diff options
Diffstat (limited to 'runs/srm_smoke_500_h256.json')
| -rw-r--r-- | runs/srm_smoke_500_h256.json | 2712 |
1 files changed, 2712 insertions, 0 deletions
diff --git a/runs/srm_smoke_500_h256.json b/runs/srm_smoke_500_h256.json new file mode 100644 index 0000000..728ef05 --- /dev/null +++ b/runs/srm_smoke_500_h256.json @@ -0,0 +1,2712 @@ +{ + "args": { + "data_path": "/home/yurenh2/rrm/data/sudoku-extreme-1k-aug-1000", + "n_steps": 500, + "batch_size": 8, + "lr": 0.0001, + "puzzle_emb_lr": 0.0001, + "warmup_steps": 100, + "weight_decay": 1.0, + "hidden_size": 256, + "n_iters": 6, + "n_aol_layers": 2, + "kappa": 0.9, + "eta": 1.0, + "alpha": 1.0, + "k_lyap": 2, + "lyap_iters": 4, + "lyap_every": 25, + "seed": 42, + "eval_every": 100, + "eval_n": 256, + "eval_batch_size": 32, + "out": "runs/srm_smoke_500_h256.json", + "save_ckpt": "" + }, + "n_params": 939314, + "initial_acc": 0.0, + "initial_tok_acc": 0.0, + "initial_lip": { + "lip_emp_mean": 0.1357038915157318, + "lip_emp_max": 0.17765390872955322, + "lip_emp_99p": 0.17537692189216614, + "lip_theoretical_bound": 0.9, + "passes_bound": true + }, + "steps": [ + { + "step": 0, + "lr": 0.0, + "sup_loss": 2.8284496844095326, + "lyap1_mean": -6.2286057472229, + "lyap1_max": -6.200310230255127, + "lyap_spec_mean": [ + -6.2286057472229, + -6.228340148925781 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 1, + "lr": 1e-06, + "sup_loss": 2.838926445513094 + }, + { + "step": 2, + "lr": 2e-06, + "sup_loss": 2.834020958290193 + }, + { + "step": 3, + "lr": 3e-06, + "sup_loss": 2.8271419600543934 + }, + { + "step": 4, + "lr": 4e-06, + "sup_loss": 2.8336220039837565 + }, + { + "step": 5, + "lr": 5e-06, + "sup_loss": 2.847244138413309 + }, + { + "step": 6, + "lr": 6e-06, + "sup_loss": 2.8296374251845307 + }, + { + "step": 7, + "lr": 7e-06, + "sup_loss": 2.816413976469476 + }, + { + "step": 8, + "lr": 8e-06, + "sup_loss": 2.8221126965976864 + }, + { + "step": 9, + "lr": 9e-06, + "sup_loss": 2.8106848008287297 + }, + { + "step": 10, + "lr": 1e-05, + "sup_loss": 2.8203775573541052 + }, + { + "step": 11, + "lr": 1.1000000000000001e-05, + "sup_loss": 2.8181432855784427 + }, + { + "step": 12, + "lr": 1.2e-05, + "sup_loss": 2.8234647595703923 + }, + { + "step": 13, + "lr": 1.3000000000000001e-05, + "sup_loss": 2.819964976625202 + }, + { + "step": 14, + "lr": 1.4e-05, + "sup_loss": 2.777240835924918 + }, + { + "step": 15, + "lr": 1.5e-05, + "sup_loss": 2.796632874074624 + }, + { + "step": 16, + "lr": 1.6e-05, + "sup_loss": 2.80353484864027 + }, + { + "step": 17, + "lr": 1.7e-05, + "sup_loss": 2.7731115467511405 + }, + { + "step": 18, + "lr": 1.8e-05, + "sup_loss": 2.776553477441689 + }, + { + "step": 19, + "lr": 1.9e-05, + "sup_loss": 2.764454348935523 + }, + { + "step": 20, + "lr": 2e-05, + "sup_loss": 2.763660193795498 + }, + { + "step": 21, + "lr": 2.1000000000000002e-05, + "sup_loss": 2.7644818192133025 + }, + { + "step": 22, + "lr": 2.2000000000000003e-05, + "sup_loss": 2.720537518373678 + }, + { + "step": 23, + "lr": 2.3e-05, + "sup_loss": 2.7332803327454616 + }, + { + "step": 24, + "lr": 2.4e-05, + "sup_loss": 2.7274532837280776 + }, + { + "step": 25, + "lr": 2.5e-05, + "sup_loss": 2.7009521547955337, + "lyap1_mean": -6.240983009338379, + "lyap1_max": -6.22477388381958, + "lyap_spec_mean": [ + -6.240983009338379, + -6.240518093109131 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 26, + "lr": 2.6000000000000002e-05, + "sup_loss": 2.7118575888452168 + }, + { + "step": 27, + "lr": 2.7000000000000002e-05, + "sup_loss": 2.676175428796176 + }, + { + "step": 28, + "lr": 2.8e-05, + "sup_loss": 2.693807866858765 + }, + { + "step": 29, + "lr": 2.9000000000000004e-05, + "sup_loss": 2.682955672998549 + }, + { + "step": 30, + "lr": 3e-05, + "sup_loss": 2.654173913401533 + }, + { + "step": 31, + "lr": 3.1e-05, + "sup_loss": 2.6763871059258495 + }, + { + "step": 32, + "lr": 3.2e-05, + "sup_loss": 2.644394835134615 + }, + { + "step": 33, + "lr": 3.3e-05, + "sup_loss": 2.6299395433077652 + }, + { + "step": 34, + "lr": 3.4e-05, + "sup_loss": 2.6242866722739664 + }, + { + "step": 35, + "lr": 3.5000000000000004e-05, + "sup_loss": 2.616557927761839 + }, + { + "step": 36, + "lr": 3.6e-05, + "sup_loss": 2.5994089813723797 + }, + { + "step": 37, + "lr": 3.7000000000000005e-05, + "sup_loss": 2.5626432829550247 + }, + { + "step": 38, + "lr": 3.8e-05, + "sup_loss": 2.5606932168456327 + }, + { + "step": 39, + "lr": 3.9e-05, + "sup_loss": 2.546780039790928 + }, + { + "step": 40, + "lr": 4e-05, + "sup_loss": 2.520369392127516 + }, + { + "step": 41, + "lr": 4.1e-05, + "sup_loss": 2.5127611686076583 + }, + { + "step": 42, + "lr": 4.2000000000000004e-05, + "sup_loss": 2.5208038257044745 + }, + { + "step": 43, + "lr": 4.3e-05, + "sup_loss": 2.493714124425292 + }, + { + "step": 44, + "lr": 4.4000000000000006e-05, + "sup_loss": 2.480158985877274 + }, + { + "step": 45, + "lr": 4.5e-05, + "sup_loss": 2.4361940027334783 + }, + { + "step": 46, + "lr": 4.6e-05, + "sup_loss": 2.4460679490083947 + }, + { + "step": 47, + "lr": 4.7000000000000004e-05, + "sup_loss": 2.4495761120800132 + }, + { + "step": 48, + "lr": 4.8e-05, + "sup_loss": 2.4131285345159026 + }, + { + "step": 49, + "lr": 4.9e-05, + "sup_loss": 2.3999822324103657 + }, + { + "step": 50, + "lr": 5e-05, + "sup_loss": 2.4029195286821534, + "lyap1_mean": -6.2544169425964355, + "lyap1_max": -6.231961250305176, + "lyap_spec_mean": [ + -6.2544169425964355, + -6.252962112426758 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 51, + "lr": 5.1000000000000006e-05, + "sup_loss": 2.366847464160111 + }, + { + "step": 52, + "lr": 5.2000000000000004e-05, + "sup_loss": 2.33094637470472 + }, + { + "step": 53, + "lr": 5.3e-05, + "sup_loss": 2.329777112486255 + }, + { + "step": 54, + "lr": 5.4000000000000005e-05, + "sup_loss": 2.334095543782154 + }, + { + "step": 55, + "lr": 5.500000000000001e-05, + "sup_loss": 2.3051139076185523 + }, + { + "step": 56, + "lr": 5.6e-05, + "sup_loss": 2.3167502882455797 + }, + { + "step": 57, + "lr": 5.7e-05, + "sup_loss": 2.2903970820408137 + }, + { + "step": 58, + "lr": 5.800000000000001e-05, + "sup_loss": 2.274886117171231 + }, + { + "step": 59, + "lr": 5.9e-05, + "sup_loss": 2.2313597888611825 + }, + { + "step": 60, + "lr": 6e-05, + "sup_loss": 2.2339985174568273 + }, + { + "step": 61, + "lr": 6.1000000000000005e-05, + "sup_loss": 2.223536861505 + }, + { + "step": 62, + "lr": 6.2e-05, + "sup_loss": 2.201599151730311 + }, + { + "step": 63, + "lr": 6.3e-05, + "sup_loss": 2.1832543882469557 + }, + { + "step": 64, + "lr": 6.4e-05, + "sup_loss": 2.1580907568451795 + }, + { + "step": 65, + "lr": 6.500000000000001e-05, + "sup_loss": 2.1679979832095837 + }, + { + "step": 66, + "lr": 6.6e-05, + "sup_loss": 2.151159814374817 + }, + { + "step": 67, + "lr": 6.7e-05, + "sup_loss": 2.132176910649508 + }, + { + "step": 68, + "lr": 6.8e-05, + "sup_loss": 2.123453724015459 + }, + { + "step": 69, + "lr": 6.900000000000001e-05, + "sup_loss": 2.100389289485579 + }, + { + "step": 70, + "lr": 7.000000000000001e-05, + "sup_loss": 2.100101099728348 + }, + { + "step": 71, + "lr": 7.1e-05, + "sup_loss": 2.082929395622361 + }, + { + "step": 72, + "lr": 7.2e-05, + "sup_loss": 2.0613085193351983 + }, + { + "step": 73, + "lr": 7.3e-05, + "sup_loss": 2.043758664648479 + }, + { + "step": 74, + "lr": 7.400000000000001e-05, + "sup_loss": 2.0471150778750125 + }, + { + "step": 75, + "lr": 7.500000000000001e-05, + "sup_loss": 2.043438499353482, + "lyap1_mean": -6.26137638092041, + "lyap1_max": -6.241876125335693, + "lyap_spec_mean": [ + -6.261376857757568, + -6.260560035705566 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 76, + "lr": 7.6e-05, + "sup_loss": 2.0290594292513395 + }, + { + "step": 77, + "lr": 7.7e-05, + "sup_loss": 2.017350256019743 + }, + { + "step": 78, + "lr": 7.8e-05, + "sup_loss": 2.010315789581182 + }, + { + "step": 79, + "lr": 7.900000000000001e-05, + "sup_loss": 2.007747901168263 + }, + { + "step": 80, + "lr": 8e-05, + "sup_loss": 1.9878722973916485 + }, + { + "step": 81, + "lr": 8.099999999999999e-05, + "sup_loss": 1.9820387430330704 + }, + { + "step": 82, + "lr": 8.2e-05, + "sup_loss": 1.960933098122315 + }, + { + "step": 83, + "lr": 8.3e-05, + "sup_loss": 1.9703326768508003 + }, + { + "step": 84, + "lr": 8.400000000000001e-05, + "sup_loss": 1.9608279961175652 + }, + { + "step": 85, + "lr": 8.5e-05, + "sup_loss": 1.9439071529032013 + }, + { + "step": 86, + "lr": 8.6e-05, + "sup_loss": 1.940231741846871 + }, + { + "step": 87, + "lr": 8.700000000000001e-05, + "sup_loss": 1.9256818466494516 + }, + { + "step": 88, + "lr": 8.800000000000001e-05, + "sup_loss": 1.9422044059735057 + }, + { + "step": 89, + "lr": 8.9e-05, + "sup_loss": 1.9265779568349395 + }, + { + "step": 90, + "lr": 9e-05, + "sup_loss": 1.9107046426157344 + }, + { + "step": 91, + "lr": 9.1e-05, + "sup_loss": 1.9120888307606432 + }, + { + "step": 92, + "lr": 9.2e-05, + "sup_loss": 1.9039226834805385 + }, + { + "step": 93, + "lr": 9.300000000000001e-05, + "sup_loss": 1.8859638864585686 + }, + { + "step": 94, + "lr": 9.400000000000001e-05, + "sup_loss": 1.8935103237832633 + }, + { + "step": 95, + "lr": 9.499999999999999e-05, + "sup_loss": 1.8780442206074615 + }, + { + "step": 96, + "lr": 9.6e-05, + "sup_loss": 1.880738136494954 + }, + { + "step": 97, + "lr": 9.7e-05, + "sup_loss": 1.8727563337733835 + }, + { + "step": 98, + "lr": 9.8e-05, + "sup_loss": 1.8747788248049444 + }, + { + "step": 99, + "lr": 9.900000000000001e-05, + "sup_loss": 1.8535151587817753 + }, + { + "step": 100, + "lr": 0.0001, + "sup_loss": 1.8369216468190068, + "lyap1_mean": -6.2764668464660645, + "lyap1_max": -6.253869533538818, + "lyap_spec_mean": [ + -6.2764668464660645, + -6.276641368865967 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 101, + "lr": 0.0001, + "sup_loss": 1.8375981252343498 + }, + { + "step": 102, + "lr": 0.0001, + "sup_loss": 1.8414441493027138 + }, + { + "step": 103, + "lr": 0.0001, + "sup_loss": 1.841932872396079 + }, + { + "step": 104, + "lr": 0.0001, + "sup_loss": 1.8250621869367878 + }, + { + "step": 105, + "lr": 0.0001, + "sup_loss": 1.8180236366243632 + }, + { + "step": 106, + "lr": 0.0001, + "sup_loss": 1.8148333969430548 + }, + { + "step": 107, + "lr": 0.0001, + "sup_loss": 1.8097191971601063 + }, + { + "step": 108, + "lr": 0.0001, + "sup_loss": 1.8062571266936345 + }, + { + "step": 109, + "lr": 0.0001, + "sup_loss": 1.783310944279242 + }, + { + "step": 110, + "lr": 0.0001, + "sup_loss": 1.7882709675762618 + }, + { + "step": 111, + "lr": 0.0001, + "sup_loss": 1.7837487501971951 + }, + { + "step": 112, + "lr": 0.0001, + "sup_loss": 1.8040981386613326 + }, + { + "step": 113, + "lr": 0.0001, + "sup_loss": 1.7839348955084138 + }, + { + "step": 114, + "lr": 0.0001, + "sup_loss": 1.7808710490288988 + }, + { + "step": 115, + "lr": 0.0001, + "sup_loss": 1.73568023182333 + }, + { + "step": 116, + "lr": 0.0001, + "sup_loss": 1.7546620469313183 + }, + { + "step": 117, + "lr": 0.0001, + "sup_loss": 1.7751414856763805 + }, + { + "step": 118, + "lr": 0.0001, + "sup_loss": 1.752759060218006 + }, + { + "step": 119, + "lr": 0.0001, + "sup_loss": 1.7358692128944557 + }, + { + "step": 120, + "lr": 0.0001, + "sup_loss": 1.739228786343244 + }, + { + "step": 121, + "lr": 0.0001, + "sup_loss": 1.7493239323825307 + }, + { + "step": 122, + "lr": 0.0001, + "sup_loss": 1.7536705840390145 + }, + { + "step": 123, + "lr": 0.0001, + "sup_loss": 1.7502938192568702 + }, + { + "step": 124, + "lr": 0.0001, + "sup_loss": 1.7474968410988128 + }, + { + "step": 125, + "lr": 0.0001, + "sup_loss": 1.741263138180546, + "lyap1_mean": -6.289633750915527, + "lyap1_max": -6.271181106567383, + "lyap_spec_mean": [ + -6.289633750915527, + -6.290492534637451 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 126, + "lr": 0.0001, + "sup_loss": 1.7211279276855354 + }, + { + "step": 127, + "lr": 0.0001, + "sup_loss": 1.7073139543990044 + }, + { + "step": 128, + "lr": 0.0001, + "sup_loss": 1.7141654935069126 + }, + { + "step": 129, + "lr": 0.0001, + "sup_loss": 1.7005028367708452 + }, + { + "step": 130, + "lr": 0.0001, + "sup_loss": 1.6860979041501705 + }, + { + "step": 131, + "lr": 0.0001, + "sup_loss": 1.7066609179775223 + }, + { + "step": 132, + "lr": 0.0001, + "sup_loss": 1.6979615420852867 + }, + { + "step": 133, + "lr": 0.0001, + "sup_loss": 1.7273194417800906 + }, + { + "step": 134, + "lr": 0.0001, + "sup_loss": 1.7047646668485257 + }, + { + "step": 135, + "lr": 0.0001, + "sup_loss": 1.716210852207255 + }, + { + "step": 136, + "lr": 0.0001, + "sup_loss": 1.7307658980947846 + }, + { + "step": 137, + "lr": 0.0001, + "sup_loss": 1.711602512894522 + }, + { + "step": 138, + "lr": 0.0001, + "sup_loss": 1.7057358493816908 + }, + { + "step": 139, + "lr": 0.0001, + "sup_loss": 1.688446466585652 + }, + { + "step": 140, + "lr": 0.0001, + "sup_loss": 1.704572577683926 + }, + { + "step": 141, + "lr": 0.0001, + "sup_loss": 1.7304753449546095 + }, + { + "step": 142, + "lr": 0.0001, + "sup_loss": 1.6883580864485974 + }, + { + "step": 143, + "lr": 0.0001, + "sup_loss": 1.6718549694522848 + }, + { + "step": 144, + "lr": 0.0001, + "sup_loss": 1.7193390482772282 + }, + { + "step": 145, + "lr": 0.0001, + "sup_loss": 1.6713528463158704 + }, + { + "step": 146, + "lr": 0.0001, + "sup_loss": 1.6959659224268964 + }, + { + "step": 147, + "lr": 0.0001, + "sup_loss": 1.6890260775715398 + }, + { + "step": 148, + "lr": 0.0001, + "sup_loss": 1.6248832250680991 + }, + { + "step": 149, + "lr": 0.0001, + "sup_loss": 1.654215549400142 + }, + { + "step": 150, + "lr": 0.0001, + "sup_loss": 1.6615707449121377, + "lyap1_mean": -6.306196689605713, + "lyap1_max": -6.2920122146606445, + "lyap_spec_mean": [ + -6.306196212768555, + -6.305814266204834 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 151, + "lr": 0.0001, + "sup_loss": 1.6810928502731424 + }, + { + "step": 152, + "lr": 0.0001, + "sup_loss": 1.6695774965144798 + }, + { + "step": 153, + "lr": 0.0001, + "sup_loss": 1.6444260938397506 + }, + { + "step": 154, + "lr": 0.0001, + "sup_loss": 1.6821734376746678 + }, + { + "step": 155, + "lr": 0.0001, + "sup_loss": 1.6728631290634948 + }, + { + "step": 156, + "lr": 0.0001, + "sup_loss": 1.6820655261417026 + }, + { + "step": 157, + "lr": 0.0001, + "sup_loss": 1.6516146373078011 + }, + { + "step": 158, + "lr": 0.0001, + "sup_loss": 1.6530359901341733 + }, + { + "step": 159, + "lr": 0.0001, + "sup_loss": 1.684539741706318 + }, + { + "step": 160, + "lr": 0.0001, + "sup_loss": 1.6740305810886589 + }, + { + "step": 161, + "lr": 0.0001, + "sup_loss": 1.60870367198615 + }, + { + "step": 162, + "lr": 0.0001, + "sup_loss": 1.6475936644805484 + }, + { + "step": 163, + "lr": 0.0001, + "sup_loss": 1.6515501185205237 + }, + { + "step": 164, + "lr": 0.0001, + "sup_loss": 1.6180064545910822 + }, + { + "step": 165, + "lr": 0.0001, + "sup_loss": 1.6406713466514131 + }, + { + "step": 166, + "lr": 0.0001, + "sup_loss": 1.6452356602937375 + }, + { + "step": 167, + "lr": 0.0001, + "sup_loss": 1.6201243770891205 + }, + { + "step": 168, + "lr": 0.0001, + "sup_loss": 1.665671342076336 + }, + { + "step": 169, + "lr": 0.0001, + "sup_loss": 1.6228934612102783 + }, + { + "step": 170, + "lr": 0.0001, + "sup_loss": 1.6389439525875207 + }, + { + "step": 171, + "lr": 0.0001, + "sup_loss": 1.6387175869530952 + }, + { + "step": 172, + "lr": 0.0001, + "sup_loss": 1.6356821068128287 + }, + { + "step": 173, + "lr": 0.0001, + "sup_loss": 1.6217340583613125 + }, + { + "step": 174, + "lr": 0.0001, + "sup_loss": 1.612851693540602 + }, + { + "step": 175, + "lr": 0.0001, + "sup_loss": 1.6110975775679166, + "lyap1_mean": -6.304257392883301, + "lyap1_max": -6.284839153289795, + "lyap_spec_mean": [ + -6.304257392883301, + -6.30750846862793 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 176, + "lr": 0.0001, + "sup_loss": 1.61914691618028 + }, + { + "step": 177, + "lr": 0.0001, + "sup_loss": 1.618533874469112 + }, + { + "step": 178, + "lr": 0.0001, + "sup_loss": 1.6225511296653723 + }, + { + "step": 179, + "lr": 0.0001, + "sup_loss": 1.6409765068011015 + }, + { + "step": 180, + "lr": 0.0001, + "sup_loss": 1.6209402621335396 + }, + { + "step": 181, + "lr": 0.0001, + "sup_loss": 1.5903578498662685 + }, + { + "step": 182, + "lr": 0.0001, + "sup_loss": 1.6243800011765677 + }, + { + "step": 183, + "lr": 0.0001, + "sup_loss": 1.6207408935168828 + }, + { + "step": 184, + "lr": 0.0001, + "sup_loss": 1.6102868840186222 + }, + { + "step": 185, + "lr": 0.0001, + "sup_loss": 1.6001848591227634 + }, + { + "step": 186, + "lr": 0.0001, + "sup_loss": 1.572493708893578 + }, + { + "step": 187, + "lr": 0.0001, + "sup_loss": 1.612159538574632 + }, + { + "step": 188, + "lr": 0.0001, + "sup_loss": 1.5911217530125168 + }, + { + "step": 189, + "lr": 0.0001, + "sup_loss": 1.6293490369187993 + }, + { + "step": 190, + "lr": 0.0001, + "sup_loss": 1.6040246983038626 + }, + { + "step": 191, + "lr": 0.0001, + "sup_loss": 1.6171197527958248 + }, + { + "step": 192, + "lr": 0.0001, + "sup_loss": 1.6471899354369277 + }, + { + "step": 193, + "lr": 0.0001, + "sup_loss": 1.6034272540262462 + }, + { + "step": 194, + "lr": 0.0001, + "sup_loss": 1.624265957837026 + }, + { + "step": 195, + "lr": 0.0001, + "sup_loss": 1.6315334681058467 + }, + { + "step": 196, + "lr": 0.0001, + "sup_loss": 1.6182992051901102 + }, + { + "step": 197, + "lr": 0.0001, + "sup_loss": 1.6026089533859447 + }, + { + "step": 198, + "lr": 0.0001, + "sup_loss": 1.6068981420107973 + }, + { + "step": 199, + "lr": 0.0001, + "sup_loss": 1.6226155394597461 + }, + { + "step": 200, + "lr": 0.0001, + "sup_loss": 1.5908226672539212, + "lyap1_mean": -6.314453125, + "lyap1_max": -6.295107841491699, + "lyap_spec_mean": [ + -6.314453601837158, + -6.311760902404785 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 201, + "lr": 0.0001, + "sup_loss": 1.614759653061085 + }, + { + "step": 202, + "lr": 0.0001, + "sup_loss": 1.607828428395806 + }, + { + "step": 203, + "lr": 0.0001, + "sup_loss": 1.6031055264743805 + }, + { + "step": 204, + "lr": 0.0001, + "sup_loss": 1.62692512809244 + }, + { + "step": 205, + "lr": 0.0001, + "sup_loss": 1.6197515599744592 + }, + { + "step": 206, + "lr": 0.0001, + "sup_loss": 1.5945279526481753 + }, + { + "step": 207, + "lr": 0.0001, + "sup_loss": 1.600027352742842 + }, + { + "step": 208, + "lr": 0.0001, + "sup_loss": 1.6231346460135334 + }, + { + "step": 209, + "lr": 0.0001, + "sup_loss": 1.6045676349961688 + }, + { + "step": 210, + "lr": 0.0001, + "sup_loss": 1.612343899125989 + }, + { + "step": 211, + "lr": 0.0001, + "sup_loss": 1.572819151638782 + }, + { + "step": 212, + "lr": 0.0001, + "sup_loss": 1.5998946197093518 + }, + { + "step": 213, + "lr": 0.0001, + "sup_loss": 1.603872718653751 + }, + { + "step": 214, + "lr": 0.0001, + "sup_loss": 1.6064486221293404 + }, + { + "step": 215, + "lr": 0.0001, + "sup_loss": 1.5816140347137049 + }, + { + "step": 216, + "lr": 0.0001, + "sup_loss": 1.623563311041461 + }, + { + "step": 217, + "lr": 0.0001, + "sup_loss": 1.604757450088786 + }, + { + "step": 218, + "lr": 0.0001, + "sup_loss": 1.5856211225488168 + }, + { + "step": 219, + "lr": 0.0001, + "sup_loss": 1.5971583490656518 + }, + { + "step": 220, + "lr": 0.0001, + "sup_loss": 1.62120464502734 + }, + { + "step": 221, + "lr": 0.0001, + "sup_loss": 1.5521333505669912 + }, + { + "step": 222, + "lr": 0.0001, + "sup_loss": 1.5910480072140367 + }, + { + "step": 223, + "lr": 0.0001, + "sup_loss": 1.5878960555935198 + }, + { + "step": 224, + "lr": 0.0001, + "sup_loss": 1.52525868655689 + }, + { + "step": 225, + "lr": 0.0001, + "sup_loss": 1.6046161077785628, + "lyap1_mean": -6.307826995849609, + "lyap1_max": -6.286316394805908, + "lyap_spec_mean": [ + -6.307826995849609, + -6.311221122741699 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 226, + "lr": 0.0001, + "sup_loss": 1.5787563174285753 + }, + { + "step": 227, + "lr": 0.0001, + "sup_loss": 1.6046591313503475 + }, + { + "step": 228, + "lr": 0.0001, + "sup_loss": 1.6058233574528387 + }, + { + "step": 229, + "lr": 0.0001, + "sup_loss": 1.5708493836588395 + }, + { + "step": 230, + "lr": 0.0001, + "sup_loss": 1.5522555239571714 + }, + { + "step": 231, + "lr": 0.0001, + "sup_loss": 1.5866450454598817 + }, + { + "step": 232, + "lr": 0.0001, + "sup_loss": 1.5833501739262028 + }, + { + "step": 233, + "lr": 0.0001, + "sup_loss": 1.5781072194587922 + }, + { + "step": 234, + "lr": 0.0001, + "sup_loss": 1.5801195205359906 + }, + { + "step": 235, + "lr": 0.0001, + "sup_loss": 1.6097866642525447 + }, + { + "step": 236, + "lr": 0.0001, + "sup_loss": 1.5776707740181835 + }, + { + "step": 237, + "lr": 0.0001, + "sup_loss": 1.575370573452357 + }, + { + "step": 238, + "lr": 0.0001, + "sup_loss": 1.62187735175436 + }, + { + "step": 239, + "lr": 0.0001, + "sup_loss": 1.5719111789223998 + }, + { + "step": 240, + "lr": 0.0001, + "sup_loss": 1.6043917104768717 + }, + { + "step": 241, + "lr": 0.0001, + "sup_loss": 1.6115212118102145 + }, + { + "step": 242, + "lr": 0.0001, + "sup_loss": 1.5726522914277772 + }, + { + "step": 243, + "lr": 0.0001, + "sup_loss": 1.5747379788820692 + }, + { + "step": 244, + "lr": 0.0001, + "sup_loss": 1.602347413668085 + }, + { + "step": 245, + "lr": 0.0001, + "sup_loss": 1.561972573460076 + }, + { + "step": 246, + "lr": 0.0001, + "sup_loss": 1.579944843245691 + }, + { + "step": 247, + "lr": 0.0001, + "sup_loss": 1.56607517962854 + }, + { + "step": 248, + "lr": 0.0001, + "sup_loss": 1.5602422723452187 + }, + { + "step": 249, + "lr": 0.0001, + "sup_loss": 1.5481532506755131 + }, + { + "step": 250, + "lr": 0.0001, + "sup_loss": 1.6282572215428823, + "lyap1_mean": -6.311943054199219, + "lyap1_max": -6.285268306732178, + "lyap_spec_mean": [ + -6.311943054199219, + -6.315288543701172 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 251, + "lr": 0.0001, + "sup_loss": 1.5718172405729163 + }, + { + "step": 252, + "lr": 0.0001, + "sup_loss": 1.568016116494461 + }, + { + "step": 253, + "lr": 0.0001, + "sup_loss": 1.5892380828003114 + }, + { + "step": 254, + "lr": 0.0001, + "sup_loss": 1.5607691734001938 + }, + { + "step": 255, + "lr": 0.0001, + "sup_loss": 1.5788166224967786 + }, + { + "step": 256, + "lr": 0.0001, + "sup_loss": 1.5587572791400528 + }, + { + "step": 257, + "lr": 0.0001, + "sup_loss": 1.56509858619077 + }, + { + "step": 258, + "lr": 0.0001, + "sup_loss": 1.5588711212966218 + }, + { + "step": 259, + "lr": 0.0001, + "sup_loss": 1.579683401292984 + }, + { + "step": 260, + "lr": 0.0001, + "sup_loss": 1.5572354648170486 + }, + { + "step": 261, + "lr": 0.0001, + "sup_loss": 1.569087077453923 + }, + { + "step": 262, + "lr": 0.0001, + "sup_loss": 1.552346801093667 + }, + { + "step": 263, + "lr": 0.0001, + "sup_loss": 1.5730190896596785 + }, + { + "step": 264, + "lr": 0.0001, + "sup_loss": 1.5592950476448364 + }, + { + "step": 265, + "lr": 0.0001, + "sup_loss": 1.578283494419273 + }, + { + "step": 266, + "lr": 0.0001, + "sup_loss": 1.5317856999633797 + }, + { + "step": 267, + "lr": 0.0001, + "sup_loss": 1.5859820808572964 + }, + { + "step": 268, + "lr": 0.0001, + "sup_loss": 1.5849993822920676 + }, + { + "step": 269, + "lr": 0.0001, + "sup_loss": 1.5748490996922742 + }, + { + "step": 270, + "lr": 0.0001, + "sup_loss": 1.5941484134857486 + }, + { + "step": 271, + "lr": 0.0001, + "sup_loss": 1.5520565621157805 + }, + { + "step": 272, + "lr": 0.0001, + "sup_loss": 1.5736600032776027 + }, + { + "step": 273, + "lr": 0.0001, + "sup_loss": 1.5703344026318378 + }, + { + "step": 274, + "lr": 0.0001, + "sup_loss": 1.532767851458178 + }, + { + "step": 275, + "lr": 0.0001, + "sup_loss": 1.5764611628543121, + "lyap1_mean": -6.288212776184082, + "lyap1_max": -6.271989822387695, + "lyap_spec_mean": [ + -6.288212776184082, + -6.290098190307617 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 276, + "lr": 0.0001, + "sup_loss": 1.5627879826848854 + }, + { + "step": 277, + "lr": 0.0001, + "sup_loss": 1.5565442095845055 + }, + { + "step": 278, + "lr": 0.0001, + "sup_loss": 1.5479018965033426 + }, + { + "step": 279, + "lr": 0.0001, + "sup_loss": 1.542800690022451 + }, + { + "step": 280, + "lr": 0.0001, + "sup_loss": 1.5708411044949124 + }, + { + "step": 281, + "lr": 0.0001, + "sup_loss": 1.5704922914677404 + }, + { + "step": 282, + "lr": 0.0001, + "sup_loss": 1.548503892204476 + }, + { + "step": 283, + "lr": 0.0001, + "sup_loss": 1.557741653174852 + }, + { + "step": 284, + "lr": 0.0001, + "sup_loss": 1.5737748118594674 + }, + { + "step": 285, + "lr": 0.0001, + "sup_loss": 1.5217131287518002 + }, + { + "step": 286, + "lr": 0.0001, + "sup_loss": 1.5508766477654388 + }, + { + "step": 287, + "lr": 0.0001, + "sup_loss": 1.5681149739226237 + }, + { + "step": 288, + "lr": 0.0001, + "sup_loss": 1.533574656235852 + }, + { + "step": 289, + "lr": 0.0001, + "sup_loss": 1.5711439676427719 + }, + { + "step": 290, + "lr": 0.0001, + "sup_loss": 1.5541693707191668 + }, + { + "step": 291, + "lr": 0.0001, + "sup_loss": 1.53935113280687 + }, + { + "step": 292, + "lr": 0.0001, + "sup_loss": 1.5477303077305633 + }, + { + "step": 293, + "lr": 0.0001, + "sup_loss": 1.5411001058204383 + }, + { + "step": 294, + "lr": 0.0001, + "sup_loss": 1.566108745714733 + }, + { + "step": 295, + "lr": 0.0001, + "sup_loss": 1.5431387629950557 + }, + { + "step": 296, + "lr": 0.0001, + "sup_loss": 1.5719911366769372 + }, + { + "step": 297, + "lr": 0.0001, + "sup_loss": 1.5459029921829308 + }, + { + "step": 298, + "lr": 0.0001, + "sup_loss": 1.5684083943193499 + }, + { + "step": 299, + "lr": 0.0001, + "sup_loss": 1.5552347272080564 + }, + { + "step": 300, + "lr": 0.0001, + "sup_loss": 1.5612322177126692, + "lyap1_mean": -6.289813995361328, + "lyap1_max": -6.263297080993652, + "lyap_spec_mean": [ + -6.289813995361328, + -6.282321453094482 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 301, + "lr": 0.0001, + "sup_loss": 1.5511914784971887 + }, + { + "step": 302, + "lr": 0.0001, + "sup_loss": 1.5731978045920958 + }, + { + "step": 303, + "lr": 0.0001, + "sup_loss": 1.5566588069651133 + }, + { + "step": 304, + "lr": 0.0001, + "sup_loss": 1.5435834816545064 + }, + { + "step": 305, + "lr": 0.0001, + "sup_loss": 1.5338201112718275 + }, + { + "step": 306, + "lr": 0.0001, + "sup_loss": 1.5620164001408299 + }, + { + "step": 307, + "lr": 0.0001, + "sup_loss": 1.5461411309298845 + }, + { + "step": 308, + "lr": 0.0001, + "sup_loss": 1.556008165963498 + }, + { + "step": 309, + "lr": 0.0001, + "sup_loss": 1.5554833750200956 + }, + { + "step": 310, + "lr": 0.0001, + "sup_loss": 1.568397089848242 + }, + { + "step": 311, + "lr": 0.0001, + "sup_loss": 1.5708716932718876 + }, + { + "step": 312, + "lr": 0.0001, + "sup_loss": 1.5195926766923842 + }, + { + "step": 313, + "lr": 0.0001, + "sup_loss": 1.52811400474664 + }, + { + "step": 314, + "lr": 0.0001, + "sup_loss": 1.5539360130593718 + }, + { + "step": 315, + "lr": 0.0001, + "sup_loss": 1.5706093056068795 + }, + { + "step": 316, + "lr": 0.0001, + "sup_loss": 1.557120468837921 + }, + { + "step": 317, + "lr": 0.0001, + "sup_loss": 1.5822501082816158 + }, + { + "step": 318, + "lr": 0.0001, + "sup_loss": 1.542841458798004 + }, + { + "step": 319, + "lr": 0.0001, + "sup_loss": 1.5563868638207532 + }, + { + "step": 320, + "lr": 0.0001, + "sup_loss": 1.5360635360486008 + }, + { + "step": 321, + "lr": 0.0001, + "sup_loss": 1.5459013446151262 + }, + { + "step": 322, + "lr": 0.0001, + "sup_loss": 1.5867201314501376 + }, + { + "step": 323, + "lr": 0.0001, + "sup_loss": 1.5681245372992967 + }, + { + "step": 324, + "lr": 0.0001, + "sup_loss": 1.545037808734181 + }, + { + "step": 325, + "lr": 0.0001, + "sup_loss": 1.5807109835114312, + "lyap1_mean": -6.280990123748779, + "lyap1_max": -6.268655776977539, + "lyap_spec_mean": [ + -6.2809906005859375, + -6.283902168273926 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 326, + "lr": 0.0001, + "sup_loss": 1.5361487778088874 + }, + { + "step": 327, + "lr": 0.0001, + "sup_loss": 1.5569850983489886 + }, + { + "step": 328, + "lr": 0.0001, + "sup_loss": 1.5468934937491015 + }, + { + "step": 329, + "lr": 0.0001, + "sup_loss": 1.5269107579149266 + }, + { + "step": 330, + "lr": 0.0001, + "sup_loss": 1.5308932888735198 + }, + { + "step": 331, + "lr": 0.0001, + "sup_loss": 1.546140456130772 + }, + { + "step": 332, + "lr": 0.0001, + "sup_loss": 1.556001616894801 + }, + { + "step": 333, + "lr": 0.0001, + "sup_loss": 1.5160081181707756 + }, + { + "step": 334, + "lr": 0.0001, + "sup_loss": 1.5370807622724176 + }, + { + "step": 335, + "lr": 0.0001, + "sup_loss": 1.5463874980357313 + }, + { + "step": 336, + "lr": 0.0001, + "sup_loss": 1.527108314657003 + }, + { + "step": 337, + "lr": 0.0001, + "sup_loss": 1.5391164403451145 + }, + { + "step": 338, + "lr": 0.0001, + "sup_loss": 1.5288020191539669 + }, + { + "step": 339, + "lr": 0.0001, + "sup_loss": 1.544628633865707 + }, + { + "step": 340, + "lr": 0.0001, + "sup_loss": 1.5508991977799556 + }, + { + "step": 341, + "lr": 0.0001, + "sup_loss": 1.55731107953372 + }, + { + "step": 342, + "lr": 0.0001, + "sup_loss": 1.5502437895717247 + }, + { + "step": 343, + "lr": 0.0001, + "sup_loss": 1.5113026766574436 + }, + { + "step": 344, + "lr": 0.0001, + "sup_loss": 1.534505224713083 + }, + { + "step": 345, + "lr": 0.0001, + "sup_loss": 1.5664212448478674 + }, + { + "step": 346, + "lr": 0.0001, + "sup_loss": 1.5343774635089513 + }, + { + "step": 347, + "lr": 0.0001, + "sup_loss": 1.523872870702963 + }, + { + "step": 348, + "lr": 0.0001, + "sup_loss": 1.520222813804984 + }, + { + "step": 349, + "lr": 0.0001, + "sup_loss": 1.5100668329858764 + }, + { + "step": 350, + "lr": 0.0001, + "sup_loss": 1.5462873210196821, + "lyap1_mean": -6.279208660125732, + "lyap1_max": -6.246695518493652, + "lyap_spec_mean": [ + -6.279209136962891, + -6.2811126708984375 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 351, + "lr": 0.0001, + "sup_loss": 1.5592996002285948 + }, + { + "step": 352, + "lr": 0.0001, + "sup_loss": 1.513165697344076 + }, + { + "step": 353, + "lr": 0.0001, + "sup_loss": 1.5556921340697518 + }, + { + "step": 354, + "lr": 0.0001, + "sup_loss": 1.558373381312493 + }, + { + "step": 355, + "lr": 0.0001, + "sup_loss": 1.5547521752157896 + }, + { + "step": 356, + "lr": 0.0001, + "sup_loss": 1.5482859037251404 + }, + { + "step": 357, + "lr": 0.0001, + "sup_loss": 1.5541198311222053 + }, + { + "step": 358, + "lr": 0.0001, + "sup_loss": 1.513929981198374 + }, + { + "step": 359, + "lr": 0.0001, + "sup_loss": 1.5386774539004036 + }, + { + "step": 360, + "lr": 0.0001, + "sup_loss": 1.5418122629340056 + }, + { + "step": 361, + "lr": 0.0001, + "sup_loss": 1.5336020776983843 + }, + { + "step": 362, + "lr": 0.0001, + "sup_loss": 1.5442231828959216 + }, + { + "step": 363, + "lr": 0.0001, + "sup_loss": 1.5474763280659718 + }, + { + "step": 364, + "lr": 0.0001, + "sup_loss": 1.5401739408673039 + }, + { + "step": 365, + "lr": 0.0001, + "sup_loss": 1.5340803578606583 + }, + { + "step": 366, + "lr": 0.0001, + "sup_loss": 1.5137462377878363 + }, + { + "step": 367, + "lr": 0.0001, + "sup_loss": 1.5490127083318859 + }, + { + "step": 368, + "lr": 0.0001, + "sup_loss": 1.562103831097833 + }, + { + "step": 369, + "lr": 0.0001, + "sup_loss": 1.5230020136679903 + }, + { + "step": 370, + "lr": 0.0001, + "sup_loss": 1.519926874355663 + }, + { + "step": 371, + "lr": 0.0001, + "sup_loss": 1.525188796570022 + }, + { + "step": 372, + "lr": 0.0001, + "sup_loss": 1.5389628270751545 + }, + { + "step": 373, + "lr": 0.0001, + "sup_loss": 1.5206521555610368 + }, + { + "step": 374, + "lr": 0.0001, + "sup_loss": 1.5910327063155232 + }, + { + "step": 375, + "lr": 0.0001, + "sup_loss": 1.5517228598442367, + "lyap1_mean": -6.272121906280518, + "lyap1_max": -6.257327079772949, + "lyap_spec_mean": [ + -6.272121906280518, + -6.272157669067383 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 376, + "lr": 0.0001, + "sup_loss": 1.5083365432341427 + }, + { + "step": 377, + "lr": 0.0001, + "sup_loss": 1.5351844511019792 + }, + { + "step": 378, + "lr": 0.0001, + "sup_loss": 1.5185103477122763 + }, + { + "step": 379, + "lr": 0.0001, + "sup_loss": 1.5313015037319202 + }, + { + "step": 380, + "lr": 0.0001, + "sup_loss": 1.6042587657443281 + }, + { + "step": 381, + "lr": 0.0001, + "sup_loss": 1.5148778440809103 + }, + { + "step": 382, + "lr": 0.0001, + "sup_loss": 1.547646792356559 + }, + { + "step": 383, + "lr": 0.0001, + "sup_loss": 1.5043766820879998 + }, + { + "step": 384, + "lr": 0.0001, + "sup_loss": 1.517374048269513 + }, + { + "step": 385, + "lr": 0.0001, + "sup_loss": 1.510813145580335 + }, + { + "step": 386, + "lr": 0.0001, + "sup_loss": 1.5343651897756998 + }, + { + "step": 387, + "lr": 0.0001, + "sup_loss": 1.5567518054948828 + }, + { + "step": 388, + "lr": 0.0001, + "sup_loss": 1.556583401528102 + }, + { + "step": 389, + "lr": 0.0001, + "sup_loss": 1.5469114160109558 + }, + { + "step": 390, + "lr": 0.0001, + "sup_loss": 1.5066436477329854 + }, + { + "step": 391, + "lr": 0.0001, + "sup_loss": 1.5363803613218583 + }, + { + "step": 392, + "lr": 0.0001, + "sup_loss": 1.5623538263511423 + }, + { + "step": 393, + "lr": 0.0001, + "sup_loss": 1.5509992112423387 + }, + { + "step": 394, + "lr": 0.0001, + "sup_loss": 1.5553260421123227 + }, + { + "step": 395, + "lr": 0.0001, + "sup_loss": 1.5360717974523455 + }, + { + "step": 396, + "lr": 0.0001, + "sup_loss": 1.5299607690174781 + }, + { + "step": 397, + "lr": 0.0001, + "sup_loss": 1.4931204366796695 + }, + { + "step": 398, + "lr": 0.0001, + "sup_loss": 1.5352384127840748 + }, + { + "step": 399, + "lr": 0.0001, + "sup_loss": 1.5222885300656976 + }, + { + "step": 400, + "lr": 0.0001, + "sup_loss": 1.508831153333313, + "lyap1_mean": -6.27598237991333, + "lyap1_max": -6.24452018737793, + "lyap_spec_mean": [ + -6.275981903076172, + -6.269106388092041 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 401, + "lr": 0.0001, + "sup_loss": 1.5349017639824347 + }, + { + "step": 402, + "lr": 0.0001, + "sup_loss": 1.548015367001545 + }, + { + "step": 403, + "lr": 0.0001, + "sup_loss": 1.5078454672949264 + }, + { + "step": 404, + "lr": 0.0001, + "sup_loss": 1.5185558897396196 + }, + { + "step": 405, + "lr": 0.0001, + "sup_loss": 1.5512310382424748 + }, + { + "step": 406, + "lr": 0.0001, + "sup_loss": 1.5304067426341317 + }, + { + "step": 407, + "lr": 0.0001, + "sup_loss": 1.5183398062918037 + }, + { + "step": 408, + "lr": 0.0001, + "sup_loss": 1.5387815991781382 + }, + { + "step": 409, + "lr": 0.0001, + "sup_loss": 1.5672918045341844 + }, + { + "step": 410, + "lr": 0.0001, + "sup_loss": 1.5307374295242138 + }, + { + "step": 411, + "lr": 0.0001, + "sup_loss": 1.544239649750348 + }, + { + "step": 412, + "lr": 0.0001, + "sup_loss": 1.5372248394722121 + }, + { + "step": 413, + "lr": 0.0001, + "sup_loss": 1.546755674066759 + }, + { + "step": 414, + "lr": 0.0001, + "sup_loss": 1.5602227617649562 + }, + { + "step": 415, + "lr": 0.0001, + "sup_loss": 1.5235959308536382 + }, + { + "step": 416, + "lr": 0.0001, + "sup_loss": 1.54999620670243 + }, + { + "step": 417, + "lr": 0.0001, + "sup_loss": 1.5595363483742926 + }, + { + "step": 418, + "lr": 0.0001, + "sup_loss": 1.5429251013869647 + }, + { + "step": 419, + "lr": 0.0001, + "sup_loss": 1.5263398785761602 + }, + { + "step": 420, + "lr": 0.0001, + "sup_loss": 1.5403264813934174 + }, + { + "step": 421, + "lr": 0.0001, + "sup_loss": 1.51891197855286 + }, + { + "step": 422, + "lr": 0.0001, + "sup_loss": 1.5330320818314984 + }, + { + "step": 423, + "lr": 0.0001, + "sup_loss": 1.5024285323810576 + }, + { + "step": 424, + "lr": 0.0001, + "sup_loss": 1.5529554719113352 + }, + { + "step": 425, + "lr": 0.0001, + "sup_loss": 1.519168167373818, + "lyap1_mean": -6.279496192932129, + "lyap1_max": -6.26162052154541, + "lyap_spec_mean": [ + -6.279496192932129, + -6.2808918952941895 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 426, + "lr": 0.0001, + "sup_loss": 1.5365064575118708 + }, + { + "step": 427, + "lr": 0.0001, + "sup_loss": 1.5222562257747192 + }, + { + "step": 428, + "lr": 0.0001, + "sup_loss": 1.519125449620276 + }, + { + "step": 429, + "lr": 0.0001, + "sup_loss": 1.529261269354274 + }, + { + "step": 430, + "lr": 0.0001, + "sup_loss": 1.52106984620442 + }, + { + "step": 431, + "lr": 0.0001, + "sup_loss": 1.542332523616496 + }, + { + "step": 432, + "lr": 0.0001, + "sup_loss": 1.5053887182166101 + }, + { + "step": 433, + "lr": 0.0001, + "sup_loss": 1.5287912390123242 + }, + { + "step": 434, + "lr": 0.0001, + "sup_loss": 1.4792699148804047 + }, + { + "step": 435, + "lr": 0.0001, + "sup_loss": 1.5175428625480079 + }, + { + "step": 436, + "lr": 0.0001, + "sup_loss": 1.5141915348633057 + }, + { + "step": 437, + "lr": 0.0001, + "sup_loss": 1.5021534012618962 + }, + { + "step": 438, + "lr": 0.0001, + "sup_loss": 1.515655711068608 + }, + { + "step": 439, + "lr": 0.0001, + "sup_loss": 1.5336998941946303 + }, + { + "step": 440, + "lr": 0.0001, + "sup_loss": 1.5709049462152431 + }, + { + "step": 441, + "lr": 0.0001, + "sup_loss": 1.5345619270812774 + }, + { + "step": 442, + "lr": 0.0001, + "sup_loss": 1.545632307238727 + }, + { + "step": 443, + "lr": 0.0001, + "sup_loss": 1.5348385538551335 + }, + { + "step": 444, + "lr": 0.0001, + "sup_loss": 1.5596831114341054 + }, + { + "step": 445, + "lr": 0.0001, + "sup_loss": 1.5213286739984129 + }, + { + "step": 446, + "lr": 0.0001, + "sup_loss": 1.507136513986345 + }, + { + "step": 447, + "lr": 0.0001, + "sup_loss": 1.5338240960099745 + }, + { + "step": 448, + "lr": 0.0001, + "sup_loss": 1.5335693962074035 + }, + { + "step": 449, + "lr": 0.0001, + "sup_loss": 1.5439888653044145 + }, + { + "step": 450, + "lr": 0.0001, + "sup_loss": 1.527999408648118, + "lyap1_mean": -6.28324031829834, + "lyap1_max": -6.266049385070801, + "lyap_spec_mean": [ + -6.28324031829834, + -6.283727169036865 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 451, + "lr": 0.0001, + "sup_loss": 1.5393156317969563 + }, + { + "step": 452, + "lr": 0.0001, + "sup_loss": 1.5240758554208984 + }, + { + "step": 453, + "lr": 0.0001, + "sup_loss": 1.5172760077652057 + }, + { + "step": 454, + "lr": 0.0001, + "sup_loss": 1.5435138079852262 + }, + { + "step": 455, + "lr": 0.0001, + "sup_loss": 1.553782064658805 + }, + { + "step": 456, + "lr": 0.0001, + "sup_loss": 1.4990875570761242 + }, + { + "step": 457, + "lr": 0.0001, + "sup_loss": 1.526767795262043 + }, + { + "step": 458, + "lr": 0.0001, + "sup_loss": 1.5074100872415515 + }, + { + "step": 459, + "lr": 0.0001, + "sup_loss": 1.5299431484804877 + }, + { + "step": 460, + "lr": 0.0001, + "sup_loss": 1.5411429028548818 + }, + { + "step": 461, + "lr": 0.0001, + "sup_loss": 1.5258069753257875 + }, + { + "step": 462, + "lr": 0.0001, + "sup_loss": 1.5130222736269014 + }, + { + "step": 463, + "lr": 0.0001, + "sup_loss": 1.59688740678216 + }, + { + "step": 464, + "lr": 0.0001, + "sup_loss": 1.5169865077256424 + }, + { + "step": 465, + "lr": 0.0001, + "sup_loss": 1.522381168712973 + }, + { + "step": 466, + "lr": 0.0001, + "sup_loss": 1.522439002600057 + }, + { + "step": 467, + "lr": 0.0001, + "sup_loss": 1.5190416825884472 + }, + { + "step": 468, + "lr": 0.0001, + "sup_loss": 1.5425338998993232 + }, + { + "step": 469, + "lr": 0.0001, + "sup_loss": 1.5452529428590425 + }, + { + "step": 470, + "lr": 0.0001, + "sup_loss": 1.5260539108805669 + }, + { + "step": 471, + "lr": 0.0001, + "sup_loss": 1.5488179073420898 + }, + { + "step": 472, + "lr": 0.0001, + "sup_loss": 1.5192884004268512 + }, + { + "step": 473, + "lr": 0.0001, + "sup_loss": 1.5055062362761682 + }, + { + "step": 474, + "lr": 0.0001, + "sup_loss": 1.4984221563948537 + }, + { + "step": 475, + "lr": 0.0001, + "sup_loss": 1.5320479692645415, + "lyap1_mean": -6.27734375, + "lyap1_max": -6.261284828186035, + "lyap_spec_mean": [ + -6.277344226837158, + -6.2793049812316895 + ], + "lyap_bound": -0.10536051565782628 + }, + { + "step": 476, + "lr": 0.0001, + "sup_loss": 1.538385270074266 + }, + { + "step": 477, + "lr": 0.0001, + "sup_loss": 1.4984740868654256 + }, + { + "step": 478, + "lr": 0.0001, + "sup_loss": 1.5450349077570704 + }, + { + "step": 479, + "lr": 0.0001, + "sup_loss": 1.5320467747126512 + }, + { + "step": 480, + "lr": 0.0001, + "sup_loss": 1.5451753778273984 + }, + { + "step": 481, + "lr": 0.0001, + "sup_loss": 1.5345277338401393 + }, + { + "step": 482, + "lr": 0.0001, + "sup_loss": 1.5576481611049358 + }, + { + "step": 483, + "lr": 0.0001, + "sup_loss": 1.525092799737688 + }, + { + "step": 484, + "lr": 0.0001, + "sup_loss": 1.5177981040203947 + }, + { + "step": 485, + "lr": 0.0001, + "sup_loss": 1.524682836990655 + }, + { + "step": 486, + "lr": 0.0001, + "sup_loss": 1.5577210090147655 + }, + { + "step": 487, + "lr": 0.0001, + "sup_loss": 1.5175527588503717 + }, + { + "step": 488, + "lr": 0.0001, + "sup_loss": 1.517884672177967 + }, + { + "step": 489, + "lr": 0.0001, + "sup_loss": 1.540989397132943 + }, + { + "step": 490, + "lr": 0.0001, + "sup_loss": 1.5207840823262242 + }, + { + "step": 491, + "lr": 0.0001, + "sup_loss": 1.5308205979089273 + }, + { + "step": 492, + "lr": 0.0001, + "sup_loss": 1.5343529235115525 + }, + { + "step": 493, + "lr": 0.0001, + "sup_loss": 1.4972792643274797 + }, + { + "step": 494, + "lr": 0.0001, + "sup_loss": 1.5436764958011016 + }, + { + "step": 495, + "lr": 0.0001, + "sup_loss": 1.5344808545549244 + }, + { + "step": 496, + "lr": 0.0001, + "sup_loss": 1.5101506555087487 + }, + { + "step": 497, + "lr": 0.0001, + "sup_loss": 1.5398357969125787 + }, + { + "step": 498, + "lr": 0.0001, + "sup_loss": 1.5341583392864167 + }, + { + "step": 499, + "lr": 0.0001, + "sup_loss": 1.5103523937991166 + } + ], + "evals": [ + { + "step": 0, + "acc": 0.0, + "tok_acc": 0.0 + }, + { + "step": 100, + "acc": 0.0, + "tok_acc": 0.38816550925925924 + }, + { + "step": 200, + "acc": 0.0, + "tok_acc": 0.3872974537037037 + }, + { + "step": 300, + "acc": 0.0, + "tok_acc": 0.3872974537037037 + }, + { + "step": 400, + "acc": 0.0, + "tok_acc": 0.39052854938271603 + }, + { + "step": 500, + "acc": 0.0, + "tok_acc": 0.3880208333333333 + } + ], + "final_acc": 0.0, + "final_tok_acc": 0.3880208333333333 +}
\ No newline at end of file |
