{ "args": { "data_path": "/home/yurenh2/rrm/data/sudoku-extreme-1k-aug-1000", "n_steps": 500, "batch_size": 8, "lr": 0.0001, "puzzle_emb_lr": 0.0001, "warmup_steps": 100, "weight_decay": 1.0, "hidden_size": 256, "n_iters": 6, "n_aol_layers": 2, "kappa": 0.9, "eta": 1.0, "alpha": 1.0, "k_lyap": 2, "lyap_iters": 4, "lyap_every": 25, "seed": 42, "eval_every": 100, "eval_n": 256, "eval_batch_size": 32, "out": "runs/srm_smoke_500_h256.json", "save_ckpt": "" }, "n_params": 939314, "initial_acc": 0.0, "initial_tok_acc": 0.0, "initial_lip": { "lip_emp_mean": 0.1357038915157318, "lip_emp_max": 0.17765390872955322, "lip_emp_99p": 0.17537692189216614, "lip_theoretical_bound": 0.9, "passes_bound": true }, "steps": [ { "step": 0, "lr": 0.0, "sup_loss": 2.8284496844095326, "lyap1_mean": -6.2286057472229, "lyap1_max": -6.200310230255127, "lyap_spec_mean": [ -6.2286057472229, -6.228340148925781 ], "lyap_bound": -0.10536051565782628 }, { "step": 1, "lr": 1e-06, "sup_loss": 2.838926445513094 }, { "step": 2, "lr": 2e-06, "sup_loss": 2.834020958290193 }, { "step": 3, "lr": 3e-06, "sup_loss": 2.8271419600543934 }, { "step": 4, "lr": 4e-06, "sup_loss": 2.8336220039837565 }, { "step": 5, "lr": 5e-06, "sup_loss": 2.847244138413309 }, { "step": 6, "lr": 6e-06, "sup_loss": 2.8296374251845307 }, { "step": 7, "lr": 7e-06, "sup_loss": 2.816413976469476 }, { "step": 8, "lr": 8e-06, "sup_loss": 2.8221126965976864 }, { "step": 9, "lr": 9e-06, "sup_loss": 2.8106848008287297 }, { "step": 10, "lr": 1e-05, "sup_loss": 2.8203775573541052 }, { "step": 11, "lr": 1.1000000000000001e-05, "sup_loss": 2.8181432855784427 }, { "step": 12, "lr": 1.2e-05, "sup_loss": 2.8234647595703923 }, { "step": 13, "lr": 1.3000000000000001e-05, "sup_loss": 2.819964976625202 }, { "step": 14, "lr": 1.4e-05, "sup_loss": 2.777240835924918 }, { "step": 15, "lr": 1.5e-05, "sup_loss": 2.796632874074624 }, { "step": 16, "lr": 1.6e-05, "sup_loss": 2.80353484864027 }, { "step": 17, "lr": 1.7e-05, "sup_loss": 2.7731115467511405 }, { "step": 18, "lr": 1.8e-05, "sup_loss": 2.776553477441689 }, { "step": 19, "lr": 1.9e-05, "sup_loss": 2.764454348935523 }, { "step": 20, "lr": 2e-05, "sup_loss": 2.763660193795498 }, { "step": 21, "lr": 2.1000000000000002e-05, "sup_loss": 2.7644818192133025 }, { "step": 22, "lr": 2.2000000000000003e-05, "sup_loss": 2.720537518373678 }, { "step": 23, "lr": 2.3e-05, "sup_loss": 2.7332803327454616 }, { "step": 24, "lr": 2.4e-05, "sup_loss": 2.7274532837280776 }, { "step": 25, "lr": 2.5e-05, "sup_loss": 2.7009521547955337, "lyap1_mean": -6.240983009338379, "lyap1_max": -6.22477388381958, "lyap_spec_mean": [ -6.240983009338379, -6.240518093109131 ], "lyap_bound": -0.10536051565782628 }, { "step": 26, "lr": 2.6000000000000002e-05, "sup_loss": 2.7118575888452168 }, { "step": 27, "lr": 2.7000000000000002e-05, "sup_loss": 2.676175428796176 }, { "step": 28, "lr": 2.8e-05, "sup_loss": 2.693807866858765 }, { "step": 29, "lr": 2.9000000000000004e-05, "sup_loss": 2.682955672998549 }, { "step": 30, "lr": 3e-05, "sup_loss": 2.654173913401533 }, { "step": 31, "lr": 3.1e-05, "sup_loss": 2.6763871059258495 }, { "step": 32, "lr": 3.2e-05, "sup_loss": 2.644394835134615 }, { "step": 33, "lr": 3.3e-05, "sup_loss": 2.6299395433077652 }, { "step": 34, "lr": 3.4e-05, "sup_loss": 2.6242866722739664 }, { "step": 35, "lr": 3.5000000000000004e-05, "sup_loss": 2.616557927761839 }, { "step": 36, "lr": 3.6e-05, "sup_loss": 2.5994089813723797 }, { "step": 37, "lr": 3.7000000000000005e-05, "sup_loss": 2.5626432829550247 }, { "step": 38, "lr": 3.8e-05, "sup_loss": 2.5606932168456327 }, { "step": 39, "lr": 3.9e-05, "sup_loss": 2.546780039790928 }, { "step": 40, "lr": 4e-05, "sup_loss": 2.520369392127516 }, { "step": 41, "lr": 4.1e-05, "sup_loss": 2.5127611686076583 }, { "step": 42, "lr": 4.2000000000000004e-05, "sup_loss": 2.5208038257044745 }, { "step": 43, "lr": 4.3e-05, "sup_loss": 2.493714124425292 }, { "step": 44, "lr": 4.4000000000000006e-05, "sup_loss": 2.480158985877274 }, { "step": 45, "lr": 4.5e-05, "sup_loss": 2.4361940027334783 }, { "step": 46, "lr": 4.6e-05, "sup_loss": 2.4460679490083947 }, { "step": 47, "lr": 4.7000000000000004e-05, "sup_loss": 2.4495761120800132 }, { "step": 48, "lr": 4.8e-05, "sup_loss": 2.4131285345159026 }, { "step": 49, "lr": 4.9e-05, "sup_loss": 2.3999822324103657 }, { "step": 50, "lr": 5e-05, "sup_loss": 2.4029195286821534, "lyap1_mean": -6.2544169425964355, "lyap1_max": -6.231961250305176, "lyap_spec_mean": [ -6.2544169425964355, -6.252962112426758 ], "lyap_bound": -0.10536051565782628 }, { "step": 51, "lr": 5.1000000000000006e-05, "sup_loss": 2.366847464160111 }, { "step": 52, "lr": 5.2000000000000004e-05, "sup_loss": 2.33094637470472 }, { "step": 53, "lr": 5.3e-05, "sup_loss": 2.329777112486255 }, { "step": 54, "lr": 5.4000000000000005e-05, "sup_loss": 2.334095543782154 }, { "step": 55, "lr": 5.500000000000001e-05, "sup_loss": 2.3051139076185523 }, { "step": 56, "lr": 5.6e-05, "sup_loss": 2.3167502882455797 }, { "step": 57, "lr": 5.7e-05, "sup_loss": 2.2903970820408137 }, { "step": 58, "lr": 5.800000000000001e-05, "sup_loss": 2.274886117171231 }, { "step": 59, "lr": 5.9e-05, "sup_loss": 2.2313597888611825 }, { "step": 60, "lr": 6e-05, "sup_loss": 2.2339985174568273 }, { "step": 61, "lr": 6.1000000000000005e-05, "sup_loss": 2.223536861505 }, { "step": 62, "lr": 6.2e-05, "sup_loss": 2.201599151730311 }, { "step": 63, "lr": 6.3e-05, "sup_loss": 2.1832543882469557 }, { "step": 64, "lr": 6.4e-05, "sup_loss": 2.1580907568451795 }, { "step": 65, "lr": 6.500000000000001e-05, "sup_loss": 2.1679979832095837 }, { "step": 66, "lr": 6.6e-05, "sup_loss": 2.151159814374817 }, { "step": 67, "lr": 6.7e-05, "sup_loss": 2.132176910649508 }, { "step": 68, "lr": 6.8e-05, "sup_loss": 2.123453724015459 }, { "step": 69, "lr": 6.900000000000001e-05, "sup_loss": 2.100389289485579 }, { "step": 70, "lr": 7.000000000000001e-05, "sup_loss": 2.100101099728348 }, { "step": 71, "lr": 7.1e-05, "sup_loss": 2.082929395622361 }, { "step": 72, "lr": 7.2e-05, "sup_loss": 2.0613085193351983 }, { "step": 73, "lr": 7.3e-05, "sup_loss": 2.043758664648479 }, { "step": 74, "lr": 7.400000000000001e-05, "sup_loss": 2.0471150778750125 }, { "step": 75, "lr": 7.500000000000001e-05, "sup_loss": 2.043438499353482, "lyap1_mean": -6.26137638092041, "lyap1_max": -6.241876125335693, "lyap_spec_mean": [ -6.261376857757568, -6.260560035705566 ], "lyap_bound": -0.10536051565782628 }, { "step": 76, "lr": 7.6e-05, "sup_loss": 2.0290594292513395 }, { "step": 77, "lr": 7.7e-05, "sup_loss": 2.017350256019743 }, { "step": 78, "lr": 7.8e-05, "sup_loss": 2.010315789581182 }, { "step": 79, "lr": 7.900000000000001e-05, "sup_loss": 2.007747901168263 }, { "step": 80, "lr": 8e-05, "sup_loss": 1.9878722973916485 }, { "step": 81, "lr": 8.099999999999999e-05, "sup_loss": 1.9820387430330704 }, { "step": 82, "lr": 8.2e-05, "sup_loss": 1.960933098122315 }, { "step": 83, "lr": 8.3e-05, "sup_loss": 1.9703326768508003 }, { "step": 84, "lr": 8.400000000000001e-05, "sup_loss": 1.9608279961175652 }, { "step": 85, "lr": 8.5e-05, "sup_loss": 1.9439071529032013 }, { "step": 86, "lr": 8.6e-05, "sup_loss": 1.940231741846871 }, { "step": 87, "lr": 8.700000000000001e-05, "sup_loss": 1.9256818466494516 }, { "step": 88, "lr": 8.800000000000001e-05, "sup_loss": 1.9422044059735057 }, { "step": 89, "lr": 8.9e-05, "sup_loss": 1.9265779568349395 }, { "step": 90, "lr": 9e-05, "sup_loss": 1.9107046426157344 }, { "step": 91, "lr": 9.1e-05, "sup_loss": 1.9120888307606432 }, { "step": 92, "lr": 9.2e-05, "sup_loss": 1.9039226834805385 }, { "step": 93, "lr": 9.300000000000001e-05, "sup_loss": 1.8859638864585686 }, { "step": 94, "lr": 9.400000000000001e-05, "sup_loss": 1.8935103237832633 }, { "step": 95, "lr": 9.499999999999999e-05, "sup_loss": 1.8780442206074615 }, { "step": 96, "lr": 9.6e-05, "sup_loss": 1.880738136494954 }, { "step": 97, "lr": 9.7e-05, "sup_loss": 1.8727563337733835 }, { "step": 98, "lr": 9.8e-05, "sup_loss": 1.8747788248049444 }, { "step": 99, "lr": 9.900000000000001e-05, "sup_loss": 1.8535151587817753 }, { "step": 100, "lr": 0.0001, "sup_loss": 1.8369216468190068, "lyap1_mean": -6.2764668464660645, "lyap1_max": -6.253869533538818, "lyap_spec_mean": [ -6.2764668464660645, -6.276641368865967 ], "lyap_bound": -0.10536051565782628 }, { "step": 101, "lr": 0.0001, "sup_loss": 1.8375981252343498 }, { "step": 102, "lr": 0.0001, "sup_loss": 1.8414441493027138 }, { "step": 103, "lr": 0.0001, "sup_loss": 1.841932872396079 }, { "step": 104, "lr": 0.0001, "sup_loss": 1.8250621869367878 }, { "step": 105, "lr": 0.0001, "sup_loss": 1.8180236366243632 }, { "step": 106, "lr": 0.0001, "sup_loss": 1.8148333969430548 }, { "step": 107, "lr": 0.0001, "sup_loss": 1.8097191971601063 }, { "step": 108, "lr": 0.0001, "sup_loss": 1.8062571266936345 }, { "step": 109, "lr": 0.0001, "sup_loss": 1.783310944279242 }, { "step": 110, "lr": 0.0001, "sup_loss": 1.7882709675762618 }, { "step": 111, "lr": 0.0001, "sup_loss": 1.7837487501971951 }, { "step": 112, "lr": 0.0001, "sup_loss": 1.8040981386613326 }, { "step": 113, "lr": 0.0001, "sup_loss": 1.7839348955084138 }, { "step": 114, "lr": 0.0001, "sup_loss": 1.7808710490288988 }, { "step": 115, "lr": 0.0001, "sup_loss": 1.73568023182333 }, { "step": 116, "lr": 0.0001, "sup_loss": 1.7546620469313183 }, { "step": 117, "lr": 0.0001, "sup_loss": 1.7751414856763805 }, { "step": 118, "lr": 0.0001, "sup_loss": 1.752759060218006 }, { "step": 119, "lr": 0.0001, "sup_loss": 1.7358692128944557 }, { "step": 120, "lr": 0.0001, "sup_loss": 1.739228786343244 }, { "step": 121, "lr": 0.0001, "sup_loss": 1.7493239323825307 }, { "step": 122, "lr": 0.0001, "sup_loss": 1.7536705840390145 }, { "step": 123, "lr": 0.0001, "sup_loss": 1.7502938192568702 }, { "step": 124, "lr": 0.0001, "sup_loss": 1.7474968410988128 }, { "step": 125, "lr": 0.0001, "sup_loss": 1.741263138180546, "lyap1_mean": -6.289633750915527, "lyap1_max": -6.271181106567383, "lyap_spec_mean": [ -6.289633750915527, -6.290492534637451 ], "lyap_bound": -0.10536051565782628 }, { "step": 126, "lr": 0.0001, "sup_loss": 1.7211279276855354 }, { "step": 127, "lr": 0.0001, "sup_loss": 1.7073139543990044 }, { "step": 128, "lr": 0.0001, "sup_loss": 1.7141654935069126 }, { "step": 129, "lr": 0.0001, "sup_loss": 1.7005028367708452 }, { "step": 130, "lr": 0.0001, "sup_loss": 1.6860979041501705 }, { "step": 131, "lr": 0.0001, "sup_loss": 1.7066609179775223 }, { "step": 132, "lr": 0.0001, "sup_loss": 1.6979615420852867 }, { "step": 133, "lr": 0.0001, "sup_loss": 1.7273194417800906 }, { "step": 134, "lr": 0.0001, "sup_loss": 1.7047646668485257 }, { "step": 135, "lr": 0.0001, "sup_loss": 1.716210852207255 }, { "step": 136, "lr": 0.0001, "sup_loss": 1.7307658980947846 }, { "step": 137, "lr": 0.0001, "sup_loss": 1.711602512894522 }, { "step": 138, "lr": 0.0001, "sup_loss": 1.7057358493816908 }, { "step": 139, "lr": 0.0001, "sup_loss": 1.688446466585652 }, { "step": 140, "lr": 0.0001, "sup_loss": 1.704572577683926 }, { "step": 141, "lr": 0.0001, "sup_loss": 1.7304753449546095 }, { "step": 142, "lr": 0.0001, "sup_loss": 1.6883580864485974 }, { "step": 143, "lr": 0.0001, "sup_loss": 1.6718549694522848 }, { "step": 144, "lr": 0.0001, "sup_loss": 1.7193390482772282 }, { "step": 145, "lr": 0.0001, "sup_loss": 1.6713528463158704 }, { "step": 146, "lr": 0.0001, "sup_loss": 1.6959659224268964 }, { "step": 147, "lr": 0.0001, "sup_loss": 1.6890260775715398 }, { "step": 148, "lr": 0.0001, "sup_loss": 1.6248832250680991 }, { "step": 149, "lr": 0.0001, "sup_loss": 1.654215549400142 }, { "step": 150, "lr": 0.0001, "sup_loss": 1.6615707449121377, "lyap1_mean": -6.306196689605713, "lyap1_max": -6.2920122146606445, "lyap_spec_mean": [ -6.306196212768555, -6.305814266204834 ], "lyap_bound": -0.10536051565782628 }, { "step": 151, "lr": 0.0001, "sup_loss": 1.6810928502731424 }, { "step": 152, "lr": 0.0001, "sup_loss": 1.6695774965144798 }, { "step": 153, "lr": 0.0001, "sup_loss": 1.6444260938397506 }, { "step": 154, "lr": 0.0001, "sup_loss": 1.6821734376746678 }, { "step": 155, "lr": 0.0001, "sup_loss": 1.6728631290634948 }, { "step": 156, "lr": 0.0001, "sup_loss": 1.6820655261417026 }, { "step": 157, "lr": 0.0001, "sup_loss": 1.6516146373078011 }, { "step": 158, "lr": 0.0001, "sup_loss": 1.6530359901341733 }, { "step": 159, "lr": 0.0001, "sup_loss": 1.684539741706318 }, { "step": 160, "lr": 0.0001, "sup_loss": 1.6740305810886589 }, { "step": 161, "lr": 0.0001, "sup_loss": 1.60870367198615 }, { "step": 162, "lr": 0.0001, "sup_loss": 1.6475936644805484 }, { "step": 163, "lr": 0.0001, "sup_loss": 1.6515501185205237 }, { "step": 164, "lr": 0.0001, "sup_loss": 1.6180064545910822 }, { "step": 165, "lr": 0.0001, "sup_loss": 1.6406713466514131 }, { "step": 166, "lr": 0.0001, "sup_loss": 1.6452356602937375 }, { "step": 167, "lr": 0.0001, "sup_loss": 1.6201243770891205 }, { "step": 168, "lr": 0.0001, "sup_loss": 1.665671342076336 }, { "step": 169, "lr": 0.0001, "sup_loss": 1.6228934612102783 }, { "step": 170, "lr": 0.0001, "sup_loss": 1.6389439525875207 }, { "step": 171, "lr": 0.0001, "sup_loss": 1.6387175869530952 }, { "step": 172, "lr": 0.0001, "sup_loss": 1.6356821068128287 }, { "step": 173, "lr": 0.0001, "sup_loss": 1.6217340583613125 }, { "step": 174, "lr": 0.0001, "sup_loss": 1.612851693540602 }, { "step": 175, "lr": 0.0001, "sup_loss": 1.6110975775679166, "lyap1_mean": -6.304257392883301, "lyap1_max": -6.284839153289795, "lyap_spec_mean": [ -6.304257392883301, -6.30750846862793 ], "lyap_bound": -0.10536051565782628 }, { "step": 176, "lr": 0.0001, "sup_loss": 1.61914691618028 }, { "step": 177, "lr": 0.0001, "sup_loss": 1.618533874469112 }, { "step": 178, "lr": 0.0001, "sup_loss": 1.6225511296653723 }, { "step": 179, "lr": 0.0001, "sup_loss": 1.6409765068011015 }, { "step": 180, "lr": 0.0001, "sup_loss": 1.6209402621335396 }, { "step": 181, "lr": 0.0001, "sup_loss": 1.5903578498662685 }, { "step": 182, "lr": 0.0001, "sup_loss": 1.6243800011765677 }, { "step": 183, "lr": 0.0001, "sup_loss": 1.6207408935168828 }, { "step": 184, "lr": 0.0001, "sup_loss": 1.6102868840186222 }, { "step": 185, "lr": 0.0001, "sup_loss": 1.6001848591227634 }, { "step": 186, "lr": 0.0001, "sup_loss": 1.572493708893578 }, { "step": 187, "lr": 0.0001, "sup_loss": 1.612159538574632 }, { "step": 188, "lr": 0.0001, "sup_loss": 1.5911217530125168 }, { "step": 189, "lr": 0.0001, "sup_loss": 1.6293490369187993 }, { "step": 190, "lr": 0.0001, "sup_loss": 1.6040246983038626 }, { "step": 191, "lr": 0.0001, "sup_loss": 1.6171197527958248 }, { "step": 192, "lr": 0.0001, "sup_loss": 1.6471899354369277 }, { "step": 193, "lr": 0.0001, "sup_loss": 1.6034272540262462 }, { "step": 194, "lr": 0.0001, "sup_loss": 1.624265957837026 }, { "step": 195, "lr": 0.0001, "sup_loss": 1.6315334681058467 }, { "step": 196, "lr": 0.0001, "sup_loss": 1.6182992051901102 }, { "step": 197, "lr": 0.0001, "sup_loss": 1.6026089533859447 }, { "step": 198, "lr": 0.0001, "sup_loss": 1.6068981420107973 }, { "step": 199, "lr": 0.0001, "sup_loss": 1.6226155394597461 }, { "step": 200, "lr": 0.0001, "sup_loss": 1.5908226672539212, "lyap1_mean": -6.314453125, "lyap1_max": -6.295107841491699, "lyap_spec_mean": [ -6.314453601837158, -6.311760902404785 ], "lyap_bound": -0.10536051565782628 }, { "step": 201, "lr": 0.0001, "sup_loss": 1.614759653061085 }, { "step": 202, "lr": 0.0001, "sup_loss": 1.607828428395806 }, { "step": 203, "lr": 0.0001, "sup_loss": 1.6031055264743805 }, { "step": 204, "lr": 0.0001, "sup_loss": 1.62692512809244 }, { "step": 205, "lr": 0.0001, "sup_loss": 1.6197515599744592 }, { "step": 206, "lr": 0.0001, "sup_loss": 1.5945279526481753 }, { "step": 207, "lr": 0.0001, "sup_loss": 1.600027352742842 }, { "step": 208, "lr": 0.0001, "sup_loss": 1.6231346460135334 }, { "step": 209, "lr": 0.0001, "sup_loss": 1.6045676349961688 }, { "step": 210, "lr": 0.0001, "sup_loss": 1.612343899125989 }, { "step": 211, "lr": 0.0001, "sup_loss": 1.572819151638782 }, { "step": 212, "lr": 0.0001, "sup_loss": 1.5998946197093518 }, { "step": 213, "lr": 0.0001, "sup_loss": 1.603872718653751 }, { "step": 214, "lr": 0.0001, "sup_loss": 1.6064486221293404 }, { "step": 215, "lr": 0.0001, "sup_loss": 1.5816140347137049 }, { "step": 216, "lr": 0.0001, "sup_loss": 1.623563311041461 }, { "step": 217, "lr": 0.0001, "sup_loss": 1.604757450088786 }, { "step": 218, "lr": 0.0001, "sup_loss": 1.5856211225488168 }, { "step": 219, "lr": 0.0001, "sup_loss": 1.5971583490656518 }, { "step": 220, "lr": 0.0001, "sup_loss": 1.62120464502734 }, { "step": 221, "lr": 0.0001, "sup_loss": 1.5521333505669912 }, { "step": 222, "lr": 0.0001, "sup_loss": 1.5910480072140367 }, { "step": 223, "lr": 0.0001, "sup_loss": 1.5878960555935198 }, { "step": 224, "lr": 0.0001, "sup_loss": 1.52525868655689 }, { "step": 225, "lr": 0.0001, "sup_loss": 1.6046161077785628, "lyap1_mean": -6.307826995849609, "lyap1_max": -6.286316394805908, "lyap_spec_mean": [ -6.307826995849609, -6.311221122741699 ], "lyap_bound": -0.10536051565782628 }, { "step": 226, "lr": 0.0001, "sup_loss": 1.5787563174285753 }, { "step": 227, "lr": 0.0001, "sup_loss": 1.6046591313503475 }, { "step": 228, "lr": 0.0001, "sup_loss": 1.6058233574528387 }, { "step": 229, "lr": 0.0001, "sup_loss": 1.5708493836588395 }, { "step": 230, "lr": 0.0001, "sup_loss": 1.5522555239571714 }, { "step": 231, "lr": 0.0001, "sup_loss": 1.5866450454598817 }, { "step": 232, "lr": 0.0001, "sup_loss": 1.5833501739262028 }, { "step": 233, "lr": 0.0001, "sup_loss": 1.5781072194587922 }, { "step": 234, "lr": 0.0001, "sup_loss": 1.5801195205359906 }, { "step": 235, "lr": 0.0001, "sup_loss": 1.6097866642525447 }, { "step": 236, "lr": 0.0001, "sup_loss": 1.5776707740181835 }, { "step": 237, "lr": 0.0001, "sup_loss": 1.575370573452357 }, { "step": 238, "lr": 0.0001, "sup_loss": 1.62187735175436 }, { "step": 239, "lr": 0.0001, "sup_loss": 1.5719111789223998 }, { "step": 240, "lr": 0.0001, "sup_loss": 1.6043917104768717 }, { "step": 241, "lr": 0.0001, "sup_loss": 1.6115212118102145 }, { "step": 242, "lr": 0.0001, "sup_loss": 1.5726522914277772 }, { "step": 243, "lr": 0.0001, "sup_loss": 1.5747379788820692 }, { "step": 244, "lr": 0.0001, "sup_loss": 1.602347413668085 }, { "step": 245, "lr": 0.0001, "sup_loss": 1.561972573460076 }, { "step": 246, "lr": 0.0001, "sup_loss": 1.579944843245691 }, { "step": 247, "lr": 0.0001, "sup_loss": 1.56607517962854 }, { "step": 248, "lr": 0.0001, "sup_loss": 1.5602422723452187 }, { "step": 249, "lr": 0.0001, "sup_loss": 1.5481532506755131 }, { "step": 250, "lr": 0.0001, "sup_loss": 1.6282572215428823, "lyap1_mean": -6.311943054199219, "lyap1_max": -6.285268306732178, "lyap_spec_mean": [ -6.311943054199219, -6.315288543701172 ], "lyap_bound": -0.10536051565782628 }, { "step": 251, "lr": 0.0001, "sup_loss": 1.5718172405729163 }, { "step": 252, "lr": 0.0001, "sup_loss": 1.568016116494461 }, { "step": 253, "lr": 0.0001, "sup_loss": 1.5892380828003114 }, { "step": 254, "lr": 0.0001, "sup_loss": 1.5607691734001938 }, { "step": 255, "lr": 0.0001, "sup_loss": 1.5788166224967786 }, { "step": 256, "lr": 0.0001, "sup_loss": 1.5587572791400528 }, { "step": 257, "lr": 0.0001, "sup_loss": 1.56509858619077 }, { "step": 258, "lr": 0.0001, "sup_loss": 1.5588711212966218 }, { "step": 259, "lr": 0.0001, "sup_loss": 1.579683401292984 }, { "step": 260, "lr": 0.0001, "sup_loss": 1.5572354648170486 }, { "step": 261, "lr": 0.0001, "sup_loss": 1.569087077453923 }, { "step": 262, "lr": 0.0001, "sup_loss": 1.552346801093667 }, { "step": 263, "lr": 0.0001, "sup_loss": 1.5730190896596785 }, { "step": 264, "lr": 0.0001, "sup_loss": 1.5592950476448364 }, { "step": 265, "lr": 0.0001, "sup_loss": 1.578283494419273 }, { "step": 266, "lr": 0.0001, "sup_loss": 1.5317856999633797 }, { "step": 267, "lr": 0.0001, "sup_loss": 1.5859820808572964 }, { "step": 268, "lr": 0.0001, "sup_loss": 1.5849993822920676 }, { "step": 269, "lr": 0.0001, "sup_loss": 1.5748490996922742 }, { "step": 270, "lr": 0.0001, "sup_loss": 1.5941484134857486 }, { "step": 271, "lr": 0.0001, "sup_loss": 1.5520565621157805 }, { "step": 272, "lr": 0.0001, "sup_loss": 1.5736600032776027 }, { "step": 273, "lr": 0.0001, "sup_loss": 1.5703344026318378 }, { "step": 274, "lr": 0.0001, "sup_loss": 1.532767851458178 }, { "step": 275, "lr": 0.0001, "sup_loss": 1.5764611628543121, "lyap1_mean": -6.288212776184082, "lyap1_max": -6.271989822387695, "lyap_spec_mean": [ -6.288212776184082, -6.290098190307617 ], "lyap_bound": -0.10536051565782628 }, { "step": 276, "lr": 0.0001, "sup_loss": 1.5627879826848854 }, { "step": 277, "lr": 0.0001, "sup_loss": 1.5565442095845055 }, { "step": 278, "lr": 0.0001, "sup_loss": 1.5479018965033426 }, { "step": 279, "lr": 0.0001, "sup_loss": 1.542800690022451 }, { "step": 280, "lr": 0.0001, "sup_loss": 1.5708411044949124 }, { "step": 281, "lr": 0.0001, "sup_loss": 1.5704922914677404 }, { "step": 282, "lr": 0.0001, "sup_loss": 1.548503892204476 }, { "step": 283, "lr": 0.0001, "sup_loss": 1.557741653174852 }, { "step": 284, "lr": 0.0001, "sup_loss": 1.5737748118594674 }, { "step": 285, "lr": 0.0001, "sup_loss": 1.5217131287518002 }, { "step": 286, "lr": 0.0001, "sup_loss": 1.5508766477654388 }, { "step": 287, "lr": 0.0001, "sup_loss": 1.5681149739226237 }, { "step": 288, "lr": 0.0001, "sup_loss": 1.533574656235852 }, { "step": 289, "lr": 0.0001, "sup_loss": 1.5711439676427719 }, { "step": 290, "lr": 0.0001, "sup_loss": 1.5541693707191668 }, { "step": 291, "lr": 0.0001, "sup_loss": 1.53935113280687 }, { "step": 292, "lr": 0.0001, "sup_loss": 1.5477303077305633 }, { "step": 293, "lr": 0.0001, "sup_loss": 1.5411001058204383 }, { "step": 294, "lr": 0.0001, "sup_loss": 1.566108745714733 }, { "step": 295, "lr": 0.0001, "sup_loss": 1.5431387629950557 }, { "step": 296, "lr": 0.0001, "sup_loss": 1.5719911366769372 }, { "step": 297, "lr": 0.0001, "sup_loss": 1.5459029921829308 }, { "step": 298, "lr": 0.0001, "sup_loss": 1.5684083943193499 }, { "step": 299, "lr": 0.0001, "sup_loss": 1.5552347272080564 }, { "step": 300, "lr": 0.0001, "sup_loss": 1.5612322177126692, "lyap1_mean": -6.289813995361328, "lyap1_max": -6.263297080993652, "lyap_spec_mean": [ -6.289813995361328, -6.282321453094482 ], "lyap_bound": -0.10536051565782628 }, { "step": 301, "lr": 0.0001, "sup_loss": 1.5511914784971887 }, { "step": 302, "lr": 0.0001, "sup_loss": 1.5731978045920958 }, { "step": 303, "lr": 0.0001, "sup_loss": 1.5566588069651133 }, { "step": 304, "lr": 0.0001, "sup_loss": 1.5435834816545064 }, { "step": 305, "lr": 0.0001, "sup_loss": 1.5338201112718275 }, { "step": 306, "lr": 0.0001, "sup_loss": 1.5620164001408299 }, { "step": 307, "lr": 0.0001, "sup_loss": 1.5461411309298845 }, { "step": 308, "lr": 0.0001, "sup_loss": 1.556008165963498 }, { "step": 309, "lr": 0.0001, "sup_loss": 1.5554833750200956 }, { "step": 310, "lr": 0.0001, "sup_loss": 1.568397089848242 }, { "step": 311, "lr": 0.0001, "sup_loss": 1.5708716932718876 }, { "step": 312, "lr": 0.0001, "sup_loss": 1.5195926766923842 }, { "step": 313, "lr": 0.0001, "sup_loss": 1.52811400474664 }, { "step": 314, "lr": 0.0001, "sup_loss": 1.5539360130593718 }, { "step": 315, "lr": 0.0001, "sup_loss": 1.5706093056068795 }, { "step": 316, "lr": 0.0001, "sup_loss": 1.557120468837921 }, { "step": 317, "lr": 0.0001, "sup_loss": 1.5822501082816158 }, { "step": 318, "lr": 0.0001, "sup_loss": 1.542841458798004 }, { "step": 319, "lr": 0.0001, "sup_loss": 1.5563868638207532 }, { "step": 320, "lr": 0.0001, "sup_loss": 1.5360635360486008 }, { "step": 321, "lr": 0.0001, "sup_loss": 1.5459013446151262 }, { "step": 322, "lr": 0.0001, "sup_loss": 1.5867201314501376 }, { "step": 323, "lr": 0.0001, "sup_loss": 1.5681245372992967 }, { "step": 324, "lr": 0.0001, "sup_loss": 1.545037808734181 }, { "step": 325, "lr": 0.0001, "sup_loss": 1.5807109835114312, "lyap1_mean": -6.280990123748779, "lyap1_max": -6.268655776977539, "lyap_spec_mean": [ -6.2809906005859375, -6.283902168273926 ], "lyap_bound": -0.10536051565782628 }, { "step": 326, "lr": 0.0001, "sup_loss": 1.5361487778088874 }, { "step": 327, "lr": 0.0001, "sup_loss": 1.5569850983489886 }, { "step": 328, "lr": 0.0001, "sup_loss": 1.5468934937491015 }, { "step": 329, "lr": 0.0001, "sup_loss": 1.5269107579149266 }, { "step": 330, "lr": 0.0001, "sup_loss": 1.5308932888735198 }, { "step": 331, "lr": 0.0001, "sup_loss": 1.546140456130772 }, { "step": 332, "lr": 0.0001, "sup_loss": 1.556001616894801 }, { "step": 333, "lr": 0.0001, "sup_loss": 1.5160081181707756 }, { "step": 334, "lr": 0.0001, "sup_loss": 1.5370807622724176 }, { "step": 335, "lr": 0.0001, "sup_loss": 1.5463874980357313 }, { "step": 336, "lr": 0.0001, "sup_loss": 1.527108314657003 }, { "step": 337, "lr": 0.0001, "sup_loss": 1.5391164403451145 }, { "step": 338, "lr": 0.0001, "sup_loss": 1.5288020191539669 }, { "step": 339, "lr": 0.0001, "sup_loss": 1.544628633865707 }, { "step": 340, "lr": 0.0001, "sup_loss": 1.5508991977799556 }, { "step": 341, "lr": 0.0001, "sup_loss": 1.55731107953372 }, { "step": 342, "lr": 0.0001, "sup_loss": 1.5502437895717247 }, { "step": 343, "lr": 0.0001, "sup_loss": 1.5113026766574436 }, { "step": 344, "lr": 0.0001, "sup_loss": 1.534505224713083 }, { "step": 345, "lr": 0.0001, "sup_loss": 1.5664212448478674 }, { "step": 346, "lr": 0.0001, "sup_loss": 1.5343774635089513 }, { "step": 347, "lr": 0.0001, "sup_loss": 1.523872870702963 }, { "step": 348, "lr": 0.0001, "sup_loss": 1.520222813804984 }, { "step": 349, "lr": 0.0001, "sup_loss": 1.5100668329858764 }, { "step": 350, "lr": 0.0001, "sup_loss": 1.5462873210196821, "lyap1_mean": -6.279208660125732, "lyap1_max": -6.246695518493652, "lyap_spec_mean": [ -6.279209136962891, -6.2811126708984375 ], "lyap_bound": -0.10536051565782628 }, { "step": 351, "lr": 0.0001, "sup_loss": 1.5592996002285948 }, { "step": 352, "lr": 0.0001, "sup_loss": 1.513165697344076 }, { "step": 353, "lr": 0.0001, "sup_loss": 1.5556921340697518 }, { "step": 354, "lr": 0.0001, "sup_loss": 1.558373381312493 }, { "step": 355, "lr": 0.0001, "sup_loss": 1.5547521752157896 }, { "step": 356, "lr": 0.0001, "sup_loss": 1.5482859037251404 }, { "step": 357, "lr": 0.0001, "sup_loss": 1.5541198311222053 }, { "step": 358, "lr": 0.0001, "sup_loss": 1.513929981198374 }, { "step": 359, "lr": 0.0001, "sup_loss": 1.5386774539004036 }, { "step": 360, "lr": 0.0001, "sup_loss": 1.5418122629340056 }, { "step": 361, "lr": 0.0001, "sup_loss": 1.5336020776983843 }, { "step": 362, "lr": 0.0001, "sup_loss": 1.5442231828959216 }, { "step": 363, "lr": 0.0001, "sup_loss": 1.5474763280659718 }, { "step": 364, "lr": 0.0001, "sup_loss": 1.5401739408673039 }, { "step": 365, "lr": 0.0001, "sup_loss": 1.5340803578606583 }, { "step": 366, "lr": 0.0001, "sup_loss": 1.5137462377878363 }, { "step": 367, "lr": 0.0001, "sup_loss": 1.5490127083318859 }, { "step": 368, "lr": 0.0001, "sup_loss": 1.562103831097833 }, { "step": 369, "lr": 0.0001, "sup_loss": 1.5230020136679903 }, { "step": 370, "lr": 0.0001, "sup_loss": 1.519926874355663 }, { "step": 371, "lr": 0.0001, "sup_loss": 1.525188796570022 }, { "step": 372, "lr": 0.0001, "sup_loss": 1.5389628270751545 }, { "step": 373, "lr": 0.0001, "sup_loss": 1.5206521555610368 }, { "step": 374, "lr": 0.0001, "sup_loss": 1.5910327063155232 }, { "step": 375, "lr": 0.0001, "sup_loss": 1.5517228598442367, "lyap1_mean": -6.272121906280518, "lyap1_max": -6.257327079772949, "lyap_spec_mean": [ -6.272121906280518, -6.272157669067383 ], "lyap_bound": -0.10536051565782628 }, { "step": 376, "lr": 0.0001, "sup_loss": 1.5083365432341427 }, { "step": 377, "lr": 0.0001, "sup_loss": 1.5351844511019792 }, { "step": 378, "lr": 0.0001, "sup_loss": 1.5185103477122763 }, { "step": 379, "lr": 0.0001, "sup_loss": 1.5313015037319202 }, { "step": 380, "lr": 0.0001, "sup_loss": 1.6042587657443281 }, { "step": 381, "lr": 0.0001, "sup_loss": 1.5148778440809103 }, { "step": 382, "lr": 0.0001, "sup_loss": 1.547646792356559 }, { "step": 383, "lr": 0.0001, "sup_loss": 1.5043766820879998 }, { "step": 384, "lr": 0.0001, "sup_loss": 1.517374048269513 }, { "step": 385, "lr": 0.0001, "sup_loss": 1.510813145580335 }, { "step": 386, "lr": 0.0001, "sup_loss": 1.5343651897756998 }, { "step": 387, "lr": 0.0001, "sup_loss": 1.5567518054948828 }, { "step": 388, "lr": 0.0001, "sup_loss": 1.556583401528102 }, { "step": 389, "lr": 0.0001, "sup_loss": 1.5469114160109558 }, { "step": 390, "lr": 0.0001, "sup_loss": 1.5066436477329854 }, { "step": 391, "lr": 0.0001, "sup_loss": 1.5363803613218583 }, { "step": 392, "lr": 0.0001, "sup_loss": 1.5623538263511423 }, { "step": 393, "lr": 0.0001, "sup_loss": 1.5509992112423387 }, { "step": 394, "lr": 0.0001, "sup_loss": 1.5553260421123227 }, { "step": 395, "lr": 0.0001, "sup_loss": 1.5360717974523455 }, { "step": 396, "lr": 0.0001, "sup_loss": 1.5299607690174781 }, { "step": 397, "lr": 0.0001, "sup_loss": 1.4931204366796695 }, { "step": 398, "lr": 0.0001, "sup_loss": 1.5352384127840748 }, { "step": 399, "lr": 0.0001, "sup_loss": 1.5222885300656976 }, { "step": 400, "lr": 0.0001, "sup_loss": 1.508831153333313, "lyap1_mean": -6.27598237991333, "lyap1_max": -6.24452018737793, "lyap_spec_mean": [ -6.275981903076172, -6.269106388092041 ], "lyap_bound": -0.10536051565782628 }, { "step": 401, "lr": 0.0001, "sup_loss": 1.5349017639824347 }, { "step": 402, "lr": 0.0001, "sup_loss": 1.548015367001545 }, { "step": 403, "lr": 0.0001, "sup_loss": 1.5078454672949264 }, { "step": 404, "lr": 0.0001, "sup_loss": 1.5185558897396196 }, { "step": 405, "lr": 0.0001, "sup_loss": 1.5512310382424748 }, { "step": 406, "lr": 0.0001, "sup_loss": 1.5304067426341317 }, { "step": 407, "lr": 0.0001, "sup_loss": 1.5183398062918037 }, { "step": 408, "lr": 0.0001, "sup_loss": 1.5387815991781382 }, { "step": 409, "lr": 0.0001, "sup_loss": 1.5672918045341844 }, { "step": 410, "lr": 0.0001, "sup_loss": 1.5307374295242138 }, { "step": 411, "lr": 0.0001, "sup_loss": 1.544239649750348 }, { "step": 412, "lr": 0.0001, "sup_loss": 1.5372248394722121 }, { "step": 413, "lr": 0.0001, "sup_loss": 1.546755674066759 }, { "step": 414, "lr": 0.0001, "sup_loss": 1.5602227617649562 }, { "step": 415, "lr": 0.0001, "sup_loss": 1.5235959308536382 }, { "step": 416, "lr": 0.0001, "sup_loss": 1.54999620670243 }, { "step": 417, "lr": 0.0001, "sup_loss": 1.5595363483742926 }, { "step": 418, "lr": 0.0001, "sup_loss": 1.5429251013869647 }, { "step": 419, "lr": 0.0001, "sup_loss": 1.5263398785761602 }, { "step": 420, "lr": 0.0001, "sup_loss": 1.5403264813934174 }, { "step": 421, "lr": 0.0001, "sup_loss": 1.51891197855286 }, { "step": 422, "lr": 0.0001, "sup_loss": 1.5330320818314984 }, { "step": 423, "lr": 0.0001, "sup_loss": 1.5024285323810576 }, { "step": 424, "lr": 0.0001, "sup_loss": 1.5529554719113352 }, { "step": 425, "lr": 0.0001, "sup_loss": 1.519168167373818, "lyap1_mean": -6.279496192932129, "lyap1_max": -6.26162052154541, "lyap_spec_mean": [ -6.279496192932129, -6.2808918952941895 ], "lyap_bound": -0.10536051565782628 }, { "step": 426, "lr": 0.0001, "sup_loss": 1.5365064575118708 }, { "step": 427, "lr": 0.0001, "sup_loss": 1.5222562257747192 }, { "step": 428, "lr": 0.0001, "sup_loss": 1.519125449620276 }, { "step": 429, "lr": 0.0001, "sup_loss": 1.529261269354274 }, { "step": 430, "lr": 0.0001, "sup_loss": 1.52106984620442 }, { "step": 431, "lr": 0.0001, "sup_loss": 1.542332523616496 }, { "step": 432, "lr": 0.0001, "sup_loss": 1.5053887182166101 }, { "step": 433, "lr": 0.0001, "sup_loss": 1.5287912390123242 }, { "step": 434, "lr": 0.0001, "sup_loss": 1.4792699148804047 }, { "step": 435, "lr": 0.0001, "sup_loss": 1.5175428625480079 }, { "step": 436, "lr": 0.0001, "sup_loss": 1.5141915348633057 }, { "step": 437, "lr": 0.0001, "sup_loss": 1.5021534012618962 }, { "step": 438, "lr": 0.0001, "sup_loss": 1.515655711068608 }, { "step": 439, "lr": 0.0001, "sup_loss": 1.5336998941946303 }, { "step": 440, "lr": 0.0001, "sup_loss": 1.5709049462152431 }, { "step": 441, "lr": 0.0001, "sup_loss": 1.5345619270812774 }, { "step": 442, "lr": 0.0001, "sup_loss": 1.545632307238727 }, { "step": 443, "lr": 0.0001, "sup_loss": 1.5348385538551335 }, { "step": 444, "lr": 0.0001, "sup_loss": 1.5596831114341054 }, { "step": 445, "lr": 0.0001, "sup_loss": 1.5213286739984129 }, { "step": 446, "lr": 0.0001, "sup_loss": 1.507136513986345 }, { "step": 447, "lr": 0.0001, "sup_loss": 1.5338240960099745 }, { "step": 448, "lr": 0.0001, "sup_loss": 1.5335693962074035 }, { "step": 449, "lr": 0.0001, "sup_loss": 1.5439888653044145 }, { "step": 450, "lr": 0.0001, "sup_loss": 1.527999408648118, "lyap1_mean": -6.28324031829834, "lyap1_max": -6.266049385070801, "lyap_spec_mean": [ -6.28324031829834, -6.283727169036865 ], "lyap_bound": -0.10536051565782628 }, { "step": 451, "lr": 0.0001, "sup_loss": 1.5393156317969563 }, { "step": 452, "lr": 0.0001, "sup_loss": 1.5240758554208984 }, { "step": 453, "lr": 0.0001, "sup_loss": 1.5172760077652057 }, { "step": 454, "lr": 0.0001, "sup_loss": 1.5435138079852262 }, { "step": 455, "lr": 0.0001, "sup_loss": 1.553782064658805 }, { "step": 456, "lr": 0.0001, "sup_loss": 1.4990875570761242 }, { "step": 457, "lr": 0.0001, "sup_loss": 1.526767795262043 }, { "step": 458, "lr": 0.0001, "sup_loss": 1.5074100872415515 }, { "step": 459, "lr": 0.0001, "sup_loss": 1.5299431484804877 }, { "step": 460, "lr": 0.0001, "sup_loss": 1.5411429028548818 }, { "step": 461, "lr": 0.0001, "sup_loss": 1.5258069753257875 }, { "step": 462, "lr": 0.0001, "sup_loss": 1.5130222736269014 }, { "step": 463, "lr": 0.0001, "sup_loss": 1.59688740678216 }, { "step": 464, "lr": 0.0001, "sup_loss": 1.5169865077256424 }, { "step": 465, "lr": 0.0001, "sup_loss": 1.522381168712973 }, { "step": 466, "lr": 0.0001, "sup_loss": 1.522439002600057 }, { "step": 467, "lr": 0.0001, "sup_loss": 1.5190416825884472 }, { "step": 468, "lr": 0.0001, "sup_loss": 1.5425338998993232 }, { "step": 469, "lr": 0.0001, "sup_loss": 1.5452529428590425 }, { "step": 470, "lr": 0.0001, "sup_loss": 1.5260539108805669 }, { "step": 471, "lr": 0.0001, "sup_loss": 1.5488179073420898 }, { "step": 472, "lr": 0.0001, "sup_loss": 1.5192884004268512 }, { "step": 473, "lr": 0.0001, "sup_loss": 1.5055062362761682 }, { "step": 474, "lr": 0.0001, "sup_loss": 1.4984221563948537 }, { "step": 475, "lr": 0.0001, "sup_loss": 1.5320479692645415, "lyap1_mean": -6.27734375, "lyap1_max": -6.261284828186035, "lyap_spec_mean": [ -6.277344226837158, -6.2793049812316895 ], "lyap_bound": -0.10536051565782628 }, { "step": 476, "lr": 0.0001, "sup_loss": 1.538385270074266 }, { "step": 477, "lr": 0.0001, "sup_loss": 1.4984740868654256 }, { "step": 478, "lr": 0.0001, "sup_loss": 1.5450349077570704 }, { "step": 479, "lr": 0.0001, "sup_loss": 1.5320467747126512 }, { "step": 480, "lr": 0.0001, "sup_loss": 1.5451753778273984 }, { "step": 481, "lr": 0.0001, "sup_loss": 1.5345277338401393 }, { "step": 482, "lr": 0.0001, "sup_loss": 1.5576481611049358 }, { "step": 483, "lr": 0.0001, "sup_loss": 1.525092799737688 }, { "step": 484, "lr": 0.0001, "sup_loss": 1.5177981040203947 }, { "step": 485, "lr": 0.0001, "sup_loss": 1.524682836990655 }, { "step": 486, "lr": 0.0001, "sup_loss": 1.5577210090147655 }, { "step": 487, "lr": 0.0001, "sup_loss": 1.5175527588503717 }, { "step": 488, "lr": 0.0001, "sup_loss": 1.517884672177967 }, { "step": 489, "lr": 0.0001, "sup_loss": 1.540989397132943 }, { "step": 490, "lr": 0.0001, "sup_loss": 1.5207840823262242 }, { "step": 491, "lr": 0.0001, "sup_loss": 1.5308205979089273 }, { "step": 492, "lr": 0.0001, "sup_loss": 1.5343529235115525 }, { "step": 493, "lr": 0.0001, "sup_loss": 1.4972792643274797 }, { "step": 494, "lr": 0.0001, "sup_loss": 1.5436764958011016 }, { "step": 495, "lr": 0.0001, "sup_loss": 1.5344808545549244 }, { "step": 496, "lr": 0.0001, "sup_loss": 1.5101506555087487 }, { "step": 497, "lr": 0.0001, "sup_loss": 1.5398357969125787 }, { "step": 498, "lr": 0.0001, "sup_loss": 1.5341583392864167 }, { "step": 499, "lr": 0.0001, "sup_loss": 1.5103523937991166 } ], "evals": [ { "step": 0, "acc": 0.0, "tok_acc": 0.0 }, { "step": 100, "acc": 0.0, "tok_acc": 0.38816550925925924 }, { "step": 200, "acc": 0.0, "tok_acc": 0.3872974537037037 }, { "step": 300, "acc": 0.0, "tok_acc": 0.3872974537037037 }, { "step": 400, "acc": 0.0, "tok_acc": 0.39052854938271603 }, { "step": 500, "acc": 0.0, "tok_acc": 0.3880208333333333 } ], "final_acc": 0.0, "final_tok_acc": 0.3880208333333333 }