diff options
Diffstat (limited to 'research/flossing/ab_baseline.json')
| -rw-r--r-- | research/flossing/ab_baseline.json | 1834 |
1 files changed, 1834 insertions, 0 deletions
diff --git a/research/flossing/ab_baseline.json b/research/flossing/ab_baseline.json new file mode 100644 index 0000000..2c9e72f --- /dev/null +++ b/research/flossing/ab_baseline.json @@ -0,0 +1,1834 @@ +{ + "args": { + "ckpt_root": "/home/yurenh2/rrm/hrm/checkpoints/Sudoku-extreme-1k-aug-1000 ACT-torch/HierarchicalReasoningModel_ACTV1 righteous-python", + "ckpt_name": "step_26040", + "n_steps": 200, + "batch_size": 8, + "lr": 5e-06, + "alpha_rf": 0.0, + "lambda_star": -0.85, + "k_lyap": 2, + "lyap_act_steps": 4, + "rf_mode": "fixed", + "rf_eps": 1e-06, + "seed": 42, + "eval_every": 50, + "eval_n": 512, + "eval_batch_size": 32, + "out": "ab_baseline.json" + }, + "initial_acc": 0.517578125, + "initial_tok_acc": 0.8276668595679012, + "steps": [ + { + "step": 0, + "sup_loss": 0.5692796107486419, + "rf_loss": 0.0188673734664917, + "total_loss": 0.5692796107486419, + "mean_lyap": -0.8058679103851318, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.5 + }, + { + "step": 1, + "sup_loss": 0.44216542344132326, + "rf_loss": 0.008090442977845669, + "total_loss": 0.44216542344132326, + "mean_lyap": -0.8389676809310913, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.375 + }, + { + "step": 2, + "sup_loss": 0.7179927810809378, + "rf_loss": 0.02125377207994461, + "total_loss": 0.7179927810809378, + "mean_lyap": -0.772433876991272, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.625 + }, + { + "step": 3, + "sup_loss": 0.5924383523500246, + "rf_loss": 0.011564759537577629, + "total_loss": 0.5924383523500246, + "mean_lyap": -0.8060556650161743, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 4, + "sup_loss": 0.556722850089877, + "rf_loss": 0.0227561853826046, + "total_loss": 0.556722850089877, + "mean_lyap": -0.7509233951568604, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 5, + "sup_loss": 0.8150128306870151, + "rf_loss": 0.029057025909423828, + "total_loss": 0.8150128306870151, + "mean_lyap": -0.7090215086936951, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 6, + "sup_loss": 0.573086816912248, + "rf_loss": 0.015127853490412235, + "total_loss": 0.573086816912248, + "mean_lyap": -0.7678287029266357, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 7, + "sup_loss": 0.608783918182104, + "rf_loss": 0.019849684089422226, + "total_loss": 0.608783918182104, + "mean_lyap": -0.7466349005699158, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 8, + "sup_loss": 0.8013712427010381, + "rf_loss": 0.024912847205996513, + "total_loss": 0.8013712427010381, + "mean_lyap": -0.7160708904266357, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 9, + "sup_loss": 0.8611781132773041, + "rf_loss": 0.02378900721669197, + "total_loss": 0.8611781132773041, + "mean_lyap": -0.7170447111129761, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 10, + "sup_loss": 0.603850205329637, + "rf_loss": 0.02224445343017578, + "total_loss": 0.603850205329637, + "mean_lyap": -0.7423288822174072, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 11, + "sup_loss": 0.5935196770689526, + "rf_loss": 0.017126888036727905, + "total_loss": 0.5935196770689526, + "mean_lyap": -0.7946580648422241, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.625 + }, + { + "step": 12, + "sup_loss": 0.4828408869062336, + "rf_loss": 0.0137407211586833, + "total_loss": 0.4828408869062336, + "mean_lyap": -0.7682695388793945, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 13, + "sup_loss": 0.6836410110691055, + "rf_loss": 0.020328622311353683, + "total_loss": 0.6836410110691055, + "mean_lyap": -0.741506040096283, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 14, + "sup_loss": 0.6211160413092429, + "rf_loss": 0.01726095750927925, + "total_loss": 0.6211160413092429, + "mean_lyap": -0.7909979224205017, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.625 + }, + { + "step": 15, + "sup_loss": 0.7067292512448923, + "rf_loss": 0.013877086341381073, + "total_loss": 0.7067292512448923, + "mean_lyap": -0.780184268951416, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 16, + "sup_loss": 0.5147661194638552, + "rf_loss": 0.012162098661065102, + "total_loss": 0.5147661194638552, + "mean_lyap": -0.7706966996192932, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 17, + "sup_loss": 0.7133729104702038, + "rf_loss": 0.027364332228899002, + "total_loss": 0.7133729104702038, + "mean_lyap": -0.7124254703521729, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 18, + "sup_loss": 0.6053845994506637, + "rf_loss": 0.018460115417838097, + "total_loss": 0.6053845994506637, + "mean_lyap": -0.7781225442886353, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 19, + "sup_loss": 0.47194613694069043, + "rf_loss": 0.011388372629880905, + "total_loss": 0.47194613694069043, + "mean_lyap": -0.8060376048088074, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.5 + }, + { + "step": 20, + "sup_loss": 0.6011921668060978, + "rf_loss": 0.015685610473155975, + "total_loss": 0.6011921668060978, + "mean_lyap": -0.7717862129211426, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 21, + "sup_loss": 0.5884581598601081, + "rf_loss": 0.024047259241342545, + "total_loss": 0.5884581598601081, + "mean_lyap": -0.7246743440628052, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 22, + "sup_loss": 0.5379079542528139, + "rf_loss": 0.01822628453373909, + "total_loss": 0.5379079542528139, + "mean_lyap": -0.7664903402328491, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 23, + "sup_loss": 0.5228693137725087, + "rf_loss": 0.01828143000602722, + "total_loss": 0.5228693137725087, + "mean_lyap": -0.7506092190742493, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 24, + "sup_loss": 0.6359728331729679, + "rf_loss": 0.014887947589159012, + "total_loss": 0.6359728331729679, + "mean_lyap": -0.7469883561134338, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 25, + "sup_loss": 0.7589778317342881, + "rf_loss": 0.027633272111415863, + "total_loss": 0.7589778317342881, + "mean_lyap": -0.7086294889450073, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 26, + "sup_loss": 0.7816825484183515, + "rf_loss": 0.02294420450925827, + "total_loss": 0.7816825484183515, + "mean_lyap": -0.7194583415985107, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 27, + "sup_loss": 0.40752060809355906, + "rf_loss": 0.00869689043611288, + "total_loss": 0.40752060809355906, + "mean_lyap": -0.853058397769928, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.5 + }, + { + "step": 28, + "sup_loss": 0.7598342200180294, + "rf_loss": 0.03639621287584305, + "total_loss": 0.7598342200180294, + "mean_lyap": -0.6926866769790649, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 29, + "sup_loss": 0.7346288749325594, + "rf_loss": 0.02920931950211525, + "total_loss": 0.7346288749325594, + "mean_lyap": -0.7217535972595215, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 30, + "sup_loss": 0.7777219884619142, + "rf_loss": 0.02019217610359192, + "total_loss": 0.7777219884619142, + "mean_lyap": -0.7585852146148682, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 31, + "sup_loss": 0.62066619383762, + "rf_loss": 0.02644718810915947, + "total_loss": 0.62066619383762, + "mean_lyap": -0.7444592714309692, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 32, + "sup_loss": 0.6670801201641591, + "rf_loss": 0.022040028125047684, + "total_loss": 0.6670801201641591, + "mean_lyap": -0.7274681329727173, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 33, + "sup_loss": 0.4943865516639496, + "rf_loss": 0.018601693212985992, + "total_loss": 0.4943865516639496, + "mean_lyap": -0.7411475777626038, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 1.0 + }, + { + "step": 34, + "sup_loss": 0.6997482393491802, + "rf_loss": 0.015254433266818523, + "total_loss": 0.6997482393491802, + "mean_lyap": -0.7534999847412109, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 35, + "sup_loss": 0.43278240473618146, + "rf_loss": 0.00983005203306675, + "total_loss": 0.43278240473618146, + "mean_lyap": -0.833416759967804, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.5 + }, + { + "step": 36, + "sup_loss": 0.6031542975352331, + "rf_loss": 0.01650114357471466, + "total_loss": 0.6031542975352331, + "mean_lyap": -0.7596582770347595, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 37, + "sup_loss": 0.8032300956495472, + "rf_loss": 0.020659850910305977, + "total_loss": 0.8032300956495472, + "mean_lyap": -0.7174378037452698, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 1.0 + }, + { + "step": 38, + "sup_loss": 0.5121771257965528, + "rf_loss": 0.013280767016112804, + "total_loss": 0.5121771257965528, + "mean_lyap": -0.786674976348877, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.625 + }, + { + "step": 39, + "sup_loss": 0.7275060381637447, + "rf_loss": 0.0203658826649189, + "total_loss": 0.7275060381637447, + "mean_lyap": -0.7346393465995789, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 40, + "sup_loss": 0.6514353098353729, + "rf_loss": 0.021555598825216293, + "total_loss": 0.6514353098353729, + "mean_lyap": -0.7274446487426758, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 1.0 + }, + { + "step": 41, + "sup_loss": 0.49709373743651797, + "rf_loss": 0.016344700008630753, + "total_loss": 0.49709373743651797, + "mean_lyap": -0.7981948256492615, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 42, + "sup_loss": 0.7065703530147928, + "rf_loss": 0.02221592701971531, + "total_loss": 0.7065703530147928, + "mean_lyap": -0.7542256712913513, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 43, + "sup_loss": 0.7090396772089527, + "rf_loss": 0.016372324898838997, + "total_loss": 0.7090396772089527, + "mean_lyap": -0.7794075012207031, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 44, + "sup_loss": 0.9289881086289538, + "rf_loss": 0.033044710755348206, + "total_loss": 0.9289881086289538, + "mean_lyap": -0.6806827783584595, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 1.0 + }, + { + "step": 45, + "sup_loss": 0.6448505333994983, + "rf_loss": 0.014616364613175392, + "total_loss": 0.6448505333994983, + "mean_lyap": -0.7577605247497559, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 46, + "sup_loss": 0.6382476558440502, + "rf_loss": 0.026464039459824562, + "total_loss": 0.6382476558440502, + "mean_lyap": -0.7286443710327148, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 47, + "sup_loss": 0.6398566142673036, + "rf_loss": 0.01438322477042675, + "total_loss": 0.6398566142673036, + "mean_lyap": -0.7668901681900024, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.625 + }, + { + "step": 48, + "sup_loss": 0.563725057322177, + "rf_loss": 0.01295809168368578, + "total_loss": 0.563725057322177, + "mean_lyap": -0.8093639612197876, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.5 + }, + { + "step": 49, + "sup_loss": 0.5697769070491401, + "rf_loss": 0.013909493573009968, + "total_loss": 0.5697769070491401, + "mean_lyap": -0.7570986747741699, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875, + "eval_acc": 0.552734375, + "eval_tok_acc": 0.8404706790123457 + }, + { + "step": 50, + "sup_loss": 0.5826585723900255, + "rf_loss": 0.0233764685690403, + "total_loss": 0.5826585723900255, + "mean_lyap": -0.7396541833877563, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 51, + "sup_loss": 0.790284612106612, + "rf_loss": 0.020453739911317825, + "total_loss": 0.790284612106612, + "mean_lyap": -0.7257196307182312, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 52, + "sup_loss": 0.5531537291386107, + "rf_loss": 0.016834422945976257, + "total_loss": 0.5531537291386107, + "mean_lyap": -0.8007345795631409, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.625 + }, + { + "step": 53, + "sup_loss": 0.6352155311914186, + "rf_loss": 0.013691319152712822, + "total_loss": 0.6352155311914186, + "mean_lyap": -0.7804731130599976, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 54, + "sup_loss": 0.6188030821523791, + "rf_loss": 0.008023826405405998, + "total_loss": 0.6188030821523791, + "mean_lyap": -0.7914124727249146, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 55, + "sup_loss": 0.6814536327347118, + "rf_loss": 0.014042635448276997, + "total_loss": 0.6814536327347118, + "mean_lyap": -0.7954126596450806, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.625 + }, + { + "step": 56, + "sup_loss": 0.5377735327434082, + "rf_loss": 0.0090873334556818, + "total_loss": 0.5377735327434082, + "mean_lyap": -0.8083645105361938, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.625 + }, + { + "step": 57, + "sup_loss": 0.6449342931850235, + "rf_loss": 0.0221039317548275, + "total_loss": 0.6449342931850235, + "mean_lyap": -0.7603305578231812, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.625 + }, + { + "step": 58, + "sup_loss": 0.66538010054372, + "rf_loss": 0.013639635406434536, + "total_loss": 0.66538010054372, + "mean_lyap": -0.7493693232536316, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 59, + "sup_loss": 0.44518168701931965, + "rf_loss": 0.009685002267360687, + "total_loss": 0.44518168701931965, + "mean_lyap": -0.8124280571937561, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.5 + }, + { + "step": 60, + "sup_loss": 0.7009587615205658, + "rf_loss": 0.016143623739480972, + "total_loss": 0.7009587615205658, + "mean_lyap": -0.7512706518173218, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 61, + "sup_loss": 0.5436809132351307, + "rf_loss": 0.01747089996933937, + "total_loss": 0.5436809132351307, + "mean_lyap": -0.7507422566413879, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 62, + "sup_loss": 0.4203695930318393, + "rf_loss": 0.012806981801986694, + "total_loss": 0.4203695930318393, + "mean_lyap": -0.7961456775665283, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.5 + }, + { + "step": 63, + "sup_loss": 0.7150037589508791, + "rf_loss": 0.015982968732714653, + "total_loss": 0.7150037589508791, + "mean_lyap": -0.7318211197853088, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 1.0 + }, + { + "step": 64, + "sup_loss": 0.652219758719418, + "rf_loss": 0.023553067818284035, + "total_loss": 0.652219758719418, + "mean_lyap": -0.7388643026351929, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 65, + "sup_loss": 0.837005725572724, + "rf_loss": 0.026055293157696724, + "total_loss": 0.837005725572724, + "mean_lyap": -0.7198929786682129, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 66, + "sup_loss": 0.6202614566734284, + "rf_loss": 0.020005282014608383, + "total_loss": 0.6202614566734284, + "mean_lyap": -0.7444818615913391, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 67, + "sup_loss": 0.4833856378949369, + "rf_loss": 0.028408441692590714, + "total_loss": 0.4833856378949369, + "mean_lyap": -0.7288103103637695, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 68, + "sup_loss": 0.661273925734417, + "rf_loss": 0.027708619832992554, + "total_loss": 0.661273925734417, + "mean_lyap": -0.7327063083648682, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 69, + "sup_loss": 0.7496881555889151, + "rf_loss": 0.026845691725611687, + "total_loss": 0.7496881555889151, + "mean_lyap": -0.7238901853561401, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 70, + "sup_loss": 0.655025449607148, + "rf_loss": 0.01595262996852398, + "total_loss": 0.655025449607148, + "mean_lyap": -0.7677739858627319, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 71, + "sup_loss": 0.7007515633599688, + "rf_loss": 0.021251795813441277, + "total_loss": 0.7007515633599688, + "mean_lyap": -0.7331578731536865, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 72, + "sup_loss": 0.576434780397206, + "rf_loss": 0.016705233603715897, + "total_loss": 0.576434780397206, + "mean_lyap": -0.7671521306037903, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.625 + }, + { + "step": 73, + "sup_loss": 0.5601490880533248, + "rf_loss": 0.013522581197321415, + "total_loss": 0.5601490880533248, + "mean_lyap": -0.7947361469268799, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.625 + }, + { + "step": 74, + "sup_loss": 0.7067256772074493, + "rf_loss": 0.018538862466812134, + "total_loss": 0.7067256772074493, + "mean_lyap": -0.7749499082565308, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.625 + }, + { + "step": 75, + "sup_loss": 0.6361385332077261, + "rf_loss": 0.016246065497398376, + "total_loss": 0.6361385332077261, + "mean_lyap": -0.7251455187797546, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 1.0 + }, + { + "step": 76, + "sup_loss": 0.5909602447715041, + "rf_loss": 0.021141495555639267, + "total_loss": 0.5909602447715041, + "mean_lyap": -0.7562252283096313, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.625 + }, + { + "step": 77, + "sup_loss": 0.6434410927961547, + "rf_loss": 0.017454879358410835, + "total_loss": 0.6434410927961547, + "mean_lyap": -0.7376397848129272, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 1.0 + }, + { + "step": 78, + "sup_loss": 0.9238963273058481, + "rf_loss": 0.021510258316993713, + "total_loss": 0.9238963273058481, + "mean_lyap": -0.7469452619552612, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 79, + "sup_loss": 0.7236918128430727, + "rf_loss": 0.020564239472150803, + "total_loss": 0.7236918128430727, + "mean_lyap": -0.726544976234436, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 80, + "sup_loss": 0.6409598500947482, + "rf_loss": 0.022763080894947052, + "total_loss": 0.6409598500947482, + "mean_lyap": -0.7322769165039062, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 81, + "sup_loss": 0.5012803999183488, + "rf_loss": 0.018363304436206818, + "total_loss": 0.5012803999183488, + "mean_lyap": -0.7594103813171387, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 82, + "sup_loss": 0.595928273477278, + "rf_loss": 0.017527179792523384, + "total_loss": 0.595928273477278, + "mean_lyap": -0.7859100699424744, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.5 + }, + { + "step": 83, + "sup_loss": 0.6768147054365412, + "rf_loss": 0.026447735726833344, + "total_loss": 0.6768147054365412, + "mean_lyap": -0.735393762588501, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 84, + "sup_loss": 0.7782185708548788, + "rf_loss": 0.024071525782346725, + "total_loss": 0.7782185708548788, + "mean_lyap": -0.7258061766624451, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 85, + "sup_loss": 0.7325408617531275, + "rf_loss": 0.014113116078078747, + "total_loss": 0.7325408617531275, + "mean_lyap": -0.7520842552185059, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 86, + "sup_loss": 0.6975618308654884, + "rf_loss": 0.013928623870015144, + "total_loss": 0.6975618308654884, + "mean_lyap": -0.7803336381912231, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 87, + "sup_loss": 0.6705027423264631, + "rf_loss": 0.017224054783582687, + "total_loss": 0.6705027423264631, + "mean_lyap": -0.7290611267089844, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 1.0 + }, + { + "step": 88, + "sup_loss": 0.839529559756467, + "rf_loss": 0.01645783707499504, + "total_loss": 0.839529559756467, + "mean_lyap": -0.7615631222724915, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 89, + "sup_loss": 0.7825565524722257, + "rf_loss": 0.027677323669195175, + "total_loss": 0.7825565524722257, + "mean_lyap": -0.7235103845596313, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 90, + "sup_loss": 0.5883908234238493, + "rf_loss": 0.01794498600065708, + "total_loss": 0.5883908234238493, + "mean_lyap": -0.7414563894271851, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 91, + "sup_loss": 0.7292005601421612, + "rf_loss": 0.027813289314508438, + "total_loss": 0.7292005601421612, + "mean_lyap": -0.7170945405960083, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 92, + "sup_loss": 0.728902278763871, + "rf_loss": 0.015727847814559937, + "total_loss": 0.728902278763871, + "mean_lyap": -0.7584367990493774, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 93, + "sup_loss": 0.5259643118830124, + "rf_loss": 0.04229661449790001, + "total_loss": 0.5259643118830124, + "mean_lyap": -0.6916723251342773, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 94, + "sup_loss": 0.6927865813446163, + "rf_loss": 0.0240999273955822, + "total_loss": 0.6927865813446163, + "mean_lyap": -0.7025532722473145, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 1.0 + }, + { + "step": 95, + "sup_loss": 0.6083888511156739, + "rf_loss": 0.021323921158909798, + "total_loss": 0.6083888511156739, + "mean_lyap": -0.7353249788284302, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 96, + "sup_loss": 0.7740486976196439, + "rf_loss": 0.018548760563135147, + "total_loss": 0.7740486976196439, + "mean_lyap": -0.7455372214317322, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 97, + "sup_loss": 0.4197253022727293, + "rf_loss": 0.014343942515552044, + "total_loss": 0.4197253022727293, + "mean_lyap": -0.7712873220443726, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 98, + "sup_loss": 0.6843407592199003, + "rf_loss": 0.01865364983677864, + "total_loss": 0.6843407592199003, + "mean_lyap": -0.7498626708984375, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 99, + "sup_loss": 0.6567347851715216, + "rf_loss": 0.01154338475316763, + "total_loss": 0.6567347851715216, + "mean_lyap": -0.7681440114974976, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875, + "eval_acc": 0.568359375, + "eval_tok_acc": 0.8472222222222222 + }, + { + "step": 100, + "sup_loss": 0.5980592795436199, + "rf_loss": 0.022254904732108116, + "total_loss": 0.5980592795436199, + "mean_lyap": -0.7540820837020874, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 101, + "sup_loss": 0.5624580689029629, + "rf_loss": 0.022493505850434303, + "total_loss": 0.5624580689029629, + "mean_lyap": -0.7384459972381592, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 102, + "sup_loss": 0.7240116003521692, + "rf_loss": 0.01897306554019451, + "total_loss": 0.7240116003521692, + "mean_lyap": -0.7602998614311218, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 103, + "sup_loss": 0.6642026462980748, + "rf_loss": 0.02639048546552658, + "total_loss": 0.6642026462980748, + "mean_lyap": -0.7270517349243164, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 104, + "sup_loss": 0.7024871782173701, + "rf_loss": 0.02026406303048134, + "total_loss": 0.7024871782173701, + "mean_lyap": -0.7161645293235779, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 1.0 + }, + { + "step": 105, + "sup_loss": 0.6312421595788891, + "rf_loss": 0.023384898900985718, + "total_loss": 0.6312421595788891, + "mean_lyap": -0.7193712592124939, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 106, + "sup_loss": 0.6884468966995417, + "rf_loss": 0.016240853816270828, + "total_loss": 0.6884468966995417, + "mean_lyap": -0.755121648311615, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 107, + "sup_loss": 0.6379543052468103, + "rf_loss": 0.029100649058818817, + "total_loss": 0.6379543052468103, + "mean_lyap": -0.7257106900215149, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 108, + "sup_loss": 0.6744931370553356, + "rf_loss": 0.02190406806766987, + "total_loss": 0.6744931370553356, + "mean_lyap": -0.7128636837005615, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 1.0 + }, + { + "step": 109, + "sup_loss": 0.7785363922920249, + "rf_loss": 0.024109523743391037, + "total_loss": 0.7785363922920249, + "mean_lyap": -0.7296158671379089, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 110, + "sup_loss": 0.588132590652831, + "rf_loss": 0.01571902260184288, + "total_loss": 0.588132590652831, + "mean_lyap": -0.7552504539489746, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 111, + "sup_loss": 0.7479231860441646, + "rf_loss": 0.025284072384238243, + "total_loss": 0.7479231860441646, + "mean_lyap": -0.7190150022506714, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 112, + "sup_loss": 0.6834877028197537, + "rf_loss": 0.03612999990582466, + "total_loss": 0.6834877028197537, + "mean_lyap": -0.6671320199966431, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 1.0 + }, + { + "step": 113, + "sup_loss": 0.6448277894236214, + "rf_loss": 0.025801360607147217, + "total_loss": 0.6448277894236214, + "mean_lyap": -0.7187458276748657, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 114, + "sup_loss": 0.5451011739222377, + "rf_loss": 0.019290510565042496, + "total_loss": 0.5451011739222377, + "mean_lyap": -0.770762026309967, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.625 + }, + { + "step": 115, + "sup_loss": 0.599914185954553, + "rf_loss": 0.023138873279094696, + "total_loss": 0.599914185954553, + "mean_lyap": -0.7440516352653503, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 116, + "sup_loss": 0.648403322220771, + "rf_loss": 0.02817549556493759, + "total_loss": 0.648403322220771, + "mean_lyap": -0.7008245587348938, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 117, + "sup_loss": 0.7785382513402747, + "rf_loss": 0.02821667678654194, + "total_loss": 0.7785382513402747, + "mean_lyap": -0.7017555236816406, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 118, + "sup_loss": 0.5088733739421664, + "rf_loss": 0.02005225419998169, + "total_loss": 0.5088733739421664, + "mean_lyap": -0.7307372093200684, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 119, + "sup_loss": 0.5891269964910592, + "rf_loss": 0.023113207891583443, + "total_loss": 0.5891269964910592, + "mean_lyap": -0.7270251512527466, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 120, + "sup_loss": 0.6018402072397729, + "rf_loss": 0.0183856338262558, + "total_loss": 0.6018402072397729, + "mean_lyap": -0.7451243996620178, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 121, + "sup_loss": 0.7100135583773861, + "rf_loss": 0.024687785655260086, + "total_loss": 0.7100135583773861, + "mean_lyap": -0.7152650356292725, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 122, + "sup_loss": 0.7555754775823643, + "rf_loss": 0.02922731637954712, + "total_loss": 0.7555754775823643, + "mean_lyap": -0.7086939215660095, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 123, + "sup_loss": 0.7816156267217089, + "rf_loss": 0.019390424713492393, + "total_loss": 0.7816156267217089, + "mean_lyap": -0.7322418689727783, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 124, + "sup_loss": 0.9818846053491461, + "rf_loss": 0.02662663720548153, + "total_loss": 0.9818846053491461, + "mean_lyap": -0.6961807012557983, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 1.0 + }, + { + "step": 125, + "sup_loss": 0.6221557133762831, + "rf_loss": 0.01712089404463768, + "total_loss": 0.6221557133762831, + "mean_lyap": -0.7866336107254028, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.625 + }, + { + "step": 126, + "sup_loss": 0.5070404021275237, + "rf_loss": 0.013781729154288769, + "total_loss": 0.5070404021275237, + "mean_lyap": -0.758777916431427, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 127, + "sup_loss": 0.4524038611152998, + "rf_loss": 0.013750974088907242, + "total_loss": 0.4524038611152998, + "mean_lyap": -0.7971649169921875, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 128, + "sup_loss": 0.4499837349472374, + "rf_loss": 0.017808884382247925, + "total_loss": 0.4499837349472374, + "mean_lyap": -0.7716026902198792, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.625 + }, + { + "step": 129, + "sup_loss": 0.45269192485421456, + "rf_loss": 0.015859700739383698, + "total_loss": 0.45269192485421456, + "mean_lyap": -0.76076340675354, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 130, + "sup_loss": 0.6673497155113727, + "rf_loss": 0.022777624428272247, + "total_loss": 0.6673497155113727, + "mean_lyap": -0.7725577354431152, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 131, + "sup_loss": 0.4786419942216587, + "rf_loss": 0.02173563279211521, + "total_loss": 0.4786419942216587, + "mean_lyap": -0.7585476636886597, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 132, + "sup_loss": 0.6063093064908082, + "rf_loss": 0.014882536605000496, + "total_loss": 0.6063093064908082, + "mean_lyap": -0.7524175643920898, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 1.0 + }, + { + "step": 133, + "sup_loss": 0.597613288846509, + "rf_loss": 0.013898427598178387, + "total_loss": 0.597613288846509, + "mean_lyap": -0.7747804522514343, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 134, + "sup_loss": 0.60566359002851, + "rf_loss": 0.01954154670238495, + "total_loss": 0.60566359002851, + "mean_lyap": -0.7563980221748352, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 135, + "sup_loss": 0.6949796434718941, + "rf_loss": 0.028822313994169235, + "total_loss": 0.6949796434718941, + "mean_lyap": -0.6872661113739014, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 1.0 + }, + { + "step": 136, + "sup_loss": 0.7223416948872027, + "rf_loss": 0.023509033024311066, + "total_loss": 0.7223416948872027, + "mean_lyap": -0.716987133026123, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 1.0 + }, + { + "step": 137, + "sup_loss": 0.7495671917133016, + "rf_loss": 0.030036117881536484, + "total_loss": 0.7495671917133016, + "mean_lyap": -0.6973689794540405, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 138, + "sup_loss": 0.6006217789008484, + "rf_loss": 0.025087039917707443, + "total_loss": 0.6006217789008484, + "mean_lyap": -0.7467139363288879, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.625 + }, + { + "step": 139, + "sup_loss": 0.655849728214072, + "rf_loss": 0.02381773479282856, + "total_loss": 0.655849728214072, + "mean_lyap": -0.7454387545585632, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 140, + "sup_loss": 0.8102625015074788, + "rf_loss": 0.02733512781560421, + "total_loss": 0.8102625015074788, + "mean_lyap": -0.7079421281814575, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 141, + "sup_loss": 0.8502794994821964, + "rf_loss": 0.0390443429350853, + "total_loss": 0.8502794994821964, + "mean_lyap": -0.6732217073440552, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 1.0 + }, + { + "step": 142, + "sup_loss": 0.4447228354594742, + "rf_loss": 0.006015835329890251, + "total_loss": 0.4447228354594742, + "mean_lyap": -0.8424338698387146, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.5 + }, + { + "step": 143, + "sup_loss": 0.5819806541160968, + "rf_loss": 0.022949546575546265, + "total_loss": 0.5819806541160968, + "mean_lyap": -0.7761141657829285, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.625 + }, + { + "step": 144, + "sup_loss": 0.8907128100588448, + "rf_loss": 0.03222889453172684, + "total_loss": 0.8907128100588448, + "mean_lyap": -0.6746317744255066, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 1.0 + }, + { + "step": 145, + "sup_loss": 0.7475727065584685, + "rf_loss": 0.02313586324453354, + "total_loss": 0.7475727065584685, + "mean_lyap": -0.721125602722168, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 146, + "sup_loss": 0.5155802794452706, + "rf_loss": 0.010369600728154182, + "total_loss": 0.5155802794452706, + "mean_lyap": -0.8093665838241577, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.625 + }, + { + "step": 147, + "sup_loss": 0.5766483210434503, + "rf_loss": 0.030000735074281693, + "total_loss": 0.5766483210434503, + "mean_lyap": -0.7040218114852905, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 148, + "sup_loss": 0.6190240840623844, + "rf_loss": 0.020453080534934998, + "total_loss": 0.6190240840623844, + "mean_lyap": -0.7449148893356323, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 149, + "sup_loss": 0.4418606997334367, + "rf_loss": 0.016067467629909515, + "total_loss": 0.4418606997334367, + "mean_lyap": -0.7565109729766846, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75, + "eval_acc": 0.572265625, + "eval_tok_acc": 0.8472222222222222 + }, + { + "step": 150, + "sup_loss": 0.608863340368222, + "rf_loss": 0.010185742750763893, + "total_loss": 0.608863340368222, + "mean_lyap": -0.7974428534507751, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 151, + "sup_loss": 0.4989138195700283, + "rf_loss": 0.00835250411182642, + "total_loss": 0.4989138195700283, + "mean_lyap": -0.7983441352844238, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.625 + }, + { + "step": 152, + "sup_loss": 0.5762244655978565, + "rf_loss": 0.023800041526556015, + "total_loss": 0.5762244655978565, + "mean_lyap": -0.7355422973632812, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 153, + "sup_loss": 0.6510807838822642, + "rf_loss": 0.025307195261120796, + "total_loss": 0.6510807838822642, + "mean_lyap": -0.7280991077423096, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 154, + "sup_loss": 0.6181092234799915, + "rf_loss": 0.02716577984392643, + "total_loss": 0.6181092234799915, + "mean_lyap": -0.7010234594345093, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 1.0 + }, + { + "step": 155, + "sup_loss": 0.6119396019660481, + "rf_loss": 0.020342616364359856, + "total_loss": 0.6119396019660481, + "mean_lyap": -0.7642773389816284, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 156, + "sup_loss": 0.718161081444468, + "rf_loss": 0.02265091985464096, + "total_loss": 0.718161081444468, + "mean_lyap": -0.7601755857467651, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 157, + "sup_loss": 0.771410969770839, + "rf_loss": 0.023383142426609993, + "total_loss": 0.771410969770839, + "mean_lyap": -0.7313747406005859, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 158, + "sup_loss": 0.7873566901500851, + "rf_loss": 0.02689053863286972, + "total_loss": 0.7873566901500851, + "mean_lyap": -0.7017194032669067, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 1.0 + }, + { + "step": 159, + "sup_loss": 0.6521325517749783, + "rf_loss": 0.023862306028604507, + "total_loss": 0.6521325517749783, + "mean_lyap": -0.756867527961731, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 160, + "sup_loss": 0.8090020016278459, + "rf_loss": 0.02572615258395672, + "total_loss": 0.8090020016278459, + "mean_lyap": -0.6997763514518738, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 1.0 + }, + { + "step": 161, + "sup_loss": 0.39743736247155537, + "rf_loss": 0.008481619879603386, + "total_loss": 0.39743736247155537, + "mean_lyap": -0.8086578845977783, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 162, + "sup_loss": 0.43669027909842767, + "rf_loss": 0.02010437473654747, + "total_loss": 0.43669027909842767, + "mean_lyap": -0.7667924761772156, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 163, + "sup_loss": 0.6943994844592515, + "rf_loss": 0.02065116912126541, + "total_loss": 0.6943994844592515, + "mean_lyap": -0.7488412261009216, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 164, + "sup_loss": 0.5030190359687045, + "rf_loss": 0.008932781405746937, + "total_loss": 0.5030190359687045, + "mean_lyap": -0.8126202821731567, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.625 + }, + { + "step": 165, + "sup_loss": 0.5247402453124103, + "rf_loss": 0.007777722552418709, + "total_loss": 0.5247402453124103, + "mean_lyap": -0.7961758375167847, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 166, + "sup_loss": 0.6398741585180664, + "rf_loss": 0.014761778526008129, + "total_loss": 0.6398741585180664, + "mean_lyap": -0.7567418813705444, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 167, + "sup_loss": 0.8393518590572031, + "rf_loss": 0.020471977069973946, + "total_loss": 0.8393518590572031, + "mean_lyap": -0.7370960116386414, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 168, + "sup_loss": 0.680243910836956, + "rf_loss": 0.030797768384218216, + "total_loss": 0.680243910836956, + "mean_lyap": -0.7304750680923462, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 169, + "sup_loss": 0.6067754357631518, + "rf_loss": 0.010489920154213905, + "total_loss": 0.6067754357631518, + "mean_lyap": -0.7997013330459595, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.625 + }, + { + "step": 170, + "sup_loss": 0.5141692780649875, + "rf_loss": 0.012256162241101265, + "total_loss": 0.5141692780649875, + "mean_lyap": -0.7924825549125671, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.625 + }, + { + "step": 171, + "sup_loss": 0.5914484831356449, + "rf_loss": 0.027381759136915207, + "total_loss": 0.5914484831356449, + "mean_lyap": -0.7163530588150024, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 172, + "sup_loss": 0.5789175642945378, + "rf_loss": 0.01908736303448677, + "total_loss": 0.5789175642945378, + "mean_lyap": -0.7499892115592957, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 173, + "sup_loss": 0.4653848168603588, + "rf_loss": 0.017360832542181015, + "total_loss": 0.4653848168603588, + "mean_lyap": -0.7850685119628906, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 174, + "sup_loss": 0.7090750662356801, + "rf_loss": 0.013011809438467026, + "total_loss": 0.7090750662356801, + "mean_lyap": -0.7769396901130676, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 175, + "sup_loss": 0.5897593552028595, + "rf_loss": 0.026643576100468636, + "total_loss": 0.5897593552028595, + "mean_lyap": -0.7525628209114075, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 176, + "sup_loss": 0.6342460976107454, + "rf_loss": 0.016768531873822212, + "total_loss": 0.6342460976107454, + "mean_lyap": -0.7469217777252197, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 177, + "sup_loss": 0.6386313097045985, + "rf_loss": 0.02367786318063736, + "total_loss": 0.6386313097045985, + "mean_lyap": -0.7474036812782288, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 178, + "sup_loss": 0.615910173449133, + "rf_loss": 0.02075655572116375, + "total_loss": 0.615910173449133, + "mean_lyap": -0.7326847314834595, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 179, + "sup_loss": 0.6833451153579814, + "rf_loss": 0.011882389895617962, + "total_loss": 0.6833451153579814, + "mean_lyap": -0.7710140943527222, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 180, + "sup_loss": 0.6047951549693648, + "rf_loss": 0.017027340829372406, + "total_loss": 0.6047951549693648, + "mean_lyap": -0.753353476524353, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 181, + "sup_loss": 0.43196108913788, + "rf_loss": 0.012315124273300171, + "total_loss": 0.43196108913788, + "mean_lyap": -0.7744470834732056, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 182, + "sup_loss": 0.7267525749423682, + "rf_loss": 0.029976610094308853, + "total_loss": 0.7267525749423682, + "mean_lyap": -0.7055696249008179, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 183, + "sup_loss": 0.6574720505614811, + "rf_loss": 0.017794426530599594, + "total_loss": 0.6574720505614811, + "mean_lyap": -0.7502639293670654, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 184, + "sup_loss": 0.4021980908533472, + "rf_loss": 0.009408671408891678, + "total_loss": 0.4021980908533472, + "mean_lyap": -0.78846275806427, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 185, + "sup_loss": 0.4549793457086916, + "rf_loss": 0.009391248226165771, + "total_loss": 0.4549793457086916, + "mean_lyap": -0.8460166454315186, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.5 + }, + { + "step": 186, + "sup_loss": 0.5413158812363411, + "rf_loss": 0.01666955277323723, + "total_loss": 0.5413158812363411, + "mean_lyap": -0.7738894820213318, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 187, + "sup_loss": 0.6873596727871502, + "rf_loss": 0.024179410189390182, + "total_loss": 0.6873596727871502, + "mean_lyap": -0.7622608542442322, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 188, + "sup_loss": 0.7900023726432761, + "rf_loss": 0.021329613402485847, + "total_loss": 0.7900023726432761, + "mean_lyap": -0.7401515245437622, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 189, + "sup_loss": 0.8101270139540658, + "rf_loss": 0.013392371125519276, + "total_loss": 0.8101270139540658, + "mean_lyap": -0.7592623233795166, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 190, + "sup_loss": 0.6175607499982887, + "rf_loss": 0.016656629741191864, + "total_loss": 0.6175607499982887, + "mean_lyap": -0.7526005506515503, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 191, + "sup_loss": 0.8059601910443276, + "rf_loss": 0.02683323435485363, + "total_loss": 0.8059601910443276, + "mean_lyap": -0.7117177844047546, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 1.0 + }, + { + "step": 192, + "sup_loss": 0.672904260227444, + "rf_loss": 0.024678904563188553, + "total_loss": 0.672904260227444, + "mean_lyap": -0.713874101638794, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 193, + "sup_loss": 0.469671263379558, + "rf_loss": 0.008900867775082588, + "total_loss": 0.469671263379558, + "mean_lyap": -0.787243127822876, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 194, + "sup_loss": 0.793730523902836, + "rf_loss": 0.030314896255731583, + "total_loss": 0.793730523902836, + "mean_lyap": -0.6965149641036987, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 1.0 + }, + { + "step": 195, + "sup_loss": 0.4862851104982111, + "rf_loss": 0.011369990184903145, + "total_loss": 0.4862851104982111, + "mean_lyap": -0.8213597536087036, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.375 + }, + { + "step": 196, + "sup_loss": 0.760971154599404, + "rf_loss": 0.02575698308646679, + "total_loss": 0.760971154599404, + "mean_lyap": -0.6911941766738892, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 1.0 + }, + { + "step": 197, + "sup_loss": 0.6156609812949445, + "rf_loss": 0.016349555924534798, + "total_loss": 0.6156609812949445, + "mean_lyap": -0.78205406665802, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.5 + }, + { + "step": 198, + "sup_loss": 0.6535240735844162, + "rf_loss": 0.017935901880264282, + "total_loss": 0.6535240735844162, + "mean_lyap": -0.7265710830688477, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 1.0 + }, + { + "step": 199, + "sup_loss": 0.5490033841215428, + "rf_loss": 0.020748641341924667, + "total_loss": 0.5490033841215428, + "mean_lyap": -0.7320446968078613, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875, + "eval_acc": 0.5703125, + "eval_tok_acc": 0.8483072916666666 + } + ], + "final_acc": 0.5703125, + "final_tok_acc": 0.8483072916666666 +}
\ No newline at end of file |
