diff options
Diffstat (limited to 'research/flossing/ab_rf.json')
| -rw-r--r-- | research/flossing/ab_rf.json | 1834 |
1 files changed, 1834 insertions, 0 deletions
diff --git a/research/flossing/ab_rf.json b/research/flossing/ab_rf.json new file mode 100644 index 0000000..1cf79d7 --- /dev/null +++ b/research/flossing/ab_rf.json @@ -0,0 +1,1834 @@ +{ + "args": { + "ckpt_root": "/home/yurenh2/rrm/hrm/checkpoints/Sudoku-extreme-1k-aug-1000 ACT-torch/HierarchicalReasoningModel_ACTV1 righteous-python", + "ckpt_name": "step_26040", + "n_steps": 200, + "batch_size": 8, + "lr": 5e-06, + "alpha_rf": 10.0, + "lambda_star": -0.85, + "k_lyap": 2, + "lyap_act_steps": 4, + "rf_mode": "fixed", + "rf_eps": 1e-06, + "seed": 42, + "eval_every": 50, + "eval_n": 512, + "eval_batch_size": 32, + "out": "/home/yurenh2/rrm/research/flossing/ab_rf.json" + }, + "initial_acc": 0.517578125, + "initial_tok_acc": 0.8276668595679012, + "steps": [ + { + "step": 0, + "sup_loss": 0.5765840774308245, + "rf_loss": 0.0188673734664917, + "total_loss": 0.7652578120957415, + "mean_lyap": -0.8058679103851318, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.5 + }, + { + "step": 1, + "sup_loss": 0.40554469917943053, + "rf_loss": 0.0057648103684186935, + "total_loss": 0.46319280286361747, + "mean_lyap": -0.8606383800506592, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.375 + }, + { + "step": 2, + "sup_loss": 0.7296058169551144, + "rf_loss": 0.015785561874508858, + "total_loss": 0.8874614319749127, + "mean_lyap": -0.7925797700881958, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.625 + }, + { + "step": 3, + "sup_loss": 0.5326514314440994, + "rf_loss": 0.010626970790326595, + "total_loss": 0.6389211412100105, + "mean_lyap": -0.8121544718742371, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 4, + "sup_loss": 0.5782588876956446, + "rf_loss": 0.015642518177628517, + "total_loss": 0.7346840657466395, + "mean_lyap": -0.7712883353233337, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 5, + "sup_loss": 0.8227696935048122, + "rf_loss": 0.018467212095856667, + "total_loss": 1.0074418107380887, + "mean_lyap": -0.7421821355819702, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 6, + "sup_loss": 0.6188131744701122, + "rf_loss": 0.009561344981193542, + "total_loss": 0.7144266242820476, + "mean_lyap": -0.788561224937439, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 7, + "sup_loss": 0.6447263690556088, + "rf_loss": 0.016454031690955162, + "total_loss": 0.8092666896904507, + "mean_lyap": -0.7436193823814392, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 8, + "sup_loss": 0.8350695073343292, + "rf_loss": 0.01805921457707882, + "total_loss": 1.0156616568304075, + "mean_lyap": -0.747126579284668, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 9, + "sup_loss": 0.8473975550740483, + "rf_loss": 0.018726294860243797, + "total_loss": 1.0346605074017767, + "mean_lyap": -0.7429935932159424, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 10, + "sup_loss": 0.6606808108891549, + "rf_loss": 0.013587037101387978, + "total_loss": 0.7965511781777443, + "mean_lyap": -0.7866835594177246, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 11, + "sup_loss": 0.63067866316374, + "rf_loss": 0.015567373484373093, + "total_loss": 0.7863524054580515, + "mean_lyap": -0.8150412440299988, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.5 + }, + { + "step": 12, + "sup_loss": 0.42009245690532176, + "rf_loss": 0.007377433590590954, + "total_loss": 0.49386679467387645, + "mean_lyap": -0.8051602840423584, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 13, + "sup_loss": 0.7549789601332388, + "rf_loss": 0.01539212092757225, + "total_loss": 0.9089001768595419, + "mean_lyap": -0.7535644769668579, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 14, + "sup_loss": 0.5930591245881306, + "rf_loss": 0.009710620157420635, + "total_loss": 0.6901653280249821, + "mean_lyap": -0.8298624157905579, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.625 + }, + { + "step": 15, + "sup_loss": 0.6888967057957366, + "rf_loss": 0.009040959179401398, + "total_loss": 0.7793062975897506, + "mean_lyap": -0.8225681781768799, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.5 + }, + { + "step": 16, + "sup_loss": 0.5512681465998905, + "rf_loss": 0.01129273883998394, + "total_loss": 0.6641955387250202, + "mean_lyap": -0.7970694899559021, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.625 + }, + { + "step": 17, + "sup_loss": 0.7051346908681223, + "rf_loss": 0.013604645617306232, + "total_loss": 0.8411811451785395, + "mean_lyap": -0.7470482587814331, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 1.0 + }, + { + "step": 18, + "sup_loss": 0.5706506700775847, + "rf_loss": 0.010291795246303082, + "total_loss": 0.6735686244032607, + "mean_lyap": -0.7999143600463867, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 19, + "sup_loss": 0.5140771953697019, + "rf_loss": 0.009903106838464737, + "total_loss": 0.6131082637543492, + "mean_lyap": -0.8433663845062256, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.5 + }, + { + "step": 20, + "sup_loss": 0.6330896925310444, + "rf_loss": 0.01402571052312851, + "total_loss": 0.7733467977623295, + "mean_lyap": -0.7908270359039307, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 21, + "sup_loss": 0.6550339587133366, + "rf_loss": 0.006650024093687534, + "total_loss": 0.7215341977875668, + "mean_lyap": -0.8017191290855408, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 22, + "sup_loss": 0.6974606218591559, + "rf_loss": 0.004491833969950676, + "total_loss": 0.7423789615586627, + "mean_lyap": -0.8529043197631836, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.625 + }, + { + "step": 23, + "sup_loss": 0.6516045363274935, + "rf_loss": 0.008527552708983421, + "total_loss": 0.736880067142618, + "mean_lyap": -0.7849105000495911, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 24, + "sup_loss": 0.7038636713393823, + "rf_loss": 0.010556367225944996, + "total_loss": 0.8094273417361871, + "mean_lyap": -0.7658365964889526, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 25, + "sup_loss": 0.7385914636352537, + "rf_loss": 0.011378636583685875, + "total_loss": 0.8523778331974028, + "mean_lyap": -0.7721314430236816, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 26, + "sup_loss": 0.9184711394722027, + "rf_loss": 0.009672008454799652, + "total_loss": 1.0151912240201992, + "mean_lyap": -0.7574284672737122, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 1.0 + }, + { + "step": 27, + "sup_loss": 0.5379478623015401, + "rf_loss": 0.0028337787371128798, + "total_loss": 0.5662856501383302, + "mean_lyap": -0.9130557775497437, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.5 + }, + { + "step": 28, + "sup_loss": 0.8321097197966094, + "rf_loss": 0.018430069088935852, + "total_loss": 1.016410410685968, + "mean_lyap": -0.7524938583374023, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 29, + "sup_loss": 0.6580051011166252, + "rf_loss": 0.006823045201599598, + "total_loss": 0.7262355512699761, + "mean_lyap": -0.8172051906585693, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 30, + "sup_loss": 0.7026907211304697, + "rf_loss": 0.006128185428678989, + "total_loss": 0.7639725735546145, + "mean_lyap": -0.8284720182418823, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.625 + }, + { + "step": 31, + "sup_loss": 0.7111782013007493, + "rf_loss": 0.01262273732572794, + "total_loss": 0.8374055801459641, + "mean_lyap": -0.7845479249954224, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 32, + "sup_loss": 0.7769751564526539, + "rf_loss": 0.011238861829042435, + "total_loss": 0.8893637747430783, + "mean_lyap": -0.7726147174835205, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 33, + "sup_loss": 0.6691901240099508, + "rf_loss": 0.00915505737066269, + "total_loss": 0.7607406977165777, + "mean_lyap": -0.7760234475135803, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 34, + "sup_loss": 0.644363289681166, + "rf_loss": 0.002750927349552512, + "total_loss": 0.6718725636423524, + "mean_lyap": -0.8608823418617249, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.625 + }, + { + "step": 35, + "sup_loss": 0.5456849349717176, + "rf_loss": 0.005364897195249796, + "total_loss": 0.5993339059928929, + "mean_lyap": -0.8570781350135803, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.5 + }, + { + "step": 36, + "sup_loss": 0.6267922329648362, + "rf_loss": 0.003588401945307851, + "total_loss": 0.6626762519522534, + "mean_lyap": -0.8452996015548706, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.5 + }, + { + "step": 37, + "sup_loss": 0.860164007674845, + "rf_loss": 0.008395137265324593, + "total_loss": 0.9441153766028006, + "mean_lyap": -0.77690589427948, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 38, + "sup_loss": 0.5987716705341, + "rf_loss": 0.0041238958947360516, + "total_loss": 0.640010630412783, + "mean_lyap": -0.8743618726730347, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.5 + }, + { + "step": 39, + "sup_loss": 0.8265189456842287, + "rf_loss": 0.003278082702308893, + "total_loss": 0.859299771775995, + "mean_lyap": -0.8121100664138794, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 40, + "sup_loss": 0.8077702268238799, + "rf_loss": 0.008977587334811687, + "total_loss": 0.8975461020346419, + "mean_lyap": -0.7760306000709534, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 41, + "sup_loss": 0.5790201635319739, + "rf_loss": 0.004197108559310436, + "total_loss": 0.6209912509877235, + "mean_lyap": -0.8581048250198364, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.625 + }, + { + "step": 42, + "sup_loss": 0.7746859350804977, + "rf_loss": 0.005175702273845673, + "total_loss": 0.8264429578189544, + "mean_lyap": -0.842041015625, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 43, + "sup_loss": 0.7331061879104849, + "rf_loss": 0.007015871815383434, + "total_loss": 0.8032649079269644, + "mean_lyap": -0.8373122215270996, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.625 + }, + { + "step": 44, + "sup_loss": 0.869654493387017, + "rf_loss": 0.005932656116783619, + "total_loss": 0.928981052692208, + "mean_lyap": -0.794861376285553, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 45, + "sup_loss": 0.7347313761262009, + "rf_loss": 0.002129987347871065, + "total_loss": 0.756031248673589, + "mean_lyap": -0.8495690226554871, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.625 + }, + { + "step": 46, + "sup_loss": 0.7337837421752077, + "rf_loss": 0.00745191378518939, + "total_loss": 0.8083028772331339, + "mean_lyap": -0.7664467096328735, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 1.0 + }, + { + "step": 47, + "sup_loss": 0.6650665995203973, + "rf_loss": 0.0027223234064877033, + "total_loss": 0.6922898326539517, + "mean_lyap": -0.8546332120895386, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.5 + }, + { + "step": 48, + "sup_loss": 0.6728695801522537, + "rf_loss": 0.0034027229994535446, + "total_loss": 0.7068968101467892, + "mean_lyap": -0.8466753959655762, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.5 + }, + { + "step": 49, + "sup_loss": 0.6627326494175514, + "rf_loss": 0.0034537501633167267, + "total_loss": 0.6972701510507187, + "mean_lyap": -0.81462562084198, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75, + "eval_acc": 0.501953125, + "eval_tok_acc": 0.8258825231481481 + }, + { + "step": 50, + "sup_loss": 0.6882688690824927, + "rf_loss": 0.004383894149214029, + "total_loss": 0.7321078096433105, + "mean_lyap": -0.8475438356399536, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.5 + }, + { + "step": 51, + "sup_loss": 0.7352788257973389, + "rf_loss": 0.004841075744479895, + "total_loss": 0.7836895841734605, + "mean_lyap": -0.8208181858062744, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 52, + "sup_loss": 0.5743374201962224, + "rf_loss": 0.0043980712071061134, + "total_loss": 0.6183181304046383, + "mean_lyap": -0.8833464980125427, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.5 + }, + { + "step": 53, + "sup_loss": 0.6902312712113531, + "rf_loss": 0.0017992472276091576, + "total_loss": 0.7082237434874447, + "mean_lyap": -0.8604968786239624, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 54, + "sup_loss": 0.6803814939750245, + "rf_loss": 0.002090118359774351, + "total_loss": 0.7012826785040905, + "mean_lyap": -0.854358434677124, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.5 + }, + { + "step": 55, + "sup_loss": 0.595873137691564, + "rf_loss": 0.0007154205814003944, + "total_loss": 0.603027343505568, + "mean_lyap": -0.8682422637939453, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.5 + }, + { + "step": 56, + "sup_loss": 0.6642178889411797, + "rf_loss": 0.0020344697404652834, + "total_loss": 0.6845625858801713, + "mean_lyap": -0.8675365447998047, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.375 + }, + { + "step": 57, + "sup_loss": 0.7082779751524824, + "rf_loss": 0.002637912053614855, + "total_loss": 0.7346570947573083, + "mean_lyap": -0.8571688532829285, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.5 + }, + { + "step": 58, + "sup_loss": 0.9503993268684144, + "rf_loss": 0.002218063920736313, + "total_loss": 0.9725799660757776, + "mean_lyap": -0.8307374119758606, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 59, + "sup_loss": 0.514957090437054, + "rf_loss": 0.004192134365439415, + "total_loss": 0.5568784340914481, + "mean_lyap": -0.8466053605079651, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 60, + "sup_loss": 0.7337269250892405, + "rf_loss": 0.0026380610652267933, + "total_loss": 0.7601075348101859, + "mean_lyap": -0.8249342441558838, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.625 + }, + { + "step": 61, + "sup_loss": 0.6457141234391504, + "rf_loss": 0.003588218940421939, + "total_loss": 0.681596313309031, + "mean_lyap": -0.8772332072257996, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.5 + }, + { + "step": 62, + "sup_loss": 0.6506520749543354, + "rf_loss": 0.0017656260170042515, + "total_loss": 0.6683083341930554, + "mean_lyap": -0.8678171634674072, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.625 + }, + { + "step": 63, + "sup_loss": 0.8176543999352315, + "rf_loss": 0.0010715246899053454, + "total_loss": 0.8283696470671156, + "mean_lyap": -0.856634259223938, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.625 + }, + { + "step": 64, + "sup_loss": 0.7193349828825767, + "rf_loss": 0.0008531392668373883, + "total_loss": 0.7278663754345353, + "mean_lyap": -0.8669513463973999, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.5 + }, + { + "step": 65, + "sup_loss": 0.890794052989882, + "rf_loss": 0.0030753673054277897, + "total_loss": 0.9215477251128373, + "mean_lyap": -0.8245968818664551, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 66, + "sup_loss": 0.669179298339029, + "rf_loss": 0.0008702079649083316, + "total_loss": 0.6778813781045276, + "mean_lyap": -0.8781542778015137, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.5 + }, + { + "step": 67, + "sup_loss": 0.7648352138231196, + "rf_loss": 0.0006933381664566696, + "total_loss": 0.7717685956041016, + "mean_lyap": -0.8727739453315735, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.5 + }, + { + "step": 68, + "sup_loss": 0.6999706120344925, + "rf_loss": 0.0015010021161288023, + "total_loss": 0.7149806336614418, + "mean_lyap": -0.8707051277160645, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.375 + }, + { + "step": 69, + "sup_loss": 0.9418434380324208, + "rf_loss": 0.0017435763729736209, + "total_loss": 0.959279201063665, + "mean_lyap": -0.8469928503036499, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 70, + "sup_loss": 0.7006143229115153, + "rf_loss": 0.0020230370573699474, + "total_loss": 0.7208446944165373, + "mean_lyap": -0.8506982922554016, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.625 + }, + { + "step": 71, + "sup_loss": 0.8045005220383133, + "rf_loss": 0.0029563107527792454, + "total_loss": 0.8340636286347832, + "mean_lyap": -0.8255416750907898, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 72, + "sup_loss": 0.5909958039108959, + "rf_loss": 0.000791383848991245, + "total_loss": 0.5989096420515624, + "mean_lyap": -0.8960390090942383, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.375 + }, + { + "step": 73, + "sup_loss": 0.7791527680286758, + "rf_loss": 0.0012979984749108553, + "total_loss": 0.7921327523121231, + "mean_lyap": -0.8992996215820312, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.625 + }, + { + "step": 74, + "sup_loss": 0.7237759233288961, + "rf_loss": 0.003918964881449938, + "total_loss": 0.7629655712120729, + "mean_lyap": -0.883576512336731, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.375 + }, + { + "step": 75, + "sup_loss": 0.7495548237028731, + "rf_loss": 0.003698775079101324, + "total_loss": 0.7865425754252089, + "mean_lyap": -0.8133399486541748, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 76, + "sup_loss": 0.8387171345259399, + "rf_loss": 0.000946269603446126, + "total_loss": 0.8481798300947399, + "mean_lyap": -0.890516996383667, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.5 + }, + { + "step": 77, + "sup_loss": 0.6733659814251263, + "rf_loss": 0.0012616110034286976, + "total_loss": 0.6859820914594132, + "mean_lyap": -0.8804075717926025, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.375 + }, + { + "step": 78, + "sup_loss": 0.6498098301149544, + "rf_loss": 0.0013569702859967947, + "total_loss": 0.6633795334405836, + "mean_lyap": -0.8641673922538757, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.5 + }, + { + "step": 79, + "sup_loss": 0.7837405819809202, + "rf_loss": 0.003930794075131416, + "total_loss": 0.8230485227322344, + "mean_lyap": -0.8169339895248413, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.875 + }, + { + "step": 80, + "sup_loss": 0.7228979195130653, + "rf_loss": 0.0015914338873699307, + "total_loss": 0.7388122590852565, + "mean_lyap": -0.8627505302429199, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 81, + "sup_loss": 0.512824455845056, + "rf_loss": 0.0014878996880725026, + "total_loss": 0.5277034524929504, + "mean_lyap": -0.892520546913147, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.375 + }, + { + "step": 82, + "sup_loss": 0.6486249230331146, + "rf_loss": 0.0009839815320447087, + "total_loss": 0.658464738120731, + "mean_lyap": -0.8969285488128662, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.625 + }, + { + "step": 83, + "sup_loss": 0.7256714011449372, + "rf_loss": 0.0012018897105008364, + "total_loss": 0.7376902977842843, + "mean_lyap": -0.8888238668441772, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.5 + }, + { + "step": 84, + "sup_loss": 0.7549937936440195, + "rf_loss": 0.0016620211536064744, + "total_loss": 0.7716140049472536, + "mean_lyap": -0.8613144159317017, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.5 + }, + { + "step": 85, + "sup_loss": 0.7208673455028051, + "rf_loss": 0.003234194591641426, + "total_loss": 0.7532092914192193, + "mean_lyap": -0.8572639226913452, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.625 + }, + { + "step": 86, + "sup_loss": 0.78733164410316, + "rf_loss": 0.0003821868449449539, + "total_loss": 0.7911535125526096, + "mean_lyap": -0.9119746685028076, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.25 + }, + { + "step": 87, + "sup_loss": 0.7460947798003501, + "rf_loss": 0.0010051638819277287, + "total_loss": 0.7561464186196274, + "mean_lyap": -0.8535745143890381, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.375 + }, + { + "step": 88, + "sup_loss": 0.8429616427629429, + "rf_loss": 0.0019365833140909672, + "total_loss": 0.86232747497253, + "mean_lyap": -0.84560227394104, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.625 + }, + { + "step": 89, + "sup_loss": 0.7729191359746813, + "rf_loss": 0.0007418215391226113, + "total_loss": 0.7803373512494921, + "mean_lyap": -0.9108904004096985, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.375 + }, + { + "step": 90, + "sup_loss": 0.6472762192297998, + "rf_loss": 0.0025497586466372013, + "total_loss": 0.6727738047648493, + "mean_lyap": -0.8673484325408936, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.5 + }, + { + "step": 91, + "sup_loss": 0.7268315836183069, + "rf_loss": 0.0027970762457698584, + "total_loss": 0.7548023465416668, + "mean_lyap": -0.8777827620506287, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.5 + }, + { + "step": 92, + "sup_loss": 0.8650996847170713, + "rf_loss": 0.0013330209767445922, + "total_loss": 0.8784298947173479, + "mean_lyap": -0.8683897852897644, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.5 + }, + { + "step": 93, + "sup_loss": 0.6219829422071615, + "rf_loss": 0.00327981635928154, + "total_loss": 0.654781105799977, + "mean_lyap": -0.8831396102905273, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 94, + "sup_loss": 0.7396657856699419, + "rf_loss": 0.0007561829988844693, + "total_loss": 0.7472276155423713, + "mean_lyap": -0.858811616897583, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.375 + }, + { + "step": 95, + "sup_loss": 0.6485665339088734, + "rf_loss": 0.0004259114502929151, + "total_loss": 0.6528256485282179, + "mean_lyap": -0.9049522876739502, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.25 + }, + { + "step": 96, + "sup_loss": 0.7588329886284089, + "rf_loss": 0.0001585835125297308, + "total_loss": 0.7604188237537062, + "mean_lyap": -0.8950439691543579, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.5 + }, + { + "step": 97, + "sup_loss": 0.5817993282506224, + "rf_loss": 0.0007148263975977898, + "total_loss": 0.5889475922266003, + "mean_lyap": -0.8832213282585144, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.25 + }, + { + "step": 98, + "sup_loss": 0.7425204173943035, + "rf_loss": 0.0005265326471999288, + "total_loss": 0.7477857436334722, + "mean_lyap": -0.9243374466896057, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.25 + }, + { + "step": 99, + "sup_loss": 0.6873691285748474, + "rf_loss": 8.11655081633944e-06, + "total_loss": 0.6874502940793729, + "mean_lyap": -0.9182790517807007, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.25, + "eval_acc": 0.49609375, + "eval_tok_acc": 0.8252797067901234 + }, + { + "step": 100, + "sup_loss": 0.6026084183219523, + "rf_loss": 0.0004853458667639643, + "total_loss": 0.6074618769313843, + "mean_lyap": -0.9124417304992676, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.25 + }, + { + "step": 101, + "sup_loss": 0.6220611178877565, + "rf_loss": 0.0004740226431749761, + "total_loss": 0.6268013444359216, + "mean_lyap": -0.8781635761260986, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.625 + }, + { + "step": 102, + "sup_loss": 0.6987778955866084, + "rf_loss": 0.0006725198472850025, + "total_loss": 0.7055030941758738, + "mean_lyap": -0.9354679584503174, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.25 + }, + { + "step": 103, + "sup_loss": 0.767754078471799, + "rf_loss": 0.0006385964807122946, + "total_loss": 0.7741400432789219, + "mean_lyap": -0.8678494691848755, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.75 + }, + { + "step": 104, + "sup_loss": 0.8160985431055674, + "rf_loss": 0.0003366229357197881, + "total_loss": 0.8194647724627653, + "mean_lyap": -0.877155065536499, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.375 + }, + { + "step": 105, + "sup_loss": 0.7349091845713341, + "rf_loss": 0.003131158882752061, + "total_loss": 0.7662207747958386, + "mean_lyap": -0.845853328704834, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.625 + }, + { + "step": 106, + "sup_loss": 0.74589114177769, + "rf_loss": 0.00018192798597738147, + "total_loss": 0.7477104216374638, + "mean_lyap": -0.901712954044342, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.25 + }, + { + "step": 107, + "sup_loss": 0.6818353474665824, + "rf_loss": 0.0024454190861433744, + "total_loss": 0.7062895387936774, + "mean_lyap": -0.8897325992584229, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.375 + }, + { + "step": 108, + "sup_loss": 0.6976428133612792, + "rf_loss": 0.00034954847069457173, + "total_loss": 0.7011382979518096, + "mean_lyap": -0.9129797220230103, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.125 + }, + { + "step": 109, + "sup_loss": 0.8553529329252825, + "rf_loss": 0.0001104350303648971, + "total_loss": 0.8564572832434834, + "mean_lyap": -0.911565899848938, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.25 + }, + { + "step": 110, + "sup_loss": 0.7512660300233496, + "rf_loss": 0.00010126684355782345, + "total_loss": 0.7522786985025836, + "mean_lyap": -0.8916236758232117, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.25 + }, + { + "step": 111, + "sup_loss": 0.8306927707564853, + "rf_loss": 0.0005017618532292545, + "total_loss": 0.8357103894051932, + "mean_lyap": -0.8902592658996582, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.375 + }, + { + "step": 112, + "sup_loss": 0.8014963296245178, + "rf_loss": 0.0018839503172785044, + "total_loss": 0.8203358323316415, + "mean_lyap": -0.849699854850769, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.375 + }, + { + "step": 113, + "sup_loss": 0.6868572114680728, + "rf_loss": 0.0007329108193516731, + "total_loss": 0.6941863196615895, + "mean_lyap": -0.872833251953125, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.625 + }, + { + "step": 114, + "sup_loss": 0.5947612592092519, + "rf_loss": 0.0003032636595889926, + "total_loss": 0.5977938958051419, + "mean_lyap": -0.9618325233459473, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.25 + }, + { + "step": 115, + "sup_loss": 0.7042847137019506, + "rf_loss": 0.0, + "total_loss": 0.7042847137019506, + "mean_lyap": -0.9470847845077515, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.0 + }, + { + "step": 116, + "sup_loss": 0.7489247878014463, + "rf_loss": 0.0008320758352056146, + "total_loss": 0.7572455459206718, + "mean_lyap": -0.898413896560669, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.125 + }, + { + "step": 117, + "sup_loss": 0.876237977646276, + "rf_loss": 2.0203569874865934e-05, + "total_loss": 0.8764400133523006, + "mean_lyap": -0.9036564826965332, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.25 + }, + { + "step": 118, + "sup_loss": 0.653222449990118, + "rf_loss": 9.613390284357592e-05, + "total_loss": 0.6541837890331057, + "mean_lyap": -0.9079935550689697, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.125 + }, + { + "step": 119, + "sup_loss": 0.6358519552300581, + "rf_loss": 5.34353603143245e-05, + "total_loss": 0.6363863088332014, + "mean_lyap": -0.9442797899246216, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.125 + }, + { + "step": 120, + "sup_loss": 0.8665854133898904, + "rf_loss": 0.0, + "total_loss": 0.8665854133898904, + "mean_lyap": -0.8926280736923218, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.0 + }, + { + "step": 121, + "sup_loss": 0.8598103150897419, + "rf_loss": 0.0006828162586316466, + "total_loss": 0.866638477908889, + "mean_lyap": -0.8766176700592041, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.25 + }, + { + "step": 122, + "sup_loss": 0.8078568441048074, + "rf_loss": 0.0004030819109175354, + "total_loss": 0.8118876631557751, + "mean_lyap": -0.8897646069526672, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.375 + }, + { + "step": 123, + "sup_loss": 0.9200362731816376, + "rf_loss": 0.00048203484038822353, + "total_loss": 0.9248566214108969, + "mean_lyap": -0.8854799270629883, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.25 + }, + { + "step": 124, + "sup_loss": 0.9848050393934372, + "rf_loss": 0.00012617840548045933, + "total_loss": 0.9860668233900342, + "mean_lyap": -0.8772153854370117, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.375 + }, + { + "step": 125, + "sup_loss": 0.6607478220181227, + "rf_loss": 0.0005259730387479067, + "total_loss": 0.6660075524056017, + "mean_lyap": -0.9292822480201721, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.125 + }, + { + "step": 126, + "sup_loss": 0.6867021993540716, + "rf_loss": 7.451939018210396e-05, + "total_loss": 0.6874473932704446, + "mean_lyap": -0.9073169827461243, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.25 + }, + { + "step": 127, + "sup_loss": 0.6364257130573185, + "rf_loss": 0.0, + "total_loss": 0.6364257130573185, + "mean_lyap": -0.9700829982757568, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.0 + }, + { + "step": 128, + "sup_loss": 0.6242236350181207, + "rf_loss": 0.000526409363374114, + "total_loss": 0.6294877286518619, + "mean_lyap": -0.8782105445861816, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.25 + }, + { + "step": 129, + "sup_loss": 0.7518952417691622, + "rf_loss": 5.401653652370442e-06, + "total_loss": 0.7519492583038669, + "mean_lyap": -0.9081242084503174, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.125 + }, + { + "step": 130, + "sup_loss": 0.7010888911268471, + "rf_loss": 0.0005437328945845366, + "total_loss": 0.7065262200726925, + "mean_lyap": -0.9411056041717529, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.25 + }, + { + "step": 131, + "sup_loss": 0.6262936821470324, + "rf_loss": 5.3317453421186656e-06, + "total_loss": 0.6263469996004536, + "mean_lyap": -0.9436657428741455, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.125 + }, + { + "step": 132, + "sup_loss": 0.6594826955787076, + "rf_loss": 0.0, + "total_loss": 0.6594826955787076, + "mean_lyap": -0.959723949432373, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.0 + }, + { + "step": 133, + "sup_loss": 0.7808365199441274, + "rf_loss": 0.0001904903765534982, + "total_loss": 0.7827414237387662, + "mean_lyap": -0.9030304551124573, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.25 + }, + { + "step": 134, + "sup_loss": 0.6615815674486567, + "rf_loss": 9.443930321140215e-05, + "total_loss": 0.6625259604953226, + "mean_lyap": -0.9540624618530273, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.125 + }, + { + "step": 135, + "sup_loss": 0.9383803996531257, + "rf_loss": 5.5584264373464976e-06, + "total_loss": 0.9384359839165897, + "mean_lyap": -0.895117998123169, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.125 + }, + { + "step": 136, + "sup_loss": 0.6256120798884699, + "rf_loss": 0.0001487397530581802, + "total_loss": 0.6270994774772594, + "mean_lyap": -0.9466730356216431, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.25 + }, + { + "step": 137, + "sup_loss": 0.7893132553639189, + "rf_loss": 8.546098251827061e-05, + "total_loss": 0.7901678651891016, + "mean_lyap": -0.926414966583252, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.125 + }, + { + "step": 138, + "sup_loss": 0.6373277113802505, + "rf_loss": 0.0, + "total_loss": 0.6373277113802505, + "mean_lyap": -0.9512477517127991, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.0 + }, + { + "step": 139, + "sup_loss": 0.7214324678174807, + "rf_loss": 2.2458484636445064e-06, + "total_loss": 0.7214549263012077, + "mean_lyap": -0.9204102754592896, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.125 + }, + { + "step": 140, + "sup_loss": 0.8744445381065415, + "rf_loss": 1.2479569704737514e-05, + "total_loss": 0.8745693338035889, + "mean_lyap": -0.8943758010864258, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.125 + }, + { + "step": 141, + "sup_loss": 0.9106080119595603, + "rf_loss": 0.00022716728562954813, + "total_loss": 0.9128796849031673, + "mean_lyap": -0.8702834844589233, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.375 + }, + { + "step": 142, + "sup_loss": 0.6605944012717532, + "rf_loss": 5.409423101809807e-05, + "total_loss": 0.6611353435746582, + "mean_lyap": -0.9433302879333496, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.125 + }, + { + "step": 143, + "sup_loss": 0.6644375708800444, + "rf_loss": 0.0, + "total_loss": 0.6644375708800444, + "mean_lyap": -0.9677567481994629, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.0 + }, + { + "step": 144, + "sup_loss": 0.8626739651198232, + "rf_loss": 0.000472456740681082, + "total_loss": 0.8673985324102187, + "mean_lyap": -0.8641602993011475, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.375 + }, + { + "step": 145, + "sup_loss": 0.82435230048377, + "rf_loss": 0.00033521762816235423, + "total_loss": 0.8277044768818088, + "mean_lyap": -0.8977236747741699, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.125 + }, + { + "step": 146, + "sup_loss": 0.5942559147617289, + "rf_loss": 0.0, + "total_loss": 0.5942559147617289, + "mean_lyap": -0.9904095530509949, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.0 + }, + { + "step": 147, + "sup_loss": 0.6775069135795322, + "rf_loss": 0.0012124822242185473, + "total_loss": 0.6896317360545483, + "mean_lyap": -0.9161025285720825, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.125 + }, + { + "step": 148, + "sup_loss": 0.7089058125671526, + "rf_loss": 5.499004487319326e-07, + "total_loss": 0.7089113115717536, + "mean_lyap": -0.9259845018386841, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.125 + }, + { + "step": 149, + "sup_loss": 0.6823098888984611, + "rf_loss": 7.607099519191252e-07, + "total_loss": 0.682317495998094, + "mean_lyap": -0.9467276930809021, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.125, + "eval_acc": 0.470703125, + "eval_tok_acc": 0.8197096836419753 + }, + { + "step": 150, + "sup_loss": 0.5880605189294362, + "rf_loss": 0.0, + "total_loss": 0.5880605189294362, + "mean_lyap": -1.0149933099746704, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.0 + }, + { + "step": 151, + "sup_loss": 0.5608997467994814, + "rf_loss": 0.00025979886413551867, + "total_loss": 0.5634977353826289, + "mean_lyap": -0.9140228033065796, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.25 + }, + { + "step": 152, + "sup_loss": 0.770011182713256, + "rf_loss": 0.00036662607453763485, + "total_loss": 0.7736774434586323, + "mean_lyap": -0.9008445739746094, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.125 + }, + { + "step": 153, + "sup_loss": 0.7527060619298268, + "rf_loss": 0.0, + "total_loss": 0.7527060619298268, + "mean_lyap": -0.9543828368186951, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.0 + }, + { + "step": 154, + "sup_loss": 0.8604835750154125, + "rf_loss": 0.00045156461419537663, + "total_loss": 0.864999221040951, + "mean_lyap": -0.8754521608352661, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.25 + }, + { + "step": 155, + "sup_loss": 0.6627886082106433, + "rf_loss": 8.739302575122565e-05, + "total_loss": 0.6636625384972594, + "mean_lyap": -0.9325603246688843, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.125 + }, + { + "step": 156, + "sup_loss": 0.7936160388567672, + "rf_loss": 1.1148687917739153e-05, + "total_loss": 0.7937275257359446, + "mean_lyap": -0.9105346202850342, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.25 + }, + { + "step": 157, + "sup_loss": 0.8559562711499866, + "rf_loss": 3.2170348276849836e-05, + "total_loss": 0.856277974647307, + "mean_lyap": -0.9146195650100708, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.125 + }, + { + "step": 158, + "sup_loss": 1.1579173010988004, + "rf_loss": 0.0, + "total_loss": 1.1579173010988004, + "mean_lyap": -0.9106019139289856, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.0 + }, + { + "step": 159, + "sup_loss": 0.7768453036590028, + "rf_loss": 2.2159605578053743e-05, + "total_loss": 0.7770668997147834, + "mean_lyap": -0.91521155834198, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.25 + }, + { + "step": 160, + "sup_loss": 0.8526309708666, + "rf_loss": 0.0, + "total_loss": 0.8526309708666, + "mean_lyap": -0.9097278714179993, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.0 + }, + { + "step": 161, + "sup_loss": 0.5008964067830873, + "rf_loss": 4.362715480965562e-05, + "total_loss": 0.5013326783239079, + "mean_lyap": -0.9181713461875916, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.125 + }, + { + "step": 162, + "sup_loss": 0.5992556645198497, + "rf_loss": 0.0, + "total_loss": 0.5992556645198497, + "mean_lyap": -0.9893726110458374, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.0 + }, + { + "step": 163, + "sup_loss": 0.9138912274150771, + "rf_loss": 0.0, + "total_loss": 0.9138912274150771, + "mean_lyap": -0.9009734392166138, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.0 + }, + { + "step": 164, + "sup_loss": 0.5685104715166784, + "rf_loss": 7.916953472886235e-05, + "total_loss": 0.5693021668348632, + "mean_lyap": -0.9717569351196289, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.125 + }, + { + "step": 165, + "sup_loss": 0.6171784802888035, + "rf_loss": 0.0, + "total_loss": 0.6171784802888035, + "mean_lyap": -0.9582620859146118, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.0 + }, + { + "step": 166, + "sup_loss": 0.8162310645315405, + "rf_loss": 0.0004606795555446297, + "total_loss": 0.8208378600287791, + "mean_lyap": -0.8890138864517212, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.25 + }, + { + "step": 167, + "sup_loss": 0.9108215564133297, + "rf_loss": 0.0, + "total_loss": 0.9108215564133297, + "mean_lyap": -0.932681679725647, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.0 + }, + { + "step": 168, + "sup_loss": 0.7919173869680999, + "rf_loss": 9.947708167601377e-05, + "total_loss": 0.7929121577557562, + "mean_lyap": -0.8980196714401245, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.125 + }, + { + "step": 169, + "sup_loss": 0.7066477397933514, + "rf_loss": 1.0202940757153556e-05, + "total_loss": 0.7067497692009229, + "mean_lyap": -0.9571099281311035, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.125 + }, + { + "step": 170, + "sup_loss": 0.7139214627311085, + "rf_loss": 0.0, + "total_loss": 0.7139214627311085, + "mean_lyap": -0.945033848285675, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.0 + }, + { + "step": 171, + "sup_loss": 0.7709420698813313, + "rf_loss": 0.0, + "total_loss": 0.7709420698813313, + "mean_lyap": -0.9268935918807983, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.0 + }, + { + "step": 172, + "sup_loss": 0.7228295302931781, + "rf_loss": 0.0, + "total_loss": 0.7228295302931781, + "mean_lyap": -0.9544382691383362, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.0 + }, + { + "step": 173, + "sup_loss": 0.5613587950956674, + "rf_loss": 0.0, + "total_loss": 0.5613587950956674, + "mean_lyap": -0.9500914216041565, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.0 + }, + { + "step": 174, + "sup_loss": 0.8143955457077544, + "rf_loss": 0.0, + "total_loss": 0.8143955457077544, + "mean_lyap": -0.9618834853172302, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.0 + }, + { + "step": 175, + "sup_loss": 0.7053880198605798, + "rf_loss": 3.6345554690342396e-05, + "total_loss": 0.7057514754220351, + "mean_lyap": -0.9382758140563965, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.125 + }, + { + "step": 176, + "sup_loss": 0.8231483867413522, + "rf_loss": 0.0007249140762723982, + "total_loss": 0.8303975273876608, + "mean_lyap": -0.8953840732574463, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.25 + }, + { + "step": 177, + "sup_loss": 0.7462800069585852, + "rf_loss": 0.0, + "total_loss": 0.7462800069585852, + "mean_lyap": -0.9380788803100586, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.0 + }, + { + "step": 178, + "sup_loss": 0.679239566645776, + "rf_loss": 0.00017107199528254569, + "total_loss": 0.6809502865403938, + "mean_lyap": -0.9349449872970581, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.25 + }, + { + "step": 179, + "sup_loss": 0.7266091879144229, + "rf_loss": 6.751439286745153e-06, + "total_loss": 0.7266767023109283, + "mean_lyap": -0.9303713440895081, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.125 + }, + { + "step": 180, + "sup_loss": 0.7539549592732316, + "rf_loss": 2.5886556613841094e-05, + "total_loss": 0.7542138248357321, + "mean_lyap": -0.9419723153114319, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.125 + }, + { + "step": 181, + "sup_loss": 0.5703976586739969, + "rf_loss": 0.0, + "total_loss": 0.5703976586739969, + "mean_lyap": -0.9483802318572998, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.0 + }, + { + "step": 182, + "sup_loss": 0.7608988161297702, + "rf_loss": 2.3554461222374812e-05, + "total_loss": 0.761134360734718, + "mean_lyap": -0.9507160186767578, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.125 + }, + { + "step": 183, + "sup_loss": 0.7900601838013458, + "rf_loss": 3.0407780286623165e-05, + "total_loss": 0.790364261611488, + "mean_lyap": -0.9561895132064819, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.125 + }, + { + "step": 184, + "sup_loss": 0.44977169118096466, + "rf_loss": 0.0, + "total_loss": 0.44977169118096466, + "mean_lyap": -0.9784150123596191, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.0 + }, + { + "step": 185, + "sup_loss": 0.6665112013097232, + "rf_loss": 0.0, + "total_loss": 0.6665112013097232, + "mean_lyap": -1.0041429996490479, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.0 + }, + { + "step": 186, + "sup_loss": 0.6233770500298779, + "rf_loss": 0.0, + "total_loss": 0.6233770500298779, + "mean_lyap": -0.9526543617248535, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.0 + }, + { + "step": 187, + "sup_loss": 0.7298426783748505, + "rf_loss": 0.0, + "total_loss": 0.7298426783748505, + "mean_lyap": -0.9848892092704773, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.0 + }, + { + "step": 188, + "sup_loss": 0.7994810007461031, + "rf_loss": 0.00028055161237716675, + "total_loss": 0.8022865168698747, + "mean_lyap": -0.9289549589157104, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.25 + }, + { + "step": 189, + "sup_loss": 0.9004618817356048, + "rf_loss": 0.00013723062875214964, + "total_loss": 0.9018341879940225, + "mean_lyap": -0.9448005557060242, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.125 + }, + { + "step": 190, + "sup_loss": 0.7561349029708767, + "rf_loss": 1.4175058993259881e-07, + "total_loss": 0.7561363204768045, + "mean_lyap": -0.913082480430603, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.125 + }, + { + "step": 191, + "sup_loss": 0.8108956146054335, + "rf_loss": 0.0, + "total_loss": 0.8108956146054335, + "mean_lyap": -0.943326473236084, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.0 + }, + { + "step": 192, + "sup_loss": 0.7575437858379497, + "rf_loss": 0.00023609009804204106, + "total_loss": 0.7599046867019548, + "mean_lyap": -0.8917972445487976, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.125 + }, + { + "step": 193, + "sup_loss": 0.512322704611355, + "rf_loss": 0.00016364955808967352, + "total_loss": 0.5139592001922517, + "mean_lyap": -0.9673264622688293, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.125 + }, + { + "step": 194, + "sup_loss": 0.9945017941952325, + "rf_loss": 0.0, + "total_loss": 0.9945017941952325, + "mean_lyap": -0.926647424697876, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.0 + }, + { + "step": 195, + "sup_loss": 0.6566390093757337, + "rf_loss": 0.0, + "total_loss": 0.6566390093757337, + "mean_lyap": -0.9722451567649841, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.0 + }, + { + "step": 196, + "sup_loss": 0.8823300548771821, + "rf_loss": 0.0, + "total_loss": 0.8823300548771821, + "mean_lyap": -0.9026472568511963, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.0 + }, + { + "step": 197, + "sup_loss": 0.6761996844994566, + "rf_loss": 0.00010285572352586314, + "total_loss": 0.6772282416910594, + "mean_lyap": -0.9098778963088989, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.25 + }, + { + "step": 198, + "sup_loss": 0.6647835467672074, + "rf_loss": 1.9416085706325248e-05, + "total_loss": 0.6649777076169947, + "mean_lyap": -0.9130200147628784, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.125 + }, + { + "step": 199, + "sup_loss": 0.8157799126017244, + "rf_loss": 0.0, + "total_loss": 0.8157799126017244, + "mean_lyap": -0.948731541633606, + "lam_star": -0.8500000238418579, + "excess_frac_nonzero": 0.0, + "eval_acc": 0.466796875, + "eval_tok_acc": 0.818311149691358 + } + ], + "final_acc": 0.466796875, + "final_tok_acc": 0.818311149691358 +}
\ No newline at end of file |
