summary refs log tree commit diff
path: root/research/flossing/report_bundle_20260603/tables
diff options
context:
space:
mode:
Diffstat (limited to 'research/flossing/report_bundle_20260603/tables')
-rw-r--r-- research/flossing/report_bundle_20260603/tables/base58590_k25_d64_sigma03_Lonly_fdlyap_n512_seed20260602.summary.csv 2
-rw-r--r-- research/flossing/report_bundle_20260603/tables/base58590_k25_d64_sigma03_Lonly_n10000_seed20260602.summary.csv 2
-rw-r--r-- research/flossing/report_bundle_20260603/tables/fig5_qhead_vs_lambda1_ptrm_summary.csv 3
-rw-r--r-- research/flossing/report_bundle_20260603/tables/headline_trm_multi4_dynamics_table.csv 4
-rw-r--r-- research/flossing/report_bundle_20260603/tables/hrm_baseline_eval.csv 11
-rw-r--r-- research/flossing/report_bundle_20260603/tables/hrm_honly_step26040_eval.csv 2
-rw-r--r-- research/flossing/report_bundle_20260603/tables/hrm_matched_compare.csv 6
-rw-r--r-- research/flossing/report_bundle_20260603/tables/hrm_multi4_complete_eval.csv 11
-rw-r--r-- research/flossing/report_bundle_20260603/tables/hrm_multi4_eval.csv 6
-rw-r--r-- research/flossing/report_bundle_20260603/tables/hrm_multi4_horizon_sweep_768.csv 41
-rw-r--r-- research/flossing/report_bundle_20260603/tables/hrm_trm_redesigned_summary.csv 7
-rw-r--r-- research/flossing/report_bundle_20260603/tables/meeting_figures_v2_report.md 26
-rw-r--r-- research/flossing/report_bundle_20260603/tables/multi4_35805_k25_d64_sigma03_Lonly_fdlyap_n512_seed20260602.summary.csv 2
-rw-r--r-- research/flossing/report_bundle_20260603/tables/multi4_35805_k25_d64_sigma03_Lonly_n10000_seed20260602.summary.csv 2
-rw-r--r-- research/flossing/report_bundle_20260603/tables/paired_ptrm_k100_n1000_seed0_summary.csv 2
-rw-r--r-- research/flossing/report_bundle_20260603/tables/summary_n512_k8_seed20260602.csv 4
-rw-r--r-- research/flossing/report_bundle_20260603/tables/summary_n64_k8_seed20260602.csv 5
-rw-r--r-- research/flossing/report_bundle_20260603/tables/trm_baseline_eval.csv 11
-rw-r--r-- research/flossing/report_bundle_20260603/tables/trm_gbs768_multi4_step65100_det_n10000_seed20260602.summary.csv 2
-rw-r--r-- research/flossing/report_bundle_20260603/tables/trm_matched_compare.csv 2
-rw-r--r-- research/flossing/report_bundle_20260603/tables/trm_multi4_eval.csv 2
-rw-r--r-- research/flossing/report_bundle_20260603/tables/trm_multi4_eval_full.csv 11
-rw-r--r-- research/flossing/report_bundle_20260603/tables/trm_official_gbs768_eval.csv 11
-rw-r--r-- research/flossing/report_bundle_20260603/tables/trm_official_gbs768_multi4_eval.csv 21
-rw-r--r-- research/flossing/report_bundle_20260603/tables/trm_official_gbs768_multi4_step16275_eval.csv 2
-rw-r--r-- research/flossing/report_bundle_20260603/tables/wallclock_eval.csv 30
26 files changed, 228 insertions, 0 deletions
diff --git a/research/flossing/report_bundle_20260603/tables/base58590_k25_d64_sigma03_Lonly_fdlyap_n512_seed20260602.summary.csv b/research/flossing/report_bundle_20260603/tables/base58590_k25_d64_sigma03_Lonly_fdlyap_n512_seed20260602.summary.csv
new file mode 100644
index 0000000..1078ef9
--- /dev/null
+++ b/research/flossing/report_bundle_20260603/tables/base58590_k25_d64_sigma03_Lonly_fdlyap_n512_seed20260602.summary.csv
@@ -0,0 +1,2 @@
+correct_count/det_fail_mean,correct_count/det_success_mean,correct_count/full_frac,correct_count/ge_10_frac,correct_count/ge_1_frac,correct_count/ge_25_frac,correct_count/ge_5_frac,correct_count/mean,correct_count/median,correct_count/q10,correct_count/q25,correct_count/q75,correct_count/q90,correct_count/std,correct_count/zero_frac,deterministic/exact,deterministic/token_acc,fd_lyap,fd_spectrum_k,include_clean,lambda_fail_mean,lambda_mean,lambda_success_mean,lyap_min/exact,lyap_min/token_acc,mean_rollout/exact,mean_rollout/token_acc,n_samples,noise_std,oracle_pass/det_fail_frac,oracle_pass/det_success_frac,oracle_pass/exact,oracle_pass/token_acc,perturb_both,perturb_h,perturb_l,q_fail_mean,q_max/det_fail_frac,q_max/det_success_frac,q_max/exact,q_max/token_acc,q_mean,q_minus_0.25lambda/exact,q_minus_0.25lambda/token_acc,q_minus_0.5lambda/exact,q_minus_0.5lambda/token_acc,q_minus_1lambda/exact,q_minus_1lambda/token_acc,q_minus_2lambda/exact,q_minus_2lambda/token_acc,q_success_mean,rollout0/exact,rollout0/token_acc,rollouts,steps
+15.185714721679688,24.78506851196289,0.859375,0.94921875,0.97265625,0.859375,0.96875,23.47265625,25.0,22.10000228881836,25.0,25.0,25.0,5.083601474761963,0.02734375,0.86328125,0.9498456716537476,1.0,0.0,0.0,6.921443462371826,5.711092472076416,5.632336139678955,0.97265625,0.989703893661499,0.9389062523841858,0.9769077897071838,512.0,0.3,0.800000011920929,1.0,0.97265625,0.9911988973617554,0.0,0.0,1.0,-10.65998649597168,0.800000011920929,1.0,0.97265625,0.989366352558136,6.669083118438721,0.97265625,0.9893181324005127,0.97265625,0.9891493320465088,0.97265625,0.989149272441864,0.97265625,0.9892698526382446,7.7966694831848145,0.94140625,0.9774305820465088,25.0,64.0
diff --git a/research/flossing/report_bundle_20260603/tables/base58590_k25_d64_sigma03_Lonly_n10000_seed20260602.summary.csv b/research/flossing/report_bundle_20260603/tables/base58590_k25_d64_sigma03_Lonly_n10000_seed20260602.summary.csv
new file mode 100644
index 0000000..16c5a1b
--- /dev/null
+++ b/research/flossing/report_bundle_20260603/tables/base58590_k25_d64_sigma03_Lonly_n10000_seed20260602.summary.csv
@@ -0,0 +1,2 @@
+correct_count/det_fail_mean,correct_count/det_success_mean,correct_count/full_frac,correct_count/ge_10_frac,correct_count/ge_1_frac,correct_count/ge_25_frac,correct_count/ge_5_frac,correct_count/mean,correct_count/median,correct_count/q10,correct_count/q25,correct_count/q75,correct_count/q90,correct_count/std,correct_count/zero_frac,deterministic/exact,deterministic/token_acc,fd_lyap,fd_spectrum_k,include_clean,mean_rollout/exact,mean_rollout/token_acc,n_samples,noise_std,oracle_pass/det_fail_frac,oracle_pass/det_success_frac,oracle_pass/exact,oracle_pass/token_acc,perturb_both,perturb_h,perturb_l,q_max/det_fail_frac,q_max/det_success_frac,q_max/exact,q_max/token_acc,q_mean,rollout0/exact,rollout0/token_acc,rollouts,steps
+13.544709205627441,24.74855613708496,0.847599983215332,0.944599986076355,0.9776999950408936,0.847599983215332,0.9599999785423279,23.2450008392334,25.0,20.0,25.0,25.0,25.0,5.4259724617004395,0.022299999371170998,0.8658000230789185,0.9498122930526733,0.0,0.0,0.0,0.9297999739646912,0.9728542566299438,10000.0,0.3,0.8338301181793213,1.0,0.9776999950408936,0.9922346472740173,0.0,0.0,1.0,0.8338301181793213,0.9998844861984253,0.9775999784469604,0.9908208847045898,6.504925727844238,0.9284999966621399,0.9722283482551575,25.0,64.0
diff --git a/research/flossing/report_bundle_20260603/tables/fig5_qhead_vs_lambda1_ptrm_summary.csv b/research/flossing/report_bundle_20260603/tables/fig5_qhead_vs_lambda1_ptrm_summary.csv
new file mode 100644
index 0000000..bcbf401
--- /dev/null
+++ b/research/flossing/report_bundle_20260603/tables/fig5_qhead_vs_lambda1_ptrm_summary.csv
@@ -0,0 +1,3 @@
+name,path,n_samples,rollouts,mean_rollout_exact,q_max_exact,lambda_min_exact,oracle_pass_exact,q_lambda_same_argmax_frac,global_pearson_q_vs_stability,global_spearman_q_vs_stability,within_problem_pearson_mean,within_problem_spearman_mean,q_success_mean,q_fail_mean,lambda_success_mean,lambda_fail_mean,mixed_problem_count,zero_success_problem_count,full_success_problem_count,mixed_global_pearson_q_vs_stability,mixed_q_max_exact,mixed_lambda_min_exact,mixed_oracle_exact
+TRM baseline + PTRM rollouts,research/flossing/q_lambda_scatter/base58590_k25_d64_sigma03_Lonly_fdlyap_n512_seed20260602.npz,512.0,25.0,0.93890625,0.97265625,0.97265625,0.97265625,0.04296875,0.7524171235289728,-0.025655130770112743,0.12497240516050691,0.09656841990607949,7.796669578964886,-10.659986413043478,5.632336168325687,6.921443493469901,58.0,14.0,440.0,0.7857503437605575,1.0,1.0,1.0
+TRM multi4 + PTRM rollouts,research/flossing/q_lambda_scatter/multi4_35805_k25_d64_sigma03_Lonly_fdlyap_n512_seed20260602.npz,512.0,25.0,0.944609375,0.974609375,0.974609375,0.974609375,0.0859375,0.7289674235313497,-0.13633997124282113,0.18866459504767938,0.11859038341758327,7.7404597169382185,-10.081937588152327,5.7459801223593345,7.271883726456269,43.0,13.0,456.0,0.7911647653323942,1.0,1.0,1.0
diff --git a/research/flossing/report_bundle_20260603/tables/headline_trm_multi4_dynamics_table.csv b/research/flossing/report_bundle_20260603/tables/headline_trm_multi4_dynamics_table.csv
new file mode 100644
index 0000000..742ac68
--- /dev/null
+++ b/research/flossing/report_bundle_20260603/tables/headline_trm_multi4_dynamics_table.csv
@@ -0,0 +1,4 @@
+model,step,full_exact,full_token_acc,lm_loss,dyn_sample_exact,lambda1_all,mean8_all,tail4_all,pos_count_all
+TRM baseline best,58590,0.8686309456825256,0.9508475661277772,0.1155559569597244,0.875,0.02823458132615997,0.013457294571722192,0.0075273313675370546,7.841796875
+TRM multi4 best,35805,0.8964653611183167,0.9604493975639344,0.0945562794804573,0.900390625,0.020381716455975862,0.0065844104191477015,0.001402141885882835,3.841796875
+TRM multi4 final,65100,0.8350536823272705,0.9350366592407228,0.1547555029392242,0.82421875,0.03232463403946895,0.018508151198432188,0.013372940185377047,8.0
diff --git a/research/flossing/report_bundle_20260603/tables/hrm_baseline_eval.csv b/research/flossing/report_bundle_20260603/tables/hrm_baseline_eval.csv
new file mode 100644
index 0000000..afc2b27
--- /dev/null
+++ b/research/flossing/report_bundle_20260603/tables/hrm_baseline_eval.csv
@@ -0,0 +1,11 @@
+run,step,all/exact_accuracy,all/accuracy,all/lm_loss,all/q_halt_accuracy,all/q_halt_loss,all/steps
+hrm_baseline,2604,0.016369983553886414,0.6336106061935425,0.8522627949714661,0.9927079081535339,0.020887982100248337,16
+hrm_baseline,5208,0.06169315055012703,0.6744286417961121,0.7430469989776611,0.9772745370864868,0.07281211763620377,16
+hrm_baseline,7812,0.1358133852481842,0.6985693573951721,0.676047682762146,0.996868371963501,0.0211492907255888,16
+hrm_baseline,10416,0.20248068869113922,0.7259970307350159,0.6480565667152405,0.9983821511268616,0.017285015434026718,16
+hrm_baseline,13020,0.3024248778820038,0.7580602169036865,0.5614010691642761,0.9971805810928345,0.02416178770363331,16
+hrm_baseline,15624,0.36705803871154785,0.7842973470687866,0.49664124846458435,0.9964591860771179,0.02256467007100582,16
+hrm_baseline,18228,0.46287721395492554,0.8077820539474487,0.4641122817993164,0.9953309893608093,0.02509351819753647,16
+hrm_baseline,20832,0.4912721812725067,0.8198283314704895,0.4137057960033417,0.9955745935440063,0.02796635963022709,16
+hrm_baseline,23436,0.5193856954574585,0.8260135054588318,0.41173747181892395,0.9975590705871582,0.024772455915808678,16
+hrm_baseline,26040,0.5265287756919861,0.8270824551582336,0.4161679148674011,0.9960216283798218,0.03298119083046913,16
diff --git a/research/flossing/report_bundle_20260603/tables/hrm_honly_step26040_eval.csv b/research/flossing/report_bundle_20260603/tables/hrm_honly_step26040_eval.csv
new file mode 100644
index 0000000..d3e3ab9
--- /dev/null
+++ b/research/flossing/report_bundle_20260603/tables/hrm_honly_step26040_eval.csv
@@ -0,0 +1,2 @@
+step,accuracy,exact_accuracy,lm_loss,q_halt_accuracy,q_halt_loss,steps
+26040,0.85720146,0.62264127,0.35935268,0.99625105,0.024241636,16.0
diff --git a/research/flossing/report_bundle_20260603/tables/hrm_matched_compare.csv b/research/flossing/report_bundle_20260603/tables/hrm_matched_compare.csv
new file mode 100644
index 0000000..71b8e16
--- /dev/null
+++ b/research/flossing/report_bundle_20260603/tables/hrm_matched_compare.csv
@@ -0,0 +1,6 @@
+family,step,base_exact,multi4_exact,delta_exact,base_acc,multi4_acc,delta_acc,base_loss,multi4_loss,delta_loss,base_steps,multi4_steps
+hrm,2604,0.016369983553886414,0.0123774204403162,-0.003992563113570213,0.6336106061935425,0.6303551197052002,-0.003255486488342285,0.8522627949714661,0.8603715300559998,0.008108735084533691,16,16
+hrm,5208,0.06169315055012703,0.025012653321027756,-0.036680497229099274,0.6744286417961121,0.6685170531272888,-0.005911588668823242,0.7430469989776611,0.7412638068199158,-0.0017831921577453613,16,16
+hrm,7812,0.1358133852481842,0.04651052877306938,-0.08930285647511482,0.6985693573951721,0.687346339225769,-0.011223018169403076,0.676047682762146,0.7044885158538818,0.02844083309173584,16,16
+hrm,10416,0.20248068869113922,0.2006617933511734,-0.0018188953399658203,0.7259970307350159,0.7243355512619019,-0.0016614794731140137,0.6480565667152405,0.6340938210487366,-0.013962745666503906,16,16
+hrm,13020,0.3024248778820038,0.34697696566581726,0.04455208778381348,0.7580602169036865,0.7794705033302307,0.02141028642654419,0.5614010691642761,0.49915269017219543,-0.06224837899208069,16,16
diff --git a/research/flossing/report_bundle_20260603/tables/hrm_multi4_complete_eval.csv b/research/flossing/report_bundle_20260603/tables/hrm_multi4_complete_eval.csv
new file mode 100644
index 0000000..9aac158
--- /dev/null
+++ b/research/flossing/report_bundle_20260603/tables/hrm_multi4_complete_eval.csv
@@ -0,0 +1,11 @@
+run,step,exact,accuracy,loss
+hrm_multi4,2604,0.0123774204403162,0.6303551197052002,0.8603715300559998
+hrm_multi4,5208,0.025012653321027756,0.6685170531272888,0.7412638068199158
+hrm_multi4,7812,0.04651052877306938,0.687346339225769,0.7044885158538818
+hrm_multi4,10416,0.2006617933511734,0.7243355512619019,0.6340938210487366
+hrm_multi4,13020,0.34697696566581726,0.7794705033302307,0.49915269017219543
+hrm_multi4,15624,0.4653252363204956,0.8156101703643799,0.42743897438049316
+hrm_multi4,18228,0.5790873169898987,0.8494690656661987,0.3459467887878418
+hrm_multi4,20832,0.6393187,0.8660642,0.3186217
+hrm_multi4,23436,0.6443189,0.8684137,0.30427682
+hrm_multi4,26040,0.46235448,0.80298746,0.603943
diff --git a/research/flossing/report_bundle_20260603/tables/hrm_multi4_eval.csv b/research/flossing/report_bundle_20260603/tables/hrm_multi4_eval.csv
new file mode 100644
index 0000000..0f7dbc9
--- /dev/null
+++ b/research/flossing/report_bundle_20260603/tables/hrm_multi4_eval.csv
@@ -0,0 +1,6 @@
+run,step,all/exact_accuracy,all/accuracy,all/lm_loss,all/q_halt_accuracy,all/q_halt_loss,all/steps
+hrm_multi4,2604,0.0123774204403162,0.6303551197052002,0.8603715300559998,0.9879773855209351,0.020903315395116806,16
+hrm_multi4,5208,0.025012653321027756,0.6685170531272888,0.7412638068199158,0.9968069195747375,0.019309692084789276,16
+hrm_multi4,7812,0.04651052877306938,0.687346339225769,0.7044885158538818,0.9902976751327515,0.03646523132920265,16
+hrm_multi4,10416,0.2006617933511734,0.7243355512619019,0.6340938210487366,0.9991532564163208,0.011545917019248009,16
+hrm_multi4,13020,0.34697696566581726,0.7794705033302307,0.49915269017219543,0.9987062215805054,0.01581510715186596,16
diff --git a/research/flossing/report_bundle_20260603/tables/hrm_multi4_horizon_sweep_768.csv b/research/flossing/report_bundle_20260603/tables/hrm_multi4_horizon_sweep_768.csv
new file mode 100644
index 0000000..3bdace3
--- /dev/null
+++ b/research/flossing/report_bundle_20260603/tables/hrm_multi4_horizon_sweep_768.csv
@@ -0,0 +1,41 @@
+step,split,horizon,count,exact_accuracy,accuracy,lm_loss,q_halt_loss,steps
+23436,train,2,768.0,0.2955729166666667,0.8364840348561605,0.39225157833573504,0.04459714392820994,2.0
+23436,train,3,768.0,0.5755208333333334,0.8917663097381592,0.2618181909650845,0.01976812755068143,3.0
+23436,train,4,768.0,0.6796875,0.9122781753540039,0.21020127075343642,0.006546831379334132,4.0
+23436,train,5,768.0,0.734375,0.9230967362721761,0.182652537414814,0.00717167928814888,5.0
+23436,train,6,768.0,0.7669270833333334,0.9298482735951742,0.16631383252369117,0.00374875341852506,6.0
+23436,train,8,768.0,0.8033854166666666,0.9389146169026693,0.14464881393209095,0.0037066793690125146,8.0
+23436,train,10,768.0,0.8268229166666666,0.9462127685546875,0.12909535948177878,0.00991838239133358,10.0
+23436,train,12,768.0,0.84375,0.9500546455383301,0.11931335874268405,0.004430865868926048,12.0
+23436,train,14,768.0,0.85546875,0.9517585436503092,0.11357831630187847,0.0033199011037747064,14.0
+23436,train,16,768.0,0.859375,0.9536232948303223,0.11014115689326769,0.003578242535392443,16.0
+23436,test,2,768.0,0.12760416666666666,0.7265946865081787,0.6354224683840087,0.025561923782030743,2.0
+23436,test,3,768.0,0.2526041666666667,0.7604809602101644,0.5506175992783068,0.012921225279569626,3.0
+23436,test,4,768.0,0.3138020833333333,0.7742091019948324,0.515490498860075,0.003827621228992939,4.0
+23436,test,5,768.0,0.3619791666666667,0.7867315610249838,0.48693293612013444,0.00506168728073438,5.0
+23436,test,6,768.0,0.3984375,0.7953317960103353,0.46390732880926117,0.0035611667359868684,6.0
+23436,test,8,768.0,0.4583333333333333,0.8105709552764893,0.4314775246277862,0.005083844686547915,8.0
+23436,test,10,768.0,0.48828125,0.8183674812316895,0.4161860321298086,0.003043775757153829,10.0
+23436,test,12,768.0,0.51171875,0.8260512351989746,0.4016971584101859,0.004879387095570564,12.0
+23436,test,14,768.0,0.5325520833333334,0.831050713857015,0.3878147863194716,0.00292336226751407,14.0
+23436,test,16,768.0,0.5559895833333334,0.8361946741739908,0.37473119346002354,0.0035912382105986276,16.0
+26040,train,2,768.0,0.7825520833333334,0.963814894358317,0.09390118849629236,0.04166571795940399,2.0
+26040,train,3,768.0,0.9127604166666666,0.9810153643290201,0.051538720202730794,0.008062846958637238,3.0
+26040,train,4,768.0,0.9309895833333334,0.9841659863789877,0.04344159450663771,0.012521501630544662,4.0
+26040,train,5,768.0,0.9401041666666666,0.9858539899190267,0.04013312268752094,0.015388640264670054,5.0
+26040,train,6,768.0,0.9401041666666666,0.9870595932006836,0.03853385294570503,0.023091336091359455,6.0
+26040,train,8,768.0,0.94921875,0.9884098370869955,0.03620980748266996,0.01947430024544398,8.0
+26040,train,10,768.0,0.9557291666666666,0.9893261591593424,0.03444665149376691,0.03555429975191752,10.0
+26040,train,12,768.0,0.953125,0.9895029067993164,0.034414704500878884,0.04184401035308838,12.0
+26040,train,14,768.0,0.9557291666666666,0.9893904527028402,0.03586123670240371,0.037144094705581665,14.0
+26040,train,16,768.0,0.9518229166666666,0.9894386132558187,0.03654265702438753,0.041033936043580375,16.0
+26040,test,2,768.0,0.11588541666666667,0.7054398854573568,0.8908620335632751,0.040651207168896995,2.0
+26040,test,3,768.0,0.1875,0.7230902512868246,0.7620792943957264,0.03934991856416067,3.0
+26040,test,4,768.0,0.23567708333333334,0.731706460316976,0.7335753038165317,0.11375004053115845,4.0
+26040,test,5,768.0,0.25,0.7362075646718343,0.7385935210540664,0.160938690106074,5.0
+26040,test,6,768.0,0.2669270833333333,0.7397923469543457,0.743103274276764,0.17507813374201456,6.0
+26040,test,8,768.0,0.296875,0.7452899614969889,0.7468322750276379,0.19044278065363565,8.0
+26040,test,10,768.0,0.3138020833333333,0.7472190856933594,0.7515058945457195,0.21485831340154013,10.0
+26040,test,12,768.0,0.3229166666666667,0.7502892812093099,0.7620030015396181,0.22753063837687174,12.0
+26040,test,14,768.0,0.3307291666666667,0.7518165111541748,0.7553974518406182,0.2158371408780416,14.0
+26040,test,16,768.0,0.3346354166666667,0.7514950434366862,0.7639393310889216,0.23628832896550497,16.0
diff --git a/research/flossing/report_bundle_20260603/tables/hrm_trm_redesigned_summary.csv b/research/flossing/report_bundle_20260603/tables/hrm_trm_redesigned_summary.csv
new file mode 100644
index 0000000..c5a4ad0
--- /dev/null
+++ b/research/flossing/report_bundle_20260603/tables/hrm_trm_redesigned_summary.csv
@@ -0,0 +1,7 @@
+model,label,step,full_exact,sample_exact,lambda1_all,mean8_all,pos_count_all,lambda1_success,lambda1_fail,mean8_success,mean8_fail,pos_count_success,pos_count_fail
+HRM,baseline best,26040,0.5265287756919861,0.5,-0.0569394779099639,-0.10732079181130239,0.98046875,-0.14642834789538028,0.03254939207545249,-0.18957092847483636,-0.025070655147768406,0.01953125,1.94140625
+HRM,multi4 best,23436,0.6443189,0.654296875,-0.04733450568929953,-0.112159715874796,1.166015625,-0.10103540500250659,0.054302789621007604,-0.16998952501653278,-0.0027078172167066595,0.05970149253731343,3.2598870056497176
+HRM,multi4 final,26040,0.46235448,0.4296875,0.02874533511322852,-0.040662085491063316,1.626953125,0.03573289099003887,0.02348073821974127,-0.053027883530268646,-0.031345388338237384,1.8181818181818181,1.4828767123287672
+TRM,baseline best,58590,0.8686309456825256,0.875,0.02823458132615997,0.013457294571722192,7.841796875,0.01761167685357837,0.10259491263423115,0.008003413600119422,0.05163446137294159,7.819196428571429,8.0
+TRM,multi4 best,35805,0.8964653611183167,0.900390625,0.020381716455975862,0.0065844104191477015,3.841796875,0.01118387160416574,0.10352301992037717,0.0019524994650864183,0.04845325257252518,3.3817787418655096,8.0
+TRM,multi4 final,65100,0.8350536823272705,0.82421875,0.03232463403946895,0.018508151198432188,8.0,0.01912124014472792,0.09423388096814354,0.012883653437293365,0.04488079625621645,8.0,8.0
diff --git a/research/flossing/report_bundle_20260603/tables/meeting_figures_v2_report.md b/research/flossing/report_bundle_20260603/tables/meeting_figures_v2_report.md
new file mode 100644
index 0000000..5158c99
--- /dev/null
+++ b/research/flossing/report_bundle_20260603/tables/meeting_figures_v2_report.md
@@ -0,0 +1,26 @@
+# Meeting Figures v2
+
+## Figure Strategy
+
+0. `fig0_motivation_lambda1_success_failure_hrm_trm.png`: first-exponent success/failure distribution in HRM and TRM. This motivates chaos as a detector before introducing the method.
+1. `fig1_hrm_trm_training_curves.png`: performance over training for HRM and TRM. This answers whether the method improves accuracy and where best/final are.
+2. `fig2_accuracy_vs_chaotic_volume_phase.png`: phase view, with accuracy versus mean top-8 Lyapunov exponent. This answers whether better checkpoints are dynamically more stable.
+3. `fig3_hrm_trm_success_failure_spectra.png`: full success/failure spectrum separation for HRM and TRM best checkpoints. This extends Fig0 beyond λ1.
+4. `fig4_ptrm_same_subset_comparison.png`: PTRM same-subset result. This is a secondary inference-time story.
+5. `fig5_qhead_vs_lambda1_ptrm.png`: PTRM Q-head halt logit versus finite-difference stability proxy `-lambda_1`. The bottom row isolates mixed problems where trajectory selection actually matters.
+
+## Key Numbers
+
+- HRM baseline best: 0.5265 exact. HRM multi4 best: 0.6443 exact. HRM multi4 final: 0.4624 exact.
+- TRM baseline best: 0.8686 exact. TRM multi4 best: 0.8965 exact. TRM multi4 final: 0.8351 exact.
+- HRM multi4 best dynamics sample: mean top-8 exponent -0.1122; final -0.0407.
+- TRM multi4 best dynamics sample: mean top-8 exponent +0.0066; final +0.0185.
+- PTRM same subset, K=100: Q-selected 0.984 -> 0.988; mean rollout 0.942 -> 0.954.
+- PTRM Q-vs-stability, K=25/N=512: mixed-problem Pearson is 0.786 for baseline and 0.791 for multi4. In both runs, Q-max selection and lambda-min selection each match the oracle exact accuracy on this subset (0.9727 for baseline, 0.9746 for multi4).
+
+## Caveats
+
+- Dynamics spectra use N=512 diagnostic samples, not the full test set.
+- PTRM same-subset (K=100) numbers use a fixed N=1000 subset, and the PTRM Q-vs-stability numbers use K=25/N=512; do not mix their deterministic subset accuracies with full-test W&B exact accuracy.
+- Final checkpoints are collapse diagnostics, not the method's reported performance.
+- Q-head is not a pure lambda ranker: global Spearman is weak because most problems are all-success/all-failure across K rollouts. The strongest evidence is the mixed-problem class separation and selector equivalence.
diff --git a/research/flossing/report_bundle_20260603/tables/multi4_35805_k25_d64_sigma03_Lonly_fdlyap_n512_seed20260602.summary.csv b/research/flossing/report_bundle_20260603/tables/multi4_35805_k25_d64_sigma03_Lonly_fdlyap_n512_seed20260602.summary.csv
new file mode 100644
index 0000000..7d15a10
--- /dev/null
+++ b/research/flossing/report_bundle_20260603/tables/multi4_35805_k25_d64_sigma03_Lonly_fdlyap_n512_seed20260602.summary.csv
@@ -0,0 +1,2 @@
+correct_count/det_fail_mean,correct_count/det_success_mean,correct_count/full_frac,correct_count/ge_10_frac,correct_count/ge_1_frac,correct_count/ge_25_frac,correct_count/ge_5_frac,correct_count/mean,correct_count/median,correct_count/q10,correct_count/q25,correct_count/q75,correct_count/q90,correct_count/std,correct_count/zero_frac,deterministic/exact,deterministic/token_acc,fd_lyap,fd_spectrum_k,include_clean,lambda_fail_mean,lambda_mean,lambda_success_mean,lyap_min/exact,lyap_min/token_acc,mean_rollout/exact,mean_rollout/token_acc,n_samples,noise_std,oracle_pass/det_fail_frac,oracle_pass/det_success_frac,oracle_pass/exact,oracle_pass/token_acc,perturb_both,perturb_h,perturb_l,q_fail_mean,q_max/det_fail_frac,q_max/det_success_frac,q_max/exact,q_max/token_acc,q_mean,q_minus_0.25lambda/exact,q_minus_0.25lambda/token_acc,q_minus_0.5lambda/exact,q_minus_0.5lambda/token_acc,q_minus_1lambda/exact,q_minus_1lambda/token_acc,q_minus_2lambda/exact,q_minus_2lambda/token_acc,q_success_mean,rollout0/exact,rollout0/token_acc,rollouts,steps
+13.054545402526855,24.886215209960938,0.890625,0.953125,0.974609375,0.890625,0.966796875,23.615234375,25.0,24.0,25.0,25.0,25.0,5.043018341064453,0.025390625,0.892578125,0.9596836566925049,1.0,0.0,0.0,7.271883964538574,5.830501556396484,5.7459797859191895,0.974609375,0.9897279739379883,0.9446094036102295,0.9784027934074402,512.0,0.3,0.7636363506317139,1.0,0.974609375,0.9913435578346252,0.0,0.0,1.0,-10.081937789916992,0.7636363506317139,1.0,0.974609375,0.9889563918113708,6.753265857696533,0.974609375,0.9889563918113708,0.974609375,0.9890046119689941,0.974609375,0.9889804720878601,0.974609375,0.9889563918113708,7.74045991897583,0.9375,0.9766348004341125,25.0,64.0
diff --git a/research/flossing/report_bundle_20260603/tables/multi4_35805_k25_d64_sigma03_Lonly_n10000_seed20260602.summary.csv b/research/flossing/report_bundle_20260603/tables/multi4_35805_k25_d64_sigma03_Lonly_n10000_seed20260602.summary.csv
new file mode 100644
index 0000000..f991531
--- /dev/null
+++ b/research/flossing/report_bundle_20260603/tables/multi4_35805_k25_d64_sigma03_Lonly_n10000_seed20260602.summary.csv
@@ -0,0 +1,2 @@
+correct_count/det_fail_mean,correct_count/det_success_mean,correct_count/full_frac,correct_count/ge_10_frac,correct_count/ge_1_frac,correct_count/ge_25_frac,correct_count/ge_5_frac,correct_count/mean,correct_count/median,correct_count/q10,correct_count/q25,correct_count/q75,correct_count/q90,correct_count/std,correct_count/zero_frac,deterministic/exact,deterministic/token_acc,fd_lyap,fd_spectrum_k,include_clean,mean_rollout/exact,mean_rollout/token_acc,n_samples,noise_std,oracle_pass/det_fail_frac,oracle_pass/det_success_frac,oracle_pass/exact,oracle_pass/token_acc,perturb_both,perturb_h,perturb_l,q_max/det_fail_frac,q_max/det_success_frac,q_max/exact,q_max/token_acc,q_mean,rollout0/exact,rollout0/token_acc,rollouts,steps
+13.846529960632324,24.8140811920166,0.8842999935150146,0.9599000215530396,0.9828000068664551,0.8842999935150146,0.9696999788284302,23.692100524902344,25.0,24.0,25.0,25.0,25.0,4.753871917724609,0.01720000058412552,0.8977000117301941,0.9607678651809692,0.0,0.0,0.0,0.9476839900016785,0.9795129895210266,10000.0,0.3,0.8328445553779602,0.9998885989189148,0.9828000068664551,0.9940740466117859,0.0,0.0,1.0,0.829912006855011,0.9995543956756592,0.982200026512146,0.9926013946533203,6.862946510314941,0.9480999708175659,0.979781448841095,25.0,64.0
diff --git a/research/flossing/report_bundle_20260603/tables/paired_ptrm_k100_n1000_seed0_summary.csv b/research/flossing/report_bundle_20260603/tables/paired_ptrm_k100_n1000_seed0_summary.csv
new file mode 100644
index 0000000..8122604
--- /dev/null
+++ b/research/flossing/report_bundle_20260603/tables/paired_ptrm_k100_n1000_seed0_summary.csv
@@ -0,0 +1,2 @@
+n,rollouts,base_det,multi4_det,delta_det,base_mean_rollout,multi4_mean_rollout,delta_mean_rollout,base_qmax,multi4_qmax,delta_qmax,base_oracle,multi4_oracle,delta_oracle,base_correct_count_mean,multi4_correct_count_mean,delta_correct_count_mean,det_base_only_frac,det_multi4_only_frac,oracle_base_only_frac,oracle_multi4_only_frac
+1000,100,0.887,0.911,0.02400000000000002,0.94188,0.95417,0.012289999999999912,0.984,0.988,0.0040000000000000036,0.985,0.988,0.0030000000000000027,94.188,95.417,1.2289999999999992,0.034,0.058,0.001,0.004
diff --git a/research/flossing/report_bundle_20260603/tables/summary_n512_k8_seed20260602.csv b/research/flossing/report_bundle_20260603/tables/summary_n512_k8_seed20260602.csv
new file mode 100644
index 0000000..c255252
--- /dev/null
+++ b/research/flossing/report_bundle_20260603/tables/summary_n512_k8_seed20260602.csv
@@ -0,0 +1,4 @@
+run,step,n,sample_exact,sample_token_acc,lambda1_all,lambda1_success,lambda1_fail,lambda1_fail_minus_success,mean8_all,mean8_success,mean8_fail,mean8_fail_minus_success,tail4_all,tail4_success,tail4_fail,pos_count_all,pos_count_success,pos_count_fail,pos_mass_all,pos_mass_success,pos_mass_fail,lambda2_all,lambda2_success,lambda2_fail,lambda3_all,lambda3_success,lambda3_fail,lambda4_all,lambda4_success,lambda4_fail,lambda5_all,lambda5_success,lambda5_fail,lambda6_all,lambda6_success,lambda6_fail,lambda7_all,lambda7_success,lambda7_fail,lambda8_all,lambda8_success,lambda8_fail
+baseline_best,58590,512,0.875,0.9539689430384897,0.02823458132615997,0.01761167685357837,0.10259491263423115,0.08498323578065278,0.013457294571722192,0.008003413600119422,0.05163446137294159,0.04363104777282217,0.0075273313675370546,0.004122858266632009,0.03135864307387237,7.841796875,7.819196428571429,8.0,0.10779670139772268,0.0641854171711784,0.4130756909835327,0.0205107267238418,0.012619587999194794,0.07574869779637083,0.016147883449207256,0.0097831444752176,0.060701056267134845,0.012655839604420294,0.007521466406436568,0.04859645199030638,0.010066905798097991,0.00580994511748096,0.03986563056241721,0.00827578879948021,0.00458740731747704,0.0340944591735024,0.006620997387619565,0.003523945934349396,0.028300357560510747,0.0051456334849504515,0.00257013469722064,0.02317412499905913
+multi4_best,35805,512,0.900390625,0.9625048226444051,0.020381716455975862,0.01118387160416574,0.10352301992037717,0.09233914831621143,0.0065844104191477015,0.0019524994650864183,0.04845325257252518,0.046500753107438765,0.001402141885882835,-0.0014188478887233206,0.02690167690732279,3.841796875,3.3817787418655096,8.0,0.06377898653446445,0.027952091227886174,0.38762602058020146,0.012663036363773195,0.0057181008775463405,0.07543980615103946,0.008348809059507634,0.003078277385450445,0.055990281642652025,0.005673153930393582,0.0013151374084221035,0.04506620523684165,0.0034857525132654388,-9.921478389921372e-05,0.035891045140577296,0.002008319042374751,-0.001002505589948276,0.02922381228749074,0.0006402704918926361,-0.0019078789586364823,0.023673542976087213,-0.0005257745040014861,-0.0026657922224093103,0.018818307225135902
+multi4_final,65100,512,0.82421875,0.9289882326847874,0.03232463403946895,0.01912124014472792,0.09423388096814354,0.07511264082341562,0.018508151198432188,0.012883653437293365,0.04488079625621645,0.03199714281892309,0.013372940185377047,0.010672304166179654,0.026035922408724824,8.0,8.0,8.0,0.1480652095874575,0.10306922749834692,0.3590463700497316,0.02459628393262392,0.015458292881759563,0.0674433086377879,0.020185894951282535,0.013495850063401376,0.05155477209223641,0.017466635922573914,0.01230462774373944,0.041670718716664445,0.015463502108104876,0.011429727322452865,0.034377423880828754,0.013748909759215167,0.010808219788353272,0.027537478289256494,0.012621539073734311,0.010437874606770786,0.022860499129941068,0.011657809800453833,0.010013394947141692,0.019368288334872988
diff --git a/research/flossing/report_bundle_20260603/tables/summary_n64_k8_seed20260602.csv b/research/flossing/report_bundle_20260603/tables/summary_n64_k8_seed20260602.csv
new file mode 100644
index 0000000..5a12638
--- /dev/null
+++ b/research/flossing/report_bundle_20260603/tables/summary_n64_k8_seed20260602.csv
@@ -0,0 +1,5 @@
+name,n,acc,lambda1_all,lambda1_success,lambda1_fail,mean8_all,mean8_success,mean8_fail,tail4_all,pos_count_all,pos_count_success,pos_count_fail,pos_mass_all
+baseline_best,64,0.875,0.027470633,0.015822656,0.109006464,0.013350397,0.0077010575,0.052895777,0.0074741757,7.796875,7.767857142857143,8.0,0.10691354
+multi4_bestish,64,0.953125,0.016227467,0.012409109,0.09386742,0.004405942,0.002358008,0.046047267,-0.00011889404,3.375,3.1475409836065573,8.0,0.048455432
+multi4_late,64,0.84375,0.031355858,0.01815009,0.10266701,0.018412568,0.01310199,0.047089677,0.013622271,8.0,8.0,8.0,0.14730054
+multi4_final,64,0.859375,0.028622076,0.01753769,0.09636,0.01713102,0.01240586,0.046007,0.012961711,8.0,8.0,8.0,0.13704816
diff --git a/research/flossing/report_bundle_20260603/tables/trm_baseline_eval.csv b/research/flossing/report_bundle_20260603/tables/trm_baseline_eval.csv
new file mode 100644
index 0000000..0b6bc11
--- /dev/null
+++ b/research/flossing/report_bundle_20260603/tables/trm_baseline_eval.csv
@@ -0,0 +1,11 @@
+run,step,all/exact_accuracy,all/accuracy,all/lm_loss,all/q_halt_accuracy,all/q_halt_loss,all/steps
+trm_baseline,26041,0.5575894117355347,0.8469749093055725,0.35285070538520813,0.9997445344924927,0.003797376062721014,16
+trm_baseline,52082,0.6295099854469299,0.8704391121864319,0.2973524034023285,0.9998486042022705,0.0011324305087327957,16
+trm_baseline,78123,0.6993892788887024,0.8920264840126038,0.2477506548166275,0.9998935461044312,0.0006260558147914708,16
+trm_baseline,104164,0.72928386926651,0.9020143151283264,0.22608688473701477,0.9998770356178284,0.001141014276072383,16
+trm_baseline,130205,0.7653990387916565,0.914251446723938,0.19878524541854858,0.999917209148407,0.0010435190051794052,16
+trm_baseline,156246,0.7596656680107117,0.9119072556495667,0.2041437327861786,0.999862790107727,0.0011596218682825565,16
+trm_baseline,182287,0.7541900873184204,0.9094774723052979,0.2100999504327774,0.9998249411582947,0.0013093978632241488,16
+trm_baseline,208328,0.7732800841331482,0.9166732430458069,0.19410833716392517,0.9998320937156677,0.0033004307188093662,16
+trm_baseline,234369,0.7750374674797058,0.9172152280807495,0.19279460608959198,0.9998533725738525,0.0032994903158396482,16
+trm_baseline,260410,0.7742025256156921,0.9169425964355469,0.19348150491714478,0.9998462796211243,0.002894919365644455,16
diff --git a/research/flossing/report_bundle_20260603/tables/trm_gbs768_multi4_step65100_det_n10000_seed20260602.summary.csv b/research/flossing/report_bundle_20260603/tables/trm_gbs768_multi4_step65100_det_n10000_seed20260602.summary.csv
new file mode 100644
index 0000000..38ca8f5
--- /dev/null
+++ b/research/flossing/report_bundle_20260603/tables/trm_gbs768_multi4_step65100_det_n10000_seed20260602.summary.csv
@@ -0,0 +1,2 @@
+correct_count/det_fail_mean,correct_count/det_success_mean,correct_count/full_frac,correct_count/ge_1_frac,correct_count/mean,correct_count/median,correct_count/q10,correct_count/q25,correct_count/q75,correct_count/q90,correct_count/std,correct_count/zero_frac,deterministic/exact,deterministic/token_acc,fd_lyap,fd_spectrum_k,include_clean,mean_rollout/exact,mean_rollout/token_acc,n_samples,noise_std,oracle_pass/det_fail_frac,oracle_pass/det_success_frac,oracle_pass/exact,oracle_pass/token_acc,perturb_both,perturb_h,perturb_l,q_max/det_fail_frac,q_max/det_success_frac,q_max/exact,q_max/token_acc,q_mean,rollout0/exact,rollout0/token_acc,rollouts,steps
+0.0,1.0,0.8312000036239624,0.8312000036239624,0.8312000036239624,1.0,0.0,1.0,1.0,1.0,0.3745751678943634,0.1687999963760376,0.8312000036239624,0.9333752989768982,0.0,0.0,0.0,0.8312000036239624,0.9333752989768982,10000.0,0.0,0.0,1.0,0.8312000036239624,0.9333752989768982,1.0,0.0,0.0,0.0,1.0,0.8312000036239624,0.9333752989768982,3.762489080429077,0.8312000036239624,0.9333752989768982,1.0,16.0
diff --git a/research/flossing/report_bundle_20260603/tables/trm_matched_compare.csv b/research/flossing/report_bundle_20260603/tables/trm_matched_compare.csv
new file mode 100644
index 0000000..32006a5
--- /dev/null
+++ b/research/flossing/report_bundle_20260603/tables/trm_matched_compare.csv
@@ -0,0 +1,2 @@
+family,step,base_exact,multi4_exact,delta_exact,base_acc,multi4_acc,delta_acc,base_loss,multi4_loss,delta_loss,base_steps,multi4_steps
+trm,26041,0.5575894117355347,0.7394047975540161,0.18181538581848145,0.8469749093055725,0.9067130088806152,0.059738099575042725,0.35285070538520813,0.21417337656021118,-0.13867732882499695,16,16
diff --git a/research/flossing/report_bundle_20260603/tables/trm_multi4_eval.csv b/research/flossing/report_bundle_20260603/tables/trm_multi4_eval.csv
new file mode 100644
index 0000000..f3d16b5
--- /dev/null
+++ b/research/flossing/report_bundle_20260603/tables/trm_multi4_eval.csv
@@ -0,0 +1,2 @@
+run,step,all/exact_accuracy,all/accuracy,all/lm_loss,all/q_halt_accuracy,all/q_halt_loss,all/steps
+trm_multi4,26041,0.7394047975540161,0.9067130088806152,0.21417337656021118,0.9997232556343079,0.0024572687689214945,16
diff --git a/research/flossing/report_bundle_20260603/tables/trm_multi4_eval_full.csv b/research/flossing/report_bundle_20260603/tables/trm_multi4_eval_full.csv
new file mode 100644
index 0000000..3411ff5
--- /dev/null
+++ b/research/flossing/report_bundle_20260603/tables/trm_multi4_eval_full.csv
@@ -0,0 +1,11 @@
+run,step,all/exact_accuracy,all/accuracy,all/lm_loss,all/q_halt_accuracy,all/q_halt_loss,all/steps
+trm_multi4_loguniform_repro,26041,0.7394047975540161,0.9067130088806152,0.21417337656021118,0.9997232556343079,0.0024572687689214945,16
+trm_multi4_loguniform_repro,52082,0.8449901342391968,0.9424432516098022,0.1342233270406723,0.9998746514320374,0.0010290677892044187,16
+trm_multi4_loguniform_repro,78123,0.8417639136314392,0.9411031007766724,0.13769488036632538,0.9997493028640747,0.0022251552436500788,16
+trm_multi4_loguniform_repro,104164,0.8547161221504211,0.9456510543823242,0.12779666483402252,0.999834418296814,0.0019055778393521905,16
+trm_multi4_loguniform_repro,130205,0.8536233305931091,0.9453508853912354,0.1282763034105301,0.9998888373374939,0.0018422487191855907,16
+trm_multi4_loguniform_repro,156246,0.8489472270011902,0.9433866739273071,0.13273237645626068,0.999782383441925,0.003051365725696087,16
+trm_multi4_loguniform_repro,182287,0.8558868765830994,0.9459810256958008,0.12728126347064972,0.9997469186782837,0.0024399051908403635,16
+trm_multi4_loguniform_repro,208328,0.8404204249382019,0.9403222799301147,0.13971956074237823,0.9996499419212341,0.0029723909683525562,16
+trm_multi4_loguniform_repro,234369,0.845432460308075,0.9419097304344177,0.13668429851531982,0.9997681975364685,0.0016449446557089686,16
+trm_multi4_loguniform_repro,260410,0.826143741607666,0.9344042539596558,0.15468436479568481,0.999701976776123,0.001810370129533112,16
diff --git a/research/flossing/report_bundle_20260603/tables/trm_official_gbs768_eval.csv b/research/flossing/report_bundle_20260603/tables/trm_official_gbs768_eval.csv
new file mode 100644
index 0000000..66dfe1b
--- /dev/null
+++ b/research/flossing/report_bundle_20260603/tables/trm_official_gbs768_eval.csv
@@ -0,0 +1,11 @@
+run,step,all/exact_accuracy,all/accuracy,all/lm_loss,all/q_halt_accuracy,all/q_halt_loss,all/steps
+trm_official_gbs768,6510,0.20006339251995087,0.7261562347412109,0.6155200004577637,0.9993779063224792,0.0063577014952898026,16
+trm_official_gbs768,13020,0.6248409152030945,0.8679871559143066,0.30110087990760803,0.9997587203979492,0.0018232133006677032,16
+trm_official_gbs768,19530,0.7098508477210999,0.8961462378501892,0.23719573020935059,0.9997185468673706,0.002359110629186034,16
+trm_official_gbs768,26040,0.7558197379112244,0.9113300442695618,0.20295456051826477,0.9997066855430603,0.0026622479781508446,16
+trm_official_gbs768,32550,0.7946407794952393,0.9247151017189026,0.17315243184566498,0.9997208714485168,0.0024344150442630053,16
+trm_official_gbs768,39060,0.8247435092926025,0.9351920485496521,0.14995059370994568,0.9996594190597534,0.002962446305900812,16
+trm_official_gbs768,45570,0.8479396104812622,0.9433117508888245,0.1323014795780182,0.9996806979179382,0.0026382978539913893,16
+trm_official_gbs768,52080,0.8633161783218384,0.9488447904586792,0.12009253352880478,0.9997137784957886,0.002587266732007265,16
+trm_official_gbs768,58590,0.8686309456825256,0.9508475661277771,0.11555595695972443,0.9998438954353333,0.0014915302162989974,16
+trm_official_gbs768,65100,0.86624675989151,0.9500409364700317,0.11748332530260086,0.9996523261070251,0.002605273388326168,16
diff --git a/research/flossing/report_bundle_20260603/tables/trm_official_gbs768_multi4_eval.csv b/research/flossing/report_bundle_20260603/tables/trm_official_gbs768_multi4_eval.csv
new file mode 100644
index 0000000..ec353ce
--- /dev/null
+++ b/research/flossing/report_bundle_20260603/tables/trm_official_gbs768_multi4_eval.csv
@@ -0,0 +1,21 @@
+run,step,all/exact_accuracy,all/accuracy,all/lm_loss,all/q_halt_accuracy,all/q_halt_loss,all/steps,_runtime,_timestamp
+trm_official_gbs768_multi4,3255,0.0175999198108911,0.6504403352737427,0.7941210269927979,0.9824000597000122,0.108424387872219,16.0,12550.718766357,1780192940.452497
+trm_official_gbs768_multi4,6510,0.2277795374393463,0.7379271984100342,0.5934479236602783,0.999262034893036,0.0035974220372736,16.0,25064.702453699,1780205454.4563622
+trm_official_gbs768_multi4,9765,0.6357069611549377,0.8709613084793091,0.2931223213672638,0.9997137784957886,0.0014165197499096,16.0,37579.311725746,1780217969.132149
+trm_official_gbs768_multi4,13020,0.75853031873703,0.910834014415741,0.2033131867647171,0.9997611045837402,0.0021604059729725,16.0,50092.402719112,1780230482.2738986
+trm_official_gbs768_multi4,16275,0.8263778686523438,0.9346956610679626,0.1507271528244018,0.9997800588607788,0.0041020140051841,16.0,62595.579823935,1780242985.398887
+trm_official_gbs768_multi4,19530,0.8654520511627197,0.9488762617111206,0.1198361814022064,0.9997090697288512,0.0052023055031895,16.0,75096.299217356,1780255486.102359
+trm_official_gbs768_multi4,22785,0.8817486763000488,0.9549695253372192,0.1065462753176689,0.9997114539146424,0.0037784865126013,16.0,87607.26667599,1780267997.105077
+trm_official_gbs768_multi4,26040,0.8879267573356628,0.957356870174408,0.1014027148485183,0.999642848968506,0.0053329654037952,16.0,100118.838503384,1780280508.634669
+trm_official_gbs768_multi4,29295,0.8933455944061279,0.959309697151184,0.0971761047840118,0.9996144771575928,0.0041324645280838,16.0,112620.42551711,1780293010.168183
+trm_official_gbs768_multi4,32550,0.8962169885635376,0.960436463356018,0.0948082581162452,0.9996026158332824,0.0144010595977306,16.0,125134.755165262,1780305524.5418072
+trm_official_gbs768_multi4,35805,0.8964653611183167,0.9604493975639344,0.0945562794804573,0.9995790123939514,0.0068304436281323,16.0,137714.797781532,1780318104.6386163
+trm_official_gbs768_multi4,39060,0.8957250118255615,0.9601802825927734,0.0952448472380638,0.999541163444519,0.0185395441949367,16.0,150233.754675447,1780330623.5781178
+trm_official_gbs768_multi4,42315,0.888913094997406,0.9574248790740968,0.1008101180195808,0.9995600581169128,0.0061048995703458,16.0,162739.436977464,1780343129.2559526
+trm_official_gbs768_multi4,45570,0.8858169317245483,0.9561300873756408,0.1036654934287071,0.9994938373565674,0.0054858992807567,16.0,175251.082236918,1780355640.8929477
+trm_official_gbs768_multi4,48825,0.882732629776001,0.9547808170318604,0.1068678125739097,0.9994110465049744,0.0072551541961729,16.0,187778.40637965,1780368168.2221627
+trm_official_gbs768_multi4,52080,0.8782954216003418,0.9530280232429504,0.1104752272367477,0.99934720993042,0.0089566921815276,16.0,200285.373208387,1780380675.1846614
+trm_official_gbs768_multi4,55335,0.8694067597389221,0.94942307472229,0.1194151788949966,0.9987700581550598,0.0272370912134647,16.0,212785.271448664,1780393175.0770009
+trm_official_gbs768_multi4,58590,0.8620365858078003,0.9463443756103516,0.1268824934959411,0.9983466863632202,0.0163928251713514,16.0,225280.516080387,1780405670.3403552
+trm_official_gbs768_multi4,61845,0.850763738155365,0.941650390625,0.1378339380025863,0.9978050589561462,0.0215476993471384,16.0,237778.331426833,1780418168.1822684
+trm_official_gbs768_multi4,65100,0.8350536823272705,0.9350366592407228,0.1547555029392242,0.9962841868400574,0.0310162808746099,16.0,250280.395200839,1780430673.009492
diff --git a/research/flossing/report_bundle_20260603/tables/trm_official_gbs768_multi4_step16275_eval.csv b/research/flossing/report_bundle_20260603/tables/trm_official_gbs768_multi4_step16275_eval.csv
new file mode 100644
index 0000000..a34ae57
--- /dev/null
+++ b/research/flossing/report_bundle_20260603/tables/trm_official_gbs768_multi4_step16275_eval.csv
@@ -0,0 +1,2 @@
+step,accuracy,exact_accuracy,lm_loss,q_halt_accuracy,q_halt_loss,steps
+16275,0.934574,0.82621706,0.15090619,0.99974453,0.00405054,16.0
diff --git a/research/flossing/report_bundle_20260603/tables/wallclock_eval.csv b/research/flossing/report_bundle_20260603/tables/wallclock_eval.csv
new file mode 100644
index 0000000..268a924
--- /dev/null
+++ b/research/flossing/report_bundle_20260603/tables/wallclock_eval.csv
@@ -0,0 +1,30 @@
+run,step,runtime_sec,runtime_hms,timestamp_local,ckpt_exists,ckpt_mtime_local,exact,accuracy,loss
+hrm_baseline,2604,1440.547846523,00:24:00.55,2026-05-22 06:32:03,True,2026-05-22 06:32:04,0.016369983553886414,0.6336106061935425,0.8522627949714661
+hrm_baseline,5208,2915.282126881,00:48:35.28,2026-05-22 06:56:38,True,2026-05-22 06:56:39,0.06169315055012703,0.6744286417961121,0.7430469989776611
+hrm_baseline,7812,3726.920135005,01:02:06.92,2026-05-22 07:10:09,True,2026-05-22 07:10:10,0.1358133852481842,0.6985693573951721,0.676047682762146
+hrm_baseline,10416,4458.114451179,01:14:18.11,2026-05-22 07:22:21,True,2026-05-22 07:22:22,0.20248068869113922,0.7259970307350159,0.6480565667152405
+hrm_baseline,13020,5801.015399971,01:36:41.02,2026-05-22 07:44:43,True,2026-05-22 07:44:45,0.3024248778820038,0.7580602169036865,0.5614010691642761
+hrm_baseline,15624,7219.496414876,02:00:19.50,2026-05-22 08:08:22,True,2026-05-22 08:08:23,0.36705803871154785,0.7842973470687866,0.49664124846458435
+hrm_baseline,18228,8616.18880162,02:23:36.19,2026-05-22 08:31:39,True,2026-05-22 08:31:40,0.46287721395492554,0.8077820539474487,0.4641122817993164
+hrm_baseline,20832,9904.193793478,02:45:04.19,2026-05-22 08:53:07,True,2026-05-22 08:53:08,0.4912721812725067,0.8198283314704895,0.4137057960033417
+hrm_baseline,23436,10634.360398376,02:57:14.36,2026-05-22 09:05:17,True,2026-05-22 09:05:18,0.5193856954574585,0.8260135054588318,0.41173747181892395
+hrm_baseline,26040,11366.743085113,03:09:26.74,2026-05-22 09:17:29,True,2026-05-22 09:17:30,0.5265287756919861,0.8270824551582336,0.4161679148674011
+hrm_multi4,2604,5810.841404196,01:36:50.84,2026-05-27 22:44:06,True,2026-05-27 22:44:07,0.0123774204403162,0.6303551197052002,0.8603715300559998
+hrm_multi4,5208,11587.805059004,03:13:07.81,2026-05-28 00:20:23,True,2026-05-28 00:20:24,0.025012653321027756,0.6685170531272888,0.7412638068199158
+hrm_multi4,7812,17365.375767937,04:49:25.38,2026-05-28 01:56:41,True,2026-05-28 01:56:42,0.04651052877306938,0.687346339225769,0.7044885158538818
+hrm_multi4,10416,23142.813901422,06:25:42.81,2026-05-28 03:32:58,True,2026-05-28 03:32:59,0.2006617933511734,0.7243355512619019,0.6340938210487366
+hrm_multi4,13020,29347.517355686,08:09:07.52,2026-05-28 05:16:23,True,2026-05-28 05:16:24,0.34697696566581726,0.7794705033302307,0.49915269017219543
+hrm_multi4,15624,35123.513458465,09:45:23.51,2026-05-28 06:52:39,True,2026-05-28 06:52:40,0.4653252363204956,0.8156101703643799,0.42743897438049316
+hrm_multi4,18228,40898.005173388,11:21:38.01,2026-05-28 08:28:54,True,2026-05-28 08:28:55,0.5790873169898987,0.8494690656661987,0.3459467887878418
+trm_baseline,26041,8593.415472305,02:23:13.42,2026-05-23 01:55:09,True,2026-05-23 01:55:10,0.5575894117355347,0.8469749093055725,0.35285070538520813
+trm_baseline,52082,17155.876633182,04:45:55.88,2026-05-23 04:17:52,True,2026-05-23 04:17:52,0.6295099854469299,0.8704391121864319,0.2973524034023285
+trm_baseline,78123,25719.395089913,07:08:39.40,2026-05-23 06:40:35,True,2026-05-23 06:40:36,0.6993892788887024,0.8920264840126038,0.2477506548166275
+trm_baseline,104164,34282.065662564,09:31:22.07,2026-05-23 09:03:18,True,2026-05-23 09:03:18,0.72928386926651,0.9020143151283264,0.22608688473701477
+trm_baseline,130205,42852.348430037,11:54:12.35,2026-05-23 11:26:08,True,2026-05-23 11:26:09,0.7653990387916565,0.914251446723938,0.19878524541854858
+trm_baseline,156246,51422.119869545,14:17:02.12,2026-05-23 13:48:58,True,2026-05-23 13:48:58,0.7596656680107117,0.9119072556495667,0.2041437327861786
+trm_baseline,182287,59826.120123505,16:37:06.12,2026-05-23 16:09:02,True,2026-05-23 16:09:02,0.7541900873184204,0.9094774723052979,0.2100999504327774
+trm_baseline,208328,68138.229200214,18:55:38.23,2026-05-23 18:27:34,True,2026-05-23 18:27:34,0.7732800841331482,0.9166732430458069,0.19410833716392517
+trm_baseline,234369,76460.482892161,21:14:20.48,2026-05-23 20:46:17,True,2026-05-23 20:46:17,0.7750374674797058,0.9172152280807495,0.19279460608959198
+trm_baseline,260410,84783.527271934,23:33:03.53,2026-05-23 23:05:00,True,2026-05-23 23:05:00,0.7742025256156921,0.9169425964355469,0.19348150491714478
+trm_multi4,26041,22216.1844709,06:10:16.18,2026-05-28 03:18:26,True,2026-05-28 03:18:26,0.7394047975540161,0.9067130088806152,0.21417337656021118
+trm_multi4,52082,44853.735386924,12:27:33.74,2026-05-28 09:35:44,True,2026-05-28 09:35:44,0.8449901342391968,0.9424432516098022,0.1342233270406723