# 2x2 analysis (convergence x correctness) — generated 2026-06-11

## hrm26040_n8192

- npz: `/home/yurenh2/rrm/research/flossing/diag_8k.npz`, n=8192, exact_acc=0.525
- late-drift def: mean(drift_zH[:, -4:]), Otsu tau(log10)=0.766, frac_converged=0.509

| cell | n | lam1 median | lam1 IQR | token_acc med |
|---|---|---|---|---|
| A_conv_correct | 4103 | -0.8617 | [-0.9020, -0.8153] | 1.000 |
| B_conv_wrong | 63 | -0.5912 | [-0.8091, -0.5061] | 0.630 |
| C_nonconv_correct | 195 | -0.6943 | [-0.7354, -0.6466] | 1.000 |
| D_nonconv_wrong | 3831 | -0.5998 | [-0.6475, -0.5488] | 0.630 |

- mixture: wrong-that-converged = 0.016; correct-that-nonconverged = 0.045
- dlam1(correct-wrong): overall -0.2586; within-conv -0.2705; within-nonconv -0.0945
- dlam1(wrong: conv - nonconv) = +0.0085
- AUC(-lam1 -> correct): overall 0.984; within-conv 0.852; within-nonconv 0.818
- AUC(-log d_late -> correct) = 0.964; AUC(-lam1 -> converged) = 0.986

## trm_singleGPU_step260410_n512

- npz: `/home/yurenh2/rrm/research/flossing/diag_trm_singleGPU_step260410_512.npz`, n=512, exact_acc=0.770
- late-drift def: mean(drift_zH[:, -4:]), Otsu tau(log10)=1.406, frac_converged=0.748

| cell | n | lam1 median | lam1 IQR | token_acc med |
|---|---|---|---|---|
| A_conv_correct | 383 | +0.0047 | [+0.0007, +0.0137] | 1.000 |
| B_conv_wrong | 0 | - | - | - |
| C_nonconv_correct | 11 | +0.0998 | [+0.0907, +0.1035] | 1.000 |
| D_nonconv_wrong | 118 | +0.1023 | [+0.0935, +0.1112] | 0.642 |

- mixture: wrong-that-converged = 0.000; correct-that-nonconverged = 0.028
- dlam1(correct-wrong): overall -0.0973; within-conv +nan; within-nonconv -0.0025
- dlam1(wrong: conv - nonconv) = +nan
- AUC(-lam1 -> correct): overall 0.989; within-conv nan; within-nonconv 0.619
- AUC(-log d_late -> correct) = 0.975; AUC(-lam1 -> converged) = 0.996

## trm_singleGPU_step130205_n512

- npz: `/home/yurenh2/rrm/research/flossing/diag_trm_singleGPU_step130205_512.npz`, n=512, exact_acc=0.756
- late-drift def: mean(drift_zH[:, -4:]), Otsu tau(log10)=1.213, frac_converged=0.713

| cell | n | lam1 median | lam1 IQR | token_acc med |
|---|---|---|---|---|
| A_conv_correct | 365 | +0.0043 | [-0.0007, +0.0141] | 1.000 |
| B_conv_wrong | 0 | - | - | - |
| C_nonconv_correct | 22 | +0.0852 | [+0.0694, +0.0988] | 1.000 |
| D_nonconv_wrong | 125 | +0.1067 | [+0.0995, +0.1126] | 0.630 |

- mixture: wrong-that-converged = 0.000; correct-that-nonconverged = 0.057
- dlam1(correct-wrong): overall -0.1017; within-conv +nan; within-nonconv -0.0215
- dlam1(wrong: conv - nonconv) = +nan
- AUC(-lam1 -> correct): overall 0.989; within-conv nan; within-nonconv 0.805
- AUC(-log d_late -> correct) = 0.957; AUC(-lam1 -> converged) = 0.996

## trm_step13020_n512

- npz: `/home/yurenh2/rrm/research/flossing/diag_trm_step13020_512.npz`, n=512, exact_acc=0.596
- late-drift def: mean(drift_zH[:, -4:]), Otsu tau(log10)=0.730, frac_converged=0.598

| cell | n | lam1 median | lam1 IQR | token_acc med |
|---|---|---|---|---|
| A_conv_correct | 296 | -0.0043 | [-0.0113, +0.0052] | 1.000 |
| B_conv_wrong | 10 | -0.0027 | [-0.0090, +0.0009] | 0.556 |
| C_nonconv_correct | 9 | +0.0419 | [+0.0323, +0.0480] | 1.000 |
| D_nonconv_wrong | 197 | +0.0317 | [+0.0143, +0.0462] | 0.642 |

- mixture: wrong-that-converged = 0.048; correct-that-nonconverged = 0.030
- dlam1(correct-wrong): overall -0.0339; within-conv -0.0016; within-nonconv +0.0102
- dlam1(wrong: conv - nonconv) = -0.0344
- AUC(-lam1 -> correct): overall 0.849; within-conv 0.515; within-nonconv 0.360
- AUC(-log d_late -> correct) = 0.976; AUC(-lam1 -> converged) = 0.887