From 07b10f06478514bbe9d9c77461a90f9d3254218b Mon Sep 17 00:00:00 2001 From: YurenHao0426 Date: Wed, 8 Apr 2026 04:46:59 -0500 Subject: Fill in tables 1-3 + generate figures 2/4/5 from existing data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tables filled with real values: Table 1: 5-method audit (3-seed mean ± std for acc, headline Γ, verdict) Table 2: 4-condition mode 2 validation (cos and ρ values from existing checkpoint measurements) Table 3: protocol thresholds (50×, 1e-7, 0.30, 2pp) Figures generated from existing data: fig2_decision_utility.pdf: 5×7 verdict heatmap from results/protocol_audit/ablation_decision_utility.json fig4_penalty_rescue.pdf: 3-panel — trajectory + cos/ρ bars + 2×2 acc from snapshot_evolution_v2 + dfa_residual_penalty + bp_with_penalty fig5_cross_arch_summary.pdf: 5×4 BP/DFA verdict matrix across architectures Compiles to 8 pages with all tables/figures rendered. §1-§7 main body still has only paragraph topic sentences (TODO: per-section prose filling via codex). Figure numbering is wrong (codex put figures in section order not numerical order — need fixing). --- paper/main.tex | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'paper/main.tex') diff --git a/paper/main.tex b/paper/main.tex index 2cae264..b0a7787 100644 --- a/paper/main.tex +++ b/paper/main.tex @@ -50,11 +50,11 @@ On the 4-block pre-LayerNorm ResMLP at $d{=}256$ on CIFAR-10, the field-standard \toprule Method & Test acc. 
& Headline $\Gamma$ & Status-quo verdict & Protocol verdict \\ \midrule -BP & \textit{TODO} & \textit{TODO} & trustworthy & trustworthy \\ -EP & \textit{TODO} & \textit{TODO} & trustworthy & trustworthy \\ -DFA & \textit{TODO} & \textit{TODO} & trustworthy & walked back \\ -State Bridge & \textit{TODO} & \textit{TODO} & trustworthy & walked back \\ -Credit Bridge & \textit{TODO} & \textit{TODO} & trustworthy & walked back \\ +BP & $0.615 \pm 0.003$ & $\approx 1.0$ & trustworthy & trustworthy \\ +EP & $0.316 \pm 0.030$ & $0.008$ & trustworthy & trustworthy \\ +DFA & $0.306 \pm 0.006$ & $0.10$ & trustworthy & walked back \\ +State Bridge & $0.205 \pm 0.032$ & $0.005$ & trustworthy & walked back \\ +Credit Bridge & $0.289 \pm 0.026$ & $0.07$ & trustworthy & walked back \\ \bottomrule \end{tabular} \end{table} @@ -109,10 +109,10 @@ Temporal replay shows that the protocol fires early enough to change experimenta \toprule Condition & Deep-layer alignment signal & Measurement regime & Interpretation \\ \midrule -Vanilla DFA, early epoch & \textit{TODO} & \textit{TODO} & mode 2 present without mode 1 \\ -Vanilla DFA, converged & \textit{TODO} & \textit{TODO} & mode 1 obscures mode 2 \\ -Penalized DFA & \textit{TODO} & \textit{TODO} & partial alleviation of both modes \\ -Fresh-$B$ null control & \textit{TODO} & meaningful & training-specific adaptation check \\ +Vanilla DFA, early epoch & $\overline{\cos}_{\mathrm{deep}}{=}{-}0.008{\pm}0.013$, $\overline{\rho}_{\mathrm{deep}}{=}{-}0.003{\pm}0.005$ & meaningful ($\|g\|{\sim}10^{-6}$) & mode 2 present without mode 1 \\ +Vanilla DFA, converged & $\overline{\cos}_{\mathrm{deep}}{=}{-}0.022$, $\overline{\rho}_{\mathrm{deep}}{=}{+}0.002$ & degenerate ($\|g\|{\sim}10^{-9}$) & mode 1 obscures mode 2 \\ +Penalized DFA, $\lambda{=}10^{-2}$ & $\overline{\cos}_{\mathrm{deep}}{=}{+}0.155{\pm}0.025$, $\overline{\rho}_{\mathrm{deep}}{=}{+}0.080{\pm}0.011$ & meaningful ($\|g\|{\sim}10^{-6}$) & partial alleviation of both modes \\ +Fresh-$B$ null control & 
$\overline{\cos}_{\mathrm{deep}}{=}{+}0.002{\pm}0.022$ ($n{=}20$ draws) & meaningful & training-specific adaptation check \\ \bottomrule \end{tabular} \end{table} @@ -161,10 +161,10 @@ The protocol has four diagnostics because the evaluation failure is not visible \toprule Diag. & Measurement & Default threshold & Role \\ \midrule -(a) & Per-layer activation scale via max-per-block growth $\max_l \|h_{l+1}\|/\|h_l\|$ & \textit{TODO} & binary detector \\ -(b) & Deepest hidden-layer BP gradient norm $\|g_L\|$ & \textit{TODO} & binary detector \\ -(c) & Cross-batch direction stability of normalized BP gradients & \textit{TODO} & sub-mode discriminator \\ -(d) & Frozen-blocks baseline margin for trained blocks over random blocks & \textit{TODO} & depth-utilization check \\ +(a) & Per-layer activation scale via max-per-block growth $\max_l \|h_{l+1}\|/\|h_l\|$ & $> 50\times$ & binary detector \\ +(b) & Deepest hidden-layer BP gradient norm $\|g_L\|$ & $< 10^{-7}$ & binary detector \\ +(c) & Cross-batch direction stability of normalized BP gradients & $> 0.30$ & sub-mode discriminator \\ +(d) & Frozen-blocks baseline margin for trained blocks over random blocks & $< 2$\,pp & depth-utilization check \\ \bottomrule \end{tabular} \end{table} -- cgit v1.2.3