From 9d5f2379ac25b4b58e2600544f61172dbb15b67a Mon Sep 17 00:00:00 2001
From: Yuren Hao
Date: Wed, 10 Sep 2025 12:41:28 -0500
Subject: fix ctf

---
 runs/20250910/baseline_eval/summary.md | 12 ++++++++++++
 1 file changed, 12 insertions(+)
 create mode 100644 runs/20250910/baseline_eval/summary.md

(limited to 'runs/20250910/baseline_eval/summary.md')

diff --git a/runs/20250910/baseline_eval/summary.md b/runs/20250910/baseline_eval/summary.md
new file mode 100644
index 0000000..0fb4b1a
--- /dev/null
+++ b/runs/20250910/baseline_eval/summary.md
@@ -0,0 +1,12 @@
+# Baseline Summary
+- Generated: 2025-09-10T12:39:36
+
+## Bias
+- **CTF-gap**: 0.000173 ± 0.000180 (coverage=1.00)
+- **JSD_swap**: 0.058773 ± 0.035938
+- **CrowS ΔlogP** (anti−stereo): 1.335026 ± 2.238039
+- **Wino Acc**: 0.200 ± 0.351
+
+## Main
+- **MATH EM**: 0.200 ± 0.351
+- **PPL**: 30.86
-- 
cgit v1.2.3