From 555669e8330472143139c2f82bba15baab1a5e0d Mon Sep 17 00:00:00 2001
From: Sam Larson <166414725+saml212@users.noreply.github.com>
Date: Thu, 19 Mar 2026 14:28:57 -0700
Subject: Int6 + MLP 3x + sliding window: val_bpb=1.1574 (#61)

* Warmdown-quantization co-optimization, val_bpb=1.2154

Novel finding: aggressive LR decay (WARMDOWN_ITERS=20000) reduces the int8 quantization
penalty from 0.014 to 0.005 BPB. This is combined with FP16 tied embeddings and moderate
NTK-RoPE extrapolation (eval@1408).

The README contains the full warmdown sweep across 10 values and a detailed analysis.

* Breakthrough: 1.1574 BPB via int6 quantization + MLP 3x + sliding-window eval (stride=256)

---------

Co-authored-by: Sam Larson <saml212@users.noreply.github.com>
---
 .../2026-03-19_WarmdownQuantization/submission.json           | 11 +++++++++++
 1 file changed, 11 insertions(+)
 create mode 100644 records/track_10min_16mb/2026-03-19_WarmdownQuantization/submission.json

(limited to 'records/track_10min_16mb/2026-03-19_WarmdownQuantization/submission.json')

diff --git a/records/track_10min_16mb/2026-03-19_WarmdownQuantization/submission.json b/records/track_10min_16mb/2026-03-19_WarmdownQuantization/submission.json
new file mode 100644
index 0000000..07c58c3
--- /dev/null
+++ b/records/track_10min_16mb/2026-03-19_WarmdownQuantization/submission.json
@@ -0,0 +1,11 @@
+{
+  "author": "samuellarson",
+  "github_id": "samuellarson",
+  "name": "Int6 MLP3x Sliding Window",
+  "blurb": "Int6 post-training quantization enables 3x MLP expansion (21.8M params in 16MB). Combined with train@2048 + sliding window eval + FP16 tied embeddings + Late-K passthrough.",
+  "date": "2026-03-20",
+  "val_loss": 1.95428963,
+  "val_bpb": 1.15744040,
+  "bytes_total": 15977717,
+  "bytes_code": 51200
+}
-- 
cgit v1.2.3