From e89fcf8acf8e9fd3bf63e9809c160a4e510be61b Mon Sep 17 00:00:00 2001 From: spokane-way Date: Thu, 19 Mar 2026 10:25:29 -0700 Subject: SOTA attempt (val_bpb=1.2064) (#49) * SOTA attempt * Improve score on SXM --------- Co-authored-by: spokane-way --- .../2026-03-18_LongContextSeq2048/submission.json | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 records/track_10min_16mb/2026-03-18_LongContextSeq2048/submission.json (limited to 'records/track_10min_16mb/2026-03-18_LongContextSeq2048/submission.json') diff --git a/records/track_10min_16mb/2026-03-18_LongContextSeq2048/submission.json b/records/track_10min_16mb/2026-03-18_LongContextSeq2048/submission.json new file mode 100644 index 0000000..049a8b7 --- /dev/null +++ b/records/track_10min_16mb/2026-03-18_LongContextSeq2048/submission.json @@ -0,0 +1,11 @@ +{ + "author": "Spokane Way", + "github_id": "spokane-way", + "name": "Long Context Seq2048 v2", + "blurb": "SP-1024 9x512 KV4 run at TRAIN_SEQ_LEN=2048 with tuned seq2048 learning rates (0.040/0.032/0.032). This standalone record script reproduces the SXM-verified 10-minute artifact under the 16,000,000-byte cap.", + "date": "2026-03-19T04:50:00Z", + "val_loss": 2.03588345, + "val_bpb": 1.20576485, + "bytes_total": 15867270, + "bytes_code": 47716 +} -- cgit v1.2.3