From d84a3e819100504d96879e1e36d022efa5cbb81b Mon Sep 17 00:00:00 2001 From: Matthew Li <156706407+mattqlf@users.noreply.github.com> Date: Thu, 19 Mar 2026 13:28:12 -0400 Subject: Add record: Sliding Window Eval (stride=64), val_bpb=1.1925 (#50) --- .../2026-03-19_SlidingWindowEval/submission.json | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 records/track_10min_16mb/2026-03-19_SlidingWindowEval/submission.json (limited to 'records/track_10min_16mb/2026-03-19_SlidingWindowEval/submission.json') diff --git a/records/track_10min_16mb/2026-03-19_SlidingWindowEval/submission.json b/records/track_10min_16mb/2026-03-19_SlidingWindowEval/submission.json new file mode 100644 index 0000000..d25b325 --- /dev/null +++ b/records/track_10min_16mb/2026-03-19_SlidingWindowEval/submission.json @@ -0,0 +1,17 @@ +{ + "author": "Matthew Li", + "github_id": "mattqlf", + "name": "Sliding Window Eval (stride=64)", + "blurb": "Baseline 9x512 SP-1024 architecture with sliding window evaluation at stride=64. Each token is scored with 960+ tokens of context instead of the baseline's 0-1023. Training is identical to the naive baseline; the improvement comes entirely from the evaluation strategy. Post-quant int8+zlib roundtrip under the 16,000,000-byte cap.", + "date": "2026-03-19T04:48:00Z", + "val_loss": 2.01348383, + "val_bpb": 1.19250007, + "pre_quant_val_loss": 2.0592, + "pre_quant_val_bpb": 1.2196, + "step_stop": 13450, + "wallclock_seconds": 600.028, + "eval_time_seconds": 69.881, + "bytes_total": 15874829, + "bytes_model_int8_zlib": 15816489, + "bytes_code": 58340 +} \ No newline at end of file -- cgit v1.2.3