{ "author": "Matthew Li", "github_id": "mattqlf", "name": "Sliding Window Eval (stride=64)", "blurb": "Baseline 9x512 SP-1024 architecture with sliding window evaluation at stride=64. Each token is scored with 960+ tokens of context, instead of the anywhere from 0 to 1023 tokens it gets under the baseline evaluation. Training is identical to the naive baseline; the improvement comes entirely from the evaluation strategy. Scores are from the post-quant int8+zlib roundtrip, and the total size is under the 16,000,000-byte cap.", "date": "2026-03-19T04:48:00Z", "val_loss": 2.01348383, "val_bpb": 1.19250007, "pre_quant_val_loss": 2.0592, "pre_quant_val_bpb": 1.2196, "step_stop": 13450, "wallclock_seconds": 600.028, "eval_time_seconds": 69.881, "bytes_total": 15874829, "bytes_model_int8_zlib": 15816489, "bytes_code": 58340 }