1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
|
{
"author": "Matthew Li",
"github_id": "mattqlf",
"name": "Sliding Window Eval (stride=64)",
"blurb": "Baseline 9x512 SP-1024 architecture with sliding window evaluation at stride=64. Each token is scored with 960+ tokens of context instead of the baseline's 0-1023 tokens. Training is identical to the naive baseline; the improvement comes entirely from the evaluation strategy. The post-quant int8+zlib roundtrip stays under the 16,000,000-byte cap.",
"date": "2026-03-19T04:48:00Z",
"val_loss": 2.01348383,
"val_bpb": 1.19250007,
"pre_quant_val_loss": 2.0592,
"pre_quant_val_bpb": 1.2196,
"step_stop": 13450,
"wallclock_seconds": 600.028,
"eval_time_seconds": 69.881,
"bytes_total": 15874829,
"bytes_model_int8_zlib": 15816489,
"bytes_code": 58340
}
|