records/track_10min_16mb/2026-03-18_LowerLR/submission.json


1
2
3
4
5
6
7
8
9
10
11

{
  "author": "Nan Liu",
  "github_id": "nanlliu",
  "name": "Lower LR",
  "blurb": "Same 9x512 SP-1024 KV4 tied-embedding baseline architecture with lower Muon/Adam learning rates (MATRIX_LR=0.02, SCALAR_LR=0.02, TIED_EMBED_LR=0.03). Systematic LR sweep showed default 0.04 was too high; optimal is ~0.02.",
  "date": "2026-03-18T22:30:00Z",
  "val_loss": 2.06492760,
  "val_bpb": 1.22296644,
  "bytes_total": 15854246,
  "bytes_code": 50919
}