blob: 42ec3279b3b8987a4dd89d5373914d73866c966e (
plain)
1
2
3
4
5
6
7
8
9
10
11
|
{
"author": "Nan Liu",
"github_id": "nanlliu",
"name": "Lower LR",
"blurb": "Same 9x512 SP-1024 KV4 tied-embedding baseline architecture with lower Muon/Adam learning rates (MATRIX_LR=0.02, SCALAR_LR=0.02, TIED_EMBED_LR=0.03). Systematic LR sweep showed default 0.04 was too high; optimal is ~0.02.",
"date": "2026-03-18T22:30:00Z",
"val_loss": 2.06492760,
"val_bpb": 1.22296644,
"bytes_total": 15854246,
"bytes_code": 50919
}
|