diff options
Diffstat (limited to 'records/track_10min_16mb/2026-03-18_LowerLR/submission.json')
| -rw-r--r-- | records/track_10min_16mb/2026-03-18_LowerLR/submission.json | 11 |
1 files changed, 11 insertions, 0 deletions
diff --git a/records/track_10min_16mb/2026-03-18_LowerLR/submission.json b/records/track_10min_16mb/2026-03-18_LowerLR/submission.json new file mode 100644 index 0000000..42ec327 --- /dev/null +++ b/records/track_10min_16mb/2026-03-18_LowerLR/submission.json @@ -0,0 +1,11 @@ +{ + "author": "Nan Liu", + "github_id": "nanlliu", + "name": "Lower LR", + "blurb": "Same 9x512 SP-1024 KV4 tied-embedding baseline architecture with lower Muon/Adam learning rates (MATRIX_LR=0.02, SCALAR_LR=0.02, TIED_EMBED_LR=0.03). Systematic LR sweep showed default 0.04 was too high; optimal is ~0.02.", + "date": "2026-03-18T22:30:00Z", + "val_loss": 2.06492760, + "val_bpb": 1.22296644, + "bytes_total": 15854246, + "bytes_code": 50919 +} |
