{
"author": "Renier Velazco",
"github_id": "chonchiog",
"name": "FP16 Tied Embedding + LR/Warmdown Tuning",
"blurb": "Keep tok_emb.weight in fp16 during int8 quantization to eliminate the output-head quantization gap (0.007 -> 0.0005 BPB). Slightly reduce MLP hidden (992 vs 1024) to fit within 16MB. Tune warmdown (3600 vs 1200) and matrix LR (0.06 vs 0.04) for better convergence under the 10-min wallclock cap.",
"date": "2026-03-18",
"val_loss": 2.05945460,
"val_bpb": 1.21972502,
"bytes_total": 15896222,
"bytes_code": 48125
}