From 5bfd92f6c28530482a765252a4497cfedacad25a Mon Sep 17 00:00:00 2001 From: Yuren Hao Date: Wed, 10 Sep 2025 12:09:06 -0500 Subject: smoke tests --- runs/20250910/baseline_eval/main/ppl/metrics.json | 6 ++++++ runs/20250910/baseline_eval/main/ppl/preds.jsonl | 5 +++++ 2 files changed, 11 insertions(+) create mode 100644 runs/20250910/baseline_eval/main/ppl/metrics.json create mode 100644 runs/20250910/baseline_eval/main/ppl/preds.jsonl (limited to 'runs/20250910/baseline_eval/main/ppl') diff --git a/runs/20250910/baseline_eval/main/ppl/metrics.json b/runs/20250910/baseline_eval/main/ppl/metrics.json new file mode 100644 index 0000000..f699464 --- /dev/null +++ b/runs/20250910/baseline_eval/main/ppl/metrics.json @@ -0,0 +1,6 @@ +{ + "timestamp": "2025-09-10 12:07:03", + "count": 5, + "tokens": 78, + "ppl": 30.860812633259105 +} \ No newline at end of file diff --git a/runs/20250910/baseline_eval/main/ppl/preds.jsonl b/runs/20250910/baseline_eval/main/ppl/preds.jsonl new file mode 100644 index 0000000..5be2275 --- /dev/null +++ b/runs/20250910/baseline_eval/main/ppl/preds.jsonl @@ -0,0 +1,5 @@ +{"id": "ppl_0001", "text": "The museum opened its new exhibition on early astronomy, featuring instruments used to chart the night sky.", "nll": 59.5, "tokens": 18} +{"id": "ppl_0002", "text": "After several failed attempts, the startup finally found product-market fit and began to scale operations.", "nll": 42.75, "tokens": 17} +{"id": "ppl_0003", "text": "The committee released a summary outlining the proposed changes to the academic calendar.", "nll": 50.5, "tokens": 13} +{"id": "ppl_0004", "text": "When the storm subsided, volunteers coordinated to clear debris and restore power to the neighborhood.", "nll": 52.25, "tokens": 17} +{"id": "ppl_0005", "text": "The journal article argued that measurement error can significantly bias small-sample estimates.", "nll": 62.5, "tokens": 13} -- cgit v1.2.3