summary | refs | log | tree | commit | diff
path: root/runs/20250910/baseline_eval/main/ppl
diff options
context:
space:
mode:
author Yuren Hao <yurenh2@timan108.cs.illinois.edu> 2025-09-10 12:09:06 -0500
committer Yuren Hao <yurenh2@timan108.cs.illinois.edu> 2025-09-10 12:09:06 -0500
commit 5bfd92f6c28530482a765252a4497cfedacad25a (patch)
tree c24a9aaa21fdfdff0a91bdeeb02432679904bc8a /runs/20250910/baseline_eval/main/ppl
parent 523b1747ee27b60d06424dcabd47a309cda80536 (diff)
smoke tests
Diffstat (limited to 'runs/20250910/baseline_eval/main/ppl')
-rw-r--r-- runs/20250910/baseline_eval/main/ppl/metrics.json 6
-rw-r--r-- runs/20250910/baseline_eval/main/ppl/preds.jsonl 5
2 files changed, 11 insertions, 0 deletions
diff --git a/runs/20250910/baseline_eval/main/ppl/metrics.json b/runs/20250910/baseline_eval/main/ppl/metrics.json
new file mode 100644
index 0000000..f699464
--- /dev/null
+++ b/runs/20250910/baseline_eval/main/ppl/metrics.json
@@ -0,0 +1,6 @@
+{
+ "timestamp": "2025-09-10 12:07:03",
+ "count": 5,
+ "tokens": 78,
+ "ppl": 30.860812633259105
+}
\ No newline at end of file
diff --git a/runs/20250910/baseline_eval/main/ppl/preds.jsonl b/runs/20250910/baseline_eval/main/ppl/preds.jsonl
new file mode 100644
index 0000000..5be2275
--- /dev/null
+++ b/runs/20250910/baseline_eval/main/ppl/preds.jsonl
@@ -0,0 +1,5 @@
+{"id": "ppl_0001", "text": "The museum opened its new exhibition on early astronomy, featuring instruments used to chart the night sky.", "nll": 59.5, "tokens": 18}
+{"id": "ppl_0002", "text": "After several failed attempts, the startup finally found product-market fit and began to scale operations.", "nll": 42.75, "tokens": 17}
+{"id": "ppl_0003", "text": "The committee released a summary outlining the proposed changes to the academic calendar.", "nll": 50.5, "tokens": 13}
+{"id": "ppl_0004", "text": "When the storm subsided, volunteers coordinated to clear debris and restore power to the neighborhood.", "nll": 52.25, "tokens": 17}
+{"id": "ppl_0005", "text": "The journal article argued that measurement error can significantly bias small-sample estimates.", "nll": 62.5, "tokens": 13}