From 896df7f11b441a9b8dfa50820024a82884da58d0 Mon Sep 17 00:00:00 2001 From: BLUESKY477 Date: Fri, 22 May 2026 19:23:44 -0500 Subject: Add files via upload --- .../dense_retrieval_baselines/dense_summary.csv | 9 +++++ .../dense_retrieval_baselines/dense_summary.md | 20 ++++++++++ .../review_user_K4/summary.json | 39 ++++++++++++++++++ .../topic_user_K4/summary.json | 39 ++++++++++++++++++ .../diagnostics_full/review_user_K4/summary.json | 25 ++++++++++++ .../diagnostics_full/topic_user_K4/summary.json | 25 ++++++++++++ .../review_user_K4/summary.json | 46 ++++++++++++++++++++++ .../topic_user_K4/summary.json | 46 ++++++++++++++++++++++ 8 files changed, 249 insertions(+) create mode 100644 resulets/outputs/dense_retrieval_baselines/dense_summary.csv create mode 100644 resulets/outputs/dense_retrieval_baselines/dense_summary.md create mode 100644 resulets/outputs/dense_retrieval_baselines/review_user_K4/summary.json create mode 100644 resulets/outputs/dense_retrieval_baselines/topic_user_K4/summary.json create mode 100644 resulets/outputs/diagnostics_full/review_user_K4/summary.json create mode 100644 resulets/outputs/diagnostics_full/topic_user_K4/summary.json create mode 100644 resulets/outputs/injection_ablation_rerun/review_user_K4/summary.json create mode 100644 resulets/outputs/injection_ablation_rerun/topic_user_K4/summary.json (limited to 'resulets/outputs') diff --git a/resulets/outputs/dense_retrieval_baselines/dense_summary.csv b/resulets/outputs/dense_retrieval_baselines/dense_summary.csv new file mode 100644 index 0000000..587b09f --- /dev/null +++ b/resulets/outputs/dense_retrieval_baselines/dense_summary.csv @@ -0,0 +1,9 @@ +task,setting,K,method,model,retrieval_text,year,rougeL,meteor,sfd_nolen,avg_len +review,user,4,dense_minilm_top1,sentence-transformers/all-MiniLM-L6-v2,input_output,MiniLM 2020; Sentence-Transformers checkpoint circa 2021,0.13634167996937627,0.19007702913610236,0.6699762816479236,213.215 +review,user,4,dense_mpnet_top1,sentence-transformers/all-mpnet-base-v2,input_output,MPNet 2020; Sentence-Transformers checkpoint circa 2021,0.1373899379012414,0.19026458136155466,0.7026739815740055,217.525 +review,user,4,dense_e5_top1,intfloat/e5-base-v2,input_output,E5 2022,0.13778466039449483,0.19137571071197623,0.6781906955515157,219.245 +review,user,4,dense_bge_top1,BAAI/bge-base-en-v1.5,input_output,BGE v1.5 2023,0.1397550736294082,0.19234841124713037,0.6609804389020235,214.785 +topic,user,4,dense_minilm_top1,sentence-transformers/all-MiniLM-L6-v2,input_output,MiniLM 2020; Sentence-Transformers checkpoint circa 2021,0.1165623349420512,0.18663326580520853,0.7993059511162214,227.195 +topic,user,4,dense_mpnet_top1,sentence-transformers/all-mpnet-base-v2,input_output,MPNet 2020; Sentence-Transformers checkpoint circa 2021,0.11818723277088544,0.18875778323321796,0.8179664549308402,232.69 +topic,user,4,dense_e5_top1,intfloat/e5-base-v2,input_output,E5 2022,0.11910482780107752,0.18752784709181344,0.8803836118758828,236.06 +topic,user,4,dense_bge_top1,BAAI/bge-base-en-v1.5,input_output,BGE v1.5 2023,0.11914980629806343,0.18572971446713582,0.793203306935874,224.995 diff --git a/resulets/outputs/dense_retrieval_baselines/dense_summary.md b/resulets/outputs/dense_retrieval_baselines/dense_summary.md new file mode 100644 index 0000000..54529fd --- /dev/null +++ b/resulets/outputs/dense_retrieval_baselines/dense_summary.md @@ -0,0 +1,20 @@ +# Dense Retrieval Baseline Summary + +All runs use `K=4`, greedy decoding, and `support_input + support_output` as the retrieval passage text. + +| Task | Method | Model | Year | ROUGE-L | METEOR | SFD_-len | Avg Len | +|---|---|---|---|---:|---:|---:|---:| +| topic_user | dense_minilm_top1 | sentence-transformers/all-MiniLM-L6-v2 | MiniLM 2020; ST checkpoint circa 2021 | 0.1166 | 0.1866 | 0.7993 | 227.2 | +| topic_user | dense_mpnet_top1 | sentence-transformers/all-mpnet-base-v2 | MPNet 2020; ST checkpoint circa 2021 | 0.1182 | 0.1888 | 0.8180 | 232.7 | +| topic_user | dense_e5_top1 | intfloat/e5-base-v2 | E5 2022 | 0.1191 | 0.1875 | 0.8804 | 236.1 | +| topic_user | dense_bge_top1 | BAAI/bge-base-en-v1.5 | BGE v1.5 2023 | 0.1191 | 0.1857 | 0.7932 | 225.0 | +| review_user | dense_minilm_top1 | sentence-transformers/all-MiniLM-L6-v2 | MiniLM 2020; ST checkpoint circa 2021 | 0.1363 | 0.1901 | 0.6700 | 213.2 | +| review_user | dense_mpnet_top1 | sentence-transformers/all-mpnet-base-v2 | MPNet 2020; ST checkpoint circa 2021 | 0.1374 | 0.1903 | 0.7027 | 217.5 | +| review_user | dense_e5_top1 | intfloat/e5-base-v2 | E5 2022 | 0.1378 | 0.1914 | 0.6782 | 219.2 | +| review_user | dense_bge_top1 | BAAI/bge-base-en-v1.5 | BGE v1.5 2023 | 0.1398 | 0.1923 | 0.6610 | 214.8 | + +Best dense retriever: + +- `topic_user`: `dense_bge_top1` by ROUGE-L, essentially tied with `dense_e5_top1`. +- `review_user`: `dense_bge_top1`. + diff --git a/resulets/outputs/dense_retrieval_baselines/review_user_K4/summary.json b/resulets/outputs/dense_retrieval_baselines/review_user_K4/summary.json new file mode 100644 index 0000000..08e7540 --- /dev/null +++ b/resulets/outputs/dense_retrieval_baselines/review_user_K4/summary.json @@ -0,0 +1,39 @@ +{ + "aggregate": { + "dense_minilm_top1": { + "rougeL": 0.13634167996937627, + "meteor": 0.19007702913610236, + "sfd_nolen": 0.6699762816479236, + "avg_len": 213.215 + }, + "dense_mpnet_top1": { + "rougeL": 0.1373899379012414, + "meteor": 0.19026458136155466, + "sfd_nolen": 0.7026739815740055, + "avg_len": 217.525 + }, + "dense_e5_top1": { + "rougeL": 0.13778466039449483, + "meteor": 0.19137571071197623, + "sfd_nolen": 0.6781906955515157, + "avg_len": 219.245 + }, + "dense_bge_top1": { + "rougeL": 0.1397550736294082, + "meteor": 0.19234841124713037, + "sfd_nolen": 0.6609804389020235, + "avg_len": 214.785 + } + }, + "significance": {}, + "num_examples": 200, + "task": "review", + "setting": "user", + "K": 4, + "methods": [ + "dense_minilm_top1", + "dense_mpnet_top1", + "dense_e5_top1", + "dense_bge_top1" + ] +} \ No newline at end of file diff --git a/resulets/outputs/dense_retrieval_baselines/topic_user_K4/summary.json b/resulets/outputs/dense_retrieval_baselines/topic_user_K4/summary.json new file mode 100644 index 0000000..bd2cdd8 --- /dev/null +++ b/resulets/outputs/dense_retrieval_baselines/topic_user_K4/summary.json @@ -0,0 +1,39 @@ +{ + "aggregate": { + "dense_minilm_top1": { + "rougeL": 0.1165623349420512, + "meteor": 0.18663326580520853, + "sfd_nolen": 0.7993059511162214, + "avg_len": 227.195 + }, + "dense_mpnet_top1": { + "rougeL": 0.11818723277088544, + "meteor": 0.18875778323321796, + "sfd_nolen": 0.8179664549308402, + "avg_len": 232.69 + }, + "dense_e5_top1": { + "rougeL": 0.11910482780107752, + "meteor": 0.18752784709181344, + "sfd_nolen": 0.8803836118758828, + "avg_len": 236.06 + }, + "dense_bge_top1": { + "rougeL": 0.11914980629806343, + "meteor": 0.18572971446713582, + "sfd_nolen": 0.793203306935874, + "avg_len": 224.995 + } + }, + "significance": {}, + "num_examples": 200, + "task": "topic", + "setting": "user", + "K": 4, + "methods": [ + "dense_minilm_top1", + "dense_mpnet_top1", + "dense_e5_top1", + "dense_bge_top1" + ] +} \ No newline at end of file diff --git a/resulets/outputs/diagnostics_full/review_user_K4/summary.json b/resulets/outputs/diagnostics_full/review_user_K4/summary.json new file mode 100644 index 0000000..e017734 --- /dev/null +++ b/resulets/outputs/diagnostics_full/review_user_K4/summary.json @@ -0,0 +1,25 @@ +{ + "aggregate": { + "user_unigram_bias": { + "rougeL": 0.12527160864644804, + "meteor": 0.15533419842583318, + "sfd_nolen": 0.9565568018108134, + "avg_len": 166.495 + }, + "learned_sparse_logit_bias": { + "rougeL": 0.12442403714079388, + "meteor": 0.15573424774252828, + "sfd_nolen": 0.9361127095590612, + "avg_len": 165.905 + } + }, + "significance": {}, + "num_examples": 200, + "task": "review", + "setting": "user", + "K": 4, + "methods": [ + "user_unigram_bias", + "learned_sparse_logit_bias" + ] +} \ No newline at end of file diff --git a/resulets/outputs/diagnostics_full/topic_user_K4/summary.json b/resulets/outputs/diagnostics_full/topic_user_K4/summary.json new file mode 100644 index 0000000..b3b82d4 --- /dev/null +++ b/resulets/outputs/diagnostics_full/topic_user_K4/summary.json @@ -0,0 +1,25 @@ +{ + "aggregate": { + "user_unigram_bias": { + "rougeL": 0.11952007854701062, + "meteor": 0.20468026869316788, + "sfd_nolen": 1.045830797035999, + "avg_len": 247.045 + }, + "learned_sparse_logit_bias": { + "rougeL": 0.11851260526759347, + "meteor": 0.20384780674291916, + "sfd_nolen": 0.891284645760399, + "avg_len": 246.92 + } + }, + "significance": {}, + "num_examples": 200, + "task": "topic", + "setting": "user", + "K": 4, + "methods": [ + "user_unigram_bias", + "learned_sparse_logit_bias" + ] +} \ No newline at end of file diff --git a/resulets/outputs/injection_ablation_rerun/review_user_K4/summary.json b/resulets/outputs/injection_ablation_rerun/review_user_K4/summary.json new file mode 100644 index 0000000..4f1bce5 --- /dev/null +++ b/resulets/outputs/injection_ablation_rerun/review_user_K4/summary.json @@ -0,0 +1,46 @@ +{ + "aggregate": { + "uph": { + "rougeL": 0.12591913138908858, + "meteor": 0.15704431994591794, + "sfd_nolen": 0.9380754971612366, + "avg_len": 165.04 + }, + "lm_head_update": { + "rougeL": 0.1381619922784921, + "meteor": 0.14988041373383443, + "sfd_nolen": 1.2312511738320773, + "avg_len": 142.135 + } + }, + "significance": { + "lm_head_update": { + "rougeL": { + "mean_a": 0.12591913138908858, + "mean_b": 0.1381619922784921, + "mean_diff": -0.012242860889403491, + "ci_low": -0.015769665293717965, + "ci_high": -0.008716056485089017, + "t_pval": 1.1679421884663955e-10, + "w_pval": 5.342212114561821e-11 + }, + "sfd_nolen": { + "mean_a": 0.9380754971612366, + "mean_b": 1.2312511738320773, + "mean_diff": -0.2931756766708408, + "ci_low": -1.0036757003149965, + "ci_high": 0.41732434697331494, + "t_pval": 0.41961878992446333, + "w_pval": 0.04049481176403265 + } + } + }, + "num_examples": 200, + "task": "review", + "setting": "user", + "K": 4, + "methods": [ + "uph", + "lm_head_update" + ] +} \ No newline at end of file diff --git a/resulets/outputs/injection_ablation_rerun/topic_user_K4/summary.json b/resulets/outputs/injection_ablation_rerun/topic_user_K4/summary.json new file mode 100644 index 0000000..47e5734 --- /dev/null +++ b/resulets/outputs/injection_ablation_rerun/topic_user_K4/summary.json @@ -0,0 +1,46 @@ +{ + "aggregate": { + "uph": { + "rougeL": 0.11947665707568338, + "meteor": 0.2031354029453746, + "sfd_nolen": 0.8995390462886158, + "avg_len": 246.47 + }, + "lm_head_update": { + "rougeL": 0.12993177009628162, + "meteor": 0.19363473440376885, + "sfd_nolen": 0.9707037426803997, + "avg_len": 253.99 + } + }, + "significance": { + "lm_head_update": { + "rougeL": { + "mean_a": 0.11947665707568338, + "mean_b": 0.12993177009628162, + "mean_diff": -0.010455113020598263, + "ci_low": -0.013828552374635705, + "ci_high": -0.0070816736665608206, + "t_pval": 6.213636827361804e-09, + "w_pval": 1.1330271751337874e-11 + }, + "sfd_nolen": { + "mean_a": 0.8995390462886158, + "mean_b": 0.9707037426803997, + "mean_diff": -0.07116469639178391, + "ci_low": -0.1589445052822547, + "ci_high": 0.016615112498686885, + "t_pval": 0.1136461024786351, + "w_pval": 0.7252815218848135 + } + } + }, + "num_examples": 200, + "task": "topic", + "setting": "user", + "K": 4, + "methods": [ + "uph", + "lm_head_update" + ] +} \ No newline at end of file -- cgit v1.2.3