summaryrefslogtreecommitdiff
path: root/data
diff options
context:
space:
mode:
Diffstat (limited to 'data')
-rw-r--r--data/processed/grid_search_results.json449
-rw-r--r--data/processed/highbeta_grid_results.json828
-rw-r--r--data/processed/residual_grid_results.json1016
3 files changed, 2293 insertions, 0 deletions
diff --git a/data/processed/grid_search_results.json b/data/processed/grid_search_results.json
new file mode 100644
index 0000000..cdbd3ca
--- /dev/null
+++ b/data/processed/grid_search_results.json
@@ -0,0 +1,449 @@
+{
+ "meta": {
+ "grid_size": 42,
+ "n_questions": 100,
+ "total_grid_evaluations": 4200,
+ "unique_llm_calls": 281,
+ "faiss_llm_calls": 100,
+ "total_llm_calls": 381,
+ "savings_pct": 91.1,
+ "retrieval_time_s": 0.93,
+ "generation_time_s": 735.92,
+ "total_time_s": 1049.0
+ },
+ "faiss_baseline": {
+ "em": 0.32,
+ "f1": 0.4380753968253968
+ },
+ "grid_results": [
+ {
+ "beta": 0.25,
+ "max_iter": 1,
+ "em": 0.14,
+ "f1": 0.1877,
+ "avg_entropy": 7.1785,
+ "avg_energy_gap": 2.7302,
+ "avg_faiss_overlap": 0.002,
+ "avg_steps": 1.0
+ },
+ {
+ "beta": 0.25,
+ "max_iter": 2,
+ "em": 0.14,
+ "f1": 0.1877,
+ "avg_entropy": 7.1785,
+ "avg_energy_gap": 2.7302,
+ "avg_faiss_overlap": 0.002,
+ "avg_steps": 2.0
+ },
+ {
+ "beta": 0.25,
+ "max_iter": 3,
+ "em": 0.14,
+ "f1": 0.1877,
+ "avg_entropy": 7.1785,
+ "avg_energy_gap": 2.7302,
+ "avg_faiss_overlap": 0.002,
+ "avg_steps": 3.0
+ },
+ {
+ "beta": 0.25,
+ "max_iter": 5,
+ "em": 0.14,
+ "f1": 0.1877,
+ "avg_entropy": 7.1785,
+ "avg_energy_gap": 2.7302,
+ "avg_faiss_overlap": 0.002,
+ "avg_steps": 3.0
+ },
+ {
+ "beta": 0.25,
+ "max_iter": 8,
+ "em": 0.14,
+ "f1": 0.1877,
+ "avg_entropy": 7.1785,
+ "avg_energy_gap": 2.7302,
+ "avg_faiss_overlap": 0.002,
+ "avg_steps": 3.0
+ },
+ {
+ "beta": 0.25,
+ "max_iter": 15,
+ "em": 0.14,
+ "f1": 0.1877,
+ "avg_entropy": 7.1785,
+ "avg_energy_gap": 2.7302,
+ "avg_faiss_overlap": 0.002,
+ "avg_steps": 3.0
+ },
+ {
+ "beta": 0.5,
+ "max_iter": 1,
+ "em": 0.14,
+ "f1": 0.1877,
+ "avg_entropy": 7.1784,
+ "avg_energy_gap": 2.7292,
+ "avg_faiss_overlap": 0.002,
+ "avg_steps": 1.0
+ },
+ {
+ "beta": 0.5,
+ "max_iter": 2,
+ "em": 0.14,
+ "f1": 0.1877,
+ "avg_entropy": 7.1784,
+ "avg_energy_gap": 2.7292,
+ "avg_faiss_overlap": 0.002,
+ "avg_steps": 2.0
+ },
+ {
+ "beta": 0.5,
+ "max_iter": 3,
+ "em": 0.14,
+ "f1": 0.1877,
+ "avg_entropy": 7.1784,
+ "avg_energy_gap": 2.7292,
+ "avg_faiss_overlap": 0.002,
+ "avg_steps": 3.0
+ },
+ {
+ "beta": 0.5,
+ "max_iter": 5,
+ "em": 0.14,
+ "f1": 0.1877,
+ "avg_entropy": 7.1784,
+ "avg_energy_gap": 2.7292,
+ "avg_faiss_overlap": 0.002,
+ "avg_steps": 3.0
+ },
+ {
+ "beta": 0.5,
+ "max_iter": 8,
+ "em": 0.14,
+ "f1": 0.1877,
+ "avg_entropy": 7.1784,
+ "avg_energy_gap": 2.7292,
+ "avg_faiss_overlap": 0.002,
+ "avg_steps": 3.0
+ },
+ {
+ "beta": 0.5,
+ "max_iter": 15,
+ "em": 0.14,
+ "f1": 0.1877,
+ "avg_entropy": 7.1784,
+ "avg_energy_gap": 2.7292,
+ "avg_faiss_overlap": 0.002,
+ "avg_steps": 3.0
+ },
+ {
+ "beta": 1.0,
+ "max_iter": 1,
+ "em": 0.14,
+ "f1": 0.1877,
+ "avg_entropy": 7.1781,
+ "avg_energy_gap": 2.7273,
+ "avg_faiss_overlap": 0.002,
+ "avg_steps": 1.0
+ },
+ {
+ "beta": 1.0,
+ "max_iter": 2,
+ "em": 0.14,
+ "f1": 0.1877,
+ "avg_entropy": 7.1781,
+ "avg_energy_gap": 2.7273,
+ "avg_faiss_overlap": 0.002,
+ "avg_steps": 2.0
+ },
+ {
+ "beta": 1.0,
+ "max_iter": 3,
+ "em": 0.14,
+ "f1": 0.1877,
+ "avg_entropy": 7.1781,
+ "avg_energy_gap": 2.7273,
+ "avg_faiss_overlap": 0.002,
+ "avg_steps": 3.0
+ },
+ {
+ "beta": 1.0,
+ "max_iter": 5,
+ "em": 0.14,
+ "f1": 0.1877,
+ "avg_entropy": 7.1781,
+ "avg_energy_gap": 2.7273,
+ "avg_faiss_overlap": 0.002,
+ "avg_steps": 4.0
+ },
+ {
+ "beta": 1.0,
+ "max_iter": 8,
+ "em": 0.14,
+ "f1": 0.1877,
+ "avg_entropy": 7.1781,
+ "avg_energy_gap": 2.7273,
+ "avg_faiss_overlap": 0.002,
+ "avg_steps": 4.0
+ },
+ {
+ "beta": 1.0,
+ "max_iter": 15,
+ "em": 0.14,
+ "f1": 0.1877,
+ "avg_entropy": 7.1781,
+ "avg_energy_gap": 2.7273,
+ "avg_faiss_overlap": 0.002,
+ "avg_steps": 4.0
+ },
+ {
+ "beta": 2.0,
+ "max_iter": 1,
+ "em": 0.15,
+ "f1": 0.1977,
+ "avg_entropy": 7.1767,
+ "avg_energy_gap": 2.7234,
+ "avg_faiss_overlap": 0.004,
+ "avg_steps": 1.0
+ },
+ {
+ "beta": 2.0,
+ "max_iter": 2,
+ "em": 0.14,
+ "f1": 0.1877,
+ "avg_entropy": 7.1767,
+ "avg_energy_gap": 2.7235,
+ "avg_faiss_overlap": 0.002,
+ "avg_steps": 2.0
+ },
+ {
+ "beta": 2.0,
+ "max_iter": 3,
+ "em": 0.14,
+ "f1": 0.1877,
+ "avg_entropy": 7.1767,
+ "avg_energy_gap": 2.7235,
+ "avg_faiss_overlap": 0.002,
+ "avg_steps": 3.0
+ },
+ {
+ "beta": 2.0,
+ "max_iter": 5,
+ "em": 0.14,
+ "f1": 0.1877,
+ "avg_entropy": 7.1767,
+ "avg_energy_gap": 2.7235,
+ "avg_faiss_overlap": 0.002,
+ "avg_steps": 4.0
+ },
+ {
+ "beta": 2.0,
+ "max_iter": 8,
+ "em": 0.14,
+ "f1": 0.1877,
+ "avg_entropy": 7.1767,
+ "avg_energy_gap": 2.7235,
+ "avg_faiss_overlap": 0.002,
+ "avg_steps": 4.0
+ },
+ {
+ "beta": 2.0,
+ "max_iter": 15,
+ "em": 0.14,
+ "f1": 0.1877,
+ "avg_entropy": 7.1767,
+ "avg_energy_gap": 2.7235,
+ "avg_faiss_overlap": 0.002,
+ "avg_steps": 4.0
+ },
+ {
+ "beta": 3.0,
+ "max_iter": 1,
+ "em": 0.14,
+ "f1": 0.2016,
+ "avg_entropy": 7.1742,
+ "avg_energy_gap": 2.7193,
+ "avg_faiss_overlap": 0.006,
+ "avg_steps": 1.0
+ },
+ {
+ "beta": 3.0,
+ "max_iter": 2,
+ "em": 0.14,
+ "f1": 0.1877,
+ "avg_entropy": 7.1742,
+ "avg_energy_gap": 2.7196,
+ "avg_faiss_overlap": 0.002,
+ "avg_steps": 2.0
+ },
+ {
+ "beta": 3.0,
+ "max_iter": 3,
+ "em": 0.14,
+ "f1": 0.1877,
+ "avg_entropy": 7.1742,
+ "avg_energy_gap": 2.7196,
+ "avg_faiss_overlap": 0.002,
+ "avg_steps": 3.0
+ },
+ {
+ "beta": 3.0,
+ "max_iter": 5,
+ "em": 0.14,
+ "f1": 0.1877,
+ "avg_entropy": 7.1742,
+ "avg_energy_gap": 2.7196,
+ "avg_faiss_overlap": 0.002,
+ "avg_steps": 5.0
+ },
+ {
+ "beta": 3.0,
+ "max_iter": 8,
+ "em": 0.14,
+ "f1": 0.1877,
+ "avg_entropy": 7.1742,
+ "avg_energy_gap": 2.7196,
+ "avg_faiss_overlap": 0.002,
+ "avg_steps": 5.0
+ },
+ {
+ "beta": 3.0,
+ "max_iter": 15,
+ "em": 0.14,
+ "f1": 0.1877,
+ "avg_entropy": 7.1742,
+ "avg_energy_gap": 2.7196,
+ "avg_faiss_overlap": 0.002,
+ "avg_steps": 5.0
+ },
+ {
+ "beta": 5.0,
+ "max_iter": 1,
+ "em": 0.16,
+ "f1": 0.2207,
+ "avg_entropy": 7.1659,
+ "avg_energy_gap": 2.7105,
+ "avg_faiss_overlap": 0.018,
+ "avg_steps": 1.0
+ },
+ {
+ "beta": 5.0,
+ "max_iter": 2,
+ "em": 0.14,
+ "f1": 0.1877,
+ "avg_entropy": 7.1661,
+ "avg_energy_gap": 2.7114,
+ "avg_faiss_overlap": 0.002,
+ "avg_steps": 2.0
+ },
+ {
+ "beta": 5.0,
+ "max_iter": 3,
+ "em": 0.14,
+ "f1": 0.1877,
+ "avg_entropy": 7.1661,
+ "avg_energy_gap": 2.7114,
+ "avg_faiss_overlap": 0.002,
+ "avg_steps": 3.0
+ },
+ {
+ "beta": 5.0,
+ "max_iter": 5,
+ "em": 0.14,
+ "f1": 0.1877,
+ "avg_entropy": 7.1661,
+ "avg_energy_gap": 2.7114,
+ "avg_faiss_overlap": 0.002,
+ "avg_steps": 5.0
+ },
+ {
+ "beta": 5.0,
+ "max_iter": 8,
+ "em": 0.14,
+ "f1": 0.1877,
+ "avg_entropy": 7.1661,
+ "avg_energy_gap": 2.7114,
+ "avg_faiss_overlap": 0.002,
+ "avg_steps": 5.0
+ },
+ {
+ "beta": 5.0,
+ "max_iter": 15,
+ "em": 0.14,
+ "f1": 0.1877,
+ "avg_entropy": 7.1661,
+ "avg_energy_gap": 2.7114,
+ "avg_faiss_overlap": 0.002,
+ "avg_steps": 5.0
+ },
+ {
+ "beta": 8.0,
+ "max_iter": 1,
+ "em": 0.21,
+ "f1": 0.2938,
+ "avg_entropy": 7.1438,
+ "avg_energy_gap": 2.6938,
+ "avg_faiss_overlap": 0.068,
+ "avg_steps": 1.0
+ },
+ {
+ "beta": 8.0,
+ "max_iter": 2,
+ "em": 0.12,
+ "f1": 0.1766,
+ "avg_entropy": 7.145,
+ "avg_energy_gap": 2.6972,
+ "avg_faiss_overlap": 0.004,
+ "avg_steps": 2.0
+ },
+ {
+ "beta": 8.0,
+ "max_iter": 3,
+ "em": 0.14,
+ "f1": 0.1877,
+ "avg_entropy": 7.1449,
+ "avg_energy_gap": 2.6972,
+ "avg_faiss_overlap": 0.002,
+ "avg_steps": 3.0
+ },
+ {
+ "beta": 8.0,
+ "max_iter": 5,
+ "em": 0.14,
+ "f1": 0.1877,
+ "avg_entropy": 7.1448,
+ "avg_energy_gap": 2.6972,
+ "avg_faiss_overlap": 0.002,
+ "avg_steps": 5.0
+ },
+ {
+ "beta": 8.0,
+ "max_iter": 8,
+ "em": 0.14,
+ "f1": 0.1877,
+ "avg_entropy": 7.1448,
+ "avg_energy_gap": 2.6972,
+ "avg_faiss_overlap": 0.002,
+ "avg_steps": 7.0
+ },
+ {
+ "beta": 8.0,
+ "max_iter": 15,
+ "em": 0.14,
+ "f1": 0.1877,
+ "avg_entropy": 7.1448,
+ "avg_energy_gap": 2.6972,
+ "avg_faiss_overlap": 0.002,
+ "avg_steps": 7.0
+ }
+ ],
+ "best_config": {
+ "beta": 8.0,
+ "max_iter": 1,
+ "em": 0.21,
+ "f1": 0.2938,
+ "avg_entropy": 7.1438,
+ "avg_energy_gap": 2.6938,
+ "avg_faiss_overlap": 0.068
+ }
+} \ No newline at end of file
diff --git a/data/processed/highbeta_grid_results.json b/data/processed/highbeta_grid_results.json
new file mode 100644
index 0000000..a04895a
--- /dev/null
+++ b/data/processed/highbeta_grid_results.json
@@ -0,0 +1,828 @@
+{
+ "meta": {
+ "n_questions": 100,
+ "total_configs": 105,
+ "unique_llm_calls": 1379,
+ "total_time_s": 4571.0
+ },
+ "faiss_baseline": {
+ "em": 0.32,
+ "f1": 0.4381
+ },
+ "grid_results": [
+ {
+ "config": "\u03b2=20.0_iter=1_standard",
+ "em": 0.38,
+ "f1": 0.4691,
+ "avg_faiss_overlap": 0.48,
+ "avg_entropy": 4.5305
+ },
+ {
+ "config": "\u03b2=50.0_iter=1_standard",
+ "em": 0.36,
+ "f1": 0.4565,
+ "avg_faiss_overlap": 0.508,
+ "avg_entropy": 0.3196
+ },
+ {
+ "config": "\u03b2=20.0_iter=1_residual_0.9",
+ "em": 0.34,
+ "f1": 0.4552,
+ "avg_faiss_overlap": 0.966,
+ "avg_entropy": 3.5526
+ },
+ {
+ "config": "\u03b2=20.0_iter=2_residual_0.95",
+ "em": 0.34,
+ "f1": 0.4552,
+ "avg_faiss_overlap": 0.966,
+ "avg_entropy": 3.5503
+ },
+ {
+ "config": "\u03b2=500.0_iter=1_residual_0.9",
+ "em": 0.36,
+ "f1": 0.4545,
+ "avg_faiss_overlap": 0.692,
+ "avg_entropy": 0.0074
+ },
+ {
+ "config": "\u03b2=50.0_iter=1_normalized",
+ "em": 0.37,
+ "f1": 0.4539,
+ "avg_faiss_overlap": 0.464,
+ "avg_entropy": 1.7333
+ },
+ {
+ "config": "\u03b2=500.0_iter=1_residual_0.95",
+ "em": 0.36,
+ "f1": 0.4536,
+ "avg_faiss_overlap": 0.748,
+ "avg_entropy": 0.013
+ },
+ {
+ "config": "\u03b2=20.0_iter=1_residual_0.95",
+ "em": 0.34,
+ "f1": 0.4511,
+ "avg_faiss_overlap": 0.98,
+ "avg_entropy": 3.5011
+ },
+ {
+ "config": "\u03b2=50.0_iter=2_normalized",
+ "em": 0.37,
+ "f1": 0.4498,
+ "avg_faiss_overlap": 0.38,
+ "avg_entropy": 1.0954
+ },
+ {
+ "config": "\u03b2=20.0_iter=3_residual_0.95",
+ "em": 0.32,
+ "f1": 0.4494,
+ "avg_faiss_overlap": 0.946,
+ "avg_entropy": 3.5994
+ },
+ {
+ "config": "\u03b2=50.0_iter=1_residual_0.95",
+ "em": 0.34,
+ "f1": 0.4486,
+ "avg_faiss_overlap": 0.974,
+ "avg_entropy": 0.677
+ },
+ {
+ "config": "\u03b2=100.0_iter=1_residual_0.95",
+ "em": 0.34,
+ "f1": 0.4464,
+ "avg_faiss_overlap": 0.97,
+ "avg_entropy": 0.2081
+ },
+ {
+ "config": "\u03b2=200.0_iter=1_residual_0.95",
+ "em": 0.34,
+ "f1": 0.4464,
+ "avg_faiss_overlap": 0.968,
+ "avg_entropy": 0.0722
+ },
+ {
+ "config": "\u03b2=100.0_iter=1_normalized",
+ "em": 0.35,
+ "f1": 0.446,
+ "avg_faiss_overlap": 0.508,
+ "avg_entropy": 0.1139
+ },
+ {
+ "config": "\u03b2=500.0_iter=2_residual_0.95",
+ "em": 0.35,
+ "f1": 0.4445,
+ "avg_faiss_overlap": 0.692,
+ "avg_entropy": 0.0037
+ },
+ {
+ "config": "\u03b2=50.0_iter=2_residual_0.9",
+ "em": 0.33,
+ "f1": 0.4441,
+ "avg_faiss_overlap": 0.886,
+ "avg_entropy": 0.4749
+ },
+ {
+ "config": "\u03b2=100.0_iter=2_residual_0.9",
+ "em": 0.33,
+ "f1": 0.4441,
+ "avg_faiss_overlap": 0.878,
+ "avg_entropy": 0.0623
+ },
+ {
+ "config": "\u03b2=50.0_iter=8_residual_0.95",
+ "em": 0.32,
+ "f1": 0.4431,
+ "avg_faiss_overlap": 0.808,
+ "avg_entropy": 0.2083
+ },
+ {
+ "config": "\u03b2=100.0_iter=8_residual_0.95",
+ "em": 0.32,
+ "f1": 0.4431,
+ "avg_faiss_overlap": 0.794,
+ "avg_entropy": 0.0019
+ },
+ {
+ "config": "\u03b2=100.0_iter=3_residual_0.95",
+ "em": 0.33,
+ "f1": 0.4427,
+ "avg_faiss_overlap": 0.9,
+ "avg_entropy": 0.0754
+ },
+ {
+ "config": "\u03b2=20.0_iter=8_residual_0.95",
+ "em": 0.33,
+ "f1": 0.442,
+ "avg_faiss_overlap": 0.878,
+ "avg_entropy": 3.8365
+ },
+ {
+ "config": "\u03b2=50.0_iter=1_residual_0.9",
+ "em": 0.32,
+ "f1": 0.4419,
+ "avg_faiss_overlap": 0.946,
+ "avg_entropy": 0.6043
+ },
+ {
+ "config": "\u03b2=50.0_iter=2_residual_0.95",
+ "em": 0.32,
+ "f1": 0.4419,
+ "avg_faiss_overlap": 0.944,
+ "avg_entropy": 0.5941
+ },
+ {
+ "config": "\u03b2=50.0_iter=5_residual_0.95",
+ "em": 0.32,
+ "f1": 0.4407,
+ "avg_faiss_overlap": 0.87,
+ "avg_entropy": 0.3877
+ },
+ {
+ "config": "\u03b2=200.0_iter=1_normalized",
+ "em": 0.33,
+ "f1": 0.4404,
+ "avg_faiss_overlap": 0.484,
+ "avg_entropy": 0.0109
+ },
+ {
+ "config": "\u03b2=500.0_iter=5_residual_0.95",
+ "em": 0.35,
+ "f1": 0.4404,
+ "avg_faiss_overlap": 0.546,
+ "avg_entropy": 0.0
+ },
+ {
+ "config": "\u03b2=50.0_iter=8_residual_0.9",
+ "em": 0.34,
+ "f1": 0.4402,
+ "avg_faiss_overlap": 0.666,
+ "avg_entropy": 0.0306
+ },
+ {
+ "config": "\u03b2=100.0_iter=1_standard",
+ "em": 0.33,
+ "f1": 0.44,
+ "avg_faiss_overlap": 0.464,
+ "avg_entropy": 0.0196
+ },
+ {
+ "config": "\u03b2=100.0_iter=1_residual_0.9",
+ "em": 0.32,
+ "f1": 0.4397,
+ "avg_faiss_overlap": 0.932,
+ "avg_entropy": 0.1471
+ },
+ {
+ "config": "\u03b2=100.0_iter=2_residual_0.95",
+ "em": 0.32,
+ "f1": 0.4397,
+ "avg_faiss_overlap": 0.932,
+ "avg_entropy": 0.1268
+ },
+ {
+ "config": "\u03b2=100.0_iter=5_residual_0.95",
+ "em": 0.32,
+ "f1": 0.4391,
+ "avg_faiss_overlap": 0.858,
+ "avg_entropy": 0.019
+ },
+ {
+ "config": "\u03b2=20.0_iter=0_standard",
+ "em": 0.32,
+ "f1": 0.4381,
+ "avg_faiss_overlap": 1.0,
+ "avg_entropy": 3.452
+ },
+ {
+ "config": "\u03b2=50.0_iter=0_standard",
+ "em": 0.32,
+ "f1": 0.4381,
+ "avg_faiss_overlap": 1.0,
+ "avg_entropy": 0.7723
+ },
+ {
+ "config": "\u03b2=50.0_iter=5_standard",
+ "em": 0.33,
+ "f1": 0.4381,
+ "avg_faiss_overlap": 0.408,
+ "avg_entropy": 0.0
+ },
+ {
+ "config": "\u03b2=50.0_iter=8_standard",
+ "em": 0.33,
+ "f1": 0.4381,
+ "avg_faiss_overlap": 0.408,
+ "avg_entropy": 0.0
+ },
+ {
+ "config": "\u03b2=50.0_iter=8_normalized",
+ "em": 0.34,
+ "f1": 0.4381,
+ "avg_faiss_overlap": 0.358,
+ "avg_entropy": 0.0
+ },
+ {
+ "config": "\u03b2=100.0_iter=0_standard",
+ "em": 0.32,
+ "f1": 0.4381,
+ "avg_faiss_overlap": 1.0,
+ "avg_entropy": 0.3128
+ },
+ {
+ "config": "\u03b2=100.0_iter=2_normalized",
+ "em": 0.33,
+ "f1": 0.4381,
+ "avg_faiss_overlap": 0.422,
+ "avg_entropy": 0.0143
+ },
+ {
+ "config": "\u03b2=100.0_iter=3_normalized",
+ "em": 0.33,
+ "f1": 0.4381,
+ "avg_faiss_overlap": 0.412,
+ "avg_entropy": 0.0
+ },
+ {
+ "config": "\u03b2=100.0_iter=5_normalized",
+ "em": 0.33,
+ "f1": 0.4381,
+ "avg_faiss_overlap": 0.41,
+ "avg_entropy": 0.0
+ },
+ {
+ "config": "\u03b2=100.0_iter=8_normalized",
+ "em": 0.33,
+ "f1": 0.4381,
+ "avg_faiss_overlap": 0.41,
+ "avg_entropy": 0.0
+ },
+ {
+ "config": "\u03b2=200.0_iter=0_standard",
+ "em": 0.32,
+ "f1": 0.4381,
+ "avg_faiss_overlap": 1.0,
+ "avg_entropy": 0.1535
+ },
+ {
+ "config": "\u03b2=50.0_iter=3_residual_0.95",
+ "em": 0.32,
+ "f1": 0.4377,
+ "avg_faiss_overlap": 0.912,
+ "avg_entropy": 0.5215
+ },
+ {
+ "config": "\u03b2=20.0_iter=5_residual_0.9",
+ "em": 0.32,
+ "f1": 0.437,
+ "avg_faiss_overlap": 0.846,
+ "avg_entropy": 3.9311
+ },
+ {
+ "config": "\u03b2=20.0_iter=2_residual_0.9",
+ "em": 0.31,
+ "f1": 0.4349,
+ "avg_faiss_overlap": 0.926,
+ "avg_entropy": 3.6521
+ },
+ {
+ "config": "\u03b2=200.0_iter=1_residual_0.9",
+ "em": 0.32,
+ "f1": 0.4347,
+ "avg_faiss_overlap": 0.928,
+ "avg_entropy": 0.0466
+ },
+ {
+ "config": "\u03b2=200.0_iter=2_residual_0.95",
+ "em": 0.32,
+ "f1": 0.4347,
+ "avg_faiss_overlap": 0.928,
+ "avg_entropy": 0.0274
+ },
+ {
+ "config": "\u03b2=50.0_iter=3_residual_0.9",
+ "em": 0.31,
+ "f1": 0.4344,
+ "avg_faiss_overlap": 0.842,
+ "avg_entropy": 0.3574
+ },
+ {
+ "config": "\u03b2=200.0_iter=2_residual_0.9",
+ "em": 0.32,
+ "f1": 0.4341,
+ "avg_faiss_overlap": 0.876,
+ "avg_entropy": 0.008
+ },
+ {
+ "config": "\u03b2=200.0_iter=5_residual_0.95",
+ "em": 0.31,
+ "f1": 0.4341,
+ "avg_faiss_overlap": 0.852,
+ "avg_entropy": 0.0005
+ },
+ {
+ "config": "\u03b2=200.0_iter=1_standard",
+ "em": 0.33,
+ "f1": 0.4331,
+ "avg_faiss_overlap": 0.43,
+ "avg_entropy": 0.0061
+ },
+ {
+ "config": "\u03b2=200.0_iter=3_residual_0.95",
+ "em": 0.32,
+ "f1": 0.4327,
+ "avg_faiss_overlap": 0.898,
+ "avg_entropy": 0.0082
+ },
+ {
+ "config": "\u03b2=20.0_iter=3_residual_0.9",
+ "em": 0.31,
+ "f1": 0.4313,
+ "avg_faiss_overlap": 0.9,
+ "avg_entropy": 3.7492
+ },
+ {
+ "config": "\u03b2=500.0_iter=3_residual_0.95",
+ "em": 0.35,
+ "f1": 0.4312,
+ "avg_faiss_overlap": 0.636,
+ "avg_entropy": 0.0
+ },
+ {
+ "config": "\u03b2=20.0_iter=8_residual_0.9",
+ "em": 0.31,
+ "f1": 0.4302,
+ "avg_faiss_overlap": 0.748,
+ "avg_entropy": 4.1579
+ },
+ {
+ "config": "\u03b2=20.0_iter=5_residual_0.95",
+ "em": 0.31,
+ "f1": 0.4299,
+ "avg_faiss_overlap": 0.91,
+ "avg_entropy": 3.6963
+ },
+ {
+ "config": "\u03b2=50.0_iter=3_normalized",
+ "em": 0.33,
+ "f1": 0.4291,
+ "avg_faiss_overlap": 0.368,
+ "avg_entropy": 0.6769
+ },
+ {
+ "config": "\u03b2=50.0_iter=5_residual_0.9",
+ "em": 0.32,
+ "f1": 0.4291,
+ "avg_faiss_overlap": 0.778,
+ "avg_entropy": 0.1599
+ },
+ {
+ "config": "\u03b2=50.0_iter=3_standard",
+ "em": 0.32,
+ "f1": 0.4281,
+ "avg_faiss_overlap": 0.41,
+ "avg_entropy": 0.0016
+ },
+ {
+ "config": "\u03b2=50.0_iter=5_normalized",
+ "em": 0.33,
+ "f1": 0.4281,
+ "avg_faiss_overlap": 0.356,
+ "avg_entropy": 0.1417
+ },
+ {
+ "config": "\u03b2=100.0_iter=3_standard",
+ "em": 0.32,
+ "f1": 0.4281,
+ "avg_faiss_overlap": 0.406,
+ "avg_entropy": 0.0
+ },
+ {
+ "config": "\u03b2=100.0_iter=5_standard",
+ "em": 0.32,
+ "f1": 0.4281,
+ "avg_faiss_overlap": 0.406,
+ "avg_entropy": 0.0
+ },
+ {
+ "config": "\u03b2=100.0_iter=8_standard",
+ "em": 0.32,
+ "f1": 0.4281,
+ "avg_faiss_overlap": 0.406,
+ "avg_entropy": 0.0
+ },
+ {
+ "config": "\u03b2=200.0_iter=2_standard",
+ "em": 0.32,
+ "f1": 0.4281,
+ "avg_faiss_overlap": 0.408,
+ "avg_entropy": 0.0
+ },
+ {
+ "config": "\u03b2=200.0_iter=2_normalized",
+ "em": 0.32,
+ "f1": 0.4281,
+ "avg_faiss_overlap": 0.408,
+ "avg_entropy": 0.0
+ },
+ {
+ "config": "\u03b2=200.0_iter=3_standard",
+ "em": 0.32,
+ "f1": 0.4281,
+ "avg_faiss_overlap": 0.406,
+ "avg_entropy": 0.0
+ },
+ {
+ "config": "\u03b2=200.0_iter=3_normalized",
+ "em": 0.32,
+ "f1": 0.4281,
+ "avg_faiss_overlap": 0.406,
+ "avg_entropy": 0.0
+ },
+ {
+ "config": "\u03b2=200.0_iter=5_standard",
+ "em": 0.32,
+ "f1": 0.4281,
+ "avg_faiss_overlap": 0.406,
+ "avg_entropy": 0.0
+ },
+ {
+ "config": "\u03b2=200.0_iter=5_normalized",
+ "em": 0.32,
+ "f1": 0.4281,
+ "avg_faiss_overlap": 0.406,
+ "avg_entropy": 0.0
+ },
+ {
+ "config": "\u03b2=200.0_iter=8_standard",
+ "em": 0.32,
+ "f1": 0.4281,
+ "avg_faiss_overlap": 0.406,
+ "avg_entropy": 0.0
+ },
+ {
+ "config": "\u03b2=200.0_iter=8_normalized",
+ "em": 0.32,
+ "f1": 0.4281,
+ "avg_faiss_overlap": 0.406,
+ "avg_entropy": 0.0
+ },
+ {
+ "config": "\u03b2=50.0_iter=2_standard",
+ "em": 0.32,
+ "f1": 0.4265,
+ "avg_faiss_overlap": 0.422,
+ "avg_entropy": 0.0481
+ },
+ {
+ "config": "\u03b2=500.0_iter=3_residual_0.9",
+ "em": 0.33,
+ "f1": 0.4265,
+ "avg_faiss_overlap": 0.51,
+ "avg_entropy": 0.0
+ },
+ {
+ "config": "\u03b2=100.0_iter=3_residual_0.9",
+ "em": 0.29,
+ "f1": 0.4244,
+ "avg_faiss_overlap": 0.828,
+ "avg_entropy": 0.019
+ },
+ {
+ "config": "\u03b2=200.0_iter=3_residual_0.9",
+ "em": 0.29,
+ "f1": 0.4244,
+ "avg_faiss_overlap": 0.824,
+ "avg_entropy": 0.0021
+ },
+ {
+ "config": "\u03b2=500.0_iter=0_standard",
+ "em": 0.33,
+ "f1": 0.4236,
+ "avg_faiss_overlap": 0.798,
+ "avg_entropy": 0.0746
+ },
+ {
+ "config": "\u03b2=200.0_iter=8_residual_0.95",
+ "em": 0.3,
+ "f1": 0.4231,
+ "avg_faiss_overlap": 0.786,
+ "avg_entropy": 0.0
+ },
+ {
+ "config": "\u03b2=100.0_iter=2_standard",
+ "em": 0.31,
+ "f1": 0.4181,
+ "avg_faiss_overlap": 0.41,
+ "avg_entropy": 0.0
+ },
+ {
+ "config": "\u03b2=100.0_iter=8_residual_0.9",
+ "em": 0.32,
+ "f1": 0.4152,
+ "avg_faiss_overlap": 0.636,
+ "avg_entropy": 0.0
+ },
+ {
+ "config": "\u03b2=200.0_iter=8_residual_0.9",
+ "em": 0.32,
+ "f1": 0.4152,
+ "avg_faiss_overlap": 0.634,
+ "avg_entropy": 0.0
+ },
+ {
+ "config": "\u03b2=100.0_iter=5_residual_0.9",
+ "em": 0.3,
+ "f1": 0.4141,
+ "avg_faiss_overlap": 0.764,
+ "avg_entropy": 0.0014
+ },
+ {
+ "config": "\u03b2=500.0_iter=2_residual_0.9",
+ "em": 0.32,
+ "f1": 0.4098,
+ "avg_faiss_overlap": 0.584,
+ "avg_entropy": 0.0001
+ },
+ {
+ "config": "\u03b2=200.0_iter=5_residual_0.9",
+ "em": 0.29,
+ "f1": 0.4041,
+ "avg_faiss_overlap": 0.754,
+ "avg_entropy": 0.0
+ },
+ {
+ "config": "\u03b2=500.0_iter=5_residual_0.9",
+ "em": 0.31,
+ "f1": 0.3949,
+ "avg_faiss_overlap": 0.34,
+ "avg_entropy": 0.0
+ },
+ {
+ "config": "\u03b2=500.0_iter=8_residual_0.95",
+ "em": 0.29,
+ "f1": 0.3839,
+ "avg_faiss_overlap": 0.424,
+ "avg_entropy": 0.0
+ },
+ {
+ "config": "\u03b2=500.0_iter=8_residual_0.9",
+ "em": 0.27,
+ "f1": 0.3743,
+ "avg_faiss_overlap": 0.228,
+ "avg_entropy": 0.0
+ },
+ {
+ "config": "\u03b2=20.0_iter=2_standard",
+ "em": 0.29,
+ "f1": 0.3713,
+ "avg_faiss_overlap": 0.248,
+ "avg_entropy": 4.6996
+ },
+ {
+ "config": "\u03b2=500.0_iter=1_normalized",
+ "em": 0.27,
+ "f1": 0.3693,
+ "avg_faiss_overlap": 0.236,
+ "avg_entropy": 0.0018
+ },
+ {
+ "config": "\u03b2=500.0_iter=1_standard",
+ "em": 0.25,
+ "f1": 0.35,
+ "avg_faiss_overlap": 0.22,
+ "avg_entropy": 0.0003
+ },
+ {
+ "config": "\u03b2=500.0_iter=2_standard",
+ "em": 0.25,
+ "f1": 0.35,
+ "avg_faiss_overlap": 0.202,
+ "avg_entropy": 0.0
+ },
+ {
+ "config": "\u03b2=500.0_iter=2_normalized",
+ "em": 0.25,
+ "f1": 0.35,
+ "avg_faiss_overlap": 0.202,
+ "avg_entropy": 0.0
+ },
+ {
+ "config": "\u03b2=500.0_iter=3_standard",
+ "em": 0.25,
+ "f1": 0.35,
+ "avg_faiss_overlap": 0.202,
+ "avg_entropy": 0.0
+ },
+ {
+ "config": "\u03b2=500.0_iter=3_normalized",
+ "em": 0.25,
+ "f1": 0.35,
+ "avg_faiss_overlap": 0.202,
+ "avg_entropy": 0.0
+ },
+ {
+ "config": "\u03b2=500.0_iter=5_standard",
+ "em": 0.25,
+ "f1": 0.35,
+ "avg_faiss_overlap": 0.202,
+ "avg_entropy": 0.0
+ },
+ {
+ "config": "\u03b2=500.0_iter=5_normalized",
+ "em": 0.25,
+ "f1": 0.35,
+ "avg_faiss_overlap": 0.202,
+ "avg_entropy": 0.0
+ },
+ {
+ "config": "\u03b2=500.0_iter=8_standard",
+ "em": 0.25,
+ "f1": 0.35,
+ "avg_faiss_overlap": 0.202,
+ "avg_entropy": 0.0
+ },
+ {
+ "config": "\u03b2=500.0_iter=8_normalized",
+ "em": 0.25,
+ "f1": 0.35,
+ "avg_faiss_overlap": 0.202,
+ "avg_entropy": 0.0
+ },
+ {
+ "config": "\u03b2=20.0_iter=1_normalized",
+ "em": 0.27,
+ "f1": 0.3367,
+ "avg_faiss_overlap": 0.14,
+ "avg_entropy": 6.5894
+ },
+ {
+ "config": "\u03b2=20.0_iter=3_standard",
+ "em": 0.27,
+ "f1": 0.332,
+ "avg_faiss_overlap": 0.184,
+ "avg_entropy": 4.6947
+ },
+ {
+ "config": "\u03b2=20.0_iter=5_standard",
+ "em": 0.24,
+ "f1": 0.3116,
+ "avg_faiss_overlap": 0.162,
+ "avg_entropy": 4.6875
+ },
+ {
+ "config": "\u03b2=20.0_iter=8_standard",
+ "em": 0.23,
+ "f1": 0.3016,
+ "avg_faiss_overlap": 0.16,
+ "avg_entropy": 4.684
+ },
+ {
+ "config": "\u03b2=20.0_iter=2_normalized",
+ "em": 0.18,
+ "f1": 0.2255,
+ "avg_faiss_overlap": 0.044,
+ "avg_entropy": 6.4837
+ },
+ {
+ "config": "\u03b2=20.0_iter=8_normalized",
+ "em": 0.14,
+ "f1": 0.2221,
+ "avg_faiss_overlap": 0.02,
+ "avg_entropy": 6.3573
+ },
+ {
+ "config": "\u03b2=20.0_iter=3_normalized",
+ "em": 0.14,
+ "f1": 0.2155,
+ "avg_faiss_overlap": 0.024,
+ "avg_entropy": 6.4174
+ },
+ {
+ "config": "\u03b2=20.0_iter=5_normalized",
+ "em": 0.13,
+ "f1": 0.1961,
+ "avg_faiss_overlap": 0.02,
+ "avg_entropy": 6.3707
+ }
+ ],
+ "best_config": {
+ "config": "\u03b2=20.0_iter=1_standard",
+ "em": 0.38,
+ "f1": 0.4691,
+ "avg_faiss_overlap": 0.48,
+ "avg_entropy": 4.5305
+ },
+ "top10": [
+ {
+ "config": "\u03b2=20.0_iter=1_standard",
+ "em": 0.38,
+ "f1": 0.4691,
+ "avg_faiss_overlap": 0.48,
+ "avg_entropy": 4.5305
+ },
+ {
+ "config": "\u03b2=50.0_iter=1_standard",
+ "em": 0.36,
+ "f1": 0.4565,
+ "avg_faiss_overlap": 0.508,
+ "avg_entropy": 0.3196
+ },
+ {
+ "config": "\u03b2=20.0_iter=1_residual_0.9",
+ "em": 0.34,
+ "f1": 0.4552,
+ "avg_faiss_overlap": 0.966,
+ "avg_entropy": 3.5526
+ },
+ {
+ "config": "\u03b2=20.0_iter=2_residual_0.95",
+ "em": 0.34,
+ "f1": 0.4552,
+ "avg_faiss_overlap": 0.966,
+ "avg_entropy": 3.5503
+ },
+ {
+ "config": "\u03b2=500.0_iter=1_residual_0.9",
+ "em": 0.36,
+ "f1": 0.4545,
+ "avg_faiss_overlap": 0.692,
+ "avg_entropy": 0.0074
+ },
+ {
+ "config": "\u03b2=50.0_iter=1_normalized",
+ "em": 0.37,
+ "f1": 0.4539,
+ "avg_faiss_overlap": 0.464,
+ "avg_entropy": 1.7333
+ },
+ {
+ "config": "\u03b2=500.0_iter=1_residual_0.95",
+ "em": 0.36,
+ "f1": 0.4536,
+ "avg_faiss_overlap": 0.748,
+ "avg_entropy": 0.013
+ },
+ {
+ "config": "\u03b2=20.0_iter=1_residual_0.95",
+ "em": 0.34,
+ "f1": 0.4511,
+ "avg_faiss_overlap": 0.98,
+ "avg_entropy": 3.5011
+ },
+ {
+ "config": "\u03b2=50.0_iter=2_normalized",
+ "em": 0.37,
+ "f1": 0.4498,
+ "avg_faiss_overlap": 0.38,
+ "avg_entropy": 1.0954
+ },
+ {
+ "config": "\u03b2=20.0_iter=3_residual_0.95",
+ "em": 0.32,
+ "f1": 0.4494,
+ "avg_faiss_overlap": 0.946,
+ "avg_entropy": 3.5994
+ }
+ ]
+} \ No newline at end of file
diff --git a/data/processed/residual_grid_results.json b/data/processed/residual_grid_results.json
new file mode 100644
index 0000000..c5b6d28
--- /dev/null
+++ b/data/processed/residual_grid_results.json
@@ -0,0 +1,1016 @@
+{
+ "meta": {
+ "n_questions": 100,
+ "total_configs": 100,
+ "unique_llm_calls": 1666,
+ "faiss_llm_calls": 100,
+ "total_time_s": 5113.4
+ },
+ "faiss_baseline": {
+ "em": 0.32,
+ "f1": 0.4381
+ },
+ "grid_results": [
+ {
+ "beta": 5.0,
+ "lambda": 0.7,
+ "max_iter": 1,
+ "em": 0.36,
+ "f1": 0.4809,
+ "avg_faiss_overlap": 0.902,
+ "avg_entropy": 7.1163
+ },
+ {
+ "beta": 5.0,
+ "lambda": 0.9,
+ "max_iter": 3,
+ "em": 0.36,
+ "f1": 0.4809,
+ "avg_faiss_overlap": 0.912,
+ "avg_entropy": 7.1122
+ },
+ {
+ "beta": 10.0,
+ "lambda": 0.7,
+ "max_iter": 1,
+ "em": 0.36,
+ "f1": 0.4809,
+ "avg_faiss_overlap": 0.9,
+ "avg_entropy": 6.8422
+ },
+ {
+ "beta": 10.0,
+ "lambda": 0.95,
+ "max_iter": 8,
+ "em": 0.36,
+ "f1": 0.4797,
+ "avg_faiss_overlap": 0.886,
+ "avg_entropy": 6.8941
+ },
+ {
+ "beta": 5.0,
+ "lambda": 0.95,
+ "max_iter": 8,
+ "em": 0.35,
+ "f1": 0.4697,
+ "avg_faiss_overlap": 0.886,
+ "avg_entropy": 7.1219
+ },
+ {
+ "beta": 5.0,
+ "lambda": 0.95,
+ "max_iter": 3,
+ "em": 0.35,
+ "f1": 0.4692,
+ "avg_faiss_overlap": 0.956,
+ "avg_entropy": 7.09
+ },
+ {
+ "beta": 5.0,
+ "lambda": 0.95,
+ "max_iter": 5,
+ "em": 0.35,
+ "f1": 0.4692,
+ "avg_faiss_overlap": 0.928,
+ "avg_entropy": 7.105
+ },
+ {
+ "beta": 10.0,
+ "lambda": 0.9,
+ "max_iter": 1,
+ "em": 0.35,
+ "f1": 0.4692,
+ "avg_faiss_overlap": 0.966,
+ "avg_entropy": 6.6138
+ },
+ {
+ "beta": 10.0,
+ "lambda": 0.95,
+ "max_iter": 3,
+ "em": 0.35,
+ "f1": 0.4692,
+ "avg_faiss_overlap": 0.956,
+ "avg_entropy": 6.6763
+ },
+ {
+ "beta": 5.0,
+ "lambda": 0.9,
+ "max_iter": 1,
+ "em": 0.34,
+ "f1": 0.4672,
+ "avg_faiss_overlap": 0.97,
+ "avg_entropy": 7.0815
+ },
+ {
+ "beta": 10.0,
+ "lambda": 0.9,
+ "max_iter": 5,
+ "em": 0.34,
+ "f1": 0.4637,
+ "avg_faiss_overlap": 0.856,
+ "avg_entropy": 6.9499
+ },
+ {
+ "beta": 5.0,
+ "lambda": 0.5,
+ "max_iter": 1,
+ "em": 0.33,
+ "f1": 0.4627,
+ "avg_faiss_overlap": 0.786,
+ "avg_entropy": 7.1407
+ },
+ {
+ "beta": 5.0,
+ "lambda": 0.9,
+ "max_iter": 8,
+ "em": 0.34,
+ "f1": 0.4625,
+ "avg_faiss_overlap": 0.724,
+ "avg_entropy": 7.1479
+ },
+ {
+ "beta": 10.0,
+ "lambda": 0.5,
+ "max_iter": 1,
+ "em": 0.33,
+ "f1": 0.4622,
+ "avg_faiss_overlap": 0.808,
+ "avg_entropy": 6.9842
+ },
+ {
+ "beta": 100.0,
+ "lambda": 0.5,
+ "max_iter": 1,
+ "em": 0.36,
+ "f1": 0.4608,
+ "avg_faiss_overlap": 0.722,
+ "avg_entropy": 0.0442
+ },
+ {
+ "beta": 10.0,
+ "lambda": 0.9,
+ "max_iter": 8,
+ "em": 0.33,
+ "f1": 0.46,
+ "avg_faiss_overlap": 0.744,
+ "avg_entropy": 7.0404
+ },
+ {
+ "beta": 5.0,
+ "lambda": 0.8,
+ "max_iter": 1,
+ "em": 0.34,
+ "f1": 0.4592,
+ "avg_faiss_overlap": 0.944,
+ "avg_entropy": 7.1003
+ },
+ {
+ "beta": 10.0,
+ "lambda": 0.8,
+ "max_iter": 1,
+ "em": 0.34,
+ "f1": 0.4592,
+ "avg_faiss_overlap": 0.938,
+ "avg_entropy": 6.7407
+ },
+ {
+ "beta": 10.0,
+ "lambda": 0.95,
+ "max_iter": 5,
+ "em": 0.34,
+ "f1": 0.4592,
+ "avg_faiss_overlap": 0.928,
+ "avg_entropy": 6.7819
+ },
+ {
+ "beta": 5.0,
+ "lambda": 0.95,
+ "max_iter": 1,
+ "em": 0.34,
+ "f1": 0.4556,
+ "avg_faiss_overlap": 0.984,
+ "avg_entropy": 7.0709
+ },
+ {
+ "beta": 10.0,
+ "lambda": 0.95,
+ "max_iter": 1,
+ "em": 0.34,
+ "f1": 0.4556,
+ "avg_faiss_overlap": 0.984,
+ "avg_entropy": 6.5398
+ },
+ {
+ "beta": 20.0,
+ "lambda": 0.9,
+ "max_iter": 1,
+ "em": 0.34,
+ "f1": 0.4552,
+ "avg_faiss_overlap": 0.966,
+ "avg_entropy": 3.5526
+ },
+ {
+ "beta": 20.0,
+ "lambda": 0.5,
+ "max_iter": 1,
+ "em": 0.34,
+ "f1": 0.4548,
+ "avg_faiss_overlap": 0.796,
+ "avg_entropy": 4.0138
+ },
+ {
+ "beta": 5.0,
+ "lambda": 0.8,
+ "max_iter": 3,
+ "em": 0.32,
+ "f1": 0.4527,
+ "avg_faiss_overlap": 0.8,
+ "avg_entropy": 7.14
+ },
+ {
+ "beta": 10.0,
+ "lambda": 0.8,
+ "max_iter": 3,
+ "em": 0.32,
+ "f1": 0.4527,
+ "avg_faiss_overlap": 0.8,
+ "avg_entropy": 6.9964
+ },
+ {
+ "beta": 50.0,
+ "lambda": 0.8,
+ "max_iter": 3,
+ "em": 0.34,
+ "f1": 0.4524,
+ "avg_faiss_overlap": 0.74,
+ "avg_entropy": 0.1516
+ },
+ {
+ "beta": 20.0,
+ "lambda": 0.95,
+ "max_iter": 1,
+ "em": 0.34,
+ "f1": 0.4511,
+ "avg_faiss_overlap": 0.98,
+ "avg_entropy": 3.5011
+ },
+ {
+ "beta": 10.0,
+ "lambda": 0.9,
+ "max_iter": 3,
+ "em": 0.33,
+ "f1": 0.4509,
+ "avg_faiss_overlap": 0.918,
+ "avg_entropy": 6.828
+ },
+ {
+ "beta": 50.0,
+ "lambda": 0.7,
+ "max_iter": 5,
+ "em": 0.34,
+ "f1": 0.4509,
+ "avg_faiss_overlap": 0.492,
+ "avg_entropy": 0.0065
+ },
+ {
+ "beta": 50.0,
+ "lambda": 0.5,
+ "max_iter": 3,
+ "em": 0.35,
+ "f1": 0.4501,
+ "avg_faiss_overlap": 0.472,
+ "avg_entropy": 0.0137
+ },
+ {
+ "beta": 20.0,
+ "lambda": 0.8,
+ "max_iter": 3,
+ "em": 0.33,
+ "f1": 0.4498,
+ "avg_faiss_overlap": 0.798,
+ "avg_entropy": 4.0296
+ },
+ {
+ "beta": 50.0,
+ "lambda": 0.5,
+ "max_iter": 1,
+ "em": 0.34,
+ "f1": 0.4496,
+ "avg_faiss_overlap": 0.752,
+ "avg_entropy": 0.3624
+ },
+ {
+ "beta": 20.0,
+ "lambda": 0.95,
+ "max_iter": 3,
+ "em": 0.32,
+ "f1": 0.4494,
+ "avg_faiss_overlap": 0.946,
+ "avg_entropy": 3.5994
+ },
+ {
+ "beta": 5.0,
+ "lambda": 0.9,
+ "max_iter": 5,
+ "em": 0.32,
+ "f1": 0.4487,
+ "avg_faiss_overlap": 0.842,
+ "avg_entropy": 7.1313
+ },
+ {
+ "beta": 50.0,
+ "lambda": 0.95,
+ "max_iter": 1,
+ "em": 0.34,
+ "f1": 0.4486,
+ "avg_faiss_overlap": 0.974,
+ "avg_entropy": 0.677
+ },
+ {
+ "beta": 100.0,
+ "lambda": 0.95,
+ "max_iter": 1,
+ "em": 0.34,
+ "f1": 0.4464,
+ "avg_faiss_overlap": 0.97,
+ "avg_entropy": 0.2081
+ },
+ {
+ "beta": 50.0,
+ "lambda": 0.8,
+ "max_iter": 8,
+ "em": 0.33,
+ "f1": 0.4459,
+ "avg_faiss_overlap": 0.482,
+ "avg_entropy": 0.001
+ },
+ {
+ "beta": 100.0,
+ "lambda": 0.8,
+ "max_iter": 3,
+ "em": 0.34,
+ "f1": 0.4458,
+ "avg_faiss_overlap": 0.704,
+ "avg_entropy": 0.0034
+ },
+ {
+ "beta": 100.0,
+ "lambda": 0.8,
+ "max_iter": 1,
+ "em": 0.33,
+ "f1": 0.4441,
+ "avg_faiss_overlap": 0.878,
+ "avg_entropy": 0.0927
+ },
+ {
+ "beta": 20.0,
+ "lambda": 0.7,
+ "max_iter": 3,
+ "em": 0.33,
+ "f1": 0.4433,
+ "avg_faiss_overlap": 0.676,
+ "avg_entropy": 4.2538
+ },
+ {
+ "beta": 50.0,
+ "lambda": 0.95,
+ "max_iter": 8,
+ "em": 0.32,
+ "f1": 0.4431,
+ "avg_faiss_overlap": 0.808,
+ "avg_entropy": 0.2083
+ },
+ {
+ "beta": 100.0,
+ "lambda": 0.95,
+ "max_iter": 8,
+ "em": 0.32,
+ "f1": 0.4431,
+ "avg_faiss_overlap": 0.794,
+ "avg_entropy": 0.0019
+ },
+ {
+ "beta": 50.0,
+ "lambda": 0.8,
+ "max_iter": 1,
+ "em": 0.32,
+ "f1": 0.443,
+ "avg_faiss_overlap": 0.894,
+ "avg_entropy": 0.5053
+ },
+ {
+ "beta": 100.0,
+ "lambda": 0.95,
+ "max_iter": 3,
+ "em": 0.33,
+ "f1": 0.4427,
+ "avg_faiss_overlap": 0.9,
+ "avg_entropy": 0.0754
+ },
+ {
+ "beta": 10.0,
+ "lambda": 0.7,
+ "max_iter": 3,
+ "em": 0.31,
+ "f1": 0.4426,
+ "avg_faiss_overlap": 0.654,
+ "avg_entropy": 7.0699
+ },
+ {
+ "beta": 20.0,
+ "lambda": 0.95,
+ "max_iter": 8,
+ "em": 0.33,
+ "f1": 0.442,
+ "avg_faiss_overlap": 0.878,
+ "avg_entropy": 3.8365
+ },
+ {
+ "beta": 50.0,
+ "lambda": 0.9,
+ "max_iter": 1,
+ "em": 0.32,
+ "f1": 0.4419,
+ "avg_faiss_overlap": 0.946,
+ "avg_entropy": 0.6043
+ },
+ {
+ "beta": 50.0,
+ "lambda": 0.7,
+ "max_iter": 1,
+ "em": 0.32,
+ "f1": 0.4415,
+ "avg_faiss_overlap": 0.836,
+ "avg_entropy": 0.4413
+ },
+ {
+ "beta": 100.0,
+ "lambda": 0.7,
+ "max_iter": 1,
+ "em": 0.32,
+ "f1": 0.4415,
+ "avg_faiss_overlap": 0.824,
+ "avg_entropy": 0.069
+ },
+ {
+ "beta": 20.0,
+ "lambda": 0.7,
+ "max_iter": 1,
+ "em": 0.32,
+ "f1": 0.4413,
+ "avg_faiss_overlap": 0.888,
+ "avg_entropy": 3.7761
+ },
+ {
+ "beta": 50.0,
+ "lambda": 0.95,
+ "max_iter": 5,
+ "em": 0.32,
+ "f1": 0.4407,
+ "avg_faiss_overlap": 0.87,
+ "avg_entropy": 0.3877
+ },
+ {
+ "beta": 50.0,
+ "lambda": 0.9,
+ "max_iter": 8,
+ "em": 0.34,
+ "f1": 0.4402,
+ "avg_faiss_overlap": 0.666,
+ "avg_entropy": 0.0306
+ },
+ {
+ "beta": 100.0,
+ "lambda": 0.9,
+ "max_iter": 1,
+ "em": 0.32,
+ "f1": 0.4397,
+ "avg_faiss_overlap": 0.932,
+ "avg_entropy": 0.1471
+ },
+ {
+ "beta": 100.0,
+ "lambda": 0.95,
+ "max_iter": 5,
+ "em": 0.32,
+ "f1": 0.4391,
+ "avg_faiss_overlap": 0.858,
+ "avg_entropy": 0.019
+ },
+ {
+ "beta": 50.0,
+ "lambda": 0.8,
+ "max_iter": 5,
+ "em": 0.32,
+ "f1": 0.439,
+ "avg_faiss_overlap": 0.592,
+ "avg_entropy": 0.024
+ },
+ {
+ "beta": 50.0,
+ "lambda": 0.5,
+ "max_iter": 8,
+ "em": 0.33,
+ "f1": 0.4381,
+ "avg_faiss_overlap": 0.41,
+ "avg_entropy": 0.0
+ },
+ {
+ "beta": 50.0,
+ "lambda": 0.95,
+ "max_iter": 3,
+ "em": 0.32,
+ "f1": 0.4377,
+ "avg_faiss_overlap": 0.912,
+ "avg_entropy": 0.5215
+ },
+ {
+ "beta": 20.0,
+ "lambda": 0.9,
+ "max_iter": 5,
+ "em": 0.32,
+ "f1": 0.437,
+ "avg_faiss_overlap": 0.846,
+ "avg_entropy": 3.9311
+ },
+ {
+ "beta": 100.0,
+ "lambda": 0.7,
+ "max_iter": 5,
+ "em": 0.32,
+ "f1": 0.4359,
+ "avg_faiss_overlap": 0.474,
+ "avg_entropy": 0.0
+ },
+ {
+ "beta": 100.0,
+ "lambda": 0.8,
+ "max_iter": 8,
+ "em": 0.32,
+ "f1": 0.4359,
+ "avg_faiss_overlap": 0.472,
+ "avg_entropy": 0.0
+ },
+ {
+ "beta": 20.0,
+ "lambda": 0.8,
+ "max_iter": 5,
+ "em": 0.32,
+ "f1": 0.4356,
+ "avg_faiss_overlap": 0.658,
+ "avg_entropy": 4.2886
+ },
+ {
+ "beta": 100.0,
+ "lambda": 0.5,
+ "max_iter": 3,
+ "em": 0.33,
+ "f1": 0.4351,
+ "avg_faiss_overlap": 0.456,
+ "avg_entropy": 0.0
+ },
+ {
+ "beta": 20.0,
+ "lambda": 0.8,
+ "max_iter": 1,
+ "em": 0.31,
+ "f1": 0.4349,
+ "avg_faiss_overlap": 0.924,
+ "avg_entropy": 3.6613
+ },
+ {
+ "beta": 50.0,
+ "lambda": 0.9,
+ "max_iter": 3,
+ "em": 0.31,
+ "f1": 0.4344,
+ "avg_faiss_overlap": 0.842,
+ "avg_entropy": 0.3574
+ },
+ {
+ "beta": 20.0,
+ "lambda": 0.9,
+ "max_iter": 3,
+ "em": 0.31,
+ "f1": 0.4313,
+ "avg_faiss_overlap": 0.9,
+ "avg_entropy": 3.7492
+ },
+ {
+ "beta": 20.0,
+ "lambda": 0.9,
+ "max_iter": 8,
+ "em": 0.31,
+ "f1": 0.4302,
+ "avg_faiss_overlap": 0.748,
+ "avg_entropy": 4.1579
+ },
+ {
+ "beta": 20.0,
+ "lambda": 0.95,
+ "max_iter": 5,
+ "em": 0.31,
+ "f1": 0.4299,
+ "avg_faiss_overlap": 0.91,
+ "avg_entropy": 3.6963
+ },
+ {
+ "beta": 50.0,
+ "lambda": 0.7,
+ "max_iter": 3,
+ "em": 0.32,
+ "f1": 0.4299,
+ "avg_faiss_overlap": 0.614,
+ "avg_entropy": 0.0708
+ },
+ {
+ "beta": 50.0,
+ "lambda": 0.9,
+ "max_iter": 5,
+ "em": 0.32,
+ "f1": 0.4291,
+ "avg_faiss_overlap": 0.778,
+ "avg_entropy": 0.1599
+ },
+ {
+ "beta": 100.0,
+ "lambda": 0.5,
+ "max_iter": 8,
+ "em": 0.32,
+ "f1": 0.4281,
+ "avg_faiss_overlap": 0.408,
+ "avg_entropy": 0.0
+ },
+ {
+ "beta": 50.0,
+ "lambda": 0.7,
+ "max_iter": 8,
+ "em": 0.33,
+ "f1": 0.4257,
+ "avg_faiss_overlap": 0.426,
+ "avg_entropy": 0.0
+ },
+ {
+ "beta": 10.0,
+ "lambda": 0.8,
+ "max_iter": 5,
+ "em": 0.29,
+ "f1": 0.4249,
+ "avg_faiss_overlap": 0.632,
+ "avg_entropy": 7.0765
+ },
+ {
+ "beta": 100.0,
+ "lambda": 0.9,
+ "max_iter": 3,
+ "em": 0.29,
+ "f1": 0.4244,
+ "avg_faiss_overlap": 0.828,
+ "avg_entropy": 0.019
+ },
+ {
+ "beta": 50.0,
+ "lambda": 0.5,
+ "max_iter": 5,
+ "em": 0.32,
+ "f1": 0.4225,
+ "avg_faiss_overlap": 0.42,
+ "avg_entropy": 0.0
+ },
+ {
+ "beta": 100.0,
+ "lambda": 0.8,
+ "max_iter": 5,
+ "em": 0.31,
+ "f1": 0.4165,
+ "avg_faiss_overlap": 0.562,
+ "avg_entropy": 0.0
+ },
+ {
+ "beta": 100.0,
+ "lambda": 0.7,
+ "max_iter": 8,
+ "em": 0.32,
+ "f1": 0.4157,
+ "avg_faiss_overlap": 0.422,
+ "avg_entropy": 0.0
+ },
+ {
+ "beta": 100.0,
+ "lambda": 0.9,
+ "max_iter": 8,
+ "em": 0.32,
+ "f1": 0.4152,
+ "avg_faiss_overlap": 0.636,
+ "avg_entropy": 0.0
+ },
+ {
+ "beta": 100.0,
+ "lambda": 0.9,
+ "max_iter": 5,
+ "em": 0.3,
+ "f1": 0.4141,
+ "avg_faiss_overlap": 0.764,
+ "avg_entropy": 0.0014
+ },
+ {
+ "beta": 20.0,
+ "lambda": 0.7,
+ "max_iter": 5,
+ "em": 0.3,
+ "f1": 0.4127,
+ "avg_faiss_overlap": 0.498,
+ "avg_entropy": 4.4813
+ },
+ {
+ "beta": 100.0,
+ "lambda": 0.5,
+ "max_iter": 5,
+ "em": 0.31,
+ "f1": 0.4125,
+ "avg_faiss_overlap": 0.416,
+ "avg_entropy": 0.0
+ },
+ {
+ "beta": 5.0,
+ "lambda": 0.8,
+ "max_iter": 5,
+ "em": 0.27,
+ "f1": 0.4054,
+ "avg_faiss_overlap": 0.614,
+ "avg_entropy": 7.1555
+ },
+ {
+ "beta": 5.0,
+ "lambda": 0.7,
+ "max_iter": 3,
+ "em": 0.27,
+ "f1": 0.4051,
+ "avg_faiss_overlap": 0.64,
+ "avg_entropy": 7.1544
+ },
+ {
+ "beta": 100.0,
+ "lambda": 0.7,
+ "max_iter": 3,
+ "em": 0.3,
+ "f1": 0.4049,
+ "avg_faiss_overlap": 0.582,
+ "avg_entropy": 0.0002
+ },
+ {
+ "beta": 20.0,
+ "lambda": 0.7,
+ "max_iter": 8,
+ "em": 0.31,
+ "f1": 0.3988,
+ "avg_faiss_overlap": 0.28,
+ "avg_entropy": 4.5698
+ },
+ {
+ "beta": 20.0,
+ "lambda": 0.5,
+ "max_iter": 5,
+ "em": 0.32,
+ "f1": 0.3946,
+ "avg_faiss_overlap": 0.258,
+ "avg_entropy": 4.6045
+ },
+ {
+ "beta": 20.0,
+ "lambda": 0.8,
+ "max_iter": 8,
+ "em": 0.28,
+ "f1": 0.3927,
+ "avg_faiss_overlap": 0.48,
+ "avg_entropy": 4.4867
+ },
+ {
+ "beta": 20.0,
+ "lambda": 0.5,
+ "max_iter": 3,
+ "em": 0.29,
+ "f1": 0.3925,
+ "avg_faiss_overlap": 0.452,
+ "avg_entropy": 4.5183
+ },
+ {
+ "beta": 10.0,
+ "lambda": 0.7,
+ "max_iter": 5,
+ "em": 0.3,
+ "f1": 0.379,
+ "avg_faiss_overlap": 0.334,
+ "avg_entropy": 7.112
+ },
+ {
+ "beta": 10.0,
+ "lambda": 0.8,
+ "max_iter": 8,
+ "em": 0.28,
+ "f1": 0.3659,
+ "avg_faiss_overlap": 0.318,
+ "avg_entropy": 7.1124
+ },
+ {
+ "beta": 10.0,
+ "lambda": 0.5,
+ "max_iter": 3,
+ "em": 0.28,
+ "f1": 0.353,
+ "avg_faiss_overlap": 0.222,
+ "avg_entropy": 7.1169
+ },
+ {
+ "beta": 5.0,
+ "lambda": 0.5,
+ "max_iter": 3,
+ "em": 0.27,
+ "f1": 0.3436,
+ "avg_faiss_overlap": 0.156,
+ "avg_entropy": 7.1645
+ },
+ {
+ "beta": 20.0,
+ "lambda": 0.5,
+ "max_iter": 8,
+ "em": 0.25,
+ "f1": 0.3298,
+ "avg_faiss_overlap": 0.186,
+ "avg_entropy": 4.565
+ },
+ {
+ "beta": 5.0,
+ "lambda": 0.7,
+ "max_iter": 5,
+ "em": 0.24,
+ "f1": 0.3274,
+ "avg_faiss_overlap": 0.278,
+ "avg_entropy": 7.1633
+ },
+ {
+ "beta": 5.0,
+ "lambda": 0.8,
+ "max_iter": 8,
+ "em": 0.24,
+ "f1": 0.3274,
+ "avg_faiss_overlap": 0.276,
+ "avg_entropy": 7.1634
+ },
+ {
+ "beta": 10.0,
+ "lambda": 0.7,
+ "max_iter": 8,
+ "em": 0.15,
+ "f1": 0.1995,
+ "avg_faiss_overlap": 0.044,
+ "avg_entropy": 7.123
+ },
+ {
+ "beta": 10.0,
+ "lambda": 0.5,
+ "max_iter": 5,
+ "em": 0.15,
+ "f1": 0.1983,
+ "avg_faiss_overlap": 0.022,
+ "avg_entropy": 7.1239
+ },
+ {
+ "beta": 5.0,
+ "lambda": 0.5,
+ "max_iter": 5,
+ "em": 0.14,
+ "f1": 0.1895,
+ "avg_faiss_overlap": 0.018,
+ "avg_entropy": 7.166
+ },
+ {
+ "beta": 5.0,
+ "lambda": 0.5,
+ "max_iter": 8,
+ "em": 0.14,
+ "f1": 0.1877,
+ "avg_faiss_overlap": 0.002,
+ "avg_entropy": 7.1661
+ },
+ {
+ "beta": 5.0,
+ "lambda": 0.7,
+ "max_iter": 8,
+ "em": 0.12,
+ "f1": 0.1824,
+ "avg_faiss_overlap": 0.034,
+ "avg_entropy": 7.1658
+ },
+ {
+ "beta": 10.0,
+ "lambda": 0.5,
+ "max_iter": 8,
+ "em": 0.12,
+ "f1": 0.1766,
+ "avg_faiss_overlap": 0.002,
+ "avg_entropy": 7.1239
+ }
+ ],
+ "best_config": {
+ "beta": 5.0,
+ "lambda": 0.7,
+ "max_iter": 1,
+ "em": 0.36,
+ "f1": 0.4809,
+ "avg_faiss_overlap": 0.902,
+ "avg_entropy": 7.1163
+ },
+ "top10": [
+ {
+ "beta": 5.0,
+ "lambda": 0.7,
+ "max_iter": 1,
+ "em": 0.36,
+ "f1": 0.4809,
+ "avg_faiss_overlap": 0.902,
+ "avg_entropy": 7.1163
+ },
+ {
+ "beta": 5.0,
+ "lambda": 0.9,
+ "max_iter": 3,
+ "em": 0.36,
+ "f1": 0.4809,
+ "avg_faiss_overlap": 0.912,
+ "avg_entropy": 7.1122
+ },
+ {
+ "beta": 10.0,
+ "lambda": 0.7,
+ "max_iter": 1,
+ "em": 0.36,
+ "f1": 0.4809,
+ "avg_faiss_overlap": 0.9,
+ "avg_entropy": 6.8422
+ },
+ {
+ "beta": 10.0,
+ "lambda": 0.95,
+ "max_iter": 8,
+ "em": 0.36,
+ "f1": 0.4797,
+ "avg_faiss_overlap": 0.886,
+ "avg_entropy": 6.8941
+ },
+ {
+ "beta": 5.0,
+ "lambda": 0.95,
+ "max_iter": 8,
+ "em": 0.35,
+ "f1": 0.4697,
+ "avg_faiss_overlap": 0.886,
+ "avg_entropy": 7.1219
+ },
+ {
+ "beta": 5.0,
+ "lambda": 0.95,
+ "max_iter": 3,
+ "em": 0.35,
+ "f1": 0.4692,
+ "avg_faiss_overlap": 0.956,
+ "avg_entropy": 7.09
+ },
+ {
+ "beta": 5.0,
+ "lambda": 0.95,
+ "max_iter": 5,
+ "em": 0.35,
+ "f1": 0.4692,
+ "avg_faiss_overlap": 0.928,
+ "avg_entropy": 7.105
+ },
+ {
+ "beta": 10.0,
+ "lambda": 0.9,
+ "max_iter": 1,
+ "em": 0.35,
+ "f1": 0.4692,
+ "avg_faiss_overlap": 0.966,
+ "avg_entropy": 6.6138
+ },
+ {
+ "beta": 10.0,
+ "lambda": 0.95,
+ "max_iter": 3,
+ "em": 0.35,
+ "f1": 0.4692,
+ "avg_faiss_overlap": 0.956,
+ "avg_entropy": 6.6763
+ },
+ {
+ "beta": 5.0,
+ "lambda": 0.9,
+ "max_iter": 1,
+ "em": 0.34,
+ "f1": 0.4672,
+ "avg_faiss_overlap": 0.97,
+ "avg_entropy": 7.0815
+ }
+ ]
+} \ No newline at end of file