summaryrefslogtreecommitdiff
path: root/data/processed/residual_grid_results.json
diff options
context:
space:
mode:
Diffstat (limited to 'data/processed/residual_grid_results.json')
-rw-r--r--data/processed/residual_grid_results.json1016
1 files changed, 1016 insertions, 0 deletions
diff --git a/data/processed/residual_grid_results.json b/data/processed/residual_grid_results.json
new file mode 100644
index 0000000..c5b6d28
--- /dev/null
+++ b/data/processed/residual_grid_results.json
@@ -0,0 +1,1016 @@
+{
+ "meta": {
+ "n_questions": 100,
+ "total_configs": 100,
+ "unique_llm_calls": 1666,
+ "faiss_llm_calls": 100,
+ "total_time_s": 5113.4
+ },
+ "faiss_baseline": {
+ "em": 0.32,
+ "f1": 0.4381
+ },
+ "grid_results": [
+ {
+ "beta": 5.0,
+ "lambda": 0.7,
+ "max_iter": 1,
+ "em": 0.36,
+ "f1": 0.4809,
+ "avg_faiss_overlap": 0.902,
+ "avg_entropy": 7.1163
+ },
+ {
+ "beta": 5.0,
+ "lambda": 0.9,
+ "max_iter": 3,
+ "em": 0.36,
+ "f1": 0.4809,
+ "avg_faiss_overlap": 0.912,
+ "avg_entropy": 7.1122
+ },
+ {
+ "beta": 10.0,
+ "lambda": 0.7,
+ "max_iter": 1,
+ "em": 0.36,
+ "f1": 0.4809,
+ "avg_faiss_overlap": 0.9,
+ "avg_entropy": 6.8422
+ },
+ {
+ "beta": 10.0,
+ "lambda": 0.95,
+ "max_iter": 8,
+ "em": 0.36,
+ "f1": 0.4797,
+ "avg_faiss_overlap": 0.886,
+ "avg_entropy": 6.8941
+ },
+ {
+ "beta": 5.0,
+ "lambda": 0.95,
+ "max_iter": 8,
+ "em": 0.35,
+ "f1": 0.4697,
+ "avg_faiss_overlap": 0.886,
+ "avg_entropy": 7.1219
+ },
+ {
+ "beta": 5.0,
+ "lambda": 0.95,
+ "max_iter": 3,
+ "em": 0.35,
+ "f1": 0.4692,
+ "avg_faiss_overlap": 0.956,
+ "avg_entropy": 7.09
+ },
+ {
+ "beta": 5.0,
+ "lambda": 0.95,
+ "max_iter": 5,
+ "em": 0.35,
+ "f1": 0.4692,
+ "avg_faiss_overlap": 0.928,
+ "avg_entropy": 7.105
+ },
+ {
+ "beta": 10.0,
+ "lambda": 0.9,
+ "max_iter": 1,
+ "em": 0.35,
+ "f1": 0.4692,
+ "avg_faiss_overlap": 0.966,
+ "avg_entropy": 6.6138
+ },
+ {
+ "beta": 10.0,
+ "lambda": 0.95,
+ "max_iter": 3,
+ "em": 0.35,
+ "f1": 0.4692,
+ "avg_faiss_overlap": 0.956,
+ "avg_entropy": 6.6763
+ },
+ {
+ "beta": 5.0,
+ "lambda": 0.9,
+ "max_iter": 1,
+ "em": 0.34,
+ "f1": 0.4672,
+ "avg_faiss_overlap": 0.97,
+ "avg_entropy": 7.0815
+ },
+ {
+ "beta": 10.0,
+ "lambda": 0.9,
+ "max_iter": 5,
+ "em": 0.34,
+ "f1": 0.4637,
+ "avg_faiss_overlap": 0.856,
+ "avg_entropy": 6.9499
+ },
+ {
+ "beta": 5.0,
+ "lambda": 0.5,
+ "max_iter": 1,
+ "em": 0.33,
+ "f1": 0.4627,
+ "avg_faiss_overlap": 0.786,
+ "avg_entropy": 7.1407
+ },
+ {
+ "beta": 5.0,
+ "lambda": 0.9,
+ "max_iter": 8,
+ "em": 0.34,
+ "f1": 0.4625,
+ "avg_faiss_overlap": 0.724,
+ "avg_entropy": 7.1479
+ },
+ {
+ "beta": 10.0,
+ "lambda": 0.5,
+ "max_iter": 1,
+ "em": 0.33,
+ "f1": 0.4622,
+ "avg_faiss_overlap": 0.808,
+ "avg_entropy": 6.9842
+ },
+ {
+ "beta": 100.0,
+ "lambda": 0.5,
+ "max_iter": 1,
+ "em": 0.36,
+ "f1": 0.4608,
+ "avg_faiss_overlap": 0.722,
+ "avg_entropy": 0.0442
+ },
+ {
+ "beta": 10.0,
+ "lambda": 0.9,
+ "max_iter": 8,
+ "em": 0.33,
+ "f1": 0.46,
+ "avg_faiss_overlap": 0.744,
+ "avg_entropy": 7.0404
+ },
+ {
+ "beta": 5.0,
+ "lambda": 0.8,
+ "max_iter": 1,
+ "em": 0.34,
+ "f1": 0.4592,
+ "avg_faiss_overlap": 0.944,
+ "avg_entropy": 7.1003
+ },
+ {
+ "beta": 10.0,
+ "lambda": 0.8,
+ "max_iter": 1,
+ "em": 0.34,
+ "f1": 0.4592,
+ "avg_faiss_overlap": 0.938,
+ "avg_entropy": 6.7407
+ },
+ {
+ "beta": 10.0,
+ "lambda": 0.95,
+ "max_iter": 5,
+ "em": 0.34,
+ "f1": 0.4592,
+ "avg_faiss_overlap": 0.928,
+ "avg_entropy": 6.7819
+ },
+ {
+ "beta": 5.0,
+ "lambda": 0.95,
+ "max_iter": 1,
+ "em": 0.34,
+ "f1": 0.4556,
+ "avg_faiss_overlap": 0.984,
+ "avg_entropy": 7.0709
+ },
+ {
+ "beta": 10.0,
+ "lambda": 0.95,
+ "max_iter": 1,
+ "em": 0.34,
+ "f1": 0.4556,
+ "avg_faiss_overlap": 0.984,
+ "avg_entropy": 6.5398
+ },
+ {
+ "beta": 20.0,
+ "lambda": 0.9,
+ "max_iter": 1,
+ "em": 0.34,
+ "f1": 0.4552,
+ "avg_faiss_overlap": 0.966,
+ "avg_entropy": 3.5526
+ },
+ {
+ "beta": 20.0,
+ "lambda": 0.5,
+ "max_iter": 1,
+ "em": 0.34,
+ "f1": 0.4548,
+ "avg_faiss_overlap": 0.796,
+ "avg_entropy": 4.0138
+ },
+ {
+ "beta": 5.0,
+ "lambda": 0.8,
+ "max_iter": 3,
+ "em": 0.32,
+ "f1": 0.4527,
+ "avg_faiss_overlap": 0.8,
+ "avg_entropy": 7.14
+ },
+ {
+ "beta": 10.0,
+ "lambda": 0.8,
+ "max_iter": 3,
+ "em": 0.32,
+ "f1": 0.4527,
+ "avg_faiss_overlap": 0.8,
+ "avg_entropy": 6.9964
+ },
+ {
+ "beta": 50.0,
+ "lambda": 0.8,
+ "max_iter": 3,
+ "em": 0.34,
+ "f1": 0.4524,
+ "avg_faiss_overlap": 0.74,
+ "avg_entropy": 0.1516
+ },
+ {
+ "beta": 20.0,
+ "lambda": 0.95,
+ "max_iter": 1,
+ "em": 0.34,
+ "f1": 0.4511,
+ "avg_faiss_overlap": 0.98,
+ "avg_entropy": 3.5011
+ },
+ {
+ "beta": 10.0,
+ "lambda": 0.9,
+ "max_iter": 3,
+ "em": 0.33,
+ "f1": 0.4509,
+ "avg_faiss_overlap": 0.918,
+ "avg_entropy": 6.828
+ },
+ {
+ "beta": 50.0,
+ "lambda": 0.7,
+ "max_iter": 5,
+ "em": 0.34,
+ "f1": 0.4509,
+ "avg_faiss_overlap": 0.492,
+ "avg_entropy": 0.0065
+ },
+ {
+ "beta": 50.0,
+ "lambda": 0.5,
+ "max_iter": 3,
+ "em": 0.35,
+ "f1": 0.4501,
+ "avg_faiss_overlap": 0.472,
+ "avg_entropy": 0.0137
+ },
+ {
+ "beta": 20.0,
+ "lambda": 0.8,
+ "max_iter": 3,
+ "em": 0.33,
+ "f1": 0.4498,
+ "avg_faiss_overlap": 0.798,
+ "avg_entropy": 4.0296
+ },
+ {
+ "beta": 50.0,
+ "lambda": 0.5,
+ "max_iter": 1,
+ "em": 0.34,
+ "f1": 0.4496,
+ "avg_faiss_overlap": 0.752,
+ "avg_entropy": 0.3624
+ },
+ {
+ "beta": 20.0,
+ "lambda": 0.95,
+ "max_iter": 3,
+ "em": 0.32,
+ "f1": 0.4494,
+ "avg_faiss_overlap": 0.946,
+ "avg_entropy": 3.5994
+ },
+ {
+ "beta": 5.0,
+ "lambda": 0.9,
+ "max_iter": 5,
+ "em": 0.32,
+ "f1": 0.4487,
+ "avg_faiss_overlap": 0.842,
+ "avg_entropy": 7.1313
+ },
+ {
+ "beta": 50.0,
+ "lambda": 0.95,
+ "max_iter": 1,
+ "em": 0.34,
+ "f1": 0.4486,
+ "avg_faiss_overlap": 0.974,
+ "avg_entropy": 0.677
+ },
+ {
+ "beta": 100.0,
+ "lambda": 0.95,
+ "max_iter": 1,
+ "em": 0.34,
+ "f1": 0.4464,
+ "avg_faiss_overlap": 0.97,
+ "avg_entropy": 0.2081
+ },
+ {
+ "beta": 50.0,
+ "lambda": 0.8,
+ "max_iter": 8,
+ "em": 0.33,
+ "f1": 0.4459,
+ "avg_faiss_overlap": 0.482,
+ "avg_entropy": 0.001
+ },
+ {
+ "beta": 100.0,
+ "lambda": 0.8,
+ "max_iter": 3,
+ "em": 0.34,
+ "f1": 0.4458,
+ "avg_faiss_overlap": 0.704,
+ "avg_entropy": 0.0034
+ },
+ {
+ "beta": 100.0,
+ "lambda": 0.8,
+ "max_iter": 1,
+ "em": 0.33,
+ "f1": 0.4441,
+ "avg_faiss_overlap": 0.878,
+ "avg_entropy": 0.0927
+ },
+ {
+ "beta": 20.0,
+ "lambda": 0.7,
+ "max_iter": 3,
+ "em": 0.33,
+ "f1": 0.4433,
+ "avg_faiss_overlap": 0.676,
+ "avg_entropy": 4.2538
+ },
+ {
+ "beta": 50.0,
+ "lambda": 0.95,
+ "max_iter": 8,
+ "em": 0.32,
+ "f1": 0.4431,
+ "avg_faiss_overlap": 0.808,
+ "avg_entropy": 0.2083
+ },
+ {
+ "beta": 100.0,
+ "lambda": 0.95,
+ "max_iter": 8,
+ "em": 0.32,
+ "f1": 0.4431,
+ "avg_faiss_overlap": 0.794,
+ "avg_entropy": 0.0019
+ },
+ {
+ "beta": 50.0,
+ "lambda": 0.8,
+ "max_iter": 1,
+ "em": 0.32,
+ "f1": 0.443,
+ "avg_faiss_overlap": 0.894,
+ "avg_entropy": 0.5053
+ },
+ {
+ "beta": 100.0,
+ "lambda": 0.95,
+ "max_iter": 3,
+ "em": 0.33,
+ "f1": 0.4427,
+ "avg_faiss_overlap": 0.9,
+ "avg_entropy": 0.0754
+ },
+ {
+ "beta": 10.0,
+ "lambda": 0.7,
+ "max_iter": 3,
+ "em": 0.31,
+ "f1": 0.4426,
+ "avg_faiss_overlap": 0.654,
+ "avg_entropy": 7.0699
+ },
+ {
+ "beta": 20.0,
+ "lambda": 0.95,
+ "max_iter": 8,
+ "em": 0.33,
+ "f1": 0.442,
+ "avg_faiss_overlap": 0.878,
+ "avg_entropy": 3.8365
+ },
+ {
+ "beta": 50.0,
+ "lambda": 0.9,
+ "max_iter": 1,
+ "em": 0.32,
+ "f1": 0.4419,
+ "avg_faiss_overlap": 0.946,
+ "avg_entropy": 0.6043
+ },
+ {
+ "beta": 50.0,
+ "lambda": 0.7,
+ "max_iter": 1,
+ "em": 0.32,
+ "f1": 0.4415,
+ "avg_faiss_overlap": 0.836,
+ "avg_entropy": 0.4413
+ },
+ {
+ "beta": 100.0,
+ "lambda": 0.7,
+ "max_iter": 1,
+ "em": 0.32,
+ "f1": 0.4415,
+ "avg_faiss_overlap": 0.824,
+ "avg_entropy": 0.069
+ },
+ {
+ "beta": 20.0,
+ "lambda": 0.7,
+ "max_iter": 1,
+ "em": 0.32,
+ "f1": 0.4413,
+ "avg_faiss_overlap": 0.888,
+ "avg_entropy": 3.7761
+ },
+ {
+ "beta": 50.0,
+ "lambda": 0.95,
+ "max_iter": 5,
+ "em": 0.32,
+ "f1": 0.4407,
+ "avg_faiss_overlap": 0.87,
+ "avg_entropy": 0.3877
+ },
+ {
+ "beta": 50.0,
+ "lambda": 0.9,
+ "max_iter": 8,
+ "em": 0.34,
+ "f1": 0.4402,
+ "avg_faiss_overlap": 0.666,
+ "avg_entropy": 0.0306
+ },
+ {
+ "beta": 100.0,
+ "lambda": 0.9,
+ "max_iter": 1,
+ "em": 0.32,
+ "f1": 0.4397,
+ "avg_faiss_overlap": 0.932,
+ "avg_entropy": 0.1471
+ },
+ {
+ "beta": 100.0,
+ "lambda": 0.95,
+ "max_iter": 5,
+ "em": 0.32,
+ "f1": 0.4391,
+ "avg_faiss_overlap": 0.858,
+ "avg_entropy": 0.019
+ },
+ {
+ "beta": 50.0,
+ "lambda": 0.8,
+ "max_iter": 5,
+ "em": 0.32,
+ "f1": 0.439,
+ "avg_faiss_overlap": 0.592,
+ "avg_entropy": 0.024
+ },
+ {
+ "beta": 50.0,
+ "lambda": 0.5,
+ "max_iter": 8,
+ "em": 0.33,
+ "f1": 0.4381,
+ "avg_faiss_overlap": 0.41,
+ "avg_entropy": 0.0
+ },
+ {
+ "beta": 50.0,
+ "lambda": 0.95,
+ "max_iter": 3,
+ "em": 0.32,
+ "f1": 0.4377,
+ "avg_faiss_overlap": 0.912,
+ "avg_entropy": 0.5215
+ },
+ {
+ "beta": 20.0,
+ "lambda": 0.9,
+ "max_iter": 5,
+ "em": 0.32,
+ "f1": 0.437,
+ "avg_faiss_overlap": 0.846,
+ "avg_entropy": 3.9311
+ },
+ {
+ "beta": 100.0,
+ "lambda": 0.7,
+ "max_iter": 5,
+ "em": 0.32,
+ "f1": 0.4359,
+ "avg_faiss_overlap": 0.474,
+ "avg_entropy": 0.0
+ },
+ {
+ "beta": 100.0,
+ "lambda": 0.8,
+ "max_iter": 8,
+ "em": 0.32,
+ "f1": 0.4359,
+ "avg_faiss_overlap": 0.472,
+ "avg_entropy": 0.0
+ },
+ {
+ "beta": 20.0,
+ "lambda": 0.8,
+ "max_iter": 5,
+ "em": 0.32,
+ "f1": 0.4356,
+ "avg_faiss_overlap": 0.658,
+ "avg_entropy": 4.2886
+ },
+ {
+ "beta": 100.0,
+ "lambda": 0.5,
+ "max_iter": 3,
+ "em": 0.33,
+ "f1": 0.4351,
+ "avg_faiss_overlap": 0.456,
+ "avg_entropy": 0.0
+ },
+ {
+ "beta": 20.0,
+ "lambda": 0.8,
+ "max_iter": 1,
+ "em": 0.31,
+ "f1": 0.4349,
+ "avg_faiss_overlap": 0.924,
+ "avg_entropy": 3.6613
+ },
+ {
+ "beta": 50.0,
+ "lambda": 0.9,
+ "max_iter": 3,
+ "em": 0.31,
+ "f1": 0.4344,
+ "avg_faiss_overlap": 0.842,
+ "avg_entropy": 0.3574
+ },
+ {
+ "beta": 20.0,
+ "lambda": 0.9,
+ "max_iter": 3,
+ "em": 0.31,
+ "f1": 0.4313,
+ "avg_faiss_overlap": 0.9,
+ "avg_entropy": 3.7492
+ },
+ {
+ "beta": 20.0,
+ "lambda": 0.9,
+ "max_iter": 8,
+ "em": 0.31,
+ "f1": 0.4302,
+ "avg_faiss_overlap": 0.748,
+ "avg_entropy": 4.1579
+ },
+ {
+ "beta": 20.0,
+ "lambda": 0.95,
+ "max_iter": 5,
+ "em": 0.31,
+ "f1": 0.4299,
+ "avg_faiss_overlap": 0.91,
+ "avg_entropy": 3.6963
+ },
+ {
+ "beta": 50.0,
+ "lambda": 0.7,
+ "max_iter": 3,
+ "em": 0.32,
+ "f1": 0.4299,
+ "avg_faiss_overlap": 0.614,
+ "avg_entropy": 0.0708
+ },
+ {
+ "beta": 50.0,
+ "lambda": 0.9,
+ "max_iter": 5,
+ "em": 0.32,
+ "f1": 0.4291,
+ "avg_faiss_overlap": 0.778,
+ "avg_entropy": 0.1599
+ },
+ {
+ "beta": 100.0,
+ "lambda": 0.5,
+ "max_iter": 8,
+ "em": 0.32,
+ "f1": 0.4281,
+ "avg_faiss_overlap": 0.408,
+ "avg_entropy": 0.0
+ },
+ {
+ "beta": 50.0,
+ "lambda": 0.7,
+ "max_iter": 8,
+ "em": 0.33,
+ "f1": 0.4257,
+ "avg_faiss_overlap": 0.426,
+ "avg_entropy": 0.0
+ },
+ {
+ "beta": 10.0,
+ "lambda": 0.8,
+ "max_iter": 5,
+ "em": 0.29,
+ "f1": 0.4249,
+ "avg_faiss_overlap": 0.632,
+ "avg_entropy": 7.0765
+ },
+ {
+ "beta": 100.0,
+ "lambda": 0.9,
+ "max_iter": 3,
+ "em": 0.29,
+ "f1": 0.4244,
+ "avg_faiss_overlap": 0.828,
+ "avg_entropy": 0.019
+ },
+ {
+ "beta": 50.0,
+ "lambda": 0.5,
+ "max_iter": 5,
+ "em": 0.32,
+ "f1": 0.4225,
+ "avg_faiss_overlap": 0.42,
+ "avg_entropy": 0.0
+ },
+ {
+ "beta": 100.0,
+ "lambda": 0.8,
+ "max_iter": 5,
+ "em": 0.31,
+ "f1": 0.4165,
+ "avg_faiss_overlap": 0.562,
+ "avg_entropy": 0.0
+ },
+ {
+ "beta": 100.0,
+ "lambda": 0.7,
+ "max_iter": 8,
+ "em": 0.32,
+ "f1": 0.4157,
+ "avg_faiss_overlap": 0.422,
+ "avg_entropy": 0.0
+ },
+ {
+ "beta": 100.0,
+ "lambda": 0.9,
+ "max_iter": 8,
+ "em": 0.32,
+ "f1": 0.4152,
+ "avg_faiss_overlap": 0.636,
+ "avg_entropy": 0.0
+ },
+ {
+ "beta": 100.0,
+ "lambda": 0.9,
+ "max_iter": 5,
+ "em": 0.3,
+ "f1": 0.4141,
+ "avg_faiss_overlap": 0.764,
+ "avg_entropy": 0.0014
+ },
+ {
+ "beta": 20.0,
+ "lambda": 0.7,
+ "max_iter": 5,
+ "em": 0.3,
+ "f1": 0.4127,
+ "avg_faiss_overlap": 0.498,
+ "avg_entropy": 4.4813
+ },
+ {
+ "beta": 100.0,
+ "lambda": 0.5,
+ "max_iter": 5,
+ "em": 0.31,
+ "f1": 0.4125,
+ "avg_faiss_overlap": 0.416,
+ "avg_entropy": 0.0
+ },
+ {
+ "beta": 5.0,
+ "lambda": 0.8,
+ "max_iter": 5,
+ "em": 0.27,
+ "f1": 0.4054,
+ "avg_faiss_overlap": 0.614,
+ "avg_entropy": 7.1555
+ },
+ {
+ "beta": 5.0,
+ "lambda": 0.7,
+ "max_iter": 3,
+ "em": 0.27,
+ "f1": 0.4051,
+ "avg_faiss_overlap": 0.64,
+ "avg_entropy": 7.1544
+ },
+ {
+ "beta": 100.0,
+ "lambda": 0.7,
+ "max_iter": 3,
+ "em": 0.3,
+ "f1": 0.4049,
+ "avg_faiss_overlap": 0.582,
+ "avg_entropy": 0.0002
+ },
+ {
+ "beta": 20.0,
+ "lambda": 0.7,
+ "max_iter": 8,
+ "em": 0.31,
+ "f1": 0.3988,
+ "avg_faiss_overlap": 0.28,
+ "avg_entropy": 4.5698
+ },
+ {
+ "beta": 20.0,
+ "lambda": 0.5,
+ "max_iter": 5,
+ "em": 0.32,
+ "f1": 0.3946,
+ "avg_faiss_overlap": 0.258,
+ "avg_entropy": 4.6045
+ },
+ {
+ "beta": 20.0,
+ "lambda": 0.8,
+ "max_iter": 8,
+ "em": 0.28,
+ "f1": 0.3927,
+ "avg_faiss_overlap": 0.48,
+ "avg_entropy": 4.4867
+ },
+ {
+ "beta": 20.0,
+ "lambda": 0.5,
+ "max_iter": 3,
+ "em": 0.29,
+ "f1": 0.3925,
+ "avg_faiss_overlap": 0.452,
+ "avg_entropy": 4.5183
+ },
+ {
+ "beta": 10.0,
+ "lambda": 0.7,
+ "max_iter": 5,
+ "em": 0.3,
+ "f1": 0.379,
+ "avg_faiss_overlap": 0.334,
+ "avg_entropy": 7.112
+ },
+ {
+ "beta": 10.0,
+ "lambda": 0.8,
+ "max_iter": 8,
+ "em": 0.28,
+ "f1": 0.3659,
+ "avg_faiss_overlap": 0.318,
+ "avg_entropy": 7.1124
+ },
+ {
+ "beta": 10.0,
+ "lambda": 0.5,
+ "max_iter": 3,
+ "em": 0.28,
+ "f1": 0.353,
+ "avg_faiss_overlap": 0.222,
+ "avg_entropy": 7.1169
+ },
+ {
+ "beta": 5.0,
+ "lambda": 0.5,
+ "max_iter": 3,
+ "em": 0.27,
+ "f1": 0.3436,
+ "avg_faiss_overlap": 0.156,
+ "avg_entropy": 7.1645
+ },
+ {
+ "beta": 20.0,
+ "lambda": 0.5,
+ "max_iter": 8,
+ "em": 0.25,
+ "f1": 0.3298,
+ "avg_faiss_overlap": 0.186,
+ "avg_entropy": 4.565
+ },
+ {
+ "beta": 5.0,
+ "lambda": 0.7,
+ "max_iter": 5,
+ "em": 0.24,
+ "f1": 0.3274,
+ "avg_faiss_overlap": 0.278,
+ "avg_entropy": 7.1633
+ },
+ {
+ "beta": 5.0,
+ "lambda": 0.8,
+ "max_iter": 8,
+ "em": 0.24,
+ "f1": 0.3274,
+ "avg_faiss_overlap": 0.276,
+ "avg_entropy": 7.1634
+ },
+ {
+ "beta": 10.0,
+ "lambda": 0.7,
+ "max_iter": 8,
+ "em": 0.15,
+ "f1": 0.1995,
+ "avg_faiss_overlap": 0.044,
+ "avg_entropy": 7.123
+ },
+ {
+ "beta": 10.0,
+ "lambda": 0.5,
+ "max_iter": 5,
+ "em": 0.15,
+ "f1": 0.1983,
+ "avg_faiss_overlap": 0.022,
+ "avg_entropy": 7.1239
+ },
+ {
+ "beta": 5.0,
+ "lambda": 0.5,
+ "max_iter": 5,
+ "em": 0.14,
+ "f1": 0.1895,
+ "avg_faiss_overlap": 0.018,
+ "avg_entropy": 7.166
+ },
+ {
+ "beta": 5.0,
+ "lambda": 0.5,
+ "max_iter": 8,
+ "em": 0.14,
+ "f1": 0.1877,
+ "avg_faiss_overlap": 0.002,
+ "avg_entropy": 7.1661
+ },
+ {
+ "beta": 5.0,
+ "lambda": 0.7,
+ "max_iter": 8,
+ "em": 0.12,
+ "f1": 0.1824,
+ "avg_faiss_overlap": 0.034,
+ "avg_entropy": 7.1658
+ },
+ {
+ "beta": 10.0,
+ "lambda": 0.5,
+ "max_iter": 8,
+ "em": 0.12,
+ "f1": 0.1766,
+ "avg_faiss_overlap": 0.002,
+ "avg_entropy": 7.1239
+ }
+ ],
+ "best_config": {
+ "beta": 5.0,
+ "lambda": 0.7,
+ "max_iter": 1,
+ "em": 0.36,
+ "f1": 0.4809,
+ "avg_faiss_overlap": 0.902,
+ "avg_entropy": 7.1163
+ },
+ "top10": [
+ {
+ "beta": 5.0,
+ "lambda": 0.7,
+ "max_iter": 1,
+ "em": 0.36,
+ "f1": 0.4809,
+ "avg_faiss_overlap": 0.902,
+ "avg_entropy": 7.1163
+ },
+ {
+ "beta": 5.0,
+ "lambda": 0.9,
+ "max_iter": 3,
+ "em": 0.36,
+ "f1": 0.4809,
+ "avg_faiss_overlap": 0.912,
+ "avg_entropy": 7.1122
+ },
+ {
+ "beta": 10.0,
+ "lambda": 0.7,
+ "max_iter": 1,
+ "em": 0.36,
+ "f1": 0.4809,
+ "avg_faiss_overlap": 0.9,
+ "avg_entropy": 6.8422
+ },
+ {
+ "beta": 10.0,
+ "lambda": 0.95,
+ "max_iter": 8,
+ "em": 0.36,
+ "f1": 0.4797,
+ "avg_faiss_overlap": 0.886,
+ "avg_entropy": 6.8941
+ },
+ {
+ "beta": 5.0,
+ "lambda": 0.95,
+ "max_iter": 8,
+ "em": 0.35,
+ "f1": 0.4697,
+ "avg_faiss_overlap": 0.886,
+ "avg_entropy": 7.1219
+ },
+ {
+ "beta": 5.0,
+ "lambda": 0.95,
+ "max_iter": 3,
+ "em": 0.35,
+ "f1": 0.4692,
+ "avg_faiss_overlap": 0.956,
+ "avg_entropy": 7.09
+ },
+ {
+ "beta": 5.0,
+ "lambda": 0.95,
+ "max_iter": 5,
+ "em": 0.35,
+ "f1": 0.4692,
+ "avg_faiss_overlap": 0.928,
+ "avg_entropy": 7.105
+ },
+ {
+ "beta": 10.0,
+ "lambda": 0.9,
+ "max_iter": 1,
+ "em": 0.35,
+ "f1": 0.4692,
+ "avg_faiss_overlap": 0.966,
+ "avg_entropy": 6.6138
+ },
+ {
+ "beta": 10.0,
+ "lambda": 0.95,
+ "max_iter": 3,
+ "em": 0.35,
+ "f1": 0.4692,
+ "avg_faiss_overlap": 0.956,
+ "avg_entropy": 6.6763
+ },
+ {
+ "beta": 5.0,
+ "lambda": 0.9,
+ "max_iter": 1,
+ "em": 0.34,
+ "f1": 0.4672,
+ "avg_faiss_overlap": 0.97,
+ "avg_entropy": 7.0815
+ }
+ ]
+} \ No newline at end of file