diff options
| author | YurenHao0426 <Blackhao0426@gmail.com> | 2026-02-16 14:44:42 -0600 |
|---|---|---|
| committer | YurenHao0426 <Blackhao0426@gmail.com> | 2026-02-16 14:44:42 -0600 |
| commit | 09d50e47860da0035e178a442dc936028808a0b3 (patch) | |
| tree | 9d651b0c7d289a9a0405953f2da989a3c431f147 /data/processed | |
| parent | c90b48e3f8da9dd0f8d2ae82ddf977436bb0cfc3 (diff) | |
- Add centering support to MemoryBank (center_query, apply_centering, mean
persistence in save/load) to remove centroid attractor in Hopfield dynamics
- Add center flag to MemoryBankConfig, device field to PipelineConfig
- Grid search scripts: initial (β≤8), residual, high-β, and centered grids
with dedup-based LLM caching (89-91% call savings)
- Energy landscape visualization: 2D contour, 1D profile, UMAP, PCA heatmap
comparing centered vs uncentered dynamics
- Experiment log (note.md) documenting 4 rounds of results and root cause
analysis of centroid attractor problem
- Key finding: β_critical ≈ 37.6 for centered memory; best configs beat
FAISS baseline by +3-4% F1
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Diffstat (limited to 'data/processed')
| -rw-r--r-- | data/processed/grid_search_results.json | 449 | ||||
| -rw-r--r-- | data/processed/highbeta_grid_results.json | 828 | ||||
| -rw-r--r-- | data/processed/residual_grid_results.json | 1016 |
3 files changed, 2293 insertions, 0 deletions
diff --git a/data/processed/grid_search_results.json b/data/processed/grid_search_results.json new file mode 100644 index 0000000..cdbd3ca --- /dev/null +++ b/data/processed/grid_search_results.json @@ -0,0 +1,449 @@ +{ + "meta": { + "grid_size": 42, + "n_questions": 100, + "total_grid_evaluations": 4200, + "unique_llm_calls": 281, + "faiss_llm_calls": 100, + "total_llm_calls": 381, + "savings_pct": 91.1, + "retrieval_time_s": 0.93, + "generation_time_s": 735.92, + "total_time_s": 1049.0 + }, + "faiss_baseline": { + "em": 0.32, + "f1": 0.4380753968253968 + }, + "grid_results": [ + { + "beta": 0.25, + "max_iter": 1, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1785, + "avg_energy_gap": 2.7302, + "avg_faiss_overlap": 0.002, + "avg_steps": 1.0 + }, + { + "beta": 0.25, + "max_iter": 2, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1785, + "avg_energy_gap": 2.7302, + "avg_faiss_overlap": 0.002, + "avg_steps": 2.0 + }, + { + "beta": 0.25, + "max_iter": 3, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1785, + "avg_energy_gap": 2.7302, + "avg_faiss_overlap": 0.002, + "avg_steps": 3.0 + }, + { + "beta": 0.25, + "max_iter": 5, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1785, + "avg_energy_gap": 2.7302, + "avg_faiss_overlap": 0.002, + "avg_steps": 3.0 + }, + { + "beta": 0.25, + "max_iter": 8, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1785, + "avg_energy_gap": 2.7302, + "avg_faiss_overlap": 0.002, + "avg_steps": 3.0 + }, + { + "beta": 0.25, + "max_iter": 15, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1785, + "avg_energy_gap": 2.7302, + "avg_faiss_overlap": 0.002, + "avg_steps": 3.0 + }, + { + "beta": 0.5, + "max_iter": 1, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1784, + "avg_energy_gap": 2.7292, + "avg_faiss_overlap": 0.002, + "avg_steps": 1.0 + }, + { + "beta": 0.5, + "max_iter": 2, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1784, + "avg_energy_gap": 2.7292, + "avg_faiss_overlap": 0.002, + "avg_steps": 2.0 + }, + { + "beta": 0.5, + "max_iter": 3, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1784, + "avg_energy_gap": 2.7292, + "avg_faiss_overlap": 0.002, + "avg_steps": 3.0 + }, + { + "beta": 0.5, + "max_iter": 5, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1784, + "avg_energy_gap": 2.7292, + "avg_faiss_overlap": 0.002, + "avg_steps": 3.0 + }, + { + "beta": 0.5, + "max_iter": 8, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1784, + "avg_energy_gap": 2.7292, + "avg_faiss_overlap": 0.002, + "avg_steps": 3.0 + }, + { + "beta": 0.5, + "max_iter": 15, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1784, + "avg_energy_gap": 2.7292, + "avg_faiss_overlap": 0.002, + "avg_steps": 3.0 + }, + { + "beta": 1.0, + "max_iter": 1, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1781, + "avg_energy_gap": 2.7273, + "avg_faiss_overlap": 0.002, + "avg_steps": 1.0 + }, + { + "beta": 1.0, + "max_iter": 2, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1781, + "avg_energy_gap": 2.7273, + "avg_faiss_overlap": 0.002, + "avg_steps": 2.0 + }, + { + "beta": 1.0, + "max_iter": 3, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1781, + "avg_energy_gap": 2.7273, + "avg_faiss_overlap": 0.002, + "avg_steps": 3.0 + }, + { + "beta": 1.0, + "max_iter": 5, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1781, + "avg_energy_gap": 2.7273, + "avg_faiss_overlap": 0.002, + "avg_steps": 4.0 + }, + { + "beta": 1.0, + "max_iter": 8, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1781, + "avg_energy_gap": 2.7273, + "avg_faiss_overlap": 0.002, + "avg_steps": 4.0 + }, + { + "beta": 1.0, + "max_iter": 15, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1781, + "avg_energy_gap": 2.7273, + "avg_faiss_overlap": 0.002, + "avg_steps": 4.0 + }, + { + "beta": 2.0, + "max_iter": 1, + "em": 0.15, + "f1": 0.1977, + "avg_entropy": 7.1767, + "avg_energy_gap": 2.7234, + "avg_faiss_overlap": 0.004, + "avg_steps": 1.0 + }, + { + "beta": 2.0, + "max_iter": 2, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1767, + "avg_energy_gap": 2.7235, + "avg_faiss_overlap": 0.002, + "avg_steps": 2.0 + }, + { + "beta": 2.0, + "max_iter": 3, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1767, + "avg_energy_gap": 2.7235, + "avg_faiss_overlap": 0.002, + "avg_steps": 3.0 + }, + { + "beta": 2.0, + "max_iter": 5, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1767, + "avg_energy_gap": 2.7235, + "avg_faiss_overlap": 0.002, + "avg_steps": 4.0 + }, + { + "beta": 2.0, + "max_iter": 8, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1767, + "avg_energy_gap": 2.7235, + "avg_faiss_overlap": 0.002, + "avg_steps": 4.0 + }, + { + "beta": 2.0, + "max_iter": 15, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1767, + "avg_energy_gap": 2.7235, + "avg_faiss_overlap": 0.002, + "avg_steps": 4.0 + }, + { + "beta": 3.0, + "max_iter": 1, + "em": 0.14, + "f1": 0.2016, + "avg_entropy": 7.1742, + "avg_energy_gap": 2.7193, + "avg_faiss_overlap": 0.006, + "avg_steps": 1.0 + }, + { + "beta": 3.0, + "max_iter": 2, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1742, + "avg_energy_gap": 2.7196, + "avg_faiss_overlap": 0.002, + "avg_steps": 2.0 + }, + { + "beta": 3.0, + "max_iter": 3, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1742, + "avg_energy_gap": 2.7196, + "avg_faiss_overlap": 0.002, + "avg_steps": 3.0 + }, + { + "beta": 3.0, + "max_iter": 5, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1742, + "avg_energy_gap": 2.7196, + "avg_faiss_overlap": 0.002, + "avg_steps": 5.0 + }, + { + "beta": 3.0, + "max_iter": 8, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1742, + "avg_energy_gap": 2.7196, + "avg_faiss_overlap": 0.002, + "avg_steps": 5.0 + }, + { + "beta": 3.0, + "max_iter": 15, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1742, + "avg_energy_gap": 2.7196, + "avg_faiss_overlap": 0.002, + "avg_steps": 5.0 + }, + { + "beta": 5.0, + "max_iter": 1, + "em": 0.16, + "f1": 0.2207, + "avg_entropy": 7.1659, + "avg_energy_gap": 2.7105, + "avg_faiss_overlap": 0.018, + "avg_steps": 1.0 + }, + { + "beta": 5.0, + "max_iter": 2, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1661, + "avg_energy_gap": 2.7114, + "avg_faiss_overlap": 0.002, + "avg_steps": 2.0 + }, + { + "beta": 5.0, + "max_iter": 3, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1661, + "avg_energy_gap": 2.7114, + "avg_faiss_overlap": 0.002, + "avg_steps": 3.0 + }, + { + "beta": 5.0, + "max_iter": 5, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1661, + "avg_energy_gap": 2.7114, + "avg_faiss_overlap": 0.002, + "avg_steps": 5.0 + }, + { + "beta": 5.0, + "max_iter": 8, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1661, + "avg_energy_gap": 2.7114, + "avg_faiss_overlap": 0.002, + "avg_steps": 5.0 + }, + { + "beta": 5.0, + "max_iter": 15, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1661, + "avg_energy_gap": 2.7114, + "avg_faiss_overlap": 0.002, + "avg_steps": 5.0 + }, + { + "beta": 8.0, + "max_iter": 1, + "em": 0.21, + "f1": 0.2938, + "avg_entropy": 7.1438, + "avg_energy_gap": 2.6938, + "avg_faiss_overlap": 0.068, + "avg_steps": 1.0 + }, + { + "beta": 8.0, + "max_iter": 2, + "em": 0.12, + "f1": 0.1766, + "avg_entropy": 7.145, + "avg_energy_gap": 2.6972, + "avg_faiss_overlap": 0.004, + "avg_steps": 2.0 + }, + { + "beta": 8.0, + "max_iter": 3, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1449, + "avg_energy_gap": 2.6972, + "avg_faiss_overlap": 0.002, + "avg_steps": 3.0 + }, + { + "beta": 8.0, + "max_iter": 5, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1448, + "avg_energy_gap": 2.6972, + "avg_faiss_overlap": 0.002, + "avg_steps": 5.0 + }, + { + "beta": 8.0, + "max_iter": 8, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1448, + "avg_energy_gap": 2.6972, + "avg_faiss_overlap": 0.002, + "avg_steps": 7.0 + }, + { + "beta": 8.0, + "max_iter": 15, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1448, + "avg_energy_gap": 2.6972, + "avg_faiss_overlap": 0.002, + "avg_steps": 7.0 + } + ], + "best_config": { + "beta": 8.0, + "max_iter": 1, + "em": 0.21, + "f1": 0.2938, + "avg_entropy": 7.1438, + "avg_energy_gap": 2.6938, + "avg_faiss_overlap": 0.068 + } +}
\ No newline at end of file diff --git a/data/processed/highbeta_grid_results.json b/data/processed/highbeta_grid_results.json new file mode 100644 index 0000000..a04895a --- /dev/null +++ b/data/processed/highbeta_grid_results.json @@ -0,0 +1,828 @@ +{ + "meta": { + "n_questions": 100, + "total_configs": 105, + "unique_llm_calls": 1379, + "total_time_s": 4571.0 + }, + "faiss_baseline": { + "em": 0.32, + "f1": 0.4381 + }, + "grid_results": [ + { + "config": "\u03b2=20.0_iter=1_standard", + "em": 0.38, + "f1": 0.4691, + "avg_faiss_overlap": 0.48, + "avg_entropy": 4.5305 + }, + { + "config": "\u03b2=50.0_iter=1_standard", + "em": 0.36, + "f1": 0.4565, + "avg_faiss_overlap": 0.508, + "avg_entropy": 0.3196 + }, + { + "config": "\u03b2=20.0_iter=1_residual_0.9", + "em": 0.34, + "f1": 0.4552, + "avg_faiss_overlap": 0.966, + "avg_entropy": 3.5526 + }, + { + "config": "\u03b2=20.0_iter=2_residual_0.95", + "em": 0.34, + "f1": 0.4552, + "avg_faiss_overlap": 0.966, + "avg_entropy": 3.5503 + }, + { + "config": "\u03b2=500.0_iter=1_residual_0.9", + "em": 0.36, + "f1": 0.4545, + "avg_faiss_overlap": 0.692, + "avg_entropy": 0.0074 + }, + { + "config": "\u03b2=50.0_iter=1_normalized", + "em": 0.37, + "f1": 0.4539, + "avg_faiss_overlap": 0.464, + "avg_entropy": 1.7333 + }, + { + "config": "\u03b2=500.0_iter=1_residual_0.95", + "em": 0.36, + "f1": 0.4536, + "avg_faiss_overlap": 0.748, + "avg_entropy": 0.013 + }, + { + "config": "\u03b2=20.0_iter=1_residual_0.95", + "em": 0.34, + "f1": 0.4511, + "avg_faiss_overlap": 0.98, + "avg_entropy": 3.5011 + }, + { + "config": "\u03b2=50.0_iter=2_normalized", + "em": 0.37, + "f1": 0.4498, + "avg_faiss_overlap": 0.38, + "avg_entropy": 1.0954 + }, + { + "config": "\u03b2=20.0_iter=3_residual_0.95", + "em": 0.32, + "f1": 0.4494, + "avg_faiss_overlap": 0.946, + "avg_entropy": 3.5994 + }, + { + "config": "\u03b2=50.0_iter=1_residual_0.95", + "em": 0.34, + "f1": 0.4486, + "avg_faiss_overlap": 0.974, + "avg_entropy": 0.677 + }, + { + "config": "\u03b2=100.0_iter=1_residual_0.95", + "em": 0.34, + "f1": 0.4464, + "avg_faiss_overlap": 0.97, + "avg_entropy": 0.2081 + }, + { + "config": "\u03b2=200.0_iter=1_residual_0.95", + "em": 0.34, + "f1": 0.4464, + "avg_faiss_overlap": 0.968, + "avg_entropy": 0.0722 + }, + { + "config": "\u03b2=100.0_iter=1_normalized", + "em": 0.35, + "f1": 0.446, + "avg_faiss_overlap": 0.508, + "avg_entropy": 0.1139 + }, + { + "config": "\u03b2=500.0_iter=2_residual_0.95", + "em": 0.35, + "f1": 0.4445, + "avg_faiss_overlap": 0.692, + "avg_entropy": 0.0037 + }, + { + "config": "\u03b2=50.0_iter=2_residual_0.9", + "em": 0.33, + "f1": 0.4441, + "avg_faiss_overlap": 0.886, + "avg_entropy": 0.4749 + }, + { + "config": "\u03b2=100.0_iter=2_residual_0.9", + "em": 0.33, + "f1": 0.4441, + "avg_faiss_overlap": 0.878, + "avg_entropy": 0.0623 + }, + { + "config": "\u03b2=50.0_iter=8_residual_0.95", + "em": 0.32, + "f1": 0.4431, + "avg_faiss_overlap": 0.808, + "avg_entropy": 0.2083 + }, + { + "config": "\u03b2=100.0_iter=8_residual_0.95", + "em": 0.32, + "f1": 0.4431, + "avg_faiss_overlap": 0.794, + "avg_entropy": 0.0019 + }, + { + "config": "\u03b2=100.0_iter=3_residual_0.95", + "em": 0.33, + "f1": 0.4427, + "avg_faiss_overlap": 0.9, + "avg_entropy": 0.0754 + }, + { + "config": "\u03b2=20.0_iter=8_residual_0.95", + "em": 0.33, + "f1": 0.442, + "avg_faiss_overlap": 0.878, + "avg_entropy": 3.8365 + }, + { + "config": "\u03b2=50.0_iter=1_residual_0.9", + "em": 0.32, + "f1": 0.4419, + "avg_faiss_overlap": 0.946, + "avg_entropy": 0.6043 + }, + { + "config": "\u03b2=50.0_iter=2_residual_0.95", + "em": 0.32, + "f1": 0.4419, + "avg_faiss_overlap": 0.944, + "avg_entropy": 0.5941 + }, + { + "config": "\u03b2=50.0_iter=5_residual_0.95", + "em": 0.32, + "f1": 0.4407, + "avg_faiss_overlap": 0.87, + "avg_entropy": 0.3877 + }, + { + "config": "\u03b2=200.0_iter=1_normalized", + "em": 0.33, + "f1": 0.4404, + "avg_faiss_overlap": 0.484, + "avg_entropy": 0.0109 + }, + { + "config": "\u03b2=500.0_iter=5_residual_0.95", + "em": 0.35, + "f1": 0.4404, + "avg_faiss_overlap": 0.546, + "avg_entropy": 0.0 + }, + { + "config": "\u03b2=50.0_iter=8_residual_0.9", + "em": 0.34, + "f1": 0.4402, + "avg_faiss_overlap": 0.666, + "avg_entropy": 0.0306 + }, + { + "config": "\u03b2=100.0_iter=1_standard", + "em": 0.33, + "f1": 0.44, + "avg_faiss_overlap": 0.464, + "avg_entropy": 0.0196 + }, + { + "config": "\u03b2=100.0_iter=1_residual_0.9", + "em": 0.32, + "f1": 0.4397, + "avg_faiss_overlap": 0.932, + "avg_entropy": 0.1471 + }, + { + "config": "\u03b2=100.0_iter=2_residual_0.95", + "em": 0.32, + "f1": 0.4397, + "avg_faiss_overlap": 0.932, + "avg_entropy": 0.1268 + }, + { + "config": "\u03b2=100.0_iter=5_residual_0.95", + "em": 0.32, + "f1": 0.4391, + "avg_faiss_overlap": 0.858, + "avg_entropy": 0.019 + }, + { + "config": "\u03b2=20.0_iter=0_standard", + "em": 0.32, + "f1": 0.4381, + "avg_faiss_overlap": 1.0, + "avg_entropy": 3.452 + }, + { + "config": "\u03b2=50.0_iter=0_standard", + "em": 0.32, + "f1": 0.4381, + "avg_faiss_overlap": 1.0, + "avg_entropy": 0.7723 + }, + { + "config": "\u03b2=50.0_iter=5_standard", + "em": 0.33, + "f1": 0.4381, + "avg_faiss_overlap": 0.408, + "avg_entropy": 0.0 + }, + { + "config": "\u03b2=50.0_iter=8_standard", + "em": 0.33, + "f1": 0.4381, + "avg_faiss_overlap": 0.408, + "avg_entropy": 0.0 + }, + { + "config": "\u03b2=50.0_iter=8_normalized", + "em": 0.34, + "f1": 0.4381, + "avg_faiss_overlap": 0.358, + "avg_entropy": 0.0 + }, + { + "config": "\u03b2=100.0_iter=0_standard", + "em": 0.32, + "f1": 0.4381, + "avg_faiss_overlap": 1.0, + "avg_entropy": 0.3128 + }, + { + "config": "\u03b2=100.0_iter=2_normalized", + "em": 0.33, + "f1": 0.4381, + "avg_faiss_overlap": 0.422, + "avg_entropy": 0.0143 + }, + { + "config": "\u03b2=100.0_iter=3_normalized", + "em": 0.33, + "f1": 0.4381, + "avg_faiss_overlap": 0.412, + "avg_entropy": 0.0 + }, + { + "config": "\u03b2=100.0_iter=5_normalized", + "em": 0.33, + "f1": 0.4381, + "avg_faiss_overlap": 0.41, + "avg_entropy": 0.0 + }, + { + "config": "\u03b2=100.0_iter=8_normalized", + "em": 0.33, + "f1": 0.4381, + "avg_faiss_overlap": 0.41, + "avg_entropy": 0.0 + }, + { + "config": "\u03b2=200.0_iter=0_standard", + "em": 0.32, + "f1": 0.4381, + "avg_faiss_overlap": 1.0, + "avg_entropy": 0.1535 + }, + { + "config": "\u03b2=50.0_iter=3_residual_0.95", + "em": 0.32, + "f1": 0.4377, + "avg_faiss_overlap": 0.912, + "avg_entropy": 0.5215 + }, + { + "config": "\u03b2=20.0_iter=5_residual_0.9", + "em": 0.32, + "f1": 0.437, + "avg_faiss_overlap": 0.846, + "avg_entropy": 3.9311 + }, + { + "config": "\u03b2=20.0_iter=2_residual_0.9", + "em": 0.31, + "f1": 0.4349, + "avg_faiss_overlap": 0.926, + "avg_entropy": 3.6521 + }, + { + "config": "\u03b2=200.0_iter=1_residual_0.9", + "em": 0.32, + "f1": 0.4347, + "avg_faiss_overlap": 0.928, + "avg_entropy": 0.0466 + }, + { + "config": "\u03b2=200.0_iter=2_residual_0.95", + "em": 0.32, + "f1": 0.4347, + "avg_faiss_overlap": 0.928, + "avg_entropy": 0.0274 + }, + { + "config": "\u03b2=50.0_iter=3_residual_0.9", + "em": 0.31, + "f1": 0.4344, + "avg_faiss_overlap": 0.842, + "avg_entropy": 0.3574 + }, + { + "config": "\u03b2=200.0_iter=2_residual_0.9", + "em": 0.32, + "f1": 0.4341, + "avg_faiss_overlap": 0.876, + "avg_entropy": 0.008 + }, + { + "config": "\u03b2=200.0_iter=5_residual_0.95", + "em": 0.31, + "f1": 0.4341, + "avg_faiss_overlap": 0.852, + "avg_entropy": 0.0005 + }, + { + "config": "\u03b2=200.0_iter=1_standard", + "em": 0.33, + "f1": 0.4331, + "avg_faiss_overlap": 0.43, + "avg_entropy": 0.0061 + }, + { + "config": "\u03b2=200.0_iter=3_residual_0.95", + "em": 0.32, + "f1": 0.4327, + "avg_faiss_overlap": 0.898, + "avg_entropy": 0.0082 + }, + { + "config": "\u03b2=20.0_iter=3_residual_0.9", + "em": 0.31, + "f1": 0.4313, + "avg_faiss_overlap": 0.9, + "avg_entropy": 3.7492 + }, + { + "config": "\u03b2=500.0_iter=3_residual_0.95", + "em": 0.35, + "f1": 0.4312, + "avg_faiss_overlap": 0.636, + "avg_entropy": 0.0 + }, + { + "config": "\u03b2=20.0_iter=8_residual_0.9", + "em": 0.31, + "f1": 0.4302, + "avg_faiss_overlap": 0.748, + "avg_entropy": 4.1579 + }, + { + "config": "\u03b2=20.0_iter=5_residual_0.95", + "em": 0.31, + "f1": 0.4299, + "avg_faiss_overlap": 0.91, + "avg_entropy": 3.6963 + }, + { + "config": "\u03b2=50.0_iter=3_normalized", + "em": 0.33, + "f1": 0.4291, + "avg_faiss_overlap": 0.368, + "avg_entropy": 0.6769 + }, + { + "config": "\u03b2=50.0_iter=5_residual_0.9", + "em": 0.32, + "f1": 0.4291, + "avg_faiss_overlap": 0.778, + "avg_entropy": 0.1599 + }, + { + "config": "\u03b2=50.0_iter=3_standard", + "em": 0.32, + "f1": 0.4281, + "avg_faiss_overlap": 0.41, + "avg_entropy": 0.0016 + }, + { + "config": "\u03b2=50.0_iter=5_normalized", + "em": 0.33, + "f1": 0.4281, + "avg_faiss_overlap": 0.356, + "avg_entropy": 0.1417 + }, + { + "config": "\u03b2=100.0_iter=3_standard", + "em": 0.32, + "f1": 0.4281, + "avg_faiss_overlap": 0.406, + "avg_entropy": 0.0 + }, + { + "config": "\u03b2=100.0_iter=5_standard", + "em": 0.32, + "f1": 0.4281, + "avg_faiss_overlap": 0.406, + "avg_entropy": 0.0 + }, + { + "config": "\u03b2=100.0_iter=8_standard", + "em": 0.32, + "f1": 0.4281, + "avg_faiss_overlap": 0.406, + "avg_entropy": 0.0 + }, + { + "config": "\u03b2=200.0_iter=2_standard", + "em": 0.32, + "f1": 0.4281, + "avg_faiss_overlap": 0.408, + "avg_entropy": 0.0 + }, + { + "config": "\u03b2=200.0_iter=2_normalized", + "em": 0.32, + "f1": 0.4281, + "avg_faiss_overlap": 0.408, + "avg_entropy": 0.0 + }, + { + "config": "\u03b2=200.0_iter=3_standard", + "em": 0.32, + "f1": 0.4281, + "avg_faiss_overlap": 0.406, + "avg_entropy": 0.0 + }, + { + "config": "\u03b2=200.0_iter=3_normalized", + "em": 0.32, + "f1": 0.4281, + "avg_faiss_overlap": 0.406, + "avg_entropy": 0.0 + }, + { + "config": "\u03b2=200.0_iter=5_standard", + "em": 0.32, + "f1": 0.4281, + "avg_faiss_overlap": 0.406, + "avg_entropy": 0.0 + }, + { + "config": "\u03b2=200.0_iter=5_normalized", + "em": 0.32, + "f1": 0.4281, + "avg_faiss_overlap": 0.406, + "avg_entropy": 0.0 + }, + { + "config": "\u03b2=200.0_iter=8_standard", + "em": 0.32, + "f1": 0.4281, + "avg_faiss_overlap": 0.406, + "avg_entropy": 0.0 + }, + { + "config": "\u03b2=200.0_iter=8_normalized", + "em": 0.32, + "f1": 0.4281, + "avg_faiss_overlap": 0.406, + "avg_entropy": 0.0 + }, + { + "config": "\u03b2=50.0_iter=2_standard", + "em": 0.32, + "f1": 0.4265, + "avg_faiss_overlap": 0.422, + "avg_entropy": 0.0481 + }, + { + "config": "\u03b2=500.0_iter=3_residual_0.9", + "em": 0.33, + "f1": 0.4265, + "avg_faiss_overlap": 0.51, + "avg_entropy": 0.0 + }, + { + "config": "\u03b2=100.0_iter=3_residual_0.9", + "em": 0.29, + "f1": 0.4244, + "avg_faiss_overlap": 0.828, + "avg_entropy": 0.019 + }, + { + "config": "\u03b2=200.0_iter=3_residual_0.9", + "em": 0.29, + "f1": 0.4244, + "avg_faiss_overlap": 0.824, + "avg_entropy": 0.0021 + }, + { + "config": "\u03b2=500.0_iter=0_standard", + "em": 0.33, + "f1": 0.4236, + "avg_faiss_overlap": 0.798, + "avg_entropy": 0.0746 + }, + { + "config": "\u03b2=200.0_iter=8_residual_0.95", + "em": 0.3, + "f1": 0.4231, + "avg_faiss_overlap": 0.786, + "avg_entropy": 0.0 + }, + { + "config": "\u03b2=100.0_iter=2_standard", + "em": 0.31, + "f1": 0.4181, + "avg_faiss_overlap": 0.41, + "avg_entropy": 0.0 + }, + { + "config": "\u03b2=100.0_iter=8_residual_0.9", + "em": 0.32, + "f1": 0.4152, + "avg_faiss_overlap": 0.636, + "avg_entropy": 0.0 + }, + { + "config": "\u03b2=200.0_iter=8_residual_0.9", + "em": 0.32, + "f1": 0.4152, + "avg_faiss_overlap": 0.634, + "avg_entropy": 0.0 + }, + { + "config": "\u03b2=100.0_iter=5_residual_0.9", + "em": 0.3, + "f1": 0.4141, + "avg_faiss_overlap": 0.764, + "avg_entropy": 0.0014 + }, + { + "config": "\u03b2=500.0_iter=2_residual_0.9", + "em": 0.32, + "f1": 0.4098, + "avg_faiss_overlap": 0.584, + "avg_entropy": 0.0001 + }, + { + "config": "\u03b2=200.0_iter=5_residual_0.9", + "em": 0.29, + "f1": 0.4041, + "avg_faiss_overlap": 0.754, + "avg_entropy": 0.0 + }, + { + "config": "\u03b2=500.0_iter=5_residual_0.9", + "em": 0.31, + "f1": 0.3949, + "avg_faiss_overlap": 0.34, + "avg_entropy": 0.0 + }, + { + "config": "\u03b2=500.0_iter=8_residual_0.95", + "em": 0.29, + "f1": 0.3839, + "avg_faiss_overlap": 0.424, + "avg_entropy": 0.0 + }, + { + "config": "\u03b2=500.0_iter=8_residual_0.9", + "em": 0.27, + "f1": 0.3743, + "avg_faiss_overlap": 0.228, + "avg_entropy": 0.0 + }, + { + "config": "\u03b2=20.0_iter=2_standard", + "em": 0.29, + "f1": 0.3713, + "avg_faiss_overlap": 0.248, + "avg_entropy": 4.6996 + }, + { + "config": "\u03b2=500.0_iter=1_normalized", + "em": 0.27, + "f1": 0.3693, + "avg_faiss_overlap": 0.236, + "avg_entropy": 0.0018 + }, + { + "config": "\u03b2=500.0_iter=1_standard", + "em": 0.25, + "f1": 0.35, + "avg_faiss_overlap": 0.22, + "avg_entropy": 0.0003 + }, + { + "config": "\u03b2=500.0_iter=2_standard", + "em": 0.25, + "f1": 0.35, + "avg_faiss_overlap": 0.202, + "avg_entropy": 0.0 + }, + { + "config": "\u03b2=500.0_iter=2_normalized", + "em": 0.25, + "f1": 0.35, + "avg_faiss_overlap": 0.202, + "avg_entropy": 0.0 + }, + { + "config": "\u03b2=500.0_iter=3_standard", + "em": 0.25, + "f1": 0.35, + "avg_faiss_overlap": 0.202, + "avg_entropy": 0.0 + }, + { + "config": "\u03b2=500.0_iter=3_normalized", + "em": 0.25, + "f1": 0.35, + "avg_faiss_overlap": 0.202, + "avg_entropy": 0.0 + }, + { + "config": "\u03b2=500.0_iter=5_standard", + "em": 0.25, + "f1": 0.35, + "avg_faiss_overlap": 0.202, + "avg_entropy": 0.0 + }, + { + "config": "\u03b2=500.0_iter=5_normalized", + "em": 0.25, + "f1": 0.35, + "avg_faiss_overlap": 0.202, + "avg_entropy": 0.0 + }, + { + "config": "\u03b2=500.0_iter=8_standard", + "em": 0.25, + "f1": 0.35, + "avg_faiss_overlap": 0.202, + "avg_entropy": 0.0 + }, + { + "config": "\u03b2=500.0_iter=8_normalized", + "em": 0.25, + "f1": 0.35, + "avg_faiss_overlap": 0.202, + "avg_entropy": 0.0 + }, + { + "config": "\u03b2=20.0_iter=1_normalized", + "em": 0.27, + "f1": 0.3367, + "avg_faiss_overlap": 0.14, + "avg_entropy": 6.5894 + }, + { + "config": "\u03b2=20.0_iter=3_standard", + "em": 0.27, + "f1": 0.332, + "avg_faiss_overlap": 0.184, + "avg_entropy": 4.6947 + }, + { + "config": "\u03b2=20.0_iter=5_standard", + "em": 0.24, + "f1": 0.3116, + "avg_faiss_overlap": 0.162, + "avg_entropy": 4.6875 + }, + { + "config": "\u03b2=20.0_iter=8_standard", + "em": 0.23, + "f1": 0.3016, + "avg_faiss_overlap": 0.16, + "avg_entropy": 4.684 + }, + { + "config": "\u03b2=20.0_iter=2_normalized", + "em": 0.18, + "f1": 0.2255, + "avg_faiss_overlap": 0.044, + "avg_entropy": 6.4837 + }, + { + "config": "\u03b2=20.0_iter=8_normalized", + "em": 0.14, + "f1": 0.2221, + "avg_faiss_overlap": 0.02, + "avg_entropy": 6.3573 + }, + { + "config": "\u03b2=20.0_iter=3_normalized", + "em": 0.14, + "f1": 0.2155, + "avg_faiss_overlap": 0.024, + "avg_entropy": 6.4174 + }, + { + "config": "\u03b2=20.0_iter=5_normalized", + "em": 0.13, + "f1": 0.1961, + "avg_faiss_overlap": 0.02, + "avg_entropy": 6.3707 + } + ], + "best_config": { + "config": "\u03b2=20.0_iter=1_standard", + "em": 0.38, + "f1": 0.4691, + "avg_faiss_overlap": 0.48, + "avg_entropy": 4.5305 + }, + "top10": [ + { + "config": "\u03b2=20.0_iter=1_standard", + "em": 0.38, + "f1": 0.4691, + "avg_faiss_overlap": 0.48, + "avg_entropy": 4.5305 + }, + { + "config": "\u03b2=50.0_iter=1_standard", + "em": 0.36, + "f1": 0.4565, + "avg_faiss_overlap": 0.508, + "avg_entropy": 0.3196 + }, + { + "config": "\u03b2=20.0_iter=1_residual_0.9", + "em": 0.34, + "f1": 0.4552, + "avg_faiss_overlap": 0.966, + "avg_entropy": 3.5526 + }, + { + "config": "\u03b2=20.0_iter=2_residual_0.95", + "em": 0.34, + "f1": 0.4552, + "avg_faiss_overlap": 0.966, + "avg_entropy": 3.5503 + }, + { + "config": "\u03b2=500.0_iter=1_residual_0.9", + "em": 0.36, + "f1": 0.4545, + "avg_faiss_overlap": 0.692, + "avg_entropy": 0.0074 + }, + { + "config": "\u03b2=50.0_iter=1_normalized", + "em": 0.37, + "f1": 0.4539, + "avg_faiss_overlap": 0.464, + "avg_entropy": 1.7333 + }, + { + "config": "\u03b2=500.0_iter=1_residual_0.95", + "em": 0.36, + "f1": 0.4536, + "avg_faiss_overlap": 0.748, + "avg_entropy": 0.013 + }, + { + "config": "\u03b2=20.0_iter=1_residual_0.95", + "em": 0.34, + "f1": 0.4511, + "avg_faiss_overlap": 0.98, + "avg_entropy": 3.5011 + }, + { + "config": "\u03b2=50.0_iter=2_normalized", + "em": 0.37, + "f1": 0.4498, + "avg_faiss_overlap": 0.38, + "avg_entropy": 1.0954 + }, + { + "config": "\u03b2=20.0_iter=3_residual_0.95", + "em": 0.32, + "f1": 0.4494, + "avg_faiss_overlap": 0.946, + "avg_entropy": 3.5994 + } + ] +}
\ No newline at end of file diff --git a/data/processed/residual_grid_results.json b/data/processed/residual_grid_results.json new file mode 100644 index 0000000..c5b6d28 --- /dev/null +++ b/data/processed/residual_grid_results.json @@ -0,0 +1,1016 @@ +{ + "meta": { + "n_questions": 100, + "total_configs": 100, + "unique_llm_calls": 1666, + "faiss_llm_calls": 100, + "total_time_s": 5113.4 + }, + "faiss_baseline": { + "em": 0.32, + "f1": 0.4381 + }, + "grid_results": [ + { + "beta": 5.0, + "lambda": 0.7, + "max_iter": 1, + "em": 0.36, + "f1": 0.4809, + "avg_faiss_overlap": 0.902, + "avg_entropy": 7.1163 + }, + { + "beta": 5.0, + "lambda": 0.9, + "max_iter": 3, + "em": 0.36, + "f1": 0.4809, + "avg_faiss_overlap": 0.912, + "avg_entropy": 7.1122 + }, + { + "beta": 10.0, + "lambda": 0.7, + "max_iter": 1, + "em": 0.36, + "f1": 0.4809, + "avg_faiss_overlap": 0.9, + "avg_entropy": 6.8422 + }, + { + "beta": 10.0, + "lambda": 0.95, + "max_iter": 8, + "em": 0.36, + "f1": 0.4797, + "avg_faiss_overlap": 0.886, + "avg_entropy": 6.8941 + }, + { + "beta": 5.0, + "lambda": 0.95, + "max_iter": 8, + "em": 0.35, + "f1": 0.4697, + "avg_faiss_overlap": 0.886, + "avg_entropy": 7.1219 + }, + { + "beta": 5.0, + "lambda": 0.95, + "max_iter": 3, + "em": 0.35, + "f1": 0.4692, + "avg_faiss_overlap": 0.956, + "avg_entropy": 7.09 + }, + { + "beta": 5.0, + "lambda": 0.95, + "max_iter": 5, + "em": 0.35, + "f1": 0.4692, + "avg_faiss_overlap": 0.928, + "avg_entropy": 7.105 + }, + { + "beta": 10.0, + "lambda": 0.9, + "max_iter": 1, + "em": 0.35, + "f1": 0.4692, + "avg_faiss_overlap": 0.966, + "avg_entropy": 6.6138 + }, + { + "beta": 10.0, + "lambda": 0.95, + "max_iter": 3, + "em": 0.35, + "f1": 0.4692, + "avg_faiss_overlap": 0.956, + "avg_entropy": 6.6763 + }, + { + "beta": 5.0, + "lambda": 0.9, + "max_iter": 1, + "em": 0.34, + "f1": 0.4672, + "avg_faiss_overlap": 0.97, + "avg_entropy": 7.0815 + }, + { + "beta": 10.0, + "lambda": 0.9, + "max_iter": 5, + "em": 0.34, + "f1": 0.4637, + "avg_faiss_overlap": 0.856, + "avg_entropy": 6.9499 + }, + { + "beta": 5.0, + "lambda": 0.5, + "max_iter": 1, + "em": 0.33, + "f1": 0.4627, + "avg_faiss_overlap": 0.786, + "avg_entropy": 7.1407 + }, + { + "beta": 5.0, + "lambda": 0.9, + "max_iter": 8, + "em": 0.34, + "f1": 0.4625, + "avg_faiss_overlap": 0.724, + "avg_entropy": 7.1479 + }, + { + "beta": 10.0, + "lambda": 0.5, + "max_iter": 1, + "em": 0.33, + "f1": 0.4622, + "avg_faiss_overlap": 0.808, + "avg_entropy": 6.9842 + }, + { + "beta": 100.0, + "lambda": 0.5, + "max_iter": 1, + "em": 0.36, + "f1": 0.4608, + "avg_faiss_overlap": 0.722, + "avg_entropy": 0.0442 + }, + { + "beta": 10.0, + "lambda": 0.9, + "max_iter": 8, + "em": 0.33, + "f1": 0.46, + "avg_faiss_overlap": 0.744, + "avg_entropy": 7.0404 + }, + { + "beta": 5.0, + "lambda": 0.8, + "max_iter": 1, + "em": 0.34, + "f1": 0.4592, + "avg_faiss_overlap": 0.944, + "avg_entropy": 7.1003 + }, + { + "beta": 10.0, + "lambda": 0.8, + "max_iter": 1, + "em": 0.34, + "f1": 0.4592, + "avg_faiss_overlap": 0.938, + "avg_entropy": 6.7407 + }, + { + "beta": 10.0, + "lambda": 0.95, + "max_iter": 5, + "em": 0.34, + "f1": 0.4592, + "avg_faiss_overlap": 0.928, + "avg_entropy": 6.7819 + }, + { + "beta": 5.0, + "lambda": 0.95, + "max_iter": 1, + "em": 0.34, + "f1": 0.4556, + "avg_faiss_overlap": 0.984, + "avg_entropy": 7.0709 + }, + { + "beta": 10.0, + "lambda": 0.95, + "max_iter": 1, + "em": 0.34, + "f1": 0.4556, + "avg_faiss_overlap": 0.984, + "avg_entropy": 6.5398 + }, + { + "beta": 20.0, + "lambda": 0.9, + "max_iter": 1, + "em": 0.34, + "f1": 0.4552, + "avg_faiss_overlap": 0.966, + "avg_entropy": 3.5526 + }, + { + "beta": 20.0, + "lambda": 0.5, + "max_iter": 1, + "em": 0.34, + "f1": 0.4548, + "avg_faiss_overlap": 0.796, + "avg_entropy": 4.0138 + }, + { + "beta": 5.0, + "lambda": 0.8, + "max_iter": 3, + "em": 0.32, + "f1": 0.4527, + "avg_faiss_overlap": 0.8, + "avg_entropy": 7.14 + }, + { + "beta": 10.0, + "lambda": 0.8, + "max_iter": 3, + "em": 0.32, + "f1": 0.4527, + "avg_faiss_overlap": 0.8, + "avg_entropy": 6.9964 + }, + { + "beta": 50.0, + "lambda": 0.8, + "max_iter": 3, + "em": 0.34, + "f1": 0.4524, + "avg_faiss_overlap": 0.74, + "avg_entropy": 0.1516 + }, + { + "beta": 20.0, + "lambda": 0.95, + "max_iter": 1, + "em": 0.34, + "f1": 0.4511, + "avg_faiss_overlap": 0.98, + "avg_entropy": 3.5011 + }, + { + "beta": 10.0, + "lambda": 0.9, + "max_iter": 3, + "em": 0.33, + "f1": 0.4509, + "avg_faiss_overlap": 0.918, + "avg_entropy": 6.828 + }, + { + "beta": 50.0, + "lambda": 0.7, + "max_iter": 5, + "em": 0.34, + "f1": 0.4509, + "avg_faiss_overlap": 0.492, + "avg_entropy": 0.0065 + }, + { + "beta": 50.0, + "lambda": 0.5, + "max_iter": 3, + "em": 0.35, + "f1": 0.4501, + "avg_faiss_overlap": 0.472, + "avg_entropy": 0.0137 + }, + { + "beta": 20.0, + "lambda": 0.8, + "max_iter": 3, + "em": 0.33, + "f1": 0.4498, + "avg_faiss_overlap": 0.798, + "avg_entropy": 4.0296 + }, + { + "beta": 50.0, + "lambda": 0.5, + "max_iter": 1, + "em": 0.34, + "f1": 0.4496, + "avg_faiss_overlap": 0.752, + "avg_entropy": 0.3624 + }, + { + "beta": 20.0, + "lambda": 0.95, + "max_iter": 3, + "em": 0.32, + "f1": 0.4494, + "avg_faiss_overlap": 0.946, + "avg_entropy": 3.5994 + }, + { + "beta": 5.0, + "lambda": 0.9, + "max_iter": 5, + "em": 0.32, + "f1": 0.4487, + "avg_faiss_overlap": 0.842, + "avg_entropy": 7.1313 + }, + { + "beta": 50.0, + "lambda": 0.95, + "max_iter": 1, + "em": 0.34, + "f1": 0.4486, + "avg_faiss_overlap": 0.974, + "avg_entropy": 0.677 + }, + { + "beta": 100.0, + "lambda": 0.95, + "max_iter": 1, + "em": 0.34, + "f1": 0.4464, + "avg_faiss_overlap": 0.97, + "avg_entropy": 0.2081 + }, + { + "beta": 50.0, + "lambda": 0.8, + "max_iter": 8, + "em": 0.33, + "f1": 0.4459, + "avg_faiss_overlap": 0.482, + "avg_entropy": 0.001 + }, + { + "beta": 100.0, + "lambda": 0.8, + "max_iter": 3, + "em": 0.34, + "f1": 0.4458, + "avg_faiss_overlap": 0.704, + "avg_entropy": 0.0034 + }, + { + "beta": 100.0, + "lambda": 0.8, + "max_iter": 1, + "em": 0.33, + "f1": 0.4441, + "avg_faiss_overlap": 0.878, + "avg_entropy": 0.0927 + }, + { + "beta": 20.0, + "lambda": 0.7, + "max_iter": 3, + "em": 0.33, + "f1": 0.4433, + "avg_faiss_overlap": 0.676, + "avg_entropy": 4.2538 + }, + { + "beta": 50.0, + "lambda": 0.95, + "max_iter": 8, + "em": 0.32, + "f1": 0.4431, + "avg_faiss_overlap": 0.808, + "avg_entropy": 0.2083 + }, + { + "beta": 100.0, + "lambda": 0.95, + "max_iter": 8, + "em": 0.32, + "f1": 0.4431, + "avg_faiss_overlap": 0.794, + "avg_entropy": 0.0019 + }, + { + "beta": 50.0, + "lambda": 0.8, + "max_iter": 1, + "em": 0.32, + "f1": 0.443, + "avg_faiss_overlap": 0.894, + "avg_entropy": 0.5053 + }, + { + "beta": 100.0, + "lambda": 0.95, + "max_iter": 3, + "em": 0.33, + "f1": 0.4427, + "avg_faiss_overlap": 0.9, + "avg_entropy": 0.0754 + }, + { + "beta": 10.0, + "lambda": 0.7, + "max_iter": 3, + "em": 0.31, + "f1": 0.4426, + "avg_faiss_overlap": 0.654, + "avg_entropy": 7.0699 + }, + { + "beta": 20.0, + "lambda": 0.95, + "max_iter": 8, + "em": 0.33, + "f1": 0.442, + "avg_faiss_overlap": 0.878, + "avg_entropy": 3.8365 + }, + { + "beta": 50.0, + "lambda": 0.9, + "max_iter": 1, + "em": 0.32, + "f1": 0.4419, + "avg_faiss_overlap": 0.946, + "avg_entropy": 0.6043 + }, + { + "beta": 50.0, + "lambda": 0.7, + "max_iter": 1, + "em": 0.32, + "f1": 0.4415, + "avg_faiss_overlap": 0.836, + "avg_entropy": 0.4413 + }, + { + "beta": 100.0, + "lambda": 0.7, + "max_iter": 1, + "em": 0.32, + "f1": 0.4415, + "avg_faiss_overlap": 0.824, + "avg_entropy": 0.069 + }, + { + "beta": 20.0, + "lambda": 0.7, + "max_iter": 1, + "em": 0.32, + "f1": 0.4413, + "avg_faiss_overlap": 0.888, + "avg_entropy": 3.7761 + }, + { + "beta": 50.0, + "lambda": 0.95, + "max_iter": 5, + "em": 0.32, + "f1": 0.4407, + "avg_faiss_overlap": 0.87, + "avg_entropy": 0.3877 + }, + { + "beta": 50.0, + "lambda": 0.9, + "max_iter": 8, + "em": 0.34, + "f1": 0.4402, + "avg_faiss_overlap": 0.666, + "avg_entropy": 0.0306 + }, + { + "beta": 100.0, + "lambda": 0.9, + "max_iter": 1, + "em": 0.32, + "f1": 0.4397, + "avg_faiss_overlap": 0.932, + "avg_entropy": 0.1471 + }, + { + "beta": 100.0, + "lambda": 0.95, + "max_iter": 5, + "em": 0.32, + "f1": 0.4391, + "avg_faiss_overlap": 0.858, + "avg_entropy": 0.019 + }, + { + "beta": 50.0, + "lambda": 0.8, + "max_iter": 5, + "em": 0.32, + "f1": 0.439, + "avg_faiss_overlap": 0.592, + "avg_entropy": 0.024 + }, + { + "beta": 50.0, + "lambda": 0.5, + "max_iter": 8, + "em": 0.33, + "f1": 0.4381, + "avg_faiss_overlap": 0.41, + "avg_entropy": 0.0 + }, + { + "beta": 50.0, + "lambda": 0.95, + "max_iter": 3, + "em": 0.32, + "f1": 0.4377, + "avg_faiss_overlap": 0.912, + "avg_entropy": 0.5215 + }, + { + "beta": 20.0, + "lambda": 0.9, + "max_iter": 5, + "em": 0.32, + "f1": 0.437, + "avg_faiss_overlap": 0.846, + "avg_entropy": 3.9311 + }, + { + "beta": 100.0, + "lambda": 0.7, + "max_iter": 5, + "em": 0.32, + "f1": 0.4359, + "avg_faiss_overlap": 0.474, + "avg_entropy": 0.0 + }, + { + "beta": 100.0, + "lambda": 0.8, + "max_iter": 8, + "em": 0.32, + "f1": 0.4359, + "avg_faiss_overlap": 0.472, + "avg_entropy": 0.0 + }, + { + "beta": 20.0, + "lambda": 0.8, + "max_iter": 5, + "em": 0.32, + "f1": 0.4356, + "avg_faiss_overlap": 0.658, + "avg_entropy": 4.2886 + }, + { + "beta": 100.0, + "lambda": 0.5, + "max_iter": 3, + "em": 0.33, + "f1": 0.4351, + "avg_faiss_overlap": 0.456, + "avg_entropy": 0.0 + }, + { + "beta": 20.0, + "lambda": 0.8, + "max_iter": 1, + "em": 0.31, + "f1": 0.4349, + "avg_faiss_overlap": 0.924, + "avg_entropy": 3.6613 + }, + { + "beta": 50.0, + "lambda": 0.9, + "max_iter": 3, + "em": 0.31, + "f1": 0.4344, + "avg_faiss_overlap": 0.842, + "avg_entropy": 0.3574 + }, + { + "beta": 20.0, + "lambda": 0.9, + "max_iter": 3, + "em": 0.31, + "f1": 0.4313, + "avg_faiss_overlap": 0.9, + "avg_entropy": 3.7492 + }, + { + "beta": 20.0, + "lambda": 0.9, + "max_iter": 8, + "em": 0.31, + "f1": 0.4302, + "avg_faiss_overlap": 0.748, + "avg_entropy": 4.1579 + }, + { + "beta": 20.0, + "lambda": 0.95, + "max_iter": 5, + "em": 0.31, + "f1": 0.4299, + "avg_faiss_overlap": 0.91, + "avg_entropy": 3.6963 + }, + { + "beta": 50.0, + "lambda": 0.7, + "max_iter": 3, + "em": 0.32, + "f1": 0.4299, + "avg_faiss_overlap": 0.614, + "avg_entropy": 0.0708 + }, + { + "beta": 50.0, + "lambda": 0.9, + "max_iter": 5, + "em": 0.32, + "f1": 0.4291, + "avg_faiss_overlap": 0.778, + "avg_entropy": 0.1599 + }, + { + "beta": 100.0, + "lambda": 0.5, + "max_iter": 8, + "em": 0.32, + "f1": 0.4281, + "avg_faiss_overlap": 0.408, + "avg_entropy": 0.0 + }, + { + "beta": 50.0, + "lambda": 0.7, + "max_iter": 8, + "em": 0.33, + "f1": 0.4257, + "avg_faiss_overlap": 0.426, + "avg_entropy": 0.0 + }, + { + "beta": 10.0, + "lambda": 0.8, + "max_iter": 5, + "em": 0.29, + "f1": 0.4249, + "avg_faiss_overlap": 0.632, + "avg_entropy": 7.0765 + }, + { + "beta": 100.0, + "lambda": 0.9, + "max_iter": 3, + "em": 0.29, + "f1": 0.4244, + "avg_faiss_overlap": 0.828, + "avg_entropy": 0.019 + }, + { + "beta": 50.0, + "lambda": 0.5, + "max_iter": 5, + "em": 0.32, + "f1": 0.4225, + "avg_faiss_overlap": 0.42, + "avg_entropy": 0.0 + }, + { + "beta": 100.0, + "lambda": 0.8, + "max_iter": 5, + "em": 0.31, + "f1": 0.4165, + "avg_faiss_overlap": 0.562, + "avg_entropy": 0.0 + }, + { + "beta": 100.0, + "lambda": 0.7, + "max_iter": 8, + "em": 0.32, + "f1": 0.4157, + "avg_faiss_overlap": 0.422, + "avg_entropy": 0.0 + }, + { + "beta": 100.0, + "lambda": 0.9, + "max_iter": 8, + "em": 0.32, + "f1": 0.4152, + "avg_faiss_overlap": 0.636, + "avg_entropy": 0.0 + }, + { + "beta": 100.0, + "lambda": 0.9, + "max_iter": 5, + "em": 0.3, + "f1": 0.4141, + "avg_faiss_overlap": 0.764, + "avg_entropy": 0.0014 + }, + { + "beta": 20.0, + "lambda": 0.7, + "max_iter": 5, + "em": 0.3, + "f1": 0.4127, + "avg_faiss_overlap": 0.498, + "avg_entropy": 4.4813 + }, + { + "beta": 100.0, + "lambda": 0.5, + "max_iter": 5, + "em": 0.31, + "f1": 0.4125, + "avg_faiss_overlap": 0.416, + "avg_entropy": 0.0 + }, + { + "beta": 5.0, + "lambda": 0.8, + "max_iter": 5, + "em": 0.27, + "f1": 0.4054, + "avg_faiss_overlap": 0.614, + "avg_entropy": 7.1555 + }, + { + "beta": 5.0, + "lambda": 0.7, + "max_iter": 3, + "em": 0.27, + "f1": 0.4051, + "avg_faiss_overlap": 0.64, + "avg_entropy": 7.1544 + }, + { + "beta": 100.0, + "lambda": 0.7, + "max_iter": 3, + "em": 0.3, + "f1": 0.4049, + "avg_faiss_overlap": 0.582, + "avg_entropy": 0.0002 + }, + { + "beta": 20.0, + "lambda": 0.7, + "max_iter": 8, + "em": 0.31, + "f1": 0.3988, + "avg_faiss_overlap": 0.28, + "avg_entropy": 4.5698 + }, + { + "beta": 20.0, + "lambda": 0.5, + "max_iter": 5, + "em": 0.32, + "f1": 0.3946, + "avg_faiss_overlap": 0.258, + "avg_entropy": 4.6045 + }, + { + "beta": 20.0, + "lambda": 0.8, + "max_iter": 8, + "em": 0.28, + "f1": 0.3927, + "avg_faiss_overlap": 0.48, + "avg_entropy": 4.4867 + }, + { + "beta": 20.0, + "lambda": 0.5, + "max_iter": 3, + "em": 0.29, + "f1": 0.3925, + "avg_faiss_overlap": 0.452, + "avg_entropy": 4.5183 + }, + { + "beta": 10.0, + "lambda": 0.7, + "max_iter": 5, + "em": 0.3, + "f1": 0.379, + "avg_faiss_overlap": 0.334, + "avg_entropy": 7.112 + }, + { + "beta": 10.0, + "lambda": 0.8, + "max_iter": 8, + "em": 0.28, + "f1": 0.3659, + "avg_faiss_overlap": 0.318, + "avg_entropy": 7.1124 + }, + { + "beta": 10.0, + "lambda": 0.5, + "max_iter": 3, + "em": 0.28, + "f1": 0.353, + "avg_faiss_overlap": 0.222, + "avg_entropy": 7.1169 + }, + { + "beta": 5.0, + "lambda": 0.5, + "max_iter": 3, + "em": 0.27, + "f1": 0.3436, + "avg_faiss_overlap": 0.156, + "avg_entropy": 7.1645 + }, + { + "beta": 20.0, + "lambda": 0.5, + "max_iter": 8, + "em": 0.25, + "f1": 0.3298, + "avg_faiss_overlap": 0.186, + "avg_entropy": 4.565 + }, + { + "beta": 5.0, + "lambda": 0.7, + "max_iter": 5, + "em": 0.24, + "f1": 0.3274, + "avg_faiss_overlap": 0.278, + "avg_entropy": 7.1633 + }, + { + "beta": 5.0, + "lambda": 0.8, + "max_iter": 8, + "em": 0.24, + "f1": 0.3274, + "avg_faiss_overlap": 0.276, + "avg_entropy": 7.1634 + }, + { + "beta": 10.0, + "lambda": 0.7, + "max_iter": 8, + "em": 0.15, + "f1": 0.1995, + "avg_faiss_overlap": 0.044, + "avg_entropy": 7.123 + }, + { + "beta": 10.0, + "lambda": 0.5, + "max_iter": 5, + "em": 0.15, + "f1": 0.1983, + "avg_faiss_overlap": 0.022, + "avg_entropy": 7.1239 + }, + { + "beta": 5.0, + "lambda": 0.5, + "max_iter": 5, + "em": 0.14, + "f1": 0.1895, + "avg_faiss_overlap": 0.018, + "avg_entropy": 7.166 + }, + { + "beta": 5.0, + "lambda": 0.5, + "max_iter": 8, + "em": 0.14, + "f1": 0.1877, + "avg_faiss_overlap": 0.002, + "avg_entropy": 7.1661 + }, + { + "beta": 5.0, + "lambda": 0.7, + "max_iter": 8, + "em": 0.12, + "f1": 0.1824, + "avg_faiss_overlap": 0.034, + "avg_entropy": 7.1658 + }, + { + "beta": 10.0, + "lambda": 0.5, + "max_iter": 8, + "em": 0.12, + "f1": 0.1766, + "avg_faiss_overlap": 0.002, + "avg_entropy": 7.1239 + } + ], + "best_config": { + "beta": 5.0, + "lambda": 0.7, + "max_iter": 1, + "em": 0.36, + "f1": 0.4809, + "avg_faiss_overlap": 0.902, + "avg_entropy": 7.1163 + }, + "top10": [ + { + "beta": 5.0, + "lambda": 0.7, + "max_iter": 1, + "em": 0.36, + "f1": 0.4809, + "avg_faiss_overlap": 0.902, + "avg_entropy": 7.1163 + }, + { + "beta": 5.0, + "lambda": 0.9, + "max_iter": 3, + "em": 0.36, + "f1": 0.4809, + "avg_faiss_overlap": 0.912, + "avg_entropy": 7.1122 + }, + { + "beta": 10.0, + "lambda": 0.7, + "max_iter": 1, + "em": 0.36, + "f1": 0.4809, + "avg_faiss_overlap": 0.9, + "avg_entropy": 6.8422 + }, + { + "beta": 10.0, + "lambda": 0.95, + "max_iter": 8, + "em": 0.36, + "f1": 0.4797, + "avg_faiss_overlap": 0.886, + "avg_entropy": 6.8941 + }, + { + "beta": 5.0, + "lambda": 0.95, + "max_iter": 8, + "em": 0.35, + "f1": 0.4697, + "avg_faiss_overlap": 0.886, + "avg_entropy": 7.1219 + }, + { + "beta": 5.0, + "lambda": 0.95, + "max_iter": 3, + "em": 0.35, + "f1": 0.4692, + "avg_faiss_overlap": 0.956, + "avg_entropy": 7.09 + }, + { + "beta": 5.0, + "lambda": 0.95, + "max_iter": 5, + "em": 0.35, + "f1": 0.4692, + "avg_faiss_overlap": 0.928, + "avg_entropy": 7.105 + }, + { + "beta": 10.0, + "lambda": 0.9, + "max_iter": 1, + "em": 0.35, + "f1": 0.4692, + "avg_faiss_overlap": 0.966, + "avg_entropy": 6.6138 + }, + { + "beta": 10.0, + "lambda": 0.95, + "max_iter": 3, + "em": 0.35, + "f1": 0.4692, + "avg_faiss_overlap": 0.956, + "avg_entropy": 6.6763 + }, + { + "beta": 5.0, + "lambda": 0.9, + "max_iter": 1, + "em": 0.34, + "f1": 0.4672, + "avg_faiss_overlap": 0.97, + "avg_entropy": 7.0815 + } + ] +}
\ No newline at end of file |
