diff options
Diffstat (limited to 'data/processed/grid_search_results.json')
| -rw-r--r-- | data/processed/grid_search_results.json | 449 |
1 files changed, 449 insertions, 0 deletions
diff --git a/data/processed/grid_search_results.json b/data/processed/grid_search_results.json new file mode 100644 index 0000000..cdbd3ca --- /dev/null +++ b/data/processed/grid_search_results.json @@ -0,0 +1,449 @@ +{ + "meta": { + "grid_size": 42, + "n_questions": 100, + "total_grid_evaluations": 4200, + "unique_llm_calls": 281, + "faiss_llm_calls": 100, + "total_llm_calls": 381, + "savings_pct": 91.1, + "retrieval_time_s": 0.93, + "generation_time_s": 735.92, + "total_time_s": 1049.0 + }, + "faiss_baseline": { + "em": 0.32, + "f1": 0.4380753968253968 + }, + "grid_results": [ + { + "beta": 0.25, + "max_iter": 1, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1785, + "avg_energy_gap": 2.7302, + "avg_faiss_overlap": 0.002, + "avg_steps": 1.0 + }, + { + "beta": 0.25, + "max_iter": 2, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1785, + "avg_energy_gap": 2.7302, + "avg_faiss_overlap": 0.002, + "avg_steps": 2.0 + }, + { + "beta": 0.25, + "max_iter": 3, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1785, + "avg_energy_gap": 2.7302, + "avg_faiss_overlap": 0.002, + "avg_steps": 3.0 + }, + { + "beta": 0.25, + "max_iter": 5, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1785, + "avg_energy_gap": 2.7302, + "avg_faiss_overlap": 0.002, + "avg_steps": 3.0 + }, + { + "beta": 0.25, + "max_iter": 8, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1785, + "avg_energy_gap": 2.7302, + "avg_faiss_overlap": 0.002, + "avg_steps": 3.0 + }, + { + "beta": 0.25, + "max_iter": 15, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1785, + "avg_energy_gap": 2.7302, + "avg_faiss_overlap": 0.002, + "avg_steps": 3.0 + }, + { + "beta": 0.5, + "max_iter": 1, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1784, + "avg_energy_gap": 2.7292, + "avg_faiss_overlap": 0.002, + "avg_steps": 1.0 + }, + { + "beta": 0.5, + "max_iter": 2, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1784, + "avg_energy_gap": 2.7292, + "avg_faiss_overlap": 0.002, + "avg_steps": 2.0 + }, + { + "beta": 0.5, + "max_iter": 3, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1784, + "avg_energy_gap": 2.7292, + "avg_faiss_overlap": 0.002, + "avg_steps": 3.0 + }, + { + "beta": 0.5, + "max_iter": 5, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1784, + "avg_energy_gap": 2.7292, + "avg_faiss_overlap": 0.002, + "avg_steps": 3.0 + }, + { + "beta": 0.5, + "max_iter": 8, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1784, + "avg_energy_gap": 2.7292, + "avg_faiss_overlap": 0.002, + "avg_steps": 3.0 + }, + { + "beta": 0.5, + "max_iter": 15, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1784, + "avg_energy_gap": 2.7292, + "avg_faiss_overlap": 0.002, + "avg_steps": 3.0 + }, + { + "beta": 1.0, + "max_iter": 1, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1781, + "avg_energy_gap": 2.7273, + "avg_faiss_overlap": 0.002, + "avg_steps": 1.0 + }, + { + "beta": 1.0, + "max_iter": 2, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1781, + "avg_energy_gap": 2.7273, + "avg_faiss_overlap": 0.002, + "avg_steps": 2.0 + }, + { + "beta": 1.0, + "max_iter": 3, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1781, + "avg_energy_gap": 2.7273, + "avg_faiss_overlap": 0.002, + "avg_steps": 3.0 + }, + { + "beta": 1.0, + "max_iter": 5, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1781, + "avg_energy_gap": 2.7273, + "avg_faiss_overlap": 0.002, + "avg_steps": 4.0 + }, + { + "beta": 1.0, + "max_iter": 8, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1781, + "avg_energy_gap": 2.7273, + "avg_faiss_overlap": 0.002, + "avg_steps": 4.0 + }, + { + "beta": 1.0, + "max_iter": 15, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1781, + "avg_energy_gap": 2.7273, + "avg_faiss_overlap": 0.002, + "avg_steps": 4.0 + }, + { + "beta": 2.0, + "max_iter": 1, + "em": 0.15, + "f1": 0.1977, + "avg_entropy": 7.1767, + "avg_energy_gap": 2.7234, + "avg_faiss_overlap": 0.004, + "avg_steps": 1.0 + }, + { + "beta": 2.0, + "max_iter": 2, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1767, + "avg_energy_gap": 2.7235, + "avg_faiss_overlap": 0.002, + "avg_steps": 2.0 + }, + { + "beta": 2.0, + "max_iter": 3, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1767, + "avg_energy_gap": 2.7235, + "avg_faiss_overlap": 0.002, + "avg_steps": 3.0 + }, + { + "beta": 2.0, + "max_iter": 5, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1767, + "avg_energy_gap": 2.7235, + "avg_faiss_overlap": 0.002, + "avg_steps": 4.0 + }, + { + "beta": 2.0, + "max_iter": 8, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1767, + "avg_energy_gap": 2.7235, + "avg_faiss_overlap": 0.002, + "avg_steps": 4.0 + }, + { + "beta": 2.0, + "max_iter": 15, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1767, + "avg_energy_gap": 2.7235, + "avg_faiss_overlap": 0.002, + "avg_steps": 4.0 + }, + { + "beta": 3.0, + "max_iter": 1, + "em": 0.14, + "f1": 0.2016, + "avg_entropy": 7.1742, + "avg_energy_gap": 2.7193, + "avg_faiss_overlap": 0.006, + "avg_steps": 1.0 + }, + { + "beta": 3.0, + "max_iter": 2, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1742, + "avg_energy_gap": 2.7196, + "avg_faiss_overlap": 0.002, + "avg_steps": 2.0 + }, + { + "beta": 3.0, + "max_iter": 3, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1742, + "avg_energy_gap": 2.7196, + "avg_faiss_overlap": 0.002, + "avg_steps": 3.0 + }, + { + "beta": 3.0, + "max_iter": 5, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1742, + "avg_energy_gap": 2.7196, + "avg_faiss_overlap": 0.002, + "avg_steps": 5.0 + }, + { + "beta": 3.0, + "max_iter": 8, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1742, + "avg_energy_gap": 2.7196, + "avg_faiss_overlap": 0.002, + "avg_steps": 5.0 + }, + { + "beta": 3.0, + "max_iter": 15, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1742, + "avg_energy_gap": 2.7196, + "avg_faiss_overlap": 0.002, + "avg_steps": 5.0 + }, + { + "beta": 5.0, + "max_iter": 1, + "em": 0.16, + "f1": 0.2207, + "avg_entropy": 7.1659, + "avg_energy_gap": 2.7105, + "avg_faiss_overlap": 0.018, + "avg_steps": 1.0 + }, + { + "beta": 5.0, + "max_iter": 2, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1661, + "avg_energy_gap": 2.7114, + "avg_faiss_overlap": 0.002, + "avg_steps": 2.0 + }, + { + "beta": 5.0, + "max_iter": 3, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1661, + "avg_energy_gap": 2.7114, + "avg_faiss_overlap": 0.002, + "avg_steps": 3.0 + }, + { + "beta": 5.0, + "max_iter": 5, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1661, + "avg_energy_gap": 2.7114, + "avg_faiss_overlap": 0.002, + "avg_steps": 5.0 + }, + { + "beta": 5.0, + "max_iter": 8, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1661, + "avg_energy_gap": 2.7114, + "avg_faiss_overlap": 0.002, + "avg_steps": 5.0 + }, + { + "beta": 5.0, + "max_iter": 15, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1661, + "avg_energy_gap": 2.7114, + "avg_faiss_overlap": 0.002, + "avg_steps": 5.0 + }, + { + "beta": 8.0, + "max_iter": 1, + "em": 0.21, + "f1": 0.2938, + "avg_entropy": 7.1438, + "avg_energy_gap": 2.6938, + "avg_faiss_overlap": 0.068, + "avg_steps": 1.0 + }, + { + "beta": 8.0, + "max_iter": 2, + "em": 0.12, + "f1": 0.1766, + "avg_entropy": 7.145, + "avg_energy_gap": 2.6972, + "avg_faiss_overlap": 0.004, + "avg_steps": 2.0 + }, + { + "beta": 8.0, + "max_iter": 3, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1449, + "avg_energy_gap": 2.6972, + "avg_faiss_overlap": 0.002, + "avg_steps": 3.0 + }, + { + "beta": 8.0, + "max_iter": 5, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1448, + "avg_energy_gap": 2.6972, + "avg_faiss_overlap": 0.002, + "avg_steps": 5.0 + }, + { + "beta": 8.0, + "max_iter": 8, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1448, + "avg_energy_gap": 2.6972, + "avg_faiss_overlap": 0.002, + "avg_steps": 7.0 + }, + { + "beta": 8.0, + "max_iter": 15, + "em": 0.14, + "f1": 0.1877, + "avg_entropy": 7.1448, + "avg_energy_gap": 2.6972, + "avg_faiss_overlap": 0.002, + "avg_steps": 7.0 + } + ], + "best_config": { + "beta": 8.0, + "max_iter": 1, + "em": 0.21, + "f1": 0.2938, + "avg_entropy": 7.1438, + "avg_energy_gap": 2.6938, + "avg_faiss_overlap": 0.068 + } +}
\ No newline at end of file |
