{ "meta": { "grid_size": 42, "n_questions": 100, "total_grid_evaluations": 4200, "unique_llm_calls": 281, "faiss_llm_calls": 100, "total_llm_calls": 381, "savings_pct": 91.1, "retrieval_time_s": 0.93, "generation_time_s": 735.92, "total_time_s": 1049.0 }, "faiss_baseline": { "em": 0.32, "f1": 0.4380753968253968 }, "grid_results": [ { "beta": 0.25, "max_iter": 1, "em": 0.14, "f1": 0.1877, "avg_entropy": 7.1785, "avg_energy_gap": 2.7302, "avg_faiss_overlap": 0.002, "avg_steps": 1.0 }, { "beta": 0.25, "max_iter": 2, "em": 0.14, "f1": 0.1877, "avg_entropy": 7.1785, "avg_energy_gap": 2.7302, "avg_faiss_overlap": 0.002, "avg_steps": 2.0 }, { "beta": 0.25, "max_iter": 3, "em": 0.14, "f1": 0.1877, "avg_entropy": 7.1785, "avg_energy_gap": 2.7302, "avg_faiss_overlap": 0.002, "avg_steps": 3.0 }, { "beta": 0.25, "max_iter": 5, "em": 0.14, "f1": 0.1877, "avg_entropy": 7.1785, "avg_energy_gap": 2.7302, "avg_faiss_overlap": 0.002, "avg_steps": 3.0 }, { "beta": 0.25, "max_iter": 8, "em": 0.14, "f1": 0.1877, "avg_entropy": 7.1785, "avg_energy_gap": 2.7302, "avg_faiss_overlap": 0.002, "avg_steps": 3.0 }, { "beta": 0.25, "max_iter": 15, "em": 0.14, "f1": 0.1877, "avg_entropy": 7.1785, "avg_energy_gap": 2.7302, "avg_faiss_overlap": 0.002, "avg_steps": 3.0 }, { "beta": 0.5, "max_iter": 1, "em": 0.14, "f1": 0.1877, "avg_entropy": 7.1784, "avg_energy_gap": 2.7292, "avg_faiss_overlap": 0.002, "avg_steps": 1.0 }, { "beta": 0.5, "max_iter": 2, "em": 0.14, "f1": 0.1877, "avg_entropy": 7.1784, "avg_energy_gap": 2.7292, "avg_faiss_overlap": 0.002, "avg_steps": 2.0 }, { "beta": 0.5, "max_iter": 3, "em": 0.14, "f1": 0.1877, "avg_entropy": 7.1784, "avg_energy_gap": 2.7292, "avg_faiss_overlap": 0.002, "avg_steps": 3.0 }, { "beta": 0.5, "max_iter": 5, "em": 0.14, "f1": 0.1877, "avg_entropy": 7.1784, "avg_energy_gap": 2.7292, "avg_faiss_overlap": 0.002, "avg_steps": 3.0 }, { "beta": 0.5, "max_iter": 8, "em": 0.14, "f1": 0.1877, "avg_entropy": 7.1784, "avg_energy_gap": 2.7292, "avg_faiss_overlap": 0.002, "avg_steps": 3.0 }, { "beta": 0.5, "max_iter": 15, "em": 0.14, "f1": 0.1877, "avg_entropy": 7.1784, "avg_energy_gap": 2.7292, "avg_faiss_overlap": 0.002, "avg_steps": 3.0 }, { "beta": 1.0, "max_iter": 1, "em": 0.14, "f1": 0.1877, "avg_entropy": 7.1781, "avg_energy_gap": 2.7273, "avg_faiss_overlap": 0.002, "avg_steps": 1.0 }, { "beta": 1.0, "max_iter": 2, "em": 0.14, "f1": 0.1877, "avg_entropy": 7.1781, "avg_energy_gap": 2.7273, "avg_faiss_overlap": 0.002, "avg_steps": 2.0 }, { "beta": 1.0, "max_iter": 3, "em": 0.14, "f1": 0.1877, "avg_entropy": 7.1781, "avg_energy_gap": 2.7273, "avg_faiss_overlap": 0.002, "avg_steps": 3.0 }, { "beta": 1.0, "max_iter": 5, "em": 0.14, "f1": 0.1877, "avg_entropy": 7.1781, "avg_energy_gap": 2.7273, "avg_faiss_overlap": 0.002, "avg_steps": 4.0 }, { "beta": 1.0, "max_iter": 8, "em": 0.14, "f1": 0.1877, "avg_entropy": 7.1781, "avg_energy_gap": 2.7273, "avg_faiss_overlap": 0.002, "avg_steps": 4.0 }, { "beta": 1.0, "max_iter": 15, "em": 0.14, "f1": 0.1877, "avg_entropy": 7.1781, "avg_energy_gap": 2.7273, "avg_faiss_overlap": 0.002, "avg_steps": 4.0 }, { "beta": 2.0, "max_iter": 1, "em": 0.15, "f1": 0.1977, "avg_entropy": 7.1767, "avg_energy_gap": 2.7234, "avg_faiss_overlap": 0.004, "avg_steps": 1.0 }, { "beta": 2.0, "max_iter": 2, "em": 0.14, "f1": 0.1877, "avg_entropy": 7.1767, "avg_energy_gap": 2.7235, "avg_faiss_overlap": 0.002, "avg_steps": 2.0 }, { "beta": 2.0, "max_iter": 3, "em": 0.14, "f1": 0.1877, "avg_entropy": 7.1767, "avg_energy_gap": 2.7235, "avg_faiss_overlap": 0.002, "avg_steps": 3.0 }, { "beta": 2.0, "max_iter": 5, "em": 0.14, "f1": 0.1877, "avg_entropy": 7.1767, "avg_energy_gap": 2.7235, "avg_faiss_overlap": 0.002, "avg_steps": 4.0 }, { "beta": 2.0, "max_iter": 8, "em": 0.14, "f1": 0.1877, "avg_entropy": 7.1767, "avg_energy_gap": 2.7235, "avg_faiss_overlap": 0.002, "avg_steps": 4.0 }, { "beta": 2.0, "max_iter": 15, "em": 0.14, "f1": 0.1877, "avg_entropy": 7.1767, "avg_energy_gap": 2.7235, "avg_faiss_overlap": 0.002, "avg_steps": 4.0 }, { "beta": 3.0, "max_iter": 1, "em": 0.14, "f1": 0.2016, "avg_entropy": 7.1742, "avg_energy_gap": 2.7193, "avg_faiss_overlap": 0.006, "avg_steps": 1.0 }, { "beta": 3.0, "max_iter": 2, "em": 0.14, "f1": 0.1877, "avg_entropy": 7.1742, "avg_energy_gap": 2.7196, "avg_faiss_overlap": 0.002, "avg_steps": 2.0 }, { "beta": 3.0, "max_iter": 3, "em": 0.14, "f1": 0.1877, "avg_entropy": 7.1742, "avg_energy_gap": 2.7196, "avg_faiss_overlap": 0.002, "avg_steps": 3.0 }, { "beta": 3.0, "max_iter": 5, "em": 0.14, "f1": 0.1877, "avg_entropy": 7.1742, "avg_energy_gap": 2.7196, "avg_faiss_overlap": 0.002, "avg_steps": 5.0 }, { "beta": 3.0, "max_iter": 8, "em": 0.14, "f1": 0.1877, "avg_entropy": 7.1742, "avg_energy_gap": 2.7196, "avg_faiss_overlap": 0.002, "avg_steps": 5.0 }, { "beta": 3.0, "max_iter": 15, "em": 0.14, "f1": 0.1877, "avg_entropy": 7.1742, "avg_energy_gap": 2.7196, "avg_faiss_overlap": 0.002, "avg_steps": 5.0 }, { "beta": 5.0, "max_iter": 1, "em": 0.16, "f1": 0.2207, "avg_entropy": 7.1659, "avg_energy_gap": 2.7105, "avg_faiss_overlap": 0.018, "avg_steps": 1.0 }, { "beta": 5.0, "max_iter": 2, "em": 0.14, "f1": 0.1877, "avg_entropy": 7.1661, "avg_energy_gap": 2.7114, "avg_faiss_overlap": 0.002, "avg_steps": 2.0 }, { "beta": 5.0, "max_iter": 3, "em": 0.14, "f1": 0.1877, "avg_entropy": 7.1661, "avg_energy_gap": 2.7114, "avg_faiss_overlap": 0.002, "avg_steps": 3.0 }, { "beta": 5.0, "max_iter": 5, "em": 0.14, "f1": 0.1877, "avg_entropy": 7.1661, "avg_energy_gap": 2.7114, "avg_faiss_overlap": 0.002, "avg_steps": 5.0 }, { "beta": 5.0, "max_iter": 8, "em": 0.14, "f1": 0.1877, "avg_entropy": 7.1661, "avg_energy_gap": 2.7114, "avg_faiss_overlap": 0.002, "avg_steps": 5.0 }, { "beta": 5.0, "max_iter": 15, "em": 0.14, "f1": 0.1877, "avg_entropy": 7.1661, "avg_energy_gap": 2.7114, "avg_faiss_overlap": 0.002, "avg_steps": 5.0 }, { "beta": 8.0, "max_iter": 1, "em": 0.21, "f1": 0.2938, "avg_entropy": 7.1438, "avg_energy_gap": 2.6938, "avg_faiss_overlap": 0.068, "avg_steps": 1.0 }, { "beta": 8.0, "max_iter": 2, "em": 0.12, "f1": 0.1766, "avg_entropy": 7.145, "avg_energy_gap": 2.6972, "avg_faiss_overlap": 0.004, "avg_steps": 2.0 }, { "beta": 8.0, "max_iter": 3, "em": 0.14, "f1": 0.1877, "avg_entropy": 7.1449, "avg_energy_gap": 2.6972, "avg_faiss_overlap": 0.002, "avg_steps": 3.0 }, { "beta": 8.0, "max_iter": 5, "em": 0.14, "f1": 0.1877, "avg_entropy": 7.1448, "avg_energy_gap": 2.6972, "avg_faiss_overlap": 0.002, "avg_steps": 5.0 }, { "beta": 8.0, "max_iter": 8, "em": 0.14, "f1": 0.1877, "avg_entropy": 7.1448, "avg_energy_gap": 2.6972, "avg_faiss_overlap": 0.002, "avg_steps": 7.0 }, { "beta": 8.0, "max_iter": 15, "em": 0.14, "f1": 0.1877, "avg_entropy": 7.1448, "avg_energy_gap": 2.6972, "avg_faiss_overlap": 0.002, "avg_steps": 7.0 } ], "best_config": { "beta": 8.0, "max_iter": 1, "em": 0.21, "f1": 0.2938, "avg_entropy": 7.1438, "avg_energy_gap": 2.6938, "avg_faiss_overlap": 0.068 } }