summaryrefslogtreecommitdiff
path: root/scripts/run_baseline.py
diff options
context:
space:
mode:
author: YurenHao0426 <Blackhao0426@gmail.com>, 2026-02-16 14:44:42 -0600
committer: YurenHao0426 <Blackhao0426@gmail.com>, 2026-02-16 14:44:42 -0600
commit09d50e47860da0035e178a442dc936028808a0b3 (patch)
tree9d651b0c7d289a9a0405953f2da989a3c431f147 /scripts/run_baseline.py
parentc90b48e3f8da9dd0f8d2ae82ddf977436bb0cfc3 (diff)
Add memory centering, grid search experiments, and energy visualizations (HEAD, master)
- Add centering support to MemoryBank (center_query, apply_centering, mean persistence in save/load) to remove the centroid attractor in Hopfield dynamics
- Add center flag to MemoryBankConfig and device field to PipelineConfig
- Grid search scripts: initial (β≤8), residual, high-β, and centered grids with dedup-based LLM caching (89-91% call savings)
- Energy landscape visualization: 2D contour, 1D profile, UMAP, and PCA heatmap comparing centered vs. uncentered dynamics
- Experiment log (note.md) documenting 4 rounds of results and root-cause analysis of the centroid attractor problem
- Key finding: β_critical ≈ 37.6 for centered memory; best configs beat the FAISS baseline by +3-4% F1

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Diffstat (limited to 'scripts/run_baseline.py')
-rw-r--r-- scripts/run_baseline.py | 9
1 files changed, 5 insertions, 4 deletions
diff --git a/scripts/run_baseline.py b/scripts/run_baseline.py
index 74c4710..beef76b 100644
--- a/scripts/run_baseline.py
+++ b/scripts/run_baseline.py
@@ -27,6 +27,7 @@ def main() -> None:
parser.add_argument("--memory-bank", type=str, required=True)
parser.add_argument("--question", type=str, required=True)
parser.add_argument("--top-k", type=int, default=5)
+ parser.add_argument("--device", type=str, default="cpu")
args = parser.parse_args()
with open(args.config) as f:
@@ -44,17 +45,17 @@ def main() -> None:
from hag.config import MemoryBankConfig
mb = MemoryBank(MemoryBankConfig(**cfg.get("memory", {})))
- mb.load(args.memory_bank)
+ mb.load(args.memory_bank) # FAISS needs CPU, load on CPU
# Build FAISS index from memory bank embeddings
import numpy as np
- embeddings_np = mb.embeddings.T.numpy().astype(np.float32) # (N, d)
+ embeddings_np = mb.embeddings.T.cpu().numpy().astype(np.float32) # (N, d)
faiss_ret = FAISSRetriever(top_k=args.top_k)
faiss_ret.build_index(embeddings_np, mb.passages)
- encoder = Encoder(pipeline_config.encoder)
- generator = Generator(pipeline_config.generator)
+ encoder = Encoder(pipeline_config.encoder, device=args.device)
+ generator = Generator(pipeline_config.generator, device=args.device)
pipeline = RAGPipeline(
config=pipeline_config,