"""Safe GPU-2 benchmark wrapper for the a-select speed test. Runs test_aselect_deepdive.py's main() on GPU 2 (shared with japardi2's NV-Embed server) with a HARD allocator cap + a start-time free-memory guard so we can never OOM the neighbour. Forwards all CLI args to the underlying test. Usage: CUDA_VISIBLE_DEVICES=2 MEMFRAC=0.010 python3 bench_gpu2.py --B 1 --T2 80 --T1 2 """ import os, sys, torch, runpy torch.cuda.init() free0, total = torch.cuda.mem_get_info() f0 = free0 / 1024**2 tot = total / 1024**2 print(f"[guard] GPU free at start = {f0:.0f} MiB (of {tot:.0f})", flush=True) MIN_FREE = float(os.environ.get("MINFREE", "1100")) if f0 < MIN_FREE: sys.exit(f"[guard] ABORT: free {f0:.0f} < {MIN_FREE:.0f} MiB — too risky for the neighbour, back off.") frac = float(os.environ.get("MEMFRAC", "0.010")) torch.cuda.set_per_process_memory_fraction(frac) cap = frac * tot print(f"[guard] allocator hard-capped at {cap:.0f} MiB (frac={frac}); leaving >= {f0-cap-700:.0f} MiB headroom after ~700 MiB ctx", flush=True) # forward remaining argv to the test's main() sys.argv = ["test_aselect_deepdive.py"] + sys.argv[1:] try: runpy.run_path("/home/yurenh2/ept/ep_run/test_aselect_deepdive.py", run_name="__main__") finally: free1, _ = torch.cuda.mem_get_info() print(f"[guard] GPU free at end = {free1/1024**2:.0f} MiB; my peak reserved = {torch.cuda.max_memory_reserved()/1024**2:.0f} MiB", flush=True)