Diffstat (limited to 'scripts/diagnose_oom.py')
-rw-r--r--  scripts/diagnose_oom.py  101
1 file changed, 101 insertions(+), 0 deletions(-)
diff --git a/scripts/diagnose_oom.py b/scripts/diagnose_oom.py
new file mode 100644
index 0000000..22de3f9
--- /dev/null
+++ b/scripts/diagnose_oom.py
@@ -0,0 +1,101 @@
+import torch
+from transformers import AutoModel, AutoTokenizer
+import psutil
+import sys
+import gc
+
+def log_mem(msg):
+    # 1024**3 divisors mean all figures are GiB.
+    mem = psutil.Process().memory_info().rss / (1024**3)
+    if torch.cuda.is_available():
+        gpu = torch.cuda.memory_allocated() / (1024**3)
+        gpu_res = torch.cuda.memory_reserved() / (1024**3)
+        print(f"[{msg}] RAM: {mem:.2f}GiB | GPU Alloc: {gpu:.2f}GiB | GPU Res: {gpu_res:.2f}GiB")
+    else:
+        print(f"[{msg}] RAM: {mem:.2f}GiB | GPU: N/A")
+    sys.stdout.flush()
+
+def main():
+    print("--- Diagnostic Script ---")
+    log_mem("Start")
+
+    model_path = "models/qwen3-embedding-8b"
+    print(f"Model path: {model_path}")
+
+    # Check config
+    import yaml
+    try:
+        with open("configs/local_models.yaml", "r") as f:
+            cfg = yaml.safe_load(f)
+        print("Config loaded from local_models.yaml:")
+        print(cfg['models']['embedding']['qwen3'])
+    except Exception as e:
+        print(f"Could not load config: {e}")
+
+    # Explicit garbage collection before loading, so the Pre-Load numbers
+    # are not inflated by leftover allocations.
+    gc.collect()
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
+    log_mem("Pre-Load")
+
+    print("Loading Tokenizer...")
+    tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=True, trust_remote_code=False)
+    log_mem("Tokenizer Loaded")
+
+    print("Loading Model (trust_remote_code=False)...")
+    try:
+        # low_cpu_mem_usage=True is implied when device_map is set, but make
+        # it explicit so weights are materialized shard-by-shard instead of
+        # building a second full copy in RAM.
+        model = AutoModel.from_pretrained(
+            model_path,
+            device_map="cuda:0",
+            torch_dtype=torch.bfloat16,
+            trust_remote_code=False,
+            low_cpu_mem_usage=True
+        )
+        print("Model loaded successfully.")
+    except Exception as e:
+        print(f"Model load failed: {e}")
+        return
+
+    log_mem("Model Loaded")
+
+    print("Testing forward pass with small input...")
+    input_text = "Hello world"
+    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
+
+    try:
+        with torch.no_grad():
+            outputs = model(**inputs)
+        print("Forward pass success.")
+        print(f"Output shape: {outputs.last_hidden_state.shape}")
+    except Exception as e:
+        print(f"Forward pass failed: {e}")
+
+    log_mem("End")
+
+if __name__ == "__main__":
+    main()
+