| author | YurenHao0426 <blackhao0426@gmail.com> | 2025-12-17 04:29:37 -0600 |
|---|---|---|
| committer | YurenHao0426 <blackhao0426@gmail.com> | 2025-12-17 04:29:37 -0600 |
| commit | e43b3f8aa36c198b95c1e46bea2eaf3893b13dc3 (patch) | |
| tree | 6ce8a00d2f8b9ebd83c894a27ea01ac50cfb2ff5 /scripts/diagnose_oom.py | |
Diffstat (limited to 'scripts/diagnose_oom.py')
| -rw-r--r-- | scripts/diagnose_oom.py | 78 |
1 files changed, 78 insertions, 0 deletions
diff --git a/scripts/diagnose_oom.py b/scripts/diagnose_oom.py
new file mode 100644
index 0000000..22de3f9
--- /dev/null
+++ b/scripts/diagnose_oom.py
@@ -0,0 +1,78 @@
+import torch
+from transformers import AutoModel, AutoTokenizer
+import os
+import psutil
+import time
+import sys
+import gc
+
+def log_mem(msg):
+    mem = psutil.Process().memory_info().rss / (1024**3)
+    if torch.cuda.is_available():
+        gpu = torch.cuda.memory_allocated() / (1024**3)
+        gpu_res = torch.cuda.memory_reserved() / (1024**3)
+        print(f"[{msg}] RAM: {mem:.2f}GB | GPU Alloc: {gpu:.2f}GB | GPU Res: {gpu_res:.2f}GB")
+    else:
+        print(f"[{msg}] RAM: {mem:.2f}GB | GPU: N/A")
+    sys.stdout.flush()
+
+def main():
+    print("--- Diagnostic Script ---")
+    log_mem("Start")
+
+    model_path = "models/qwen3-embedding-8b"
+    print(f"Model path: {model_path}")
+
+    # Check config
+    import yaml
+    try:
+        with open("configs/local_models.yaml", "r") as f:
+            cfg = yaml.safe_load(f)
+        print("Config loaded from local_models.yaml:")
+        print(cfg['models']['embedding']['qwen3'])
+    except Exception as e:
+        print(f"Could not load config: {e}")
+
+    # Explicit garbage collection
+    gc.collect()
+    torch.cuda.empty_cache()
+    log_mem("Pre-Load")
+
+    print("Loading Tokenizer...")
+    tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=True, trust_remote_code=False)
+    log_mem("Tokenizer Loaded")
+
+    print("Loading Model (trust_remote_code=False)...")
+    try:
+        # Load with low_cpu_mem_usage=True explicit (though auto/cuda usually does it)
+        model = AutoModel.from_pretrained(
+            model_path,
+            device_map="cuda:0",
+            torch_dtype=torch.bfloat16,
+            trust_remote_code=False,
+            low_cpu_mem_usage=True
+        )
+        print("Model loaded successfully.")
+    except Exception as e:
+        print(f"Model load failed: {e}")
+        return
+
+    log_mem("Model Loaded")
+
+    print("Testing forward pass with small input...")
+    input_text = "Hello world"
+    inputs = tokenizer(input_text, return_tensors="pt").to("cuda:0")
+
+    try:
+        with torch.no_grad():
+            outputs = model(**inputs)
+        print("Forward pass success.")
+        print(f"Output shape: {outputs.last_hidden_state.shape}")
+    except Exception as e:
+        print(f"Forward pass failed: {e}")
+
+    log_mem("End")
+
+if __name__ == "__main__":
+    main()
+
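For interpreting the checkpoints the script prints, a rough baseline helps: an 8B-parameter model in bfloat16 needs about 2 bytes per parameter on the GPU just for the weights, before activations and allocator overhead. The sketch below is a minimal, hypothetical helper for comparing that estimate against free device memory; it is not part of the commit, and the `8e9` parameter count and the 10% overhead factor are assumptions, not values read from the config.

```python
import torch

def expected_load_gib(n_params: float, bytes_per_param: int = 2, overhead: float = 1.1) -> float:
    """Rough GPU memory needed just to hold the weights, in GiB.

    bytes_per_param=2 matches torch.bfloat16; overhead is an assumed
    allowance for allocator fragmentation and non-weight buffers.
    """
    return n_params * bytes_per_param * overhead / (1024 ** 3)

if __name__ == "__main__":
    # Assumed parameter count for an 8B embedding model (not taken from the repo's config).
    needed = expected_load_gib(8e9)
    if torch.cuda.is_available():
        free_b, total_b = torch.cuda.mem_get_info()  # (free, total) bytes on the current device
        print(f"need ~{needed:.1f} GiB for weights, "
              f"{free_b / 1024**3:.1f} GiB free of {total_b / 1024**3:.1f} GiB")
    else:
        print(f"need ~{needed:.1f} GiB for weights (no CUDA device visible)")
```

If the "Model Loaded" checkpoint in the diagnostic output reports far more reserved memory than an estimate like this, the OOM is more likely to come from the forward pass or from extra copies of the weights held in host RAM during loading rather than from the weights themselves.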
