`torch_dtype` is deprecated! Use `dtype` instead!
Loading checkpoint shards:   0%|          | 0/4 [00:00
    results["full_session"] = test_full_session()
                              ^^^^^^^^^^^^^^^^^^^
  File "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/scripts/test_multiturn.py", line 173, in test_full_session
    adapter.initialize()
  File "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/adapters/personalized_llm_adapter.py", line 87, in initialize
    self._llm = PersonalizedLLM(
                ^^^^^^^^^^^^^^^^
  File "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/src/personalization/serving/personalized_llm.py", line 227, in __init__
    self._load_models()
  File "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/src/personalization/serving/personalized_llm.py", line 295, in _load_models
    self._reranker = Qwen3Reranker(
                     ^^^^^^^^^^^^^^
  File "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/src/personalization/models/reranker/qwen3_reranker.py", line 26, in __init__
    self.model = AutoModelForCausalLM.from_pretrained(
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/models/auto/auto_factory.py", line 604, in from_pretrained
    return model_class.from_pretrained(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/modeling_utils.py", line 277, in _wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/modeling_utils.py", line 5048, in from_pretrained
    ) = cls._load_pretrained_model(
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/modeling_utils.py", line 5468, in _load_pretrained_model
    _error_msgs, disk_offload_index = load_shard_file(args)
                                      ^^^^^^^^^^^^^^^^^^^^^
  File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/modeling_utils.py", line 843, in load_shard_file
    disk_offload_index = _load_state_dict_into_meta_model(
                         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/modeling_utils.py", line 770, in _load_state_dict_into_meta_model
    _load_parameter_into_model(model, param_name, param.to(param_device))
                                                  ^^^^^^^^^^^^^^^^^^^^^^
torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 32.00 MiB. GPU 1 has a total capacity of 39.49 GiB of which 30.31 MiB is free. Including non-PyTorch memory, this process has 39.46 GiB memory in use. Of the allocated memory 38.87 GiB is allocated by PyTorch, and 87.01 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
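
The deprecation warning and the failing frame point at the same call: the AutoModelForCausalLM.from_pretrained invocation inside qwen3_reranker.py, which runs after PersonalizedLLM._load_models has already filled GPU 1 (only 30.31 MiB of 39.49 GiB free), so PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True alone is unlikely to help here. Below is a minimal sketch of one way to load the reranker with less pressure on a single card. The checkpoint name, memory caps, and kwargs chosen here are illustrative assumptions, not the repository's actual Qwen3Reranker configuration:

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    # Hypothetical checkpoint; the real model ID is set in qwen3_reranker.py.
    MODEL_NAME = "Qwen/Qwen3-Reranker-4B"

    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        dtype=torch.float16,       # `dtype` replaces the deprecated `torch_dtype` kwarg
        device_map="auto",         # let accelerate place shards across visible GPUs / CPU
        max_memory={0: "35GiB", 1: "35GiB", "cpu": "60GiB"},  # illustrative per-device caps
        low_cpu_mem_usage=True,
    )
    model.eval()

With device_map="auto" plus max_memory, shards that do not fit on the GPUs are offloaded rather than triggering the allocation failure seen above; alternatively, the reranker could simply be pinned to whichever GPU the main LLM is not occupying.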