author    YurenHao0426 <blackhao0426@gmail.com>    2026-01-27 09:57:37 -0600
committer YurenHao0426 <blackhao0426@gmail.com>    2026-01-27 09:57:37 -0600
commit    dc801c07cf38b0c495686463e6ca6f871a64440e (patch)
tree      599f03114775921dbc472403c701f4a3a8ea188a /collaborativeagents/slurm/logs/test_multiturn_14357119.err
parent    e43b3f8aa36c198b95c1e46bea2eaf3893b13dc3 (diff)
Add collaborativeagents module and update gitignore
- Add collaborativeagents subproject with adapters, agents, and evaluation modules
- Update .gitignore to exclude large binary files (.whl, .tar), wandb logs, and results

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Diffstat (limited to 'collaborativeagents/slurm/logs/test_multiturn_14357119.err')
-rw-r--r--    collaborativeagents/slurm/logs/test_multiturn_14357119.err    46
1 file changed, 46 insertions, 0 deletions
diff --git a/collaborativeagents/slurm/logs/test_multiturn_14357119.err b/collaborativeagents/slurm/logs/test_multiturn_14357119.err
new file mode 100644
index 0000000..1a66fce
--- /dev/null
+++ b/collaborativeagents/slurm/logs/test_multiturn_14357119.err
@@ -0,0 +1,46 @@
+`torch_dtype` is deprecated! Use `dtype` instead!
+ Loading checkpoint shards: 0%| | 0/4 [00:00<?, ?it/s] Loading checkpoint shards: 25%|██▌ | 1/4 [00:04<00:13, 4.50s/it] Loading checkpoint shards: 50%|█████ | 2/4 [00:08<00:08, 4.48s/it] Loading checkpoint shards: 75%|███████▌ | 3/4 [00:13<00:04, 4.29s/it] Loading checkpoint shards: 100%|██████████| 4/4 [00:14<00:00, 3.24s/it] Loading checkpoint shards: 100%|██████████| 4/4 [00:14<00:00, 3.66s/it]
+ Loading checkpoint shards: 0%| | 0/4 [00:00<?, ?it/s] Loading checkpoint shards: 25%|██▌ | 1/4 [00:02<00:06, 2.33s/it] Loading checkpoint shards: 50%|█████ | 2/4 [00:06<00:06, 3.13s/it] Loading checkpoint shards: 75%|███████▌ | 3/4 [00:09<00:03, 3.45s/it] Loading checkpoint shards: 100%|██████████| 4/4 [00:10<00:00, 2.16s/it] Loading checkpoint shards: 100%|██████████| 4/4 [00:10<00:00, 2.51s/it]
+ Loading checkpoint shards: 0%| | 0/5 [00:00<?, ?it/s] Loading checkpoint shards: 20%|██ | 1/5 [00:04<00:17, 4.31s/it] Loading checkpoint shards: 40%|████ | 2/5 [00:08<00:12, 4.11s/it] Loading checkpoint shards: 60%|██████ | 3/5 [00:12<00:08, 4.30s/it] Loading checkpoint shards: 80%|████████ | 4/5 [00:15<00:03, 3.70s/it] Loading checkpoint shards: 100%|██████████| 5/5 [00:16<00:00, 2.69s/it] Loading checkpoint shards: 100%|██████████| 5/5 [00:16<00:00, 3.30s/it]
+ Loading checkpoint shards: 0%| | 0/4 [00:00<?, ?it/s] Loading checkpoint shards: 25%|██▌ | 1/4 [00:04<00:12, 4.31s/it] Loading checkpoint shards: 50%|█████ | 2/4 [00:09<00:10, 5.03s/it] Loading checkpoint shards: 75%|███████▌ | 3/4 [00:14<00:04, 4.67s/it] Loading checkpoint shards: 100%|██████████| 4/4 [00:16<00:00, 3.74s/it] Loading checkpoint shards: 100%|██████████| 4/4 [00:16<00:00, 4.10s/it]
+ Loading checkpoint shards: 0%| | 0/4 [00:00<?, ?it/s] Loading checkpoint shards: 25%|██▌ | 1/4 [00:02<00:08, 2.88s/it] Loading checkpoint shards: 50%|█████ | 2/4 [00:07<00:07, 3.62s/it] Loading checkpoint shards: 75%|███████▌ | 3/4 [00:10<00:03, 3.74s/it] Loading checkpoint shards: 100%|██████████| 4/4 [00:11<00:00, 2.37s/it] Loading checkpoint shards: 100%|██████████| 4/4 [00:11<00:00, 2.80s/it]
+ Loading checkpoint shards: 0%| | 0/5 [00:00<?, ?it/s] Loading checkpoint shards: 20%|██ | 1/5 [00:04<00:18, 4.50s/it] Loading checkpoint shards: 40%|████ | 2/5 [00:07<00:11, 3.86s/it] Loading checkpoint shards: 60%|██████ | 3/5 [00:12<00:08, 4.03s/it] Loading checkpoint shards: 60%|██████ | 3/5 [00:14<00:09, 4.82s/it]
+Traceback (most recent call last):
+ File "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/scripts/test_multiturn.py", line 227, in <module>
+ results["full_session"] = test_full_session()
+ ^^^^^^^^^^^^^^^^^^^
+ File "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/scripts/test_multiturn.py", line 173, in test_full_session
+ adapter.initialize()
+ File "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/adapters/personalized_llm_adapter.py", line 87, in initialize
+ self._llm = PersonalizedLLM(
+ ^^^^^^^^^^^^^^^^
+ File "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/src/personalization/serving/personalized_llm.py", line 227, in __init__
+ self._load_models()
+ File "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/src/personalization/serving/personalized_llm.py", line 295, in _load_models
+ self._reranker = Qwen3Reranker(
+ ^^^^^^^^^^^^^^
+ File "/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/src/personalization/models/reranker/qwen3_reranker.py", line 26, in __init__
+ self.model = AutoModelForCausalLM.from_pretrained(
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/models/auto/auto_factory.py", line 604, in from_pretrained
+ return model_class.from_pretrained(
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/modeling_utils.py", line 277, in _wrapper
+ return func(*args, **kwargs)
+ ^^^^^^^^^^^^^^^^^^^^^
+ File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/modeling_utils.py", line 5048, in from_pretrained
+ ) = cls._load_pretrained_model(
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/modeling_utils.py", line 5468, in _load_pretrained_model
+ _error_msgs, disk_offload_index = load_shard_file(args)
+ ^^^^^^^^^^^^^^^^^^^^^
+ File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/modeling_utils.py", line 843, in load_shard_file
+ disk_offload_index = _load_state_dict_into_meta_model(
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context
+ return func(*args, **kwargs)
+ ^^^^^^^^^^^^^^^^^^^^^
+ File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/modeling_utils.py", line 770, in _load_state_dict_into_meta_model
+ _load_parameter_into_model(model, param_name, param.to(param_device))
+ ^^^^^^^^^^^^^^^^^^^^^^
+torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 32.00 MiB. GPU 1 has a total capacity of 39.49 GiB of which 30.31 MiB is free. Including non-PyTorch memory, this process has 39.46 GiB memory in use. Of the allocated memory 38.87 GiB is allocated by PyTorch, and 87.01 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
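
Two issues are visible in this log: the deprecated `torch_dtype` argument and a CUDA out-of-memory failure on GPU 1 while the Qwen3 reranker was being loaded after several other checkpoints had already filled the 40 GiB card. Below is a minimal sketch of a more defensive load, assuming a transformers release that accepts `dtype` (as the warning implies) and an accelerate install for `device_map`; the model name and memory caps are illustrative placeholders, not values taken from this repository.

```python
import os

# Assumption: set before the first CUDA allocation, as suggested in the error
# message, to reduce fragmentation across many sequential model loads.
os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True")

import torch
from transformers import AutoModelForCausalLM

# Placeholder checkpoint; the actual path is configured in qwen3_reranker.py.
MODEL_NAME = "Qwen/Qwen3-Reranker-4B"

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    dtype=torch.bfloat16,   # replaces the deprecated torch_dtype argument
    device_map="auto",      # let accelerate place/offload layers instead of pinning to one GPU
    max_memory={0: "35GiB", 1: "35GiB", "cpu": "64GiB"},  # illustrative caps, not measured values
)
```

Whether this is sufficient depends on how PersonalizedLLM assigns devices to the earlier models; the traceback shows roughly 39 GiB already resident on GPU 1 when a 32 MiB allocation failed, so capping or offloading the earlier loads matters more than the allocator setting alone.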