From dc801c07cf38b0c495686463e6ca6f871a64440e Mon Sep 17 00:00:00 2001 From: YurenHao0426 Date: Tue, 27 Jan 2026 09:57:37 -0600 Subject: Add collaborativeagents module and update gitignore - Add collaborativeagents subproject with adapters, agents, and evaluation modules - Update .gitignore to exclude large binary files (.whl, .tar), wandb logs, and results Co-Authored-By: Claude Opus 4.5 --- .../slurm/logs/vllm_bench_70b_8b_14367370.err | 180 +++++++++++++++++++++ 1 file changed, 180 insertions(+) create mode 100644 collaborativeagents/slurm/logs/vllm_bench_70b_8b_14367370.err (limited to 'collaborativeagents/slurm/logs/vllm_bench_70b_8b_14367370.err') diff --git a/collaborativeagents/slurm/logs/vllm_bench_70b_8b_14367370.err b/collaborativeagents/slurm/logs/vllm_bench_70b_8b_14367370.err new file mode 100644 index 0000000..91e66ef --- /dev/null +++ b/collaborativeagents/slurm/logs/vllm_bench_70b_8b_14367370.err @@ -0,0 +1,180 @@ + Fetching 19 files: 0%| | 0/19 [00:00", line 198, in _run_module_as_main +(APIServer pid=3643829) File "", line 88, in _run_code +(APIServer pid=3643829) File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/vllm/entrypoints/openai/api_server.py", line 1469, in +(APIServer pid=3643829) uvloop.run(run_server(args)) +(APIServer pid=3643829) File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/uvloop/__init__.py", line 92, in run +(APIServer pid=3643829) return runner.run(wrapper()) +(APIServer pid=3643829) ^^^^^^^^^^^^^^^^^^^^^ +(APIServer pid=3643829) File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/asyncio/runners.py", line 118, in run +(APIServer pid=3643829) return self._loop.run_until_complete(task) +(APIServer pid=3643829) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +(APIServer pid=3643829) File "uvloop/loop.pyx", line 1518, in uvloop.loop.Loop.run_until_complete +(APIServer pid=3643829) File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/uvloop/__init__.py", line 48, in wrapper +(APIServer pid=3643829) return await main +(APIServer pid=3643829) ^^^^^^^^^^ +(APIServer pid=3643829) File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/vllm/entrypoints/openai/api_server.py", line 1398, in run_server +(APIServer pid=3643829) await run_server_worker(listen_address, sock, args, **uvicorn_kwargs) +(APIServer pid=3643829) File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/vllm/entrypoints/openai/api_server.py", line 1417, in run_server_worker +(APIServer pid=3643829) async with build_async_engine_client( +(APIServer pid=3643829) File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/contextlib.py", line 210, in __aenter__ +(APIServer pid=3643829) return await anext(self.gen) +(APIServer pid=3643829) ^^^^^^^^^^^^^^^^^^^^^ +(APIServer pid=3643829) File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/vllm/entrypoints/openai/api_server.py", line 172, in build_async_engine_client +(APIServer pid=3643829) async with build_async_engine_client_from_engine_args( +(APIServer pid=3643829) File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/contextlib.py", line 210, in __aenter__ +(APIServer pid=3643829) return await anext(self.gen) +(APIServer pid=3643829) ^^^^^^^^^^^^^^^^^^^^^ +(APIServer pid=3643829) File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/vllm/entrypoints/openai/api_server.py", line 213, in build_async_engine_client_from_engine_args +(APIServer pid=3643829) async_llm = AsyncLLM.from_vllm_config( +(APIServer pid=3643829) ^^^^^^^^^^^^^^^^^^^^^^^^^^ +(APIServer pid=3643829) File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/vllm/v1/engine/async_llm.py", line 215, in from_vllm_config +(APIServer pid=3643829) return cls( +(APIServer pid=3643829) ^^^^ +(APIServer pid=3643829) File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/vllm/v1/engine/async_llm.py", line 134, in __init__ +(APIServer pid=3643829) self.engine_core = EngineCoreClient.make_async_mp_client( +(APIServer pid=3643829) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +(APIServer pid=3643829) File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/vllm/v1/engine/core_client.py", line 121, in make_async_mp_client +(APIServer pid=3643829) return AsyncMPClient(*client_args) +(APIServer pid=3643829) ^^^^^^^^^^^^^^^^^^^^^^^^^^^ +(APIServer pid=3643829) File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/vllm/v1/engine/core_client.py", line 820, in __init__ +(APIServer pid=3643829) super().__init__( +(APIServer pid=3643829) File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/vllm/v1/engine/core_client.py", line 477, in __init__ +(APIServer pid=3643829) with launch_core_engines(vllm_config, executor_class, log_stats) as ( +(APIServer pid=3643829) File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/contextlib.py", line 144, in __exit__ +(APIServer pid=3643829) next(self.gen) +(APIServer pid=3643829) File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/vllm/v1/engine/utils.py", line 903, in launch_core_engines +(APIServer pid=3643829) wait_for_engine_startup( +(APIServer pid=3643829) File "/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/vllm/v1/engine/utils.py", line 960, in wait_for_engine_startup +(APIServer pid=3643829) raise RuntimeError( +(APIServer pid=3643829) RuntimeError: Engine core initialization failed. See root cause above. Failed core proc(s): {} +[2025-12-29T07:04:21.056] error: *** JOB 14367370 ON gpua051 CANCELLED AT 2025-12-29T07:04:21 DUE to SIGNAL Terminated *** -- cgit v1.2.3