1 files changed, 215 insertions, 0 deletions
diff --git a/collaborativeagents/slurm/fullscale/test_local_user_15652698.err b/collaborativeagents/slurm/fullscale/test_local_user_15652698.err
new file mode 100644
index 0000000..4acc458
--- /dev/null
+++ b/collaborativeagents/slurm/fullscale/test_local_user_15652698.err
@@ -0,0 +1,215 @@
+/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/utils/hub.py:110: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead.
+  warnings.warn(
+/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/utils/hub.py:110: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead.
+  warnings.warn(
+usage: api_server.py [-h] [--headless] [--api-server-count API_SERVER_COUNT]
+                     [--config CONFIG] [--host HOST] [--port PORT] [--uds UDS]
+                     [--uvicorn-log-level {critical,debug,error,info,trace,warning}]
+                     [--disable-uvicorn-access-log | --no-disable-uvicorn-access-log]
+                     [--allow-credentials | --no-allow-credentials]
+                     [--allowed-origins ALLOWED_ORIGINS]
+                     [--allowed-methods ALLOWED_METHODS]
+                     [--allowed-headers ALLOWED_HEADERS]
+                     [--api-key API_KEY [API_KEY ...]]
+                     [--lora-modules LORA_MODULES [LORA_MODULES ...]]
+                     [--chat-template CHAT_TEMPLATE]
+                     [--chat-template-content-format {auto,openai,string}]
+                     [--trust-request-chat-template | --no-trust-request-chat-template]
+                     [--response-role RESPONSE_ROLE]
+                     [--ssl-keyfile SSL_KEYFILE] [--ssl-certfile SSL_CERTFILE]
+                     [--ssl-ca-certs SSL_CA_CERTS]
+                     [--enable-ssl-refresh | --no-enable-ssl-refresh]
+                     [--ssl-cert-reqs SSL_CERT_REQS] [--root-path ROOT_PATH]
+                     [--middleware MIDDLEWARE]
+                     [--return-tokens-as-token-ids | --no-return-tokens-as-token-ids]
+                     [--disable-frontend-multiprocessing | --no-disable-frontend-multiprocessing]
+                     [--enable-request-id-headers | --no-enable-request-id-headers]
+                     [--enable-auto-tool-choice | --no-enable-auto-tool-choice]
+                     [--exclude-tools-when-tool-choice-none | --no-exclude-tools-when-tool-choice-none]
+                     [--tool-call-parser {deepseek_v3,deepseek_v31,deepseek_v32,ernie45,gigachat3,glm45,granite,granite-20b-fc,hermes,hunyuan_a13b,internlm,jamba,kimi_k2,llama3_json,llama4_json,llama4_pythonic,longcat,minimax,minimax_m2,mistral,olmo3,openai,phi4_mini_json,pythonic,qwen3_coder,qwen3_xml,seed_oss,step3,xlam} or name registered in --tool-parser-plugin]
+                     [--tool-parser-plugin TOOL_PARSER_PLUGIN]
+                     [--tool-server TOOL_SERVER]
+                     [--log-config-file LOG_CONFIG_FILE]
+                     [--max-log-len MAX_LOG_LEN]
+                     [--disable-fastapi-docs | --no-disable-fastapi-docs]
+                     [--enable-prompt-tokens-details | --no-enable-prompt-tokens-details]
+                     [--enable-server-load-tracking | --no-enable-server-load-tracking]
+                     [--enable-force-include-usage | --no-enable-force-include-usage]
+                     [--enable-tokenizer-info-endpoint | --no-enable-tokenizer-info-endpoint]
+                     [--enable-log-outputs | --no-enable-log-outputs]
+                     [--h11-max-incomplete-event-size H11_MAX_INCOMPLETE_EVENT_SIZE]
+                     [--h11-max-header-count H11_MAX_HEADER_COUNT]
+                     [--log-error-stack | --no-log-error-stack]
+                     [--tokens-only | --no-tokens-only] [--model MODEL]
+                     [--runner {auto,draft,generate,pooling}]
+                     [--convert {auto,classify,embed,none,reward}]
+                     [--tokenizer TOKENIZER]
+                     [--tokenizer-mode ['auto', 'deepseek_v32', 'hf', 'mistral', 'slow']]
+                     [--trust-remote-code | --no-trust-remote-code]
+                     [--dtype {auto,bfloat16,float,float16,float32,half}]
+                     [--seed SEED] [--hf-config-path HF_CONFIG_PATH]
+                     [--allowed-local-media-path ALLOWED_LOCAL_MEDIA_PATH]
+                     [--allowed-media-domains ALLOWED_MEDIA_DOMAINS [ALLOWED_MEDIA_DOMAINS ...]]
+                     [--revision REVISION] [--code-revision CODE_REVISION]
+                     [--tokenizer-revision TOKENIZER_REVISION]
+                     [--max-model-len MAX_MODEL_LEN]
+                     [--quantization QUANTIZATION]
+                     [--enforce-eager | --no-enforce-eager]
+                     [--max-logprobs MAX_LOGPROBS]
+                     [--logprobs-mode {processed_logits,processed_logprobs,raw_logits,raw_logprobs}]
+                     [--disable-sliding-window | --no-disable-sliding-window]
+                     [--disable-cascade-attn | --no-disable-cascade-attn]
+                     [--skip-tokenizer-init | --no-skip-tokenizer-init]
+                     [--enable-prompt-embeds | --no-enable-prompt-embeds]
+                     [--served-model-name SERVED_MODEL_NAME [SERVED_MODEL_NAME ...]]
+                     [--config-format ['auto', 'hf', 'mistral']]
+                     [--hf-token [HF_TOKEN]] [--hf-overrides HF_OVERRIDES]
+                     [--pooler-config POOLER_CONFIG]
+                     [--logits-processor-pattern LOGITS_PROCESSOR_PATTERN]
+                     [--generation-config GENERATION_CONFIG]
+                     [--override-generation-config OVERRIDE_GENERATION_CONFIG]
+                     [--enable-sleep-mode | --no-enable-sleep-mode]
+                     [--model-impl ['auto', 'terratorch', 'transformers', 'vllm']]
+                     [--override-attention-dtype OVERRIDE_ATTENTION_DTYPE]
+                     [--logits-processors LOGITS_PROCESSORS [LOGITS_PROCESSORS ...]]
+                     [--io-processor-plugin IO_PROCESSOR_PLUGIN]
+                     [--load-format LOAD_FORMAT] [--download-dir DOWNLOAD_DIR]
+                     [--safetensors-load-strategy SAFETENSORS_LOAD_STRATEGY]
+                     [--model-loader-extra-config MODEL_LOADER_EXTRA_CONFIG]
+                     [--ignore-patterns IGNORE_PATTERNS [IGNORE_PATTERNS ...]]
+                     [--use-tqdm-on-load | --no-use-tqdm-on-load]
+                     [--pt-load-map-location PT_LOAD_MAP_LOCATION]
+                     [--attention-backend ATTENTION_BACKEND]
+                     [--reasoning-parser REASONING_PARSER]
+                     [--reasoning-parser-plugin REASONING_PARSER_PLUGIN]
+                     [--distributed-executor-backend ['external_launcher', 'mp', 'ray', 'uni']]
+                     [--pipeline-parallel-size PIPELINE_PARALLEL_SIZE]
+                     [--master-addr MASTER_ADDR] [--master-port MASTER_PORT]
+                     [--nnodes NNODES] [--node-rank NODE_RANK]
+                     [--tensor-parallel-size TENSOR_PARALLEL_SIZE]
+                     [--decode-context-parallel-size DECODE_CONTEXT_PARALLEL_SIZE]
+                     [--dcp-kv-cache-interleave-size DCP_KV_CACHE_INTERLEAVE_SIZE]
+                     [--cp-kv-cache-interleave-size CP_KV_CACHE_INTERLEAVE_SIZE]
+                     [--prefill-context-parallel-size PREFILL_CONTEXT_PARALLEL_SIZE]
+                     [--data-parallel-size DATA_PARALLEL_SIZE]
+                     [--data-parallel-rank DATA_PARALLEL_RANK]
+                     [--data-parallel-start-rank DATA_PARALLEL_START_RANK]
+                     [--data-parallel-size-local DATA_PARALLEL_SIZE_LOCAL]
+                     [--data-parallel-address DATA_PARALLEL_ADDRESS]
+                     [--data-parallel-rpc-port DATA_PARALLEL_RPC_PORT]
+                     [--data-parallel-backend DATA_PARALLEL_BACKEND]
+                     [--data-parallel-hybrid-lb | --no-data-parallel-hybrid-lb | -dph]
+                     [--data-parallel-external-lb | --no-data-parallel-external-lb | -dpe]
+                     [--enable-expert-parallel | --no-enable-expert-parallel]
+                     [--all2all-backend {allgather_reducescatter,deepep_high_throughput,deepep_low_latency,flashinfer_all2allv,naive,pplx,None}]
+                     [--enable-dbo | --no-enable-dbo]
+                     [--dbo-decode-token-threshold DBO_DECODE_TOKEN_THRESHOLD]
+                     [--dbo-prefill-token-threshold DBO_PREFILL_TOKEN_THRESHOLD]
+                     [--disable-nccl-for-dp-synchronization | --no-disable-nccl-for-dp-synchronization]
+                     [--enable-eplb | --no-enable-eplb]
+                     [--eplb-config EPLB_CONFIG]
+                     [--expert-placement-strategy {linear,round_robin}]
+                     [--max-parallel-loading-workers MAX_PARALLEL_LOADING_WORKERS]
+                     [--ray-workers-use-nsight | --no-ray-workers-use-nsight]
+                     [--disable-custom-all-reduce | --no-disable-custom-all-reduce]
+                     [--worker-cls WORKER_CLS]
+                     [--worker-extension-cls WORKER_EXTENSION_CLS]
+                     [--block-size {1,8,16,32,64,128,256}]
+                     [--gpu-memory-utilization GPU_MEMORY_UTILIZATION]
+                     [--kv-cache-memory-bytes KV_CACHE_MEMORY_BYTES]
+                     [--swap-space SWAP_SPACE]
+                     [--kv-cache-dtype {auto,bfloat16,fp8,fp8_ds_mla,fp8_e4m3,fp8_e5m2,fp8_inc}]
+                     [--num-gpu-blocks-override NUM_GPU_BLOCKS_OVERRIDE]
+                     [--enable-prefix-caching | --no-enable-prefix-caching]
+                     [--prefix-caching-hash-algo {sha256,sha256_cbor,xxhash,xxhash_cbor}]
+                     [--cpu-offload-gb CPU_OFFLOAD_GB]
+                     [--calculate-kv-scales | --no-calculate-kv-scales]
+                     [--kv-sharing-fast-prefill | --no-kv-sharing-fast-prefill]
+                     [--mamba-cache-dtype {auto,float16,float32}]
+                     [--mamba-ssm-cache-dtype {auto,float16,float32}]
+                     [--mamba-block-size MAMBA_BLOCK_SIZE]
+                     [--kv-offloading-size KV_OFFLOADING_SIZE]
+                     [--kv-offloading-backend {lmcache,native,None}]
+                     [--limit-mm-per-prompt LIMIT_MM_PER_PROMPT]
+                     [--enable-mm-embeds | --no-enable-mm-embeds]
+                     [--media-io-kwargs MEDIA_IO_KWARGS]
+                     [--mm-processor-kwargs MM_PROCESSOR_KWARGS]
+                     [--mm-processor-cache-gb MM_PROCESSOR_CACHE_GB]
+                     [--mm-processor-cache-type {lru,shm}]
+                     [--mm-shm-cache-max-object-size-mb MM_SHM_CACHE_MAX_OBJECT_SIZE_MB]
+                     [--mm-encoder-tp-mode {data,weights}]
+                     [--mm-encoder-attn-backend MM_ENCODER_ATTN_BACKEND]
+                     [--interleave-mm-strings | --no-interleave-mm-strings]
+                     [--skip-mm-profiling | --no-skip-mm-profiling]
+                     [--video-pruning-rate VIDEO_PRUNING_RATE]
+                     [--enable-lora | --no-enable-lora]
+                     [--max-loras MAX_LORAS]
+                     [--max-lora-rank {1,8,16,32,64,128,256,320,512}]
+                     [--lora-dtype {auto,bfloat16,float16}]
+                     [--max-cpu-loras MAX_CPU_LORAS]
+                     [--fully-sharded-loras | --no-fully-sharded-loras]
+                     [--default-mm-loras DEFAULT_MM_LORAS]
+                     [--show-hidden-metrics-for-version SHOW_HIDDEN_METRICS_FOR_VERSION]
+                     [--otlp-traces-endpoint OTLP_TRACES_ENDPOINT]
+                     [--collect-detailed-traces {all,model,worker,None} [{all,model,worker,None} ...]]
+                     [--kv-cache-metrics | --no-kv-cache-metrics]
+                     [--kv-cache-metrics-sample KV_CACHE_METRICS_SAMPLE]
+                     [--cudagraph-metrics | --no-cudagraph-metrics]
+                     [--enable-layerwise-nvtx-tracing | --no-enable-layerwise-nvtx-tracing]
+                     [--max-num-batched-tokens MAX_NUM_BATCHED_TOKENS]
+                     [--max-num-seqs MAX_NUM_SEQS]
+                     [--max-num-partial-prefills MAX_NUM_PARTIAL_PREFILLS]
+                     [--max-long-partial-prefills MAX_LONG_PARTIAL_PREFILLS]
+                     [--long-prefill-token-threshold LONG_PREFILL_TOKEN_THRESHOLD]
+                     [--scheduling-policy {fcfs,priority}]
+                     [--enable-chunked-prefill | --no-enable-chunked-prefill]
+                     [--disable-chunked-mm-input | --no-disable-chunked-mm-input]
+                     [--scheduler-cls SCHEDULER_CLS]
+                     [--disable-hybrid-kv-cache-manager | --no-disable-hybrid-kv-cache-manager]
+                     [--async-scheduling | --no-async-scheduling]
+                     [--stream-interval STREAM_INTERVAL]
+                     [--cudagraph-capture-sizes CUDAGRAPH_CAPTURE_SIZES [CUDAGRAPH_CAPTURE_SIZES ...]]
+                     [--max-cudagraph-capture-size MAX_CUDAGRAPH_CAPTURE_SIZE]
+                     [--speculative-config SPECULATIVE_CONFIG]
+                     [--kv-transfer-config KV_TRANSFER_CONFIG]
+                     [--kv-events-config KV_EVENTS_CONFIG]
+                     [--ec-transfer-config EC_TRANSFER_CONFIG]
+                     [--compilation-config COMPILATION_CONFIG]
+                     [--attention-config ATTENTION_CONFIG]
+                     [--additional-config ADDITIONAL_CONFIG]
+                     [--structured-outputs-config STRUCTURED_OUTPUTS_CONFIG]
+                     [--profiler-config PROFILER_CONFIG]
+                     [--optimization-level OPTIMIZATION_LEVEL]
+                     [--disable-log-stats] [--aggregate-engine-logging]
+                     [--enable-log-requests | --no-enable-log-requests]
+                     [--disable-log-requests | --no-disable-log-requests]
+                     [model_tag]
+api_server.py: error: unrecognized arguments: --guided-decoding-backend
+/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/utils/hub.py:110: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead.
+  warnings.warn(
+[0;36m(EngineCore_DP0 pid=3603174)[0;0m 
+Loading safetensors checkpoint shards:   0% Completed | 0/4 [00:00<?, ?it/s]
+[0;36m(EngineCore_DP0 pid=3603174)[0;0m 
+Loading safetensors checkpoint shards:  25% Completed | 1/4 [00:06<00:19,  6.52s/it]
+[0;36m(EngineCore_DP0 pid=3603174)[0;0m 
+Loading safetensors checkpoint shards:  50% Completed | 2/4 [00:08<00:07,  3.70s/it]
+[0;36m(EngineCore_DP0 pid=3603174)[0;0m 
+Loading safetensors checkpoint shards:  75% Completed | 3/4 [00:15<00:05,  5.28s/it]
+[0;36m(EngineCore_DP0 pid=3603174)[0;0m 
+Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:21<00:00,  5.57s/it]
+[0;36m(EngineCore_DP0 pid=3603174)[0;0m 
+Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:21<00:00,  5.35s/it]
+[0;36m(EngineCore_DP0 pid=3603174)[0;0m 
+[0;36m(EngineCore_DP0 pid=3603174)[0;0m 
+Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):   0%|          | 0/51 [00:00<?, ?it/s]
+Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):   4%|▍         | 2/51 [00:00<00:03, 14.83it/s]
+Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):   8%|▊         | 4/51 [00:00<00:03, 15.62it/s]
+Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  12%|█▏        | 6/51 [00:00<00:02, 16.45it/s]
+Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  16%|█▌        | 8/51 [00:00<00:02, 16.87it/s]
+Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  22%|██▏       | 11/51 [00:00<00:02, 18.23it/s]
+Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  27%|██▋       | 14/51 [00:00<00:01, 19.00it/s]
+Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  33%|███▎      | 17/51 [00:00<00:01, 19.74it/s]
+Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  39%|███▉      | 20/51 [00:01<00:01, 20.74it/s]
+Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  45%|████▌     | 23/51 [00:01<00:01, 21.38it/s]
+Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  51%|█████     | 26/51 [00:01<00:01, 21.90it/s]
+Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  57%|█████▋    | 29/51 [00:01<00:01, 20.09it/s]
+Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  63%|██████▎   | 32/51 [00:01<00:00, 21.04it/s]
+Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  69%|██████▊   | 35/51 [00:01<00:00, 21.92it/s]
+Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  75%|███████▍  | 38/51 [00:01<00:00, 22.58it/s]
+Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  80%|████████  | 41/51 [00:01<00:00, 23.20it/s]
+Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  86%|████████▋ | 44/51 [00:02<00:00, 23.84it/s]
+Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  92%|█████████▏| 47/51 [00:02<00:00, 23.47it/s]
+Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  98%|█████████▊| 50/51 [00:02<00:00, 24.19it/s]
+Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 100%|██████████| 51/51 [00:02<00:00, 20.91it/s]
+[0;36m(EngineCore_DP0 pid=3603174)[0;0m 
+Capturing CUDA graphs (decode, FULL):   0%|          | 0/35 [00:00<?, ?it/s]
+Capturing CUDA graphs (decode, FULL):   3%|▎         | 1/35 [00:00<00:04,  7.51it/s]
+Capturing CUDA graphs (decode, FULL):  11%|█▏        | 4/35 [00:00<00:01, 16.43it/s]
+Capturing CUDA graphs (decode, FULL):  20%|██        | 7/35 [00:00<00:01, 19.40it/s]
+Capturing CUDA graphs (decode, FULL):  29%|██▊       | 10/35 [00:00<00:01, 20.92it/s]
+Capturing CUDA graphs (decode, FULL):  37%|███▋      | 13/35 [00:00<00:01, 21.93it/s]
+Capturing CUDA graphs (decode, FULL):  46%|████▌     | 16/35 [00:00<00:00, 22.62it/s]
+Capturing CUDA graphs (decode, FULL):  54%|█████▍    | 19/35 [00:00<00:00, 23.03it/s]
+Capturing CUDA graphs (decode, FULL):  63%|██████▎   | 22/35 [00:01<00:00, 23.33it/s]
+Capturing CUDA graphs (decode, FULL):  71%|███████▏  | 25/35 [00:01<00:00, 23.31it/s]
+Capturing CUDA graphs (decode, FULL):  80%|████████  | 28/35 [00:01<00:00, 23.67it/s]
+Capturing CUDA graphs (decode, FULL):  89%|████████▊ | 31/35 [00:01<00:00, 24.00it/s]
+Capturing CUDA graphs (decode, FULL):  97%|█████████▋| 34/35 [00:01<00:00, 24.21it/s]
+Capturing CUDA graphs (decode, FULL): 100%|██████████| 35/35 [00:01<00:00, 22.44it/s]
+[0;36m(APIServer pid=3602630)[0;0m INFO:     Started server process [3602630]
+[0;36m(APIServer pid=3602630)[0;0m INFO:     Waiting for application startup.
+[0;36m(APIServer pid=3602630)[0;0m INFO:     Application startup complete.
+/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/utils/hub.py:110: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead.
+  warnings.warn(
+2026-01-26 12:38:05,935 - INFO - Loaded dataset: math-hard
+2026-01-26 12:38:05,935 - INFO - Profile path configured: ../data/complex_profiles_v2/profiles_200.jsonl
+2026-01-26 12:38:06,001 - INFO - Loaded 200 profiles from ../data/complex_profiles_v2/profiles_200.jsonl
+2026-01-26 12:38:06,003 - INFO - Running method: vanilla
+2026-01-26 12:38:06,004 - INFO -   Running profiles 0 to 9 (10 remaining)
+2026-01-26 12:38:06,004 - INFO -   Using BATCH processing (local vLLM user) for vanilla
+2026-01-26 12:38:06,006 - INFO -   Using local vLLM user simulator: http://localhost:8004/v1
+2026-01-26 12:38:11,230 - INFO -   Batch: 10 profiles, 20 sessions remaining
+2026-01-26 12:38:11,240 - INFO -   Session round 1/2: 10 total, 6876 sessions/hr
+2026-01-26 12:38:11,248 - INFO -   Session round 2/2: 20 total, 13730 sessions/hr
+2026-01-26 12:38:11,996 - INFO -   GPU memory freed after vanilla: 0.0GB allocated
+2026-01-26 12:38:12,000 - INFO - Report saved to ../results/test_local_user_20260126_123755/20260126_123805/report.md