summaryrefslogtreecommitdiff
path: root/collaborativeagents/slurm/fullscale/test_local_user_15652698.err
diff options
context:
space:
mode:
Diffstat (limited to 'collaborativeagents/slurm/fullscale/test_local_user_15652698.err')
-rw-r--r--collaborativeagents/slurm/fullscale/test_local_user_15652698.err215
1 files changed, 215 insertions, 0 deletions
diff --git a/collaborativeagents/slurm/fullscale/test_local_user_15652698.err b/collaborativeagents/slurm/fullscale/test_local_user_15652698.err
new file mode 100644
index 0000000..4acc458
--- /dev/null
+++ b/collaborativeagents/slurm/fullscale/test_local_user_15652698.err
@@ -0,0 +1,215 @@
+/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/utils/hub.py:110: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead.
+ warnings.warn(
+/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/utils/hub.py:110: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead.
+ warnings.warn(
+usage: api_server.py [-h] [--headless] [--api-server-count API_SERVER_COUNT]
+ [--config CONFIG] [--host HOST] [--port PORT] [--uds UDS]
+ [--uvicorn-log-level {critical,debug,error,info,trace,warning}]
+ [--disable-uvicorn-access-log | --no-disable-uvicorn-access-log]
+ [--allow-credentials | --no-allow-credentials]
+ [--allowed-origins ALLOWED_ORIGINS]
+ [--allowed-methods ALLOWED_METHODS]
+ [--allowed-headers ALLOWED_HEADERS]
+ [--api-key API_KEY [API_KEY ...]]
+ [--lora-modules LORA_MODULES [LORA_MODULES ...]]
+ [--chat-template CHAT_TEMPLATE]
+ [--chat-template-content-format {auto,openai,string}]
+ [--trust-request-chat-template | --no-trust-request-chat-template]
+ [--response-role RESPONSE_ROLE]
+ [--ssl-keyfile SSL_KEYFILE] [--ssl-certfile SSL_CERTFILE]
+ [--ssl-ca-certs SSL_CA_CERTS]
+ [--enable-ssl-refresh | --no-enable-ssl-refresh]
+ [--ssl-cert-reqs SSL_CERT_REQS] [--root-path ROOT_PATH]
+ [--middleware MIDDLEWARE]
+ [--return-tokens-as-token-ids | --no-return-tokens-as-token-ids]
+ [--disable-frontend-multiprocessing | --no-disable-frontend-multiprocessing]
+ [--enable-request-id-headers | --no-enable-request-id-headers]
+ [--enable-auto-tool-choice | --no-enable-auto-tool-choice]
+ [--exclude-tools-when-tool-choice-none | --no-exclude-tools-when-tool-choice-none]
+ [--tool-call-parser {deepseek_v3,deepseek_v31,deepseek_v32,ernie45,gigachat3,glm45,granite,granite-20b-fc,hermes,hunyuan_a13b,internlm,jamba,kimi_k2,llama3_json,llama4_json,llama4_pythonic,longcat,minimax,minimax_m2,mistral,olmo3,openai,phi4_mini_json,pythonic,qwen3_coder,qwen3_xml,seed_oss,step3,xlam} or name registered in --tool-parser-plugin]
+ [--tool-parser-plugin TOOL_PARSER_PLUGIN]
+ [--tool-server TOOL_SERVER]
+ [--log-config-file LOG_CONFIG_FILE]
+ [--max-log-len MAX_LOG_LEN]
+ [--disable-fastapi-docs | --no-disable-fastapi-docs]
+ [--enable-prompt-tokens-details | --no-enable-prompt-tokens-details]
+ [--enable-server-load-tracking | --no-enable-server-load-tracking]
+ [--enable-force-include-usage | --no-enable-force-include-usage]
+ [--enable-tokenizer-info-endpoint | --no-enable-tokenizer-info-endpoint]
+ [--enable-log-outputs | --no-enable-log-outputs]
+ [--h11-max-incomplete-event-size H11_MAX_INCOMPLETE_EVENT_SIZE]
+ [--h11-max-header-count H11_MAX_HEADER_COUNT]
+ [--log-error-stack | --no-log-error-stack]
+ [--tokens-only | --no-tokens-only] [--model MODEL]
+ [--runner {auto,draft,generate,pooling}]
+ [--convert {auto,classify,embed,none,reward}]
+ [--tokenizer TOKENIZER]
+ [--tokenizer-mode ['auto', 'deepseek_v32', 'hf', 'mistral', 'slow']]
+ [--trust-remote-code | --no-trust-remote-code]
+ [--dtype {auto,bfloat16,float,float16,float32,half}]
+ [--seed SEED] [--hf-config-path HF_CONFIG_PATH]
+ [--allowed-local-media-path ALLOWED_LOCAL_MEDIA_PATH]
+ [--allowed-media-domains ALLOWED_MEDIA_DOMAINS [ALLOWED_MEDIA_DOMAINS ...]]
+ [--revision REVISION] [--code-revision CODE_REVISION]
+ [--tokenizer-revision TOKENIZER_REVISION]
+ [--max-model-len MAX_MODEL_LEN]
+ [--quantization QUANTIZATION]
+ [--enforce-eager | --no-enforce-eager]
+ [--max-logprobs MAX_LOGPROBS]
+ [--logprobs-mode {processed_logits,processed_logprobs,raw_logits,raw_logprobs}]
+ [--disable-sliding-window | --no-disable-sliding-window]
+ [--disable-cascade-attn | --no-disable-cascade-attn]
+ [--skip-tokenizer-init | --no-skip-tokenizer-init]
+ [--enable-prompt-embeds | --no-enable-prompt-embeds]
+ [--served-model-name SERVED_MODEL_NAME [SERVED_MODEL_NAME ...]]
+ [--config-format ['auto', 'hf', 'mistral']]
+ [--hf-token [HF_TOKEN]] [--hf-overrides HF_OVERRIDES]
+ [--pooler-config POOLER_CONFIG]
+ [--logits-processor-pattern LOGITS_PROCESSOR_PATTERN]
+ [--generation-config GENERATION_CONFIG]
+ [--override-generation-config OVERRIDE_GENERATION_CONFIG]
+ [--enable-sleep-mode | --no-enable-sleep-mode]
+ [--model-impl ['auto', 'terratorch', 'transformers', 'vllm']]
+ [--override-attention-dtype OVERRIDE_ATTENTION_DTYPE]
+ [--logits-processors LOGITS_PROCESSORS [LOGITS_PROCESSORS ...]]
+ [--io-processor-plugin IO_PROCESSOR_PLUGIN]
+ [--load-format LOAD_FORMAT] [--download-dir DOWNLOAD_DIR]
+ [--safetensors-load-strategy SAFETENSORS_LOAD_STRATEGY]
+ [--model-loader-extra-config MODEL_LOADER_EXTRA_CONFIG]
+ [--ignore-patterns IGNORE_PATTERNS [IGNORE_PATTERNS ...]]
+ [--use-tqdm-on-load | --no-use-tqdm-on-load]
+ [--pt-load-map-location PT_LOAD_MAP_LOCATION]
+ [--attention-backend ATTENTION_BACKEND]
+ [--reasoning-parser REASONING_PARSER]
+ [--reasoning-parser-plugin REASONING_PARSER_PLUGIN]
+ [--distributed-executor-backend ['external_launcher', 'mp', 'ray', 'uni']]
+ [--pipeline-parallel-size PIPELINE_PARALLEL_SIZE]
+ [--master-addr MASTER_ADDR] [--master-port MASTER_PORT]
+ [--nnodes NNODES] [--node-rank NODE_RANK]
+ [--tensor-parallel-size TENSOR_PARALLEL_SIZE]
+ [--decode-context-parallel-size DECODE_CONTEXT_PARALLEL_SIZE]
+ [--dcp-kv-cache-interleave-size DCP_KV_CACHE_INTERLEAVE_SIZE]
+ [--cp-kv-cache-interleave-size CP_KV_CACHE_INTERLEAVE_SIZE]
+ [--prefill-context-parallel-size PREFILL_CONTEXT_PARALLEL_SIZE]
+ [--data-parallel-size DATA_PARALLEL_SIZE]
+ [--data-parallel-rank DATA_PARALLEL_RANK]
+ [--data-parallel-start-rank DATA_PARALLEL_START_RANK]
+ [--data-parallel-size-local DATA_PARALLEL_SIZE_LOCAL]
+ [--data-parallel-address DATA_PARALLEL_ADDRESS]
+ [--data-parallel-rpc-port DATA_PARALLEL_RPC_PORT]
+ [--data-parallel-backend DATA_PARALLEL_BACKEND]
+ [--data-parallel-hybrid-lb | --no-data-parallel-hybrid-lb | -dph]
+ [--data-parallel-external-lb | --no-data-parallel-external-lb | -dpe]
+ [--enable-expert-parallel | --no-enable-expert-parallel]
+ [--all2all-backend {allgather_reducescatter,deepep_high_throughput,deepep_low_latency,flashinfer_all2allv,naive,pplx,None}]
+ [--enable-dbo | --no-enable-dbo]
+ [--dbo-decode-token-threshold DBO_DECODE_TOKEN_THRESHOLD]
+ [--dbo-prefill-token-threshold DBO_PREFILL_TOKEN_THRESHOLD]
+ [--disable-nccl-for-dp-synchronization | --no-disable-nccl-for-dp-synchronization]
+ [--enable-eplb | --no-enable-eplb]
+ [--eplb-config EPLB_CONFIG]
+ [--expert-placement-strategy {linear,round_robin}]
+ [--max-parallel-loading-workers MAX_PARALLEL_LOADING_WORKERS]
+ [--ray-workers-use-nsight | --no-ray-workers-use-nsight]
+ [--disable-custom-all-reduce | --no-disable-custom-all-reduce]
+ [--worker-cls WORKER_CLS]
+ [--worker-extension-cls WORKER_EXTENSION_CLS]
+ [--block-size {1,8,16,32,64,128,256}]
+ [--gpu-memory-utilization GPU_MEMORY_UTILIZATION]
+ [--kv-cache-memory-bytes KV_CACHE_MEMORY_BYTES]
+ [--swap-space SWAP_SPACE]
+ [--kv-cache-dtype {auto,bfloat16,fp8,fp8_ds_mla,fp8_e4m3,fp8_e5m2,fp8_inc}]
+ [--num-gpu-blocks-override NUM_GPU_BLOCKS_OVERRIDE]
+ [--enable-prefix-caching | --no-enable-prefix-caching]
+ [--prefix-caching-hash-algo {sha256,sha256_cbor,xxhash,xxhash_cbor}]
+ [--cpu-offload-gb CPU_OFFLOAD_GB]
+ [--calculate-kv-scales | --no-calculate-kv-scales]
+ [--kv-sharing-fast-prefill | --no-kv-sharing-fast-prefill]
+ [--mamba-cache-dtype {auto,float16,float32}]
+ [--mamba-ssm-cache-dtype {auto,float16,float32}]
+ [--mamba-block-size MAMBA_BLOCK_SIZE]
+ [--kv-offloading-size KV_OFFLOADING_SIZE]
+ [--kv-offloading-backend {lmcache,native,None}]
+ [--limit-mm-per-prompt LIMIT_MM_PER_PROMPT]
+ [--enable-mm-embeds | --no-enable-mm-embeds]
+ [--media-io-kwargs MEDIA_IO_KWARGS]
+ [--mm-processor-kwargs MM_PROCESSOR_KWARGS]
+ [--mm-processor-cache-gb MM_PROCESSOR_CACHE_GB]
+ [--mm-processor-cache-type {lru,shm}]
+ [--mm-shm-cache-max-object-size-mb MM_SHM_CACHE_MAX_OBJECT_SIZE_MB]
+ [--mm-encoder-tp-mode {data,weights}]
+ [--mm-encoder-attn-backend MM_ENCODER_ATTN_BACKEND]
+ [--interleave-mm-strings | --no-interleave-mm-strings]
+ [--skip-mm-profiling | --no-skip-mm-profiling]
+ [--video-pruning-rate VIDEO_PRUNING_RATE]
+ [--enable-lora | --no-enable-lora]
+ [--max-loras MAX_LORAS]
+ [--max-lora-rank {1,8,16,32,64,128,256,320,512}]
+ [--lora-dtype {auto,bfloat16,float16}]
+ [--max-cpu-loras MAX_CPU_LORAS]
+ [--fully-sharded-loras | --no-fully-sharded-loras]
+ [--default-mm-loras DEFAULT_MM_LORAS]
+ [--show-hidden-metrics-for-version SHOW_HIDDEN_METRICS_FOR_VERSION]
+ [--otlp-traces-endpoint OTLP_TRACES_ENDPOINT]
+ [--collect-detailed-traces {all,model,worker,None} [{all,model,worker,None} ...]]
+ [--kv-cache-metrics | --no-kv-cache-metrics]
+ [--kv-cache-metrics-sample KV_CACHE_METRICS_SAMPLE]
+ [--cudagraph-metrics | --no-cudagraph-metrics]
+ [--enable-layerwise-nvtx-tracing | --no-enable-layerwise-nvtx-tracing]
+ [--max-num-batched-tokens MAX_NUM_BATCHED_TOKENS]
+ [--max-num-seqs MAX_NUM_SEQS]
+ [--max-num-partial-prefills MAX_NUM_PARTIAL_PREFILLS]
+ [--max-long-partial-prefills MAX_LONG_PARTIAL_PREFILLS]
+ [--long-prefill-token-threshold LONG_PREFILL_TOKEN_THRESHOLD]
+ [--scheduling-policy {fcfs,priority}]
+ [--enable-chunked-prefill | --no-enable-chunked-prefill]
+ [--disable-chunked-mm-input | --no-disable-chunked-mm-input]
+ [--scheduler-cls SCHEDULER_CLS]
+ [--disable-hybrid-kv-cache-manager | --no-disable-hybrid-kv-cache-manager]
+ [--async-scheduling | --no-async-scheduling]
+ [--stream-interval STREAM_INTERVAL]
+ [--cudagraph-capture-sizes CUDAGRAPH_CAPTURE_SIZES [CUDAGRAPH_CAPTURE_SIZES ...]]
+ [--max-cudagraph-capture-size MAX_CUDAGRAPH_CAPTURE_SIZE]
+ [--speculative-config SPECULATIVE_CONFIG]
+ [--kv-transfer-config KV_TRANSFER_CONFIG]
+ [--kv-events-config KV_EVENTS_CONFIG]
+ [--ec-transfer-config EC_TRANSFER_CONFIG]
+ [--compilation-config COMPILATION_CONFIG]
+ [--attention-config ATTENTION_CONFIG]
+ [--additional-config ADDITIONAL_CONFIG]
+ [--structured-outputs-config STRUCTURED_OUTPUTS_CONFIG]
+ [--profiler-config PROFILER_CONFIG]
+ [--optimization-level OPTIMIZATION_LEVEL]
+ [--disable-log-stats] [--aggregate-engine-logging]
+ [--enable-log-requests | --no-enable-log-requests]
+ [--disable-log-requests | --no-disable-log-requests]
+ [model_tag]
+api_server.py: error: unrecognized arguments: --guided-decoding-backend
+/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/utils/hub.py:110: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead.
+ warnings.warn(
+(EngineCore_DP0 pid=3603174) Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
+(EngineCore_DP0 pid=3603174) Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:06<00:19, 6.52s/it]
+(EngineCore_DP0 pid=3603174) Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:08<00:07, 3.70s/it]
+(EngineCore_DP0 pid=3603174) Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:15<00:05, 5.28s/it]
+(EngineCore_DP0 pid=3603174) Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:21<00:00, 5.57s/it]
+(EngineCore_DP0 pid=3603174) Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:21<00:00, 5.35s/it]
+(EngineCore_DP0 pid=3603174)
+(EngineCore_DP0 pid=3603174) Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 0%| | 0/51 [00:00<?, ?it/s] Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 4%|▍ | 2/51 [00:00<00:03, 14.83it/s] Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 8%|▊ | 4/51 [00:00<00:03, 15.62it/s] Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 12%|█▏ | 6/51 [00:00<00:02, 16.45it/s] Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 16%|█▌ | 8/51 [00:00<00:02, 16.87it/s] Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 22%|██▏ | 11/51 [00:00<00:02, 18.23it/s] Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 27%|██▋ | 14/51 [00:00<00:01, 19.00it/s] Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 33%|███▎ | 17/51 [00:00<00:01, 19.74it/s] Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 39%|███▉ | 20/51 [00:01<00:01, 20.74it/s] Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 45%|████▌ | 23/51 [00:01<00:01, 21.38it/s] Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 51%|█████ | 26/51 [00:01<00:01, 21.90it/s] Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 57%|█████▋ | 29/51 [00:01<00:01, 20.09it/s] Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 63%|██████▎ | 32/51 [00:01<00:00, 21.04it/s] Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 69%|██████▊ | 35/51 [00:01<00:00, 21.92it/s] Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 75%|███████▍ | 38/51 [00:01<00:00, 22.58it/s] Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 80%|████████ | 41/51 [00:01<00:00, 23.20it/s] Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 86%|████████▋ | 44/51 [00:02<00:00, 23.84it/s] Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 92%|█████████▏| 47/51 [00:02<00:00, 23.47it/s] Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 98%|█████████▊| 50/51 [00:02<00:00, 24.19it/s] Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 100%|██████████| 51/51 [00:02<00:00, 20.91it/s]
+(EngineCore_DP0 pid=3603174) Capturing CUDA graphs (decode, FULL): 0%| | 0/35 [00:00<?, ?it/s] Capturing CUDA graphs (decode, FULL): 3%|▎ | 1/35 [00:00<00:04, 7.51it/s] Capturing CUDA graphs (decode, FULL): 11%|█▏ | 4/35 [00:00<00:01, 16.43it/s] Capturing CUDA graphs (decode, FULL): 20%|██ | 7/35 [00:00<00:01, 19.40it/s] Capturing CUDA graphs (decode, FULL): 29%|██▊ | 10/35 [00:00<00:01, 20.92it/s] Capturing CUDA graphs (decode, FULL): 37%|███▋ | 13/35 [00:00<00:01, 21.93it/s] Capturing CUDA graphs (decode, FULL): 46%|████▌ | 16/35 [00:00<00:00, 22.62it/s] Capturing CUDA graphs (decode, FULL): 54%|█████▍ | 19/35 [00:00<00:00, 23.03it/s] Capturing CUDA graphs (decode, FULL): 63%|██████▎ | 22/35 [00:01<00:00, 23.33it/s] Capturing CUDA graphs (decode, FULL): 71%|███████▏ | 25/35 [00:01<00:00, 23.31it/s] Capturing CUDA graphs (decode, FULL): 80%|████████ | 28/35 [00:01<00:00, 23.67it/s] Capturing CUDA graphs (decode, FULL): 89%|████████▊ | 31/35 [00:01<00:00, 24.00it/s] Capturing CUDA graphs (decode, FULL): 97%|█████████▋| 34/35 [00:01<00:00, 24.21it/s] Capturing CUDA graphs (decode, FULL): 100%|██████████| 35/35 [00:01<00:00, 22.44it/s]
+(APIServer pid=3602630) INFO: Started server process [3602630]
+(APIServer pid=3602630) INFO: Waiting for application startup.
+(APIServer pid=3602630) INFO: Application startup complete.
+/u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/transformers/utils/hub.py:110: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead.
+ warnings.warn(
+2026-01-26 12:38:05,935 - INFO - Loaded dataset: math-hard
+2026-01-26 12:38:05,935 - INFO - Profile path configured: ../data/complex_profiles_v2/profiles_200.jsonl
+2026-01-26 12:38:06,001 - INFO - Loaded 200 profiles from ../data/complex_profiles_v2/profiles_200.jsonl
+2026-01-26 12:38:06,003 - INFO - Running method: vanilla
+2026-01-26 12:38:06,004 - INFO - Running profiles 0 to 9 (10 remaining)
+2026-01-26 12:38:06,004 - INFO - Using BATCH processing (local vLLM user) for vanilla
+2026-01-26 12:38:06,006 - INFO - Using local vLLM user simulator: http://localhost:8004/v1
+2026-01-26 12:38:11,230 - INFO - Batch: 10 profiles, 20 sessions remaining
+2026-01-26 12:38:11,240 - INFO - Session round 1/2: 10 total, 6876 sessions/hr
+2026-01-26 12:38:11,248 - INFO - Session round 2/2: 20 total, 13730 sessions/hr
+2026-01-26 12:38:11,996 - INFO - GPU memory freed after vanilla: 0.0GB allocated
+2026-01-26 12:38:12,000 - INFO - Report saved to ../results/test_local_user_20260126_123755/20260126_123805/report.md