# path: root/collaborativeagents/training/grpo_verl/outputs/2026-01-11/03-50-42/.hydra/config.yaml
# blob: 7af183b3cf88256c39d75a433654e7191e0eb220 (plain)
# Actor, reference-policy, rollout and model settings share this root key.
# Throughout, `${oc.select:KEY,DEFAULT}` is an OmegaConf interpolation that
# resolves to the value at KEY when it exists and to DEFAULT otherwise.
actor_rollout_ref:
  # Settings for the trainable policy (actor).
  actor:
    # Optimizer and learning-rate schedule for the actor.
    optim:
      _target_: verl.workers.config.FSDPOptimizerConfig
      optimizer: AdamW
      optimizer_impl: torch.optim
      lr: 1.0e-06
      lr_warmup_steps_ratio: 0.0
      # NOTE(review): -1 here and on lr_warmup_steps looks like an "unset"
      # sentinel — confirm how verl interprets it.
      total_training_steps: -1
      weight_decay: 0.01
      lr_warmup_steps: -1
      betas:
      - 0.9
      - 0.999
      clip_grad: 1.0
      min_lr_ratio: 0.0
      num_cycles: 0.5
      lr_scheduler_type: constant
      warmup_style: null
      override_optimizer_config: null
    # FSDP engine options for the actor; param_offload and optimizer_offload
    # are both off in this block.
    fsdp_config:
      _target_: verl.workers.config.FSDPEngineConfig
      wrap_policy:
        min_num_params: 0
      param_offload: false
      optimizer_offload: false
      offload_policy: false
      reshard_after_forward: true
      fsdp_size: -1
      forward_prefetch: false
      model_dtype: bfloat16
      use_orig_params: false
      seed: 42
      full_determinism: false
      ulysses_sequence_parallel_size: 1
      entropy_from_logits_with_chunking: false
      use_torch_compile: true
      entropy_checkpointing: false
      forward_only: false
      strategy: fsdp
      dtype: bfloat16
    _target_: verl.workers.config.FSDPActorConfig
    # Mirrors actor_rollout_ref.rollout.n (8 in this file); 1 is the fallback.
    rollout_n: ${oc.select:actor_rollout_ref.rollout.n,1}
    strategy: fsdp
    ppo_mini_batch_size: 8
    ppo_micro_batch_size: null
    ppo_micro_batch_size_per_gpu: 4
    use_dynamic_bsz: false
    ppo_max_token_len_per_gpu: 16384
    # The three clip ratios are all set to the same value (0.2).
    clip_ratio: 0.2
    clip_ratio_low: 0.2
    clip_ratio_high: 0.2
    tau_pos: 1.0
    tau_neg: 1.05
    freeze_vision_tower: false
    policy_loss:
      _target_: verl.workers.config.PolicyLossConfig
      loss_mode: vanilla
      clip_cov_ratio: 0.0002
      clip_cov_lb: 1.0
      clip_cov_ub: 5.0
      kl_cov_ratio: 0.0002
      ppo_kl_coef: 0.1
    clip_ratio_c: 3.0
    loss_agg_mode: token-mean
    loss_scale_factor: null
    entropy_coeff: 0
    calculate_entropy: false
    # A KL loss term is switched on, with coefficient and type given by
    # kl_loss_coef / kl_loss_type below.
    use_kl_loss: true
    use_torch_compile: true
    kl_loss_coef: 0.003
    kl_loss_type: low_var_kl
    ppo_epochs: 1
    shuffle: false
    data_loader_seed: 42
    checkpoint:
      _target_: verl.trainer.config.CheckpointConfig
      save_contents:
      - model
      - optimizer
      - extra
      # Relative interpolation (leading dot): reuses the sibling
      # save_contents list, so the same items are loaded as are saved.
      load_contents: ${.save_contents}
      async_save: false
    # Mirrors actor_rollout_ref.model.use_fused_kernels (false in this file).
    use_fused_kernels: ${oc.select:actor_rollout_ref.model.use_fused_kernels,false}
    # Profiler settings for the actor; disabled (enable: false).
    profiler:
      _target_: verl.utils.profiler.ProfilerConfig
      # tool and save_path follow the top-level global_profiler section.
      tool: ${oc.select:global_profiler.tool,null}
      enable: false
      all_ranks: false
      ranks: []
      save_path: ${oc.select:global_profiler.save_path,null}
      tool_config:
        nsys:
          _target_: verl.utils.profiler.config.NsightToolConfig
          # No explicit default is given to oc.select here; the referenced
          # global_profiler.global_tool_config.nsys.discrete is false in this file.
          discrete: ${oc.select:global_profiler.global_tool_config.nsys.discrete}
        npu:
          _target_: verl.utils.profiler.config.NPUToolConfig
          contents: []
          level: level0
          analysis: true
          discrete: false
        torch:
          _target_: verl.utils.profiler.config.TorchProfilerToolConfig
          step_start: 0
          step_end: null
        torch_memory:
          _target_: verl.utils.profiler.config.TorchMemoryToolConfig
          # Both values follow global_profiler.global_tool_config.torch_memory
          # (100000 and 32 in this file).
          trace_alloc_max_entries: ${oc.select:global_profiler.global_tool_config.torch_memory.trace_alloc_max_entries,100000}
          stack_depth: ${oc.select:global_profiler.global_tool_config.torch_memory.stack_depth,32}
    router_replay:
      _target_: verl.workers.config.RouterReplayConfig
      mode: disabled
      record_file: null
      replay_file: null
    grad_clip: 1.0
    ulysses_sequence_parallel_size: 1
    entropy_from_logits_with_chunking: false
    entropy_checkpointing: false
    # Mirrors actor_rollout_ref.model.use_remove_padding (true in this file).
    use_remove_padding: ${oc.select:actor_rollout_ref.model.use_remove_padding,false}
  # Reference-policy settings. Several keys are interpolations that copy the
  # corresponding actor_rollout_ref.actor / rollout values.
  ref:
    # Mirrors actor_rollout_ref.rollout.n (8 in this file); 1 is the fallback.
    rollout_n: ${oc.select:actor_rollout_ref.rollout.n,1}
    # Plain interpolation (no fallback): copies actor.strategy (fsdp).
    strategy: ${actor_rollout_ref.actor.strategy}
    # Mirrors actor.use_torch_compile (true in this file).
    use_torch_compile: ${oc.select:actor_rollout_ref.actor.use_torch_compile,true}
    log_prob_micro_batch_size: null
    log_prob_micro_batch_size_per_gpu: 4
    # Mirror actor.use_dynamic_bsz (false) and
    # actor.ppo_max_token_len_per_gpu (16384).
    log_prob_use_dynamic_bsz: ${oc.select:actor_rollout_ref.actor.use_dynamic_bsz,false}
    log_prob_max_token_len_per_gpu: ${oc.select:actor_rollout_ref.actor.ppo_max_token_len_per_gpu,16384}
    # Profiler settings for the reference policy; disabled (enable: false).
    profiler:
      _target_: verl.utils.profiler.ProfilerConfig
      # tool and save_path follow the top-level global_profiler section.
      tool: ${oc.select:global_profiler.tool,null}
      enable: false
      all_ranks: false
      ranks: []
      save_path: ${oc.select:global_profiler.save_path,null}
      tool_config:
        nsys:
          _target_: verl.utils.profiler.config.NsightToolConfig
          # No explicit default is given to oc.select here; the referenced
          # global_profiler.global_tool_config.nsys.discrete is false in this file.
          discrete: ${oc.select:global_profiler.global_tool_config.nsys.discrete}
        npu:
          _target_: verl.utils.profiler.config.NPUToolConfig
          contents: []
          level: level0
          analysis: true
          discrete: false
        torch:
          _target_: verl.utils.profiler.config.TorchProfilerToolConfig
          step_start: 0
          step_end: null
        torch_memory:
          _target_: verl.utils.profiler.config.TorchMemoryToolConfig
          # Both values follow global_profiler.global_tool_config.torch_memory
          # (100000 and 32 in this file).
          trace_alloc_max_entries: ${oc.select:global_profiler.global_tool_config.torch_memory.trace_alloc_max_entries,100000}
          stack_depth: ${oc.select:global_profiler.global_tool_config.torch_memory.stack_depth,32}
    router_replay:
      _target_: verl.workers.config.RouterReplayConfig
      mode: disabled
      record_file: null
      replay_file: null
    # Same keys as actor_rollout_ref.actor.fsdp_config, but with
    # param_offload: true and forward_only: true.
    fsdp_config:
      _target_: verl.workers.config.FSDPEngineConfig
      wrap_policy:
        min_num_params: 0
      param_offload: true
      optimizer_offload: false
      offload_policy: false
      reshard_after_forward: true
      fsdp_size: -1
      forward_prefetch: false
      model_dtype: bfloat16
      use_orig_params: false
      seed: 42
      full_determinism: false
      ulysses_sequence_parallel_size: 1
      entropy_from_logits_with_chunking: false
      use_torch_compile: true
      entropy_checkpointing: false
      forward_only: true
      strategy: fsdp
      dtype: bfloat16
    # NOTE(review): this is the same _target_ as actor_rollout_ref.actor —
    # confirm that is intended for the reference policy.
    _target_: verl.workers.config.FSDPActorConfig
    # Mirrors actor.ulysses_sequence_parallel_size (1 in this file).
    ulysses_sequence_parallel_size: ${oc.select:actor_rollout_ref.actor.ulysses_sequence_parallel_size,1}
    entropy_from_logits_with_chunking: false
    entropy_checkpointing: false
  # Rollout (generation) settings; this run selects name: vllm, mode: async.
  rollout:
    _target_: verl.workers.config.RolloutConfig
    name: vllm
    mode: async
    # Sampling parameters used for training rollouts.
    temperature: 0.9
    top_k: -1
    top_p: 0.9
    # Follow data.max_prompt_length / data.max_response_length
    # (2048 / 1024 in this file); 512 is only the fallback.
    prompt_length: ${oc.select:data.max_prompt_length,512}
    response_length: ${oc.select:data.max_response_length,512}
    dtype: bfloat16
    gpu_memory_utilization: 0.5
    ignore_eos: false
    enforce_eager: false
    cudagraph_capture_sizes: null
    free_cache_engine: true
    # All parallelism degrees are 1 for this run.
    tensor_model_parallel_size: 1
    data_parallel_size: 1
    expert_parallel_size: 1
    pipeline_model_parallel_size: 1
    max_num_batched_tokens: 8192
    max_model_len: null
    max_num_seqs: 1024
    enable_chunked_prefill: true
    enable_prefix_caching: true
    logprobs_mode: processed_logprobs
    load_format: dummy
    log_prob_micro_batch_size: null
    log_prob_micro_batch_size_per_gpu: 4
    # Mirror actor.use_dynamic_bsz (false) and
    # actor.ppo_max_token_len_per_gpu (16384).
    log_prob_use_dynamic_bsz: ${oc.select:actor_rollout_ref.actor.use_dynamic_bsz,false}
    log_prob_max_token_len_per_gpu: ${oc.select:actor_rollout_ref.actor.ppo_max_token_len_per_gpu,16384}
    disable_log_stats: true
    do_sample: true
    # The key is quoted so that YAML 1.1 loaders keep it a string instead of
    # reading a bare n as a boolean. Other sections read this value through
    # ${oc.select:actor_rollout_ref.rollout.n,1}.
    'n': 8
    over_sample_rate: 0
    multi_stage_wake_up: false
    engine_kwargs:
      vllm: {}
      sglang: {}
    # Sampling overrides for validation: temperature 0, do_sample false,
    # one sample per prompt.
    val_kwargs:
      _target_: verl.workers.config.SamplingConfig
      top_k: -1
      top_p: 1.0
      temperature: 0
      'n': 1
      do_sample: false
    # Multi-turn / tool-calling options; disabled for this run (enable: false).
    multi_turn:
      _target_: verl.workers.config.MultiTurnConfig
      enable: false
      max_assistant_turns: null
      tool_config_path: null
      max_user_turns: null
      max_parallel_calls: 1
      max_tool_response_length: 256
      tool_response_truncate_side: middle
      interaction_config_path: null
      use_inference_chat_template: false
      tokenization_sanity_check_mode: strict
      format: hermes
      num_repeat_rollouts: null
    calculate_log_probs: false
    agent:
      _target_: verl.workers.config.AgentLoopConfig
      num_workers: 8
      default_agent_loop: single_turn_agent
      agent_loop_config_path: null
      custom_async_server:
        _target_: verl.workers.config.CustomAsyncServerConfig
        path: null
        name: null
    update_weights_bucket_megabytes: 512
    trace:
      _target_: verl.workers.config.TraceConfig
      backend: null
      token2text: false
      max_samples_per_step_per_worker: null
    skip_rollout: false
    skip_dump_dir: /tmp/rollout_dump
    skip_tokenizer_init: true
    enable_rollout_routing_replay: false
    # Rollout profiler: enable, all_ranks, ranks and tool_config copy the
    # actor's profiler block; tool and save_path follow global_profiler.
    profiler:
      _target_: verl.utils.profiler.ProfilerConfig
      tool: ${oc.select:global_profiler.tool,null}
      enable: ${oc.select:actor_rollout_ref.actor.profiler.enable,false}
      all_ranks: ${oc.select:actor_rollout_ref.actor.profiler.all_ranks,false}
      ranks: ${oc.select:actor_rollout_ref.actor.profiler.ranks,[]}
      save_path: ${oc.select:global_profiler.save_path,null}
      tool_config: ${oc.select:actor_rollout_ref.actor.profiler.tool_config,null}
    prometheus:
      _target_: verl.workers.config.PrometheusConfig
      enable: false
      port: 9090
      file: /tmp/ray/session_latest/metrics/prometheus/prometheus.yml
      # Mirrors actor_rollout_ref.model.path.
      served_model_name: ${oc.select:actor_rollout_ref.model.path,null}
    quantization: null
    quantization_config_file: null
    layered_summon: false
  # Policy model settings. Other sections read path, external_lib,
  # trust_remote_code, use_remove_padding and use_fused_kernels from here
  # through interpolations.
  model:
    _target_: verl.workers.config.HFModelConfig
    # Absolute path to a local checkpoint directory (machine-specific).
    path: /work/nvme/bfqt/yurenh2/sft_checkpoints/checkpoint-200
    hf_config_path: null
    tokenizer_path: null
    use_shm: false
    trust_remote_code: false
    custom_chat_template: null
    external_lib: null
    override_config: {}
    enable_gradient_checkpointing: true
    enable_activation_offload: false
    use_remove_padding: true
    # NOTE(review): lora_rank is 0, which presumably means LoRA is not used
    # and the remaining lora_* keys are inert — confirm against verl.
    lora_rank: 0
    lora_alpha: 16
    target_modules: all-linear
    exclude_modules: null
    lora_adapter_path: null
    use_liger: false
    use_fused_kernels: false
    fused_kernel_options:
      impl_backend: torch
    tiled_mlp:
      enabled: false
      num_shards: 4
  # The two keys below belong to actor_rollout_ref itself, not to model.
  hybrid_engine: true
  nccl_timeout: 600
# Dataset and dataloader settings.
data:
  tokenizer: null
  use_shm: false
  # Absolute, machine-specific parquet paths.
  train_files: /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/training/grpo_verl/data/session_level_reflection_grpo_train.parquet
  # NOTE(review): val_files is the same parquet file as train_files, so any
  # validation metric would be computed on training data — confirm this is
  # intended.
  val_files: /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/training/grpo_verl/data/session_level_reflection_grpo_train.parquet
  train_max_samples: -1
  val_max_samples: -1
  prompt_key: prompt
  reward_fn_key: data_source
  # actor_rollout_ref.rollout.prompt_length / response_length read these two
  # values through interpolations.
  max_prompt_length: 2048
  max_response_length: 1024
  train_batch_size: 64
  val_batch_size: null
  # Mirrors actor_rollout_ref.rollout.multi_turn.tool_config_path (null in
  # this file). The interpolation is kept on a single line so its value does
  # not depend on YAML line folding.
  tool_config_path: ${oc.select:actor_rollout_ref.rollout.multi_turn.tool_config_path, null}
  return_raw_input_ids: false
  return_raw_chat: true
  return_full_prompt: false
  shuffle: true
  seed: null
  dataloader_num_workers: 8
  image_patch_size: 14
  validation_shuffle: false
  filter_overlong_prompts: true
  filter_overlong_prompts_workers: 1
  truncation: error
  image_key: images
  video_key: videos
  trust_remote_code: false
  custom_cls:
    path: null
    name: null
  return_multi_modal_inputs: true
  sampler:
    class_path: null
    class_name: null
  datagen:
    path: null
    name: null
  apply_chat_template_kwargs: {}
# Reward-manager selection.
reward_manager:
  _target_: verl.trainer.config.config.RewardManagerConfig
  source: register
  # Mirrors reward_model.reward_manager (naive in this file); naive is also
  # the fallback.
  name: ${oc.select:reward_model.reward_manager,naive}
  module:
    _target_: verl.trainer.config.config.ModuleConfig
    path: null
    name: custom_reward_manager
# Critic (value model) settings. Many keys are interpolations that copy the
# corresponding actor_rollout_ref values.
# NOTE(review): algorithm.adv_estimator is grpo and enable is null below;
# presumably the critic is not instantiated for this run — confirm.
critic:
  # Optimizer for the critic; same keys as the actor's optim block, with
  # lr 1.0e-05 instead of 1.0e-06.
  optim:
    _target_: verl.workers.config.FSDPOptimizerConfig
    optimizer: AdamW
    optimizer_impl: torch.optim
    lr: 1.0e-05
    lr_warmup_steps_ratio: 0.0
    total_training_steps: -1
    weight_decay: 0.01
    lr_warmup_steps: -1
    betas:
    - 0.9
    - 0.999
    clip_grad: 1.0
    min_lr_ratio: 0.0
    num_cycles: 0.5
    lr_scheduler_type: constant
    warmup_style: null
    override_optimizer_config: null
  model:
    fsdp_config:
      _target_: verl.workers.config.FSDPEngineConfig
      wrap_policy:
        min_num_params: 0
      param_offload: false
      optimizer_offload: false
      offload_policy: false
      reshard_after_forward: true
      fsdp_size: -1
      forward_prefetch: false
      # model_dtype is fp32 here, unlike the bfloat16 used in
      # actor_rollout_ref.actor.fsdp_config.
      model_dtype: fp32
      use_orig_params: false
      seed: 42
      full_determinism: false
      ulysses_sequence_parallel_size: 1
      entropy_from_logits_with_chunking: false
      use_torch_compile: true
      entropy_checkpointing: false
      forward_only: false
      strategy: fsdp
      dtype: bfloat16
    # Not the same model as actor_rollout_ref.model.path.
    path: ~/models/deepseek-llm-7b-chat
    # Resolves to actor_rollout_ref.model.path when that key exists (it does
    # in this file); the quoted path is only the fallback.
    tokenizer_path: ${oc.select:actor_rollout_ref.model.path,"~/models/deepseek-llm-7b-chat"}
    override_config: {}
    # Mirror actor_rollout_ref.model.external_lib (null) and
    # trust_remote_code (false).
    external_lib: ${oc.select:actor_rollout_ref.model.external_lib,null}
    trust_remote_code: ${oc.select:actor_rollout_ref.model.trust_remote_code,false}
    _target_: verl.workers.config.FSDPCriticModelCfg
    use_shm: false
    enable_gradient_checkpointing: true
    enable_activation_offload: false
    use_remove_padding: false
    lora_rank: 0
    lora_alpha: 16
    target_modules: all-linear
    tiled_mlp:
      enabled: false
      num_shards: 4
  _target_: verl.workers.config.FSDPCriticConfig
  # Mirrors actor_rollout_ref.rollout.n (8 in this file); 1 is the fallback.
  rollout_n: ${oc.select:actor_rollout_ref.rollout.n,1}
  strategy: fsdp
  enable: null
  # Mirrors actor.ppo_mini_batch_size (8 in this file); 256 is the fallback.
  ppo_mini_batch_size: ${oc.select:actor_rollout_ref.actor.ppo_mini_batch_size,256}
  ppo_micro_batch_size: null
  # Relative key (leading dot): reads the sibling ppo_micro_batch_size,
  # which is null above.
  ppo_micro_batch_size_per_gpu: ${oc.select:.ppo_micro_batch_size,null}
  # Mirrors actor.use_dynamic_bsz (false in this file).
  use_dynamic_bsz: ${oc.select:actor_rollout_ref.actor.use_dynamic_bsz,false}
  ppo_max_token_len_per_gpu: 32768
  # Relative interpolation: copies the sibling ppo_max_token_len_per_gpu.
  forward_max_token_len_per_gpu: ${.ppo_max_token_len_per_gpu}
  # Mirror actor.ppo_epochs (1) and actor.shuffle (false).
  ppo_epochs: ${oc.select:actor_rollout_ref.actor.ppo_epochs,1}
  shuffle: ${oc.select:actor_rollout_ref.actor.shuffle,false}
  data_loader_seed: 42
  cliprange_value: 0.5
  # Mirrors actor.loss_agg_mode (token-mean in this file).
  loss_agg_mode: ${oc.select:actor_rollout_ref.actor.loss_agg_mode,token-mean}
  checkpoint:
    _target_: verl.trainer.config.CheckpointConfig
    save_contents:
    - model
    - optimizer
    - extra
    # Relative interpolation: reuses the sibling save_contents list.
    load_contents: ${.save_contents}
    async_save: false
  # Profiler settings for the critic; disabled (enable: false).
  profiler:
    _target_: verl.utils.profiler.ProfilerConfig
    # tool and save_path follow the top-level global_profiler section.
    tool: ${oc.select:global_profiler.tool,null}
    enable: false
    all_ranks: false
    ranks: []
    save_path: ${oc.select:global_profiler.save_path,null}
    tool_config:
      nsys:
        _target_: verl.utils.profiler.config.NsightToolConfig
        # No explicit default is given to oc.select here; the referenced
        # global_profiler.global_tool_config.nsys.discrete is false in this file.
        discrete: ${oc.select:global_profiler.global_tool_config.nsys.discrete}
      npu:
        _target_: verl.utils.profiler.config.NPUToolConfig
        contents: []
        level: level0
        analysis: true
        discrete: false
      torch:
        _target_: verl.utils.profiler.config.TorchProfilerToolConfig
        step_start: 0
        step_end: null
      torch_memory:
        _target_: verl.utils.profiler.config.TorchMemoryToolConfig
        # Both values follow global_profiler.global_tool_config.torch_memory
        # (100000 and 32 in this file).
        trace_alloc_max_entries: ${oc.select:global_profiler.global_tool_config.torch_memory.trace_alloc_max_entries,100000}
        stack_depth: ${oc.select:global_profiler.global_tool_config.torch_memory.stack_depth,32}
  # Relative keys: these copy the sibling ppo_micro_batch_size and
  # ppo_micro_batch_size_per_gpu values defined above.
  forward_micro_batch_size: ${oc.select:.ppo_micro_batch_size,null}
  forward_micro_batch_size_per_gpu: ${oc.select:.ppo_micro_batch_size_per_gpu,null}
  ulysses_sequence_parallel_size: 1
  grad_clip: 1.0
# Model-based reward settings; switched off for this run (enable: false).
reward_model:
  enable: false
  enable_resource_pool: false
  n_gpus_per_node: 8
  nnodes: 0
  strategy: fsdp
  model:
    # Plain interpolations (no fallback): these copy path, external_lib and
    # use_fused_kernels from actor_rollout_ref.model.
    input_tokenizer: ${actor_rollout_ref.model.path}
    path: ~/models/FsfairX-LLaMA3-RM-v0.1
    external_lib: ${actor_rollout_ref.model.external_lib}
    trust_remote_code: false
    override_config: {}
    use_shm: false
    use_remove_padding: false
    use_fused_kernels: ${actor_rollout_ref.model.use_fused_kernels}
    fsdp_config:
      _target_: verl.workers.config.FSDPEngineConfig
      wrap_policy:
        min_num_params: 0
      param_offload: false
      reshard_after_forward: true
      fsdp_size: -1
      forward_prefetch: false
  micro_batch_size: null
  micro_batch_size_per_gpu: null
  max_length: null
  # Copy critic.use_dynamic_bsz and critic.forward_max_token_len_per_gpu,
  # which are themselves interpolations in the critic section.
  use_dynamic_bsz: ${critic.use_dynamic_bsz}
  forward_max_token_len_per_gpu: ${critic.forward_max_token_len_per_gpu}
  # Read by the top-level reward_manager.name interpolation.
  reward_manager: naive
  reward_loop_source: register
  reward_loop_module_path: null
  reward_loop_class_name: null
  launch_reward_fn_async: false
  sandbox_fusion:
    url: null
    max_concurrent: 64
    memory_limit_mb: 1024
  # Profiler settings for the reward model; disabled (enable: false).
  profiler:
    _target_: verl.utils.profiler.ProfilerConfig
    tool: ${oc.select:global_profiler.tool,null}
    enable: false
    all_ranks: false
    ranks: []
    save_path: ${oc.select:global_profiler.save_path,null}
    # Reuses the actor profiler's tool_config mapping.
    tool_config: ${oc.select:actor_rollout_ref.actor.profiler.tool_config,null}
  ulysses_sequence_parallel_size: 1
  use_reward_loop: true
  num_workers: 1
  # Rollout engine settings for the reward model.
  rollout:
    _target_: verl.workers.config.RolloutConfig
    # `???` is OmegaConf's mandatory-missing marker: reading this key raises
    # an error until a value is supplied.
    # NOTE(review): presumably never read while reward_model.enable is
    # false — confirm before enabling the reward model.
    name: ???
    dtype: bfloat16
    gpu_memory_utilization: 0.5
    enforce_eager: true
    cudagraph_capture_sizes: null
    free_cache_engine: true
    data_parallel_size: 1
    expert_parallel_size: 1
    tensor_model_parallel_size: 2
    max_num_batched_tokens: 8192
    max_model_len: null
    max_num_seqs: 1024
    load_format: auto
    engine_kwargs: {}
    limit_images: null
    enable_chunked_prefill: true
    enable_prefix_caching: true
    disable_log_stats: true
    skip_tokenizer_init: false
    prompt_length: 2048
    response_length: 2048
# RL algorithm settings; the advantage estimator selected is grpo.
algorithm:
  # Rollout-correction options; rollout_is and rollout_rs are both null.
  rollout_correction:
    rollout_is: null
    rollout_is_threshold: 2.0
    rollout_rs: null
    rollout_rs_threshold: null
    rollout_rs_threshold_lower: null
    rollout_token_veto_threshold: null
    bypass_mode: false
    loss_type: ppo_clip
    rollout_is_batch_normalize: false
  _target_: verl.trainer.config.AlgoConfig
  gamma: 1.0
  lam: 1.0
  adv_estimator: grpo
  norm_adv_by_std_in_grpo: true
  # No KL term is added to the reward; the actor section enables a KL loss
  # instead (actor_rollout_ref.actor.use_kl_loss: true).
  use_kl_in_reward: false
  kl_penalty: kl
  kl_ctrl:
    _target_: verl.trainer.config.KLControlConfig
    type: fixed
    kl_coef: 0.001
    horizon: 10000
    target_kl: 0.1
  use_pf_ppo: false
  pf_ppo:
    reweight_method: pow
    weight_pow: 2.0
# User-supplied reward function: a Python file (absolute, machine-specific
# path) and the name of the function to use from it.
custom_reward_function:
  path: /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/training/grpo_verl/verl_reward_functions.py
  name: compute_score
# Training-loop settings: run length, logging, checkpointing and hardware.
trainer:
  balance_batch: true
  total_epochs: 1
  total_training_steps: null
  project_name: collaborative-agent-reflection-grpo
  experiment_name: llama3.1-8b-grpo
  # Console is the only logger configured.
  logger:
  - console
  log_val_generations: 0
  rollout_data_dir: null
  validation_data_dir: null
  # Hardware layout for this run: 1 node with 2 GPUs.
  nnodes: 1
  n_gpus_per_node: 2
  save_freq: 50
  esi_redundant_time: 0
  resume_mode: auto
  resume_from_path: null
  val_before_train: false
  val_only: false
  test_freq: 100
  critic_warmup: 0
  default_hdfs_dir: null
  del_local_ckpt_after_load: false
  # Absolute, machine-specific checkpoint/output directory.
  default_local_dir: /scratch/bfqt/yurenh2/grpo_outputs
  max_actor_ckpt_to_keep: null
  max_critic_ckpt_to_keep: null
  ray_wait_register_center_timeout: 300
  device: cuda
  use_legacy_worker_impl: auto
# Global profiler settings. The per-component profiler blocks read tool,
# save_path and global_tool_config values from here through interpolations.
global_profiler:
  _target_: verl.utils.profiler.ProfilerConfig
  # No profiling tool is selected for this run.
  tool: null
  steps: null
  profile_continuous_steps: false
  save_path: outputs/profile
  global_tool_config:
    nsys:
      _target_: verl.utils.profiler.config.NsightToolConfig
      discrete: false
      controller_nsight_options:
        trace: cuda,nvtx,cublas,ucx
        # Quoted, so the value is the string 'true' rather than a boolean.
        cuda-memory-usage: 'true'
        cuda-graph-trace: graph
      worker_nsight_options:
        trace: cuda,nvtx,cublas,ucx
        # Quoted, so the value is the string 'true' rather than a boolean.
        cuda-memory-usage: 'true'
        cuda-graph-trace: graph
        capture-range: cudaProfilerApi
        capture-range-end: null
        kill: none
    torch_memory:
      trace_alloc_max_entries: 100000
      stack_depth: 32
      context: all
      stacks: all
      kw_args: {}
# Transfer-queue feature toggle; off for this run.
transfer_queue:
  enable: false
# Ray-related options.
ray_kwargs:
  # NOTE(review): presumably forwarded to ray.init — confirm against the
  # launcher. num_cpus is left unset (null).
  ray_init:
    num_cpus: null
  timeline_json_file: null