# Resolved verl training configuration (looks like a Hydra/OmegaConf dump).
---
# Actor, reference policy, rollout engine and shared model settings.
# NOTE(review): the nesting below was reconstructed from a dump whose
# indentation had been stripped (every key sat at column 0, producing
# duplicate top-level keys). Confirm the layout against the verl release
# that consumes this file.
actor_rollout_ref:
  # Policy being trained.
  actor:
    optim:
      _target_: verl.workers.config.FSDPOptimizerConfig
      optimizer: AdamW
      optimizer_impl: torch.optim
      lr: 1.0e-06
      lr_warmup_steps_ratio: 0.0
      total_training_steps: -1
      weight_decay: 0.01
      lr_warmup_steps: -1
      betas:
      - 0.9
      - 0.999
      clip_grad: 1.0
      min_lr_ratio: 0.0
      num_cycles: 0.5
      lr_scheduler_type: constant
      warmup_style: null
      override_optimizer_config: null
    fsdp_config:
      _target_: verl.workers.config.FSDPEngineConfig
      wrap_policy:
        min_num_params: 0
      param_offload: false
      optimizer_offload: false
      offload_policy: false
      reshard_after_forward: true
      fsdp_size: -1
      forward_prefetch: false
      model_dtype: bfloat16
      use_orig_params: false
      seed: 42
      full_determinism: false
      ulysses_sequence_parallel_size: 1
      entropy_from_logits_with_chunking: false
      use_torch_compile: true
      entropy_checkpointing: false
      forward_only: false
      strategy: fsdp
      dtype: bfloat16
    _target_: verl.workers.config.FSDPActorConfig
    # ${oc.select:key,default} falls back to the default when the key is absent.
    rollout_n: ${oc.select:actor_rollout_ref.rollout.n,1}
    strategy: fsdp
    ppo_mini_batch_size: 8
    ppo_micro_batch_size: null
    ppo_micro_batch_size_per_gpu: 4
    use_dynamic_bsz: false
    ppo_max_token_len_per_gpu: 16384
    clip_ratio: 0.2
    clip_ratio_low: 0.2
    clip_ratio_high: 0.2
    tau_pos: 1.0
    tau_neg: 1.05
    freeze_vision_tower: false
    policy_loss:
      _target_: verl.workers.config.PolicyLossConfig
      loss_mode: vanilla
      clip_cov_ratio: 0.0002
      clip_cov_lb: 1.0
      clip_cov_ub: 5.0
      kl_cov_ratio: 0.0002
      ppo_kl_coef: 0.1
    clip_ratio_c: 3.0
    loss_agg_mode: token-mean
    loss_scale_factor: null
    entropy_coeff: 0
    calculate_entropy: false
    # KL loss is switched on here; algorithm.use_kl_in_reward is false.
    use_kl_loss: true
    use_torch_compile: true
    kl_loss_coef: 0.003
    kl_loss_type: low_var_kl
    ppo_epochs: 1
    shuffle: false
    data_loader_seed: 42
    checkpoint:
      _target_: verl.trainer.config.CheckpointConfig
      save_contents:
      - model
      - optimizer
      - extra
      # Relative interpolation: must stay a sibling of save_contents.
      load_contents: ${.save_contents}
      async_save: false
    use_fused_kernels: ${oc.select:actor_rollout_ref.model.use_fused_kernels,false}
    profiler:
      _target_: verl.utils.profiler.ProfilerConfig
      tool: ${oc.select:global_profiler.tool,null}
      enable: false
      all_ranks: false
      ranks: []
      save_path: ${oc.select:global_profiler.save_path,null}
      tool_config:
        nsys:
          _target_: verl.utils.profiler.config.NsightToolConfig
          discrete: ${oc.select:global_profiler.global_tool_config.nsys.discrete}
        npu:
          _target_: verl.utils.profiler.config.NPUToolConfig
          contents: []
          level: level0
          analysis: true
          discrete: false
        torch:
          _target_: verl.utils.profiler.config.TorchProfilerToolConfig
          step_start: 0
          step_end: null
        torch_memory:
          _target_: verl.utils.profiler.config.TorchMemoryToolConfig
          trace_alloc_max_entries: ${oc.select:global_profiler.global_tool_config.torch_memory.trace_alloc_max_entries,100000}
          stack_depth: ${oc.select:global_profiler.global_tool_config.torch_memory.stack_depth,32}
    router_replay:
      _target_: verl.workers.config.RouterReplayConfig
      mode: disabled
      record_file: null
      replay_file: null
    # NOTE(review): both optim.clip_grad and grad_clip are set to 1.0 —
    # confirm which one the worker actually reads.
    grad_clip: 1.0
    ulysses_sequence_parallel_size: 1
    entropy_from_logits_with_chunking: false
    entropy_checkpointing: false
    use_remove_padding: ${oc.select:actor_rollout_ref.model.use_remove_padding,false}
  # Reference policy; several values are interpolated from the actor.
  ref:
    rollout_n: ${oc.select:actor_rollout_ref.rollout.n,1}
    strategy: ${actor_rollout_ref.actor.strategy}
    use_torch_compile: ${oc.select:actor_rollout_ref.actor.use_torch_compile,true}
    log_prob_micro_batch_size: null
    log_prob_micro_batch_size_per_gpu: 4
    log_prob_use_dynamic_bsz: ${oc.select:actor_rollout_ref.actor.use_dynamic_bsz,false}
    log_prob_max_token_len_per_gpu: ${oc.select:actor_rollout_ref.actor.ppo_max_token_len_per_gpu,16384}
    profiler:
      _target_: verl.utils.profiler.ProfilerConfig
      tool: ${oc.select:global_profiler.tool,null}
      enable: false
      all_ranks: false
      ranks: []
      save_path: ${oc.select:global_profiler.save_path,null}
      tool_config:
        nsys:
          _target_: verl.utils.profiler.config.NsightToolConfig
          discrete: ${oc.select:global_profiler.global_tool_config.nsys.discrete}
        npu:
          _target_: verl.utils.profiler.config.NPUToolConfig
          contents: []
          level: level0
          analysis: true
          discrete: false
        torch:
          _target_: verl.utils.profiler.config.TorchProfilerToolConfig
          step_start: 0
          step_end: null
        torch_memory:
          _target_: verl.utils.profiler.config.TorchMemoryToolConfig
          trace_alloc_max_entries: ${oc.select:global_profiler.global_tool_config.torch_memory.trace_alloc_max_entries,100000}
          stack_depth: ${oc.select:global_profiler.global_tool_config.torch_memory.stack_depth,32}
    router_replay:
      _target_: verl.workers.config.RouterReplayConfig
      mode: disabled
      record_file: null
      replay_file: null
    # Differs from the actor's fsdp_config in param_offload (true) and
    # forward_only (true).
    fsdp_config:
      _target_: verl.workers.config.FSDPEngineConfig
      wrap_policy:
        min_num_params: 0
      param_offload: true
      optimizer_offload: false
      offload_policy: false
      reshard_after_forward: true
      fsdp_size: -1
      forward_prefetch: false
      model_dtype: bfloat16
      use_orig_params: false
      seed: 42
      full_determinism: false
      ulysses_sequence_parallel_size: 1
      entropy_from_logits_with_chunking: false
      use_torch_compile: true
      entropy_checkpointing: false
      forward_only: true
      strategy: fsdp
      dtype: bfloat16
    _target_: verl.workers.config.FSDPActorConfig
    ulysses_sequence_parallel_size: ${oc.select:actor_rollout_ref.actor.ulysses_sequence_parallel_size,1}
    entropy_from_logits_with_chunking: false
    entropy_checkpointing: false
  # Generation engine used to sample responses.
  rollout:
    _target_: verl.workers.config.RolloutConfig
    name: vllm
    mode: async
    temperature: 0.9
    top_k: -1
    top_p: 0.9
    prompt_length: ${oc.select:data.max_prompt_length,512}
    response_length: ${oc.select:data.max_response_length,512}
    dtype: bfloat16
    gpu_memory_utilization: 0.5
    ignore_eos: false
    enforce_eager: false
    cudagraph_capture_sizes: null
    free_cache_engine: true
    tensor_model_parallel_size: 1
    data_parallel_size: 1
    expert_parallel_size: 1
    pipeline_model_parallel_size: 1
    max_num_batched_tokens: 8192
    max_model_len: null
    max_num_seqs: 1024
    enable_chunked_prefill: true
    enable_prefix_caching: true
    logprobs_mode: processed_logprobs
    load_format: dummy
    log_prob_micro_batch_size: null
    log_prob_micro_batch_size_per_gpu: 4
    log_prob_use_dynamic_bsz: ${oc.select:actor_rollout_ref.actor.use_dynamic_bsz,false}
    log_prob_max_token_len_per_gpu: ${oc.select:actor_rollout_ref.actor.ppo_max_token_len_per_gpu,16384}
    disable_log_stats: true
    do_sample: true
    # Key is quoted so YAML 1.1 loaders do not read a bare n as a boolean.
    'n': 8
    over_sample_rate: 0
    multi_stage_wake_up: false
    engine_kwargs:
      vllm: {}
      sglang: {}
    # Sampling overrides for validation: temperature 0, one sample, no sampling.
    val_kwargs:
      _target_: verl.workers.config.SamplingConfig
      top_k: -1
      top_p: 1.0
      temperature: 0
      'n': 1
      do_sample: false
    multi_turn:
      _target_: verl.workers.config.MultiTurnConfig
      enable: false
      max_assistant_turns: null
      tool_config_path: null
      max_user_turns: null
      max_parallel_calls: 1
      max_tool_response_length: 256
      tool_response_truncate_side: middle
      interaction_config_path: null
      use_inference_chat_template: false
      tokenization_sanity_check_mode: strict
      format: hermes
      num_repeat_rollouts: null
    calculate_log_probs: false
    agent:
      _target_: verl.workers.config.AgentLoopConfig
      num_workers: 8
      default_agent_loop: single_turn_agent
      agent_loop_config_path: null
      custom_async_server:
        _target_: verl.workers.config.CustomAsyncServerConfig
        path: null
        name: null
    update_weights_bucket_megabytes: 512
    trace:
      _target_: verl.workers.config.TraceConfig
      backend: null
      token2text: false
      max_samples_per_step_per_worker: null
    skip_rollout: false
    skip_dump_dir: /tmp/rollout_dump
    skip_tokenizer_init: true
    enable_rollout_routing_replay: false
    # Rollout profiling mirrors the actor's profiler settings via interpolation.
    profiler:
      _target_: verl.utils.profiler.ProfilerConfig
      tool: ${oc.select:global_profiler.tool,null}
      enable: ${oc.select:actor_rollout_ref.actor.profiler.enable,false}
      all_ranks: ${oc.select:actor_rollout_ref.actor.profiler.all_ranks,false}
      ranks: ${oc.select:actor_rollout_ref.actor.profiler.ranks,[]}
      save_path: ${oc.select:global_profiler.save_path,null}
      tool_config: ${oc.select:actor_rollout_ref.actor.profiler.tool_config,null}
    prometheus:
      _target_: verl.workers.config.PrometheusConfig
      enable: false
      port: 9090
      file: /tmp/ray/session_latest/metrics/prometheus/prometheus.yml
      served_model_name: ${oc.select:actor_rollout_ref.model.path,null}
    quantization: null
    quantization_config_file: null
    layered_summon: false
  # Model referenced by the actor/ref/rollout interpolations above.
  model:
    _target_: verl.workers.config.HFModelConfig
    # NOTE(review): absolute, user-specific path; not portable across machines.
    path: /work/nvme/bfqt/yurenh2/sft_checkpoints/checkpoint-200
    hf_config_path: null
    tokenizer_path: null
    use_shm: false
    trust_remote_code: false
    custom_chat_template: null
    external_lib: null
    override_config: {}
    enable_gradient_checkpointing: true
    enable_activation_offload: false
    use_remove_padding: true
    lora_rank: 0
    lora_alpha: 16
    target_modules: all-linear
    exclude_modules: null
    lora_adapter_path: null
    use_liger: false
    use_fused_kernels: false
    fused_kernel_options:
      impl_backend: torch
    tiled_mlp:
      enabled: false
      num_shards: 4
  hybrid_engine: true
  nccl_timeout: 600
# Dataset and dataloader settings.
# Nesting restored: in the flattened dump these keys sat at column 0 and the
# wrapped tool_config_path value was not a valid continuation line.
data:
  tokenizer: null
  use_shm: false
  # NOTE(review): absolute, user-specific paths; not portable across machines.
  train_files: /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/training/grpo_verl/data/session_level_reflection_grpo_train.parquet
  # NOTE(review): val_files is the same parquet as train_files, so any
  # validation metric is measured on training data — confirm this is intended.
  val_files: /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/training/grpo_verl/data/session_level_reflection_grpo_train.parquet
  train_max_samples: -1
  val_max_samples: -1
  prompt_key: prompt
  reward_fn_key: data_source
  # rollout.prompt_length / response_length interpolate these two values.
  max_prompt_length: 2048
  max_response_length: 1024
  train_batch_size: 64
  val_batch_size: null
  # Folded plain scalar: the continuation line must be indented deeper than
  # the key, otherwise it is read as a new (invalid) entry.
  tool_config_path: ${oc.select:actor_rollout_ref.rollout.multi_turn.tool_config_path,
    null}
  return_raw_input_ids: false
  return_raw_chat: true
  return_full_prompt: false
  shuffle: true
  seed: null
  dataloader_num_workers: 8
  image_patch_size: 14
  validation_shuffle: false
  filter_overlong_prompts: true
  filter_overlong_prompts_workers: 1
  truncation: error
  image_key: images
  video_key: videos
  trust_remote_code: false
  custom_cls:
    path: null
    name: null
  return_multi_modal_inputs: true
  sampler:
    class_path: null
    class_name: null
  datagen:
    path: null
    name: null
  apply_chat_template_kwargs: {}
# Reward manager selection.
# NOTE(review): placed at top level because it follows the data section and
# carries its own _target_; confirm against the verl release in use.
reward_manager:
  _target_: verl.trainer.config.config.RewardManagerConfig
  source: register
  # Follows reward_model.reward_manager when that key exists, else "naive".
  name: ${oc.select:reward_model.reward_manager,naive}
  module:
    _target_: verl.trainer.config.config.ModuleConfig
    path: null
    name: custom_reward_manager
# Critic (value model) settings.
# NOTE(review): algorithm.adv_estimator is grpo and enable is null here, so
# this section is presumably unused in this run — confirm. Nesting was
# reconstructed from a dump whose indentation had been stripped.
critic:
  optim:
    _target_: verl.workers.config.FSDPOptimizerConfig
    optimizer: AdamW
    optimizer_impl: torch.optim
    lr: 1.0e-05
    lr_warmup_steps_ratio: 0.0
    total_training_steps: -1
    weight_decay: 0.01
    lr_warmup_steps: -1
    betas:
    - 0.9
    - 0.999
    clip_grad: 1.0
    min_lr_ratio: 0.0
    num_cycles: 0.5
    lr_scheduler_type: constant
    warmup_style: null
    override_optimizer_config: null
  model:
    fsdp_config:
      _target_: verl.workers.config.FSDPEngineConfig
      wrap_policy:
        min_num_params: 0
      param_offload: false
      optimizer_offload: false
      offload_policy: false
      reshard_after_forward: true
      fsdp_size: -1
      forward_prefetch: false
      model_dtype: fp32
      use_orig_params: false
      seed: 42
      full_determinism: false
      ulysses_sequence_parallel_size: 1
      entropy_from_logits_with_chunking: false
      use_torch_compile: true
      entropy_checkpointing: false
      forward_only: false
      strategy: fsdp
      dtype: bfloat16
    path: ~/models/deepseek-llm-7b-chat
    tokenizer_path: ${oc.select:actor_rollout_ref.model.path,"~/models/deepseek-llm-7b-chat"}
    override_config: {}
    external_lib: ${oc.select:actor_rollout_ref.model.external_lib,null}
    trust_remote_code: ${oc.select:actor_rollout_ref.model.trust_remote_code,false}
    _target_: verl.workers.config.FSDPCriticModelCfg
    use_shm: false
    enable_gradient_checkpointing: true
    enable_activation_offload: false
    use_remove_padding: false
    lora_rank: 0
    lora_alpha: 16
    target_modules: all-linear
    tiled_mlp:
      enabled: false
      num_shards: 4
  _target_: verl.workers.config.FSDPCriticConfig
  rollout_n: ${oc.select:actor_rollout_ref.rollout.n,1}
  strategy: fsdp
  enable: null
  ppo_mini_batch_size: ${oc.select:actor_rollout_ref.actor.ppo_mini_batch_size,256}
  ppo_micro_batch_size: null
  # Leading-dot paths are relative to this mapping, so these keys must stay
  # direct children of critic.
  ppo_micro_batch_size_per_gpu: ${oc.select:.ppo_micro_batch_size,null}
  use_dynamic_bsz: ${oc.select:actor_rollout_ref.actor.use_dynamic_bsz,false}
  ppo_max_token_len_per_gpu: 32768
  forward_max_token_len_per_gpu: ${.ppo_max_token_len_per_gpu}
  ppo_epochs: ${oc.select:actor_rollout_ref.actor.ppo_epochs,1}
  shuffle: ${oc.select:actor_rollout_ref.actor.shuffle,false}
  data_loader_seed: 42
  cliprange_value: 0.5
  loss_agg_mode: ${oc.select:actor_rollout_ref.actor.loss_agg_mode,token-mean}
  checkpoint:
    _target_: verl.trainer.config.CheckpointConfig
    save_contents:
    - model
    - optimizer
    - extra
    load_contents: ${.save_contents}
    async_save: false
  profiler:
    _target_: verl.utils.profiler.ProfilerConfig
    tool: ${oc.select:global_profiler.tool,null}
    enable: false
    all_ranks: false
    ranks: []
    save_path: ${oc.select:global_profiler.save_path,null}
    tool_config:
      nsys:
        _target_: verl.utils.profiler.config.NsightToolConfig
        discrete: ${oc.select:global_profiler.global_tool_config.nsys.discrete}
      npu:
        _target_: verl.utils.profiler.config.NPUToolConfig
        contents: []
        level: level0
        analysis: true
        discrete: false
      torch:
        _target_: verl.utils.profiler.config.TorchProfilerToolConfig
        step_start: 0
        step_end: null
      torch_memory:
        _target_: verl.utils.profiler.config.TorchMemoryToolConfig
        trace_alloc_max_entries: ${oc.select:global_profiler.global_tool_config.torch_memory.trace_alloc_max_entries,100000}
        stack_depth: ${oc.select:global_profiler.global_tool_config.torch_memory.stack_depth,32}
  forward_micro_batch_size: ${oc.select:.ppo_micro_batch_size,null}
  forward_micro_batch_size_per_gpu: ${oc.select:.ppo_micro_batch_size_per_gpu,null}
  ulysses_sequence_parallel_size: 1
  grad_clip: 1.0
# Model-based reward settings; enable is false in this file.
# NOTE(review): nesting was reconstructed from a dump whose indentation had
# been stripped; confirm against the verl release in use.
reward_model:
  enable: false
  enable_resource_pool: false
  n_gpus_per_node: 8
  nnodes: 0
  strategy: fsdp
  model:
    input_tokenizer: ${actor_rollout_ref.model.path}
    path: ~/models/FsfairX-LLaMA3-RM-v0.1
    external_lib: ${actor_rollout_ref.model.external_lib}
    trust_remote_code: false
    override_config: {}
    use_shm: false
    use_remove_padding: false
    use_fused_kernels: ${actor_rollout_ref.model.use_fused_kernels}
    fsdp_config:
      _target_: verl.workers.config.FSDPEngineConfig
      wrap_policy:
        min_num_params: 0
      param_offload: false
      reshard_after_forward: true
      fsdp_size: -1
      forward_prefetch: false
  micro_batch_size: null
  micro_batch_size_per_gpu: null
  max_length: null
  # Both values are taken from the critic section.
  use_dynamic_bsz: ${critic.use_dynamic_bsz}
  forward_max_token_len_per_gpu: ${critic.forward_max_token_len_per_gpu}
  # Read by the top-level reward_manager.name interpolation.
  reward_manager: naive
  reward_loop_source: register
  reward_loop_module_path: null
  reward_loop_class_name: null
  launch_reward_fn_async: false
  sandbox_fusion:
    url: null
    max_concurrent: 64
    memory_limit_mb: 1024
  profiler:
    _target_: verl.utils.profiler.ProfilerConfig
    tool: ${oc.select:global_profiler.tool,null}
    enable: false
    all_ranks: false
    ranks: []
    save_path: ${oc.select:global_profiler.save_path,null}
    tool_config: ${oc.select:actor_rollout_ref.actor.profiler.tool_config,null}
  ulysses_sequence_parallel_size: 1
  use_reward_loop: true
  num_workers: 1
  rollout:
    _target_: verl.workers.config.RolloutConfig
    # ??? is OmegaConf's marker for a mandatory value that is still missing.
    # NOTE(review): presumably never read while enable is false — confirm.
    name: ???
    dtype: bfloat16
    gpu_memory_utilization: 0.5
    enforce_eager: true
    cudagraph_capture_sizes: null
    free_cache_engine: true
    data_parallel_size: 1
    expert_parallel_size: 1
    tensor_model_parallel_size: 2
    max_num_batched_tokens: 8192
    max_model_len: null
    max_num_seqs: 1024
    load_format: auto
    engine_kwargs: {}
    limit_images: null
    enable_chunked_prefill: true
    enable_prefix_caching: true
    disable_log_stats: true
    skip_tokenizer_init: false
    prompt_length: 2048
    response_length: 2048
# Advantage estimation and KL-control settings (adv_estimator: grpo).
# Nesting restored from a dump whose indentation had been stripped.
algorithm:
  rollout_correction:
    rollout_is: null
    rollout_is_threshold: 2.0
    rollout_rs: null
    rollout_rs_threshold: null
    rollout_rs_threshold_lower: null
    rollout_token_veto_threshold: null
    bypass_mode: false
    loss_type: ppo_clip
    rollout_is_batch_normalize: false
  _target_: verl.trainer.config.AlgoConfig
  gamma: 1.0
  lam: 1.0
  adv_estimator: grpo
  norm_adv_by_std_in_grpo: true
  # KL is not added to the reward here; actor.use_kl_loss is true instead.
  use_kl_in_reward: false
  kl_penalty: kl
  kl_ctrl:
    _target_: verl.trainer.config.KLControlConfig
    type: fixed
    kl_coef: 0.001
    horizon: 10000
    target_kl: 0.1
  use_pf_ppo: false
  pf_ppo:
    reweight_method: pow
    weight_pow: 2.0
# Python file and function name used as the custom reward function.
# NOTE(review): absolute, user-specific path; not portable across machines.
custom_reward_function:
  path: /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/training/grpo_verl/verl_reward_functions.py
  name: compute_score
# Run-level settings: naming, logging, hardware layout, checkpoint cadence.
# Nesting restored from a dump whose indentation had been stripped.
trainer:
  balance_batch: true
  total_epochs: 1
  total_training_steps: null
  project_name: collaborative-agent-reflection-grpo
  experiment_name: llama3.1-8b-grpo
  logger:
  - console
  log_val_generations: 0
  rollout_data_dir: null
  validation_data_dir: null
  # One node with two GPUs.
  nnodes: 1
  n_gpus_per_node: 2
  save_freq: 50
  esi_redundant_time: 0
  resume_mode: auto
  resume_from_path: null
  val_before_train: false
  val_only: false
  test_freq: 100
  critic_warmup: 0
  default_hdfs_dir: null
  del_local_ckpt_after_load: false
  # NOTE(review): absolute, user-specific path; not portable across machines.
  default_local_dir: /scratch/bfqt/yurenh2/grpo_outputs
  max_actor_ckpt_to_keep: null
  max_critic_ckpt_to_keep: null
  ray_wait_register_center_timeout: 300
  device: cuda
  use_legacy_worker_impl: auto
# Shared profiler defaults. The per-component profiler sections read tool,
# save_path and global_tool_config.* from here via ${oc.select:...}.
# NOTE(review): nesting was reconstructed from a dump whose indentation had
# been stripped; confirm against the verl release in use.
global_profiler:
  _target_: verl.utils.profiler.ProfilerConfig
  # No profiling tool is selected.
  tool: null
  steps: null
  profile_continuous_steps: false
  save_path: outputs/profile
  global_tool_config:
    nsys:
      _target_: verl.utils.profiler.config.NsightToolConfig
      discrete: false
      controller_nsight_options:
        trace: cuda,nvtx,cublas,ucx
        # Quoted, so these are the strings 'true', not booleans.
        cuda-memory-usage: 'true'
        cuda-graph-trace: graph
      worker_nsight_options:
        trace: cuda,nvtx,cublas,ucx
        cuda-memory-usage: 'true'
        cuda-graph-trace: graph
        capture-range: cudaProfilerApi
        capture-range-end: null
        kill: none
    torch_memory:
      trace_alloc_max_entries: 100000
      stack_depth: 32
      context: all
      stacks: all
      kw_args: {}
# Transfer queue toggle; off in this configuration.
transfer_queue:
  enable: false
# Ray-related options. Nesting restored; the stray "|" that trailed the dump
# (table-cell residue from extraction) is removed because it is not YAML
# content.
ray_kwargs:
  ray_init:
    num_cpus: null
  timeline_json_file: null