summaryrefslogtreecommitdiff
path: root/collaborativeagents/training/grpo_verl/outputs/2026-01-11/03-50-42/.hydra/hydra.yaml
diff options
context:
space:
mode:
Diffstat (limited to 'collaborativeagents/training/grpo_verl/outputs/2026-01-11/03-50-42/.hydra/hydra.yaml')
-rw-r--r--collaborativeagents/training/grpo_verl/outputs/2026-01-11/03-50-42/.hydra/hydra.yaml214
1 files changed, 214 insertions, 0 deletions
diff --git a/collaborativeagents/training/grpo_verl/outputs/2026-01-11/03-50-42/.hydra/hydra.yaml b/collaborativeagents/training/grpo_verl/outputs/2026-01-11/03-50-42/.hydra/hydra.yaml
new file mode 100644
index 0000000..8e4c4ec
--- /dev/null
+++ b/collaborativeagents/training/grpo_verl/outputs/2026-01-11/03-50-42/.hydra/hydra.yaml
@@ -0,0 +1,214 @@
+hydra:
+ run:
+ dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}
+ sweep:
+ dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S}
+ subdir: ${hydra.job.num}
+ launcher:
+ _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
+ sweeper:
+ _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
+ max_batch_size: null
+ params: null
+ help:
+ app_name: ${hydra.job.name}
+ header: '${hydra.help.app_name} is powered by Hydra.
+
+ '
+ footer: 'Powered by Hydra (https://hydra.cc)
+
+ Use --hydra-help to view Hydra specific help
+
+ '
+ template: '${hydra.help.header}
+
+ == Configuration groups ==
+
+ Compose your configuration from those groups (group=option)
+
+
+ $APP_CONFIG_GROUPS
+
+
+ == Config ==
+
+ Override anything in the config (foo.bar=value)
+
+
+ $CONFIG
+
+
+ ${hydra.help.footer}
+
+ '
+ hydra_help:
+ template: 'Hydra (${hydra.runtime.version})
+
+ See https://hydra.cc for more info.
+
+
+ == Flags ==
+
+ $FLAGS_HELP
+
+
+ == Configuration groups ==
+
+ Compose your configuration from those groups (For example, append hydra/job_logging=disabled
+ to command line)
+
+
+ $HYDRA_CONFIG_GROUPS
+
+
+ Use ''--cfg hydra'' to Show the Hydra config.
+
+ '
+ hydra_help: ???
+ hydra_logging:
+ version: 1
+ formatters:
+ simple:
+ format: '[%(asctime)s][HYDRA] %(message)s'
+ handlers:
+ console:
+ class: logging.StreamHandler
+ formatter: simple
+ stream: ext://sys.stdout
+ root:
+ level: INFO
+ handlers:
+ - console
+ loggers:
+ logging_example:
+ level: DEBUG
+ disable_existing_loggers: false
+ job_logging:
+ version: 1
+ formatters:
+ simple:
+ format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
+ handlers:
+ console:
+ class: logging.StreamHandler
+ formatter: simple
+ stream: ext://sys.stdout
+ file:
+ class: logging.FileHandler
+ formatter: simple
+ filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
+ root:
+ level: INFO
+ handlers:
+ - console
+ - file
+ disable_existing_loggers: false
+ env: {}
+ mode: RUN
+ searchpath: []
+ callbacks: {}
+ output_subdir: .hydra
+ overrides:
+ hydra:
+ - hydra.mode=RUN
+ task:
+ - algorithm.adv_estimator=grpo
+ - data.train_files=/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/training/grpo_verl/data/session_level_reflection_grpo_train.parquet
+ - data.val_files=/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/training/grpo_verl/data/session_level_reflection_grpo_train.parquet
+ - data.train_batch_size=64
+ - data.max_prompt_length=2048
+ - data.max_response_length=1024
+ - data.filter_overlong_prompts=True
+ - data.truncation=error
+ - data.prompt_key=prompt
+ - data.reward_fn_key=data_source
+ - actor_rollout_ref.model.path=/work/nvme/bfqt/yurenh2/sft_checkpoints/checkpoint-200
+ - actor_rollout_ref.actor.optim.lr=1e-6
+ - actor_rollout_ref.model.use_remove_padding=True
+ - actor_rollout_ref.actor.ppo_mini_batch_size=8
+ - actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4
+ - actor_rollout_ref.actor.use_kl_loss=True
+ - actor_rollout_ref.actor.kl_loss_coef=0.003
+ - actor_rollout_ref.actor.kl_loss_type=low_var_kl
+ - actor_rollout_ref.actor.entropy_coeff=0
+ - actor_rollout_ref.model.enable_gradient_checkpointing=True
+ - actor_rollout_ref.actor.fsdp_config.model_dtype=bfloat16
+ - actor_rollout_ref.actor.fsdp_config.param_offload=False
+ - actor_rollout_ref.actor.fsdp_config.optimizer_offload=False
+ - actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=4
+ - actor_rollout_ref.rollout.tensor_model_parallel_size=1
+ - actor_rollout_ref.rollout.name=vllm
+ - actor_rollout_ref.rollout.gpu_memory_utilization=0.5
+ - actor_rollout_ref.rollout.n=8
+ - actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=4
+ - actor_rollout_ref.ref.fsdp_config.model_dtype=bfloat16
+ - actor_rollout_ref.ref.fsdp_config.param_offload=True
+ - actor_rollout_ref.rollout.temperature=0.9
+ - actor_rollout_ref.rollout.top_p=0.9
+ - custom_reward_function.path=/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/training/grpo_verl/verl_reward_functions.py
+ - custom_reward_function.name=compute_score
+ - algorithm.use_kl_in_reward=False
+ - trainer.critic_warmup=0
+ - trainer.val_before_train=False
+ - trainer.logger=["console"]
+ - trainer.project_name=collaborative-agent-reflection-grpo
+ - trainer.experiment_name=llama3.1-8b-grpo
+ - trainer.n_gpus_per_node=2
+ - trainer.nnodes=1
+ - trainer.save_freq=50
+ - trainer.test_freq=100
+ - trainer.total_epochs=1
+ - trainer.default_local_dir=/scratch/bfqt/yurenh2/grpo_outputs
+ job:
+ name: main_ppo
+ chdir: null
+    override_dirname: actor_rollout_ref.actor.entropy_coeff=0,actor_rollout_ref.actor.fsdp_config.model_dtype=bfloat16,actor_rollout_ref.actor.fsdp_config.optimizer_offload=False,actor_rollout_ref.actor.fsdp_config.param_offload=False,actor_rollout_ref.actor.kl_loss_coef=0.003,actor_rollout_ref.actor.kl_loss_type=low_var_kl,actor_rollout_ref.actor.optim.lr=1e-6,actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4,actor_rollout_ref.actor.ppo_mini_batch_size=8,actor_rollout_ref.actor.use_kl_loss=True,actor_rollout_ref.model.enable_gradient_checkpointing=True,actor_rollout_ref.model.path=/work/nvme/bfqt/yurenh2/sft_checkpoints/checkpoint-200,actor_rollout_ref.model.use_remove_padding=True,actor_rollout_ref.ref.fsdp_config.model_dtype=bfloat16,actor_rollout_ref.ref.fsdp_config.param_offload=True,actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=4,actor_rollout_ref.rollout.gpu_memory_utilization=0.5,actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=4,actor_rollout_ref.rollout.n=8,actor_rollout_ref.rollout.name=vllm,actor_rollout_ref.rollout.temperature=0.9,actor_rollout_ref.rollout.tensor_model_parallel_size=1,actor_rollout_ref.rollout.top_p=0.9,algorithm.adv_estimator=grpo,algorithm.use_kl_in_reward=False,custom_reward_function.name=compute_score,custom_reward_function.path=/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/training/grpo_verl/verl_reward_functions.py,data.filter_overlong_prompts=True,data.max_prompt_length=2048,data.max_response_length=1024,data.prompt_key=prompt,data.reward_fn_key=data_source,data.train_batch_size=64,data.train_files=/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/training/grpo_verl/data/session_level_reflection_grpo_train.parquet,data.truncation=error,data.val_files=/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/training/grpo_verl/data/session_level_reflection_grpo_train.parquet,trainer.critic_warmup=0,trainer.default_local_dir=/scratch/bfqt/yurenh2/grpo_outputs,trainer.experiment_name=llama3.1-8b-grpo,trainer.logger=["console"],trainer.n_gpus_per_node=2,trainer.nnodes=1,trainer.project_name=collaborative-agent-reflection-grpo,trainer.save_freq=50,trainer.test_freq=100,trainer.total_epochs=1,trainer.val_before_train=False
+ id: ???
+ num: ???
+ config_name: ppo_trainer
+ env_set: {}
+ env_copy: []
+ config:
+ override_dirname:
+ kv_sep: '='
+ item_sep: ','
+ exclude_keys: []
+ runtime:
+ version: 1.3.2
+ version_base: '1.3'
+ cwd: /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/training/grpo_verl
+ config_sources:
+ - path: hydra.conf
+ schema: pkg
+ provider: hydra
+ - path: /u/yurenh2/miniforge3/envs/eval/lib/python3.11/site-packages/verl/trainer/config
+ schema: file
+ provider: main
+ - path: ''
+ schema: structured
+ provider: schema
+ output_dir: /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/training/grpo_verl/outputs/2026-01-11/03-50-42
+ choices:
+ algorithm@algorithm.rollout_correction: rollout_correction
+ reward_model: dp_reward_loop
+ critic: dp_critic
+ critic/../engine@critic.model.fsdp_config: fsdp
+ critic/../optim@critic.optim: fsdp
+ model@actor_rollout_ref.model: hf_model
+ rollout@actor_rollout_ref.rollout: rollout
+ ref@actor_rollout_ref.ref: dp_ref
+ ref/../engine@actor_rollout_ref.ref.fsdp_config: fsdp
+ data: legacy_data
+ actor@actor_rollout_ref.actor: dp_actor
+ actor/../engine@actor_rollout_ref.actor.fsdp_config: fsdp
+ actor/../optim@actor_rollout_ref.actor.optim: fsdp
+ hydra/env: default
+ hydra/callbacks: null
+ hydra/job_logging: default
+ hydra/hydra_logging: default
+ hydra/hydra_help: default
+ hydra/help: default
+ hydra/sweeper: basic
+ hydra/launcher: basic
+ hydra/output: default
+ verbose: false