From f1c2cc22d46a6976df3555391e667c7e61592fad Mon Sep 17 00:00:00 2001 From: YurenHao0426 Date: Wed, 4 Feb 2026 18:59:35 -0600 Subject: Initial commit: RL floating-point noise project --- configs/deepspeed_zero2.json | 31 +++++++++++++ configs/deepspeed_zero3.json | 38 ++++++++++++++++ configs/eval_tasks_config.json | 99 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 168 insertions(+) create mode 100644 configs/deepspeed_zero2.json create mode 100644 configs/deepspeed_zero3.json create mode 100644 configs/eval_tasks_config.json (limited to 'configs') diff --git a/configs/deepspeed_zero2.json b/configs/deepspeed_zero2.json new file mode 100644 index 0000000..bb7f7aa --- /dev/null +++ b/configs/deepspeed_zero2.json @@ -0,0 +1,31 @@ +{ + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": "auto", + "gradient_accumulation_steps": "auto", + + "zero_optimization": { + "stage": 2, + "offload_optimizer": { + "device": "none" + }, + "contiguous_gradients": true, + "overlap_comm": true, + "reduce_scatter": true, + "reduce_bucket_size": 5e8, + "allgather_bucket_size": 5e8 + }, + + "bf16": { + "enabled": false + }, + + "fp16": { + "enabled": false + }, + + "gradient_clipping": 1.0, + + "zero_allow_untested_optimizer": true, + + "wall_clock_breakdown": false +} diff --git a/configs/deepspeed_zero3.json b/configs/deepspeed_zero3.json new file mode 100644 index 0000000..6e68c8f --- /dev/null +++ b/configs/deepspeed_zero3.json @@ -0,0 +1,38 @@ +{ + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": "auto", + "gradient_accumulation_steps": "auto", + + "zero_optimization": { + "stage": 3, + "offload_optimizer": { + "device": "none" + }, + "offload_param": { + "device": "none" + }, + "overlap_comm": true, + "contiguous_gradients": true, + "sub_group_size": 1e9, + "reduce_bucket_size": "auto", + "stage3_prefetch_bucket_size": "auto", + "stage3_param_persistence_threshold": "auto", + "stage3_max_live_parameters": 1e9, + "stage3_max_reuse_distance": 1e9, + "stage3_gather_16bit_weights_on_model_save": true + }, + + "bf16": { + "enabled": false + }, + + "fp16": { + "enabled": false + }, + + "gradient_clipping": 1.0, + + "zero_allow_untested_optimizer": true, + + "wall_clock_breakdown": false +} diff --git a/configs/eval_tasks_config.json b/configs/eval_tasks_config.json new file mode 100644 index 0000000..e0dda43 --- /dev/null +++ b/configs/eval_tasks_config.json @@ -0,0 +1,99 @@ +[ + { + "name": "dm_val", + "task_type": "math", + "dataset_path": "./data/dm_val.json", + "is_verifiable": true, + "metric_type": "accuracy", + "num_samples": -1, + "max_gen_len": 2048, + "temperature": 0.7, + "top_p": 0.8, + "num_samples_per_prompt": 1 + }, + { + "name": "aime24", + "task_type": "math", + "dataset_path": "./data/aime24.json", + "is_verifiable": true, + "metric_type": "accuracy", + "num_samples": -1, + "max_gen_len": 4096, + "temperature": 0.7, + "top_p": 0.8, + "num_samples_per_prompt": 1 + }, + { + "name": "aime25", + "task_type": "math", + "dataset_path": "./data/aime25.json", + "is_verifiable": true, + "metric_type": "accuracy", + "num_samples": -1, + "max_gen_len": 4096, + "temperature": 0.7, + "top_p": 0.8, + "num_samples_per_prompt": 1 + }, + { + "name": "amc23", + "task_type": "math", + "dataset_path": "./data/amc23.json", + "is_verifiable": true, + "metric_type": "accuracy", + "num_samples": -1, + "max_gen_len": 2048, + "temperature": 0.7, + "top_p": 0.8, + "num_samples_per_prompt": 1 + }, + { + "name": "math500", + "task_type": "math", + "dataset_path": "./data/math500.json", + "is_verifiable": true, + "metric_type": "accuracy", + "num_samples": 500, + "max_gen_len": 2048, + "temperature": 0.7, + "top_p": 0.8, + "num_samples_per_prompt": 1 + }, + { + "name": "gsm8k", + "task_type": "math", + "dataset_path": "./data/gsm8k.json", + "is_verifiable": true, + "metric_type": "accuracy", + "num_samples": 500, + "max_gen_len": 1024, + "temperature": 0.7, + "top_p": 0.8, + "num_samples_per_prompt": 1 + }, + { + "name": "mmlu_stem", + "task_type": "qa", + "dataset_path": "./data/mmlu_stem.json", + "is_verifiable": true, + "metric_type": "accuracy", + "num_samples": 500, + "max_gen_len": 512, + "temperature": 0.3, + "top_p": 0.9, + "num_samples_per_prompt": 1 + }, + { + "name": "humaneval", + "task_type": "code", + "dataset_path": "./data/humaneval.json", + "is_verifiable": true, + "metric_type": "accuracy", + "num_samples": 164, + "max_gen_len": 1024, + "temperature": 0.2, + "top_p": 0.95, + "num_samples_per_prompt": 1 + } +] + -- cgit v1.2.3