From f1c2cc22d46a6976df3555391e667c7e61592fad Mon Sep 17 00:00:00 2001
From: YurenHao0426
Date: Wed, 4 Feb 2026 18:59:35 -0600
Subject: Initial commit: RL floating-point noise project

---
 configs/eval_tasks_config.json | 99 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 99 insertions(+)
 create mode 100644 configs/eval_tasks_config.json

(limited to 'configs/eval_tasks_config.json')

diff --git a/configs/eval_tasks_config.json b/configs/eval_tasks_config.json
new file mode 100644
index 0000000..e0dda43
--- /dev/null
+++ b/configs/eval_tasks_config.json
@@ -0,0 +1,99 @@
+[
+  {
+    "name": "dm_val",
+    "task_type": "math",
+    "dataset_path": "./data/dm_val.json",
+    "is_verifiable": true,
+    "metric_type": "accuracy",
+    "num_samples": -1,
+    "max_gen_len": 2048,
+    "temperature": 0.7,
+    "top_p": 0.8,
+    "num_samples_per_prompt": 1
+  },
+  {
+    "name": "aime24",
+    "task_type": "math",
+    "dataset_path": "./data/aime24.json",
+    "is_verifiable": true,
+    "metric_type": "accuracy",
+    "num_samples": -1,
+    "max_gen_len": 4096,
+    "temperature": 0.7,
+    "top_p": 0.8,
+    "num_samples_per_prompt": 1
+  },
+  {
+    "name": "aime25",
+    "task_type": "math",
+    "dataset_path": "./data/aime25.json",
+    "is_verifiable": true,
+    "metric_type": "accuracy",
+    "num_samples": -1,
+    "max_gen_len": 4096,
+    "temperature": 0.7,
+    "top_p": 0.8,
+    "num_samples_per_prompt": 1
+  },
+  {
+    "name": "amc23",
+    "task_type": "math",
+    "dataset_path": "./data/amc23.json",
+    "is_verifiable": true,
+    "metric_type": "accuracy",
+    "num_samples": -1,
+    "max_gen_len": 2048,
+    "temperature": 0.7,
+    "top_p": 0.8,
+    "num_samples_per_prompt": 1
+  },
+  {
+    "name": "math500",
+    "task_type": "math",
+    "dataset_path": "./data/math500.json",
+    "is_verifiable": true,
+    "metric_type": "accuracy",
+    "num_samples": 500,
+    "max_gen_len": 2048,
+    "temperature": 0.7,
+    "top_p": 0.8,
+    "num_samples_per_prompt": 1
+  },
+  {
+    "name": "gsm8k",
+    "task_type": "math",
+    "dataset_path": "./data/gsm8k.json",
+    "is_verifiable": true,
+    "metric_type": "accuracy",
+    "num_samples": 500,
+    "max_gen_len": 1024,
+    "temperature": 0.7,
+    "top_p": 0.8,
+    "num_samples_per_prompt": 1
+  },
+  {
+    "name": "mmlu_stem",
+    "task_type": "qa",
+    "dataset_path": "./data/mmlu_stem.json",
+    "is_verifiable": true,
+    "metric_type": "accuracy",
+    "num_samples": 500,
+    "max_gen_len": 512,
+    "temperature": 0.3,
+    "top_p": 0.9,
+    "num_samples_per_prompt": 1
+  },
+  {
+    "name": "humaneval",
+    "task_type": "code",
+    "dataset_path": "./data/humaneval.json",
+    "is_verifiable": true,
+    "metric_type": "accuracy",
+    "num_samples": 164,
+    "max_gen_len": 1024,
+    "temperature": 0.2,
+    "top_p": 0.95,
+    "num_samples_per_prompt": 1
+  }
+]
+
-- 
cgit v1.2.3