| author | YurenHao0426 <blackhao0426@gmail.com> | 2026-02-04 18:59:35 -0600 |
|---|---|---|
| committer | YurenHao0426 <blackhao0426@gmail.com> | 2026-02-04 18:59:35 -0600 |
| commit | f1c2cc22d46a6976df3555391e667c7e61592fad (patch) | |
| tree | 0b37b52c8ff91042a742d3b3ec54542cb6d6e2f6 /configs/eval_tasks_config.json | |
Diffstat (limited to 'configs/eval_tasks_config.json')
| -rw-r--r-- | configs/eval_tasks_config.json | 99 |
1 file changed, 99 insertions, 0 deletions
diff --git a/configs/eval_tasks_config.json b/configs/eval_tasks_config.json
new file mode 100644
index 0000000..e0dda43
--- /dev/null
+++ b/configs/eval_tasks_config.json
@@ -0,0 +1,99 @@
+[
+  {
+    "name": "dm_val",
+    "task_type": "math",
+    "dataset_path": "./data/dm_val.json",
+    "is_verifiable": true,
+    "metric_type": "accuracy",
+    "num_samples": -1,
+    "max_gen_len": 2048,
+    "temperature": 0.7,
+    "top_p": 0.8,
+    "num_samples_per_prompt": 1
+  },
+  {
+    "name": "aime24",
+    "task_type": "math",
+    "dataset_path": "./data/aime24.json",
+    "is_verifiable": true,
+    "metric_type": "accuracy",
+    "num_samples": -1,
+    "max_gen_len": 4096,
+    "temperature": 0.7,
+    "top_p": 0.8,
+    "num_samples_per_prompt": 1
+  },
+  {
+    "name": "aime25",
+    "task_type": "math",
+    "dataset_path": "./data/aime25.json",
+    "is_verifiable": true,
+    "metric_type": "accuracy",
+    "num_samples": -1,
+    "max_gen_len": 4096,
+    "temperature": 0.7,
+    "top_p": 0.8,
+    "num_samples_per_prompt": 1
+  },
+  {
+    "name": "amc23",
+    "task_type": "math",
+    "dataset_path": "./data/amc23.json",
+    "is_verifiable": true,
+    "metric_type": "accuracy",
+    "num_samples": -1,
+    "max_gen_len": 2048,
+    "temperature": 0.7,
+    "top_p": 0.8,
+    "num_samples_per_prompt": 1
+  },
+  {
+    "name": "math500",
+    "task_type": "math",
+    "dataset_path": "./data/math500.json",
+    "is_verifiable": true,
+    "metric_type": "accuracy",
+    "num_samples": 500,
+    "max_gen_len": 2048,
+    "temperature": 0.7,
+    "top_p": 0.8,
+    "num_samples_per_prompt": 1
+  },
+  {
+    "name": "gsm8k",
+    "task_type": "math",
+    "dataset_path": "./data/gsm8k.json",
+    "is_verifiable": true,
+    "metric_type": "accuracy",
+    "num_samples": 500,
+    "max_gen_len": 1024,
+    "temperature": 0.7,
+    "top_p": 0.8,
+    "num_samples_per_prompt": 1
+  },
+  {
+    "name": "mmlu_stem",
+    "task_type": "qa",
+    "dataset_path": "./data/mmlu_stem.json",
+    "is_verifiable": true,
+    "metric_type": "accuracy",
+    "num_samples": 500,
+    "max_gen_len": 512,
+    "temperature": 0.3,
+    "top_p": 0.9,
+    "num_samples_per_prompt": 1
+  },
+  {
+    "name": "humaneval",
+    "task_type": "code",
+    "dataset_path": "./data/humaneval.json",
+    "is_verifiable": true,
+    "metric_type": "accuracy",
+    "num_samples": 164,
+    "max_gen_len": 1024,
+    "temperature": 0.2,
+    "top_p": 0.95,
+    "num_samples_per_prompt": 1
+  }
+]
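The new file is a flat JSON array with one object per benchmark, all sharing the same ten keys. Below is a minimal sketch of how such a config might be consumed. The `EvalTaskConfig` dataclass and `load_eval_tasks` helper are hypothetical names, not taken from this repository, and reading `num_samples: -1` as "evaluate the full dataset" is an assumption inferred from the mix of `-1` and explicit counts like `500` and `164`:

```python
import json
from dataclasses import dataclass
from pathlib import Path
from typing import List


@dataclass
class EvalTaskConfig:
    """One entry of configs/eval_tasks_config.json (hypothetical schema class)."""
    name: str
    task_type: str              # "math", "qa", or "code" in this file
    dataset_path: str           # relative path to the task's JSON dataset
    is_verifiable: bool         # true for every task here
    metric_type: str            # "accuracy" for every task here
    num_samples: int            # assumption: -1 means "use the whole dataset"
    max_gen_len: int            # generation budget in tokens
    temperature: float
    top_p: float
    num_samples_per_prompt: int # completions sampled per prompt


def load_eval_tasks(path: str = "configs/eval_tasks_config.json") -> List[EvalTaskConfig]:
    """Parse the config file into typed task entries (hypothetical loader)."""
    raw = json.loads(Path(path).read_text(encoding="utf-8"))
    # JSON keys match the dataclass fields one-to-one, so ** unpacking suffices;
    # a stray or missing key raises TypeError, which doubles as schema validation.
    return [EvalTaskConfig(**entry) for entry in raw]


if __name__ == "__main__":
    for task in load_eval_tasks():
        limit = "all" if task.num_samples == -1 else task.num_samples
        print(f"{task.name:<10} type={task.task_type:<4} "
              f"samples={limit} T={task.temperature} top_p={task.top_p}")
```

Note how the sampling settings track the task type: the math tasks decode at `temperature` 0.7 / `top_p` 0.8, while `mmlu_stem` (0.3 / 0.9) and `humaneval` (0.2 / 0.95) use lower temperatures, consistent with wanting more deterministic output for multiple-choice QA and code generation.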
