author    Yuren Hao <yurenh2@timan108.cs.illinois.edu>    2025-09-04 22:16:22 -0500
committer Yuren Hao <yurenh2@timan108.cs.illinois.edu>    2025-09-04 22:16:22 -0500
commit    fc6d57ffb8d5ddb5820fcc00b5491a585c259ebc (patch)
tree      e9841f93a353e2107225cfc721d1ce57c0e594dc /Qwen2.5-Eval/evaluation/data_loader.py
Initial commit
Diffstat (limited to 'Qwen2.5-Eval/evaluation/data_loader.py')
-rwxr-xr-x  Qwen2.5-Eval/evaluation/data_loader.py | 99
1 file changed, 99 insertions(+), 0 deletions(-)
diff --git a/Qwen2.5-Eval/evaluation/data_loader.py b/Qwen2.5-Eval/evaluation/data_loader.py
new file mode 100755
index 0000000..8e4376f
--- /dev/null
+++ b/Qwen2.5-Eval/evaluation/data_loader.py
@@ -0,0 +1,99 @@
+import os
+import json
+from datasets import load_dataset, Dataset, concatenate_datasets
+from utils import load_jsonl, lower_keys
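+# NOTE: load_jsonl is assumed to yield one dict per line of a JSONL file,
+# and lower_keys to lowercase a dict's top-level keys; both come from the
+# sibling utils module of this evaluation package.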
+
+
+def load_data(data_name, split, data_dir="./data"):
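+    """Load evaluation examples for `data_name`/`split` as a list of dicts.
+
+    A cached {data_dir}/{data_name}/{split}.jsonl is used when present;
+    otherwise the data is fetched per-dataset and cached for later runs.
+    Every returned example carries an integer `idx` field.
+    """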
+ data_file = f"{data_dir}/{data_name}/{split}.jsonl"
+ print(f"data_name: {data_name}")
+ print(f"data_file: {data_file}")
+ if os.path.exists(data_file):
+ examples = list(load_jsonl(data_file))
+    elif data_name in ("deepscaler", "deepscaler_random3p", "deepscaler_random3p_noInstruct"):
+        # for the deepscaler variants, data_dir itself points at a single JSON file
+        data_file = data_dir
+        print(data_file)
+        if os.path.exists(data_file):
+            with open(data_file, "r", encoding="utf-8") as f:
+                examples = json.load(f)
+        else:
+            # fail early instead of hitting an unbound `examples` below
+            raise FileNotFoundError(f"deepscaler data file not found: {data_file}")
+ else:
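+        # no local cache: fetch the dataset (HuggingFace hub or local files),
+        # normalize keys, and cache it under {data_dir}/{data_name}/ below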
+ if data_name == "math":
+ dataset = load_dataset(
+ "competition_math",
+ split=split,
+ name="main",
+ cache_dir=f"{data_dir}/temp",
+ )
+        elif data_name == "gsm8k":
+            # gsm8k ships two hub configs ("main", "socratic"); "main" is the standard one
+            dataset = load_dataset(data_name, "main", split=split)
+ elif data_name == "svamp":
+ # evaluate on training set + test set
+ dataset = load_dataset("ChilleD/SVAMP", split="train")
+ dataset = concatenate_datasets(
+ [dataset, load_dataset("ChilleD/SVAMP", split="test")]
+ )
+ elif data_name == "asdiv":
+ dataset = load_dataset("EleutherAI/asdiv", split="validation")
+ dataset = dataset.filter(
+ lambda x: ";" not in x["answer"]
+ ) # remove multi-answer examples
+ elif data_name == "mawps":
+ examples = []
+            # four sub-tasks; use a separate loop variable so the outer
+            # data_name (and the cache path below) is not clobbered
+            for sub_name in ["singleeq", "singleop", "addsub", "multiarith"]:
+                sub_examples = list(load_jsonl(f"{data_dir}/mawps/{sub_name}.jsonl"))
+                for example in sub_examples:
+                    example["type"] = sub_name
+                examples.extend(sub_examples)
+ dataset = Dataset.from_list(examples)
+ elif data_name == "mmlu_stem":
+ dataset = load_dataset("hails/mmlu_no_train", "all", split="test")
+ # only keep stem subjects
+ stem_subjects = [
+ "abstract_algebra",
+ "astronomy",
+ "college_biology",
+ "college_chemistry",
+ "college_computer_science",
+ "college_mathematics",
+ "college_physics",
+ "computer_security",
+ "conceptual_physics",
+ "electrical_engineering",
+ "elementary_mathematics",
+ "high_school_biology",
+ "high_school_chemistry",
+ "high_school_computer_science",
+ "high_school_mathematics",
+ "high_school_physics",
+ "high_school_statistics",
+ "machine_learning",
+ ]
+ dataset = dataset.rename_column("subject", "type")
+ dataset = dataset.filter(lambda x: x["type"] in stem_subjects)
+ elif data_name == "carp_en":
+ dataset = load_jsonl(f"{data_dir}/carp_en/test.jsonl")
+ else:
+ raise NotImplementedError(data_name)
+
+ examples = list(dataset)
+ examples = [lower_keys(example) for example in examples]
+ dataset = Dataset.from_list(examples)
+ os.makedirs(f"{data_dir}/{data_name}", exist_ok=True)
+ dataset.to_json(data_file)
+
+    # add an 'idx' field as the first key if it is missing
+    if "idx" not in examples[0]:
+        examples = [{"idx": i, **example} for i, example in enumerate(examples)]
+
+    # deduplicate by idx & sort
+    examples = list({example["idx"]: example for example in examples}.values())
+    examples = sorted(examples, key=lambda x: x["idx"])
+    return examples
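+
+
+# Example usage (a sketch; assumes a gsm8k/test.jsonl cache under ./data,
+# otherwise load_data fetches GSM8K from the HuggingFace hub first):
+if __name__ == "__main__":
+    examples = load_data("gsm8k", "test")
+    print(len(examples), examples[0]["idx"])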