author    Yuren Hao <yurenh2@timan108.cs.illinois.edu>    2025-09-04 22:16:22 -0500
committer Yuren Hao <yurenh2@timan108.cs.illinois.edu>    2025-09-04 22:16:22 -0500
commit    fc6d57ffb8d5ddb5820fcc00b5491a585c259ebc (patch)
tree      e9841f93a353e2107225cfc721d1ce57c0e594dc /code_eval/OpenCodeEval/benchmark/BigCodeBench.py
Initial commit
Diffstat (limited to 'code_eval/OpenCodeEval/benchmark/BigCodeBench.py')
-rw-r--r--  code_eval/OpenCodeEval/benchmark/BigCodeBench.py  113
1 file changed, 113 insertions(+), 0 deletions(-)
diff --git a/code_eval/OpenCodeEval/benchmark/BigCodeBench.py b/code_eval/OpenCodeEval/benchmark/BigCodeBench.py
new file mode 100644
index 0000000..abc4faf
--- /dev/null
+++ b/code_eval/OpenCodeEval/benchmark/BigCodeBench.py
@@ -0,0 +1,113 @@
+import os
+from typing import Literal, Optional
+
+ROOT = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from OpenCodeEval.benchmark.base import Benchmark, PYTHON_STOP, PYTHON_IMPORTS
+from OpenCodeEval.utils import refine_text, stream_jsonl
+from OpenCodeEval.eval.func_eval import check_correctness
+from OpenCodeEval.eval.sanitize import sanitize
+
+class BigCodeBench(Benchmark):
+
+    name: str = "BigCodeBench"
+    path: Optional[str] = None
+
+    fullset_path = os.path.abspath(os.path.join(ROOT, "../data/BigCodeBench.jsonl"))
+    subset_path = os.path.abspath(os.path.join(ROOT, "../data/BigCodeBench_Hard.jsonl"))
+
+    imports_code = PYTHON_IMPORTS
+    chat_stop = PYTHON_STOP
+    base_stop = ['\n"""', "\nassert"]
+
+    def __init__(self,
+                 name: str = "BigCodeBench",
+                 timeout: float = 10.0,
+                 prompt_type: Literal["Completion", "Instruction"] = "Completion"
+                 ):
+
+        super().__init__()
+
+        self.name = name
+        self.timeout = timeout
+        self.prompt_type = prompt_type
+
+        # Pick the Hard subset or the full benchmark file based on the name.
+        if self.name == "BigCodeHard":
+            self.path = self.subset_path
+        elif self.name == "BigCodeBench":
+            self.path = self.fullset_path
+        else:
+            raise ValueError(f"Unknown benchmark name: {self.name}")
+
+        self.tasks = self.get_task()
+
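+    # Illustrative usage sketch (assumes the jsonl files exist under ../data
+    # relative to ROOT):
+    #
+    #     bench = BigCodeBench(name="BigCodeHard", prompt_type="Instruction")
+    #     prompts = bench.get_prompt()   # -> [{"task_id": ..., "prompt": ...}, ...]
+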
+    def get_task(self):
+        """
+        Load the task data from the jsonl file into a dictionary keyed by task id.
+        """
+
+        tasks = {}
+
+        for task_data in stream_jsonl(filename=self.path):
+
+            # Keep only the numeric suffix of ids of the form "BigCodeBench/<n>".
+            task_id = int(task_data["task_id"].split("/")[-1])
+
+            tasks[task_id] = task_data
+
+        return tasks
+
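+    # Each streamed record is assumed to carry at least the fields this class
+    # reads, roughly (values illustrative):
+    #
+    #     {"task_id": "BigCodeBench/0", "entry_point": "task_func",
+    #      "complete_prompt": "...", "instruct_prompt": "...",
+    #      "code_prompt": "...", "test": "..."}
+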
+    def get_prompt(self):
+        """
+        Build the prompts for the LM to generate from.
+        """
+
+        prompts = []
+        for task_id, task_data in self.tasks.items():
+
+            if self.prompt_type == "Completion":
+                prompt = task_data['complete_prompt']
+            elif self.prompt_type == "Instruction":
+                prompt = task_data['instruct_prompt']
+            else:
+                raise ValueError(f"Unknown prompt type: {self.prompt_type}")
+
+            prompts.append(
+                dict(
+                    task_id = task_id,
+                    prompt = refine_text(prompt)
+                )
+            )
+
+        return prompts
+
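+    # Shape of the prompt list handed to the generation backend (illustrative):
+    #
+    #     [{"task_id": 0, "prompt": "..."}, {"task_id": 1, "prompt": "..."}, ...]
+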
+    def postprocess_generation(self, generation):
+        """
+        Postprocess a single generation: sanitize the completion for its entry point.
+        """
+
+        entry_point = self.tasks[generation['task_id']]["entry_point"]
+
+        result = dict(
+            task_id = generation['task_id'],
+            completion_id = generation['completion_id'],
+            solution = sanitize(generation['completion'], entry_point)
+        )
+
+        return result
+
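+    # Each generation dict passed to postprocess_generation() is assumed to look
+    # roughly like
+    #     {"task_id": 0, "completion_id": 0, "completion": "<raw model output>"},
+    # with sanitize() expected to trim the raw output down to the entry-point
+    # function so it can be executed against the tests.
+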
+    def process_results(self, solution):
+        """
+        Evaluate a post-processed solution against the task's test cases.
+        """
+
+        task_data = self.tasks[solution['task_id']]
+
+        # Stub out the prompt's function body so the prefix stays syntactically
+        # valid; the sanitized solution then redefines the entry point before
+        # the tests run.
+        code = (
+            task_data["code_prompt"] + "\n"
+            + "    pass\n" + "\n"
+            + solution['solution'] + "\n"
+        )
+
+        result = check_correctness(solution['task_id'],
+                                   solution['completion_id'],
+                                   code,
+                                   task_data["test"],
+                                   self.timeout)
+
+        return result
\ No newline at end of file