1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
|
"""
Model loader package for mathematical problem solving.
This package provides a unified interface for loading and using different AI models
to solve mathematical problems and grade solutions.
Usage:
# Create an OpenAI loader
loader = create_loader("openai", solver_model="gpt-4o-mini", grader_model="o3")
# Create an OpenRouter loader
loader = create_loader("openrouter", solver_model="openai/gpt-4o", grader_model="anthropic/claude-3-opus")
# Or directly instantiate
from loader import OpenAIModelLoader
loader = OpenAIModelLoader()
# Test a problem
import json
with open("dataset/1938-A-1.json") as f:
data = json.load(f)
result = await loader.test_single_problem(data, variant_type="original")
"""
from .base import ModelLoader
from .openai_client import OpenAIModelLoader, KimiModelLoader
from .anthropic_client import AnthropicModelLoader
from .gemini_client import GeminiModelLoader
from .xai_client import XAIModelLoader
from .openrouter_client import OpenRouterModelLoader
from .vllm_local import VLLMModelLoader
from .vllm_direct import VLLMDirectModelLoader
from .hf_local import HuggingFaceModelLoader
from .cross_provider import CrossProviderLoader
from .prompts import (
SOLVER_SYSTEM_PROMPT,
SOLVER_USER_TEMPLATE,
PROOF_GRADER_SYSTEM_PROMPT,
CALCULATION_GRADER_SYSTEM_PROMPT,
PROOF_GRADER_USER_TEMPLATE,
CALCULATION_GRADER_USER_TEMPLATE,
RESPONSE_FORMAT,
DEFAULT_RETRIES,
DEFAULT_TIMEOUT_BASE
)
from typing import Optional
def create_loader(provider: str, **kwargs) -> ModelLoader:
    """
    Factory function to create model loaders.

    Args:
        provider: Provider name ("openai", "anthropic", "gemini", etc.).
            Matching is case-insensitive; "hf" is accepted as an alias
            for "huggingface".
        **kwargs: Additional arguments passed to the loader constructor.

    Returns:
        ModelLoader instance for the requested provider.

    Raises:
        ValueError: If provider is not supported.

    Examples:
        # Create OpenAI loader
        loader = create_loader("openai", solver_model="gpt-4o-mini", grader_model="o3")

        # Create loader with custom settings
        loader = create_loader(
            "openai",
            solver_model="gpt-4",
            grader_model="o3",
            retries=5,
            timeout_base=600,
        )
    """
    # Dispatch table of lazily-evaluated factories: each loader class is
    # only referenced when its entry is actually invoked, so constructing
    # the table is free of side effects.
    factories = {
        "openai": lambda: OpenAIModelLoader(**kwargs),
        "anthropic": lambda: AnthropicModelLoader(**kwargs),
        "gemini": lambda: GeminiModelLoader(**kwargs),
        "xai": lambda: XAIModelLoader(**kwargs),
        "openrouter": lambda: OpenRouterModelLoader(**kwargs),
        "kimi": lambda: KimiModelLoader(**kwargs),
        "vllm": lambda: VLLMModelLoader(**kwargs),
        "vllm_direct": lambda: VLLMDirectModelLoader(**kwargs),
        "huggingface": lambda: HuggingFaceModelLoader(**kwargs),
        "hf": lambda: HuggingFaceModelLoader(**kwargs),  # alias
    }
    provider_lower = provider.lower()
    try:
        factory = factories[provider_lower]
    except KeyError:
        # Canonical names only (the "hf" alias is intentionally omitted
        # from the message, matching the documented provider list).
        supported = ["openai", "anthropic", "gemini", "xai", "openrouter", "kimi", "vllm", "vllm_direct", "huggingface"]
        raise ValueError(f"Unsupported provider: {provider}. Supported providers: {supported}") from None
    return factory()
def create_cross_provider_loader(
    solver_provider: str,
    grader_provider: Optional[str] = None,
    solver_model: Optional[str] = None,
    grader_model: Optional[str] = None,
    **kwargs
) -> ModelLoader:
    """
    Create a loader that can use different providers for solving and grading.

    Args:
        solver_provider: Provider for solving problems.
        grader_provider: Provider for grading (if None, uses solver_provider).
        solver_model: Override solver model (defaults per provider).
        grader_model: Override grader model (defaults per provider).
        **kwargs: Additional arguments; may include 'solver_kwargs' and
            'grader_kwargs' dicts with provider-specific settings, plus the
            common 'quick' and 'debug' flags forwarded to both loaders.

    Returns:
        CrossProviderLoader instance when solver and grader providers differ;
        otherwise a single provider loader configured with both models.

    Examples:
        # Use Kimi for solving and OpenAI for grading
        loader = create_cross_provider_loader(
            solver_provider="kimi",
            grader_provider="openai",
            solver_model="Kimi-K2-Instruct",
            grader_model="o3"
        )

        # Use same provider but different models
        loader = create_cross_provider_loader(
            solver_provider="openai",
            solver_model="gpt-4o-mini",
            grader_model="o3"
        )
    """
    # Copy the per-role kwarg dicts so we never mutate dicts owned by the
    # caller (the previous implementation updated them in place).
    solver_kwargs = dict(kwargs.pop('solver_kwargs', {}))
    grader_kwargs = dict(kwargs.pop('grader_kwargs', {}))

    # Common flags forwarded to both underlying loaders.
    quick = kwargs.pop('quick', False)
    debug = kwargs.pop('debug', False)
    solver_kwargs.update({'quick': quick, 'debug': debug})
    grader_kwargs.update({'quick': quick, 'debug': debug})

    # Fill in defaults for anything the caller did not specify.
    if not solver_model:
        solver_model = get_default_models(solver_provider)['solver_model']
    if not grader_provider:
        grader_provider = solver_provider
    if not grader_model:
        grader_model = get_default_models(grader_provider)['grader_model']

    if grader_provider != solver_provider:
        # Different providers: one dedicated loader per role, wrapped by
        # CrossProviderLoader.
        solver_loader = create_loader(
            solver_provider,
            solver_model=solver_model,
            grader_model=solver_model,  # this loader only ever solves
            **solver_kwargs
        )
        grader_loader = create_loader(
            grader_provider,
            solver_model=grader_model,  # this loader only ever grades
            grader_model=grader_model,
            **grader_kwargs
        )
        return CrossProviderLoader(solver_loader, grader_loader, **kwargs)

    # Same provider for both roles (models may still differ): a single
    # loader suffices, and nothing is built just to be thrown away.
    # NOTE(review): grader_kwargs are not forwarded on this path — this
    # matches the original behavior, but confirm it is intended.
    return create_loader(
        solver_provider,
        solver_model=solver_model,
        grader_model=grader_model,
        **solver_kwargs
    )
def get_supported_providers() -> list[str]:
    """Return the provider names accepted by create_loader.

    Returns:
        List of supported provider names (a fresh list on every call).
    """
    providers = (
        "openai",
        "anthropic",
        "gemini",
        "xai",
        "openrouter",
        "kimi",
        "vllm",
        "vllm_direct",
        "huggingface",
    )
    return list(providers)
# Per-provider default model names, keyed by lower-case provider id.
# Module-level so the table is built once instead of on every call.
_DEFAULT_MODELS: dict[str, dict[str, str]] = {
    "openai": {
        "solver_model": "gpt-4o-mini",
        "grader_model": "o3",
    },
    "anthropic": {
        "solver_model": "claude-3-5-haiku-20241022",
        "grader_model": "claude-3-5-sonnet-20241022",
    },
    "gemini": {
        "solver_model": "gemini-1.5-flash",
        "grader_model": "gemini-1.5-pro",
    },
    "xai": {
        "solver_model": "grok-3",
        "grader_model": "grok-3",
    },
    "openrouter": {
        "solver_model": "openai/gpt-4o",
        "grader_model": "openai/gpt-4o",
    },
    "kimi": {
        "solver_model": "moonshot-v1-8k",
        "grader_model": "moonshot-v1-8k",
    },
    "vllm": {
        "solver_model": "meta-llama/Llama-3.2-3B-Instruct",
        "grader_model": "meta-llama/Llama-3.2-8B-Instruct",
    },
    "vllm_direct": {
        "solver_model": "gpt2",
        "grader_model": "gpt2",
    },
    "huggingface": {
        "solver_model": "microsoft/DialoGPT-medium",
        "grader_model": "microsoft/DialoGPT-large",
    },
}

def get_default_models(provider: str) -> dict[str, str]:
    """
    Get default model names for a provider.

    Args:
        provider: Provider name (case-insensitive).

    Returns:
        Dictionary with default solver_model and grader_model.

    Raises:
        ValueError: If no defaults exist for the provider.
    """
    provider_lower = provider.lower()
    if provider_lower not in _DEFAULT_MODELS:
        raise ValueError(f"No defaults available for provider: {provider}")
    # Return a copy so callers can mutate the result without corrupting
    # the shared defaults table (preserves the old fresh-dict semantics).
    return dict(_DEFAULT_MODELS[provider_lower])
# Public API of the package: the names exported by `from loader import *`.
__all__ = [
# Loader classes re-exported from the provider submodules
"ModelLoader",
"OpenAIModelLoader",
"AnthropicModelLoader",
"GeminiModelLoader",
"XAIModelLoader",
"OpenRouterModelLoader",
"KimiModelLoader",
"VLLMModelLoader",
"VLLMDirectModelLoader",
"HuggingFaceModelLoader",
"CrossProviderLoader",
# Factory functions defined in this module
"create_loader",
"create_cross_provider_loader",
"get_supported_providers",
"get_default_models",
# Prompt templates re-exported from .prompts (for advanced users)
"SOLVER_SYSTEM_PROMPT",
"SOLVER_USER_TEMPLATE",
"PROOF_GRADER_SYSTEM_PROMPT",
"CALCULATION_GRADER_SYSTEM_PROMPT",
"PROOF_GRADER_USER_TEMPLATE",
"CALCULATION_GRADER_USER_TEMPLATE",
# Configuration constants re-exported from .prompts
"RESPONSE_FORMAT",
"DEFAULT_RETRIES",
"DEFAULT_TIMEOUT_BASE"
]
|