1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
|
#!/usr/bin/env python3
"""
Example of using OpenRouter with putnam-bench to solve mathematical problems.
This example demonstrates:
1. Using different model combinations from different providers
2. Solving a real problem from the dataset
3. Comparing results across different models
"""
import asyncio
import json
import os
from loader import create_loader
async def solve_with_openrouter() -> None:
    """Solve one sample Putnam problem through OpenRouter with several model pairs.

    Requires the ``OPENROUTER_API_KEY`` environment variable and the sample
    problem file ``dataset/1938-A-1.json`` (resolved relative to the current
    working directory).  If either is missing, a message is printed and the
    coroutine returns early.

    For every solver/grader pair a loader is built with ``create_loader`` and
    asked to solve the problem; problems whose ``problem_type`` is ``'proof'``
    are additionally graded.  All progress is reported on stdout and nothing
    is returned.
    """
    # NOTE(review): the emoji prefixes below were restored from mis-decoded
    # bytes.  The "Problem" and "Grade" glyphs are a best guess -- confirm
    # against the upstream file.

    # Check API key
    if not os.getenv('OPENROUTER_API_KEY'):
        print("❌ Please set OPENROUTER_API_KEY environment variable")
        return

    # Load a sample problem.  Open directly instead of checking for existence
    # first, and read as UTF-8 regardless of the platform's locale encoding.
    problem_file = "dataset/1938-A-1.json"
    try:
        with open(problem_file, encoding="utf-8") as f:
            problem_data = json.load(f)
    except FileNotFoundError:
        print(f"❌ Problem file not found: {problem_file}")
        print(" Make sure you're running from the project root directory")
        return

    statement = problem_data['problem_statement']
    print(f"📚 Problem: {statement[:100]}...")
    print(f" Type: {problem_data['problem_type']}")
    print(f" Year: {problem_data['year']}")

    # Test with different model combinations
    test_configs = [
        {
            "name": "OpenAI Only",
            "solver": "openai/gpt-4o-mini",
            "grader": "openai/gpt-4o",
        },
        {
            "name": "Mixed OpenAI/Anthropic",
            "solver": "openai/gpt-4o",
            "grader": "anthropic/claude-3-haiku",
        },
        {
            "name": "Google Gemini",
            "solver": "google/gemini-pro",
            "grader": "google/gemini-pro",
        },
    ]

    for config in test_configs:
        print(f"\n{'='*60}")
        print(f"🧪 Testing: {config['name']}")
        print(f" Solver: {config['solver']}")
        print(f" Grader: {config['grader']}")

        try:
            # Create loader with specific models
            loader = create_loader(
                "openrouter",
                solver_model=config['solver'],
                grader_model=config['grader'],
                retries=3,
                timeout_base=120,
            )

            # Solve the problem; the second element of the result is unused here.
            print("\n⏳ Solving problem...")
            solution, _raw = await loader.solve_problem(statement)

            if solution:
                print("✅ Solution found!")
                print(f" Final answer: {solution.get('final_answer', 'N/A')}")

                # Grade the solution (if it's a proof problem)
                if problem_data['problem_type'] == 'proof':
                    print("\n⏳ Grading solution...")
                    grade_result = await loader.grade_solution(
                        statement,
                        solution['solution'],
                        problem_data.get('ground_truth_solution', ''),
                        problem_type='proof',
                    )

                    if grade_result:
                        print(f"📊 Grade: {grade_result.get('score', 'N/A')}/10")
                        print(f" Reasoning: {grade_result.get('reasoning', 'N/A')[:100]}...")
                else:
                    print(" (Calculation problem - grading skipped)")
            else:
                print("❌ Failed to get solution")

        except Exception as e:
            # Deliberate best-effort boundary: a failure in one configuration
            # is reported and the remaining configurations still run.
            print(f"❌ Error: {type(e).__name__}: {e}")

    print(f"\n{'='*60}")
    print("✅ Example completed!")
async def list_recommended_models() -> None:
    """Print recommended solver/grader model combinations for several use cases.

    The coroutine awaits nothing; it only writes a fixed table of
    recommendations (use case, solver, grader, notes) to stdout and
    returns ``None``.
    """
    # NOTE(review): emoji prefixes were restored from mis-decoded bytes; the
    # header glyph is a best guess -- confirm against the upstream file.
    print("\n📋 Recommended OpenRouter Model Combinations:\n")

    recommendations = [
        {
            "use_case": "Best Quality (Expensive)",
            "solver": "openai/gpt-4o",
            "grader": "anthropic/claude-3-opus",
            "notes": "Highest accuracy but most expensive",
        },
        {
            "use_case": "Balanced Performance",
            "solver": "openai/gpt-4o-mini",
            "grader": "anthropic/claude-3-sonnet",
            "notes": "Good balance of cost and performance",
        },
        {
            "use_case": "Budget Friendly",
            "solver": "openai/gpt-3.5-turbo",
            "grader": "google/gemini-pro",
            "notes": "Cheapest option, still decent quality",
        },
        {
            "use_case": "Open Source Models",
            "solver": "meta-llama/llama-3-70b-instruct",
            "grader": "mistralai/mixtral-8x7b-instruct",
            "notes": "Using open-source models only",
        },
        {
            "use_case": "Code-Focused",
            "solver": "deepseek/deepseek-coder",
            "grader": "meta-llama/codellama-70b-instruct",
            "notes": "Optimized for problems with code",
        },
    ]

    for rec in recommendations:
        print(f"🎯 {rec['use_case']}")
        print(f" Solver: {rec['solver']}")
        print(f" Grader: {rec['grader']}")
        print(f" Notes: {rec['notes']}")
        print()
if __name__ == "__main__":
    # Script entry point: print a banner, run the solving example, then print
    # the table of recommended model combinations.
    # NOTE(review): the banner emoji was restored from mis-decoded bytes and
    # is a best guess -- confirm against the upstream file.
    print("🚀 OpenRouter Example for Putnam Bench")

    # Run the example
    asyncio.run(solve_with_openrouter())

    # Show recommendations
    asyncio.run(list_recommended_models())
|