1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
|
"""
Preference Bank Schemas
Defines the data structures for user preferences, organized by topic.
Each preference has a condition (when it applies), action (what the user wants),
and optional conflict group (preferences in the same group are mutually exclusive).
"""
from dataclasses import dataclass, field
from typing import Optional, List, Dict, Any
import json
@dataclass
class PreferenceItem:
    """A single user preference.

    Pairs a condition (when the preference applies) with an action (what the
    user wants), plus the text used to enforce and illustrate it.
    """

    id: str  # Unique ID, e.g., "math_fmt_001"
    topic: str  # Topic name, e.g., "math_formatting"
    condition: str  # When this preference applies
    action: str  # What the user prefers
    conflict_group: Optional[str]  # If set, only one pref from this group can be selected
    enforce_description: str  # Description for user simulator on how to enforce
    example_violation: str  # Example of agent response that violates this
    example_compliance: str  # Example that follows this preference

    def to_dict(self) -> Dict[str, Any]:
        """Return the preference as a plain dict keyed by field name."""
        return {
            "id": self.id,
            "topic": self.topic,
            "condition": self.condition,
            "action": self.action,
            "conflict_group": self.conflict_group,
            "enforce_description": self.enforce_description,
            "example_violation": self.example_violation,
            "example_compliance": self.example_compliance,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "PreferenceItem":
        """Build a preference from a dict such as the one ``to_dict`` produces.

        ``conflict_group`` is optional: when the key is absent it defaults to
        ``None``. Every other field is required.

        Raises:
            TypeError: if a required field is missing or an unknown key is
                present (raised by the dataclass constructor).
        """
        # Work on a copy so the caller's mapping is never mutated.
        kwargs = dict(data)
        kwargs.setdefault("conflict_group", None)
        return cls(**kwargs)

    def format_for_user(self) -> str:
        """Format for user simulator prompt."""
        return f"When {self.condition}: {self.action}"

    def format_for_enforcement(self) -> str:
        """Format with enforcement details (ID, rule, and enforcement hint)."""
        return f"[{self.id}] When {self.condition}: {self.action}\n Enforce if: {self.enforce_description}"
@dataclass
class PreferenceTopic:
    """A named group of related preferences."""

    name: str  # Topic name, e.g., "math_formatting"
    description: str  # Description of this topic
    related_datasets: List[str]  # Datasets where this topic is relevant
    preferences: List[PreferenceItem] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Return the topic as a plain dict, with each preference serialized."""
        serialized_prefs = [item.to_dict() for item in self.preferences]
        return {
            "name": self.name,
            "description": self.description,
            "related_datasets": self.related_datasets,
            "preferences": serialized_prefs,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "PreferenceTopic":
        """Build a topic from a dict; a missing "preferences" key means none."""
        # Preferences are parsed before the required keys are read.
        raw_prefs = data.get("preferences", [])
        items = list(map(PreferenceItem.from_dict, raw_prefs))
        return cls(
            name=data["name"],
            description=data["description"],
            related_datasets=data["related_datasets"],
            preferences=items,
        )
@dataclass
class PreferenceBank:
    """
    Collection of preference topics, keyed by topic name.
    Used to generate user profiles by sampling preferences.
    """

    topics: Dict[str, PreferenceTopic] = field(default_factory=dict)
    version: str = "1.0"

    def add_topic(self, topic: PreferenceTopic):
        """Register a topic under its own name, replacing any existing entry."""
        self.topics[topic.name] = topic

    def get_all_preferences(self) -> List[PreferenceItem]:
        """Return every preference from every topic as one flat list."""
        return [pref for entry in self.topics.values() for pref in entry.preferences]

    def get_preferences_for_dataset(self, dataset: str) -> List[PreferenceItem]:
        """Return preferences from topics tied to ``dataset`` or to "all"."""
        matched = []
        for entry in self.topics.values():
            datasets = entry.related_datasets
            # Skip topics that list neither this dataset nor the "all" wildcard.
            if dataset not in datasets and "all" not in datasets:
                continue
            matched.extend(entry.preferences)
        return matched

    def to_dict(self) -> Dict[str, Any]:
        """Return the bank as a plain dict with its version and topics."""
        serialized_topics = {key: entry.to_dict() for key, entry in self.topics.items()}
        return {
            "version": self.version,
            "topics": serialized_topics,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "PreferenceBank":
        """Build a bank from a dict; version defaults to "1.0", topics to none."""
        version = data.get("version", "1.0")
        bank = cls(version=version)
        # Topics are stored under the keys of the incoming mapping.
        bank.topics.update(
            (key, PreferenceTopic.from_dict(raw))
            for key, raw in data.get("topics", {}).items()
        )
        return bank

    def save(self, path: str):
        """Write the bank to ``path`` as indented UTF-8 JSON."""
        with open(path, "w", encoding="utf-8") as handle:
            json.dump(self.to_dict(), handle, indent=2, ensure_ascii=False)

    @classmethod
    def load(cls, path: str) -> "PreferenceBank":
        """Read a bank from the UTF-8 JSON file at ``path``."""
        with open(path, "r", encoding="utf-8") as handle:
            payload = json.load(handle)
        return cls.from_dict(payload)

    def stats(self) -> Dict[str, Any]:
        """Return topic, preference and conflict-group counts for the bank."""
        per_topic = {key: len(entry.preferences) for key, entry in self.topics.items()}
        # Only truthy conflict groups count; None and "" are ignored.
        groups = {
            pref.conflict_group
            for entry in self.topics.values()
            for pref in entry.preferences
            if pref.conflict_group
        }
        return {
            "num_topics": len(self.topics),
            "total_preferences": sum(per_topic.values()),
            "num_conflict_groups": len(groups),
            "prefs_per_topic": per_topic,
        }
|