1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
|
import { useState } from 'react';
import ReactMarkdown from 'react-markdown';
import remarkGfm from 'remark-gfm';
import './Stage2.css';
/**
 * Replace anonymized response labels in an evaluation text with bolded
 * model names.
 *
 * Every key of `labelToModel` is searched for as a literal substring of
 * `text`, and each occurrence is replaced with `**<short name>**`. The short
 * name is the second '/'-separated segment of the model identifier, or the
 * whole identifier when that segment is missing or empty.
 *
 * @param {string} text - Evaluation text that may contain the labels.
 * @param {Object<string, string>} [labelToModel] - Map from label to model
 *   identifier.
 * @returns {string} The text with labels substituted. `text` is returned
 *   unchanged when `labelToModel` is falsy or `text` is not a string.
 */
function deAnonymizeText(text, labelToModel) {
  if (!labelToModel || typeof text !== 'string') return text;

  let result = text;
  Object.entries(labelToModel).forEach(([label, model]) => {
    // An empty label would match between every character; skip it.
    if (!label) return;

    const modelShortName = model.split('/')[1] || model;

    // split/join treats the label as a literal string (no regex
    // metacharacters) and the replacement as literal text (no `$&`-style
    // substitution patterns), unlike String.prototype.replace with a RegExp.
    result = result.split(label).join(`**${modelShortName}**`);
  });
  return result;
}
export default function Stage2({ rankings, labelToModel, aggregateRankings }) {
const [activeTab, setActiveTab] = useState(0);
if (!rankings || rankings.length === 0) {
return null;
}
return (
<div className="stage stage2">
<h3 className="stage-title">Stage 2: Peer Rankings</h3>
<h4>Raw Evaluations</h4>
<p className="stage-description">
Each model evaluated all responses (anonymized as Response A, B, C, etc.) and provided rankings.
Below, model names are shown in <strong>bold</strong> for readability, but the original evaluation used anonymous labels.
</p>
<div className="tabs">
{rankings.map((rank, index) => (
<button
key={index}
className={`tab ${activeTab === index ? 'active' : ''}`}
onClick={() => setActiveTab(index)}
>
{rank.model.split('/')[1] || rank.model}
</button>
))}
</div>
<div className="tab-content">
<div className="ranking-model">
{rankings[activeTab].model}
</div>
<div className="ranking-content markdown-content">
<ReactMarkdown remarkPlugins={[remarkGfm]}>
{deAnonymizeText(rankings[activeTab].ranking, labelToModel)}
</ReactMarkdown>
</div>
{rankings[activeTab].parsed_ranking &&
rankings[activeTab].parsed_ranking.length > 0 && (
<div className="parsed-ranking">
<strong>Extracted Ranking:</strong>
<ol>
{rankings[activeTab].parsed_ranking.map((label, i) => (
<li key={i}>
{labelToModel && labelToModel[label]
? labelToModel[label].split('/')[1] || labelToModel[label]
: label}
</li>
))}
</ol>
</div>
)}
</div>
{aggregateRankings && aggregateRankings.length > 0 && (
<div className="aggregate-rankings">
<h4>Aggregate Rankings (Street Cred)</h4>
<p className="stage-description">
Combined results across all peer evaluations (lower score is better):
</p>
<div className="aggregate-list">
{aggregateRankings.map((agg, index) => (
<div key={index} className="aggregate-item">
<span className="rank-position">#{index + 1}</span>
<span className="rank-model">
{agg.model.split('/')[1] || agg.model}
</span>
<span className="rank-score">
Avg: {agg.average_rank.toFixed(2)}
</span>
<span className="rank-count">
({agg.rankings_count} votes)
</span>
</div>
))}
</div>
</div>
)}
</div>
);
}
|