/**
 * Replace anonymized response labels in a piece of text with bolded model names.
 *
 * Every occurrence of a key of `labelToModel` found in `text` is replaced by
 * `**<short name>**`, where the short name is the segment after the first `/`
 * of the mapped model identifier (or the whole identifier when it has no such
 * segment).
 *
 * All labels are substituted in a single pass, so text inserted for one label
 * is never re-scanned for another, and longer labels take precedence over
 * shorter labels that are a prefix of them.
 *
 * @param {string} text - Text that may contain anonymized labels.
 * @param {Object<string, string>} [labelToModel] - Map from label to model identifier.
 * @returns {string} The text with labels replaced; `text` is returned unchanged
 *   when `labelToModel` is falsy or empty, or when `text` is not a string.
 */
function deAnonymizeText(text, labelToModel) {
  if (!labelToModel || typeof text !== 'string') return text;

  // Longest labels first so the alternation prefers "Response AA" over "Response A".
  // Empty labels are dropped: an empty alternative would match at every position.
  const labels = Object.keys(labelToModel)
    .filter((label) => label.length > 0)
    .sort((a, b) => b.length - a.length);
  if (labels.length === 0) return text;

  // Labels are data, not patterns: escape regex metacharacters before compiling.
  const escapeRegExp = (value) => value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const labelPattern = new RegExp(labels.map(escapeRegExp).join('|'), 'g');

  // A replacer function (rather than a replacement string) keeps "$&", "$1", etc.
  // inside model names from being interpreted as replacement patterns.
  return text.replace(labelPattern, (label) => {
    const model = labelToModel[label];
    const modelShortName = model.split('/')[1] || model;
    return `**${modelShortName}**`;
  });
}
Each model evaluated all responses (anonymized as Response A, B, C, etc.) and provided rankings. Below, model names are shown in bold for readability, but the original evaluation used anonymous labels.
Combined results across all peer evaluations (lower score is better):