import { useState } from 'react';
import ReactMarkdown from 'react-markdown';
import remarkGfm from 'remark-gfm';
import './Stage2.css';

// Replace each anonymous label (e.g. "Response A") with the actual model's short name in bold
function deAnonymizeText(text, labelToModel) {
  if (!labelToModel) return text;
  let result = text;
  Object.entries(labelToModel).forEach(([label, model]) => {
    const modelShortName = model.split('/')[1] || model;
    result = result.replace(new RegExp(label, 'g'), `**${modelShortName}**`);
  });
  return result;
}

export default function Stage2({ rankings, labelToModel, aggregateRankings }) {
  const [activeTab, setActiveTab] = useState(0);

  if (!rankings || rankings.length === 0) {
    return null;
  }

  return (
    <div className="stage2">
      <h2>Stage 2: Peer Rankings</h2>

      <h3>Raw Evaluations</h3>
      <p>
        Each model evaluated all responses (anonymized as Response A, B, C, etc.) and provided
        rankings. Below, model names are shown in bold for readability, but the original
        evaluation used anonymous labels.
      </p>

      {/* Tab bar: one button per evaluating model */}
      <div className="tabs">
        {rankings.map((rank, index) => (
          <button
            key={index}
            className={index === activeTab ? 'tab active' : 'tab'}
            onClick={() => setActiveTab(index)}
          >
            {rank.model.split('/')[1] || rank.model}
          </button>
        ))}
      </div>
      {/* Raw evaluation from the currently selected model, rendered as Markdown */}
      <div className="evaluation">
        <h4>{rankings[activeTab].model}</h4>
        <ReactMarkdown remarkPlugins={[remarkGfm]}>
          {deAnonymizeText(rankings[activeTab].ranking, labelToModel)}
        </ReactMarkdown>

        {rankings[activeTab].parsed_ranking && rankings[activeTab].parsed_ranking.length > 0 && (
          <div className="parsed-ranking">
            <h5>Extracted Ranking:</h5>
            <ol>
              {rankings[activeTab].parsed_ranking.map((label, i) => (
                <li key={i}>
                  {labelToModel && labelToModel[label]
                    ? labelToModel[label].split('/')[1] || labelToModel[label]
                    : label}
                </li>
              ))}
            </ol>
          </div>
        )}
      </div>
      {aggregateRankings && aggregateRankings.length > 0 && (
        <div className="aggregate">
          <h3>Aggregate Rankings (Street Cred)</h3>
          <p>Combined results across all peer evaluations (lower score is better):</p>
          <ol className="aggregate-list">
            {aggregateRankings.map((agg, index) => (
              <li key={index}>
                #{index + 1} {agg.model.split('/')[1] || agg.model} Avg: {agg.average_rank.toFixed(2)} ({agg.rankings_count} votes)
              </li>
            ))}
          </ol>
        </div>
      )}
    </div>
  );
}
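
// Usage sketch (hypothetical data, shaped after the props this component reads;
// model IDs, scores, and labels below are illustrative, not taken from the app):
//
// <Stage2
//   rankings={[
//     {
//       model: 'openai/gpt-4o',
//       ranking: 'Response A was the most thorough...',
//       parsed_ranking: ['Response A', 'Response B'],
//     },
//   ]}
//   labelToModel={{ 'Response A': 'openai/gpt-4o', 'Response B': 'anthropic/claude-3.5-sonnet' }}
//   aggregateRankings={[
//     { model: 'openai/gpt-4o', average_rank: 1.33, rankings_count: 3 },
//     { model: 'anthropic/claude-3.5-sonnet', average_rank: 1.67, rankings_count: 3 },
//   ]}
// />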