/**
 * Replace anonymous response labels in an evaluation text with bolded
 * model short names (Markdown `**name**`).
 *
 * The short name is the segment after the first `/` of the model identifier,
 * or the whole identifier when that segment is missing or empty.
 *
 * @param {string} text - Evaluation text that refers to responses by label.
 * @param {Object<string, string>} [labelToModel] - Maps each label to a model
 *   identifier. When falsy, `text` is returned unchanged.
 * @returns {string} The text with every label occurrence replaced. A
 *   non-string `text` is returned as-is instead of throwing.
 */
function deAnonymizeText(text, labelToModel) {
  if (!labelToModel || typeof text !== 'string') return text;

  // An empty label would match at every position, so drop it. Longer labels go
  // first so that "Response AA" is not consumed as "Response A" + "A".
  const labels = Object.keys(labelToModel)
    .filter((label) => label !== '')
    .sort((a, b) => b.length - a.length);
  if (labels.length === 0) return text;

  // Labels are data, not patterns: escape regex metacharacters before use.
  const escapeRegExp = (value) => value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const labelPattern = new RegExp(labels.map(escapeRegExp).join('|'), 'g');

  // A single pass keeps already-inserted model names from being rewritten by a
  // later label, and the replacer function keeps `$` sequences in model names
  // literal (a replacement *string* would expand `$&`, `$1`, ...).
  return text.replace(labelPattern, (label) => {
    const model = labelToModel[label];
    const modelShortName = model.split('/')[1] || model;
    return `**${modelShortName}**`;
  });
}
+

Stage 2: Peer Rankings

+ +

Raw Evaluations

+

+ Each model evaluated all responses (anonymized as Response A, B, C, etc.) and provided rankings. + Below, model names are shown in bold for readability, but the original evaluation used anonymous labels. +

+ +
+ {rankings.map((rank, index) => ( + + ))} +
+ +
+
+ {rankings[activeTab].model} +
+
+ + {deAnonymizeText(rankings[activeTab].ranking, labelToModel)} + +
+ + {rankings[activeTab].parsed_ranking && + rankings[activeTab].parsed_ranking.length > 0 && ( +
+ Extracted Ranking: +
    + {rankings[activeTab].parsed_ranking.map((label, i) => ( +
  1. + {labelToModel && labelToModel[label] + ? labelToModel[label].split('/')[1] || labelToModel[label] + : label} +
  2. + ))} +
+
+ )} +
+ + {aggregateRankings && aggregateRankings.length > 0 && ( +
+

Aggregate Rankings (Street Cred)

+

+ Combined results across all peer evaluations (lower score is better): +

+
+ {aggregateRankings.map((agg, index) => ( +
+ #{index + 1} + + {agg.model.split('/')[1] || agg.model} + + + Avg: {agg.average_rank.toFixed(2)} + + + ({agg.rankings_count} votes) + +
+ ))} +
+
+ )} +
+ ); +} -- cgit v1.2.3