Nora Petrova
Add project to new space
20e666e
raw
history blame
27.7 kB
// components/LLMComparisonDashboard.jsx
"use client";
import React, { useState, useMemo } from "react";
import {
getScoreBadgeColor,
formatDisplayKey, // Use this for displaying snake_case keys nicely
getMetricTooltip,
getEquityIndicatorStyle, // Use this for Max Equity Gap status
} from "../lib/utils"; // Adjust path as needed
import TaskPerformance from "./TaskPerformance";
import DemographicAnalysis from "./DemographicAnalysis";
import MetricsBreakdown from "./MetricsBreakdown";
import About from "./About";
import { Tooltip } from "./Tooltip"; // Assuming this is your Tooltip component
// Helper component for info tooltips (assuming it exists and works)
const InfoTooltip = ({ text }) => {
const [isVisible, setIsVisible] = useState(false);
return (
<div className="relative inline-block ml-1 align-middle">
<button
className="text-gray-400 hover:text-gray-600 focus:outline-none"
onMouseEnter={() => setIsVisible(true)}
onMouseLeave={() => setIsVisible(false)}
onClick={(e) => {
e.stopPropagation();
setIsVisible(!isVisible);
}}
aria-label="Info"
>
<svg
xmlns="http://www.w3.org/2000/svg"
className="h-4 w-4"
viewBox="0 0 20 20"
fill="currentColor"
>
<path
fillRule="evenodd"
d="M18 10a8 8 0 11-16 0 8 8 0 0116 0zm-7-4a1 1 0 11-2 0 1 1 0 012 0zM9 9a1 1 0 000 2v3a1 1 0 001 1h1a1 1 0 100-2v-3a1 1 0 00-1-1H9z"
clipRule="evenodd"
/>
</svg>
</button>
{isVisible && (
<div className="absolute z-10 w-64 p-2 bg-white border rounded shadow-lg text-xs text-gray-700 -translate-x-1/2 left-1/2 mt-1 normal-case">
{text}
</div>
)}
</div>
);
};
// Main dashboard component
const LLMComparisonDashboard = ({ data: processedData }) => {
const [activeTab, setActiveTab] = useState("overview");
const [topPerformersView, setTopPerformersView] = useState("high-level");
// Destructure data - top-level keys are camelCase
// Nested rawData and equityAnalysis retain original snake_case keys
const {
models: rankedModels = [], // This is overallRankingProcessed with camelCase keys
metricsData = { highLevelCategories: {}, lowLevelMetrics: {} }, // Title Case keys inside
radarData = [],
overviewCardData = {}, // camelCase keys inside expected
rawData = {
// camelCase keys for objects, snake_case keys inside those objects
taskLevelPerformance: {},
mrpDemographics: {},
demographicOptions: {},
availableMetrics: [], // Title Case
tasks: [],
taskCategories: {},
taskMetrics: [], // Title Case
taskMetricsSnake: [], // snake_case
taskCategoryMap: {},
},
bestPerCategory = {}, // Title Case keys
bestPerMetric = {}, // Title Case keys
equityAnalysis = {
// Original snake_case keys
all_equity_gaps: [],
model_max_effect_gaps: {},
universal_issues: [],
assessment_method: {},
demographic_variation_stats: {},
},
metadata = {}, // Original keys
} = processedData || {};
// NEW: Helper function to get color for Max Equity Gap bubble
const getEquityGapBadgeColor = (model) => {
const isConcern = model.maxEffectConcernFlag;
const isSignificant = model.maxEffectSignificant;
const effectSizeClass = model.maxEffectSizeClass;
const isLargeEffect = effectSizeClass === "Large";
if (isConcern && isSignificant && isLargeEffect) {
return "bg-red-100 text-red-800"; // Equity Concern
}
if (isLargeEffect) {
return "bg-yellow-100 text-yellow-800"; // Large Effect
}
if (isSignificant) {
return "bg-blue-100 text-blue-800"; // Significant
}
return "bg-gray-100 text-gray-800"; // No concern
};
// UPDATED: Render cell for Max Equity Gap column with bubble design
const renderMaxEquityGapCell = (model) => {
// model object has camelCase keys
const gapValue = model.maxEffectGap;
const isConcern = model.maxEffectConcernFlag;
const significanceStatus = model.maxEffectSignificant;
const pValue = model.maxEffectPValue;
const effectSizeClass = model.maxEffectSizeClass;
const isLargeEffect = effectSizeClass === "Large";
// Access nested details using original snake_case keys
const gapDetails = model.maxEffectGapDetails || {};
const ciLower = gapDetails.gap_confidence_interval_95_lower;
const ciUpper = gapDetails.gap_confidence_interval_95_upper;
const displayValue =
typeof gapValue === "number" ? gapValue.toFixed(1) : "N/A";
if (displayValue === "N/A")
return <span className="text-xs text-gray-500">N/A</span>;
const indicator = getEquityIndicatorStyle(
isConcern,
isLargeEffect,
significanceStatus,
pValue,
effectSizeClass
);
let fullTooltipContent = indicator.tooltip;
if (typeof ciLower === "number" && typeof ciUpper === "number") {
fullTooltipContent += `\n95% CI: [${ciLower.toFixed(
1
)}, ${ciUpper.toFixed(1)}]`;
} else {
fullTooltipContent += `\n95% CI: N/A`;
}
return (
<Tooltip
content={
<div className="whitespace-pre-line">{fullTooltipContent}</div>
}
>
<span
className={`px-2 py-0.5 rounded-full text-xs font-medium ${getEquityGapBadgeColor(
model
)}`}
>
{displayValue}
</span>
</Tooltip>
);
};
// NEW: Helper for equity concerns percentage badge color
const getEquityConcernBadgeColor = (percentage) => {
if (percentage === null || percentage === undefined)
return "bg-gray-100 text-gray-800";
if (percentage === 0) return "bg-green-100 text-green-800";
if (percentage <= 2.5) return "bg-blue-100 text-blue-800";
if (percentage <= 5) return "bg-yellow-100 text-yellow-800";
return "bg-red-100 text-red-800";
};
return (
<div className="max-w-7xl mx-auto p-4 bg-white">
{/* Header */}
<div className="relative mb-6 overflow-hidden">
<div className="absolute inset-0 bg-gradient-to-br from-blue-50 to-sky-50 opacity-70"></div>
<div className="relative max-w-5xl mx-auto px-6 py-6">
<div className="text-center">
<h1 className="text-4xl font-bold mb-2 tracking-narrow text-blue-700">
Prolific's AI User Experience Leaderboard
</h1>
<p className="text-gray-600 max-w-4xl mx-auto">
A benchmark assessing how well language models handle real-world
tasks based on user experiences.
</p>
</div>
</div>
</div>
{/* Tab Buttons */}
<div className="flex flex-wrap mb-6 border-b">
{[
"overview",
"metrics-breakdown",
"task-performance",
"demographic-analysis",
"about",
].map((tab) => (
<button
key={tab}
className={`px-4 py-2 font-medium capitalize ${
activeTab === tab
? "text-blue-600 border-b-2 border-blue-600"
: "text-gray-500 hover:text-gray-700"
}`}
onClick={() => setActiveTab(tab)}
>
{" "}
{tab.replace("-", " ")}{" "}
</button>
))}
</div>
{/* Overview Tab */}
{activeTab === "overview" && (
<div>
{/* Overall Rankings Card */}
<div className="mb-6 border rounded-lg overflow-hidden shadow-sm">
<div className="px-4 py-3 bg-gray-50 border-b">
<h2 className="text-xl font-semibold text-gray-800">
Overall Model Rankings
</h2>
</div>
<div className="p-4">
<div className="overflow-x-auto">
<table className="w-full min-w-[850px] table-auto divide-y divide-gray-200">
<thead>
<tr className="bg-gray-50">
<th className="px-3 py-2 text-left text-xs font-medium text-gray-500 uppercase tracking-wider w-12">
Rank
</th>
<th className="px-3 py-2 text-left text-xs font-medium text-gray-500 uppercase tracking-wider w-48">
Model
</th>
<th className="px-3 py-2 text-left text-xs font-medium text-gray-500 uppercase tracking-wider w-28">
<span>Overall Score</span>
</th>
<th className="px-3 py-2 text-left text-xs font-medium text-gray-500 uppercase tracking-wider w-24">
<span>Overall SD</span>
</th>
<th className="px-3 py-2 text-left text-xs font-medium text-gray-500 uppercase tracking-wider w-32">
<span>Max Equity Gap</span>
</th>
<th className="px-3 py-2 text-left text-xs font-medium text-gray-500 uppercase tracking-wider w-38">
<span>Max Gap Area</span>
</th>
<th className="px-3 py-2 text-center text-xs font-medium text-gray-500 uppercase tracking-wider w-36">
<span>Equity Concerns</span>
</th>
<th className="px-3 py-2 text-left text-xs font-medium text-gray-500 uppercase tracking-wider w-32">
<span>User Retention</span>
</th>
</tr>
</thead>
<tbody className="divide-y divide-gray-200">
{/* Use camelCase model object from rankedModels */}
{rankedModels.map((model) => (
<tr key={model.model} className="hover:bg-gray-50">
<td className="px-3 py-3 text-sm font-medium text-gray-900">
{model.rank}
</td>
<td className="px-3 py-3">
<div className="flex items-center">
<div
className="w-3 h-3 rounded-full mr-2 flex-shrink-0"
style={{ backgroundColor: model.color }}
></div>
<span className="text-sm font-medium text-gray-900">
{model.model}
</span>
</div>
</td>
<td className="px-3 py-3 text-sm font-semibold text-gray-800">
{model.overallScore !== null
? model.overallScore.toFixed(1)
: "N/A"}
</td>
<td className="px-3 py-3 text-sm text-gray-600">
{model.stdDevAcrossCats !== "N/A" &&
model.stdDevAcrossCats !== null
? `± ${Number(model.stdDevAcrossCats).toFixed(1)}`
: "N/A"}
</td>
<td className="px-3 py-3 text-sm">
{renderMaxEquityGapCell(model)}
</td>
<td className="px-3 py-3">
{model.maxEffectFactor &&
model.maxEffectFactor !== "N/A" ? (
<div className="flex flex-col">
<span className="text-xs font-medium text-gray-900">
{formatDisplayKey(model.maxEffectFactor)}
</span>
<span className="text-xs text-gray-500">
{formatDisplayKey(model.maxEffectCategory)}
</span>
</div>
) : (
<span className="text-xs text-gray-500">N/A</span>
)}
</td>
<td className="px-3 py-3 text-sm text-center">
{model.equityConcernPercentage !== null ? (
<span>
{model.equityConcernPercentage.toFixed(1)}%
</span>
) : (
<span className="text-xs text-gray-500">N/A</span>
)}
</td>
<td className="px-3 py-3 text-sm">
{model.repeatUsageScore !== null ? (
<span
className={`px-2 py-0.5 rounded-full text-xs font-medium ${getScoreBadgeColor(
model.repeatUsageScore
)}`}
>
{model.repeatUsageScore.toFixed(1)}%
</span>
) : (
<span className="text-xs text-gray-500">N/A</span>
)}
</td>
</tr>
))}
</tbody>
</table>
</div>
{/* UPDATED: Vertical list for column descriptions with detailed info */}
<div className="mt-4 pt-3 border-t border-gray-200 text-xs text-gray-600">
{/* Column descriptions in vertical list */}
<div className="mb-2">
<div>
<span className="font-semibold">Overall Score:</span> Avg.
score across high-level categories
</div>
<div>
<span className="font-semibold">Overall SD:</span> Standard
deviation across high-level categories (lower = more
consistent)
</div>
<div>
<span className="font-semibold">Max Equity Gap:</span>{" "}
Largest demographic score difference (hover for details on
significance and effect size)
</div>
<div>
<span className="font-semibold">Max Gap Area:</span>{" "}
Demographic group and Category where the Max Equity Gap
occurs
</div>
<div>
<span className="font-semibold">Equity Concerns:</span>{" "}
Percentage of demographic gaps flagged as concerns (large
effect & statistically significant)
</div>
<div>
<span className="font-semibold">User Retention:</span>{" "}
Percentage of participants who said they would use the model
again
</div>
</div>
{/* Color key on a single line */}
<div className="mt-2 pt-2 border-t border-gray-100 flex flex-wrap items-center gap-x-4 gap-y-2">
<span className="font-semibold whitespace-nowrap">
Color Key:
</span>
<div className="flex items-center">
<span className="inline-block w-4 h-4 rounded-full bg-red-100 mr-1"></span>
<span>
Equity Concern (Large Effect & Statistically Significant)
</span>
</div>
<div className="flex items-center">
<span className="inline-block w-4 h-4 rounded-full bg-yellow-100 mr-1"></span>
<span>Large Effect (Not Statistically Significant)</span>
</div>
</div>
</div>
</div>
</div>
{/* Top Performers Section */}
<div className="mb-4 flex items-center">
<h3 className="font-semibold text-xl mr-4">
Top Performers by Category
</h3>
<div className="flex space-x-1 p-1 bg-gray-200 rounded-lg">
<button
className={`px-4 py-1.5 text-sm font-medium rounded-md transition-colors duration-150 ${
topPerformersView === "high-level"
? "bg-white shadow text-blue-600"
: "text-gray-600 hover:text-gray-800"
}`}
onClick={() => setTopPerformersView("high-level")}
>
{" "}
High-Level Categories{" "}
</button>
<button
className={`px-4 py-1.5 text-sm font-medium rounded-md transition-colors duration-150 ${
topPerformersView === "low-level"
? "bg-white shadow text-blue-600"
: "text-gray-600 hover:text-gray-800"
}`}
onClick={() => setTopPerformersView("low-level")}
>
{" "}
Low-Level Metrics{" "}
</button>
</div>
</div>
{/* Top Performers Tables - Access using Title Case keys */}
{topPerformersView === "high-level" && (
<div className="border rounded-lg overflow-hidden shadow-sm mb-6">
<div className="px-4 py-3 bg-gray-50 border-b">
<h3 className="font-semibold text-gray-800">
Top Performers by High-Level Category
</h3>
</div>
<div className="p-4">
{Object.entries(bestPerCategory || {}).length > 0 ? (
<table className="min-w-full divide-y divide-gray-200">
<thead>
<tr>
<th
scope="col"
className="px-3 py-2 text-left text-xs font-medium text-gray-500 uppercase tracking-wider"
>
Category
</th>
<th
scope="col"
className="px-3 py-2 text-left text-xs font-medium text-gray-500 uppercase tracking-wider"
>
Best Model
</th>
<th
scope="col"
className="px-3 py-2 text-left text-xs font-medium text-gray-500 uppercase tracking-wider"
>
Score
</th>
</tr>
</thead>
<tbody className="bg-white divide-y divide-gray-200">
{Object.entries(bestPerCategory)
.sort(([a], [b]) => a.localeCompare(b))
.map(([catDisplayKey, bestInfo], idx) => (
<tr
key={catDisplayKey}
className={
idx % 2 === 0 ? "bg-white" : "bg-gray-50"
}
>
<td className="px-3 py-2 font-medium text-sm text-gray-900">
<Tooltip
content={getMetricTooltip(catDisplayKey)}
>
<span>{catDisplayKey}</span>
</Tooltip>
</td>
<td className="px-3 py-2">
{bestInfo.model !== "N/A" ? (
<div className="flex items-center">
<div
className="w-3 h-3 rounded-full mr-2 shrink-0"
style={{ backgroundColor: bestInfo.color }}
></div>
<span className="text-sm">
{bestInfo.model}
</span>
</div>
) : (
<span className="text-sm text-gray-500">
N/A
</span>
)}
</td>
<td className="px-3 py-2">
{bestInfo.score !== null ? (
<span
className={`px-2 py-0.5 rounded-full text-xs font-medium ${getScoreBadgeColor(
bestInfo.score
)}`}
>
{bestInfo.score.toFixed(1)}
</span>
) : (
<span className="text-sm text-gray-500">
N/A
</span>
)}
</td>
</tr>
))}
</tbody>
</table>
) : (
<p className="text-center text-gray-500 py-4">
Top performer data not available.
</p>
)}
<p className="text-xs text-gray-500 mt-2">
Scores based on user ratings, normalized to 0-100.
</p>
</div>
</div>
)}
{topPerformersView === "low-level" && (
<div className="border rounded-lg overflow-hidden shadow-sm mb-6">
<div className="px-4 py-3 bg-gray-50 border-b">
<h3 className="font-semibold text-gray-800">
Top Performers by Low-Level Metric
</h3>
</div>
<div className="p-4">
{Object.entries(bestPerMetric || {}).length > 0 ? (
<table className="min-w-full divide-y divide-gray-200">
<thead>
<tr>
<th
scope="col"
className="px-3 py-2 text-left text-xs font-medium text-gray-500 uppercase tracking-wider"
>
Metric
</th>
<th
scope="col"
className="px-3 py-2 text-left text-xs font-medium text-gray-500 uppercase tracking-wider"
>
Best Model
</th>
<th
scope="col"
className="px-3 py-2 text-left text-xs font-medium text-gray-500 uppercase tracking-wider"
>
Score
</th>
</tr>
</thead>
<tbody className="bg-white divide-y divide-gray-200">
{Object.entries(bestPerMetric)
.sort(([a], [b]) => a.localeCompare(b))
.map(([metricDisplayKey, bestInfo], idx) => (
<tr
key={metricDisplayKey}
className={
idx % 2 === 0 ? "bg-white" : "bg-gray-50"
}
>
<td className="px-3 py-2 font-medium text-sm text-gray-900">
<Tooltip
content={getMetricTooltip(metricDisplayKey)}
>
<span>{metricDisplayKey}</span>
</Tooltip>
</td>
<td className="px-3 py-2">
{bestInfo.model !== "N/A" ? (
<div className="flex items-center">
<div
className="w-3 h-3 rounded-full mr-2 shrink-0"
style={{ backgroundColor: bestInfo.color }}
></div>
<span className="text-sm">
{bestInfo.model}
</span>
</div>
) : (
<span className="text-sm text-gray-500">
N/A
</span>
)}
</td>
<td className="px-3 py-2">
{bestInfo.score !== null ? (
<span
className={`px-2 py-0.5 rounded-full text-xs font-medium ${getScoreBadgeColor(
bestInfo.score
)}`}
>
{bestInfo.score.toFixed(1)}
</span>
) : (
<span className="text-sm text-gray-500">
N/A
</span>
)}
</td>
</tr>
))}
</tbody>
</table>
) : (
<p className="text-center text-gray-500 py-4">
Low-level metric top performer data not available.
</p>
)}
<p className="text-xs text-gray-500 mt-2">
Scores based on user ratings, normalized to 0-100.
</p>
</div>
</div>
)}
</div>
)}{" "}
{/* End Overview Tab */}
{/* Other Tabs - Pass Correct Props */}
{activeTab === "metrics-breakdown" && (
<MetricsBreakdown
metricsData={metricsData} // Title Case keys inside, plus internalMetricKey
modelsMeta={rankedModels} // camelCase keys inside
radarData={radarData}
/>
)}
{activeTab === "task-performance" && (
<TaskPerformance
rawData={rawData} // Contains camelCase top-level, snake_case nested
modelsMeta={rankedModels}
metricsData={metricsData} // Title Case keys inside, plus internalMetricKey
overviewCardData={overviewCardData}
/>
)}
{activeTab === "demographic-analysis" && (
<DemographicAnalysis
rawData={rawData} // Contains camelCase top-level, snake_case/Title Case nested
modelsMeta={rankedModels}
metricsData={metricsData} // Title Case keys inside, plus internalMetricKey
equityAnalysis={equityAnalysis} // Original snake_case structure
/>
)}
{activeTab === "about" && <About metadata={metadata} />}
</div>
);
};
export default LLMComparisonDashboard;