// components/LLMComparisonDashboard.jsx "use client"; import React, { useState, useMemo } from "react"; import { getScoreBadgeColor, formatDisplayKey, // Use this for displaying snake_case keys nicely getMetricTooltip, getEquityIndicatorStyle, // Use this for Max Equity Gap status } from "../lib/utils"; // Adjust path as needed import TaskPerformance from "./TaskPerformance"; import DemographicAnalysis from "./DemographicAnalysis"; import MetricsBreakdown from "./MetricsBreakdown"; import About from "./About"; import { Tooltip } from "./Tooltip"; // Assuming this is your Tooltip component // Helper component for info tooltips (assuming it exists and works) const InfoTooltip = ({ text }) => { const [isVisible, setIsVisible] = useState(false); return (
A benchmark assessing how well language models handle real-world tasks based on user experiences.
Rank | Model | Overall Score | Overall SD | Max Equity Gap | Max Gap Area | Equity Concerns | User Retention |
---|---|---|---|---|---|---|---|
{model.rank} |
{model.model}
|
{model.overallScore !== null ? model.overallScore.toFixed(1) : "N/A"} | {model.stdDevAcrossCats !== "N/A" && model.stdDevAcrossCats !== null ? `± ${Number(model.stdDevAcrossCats).toFixed(1)}` : "N/A"} | {renderMaxEquityGapCell(model)} |
{model.maxEffectFactor &&
model.maxEffectFactor !== "N/A" ? (
{formatDisplayKey(model.maxEffectFactor)}
{formatDisplayKey(model.maxEffectCategory)}
) : (
N/A
)}
|
{model.equityConcernPercentage !== null ? ( {model.equityConcernPercentage.toFixed(1)}% ) : ( N/A )} | {model.repeatUsageScore !== null ? ( {model.repeatUsageScore.toFixed(1)}% ) : ( N/A )} |
Category | Best Model | Score |
---|---|---|
|
{bestInfo.model !== "N/A" ? (
{bestInfo.model}
) : (
N/A
)}
|
{bestInfo.score !== null ? ( {bestInfo.score.toFixed(1)} ) : ( N/A )} |
Top performer data not available.
)}Scores based on user ratings, normalized to 0-100.
Metric | Best Model | Score |
---|---|---|
|
{bestInfo.model !== "N/A" ? (
{bestInfo.model}
) : (
N/A
)}
|
{bestInfo.score !== null ? ( {bestInfo.score.toFixed(1)} ) : ( N/A )} |
Low-level metric top performer data not available.
)}Scores based on user ratings, normalized to 0-100.