// components/TaskPerformance.jsx
"use client";
import React, { useState, useMemo, useEffect } from "react";
import {
BarChart,
Bar,
XAxis,
YAxis,
CartesianGrid,
Tooltip as RechartsTooltip,
ResponsiveContainer,
Cell,
} from "recharts";
import {
getMetricTooltip,
getScoreBadgeColor,
formatDisplayKey,
camelToTitle,
} from "../lib/utils"; // Import formatDisplayKey
// Helper component for info tooltips
// Small hover-style tooltip: renders `text` only while `isVisible` is true.
// NOTE(review): the JSX element markup appears to have been stripped from this
// file — the return below is adjacent expressions with no wrapping element,
// which is not valid JSX on its own, and `setIsVisible` is never called in the
// visible code (its mouse-enter/leave handlers were presumably on the stripped
// elements). Recover the original markup from version control before editing.
const InfoTooltip = ({ text }) => {
/* ... (no change) ... */
// Local visibility flag; toggled by (stripped) hover handlers — TODO confirm.
const [isVisible, setIsVisible] = useState(false);
return (
{" "}
{isVisible && (
{text}
)}{" "}
);
};
// Custom tooltip for charts
// Custom recharts tooltip renderer. When the tooltip is active and has data,
// it lists the payload entries sorted by value (descending), formatting
// numeric values to one decimal place and showing "N/A" for non-numbers.
// NOTE(review): the JSX element markup in the return appears stripped — only
// the interpolated expressions remain. Restore the original markup from VCS.
const CustomTooltip = ({ active, payload, label }) => {
/* ... (no change needed) ... */
if (active && payload && payload.length) {
// Sort a shallow copy so the recharts-owned payload array is not mutated;
// missing values are treated as 0 for ordering purposes.
const sortedPayload = [...payload].sort(
(a, b) => (b.value || 0) - (a.value || 0)
);
return (
{label}
{" "}
{sortedPayload.map((entry, index) => (
{" "}
{entry.name}: {" "}
{typeof entry.value === "number" ? entry.value.toFixed(1) : "N/A"}
{" "}
))}{" "}
);
}
// Render nothing when the tooltip is inactive or has no data points.
return null;
};
// Tab component
// Tab header button: `active` marks the selected tab, `onClick` switches tabs,
// `children` is the label.
// NOTE(review): the element markup was stripped — `=> ( );` is a syntax error
// as written. The original presumably returned a styled <button>; restore it
// from version control.
const TabButton = ({ active, onClick, children }) => (
);
// Main component
// Dashboard section with two tabs:
//  - "top-performers": best model per featured task, with a per-task ranking
//    computed from taskLevelPerformance averages.
//  - "model-performance": a recharts bar chart comparing selected models on a
//    selected metric, for one task or averaged across all tasks.
// Props:
//  - rawData: provides taskLevelPerformance (task -> model -> snake_case
//    metric scores) and the tasks list.
//  - modelsMeta: array of { model, color, ... } metadata entries.
//  - metricsData: processed metric maps keyed by Title Case display names;
//    each entry carries the original snake_case key as `internalMetricKey`.
//  - overviewCardData: provides bestModelPerTask for the top-performers tab.
// NOTE(review): all JSX element markup in the render helpers and the main
// return appears to have been stripped from this file (only interpolated
// expressions remain, and the `icon: (color) => ( )` factories are empty
// parens — a syntax error). The hook/data logic below is intact; the markup
// must be recovered from version control.
const TaskPerformance = ({
rawData,
modelsMeta,
metricsData, // Expects Title Case keys (e.g., Context Memory) containing internalMetricKey
overviewCardData,
}) => {
const [activeTab, setActiveTab] = useState("top-performers");
// *** Use Title Case metric keys from processed metricsData ***
const highLevelMetricDisplayKeys = useMemo(
() => Object.keys(metricsData?.highLevelCategories || {}).sort(),
[metricsData?.highLevelCategories]
);
const lowLevelMetricDisplayKeys = useMemo(
() => Object.keys(metricsData?.lowLevelMetrics || {}).sort(),
[metricsData?.lowLevelMetrics]
);
// **************************************************************
// Access original snake_case keys from rawData
const { taskLevelPerformance = {}, tasks = [] } = rawData || {};
const { bestModelPerTask = {} } = overviewCardData || {};
const models = modelsMeta || [];
// State for 'Model Performance' tab
// NOTE(review): this useState initializer runs only on first render — if
// rawData arrives asynchronously after mount, selectedTask stays "all" even
// once tasks are populated. Verify whether that is the intended default.
const [selectedTask, setSelectedTask] = useState(
tasks.length > 0 ? tasks[0] : "all"
);
// "high" -> high-level categories, "low" -> low-level metrics.
const [selectedMetricType, setSelectedMetricType] = useState("high");
// *** selectedMetric now stores the Title Case display key ***
const [selectedMetricDisplayKey, setSelectedMetricDisplayKey] = useState("");
// ***********************************************************
// Model names currently included in the comparison chart.
const [selectedModels, setSelectedModels] = useState([]);
// Determine current metrics list (Title Case display keys)
const currentMetricDisplayKeysList = useMemo(
() =>
selectedMetricType === "high"
? highLevelMetricDisplayKeys
: lowLevelMetricDisplayKeys,
[selectedMetricType, highLevelMetricDisplayKeys, lowLevelMetricDisplayKeys]
);
// Load models on mount
// NOTE(review): because selectedModels.length is a dependency, this also
// re-selects ALL models any time the user deselects the last one — confirm
// that "empty selection snaps back to everything" is intended behavior.
useEffect(() => {
if (models.length > 0 && selectedModels.length === 0) {
setSelectedModels(models.map((m) => m.model));
}
}, [models, selectedModels.length]);
// Set default metric display key when the list or type changes
useEffect(() => {
if (currentMetricDisplayKeysList.length > 0) {
// Reset to the first metric when the current selection is empty or no
// longer valid for the active metric type.
if (
!selectedMetricDisplayKey ||
!currentMetricDisplayKeysList.includes(selectedMetricDisplayKey)
) {
setSelectedMetricDisplayKey(currentMetricDisplayKeysList[0]); // Set to the first Title Case key
}
} else {
setSelectedMetricDisplayKey("");
}
}, [currentMetricDisplayKeysList, selectedMetricDisplayKey]);
// Prep chart data - *** UPDATED to use internalMetricKey looked up via selectedMetricDisplayKey ***
// Returns [{ model, score, color }] sorted by score descending; entries with
// missing/non-numeric scores are filtered out.
const chartData = useMemo(() => {
if (
!taskLevelPerformance ||
!selectedMetricDisplayKey ||
selectedModels.length === 0
)
return [];
// Find the internal snake_case key using the selected Title Case display name
// (merging both maps is safe here because only one display key is looked up;
// assumes display keys are unique across the two maps — TODO confirm).
const allMetricsProcessed = {
...(metricsData?.highLevelCategories || {}),
...(metricsData?.lowLevelMetrics || {}),
};
const metricInfo = allMetricsProcessed[selectedMetricDisplayKey]; // Look up using Title Case key
const internalMetricKey = metricInfo?.internalMetricKey; // Access the stored snake_case key
if (!internalMetricKey) {
console.warn(
`Could not find internal key for selected metric: ${selectedMetricDisplayKey}`
);
return [];
}
let data = [];
if (selectedTask === "all") {
// Average the metric per model across every task that has data for it.
const modelAggregates = {};
tasks.forEach((task) => {
if (taskLevelPerformance[task]) {
Object.entries(taskLevelPerformance[task]).forEach(
([model, metrics]) => {
if (selectedModels.includes(model)) {
// *** Use the FOUND snake_case internalMetricKey ***
const score = metrics?.[internalMetricKey];
// Scores may arrive as numbers, numeric strings, or the
// literal string "N/A"; only finite numbers are aggregated.
if (score !== undefined && score !== null && score !== "N/A") {
const numScore = parseFloat(score);
if (!isNaN(numScore)) {
if (!modelAggregates[model])
modelAggregates[model] = { sum: 0, count: 0 };
modelAggregates[model].sum += numScore;
modelAggregates[model].count++;
}
}
}
}
);
}
});
data = Object.entries(modelAggregates).map(([model, aggregates]) => {
const modelMeta = models.find((m) => m.model === model) || {};
return {
model: model,
score:
aggregates.count > 0 ? aggregates.sum / aggregates.count : null,
color: modelMeta.color || "#999999",
};
});
} else if (taskLevelPerformance[selectedTask]) {
// Single-task view: one bar per selected model with that task's score.
data = Object.entries(taskLevelPerformance[selectedTask])
.filter(([model, _metrics]) => selectedModels.includes(model))
.map(([model, metrics]) => {
// *** Use the FOUND snake_case internalMetricKey ***
const score = metrics?.[internalMetricKey];
const modelMeta = models.find((m) => m.model === model) || {};
return {
model: model,
score:
score !== undefined && score !== null && score !== "N/A"
? parseFloat(score)
: null,
color: modelMeta.color || "#999999",
};
});
}
// Drop models without a usable score, then rank best-first.
return data
.filter((item) => item.score !== null && !isNaN(item.score))
.sort((a, b) => b.score - a.score);
// Update dependencies
}, [
selectedTask,
selectedMetricDisplayKey,
selectedModels,
taskLevelPerformance,
models,
metricsData,
tasks,
]);
// Task definitions
// Static catalogue of featured tasks shown on the top-performers tab; `id`
// must match the task keys used in bestModelPerTask / taskLevelPerformance.
// NOTE(review): every `icon: (color) => ( )` factory is empty parens — the
// SVG/element markup was stripped; as written these are syntax errors.
const featuredTasks = useMemo(
() => [
/* ... (keep task definitions array) ... */ {
id: "Generating a Creative Idea",
title: "Generating Creative Ideas",
description: "Brainstorming unique birthday gift ideas.",
icon: (color) => (
),
},
{
id: "Creating a Travel Itinerary",
title: "Creating Travel Itinerary",
description: "Planning a European city break.",
icon: (color) => (
),
},
{
id: "Following Up on a Job Application",
title: "Following Up on Job App",
description: "Drafting a professional follow-up email.",
icon: (color) => (
),
},
{
id: "Planning Your Weekly Meals",
title: "Planning Weekly Meals",
description: "Creating a meal plan accommodating dietary restrictions.",
icon: (color) => (
),
},
{
id: "Making a Decision Between Options",
title: "Making a Decision",
description: "Comparing tech products for purchase.",
icon: (color) => (
),
},
{
id: "Understanding a Complex Topic",
title: "Understanding a Complex Topic",
description: "Learning about day trading concepts.",
icon: (color) => (
),
},
],
[]
);
// Only show featured tasks for which overview data actually exists.
const tasksToDisplay = useMemo(() => {
const availableTaskKeys = bestModelPerTask
? Object.keys(bestModelPerTask)
: [];
return featuredTasks.filter((ft) => availableTaskKeys.includes(ft.id));
}, [bestModelPerTask, featuredTasks]);
// Per-task model ranking: for each displayed task, average every numeric
// metric value a model has on that task, then sort models best-first.
const taskRankings = useMemo(() => {
const rankings = {};
tasksToDisplay.forEach((task) => {
const taskId = task.id;
if (!taskLevelPerformance[taskId]) {
rankings[taskId] = [];
return;
}
const taskScores = models
.map((modelMeta) => {
const modelData = taskLevelPerformance[taskId][modelMeta.model];
if (!modelData) return null;
// Average across ALL metric values for this task/model pair;
// non-numeric entries (e.g. "N/A") are ignored.
const scores = Object.values(modelData)
.map((s) => parseFloat(s))
.filter((s) => !isNaN(s));
if (scores.length === 0) return null;
const avgScore =
scores.reduce((sum, score) => sum + score, 0) / scores.length;
return {
model: modelMeta.model,
taskAvgScore: avgScore,
color: modelMeta.color || "#999999",
};
})
.filter((item) => item !== null)
.sort((a, b) => b.taskAvgScore - a.taskAvgScore);
rankings[taskId] = taskScores;
});
return rankings;
}, [tasksToDisplay, taskLevelPerformance, models]);
// Top-performers tab: one card per featured task showing the best model,
// its average score, and the full ranking. Tasks whose best model is
// unknown ("N/A") render nothing.
// NOTE(review): element markup stripped — expressions only remain below.
const renderTopPerformersTab = () => (
{tasksToDisplay.length === 0 && (
No task performance data available.
)}
{tasksToDisplay.map((task) => {
const bestModelInfo = bestModelPerTask?.[task.id];
const topModelsForTask = taskRankings[task.id] || [];
if (!bestModelInfo || bestModelInfo.model === "N/A") return null;
const modelColor = bestModelInfo.color || "#6b7280";
return (
{task.icon(modelColor)}
{bestModelInfo.model}
Avg. Score: {bestModelInfo.score?.toFixed(1) ?? "N/A"}
Task Ranking
{topModelsForTask.length > 0 ? (
{topModelsForTask.map((rankedModel, index) => (
-
{index + 1}.
{rankedModel.model}
{rankedModel.taskAvgScore?.toFixed(1) ?? "N/A"}
))}
) : (
Ranking data not available.
)}
Task Example: {task.description}
);
})}
);
// Render the model performance analysis tab - *** UPDATED SELECTOR & LABELS ***
// Controls (task selector, metric-type pills, metric selector) plus the
// recharts bar chart built from chartData.
// NOTE(review): element markup stripped — expressions only remain below.
const renderModelPerformanceTab = () => (
{/* Controls Panel */}
Task Analysis Controls
{/* Task Selector */}
{/* Metric Type Selector Pills */}
setSelectedMetricType("high")}
>
High-Level
setSelectedMetricType("low")}
>
Low-Level
{/* Metric Selector - VALUE is Title Case key, displays Title Case */}
{/* Chart Visualization */}
{/* Use selectedMetricDisplayKey for title */}
{`${selectedMetricDisplayKey || "Selected Metric"} Comparison for `}
{selectedTask === "all"
? "All Tasks (Average)"
: `"${selectedTask}"`}
{chartData.length > 0 ? (
}
wrapperStyle={{ zIndex: 10 }}
/>
{/* Use Title Case key for Bar name */}
{chartData.map((entry, index) => (
|
))}
{chartData.map((entry) => (
))}
) : (
No Data Available
No data available for the selected task, metric, and models.
)}
{/* Corrected margin-top */}
{/* Use Title Case key for display and lookup */}
This chart shows{" "}
{selectedMetricDisplayKey || "the selected metric"}
{" "}
scores (0-100, higher is better) for models on
{selectedTask === "all"
? "average across all tasks"
: `the "${selectedTask}" task`}
.
{selectedMetricDisplayKey &&
` Metric definition: ${getMetricTooltip(
selectedMetricDisplayKey
)}`}
);
// Main return with tabs
// Two TabButtons switch activeTab; the matching render helper supplies the
// tab body. NOTE(review): element markup stripped here as well.
return (
setActiveTab("top-performers")}
>
Top Performing Models by Task
{" "}
setActiveTab("model-performance")}
>
Model Performance Comparison
{" "}
{" "}
{activeTab === "top-performers"
? renderTopPerformersTab()
: renderModelPerformanceTab()}
);
};
export default TaskPerformance;