// components/MetricsBreakdown.jsx "use client"; import React, { useState, useEffect, useMemo } from "react"; import { RadarChart, PolarGrid, PolarAngleAxis, PolarRadiusAxis, Radar, Tooltip as RechartsTooltip, // Renamed to avoid conflict with local Tooltip Legend, ResponsiveContainer, } from "recharts"; import { getScoreColor, getMetricTooltip } from "../lib/utils"; import { Tooltip } from "./Tooltip"; // Your custom Tooltip component for headers etc. // Component receives processed metrics data, model metadata, and category radar data const MetricsBreakdown = ({ metricsData, modelsMeta, radarData: categoryRadarDataProp, // Already processed radar data for categories }) => { const [subTab, setSubTab] = useState("categories"); // 'categories' or 'metrics' const [selectedModels, setSelectedModels] = useState([]); // console.log("Metrics Data in Breakdown:", metricsData); // For debugging // console.log("Models Meta in Breakdown:", modelsMeta); // console.log("Category Radar Data Prop:", categoryRadarDataProp); // Extract data from props with defaults const { highLevelCategories, lowLevelMetrics } = metricsData || { highLevelCategories: {}, lowLevelMetrics: {}, }; // Use modelsMeta directly for clarity, aliasing if preferred const models = modelsMeta || []; // Get sorted lists of category and metric names const sortedCategoryNames = useMemo( () => Object.keys(highLevelCategories || {}).sort((a, b) => a.localeCompare(b)), [highLevelCategories] ); const sortedMetricNames = useMemo( () => Object.keys(lowLevelMetrics || {}).sort((a, b) => a.localeCompare(b)), [lowLevelMetrics] ); // Initialize selections useEffect(() => { if (selectedModels.length === 0 && models.length > 0) { setSelectedModels(models.map((m) => m.model)); } // eslint-disable-next-line react-hooks/exhaustive-deps }, [models]); // Only depends on models changing/loading // --- Memoized data generation functions --- // Radar data for LL Metrics (used when subTab === 'metrics') - CORRECTED ACCESSORS const metricRadarData = useMemo(() => { if ( !lowLevelMetrics || models.length === 0 || sortedMetricNames.length === 0 ) return []; return sortedMetricNames.map((metricName) => { const entry = { category: metricName }; // Use metric name as the axis category const metricData = lowLevelMetrics[metricName]; if (metricData) { models .filter((m) => selectedModels.includes(m.model)) .forEach((model) => { // Use correct camelCase keys entry[model.model] = Number(metricData.modelScores?.[model.model]?.nationalScore) || 0; // Standard deviation per metric is NOT available, so we don't add it here }); } return entry; }); }, [lowLevelMetrics, models, selectedModels, sortedMetricNames]); // Custom tooltip (common for both radar charts) - CORRECTED (removed std dev logic) const CustomRadarTooltip = ({ active, payload, label }) => { if (active && payload && payload.length) { return (

{label}

{/* Get tooltip description for the category/metric itself */}

{getMetricTooltip(label)}

{payload // Sort models by score within tooltip .sort((a, b) => (b.value || 0) - (a.value || 0)) .map((entry) => (

{entry.name}: {/* name is also the model name */} {/* Ensure value exists and format */} {entry.value !== null && entry.value !== undefined ? Number(entry.value).toFixed(1) : "N/A"} {/* Removed standard deviation display */}

))}

); } return null; }; // Use the radar data passed via prop for categories view, filtered by selected models - CORRECTED (removed std dev logic) const filteredCategoryRadarData = useMemo(() => { if (!categoryRadarDataProp || models.length === 0) return []; // Filter based on selected models, removing std dev keys return categoryRadarDataProp.map((item) => { const newItem = { category: item.category }; models .filter((m) => selectedModels.includes(m.model)) .forEach((model) => { // We only need the model score itself for the radar data newItem[model.model] = item[model.model] ?? 0; // Use nullish coalescing for default }); return newItem; }); }, [categoryRadarDataProp, models, selectedModels]); return ( <> {/* Top Controls: Model Selector & Sub-Tab Pills (No changes needed) */}

{/* Sub-Tab Pills */}

{" "} {" "} {" "}

{/* Model Selector */}

{" "} Models:{" "} {models?.map((model) => ( ))}{" "}

{/* Conditional content based on sub-tab */} {subTab === "categories" && (

{/* CATEGORIES VIEW */} {/* Summary Table: Models as Rows, Categories as Columns - CORRECTED ACCESSORS */}

Category Performance Summary

{sortedCategoryNames.length > 0 ? ( {sortedCategoryNames.map((catName) => ( ))} {models ?.filter((m) => selectedModels.includes(m.model)) .map((model, idx) => ( {sortedCategoryNames.map((catName) => { // Use correct camelCase keys const scoreData = highLevelCategories[catName]?.modelScores?.[ model.model ]; const score = scoreData?.nationalScore; // Access camelCase key const displayScore = score !== null && score !== undefined ? Number(score).toFixed(1) : "N/A"; return ( ); })} ))}

Model	{catName}
{" "} {/* Keep sticky styles */} {model.model}	{displayScore}

) : (

No category data available.

)}

{/* Radar Chart for Categories (Uses filteredCategoryRadarData) */}

Performance Across Categories

National Average Scores

{filteredCategoryRadarData && filteredCategoryRadarData.length > 0 ? (

{models ?.filter((m) => selectedModels.includes(m.model)) .map((model) => ( ))} {/* Use the corrected CustomRadarTooltip */} } />

) : (

Radar data not available.

)}

This radar chart visualizes how each model performs across different high-level evaluation categories. The further out on each axis, the better the performance on that category.

)} {subTab === "metrics" && (

{/* METRICS VIEW */} {/* Radar Chart for Metrics (Uses metricRadarData) */}

Performance Across All Metrics

National Average Scores

{metricRadarData.length > 0 ? (

{" "} {/* Increased height */} {" "} {/* Use metricRadarData */} {" "} {/* Adjusted font size */} {models ?.filter((m) => selectedModels.includes(m.model)) .map((model) => ( ))} {/* Use the corrected CustomRadarTooltip */} } />

) : (

Metric data not available for radar chart.

)}

This radar chart visualizes how each model performs across different low-level metrics. The further out on each axis, the better the performance on that metric.

{/* Optional: Add a table summary for low-level metrics similar to the categories one if desired */}

)} ); }; export default MetricsBreakdown;