Nora Petrova
Add project to new space
20e666e
raw
history blame
26.4 kB
// lib/utils.js
/**
 * Constants
 *
 * Fixed chart color per model (hex). Unknown models fall back to "#999999"
 * at each lookup site. Frozen so the shared palette cannot be mutated at
 * runtime by consumers.
 */
const MODEL_COLORS = Object.freeze({
  "gpt-4o": "#0072B2", // Strong blue
  "claude-3.7-sonnet": "#D55E00", // Vermillion/orange-red
  "deepseek-r1": "#F0E442", // Yellow
  o1: "#CC79A7", // Pink
  "gemini-2.0-flash-001": "#009E73", // Bluish green
  "llama-3.1-405b-instruct": "#56B4E9", // Light blue
});
// --- Helper Functions ---
/**
 * Converts camelCase to Title Case.
 * Fix: trim the spaced string BEFORE capitalizing. Previously, PascalCase
 * input (e.g. "FooBar") produced " Foo Bar" with a leading space, because
 * charAt(0) picked up the space inserted before the first capital.
 * @param {string} str Input string.
 * @returns {string} Title Case string (falsy input is returned unchanged).
 */
export const camelToTitle = (str) => {
  if (!str) return str;
  // Insert a space before each capital, then normalize the edges.
  const spaced = str.replace(/([A-Z])/g, " $1").trim();
  return spaced.charAt(0).toUpperCase() + spaced.slice(1);
};
/**
 * Formats a snake_case or kebab-case key as Title Case for display.
 * Needed for display consistency when keys are snake_case.
 * @param {*} key Raw key, e.g. "context_memory" or "context-memory".
 * @returns {string} Display string, e.g. "Context Memory"; "N/A" for
 *   missing or non-string input.
 */
export const formatDisplayKey = (key) => {
  if (typeof key !== "string" || !key) return "N/A";
  if (key === "N/A") return "N/A";
  // Replace both separator styles in one pass, then Title Case each word.
  const spaced = key.replace(/[_-]/g, " ").trim();
  return spaced.replace(/\b\w/g, (ch) => ch.toUpperCase());
};
/**
 * Helper to get Significance indicator style and tooltip.
 * @param {boolean|null|undefined} isSignificant true/false, or null/undefined
 *   when significance could not be determined.
 * @param {number} pValue p-value; non-numeric values render as "N/A".
 * @param {number} [alpha=0.05] Significance threshold shown in the tooltip.
 * @returns {{symbol: string, className: string, tooltip: string}}
 */
export function getSignificanceIndicator(isSignificant, pValue, alpha = 0.05) {
  const pValueFormatted =
    typeof pValue === "number" && !isNaN(pValue) ? pValue.toFixed(3) : "N/A";
  if (isSignificant === true) {
    return {
      symbol: "✓",
      className: "text-green-600",
      tooltip: `Statistically Significant (p=${pValueFormatted} < ${alpha})`,
    };
  } else if (isSignificant === false) {
    return {
      symbol: "✗",
      className: "text-red-600",
      // Fix: comparator was missing, producing e.g. "p=0.1230.05".
      tooltip: `Not Statistically Significant (p=${pValueFormatted} >= ${alpha})`,
    };
  } else {
    return {
      symbol: "?",
      className: "text-gray-400",
      tooltip: "Significance Undetermined",
    };
  }
}
/**
 * Determines the style and tooltip for an equity gap status indicator.
 * Branches are checked in priority order: flagged concern, undetermined
 * significance, large-but-not-significant, significant, then neither.
 * @param {boolean|null} isConcern Gap flagged as an equity concern.
 * @param {boolean|null} isLargeEffect Effect size classed as large.
 * @param {boolean|null} isSignificant Statistical significance (null = unknown).
 * @param {number} pValue p-value for the gap; non-numeric renders as "p=N/A".
 * @param {string|null} effectSizeClass Effect size label (e.g. "Large").
 * @returns {{icon: string, colorClass: string, tooltip: string}}
 */
export function getEquityIndicatorStyle(
  isConcern,
  isLargeEffect,
  isSignificant,
  pValue,
  effectSizeClass
) {
  const hasNumericP = typeof pValue === "number" && !isNaN(pValue);
  const pValueText = hasNumericP ? `p=${pValue.toFixed(3)}` : "p=N/A";
  const effectText = `Effect: ${effectSizeClass || "N/A"}`;

  if (isConcern === true) {
    return {
      icon: "▲",
      colorClass: "text-red-600",
      tooltip: `Equity Concern (${effectText}, Significant, ${pValueText})`,
    };
  }
  if (isSignificant === null) {
    return {
      icon: "?",
      colorClass: "text-gray-500",
      tooltip: `Significance Undetermined (${effectText})`,
    };
  }
  if (isLargeEffect === true && isSignificant === false) {
    return {
      icon: "●",
      colorClass: "text-yellow-600",
      tooltip: `Large Effect but Not Statistically Significant (${pValueText})`,
    };
  }
  if (isSignificant === true) {
    return {
      icon: "✓",
      colorClass: "text-green-600",
      tooltip: `Statistically Significant but Not Large Effect (${effectText}, ${pValueText})`,
    };
  }
  return {
    icon: "✓",
    colorClass: "text-gray-400",
    tooltip: `Not Statistically Significant (${effectText}, ${pValueText})`,
  };
}
/**
 * Determine styling based on score for generic BADGES (background + text).
 * The score is mapped to a 0-100 percent position within [min, max]; when
 * min > max the scale is treated as inverted (lower raw score = better).
 * @param {number|string|null} score Raw score; null/undefined/"N/A"/non-numeric => neutral gray.
 * @param {number} [min=0] Scale minimum (may exceed max for inverted scales).
 * @param {number} [max=100] Scale maximum.
 * @returns {string} Tailwind background + text classes.
 */
export function getScoreBadgeColor(score, min = 0, max = 100) {
  const neutral = "bg-gray-100 text-gray-800";
  const numericScore = Number(score);
  const missing =
    score === null ||
    score === undefined ||
    score === "N/A" ||
    isNaN(numericScore);
  if (missing) return neutral;

  const range = Math.abs(max - min);
  if (range <= 0) return neutral; // degenerate scale (min === max)

  const percent =
    max > min
      ? ((numericScore - min) / range) * 100
      : ((min - numericScore) / range) * 100; // inverted scale

  if (percent >= 80) return "bg-green-100 text-green-800";
  if (percent >= 50) return "bg-blue-100 text-blue-800";
  if (percent >= 20) return "bg-yellow-100 text-yellow-800";
  return "bg-red-100 text-red-800";
}
/**
 * Determine TEXT color based on score (0-100 scale, higher is better).
 * @param {number|string|null} score Raw score; null/undefined/"N/A"/non-numeric => muted gray.
 * @returns {string} Tailwind text color class(es).
 */
export function getScoreColor(score) {
  const numericScore = Number(score);
  const invalid =
    score === null ||
    score === undefined ||
    score === "N/A" ||
    isNaN(numericScore);
  if (invalid) return "text-gray-400";
  if (numericScore >= 80) return "text-green-600 font-medium";
  if (numericScore >= 60) return "text-blue-600";
  if (numericScore >= 40) return "text-yellow-600";
  return "text-red-600";
}
/**
 * Tooltip text for metrics and table headers - Accepts original keys.
 * The lookup map is hoisted to module scope (and frozen) so it is built
 * once instead of being re-allocated on every call.
 */
const METRIC_TOOLTIPS = Object.freeze({
  // Use Title Case keys matching dropdowns/headers
  // High-level
  Helpfulness:
    "How well the model provides useful assistance that addresses user needs",
  Communication:
    "Quality of clarity, coherence, and appropriateness of writing style",
  Understanding:
    "How well the model comprehends requests and contextual information",
  Adaptiveness:
    "How well the model adjusts to user needs and feedback during conversation",
  Trustworthiness:
    "Transparency, accuracy, and consistency in model responses",
  Personality:
    "Consistency and definition of the model's persona and ethical alignment",
  "Background And Culture":
    "Cultural sensitivity, relevance, and freedom from bias",
  "Repeat Usage":
    "User satisfaction and willingness to use the model again (score 0-100).",
  // Low-level (use formatted names matching display)
  Effectiveness: "How effectively the model helps accomplish specific goals",
  Comprehensiveness:
    "How thoroughly the model addresses all aspects of requests",
  Usefulness: "Practicality and relevance of suggestions or solutions",
  "Tone And Language Style":
    "Appropriateness of tone and language for the context",
  "Conversation Flow": "Natural and conversational quality of responses",
  "Detail And Technical Language":
    "Appropriate level of detail and technical language",
  Accuracy: "Accuracy in interpreting user requests",
  "Context Memory": "Ability to maintain conversation context",
  Intuitiveness: "Ability to pick up on implicit aspects of requests",
  Flexibility: "Adapting responses based on user feedback",
  Clarity: "Ability to clarify ambiguities or misunderstandings",
  "Conversation Building": "Building upon previous exchanges in conversation",
  Consistency: "Consistency of responses across similar questions",
  Confidence: "User confidence in accuracy of information",
  Transparency: "Openness about limitations or uncertainties",
  "Personality Consistency":
    "Consistency of personality throughout interactions",
  "Distinct Personality": "How well-defined the model's personality is",
  "Honesty Empathy Fairness": "Alignment with ethical expectations",
  "Ethical Alignment": "Alignment with user culture, viewpoint, or values",
  "Cultural Awareness":
    "Recognition of when cultural perspective is relevant",
  "Bias And Stereotypes": "Freedom from stereotypes and bias in responses",
  // Table headers
  "Overall Score":
    "Average score across high-level categories (0-100). Higher is better.",
  "Overall SD":
    "Standard Deviation (± points) of scores across high-level categories. Lower indicates more consistent performance across capabilities.",
  "Max Equity Gap":
    "Score difference (points) for the demographic gap with the largest statistical effect size for this model. Status icon indicates Equity Concern (▲) and/or Significance (✓/✗/?). Hover for details.",
  "Max Gap Area":
    "The specific Demographic Factor and Category where the 'Max Equity Gap' (largest effect size gap) occurred for this model.",
  "Equity Concerns (%)":
    "Percentage of evaluated demographic gaps flagged as Equity Concerns (Large Effect & Statistically Significant, p<0.05). Lower is better.",
  "User Retention":
    "Model score for the 'Repeat Usage' category (0-100), indicating likelihood of users using the model again.",
});

/**
 * Looks up the tooltip for a metric or table-header key.
 * @param {string} key Original key (snake_case, camelCase, or Title Case).
 * @returns {string} Tooltip text, or "No description available".
 */
export const getMetricTooltip = (key) => {
  // Format the key for display/lookup in tooltips map if needed
  const titleCaseKey = formatDisplayKey(key); // Convert snake_case/camelCase to Title Case
  // Try lookup with formatted key, then original key as fallback
  return (
    METRIC_TOOLTIPS[titleCaseKey] ||
    METRIC_TOOLTIPS[key] ||
    "No description available"
  );
};
/**
 * Badge color based on Effect Size Class.
 * @param {string|null} effectSizeClass One of "Negligible" | "Small" |
 *   "Medium" | "Large"; anything else (including "N/A") maps to gray.
 * @returns {string} Tailwind badge classes.
 */
export function getEffectSizeBadgeColor(effectSizeClass) {
  const fallback = "bg-gray-100 text-gray-800";
  if (!effectSizeClass || effectSizeClass === "N/A") {
    return fallback;
  }
  const badgeByClass = {
    Negligible: "bg-green-100 text-green-800",
    Small: "bg-blue-100 text-blue-800",
    Medium: "bg-yellow-100 text-yellow-800",
    Large: "bg-red-100 text-red-800",
  };
  return badgeByClass[effectSizeClass] ?? fallback;
}
/**
 * Helper function to process task performance data.
 * Expects rawData input with snake_case keys.
 *
 * Two passes over rawData.task_level_performance:
 *  1. Per task: find the model with the highest average metric score and
 *     keep that model's top-3 metrics.
 *  2. Per task category (creative/practical/analytical, from taskCategoryMap):
 *     pool every metric score across the category's tasks per model, pick
 *     the best model by pooled average, and keep its top-5 metric averages.
 *
 * @param {Object} rawData Raw leaderboard data; reads `task_level_performance`
 *   shaped as { taskName: { modelName: { metricKey: score } } }.
 * @param {Object} taskCategoryMap Map of task display name -> category key.
 * @param {string[]} modelOrder Known model names, iterated in display order.
 * @returns {Object} { bestModelPerTask, keyMetricsByTask,
 *   bestModelPerTaskCategory, keyMetricsByTaskCategory } — camelCase keys;
 *   missing data yields "N/A" placeholder entries rather than omissions.
 */
function processTaskPerformance(rawData, taskCategoryMap, modelOrder) {
  const result = {
    bestModelPerTask: {},
    keyMetricsByTask: {},
    bestModelPerTaskCategory: {
      creative: null,
      practical: null,
      analytical: null,
    },
    keyMetricsByTaskCategory: { creative: [], practical: [], analytical: [] },
  };
  // Access original snake_case key from input
  const taskPerformance = rawData?.task_level_performance;
  if (!taskPerformance || typeof taskPerformance !== "object") {
    console.warn(
      "Task level performance data missing or invalid in processTaskPerformance input."
    );
    return result;
  }
  // Task names are keys in taskPerformance
  Object.keys(taskPerformance).forEach((taskName) => {
    const taskData = taskPerformance[taskName];
    if (!taskData) return;
    // Track the best (highest average metric score) model for this task.
    let taskBestModel = null;
    let taskBestAvgScore = -Infinity;
    let taskBestModelMetrics = null;
    modelOrder.forEach((modelName) => {
      // Iterate through known models
      const modelMetrics = taskData[modelName];
      if (modelMetrics && typeof modelMetrics === "object") {
        // Access metric scores using original snake_case keys within modelMetrics
        const scores = Object.values(modelMetrics)
          .map((s) => Number(s))
          .filter((s) => !isNaN(s));
        if (scores.length > 0) {
          const avgScore =
            scores.reduce((sum, score) => sum + score, 0) / scores.length;
          if (avgScore > taskBestAvgScore) {
            taskBestAvgScore = avgScore;
            taskBestModel = modelName;
            taskBestModelMetrics = modelMetrics;
          }
        }
      }
    });
    if (taskBestModel && taskBestModelMetrics) {
      result.bestModelPerTask[taskName] = {
        model: taskBestModel,
        score: taskBestAvgScore,
        color: MODEL_COLORS[taskBestModel] || "#999999",
      };
      // Extract top metrics (keys are snake_case), sorted descending by score
      const metricsArray = Object.entries(taskBestModelMetrics)
        .map(([metricKey, score]) => ({ metricKey, score: Number(score) || 0 }))
        .sort((a, b) => b.score - a.score);
      // Store with snake_case key, add display name (top 3 only)
      result.keyMetricsByTask[taskName] = metricsArray
        .slice(0, 3)
        .map((m) => ({ ...m, metricName: formatDisplayKey(m.metricKey) }));
    } else {
      // No usable scores for any model on this task — emit placeholder entry.
      result.bestModelPerTask[taskName] = {
        model: "N/A",
        score: "N/A",
        color: "#999999",
      };
      result.keyMetricsByTask[taskName] = [];
    }
  });
  // Task Categories processing: group tasks that actually have data.
  const tasksByCategory = { creative: [], practical: [], analytical: [] };
  Object.entries(taskCategoryMap).forEach(([task, category]) => {
    if (tasksByCategory[category] && taskPerformance[task]) {
      tasksByCategory[category].push(task);
    }
  });
  Object.entries(tasksByCategory).forEach(([category, tasks]) => {
    const categoryNameDisplay = `${
      category.charAt(0).toUpperCase() + category.slice(1)
    } Tasks`;
    if (tasks.length === 0) {
      // No tasks mapped into this category — emit placeholder entry.
      result.bestModelPerTaskCategory[category] = {
        model: "N/A",
        score: "N/A",
        color: "#999999",
        categoryName: categoryNameDisplay,
      };
      result.keyMetricsByTaskCategory[category] = [];
      return;
    }
    // Accumulate, per model: overall sum/count plus per-metric sums/counts
    // pooled across every task in this category.
    const categoryModelScores = {};
    modelOrder.forEach((modelName) => {
      categoryModelScores[modelName] = { totalScore: 0, count: 0, metrics: {} };
      tasks.forEach((task) => {
        if (taskPerformance[task]?.[modelName]) {
          // metricKey is original snake_case here
          Object.entries(taskPerformance[task][modelName]).forEach(
            ([metricKey, score]) => {
              const numScore = Number(score);
              if (!isNaN(numScore)) {
                categoryModelScores[modelName].totalScore += numScore;
                categoryModelScores[modelName].count++;
                if (!categoryModelScores[modelName].metrics[metricKey])
                  categoryModelScores[modelName].metrics[metricKey] = {
                    sum: 0,
                    count: 0,
                  };
                categoryModelScores[modelName].metrics[metricKey].sum +=
                  numScore;
                categoryModelScores[modelName].metrics[metricKey].count++;
              }
            }
          );
        }
      });
    });
    // Pick the model with the best pooled average for this category.
    let bestAvg = -Infinity;
    let bestCatModel = null;
    Object.entries(categoryModelScores).forEach(([model, data]) => {
      if (data.count > 0) {
        const avg = data.totalScore / data.count;
        if (avg > bestAvg) {
          bestAvg = avg;
          bestCatModel = model;
        }
      }
    });
    if (bestCatModel) {
      result.bestModelPerTaskCategory[category] = {
        model: bestCatModel,
        score: Number(bestAvg.toFixed(1)),
        color: MODEL_COLORS[bestCatModel] || "#999999",
        categoryName: categoryNameDisplay,
      };
      const bestModelMetricsData =
        categoryModelScores[bestCatModel]?.metrics || {};
      // metricKey is snake_case; compute per-metric averages, sorted descending
      const metricAverages = Object.entries(bestModelMetricsData)
        .map(([metricKey, data]) => ({
          metricKey,
          score: data.count > 0 ? data.sum / data.count : 0,
        }))
        .sort((a, b) => b.score - a.score);
      // Store with original key, add display name (top 5 only)
      result.keyMetricsByTaskCategory[category] = metricAverages
        .slice(0, 5)
        .map((m) => ({
          metric: formatDisplayKey(m.metricKey),
          score: m.score,
          scoreDisplay: m.score.toFixed(1),
        }));
    } else {
      // No model had any valid scores for this category's tasks.
      result.bestModelPerTaskCategory[category] = {
        model: "N/A",
        score: "N/A",
        color: "#999999",
        categoryName: categoryNameDisplay,
      };
      result.keyMetricsByTaskCategory[category] = [];
    }
  });
  return result; // Returns object with camelCase keys
}
/**
 * Prepares the data from leaderboard_data.json for visualization.
 * FINAL v4: Reverted deep camelCase conversion. Processes top-level keys and adds equity concern %.
 * Keeps nested raw data keys as original (snake_case).
 *
 * Key-casing contract (important for consumers):
 *  - top-level output keys are camelCase;
 *  - category/metric display keys are Title Case (produced upstream in Python);
 *  - data nested under rawData/equityAnalysis retains original snake_case keys.
 *
 * @param {Object} rawDataInput - The raw data from leaderboard_data.json (expected snake_case)
 * @returns {Object} - Processed data ready for visualization; a safe empty
 *   default structure is returned when input lacks a valid `model_order`.
 */
export function prepareDataForVisualization(rawDataInput) {
  // Basic Validation — shape of the empty fallback mirrors the success path.
  const defaultReturn = {
    models: [],
    metricsData: { highLevelCategories: {}, lowLevelMetrics: {} },
    radarData: [],
    bestPerCategory: {},
    bestPerMetric: {},
    overviewCardData: {},
    rawData: {},
    metadata: {},
    equityAnalysis: {},
  };
  if (
    !rawDataInput ||
    !rawDataInput.model_order ||
    !Array.isArray(rawDataInput.model_order)
  ) {
    console.error(
      "prepareDataForVisualization received invalid rawData.",
      rawDataInput
    );
    return defaultReturn;
  }
  // Keep original references where structure is maintained
  const modelOrder = rawDataInput.model_order;
  // Fallback mirrors the expected equity_analysis shape so downstream access is safe.
  const equityAnalysis = rawDataInput.equity_analysis || {
    all_equity_gaps: [],
    model_max_effect_gaps: {},
    universal_issues: [],
    assessment_method: {},
    demographic_variation_stats: {},
  };
  const allGaps = equityAnalysis.all_equity_gaps || [];
  const metadata = rawDataInput.metadata || {};
  const mrpDemographicsRaw = rawDataInput.mrp_demographics || {};
  const taskLevelPerformanceRaw = rawDataInput.task_level_performance || {};
  // Process MRP Demographics for filtering options: collect the set of
  // factors, the levels seen per factor, and every metric name encountered.
  const demographicFactors = new Set();
  const demographicLevels = {};
  const availableMetrics = new Set();
  if (mrpDemographicsRaw && typeof mrpDemographicsRaw === "object") {
    Object.values(mrpDemographicsRaw).forEach((modelData) => {
      Object.entries(modelData || {}).forEach(([factor, factorData]) => {
        demographicFactors.add(factor);
        if (!demographicLevels[factor]) demographicLevels[factor] = new Set();
        Object.entries(factorData || {}).forEach(([level, levelData]) => {
          demographicLevels[factor].add(level);
          Object.keys(levelData || {}).forEach((metric) =>
            availableMetrics.add(metric)
          );
        });
      });
    }); // metric is Title Case here from Python processing
  }
  // Flatten level sets into sorted arrays for dropdown options.
  const demographicOptions = {};
  demographicFactors.forEach((factor) => {
    demographicOptions[factor] = Array.from(
      demographicLevels[factor] || new Set()
    ).sort();
  });
  const availableMetricsList = Array.from(availableMetrics).sort(); // These are Title Case
  // Process Overall Rankings -> camelCase & add equity concern %
  const overallRankingProcessed = (rawDataInput.overall_ranking || []).map(
    (modelData) => {
      const modelName = modelData.model;
      // details object keys are snake_case from python
      const maxEffectGapDetails = modelData.max_effect_gap_details || {};
      // Coerce to number, mapping non-numeric values to null (not NaN).
      const safeParseFloat = (val) => {
        const num = Number(val);
        return isNaN(num) ? null : num;
      };
      const modelSpecificGaps = allGaps.filter(
        (gap) => gap.model === modelName
      ); // Access snake_case keys in allGaps
      const totalGapsForModel = modelSpecificGaps.length;
      const concernCountForModel = modelSpecificGaps.filter(
        (gap) => gap.is_equity_concern === true
      ).length;
      // Percentage of this model's evaluated gaps flagged as equity concerns;
      // null when the model has no evaluated gaps (avoids divide-by-zero).
      let equityConcernPercentage = null;
      if (totalGapsForModel > 0) {
        equityConcernPercentage =
          (concernCountForModel / totalGapsForModel) * 100;
      }
      // Return structure with camelCase keys
      return {
        rank: modelData.rank,
        model: modelName,
        overallScore: safeParseFloat(modelData.overall_score),
        highLevelCatScore: safeParseFloat(modelData.high_level_cat_score),
        lowLevelCatScore: safeParseFloat(modelData.low_level_cat_score),
        color: MODEL_COLORS[modelName] || "#999999",
        // Use snake_case keys from input JSON for these fields
        stdDevAcrossCats: modelData.std_dev_across_cats,
        stdDevAcrossCatsNumeric: safeParseFloat(modelData.std_dev_across_cats),
        repeatUsageScore: safeParseFloat(modelData.repeat_usage_score),
        maxEffectCategory: modelData.max_effect_category, // snake_case from input
        maxEffectFactor: maxEffectGapDetails.demographic_factor, // snake_case from input
        maxEffectSize: safeParseFloat(maxEffectGapDetails.effect_size),
        maxEffectGap: safeParseFloat(maxEffectGapDetails.score_range),
        maxEffectConcernFlag: maxEffectGapDetails.is_equity_concern ?? false,
        maxEffectSignificant: maxEffectGapDetails.is_statistically_significant,
        maxEffectPValue: maxEffectGapDetails.p_value,
        maxEffectSizeClass: maxEffectGapDetails.effect_size_class || "N/A",
        maxEffectRawNHeuristic:
          maxEffectGapDetails.raw_n_confidence_heuristic || "N/A",
        maxEffectGapDetails: maxEffectGapDetails, // Pass original snake_case details
        equityConcernPercentage: equityConcernPercentage,
      };
    }
  );
  // Process Metrics Breakdown -> camelCase keys for structure, keep original metric keys inside
  const metricsBreakdownProcessed = {
    highLevelCategories: {},
    lowLevelMetrics: {},
  };
  if (
    rawDataInput.metrics_breakdown &&
    typeof rawDataInput.metrics_breakdown === "object"
  ) {
    // Shared transform for both high-level categories and low-level metrics:
    // normalizes per-model scores and the top-performer record.
    const processCategory = (displayKey, categoryData) => {
      // Input displayKey is Title Case from python output
      if (!categoryData || !categoryData.model_scores) {
        console.warn(`Missing model_scores for category: ${displayKey}`);
        return {
          modelScores: {},
          topPerformer: { model: "N/A", score: null, color: "#999999" },
        };
      }
      const internalMetricKey = categoryData._internal_category_name; // Get original snake_case key
      const processedModelScores = {};
      modelOrder.forEach((modelName) => {
        const scores = categoryData.model_scores[modelName]; // Access model scores
        if (!scores) {
          // Model missing for this category: placeholder keeps shape stable.
          processedModelScores[modelName] = {
            nationalScore: null,
            color: MODEL_COLORS[modelName] || "#999999",
            maxEffectGapInfo: {},
          };
          return;
        }
        const maxEffectGapInfoForCat = scores.max_effect_gap_info || {}; // presumably snake_case keys inside (matches python output) — verify upstream
        processedModelScores[modelName] = {
          nationalScore: scores.national_score ?? null,
          color: MODEL_COLORS[modelName] || "#999999",
          // Keep original snake_case keys for gap info within this structure
          maxEffectGapInfo: maxEffectGapInfoForCat,
        };
      });
      const topPerf = categoryData.top_performer || {};
      const topPerfScore =
        topPerf.score === "N/A" || topPerf.score === null
          ? null
          : Number(topPerf.score);
      return {
        modelScores: processedModelScores, // Nested scores
        topPerformer: {
          model: topPerf.model || "N/A",
          score: isNaN(topPerfScore) ? null : topPerfScore,
          color: MODEL_COLORS[topPerf.model] || "#999999",
        },
        internalMetricKey: internalMetricKey, // Store original snake_case key
      };
    };
    Object.entries(
      rawDataInput.metrics_breakdown.high_level_categories || {}
    ).forEach(([displayKey, catData]) => {
      metricsBreakdownProcessed.highLevelCategories[displayKey] =
        processCategory(displayKey, catData);
    });
    Object.entries(
      rawDataInput.metrics_breakdown.low_level_metrics || {}
    ).forEach(([displayKey, metricData]) => {
      metricsBreakdownProcessed.lowLevelMetrics[displayKey] = processCategory(
        displayKey,
        metricData
      );
    });
  } else {
    console.warn("rawDataInput.metrics_breakdown is missing or not an object.");
  }
  // Prepare Radar Chart Data: one row per high-level category, one numeric
  // column per model (missing/invalid scores become 0).
  const radarChartData = Object.entries(
    metricsBreakdownProcessed.highLevelCategories
  ).map(([displayKey, categoryData]) => {
    // displayKey is Title Case here
    const radarEntry = { category: displayKey }; // Use Title Case for radar axis label
    modelOrder.forEach((modelName) => {
      radarEntry[modelName] =
        Number(categoryData.modelScores[modelName]?.nationalScore) || 0;
    });
    return radarEntry;
  });
  // Prepare Top Performers (per high-level category and per low-level metric)
  const bestPerCategory = {};
  Object.entries(metricsBreakdownProcessed.highLevelCategories).forEach(
    ([displayKey, catData]) => {
      bestPerCategory[displayKey] = catData.topPerformer;
    }
  );
  const bestPerMetric = {};
  Object.entries(metricsBreakdownProcessed.lowLevelMetrics).forEach(
    ([displayKey, metricData]) => {
      bestPerMetric[displayKey] = metricData.topPerformer;
    }
  );
  // Prepare Task Performance Data: hard-coded task -> category mapping
  // (task display names must match keys in task_level_performance).
  const taskCategoryMap = {
    "Generating a Creative Idea": "creative",
    "Creating a Travel Itinerary": "creative",
    "Following Up on a Job Application": "practical",
    "Planning Your Weekly Meals": "practical",
    "Making a Decision Between Options": "analytical",
    "Understanding a Complex Topic": "analytical",
  };
  // Pass the original rawDataInput to the helper, which expects snake_case keys internally
  const taskPerformanceResults = processTaskPerformance(
    rawDataInput,
    taskCategoryMap,
    modelOrder
  );
  const tasks = Object.keys(taskLevelPerformanceRaw || {}); // Use original snake_case keys
  // Group the tasks that actually appear in the data by category.
  const taskCategories = {};
  Object.entries(taskCategoryMap).forEach(([task, category]) => {
    if (!taskCategories[category]) taskCategories[category] = [];
    if (tasks.includes(task)) taskCategories[category].push(task);
  });
  // Collect the union of metric keys seen across all tasks/models.
  const taskMetrics = new Set();
  Object.values(taskLevelPerformanceRaw || {}).forEach((taskData) => {
    Object.values(taskData || {}).forEach((modelData) => {
      Object.keys(modelData || {}).forEach((metric) => taskMetrics.add(metric));
    });
  }); // metric is snake_case
  const taskMetricsDisplayList = Array.from(taskMetrics)
    .map(formatDisplayKey)
    .sort(); // Create display list
  const taskMetricsSnakeList = Array.from(taskMetrics).sort(); // List of original snake_case keys
  // Final Return Structure
  return {
    models: overallRankingProcessed, // camelCase keys for top level
    metricsData: metricsBreakdownProcessed, // Title Case keys for categories/metrics
    radarData: radarChartData,
    bestPerCategory: bestPerCategory, // Title Case keys
    bestPerMetric: bestPerMetric, // Title Case keys
    overviewCardData: taskPerformanceResults, // camelCase keys expected from helper
    rawData: {
      // Keep original structures under camelCase keys for clarity
      taskLevelPerformance: taskLevelPerformanceRaw, // snake_case keys inside
      mrpDemographics: mrpDemographicsRaw, // Title Case metric keys inside
      // Processed lists/maps for filtering/display
      demographicOptions: demographicOptions,
      availableMetrics: availableMetricsList, // Title Case metric names
      tasks: tasks,
      taskCategories: taskCategories,
      taskMetrics: taskMetricsDisplayList, // Title Case metric names for display
      taskMetricsSnake: taskMetricsSnakeList, // snake_case keys for lookup
      taskCategoryMap: taskCategoryMap,
    },
    metadata: metadata, // Original structure
    equityAnalysis: equityAnalysis, // Original structure (snake_case keys)
  };
}