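// Fixed chart color for each evaluated model. The hex values are the
// Okabe-Ito colorblind-safe palette, so series stay distinguishable.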
const MODEL_COLORS = {
  "gpt-4o": "#0072B2",
  "claude-3.7-sonnet": "#D55E00",
  "deepseek-r1": "#F0E442",
  o1: "#CC79A7",
  "gemini-2.0-flash-001": "#009E73",
  "llama-3.1-405b-instruct": "#56B4E9",
};
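
/**
 * Convert a camelCase key into a spaced, Title Case display string.
 * Falsy inputs pass through unchanged.
 *
 * @example
 * camelToTitle("contextMemory"); // "Context Memory"
 */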
export const camelToTitle = (str) => {
  if (!str) return str;
  // Insert a space before each capital, then trim so inputs that already
  // start with a capital do not gain a leading space.
  const spaced = str.replace(/([A-Z])/g, " $1").trim();
  return spaced.charAt(0).toUpperCase() + spaced.slice(1);
};
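
/**
 * Convert a snake_case or kebab-case key into a Title Case display string.
 * Missing or non-string keys normalize to "N/A".
 *
 * @example
 * formatDisplayKey("background_and_culture"); // "Background And Culture"
 */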
export const formatDisplayKey = (key) => {
  if (!key || typeof key !== "string") return "N/A";
  if (key === "N/A") return "N/A";
  return key
    .replace(/[_-]/g, " ") // snake_case and kebab-case both become spaces
    .trim()
    .replace(/\b\w/g, (l) => l.toUpperCase());
};
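
/**
 * Map a tri-state significance flag (true / false / unknown) to a display
 * symbol, a Tailwind-style text class, and a tooltip quoting the p-value.
 *
 * @example
 * getSignificanceIndicator(true, 0.012).symbol; // "✓"
 */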
export function getSignificanceIndicator(isSignificant, pValue, alpha = 0.05) {
  const pValueFormatted =
    typeof pValue === "number" && !isNaN(pValue) ? pValue.toFixed(3) : "N/A";
  if (isSignificant === true) {
    return {
      symbol: "✓",
      className: "text-green-600",
      tooltip: `Statistically Significant (p=${pValueFormatted} < ${alpha})`,
    };
  } else if (isSignificant === false) {
    return {
      symbol: "✗",
      className: "text-red-600",
      tooltip: `Not Statistically Significant (p=${pValueFormatted} ≥ ${alpha})`,
    };
  } else {
    return {
      symbol: "?",
      className: "text-gray-400",
      tooltip: "Significance Undetermined",
    };
  }
}
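
/**
 * Choose an icon, color class, and tooltip for a demographic equity gap.
 * Branches are checked in priority order: confirmed concern, significance
 * undetermined, large effect without significance, significant without a
 * large effect, then neither.
 */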
export function getEquityIndicatorStyle(
  isConcern,
  isLargeEffect,
  isSignificant,
  pValue,
  effectSizeClass
) {
  const pValueText =
    typeof pValue === "number" && !isNaN(pValue)
      ? `p=${pValue.toFixed(3)}`
      : "p=N/A";
  const effectText = `Effect: ${effectSizeClass || "N/A"}`;
  if (isConcern === true) {
    return {
      icon: "▲",
      colorClass: "text-red-600",
      tooltip: `Equity Concern (${effectText}, Significant, ${pValueText})`,
    };
  } else if (isSignificant === null) {
    return {
      icon: "?",
      colorClass: "text-gray-500",
      tooltip: `Significance Undetermined (${effectText})`,
    };
  } else if (isLargeEffect === true && isSignificant === false) {
    return {
      icon: "●",
      colorClass: "text-yellow-600",
      tooltip: `Large Effect but Not Statistically Significant (${pValueText})`,
    };
  } else if (isSignificant === true) {
    return {
      icon: "✓",
      colorClass: "text-green-600",
      tooltip: `Statistically Significant but Not Large Effect (${effectText}, ${pValueText})`,
    };
  } else {
    return {
      icon: "✓",
      colorClass: "text-gray-400",
      tooltip: `Not Statistically Significant (${effectText}, ${pValueText})`,
    };
  }
}
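
/**
 * Badge color classes for a score on a [min, max] scale. The score is
 * converted to a percentage of the range (a min > max range is treated as
 * "lower is better") and banded: >= 80 green, >= 50 blue, >= 20 yellow,
 * otherwise red. Missing or non-numeric scores fall back to gray.
 *
 * @example
 * getScoreBadgeColor(85);        // "bg-green-100 text-green-800"
 * getScoreBadgeColor(0.3, 0, 1); // "bg-yellow-100 text-yellow-800"
 */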
export function getScoreBadgeColor(score, min = 0, max = 100) {
  const numericScore = Number(score);
  if (
    score === null ||
    score === undefined ||
    score === "N/A" ||
    isNaN(numericScore)
  ) {
    return "bg-gray-100 text-gray-800";
  }
  const range = Math.abs(max - min);
  if (range <= 0) return "bg-gray-100 text-gray-800";
  let percent;
  if (max > min) {
    percent = ((numericScore - min) / range) * 100;
  } else {
    // Inverted scale (min > max): lower raw scores map to higher percentages.
    percent = ((min - numericScore) / range) * 100;
  }
  if (percent >= 80) return "bg-green-100 text-green-800";
  if (percent >= 50) return "bg-blue-100 text-blue-800";
  if (percent >= 20) return "bg-yellow-100 text-yellow-800";
  return "bg-red-100 text-red-800";
}
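
/**
 * Text color class for a 0-100 score: >= 80 green, >= 60 blue, >= 40
 * yellow, otherwise red; gray for missing or non-numeric values.
 */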
export function getScoreColor(score) {
  const numericScore = Number(score);
  if (
    score === null ||
    score === undefined ||
    score === "N/A" ||
    isNaN(numericScore)
  ) {
    return "text-gray-400";
  }
  if (numericScore >= 80) return "text-green-600 font-medium";
  if (numericScore >= 60) return "text-blue-600";
  if (numericScore >= 40) return "text-yellow-600";
  return "text-red-600";
}
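
/**
 * Look up the human-readable description for a metric or leaderboard
 * column. Keys are normalized with formatDisplayKey first, with a raw-key
 * fallback, so "context_memory" and "Context Memory" both resolve.
 *
 * @example
 * getMetricTooltip("context_memory"); // "Ability to maintain conversation context"
 */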
export const getMetricTooltip = (key) => {
  const titleCaseKey = formatDisplayKey(key);
  const tooltips = {
    // High-level categories
    Helpfulness:
      "How well the model provides useful assistance that addresses user needs",
    Communication:
      "Quality of clarity, coherence, and appropriateness of writing style",
    Understanding:
      "How well the model comprehends requests and contextual information",
    Adaptiveness:
      "How well the model adjusts to user needs and feedback during conversation",
    Trustworthiness:
      "Transparency, accuracy, and consistency in model responses",
    Personality:
      "Consistency and definition of the model's persona and ethical alignment",
    "Background And Culture":
      "Cultural sensitivity, relevance, and freedom from bias",
    "Repeat Usage":
      "User satisfaction and willingness to use the model again (score 0-100).",
    // Low-level metrics
    Effectiveness: "How effectively the model helps accomplish specific goals",
    Comprehensiveness:
      "How thoroughly the model addresses all aspects of requests",
    Usefulness: "Practicality and relevance of suggestions or solutions",
    "Tone And Language Style":
      "Appropriateness of tone and language for the context",
    "Conversation Flow": "Natural and conversational quality of responses",
    "Detail And Technical Language":
      "Appropriate level of detail and technical language",
    Accuracy: "Accuracy in interpreting user requests",
    "Context Memory": "Ability to maintain conversation context",
    Intuitiveness: "Ability to pick up on implicit aspects of requests",
    Flexibility: "Adapting responses based on user feedback",
    Clarity: "Ability to clarify ambiguities or misunderstandings",
    "Conversation Building": "Building upon previous exchanges in conversation",
    Consistency: "Consistency of responses across similar questions",
    Confidence: "User confidence in accuracy of information",
    Transparency: "Openness about limitations or uncertainties",
    "Personality Consistency":
      "Consistency of personality throughout interactions",
    "Distinct Personality": "How well-defined the model's personality is",
    "Honesty Empathy Fairness": "Alignment with ethical expectations",
    "Ethical Alignment": "Alignment with user culture, viewpoint, or values",
    "Cultural Awareness":
      "Recognition of when cultural perspective is relevant",
    "Bias And Stereotypes": "Freedom from stereotypes and bias in responses",
    // Leaderboard summary columns
    "Overall Score":
      "Average score across high-level categories (0-100). Higher is better.",
    "Overall SD":
      "Standard Deviation (± points) of scores across high-level categories. Lower indicates more consistent performance across capabilities.",
    "Max Equity Gap":
      "Score difference (points) for the demographic gap with the largest statistical effect size for this model. Status icon indicates Equity Concern (▲) and/or Significance (✓/✗/?). Hover for details.",
    "Max Gap Area":
      "The specific Demographic Factor and Category where the 'Max Equity Gap' (largest effect size gap) occurred for this model.",
    "Equity Concerns (%)":
      "Percentage of evaluated demographic gaps flagged as Equity Concerns (Large Effect & Statistically Significant, p<0.05). Lower is better.",
    "User Retention":
      "Model score for the 'Repeat Usage' category (0-100), indicating likelihood of users using the model again.",
  };
  return tooltips[titleCaseKey] || tooltips[key] || "No description available";
};
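
/**
 * Badge color classes for an effect-size label: Negligible green, Small
 * blue, Medium yellow, Large red; anything else gray.
 */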
export function getEffectSizeBadgeColor(effectSizeClass) {
  if (!effectSizeClass || effectSizeClass === "N/A") {
    return "bg-gray-100 text-gray-800";
  }
  switch (effectSizeClass) {
    case "Negligible":
      return "bg-green-100 text-green-800";
    case "Small":
      return "bg-blue-100 text-blue-800";
    case "Medium":
      return "bg-yellow-100 text-yellow-800";
    case "Large":
      return "bg-red-100 text-red-800";
    default:
      return "bg-gray-100 text-gray-800";
  }
}
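
/**
 * Summarize task-level results: for every task and for each task category
 * (creative / practical / analytical), find the model with the highest mean
 * metric score, along with that winner's top three metrics per task and top
 * five averaged metrics per category.
 */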
function processTaskPerformance(rawData, taskCategoryMap, modelOrder) {
  const result = {
    bestModelPerTask: {},
    keyMetricsByTask: {},
    bestModelPerTaskCategory: {
      creative: null,
      practical: null,
      analytical: null,
    },
    keyMetricsByTaskCategory: { creative: [], practical: [], analytical: [] },
  };

  const taskPerformance = rawData?.task_level_performance;
  if (!taskPerformance || typeof taskPerformance !== "object") {
    console.warn(
      "Task level performance data missing or invalid in processTaskPerformance input."
    );
    return result;
  }

  // Per-task pass: find the model with the highest mean metric score.
  Object.keys(taskPerformance).forEach((taskName) => {
    const taskData = taskPerformance[taskName];
    if (!taskData) return;
    let taskBestModel = null;
    let taskBestAvgScore = -Infinity;
    let taskBestModelMetrics = null;
    modelOrder.forEach((modelName) => {
      const modelMetrics = taskData[modelName];
      if (modelMetrics && typeof modelMetrics === "object") {
        const scores = Object.values(modelMetrics)
          .map((s) => Number(s))
          .filter((s) => !isNaN(s));
        if (scores.length > 0) {
          const avgScore =
            scores.reduce((sum, score) => sum + score, 0) / scores.length;
          if (avgScore > taskBestAvgScore) {
            taskBestAvgScore = avgScore;
            taskBestModel = modelName;
            taskBestModelMetrics = modelMetrics;
          }
        }
      }
    });

    if (taskBestModel && taskBestModelMetrics) {
      result.bestModelPerTask[taskName] = {
        model: taskBestModel,
        score: taskBestAvgScore,
        color: MODEL_COLORS[taskBestModel] || "#999999",
      };
      // Keep the winner's three strongest metrics for display.
      const metricsArray = Object.entries(taskBestModelMetrics)
        .map(([metricKey, score]) => ({ metricKey, score: Number(score) || 0 }))
        .sort((a, b) => b.score - a.score);
      result.keyMetricsByTask[taskName] = metricsArray
        .slice(0, 3)
        .map((m) => ({ ...m, metricName: formatDisplayKey(m.metricKey) }));
    } else {
      result.bestModelPerTask[taskName] = {
        model: "N/A",
        score: "N/A",
        color: "#999999",
      };
      result.keyMetricsByTask[taskName] = [];
    }
  });

  // Per-category pass: pool metric scores across each category's tasks,
  // then pick the model with the highest overall mean.
  const tasksByCategory = { creative: [], practical: [], analytical: [] };
  Object.entries(taskCategoryMap).forEach(([task, category]) => {
    if (tasksByCategory[category] && taskPerformance[task]) {
      tasksByCategory[category].push(task);
    }
  });
  Object.entries(tasksByCategory).forEach(([category, tasks]) => {
    const categoryNameDisplay = `${
      category.charAt(0).toUpperCase() + category.slice(1)
    } Tasks`;
    if (tasks.length === 0) {
      result.bestModelPerTaskCategory[category] = {
        model: "N/A",
        score: "N/A",
        color: "#999999",
        categoryName: categoryNameDisplay,
      };
      result.keyMetricsByTaskCategory[category] = [];
      return;
    }
    const categoryModelScores = {};
    modelOrder.forEach((modelName) => {
      categoryModelScores[modelName] = { totalScore: 0, count: 0, metrics: {} };
      tasks.forEach((task) => {
        if (taskPerformance[task]?.[modelName]) {
          Object.entries(taskPerformance[task][modelName]).forEach(
            ([metricKey, score]) => {
              const numScore = Number(score);
              if (!isNaN(numScore)) {
                categoryModelScores[modelName].totalScore += numScore;
                categoryModelScores[modelName].count++;
                if (!categoryModelScores[modelName].metrics[metricKey])
                  categoryModelScores[modelName].metrics[metricKey] = {
                    sum: 0,
                    count: 0,
                  };
                categoryModelScores[modelName].metrics[metricKey].sum +=
                  numScore;
                categoryModelScores[modelName].metrics[metricKey].count++;
              }
            }
          );
        }
      });
    });
    let bestAvg = -Infinity;
    let bestCatModel = null;
    Object.entries(categoryModelScores).forEach(([model, data]) => {
      if (data.count > 0) {
        const avg = data.totalScore / data.count;
        if (avg > bestAvg) {
          bestAvg = avg;
          bestCatModel = model;
        }
      }
    });

    if (bestCatModel) {
      result.bestModelPerTaskCategory[category] = {
        model: bestCatModel,
        score: Number(bestAvg.toFixed(1)),
        color: MODEL_COLORS[bestCatModel] || "#999999",
        categoryName: categoryNameDisplay,
      };
      const bestModelMetricsData =
        categoryModelScores[bestCatModel]?.metrics || {};
      // Average each metric across the category's tasks; keep the top five.
      const metricAverages = Object.entries(bestModelMetricsData)
        .map(([metricKey, data]) => ({
          metricKey,
          score: data.count > 0 ? data.sum / data.count : 0,
        }))
        .sort((a, b) => b.score - a.score);
      result.keyMetricsByTaskCategory[category] = metricAverages
        .slice(0, 5)
        .map((m) => ({
          metric: formatDisplayKey(m.metricKey),
          score: m.score,
          scoreDisplay: m.score.toFixed(1),
        }));
    } else {
      result.bestModelPerTaskCategory[category] = {
        model: "N/A",
        score: "N/A",
        color: "#999999",
        categoryName: categoryNameDisplay,
      };
      result.keyMetricsByTaskCategory[category] = [];
    }
  });
  return result;
}
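
/**
 * Transform the raw evaluation payload into the shape the visualization
 * layer consumes: ranked model summaries, high- and low-level metric
 * breakdowns, radar chart rows, per-task winners, and demographic filter
 * options. Returns an empty default shape when `model_order` is missing,
 * so callers can render safely before data arrives.
 */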
export function prepareDataForVisualization(rawDataInput) {
  // Safe empty shape returned when the input is unusable.
  const defaultReturn = {
    models: [],
    metricsData: { highLevelCategories: {}, lowLevelMetrics: {} },
    radarData: [],
    bestPerCategory: {},
    bestPerMetric: {},
    overviewCardData: {},
    rawData: {},
    metadata: {},
    equityAnalysis: {},
  };
  if (
    !rawDataInput ||
    !rawDataInput.model_order ||
    !Array.isArray(rawDataInput.model_order)
  ) {
    console.error(
      "prepareDataForVisualization received invalid rawData.",
      rawDataInput
    );
    return defaultReturn;
  }

  // Pull the top-level sections with safe fallbacks.
  const modelOrder = rawDataInput.model_order;
  const equityAnalysis = rawDataInput.equity_analysis || {
    all_equity_gaps: [],
    model_max_effect_gaps: {},
    universal_issues: [],
    assessment_method: {},
    demographic_variation_stats: {},
  };
  const allGaps = equityAnalysis.all_equity_gaps || [];
  const metadata = rawDataInput.metadata || {};
  const mrpDemographicsRaw = rawDataInput.mrp_demographics || {};
  const taskLevelPerformanceRaw = rawDataInput.task_level_performance || {};

  // Collect demographic factors, their levels, and the metrics available
  // for each, to drive the demographic filter controls.
  const demographicFactors = new Set();
  const demographicLevels = {};
  const availableMetrics = new Set();
  if (mrpDemographicsRaw && typeof mrpDemographicsRaw === "object") {
    Object.values(mrpDemographicsRaw).forEach((modelData) => {
      Object.entries(modelData || {}).forEach(([factor, factorData]) => {
        demographicFactors.add(factor);
        if (!demographicLevels[factor]) demographicLevels[factor] = new Set();
        Object.entries(factorData || {}).forEach(([level, levelData]) => {
          demographicLevels[factor].add(level);
          Object.keys(levelData || {}).forEach((metric) =>
            availableMetrics.add(metric)
          );
        });
      });
    });
  }
  const demographicOptions = {};
  demographicFactors.forEach((factor) => {
    demographicOptions[factor] = Array.from(
      demographicLevels[factor] || new Set()
    ).sort();
  });
  const availableMetricsList = Array.from(availableMetrics).sort();

  // Enrich each ranked model with parsed scores and its equity-gap summary.
  const overallRankingProcessed = (rawDataInput.overall_ranking || []).map(
    (modelData) => {
      const modelName = modelData.model;
      const maxEffectGapDetails = modelData.max_effect_gap_details || {};
      const safeParseFloat = (val) => {
        const num = Number(val);
        return isNaN(num) ? null : num;
      };

      const modelSpecificGaps = allGaps.filter(
        (gap) => gap.model === modelName
      );
      const totalGapsForModel = modelSpecificGaps.length;
      const concernCountForModel = modelSpecificGaps.filter(
        (gap) => gap.is_equity_concern === true
      ).length;
      let equityConcernPercentage = null;
      if (totalGapsForModel > 0) {
        equityConcernPercentage =
          (concernCountForModel / totalGapsForModel) * 100;
      }

      return {
        rank: modelData.rank,
        model: modelName,
        overallScore: safeParseFloat(modelData.overall_score),
        highLevelCatScore: safeParseFloat(modelData.high_level_cat_score),
        lowLevelCatScore: safeParseFloat(modelData.low_level_cat_score),
        color: MODEL_COLORS[modelName] || "#999999",
        stdDevAcrossCats: modelData.std_dev_across_cats,
        stdDevAcrossCatsNumeric: safeParseFloat(modelData.std_dev_across_cats),
        repeatUsageScore: safeParseFloat(modelData.repeat_usage_score),
        maxEffectCategory: modelData.max_effect_category,
        maxEffectFactor: maxEffectGapDetails.demographic_factor,
        maxEffectSize: safeParseFloat(maxEffectGapDetails.effect_size),
        maxEffectGap: safeParseFloat(maxEffectGapDetails.score_range),
        maxEffectConcernFlag: maxEffectGapDetails.is_equity_concern ?? false,
        maxEffectSignificant: maxEffectGapDetails.is_statistically_significant,
        maxEffectPValue: maxEffectGapDetails.p_value,
        maxEffectSizeClass: maxEffectGapDetails.effect_size_class || "N/A",
        maxEffectRawNHeuristic:
          maxEffectGapDetails.raw_n_confidence_heuristic || "N/A",
        maxEffectGapDetails: maxEffectGapDetails,
        equityConcernPercentage: equityConcernPercentage,
      };
    }
  );

  // Normalize the metrics breakdown at both category granularities.
  const metricsBreakdownProcessed = {
    highLevelCategories: {},
    lowLevelMetrics: {},
  };
  if (
    rawDataInput.metrics_breakdown &&
    typeof rawDataInput.metrics_breakdown === "object"
  ) {
    const processCategory = (displayKey, categoryData) => {
      if (!categoryData || !categoryData.model_scores) {
        console.warn(`Missing model_scores for category: ${displayKey}`);
        return {
          modelScores: {},
          topPerformer: { model: "N/A", score: null, color: "#999999" },
        };
      }
      const internalMetricKey = categoryData._internal_category_name;
      const processedModelScores = {};
      modelOrder.forEach((modelName) => {
        const scores = categoryData.model_scores[modelName];
        if (!scores) {
          processedModelScores[modelName] = {
            nationalScore: null,
            color: MODEL_COLORS[modelName] || "#999999",
            maxEffectGapInfo: {},
          };
          return;
        }
        const maxEffectGapInfoForCat = scores.max_effect_gap_info || {};
        processedModelScores[modelName] = {
          nationalScore: scores.national_score ?? null,
          color: MODEL_COLORS[modelName] || "#999999",
          maxEffectGapInfo: maxEffectGapInfoForCat,
        };
      });
      const topPerf = categoryData.top_performer || {};
      const topPerfScore =
        topPerf.score === "N/A" || topPerf.score === null
          ? null
          : Number(topPerf.score);
      return {
        modelScores: processedModelScores,
        topPerformer: {
          model: topPerf.model || "N/A",
          score: isNaN(topPerfScore) ? null : topPerfScore,
          color: MODEL_COLORS[topPerf.model] || "#999999",
        },
        internalMetricKey: internalMetricKey,
      };
    };
    Object.entries(
      rawDataInput.metrics_breakdown.high_level_categories || {}
    ).forEach(([displayKey, catData]) => {
      metricsBreakdownProcessed.highLevelCategories[displayKey] =
        processCategory(displayKey, catData);
    });
    Object.entries(
      rawDataInput.metrics_breakdown.low_level_metrics || {}
    ).forEach(([displayKey, metricData]) => {
      metricsBreakdownProcessed.lowLevelMetrics[displayKey] = processCategory(
        displayKey,
        metricData
      );
    });
  } else {
    console.warn("rawDataInput.metrics_breakdown is missing or not an object.");
  }

  // One radar chart row per high-level category, one value per model.
  const radarChartData = Object.entries(
    metricsBreakdownProcessed.highLevelCategories
  ).map(([displayKey, categoryData]) => {
    const radarEntry = { category: displayKey };
    modelOrder.forEach((modelName) => {
      // Missing scores chart as 0 on the radar.
      radarEntry[modelName] =
        Number(categoryData.modelScores[modelName]?.nationalScore) || 0;
    });
    return radarEntry;
  });

  // Top performer per high-level category and per low-level metric.
  const bestPerCategory = {};
  Object.entries(metricsBreakdownProcessed.highLevelCategories).forEach(
    ([displayKey, catData]) => {
      bestPerCategory[displayKey] = catData.topPerformer;
    }
  );
  const bestPerMetric = {};
  Object.entries(metricsBreakdownProcessed.lowLevelMetrics).forEach(
    ([displayKey, metricData]) => {
      bestPerMetric[displayKey] = metricData.topPerformer;
    }
  );

  // Task-level summaries, grouped into creative/practical/analytical.
  const taskCategoryMap = {
    "Generating a Creative Idea": "creative",
    "Creating a Travel Itinerary": "creative",
    "Following Up on a Job Application": "practical",
    "Planning Your Weekly Meals": "practical",
    "Making a Decision Between Options": "analytical",
    "Understanding a Complex Topic": "analytical",
  };
  const taskPerformanceResults = processTaskPerformance(
    rawDataInput,
    taskCategoryMap,
    modelOrder
  );
  const tasks = Object.keys(taskLevelPerformanceRaw || {});
  const taskCategories = {};
  Object.entries(taskCategoryMap).forEach(([task, category]) => {
    if (!taskCategories[category]) taskCategories[category] = [];
    if (tasks.includes(task)) taskCategories[category].push(task);
  });
  const taskMetrics = new Set();
  Object.values(taskLevelPerformanceRaw || {}).forEach((taskData) => {
    Object.values(taskData || {}).forEach((modelData) => {
      Object.keys(modelData || {}).forEach((metric) => taskMetrics.add(metric));
    });
  });
  const taskMetricsDisplayList = Array.from(taskMetrics)
    .map(formatDisplayKey)
    .sort();
  const taskMetricsSnakeList = Array.from(taskMetrics).sort();

  return {
    models: overallRankingProcessed,
    metricsData: metricsBreakdownProcessed,
    radarData: radarChartData,
    bestPerCategory: bestPerCategory,
    bestPerMetric: bestPerMetric,
    overviewCardData: taskPerformanceResults,
    rawData: {
      taskLevelPerformance: taskLevelPerformanceRaw,
      mrpDemographics: mrpDemographicsRaw,
      demographicOptions: demographicOptions,
      availableMetrics: availableMetricsList,
      tasks: tasks,
      taskCategories: taskCategories,
      taskMetrics: taskMetricsDisplayList,
      taskMetricsSnake: taskMetricsSnakeList,
      taskCategoryMap: taskCategoryMap,
    },
    metadata: metadata,
    equityAnalysis: equityAnalysis,
  };
}