Spaces:

nlpetprolific
/

user-experience-leaderboard

Running

File size: 26,408 Bytes

20e666e

// lib/utils.js

/**
 * Constants
 *
 * Chart/badge color (hex) for each known model name. Callers in this module
 * fall back to "#999999" when a model is not listed here. The map is frozen so
 * the shared palette cannot be mutated by accident.
 */
const MODEL_COLORS = Object.freeze({
  "gpt-4o": "#0072B2", // Strong blue
  "claude-3.7-sonnet": "#D55E00", // Vermillion/orange-red
  "deepseek-r1": "#F0E442", // Yellow
  "o1": "#CC79A7", // Pink
  "gemini-2.0-flash-001": "#009E73", // Bluish green
  "llama-3.1-405b-instruct": "#56B4E9", // Light blue
});

// --- Helper Functions ---

/**
 * Converts camelCase (or PascalCase) to Title Case.
 *
 * A space is inserted before every ASCII capital letter and the first
 * character is upper-cased. The spaced string is trimmed BEFORE the first
 * character is taken, so input that already starts with a capital
 * ("FooBar") yields "Foo Bar" rather than " Foo Bar".
 *
 * @param {string} str Input string.
 * @returns {string} Title Case string; falsy input is returned unchanged.
 */
export const camelToTitle = (str) => {
  if (!str) return str;
  const spaced = str.replace(/([A-Z])/g, " $1").trim();
  return spaced.charAt(0).toUpperCase() + spaced.slice(1);
};

/**
 * Formats a metric/factor key for display: underscores and hyphens become
 * spaces, surrounding whitespace is trimmed, and the first character of every
 * word is upper-cased. Existing capitalisation is left as it is.
 *
 * @param {*} key Key to format (snake_case or kebab-case string expected).
 * @returns {string} Display label, or "N/A" for empty, non-string or "N/A" input.
 */
export const formatDisplayKey = (key) => {
  const isUsable = typeof key === "string" && key !== "" && key !== "N/A";
  if (!isUsable) {
    return "N/A";
  }
  // Both separator kinds are mapped to a single space each.
  const spaced = key.replace(/[_-]/g, " ").trim();
  return spaced.replace(/\b\w/g, (firstChar) => firstChar.toUpperCase());
};

/**
 * Builds the symbol, CSS class and tooltip for a significance flag.
 *
 * @param {*} isSignificant `true`, `false`, or anything else (treated as undetermined).
 * @param {*} pValue p-value; shown with three decimals when it is a non-NaN number, otherwise "N/A".
 * @param {number} [alpha=0.05] Threshold quoted in the tooltip text.
 * @returns {{symbol: string, className: string, tooltip: string}} Indicator descriptor.
 */
export function getSignificanceIndicator(isSignificant, pValue, alpha = 0.05) {
  let pText = "N/A";
  if (typeof pValue === "number" && !isNaN(pValue)) {
    pText = pValue.toFixed(3);
  }

  switch (isSignificant) {
    case true:
      return {
        symbol: "✓",
        className: "text-green-600",
        tooltip: `Statistically Significant (p=${pText} < ${alpha})`,
      };
    case false:
      return {
        symbol: "✗",
        className: "text-red-600",
        tooltip: `Not Statistically Significant (p=${pText} ≥ ${alpha})`,
      };
    default:
      // Anything that is not strictly true/false is reported as unknown.
      return {
        symbol: "?",
        className: "text-gray-400",
        tooltip: "Significance Undetermined",
      };
  }
}

/**
 * Determines the style and tooltip for an equity gap status indicator.
 *
 * Branch priority (first match wins):
 *   1. `isConcern === true`                      -> red "▲" (equity concern)
 *   2. significance missing (null OR undefined)  -> gray "?" (undetermined)
 *   3. large effect but not significant          -> yellow "●"
 *   4. significant (but not flagged a concern)   -> green "✓"
 *   5. otherwise                                 -> gray "✓" (not significant)
 *
 * `undefined` significance is treated the same as `null`: a missing value
 * means "unknown", not "not significant". This matches how
 * getSignificanceIndicator treats any non-boolean flag.
 *
 * @param {*} isConcern Whether the gap is flagged as an equity concern.
 * @param {*} isLargeEffect Whether the effect size is classed as large.
 * @param {*} isSignificant `true`/`false`, or null/undefined when unknown.
 * @param {*} pValue p-value; shown with three decimals when it is a non-NaN number.
 * @param {*} effectSizeClass Effect size label quoted in the tooltip ("N/A" when falsy).
 * @returns {{icon: string, colorClass: string, tooltip: string}} Indicator descriptor.
 */
export function getEquityIndicatorStyle(
  isConcern,
  isLargeEffect,
  isSignificant,
  pValue,
  effectSizeClass
) {
  const pValueText =
    typeof pValue === "number" && !Number.isNaN(pValue)
      ? `p=${pValue.toFixed(3)}`
      : "p=N/A";
  const effectText = `Effect: ${effectSizeClass || "N/A"}`;
  const significanceUnknown =
    isSignificant === null || isSignificant === undefined;

  if (isConcern === true) {
    return {
      icon: "▲",
      colorClass: "text-red-600",
      tooltip: `Equity Concern (${effectText}, Significant, ${pValueText})`,
    };
  }
  if (significanceUnknown) {
    return {
      icon: "?",
      colorClass: "text-gray-500",
      tooltip: `Significance Undetermined (${effectText})`,
    };
  }
  if (isLargeEffect === true && isSignificant === false) {
    return {
      icon: "●",
      colorClass: "text-yellow-600",
      tooltip: `Large Effect but Not Statistically Significant (${pValueText})`,
    };
  }
  if (isSignificant === true) {
    return {
      icon: "✓",
      colorClass: "text-green-600",
      tooltip: `Statistically Significant but Not Large Effect (${effectText}, ${pValueText})`,
    };
  }
  return {
    icon: "✓",
    colorClass: "text-gray-400",
    tooltip: `Not Statistically Significant (${effectText}, ${pValueText})`,
  };
}

/**
 * Picks the BADGE classes (background + text) for a score.
 *
 * The score is positioned on the min..max scale as a percentage. When `max`
 * is below `min` the scale is treated as inverted (a lower score ranks
 * higher). Tiers: >= 80 green, >= 50 blue, >= 20 yellow, otherwise red.
 *
 * @param {*} score Score to classify; null, undefined, "N/A" and non-numeric values get the neutral badge.
 * @param {number} [min=0] Scale value that maps to 0%.
 * @param {number} [max=100] Scale value that maps to 100%.
 * @returns {string} Space-separated CSS class names.
 */
export function getScoreBadgeColor(score, min = 0, max = 100) {
  const NEUTRAL = "bg-gray-100 text-gray-800";
  const value = Number(score);
  const isMissing =
    score === null || score === undefined || score === "N/A" || isNaN(value);
  if (isMissing) return NEUTRAL;

  const span = Math.abs(max - min);
  if (span <= 0) return NEUTRAL; // degenerate scale: nothing to compare against

  // Distance travelled from the "worst" end of the scale.
  const distance = max > min ? value - min : min - value;
  const percent = (distance / span) * 100;

  const tiers = [
    [80, "bg-green-100 text-green-800"],
    [50, "bg-blue-100 text-blue-800"],
    [20, "bg-yellow-100 text-yellow-800"],
  ];
  const hit = tiers.find(([floor]) => percent >= floor);
  return hit ? hit[1] : "bg-red-100 text-red-800";
}

/**
 * Picks the TEXT color classes for a score compared against fixed cut-offs
 * (higher is better): >= 80 green (medium weight), >= 60 blue, >= 40 yellow,
 * otherwise red.
 *
 * @param {*} score Score to classify; null, undefined, "N/A" and non-numeric values get gray.
 * @returns {string} Space-separated CSS class names.
 */
export function getScoreColor(score) {
  const value = Number(score);
  const unusable =
    score === null || score === undefined || score === "N/A" || isNaN(value);

  if (unusable) {
    return "text-gray-400";
  }
  if (value < 40) {
    return "text-red-600";
  }
  if (value < 60) {
    return "text-yellow-600";
  }
  return value < 80 ? "text-blue-600" : "text-green-600 font-medium";
}

/**
 * Tooltip text keyed by Title Case display name (matching dropdowns/headers).
 *
 * Built once at module load rather than on every lookup. A Map is used so
 * that only the entries listed here can match: keys such as "constructor" or
 * "toString" never resolve to inherited Object.prototype members.
 */
const METRIC_TOOLTIPS = new Map(
  Object.entries({
    // High-level
    Helpfulness:
      "How well the model provides useful assistance that addresses user needs",
    Communication:
      "Quality of clarity, coherence, and appropriateness of writing style",
    Understanding:
      "How well the model comprehends requests and contextual information",
    Adaptiveness:
      "How well the model adjusts to user needs and feedback during conversation",
    Trustworthiness:
      "Transparency, accuracy, and consistency in model responses",
    Personality:
      "Consistency and definition of the model's persona and ethical alignment",
    "Background And Culture":
      "Cultural sensitivity, relevance, and freedom from bias",
    "Repeat Usage":
      "User satisfaction and willingness to use the model again (score 0-100).",

    // Low-level (use formatted names matching display)
    Effectiveness: "How effectively the model helps accomplish specific goals",
    Comprehensiveness:
      "How thoroughly the model addresses all aspects of requests",
    Usefulness: "Practicality and relevance of suggestions or solutions",
    "Tone And Language Style":
      "Appropriateness of tone and language for the context",
    "Conversation Flow": "Natural and conversational quality of responses",
    "Detail And Technical Language":
      "Appropriate level of detail and technical language",
    Accuracy: "Accuracy in interpreting user requests",
    "Context Memory": "Ability to maintain conversation context",
    Intuitiveness: "Ability to pick up on implicit aspects of requests",
    Flexibility: "Adapting responses based on user feedback",
    Clarity: "Ability to clarify ambiguities or misunderstandings",
    "Conversation Building": "Building upon previous exchanges in conversation",
    Consistency: "Consistency of responses across similar questions",
    Confidence: "User confidence in accuracy of information",
    Transparency: "Openness about limitations or uncertainties",
    "Personality Consistency":
      "Consistency of personality throughout interactions",
    "Distinct Personality": "How well-defined the model's personality is",
    "Honesty Empathy Fairness": "Alignment with ethical expectations",
    "Ethical Alignment": "Alignment with user culture, viewpoint, or values",
    "Cultural Awareness":
      "Recognition of when cultural perspective is relevant",
    "Bias And Stereotypes": "Freedom from stereotypes and bias in responses",

    // Table headers
    "Overall Score":
      "Average score across high-level categories (0-100). Higher is better.",
    "Overall SD":
      "Standard Deviation (± points) of scores across high-level categories. Lower indicates more consistent performance across capabilities.",
    "Max Equity Gap":
      "Score difference (points) for the demographic gap with the largest statistical effect size for this model. Status icon indicates Equity Concern (▲) and/or Significance (✓/✗/?). Hover for details.",
    "Max Gap Area":
      "The specific Demographic Factor and Category where the 'Max Equity Gap' (largest effect size gap) occurred for this model.",
    "Equity Concerns (%)":
      "Percentage of evaluated demographic gaps flagged as Equity Concerns (Large Effect & Statistically Significant, p<0.05). Lower is better.",
    "User Retention":
      "Model score for the 'Repeat Usage' category (0-100), indicating likelihood of users using the model again.",
  })
);

/**
 * Tooltip text for metrics and table headers - Accepts original keys.
 *
 * The key is first normalised with formatDisplayKey (snake_case/kebab-case to
 * Title Case; camelCase is NOT split) and looked up; if that misses, the key
 * is tried exactly as given.
 *
 * @param {*} key Metric or header key (snake_case, kebab-case or display name).
 * @returns {string} Tooltip text, or "No description available" when unknown.
 */
export const getMetricTooltip = (key) => {
  const titleCaseKey = formatDisplayKey(key);
  return (
    METRIC_TOOLTIPS.get(titleCaseKey) ||
    METRIC_TOOLTIPS.get(key) ||
    "No description available"
  );
};

/**
 * Badge classes (background + text) for an Effect Size Class label.
 *
 * Recognised labels are "Negligible", "Small", "Medium" and "Large"; every
 * other value (including empty values and "N/A") gets the neutral gray badge.
 *
 * @param {*} effectSizeClass Effect size label.
 * @returns {string} Space-separated CSS class names.
 */
export function getEffectSizeBadgeColor(effectSizeClass) {
  const neutralBadge = "bg-gray-100 text-gray-800";
  const badgeByClass = new Map([
    ["Negligible", "bg-green-100 text-green-800"],
    ["Small", "bg-blue-100 text-blue-800"],
    ["Medium", "bg-yellow-100 text-yellow-800"],
    ["Large", "bg-red-100 text-red-800"],
  ]);
  // Unknown, empty and "N/A" labels are simply absent from the table.
  return badgeByClass.get(effectSizeClass) ?? neutralBadge;
}

/**
 * Parses one raw task score.
 *
 * Only numbers and non-blank numeric strings count as scores. `null`, blank
 * strings, booleans and other values are rejected: `Number(null)` and
 * `Number("")` are both 0, so passing them through would silently average a
 * missing score in as a zero.
 *
 * @param {*} value Raw score from the task performance data.
 * @returns {number|null} The numeric score, or null when there is none.
 */
function parseTaskScore(value) {
  const isNumber = typeof value === "number";
  const isNonBlankString = typeof value === "string" && value.trim() !== "";
  if (!isNumber && !isNonBlankString) return null;
  const parsed = Number(value);
  return Number.isNaN(parsed) ? null : parsed;
}

/**
 * Placeholder "best model" entry used when no model has a usable score.
 *
 * @param {Object} [extra] Additional fields to append (e.g. categoryName).
 * @returns {Object} Entry with "N/A" model/score and the fallback color.
 */
function unavailableTaskWinner(extra = {}) {
  return { model: "N/A", score: "N/A", color: "#999999", ...extra };
}

/**
 * Helper function to process task performance data.
 * Expects rawData input with snake_case keys (`task_level_performance`,
 * read as task name -> model name -> metric key -> score).
 *
 * For every task it finds the model with the highest average metric score and
 * that model's three highest metrics. It then does the same per task category
 * (creative / practical / analytical), pooling every metric score of the
 * category's tasks and reporting the winner's five highest metric averages.
 * Ties go to the model listed first in `modelOrder`.
 *
 * @param {Object} rawData Raw leaderboard data.
 * @param {Object} taskCategoryMap Task name -> category key; a missing map is treated as empty.
 * @param {string[]} modelOrder Model names to consider; a missing list is treated as empty.
 * @returns {{bestModelPerTask: Object, keyMetricsByTask: Object,
 *   bestModelPerTaskCategory: Object, keyMetricsByTaskCategory: Object}}
 *   Result object with camelCase keys.
 */
function processTaskPerformance(rawData, taskCategoryMap, modelOrder) {
  const result = {
    bestModelPerTask: {},
    keyMetricsByTask: {},
    bestModelPerTaskCategory: {
      creative: null,
      practical: null,
      analytical: null,
    },
    keyMetricsByTaskCategory: { creative: [], practical: [], analytical: [] },
  };
  // Access original snake_case key from input
  const taskPerformance = rawData?.task_level_performance;

  if (!taskPerformance || typeof taskPerformance !== "object") {
    console.warn(
      "Task level performance data missing or invalid in processTaskPerformance input."
    );
    return result;
  }

  // Tolerate missing arguments instead of throwing part-way through.
  const models = Array.isArray(modelOrder) ? modelOrder : [];
  const categoryMap =
    taskCategoryMap && typeof taskCategoryMap === "object"
      ? taskCategoryMap
      : {};

  // --- Per-task winners (task names are the keys of taskPerformance) ---
  Object.entries(taskPerformance).forEach(([taskName, taskData]) => {
    if (!taskData) return;
    let taskBestModel = null;
    let taskBestAvgScore = -Infinity;
    let taskBestModelMetrics = null;

    models.forEach((modelName) => {
      const modelMetrics = taskData[modelName];
      if (!modelMetrics || typeof modelMetrics !== "object") return;
      const scores = Object.values(modelMetrics)
        .map(parseTaskScore)
        .filter((s) => s !== null);
      if (scores.length === 0) return;
      const avgScore =
        scores.reduce((sum, score) => sum + score, 0) / scores.length;
      // Strict ">" keeps the earlier model on ties.
      if (avgScore > taskBestAvgScore) {
        taskBestAvgScore = avgScore;
        taskBestModel = modelName;
        taskBestModelMetrics = modelMetrics;
      }
    });

    if (taskBestModel && taskBestModelMetrics) {
      result.bestModelPerTask[taskName] = {
        model: taskBestModel,
        score: taskBestAvgScore,
        color: MODEL_COLORS[taskBestModel] || "#999999",
      };
      // Top three metrics of the winner; the snake_case key is kept and a
      // display name is added. Unparseable values are shown as 0 here.
      result.keyMetricsByTask[taskName] = Object.entries(taskBestModelMetrics)
        .map(([metricKey, score]) => ({ metricKey, score: Number(score) || 0 }))
        .sort((a, b) => b.score - a.score)
        .slice(0, 3)
        .map((m) => ({ ...m, metricName: formatDisplayKey(m.metricKey) }));
    } else {
      result.bestModelPerTask[taskName] = unavailableTaskWinner();
      result.keyMetricsByTask[taskName] = [];
    }
  });

  // --- Per-category winners ---
  const tasksByCategory = { creative: [], practical: [], analytical: [] };
  Object.entries(categoryMap).forEach(([task, category]) => {
    // Own-property check: only the three known categories may match.
    const isKnownCategory = Object.prototype.hasOwnProperty.call(
      tasksByCategory,
      category
    );
    if (isKnownCategory && taskPerformance[task]) {
      tasksByCategory[category].push(task);
    }
  });

  Object.entries(tasksByCategory).forEach(([category, tasks]) => {
    const categoryNameDisplay = `${
      category.charAt(0).toUpperCase() + category.slice(1)
    } Tasks`;
    if (tasks.length === 0) {
      result.bestModelPerTaskCategory[category] = unavailableTaskWinner({
        categoryName: categoryNameDisplay,
      });
      result.keyMetricsByTaskCategory[category] = [];
      return;
    }

    let bestAvg = -Infinity;
    let bestCatModel = null;
    let bestCatMetrics = null;

    models.forEach((modelName) => {
      let totalScore = 0;
      let count = 0;
      // metric key (snake_case) -> { sum, count } across the category's tasks
      const metricTotals = new Map();

      tasks.forEach((task) => {
        const modelMetrics = taskPerformance[task]?.[modelName];
        if (!modelMetrics) return;
        Object.entries(modelMetrics).forEach(([metricKey, rawScore]) => {
          const numScore = parseTaskScore(rawScore);
          if (numScore === null) return;
          totalScore += numScore;
          count += 1;
          const bucket = metricTotals.get(metricKey) || { sum: 0, count: 0 };
          bucket.sum += numScore;
          bucket.count += 1;
          metricTotals.set(metricKey, bucket);
        });
      });

      if (count === 0) return;
      const avg = totalScore / count;
      if (avg > bestAvg) {
        bestAvg = avg;
        bestCatModel = modelName;
        bestCatMetrics = metricTotals;
      }
    });

    if (bestCatModel) {
      result.bestModelPerTaskCategory[category] = {
        model: bestCatModel,
        score: Number(bestAvg.toFixed(1)),
        color: MODEL_COLORS[bestCatModel] || "#999999",
        categoryName: categoryNameDisplay,
      };
      // Five highest per-metric averages of the winning model.
      result.keyMetricsByTaskCategory[category] = Array.from(
        bestCatMetrics || []
      )
        .map(([metricKey, data]) => ({
          metricKey,
          score: data.count > 0 ? data.sum / data.count : 0,
        }))
        .sort((a, b) => b.score - a.score)
        .slice(0, 5)
        .map((m) => ({
          metric: formatDisplayKey(m.metricKey),
          score: m.score,
          scoreDisplay: m.score.toFixed(1),
        }));
    } else {
      result.bestModelPerTaskCategory[category] = unavailableTaskWinner({
        categoryName: categoryNameDisplay,
      });
      result.keyMetricsByTaskCategory[category] = [];
    }
  });
  return result; // Returns object with camelCase keys
}

/**
 * Converts a raw value to a number, or null when no usable number is present.
 *
 * `null`, `undefined` and blank strings are rejected up front: `Number(null)`
 * and `Number("")` are both 0, which would turn a missing value into a real
 * looking score of 0.
 *
 * @param {*} val Raw value from the input JSON.
 * @returns {number|null} Parsed number, or null.
 */
function toNullableNumber(val) {
  if (val === null || val === undefined) return null;
  if (typeof val === "string" && val.trim() === "") return null;
  const num = Number(val);
  return Number.isNaN(num) ? null : num;
}

/**
 * Prepares the data from leaderboard_data.json for visualization
 * FINAL v4: Reverted deep camelCase conversion. Processes top-level keys and adds equity concern %.
 * Keeps nested raw data keys as original (snake_case).
 *
 * When `rawDataInput` is missing or has no `model_order` array, an error is
 * logged and an empty default structure is returned.
 *
 * @param {Object} rawDataInput - The raw data from leaderboard_data.json (expected snake_case)
 * @returns {Object} - Processed data ready for visualization: `models`,
 *   `metricsData`, `radarData`, `bestPerCategory`, `bestPerMetric`,
 *   `overviewCardData`, `rawData`, `metadata`, `equityAnalysis`.
 */
export function prepareDataForVisualization(rawDataInput) {
  // Basic Validation
  const defaultReturn = {
    models: [],
    metricsData: { highLevelCategories: {}, lowLevelMetrics: {} },
    radarData: [],
    bestPerCategory: {},
    bestPerMetric: {},
    overviewCardData: {},
    rawData: {},
    metadata: {},
    equityAnalysis: {},
  };
  if (
    !rawDataInput ||
    !rawDataInput.model_order ||
    !Array.isArray(rawDataInput.model_order)
  ) {
    console.error(
      "prepareDataForVisualization received invalid rawData.",
      rawDataInput
    );
    return defaultReturn;
  }

  // Keep original references where structure is maintained
  const modelOrder = rawDataInput.model_order;
  const equityAnalysis = rawDataInput.equity_analysis || {
    all_equity_gaps: [],
    model_max_effect_gaps: {},
    universal_issues: [],
    assessment_method: {},
    demographic_variation_stats: {},
  };
  const allGaps = Array.isArray(equityAnalysis.all_equity_gaps)
    ? equityAnalysis.all_equity_gaps
    : [];
  const metadata = rawDataInput.metadata || {};
  const mrpDemographicsRaw = rawDataInput.mrp_demographics || {};
  const taskLevelPerformanceRaw = rawDataInput.task_level_performance || {};

  // Process MRP Demographics for filtering options.
  // Walks model -> factor -> level -> metric and collects the distinct
  // factors, the levels seen per factor, and the metric names.
  const demographicFactors = new Set();
  const demographicLevels = {};
  const availableMetrics = new Set();
  if (mrpDemographicsRaw && typeof mrpDemographicsRaw === "object") {
    Object.values(mrpDemographicsRaw).forEach((modelData) => {
      Object.entries(modelData || {}).forEach(([factor, factorData]) => {
        demographicFactors.add(factor);
        if (!demographicLevels[factor]) demographicLevels[factor] = new Set();
        Object.entries(factorData || {}).forEach(([level, levelData]) => {
          demographicLevels[factor].add(level);
          Object.keys(levelData || {}).forEach((metric) =>
            availableMetrics.add(metric)
          );
        });
      });
    }); // metric is Title Case here from Python processing
  }
  const demographicOptions = {};
  demographicFactors.forEach((factor) => {
    demographicOptions[factor] = Array.from(
      demographicLevels[factor] || new Set()
    ).sort();
  });
  const availableMetricsList = Array.from(availableMetrics).sort(); // These are Title Case

  // Count gaps (and flagged concerns) per model in a single pass so each
  // ranking row below is a constant-time lookup instead of two array scans.
  const gapStatsByModel = new Map();
  allGaps.forEach((gap) => {
    if (!gap || typeof gap !== "object") return;
    const stats = gapStatsByModel.get(gap.model) || { total: 0, concerns: 0 };
    stats.total += 1;
    if (gap.is_equity_concern === true) stats.concerns += 1;
    gapStatsByModel.set(gap.model, stats);
  });

  // Process Overall Rankings -> camelCase & add equity concern %
  const overallRankingRaw = Array.isArray(rawDataInput.overall_ranking)
    ? rawDataInput.overall_ranking
    : [];
  const overallRankingProcessed = overallRankingRaw.map((modelData) => {
    const modelName = modelData.model;
    // details object keys are snake_case from python
    const maxEffectGapDetails = modelData.max_effect_gap_details || {};

    // Share of this model's gaps flagged as concerns; null when it has none.
    const gapStats = gapStatsByModel.get(modelName);
    const equityConcernPercentage =
      gapStats && gapStats.total > 0
        ? (gapStats.concerns / gapStats.total) * 100
        : null;

    // Return structure with camelCase keys
    return {
      rank: modelData.rank,
      model: modelName,
      overallScore: toNullableNumber(modelData.overall_score),
      highLevelCatScore: toNullableNumber(modelData.high_level_cat_score),
      lowLevelCatScore: toNullableNumber(modelData.low_level_cat_score),
      color: MODEL_COLORS[modelName] || "#999999",
      // Use snake_case keys from input JSON for these fields
      stdDevAcrossCats: modelData.std_dev_across_cats,
      stdDevAcrossCatsNumeric: toNullableNumber(modelData.std_dev_across_cats),
      repeatUsageScore: toNullableNumber(modelData.repeat_usage_score),
      maxEffectCategory: modelData.max_effect_category, // snake_case from input
      maxEffectFactor: maxEffectGapDetails.demographic_factor, // snake_case from input
      maxEffectSize: toNullableNumber(maxEffectGapDetails.effect_size),
      maxEffectGap: toNullableNumber(maxEffectGapDetails.score_range),
      maxEffectConcernFlag: maxEffectGapDetails.is_equity_concern ?? false,
      maxEffectSignificant: maxEffectGapDetails.is_statistically_significant,
      maxEffectPValue: maxEffectGapDetails.p_value,
      maxEffectSizeClass: maxEffectGapDetails.effect_size_class || "N/A",
      maxEffectRawNHeuristic:
        maxEffectGapDetails.raw_n_confidence_heuristic || "N/A",
      maxEffectGapDetails: maxEffectGapDetails, // Pass original snake_case details
      equityConcernPercentage: equityConcernPercentage,
    };
  });

  // Process Metrics Breakdown -> camelCase keys for structure, keep original metric keys inside
  const metricsBreakdownProcessed = {
    highLevelCategories: {},
    lowLevelMetrics: {},
  };
  if (
    rawDataInput.metrics_breakdown &&
    typeof rawDataInput.metrics_breakdown === "object"
  ) {
    // Shared by high-level categories and low-level metrics: same input shape.
    const processCategory = (displayKey, categoryData) => {
      // Input displayKey is Title Case from python output
      if (!categoryData || !categoryData.model_scores) {
        console.warn(`Missing model_scores for category: ${displayKey}`);
        return {
          modelScores: {},
          topPerformer: { model: "N/A", score: null, color: "#999999" },
        };
      }
      const internalMetricKey = categoryData._internal_category_name; // Get original snake_case key
      const processedModelScores = {};
      modelOrder.forEach((modelName) => {
        const scores = categoryData.model_scores[modelName]; // Access model scores
        if (!scores) {
          // Model has no entry for this category: emit an empty placeholder.
          processedModelScores[modelName] = {
            nationalScore: null,
            color: MODEL_COLORS[modelName] || "#999999",
            maxEffectGapInfo: {},
          };
          return;
        }
        // NOTE(review): inner keys are assumed to be snake_case — confirm against the python output.
        const maxEffectGapInfoForCat = scores.max_effect_gap_info || {};
        processedModelScores[modelName] = {
          nationalScore: scores.national_score ?? null,
          color: MODEL_COLORS[modelName] || "#999999",
          // Keep original snake_case keys for gap info within this structure
          maxEffectGapInfo: maxEffectGapInfoForCat,
        };
      });
      const topPerf = categoryData.top_performer || {};
      return {
        modelScores: processedModelScores, // Nested scores
        topPerformer: {
          model: topPerf.model || "N/A",
          // "N/A", null and other non-numeric scores all become null.
          score: toNullableNumber(topPerf.score),
          color: MODEL_COLORS[topPerf.model] || "#999999",
        },
        internalMetricKey: internalMetricKey, // Store original snake_case key
      };
    };
    Object.entries(
      rawDataInput.metrics_breakdown.high_level_categories || {}
    ).forEach(([displayKey, catData]) => {
      metricsBreakdownProcessed.highLevelCategories[displayKey] =
        processCategory(displayKey, catData);
    });
    Object.entries(
      rawDataInput.metrics_breakdown.low_level_metrics || {}
    ).forEach(([displayKey, metricData]) => {
      metricsBreakdownProcessed.lowLevelMetrics[displayKey] = processCategory(
        displayKey,
        metricData
      );
    });
  } else {
    console.warn("rawDataInput.metrics_breakdown is missing or not an object.");
  }

  // Prepare Radar Chart Data: one entry per high-level category, with one
  // numeric field per model (missing scores are plotted as 0).
  const radarChartData = Object.entries(
    metricsBreakdownProcessed.highLevelCategories
  ).map(([displayKey, categoryData]) => {
    // displayKey is Title Case here
    const radarEntry = { category: displayKey }; // Use Title Case for radar axis label
    modelOrder.forEach((modelName) => {
      radarEntry[modelName] =
        Number(categoryData.modelScores[modelName]?.nationalScore) || 0;
    });
    return radarEntry;
  });

  // Prepare Top Performers
  const bestPerCategory = {};
  Object.entries(metricsBreakdownProcessed.highLevelCategories).forEach(
    ([displayKey, catData]) => {
      bestPerCategory[displayKey] = catData.topPerformer;
    }
  );
  const bestPerMetric = {};
  Object.entries(metricsBreakdownProcessed.lowLevelMetrics).forEach(
    ([displayKey, metricData]) => {
      bestPerMetric[displayKey] = metricData.topPerformer;
    }
  );

  // Prepare Task Performance Data
  const taskCategoryMap = {
    "Generating a Creative Idea": "creative",
    "Creating a Travel Itinerary": "creative",
    "Following Up on a Job Application": "practical",
    "Planning Your Weekly Meals": "practical",
    "Making a Decision Between Options": "analytical",
    "Understanding a Complex Topic": "analytical",
  };
  // Pass the original rawDataInput to the helper, which expects snake_case keys internally
  const taskPerformanceResults = processTaskPerformance(
    rawDataInput,
    taskCategoryMap,
    modelOrder
  );
  const tasks = Object.keys(taskLevelPerformanceRaw || {}); // Task names present in the input
  const taskCategories = {};
  Object.entries(taskCategoryMap).forEach(([task, category]) => {
    if (!taskCategories[category]) taskCategories[category] = [];
    if (tasks.includes(task)) taskCategories[category].push(task);
  });
  const taskMetrics = new Set();
  Object.values(taskLevelPerformanceRaw || {}).forEach((taskData) => {
    Object.values(taskData || {}).forEach((modelData) => {
      Object.keys(modelData || {}).forEach((metric) => taskMetrics.add(metric));
    });
  }); // metric is snake_case
  const taskMetricsDisplayList = Array.from(taskMetrics)
    .map(formatDisplayKey)
    .sort(); // Create display list
  const taskMetricsSnakeList = Array.from(taskMetrics).sort(); // List of original snake_case keys

  // Final Return Structure
  return {
    models: overallRankingProcessed, // camelCase keys for top level
    metricsData: metricsBreakdownProcessed, // Title Case keys for categories/metrics
    radarData: radarChartData,
    bestPerCategory: bestPerCategory, // Title Case keys
    bestPerMetric: bestPerMetric, // Title Case keys
    overviewCardData: taskPerformanceResults, // camelCase keys expected from helper
    rawData: {
      // Keep original structures under camelCase keys for clarity
      taskLevelPerformance: taskLevelPerformanceRaw, // snake_case keys inside
      mrpDemographics: mrpDemographicsRaw, // Title Case metric keys inside
      // Processed lists/maps for filtering/display
      demographicOptions: demographicOptions,
      availableMetrics: availableMetricsList, // Title Case metric names
      tasks: tasks,
      taskCategories: taskCategories,
      taskMetrics: taskMetricsDisplayList, // Title Case metric names for display
      taskMetricsSnake: taskMetricsSnakeList, // snake_case keys for lookup
      taskCategoryMap: taskCategoryMap,
    },
    metadata: metadata, // Original structure
    equityAnalysis: equityAnalysis, // Original structure (snake_case keys)
  };
}