diff --git "a/leaderboard-app/public/leaderboard_data.json" "b/leaderboard-app/public/leaderboard_data.json" new file mode 100644--- /dev/null +++ "b/leaderboard-app/public/leaderboard_data.json" @@ -0,0 +1,36056 @@ +{ + "metadata": { + "data_generated_at": "2025-04-17T14:22:23.780442", + "mrp_data_source": "data/complete_dataset_hl_cats.csv", + "raw_data_source": "data/task_data.csv", + "mrp_info": "Scores for demographic groups ('mrp_demographics') and overall metrics ('national_score' in breakdown) are MRP-adjusted estimates.", + "raw_info": "'task_level_performance' scores are raw averages from task data, normalized to 0-100.", + "min_raw_n_for_mrp_demog_data": 0, + "equity_confidence_thresholds": { + "High": 50, + "Medium": 20, + "Low": 0 + }, + "large_effect_size_threshold": 0.8, + "significance_level_alpha": 0.05, + "fdr_correction_method": "fdr_bh", + "notes": [ + "Overall score is calculated as the average of high-level category scores at the national level. 'std_dev_across_cats' in overall ranking measures model consistency across high-level capabilities. Metrics excluded from overall scores and equity analysis: Speed Perception, Repeat Usage, Overall. Equity analysis ('equity_analysis' section) identifies the largest gap between demographic groups within each factor. It uses effect sizes (gap relative to category score standard deviation) and statistical significance testing (based on MRP standard errors, p < 0.05) to assess gaps. The 'is_equity_concern' flag indicates gaps that are both large (Effect Size >= 0.8) AND statistically significant. The 'raw_n_confidence_heuristic' provides secondary information based on raw participant counts." + ], + "scale_info": "Metrics using 7-point scale (raw): Adaptiveness, Background And Culture, Communication, Helpfulness, Personality, Repeat Usage, Trustworthiness, Understanding. Most others use 5-point.", + "high_level_categories_used": [ + "Adaptiveness", + "Background And Culture", + "Communication", + "Helpfulness", + "Personality", + "Trustworthiness", + "Understanding" + ], + "low_level_categories_used": [ + "Accuracy", + "Bias And Stereotypes", + "Clarity", + "Comprehensiveness", + "Confidence", + "Consistency", + "Context Memory", + "Conversation Building", + "Conversation Flow", + "Cultural Awareness", + "Detail And Technical Language", + "Distinct Personality", + "Effectiveness", + "Ethical Alignment", + "Flexibility", + "Honesty Empathy Fairness", + "Intuitiveness", + "Personality Consistency", + "Tone And Language Style", + "Transparency", + "Usefulness" + ] + }, + "model_order": [ + "claude-3.7-sonnet", + "deepseek-r1", + "gemini-2.0-flash-001", + "gpt-4o", + "llama-3.1-405b-instruct", + "o1" + ], + "overall_ranking": [ + { + "rank": 1.0, + "model": "gpt-4o", + "overall_score": 87.81, + "high_level_cat_score": 87.81, + "low_level_cat_score": 87.89, + "max_effect_category": "understanding", + "max_effect_factor": "Age", + "max_effect_size": 1.719, + "max_effect_gap": 5.1, + "max_effect_concern_flag": true, + "max_effect_significant": true, + "max_effect_p_value": 0.0, + "max_effect_raw_n_heuristic": "High", + "std_dev_across_cats": 5.24, + "repeat_usage_score": 88.4, + "max_effect_gap_details": { + "model": "gpt-4o", + "category": "understanding", + "demographic_factor": "Age", + "score_range": 5.1000000000000085, + "min_level": "18-24", + "max_level": "25-34", + "min_score": 89.1, + "max_score": 94.2, + "se_min": 0.0, + "se_max": 0.0, + "effect_size": 1.7187238854651667, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9673178124360047, + "level_score_std_dev": 2.0221688027132365, + "se_difference": 0.0, + "z_score": 9999.9, + "p_value": 0.0, + "is_statistically_significant": true, + "gap_confidence_interval_95_lower": 5.1000000000000085, + "gap_confidence_interval_95_upper": 5.1000000000000085, + "raw_n_min_group": 60, + "raw_n_max_group": 104, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": true, + "prioritized_significant_flag": true + } + }, + { + "rank": 2.0, + "model": "claude-3.7-sonnet", + "overall_score": 86.36, + "high_level_cat_score": 86.36, + "low_level_cat_score": 87.97, + "max_effect_category": "trustworthiness", + "max_effect_factor": "Age", + "max_effect_size": 2.054, + "max_effect_gap": 6.5, + "max_effect_concern_flag": true, + "max_effect_significant": true, + "max_effect_p_value": 0.0, + "max_effect_raw_n_heuristic": "High", + "std_dev_across_cats": 5.41, + "repeat_usage_score": 86.5, + "max_effect_gap_details": { + "model": "claude-3.7-sonnet", + "category": "trustworthiness", + "demographic_factor": "Age", + "score_range": 6.5, + "min_level": "18-24", + "max_level": "65+", + "min_score": 86.6, + "max_score": 93.1, + "se_min": 0.0, + "se_max": 0.0, + "effect_size": 2.054186748854409, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1642692679353313, + "level_score_std_dev": 2.11002896241313, + "se_difference": 0.0, + "z_score": 9999.9, + "p_value": 0.0, + "is_statistically_significant": true, + "gap_confidence_interval_95_lower": 6.5, + "gap_confidence_interval_95_upper": 6.5, + "raw_n_min_group": 60, + "raw_n_max_group": 65, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": true, + "prioritized_significant_flag": true + } + }, + { + "rank": 3.0, + "model": "gemini-2.0-flash-001", + "overall_score": 85.87, + "high_level_cat_score": 85.87, + "low_level_cat_score": 86.18, + "max_effect_category": "confidence", + "max_effect_factor": "Education", + "max_effect_size": 1.855, + "max_effect_gap": 6.1, + "max_effect_concern_flag": true, + "max_effect_significant": true, + "max_effect_p_value": 0.0, + "max_effect_raw_n_heuristic": "Low", + "std_dev_across_cats": 5.05, + "repeat_usage_score": 86.4, + "max_effect_gap_details": { + "model": "gemini-2.0-flash-001", + "category": "confidence", + "demographic_factor": "Education", + "score_range": 6.099999999999994, + "min_level": "College", + "max_level": "No College", + "min_score": 87.5, + "max_score": 93.6, + "se_min": 0.0, + "se_max": 0.0, + "effect_size": 1.855123622193575, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.2881905696328215, + "level_score_std_dev": 3.049999999999997, + "se_difference": 0.0, + "z_score": 9999.9, + "p_value": 0.0, + "is_statistically_significant": true, + "gap_confidence_interval_95_lower": 6.099999999999994, + "gap_confidence_interval_95_upper": 6.099999999999994, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": true, + "prioritized_significant_flag": true + } + }, + { + "rank": 4.0, + "model": "llama-3.1-405b-instruct", + "overall_score": 85.01, + "high_level_cat_score": 85.01, + "low_level_cat_score": 86.78, + "max_effect_category": "tone_and_language_style", + "max_effect_factor": "Age", + "max_effect_size": 2.725, + "max_effect_gap": 9.1, + "max_effect_concern_flag": true, + "max_effect_significant": true, + "max_effect_p_value": 0.0, + "max_effect_raw_n_heuristic": "High", + "std_dev_across_cats": 4.9, + "repeat_usage_score": 82.0, + "max_effect_gap_details": { + "model": "llama-3.1-405b-instruct", + "category": "tone_and_language_style", + "demographic_factor": "Age", + "score_range": 9.100000000000009, + "min_level": "45-54", + "max_level": "65+", + "min_score": 84.8, + "max_score": 93.9, + "se_min": 0.0, + "se_max": 0.0, + "effect_size": 2.7248155210679417, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.3396756329519985, + "level_score_std_dev": 3.367326667979944, + "se_difference": 0.0, + "z_score": 9999.9, + "p_value": 0.0, + "is_statistically_significant": true, + "gap_confidence_interval_95_lower": 9.100000000000009, + "gap_confidence_interval_95_upper": 9.100000000000009, + "raw_n_min_group": 83, + "raw_n_max_group": 64, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": true, + "prioritized_significant_flag": true + } + }, + { + "rank": 5.0, + "model": "o1", + "overall_score": 82.73, + "high_level_cat_score": 82.73, + "low_level_cat_score": 84.06, + "max_effect_category": "conversation_flow", + "max_effect_factor": "Ethnicity", + "max_effect_size": 3.38, + "max_effect_gap": 16.8, + "max_effect_concern_flag": false, + "max_effect_significant": false, + "max_effect_p_value": 0.9401, + "max_effect_raw_n_heuristic": "Low", + "std_dev_across_cats": 5.73, + "repeat_usage_score": 80.4, + "max_effect_gap_details": { + "model": "o1", + "category": "conversation_flow", + "demographic_factor": "Ethnicity", + "score_range": 16.799999999999997, + "min_level": "Hispanic", + "max_level": "African American", + "min_score": 70.4, + "max_score": 87.2, + "se_min": 6.8, + "se_max": 4.1, + "effect_size": 3.380285539012137, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.969994341043056, + "level_score_std_dev": 6.439477851503177, + "se_difference": 7.940403012442126, + "z_score": 2.11576162742312, + "p_value": 0.9400724891990658, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": 1.2370960728800782, + "gap_confidence_interval_95_upper": 32.362903927119916, + "raw_n_min_group": 30, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false, + "prioritized_significant_flag": false + } + }, + { + "rank": 6.0, + "model": "deepseek-r1", + "overall_score": 79.2, + "high_level_cat_score": 79.2, + "low_level_cat_score": 81.57, + "max_effect_category": "distinct_personality", + "max_effect_factor": "Ethnicity", + "max_effect_size": 4.094, + "max_effect_gap": 17.7, + "max_effect_concern_flag": false, + "max_effect_significant": false, + "max_effect_p_value": 0.9401, + "max_effect_raw_n_heuristic": "Medium", + "std_dev_across_cats": 4.67, + "repeat_usage_score": 57.0, + "max_effect_gap_details": { + "model": "deepseek-r1", + "category": "distinct_personality", + "demographic_factor": "Ethnicity", + "score_range": 17.700000000000003, + "min_level": "Hispanic", + "max_level": "Asian", + "min_score": 60.8, + "max_score": 78.5, + "se_min": 7.7, + "se_max": 5.7, + "effect_size": 4.094050141519659, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.323347147240846, + "level_score_std_dev": 7.117364329581564, + "se_difference": 9.58018788959799, + "z_score": 1.847562929242585, + "p_value": 0.9400724891990658, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -1.0768232287388457, + "gap_confidence_interval_95_upper": 36.47682322873885, + "raw_n_min_group": 34, + "raw_n_max_group": 40, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false, + "prioritized_significant_flag": false + } + } + ], + "metrics_breakdown": { + "high_level_categories": { + "Adaptiveness": { + "top_performer": { + "model": "gpt-4o", + "score": 90.8 + }, + "model_scores": { + "claude-3.7-sonnet": { + "national_score": 88.2, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 1.148, + "gap": 4.5, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "18-24", + "max_level": "65+" + } + }, + "deepseek-r1": { + "national_score": 79.8, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 1.404, + "gap": 5.5, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "18-24", + "max_level": "65+" + } + }, + "gemini-2.0-flash-001": { + "national_score": 89.4, + "max_effect_gap_info": { + "factor": "Education", + "max_effect_size": 1.608, + "gap": 6.3, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "College", + "max_level": "No College" + } + }, + "gpt-4o": { + "national_score": 90.8, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 0.97, + "gap": 3.8, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "18-24", + "max_level": "55-64" + } + }, + "llama-3.1-405b-instruct": { + "national_score": 87.3, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 1.352, + "gap": 5.3, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "45-54", + "max_level": "65+" + } + }, + "o1": { + "national_score": 84.4, + "max_effect_gap_info": { + "factor": "Ethnicity", + "max_effect_size": 1.506, + "gap": 5.9, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "Asian", + "max_level": "African American" + } + } + }, + "_internal_category_name": "adaptiveness" + }, + "Background And Culture": { + "top_performer": { + "model": "gemini-2.0-flash-001", + "score": 79.9 + }, + "model_scores": { + "claude-3.7-sonnet": { + "national_score": 77.3, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 3.296, + "gap": 11.4, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "45-54", + "max_level": "65+" + } + }, + "deepseek-r1": { + "national_score": 73.6, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 2.804, + "gap": 9.7, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "18-24", + "max_level": "55-64" + } + }, + "gemini-2.0-flash-001": { + "national_score": 79.9, + "max_effect_gap_info": { + "factor": "Ethnicity", + "max_effect_size": 2.457, + "gap": 8.5, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "Asian", + "max_level": "African American" + } + }, + "gpt-4o": { + "national_score": 78.8, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 1.85, + "gap": 6.4, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "18-24", + "max_level": "55-64" + } + }, + "llama-3.1-405b-instruct": { + "national_score": 77.6, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 1.937, + "gap": 6.7, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "45-54", + "max_level": "65+" + } + }, + "o1": { + "national_score": 72.8, + "max_effect_gap_info": { + "factor": "Ethnicity", + "max_effect_size": 2.66, + "gap": 9.2, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "Asian", + "max_level": "African American" + } + } + }, + "_internal_category_name": "background_and_culture" + }, + "Communication": { + "top_performer": { + "model": "gpt-4o", + "score": 90.4 + }, + "model_scores": { + "claude-3.7-sonnet": { + "national_score": 88.1, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 1.534, + "gap": 6.2, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "18-24", + "max_level": "65+" + } + }, + "deepseek-r1": { + "national_score": 80.2, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 1.905, + "gap": 7.7, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "18-24", + "max_level": "65+" + } + }, + "gemini-2.0-flash-001": { + "national_score": 88.9, + "max_effect_gap_info": { + "factor": "Education", + "max_effect_size": 1.385, + "gap": 5.6, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "College", + "max_level": "No College" + } + }, + "gpt-4o": { + "national_score": 90.4, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 1.138, + "gap": 4.6, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "45-54", + "max_level": "65+" + } + }, + "llama-3.1-405b-instruct": { + "national_score": 89.7, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 1.657, + "gap": 6.7, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "18-24", + "max_level": "65+" + } + }, + "o1": { + "national_score": 85.3, + "max_effect_gap_info": { + "factor": "Ethnicity", + "max_effect_size": 2.053, + "gap": 8.3, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "Hispanic", + "max_level": "African American" + } + } + }, + "_internal_category_name": "communication" + }, + "Helpfulness": { + "top_performer": { + "model": "gpt-4o", + "score": 91.2 + }, + "model_scores": { + "claude-3.7-sonnet": { + "national_score": 90.8, + "max_effect_gap_info": { + "factor": "Ethnicity", + "max_effect_size": 1.943, + "gap": 7.0, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "Asian", + "max_level": "Hispanic" + } + }, + "deepseek-r1": { + "national_score": 82.0, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 1.305, + "gap": 4.7, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "18-24", + "max_level": "55-64" + } + }, + "gemini-2.0-flash-001": { + "national_score": 91.0, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 1.277, + "gap": 4.6, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "18-24", + "max_level": "65+" + } + }, + "gpt-4o": { + "national_score": 91.2, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 1.416, + "gap": 5.1, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "45-54", + "max_level": "55-64" + } + }, + "llama-3.1-405b-instruct": { + "national_score": 87.7, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 2.276, + "gap": 8.2, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "18-24", + "max_level": "65+" + } + }, + "o1": { + "national_score": 87.9, + "max_effect_gap_info": { + "factor": "Ethnicity", + "max_effect_size": 1.693, + "gap": 6.1, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "Asian", + "max_level": "African American" + } + } + }, + "_internal_category_name": "helpfulness" + }, + "Personality": { + "top_performer": { + "model": "gpt-4o", + "score": 80.5 + }, + "model_scores": { + "claude-3.7-sonnet": { + "national_score": 78.7, + "max_effect_gap_info": { + "factor": "Ethnicity", + "max_effect_size": 1.04, + "gap": 3.5, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "White", + "max_level": "Hispanic" + } + }, + "deepseek-r1": { + "national_score": 71.0, + "max_effect_gap_info": { + "factor": "Ethnicity", + "max_effect_size": 1.753, + "gap": 5.9, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "Hispanic", + "max_level": "African American" + } + }, + "gemini-2.0-flash-001": { + "national_score": 76.7, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 1.456, + "gap": 4.9, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "35-44", + "max_level": "55-64" + } + }, + "gpt-4o": { + "national_score": 80.5, + "max_effect_gap_info": { + "factor": "Education", + "max_effect_size": 2.021, + "gap": 6.8, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "College", + "max_level": "No College" + } + }, + "llama-3.1-405b-instruct": { + "national_score": 77.2, + "max_effect_gap_info": { + "factor": "Ethnicity", + "max_effect_size": 1.099, + "gap": 3.7, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "Hispanic", + "max_level": "African American" + } + }, + "o1": { + "national_score": 75.0, + "max_effect_gap_info": { + "factor": "Ethnicity", + "max_effect_size": 1.605, + "gap": 5.4, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "Hispanic", + "max_level": "African American" + } + } + }, + "_internal_category_name": "personality" + }, + "Trustworthiness": { + "top_performer": { + "model": "claude-3.7-sonnet", + "score": 91.1 + }, + "model_scores": { + "claude-3.7-sonnet": { + "national_score": 91.1, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 2.054, + "gap": 6.5, + "significant": true, + "p_value": 0.0, + "concern": true, + "min_level": "18-24", + "max_level": "65+" + } + }, + "deepseek-r1": { + "national_score": 83.2, + "max_effect_gap_info": { + "factor": "Education", + "max_effect_size": 2.307, + "gap": 7.3, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "College", + "max_level": "No College" + } + }, + "gemini-2.0-flash-001": { + "national_score": 86.0, + "max_effect_gap_info": { + "factor": "Education", + "max_effect_size": 2.686, + "gap": 8.5, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "College", + "max_level": "No College" + } + }, + "gpt-4o": { + "national_score": 90.2, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 1.359, + "gap": 4.3, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "18-24", + "max_level": "55-64" + } + }, + "llama-3.1-405b-instruct": { + "national_score": 86.8, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 1.896, + "gap": 6.0, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "18-24", + "max_level": "65+" + } + }, + "o1": { + "national_score": 86.0, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 2.117, + "gap": 6.7, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "18-24", + "max_level": "35-44" + } + } + }, + "_internal_category_name": "trustworthiness" + }, + "Understanding": { + "top_performer": { + "model": "gpt-4o", + "score": 92.8 + }, + "model_scores": { + "claude-3.7-sonnet": { + "national_score": 90.3, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 1.82, + "gap": 5.4, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "18-24", + "max_level": "35-44" + } + }, + "deepseek-r1": { + "national_score": 84.6, + "max_effect_gap_info": { + "factor": "Education", + "max_effect_size": 2.089, + "gap": 6.2, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "College", + "max_level": "No College" + } + }, + "gemini-2.0-flash-001": { + "national_score": 89.2, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 1.651, + "gap": 4.9, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "18-24", + "max_level": "65+" + } + }, + "gpt-4o": { + "national_score": 92.8, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 1.719, + "gap": 5.1, + "significant": true, + "p_value": 0.0, + "concern": true, + "min_level": "18-24", + "max_level": "25-34" + } + }, + "llama-3.1-405b-instruct": { + "national_score": 88.8, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 2.561, + "gap": 7.6, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "18-24", + "max_level": "55-64" + } + }, + "o1": { + "national_score": 87.7, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 1.988, + "gap": 5.9, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "25-34", + "max_level": "55-64" + } + } + }, + "_internal_category_name": "understanding" + } + }, + "low_level_metrics": { + "Accuracy": { + "top_performer": { + "model": "llama-3.1-405b-instruct", + "score": 93.5 + }, + "model_scores": { + "claude-3.7-sonnet": { + "national_score": 89.6, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 2.655, + "gap": 7.8, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "45-54", + "max_level": "65+" + } + }, + "deepseek-r1": { + "national_score": 87.2, + "max_effect_gap_info": { + "factor": "Education", + "max_effect_size": 4.152, + "gap": 12.2, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "College", + "max_level": "No College" + } + }, + "gemini-2.0-flash-001": { + "national_score": 90.4, + "max_effect_gap_info": { + "factor": "Urbanicity", + "max_effect_size": 1.259, + "gap": 3.7, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "Urban", + "max_level": "Rural" + } + }, + "gpt-4o": { + "national_score": 91.7, + "max_effect_gap_info": { + "factor": "Urbanicity", + "max_effect_size": 1.055, + "gap": 3.1, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "Urban", + "max_level": "Rural" + } + }, + "llama-3.1-405b-instruct": { + "national_score": 93.5, + "max_effect_gap_info": { + "factor": "Education", + "max_effect_size": 2.076, + "gap": 6.1, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "College", + "max_level": "No College" + } + }, + "o1": { + "national_score": 87.9, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 2.485, + "gap": 7.3, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "45-54", + "max_level": "35-44" + } + } + }, + "_internal_category_name": "accuracy" + }, + "Bias And Stereotypes": { + "top_performer": { + "model": "claude-3.7-sonnet", + "score": 91.4 + }, + "model_scores": { + "claude-3.7-sonnet": { + "national_score": 91.4, + "max_effect_gap_info": { + "factor": "Education", + "max_effect_size": 2.73, + "gap": 7.6, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "College", + "max_level": "No College" + } + }, + "deepseek-r1": { + "national_score": 86.4, + "max_effect_gap_info": { + "factor": "Education", + "max_effect_size": 3.843, + "gap": 10.7, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "College", + "max_level": "No College" + } + }, + "gemini-2.0-flash-001": { + "national_score": 87.6, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 2.119, + "gap": 5.9, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "18-24", + "max_level": "55-64" + } + }, + "gpt-4o": { + "national_score": 88.5, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 2.766, + "gap": 7.7, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "18-24", + "max_level": "35-44" + } + }, + "llama-3.1-405b-instruct": { + "national_score": 86.8, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 2.802, + "gap": 7.8, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "18-24", + "max_level": "65+" + } + }, + "o1": { + "national_score": 87.8, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 3.053, + "gap": 8.5, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "18-24", + "max_level": "35-44" + } + } + }, + "_internal_category_name": "bias_and_stereotypes" + }, + "Clarity": { + "top_performer": { + "model": "claude-3.7-sonnet", + "score": 88.2 + }, + "model_scores": { + "claude-3.7-sonnet": { + "national_score": 88.2, + "max_effect_gap_info": { + "factor": "Education", + "max_effect_size": 1.838, + "gap": 8.3, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "College", + "max_level": "No College" + } + }, + "deepseek-r1": { + "national_score": 76.3, + "max_effect_gap_info": { + "factor": "Ethnicity", + "max_effect_size": 2.126, + "gap": 9.6, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "Hispanic", + "max_level": "Asian" + } + }, + "gemini-2.0-flash-001": { + "national_score": 85.5, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 1.262, + "gap": 5.7, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "35-44", + "max_level": "65+" + } + }, + "gpt-4o": { + "national_score": 86.7, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 1.617, + "gap": 7.3, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "45-54", + "max_level": "65+" + } + }, + "llama-3.1-405b-instruct": { + "national_score": 84.6, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 0.819, + "gap": 3.7, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "45-54", + "max_level": "55-64" + } + }, + "o1": { + "national_score": 82.2, + "max_effect_gap_info": { + "factor": "Ethnicity", + "max_effect_size": 2.148, + "gap": 9.7, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "Hispanic", + "max_level": "African American" + } + } + }, + "_internal_category_name": "clarity" + }, + "Comprehensiveness": { + "top_performer": { + "model": "gpt-4o", + "score": 91.4 + }, + "model_scores": { + "claude-3.7-sonnet": { + "national_score": 90.6, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 2.512, + "gap": 9.0, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "45-54", + "max_level": "65+" + } + }, + "deepseek-r1": { + "national_score": 82.6, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 2.736, + "gap": 9.8, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "18-24", + "max_level": "65+" + } + }, + "gemini-2.0-flash-001": { + "national_score": 88.4, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 1.507, + "gap": 5.4, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "25-34", + "max_level": "65+" + } + }, + "gpt-4o": { + "national_score": 91.4, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 1.563, + "gap": 5.6, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "45-54", + "max_level": "55-64" + } + }, + "llama-3.1-405b-instruct": { + "national_score": 90.2, + "max_effect_gap_info": { + "factor": "Education", + "max_effect_size": 1.898, + "gap": 6.8, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "College", + "max_level": "No College" + } + }, + "o1": { + "national_score": 88.0, + "max_effect_gap_info": { + "factor": "Ethnicity", + "max_effect_size": 1.48, + "gap": 5.3, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "Hispanic", + "max_level": "African American" + } + } + }, + "_internal_category_name": "comprehensiveness" + }, + "Confidence": { + "top_performer": { + "model": "claude-3.7-sonnet", + "score": 91.6 + }, + "model_scores": { + "claude-3.7-sonnet": { + "national_score": 91.6, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 2.007, + "gap": 6.6, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "18-24", + "max_level": "65+" + } + }, + "deepseek-r1": { + "national_score": 83.6, + "max_effect_gap_info": { + "factor": "Ethnicity", + "max_effect_size": 2.859, + "gap": 9.4, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "Hispanic", + "max_level": "Asian" + } + }, + "gemini-2.0-flash-001": { + "national_score": 91.5, + "max_effect_gap_info": { + "factor": "Education", + "max_effect_size": 1.855, + "gap": 6.1, + "significant": true, + "p_value": 0.0, + "concern": true, + "min_level": "College", + "max_level": "No College" + } + }, + "gpt-4o": { + "national_score": 89.4, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 1.216, + "gap": 4.0, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "45-54", + "max_level": "35-44" + } + }, + "llama-3.1-405b-instruct": { + "national_score": 87.9, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 1.977, + "gap": 6.5, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "25-34", + "max_level": "65+" + } + }, + "o1": { + "national_score": 87.9, + "max_effect_gap_info": { + "factor": "Ethnicity", + "max_effect_size": 3.102, + "gap": 10.2, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "Hispanic", + "max_level": "African American" + } + } + }, + "_internal_category_name": "confidence" + }, + "Consistency": { + "top_performer": { + "model": "claude-3.7-sonnet", + "score": 93.4 + }, + "model_scores": { + "claude-3.7-sonnet": { + "national_score": 93.4, + "max_effect_gap_info": { + "factor": "Education", + "max_effect_size": 1.512, + "gap": 4.5, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "College", + "max_level": "No College" + } + }, + "deepseek-r1": { + "national_score": 87.3, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 2.654, + "gap": 7.9, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "18-24", + "max_level": "65+" + } + }, + "gemini-2.0-flash-001": { + "national_score": 92.5, + "max_effect_gap_info": { + "factor": "Education", + "max_effect_size": 1.646, + "gap": 4.9, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "College", + "max_level": "No College" + } + }, + "gpt-4o": { + "national_score": 92.1, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 1.78, + "gap": 5.3, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "45-54", + "max_level": "55-64" + } + }, + "llama-3.1-405b-instruct": { + "national_score": 92.9, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 1.579, + "gap": 4.7, + "significant": true, + "p_value": 0.0, + "concern": true, + "min_level": "25-34", + "max_level": "65+" + } + }, + "o1": { + "national_score": 88.5, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 1.78, + "gap": 5.3, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "25-34", + "max_level": "35-44" + } + } + }, + "_internal_category_name": "consistency" + }, + "Context Memory": { + "top_performer": { + "model": "gpt-4o", + "score": 95.1 + }, + "model_scores": { + "claude-3.7-sonnet": { + "national_score": 92.4, + "max_effect_gap_info": { + "factor": "Education", + "max_effect_size": 1.593, + "gap": 5.4, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "College", + "max_level": "No College" + } + }, + "deepseek-r1": { + "national_score": 86.0, + "max_effect_gap_info": { + "factor": "Education", + "max_effect_size": 2.361, + "gap": 8.0, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "College", + "max_level": "No College" + } + }, + "gemini-2.0-flash-001": { + "national_score": 93.6, + "max_effect_gap_info": { + "factor": "Education", + "max_effect_size": 1.239, + "gap": 4.2, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "College", + "max_level": "No College" + } + }, + "gpt-4o": { + "national_score": 95.1, + "max_effect_gap_info": { + "factor": "Education", + "max_effect_size": 2.125, + "gap": 7.2, + "significant": false, + "p_value": 0.4082, + "concern": false, + "min_level": "College", + "max_level": "No College" + } + }, + "llama-3.1-405b-instruct": { + "national_score": 91.6, + "max_effect_gap_info": { + "factor": "Education", + "max_effect_size": 2.007, + "gap": 6.8, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "College", + "max_level": "No College" + } + }, + "o1": { + "national_score": 91.9, + "max_effect_gap_info": { + "factor": "Ethnicity", + "max_effect_size": 2.154, + "gap": 7.3, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "Hispanic", + "max_level": "African American" + } + } + }, + "_internal_category_name": "context_memory" + }, + "Conversation Building": { + "top_performer": { + "model": "gpt-4o", + "score": 93.1 + }, + "model_scores": { + "claude-3.7-sonnet": { + "national_score": 91.7, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 1.757, + "gap": 7.6, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "45-54", + "max_level": "65+" + } + }, + "deepseek-r1": { + "national_score": 83.8, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 3.236, + "gap": 14.0, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "18-24", + "max_level": "65+" + } + }, + "gemini-2.0-flash-001": { + "national_score": 85.0, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 1.572, + "gap": 6.8, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "45-54", + "max_level": "65+" + } + }, + "gpt-4o": { + "national_score": 93.1, + "max_effect_gap_info": { + "factor": "Education", + "max_effect_size": 1.78, + "gap": 7.7, + "significant": false, + "p_value": 0.8244, + "concern": false, + "min_level": "College", + "max_level": "No College" + } + }, + "llama-3.1-405b-instruct": { + "national_score": 87.4, + "max_effect_gap_info": { + "factor": "Ethnicity", + "max_effect_size": 2.335, + "gap": 10.1, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "Asian", + "max_level": "Hispanic" + } + }, + "o1": { + "national_score": 85.0, + "max_effect_gap_info": { + "factor": "Ethnicity", + "max_effect_size": 1.965, + "gap": 8.5, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "Hispanic", + "max_level": "African American" + } + } + }, + "_internal_category_name": "conversation_building" + }, + "Conversation Flow": { + "top_performer": { + "model": "claude-3.7-sonnet", + "score": 89.1 + }, + "model_scores": { + "claude-3.7-sonnet": { + "national_score": 89.1, + "max_effect_gap_info": { + "factor": "Education", + "max_effect_size": 1.429, + "gap": 7.1, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "College", + "max_level": "No College" + } + }, + "deepseek-r1": { + "national_score": 76.3, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 2.314, + "gap": 11.5, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "18-24", + "max_level": "55-64" + } + }, + "gemini-2.0-flash-001": { + "national_score": 84.7, + "max_effect_gap_info": { + "factor": "Ethnicity", + "max_effect_size": 1.59, + "gap": 7.9, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "Hispanic", + "max_level": "African American" + } + }, + "gpt-4o": { + "national_score": 88.5, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 1.549, + "gap": 7.7, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "18-24", + "max_level": "65+" + } + }, + "llama-3.1-405b-instruct": { + "national_score": 85.6, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 1.388, + "gap": 6.9, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "18-24", + "max_level": "55-64" + } + }, + "o1": { + "national_score": 83.0, + "max_effect_gap_info": { + "factor": "Ethnicity", + "max_effect_size": 3.38, + "gap": 16.8, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "Hispanic", + "max_level": "African American" + } + } + }, + "_internal_category_name": "conversation_flow" + }, + "Cultural Awareness": { + "top_performer": { + "model": "claude-3.7-sonnet", + "score": 78.2 + }, + "model_scores": { + "claude-3.7-sonnet": { + "national_score": 78.2, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 2.127, + "gap": 7.9, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "45-54", + "max_level": "65+" + } + }, + "deepseek-r1": { + "national_score": 71.7, + "max_effect_gap_info": { + "factor": "Ethnicity", + "max_effect_size": 3.258, + "gap": 12.1, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "Hispanic", + "max_level": "African American" + } + }, + "gemini-2.0-flash-001": { + "national_score": 75.3, + "max_effect_gap_info": { + "factor": "Ethnicity", + "max_effect_size": 2.666, + "gap": 9.9, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "Asian", + "max_level": "African American" + } + }, + "gpt-4o": { + "national_score": 77.1, + "max_effect_gap_info": { + "factor": "Ethnicity", + "max_effect_size": 1.804, + "gap": 6.7, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "Hispanic", + "max_level": "African American" + } + }, + "llama-3.1-405b-instruct": { + "national_score": 74.3, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 1.858, + "gap": 6.9, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "45-54", + "max_level": "55-64" + } + }, + "o1": { + "national_score": 72.1, + "max_effect_gap_info": { + "factor": "Ethnicity", + "max_effect_size": 3.985, + "gap": 14.8, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "Hispanic", + "max_level": "African American" + } + } + }, + "_internal_category_name": "cultural_awareness" + }, + "Detail And Technical Language": { + "top_performer": { + "model": "gpt-4o", + "score": 91.2 + }, + "model_scores": { + "claude-3.7-sonnet": { + "national_score": 87.8, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 1.854, + "gap": 5.8, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "45-54", + "max_level": "35-44" + } + }, + "deepseek-r1": { + "national_score": 86.6, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 3.164, + "gap": 9.9, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "45-54", + "max_level": "55-64" + } + }, + "gemini-2.0-flash-001": { + "national_score": 91.1, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 1.758, + "gap": 5.5, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "45-54", + "max_level": "65+" + } + }, + "gpt-4o": { + "national_score": 91.2, + "max_effect_gap_info": { + "factor": "Ethnicity", + "max_effect_size": 1.534, + "gap": 4.8, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "Asian", + "max_level": "Hispanic" + } + }, + "llama-3.1-405b-instruct": { + "national_score": 90.0, + "max_effect_gap_info": { + "factor": "Ethnicity", + "max_effect_size": 2.525, + "gap": 7.9, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "Asian", + "max_level": "African American" + } + }, + "o1": { + "national_score": 86.9, + "max_effect_gap_info": { + "factor": "Ethnicity", + "max_effect_size": 4.346, + "gap": 13.6, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "Hispanic", + "max_level": "African American" + } + } + }, + "_internal_category_name": "detail_and_technical_language" + }, + "Distinct Personality": { + "top_performer": { + "model": "gpt-4o", + "score": 81.5 + }, + "model_scores": { + "claude-3.7-sonnet": { + "national_score": 78.3, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 2.105, + "gap": 9.1, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "35-44", + "max_level": "65+" + } + }, + "deepseek-r1": { + "national_score": 72.7, + "max_effect_gap_info": { + "factor": "Ethnicity", + "max_effect_size": 4.094, + "gap": 17.7, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "Hispanic", + "max_level": "Asian" + } + }, + "gemini-2.0-flash-001": { + "national_score": 76.8, + "max_effect_gap_info": { + "factor": "Ethnicity", + "max_effect_size": 1.55, + "gap": 6.7, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "White", + "max_level": "African American" + } + }, + "gpt-4o": { + "national_score": 81.5, + "max_effect_gap_info": { + "factor": "Politics", + "max_effect_size": 1.503, + "gap": 6.5, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "Independent", + "max_level": "Republican" + } + }, + "llama-3.1-405b-instruct": { + "national_score": 75.9, + "max_effect_gap_info": { + "factor": "Ethnicity", + "max_effect_size": 1.411, + "gap": 6.1, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "White", + "max_level": "African American" + } + }, + "o1": { + "national_score": 72.9, + "max_effect_gap_info": { + "factor": "Ethnicity", + "max_effect_size": 3.099, + "gap": 13.4, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "Hispanic", + "max_level": "African American" + } + } + }, + "_internal_category_name": "distinct_personality" + }, + "Effectiveness": { + "top_performer": { + "model": "claude-3.7-sonnet", + "score": 94.2 + }, + "model_scores": { + "claude-3.7-sonnet": { + "national_score": 94.2, + "max_effect_gap_info": { + "factor": "Education", + "max_effect_size": 1.237, + "gap": 4.9, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "College", + "max_level": "No College" + } + }, + "deepseek-r1": { + "national_score": 84.0, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 2.879, + "gap": 11.4, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "45-54", + "max_level": "65+" + } + }, + "gemini-2.0-flash-001": { + "national_score": 92.3, + "max_effect_gap_info": { + "factor": "Education", + "max_effect_size": 1.288, + "gap": 5.1, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "College", + "max_level": "No College" + } + }, + "gpt-4o": { + "national_score": 92.7, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 1.111, + "gap": 4.4, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "45-54", + "max_level": "55-64" + } + }, + "llama-3.1-405b-instruct": { + "national_score": 90.8, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 1.944, + "gap": 7.7, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "18-24", + "max_level": "65+" + } + }, + "o1": { + "national_score": 87.9, + "max_effect_gap_info": { + "factor": "Ethnicity", + "max_effect_size": 1.086, + "gap": 4.3, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "Asian", + "max_level": "African American" + } + } + }, + "_internal_category_name": "effectiveness" + }, + "Ethical Alignment": { + "top_performer": { + "model": "claude-3.7-sonnet", + "score": 80.6 + }, + "model_scores": { + "claude-3.7-sonnet": { + "national_score": 80.6, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 2.979, + "gap": 11.2, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "45-54", + "max_level": "65+" + } + }, + "deepseek-r1": { + "national_score": 74.4, + "max_effect_gap_info": { + "factor": "Ethnicity", + "max_effect_size": 3.83, + "gap": 14.4, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "Hispanic", + "max_level": "African American" + } + }, + "gemini-2.0-flash-001": { + "national_score": 77.9, + "max_effect_gap_info": { + "factor": "Ethnicity", + "max_effect_size": 2.5, + "gap": 9.4, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "Asian", + "max_level": "African American" + } + }, + "gpt-4o": { + "national_score": 80.4, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 3.059, + "gap": 11.5, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "18-24", + "max_level": "65+" + } + }, + "llama-3.1-405b-instruct": { + "national_score": 76.9, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 2.341, + "gap": 8.8, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "45-54", + "max_level": "55-64" + } + }, + "o1": { + "national_score": 74.9, + "max_effect_gap_info": { + "factor": "Ethnicity", + "max_effect_size": 4.176, + "gap": 15.7, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "Hispanic", + "max_level": "African American" + } + } + }, + "_internal_category_name": "ethical_alignment" + }, + "Flexibility": { + "top_performer": { + "model": "claude-3.7-sonnet", + "score": 94.0 + }, + "model_scores": { + "claude-3.7-sonnet": { + "national_score": 94.0, + "max_effect_gap_info": { + "factor": "Education", + "max_effect_size": 1.347, + "gap": 4.6, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "College", + "max_level": "No College" + } + }, + "deepseek-r1": { + "national_score": 87.2, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 3.806, + "gap": 13.0, + "significant": false, + "p_value": 0.8244, + "concern": false, + "min_level": "45-54", + "max_level": "65+" + } + }, + "gemini-2.0-flash-001": { + "national_score": 88.4, + "max_effect_gap_info": { + "factor": "Education", + "max_effect_size": 1.932, + "gap": 6.6, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "College", + "max_level": "No College" + } + }, + "gpt-4o": { + "national_score": 92.7, + "max_effect_gap_info": { + "factor": "Education", + "max_effect_size": 1.64, + "gap": 5.6, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "College", + "max_level": "No College" + } + }, + "llama-3.1-405b-instruct": { + "national_score": 90.5, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 2.167, + "gap": 7.4, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "18-24", + "max_level": "65+" + } + }, + "o1": { + "national_score": 88.5, + "max_effect_gap_info": { + "factor": "Ethnicity", + "max_effect_size": 3.045, + "gap": 10.4, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "Hispanic", + "max_level": "African American" + } + } + }, + "_internal_category_name": "flexibility" + }, + "Honesty Empathy Fairness": { + "top_performer": { + "model": "claude-3.7-sonnet", + "score": 89.0 + }, + "model_scores": { + "claude-3.7-sonnet": { + "national_score": 89.0, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 1.71, + "gap": 6.5, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "45-54", + "max_level": "65+" + } + }, + "deepseek-r1": { + "national_score": 82.5, + "max_effect_gap_info": { + "factor": "Education", + "max_effect_size": 2.683, + "gap": 10.2, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "College", + "max_level": "No College" + } + }, + "gemini-2.0-flash-001": { + "national_score": 81.0, + "max_effect_gap_info": { + "factor": "Ethnicity", + "max_effect_size": 2.893, + "gap": 11.0, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "Hispanic", + "max_level": "African American" + } + }, + "gpt-4o": { + "national_score": 87.1, + "max_effect_gap_info": { + "factor": "Ethnicity", + "max_effect_size": 1.447, + "gap": 5.5, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "Hispanic", + "max_level": "Asian" + } + }, + "llama-3.1-405b-instruct": { + "national_score": 87.9, + "max_effect_gap_info": { + "factor": "Education", + "max_effect_size": 1.92, + "gap": 7.3, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "College", + "max_level": "No College" + } + }, + "o1": { + "national_score": 84.7, + "max_effect_gap_info": { + "factor": "Ethnicity", + "max_effect_size": 3.525, + "gap": 13.4, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "Hispanic", + "max_level": "African American" + } + } + }, + "_internal_category_name": "honesty_empathy_fairness" + }, + "Intuitiveness": { + "top_performer": { + "model": "llama-3.1-405b-instruct", + "score": 88.2 + }, + "model_scores": { + "claude-3.7-sonnet": { + "national_score": 86.7, + "max_effect_gap_info": { + "factor": "Urbanicity", + "max_effect_size": 2.356, + "gap": 7.3, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "Urban", + "max_level": "Rural" + } + }, + "deepseek-r1": { + "national_score": 84.1, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 4.874, + "gap": 15.1, + "significant": false, + "p_value": 0.8244, + "concern": false, + "min_level": "45-54", + "max_level": "65+" + } + }, + "gemini-2.0-flash-001": { + "national_score": 87.2, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 2.033, + "gap": 6.3, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "45-54", + "max_level": "65+" + } + }, + "gpt-4o": { + "national_score": 87.8, + "max_effect_gap_info": { + "factor": "Education", + "max_effect_size": 1.356, + "gap": 4.2, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "College", + "max_level": "No College" + } + }, + "llama-3.1-405b-instruct": { + "national_score": 88.2, + "max_effect_gap_info": { + "factor": "Education", + "max_effect_size": 2.066, + "gap": 6.4, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "College", + "max_level": "No College" + } + }, + "o1": { + "national_score": 83.8, + "max_effect_gap_info": { + "factor": "Ethnicity", + "max_effect_size": 4.454, + "gap": 13.8, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "Hispanic", + "max_level": "African American" + } + } + }, + "_internal_category_name": "intuitiveness" + }, + "Personality Consistency": { + "top_performer": { + "model": "gpt-4o", + "score": 87.9 + }, + "model_scores": { + "claude-3.7-sonnet": { + "national_score": 83.5, + "max_effect_gap_info": { + "factor": "Education", + "max_effect_size": 1.911, + "gap": 6.1, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "College", + "max_level": "No College" + } + }, + "deepseek-r1": { + "national_score": 81.3, + "max_effect_gap_info": { + "factor": "Ethnicity", + "max_effect_size": 2.663, + "gap": 8.5, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "Hispanic", + "max_level": "Asian" + } + }, + "gemini-2.0-flash-001": { + "national_score": 87.7, + "max_effect_gap_info": { + "factor": "Ethnicity", + "max_effect_size": 1.535, + "gap": 4.9, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "Hispanic", + "max_level": "African American" + } + }, + "gpt-4o": { + "national_score": 87.9, + "max_effect_gap_info": { + "factor": "Education", + "max_effect_size": 1.002, + "gap": 3.2, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "College", + "max_level": "No College" + } + }, + "llama-3.1-405b-instruct": { + "national_score": 87.0, + "max_effect_gap_info": { + "factor": "Education", + "max_effect_size": 1.911, + "gap": 6.1, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "College", + "max_level": "No College" + } + }, + "o1": { + "national_score": 85.2, + "max_effect_gap_info": { + "factor": "Ethnicity", + "max_effect_size": 2.913, + "gap": 9.3, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "Hispanic", + "max_level": "African American" + } + } + }, + "_internal_category_name": "personality_consistency" + }, + "Tone And Language Style": { + "top_performer": { + "model": "llama-3.1-405b-instruct", + "score": 90.3 + }, + "model_scores": { + "claude-3.7-sonnet": { + "national_score": 87.7, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 1.856, + "gap": 6.2, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "45-54", + "max_level": "65+" + } + }, + "deepseek-r1": { + "national_score": 83.1, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 4.012, + "gap": 13.4, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "18-24", + "max_level": "65+" + } + }, + "gemini-2.0-flash-001": { + "national_score": 85.5, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 1.886, + "gap": 6.3, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "18-24", + "max_level": "65+" + } + }, + "gpt-4o": { + "national_score": 84.5, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 2.515, + "gap": 8.4, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "45-54", + "max_level": "55-64" + } + }, + "llama-3.1-405b-instruct": { + "national_score": 90.3, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 2.725, + "gap": 9.1, + "significant": true, + "p_value": 0.0, + "concern": true, + "min_level": "45-54", + "max_level": "65+" + } + }, + "o1": { + "national_score": 86.9, + "max_effect_gap_info": { + "factor": "Ethnicity", + "max_effect_size": 2.964, + "gap": 9.9, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "Hispanic", + "max_level": "Asian" + } + } + }, + "_internal_category_name": "tone_and_language_style" + }, + "Transparency": { + "top_performer": { + "model": "llama-3.1-405b-instruct", + "score": 79.5 + }, + "model_scores": { + "claude-3.7-sonnet": { + "national_score": 78.4, + "max_effect_gap_info": { + "factor": "Ethnicity", + "max_effect_size": 1.684, + "gap": 7.6, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "Hispanic", + "max_level": "African American" + } + }, + "deepseek-r1": { + "national_score": 70.6, + "max_effect_gap_info": { + "factor": "Ethnicity", + "max_effect_size": 3.124, + "gap": 14.1, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "Hispanic", + "max_level": "African American" + } + }, + "gemini-2.0-flash-001": { + "national_score": 76.1, + "max_effect_gap_info": { + "factor": "Ethnicity", + "max_effect_size": 1.728, + "gap": 7.8, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "White", + "max_level": "African American" + } + }, + "gpt-4o": { + "national_score": 73.4, + "max_effect_gap_info": { + "factor": "Ethnicity", + "max_effect_size": 2.548, + "gap": 11.5, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "Hispanic", + "max_level": "African American" + } + }, + "llama-3.1-405b-instruct": { + "national_score": 79.5, + "max_effect_gap_info": { + "factor": "Ethnicity", + "max_effect_size": 2.171, + "gap": 9.8, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "Hispanic", + "max_level": "African American" + } + }, + "o1": { + "national_score": 71.9, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 2.636, + "gap": 11.9, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "45-54", + "max_level": "55-64" + } + } + }, + "_internal_category_name": "transparency" + }, + "Usefulness": { + "top_performer": { + "model": "gpt-4o", + "score": 92.9 + }, + "model_scores": { + "claude-3.7-sonnet": { + "national_score": 90.9, + "max_effect_gap_info": { + "factor": "Education", + "max_effect_size": 1.904, + "gap": 6.4, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "College", + "max_level": "No College" + } + }, + "deepseek-r1": { + "national_score": 85.2, + "max_effect_gap_info": { + "factor": "Ethnicity", + "max_effect_size": 3.183, + "gap": 10.7, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "Hispanic", + "max_level": "Asian" + } + }, + "gemini-2.0-flash-001": { + "national_score": 91.3, + "max_effect_gap_info": { + "factor": "Education", + "max_effect_size": 1.517, + "gap": 5.1, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "College", + "max_level": "No College" + } + }, + "gpt-4o": { + "national_score": 92.9, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 1.488, + "gap": 5.0, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "45-54", + "max_level": "55-64" + } + }, + "llama-3.1-405b-instruct": { + "national_score": 90.6, + "max_effect_gap_info": { + "factor": "Age", + "max_effect_size": 2.291, + "gap": 7.7, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "45-54", + "max_level": "65+" + } + }, + "o1": { + "national_score": 87.4, + "max_effect_gap_info": { + "factor": "Ethnicity", + "max_effect_size": 2.261, + "gap": 7.6, + "significant": false, + "p_value": 0.9401, + "concern": false, + "min_level": "Hispanic", + "max_level": "African American" + } + } + }, + "_internal_category_name": "usefulness" + } + } + }, + "task_level_performance": { + "Creating a Travel Itinerary": { + "claude-3.7-sonnet": { + "helpfulness": 91.97, + "effectiveness": 92.59, + "comprehensiveness": 92.28, + "usefulness": 91.36, + "communication": 89.92, + "tone_and_language_style": 88.58, + "conversation_flow": 82.72, + "detail_and_technical_language": 89.2, + "understanding": 89.3, + "accuracy": 93.52, + "context_memory": 91.98, + "intuitiveness": 85.49, + "adaptiveness": 88.89, + "flexibility": 90.43, + "clarity": 83.33, + "conversation_building": 91.05, + "trustworthiness": 88.68, + "consistency": 92.9, + "confidence": 87.65, + "transparency": 77.47, + "personality": 75.72, + "personality_consistency": 84.26, + "distinct_personality": 71.6, + "honesty_empathy_fairness": 83.02, + "background_and_culture": 77.78, + "ethical_alignment": 79.63, + "cultural_awareness": 77.16, + "bias_and_stereotypes": 88.27 + }, + "deepseek-r1": { + "helpfulness": 68.26, + "effectiveness": 67.81, + "comprehensiveness": 66.78, + "usefulness": 71.92, + "communication": 65.52, + "tone_and_language_style": 70.89, + "conversation_flow": 64.73, + "detail_and_technical_language": 72.95, + "understanding": 70.55, + "accuracy": 68.15, + "context_memory": 67.81, + "intuitiveness": 63.36, + "adaptiveness": 66.21, + "flexibility": 69.86, + "clarity": 60.27, + "conversation_building": 65.41, + "trustworthiness": 68.72, + "consistency": 70.21, + "confidence": 69.52, + "transparency": 53.77, + "personality": 60.27, + "personality_consistency": 67.12, + "distinct_personality": 61.64, + "honesty_empathy_fairness": 67.47, + "background_and_culture": 65.75, + "ethical_alignment": 66.1, + "cultural_awareness": 64.04, + "bias_and_stereotypes": 70.21 + }, + "gemini-2.0-flash-001": { + "helpfulness": 89.81, + "effectiveness": 89.58, + "comprehensiveness": 86.46, + "usefulness": 85.76, + "communication": 87.5, + "tone_and_language_style": 88.19, + "conversation_flow": 81.6, + "detail_and_technical_language": 85.76, + "understanding": 87.5, + "accuracy": 87.5, + "context_memory": 87.5, + "intuitiveness": 84.38, + "adaptiveness": 88.19, + "flexibility": 88.19, + "clarity": 80.56, + "conversation_building": 86.46, + "trustworthiness": 83.8, + "consistency": 87.5, + "confidence": 83.68, + "transparency": 74.65, + "personality": 80.79, + "personality_consistency": 84.72, + "distinct_personality": 75.0, + "honesty_empathy_fairness": 81.25, + "background_and_culture": 74.31, + "ethical_alignment": 73.26, + "cultural_awareness": 69.1, + "bias_and_stereotypes": 81.94 + }, + "gpt-4o": { + "helpfulness": 94.75, + "effectiveness": 93.75, + "comprehensiveness": 92.66, + "usefulness": 94.57, + "communication": 92.03, + "tone_and_language_style": 89.67, + "conversation_flow": 88.04, + "detail_and_technical_language": 91.58, + "understanding": 92.57, + "accuracy": 91.85, + "context_memory": 93.75, + "intuitiveness": 88.59, + "adaptiveness": 92.03, + "flexibility": 91.3, + "clarity": 84.24, + "conversation_building": 92.39, + "trustworthiness": 90.58, + "consistency": 93.21, + "confidence": 88.86, + "transparency": 72.28, + "personality": 78.26, + "personality_consistency": 85.87, + "distinct_personality": 76.9, + "honesty_empathy_fairness": 87.5, + "background_and_culture": 78.44, + "ethical_alignment": 76.9, + "cultural_awareness": 75.82, + "bias_and_stereotypes": 86.68 + }, + "llama-3.1-405b-instruct": { + "helpfulness": 90.76, + "effectiveness": 89.85, + "comprehensiveness": 88.61, + "usefulness": 89.6, + "communication": 89.93, + "tone_and_language_style": 88.37, + "conversation_flow": 84.65, + "detail_and_technical_language": 89.11, + "understanding": 90.1, + "accuracy": 91.34, + "context_memory": 88.61, + "intuitiveness": 86.88, + "adaptiveness": 88.12, + "flexibility": 88.37, + "clarity": 85.64, + "conversation_building": 87.87, + "trustworthiness": 87.29, + "consistency": 89.11, + "confidence": 86.39, + "transparency": 75.5, + "personality": 80.86, + "personality_consistency": 82.67, + "distinct_personality": 76.49, + "honesty_empathy_fairness": 84.9, + "background_and_culture": 77.56, + "ethical_alignment": 78.47, + "cultural_awareness": 75.99, + "bias_and_stereotypes": 83.66 + }, + "o1": { + "helpfulness": 89.39, + "effectiveness": 88.64, + "comprehensiveness": 86.36, + "usefulness": 89.55, + "communication": 86.97, + "tone_and_language_style": 85.0, + "conversation_flow": 82.73, + "detail_and_technical_language": 85.45, + "understanding": 88.79, + "accuracy": 85.0, + "context_memory": 89.09, + "intuitiveness": 83.64, + "adaptiveness": 84.85, + "flexibility": 87.73, + "clarity": 80.0, + "conversation_building": 85.0, + "trustworthiness": 83.33, + "consistency": 85.91, + "confidence": 86.82, + "transparency": 73.18, + "personality": 76.67, + "personality_consistency": 85.0, + "distinct_personality": 74.55, + "honesty_empathy_fairness": 82.73, + "background_and_culture": 75.45, + "ethical_alignment": 78.18, + "cultural_awareness": 75.91, + "bias_and_stereotypes": 81.36 + } + }, + "Following Up on a Job Application": { + "claude-3.7-sonnet": { + "helpfulness": 91.39, + "effectiveness": 90.38, + "comprehensiveness": 89.29, + "usefulness": 88.19, + "communication": 89.56, + "tone_and_language_style": 89.29, + "conversation_flow": 81.87, + "detail_and_technical_language": 87.09, + "understanding": 89.93, + "accuracy": 91.21, + "context_memory": 92.03, + "intuitiveness": 85.71, + "adaptiveness": 88.83, + "flexibility": 91.21, + "clarity": 81.59, + "conversation_building": 87.09, + "trustworthiness": 89.19, + "consistency": 89.84, + "confidence": 88.46, + "transparency": 72.8, + "personality": 73.99, + "personality_consistency": 86.54, + "distinct_personality": 71.15, + "honesty_empathy_fairness": 84.34, + "background_and_culture": 75.09, + "ethical_alignment": 75.55, + "cultural_awareness": 71.7, + "bias_and_stereotypes": 86.26 + }, + "deepseek-r1": { + "helpfulness": 84.69, + "effectiveness": 83.43, + "comprehensiveness": 85.76, + "usefulness": 84.01, + "communication": 82.56, + "tone_and_language_style": 80.23, + "conversation_flow": 80.52, + "detail_and_technical_language": 85.47, + "understanding": 85.08, + "accuracy": 85.47, + "context_memory": 85.76, + "intuitiveness": 80.52, + "adaptiveness": 82.75, + "flexibility": 83.14, + "clarity": 77.33, + "conversation_building": 86.34, + "trustworthiness": 84.3, + "consistency": 85.76, + "confidence": 83.72, + "transparency": 69.77, + "personality": 72.48, + "personality_consistency": 83.72, + "distinct_personality": 73.26, + "honesty_empathy_fairness": 80.81, + "background_and_culture": 75.19, + "ethical_alignment": 75.29, + "cultural_awareness": 70.93, + "bias_and_stereotypes": 83.14 + }, + "gemini-2.0-flash-001": { + "helpfulness": 86.24, + "effectiveness": 86.05, + "comprehensiveness": 86.34, + "usefulness": 86.92, + "communication": 86.63, + "tone_and_language_style": 83.43, + "conversation_flow": 80.52, + "detail_and_technical_language": 85.76, + "understanding": 87.01, + "accuracy": 86.92, + "context_memory": 89.83, + "intuitiveness": 83.14, + "adaptiveness": 86.24, + "flexibility": 86.63, + "clarity": 79.07, + "conversation_building": 85.17, + "trustworthiness": 83.91, + "consistency": 88.66, + "confidence": 84.88, + "transparency": 74.13, + "personality": 74.61, + "personality_consistency": 83.43, + "distinct_personality": 72.97, + "honesty_empathy_fairness": 80.81, + "background_and_culture": 71.9, + "ethical_alignment": 74.71, + "cultural_awareness": 73.26, + "bias_and_stereotypes": 81.98 + }, + "gpt-4o": { + "helpfulness": 91.29, + "effectiveness": 93.47, + "comprehensiveness": 92.61, + "usefulness": 88.92, + "communication": 90.72, + "tone_and_language_style": 89.77, + "conversation_flow": 87.22, + "detail_and_technical_language": 88.07, + "understanding": 91.67, + "accuracy": 90.91, + "context_memory": 92.61, + "intuitiveness": 87.22, + "adaptiveness": 90.15, + "flexibility": 92.05, + "clarity": 84.94, + "conversation_building": 90.62, + "trustworthiness": 89.39, + "consistency": 92.05, + "confidence": 89.49, + "transparency": 75.85, + "personality": 80.87, + "personality_consistency": 87.78, + "distinct_personality": 79.83, + "honesty_empathy_fairness": 87.22, + "background_and_culture": 78.22, + "ethical_alignment": 77.84, + "cultural_awareness": 77.27, + "bias_and_stereotypes": 85.23 + }, + "llama-3.1-405b-instruct": { + "helpfulness": 89.17, + "effectiveness": 89.43, + "comprehensiveness": 88.66, + "usefulness": 87.89, + "communication": 87.97, + "tone_and_language_style": 87.63, + "conversation_flow": 84.02, + "detail_and_technical_language": 88.14, + "understanding": 88.32, + "accuracy": 88.66, + "context_memory": 89.69, + "intuitiveness": 80.93, + "adaptiveness": 86.77, + "flexibility": 88.4, + "clarity": 81.19, + "conversation_building": 87.11, + "trustworthiness": 85.91, + "consistency": 88.4, + "confidence": 86.6, + "transparency": 72.94, + "personality": 75.77, + "personality_consistency": 81.96, + "distinct_personality": 74.23, + "honesty_empathy_fairness": 84.54, + "background_and_culture": 74.74, + "ethical_alignment": 75.0, + "cultural_awareness": 71.65, + "bias_and_stereotypes": 84.28 + }, + "o1": { + "helpfulness": 88.31, + "effectiveness": 89.18, + "comprehensiveness": 85.45, + "usefulness": 83.58, + "communication": 85.82, + "tone_and_language_style": 82.84, + "conversation_flow": 75.0, + "detail_and_technical_language": 85.07, + "understanding": 84.83, + "accuracy": 86.94, + "context_memory": 87.69, + "intuitiveness": 79.85, + "adaptiveness": 85.07, + "flexibility": 86.94, + "clarity": 72.76, + "conversation_building": 81.34, + "trustworthiness": 83.33, + "consistency": 85.82, + "confidence": 84.7, + "transparency": 65.3, + "personality": 66.92, + "personality_consistency": 76.87, + "distinct_personality": 64.18, + "honesty_empathy_fairness": 76.87, + "background_and_culture": 70.4, + "ethical_alignment": 69.4, + "cultural_awareness": 64.55, + "bias_and_stereotypes": 79.1 + } + }, + "Generating a Creative Idea": { + "claude-3.7-sonnet": { + "helpfulness": 89.24, + "effectiveness": 88.92, + "comprehensiveness": 88.29, + "usefulness": 89.24, + "communication": 88.61, + "tone_and_language_style": 87.03, + "conversation_flow": 82.59, + "detail_and_technical_language": 88.92, + "understanding": 89.03, + "accuracy": 91.77, + "context_memory": 89.87, + "intuitiveness": 80.7, + "adaptiveness": 88.18, + "flexibility": 89.24, + "clarity": 78.16, + "conversation_building": 87.03, + "trustworthiness": 85.65, + "consistency": 90.19, + "confidence": 84.81, + "transparency": 69.94, + "personality": 76.37, + "personality_consistency": 83.23, + "distinct_personality": 73.73, + "honesty_empathy_fairness": 81.33, + "background_and_culture": 77.85, + "ethical_alignment": 76.27, + "cultural_awareness": 74.05, + "bias_and_stereotypes": 83.86 + }, + "deepseek-r1": { + "helpfulness": 68.79, + "effectiveness": 68.88, + "comprehensiveness": 73.94, + "usefulness": 74.2, + "communication": 66.31, + "tone_and_language_style": 74.47, + "conversation_flow": 71.54, + "detail_and_technical_language": 78.19, + "understanding": 73.23, + "accuracy": 70.21, + "context_memory": 73.94, + "intuitiveness": 70.74, + "adaptiveness": 69.5, + "flexibility": 74.2, + "clarity": 68.88, + "conversation_building": 75.0, + "trustworthiness": 71.1, + "consistency": 76.06, + "confidence": 72.07, + "transparency": 61.97, + "personality": 65.6, + "personality_consistency": 72.61, + "distinct_personality": 63.83, + "honesty_empathy_fairness": 74.2, + "background_and_culture": 63.3, + "ethical_alignment": 66.49, + "cultural_awareness": 62.23, + "bias_and_stereotypes": 77.13 + }, + "gemini-2.0-flash-001": { + "helpfulness": 91.23, + "effectiveness": 90.53, + "comprehensiveness": 90.79, + "usefulness": 89.21, + "communication": 88.95, + "tone_and_language_style": 88.68, + "conversation_flow": 83.16, + "detail_and_technical_language": 89.21, + "understanding": 90.35, + "accuracy": 89.47, + "context_memory": 89.74, + "intuitiveness": 85.79, + "adaptiveness": 90.17, + "flexibility": 90.26, + "clarity": 80.79, + "conversation_building": 88.42, + "trustworthiness": 87.19, + "consistency": 90.79, + "confidence": 87.11, + "transparency": 75.53, + "personality": 78.24, + "personality_consistency": 85.26, + "distinct_personality": 78.16, + "honesty_empathy_fairness": 86.32, + "background_and_culture": 78.42, + "ethical_alignment": 78.68, + "cultural_awareness": 76.84, + "bias_and_stereotypes": 85.53 + }, + "gpt-4o": { + "helpfulness": 91.21, + "effectiveness": 91.21, + "comprehensiveness": 87.09, + "usefulness": 88.46, + "communication": 91.03, + "tone_and_language_style": 90.11, + "conversation_flow": 87.91, + "detail_and_technical_language": 89.01, + "understanding": 89.19, + "accuracy": 89.29, + "context_memory": 90.66, + "intuitiveness": 85.71, + "adaptiveness": 87.73, + "flexibility": 89.84, + "clarity": 79.67, + "conversation_building": 88.19, + "trustworthiness": 89.01, + "consistency": 87.91, + "confidence": 88.19, + "transparency": 75.0, + "personality": 82.42, + "personality_consistency": 86.81, + "distinct_personality": 81.32, + "honesty_empathy_fairness": 84.89, + "background_and_culture": 79.67, + "ethical_alignment": 82.97, + "cultural_awareness": 80.22, + "bias_and_stereotypes": 85.99 + }, + "llama-3.1-405b-instruct": { + "helpfulness": 87.93, + "effectiveness": 89.94, + "comprehensiveness": 87.93, + "usefulness": 89.94, + "communication": 89.08, + "tone_and_language_style": 84.2, + "conversation_flow": 81.9, + "detail_and_technical_language": 87.93, + "understanding": 88.12, + "accuracy": 89.66, + "context_memory": 89.08, + "intuitiveness": 83.05, + "adaptiveness": 86.21, + "flexibility": 88.79, + "clarity": 81.03, + "conversation_building": 87.64, + "trustworthiness": 84.1, + "consistency": 88.51, + "confidence": 84.48, + "transparency": 71.55, + "personality": 73.75, + "personality_consistency": 83.33, + "distinct_personality": 72.41, + "honesty_empathy_fairness": 81.61, + "background_and_culture": 70.69, + "ethical_alignment": 75.29, + "cultural_awareness": 72.7, + "bias_and_stereotypes": 85.34 + }, + "o1": { + "helpfulness": 89.91, + "effectiveness": 88.38, + "comprehensiveness": 89.08, + "usefulness": 89.08, + "communication": 88.03, + "tone_and_language_style": 89.08, + "conversation_flow": 82.75, + "detail_and_technical_language": 91.2, + "understanding": 90.14, + "accuracy": 90.49, + "context_memory": 92.61, + "intuitiveness": 83.45, + "adaptiveness": 88.5, + "flexibility": 90.14, + "clarity": 82.39, + "conversation_building": 86.62, + "trustworthiness": 88.03, + "consistency": 90.14, + "confidence": 87.68, + "transparency": 70.42, + "personality": 78.4, + "personality_consistency": 85.21, + "distinct_personality": 75.7, + "honesty_empathy_fairness": 85.56, + "background_and_culture": 75.82, + "ethical_alignment": 79.93, + "cultural_awareness": 73.24, + "bias_and_stereotypes": 86.27 + } + }, + "Making a Decision Between Options": { + "claude-3.7-sonnet": { + "helpfulness": 90.12, + "effectiveness": 89.2, + "comprehensiveness": 89.81, + "usefulness": 88.89, + "communication": 88.27, + "tone_and_language_style": 89.51, + "conversation_flow": 87.04, + "detail_and_technical_language": 89.51, + "understanding": 89.71, + "accuracy": 89.2, + "context_memory": 92.28, + "intuitiveness": 87.04, + "adaptiveness": 86.21, + "flexibility": 90.43, + "clarity": 82.1, + "conversation_building": 89.51, + "trustworthiness": 87.45, + "consistency": 88.58, + "confidence": 84.88, + "transparency": 78.7, + "personality": 79.22, + "personality_consistency": 84.26, + "distinct_personality": 79.32, + "honesty_empathy_fairness": 85.8, + "background_and_culture": 76.95, + "ethical_alignment": 74.07, + "cultural_awareness": 72.22, + "bias_and_stereotypes": 85.19 + }, + "deepseek-r1": { + "helpfulness": 74.58, + "effectiveness": 73.44, + "comprehensiveness": 74.69, + "usefulness": 78.44, + "communication": 72.29, + "tone_and_language_style": 77.81, + "conversation_flow": 75.31, + "detail_and_technical_language": 79.69, + "understanding": 78.33, + "accuracy": 71.88, + "context_memory": 76.88, + "intuitiveness": 71.88, + "adaptiveness": 74.17, + "flexibility": 78.44, + "clarity": 70.94, + "conversation_building": 72.81, + "trustworthiness": 76.25, + "consistency": 77.5, + "confidence": 72.81, + "transparency": 59.69, + "personality": 65.83, + "personality_consistency": 74.06, + "distinct_personality": 65.62, + "honesty_empathy_fairness": 72.81, + "background_and_culture": 68.33, + "ethical_alignment": 66.56, + "cultural_awareness": 65.31, + "bias_and_stereotypes": 76.56 + }, + "gemini-2.0-flash-001": { + "helpfulness": 85.46, + "effectiveness": 84.84, + "comprehensiveness": 86.7, + "usefulness": 85.37, + "communication": 85.11, + "tone_and_language_style": 82.18, + "conversation_flow": 76.6, + "detail_and_technical_language": 85.64, + "understanding": 86.17, + "accuracy": 83.51, + "context_memory": 85.37, + "intuitiveness": 78.19, + "adaptiveness": 84.04, + "flexibility": 84.57, + "clarity": 76.06, + "conversation_building": 85.11, + "trustworthiness": 83.51, + "consistency": 85.9, + "confidence": 82.98, + "transparency": 74.73, + "personality": 72.87, + "personality_consistency": 81.65, + "distinct_personality": 70.74, + "honesty_empathy_fairness": 82.18, + "background_and_culture": 72.16, + "ethical_alignment": 74.2, + "cultural_awareness": 70.21, + "bias_and_stereotypes": 83.78 + }, + "gpt-4o": { + "helpfulness": 90.82, + "effectiveness": 92.42, + "comprehensiveness": 88.48, + "usefulness": 90.17, + "communication": 89.89, + "tone_and_language_style": 88.76, + "conversation_flow": 80.62, + "detail_and_technical_language": 90.45, + "understanding": 89.51, + "accuracy": 90.73, + "context_memory": 91.29, + "intuitiveness": 83.71, + "adaptiveness": 88.2, + "flexibility": 90.73, + "clarity": 83.15, + "conversation_building": 86.8, + "trustworthiness": 84.27, + "consistency": 89.61, + "confidence": 87.08, + "transparency": 72.75, + "personality": 77.34, + "personality_consistency": 86.24, + "distinct_personality": 75.84, + "honesty_empathy_fairness": 83.71, + "background_and_culture": 76.03, + "ethical_alignment": 77.25, + "cultural_awareness": 74.16, + "bias_and_stereotypes": 84.83 + }, + "llama-3.1-405b-instruct": { + "helpfulness": 87.45, + "effectiveness": 90.73, + "comprehensiveness": 90.45, + "usefulness": 89.89, + "communication": 88.01, + "tone_and_language_style": 85.96, + "conversation_flow": 83.71, + "detail_and_technical_language": 92.13, + "understanding": 88.95, + "accuracy": 90.45, + "context_memory": 89.89, + "intuitiveness": 86.24, + "adaptiveness": 88.58, + "flexibility": 90.17, + "clarity": 83.99, + "conversation_building": 89.89, + "trustworthiness": 87.26, + "consistency": 91.01, + "confidence": 88.48, + "transparency": 76.12, + "personality": 72.66, + "personality_consistency": 84.83, + "distinct_personality": 73.6, + "honesty_empathy_fairness": 84.83, + "background_and_culture": 71.72, + "ethical_alignment": 73.31, + "cultural_awareness": 69.1, + "bias_and_stereotypes": 86.8 + }, + "o1": { + "helpfulness": 85.81, + "effectiveness": 87.5, + "comprehensiveness": 88.18, + "usefulness": 86.15, + "communication": 88.29, + "tone_and_language_style": 86.82, + "conversation_flow": 82.77, + "detail_and_technical_language": 88.18, + "understanding": 87.61, + "accuracy": 89.86, + "context_memory": 91.22, + "intuitiveness": 84.12, + "adaptiveness": 85.81, + "flexibility": 84.12, + "clarity": 80.74, + "conversation_building": 86.49, + "trustworthiness": 83.11, + "consistency": 90.54, + "confidence": 81.76, + "transparency": 69.93, + "personality": 71.85, + "personality_consistency": 82.77, + "distinct_personality": 72.3, + "honesty_empathy_fairness": 79.05, + "background_and_culture": 71.17, + "ethical_alignment": 72.3, + "cultural_awareness": 70.27, + "bias_and_stereotypes": 83.45 + } + }, + "Planning Your Weekly Meals": { + "claude-3.7-sonnet": { + "helpfulness": 92.22, + "effectiveness": 92.78, + "comprehensiveness": 93.06, + "usefulness": 93.33, + "communication": 88.52, + "tone_and_language_style": 85.83, + "conversation_flow": 81.39, + "detail_and_technical_language": 91.39, + "understanding": 91.48, + "accuracy": 91.67, + "context_memory": 90.83, + "intuitiveness": 85.56, + "adaptiveness": 88.15, + "flexibility": 92.5, + "clarity": 81.11, + "conversation_building": 88.06, + "trustworthiness": 88.33, + "consistency": 93.06, + "confidence": 87.5, + "transparency": 70.56, + "personality": 75.0, + "personality_consistency": 84.72, + "distinct_personality": 73.33, + "honesty_empathy_fairness": 83.06, + "background_and_culture": 72.78, + "ethical_alignment": 73.89, + "cultural_awareness": 72.22, + "bias_and_stereotypes": 84.44 + }, + "deepseek-r1": { + "helpfulness": 73.24, + "effectiveness": 70.67, + "comprehensiveness": 73.56, + "usefulness": 78.37, + "communication": 71.79, + "tone_and_language_style": 74.52, + "conversation_flow": 69.47, + "detail_and_technical_language": 77.4, + "understanding": 76.92, + "accuracy": 76.92, + "context_memory": 73.8, + "intuitiveness": 71.15, + "adaptiveness": 72.27, + "flexibility": 72.84, + "clarity": 65.87, + "conversation_building": 73.32, + "trustworthiness": 72.12, + "consistency": 75.0, + "confidence": 73.08, + "transparency": 58.41, + "personality": 61.54, + "personality_consistency": 71.39, + "distinct_personality": 60.1, + "honesty_empathy_fairness": 72.36, + "background_and_culture": 63.62, + "ethical_alignment": 64.42, + "cultural_awareness": 61.3, + "bias_and_stereotypes": 75.72 + }, + "gemini-2.0-flash-001": { + "helpfulness": 88.74, + "effectiveness": 89.29, + "comprehensiveness": 87.01, + "usefulness": 89.61, + "communication": 87.44, + "tone_and_language_style": 87.34, + "conversation_flow": 83.77, + "detail_and_technical_language": 86.04, + "understanding": 88.74, + "accuracy": 90.91, + "context_memory": 92.21, + "intuitiveness": 87.01, + "adaptiveness": 86.8, + "flexibility": 90.26, + "clarity": 82.14, + "conversation_building": 88.96, + "trustworthiness": 85.28, + "consistency": 89.61, + "confidence": 87.66, + "transparency": 72.4, + "personality": 77.7, + "personality_consistency": 84.09, + "distinct_personality": 74.03, + "honesty_empathy_fairness": 85.06, + "background_and_culture": 77.27, + "ethical_alignment": 80.52, + "cultural_awareness": 75.65, + "bias_and_stereotypes": 82.14 + }, + "gpt-4o": { + "helpfulness": 91.02, + "effectiveness": 91.03, + "comprehensiveness": 87.18, + "usefulness": 89.74, + "communication": 90.6, + "tone_and_language_style": 87.82, + "conversation_flow": 84.62, + "detail_and_technical_language": 91.35, + "understanding": 91.67, + "accuracy": 91.35, + "context_memory": 90.06, + "intuitiveness": 85.9, + "adaptiveness": 89.1, + "flexibility": 88.78, + "clarity": 82.69, + "conversation_building": 88.78, + "trustworthiness": 88.25, + "consistency": 91.03, + "confidence": 86.54, + "transparency": 76.28, + "personality": 79.91, + "personality_consistency": 87.18, + "distinct_personality": 75.0, + "honesty_empathy_fairness": 83.01, + "background_and_culture": 74.36, + "ethical_alignment": 74.36, + "cultural_awareness": 74.04, + "bias_and_stereotypes": 83.97 + }, + "llama-3.1-405b-instruct": { + "helpfulness": 90.22, + "effectiveness": 90.67, + "comprehensiveness": 90.0, + "usefulness": 89.0, + "communication": 86.89, + "tone_and_language_style": 88.0, + "conversation_flow": 81.67, + "detail_and_technical_language": 89.0, + "understanding": 89.33, + "accuracy": 88.33, + "context_memory": 91.0, + "intuitiveness": 83.33, + "adaptiveness": 86.44, + "flexibility": 89.0, + "clarity": 79.0, + "conversation_building": 88.0, + "trustworthiness": 86.0, + "consistency": 90.0, + "confidence": 88.0, + "transparency": 76.67, + "personality": 78.0, + "personality_consistency": 83.33, + "distinct_personality": 74.33, + "honesty_empathy_fairness": 81.67, + "background_and_culture": 75.55, + "ethical_alignment": 74.67, + "cultural_awareness": 74.0, + "bias_and_stereotypes": 87.0 + }, + "o1": { + "helpfulness": 86.53, + "effectiveness": 86.64, + "comprehensiveness": 89.04, + "usefulness": 87.67, + "communication": 83.56, + "tone_and_language_style": 79.79, + "conversation_flow": 81.16, + "detail_and_technical_language": 85.96, + "understanding": 85.84, + "accuracy": 83.56, + "context_memory": 87.33, + "intuitiveness": 78.08, + "adaptiveness": 83.1, + "flexibility": 83.9, + "clarity": 81.16, + "conversation_building": 83.56, + "trustworthiness": 83.33, + "consistency": 86.64, + "confidence": 87.67, + "transparency": 77.4, + "personality": 71.92, + "personality_consistency": 80.82, + "distinct_personality": 72.26, + "honesty_empathy_fairness": 78.77, + "background_and_culture": 72.15, + "ethical_alignment": 74.32, + "cultural_awareness": 70.89, + "bias_and_stereotypes": 89.73 + } + }, + "Understanding a Complex Topic": { + "claude-3.7-sonnet": { + "helpfulness": 89.18, + "effectiveness": 88.56, + "comprehensiveness": 87.77, + "usefulness": 85.9, + "communication": 89.72, + "tone_and_language_style": 89.1, + "conversation_flow": 83.78, + "detail_and_technical_language": 89.1, + "understanding": 88.65, + "accuracy": 90.96, + "context_memory": 91.49, + "intuitiveness": 83.51, + "adaptiveness": 88.65, + "flexibility": 88.83, + "clarity": 83.78, + "conversation_building": 84.57, + "trustworthiness": 85.28, + "consistency": 89.36, + "confidence": 87.5, + "transparency": 74.73, + "personality": 74.47, + "personality_consistency": 83.24, + "distinct_personality": 72.87, + "honesty_empathy_fairness": 85.64, + "background_and_culture": 73.23, + "ethical_alignment": 76.33, + "cultural_awareness": 71.81, + "bias_and_stereotypes": 86.17 + }, + "deepseek-r1": { + "helpfulness": 73.49, + "effectiveness": 71.08, + "comprehensiveness": 73.8, + "usefulness": 73.19, + "communication": 70.28, + "tone_and_language_style": 73.49, + "conversation_flow": 67.47, + "detail_and_technical_language": 77.41, + "understanding": 75.5, + "accuracy": 75.6, + "context_memory": 74.1, + "intuitiveness": 70.48, + "adaptiveness": 70.48, + "flexibility": 72.89, + "clarity": 69.88, + "conversation_building": 71.39, + "trustworthiness": 73.49, + "consistency": 71.99, + "confidence": 74.1, + "transparency": 65.36, + "personality": 62.45, + "personality_consistency": 68.07, + "distinct_personality": 58.13, + "honesty_empathy_fairness": 68.37, + "background_and_culture": 62.85, + "ethical_alignment": 64.16, + "cultural_awareness": 60.24, + "bias_and_stereotypes": 75.0 + }, + "gemini-2.0-flash-001": { + "helpfulness": 86.58, + "effectiveness": 82.47, + "comprehensiveness": 87.66, + "usefulness": 81.49, + "communication": 84.2, + "tone_and_language_style": 82.14, + "conversation_flow": 79.87, + "detail_and_technical_language": 81.17, + "understanding": 83.55, + "accuracy": 83.12, + "context_memory": 85.39, + "intuitiveness": 77.6, + "adaptiveness": 83.55, + "flexibility": 83.44, + "clarity": 78.25, + "conversation_building": 82.14, + "trustworthiness": 83.77, + "consistency": 86.69, + "confidence": 84.42, + "transparency": 70.78, + "personality": 69.26, + "personality_consistency": 81.49, + "distinct_personality": 68.83, + "honesty_empathy_fairness": 79.87, + "background_and_culture": 70.56, + "ethical_alignment": 74.03, + "cultural_awareness": 70.45, + "bias_and_stereotypes": 83.12 + }, + "gpt-4o": { + "helpfulness": 87.21, + "effectiveness": 86.3, + "comprehensiveness": 85.62, + "usefulness": 87.67, + "communication": 86.53, + "tone_and_language_style": 85.27, + "conversation_flow": 81.85, + "detail_and_technical_language": 84.59, + "understanding": 86.3, + "accuracy": 86.99, + "context_memory": 88.7, + "intuitiveness": 79.45, + "adaptiveness": 84.7, + "flexibility": 86.64, + "clarity": 79.79, + "conversation_building": 83.9, + "trustworthiness": 85.39, + "consistency": 86.99, + "confidence": 83.56, + "transparency": 72.26, + "personality": 78.31, + "personality_consistency": 84.59, + "distinct_personality": 76.03, + "honesty_empathy_fairness": 79.45, + "background_and_culture": 74.2, + "ethical_alignment": 72.95, + "cultural_awareness": 69.86, + "bias_and_stereotypes": 80.82 + }, + "llama-3.1-405b-instruct": { + "helpfulness": 87.92, + "effectiveness": 84.42, + "comprehensiveness": 87.68, + "usefulness": 86.96, + "communication": 87.44, + "tone_and_language_style": 84.42, + "conversation_flow": 83.7, + "detail_and_technical_language": 85.51, + "understanding": 87.92, + "accuracy": 88.04, + "context_memory": 89.13, + "intuitiveness": 82.25, + "adaptiveness": 87.44, + "flexibility": 85.51, + "clarity": 80.07, + "conversation_building": 85.14, + "trustworthiness": 86.47, + "consistency": 88.41, + "confidence": 84.06, + "transparency": 72.83, + "personality": 74.15, + "personality_consistency": 81.52, + "distinct_personality": 75.0, + "honesty_empathy_fairness": 80.07, + "background_and_culture": 74.4, + "ethical_alignment": 77.54, + "cultural_awareness": 70.29, + "bias_and_stereotypes": 84.78 + }, + "o1": { + "helpfulness": 87.81, + "effectiveness": 86.29, + "comprehensiveness": 87.37, + "usefulness": 84.41, + "communication": 84.95, + "tone_and_language_style": 81.99, + "conversation_flow": 78.76, + "detail_and_technical_language": 85.22, + "understanding": 87.45, + "accuracy": 87.9, + "context_memory": 88.44, + "intuitiveness": 80.38, + "adaptiveness": 85.66, + "flexibility": 86.56, + "clarity": 81.45, + "conversation_building": 79.84, + "trustworthiness": 84.77, + "consistency": 88.71, + "confidence": 84.95, + "transparency": 75.81, + "personality": 73.12, + "personality_consistency": 83.33, + "distinct_personality": 69.35, + "honesty_empathy_fairness": 82.53, + "background_and_culture": 71.68, + "ethical_alignment": 72.04, + "cultural_awareness": 66.67, + "bias_and_stereotypes": 85.75 + } + } + }, + "mrp_demographics": { + "claude-3.7-sonnet": { + "Age": { + "18-24": { + "Accuracy": 89.1, + "Adaptiveness": 85.0, + "Ethical Alignment": 78.7, + "Background And Culture": 72.9, + "Bias And Stereotypes": 90.7, + "Clarity": 88.3, + "Communication": 83.8, + "Comprehensiveness": 90.6, + "Confidence": 88.6, + "Consistency": 92.5, + "Cultural Awareness": 77.6, + "Detail And Technical Language": 85.8, + "Effectiveness": 94.0, + "Flexibility": 94.0, + "Helpfulness": 89.7, + "Honesty Empathy Fairness": 88.1, + "Intuitiveness": 85.7, + "Conversation Flow": 89.4, + "Overall": 83.0, + "Conversation Building": 90.4, + "Personality Consistency": 84.0, + "Personality": 77.0, + "Distinct Personality": 77.7, + "Repeat Usage": 85.3, + "Context Memory": 90.5, + "Speed Perception": 81.7, + "Tone And Language Style": 89.2, + "Transparency": 82.1, + "Trustworthiness": 86.6, + "Understanding": 86.0, + "Usefulness": 89.5 + }, + "25-34": { + "Accuracy": 87.7, + "Adaptiveness": 86.9, + "Ethical Alignment": 79.2, + "Background And Culture": 76.1, + "Bias And Stereotypes": 90.4, + "Clarity": 87.4, + "Communication": 85.2, + "Comprehensiveness": 89.2, + "Confidence": 89.1, + "Consistency": 93.6, + "Cultural Awareness": 79.8, + "Detail And Technical Language": 85.6, + "Effectiveness": 93.8, + "Flexibility": 93.5, + "Helpfulness": 87.5, + "Honesty Empathy Fairness": 89.0, + "Intuitiveness": 85.0, + "Conversation Flow": 89.2, + "Overall": 84.34, + "Conversation Building": 89.6, + "Personality Consistency": 83.4, + "Personality": 78.3, + "Distinct Personality": 78.4, + "Repeat Usage": 84.4, + "Context Memory": 91.6, + "Speed Perception": 80.6, + "Tone And Language Style": 86.0, + "Transparency": 80.8, + "Trustworthiness": 88.1, + "Understanding": 88.3, + "Usefulness": 90.0 + }, + "35-44": { + "Accuracy": 89.4, + "Adaptiveness": 87.0, + "Ethical Alignment": 76.3, + "Background And Culture": 74.3, + "Bias And Stereotypes": 91.9, + "Clarity": 85.7, + "Communication": 85.8, + "Comprehensiveness": 91.7, + "Confidence": 90.3, + "Consistency": 93.8, + "Cultural Awareness": 75.4, + "Detail And Technical Language": 89.7, + "Effectiveness": 94.6, + "Flexibility": 93.0, + "Helpfulness": 90.7, + "Honesty Empathy Fairness": 86.7, + "Intuitiveness": 86.7, + "Conversation Flow": 85.7, + "Overall": 85.41, + "Conversation Building": 90.9, + "Personality Consistency": 85.3, + "Personality": 77.3, + "Distinct Personality": 72.8, + "Repeat Usage": 85.7, + "Context Memory": 93.6, + "Speed Perception": 80.0, + "Tone And Language Style": 86.5, + "Transparency": 75.0, + "Trustworthiness": 91.4, + "Understanding": 91.4, + "Usefulness": 90.6 + }, + "45-54": { + "Accuracy": 84.7, + "Adaptiveness": 87.4, + "Ethical Alignment": 75.1, + "Background And Culture": 70.9, + "Bias And Stereotypes": 89.6, + "Clarity": 85.4, + "Communication": 87.8, + "Comprehensiveness": 84.8, + "Confidence": 88.7, + "Consistency": 92.4, + "Cultural Awareness": 74.1, + "Detail And Technical Language": 83.9, + "Effectiveness": 92.6, + "Flexibility": 92.3, + "Helpfulness": 90.1, + "Honesty Empathy Fairness": 85.7, + "Intuitiveness": 87.3, + "Conversation Flow": 85.9, + "Overall": 84.84, + "Conversation Building": 87.3, + "Personality Consistency": 84.4, + "Personality": 78.6, + "Distinct Personality": 77.4, + "Repeat Usage": 86.6, + "Context Memory": 89.2, + "Speed Perception": 81.0, + "Tone And Language Style": 83.9, + "Transparency": 75.1, + "Trustworthiness": 89.8, + "Understanding": 89.3, + "Usefulness": 87.6 + }, + "55-64": { + "Accuracy": 89.1, + "Adaptiveness": 88.7, + "Ethical Alignment": 77.6, + "Background And Culture": 75.7, + "Bias And Stereotypes": 91.4, + "Clarity": 87.1, + "Communication": 88.9, + "Comprehensiveness": 88.8, + "Confidence": 89.5, + "Consistency": 93.5, + "Cultural Awareness": 74.8, + "Detail And Technical Language": 88.8, + "Effectiveness": 93.6, + "Flexibility": 93.8, + "Helpfulness": 90.1, + "Honesty Empathy Fairness": 87.0, + "Intuitiveness": 86.6, + "Conversation Flow": 87.5, + "Overall": 86.2, + "Conversation Building": 90.9, + "Personality Consistency": 84.9, + "Personality": 79.2, + "Distinct Personality": 76.2, + "Repeat Usage": 86.9, + "Context Memory": 93.0, + "Speed Perception": 81.9, + "Tone And Language Style": 88.0, + "Transparency": 76.3, + "Trustworthiness": 90.2, + "Understanding": 90.6, + "Usefulness": 91.5 + }, + "65+": { + "Accuracy": 92.5, + "Adaptiveness": 89.5, + "Ethical Alignment": 86.3, + "Background And Culture": 82.3, + "Bias And Stereotypes": 92.3, + "Clarity": 91.0, + "Communication": 90.0, + "Comprehensiveness": 93.8, + "Confidence": 95.2, + "Consistency": 93.4, + "Cultural Awareness": 82.0, + "Detail And Technical Language": 89.0, + "Effectiveness": 95.1, + "Flexibility": 95.3, + "Helpfulness": 92.6, + "Honesty Empathy Fairness": 92.2, + "Intuitiveness": 87.2, + "Conversation Flow": 92.5, + "Overall": 88.26, + "Conversation Building": 94.9, + "Personality Consistency": 81.9, + "Personality": 79.4, + "Distinct Personality": 81.9, + "Repeat Usage": 87.6, + "Context Memory": 93.3, + "Speed Perception": 81.8, + "Tone And Language Style": 90.1, + "Transparency": 81.0, + "Trustworthiness": 93.1, + "Understanding": 90.9, + "Usefulness": 92.5 + } + }, + "Sex": { + "Female": { + "Accuracy": 89.9, + "Adaptiveness": 89.1, + "Ethical Alignment": 80.5, + "Background And Culture": 78.1, + "Bias And Stereotypes": 91.6, + "Clarity": 88.6, + "Communication": 88.7, + "Comprehensiveness": 91.4, + "Confidence": 92.1, + "Consistency": 93.8, + "Cultural Awareness": 78.7, + "Detail And Technical Language": 87.2, + "Effectiveness": 94.3, + "Flexibility": 94.3, + "Helpfulness": 90.6, + "Honesty Empathy Fairness": 88.9, + "Intuitiveness": 87.5, + "Conversation Flow": 89.0, + "Overall": 86.96, + "Conversation Building": 91.4, + "Personality Consistency": 83.7, + "Personality": 79.5, + "Distinct Personality": 78.6, + "Repeat Usage": 86.5, + "Context Memory": 92.6, + "Speed Perception": 81.6, + "Tone And Language Style": 87.5, + "Transparency": 79.7, + "Trustworthiness": 91.9, + "Understanding": 90.8, + "Usefulness": 91.4 + }, + "Male": { + "Accuracy": 89.3, + "Adaptiveness": 87.3, + "Ethical Alignment": 80.6, + "Background And Culture": 76.4, + "Bias And Stereotypes": 91.2, + "Clarity": 87.8, + "Communication": 87.5, + "Comprehensiveness": 89.8, + "Confidence": 91.0, + "Consistency": 92.9, + "Cultural Awareness": 77.7, + "Detail And Technical Language": 88.5, + "Effectiveness": 94.1, + "Flexibility": 93.7, + "Helpfulness": 91.0, + "Honesty Empathy Fairness": 89.1, + "Intuitiveness": 85.8, + "Conversation Flow": 89.2, + "Overall": 85.7, + "Conversation Building": 92.0, + "Personality Consistency": 83.4, + "Personality": 77.9, + "Distinct Personality": 78.1, + "Repeat Usage": 86.6, + "Context Memory": 92.3, + "Speed Perception": 80.9, + "Tone And Language Style": 87.9, + "Transparency": 77.0, + "Trustworthiness": 90.1, + "Understanding": 89.7, + "Usefulness": 90.4 + } + }, + "Ethnicity": { + "African American": { + "Accuracy": 90.7, + "Adaptiveness": 87.8, + "Ethical Alignment": 82.8, + "Background And Culture": 80.3, + "Bias And Stereotypes": 91.8, + "Clarity": 88.5, + "Communication": 87.8, + "Comprehensiveness": 91.7, + "Confidence": 91.5, + "Consistency": 94.8, + "Cultural Awareness": 81.2, + "Detail And Technical Language": 87.0, + "Effectiveness": 94.8, + "Flexibility": 94.9, + "Helpfulness": 90.8, + "Honesty Empathy Fairness": 91.3, + "Intuitiveness": 88.0, + "Conversation Flow": 89.2, + "Overall": 87.16, + "Conversation Building": 91.3, + "Personality Consistency": 84.6, + "Personality": 80.6, + "Distinct Personality": 84.0, + "Repeat Usage": 87.1, + "Context Memory": 92.2, + "Speed Perception": 81.5, + "Tone And Language Style": 89.3, + "Transparency": 84.1, + "Trustworthiness": 91.8, + "Understanding": 91.0, + "Usefulness": 91.8 + }, + "Asian": { + "Accuracy": 89.3, + "Adaptiveness": 87.7, + "Ethical Alignment": 77.0, + "Background And Culture": 78.4, + "Bias And Stereotypes": 91.1, + "Clarity": 85.7, + "Communication": 88.9, + "Comprehensiveness": 87.2, + "Confidence": 92.3, + "Consistency": 93.6, + "Cultural Awareness": 75.6, + "Detail And Technical Language": 88.2, + "Effectiveness": 93.0, + "Flexibility": 94.1, + "Helpfulness": 88.0, + "Honesty Empathy Fairness": 90.4, + "Intuitiveness": 85.5, + "Conversation Flow": 87.7, + "Overall": 86.21, + "Conversation Building": 91.8, + "Personality Consistency": 80.3, + "Personality": 79.8, + "Distinct Personality": 81.7, + "Repeat Usage": 86.0, + "Context Memory": 92.5, + "Speed Perception": 80.4, + "Tone And Language Style": 87.5, + "Transparency": 77.9, + "Trustworthiness": 91.5, + "Understanding": 89.2, + "Usefulness": 89.6 + }, + "Hispanic": { + "Accuracy": 87.5, + "Adaptiveness": 89.8, + "Ethical Alignment": 81.2, + "Background And Culture": 73.1, + "Bias And Stereotypes": 93.4, + "Clarity": 89.5, + "Communication": 91.3, + "Comprehensiveness": 89.4, + "Confidence": 92.0, + "Consistency": 96.2, + "Cultural Awareness": 74.3, + "Detail And Technical Language": 89.6, + "Effectiveness": 94.9, + "Flexibility": 96.0, + "Helpfulness": 95.0, + "Honesty Empathy Fairness": 88.5, + "Intuitiveness": 84.1, + "Conversation Flow": 92.5, + "Overall": 87.81, + "Conversation Building": 94.6, + "Personality Consistency": 85.9, + "Personality": 81.5, + "Distinct Personality": 83.9, + "Repeat Usage": 86.8, + "Context Memory": 93.0, + "Speed Perception": 80.8, + "Tone And Language Style": 88.9, + "Transparency": 76.5, + "Trustworthiness": 90.4, + "Understanding": 93.6, + "Usefulness": 91.3 + }, + "White": { + "Accuracy": 89.8, + "Adaptiveness": 88.1, + "Ethical Alignment": 80.3, + "Background And Culture": 77.4, + "Bias And Stereotypes": 91.1, + "Clarity": 88.1, + "Communication": 87.6, + "Comprehensiveness": 90.8, + "Confidence": 91.5, + "Consistency": 92.7, + "Cultural Awareness": 78.5, + "Detail And Technical Language": 87.7, + "Effectiveness": 94.1, + "Flexibility": 93.5, + "Helpfulness": 90.3, + "Honesty Empathy Fairness": 88.7, + "Intuitiveness": 87.0, + "Conversation Flow": 88.7, + "Overall": 86.03, + "Conversation Building": 91.3, + "Personality Consistency": 83.2, + "Personality": 78.0, + "Distinct Personality": 76.5, + "Repeat Usage": 86.5, + "Context Memory": 92.4, + "Speed Perception": 81.3, + "Tone And Language Style": 87.3, + "Transparency": 77.9, + "Trustworthiness": 91.0, + "Understanding": 89.8, + "Usefulness": 90.8 + } + }, + "Politics": { + "Democrat": { + "Accuracy": 88.8, + "Adaptiveness": 88.5, + "Ethical Alignment": 79.0, + "Background And Culture": 77.0, + "Bias And Stereotypes": 91.6, + "Clarity": 86.8, + "Communication": 87.8, + "Comprehensiveness": 90.2, + "Confidence": 91.4, + "Consistency": 94.9, + "Cultural Awareness": 77.6, + "Detail And Technical Language": 87.4, + "Effectiveness": 94.0, + "Flexibility": 94.9, + "Helpfulness": 89.7, + "Honesty Empathy Fairness": 88.3, + "Intuitiveness": 86.6, + "Conversation Flow": 88.6, + "Overall": 85.99, + "Conversation Building": 91.6, + "Personality Consistency": 82.9, + "Personality": 78.0, + "Distinct Personality": 78.3, + "Repeat Usage": 85.8, + "Context Memory": 92.5, + "Speed Perception": 80.7, + "Tone And Language Style": 87.2, + "Transparency": 78.3, + "Trustworthiness": 90.5, + "Understanding": 90.4, + "Usefulness": 91.7 + }, + "Independent": { + "Accuracy": 91.4, + "Adaptiveness": 90.2, + "Ethical Alignment": 80.5, + "Background And Culture": 78.3, + "Bias And Stereotypes": 91.5, + "Clarity": 89.2, + "Communication": 89.4, + "Comprehensiveness": 92.5, + "Confidence": 91.3, + "Consistency": 94.0, + "Cultural Awareness": 76.6, + "Detail And Technical Language": 89.2, + "Effectiveness": 95.2, + "Flexibility": 94.5, + "Helpfulness": 91.1, + "Honesty Empathy Fairness": 88.7, + "Intuitiveness": 86.8, + "Conversation Flow": 89.6, + "Overall": 87.16, + "Conversation Building": 92.9, + "Personality Consistency": 83.1, + "Personality": 76.9, + "Distinct Personality": 74.8, + "Repeat Usage": 86.8, + "Context Memory": 93.5, + "Speed Perception": 81.4, + "Tone And Language Style": 89.2, + "Transparency": 74.2, + "Trustworthiness": 91.4, + "Understanding": 92.8, + "Usefulness": 92.4 + }, + "Republican": { + "Accuracy": 89.7, + "Adaptiveness": 87.0, + "Ethical Alignment": 82.3, + "Background And Culture": 77.1, + "Bias And Stereotypes": 91.1, + "Clarity": 89.3, + "Communication": 87.8, + "Comprehensiveness": 90.2, + "Confidence": 92.0, + "Consistency": 91.2, + "Cultural Awareness": 79.8, + "Detail And Technical Language": 87.6, + "Effectiveness": 94.0, + "Flexibility": 92.7, + "Helpfulness": 91.8, + "Honesty Empathy Fairness": 90.0, + "Intuitiveness": 86.8, + "Conversation Flow": 89.5, + "Overall": 86.37, + "Conversation Building": 91.2, + "Personality Consistency": 84.4, + "Personality": 80.4, + "Distinct Personality": 80.1, + "Repeat Usage": 87.3, + "Context Memory": 91.9, + "Speed Perception": 81.8, + "Tone And Language Style": 87.6, + "Transparency": 80.5, + "Trustworthiness": 91.6, + "Understanding": 88.9, + "Usefulness": 89.4 + } + }, + "Education": { + "College": { + "Accuracy": 88.1, + "Adaptiveness": 86.4, + "Ethical Alignment": 77.1, + "Background And Culture": 71.6, + "Bias And Stereotypes": 86.4, + "Clarity": 82.7, + "Communication": 84.7, + "Comprehensiveness": 87.4, + "Confidence": 88.7, + "Consistency": 90.4, + "Cultural Awareness": 74.2, + "Detail And Technical Language": 85.3, + "Effectiveness": 91.0, + "Flexibility": 90.9, + "Helpfulness": 87.2, + "Honesty Empathy Fairness": 84.7, + "Intuitiveness": 85.3, + "Conversation Flow": 84.4, + "Overall": 83.33, + "Conversation Building": 88.5, + "Personality Consistency": 79.5, + "Personality": 76.9, + "Distinct Personality": 75.1, + "Repeat Usage": 86.2, + "Context Memory": 88.9, + "Speed Perception": 79.9, + "Tone And Language Style": 84.2, + "Transparency": 74.2, + "Trustworthiness": 88.5, + "Understanding": 88.0, + "Usefulness": 86.7 + }, + "No College": { + "Accuracy": 90.4, + "Adaptiveness": 89.2, + "Ethical Alignment": 82.3, + "Background And Culture": 80.2, + "Bias And Stereotypes": 94.0, + "Clarity": 91.0, + "Communication": 89.8, + "Comprehensiveness": 92.3, + "Confidence": 93.1, + "Consistency": 94.9, + "Cultural Awareness": 80.3, + "Detail And Technical Language": 89.1, + "Effectiveness": 95.9, + "Flexibility": 95.5, + "Helpfulness": 92.6, + "Honesty Empathy Fairness": 91.2, + "Intuitiveness": 87.4, + "Conversation Flow": 91.5, + "Overall": 87.89, + "Conversation Building": 93.3, + "Personality Consistency": 85.6, + "Personality": 79.6, + "Distinct Personality": 80.0, + "Repeat Usage": 86.7, + "Context Memory": 94.3, + "Speed Perception": 82.0, + "Tone And Language Style": 89.5, + "Transparency": 80.6, + "Trustworthiness": 92.4, + "Understanding": 91.4, + "Usefulness": 93.1 + } + }, + "Urbanicity": { + "Rural": { + "Accuracy": 92.5, + "Adaptiveness": 91.1, + "Ethical Alignment": 82.7, + "Background And Culture": 80.1, + "Bias And Stereotypes": 94.7, + "Clarity": 90.7, + "Communication": 90.7, + "Comprehensiveness": 93.7, + "Confidence": 93.0, + "Consistency": 94.2, + "Cultural Awareness": 81.6, + "Detail And Technical Language": 90.9, + "Effectiveness": 96.4, + "Flexibility": 94.8, + "Helpfulness": 95.0, + "Honesty Empathy Fairness": 91.6, + "Intuitiveness": 91.2, + "Conversation Flow": 90.6, + "Overall": 88.86, + "Conversation Building": 94.0, + "Personality Consistency": 87.5, + "Personality": 79.2, + "Distinct Personality": 78.6, + "Repeat Usage": 88.1, + "Context Memory": 95.6, + "Speed Perception": 82.9, + "Tone And Language Style": 89.2, + "Transparency": 79.0, + "Trustworthiness": 93.4, + "Understanding": 92.5, + "Usefulness": 94.4 + }, + "Suburban": { + "Accuracy": 89.5, + "Adaptiveness": 86.8, + "Ethical Alignment": 79.5, + "Background And Culture": 75.3, + "Bias And Stereotypes": 90.2, + "Clarity": 86.5, + "Communication": 86.1, + "Comprehensiveness": 89.6, + "Confidence": 90.8, + "Consistency": 92.5, + "Cultural Awareness": 75.8, + "Detail And Technical Language": 86.6, + "Effectiveness": 92.9, + "Flexibility": 93.0, + "Helpfulness": 89.7, + "Honesty Empathy Fairness": 87.6, + "Intuitiveness": 85.5, + "Conversation Flow": 88.5, + "Overall": 84.9, + "Conversation Building": 90.6, + "Personality Consistency": 82.2, + "Personality": 78.0, + "Distinct Personality": 76.8, + "Repeat Usage": 85.9, + "Context Memory": 91.1, + "Speed Perception": 81.0, + "Tone And Language Style": 87.2, + "Transparency": 76.0, + "Trustworthiness": 89.4, + "Understanding": 89.0, + "Usefulness": 89.1 + }, + "Urban": { + "Accuracy": 87.0, + "Adaptiveness": 87.3, + "Ethical Alignment": 79.9, + "Background And Culture": 77.2, + "Bias And Stereotypes": 89.8, + "Clarity": 87.9, + "Communication": 88.1, + "Comprehensiveness": 89.0, + "Confidence": 91.3, + "Consistency": 93.7, + "Cultural Awareness": 78.2, + "Detail And Technical Language": 86.4, + "Effectiveness": 94.0, + "Flexibility": 94.5, + "Helpfulness": 88.0, + "Honesty Empathy Fairness": 88.4, + "Intuitiveness": 83.9, + "Conversation Flow": 88.5, + "Overall": 85.8, + "Conversation Building": 91.0, + "Personality Consistency": 81.5, + "Personality": 79.2, + "Distinct Personality": 80.1, + "Repeat Usage": 86.0, + "Context Memory": 91.1, + "Speed Perception": 80.0, + "Tone And Language Style": 86.9, + "Transparency": 81.1, + "Trustworthiness": 91.0, + "Understanding": 89.8, + "Usefulness": 90.0 + } + } + }, + "deepseek-r1": { + "Age": { + "18-24": { + "Accuracy": 86.1, + "Adaptiveness": 76.4, + "Ethical Alignment": 66.6, + "Background And Culture": 65.9, + "Bias And Stereotypes": 78.6, + "Clarity": 71.1, + "Communication": 75.2, + "Comprehensiveness": 75.3, + "Confidence": 80.4, + "Consistency": 83.0, + "Cultural Awareness": 62.9, + "Detail And Technical Language": 79.8, + "Effectiveness": 79.8, + "Flexibility": 83.7, + "Helpfulness": 79.0, + "Honesty Empathy Fairness": 79.4, + "Intuitiveness": 81.2, + "Conversation Flow": 69.0, + "Overall": 75.14, + "Conversation Building": 74.8, + "Personality Consistency": 76.2, + "Personality": 68.3, + "Distinct Personality": 69.0, + "Repeat Usage": 53.8, + "Context Memory": 83.0, + "Speed Perception": 37.2, + "Tone And Language Style": 72.6, + "Transparency": 70.1, + "Trustworthiness": 80.4, + "Understanding": 80.8, + "Usefulness": 80.2 + }, + "25-34": { + "Accuracy": 87.3, + "Adaptiveness": 77.3, + "Ethical Alignment": 75.7, + "Background And Culture": 74.3, + "Bias And Stereotypes": 86.0, + "Clarity": 76.1, + "Communication": 81.4, + "Comprehensiveness": 81.7, + "Confidence": 80.5, + "Consistency": 84.9, + "Cultural Awareness": 72.8, + "Detail And Technical Language": 84.4, + "Effectiveness": 85.0, + "Flexibility": 87.1, + "Helpfulness": 81.8, + "Honesty Empathy Fairness": 83.1, + "Intuitiveness": 86.0, + "Conversation Flow": 75.7, + "Overall": 79.19, + "Conversation Building": 81.0, + "Personality Consistency": 79.6, + "Personality": 71.4, + "Distinct Personality": 73.7, + "Repeat Usage": 55.1, + "Context Memory": 84.6, + "Speed Perception": 37.5, + "Tone And Language Style": 82.3, + "Transparency": 71.6, + "Trustworthiness": 83.3, + "Understanding": 84.8, + "Usefulness": 84.7 + }, + "35-44": { + "Accuracy": 84.7, + "Adaptiveness": 79.3, + "Ethical Alignment": 74.7, + "Background And Culture": 71.9, + "Bias And Stereotypes": 87.6, + "Clarity": 76.3, + "Communication": 77.4, + "Comprehensiveness": 84.9, + "Confidence": 84.1, + "Consistency": 87.0, + "Cultural Awareness": 71.0, + "Detail And Technical Language": 89.0, + "Effectiveness": 83.7, + "Flexibility": 86.5, + "Helpfulness": 80.5, + "Honesty Empathy Fairness": 83.6, + "Intuitiveness": 86.7, + "Conversation Flow": 74.7, + "Overall": 78.04, + "Conversation Building": 84.1, + "Personality Consistency": 82.7, + "Personality": 69.0, + "Distinct Personality": 71.0, + "Repeat Usage": 58.2, + "Context Memory": 86.3, + "Speed Perception": 38.9, + "Tone And Language Style": 84.4, + "Transparency": 72.2, + "Trustworthiness": 84.8, + "Understanding": 83.4, + "Usefulness": 84.7 + }, + "45-54": { + "Accuracy": 80.0, + "Adaptiveness": 78.7, + "Ethical Alignment": 69.5, + "Background And Culture": 70.0, + "Bias And Stereotypes": 81.7, + "Clarity": 70.6, + "Communication": 76.8, + "Comprehensiveness": 76.6, + "Confidence": 79.7, + "Consistency": 83.2, + "Cultural Awareness": 64.7, + "Detail And Technical Language": 79.5, + "Effectiveness": 75.8, + "Flexibility": 77.8, + "Helpfulness": 79.2, + "Honesty Empathy Fairness": 75.5, + "Intuitiveness": 72.5, + "Conversation Flow": 70.8, + "Overall": 76.54, + "Conversation Building": 76.0, + "Personality Consistency": 77.4, + "Personality": 67.3, + "Distinct Personality": 67.3, + "Repeat Usage": 57.5, + "Context Memory": 82.2, + "Speed Perception": 38.1, + "Tone And Language Style": 77.5, + "Transparency": 62.7, + "Trustworthiness": 80.9, + "Understanding": 82.9, + "Usefulness": 78.5 + }, + "55-64": { + "Accuracy": 87.0, + "Adaptiveness": 79.2, + "Ethical Alignment": 76.1, + "Background And Culture": 75.6, + "Bias And Stereotypes": 88.2, + "Clarity": 75.8, + "Communication": 78.8, + "Comprehensiveness": 81.4, + "Confidence": 82.3, + "Consistency": 85.5, + "Cultural Awareness": 72.6, + "Detail And Technical Language": 89.4, + "Effectiveness": 83.5, + "Flexibility": 88.1, + "Helpfulness": 83.7, + "Honesty Empathy Fairness": 82.7, + "Intuitiveness": 82.2, + "Conversation Flow": 80.5, + "Overall": 79.41, + "Conversation Building": 81.9, + "Personality Consistency": 80.2, + "Personality": 72.8, + "Distinct Personality": 75.3, + "Repeat Usage": 56.4, + "Context Memory": 85.7, + "Speed Perception": 40.2, + "Tone And Language Style": 82.0, + "Transparency": 68.7, + "Trustworthiness": 81.2, + "Understanding": 84.6, + "Usefulness": 84.5 + }, + "65+": { + "Accuracy": 91.1, + "Adaptiveness": 81.9, + "Ethical Alignment": 75.3, + "Background And Culture": 74.8, + "Bias And Stereotypes": 87.3, + "Clarity": 79.1, + "Communication": 82.9, + "Comprehensiveness": 85.1, + "Confidence": 86.8, + "Consistency": 90.9, + "Cultural Awareness": 74.4, + "Detail And Technical Language": 88.1, + "Effectiveness": 87.2, + "Flexibility": 90.8, + "Helpfulness": 83.2, + "Honesty Empathy Fairness": 84.7, + "Intuitiveness": 87.6, + "Conversation Flow": 77.8, + "Overall": 80.79, + "Conversation Building": 88.8, + "Personality Consistency": 83.5, + "Personality": 72.5, + "Distinct Personality": 74.2, + "Repeat Usage": 57.3, + "Context Memory": 88.0, + "Speed Perception": 40.7, + "Tone And Language Style": 86.0, + "Transparency": 73.3, + "Trustworthiness": 84.4, + "Understanding": 85.8, + "Usefulness": 88.7 + } + }, + "Sex": { + "Female": { + "Accuracy": 87.1, + "Adaptiveness": 79.9, + "Ethical Alignment": 74.0, + "Background And Culture": 73.7, + "Bias And Stereotypes": 86.6, + "Clarity": 77.1, + "Communication": 80.8, + "Comprehensiveness": 82.3, + "Confidence": 84.3, + "Consistency": 87.1, + "Cultural Awareness": 71.4, + "Detail And Technical Language": 88.0, + "Effectiveness": 83.6, + "Flexibility": 88.7, + "Helpfulness": 82.5, + "Honesty Empathy Fairness": 82.9, + "Intuitiveness": 84.6, + "Conversation Flow": 76.7, + "Overall": 79.49, + "Conversation Building": 84.2, + "Personality Consistency": 80.8, + "Personality": 72.1, + "Distinct Personality": 74.3, + "Repeat Usage": 56.6, + "Context Memory": 86.4, + "Speed Perception": 39.5, + "Tone And Language Style": 82.6, + "Transparency": 71.3, + "Trustworthiness": 82.9, + "Understanding": 84.5, + "Usefulness": 86.1 + }, + "Male": { + "Accuracy": 87.3, + "Adaptiveness": 79.8, + "Ethical Alignment": 74.9, + "Background And Culture": 73.4, + "Bias And Stereotypes": 86.2, + "Clarity": 75.5, + "Communication": 79.5, + "Comprehensiveness": 82.9, + "Confidence": 83.0, + "Consistency": 87.6, + "Cultural Awareness": 72.1, + "Detail And Technical Language": 85.0, + "Effectiveness": 84.4, + "Flexibility": 85.4, + "Helpfulness": 81.4, + "Honesty Empathy Fairness": 82.2, + "Intuitiveness": 83.5, + "Conversation Flow": 75.9, + "Overall": 78.9, + "Conversation Building": 83.4, + "Personality Consistency": 81.8, + "Personality": 69.8, + "Distinct Personality": 71.0, + "Repeat Usage": 57.4, + "Context Memory": 85.5, + "Speed Perception": 39.4, + "Tone And Language Style": 83.8, + "Transparency": 69.8, + "Trustworthiness": 83.7, + "Understanding": 84.7, + "Usefulness": 84.3 + } + }, + "Ethnicity": { + "African American": { + "Accuracy": 86.9, + "Adaptiveness": 80.2, + "Ethical Alignment": 81.4, + "Background And Culture": 75.9, + "Bias And Stereotypes": 88.0, + "Clarity": 77.7, + "Communication": 82.1, + "Comprehensiveness": 82.4, + "Confidence": 84.7, + "Consistency": 87.2, + "Cultural Awareness": 75.5, + "Detail And Technical Language": 89.0, + "Effectiveness": 85.5, + "Flexibility": 88.4, + "Helpfulness": 80.9, + "Honesty Empathy Fairness": 84.8, + "Intuitiveness": 84.9, + "Conversation Flow": 77.2, + "Overall": 80.39, + "Conversation Building": 83.3, + "Personality Consistency": 79.2, + "Personality": 74.2, + "Distinct Personality": 77.9, + "Repeat Usage": 55.0, + "Context Memory": 87.1, + "Speed Perception": 39.9, + "Tone And Language Style": 83.0, + "Transparency": 74.9, + "Trustworthiness": 84.7, + "Understanding": 84.7, + "Usefulness": 87.5 + }, + "Asian": { + "Accuracy": 87.7, + "Adaptiveness": 83.2, + "Ethical Alignment": 75.6, + "Background And Culture": 74.1, + "Bias And Stereotypes": 83.4, + "Clarity": 78.2, + "Communication": 81.7, + "Comprehensiveness": 83.7, + "Confidence": 88.1, + "Consistency": 89.1, + "Cultural Awareness": 73.3, + "Detail And Technical Language": 89.4, + "Effectiveness": 86.0, + "Flexibility": 89.3, + "Helpfulness": 84.0, + "Honesty Empathy Fairness": 87.2, + "Intuitiveness": 85.2, + "Conversation Flow": 81.7, + "Overall": 81.33, + "Conversation Building": 87.4, + "Personality Consistency": 82.8, + "Personality": 73.3, + "Distinct Personality": 78.5, + "Repeat Usage": 53.3, + "Context Memory": 88.8, + "Speed Perception": 39.7, + "Tone And Language Style": 89.0, + "Transparency": 74.7, + "Trustworthiness": 85.6, + "Understanding": 87.4, + "Usefulness": 90.2 + }, + "Hispanic": { + "Accuracy": 85.6, + "Adaptiveness": 81.2, + "Ethical Alignment": 67.0, + "Background And Culture": 73.2, + "Bias And Stereotypes": 83.7, + "Clarity": 68.6, + "Communication": 79.3, + "Comprehensiveness": 80.1, + "Confidence": 78.7, + "Consistency": 82.4, + "Cultural Awareness": 63.4, + "Detail And Technical Language": 83.8, + "Effectiveness": 80.2, + "Flexibility": 81.0, + "Helpfulness": 82.2, + "Honesty Empathy Fairness": 77.2, + "Intuitiveness": 78.4, + "Conversation Flow": 71.2, + "Overall": 78.64, + "Conversation Building": 78.8, + "Personality Consistency": 74.3, + "Personality": 68.3, + "Distinct Personality": 60.8, + "Repeat Usage": 54.7, + "Context Memory": 81.5, + "Speed Perception": 40.4, + "Tone And Language Style": 77.0, + "Transparency": 60.8, + "Trustworthiness": 81.5, + "Understanding": 84.8, + "Usefulness": 79.5 + }, + "White": { + "Accuracy": 87.4, + "Adaptiveness": 79.4, + "Ethical Alignment": 74.4, + "Background And Culture": 73.3, + "Bias And Stereotypes": 86.7, + "Clarity": 77.1, + "Communication": 79.9, + "Comprehensiveness": 82.9, + "Confidence": 84.0, + "Consistency": 87.9, + "Cultural Awareness": 72.3, + "Detail And Technical Language": 86.5, + "Effectiveness": 84.2, + "Flexibility": 87.7, + "Helpfulness": 82.0, + "Honesty Empathy Fairness": 82.7, + "Intuitiveness": 84.7, + "Conversation Flow": 76.6, + "Overall": 79.0, + "Conversation Building": 84.4, + "Personality Consistency": 82.5, + "Personality": 70.8, + "Distinct Personality": 73.3, + "Repeat Usage": 57.7, + "Context Memory": 86.3, + "Speed Perception": 39.2, + "Tone And Language Style": 83.7, + "Transparency": 71.1, + "Trustworthiness": 83.2, + "Understanding": 84.4, + "Usefulness": 85.5 + } + }, + "Politics": { + "Democrat": { + "Accuracy": 86.5, + "Adaptiveness": 80.2, + "Ethical Alignment": 73.5, + "Background And Culture": 73.5, + "Bias And Stereotypes": 86.2, + "Clarity": 76.6, + "Communication": 80.6, + "Comprehensiveness": 81.5, + "Confidence": 83.3, + "Consistency": 87.3, + "Cultural Awareness": 70.3, + "Detail And Technical Language": 87.2, + "Effectiveness": 84.3, + "Flexibility": 86.3, + "Helpfulness": 82.2, + "Honesty Empathy Fairness": 81.7, + "Intuitiveness": 84.7, + "Conversation Flow": 74.3, + "Overall": 79.47, + "Conversation Building": 84.1, + "Personality Consistency": 80.3, + "Personality": 71.3, + "Distinct Personality": 70.9, + "Repeat Usage": 55.7, + "Context Memory": 86.6, + "Speed Perception": 38.8, + "Tone And Language Style": 82.1, + "Transparency": 68.9, + "Trustworthiness": 83.8, + "Understanding": 84.7, + "Usefulness": 84.8 + }, + "Independent": { + "Accuracy": 87.7, + "Adaptiveness": 80.7, + "Ethical Alignment": 74.9, + "Background And Culture": 73.3, + "Bias And Stereotypes": 87.3, + "Clarity": 76.0, + "Communication": 79.2, + "Comprehensiveness": 84.3, + "Confidence": 85.1, + "Consistency": 88.8, + "Cultural Awareness": 71.9, + "Detail And Technical Language": 88.6, + "Effectiveness": 85.5, + "Flexibility": 89.7, + "Helpfulness": 81.8, + "Honesty Empathy Fairness": 83.9, + "Intuitiveness": 85.7, + "Conversation Flow": 78.5, + "Overall": 79.31, + "Conversation Building": 86.2, + "Personality Consistency": 81.6, + "Personality": 70.7, + "Distinct Personality": 70.6, + "Repeat Usage": 57.1, + "Context Memory": 88.2, + "Speed Perception": 39.4, + "Tone And Language Style": 84.6, + "Transparency": 68.6, + "Trustworthiness": 83.9, + "Understanding": 85.6, + "Usefulness": 86.7 + }, + "Republican": { + "Accuracy": 87.7, + "Adaptiveness": 79.0, + "Ethical Alignment": 75.2, + "Background And Culture": 73.8, + "Bias And Stereotypes": 86.1, + "Clarity": 76.2, + "Communication": 80.1, + "Comprehensiveness": 83.0, + "Confidence": 83.4, + "Consistency": 86.6, + "Cultural Awareness": 73.3, + "Detail And Technical Language": 84.8, + "Effectiveness": 82.9, + "Flexibility": 86.9, + "Helpfulness": 81.8, + "Honesty Empathy Fairness": 82.8, + "Intuitiveness": 82.5, + "Conversation Flow": 77.5, + "Overall": 78.84, + "Conversation Building": 82.2, + "Personality Consistency": 82.3, + "Personality": 70.9, + "Distinct Personality": 75.7, + "Repeat Usage": 58.3, + "Context Memory": 84.2, + "Speed Perception": 40.2, + "Tone And Language Style": 83.7, + "Transparency": 73.4, + "Trustworthiness": 82.3, + "Understanding": 84.0, + "Usefulness": 85.1 + } + }, + "Education": { + "College": { + "Accuracy": 79.1, + "Adaptiveness": 77.8, + "Ethical Alignment": 69.6, + "Background And Culture": 69.8, + "Bias And Stereotypes": 79.3, + "Clarity": 74.0, + "Communication": 76.8, + "Comprehensiveness": 79.4, + "Confidence": 79.0, + "Consistency": 82.5, + "Cultural Awareness": 67.2, + "Detail And Technical Language": 82.5, + "Effectiveness": 78.4, + "Flexibility": 80.3, + "Helpfulness": 79.1, + "Honesty Empathy Fairness": 75.8, + "Intuitiveness": 75.6, + "Conversation Flow": 74.1, + "Overall": 75.79, + "Conversation Building": 80.9, + "Personality Consistency": 77.4, + "Personality": 68.1, + "Distinct Personality": 66.9, + "Repeat Usage": 59.9, + "Context Memory": 80.7, + "Speed Perception": 40.5, + "Tone And Language Style": 78.9, + "Transparency": 65.2, + "Trustworthiness": 78.4, + "Understanding": 80.5, + "Usefulness": 80.8 + }, + "No College": { + "Accuracy": 91.3, + "Adaptiveness": 80.9, + "Ethical Alignment": 76.9, + "Background And Culture": 75.5, + "Bias And Stereotypes": 90.0, + "Clarity": 77.6, + "Communication": 81.9, + "Comprehensiveness": 84.2, + "Confidence": 86.0, + "Consistency": 89.8, + "Cultural Awareness": 74.1, + "Detail And Technical Language": 88.6, + "Effectiveness": 86.8, + "Flexibility": 90.7, + "Helpfulness": 83.5, + "Honesty Empathy Fairness": 86.0, + "Intuitiveness": 88.4, + "Conversation Flow": 77.4, + "Overall": 80.96, + "Conversation Building": 85.3, + "Personality Consistency": 83.3, + "Personality": 72.5, + "Distinct Personality": 75.7, + "Repeat Usage": 55.4, + "Context Memory": 88.7, + "Speed Perception": 38.9, + "Tone And Language Style": 85.3, + "Transparency": 73.3, + "Trustworthiness": 85.7, + "Understanding": 86.7, + "Usefulness": 87.5 + } + }, + "Urbanicity": { + "Rural": { + "Accuracy": 90.3, + "Adaptiveness": 81.5, + "Ethical Alignment": 73.7, + "Background And Culture": 74.1, + "Bias And Stereotypes": 88.5, + "Clarity": 77.3, + "Communication": 83.4, + "Comprehensiveness": 85.5, + "Confidence": 87.5, + "Consistency": 89.5, + "Cultural Awareness": 71.5, + "Detail And Technical Language": 87.3, + "Effectiveness": 86.3, + "Flexibility": 89.4, + "Helpfulness": 84.0, + "Honesty Empathy Fairness": 82.0, + "Intuitiveness": 85.0, + "Conversation Flow": 79.1, + "Overall": 80.91, + "Conversation Building": 85.3, + "Personality Consistency": 82.9, + "Personality": 71.2, + "Distinct Personality": 73.5, + "Repeat Usage": 57.6, + "Context Memory": 87.4, + "Speed Perception": 39.7, + "Tone And Language Style": 85.5, + "Transparency": 72.2, + "Trustworthiness": 85.3, + "Understanding": 86.9, + "Usefulness": 86.8 + }, + "Suburban": { + "Accuracy": 86.1, + "Adaptiveness": 78.6, + "Ethical Alignment": 74.4, + "Background And Culture": 72.5, + "Bias And Stereotypes": 85.7, + "Clarity": 75.0, + "Communication": 78.1, + "Comprehensiveness": 81.8, + "Confidence": 81.3, + "Consistency": 86.6, + "Cultural Awareness": 70.6, + "Detail And Technical Language": 85.2, + "Effectiveness": 82.0, + "Flexibility": 86.2, + "Helpfulness": 79.9, + "Honesty Empathy Fairness": 82.4, + "Intuitiveness": 82.9, + "Conversation Flow": 75.6, + "Overall": 78.04, + "Conversation Building": 82.4, + "Personality Consistency": 80.8, + "Personality": 71.2, + "Distinct Personality": 73.0, + "Repeat Usage": 57.5, + "Context Memory": 85.1, + "Speed Perception": 38.1, + "Tone And Language Style": 82.9, + "Transparency": 69.4, + "Trustworthiness": 83.0, + "Understanding": 83.0, + "Usefulness": 84.5 + }, + "Urban": { + "Accuracy": 85.4, + "Adaptiveness": 79.8, + "Ethical Alignment": 75.1, + "Background And Culture": 74.5, + "Bias And Stereotypes": 85.1, + "Clarity": 77.1, + "Communication": 79.7, + "Comprehensiveness": 80.7, + "Confidence": 83.1, + "Consistency": 86.2, + "Cultural Awareness": 73.5, + "Detail And Technical Language": 87.7, + "Effectiveness": 84.3, + "Flexibility": 86.3, + "Helpfulness": 82.9, + "Honesty Empathy Fairness": 83.2, + "Intuitiveness": 84.6, + "Conversation Flow": 74.6, + "Overall": 79.06, + "Conversation Building": 84.1, + "Personality Consistency": 80.4, + "Personality": 70.6, + "Distinct Personality": 71.6, + "Repeat Usage": 55.6, + "Context Memory": 85.8, + "Speed Perception": 41.1, + "Tone And Language Style": 81.2, + "Transparency": 70.5, + "Trustworthiness": 81.5, + "Understanding": 84.4, + "Usefulness": 84.7 + } + } + }, + "gemini-2.0-flash-001": { + "Age": { + "18-24": { + "Accuracy": 89.8, + "Adaptiveness": 87.8, + "Ethical Alignment": 73.0, + "Background And Culture": 75.9, + "Bias And Stereotypes": 83.1, + "Clarity": 86.6, + "Communication": 86.1, + "Comprehensiveness": 87.2, + "Confidence": 91.4, + "Consistency": 92.6, + "Cultural Awareness": 72.2, + "Detail And Technical Language": 90.6, + "Effectiveness": 90.7, + "Flexibility": 89.0, + "Helpfulness": 87.9, + "Honesty Empathy Fairness": 79.1, + "Intuitiveness": 88.3, + "Conversation Flow": 84.5, + "Overall": 82.69, + "Conversation Building": 87.8, + "Personality Consistency": 86.7, + "Personality": 74.2, + "Distinct Personality": 76.5, + "Repeat Usage": 85.8, + "Context Memory": 90.9, + "Speed Perception": 80.8, + "Tone And Language Style": 82.0, + "Transparency": 78.0, + "Trustworthiness": 81.2, + "Understanding": 85.7, + "Usefulness": 92.3 + }, + "25-34": { + "Accuracy": 89.3, + "Adaptiveness": 90.9, + "Ethical Alignment": 77.7, + "Background And Culture": 78.4, + "Bias And Stereotypes": 86.3, + "Clarity": 83.3, + "Communication": 88.8, + "Comprehensiveness": 84.9, + "Confidence": 90.8, + "Consistency": 92.1, + "Cultural Awareness": 75.8, + "Detail And Technical Language": 90.8, + "Effectiveness": 93.4, + "Flexibility": 89.5, + "Helpfulness": 90.5, + "Honesty Empathy Fairness": 80.8, + "Intuitiveness": 86.7, + "Conversation Flow": 82.0, + "Overall": 85.6, + "Conversation Building": 82.9, + "Personality Consistency": 87.3, + "Personality": 76.6, + "Distinct Personality": 74.8, + "Repeat Usage": 86.5, + "Context Memory": 93.3, + "Speed Perception": 81.0, + "Tone And Language Style": 84.5, + "Transparency": 78.5, + "Trustworthiness": 85.5, + "Understanding": 88.5, + "Usefulness": 91.4 + }, + "35-44": { + "Accuracy": 88.9, + "Adaptiveness": 87.1, + "Ethical Alignment": 76.9, + "Background And Culture": 76.0, + "Bias And Stereotypes": 88.3, + "Clarity": 82.7, + "Communication": 89.0, + "Comprehensiveness": 89.2, + "Confidence": 91.0, + "Consistency": 92.3, + "Cultural Awareness": 74.3, + "Detail And Technical Language": 88.5, + "Effectiveness": 91.0, + "Flexibility": 86.8, + "Helpfulness": 90.7, + "Honesty Empathy Fairness": 80.4, + "Intuitiveness": 84.0, + "Conversation Flow": 81.8, + "Overall": 84.34, + "Conversation Building": 84.4, + "Personality Consistency": 87.3, + "Personality": 74.1, + "Distinct Personality": 74.3, + "Repeat Usage": 85.3, + "Context Memory": 92.9, + "Speed Perception": 82.2, + "Tone And Language Style": 82.2, + "Transparency": 77.0, + "Trustworthiness": 85.8, + "Understanding": 87.7, + "Usefulness": 88.7 + }, + "45-54": { + "Accuracy": 89.5, + "Adaptiveness": 88.3, + "Ethical Alignment": 74.6, + "Background And Culture": 75.9, + "Bias And Stereotypes": 85.0, + "Clarity": 83.4, + "Communication": 88.5, + "Comprehensiveness": 87.2, + "Confidence": 90.2, + "Consistency": 90.5, + "Cultural Awareness": 72.2, + "Detail And Technical Language": 88.1, + "Effectiveness": 90.1, + "Flexibility": 86.5, + "Helpfulness": 89.8, + "Honesty Empathy Fairness": 79.6, + "Intuitiveness": 83.5, + "Conversation Flow": 83.7, + "Overall": 84.7, + "Conversation Building": 81.1, + "Personality Consistency": 88.5, + "Personality": 78.1, + "Distinct Personality": 77.9, + "Repeat Usage": 85.7, + "Context Memory": 91.5, + "Speed Perception": 81.2, + "Tone And Language Style": 83.7, + "Transparency": 74.7, + "Trustworthiness": 83.8, + "Understanding": 88.5, + "Usefulness": 90.3 + }, + "55-64": { + "Accuracy": 89.8, + "Adaptiveness": 89.5, + "Ethical Alignment": 78.1, + "Background And Culture": 80.3, + "Bias And Stereotypes": 89.0, + "Clarity": 85.5, + "Communication": 89.8, + "Comprehensiveness": 87.2, + "Confidence": 88.8, + "Consistency": 93.0, + "Cultural Awareness": 74.4, + "Detail And Technical Language": 90.9, + "Effectiveness": 90.4, + "Flexibility": 88.2, + "Helpfulness": 89.4, + "Honesty Empathy Fairness": 81.0, + "Intuitiveness": 87.7, + "Conversation Flow": 86.4, + "Overall": 85.99, + "Conversation Building": 84.1, + "Personality Consistency": 87.8, + "Personality": 79.0, + "Distinct Personality": 74.9, + "Repeat Usage": 87.0, + "Context Memory": 93.8, + "Speed Perception": 81.9, + "Tone And Language Style": 84.8, + "Transparency": 77.6, + "Trustworthiness": 84.8, + "Understanding": 89.1, + "Usefulness": 90.4 + }, + "65+": { + "Accuracy": 92.0, + "Adaptiveness": 90.3, + "Ethical Alignment": 79.7, + "Background And Culture": 83.7, + "Bias And Stereotypes": 88.3, + "Clarity": 88.4, + "Communication": 88.8, + "Comprehensiveness": 90.3, + "Confidence": 93.6, + "Consistency": 93.3, + "Cultural Awareness": 77.2, + "Detail And Technical Language": 93.6, + "Effectiveness": 94.3, + "Flexibility": 89.4, + "Helpfulness": 92.5, + "Honesty Empathy Fairness": 81.8, + "Intuitiveness": 89.8, + "Conversation Flow": 86.4, + "Overall": 87.14, + "Conversation Building": 87.9, + "Personality Consistency": 87.6, + "Personality": 76.4, + "Distinct Personality": 78.9, + "Repeat Usage": 86.8, + "Context Memory": 94.7, + "Speed Perception": 81.3, + "Tone And Language Style": 88.3, + "Transparency": 74.7, + "Trustworthiness": 87.7, + "Understanding": 90.6, + "Usefulness": 93.1 + } + }, + "Sex": { + "Female": { + "Accuracy": 90.6, + "Adaptiveness": 89.0, + "Ethical Alignment": 77.2, + "Background And Culture": 79.8, + "Bias And Stereotypes": 87.7, + "Clarity": 85.2, + "Communication": 88.2, + "Comprehensiveness": 88.2, + "Confidence": 91.9, + "Consistency": 92.3, + "Cultural Awareness": 74.3, + "Detail And Technical Language": 90.8, + "Effectiveness": 91.9, + "Flexibility": 88.1, + "Helpfulness": 90.4, + "Honesty Empathy Fairness": 79.2, + "Intuitiveness": 86.9, + "Conversation Flow": 83.9, + "Overall": 85.36, + "Conversation Building": 85.0, + "Personality Consistency": 87.6, + "Personality": 75.9, + "Distinct Personality": 76.7, + "Repeat Usage": 86.0, + "Context Memory": 93.6, + "Speed Perception": 81.3, + "Tone And Language Style": 84.1, + "Transparency": 76.5, + "Trustworthiness": 85.4, + "Understanding": 88.8, + "Usefulness": 91.3 + }, + "Male": { + "Accuracy": 90.2, + "Adaptiveness": 89.9, + "Ethical Alignment": 78.7, + "Background And Culture": 80.0, + "Bias And Stereotypes": 87.4, + "Clarity": 85.9, + "Communication": 89.7, + "Comprehensiveness": 88.6, + "Confidence": 91.1, + "Consistency": 92.7, + "Cultural Awareness": 76.4, + "Detail And Technical Language": 91.5, + "Effectiveness": 92.9, + "Flexibility": 88.7, + "Helpfulness": 91.5, + "Honesty Empathy Fairness": 82.9, + "Intuitiveness": 87.5, + "Conversation Flow": 85.5, + "Overall": 86.41, + "Conversation Building": 85.1, + "Personality Consistency": 87.8, + "Personality": 77.6, + "Distinct Personality": 76.9, + "Repeat Usage": 86.8, + "Context Memory": 93.6, + "Speed Perception": 81.6, + "Tone And Language Style": 86.9, + "Transparency": 75.7, + "Trustworthiness": 86.5, + "Understanding": 89.7, + "Usefulness": 91.3 + } + }, + "Ethnicity": { + "African American": { + "Accuracy": 90.9, + "Adaptiveness": 89.6, + "Ethical Alignment": 83.0, + "Background And Culture": 83.7, + "Bias And Stereotypes": 89.2, + "Clarity": 89.7, + "Communication": 88.7, + "Comprehensiveness": 89.2, + "Confidence": 89.7, + "Consistency": 93.8, + "Cultural Awareness": 80.5, + "Detail And Technical Language": 91.6, + "Effectiveness": 92.1, + "Flexibility": 91.0, + "Helpfulness": 89.8, + "Honesty Empathy Fairness": 83.7, + "Intuitiveness": 88.1, + "Conversation Flow": 88.3, + "Overall": 86.59, + "Conversation Building": 87.9, + "Personality Consistency": 89.1, + "Personality": 78.2, + "Distinct Personality": 82.3, + "Repeat Usage": 87.5, + "Context Memory": 93.2, + "Speed Perception": 82.2, + "Tone And Language Style": 85.7, + "Transparency": 82.9, + "Trustworthiness": 87.1, + "Understanding": 89.0, + "Usefulness": 93.0 + }, + "Asian": { + "Accuracy": 90.1, + "Adaptiveness": 87.0, + "Ethical Alignment": 73.6, + "Background And Culture": 75.2, + "Bias And Stereotypes": 87.3, + "Clarity": 84.5, + "Communication": 87.9, + "Comprehensiveness": 88.6, + "Confidence": 92.4, + "Consistency": 91.8, + "Cultural Awareness": 70.6, + "Detail And Technical Language": 90.4, + "Effectiveness": 90.9, + "Flexibility": 86.7, + "Helpfulness": 88.5, + "Honesty Empathy Fairness": 82.4, + "Intuitiveness": 89.3, + "Conversation Flow": 86.0, + "Overall": 84.16, + "Conversation Building": 86.3, + "Personality Consistency": 87.5, + "Personality": 78.2, + "Distinct Personality": 77.3, + "Repeat Usage": 85.5, + "Context Memory": 93.1, + "Speed Perception": 80.3, + "Tone And Language Style": 84.2, + "Transparency": 78.8, + "Trustworthiness": 84.4, + "Understanding": 87.9, + "Usefulness": 91.3 + }, + "Hispanic": { + "Accuracy": 89.0, + "Adaptiveness": 90.3, + "Ethical Alignment": 74.3, + "Background And Culture": 81.8, + "Bias And Stereotypes": 84.5, + "Clarity": 84.2, + "Communication": 87.5, + "Comprehensiveness": 89.1, + "Confidence": 90.5, + "Consistency": 94.7, + "Cultural Awareness": 72.5, + "Detail And Technical Language": 90.6, + "Effectiveness": 93.0, + "Flexibility": 86.9, + "Helpfulness": 92.6, + "Honesty Empathy Fairness": 72.7, + "Intuitiveness": 87.3, + "Conversation Flow": 80.4, + "Overall": 85.83, + "Conversation Building": 85.7, + "Personality Consistency": 84.2, + "Personality": 73.4, + "Distinct Personality": 79.4, + "Repeat Usage": 86.4, + "Context Memory": 93.7, + "Speed Perception": 81.6, + "Tone And Language Style": 84.6, + "Transparency": 75.4, + "Trustworthiness": 85.9, + "Understanding": 89.3, + "Usefulness": 92.2 + }, + "White": { + "Accuracy": 90.5, + "Adaptiveness": 89.4, + "Ethical Alignment": 77.9, + "Background And Culture": 79.4, + "Bias And Stereotypes": 87.8, + "Clarity": 85.2, + "Communication": 89.2, + "Comprehensiveness": 88.1, + "Confidence": 91.8, + "Consistency": 92.1, + "Cultural Awareness": 75.2, + "Detail And Technical Language": 91.2, + "Effectiveness": 92.4, + "Flexibility": 88.3, + "Helpfulness": 91.0, + "Honesty Empathy Fairness": 81.7, + "Intuitiveness": 86.9, + "Conversation Flow": 84.7, + "Overall": 85.87, + "Conversation Building": 84.5, + "Personality Consistency": 88.0, + "Personality": 76.9, + "Distinct Personality": 75.6, + "Repeat Usage": 86.3, + "Context Memory": 93.6, + "Speed Perception": 81.4, + "Tone And Language Style": 85.6, + "Transparency": 75.1, + "Trustworthiness": 85.9, + "Understanding": 89.3, + "Usefulness": 91.0 + } + }, + "Politics": { + "Democrat": { + "Accuracy": 90.7, + "Adaptiveness": 88.8, + "Ethical Alignment": 77.0, + "Background And Culture": 79.6, + "Bias And Stereotypes": 88.4, + "Clarity": 85.8, + "Communication": 87.5, + "Comprehensiveness": 87.9, + "Confidence": 90.6, + "Consistency": 92.2, + "Cultural Awareness": 74.0, + "Detail And Technical Language": 90.6, + "Effectiveness": 91.5, + "Flexibility": 88.2, + "Helpfulness": 91.0, + "Honesty Empathy Fairness": 80.2, + "Intuitiveness": 85.8, + "Conversation Flow": 83.0, + "Overall": 85.26, + "Conversation Building": 85.5, + "Personality Consistency": 87.3, + "Personality": 74.9, + "Distinct Personality": 75.0, + "Repeat Usage": 85.9, + "Context Memory": 94.0, + "Speed Perception": 81.4, + "Tone And Language Style": 83.0, + "Transparency": 75.8, + "Trustworthiness": 86.0, + "Understanding": 89.0, + "Usefulness": 91.7 + }, + "Independent": { + "Accuracy": 91.3, + "Adaptiveness": 91.8, + "Ethical Alignment": 76.4, + "Background And Culture": 80.8, + "Bias And Stereotypes": 88.1, + "Clarity": 84.2, + "Communication": 91.4, + "Comprehensiveness": 90.8, + "Confidence": 92.5, + "Consistency": 94.1, + "Cultural Awareness": 73.6, + "Detail And Technical Language": 92.7, + "Effectiveness": 93.1, + "Flexibility": 90.2, + "Helpfulness": 91.5, + "Honesty Empathy Fairness": 81.9, + "Intuitiveness": 88.8, + "Conversation Flow": 87.9, + "Overall": 87.21, + "Conversation Building": 86.3, + "Personality Consistency": 88.4, + "Personality": 77.3, + "Distinct Personality": 74.8, + "Repeat Usage": 86.4, + "Context Memory": 93.4, + "Speed Perception": 81.4, + "Tone And Language Style": 88.9, + "Transparency": 75.0, + "Trustworthiness": 87.2, + "Understanding": 90.5, + "Usefulness": 92.3 + }, + "Republican": { + "Accuracy": 89.6, + "Adaptiveness": 89.0, + "Ethical Alignment": 79.6, + "Background And Culture": 79.9, + "Bias And Stereotypes": 86.4, + "Clarity": 85.8, + "Communication": 89.4, + "Comprehensiveness": 87.8, + "Confidence": 92.0, + "Consistency": 92.2, + "Cultural Awareness": 77.6, + "Detail And Technical Language": 91.0, + "Effectiveness": 92.9, + "Flexibility": 87.7, + "Helpfulness": 90.7, + "Honesty Empathy Fairness": 81.4, + "Intuitiveness": 87.9, + "Conversation Flow": 85.1, + "Overall": 85.94, + "Conversation Building": 83.9, + "Personality Consistency": 87.8, + "Personality": 78.5, + "Distinct Personality": 79.8, + "Repeat Usage": 86.9, + "Context Memory": 93.2, + "Speed Perception": 81.6, + "Tone And Language Style": 86.6, + "Transparency": 76.9, + "Trustworthiness": 85.3, + "Understanding": 88.8, + "Usefulness": 90.4 + } + }, + "Education": { + "College": { + "Accuracy": 88.9, + "Adaptiveness": 85.2, + "Ethical Alignment": 77.7, + "Background And Culture": 76.2, + "Bias And Stereotypes": 84.7, + "Clarity": 82.1, + "Communication": 85.2, + "Comprehensiveness": 86.8, + "Confidence": 87.5, + "Consistency": 89.3, + "Cultural Awareness": 74.4, + "Detail And Technical Language": 87.5, + "Effectiveness": 89.0, + "Flexibility": 84.0, + "Helpfulness": 88.7, + "Honesty Empathy Fairness": 78.5, + "Intuitiveness": 83.9, + "Conversation Flow": 81.5, + "Overall": 82.73, + "Conversation Building": 83.0, + "Personality Consistency": 84.9, + "Personality": 76.6, + "Distinct Personality": 75.9, + "Repeat Usage": 84.7, + "Context Memory": 90.8, + "Speed Perception": 81.1, + "Tone And Language Style": 82.2, + "Transparency": 74.3, + "Trustworthiness": 80.3, + "Understanding": 86.9, + "Usefulness": 87.9 + }, + "No College": { + "Accuracy": 91.2, + "Adaptiveness": 91.5, + "Ethical Alignment": 78.0, + "Background And Culture": 81.8, + "Bias And Stereotypes": 89.0, + "Clarity": 87.3, + "Communication": 90.8, + "Comprehensiveness": 89.2, + "Confidence": 93.6, + "Consistency": 94.2, + "Cultural Awareness": 75.8, + "Detail And Technical Language": 93.0, + "Effectiveness": 94.1, + "Flexibility": 90.6, + "Helpfulness": 92.1, + "Honesty Empathy Fairness": 82.2, + "Intuitiveness": 88.8, + "Conversation Flow": 86.3, + "Overall": 87.46, + "Conversation Building": 86.1, + "Personality Consistency": 89.1, + "Personality": 76.8, + "Distinct Personality": 77.3, + "Repeat Usage": 87.2, + "Context Memory": 95.0, + "Speed Perception": 81.7, + "Tone And Language Style": 87.1, + "Transparency": 77.0, + "Trustworthiness": 88.8, + "Understanding": 90.4, + "Usefulness": 93.0 + } + }, + "Urbanicity": { + "Rural": { + "Accuracy": 92.1, + "Adaptiveness": 92.0, + "Ethical Alignment": 76.2, + "Background And Culture": 78.9, + "Bias And Stereotypes": 88.9, + "Clarity": 86.5, + "Communication": 90.8, + "Comprehensiveness": 91.9, + "Confidence": 92.7, + "Consistency": 94.2, + "Cultural Awareness": 74.1, + "Detail And Technical Language": 93.1, + "Effectiveness": 94.5, + "Flexibility": 90.7, + "Helpfulness": 92.3, + "Honesty Empathy Fairness": 81.0, + "Intuitiveness": 87.4, + "Conversation Flow": 84.1, + "Overall": 87.17, + "Conversation Building": 85.4, + "Personality Consistency": 88.5, + "Personality": 76.0, + "Distinct Personality": 74.3, + "Repeat Usage": 87.4, + "Context Memory": 94.4, + "Speed Perception": 81.9, + "Tone And Language Style": 87.1, + "Transparency": 77.0, + "Trustworthiness": 88.3, + "Understanding": 91.9, + "Usefulness": 93.5 + }, + "Suburban": { + "Accuracy": 90.6, + "Adaptiveness": 89.5, + "Ethical Alignment": 78.7, + "Background And Culture": 81.3, + "Bias And Stereotypes": 88.5, + "Clarity": 84.9, + "Communication": 89.5, + "Comprehensiveness": 86.9, + "Confidence": 92.4, + "Consistency": 92.2, + "Cultural Awareness": 77.4, + "Detail And Technical Language": 91.1, + "Effectiveness": 92.7, + "Flexibility": 88.8, + "Helpfulness": 91.5, + "Honesty Empathy Fairness": 83.0, + "Intuitiveness": 88.3, + "Conversation Flow": 86.2, + "Overall": 86.43, + "Conversation Building": 86.6, + "Personality Consistency": 89.0, + "Personality": 78.8, + "Distinct Personality": 79.4, + "Repeat Usage": 86.0, + "Context Memory": 93.9, + "Speed Perception": 81.5, + "Tone And Language Style": 86.1, + "Transparency": 74.8, + "Trustworthiness": 85.8, + "Understanding": 88.6, + "Usefulness": 90.9 + }, + "Urban": { + "Accuracy": 88.4, + "Adaptiveness": 86.7, + "Ethical Alignment": 78.4, + "Background And Culture": 79.0, + "Bias And Stereotypes": 85.1, + "Clarity": 85.4, + "Communication": 86.2, + "Comprehensiveness": 86.9, + "Confidence": 89.1, + "Consistency": 91.3, + "Cultural Awareness": 73.7, + "Detail And Technical Language": 89.2, + "Effectiveness": 89.7, + "Flexibility": 85.5, + "Helpfulness": 88.9, + "Honesty Empathy Fairness": 78.2, + "Intuitiveness": 85.4, + "Conversation Flow": 83.1, + "Overall": 83.81, + "Conversation Building": 82.7, + "Personality Consistency": 85.1, + "Personality": 74.6, + "Distinct Personality": 75.8, + "Repeat Usage": 85.9, + "Context Memory": 92.3, + "Speed Perception": 81.0, + "Tone And Language Style": 82.9, + "Transparency": 76.9, + "Trustworthiness": 83.9, + "Understanding": 87.4, + "Usefulness": 89.7 + } + } + }, + "gpt-4o": { + "Age": { + "18-24": { + "Accuracy": 90.0, + "Adaptiveness": 87.9, + "Ethical Alignment": 71.5, + "Background And Culture": 74.1, + "Bias And Stereotypes": 83.0, + "Clarity": 82.7, + "Communication": 88.4, + "Comprehensiveness": 90.2, + "Confidence": 86.8, + "Consistency": 89.7, + "Cultural Awareness": 72.5, + "Detail And Technical Language": 90.4, + "Effectiveness": 90.6, + "Flexibility": 93.6, + "Helpfulness": 90.3, + "Honesty Empathy Fairness": 83.8, + "Intuitiveness": 86.6, + "Conversation Flow": 83.4, + "Overall": 85.2, + "Conversation Building": 90.6, + "Personality Consistency": 87.4, + "Personality": 79.3, + "Distinct Personality": 79.7, + "Repeat Usage": 88.5, + "Context Memory": 95.5, + "Speed Perception": 80.6, + "Tone And Language Style": 84.3, + "Transparency": 69.0, + "Trustworthiness": 87.3, + "Understanding": 89.1, + "Usefulness": 89.5 + }, + "25-34": { + "Accuracy": 92.3, + "Adaptiveness": 90.9, + "Ethical Alignment": 80.3, + "Background And Culture": 77.4, + "Bias And Stereotypes": 87.2, + "Clarity": 87.9, + "Communication": 89.1, + "Comprehensiveness": 91.3, + "Confidence": 90.0, + "Consistency": 93.1, + "Cultural Awareness": 77.0, + "Detail And Technical Language": 92.9, + "Effectiveness": 93.2, + "Flexibility": 94.2, + "Helpfulness": 91.1, + "Honesty Empathy Fairness": 87.2, + "Intuitiveness": 88.1, + "Conversation Flow": 87.5, + "Overall": 87.27, + "Conversation Building": 94.9, + "Personality Consistency": 87.8, + "Personality": 80.0, + "Distinct Personality": 82.4, + "Repeat Usage": 89.2, + "Context Memory": 96.3, + "Speed Perception": 80.2, + "Tone And Language Style": 84.6, + "Transparency": 72.9, + "Trustworthiness": 88.2, + "Understanding": 94.2, + "Usefulness": 93.0 + }, + "35-44": { + "Accuracy": 90.7, + "Adaptiveness": 90.4, + "Ethical Alignment": 80.1, + "Background And Culture": 77.3, + "Bias And Stereotypes": 90.7, + "Clarity": 84.3, + "Communication": 90.0, + "Comprehensiveness": 92.7, + "Confidence": 90.2, + "Consistency": 90.9, + "Cultural Awareness": 75.9, + "Detail And Technical Language": 90.4, + "Effectiveness": 91.7, + "Flexibility": 93.1, + "Helpfulness": 90.2, + "Honesty Empathy Fairness": 85.0, + "Intuitiveness": 88.0, + "Conversation Flow": 86.3, + "Overall": 86.91, + "Conversation Building": 92.6, + "Personality Consistency": 87.1, + "Personality": 76.7, + "Distinct Personality": 77.9, + "Repeat Usage": 87.1, + "Context Memory": 93.9, + "Speed Perception": 81.6, + "Tone And Language Style": 85.2, + "Transparency": 74.3, + "Trustworthiness": 90.1, + "Understanding": 93.7, + "Usefulness": 92.9 + }, + "45-54": { + "Accuracy": 90.5, + "Adaptiveness": 90.0, + "Ethical Alignment": 75.8, + "Background And Culture": 76.1, + "Bias And Stereotypes": 84.7, + "Clarity": 81.7, + "Communication": 87.3, + "Comprehensiveness": 87.3, + "Confidence": 86.2, + "Consistency": 88.4, + "Cultural Awareness": 76.1, + "Detail And Technical Language": 89.6, + "Effectiveness": 89.6, + "Flexibility": 88.7, + "Helpfulness": 88.0, + "Honesty Empathy Fairness": 84.3, + "Intuitiveness": 85.0, + "Conversation Flow": 85.2, + "Overall": 85.4, + "Conversation Building": 89.5, + "Personality Consistency": 88.0, + "Personality": 78.7, + "Distinct Personality": 80.2, + "Repeat Usage": 88.7, + "Context Memory": 93.2, + "Speed Perception": 80.4, + "Tone And Language Style": 79.4, + "Transparency": 71.6, + "Trustworthiness": 88.1, + "Understanding": 89.6, + "Usefulness": 89.3 + }, + "55-64": { + "Accuracy": 92.2, + "Adaptiveness": 91.7, + "Ethical Alignment": 79.9, + "Background And Culture": 80.5, + "Bias And Stereotypes": 90.1, + "Clarity": 87.3, + "Communication": 91.5, + "Comprehensiveness": 92.9, + "Confidence": 90.1, + "Consistency": 93.7, + "Cultural Awareness": 77.3, + "Detail And Technical Language": 92.2, + "Effectiveness": 94.0, + "Flexibility": 93.5, + "Helpfulness": 93.1, + "Honesty Empathy Fairness": 88.7, + "Intuitiveness": 87.7, + "Conversation Flow": 89.1, + "Overall": 89.03, + "Conversation Building": 93.2, + "Personality Consistency": 89.0, + "Personality": 81.9, + "Distinct Personality": 82.1, + "Repeat Usage": 89.1, + "Context Memory": 95.6, + "Speed Perception": 81.9, + "Tone And Language Style": 87.8, + "Transparency": 73.5, + "Trustworthiness": 91.6, + "Understanding": 92.9, + "Usefulness": 94.3 + }, + "65+": { + "Accuracy": 92.3, + "Adaptiveness": 90.9, + "Ethical Alignment": 83.0, + "Background And Culture": 80.4, + "Bias And Stereotypes": 89.0, + "Clarity": 89.0, + "Communication": 91.9, + "Comprehensiveness": 92.0, + "Confidence": 89.8, + "Consistency": 93.0, + "Cultural Awareness": 78.0, + "Detail And Technical Language": 91.2, + "Effectiveness": 93.6, + "Flexibility": 93.1, + "Helpfulness": 92.0, + "Honesty Empathy Fairness": 88.5, + "Intuitiveness": 88.8, + "Conversation Flow": 91.1, + "Overall": 88.9, + "Conversation Building": 94.1, + "Personality Consistency": 87.9, + "Personality": 82.4, + "Distinct Personality": 83.0, + "Repeat Usage": 88.2, + "Context Memory": 95.8, + "Speed Perception": 82.0, + "Tone And Language Style": 84.8, + "Transparency": 74.1, + "Trustworthiness": 91.3, + "Understanding": 93.4, + "Usefulness": 93.9 + } + }, + "Sex": { + "Female": { + "Accuracy": 92.7, + "Adaptiveness": 90.9, + "Ethical Alignment": 79.7, + "Background And Culture": 78.5, + "Bias And Stereotypes": 89.0, + "Clarity": 86.7, + "Communication": 90.5, + "Comprehensiveness": 92.1, + "Confidence": 89.9, + "Consistency": 92.4, + "Cultural Awareness": 76.5, + "Detail And Technical Language": 90.8, + "Effectiveness": 93.4, + "Flexibility": 93.0, + "Helpfulness": 91.1, + "Honesty Empathy Fairness": 87.1, + "Intuitiveness": 88.5, + "Conversation Flow": 88.6, + "Overall": 87.99, + "Conversation Building": 92.6, + "Personality Consistency": 87.9, + "Personality": 80.9, + "Distinct Personality": 81.8, + "Repeat Usage": 88.2, + "Context Memory": 95.3, + "Speed Perception": 81.3, + "Tone And Language Style": 84.6, + "Transparency": 74.7, + "Trustworthiness": 90.5, + "Understanding": 93.5, + "Usefulness": 92.8 + }, + "Male": { + "Accuracy": 90.7, + "Adaptiveness": 90.7, + "Ethical Alignment": 81.2, + "Background And Culture": 79.1, + "Bias And Stereotypes": 88.0, + "Clarity": 86.7, + "Communication": 90.4, + "Comprehensiveness": 90.7, + "Confidence": 88.8, + "Consistency": 91.8, + "Cultural Awareness": 77.7, + "Detail And Technical Language": 91.6, + "Effectiveness": 91.9, + "Flexibility": 92.3, + "Helpfulness": 91.2, + "Honesty Empathy Fairness": 87.1, + "Intuitiveness": 87.0, + "Conversation Flow": 88.5, + "Overall": 87.63, + "Conversation Building": 93.6, + "Personality Consistency": 88.0, + "Personality": 80.0, + "Distinct Personality": 81.2, + "Repeat Usage": 88.6, + "Context Memory": 94.9, + "Speed Perception": 81.6, + "Tone And Language Style": 84.4, + "Transparency": 71.9, + "Trustworthiness": 89.9, + "Understanding": 92.1, + "Usefulness": 93.1 + } + }, + "Ethnicity": { + "African American": { + "Accuracy": 91.7, + "Adaptiveness": 90.6, + "Ethical Alignment": 84.2, + "Background And Culture": 79.9, + "Bias And Stereotypes": 88.3, + "Clarity": 85.4, + "Communication": 89.5, + "Comprehensiveness": 89.5, + "Confidence": 90.9, + "Consistency": 92.6, + "Cultural Awareness": 80.2, + "Detail And Technical Language": 89.9, + "Effectiveness": 92.7, + "Flexibility": 93.9, + "Helpfulness": 90.7, + "Honesty Empathy Fairness": 88.2, + "Intuitiveness": 88.2, + "Conversation Flow": 88.7, + "Overall": 88.11, + "Conversation Building": 93.7, + "Personality Consistency": 87.7, + "Personality": 83.0, + "Distinct Personality": 85.0, + "Repeat Usage": 88.7, + "Context Memory": 95.9, + "Speed Perception": 83.1, + "Tone And Language Style": 84.5, + "Transparency": 79.8, + "Trustworthiness": 90.8, + "Understanding": 92.3, + "Usefulness": 93.8 + }, + "Asian": { + "Accuracy": 91.8, + "Adaptiveness": 89.2, + "Ethical Alignment": 74.9, + "Background And Culture": 77.7, + "Bias And Stereotypes": 87.3, + "Clarity": 84.6, + "Communication": 90.8, + "Comprehensiveness": 89.4, + "Confidence": 88.1, + "Consistency": 91.4, + "Cultural Awareness": 74.8, + "Detail And Technical Language": 89.6, + "Effectiveness": 92.2, + "Flexibility": 90.7, + "Helpfulness": 89.2, + "Honesty Empathy Fairness": 88.3, + "Intuitiveness": 85.1, + "Conversation Flow": 89.1, + "Overall": 86.57, + "Conversation Building": 90.7, + "Personality Consistency": 86.6, + "Personality": 79.1, + "Distinct Personality": 82.0, + "Repeat Usage": 86.9, + "Context Memory": 94.1, + "Speed Perception": 80.2, + "Tone And Language Style": 82.7, + "Transparency": 70.9, + "Trustworthiness": 89.6, + "Understanding": 90.4, + "Usefulness": 92.9 + }, + "Hispanic": { + "Accuracy": 91.5, + "Adaptiveness": 90.5, + "Ethical Alignment": 76.4, + "Background And Culture": 79.7, + "Bias And Stereotypes": 87.8, + "Clarity": 86.0, + "Communication": 90.0, + "Comprehensiveness": 89.7, + "Confidence": 87.8, + "Consistency": 92.2, + "Cultural Awareness": 73.5, + "Detail And Technical Language": 94.4, + "Effectiveness": 92.0, + "Flexibility": 92.5, + "Helpfulness": 90.4, + "Honesty Empathy Fairness": 82.8, + "Intuitiveness": 85.7, + "Conversation Flow": 85.6, + "Overall": 87.34, + "Conversation Building": 93.8, + "Personality Consistency": 88.7, + "Personality": 81.9, + "Distinct Personality": 82.4, + "Repeat Usage": 89.1, + "Context Memory": 96.1, + "Speed Perception": 80.9, + "Tone And Language Style": 84.3, + "Transparency": 68.3, + "Trustworthiness": 87.1, + "Understanding": 91.8, + "Usefulness": 91.5 + }, + "White": { + "Accuracy": 91.8, + "Adaptiveness": 90.9, + "Ethical Alignment": 80.8, + "Background And Culture": 78.5, + "Bias And Stereotypes": 88.7, + "Clarity": 87.1, + "Communication": 90.6, + "Comprehensiveness": 92.0, + "Confidence": 89.4, + "Consistency": 92.1, + "Cultural Awareness": 77.3, + "Detail And Technical Language": 91.0, + "Effectiveness": 92.8, + "Flexibility": 92.6, + "Helpfulness": 91.4, + "Honesty Empathy Fairness": 87.5, + "Intuitiveness": 88.2, + "Conversation Flow": 88.9, + "Overall": 87.89, + "Conversation Building": 93.0, + "Personality Consistency": 87.9, + "Personality": 80.0, + "Distinct Personality": 80.9, + "Repeat Usage": 88.3, + "Context Memory": 95.0, + "Speed Perception": 81.3, + "Tone And Language Style": 84.6, + "Transparency": 73.3, + "Trustworthiness": 90.6, + "Understanding": 93.2, + "Usefulness": 93.0 + } + }, + "Politics": { + "Democrat": { + "Accuracy": 92.8, + "Adaptiveness": 90.7, + "Ethical Alignment": 79.7, + "Background And Culture": 78.1, + "Bias And Stereotypes": 88.6, + "Clarity": 85.3, + "Communication": 90.3, + "Comprehensiveness": 91.9, + "Confidence": 89.9, + "Consistency": 92.0, + "Cultural Awareness": 76.9, + "Detail And Technical Language": 92.0, + "Effectiveness": 93.0, + "Flexibility": 93.2, + "Helpfulness": 91.1, + "Honesty Empathy Fairness": 86.3, + "Intuitiveness": 88.1, + "Conversation Flow": 88.4, + "Overall": 87.66, + "Conversation Building": 93.3, + "Personality Consistency": 87.6, + "Personality": 80.1, + "Distinct Personality": 80.9, + "Repeat Usage": 88.3, + "Context Memory": 95.1, + "Speed Perception": 81.1, + "Tone And Language Style": 85.4, + "Transparency": 72.9, + "Trustworthiness": 90.7, + "Understanding": 92.6, + "Usefulness": 93.6 + }, + "Independent": { + "Accuracy": 92.5, + "Adaptiveness": 91.7, + "Ethical Alignment": 79.2, + "Background And Culture": 79.1, + "Bias And Stereotypes": 90.4, + "Clarity": 88.0, + "Communication": 91.8, + "Comprehensiveness": 92.9, + "Confidence": 90.6, + "Consistency": 94.1, + "Cultural Awareness": 75.7, + "Detail And Technical Language": 91.3, + "Effectiveness": 93.7, + "Flexibility": 93.9, + "Helpfulness": 92.9, + "Honesty Empathy Fairness": 89.3, + "Intuitiveness": 89.3, + "Conversation Flow": 89.2, + "Overall": 88.63, + "Conversation Building": 95.5, + "Personality Consistency": 87.7, + "Personality": 79.0, + "Distinct Personality": 77.6, + "Repeat Usage": 88.5, + "Context Memory": 96.7, + "Speed Perception": 82.9, + "Tone And Language Style": 86.5, + "Transparency": 70.8, + "Trustworthiness": 91.3, + "Understanding": 94.6, + "Usefulness": 94.4 + }, + "Republican": { + "Accuracy": 90.2, + "Adaptiveness": 90.4, + "Ethical Alignment": 81.8, + "Background And Culture": 79.4, + "Bias And Stereotypes": 87.5, + "Clarity": 87.6, + "Communication": 90.0, + "Comprehensiveness": 90.2, + "Confidence": 88.1, + "Consistency": 91.2, + "Cultural Awareness": 78.0, + "Detail And Technical Language": 90.3, + "Effectiveness": 91.9, + "Flexibility": 91.5, + "Helpfulness": 90.5, + "Honesty Empathy Fairness": 86.9, + "Intuitiveness": 86.8, + "Conversation Flow": 88.3, + "Overall": 87.61, + "Conversation Building": 91.7, + "Personality Consistency": 88.5, + "Personality": 81.6, + "Distinct Personality": 84.1, + "Repeat Usage": 88.4, + "Context Memory": 94.4, + "Speed Perception": 81.0, + "Tone And Language Style": 82.6, + "Transparency": 75.2, + "Trustworthiness": 89.1, + "Understanding": 92.3, + "Usefulness": 91.4 + } + }, + "Education": { + "College": { + "Accuracy": 89.9, + "Adaptiveness": 88.4, + "Ethical Alignment": 78.6, + "Background And Culture": 77.4, + "Bias And Stereotypes": 85.1, + "Clarity": 82.4, + "Communication": 88.3, + "Comprehensiveness": 89.2, + "Confidence": 87.0, + "Consistency": 89.0, + "Cultural Awareness": 75.7, + "Detail And Technical Language": 89.0, + "Effectiveness": 90.9, + "Flexibility": 89.0, + "Helpfulness": 88.6, + "Honesty Empathy Fairness": 83.5, + "Intuitiveness": 85.0, + "Conversation Flow": 86.1, + "Overall": 85.26, + "Conversation Building": 88.0, + "Personality Consistency": 85.8, + "Personality": 76.0, + "Distinct Personality": 78.2, + "Repeat Usage": 86.4, + "Context Memory": 90.4, + "Speed Perception": 79.8, + "Tone And Language Style": 83.9, + "Transparency": 74.1, + "Trustworthiness": 88.0, + "Understanding": 90.1, + "Usefulness": 89.7 + }, + "No College": { + "Accuracy": 92.7, + "Adaptiveness": 92.0, + "Ethical Alignment": 81.4, + "Background And Culture": 79.5, + "Bias And Stereotypes": 90.2, + "Clarity": 88.9, + "Communication": 91.5, + "Comprehensiveness": 92.6, + "Confidence": 90.5, + "Consistency": 93.7, + "Cultural Awareness": 77.8, + "Detail And Technical Language": 92.4, + "Effectiveness": 93.6, + "Flexibility": 94.6, + "Helpfulness": 92.5, + "Honesty Empathy Fairness": 88.9, + "Intuitiveness": 89.2, + "Conversation Flow": 89.8, + "Overall": 89.13, + "Conversation Building": 95.7, + "Personality Consistency": 89.0, + "Personality": 82.8, + "Distinct Personality": 83.2, + "Repeat Usage": 89.4, + "Context Memory": 97.6, + "Speed Perception": 82.2, + "Tone And Language Style": 84.8, + "Transparency": 73.0, + "Trustworthiness": 91.3, + "Understanding": 94.3, + "Usefulness": 94.6 + } + }, + "Urbanicity": { + "Rural": { + "Accuracy": 93.5, + "Adaptiveness": 92.3, + "Ethical Alignment": 80.8, + "Background And Culture": 79.1, + "Bias And Stereotypes": 91.2, + "Clarity": 89.7, + "Communication": 92.4, + "Comprehensiveness": 93.2, + "Confidence": 90.7, + "Consistency": 93.2, + "Cultural Awareness": 76.0, + "Detail And Technical Language": 92.8, + "Effectiveness": 95.2, + "Flexibility": 94.1, + "Helpfulness": 92.4, + "Honesty Empathy Fairness": 86.9, + "Intuitiveness": 88.7, + "Conversation Flow": 90.1, + "Overall": 88.83, + "Conversation Building": 94.8, + "Personality Consistency": 87.8, + "Personality": 80.8, + "Distinct Personality": 82.9, + "Repeat Usage": 88.7, + "Context Memory": 96.8, + "Speed Perception": 82.0, + "Tone And Language Style": 85.6, + "Transparency": 72.0, + "Trustworthiness": 90.7, + "Understanding": 94.1, + "Usefulness": 95.5 + }, + "Suburban": { + "Accuracy": 91.4, + "Adaptiveness": 90.3, + "Ethical Alignment": 80.4, + "Background And Culture": 78.3, + "Bias And Stereotypes": 88.1, + "Clarity": 85.7, + "Communication": 89.9, + "Comprehensiveness": 90.8, + "Confidence": 88.6, + "Consistency": 92.3, + "Cultural Awareness": 77.3, + "Detail And Technical Language": 90.8, + "Effectiveness": 92.2, + "Flexibility": 92.3, + "Helpfulness": 90.9, + "Honesty Empathy Fairness": 88.0, + "Intuitiveness": 88.2, + "Conversation Flow": 88.2, + "Overall": 87.46, + "Conversation Building": 92.6, + "Personality Consistency": 88.9, + "Personality": 80.8, + "Distinct Personality": 81.7, + "Repeat Usage": 88.5, + "Context Memory": 94.5, + "Speed Perception": 81.5, + "Tone And Language Style": 84.5, + "Transparency": 73.5, + "Trustworthiness": 89.9, + "Understanding": 92.1, + "Usefulness": 92.0 + }, + "Urban": { + "Accuracy": 90.4, + "Adaptiveness": 89.9, + "Ethical Alignment": 80.2, + "Background And Culture": 79.2, + "Bias And Stereotypes": 86.5, + "Clarity": 85.0, + "Communication": 89.2, + "Comprehensiveness": 90.5, + "Confidence": 89.0, + "Consistency": 90.8, + "Cultural Awareness": 77.8, + "Detail And Technical Language": 90.2, + "Effectiveness": 90.9, + "Flexibility": 91.8, + "Helpfulness": 90.3, + "Honesty Empathy Fairness": 86.1, + "Intuitiveness": 86.3, + "Conversation Flow": 87.5, + "Overall": 87.31, + "Conversation Building": 92.1, + "Personality Consistency": 86.9, + "Personality": 79.8, + "Distinct Personality": 79.9, + "Repeat Usage": 87.8, + "Context Memory": 94.4, + "Speed Perception": 80.7, + "Tone And Language Style": 83.4, + "Transparency": 74.6, + "Trustworthiness": 90.1, + "Understanding": 92.7, + "Usefulness": 91.6 + } + } + }, + "llama-3.1-405b-instruct": { + "Age": { + "18-24": { + "Accuracy": 93.8, + "Adaptiveness": 85.5, + "Ethical Alignment": 73.7, + "Background And Culture": 74.7, + "Bias And Stereotypes": 81.4, + "Clarity": 84.3, + "Communication": 85.0, + "Comprehensiveness": 87.9, + "Confidence": 85.4, + "Consistency": 93.7, + "Cultural Awareness": 70.2, + "Detail And Technical Language": 84.7, + "Effectiveness": 85.9, + "Flexibility": 85.0, + "Helpfulness": 82.6, + "Honesty Empathy Fairness": 87.0, + "Intuitiveness": 86.2, + "Conversation Flow": 81.9, + "Overall": 81.37, + "Conversation Building": 85.6, + "Personality Consistency": 87.6, + "Personality": 74.8, + "Distinct Personality": 71.5, + "Repeat Usage": 81.8, + "Context Memory": 91.4, + "Speed Perception": 70.2, + "Tone And Language Style": 88.0, + "Transparency": 74.7, + "Trustworthiness": 83.6, + "Understanding": 83.4, + "Usefulness": 87.8 + }, + "25-34": { + "Accuracy": 93.3, + "Adaptiveness": 84.9, + "Ethical Alignment": 75.8, + "Background And Culture": 77.0, + "Bias And Stereotypes": 83.5, + "Clarity": 84.8, + "Communication": 87.5, + "Comprehensiveness": 88.4, + "Confidence": 84.3, + "Consistency": 89.6, + "Cultural Awareness": 74.5, + "Detail And Technical Language": 87.3, + "Effectiveness": 88.5, + "Flexibility": 89.2, + "Helpfulness": 83.4, + "Honesty Empathy Fairness": 88.2, + "Intuitiveness": 87.3, + "Conversation Flow": 82.4, + "Overall": 82.47, + "Conversation Building": 86.1, + "Personality Consistency": 85.4, + "Personality": 75.5, + "Distinct Personality": 75.8, + "Repeat Usage": 81.2, + "Context Memory": 91.2, + "Speed Perception": 70.6, + "Tone And Language Style": 89.4, + "Transparency": 81.8, + "Trustworthiness": 83.7, + "Understanding": 85.3, + "Usefulness": 88.0 + }, + "35-44": { + "Accuracy": 92.8, + "Adaptiveness": 85.8, + "Ethical Alignment": 76.1, + "Background And Culture": 75.1, + "Bias And Stereotypes": 86.8, + "Clarity": 84.4, + "Communication": 88.2, + "Comprehensiveness": 87.7, + "Confidence": 86.9, + "Consistency": 93.5, + "Cultural Awareness": 74.2, + "Detail And Technical Language": 88.4, + "Effectiveness": 87.3, + "Flexibility": 90.3, + "Helpfulness": 86.5, + "Honesty Empathy Fairness": 87.7, + "Intuitiveness": 88.5, + "Conversation Flow": 83.4, + "Overall": 83.49, + "Conversation Building": 87.6, + "Personality Consistency": 87.4, + "Personality": 76.8, + "Distinct Personality": 72.0, + "Repeat Usage": 81.4, + "Context Memory": 90.0, + "Speed Perception": 70.7, + "Tone And Language Style": 85.6, + "Transparency": 79.5, + "Trustworthiness": 84.3, + "Understanding": 87.7, + "Usefulness": 89.8 + }, + "45-54": { + "Accuracy": 91.7, + "Adaptiveness": 84.1, + "Ethical Alignment": 70.6, + "Background And Culture": 73.5, + "Bias And Stereotypes": 82.6, + "Clarity": 81.8, + "Communication": 87.2, + "Comprehensiveness": 87.5, + "Confidence": 87.7, + "Consistency": 91.1, + "Cultural Awareness": 69.2, + "Detail And Technical Language": 88.0, + "Effectiveness": 90.2, + "Flexibility": 85.6, + "Helpfulness": 84.3, + "Honesty Empathy Fairness": 84.3, + "Intuitiveness": 85.4, + "Conversation Flow": 82.4, + "Overall": 82.6, + "Conversation Building": 82.5, + "Personality Consistency": 86.1, + "Personality": 76.5, + "Distinct Personality": 75.3, + "Repeat Usage": 82.2, + "Context Memory": 89.6, + "Speed Perception": 70.0, + "Tone And Language Style": 84.8, + "Transparency": 77.0, + "Trustworthiness": 84.5, + "Understanding": 88.1, + "Usefulness": 86.4 + }, + "55-64": { + "Accuracy": 93.2, + "Adaptiveness": 89.0, + "Ethical Alignment": 79.4, + "Background And Culture": 78.5, + "Bias And Stereotypes": 88.5, + "Clarity": 85.5, + "Communication": 91.2, + "Comprehensiveness": 91.0, + "Confidence": 85.8, + "Consistency": 93.7, + "Cultural Awareness": 76.1, + "Detail And Technical Language": 91.8, + "Effectiveness": 91.0, + "Flexibility": 92.0, + "Helpfulness": 89.0, + "Honesty Empathy Fairness": 87.5, + "Intuitiveness": 90.2, + "Conversation Flow": 88.8, + "Overall": 86.36, + "Conversation Building": 90.3, + "Personality Consistency": 89.9, + "Personality": 77.9, + "Distinct Personality": 77.0, + "Repeat Usage": 83.6, + "Context Memory": 91.1, + "Speed Perception": 70.1, + "Tone And Language Style": 92.7, + "Transparency": 77.6, + "Trustworthiness": 87.9, + "Understanding": 91.0, + "Usefulness": 89.5 + }, + "65+": { + "Accuracy": 94.7, + "Adaptiveness": 89.4, + "Ethical Alignment": 79.2, + "Background And Culture": 80.2, + "Bias And Stereotypes": 89.2, + "Clarity": 85.3, + "Communication": 91.7, + "Comprehensiveness": 92.7, + "Confidence": 90.8, + "Consistency": 94.3, + "Cultural Awareness": 75.7, + "Detail And Technical Language": 91.9, + "Effectiveness": 93.6, + "Flexibility": 92.4, + "Helpfulness": 90.8, + "Honesty Empathy Fairness": 89.6, + "Intuitiveness": 88.8, + "Conversation Flow": 87.7, + "Overall": 87.1, + "Conversation Building": 88.6, + "Personality Consistency": 86.6, + "Personality": 78.0, + "Distinct Personality": 77.5, + "Repeat Usage": 81.9, + "Context Memory": 93.4, + "Speed Perception": 70.2, + "Tone And Language Style": 93.9, + "Transparency": 80.5, + "Trustworthiness": 89.6, + "Understanding": 90.0, + "Usefulness": 94.1 + } + }, + "Sex": { + "Female": { + "Accuracy": 93.7, + "Adaptiveness": 88.5, + "Ethical Alignment": 76.9, + "Background And Culture": 77.7, + "Bias And Stereotypes": 87.4, + "Clarity": 84.3, + "Communication": 90.2, + "Comprehensiveness": 91.0, + "Confidence": 88.2, + "Consistency": 93.8, + "Cultural Awareness": 74.6, + "Detail And Technical Language": 90.8, + "Effectiveness": 92.1, + "Flexibility": 91.1, + "Helpfulness": 88.8, + "Honesty Empathy Fairness": 88.1, + "Intuitiveness": 88.9, + "Conversation Flow": 85.4, + "Overall": 85.6, + "Conversation Building": 86.7, + "Personality Consistency": 87.5, + "Personality": 77.6, + "Distinct Personality": 76.7, + "Repeat Usage": 82.3, + "Context Memory": 91.6, + "Speed Perception": 70.2, + "Tone And Language Style": 90.4, + "Transparency": 80.2, + "Trustworthiness": 87.3, + "Understanding": 89.1, + "Usefulness": 91.3 + }, + "Male": { + "Accuracy": 93.3, + "Adaptiveness": 86.0, + "Ethical Alignment": 76.9, + "Background And Culture": 77.6, + "Bias And Stereotypes": 86.2, + "Clarity": 84.9, + "Communication": 89.2, + "Comprehensiveness": 89.3, + "Confidence": 87.6, + "Consistency": 92.0, + "Cultural Awareness": 74.0, + "Detail And Technical Language": 89.2, + "Effectiveness": 89.5, + "Flexibility": 89.8, + "Helpfulness": 86.6, + "Honesty Empathy Fairness": 87.8, + "Intuitiveness": 87.5, + "Conversation Flow": 85.8, + "Overall": 84.4, + "Conversation Building": 88.2, + "Personality Consistency": 86.6, + "Personality": 76.7, + "Distinct Personality": 75.0, + "Repeat Usage": 81.8, + "Context Memory": 91.7, + "Speed Perception": 70.4, + "Tone And Language Style": 90.2, + "Transparency": 78.6, + "Trustworthiness": 86.3, + "Understanding": 88.4, + "Usefulness": 89.8 + } + }, + "Ethnicity": { + "African American": { + "Accuracy": 95.0, + "Adaptiveness": 86.8, + "Ethical Alignment": 79.5, + "Background And Culture": 79.2, + "Bias And Stereotypes": 87.1, + "Clarity": 87.4, + "Communication": 91.2, + "Comprehensiveness": 91.1, + "Confidence": 89.7, + "Consistency": 94.4, + "Cultural Awareness": 76.8, + "Detail And Technical Language": 92.2, + "Effectiveness": 92.9, + "Flexibility": 92.4, + "Helpfulness": 87.8, + "Honesty Empathy Fairness": 89.2, + "Intuitiveness": 91.9, + "Conversation Flow": 87.7, + "Overall": 85.97, + "Conversation Building": 90.4, + "Personality Consistency": 87.0, + "Personality": 80.1, + "Distinct Personality": 81.1, + "Repeat Usage": 82.3, + "Context Memory": 90.8, + "Speed Perception": 70.1, + "Tone And Language Style": 92.3, + "Transparency": 86.9, + "Trustworthiness": 87.3, + "Understanding": 89.4, + "Usefulness": 92.4 + }, + "Asian": { + "Accuracy": 92.0, + "Adaptiveness": 85.9, + "Ethical Alignment": 72.8, + "Background And Culture": 76.1, + "Bias And Stereotypes": 86.0, + "Clarity": 84.1, + "Communication": 87.0, + "Comprehensiveness": 87.8, + "Confidence": 86.0, + "Consistency": 92.5, + "Cultural Awareness": 73.6, + "Detail And Technical Language": 84.3, + "Effectiveness": 88.7, + "Flexibility": 87.9, + "Helpfulness": 84.2, + "Honesty Empathy Fairness": 86.3, + "Intuitiveness": 86.6, + "Conversation Flow": 86.8, + "Overall": 83.26, + "Conversation Building": 82.3, + "Personality Consistency": 85.9, + "Personality": 77.4, + "Distinct Personality": 78.0, + "Repeat Usage": 82.1, + "Context Memory": 88.8, + "Speed Perception": 70.8, + "Tone And Language Style": 87.6, + "Transparency": 81.8, + "Trustworthiness": 85.3, + "Understanding": 86.9, + "Usefulness": 85.8 + }, + "Hispanic": { + "Accuracy": 96.5, + "Adaptiveness": 88.0, + "Ethical Alignment": 73.0, + "Background And Culture": 77.4, + "Bias And Stereotypes": 86.5, + "Clarity": 86.8, + "Communication": 90.0, + "Comprehensiveness": 90.8, + "Confidence": 89.0, + "Consistency": 90.6, + "Cultural Awareness": 71.4, + "Detail And Technical Language": 89.2, + "Effectiveness": 91.5, + "Flexibility": 92.2, + "Helpfulness": 89.4, + "Honesty Empathy Fairness": 84.9, + "Intuitiveness": 88.0, + "Conversation Flow": 85.7, + "Overall": 85.3, + "Conversation Building": 92.4, + "Personality Consistency": 86.2, + "Personality": 76.4, + "Distinct Personality": 75.7, + "Repeat Usage": 82.4, + "Context Memory": 92.1, + "Speed Perception": 71.3, + "Tone And Language Style": 87.6, + "Transparency": 77.1, + "Trustworthiness": 84.7, + "Understanding": 91.2, + "Usefulness": 93.0 + }, + "White": { + "Accuracy": 93.0, + "Adaptiveness": 87.3, + "Ethical Alignment": 77.4, + "Background And Culture": 77.5, + "Bias And Stereotypes": 86.9, + "Clarity": 83.9, + "Communication": 89.6, + "Comprehensiveness": 90.1, + "Confidence": 87.6, + "Consistency": 93.1, + "Cultural Awareness": 74.4, + "Detail And Technical Language": 90.1, + "Effectiveness": 90.6, + "Flexibility": 90.1, + "Helpfulness": 87.7, + "Honesty Empathy Fairness": 88.3, + "Intuitiveness": 87.9, + "Conversation Flow": 85.2, + "Overall": 84.96, + "Conversation Building": 86.6, + "Personality Consistency": 87.2, + "Personality": 76.9, + "Distinct Personality": 75.0, + "Repeat Usage": 82.0, + "Context Memory": 91.8, + "Speed Perception": 70.1, + "Tone And Language Style": 90.6, + "Transparency": 78.6, + "Trustworthiness": 87.2, + "Understanding": 88.5, + "Usefulness": 90.2 + } + }, + "Politics": { + "Democrat": { + "Accuracy": 93.3, + "Adaptiveness": 86.5, + "Ethical Alignment": 74.7, + "Background And Culture": 75.9, + "Bias And Stereotypes": 86.3, + "Clarity": 85.7, + "Communication": 89.1, + "Comprehensiveness": 89.5, + "Confidence": 87.9, + "Consistency": 93.0, + "Cultural Awareness": 72.4, + "Detail And Technical Language": 90.4, + "Effectiveness": 90.6, + "Flexibility": 90.9, + "Helpfulness": 87.3, + "Honesty Empathy Fairness": 87.4, + "Intuitiveness": 88.1, + "Conversation Flow": 84.4, + "Overall": 84.23, + "Conversation Building": 88.2, + "Personality Consistency": 86.4, + "Personality": 76.8, + "Distinct Personality": 74.2, + "Repeat Usage": 81.9, + "Context Memory": 91.6, + "Speed Perception": 70.5, + "Tone And Language Style": 89.8, + "Transparency": 81.0, + "Trustworthiness": 85.8, + "Understanding": 88.2, + "Usefulness": 90.6 + }, + "Independent": { + "Accuracy": 95.4, + "Adaptiveness": 89.5, + "Ethical Alignment": 77.2, + "Background And Culture": 77.8, + "Bias And Stereotypes": 88.3, + "Clarity": 85.3, + "Communication": 91.4, + "Comprehensiveness": 94.0, + "Confidence": 87.7, + "Consistency": 94.3, + "Cultural Awareness": 73.9, + "Detail And Technical Language": 91.1, + "Effectiveness": 92.1, + "Flexibility": 92.6, + "Helpfulness": 90.4, + "Honesty Empathy Fairness": 89.1, + "Intuitiveness": 89.5, + "Conversation Flow": 86.3, + "Overall": 86.33, + "Conversation Building": 88.1, + "Personality Consistency": 88.9, + "Personality": 75.5, + "Distinct Personality": 73.3, + "Repeat Usage": 81.6, + "Context Memory": 93.2, + "Speed Perception": 70.0, + "Tone And Language Style": 90.9, + "Transparency": 77.3, + "Trustworthiness": 88.8, + "Understanding": 90.9, + "Usefulness": 91.5 + }, + "Republican": { + "Accuracy": 92.8, + "Adaptiveness": 87.0, + "Ethical Alignment": 79.3, + "Background And Culture": 79.5, + "Bias And Stereotypes": 86.7, + "Clarity": 82.9, + "Communication": 89.7, + "Comprehensiveness": 89.2, + "Confidence": 88.1, + "Consistency": 92.1, + "Cultural Awareness": 76.7, + "Detail And Technical Language": 89.1, + "Effectiveness": 90.5, + "Flexibility": 88.9, + "Helpfulness": 87.1, + "Honesty Empathy Fairness": 88.0, + "Intuitiveness": 87.8, + "Conversation Flow": 86.6, + "Overall": 85.31, + "Conversation Building": 86.3, + "Personality Consistency": 86.8, + "Personality": 78.4, + "Distinct Personality": 79.0, + "Repeat Usage": 82.4, + "Context Memory": 90.9, + "Speed Perception": 70.1, + "Tone And Language Style": 90.6, + "Transparency": 78.7, + "Trustworthiness": 87.1, + "Understanding": 88.4, + "Usefulness": 90.1 + } + }, + "Education": { + "College": { + "Accuracy": 89.5, + "Adaptiveness": 85.7, + "Ethical Alignment": 76.3, + "Background And Culture": 75.0, + "Bias And Stereotypes": 86.0, + "Clarity": 82.1, + "Communication": 86.9, + "Comprehensiveness": 85.7, + "Confidence": 84.5, + "Consistency": 90.1, + "Cultural Awareness": 72.9, + "Detail And Technical Language": 86.8, + "Effectiveness": 87.2, + "Flexibility": 89.1, + "Helpfulness": 85.0, + "Honesty Empathy Fairness": 83.1, + "Intuitiveness": 84.0, + "Conversation Flow": 84.4, + "Overall": 83.21, + "Conversation Building": 83.7, + "Personality Consistency": 83.0, + "Personality": 77.9, + "Distinct Personality": 75.6, + "Repeat Usage": 82.1, + "Context Memory": 87.1, + "Speed Perception": 69.9, + "Tone And Language Style": 87.7, + "Transparency": 74.6, + "Trustworthiness": 84.6, + "Understanding": 87.4, + "Usefulness": 87.2 + }, + "No College": { + "Accuracy": 95.6, + "Adaptiveness": 88.1, + "Ethical Alignment": 77.3, + "Background And Culture": 79.0, + "Bias And Stereotypes": 87.2, + "Clarity": 85.8, + "Communication": 91.2, + "Comprehensiveness": 92.5, + "Confidence": 89.7, + "Consistency": 94.4, + "Cultural Awareness": 75.0, + "Detail And Technical Language": 91.7, + "Effectiveness": 92.7, + "Flexibility": 91.2, + "Helpfulness": 89.2, + "Honesty Empathy Fairness": 90.4, + "Intuitiveness": 90.4, + "Conversation Flow": 86.2, + "Overall": 85.97, + "Conversation Building": 89.3, + "Personality Consistency": 89.1, + "Personality": 76.8, + "Distinct Personality": 76.0, + "Repeat Usage": 82.0, + "Context Memory": 93.9, + "Speed Perception": 70.5, + "Tone And Language Style": 91.7, + "Transparency": 82.0, + "Trustworthiness": 88.0, + "Understanding": 89.5, + "Usefulness": 92.3 + } + }, + "Urbanicity": { + "Rural": { + "Accuracy": 95.6, + "Adaptiveness": 89.4, + "Ethical Alignment": 78.5, + "Background And Culture": 78.9, + "Bias And Stereotypes": 88.6, + "Clarity": 85.6, + "Communication": 91.5, + "Comprehensiveness": 91.3, + "Confidence": 90.4, + "Consistency": 95.3, + "Cultural Awareness": 76.4, + "Detail And Technical Language": 93.7, + "Effectiveness": 92.2, + "Flexibility": 90.6, + "Helpfulness": 88.8, + "Honesty Empathy Fairness": 89.5, + "Intuitiveness": 91.2, + "Conversation Flow": 86.6, + "Overall": 86.43, + "Conversation Building": 90.1, + "Personality Consistency": 89.5, + "Personality": 76.5, + "Distinct Personality": 78.0, + "Repeat Usage": 81.9, + "Context Memory": 93.3, + "Speed Perception": 70.3, + "Tone And Language Style": 90.9, + "Transparency": 81.6, + "Trustworthiness": 88.6, + "Understanding": 91.3, + "Usefulness": 92.0 + }, + "Suburban": { + "Accuracy": 92.7, + "Adaptiveness": 86.3, + "Ethical Alignment": 76.4, + "Background And Culture": 77.3, + "Bias And Stereotypes": 86.6, + "Clarity": 83.4, + "Communication": 89.2, + "Comprehensiveness": 90.4, + "Confidence": 87.4, + "Consistency": 92.5, + "Cultural Awareness": 73.4, + "Detail And Technical Language": 88.9, + "Effectiveness": 91.5, + "Flexibility": 90.3, + "Helpfulness": 87.4, + "Honesty Empathy Fairness": 87.7, + "Intuitiveness": 87.7, + "Conversation Flow": 85.7, + "Overall": 84.63, + "Conversation Building": 86.6, + "Personality Consistency": 86.7, + "Personality": 77.6, + "Distinct Personality": 75.9, + "Repeat Usage": 82.3, + "Context Memory": 91.8, + "Speed Perception": 70.3, + "Tone And Language Style": 90.8, + "Transparency": 77.4, + "Trustworthiness": 86.7, + "Understanding": 87.9, + "Usefulness": 89.8 + }, + "Urban": { + "Accuracy": 92.4, + "Adaptiveness": 86.5, + "Ethical Alignment": 76.1, + "Background And Culture": 76.8, + "Bias And Stereotypes": 85.4, + "Clarity": 85.1, + "Communication": 88.7, + "Comprehensiveness": 88.8, + "Confidence": 86.2, + "Consistency": 91.2, + "Cultural Awareness": 73.5, + "Detail And Technical Language": 87.9, + "Effectiveness": 88.7, + "Flexibility": 90.6, + "Helpfulness": 87.2, + "Honesty Empathy Fairness": 86.8, + "Intuitiveness": 86.2, + "Conversation Flow": 84.5, + "Overall": 84.19, + "Conversation Building": 85.9, + "Personality Consistency": 85.1, + "Personality": 77.3, + "Distinct Personality": 73.8, + "Repeat Usage": 81.9, + "Context Memory": 89.6, + "Speed Perception": 70.3, + "Tone And Language Style": 89.1, + "Transparency": 80.1, + "Trustworthiness": 85.3, + "Understanding": 87.5, + "Usefulness": 90.2 + } + } + }, + "o1": { + "Age": { + "18-24": { + "Accuracy": 89.2, + "Adaptiveness": 81.4, + "Ethical Alignment": 68.5, + "Background And Culture": 69.2, + "Bias And Stereotypes": 81.0, + "Clarity": 79.2, + "Communication": 82.7, + "Comprehensiveness": 88.3, + "Confidence": 88.1, + "Consistency": 87.4, + "Cultural Awareness": 67.8, + "Detail And Technical Language": 84.4, + "Effectiveness": 88.9, + "Flexibility": 88.2, + "Helpfulness": 86.2, + "Honesty Empathy Fairness": 80.1, + "Intuitiveness": 78.2, + "Conversation Flow": 80.6, + "Overall": 79.91, + "Conversation Building": 85.3, + "Personality Consistency": 80.2, + "Personality": 74.1, + "Distinct Personality": 67.0, + "Repeat Usage": 79.9, + "Context Memory": 90.0, + "Speed Perception": 75.1, + "Tone And Language Style": 84.8, + "Transparency": 68.1, + "Trustworthiness": 80.7, + "Understanding": 85.1, + "Usefulness": 87.4 + }, + "25-34": { + "Accuracy": 86.4, + "Adaptiveness": 81.5, + "Ethical Alignment": 76.5, + "Background And Culture": 73.4, + "Bias And Stereotypes": 86.4, + "Clarity": 78.3, + "Communication": 81.6, + "Comprehensiveness": 84.6, + "Confidence": 85.3, + "Consistency": 85.4, + "Cultural Awareness": 73.5, + "Detail And Technical Language": 87.8, + "Effectiveness": 85.8, + "Flexibility": 84.2, + "Helpfulness": 85.8, + "Honesty Empathy Fairness": 86.1, + "Intuitiveness": 83.4, + "Conversation Flow": 81.8, + "Overall": 80.41, + "Conversation Building": 83.8, + "Personality Consistency": 82.1, + "Personality": 74.3, + "Distinct Personality": 71.9, + "Repeat Usage": 80.3, + "Context Memory": 90.8, + "Speed Perception": 74.4, + "Tone And Language Style": 83.9, + "Transparency": 71.9, + "Trustworthiness": 82.7, + "Understanding": 83.6, + "Usefulness": 86.4 + }, + "35-44": { + "Accuracy": 91.4, + "Adaptiveness": 85.4, + "Ethical Alignment": 73.5, + "Background And Culture": 70.4, + "Bias And Stereotypes": 89.5, + "Clarity": 84.0, + "Communication": 84.8, + "Comprehensiveness": 89.0, + "Confidence": 89.3, + "Consistency": 90.7, + "Cultural Awareness": 70.1, + "Detail And Technical Language": 89.3, + "Effectiveness": 88.2, + "Flexibility": 89.0, + "Helpfulness": 88.8, + "Honesty Empathy Fairness": 86.7, + "Intuitiveness": 86.5, + "Conversation Flow": 79.0, + "Overall": 82.86, + "Conversation Building": 85.6, + "Personality Consistency": 89.1, + "Personality": 74.4, + "Distinct Personality": 72.2, + "Repeat Usage": 79.9, + "Context Memory": 92.2, + "Speed Perception": 74.4, + "Tone And Language Style": 84.7, + "Transparency": 68.6, + "Trustworthiness": 87.4, + "Understanding": 88.8, + "Usefulness": 88.1 + }, + "45-54": { + "Accuracy": 84.1, + "Adaptiveness": 82.5, + "Ethical Alignment": 69.7, + "Background And Culture": 69.5, + "Bias And Stereotypes": 83.6, + "Clarity": 77.5, + "Communication": 82.3, + "Comprehensiveness": 85.4, + "Confidence": 86.0, + "Consistency": 87.0, + "Cultural Awareness": 68.3, + "Detail And Technical Language": 80.2, + "Effectiveness": 85.2, + "Flexibility": 84.4, + "Helpfulness": 87.5, + "Honesty Empathy Fairness": 79.3, + "Intuitiveness": 79.7, + "Conversation Flow": 78.8, + "Overall": 80.97, + "Conversation Building": 80.5, + "Personality Consistency": 80.7, + "Personality": 74.2, + "Distinct Personality": 66.9, + "Repeat Usage": 80.8, + "Context Memory": 87.5, + "Speed Perception": 74.1, + "Tone And Language Style": 83.1, + "Transparency": 65.2, + "Trustworthiness": 85.4, + "Understanding": 85.4, + "Usefulness": 82.5 + }, + "55-64": { + "Accuracy": 87.7, + "Adaptiveness": 85.8, + "Ethical Alignment": 75.9, + "Background And Culture": 73.2, + "Bias And Stereotypes": 87.8, + "Clarity": 81.4, + "Communication": 87.2, + "Comprehensiveness": 89.6, + "Confidence": 88.5, + "Consistency": 86.8, + "Cultural Awareness": 73.6, + "Detail And Technical Language": 87.0, + "Effectiveness": 88.9, + "Flexibility": 88.7, + "Helpfulness": 89.3, + "Honesty Empathy Fairness": 84.4, + "Intuitiveness": 84.3, + "Conversation Flow": 82.6, + "Overall": 83.57, + "Conversation Building": 87.6, + "Personality Consistency": 84.7, + "Personality": 73.6, + "Distinct Personality": 71.0, + "Repeat Usage": 81.0, + "Context Memory": 91.4, + "Speed Perception": 74.7, + "Tone And Language Style": 85.7, + "Transparency": 77.1, + "Trustworthiness": 86.4, + "Understanding": 89.5, + "Usefulness": 89.8 + }, + "65+": { + "Accuracy": 88.3, + "Adaptiveness": 85.3, + "Ethical Alignment": 76.7, + "Background And Culture": 74.8, + "Bias And Stereotypes": 89.4, + "Clarity": 85.2, + "Communication": 87.4, + "Comprehensiveness": 89.3, + "Confidence": 88.8, + "Consistency": 90.1, + "Cultural Awareness": 73.4, + "Detail And Technical Language": 88.1, + "Effectiveness": 89.2, + "Flexibility": 91.3, + "Helpfulness": 88.1, + "Honesty Empathy Fairness": 85.7, + "Intuitiveness": 84.5, + "Conversation Flow": 86.9, + "Overall": 83.99, + "Conversation Building": 85.9, + "Personality Consistency": 86.9, + "Personality": 76.5, + "Distinct Personality": 76.8, + "Repeat Usage": 80.3, + "Context Memory": 94.0, + "Speed Perception": 75.6, + "Tone And Language Style": 91.0, + "Transparency": 73.8, + "Trustworthiness": 86.9, + "Understanding": 88.9, + "Usefulness": 88.4 + } + }, + "Sex": { + "Female": { + "Accuracy": 88.6, + "Adaptiveness": 85.1, + "Ethical Alignment": 73.4, + "Background And Culture": 72.8, + "Bias And Stereotypes": 87.4, + "Clarity": 83.0, + "Communication": 85.4, + "Comprehensiveness": 89.1, + "Confidence": 88.5, + "Consistency": 88.7, + "Cultural Awareness": 71.9, + "Detail And Technical Language": 87.2, + "Effectiveness": 88.9, + "Flexibility": 90.5, + "Helpfulness": 89.1, + "Honesty Empathy Fairness": 84.5, + "Intuitiveness": 85.1, + "Conversation Flow": 84.1, + "Overall": 83.01, + "Conversation Building": 85.5, + "Personality Consistency": 85.5, + "Personality": 73.9, + "Distinct Personality": 72.7, + "Repeat Usage": 80.4, + "Context Memory": 91.8, + "Speed Perception": 74.8, + "Tone And Language Style": 87.9, + "Transparency": 72.2, + "Trustworthiness": 86.7, + "Understanding": 88.1, + "Usefulness": 88.8 + }, + "Male": { + "Accuracy": 87.1, + "Adaptiveness": 83.7, + "Ethical Alignment": 76.6, + "Background And Culture": 72.8, + "Bias And Stereotypes": 88.2, + "Clarity": 81.4, + "Communication": 85.3, + "Comprehensiveness": 86.9, + "Confidence": 87.3, + "Consistency": 88.3, + "Cultural Awareness": 72.3, + "Detail And Technical Language": 86.5, + "Effectiveness": 86.9, + "Flexibility": 86.2, + "Helpfulness": 86.7, + "Honesty Empathy Fairness": 84.8, + "Intuitiveness": 82.5, + "Conversation Flow": 81.7, + "Overall": 82.49, + "Conversation Building": 84.5, + "Personality Consistency": 85.0, + "Personality": 76.3, + "Distinct Personality": 73.0, + "Repeat Usage": 80.5, + "Context Memory": 92.0, + "Speed Perception": 74.9, + "Tone And Language Style": 85.9, + "Transparency": 71.6, + "Trustworthiness": 85.3, + "Understanding": 87.3, + "Usefulness": 85.9 + } + }, + "Ethnicity": { + "African American": { + "Accuracy": 87.9, + "Adaptiveness": 86.2, + "Ethical Alignment": 80.4, + "Background And Culture": 76.8, + "Bias And Stereotypes": 88.7, + "Clarity": 85.6, + "Communication": 88.8, + "Comprehensiveness": 88.7, + "Confidence": 91.8, + "Consistency": 89.4, + "Cultural Awareness": 75.3, + "Detail And Technical Language": 90.0, + "Effectiveness": 90.8, + "Flexibility": 91.7, + "Helpfulness": 90.6, + "Honesty Empathy Fairness": 89.2, + "Intuitiveness": 87.8, + "Conversation Flow": 87.2, + "Overall": 85.27, + "Conversation Building": 86.2, + "Personality Consistency": 88.6, + "Personality": 78.2, + "Distinct Personality": 80.0, + "Repeat Usage": 81.7, + "Context Memory": 94.2, + "Speed Perception": 75.6, + "Tone And Language Style": 89.8, + "Transparency": 74.4, + "Trustworthiness": 86.9, + "Understanding": 89.4, + "Usefulness": 91.9 + }, + "Asian": { + "Accuracy": 87.2, + "Adaptiveness": 80.3, + "Ethical Alignment": 66.4, + "Background And Culture": 67.6, + "Bias And Stereotypes": 87.9, + "Clarity": 83.2, + "Communication": 81.2, + "Comprehensiveness": 86.9, + "Confidence": 88.9, + "Consistency": 89.8, + "Cultural Awareness": 63.4, + "Detail And Technical Language": 87.3, + "Effectiveness": 86.5, + "Flexibility": 88.1, + "Helpfulness": 84.5, + "Honesty Empathy Fairness": 84.9, + "Intuitiveness": 84.9, + "Conversation Flow": 83.4, + "Overall": 80.06, + "Conversation Building": 83.8, + "Personality Consistency": 86.3, + "Personality": 75.8, + "Distinct Personality": 74.8, + "Repeat Usage": 80.6, + "Context Memory": 93.0, + "Speed Perception": 74.3, + "Tone And Language Style": 91.5, + "Transparency": 69.1, + "Trustworthiness": 85.3, + "Understanding": 85.7, + "Usefulness": 87.2 + }, + "Hispanic": { + "Accuracy": 82.7, + "Adaptiveness": 83.4, + "Ethical Alignment": 64.7, + "Background And Culture": 67.8, + "Bias And Stereotypes": 85.3, + "Clarity": 75.9, + "Communication": 80.5, + "Comprehensiveness": 83.4, + "Confidence": 81.6, + "Consistency": 87.0, + "Cultural Awareness": 60.5, + "Detail And Technical Language": 76.4, + "Effectiveness": 87.3, + "Flexibility": 81.3, + "Helpfulness": 88.3, + "Honesty Empathy Fairness": 75.8, + "Intuitiveness": 74.0, + "Conversation Flow": 70.4, + "Overall": 80.51, + "Conversation Building": 77.7, + "Personality Consistency": 79.3, + "Personality": 72.8, + "Distinct Personality": 66.6, + "Repeat Usage": 80.7, + "Context Memory": 86.9, + "Speed Perception": 74.3, + "Tone And Language Style": 81.6, + "Transparency": 64.0, + "Trustworthiness": 83.5, + "Understanding": 87.3, + "Usefulness": 84.3 + }, + "White": { + "Accuracy": 88.6, + "Adaptiveness": 84.5, + "Ethical Alignment": 76.0, + "Background And Culture": 73.2, + "Bias And Stereotypes": 88.0, + "Clarity": 82.6, + "Communication": 85.8, + "Comprehensiveness": 88.7, + "Confidence": 88.2, + "Consistency": 88.5, + "Cultural Awareness": 73.7, + "Detail And Technical Language": 87.9, + "Effectiveness": 87.7, + "Flexibility": 89.1, + "Helpfulness": 87.7, + "Honesty Empathy Fairness": 85.3, + "Intuitiveness": 84.6, + "Conversation Flow": 84.1, + "Overall": 82.86, + "Conversation Building": 86.0, + "Personality Consistency": 85.6, + "Personality": 74.9, + "Distinct Personality": 72.6, + "Repeat Usage": 80.2, + "Context Memory": 92.2, + "Speed Perception": 74.9, + "Tone And Language Style": 87.0, + "Transparency": 72.8, + "Trustworthiness": 86.3, + "Understanding": 87.6, + "Usefulness": 87.2 + } + }, + "Politics": { + "Democrat": { + "Accuracy": 87.0, + "Adaptiveness": 82.3, + "Ethical Alignment": 73.0, + "Background And Culture": 71.7, + "Bias And Stereotypes": 87.8, + "Clarity": 81.5, + "Communication": 83.0, + "Comprehensiveness": 86.6, + "Confidence": 87.6, + "Consistency": 88.1, + "Cultural Awareness": 70.1, + "Detail And Technical Language": 85.0, + "Effectiveness": 86.5, + "Flexibility": 87.7, + "Helpfulness": 87.3, + "Honesty Empathy Fairness": 82.8, + "Intuitiveness": 82.6, + "Conversation Flow": 80.1, + "Overall": 81.41, + "Conversation Building": 83.8, + "Personality Consistency": 83.9, + "Personality": 74.5, + "Distinct Personality": 69.9, + "Repeat Usage": 80.9, + "Context Memory": 91.2, + "Speed Perception": 74.8, + "Tone And Language Style": 85.4, + "Transparency": 68.7, + "Trustworthiness": 85.2, + "Understanding": 85.9, + "Usefulness": 86.5 + }, + "Independent": { + "Accuracy": 89.4, + "Adaptiveness": 87.5, + "Ethical Alignment": 75.9, + "Background And Culture": 75.6, + "Bias And Stereotypes": 88.3, + "Clarity": 82.5, + "Communication": 88.3, + "Comprehensiveness": 89.2, + "Confidence": 89.2, + "Consistency": 90.4, + "Cultural Awareness": 73.0, + "Detail And Technical Language": 90.5, + "Effectiveness": 89.4, + "Flexibility": 90.1, + "Helpfulness": 88.9, + "Honesty Empathy Fairness": 84.3, + "Intuitiveness": 85.8, + "Conversation Flow": 85.7, + "Overall": 84.64, + "Conversation Building": 87.7, + "Personality Consistency": 86.0, + "Personality": 73.8, + "Distinct Personality": 69.8, + "Repeat Usage": 80.0, + "Context Memory": 94.0, + "Speed Perception": 75.5, + "Tone And Language Style": 88.7, + "Transparency": 72.5, + "Trustworthiness": 88.4, + "Understanding": 90.0, + "Usefulness": 90.2 + }, + "Republican": { + "Accuracy": 88.1, + "Adaptiveness": 85.3, + "Ethical Alignment": 76.5, + "Background And Culture": 72.6, + "Bias And Stereotypes": 87.6, + "Clarity": 82.9, + "Communication": 86.6, + "Comprehensiveness": 89.1, + "Confidence": 87.7, + "Consistency": 88.0, + "Cultural Awareness": 73.9, + "Detail And Technical Language": 87.2, + "Effectiveness": 88.8, + "Flexibility": 88.6, + "Helpfulness": 88.2, + "Honesty Empathy Fairness": 87.0, + "Intuitiveness": 84.3, + "Conversation Flow": 84.9, + "Overall": 83.33, + "Conversation Building": 85.2, + "Personality Consistency": 86.4, + "Personality": 76.2, + "Distinct Personality": 77.7, + "Repeat Usage": 80.1, + "Context Memory": 91.6, + "Speed Perception": 74.6, + "Tone And Language Style": 87.8, + "Transparency": 75.2, + "Trustworthiness": 85.8, + "Understanding": 88.6, + "Usefulness": 87.1 + } + }, + "Education": { + "College": { + "Accuracy": 86.8, + "Adaptiveness": 82.0, + "Ethical Alignment": 73.6, + "Background And Culture": 72.1, + "Bias And Stereotypes": 85.0, + "Clarity": 80.6, + "Communication": 82.5, + "Comprehensiveness": 87.4, + "Confidence": 84.8, + "Consistency": 88.2, + "Cultural Awareness": 69.4, + "Detail And Technical Language": 80.4, + "Effectiveness": 86.8, + "Flexibility": 86.6, + "Helpfulness": 87.7, + "Honesty Empathy Fairness": 80.5, + "Intuitiveness": 80.9, + "Conversation Flow": 79.4, + "Overall": 81.41, + "Conversation Building": 82.4, + "Personality Consistency": 83.1, + "Personality": 73.8, + "Distinct Personality": 72.1, + "Repeat Usage": 80.1, + "Context Memory": 88.6, + "Speed Perception": 73.8, + "Tone And Language Style": 83.9, + "Transparency": 71.2, + "Trustworthiness": 84.7, + "Understanding": 87.1, + "Usefulness": 84.7 + }, + "No College": { + "Accuracy": 88.4, + "Adaptiveness": 85.6, + "Ethical Alignment": 75.6, + "Background And Culture": 73.1, + "Bias And Stereotypes": 89.2, + "Clarity": 83.1, + "Communication": 86.8, + "Comprehensiveness": 88.4, + "Confidence": 89.5, + "Consistency": 88.7, + "Cultural Awareness": 73.4, + "Detail And Technical Language": 90.2, + "Effectiveness": 88.5, + "Flexibility": 89.4, + "Helpfulness": 88.1, + "Honesty Empathy Fairness": 86.8, + "Intuitiveness": 85.4, + "Conversation Flow": 84.8, + "Overall": 83.43, + "Conversation Building": 86.4, + "Personality Consistency": 86.3, + "Personality": 75.7, + "Distinct Personality": 73.3, + "Repeat Usage": 80.6, + "Context Memory": 93.5, + "Speed Perception": 75.4, + "Tone And Language Style": 88.5, + "Transparency": 72.2, + "Trustworthiness": 86.7, + "Understanding": 88.0, + "Usefulness": 88.8 + } + }, + "Urbanicity": { + "Rural": { + "Accuracy": 90.0, + "Adaptiveness": 86.8, + "Ethical Alignment": 75.2, + "Background And Culture": 72.2, + "Bias And Stereotypes": 90.9, + "Clarity": 85.7, + "Communication": 90.1, + "Comprehensiveness": 91.1, + "Confidence": 87.5, + "Consistency": 89.0, + "Cultural Awareness": 74.9, + "Detail And Technical Language": 90.5, + "Effectiveness": 90.4, + "Flexibility": 89.3, + "Helpfulness": 88.8, + "Honesty Empathy Fairness": 88.3, + "Intuitiveness": 86.6, + "Conversation Flow": 86.5, + "Overall": 84.51, + "Conversation Building": 87.7, + "Personality Consistency": 87.8, + "Personality": 75.8, + "Distinct Personality": 75.3, + "Repeat Usage": 80.3, + "Context Memory": 93.7, + "Speed Perception": 75.3, + "Tone And Language Style": 88.8, + "Transparency": 70.5, + "Trustworthiness": 87.6, + "Understanding": 90.3, + "Usefulness": 89.7 + }, + "Suburban": { + "Accuracy": 87.2, + "Adaptiveness": 83.5, + "Ethical Alignment": 74.2, + "Background And Culture": 72.1, + "Bias And Stereotypes": 87.0, + "Clarity": 80.4, + "Communication": 83.8, + "Comprehensiveness": 86.7, + "Confidence": 87.4, + "Consistency": 88.1, + "Cultural Awareness": 70.0, + "Detail And Technical Language": 86.6, + "Effectiveness": 87.0, + "Flexibility": 88.6, + "Helpfulness": 87.1, + "Honesty Empathy Fairness": 83.1, + "Intuitiveness": 83.6, + "Conversation Flow": 82.5, + "Overall": 81.64, + "Conversation Building": 84.8, + "Personality Consistency": 83.4, + "Personality": 73.6, + "Distinct Personality": 70.7, + "Repeat Usage": 80.2, + "Context Memory": 91.5, + "Speed Perception": 74.8, + "Tone And Language Style": 86.0, + "Transparency": 70.8, + "Trustworthiness": 85.2, + "Understanding": 86.2, + "Usefulness": 87.3 + }, + "Urban": { + "Accuracy": 86.6, + "Adaptiveness": 83.3, + "Ethical Alignment": 75.5, + "Background And Culture": 74.3, + "Bias And Stereotypes": 85.8, + "Clarity": 81.3, + "Communication": 82.7, + "Comprehensiveness": 86.8, + "Confidence": 89.0, + "Consistency": 88.6, + "Cultural Awareness": 72.1, + "Detail And Technical Language": 83.6, + "Effectiveness": 86.8, + "Flexibility": 87.4, + "Helpfulness": 88.3, + "Honesty Empathy Fairness": 83.2, + "Intuitiveness": 81.5, + "Conversation Flow": 80.1, + "Overall": 82.5, + "Conversation Building": 82.7, + "Personality Consistency": 85.2, + "Personality": 76.3, + "Distinct Personality": 73.4, + "Repeat Usage": 80.9, + "Context Memory": 90.5, + "Speed Perception": 74.5, + "Tone And Language Style": 86.2, + "Transparency": 74.8, + "Trustworthiness": 85.5, + "Understanding": 87.1, + "Usefulness": 85.3 + } + } + } + }, + "equity_analysis": { + "assessment_method": { + "type": "effect_size_and_statistical_significance", + "effect_size_normalization": "Pooled standard deviation of MRP scores across all demographic levels for a given category (fallback to global).", + "effect_size_thresholds": { + "Negligible": "< 0.2", + "Small": "0.2 <= ES < 0.5", + "Medium": "0.5 <= ES < 0.8", + "Large": ">= 0.8" + }, + "statistical_significance": { + "method": "Two-sample Z-test using MRP Standard Errors (SE)", + "se_difference_formula": "sqrt(se_min^2 + se_max^2)", + "alpha_level": 0.05, + "interpretation": "A p-value less than 0.05 indicates the observed gap is unlikely due to chance alone.", + "confidence_interval": "95% CI calculated for the score gap (gap +/- 1.96 * SE_difference)." + }, + "raw_n_confidence_heuristic": { + "description": "Secondary heuristic based on minimum raw participant count in the min/max scoring groups.", + "thresholds": { + "High": 50, + "Medium": 20, + "Low": 0 + } + }, + "equity_concern_flag_condition": "(Effect Size >= 0.8) AND (P-value < 0.05)", + "description": "Equity gaps assessed primarily by practical significance (Effect Size) and statistical significance (p-value derived from SE). 'is_equity_concern' flags gaps that meet both criteria. The Raw N heuristic provides supplementary context on data density." + }, + "demographic_variation_stats": { + "global": { + "std": 6.386975519955983 + }, + "by_category": { + "accuracy": { + "std": 2.9380065757055225 + }, + "adaptiveness": { + "std": 3.918757509296367 + }, + "ethical_alignment": { + "std": 3.759631981841958 + }, + "background_and_culture": { + "std": 3.4590505280431443 + }, + "bias_and_stereotypes": { + "std": 2.7841396037794754 + }, + "clarity": { + "std": 4.515050064321916 + }, + "communication": { + "std": 4.042235011585657 + }, + "comprehensiveness": { + "std": 3.5821874136715217 + }, + "confidence": { + "std": 3.2881905696328215 + }, + "consistency": { + "std": 2.977051113658167 + }, + "cultural_awareness": { + "std": 3.7138433783466 + }, + "detail_and_technical_language": { + "std": 3.129178872590486 + }, + "effectiveness": { + "std": 3.9601220519911586 + }, + "flexibility": { + "std": 3.4156111174364616 + }, + "helpfulness": { + "std": 3.6021382538708866 + }, + "honesty_empathy_fairness": { + "std": 3.8017421993081015 + }, + "intuitiveness": { + "std": 3.0981848404437646 + }, + "conversation_flow": { + "std": 4.969994341043056 + }, + "conversation_building": { + "std": 4.325862068870075 + }, + "personality_consistency": { + "std": 3.1921500939370353 + }, + "personality": { + "std": 3.3653095449502612 + }, + "distinct_personality": { + "std": 4.323347147240846 + }, + "context_memory": { + "std": 3.38883368579811 + }, + "tone_and_language_style": { + "std": 3.3396756329519985 + }, + "transparency": { + "std": 4.513771442916543 + }, + "trustworthiness": { + "std": 3.1642692679353313 + }, + "understanding": { + "std": 2.9673178124360047 + }, + "usefulness": { + "std": 3.361162889867467 + } + } + }, + "model_max_effect_gaps": { + "claude-3.7-sonnet": { + "category": "trustworthiness", + "gap_info": { + "model": "claude-3.7-sonnet", + "category": "trustworthiness", + "demographic_factor": "Age", + "score_range": 6.5, + "min_level": "18-24", + "max_level": "65+", + "min_score": 86.6, + "max_score": 93.1, + "se_min": 0.0, + "se_max": 0.0, + "effect_size": 2.054186748854409, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1642692679353313, + "level_score_std_dev": 2.11002896241313, + "se_difference": 0.0, + "z_score": 9999.9, + "p_value": 0.0, + "is_statistically_significant": true, + "gap_confidence_interval_95_lower": 6.5, + "gap_confidence_interval_95_upper": 6.5, + "raw_n_min_group": 60, + "raw_n_max_group": 65, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": true, + "prioritized_significant_flag": true + } + }, + "deepseek-r1": { + "category": "distinct_personality", + "gap_info": { + "model": "deepseek-r1", + "category": "distinct_personality", + "demographic_factor": "Ethnicity", + "score_range": 17.700000000000003, + "min_level": "Hispanic", + "max_level": "Asian", + "min_score": 60.8, + "max_score": 78.5, + "se_min": 7.7, + "se_max": 5.7, + "effect_size": 4.094050141519659, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.323347147240846, + "level_score_std_dev": 7.117364329581564, + "se_difference": 9.58018788959799, + "z_score": 1.847562929242585, + "p_value": 0.9400724891990658, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -1.0768232287388457, + "gap_confidence_interval_95_upper": 36.47682322873885, + "raw_n_min_group": 34, + "raw_n_max_group": 40, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false, + "prioritized_significant_flag": false + } + }, + "gemini-2.0-flash-001": { + "category": "confidence", + "gap_info": { + "model": "gemini-2.0-flash-001", + "category": "confidence", + "demographic_factor": "Education", + "score_range": 6.099999999999994, + "min_level": "College", + "max_level": "No College", + "min_score": 87.5, + "max_score": 93.6, + "se_min": 0.0, + "se_max": 0.0, + "effect_size": 1.855123622193575, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.2881905696328215, + "level_score_std_dev": 3.049999999999997, + "se_difference": 0.0, + "z_score": 9999.9, + "p_value": 0.0, + "is_statistically_significant": true, + "gap_confidence_interval_95_lower": 6.099999999999994, + "gap_confidence_interval_95_upper": 6.099999999999994, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": true, + "prioritized_significant_flag": true + } + }, + "gpt-4o": { + "category": "understanding", + "gap_info": { + "model": "gpt-4o", + "category": "understanding", + "demographic_factor": "Age", + "score_range": 5.1000000000000085, + "min_level": "18-24", + "max_level": "25-34", + "min_score": 89.1, + "max_score": 94.2, + "se_min": 0.0, + "se_max": 0.0, + "effect_size": 1.7187238854651667, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9673178124360047, + "level_score_std_dev": 2.0221688027132365, + "se_difference": 0.0, + "z_score": 9999.9, + "p_value": 0.0, + "is_statistically_significant": true, + "gap_confidence_interval_95_lower": 5.1000000000000085, + "gap_confidence_interval_95_upper": 5.1000000000000085, + "raw_n_min_group": 60, + "raw_n_max_group": 104, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": true, + "prioritized_significant_flag": true + } + }, + "llama-3.1-405b-instruct": { + "category": "tone_and_language_style", + "gap_info": { + "model": "llama-3.1-405b-instruct", + "category": "tone_and_language_style", + "demographic_factor": "Age", + "score_range": 9.100000000000009, + "min_level": "45-54", + "max_level": "65+", + "min_score": 84.8, + "max_score": 93.9, + "se_min": 0.0, + "se_max": 0.0, + "effect_size": 2.7248155210679417, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.3396756329519985, + "level_score_std_dev": 3.367326667979944, + "se_difference": 0.0, + "z_score": 9999.9, + "p_value": 0.0, + "is_statistically_significant": true, + "gap_confidence_interval_95_lower": 9.100000000000009, + "gap_confidence_interval_95_upper": 9.100000000000009, + "raw_n_min_group": 83, + "raw_n_max_group": 64, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": true, + "prioritized_significant_flag": true + } + }, + "o1": { + "category": "conversation_flow", + "gap_info": { + "model": "o1", + "category": "conversation_flow", + "demographic_factor": "Ethnicity", + "score_range": 16.799999999999997, + "min_level": "Hispanic", + "max_level": "African American", + "min_score": 70.4, + "max_score": 87.2, + "se_min": 6.8, + "se_max": 4.1, + "effect_size": 3.380285539012137, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.969994341043056, + "level_score_std_dev": 6.439477851503177, + "se_difference": 7.940403012442126, + "z_score": 2.11576162742312, + "p_value": 0.9400724891990658, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": 1.2370960728800782, + "gap_confidence_interval_95_upper": 32.362903927119916, + "raw_n_min_group": 30, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false, + "prioritized_significant_flag": false + } + } + }, + "all_equity_gaps": [ + { + "model": "claude-3.7-sonnet", + "category": "accuracy", + "demographic_factor": "Age", + "score_range": 7.8, + "min_level": "45-54", + "max_level": "65+", + "min_score": 84.7, + "max_score": 92.5, + "se_min": 4.3, + "se_max": 3.1, + "effect_size": 2.6548613146, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9380065757, + "level_score_std_dev": 2.3178653973, + "se_difference": 5.3009433123, + "z_score": 1.4714362219, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.5896579762, + "gap_confidence_interval_95_upper": 18.1896579762, + "raw_n_min_group": 82, + "raw_n_max_group": 65, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "accuracy", + "demographic_factor": "Education", + "score_range": 2.3, + "min_level": "College", + "max_level": "No College", + "min_score": 88.1, + "max_score": 90.4, + "se_min": 3.1, + "se_max": 3.8, + "effect_size": 0.782843721, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 2.9380065757, + "level_score_std_dev": 1.15, + "se_difference": 4.9040799341, + "z_score": 0.4689972494, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.3118200481, + "gap_confidence_interval_95_upper": 11.9118200481, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "accuracy", + "demographic_factor": "Ethnicity", + "score_range": 3.2, + "min_level": "Hispanic", + "max_level": "African American", + "min_score": 87.5, + "max_score": 90.7, + "se_min": 4.9, + "se_max": 3.7, + "effect_size": 1.0891738727, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9380065757, + "level_score_std_dev": 1.1669940017, + "se_difference": 6.1400325732, + "z_score": 0.5211698736, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.8342427074, + "gap_confidence_interval_95_upper": 15.2342427074, + "raw_n_min_group": 34, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "accuracy", + "demographic_factor": "Politics", + "score_range": 2.6, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 88.8, + "max_score": 91.4, + "se_min": 3.8, + "se_max": 3.2, + "effect_size": 0.8849537715, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9380065757, + "level_score_std_dev": 1.0780641086, + "se_difference": 4.9678969393, + "z_score": 0.5233602934, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.13689908, + "gap_confidence_interval_95_upper": 12.33689908, + "raw_n_min_group": 168, + "raw_n_max_group": 185, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "accuracy", + "demographic_factor": "Sex", + "score_range": 0.6, + "min_level": "Male", + "max_level": "Female", + "min_score": 89.3, + "max_score": 89.9, + "se_min": 3.7, + "se_max": 3.5, + "effect_size": 0.2042201011, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 2.9380065757, + "level_score_std_dev": 0.3, + "se_difference": 5.0931326313, + "z_score": 0.1178056892, + "p_value": 0.9772792279, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.3823565258, + "gap_confidence_interval_95_upper": 10.5823565258, + "raw_n_min_group": 253, + "raw_n_max_group": 258, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "accuracy", + "demographic_factor": "Urbanicity", + "score_range": 5.5, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 87.0, + "max_score": 92.5, + "se_min": 4.2, + "se_max": 3.1, + "effect_size": 1.8720175937, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9380065757, + "level_score_std_dev": 2.2484562605, + "se_difference": 5.2201532545, + "z_score": 1.0536089137, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.7313123725, + "gap_confidence_interval_95_upper": 15.7313123725, + "raw_n_min_group": 175, + "raw_n_max_group": 88, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "adaptiveness", + "demographic_factor": "Age", + "score_range": 4.5, + "min_level": "18-24", + "max_level": "65+", + "min_score": 85.0, + "max_score": 89.5, + "se_min": 3.4, + "se_max": 2.7, + "effect_size": 1.1483231584, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.9187575093, + "level_score_std_dev": 1.4299378386, + "se_difference": 4.3416586692, + "z_score": 1.0364702393, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.0094946248, + "gap_confidence_interval_95_upper": 13.0094946248, + "raw_n_min_group": 60, + "raw_n_max_group": 65, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "adaptiveness", + "demographic_factor": "Education", + "score_range": 2.8, + "min_level": "College", + "max_level": "No College", + "min_score": 86.4, + "max_score": 89.2, + "se_min": 2.4, + "se_max": 3.0, + "effect_size": 0.7145121874, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.9187575093, + "level_score_std_dev": 1.4, + "se_difference": 3.8418745425, + "z_score": 0.7288108888, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.7299357363, + "gap_confidence_interval_95_upper": 10.3299357363, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "adaptiveness", + "demographic_factor": "Ethnicity", + "score_range": 2.1, + "min_level": "Asian", + "max_level": "Hispanic", + "min_score": 87.7, + "max_score": 89.8, + "se_min": 3.4, + "se_max": 3.2, + "effect_size": 0.5358841406, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.9187575093, + "level_score_std_dev": 0.85, + "se_difference": 4.669047012, + "z_score": 0.4497705837, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.0511639856, + "gap_confidence_interval_95_upper": 11.2511639856, + "raw_n_min_group": 40, + "raw_n_max_group": 34, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "adaptiveness", + "demographic_factor": "Politics", + "score_range": 3.2, + "min_level": "Republican", + "max_level": "Independent", + "min_score": 87.0, + "max_score": 90.2, + "se_min": 2.9, + "se_max": 2.5, + "effect_size": 0.8165853571, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.9187575093, + "level_score_std_dev": 1.3072447701, + "se_difference": 3.8288379438, + "z_score": 0.8357627162, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.3043844725, + "gap_confidence_interval_95_upper": 10.7043844725, + "raw_n_min_group": 163, + "raw_n_max_group": 185, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "adaptiveness", + "demographic_factor": "Sex", + "score_range": 1.8, + "min_level": "Male", + "max_level": "Female", + "min_score": 87.3, + "max_score": 89.1, + "se_min": 2.9, + "se_max": 2.7, + "effect_size": 0.4593292634, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.9187575093, + "level_score_std_dev": 0.9, + "se_difference": 3.9623225512, + "z_score": 0.4542790186, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.9660094955, + "gap_confidence_interval_95_upper": 9.5660094955, + "raw_n_min_group": 253, + "raw_n_max_group": 258, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "adaptiveness", + "demographic_factor": "Urbanicity", + "score_range": 4.3, + "min_level": "Suburban", + "max_level": "Rural", + "min_score": 86.8, + "max_score": 91.1, + "se_min": 2.8, + "se_max": 2.6, + "effect_size": 1.0972865736, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.9187575093, + "level_score_std_dev": 1.9200694432, + "se_difference": 3.8209946349, + "z_score": 1.1253614336, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.1890118695, + "gap_confidence_interval_95_upper": 11.7890118695, + "raw_n_min_group": 253, + "raw_n_max_group": 88, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "background_and_culture", + "demographic_factor": "Age", + "score_range": 11.4, + "min_level": "45-54", + "max_level": "65+", + "min_score": 70.9, + "max_score": 82.3, + "se_min": 4.1, + "se_max": 4.0, + "effect_size": 3.2957020742, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.459050528, + "level_score_std_dev": 3.5565276449, + "se_difference": 5.7280013966, + "z_score": 1.9902229784, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": 0.1733235592, + "gap_confidence_interval_95_upper": 22.6266764408, + "raw_n_min_group": 82, + "raw_n_max_group": 65, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "background_and_culture", + "demographic_factor": "Education", + "score_range": 8.6, + "min_level": "College", + "max_level": "No College", + "min_score": 71.6, + "max_score": 80.2, + "se_min": 3.4, + "se_max": 4.4, + "effect_size": 2.4862313893, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.459050528, + "level_score_std_dev": 4.3, + "se_difference": 5.5605755098, + "z_score": 1.5466025027, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.2985277325, + "gap_confidence_interval_95_upper": 19.4985277325, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "background_and_culture", + "demographic_factor": "Ethnicity", + "score_range": 7.2, + "min_level": "Hispanic", + "max_level": "African American", + "min_score": 73.1, + "max_score": 80.3, + "se_min": 5.1, + "se_max": 4.4, + "effect_size": 2.0814960469, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.459050528, + "level_score_std_dev": 2.6391286441, + "se_difference": 6.7357256476, + "z_score": 1.0689271471, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.0017796791, + "gap_confidence_interval_95_upper": 20.4017796791, + "raw_n_min_group": 34, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "background_and_culture", + "demographic_factor": "Politics", + "score_range": 1.3, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 77.0, + "max_score": 78.3, + "se_min": 4.2, + "se_max": 4.1, + "effect_size": 0.3758256751, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.459050528, + "level_score_std_dev": 0.5906681716, + "se_difference": 5.8694122363, + "z_score": 0.2214872542, + "p_value": 0.9570722129, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -10.2038365936, + "gap_confidence_interval_95_upper": 12.8038365936, + "raw_n_min_group": 168, + "raw_n_max_group": 185, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "background_and_culture", + "demographic_factor": "Sex", + "score_range": 1.7, + "min_level": "Male", + "max_level": "Female", + "min_score": 76.4, + "max_score": 78.1, + "se_min": 4.2, + "se_max": 4.0, + "effect_size": 0.4914643444, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.459050528, + "level_score_std_dev": 0.85, + "se_difference": 5.8, + "z_score": 0.2931034483, + "p_value": 0.9469805126, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.6677911103, + "gap_confidence_interval_95_upper": 13.0677911103, + "raw_n_min_group": 253, + "raw_n_max_group": 258, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "background_and_culture", + "demographic_factor": "Urbanicity", + "score_range": 4.8, + "min_level": "Suburban", + "max_level": "Rural", + "min_score": 75.3, + "max_score": 80.1, + "se_min": 4.0, + "se_max": 4.2, + "effect_size": 1.3876640312, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.459050528, + "level_score_std_dev": 1.9737161791, + "se_difference": 5.8, + "z_score": 0.8275862069, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.5677911103, + "gap_confidence_interval_95_upper": 16.1677911103, + "raw_n_min_group": 253, + "raw_n_max_group": 88, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "bias_and_stereotypes", + "demographic_factor": "Age", + "score_range": 2.7, + "min_level": "45-54", + "max_level": "65+", + "min_score": 89.6, + "max_score": 92.3, + "se_min": 3.1, + "se_max": 2.7, + "effect_size": 0.9697789566, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.7841396038, + "level_score_std_dev": 0.9178779875, + "se_difference": 4.1109609582, + "z_score": 0.6567807448, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.35733542, + "gap_confidence_interval_95_upper": 10.75733542, + "raw_n_min_group": 82, + "raw_n_max_group": 65, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "bias_and_stereotypes", + "demographic_factor": "Education", + "score_range": 7.6, + "min_level": "College", + "max_level": "No College", + "min_score": 86.4, + "max_score": 94.0, + "se_min": 3.1, + "se_max": 2.7, + "effect_size": 2.7297481742, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.7841396038, + "level_score_std_dev": 3.8, + "se_difference": 4.1109609582, + "z_score": 1.8487161706, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -0.45733542, + "gap_confidence_interval_95_upper": 15.65733542, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "bias_and_stereotypes", + "demographic_factor": "Ethnicity", + "score_range": 2.3, + "min_level": "Asian", + "max_level": "Hispanic", + "min_score": 91.1, + "max_score": 93.4, + "se_min": 3.4, + "se_max": 3.1, + "effect_size": 0.8261080001, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.7841396038, + "level_score_std_dev": 0.9394147114, + "se_difference": 4.6010868281, + "z_score": 0.4998818944, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.7179644729, + "gap_confidence_interval_95_upper": 11.3179644729, + "raw_n_min_group": 40, + "raw_n_max_group": 34, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "bias_and_stereotypes", + "demographic_factor": "Politics", + "score_range": 0.5, + "min_level": "Republican", + "max_level": "Democrat", + "min_score": 91.1, + "max_score": 91.6, + "se_min": 2.8, + "se_max": 2.8, + "effect_size": 0.1795886957, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 2.7841396038, + "level_score_std_dev": 0.2160246899, + "se_difference": 3.9597979746, + "z_score": 0.1262690681, + "p_value": 0.9772792279, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.2610614164, + "gap_confidence_interval_95_upper": 8.2610614164, + "raw_n_min_group": 163, + "raw_n_max_group": 168, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "bias_and_stereotypes", + "demographic_factor": "Sex", + "score_range": 0.4, + "min_level": "Male", + "max_level": "Female", + "min_score": 91.2, + "max_score": 91.6, + "se_min": 2.9, + "se_max": 2.8, + "effect_size": 0.1436709565, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 2.7841396038, + "level_score_std_dev": 0.2, + "se_difference": 4.0311288741, + "z_score": 0.0992277877, + "p_value": 0.9792458562, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.5008674104, + "gap_confidence_interval_95_upper": 8.3008674104, + "raw_n_min_group": 253, + "raw_n_max_group": 258, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "bias_and_stereotypes", + "demographic_factor": "Urbanicity", + "score_range": 4.9, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 89.8, + "max_score": 94.7, + "se_min": 3.2, + "se_max": 2.2, + "effect_size": 1.7599692175, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.7841396038, + "level_score_std_dev": 2.2216110271, + "se_difference": 3.8832975678, + "z_score": 1.2618141964, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.7111233741, + "gap_confidence_interval_95_upper": 12.5111233741, + "raw_n_min_group": 175, + "raw_n_max_group": 88, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "clarity", + "demographic_factor": "Age", + "score_range": 5.6, + "min_level": "45-54", + "max_level": "65+", + "min_score": 85.4, + "max_score": 91.0, + "se_min": 3.6, + "se_max": 2.9, + "effect_size": 1.2402963246, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.5150500643, + "level_score_std_dev": 1.8577914008, + "se_difference": 4.6227697325, + "z_score": 1.2113949697, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.4604621846, + "gap_confidence_interval_95_upper": 14.6604621846, + "raw_n_min_group": 82, + "raw_n_max_group": 65, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "clarity", + "demographic_factor": "Education", + "score_range": 8.3, + "min_level": "College", + "max_level": "No College", + "min_score": 82.7, + "max_score": 91.0, + "se_min": 3.4, + "se_max": 3.2, + "effect_size": 1.8382963382, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.5150500643, + "level_score_std_dev": 4.15, + "se_difference": 4.669047012, + "z_score": 1.777664688, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -0.8511639856, + "gap_confidence_interval_95_upper": 17.4511639856, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "clarity", + "demographic_factor": "Ethnicity", + "score_range": 3.8, + "min_level": "Asian", + "max_level": "Hispanic", + "min_score": 85.7, + "max_score": 89.5, + "se_min": 4.4, + "se_max": 3.7, + "effect_size": 0.8416296488, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.5150500643, + "level_score_std_dev": 1.3955285737, + "se_difference": 5.7489129407, + "z_score": 0.6609945287, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.4676623141, + "gap_confidence_interval_95_upper": 15.0676623141, + "raw_n_min_group": 40, + "raw_n_max_group": 34, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "clarity", + "demographic_factor": "Politics", + "score_range": 2.5, + "min_level": "Democrat", + "max_level": "Republican", + "min_score": 86.8, + "max_score": 89.3, + "se_min": 3.6, + "se_max": 3.0, + "effect_size": 0.5537037163, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 4.5150500643, + "level_score_std_dev": 1.1556623882, + "se_difference": 4.6861498055, + "z_score": 0.5334869997, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.684684845, + "gap_confidence_interval_95_upper": 11.684684845, + "raw_n_min_group": 168, + "raw_n_max_group": 163, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "clarity", + "demographic_factor": "Sex", + "score_range": 0.8, + "min_level": "Male", + "max_level": "Female", + "min_score": 87.8, + "max_score": 88.6, + "se_min": 3.4, + "se_max": 3.2, + "effect_size": 0.1771851892, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 4.5150500643, + "level_score_std_dev": 0.4, + "se_difference": 4.669047012, + "z_score": 0.1713411748, + "p_value": 0.9610477633, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.3511639856, + "gap_confidence_interval_95_upper": 9.9511639856, + "raw_n_min_group": 253, + "raw_n_max_group": 258, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "clarity", + "demographic_factor": "Urbanicity", + "score_range": 4.2, + "min_level": "Suburban", + "max_level": "Rural", + "min_score": 86.5, + "max_score": 90.7, + "se_min": 3.4, + "se_max": 3.0, + "effect_size": 0.9302222434, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.5150500643, + "level_score_std_dev": 1.7461067805, + "se_difference": 4.5343136195, + "z_score": 0.9262702919, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.6870913888, + "gap_confidence_interval_95_upper": 13.0870913888, + "raw_n_min_group": 253, + "raw_n_max_group": 88, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "communication", + "demographic_factor": "Age", + "score_range": 6.2, + "min_level": "18-24", + "max_level": "65+", + "min_score": 83.8, + "max_score": 90.0, + "se_min": 4.1, + "se_max": 2.9, + "effect_size": 1.5338049327, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.0422350116, + "level_score_std_dev": 2.1651918057, + "se_difference": 5.0219518118, + "z_score": 1.2345797475, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.6428446832, + "gap_confidence_interval_95_upper": 16.0428446832, + "raw_n_min_group": 60, + "raw_n_max_group": 65, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "communication", + "demographic_factor": "Education", + "score_range": 5.1, + "min_level": "College", + "max_level": "No College", + "min_score": 84.7, + "max_score": 89.8, + "se_min": 2.9, + "se_max": 3.2, + "effect_size": 1.2616782511, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.0422350116, + "level_score_std_dev": 2.55, + "se_difference": 4.3185645763, + "z_score": 1.1809479539, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.3642310345, + "gap_confidence_interval_95_upper": 13.5642310345, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "communication", + "demographic_factor": "Ethnicity", + "score_range": 3.7, + "min_level": "White", + "max_level": "Hispanic", + "min_score": 87.6, + "max_score": 91.3, + "se_min": 3.0, + "se_max": 3.3, + "effect_size": 0.9153352018, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.0422350116, + "level_score_std_dev": 1.471393897, + "se_difference": 4.4598206242, + "z_score": 0.8296297793, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.0410878009, + "gap_confidence_interval_95_upper": 12.4410878009, + "raw_n_min_group": 345, + "raw_n_max_group": 34, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "communication", + "demographic_factor": "Politics", + "score_range": 1.6, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 87.8, + "max_score": 89.4, + "se_min": 3.2, + "se_max": 2.9, + "effect_size": 0.3958206278, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 4.0422350116, + "level_score_std_dev": 0.7542472333, + "se_difference": 4.3185645763, + "z_score": 0.3704934757, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.8642310345, + "gap_confidence_interval_95_upper": 10.0642310345, + "raw_n_min_group": 168, + "raw_n_max_group": 185, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "communication", + "demographic_factor": "Sex", + "score_range": 1.2, + "min_level": "Male", + "max_level": "Female", + "min_score": 87.5, + "max_score": 88.7, + "se_min": 3.2, + "se_max": 3.0, + "effect_size": 0.2968654708, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 4.0422350116, + "level_score_std_dev": 0.6, + "se_difference": 4.3863424399, + "z_score": 0.2735764516, + "p_value": 0.9549340747, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.397073206, + "gap_confidence_interval_95_upper": 9.797073206, + "raw_n_min_group": 253, + "raw_n_max_group": 258, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "communication", + "demographic_factor": "Urbanicity", + "score_range": 4.6, + "min_level": "Suburban", + "max_level": "Rural", + "min_score": 86.1, + "max_score": 90.7, + "se_min": 3.2, + "se_max": 2.9, + "effect_size": 1.1379843049, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.0422350116, + "level_score_std_dev": 1.8832595856, + "se_difference": 4.3185645763, + "z_score": 1.0651687427, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.8642310345, + "gap_confidence_interval_95_upper": 13.0642310345, + "raw_n_min_group": 253, + "raw_n_max_group": 88, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "comprehensiveness", + "demographic_factor": "Age", + "score_range": 9.0, + "min_level": "45-54", + "max_level": "65+", + "min_score": 84.8, + "max_score": 93.8, + "se_min": 4.3, + "se_max": 2.7, + "effect_size": 2.5124313613, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.5821874137, + "level_score_std_dev": 2.7871231684, + "se_difference": 5.0774009099, + "z_score": 1.7725604418, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -0.9515229185, + "gap_confidence_interval_95_upper": 18.9515229185, + "raw_n_min_group": 82, + "raw_n_max_group": 65, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "comprehensiveness", + "demographic_factor": "Education", + "score_range": 4.9, + "min_level": "College", + "max_level": "No College", + "min_score": 87.4, + "max_score": 92.3, + "se_min": 3.3, + "se_max": 3.3, + "effect_size": 1.3678792967, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.5821874137, + "level_score_std_dev": 2.45, + "se_difference": 4.6669047558, + "z_score": 1.0499464327, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.2469652407, + "gap_confidence_interval_95_upper": 14.0469652407, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "comprehensiveness", + "demographic_factor": "Ethnicity", + "score_range": 4.5, + "min_level": "Asian", + "max_level": "African American", + "min_score": 87.2, + "max_score": 91.7, + "se_min": 4.7, + "se_max": 3.4, + "effect_size": 1.2562156806, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.5821874137, + "level_score_std_dev": 1.6976086121, + "se_difference": 5.8008620049, + "z_score": 0.7757467763, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.8694806089, + "gap_confidence_interval_95_upper": 15.8694806089, + "raw_n_min_group": 40, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "comprehensiveness", + "demographic_factor": "Politics", + "score_range": 2.3, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 90.2, + "max_score": 92.5, + "se_min": 3.5, + "se_max": 2.8, + "effect_size": 0.6420657923, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.5821874137, + "level_score_std_dev": 1.0842303978, + "se_difference": 4.4821869662, + "z_score": 0.5131423605, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.4849250257, + "gap_confidence_interval_95_upper": 11.0849250257, + "raw_n_min_group": 168, + "raw_n_max_group": 185, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "comprehensiveness", + "demographic_factor": "Sex", + "score_range": 1.6, + "min_level": "Male", + "max_level": "Female", + "min_score": 89.8, + "max_score": 91.4, + "se_min": 3.5, + "se_max": 3.1, + "effect_size": 0.4466544642, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.5821874137, + "level_score_std_dev": 0.8, + "se_difference": 4.675467891, + "z_score": 0.3422117395, + "p_value": 0.9429935514, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.5637486773, + "gap_confidence_interval_95_upper": 10.7637486773, + "raw_n_min_group": 253, + "raw_n_max_group": 258, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "comprehensiveness", + "demographic_factor": "Urbanicity", + "score_range": 4.7, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 89.0, + "max_score": 93.7, + "se_min": 3.7, + "se_max": 2.8, + "effect_size": 1.3120474887, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.5821874137, + "level_score_std_dev": 2.0885933597, + "se_difference": 4.6400431032, + "z_score": 1.012921625, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.3943173691, + "gap_confidence_interval_95_upper": 13.7943173691, + "raw_n_min_group": 175, + "raw_n_max_group": 88, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "confidence", + "demographic_factor": "Age", + "score_range": 6.6, + "min_level": "18-24", + "max_level": "65+", + "min_score": 88.6, + "max_score": 95.2, + "se_min": 3.6, + "se_max": 1.9, + "effect_size": 2.0071829355, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.2881905696, + "level_score_std_dev": 2.2917727248, + "se_difference": 4.0706264874, + "z_score": 1.6213720469, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -1.3782813098, + "gap_confidence_interval_95_upper": 14.5782813098, + "raw_n_min_group": 60, + "raw_n_max_group": 65, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "confidence", + "demographic_factor": "Education", + "score_range": 4.4, + "min_level": "College", + "max_level": "No College", + "min_score": 88.7, + "max_score": 93.1, + "se_min": 2.5, + "se_max": 2.6, + "effect_size": 1.338121957, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.2881905696, + "level_score_std_dev": 2.2, + "se_difference": 3.6069377594, + "z_score": 1.2198713406, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.6694681029, + "gap_confidence_interval_95_upper": 11.4694681029, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "confidence", + "demographic_factor": "Ethnicity", + "score_range": 0.8, + "min_level": "African American", + "max_level": "Asian", + "min_score": 91.5, + "max_score": 92.3, + "se_min": 2.9, + "se_max": 2.9, + "effect_size": 0.2432949013, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.2881905696, + "level_score_std_dev": 0.3418698583, + "se_difference": 4.1012193309, + "z_score": 0.1950639396, + "p_value": 0.9597457904, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.2382421812, + "gap_confidence_interval_95_upper": 8.8382421812, + "raw_n_min_group": 0, + "raw_n_max_group": 40, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "confidence", + "demographic_factor": "Politics", + "score_range": 0.7, + "min_level": "Independent", + "max_level": "Republican", + "min_score": 91.3, + "max_score": 92.0, + "se_min": 2.7, + "se_max": 2.4, + "effect_size": 0.2128830386, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.2881905696, + "level_score_std_dev": 0.3091206165, + "se_difference": 3.6124783736, + "z_score": 0.1937727863, + "p_value": 0.9597457904, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.3803275073, + "gap_confidence_interval_95_upper": 7.7803275073, + "raw_n_min_group": 185, + "raw_n_max_group": 163, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "confidence", + "demographic_factor": "Sex", + "score_range": 1.1, + "min_level": "Male", + "max_level": "Female", + "min_score": 91.0, + "max_score": 92.1, + "se_min": 2.7, + "se_max": 2.4, + "effect_size": 0.3345304892, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.2881905696, + "level_score_std_dev": 0.55, + "se_difference": 3.6124783736, + "z_score": 0.3045000928, + "p_value": 0.9469805126, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.9803275073, + "gap_confidence_interval_95_upper": 8.1803275073, + "raw_n_min_group": 253, + "raw_n_max_group": 258, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "confidence", + "demographic_factor": "Urbanicity", + "score_range": 2.2, + "min_level": "Suburban", + "max_level": "Rural", + "min_score": 90.8, + "max_score": 93.0, + "se_min": 2.6, + "se_max": 2.5, + "effect_size": 0.6690609785, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.2881905696, + "level_score_std_dev": 0.9416297928, + "se_difference": 3.6069377594, + "z_score": 0.6099356703, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.8694681029, + "gap_confidence_interval_95_upper": 9.2694681029, + "raw_n_min_group": 253, + "raw_n_max_group": 88, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "consistency", + "demographic_factor": "Age", + "score_range": 1.4, + "min_level": "45-54", + "max_level": "35-44", + "min_score": 92.4, + "max_score": 93.8, + "se_min": 2.4, + "se_max": 2.1, + "effect_size": 0.4702640118, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 2.9770511137, + "level_score_std_dev": 0.5446711546, + "se_difference": 3.1890437438, + "z_score": 0.4390030719, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.850410883, + "gap_confidence_interval_95_upper": 7.650410883, + "raw_n_min_group": 82, + "raw_n_max_group": 93, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "consistency", + "demographic_factor": "Education", + "score_range": 4.5, + "min_level": "College", + "max_level": "No College", + "min_score": 90.4, + "max_score": 94.9, + "se_min": 2.4, + "se_max": 2.3, + "effect_size": 1.511562895, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9770511137, + "level_score_std_dev": 2.25, + "se_difference": 3.3241540277, + "z_score": 1.3537278846, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.0152221734, + "gap_confidence_interval_95_upper": 11.0152221734, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "consistency", + "demographic_factor": "Ethnicity", + "score_range": 3.5, + "min_level": "White", + "max_level": "Hispanic", + "min_score": 92.7, + "max_score": 96.2, + "se_min": 2.4, + "se_max": 2.2, + "effect_size": 1.1756600295, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9770511137, + "level_score_std_dev": 1.3141061601, + "se_difference": 3.2557641192, + "z_score": 1.0750164545, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.8811804158, + "gap_confidence_interval_95_upper": 9.8811804158, + "raw_n_min_group": 345, + "raw_n_max_group": 34, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "consistency", + "demographic_factor": "Politics", + "score_range": 3.7, + "min_level": "Republican", + "max_level": "Democrat", + "min_score": 91.2, + "max_score": 94.9, + "se_min": 2.8, + "se_max": 2.0, + "effect_size": 1.2428406026, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9770511137, + "level_score_std_dev": 1.5755069731, + "se_difference": 3.4409301068, + "z_score": 1.0752906584, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.0440990827, + "gap_confidence_interval_95_upper": 10.4440990827, + "raw_n_min_group": 163, + "raw_n_max_group": 168, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "consistency", + "demographic_factor": "Sex", + "score_range": 0.9, + "min_level": "Male", + "max_level": "Female", + "min_score": 92.9, + "max_score": 93.8, + "se_min": 2.5, + "se_max": 2.2, + "effect_size": 0.302312579, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 2.9770511137, + "level_score_std_dev": 0.45, + "se_difference": 3.3301651611, + "z_score": 0.2702568661, + "p_value": 0.955704864, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.6270037783, + "gap_confidence_interval_95_upper": 7.4270037783, + "raw_n_min_group": 253, + "raw_n_max_group": 258, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "consistency", + "demographic_factor": "Urbanicity", + "score_range": 1.7, + "min_level": "Suburban", + "max_level": "Rural", + "min_score": 92.5, + "max_score": 94.2, + "se_min": 2.4, + "se_max": 2.4, + "effect_size": 0.5710348715, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 2.9770511137, + "level_score_std_dev": 0.7133644853, + "se_difference": 3.3941125497, + "z_score": 0.5008673033, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.9523383569, + "gap_confidence_interval_95_upper": 8.3523383569, + "raw_n_min_group": 253, + "raw_n_max_group": 88, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "context_memory", + "demographic_factor": "Age", + "score_range": 4.4, + "min_level": "45-54", + "max_level": "35-44", + "min_score": 89.2, + "max_score": 93.6, + "se_min": 3.5, + "se_max": 2.5, + "effect_size": 1.2983818056, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.3888336858, + "level_score_std_dev": 1.6017351702, + "se_difference": 4.3011626335, + "z_score": 1.0229792209, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.0301238534, + "gap_confidence_interval_95_upper": 12.8301238534, + "raw_n_min_group": 82, + "raw_n_max_group": 93, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "context_memory", + "demographic_factor": "Education", + "score_range": 5.4, + "min_level": "College", + "max_level": "No College", + "min_score": 88.9, + "max_score": 94.3, + "se_min": 3.0, + "se_max": 2.9, + "effect_size": 1.5934685797, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.3888336858, + "level_score_std_dev": 2.7, + "se_difference": 4.172529209, + "z_score": 1.2941790769, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.7780069741, + "gap_confidence_interval_95_upper": 13.5780069741, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "context_memory", + "demographic_factor": "Ethnicity", + "score_range": 0.8, + "min_level": "African American", + "max_level": "Hispanic", + "min_score": 92.2, + "max_score": 93.0, + "se_min": 3.3, + "se_max": 3.6, + "effect_size": 0.2360694192, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.3888336858, + "level_score_std_dev": 0.2947456531, + "se_difference": 4.8836461788, + "z_score": 0.1638120312, + "p_value": 0.9610477633, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.7717706237, + "gap_confidence_interval_95_upper": 10.3717706237, + "raw_n_min_group": 0, + "raw_n_max_group": 34, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "context_memory", + "demographic_factor": "Politics", + "score_range": 1.6, + "min_level": "Republican", + "max_level": "Independent", + "min_score": 91.9, + "max_score": 93.5, + "se_min": 3.0, + "se_max": 2.7, + "effect_size": 0.4721388384, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.3888336858, + "level_score_std_dev": 0.6599663291, + "se_difference": 4.0360872141, + "z_score": 0.3964235447, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.3105855781, + "gap_confidence_interval_95_upper": 9.5105855781, + "raw_n_min_group": 163, + "raw_n_max_group": 185, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "context_memory", + "demographic_factor": "Sex", + "score_range": 0.3, + "min_level": "Male", + "max_level": "Female", + "min_score": 92.3, + "max_score": 92.6, + "se_min": 3.0, + "se_max": 2.9, + "effect_size": 0.0885260322, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 3.3888336858, + "level_score_std_dev": 0.15, + "se_difference": 4.172529209, + "z_score": 0.0718988376, + "p_value": 0.9818838748, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.8780069741, + "gap_confidence_interval_95_upper": 8.4780069741, + "raw_n_min_group": 253, + "raw_n_max_group": 258, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "context_memory", + "demographic_factor": "Urbanicity", + "score_range": 4.5, + "min_level": "Suburban", + "max_level": "Rural", + "min_score": 91.1, + "max_score": 95.6, + "se_min": 3.1, + "se_max": 2.1, + "effect_size": 1.327890483, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.3888336858, + "level_score_std_dev": 2.1213203436, + "se_difference": 3.7443290454, + "z_score": 1.2018174539, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.8387500752, + "gap_confidence_interval_95_upper": 11.8387500752, + "raw_n_min_group": 253, + "raw_n_max_group": 88, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "conversation_building", + "demographic_factor": "Age", + "score_range": 7.6, + "min_level": "45-54", + "max_level": "65+", + "min_score": 87.3, + "max_score": 94.9, + "se_min": 3.3, + "se_max": 2.1, + "effect_size": 1.7568752491, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.3258620689, + "level_score_std_dev": 2.2573337271, + "se_difference": 3.9115214431, + "z_score": 1.9429779717, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -0.0664411533, + "gap_confidence_interval_95_upper": 15.2664411533, + "raw_n_min_group": 82, + "raw_n_max_group": 65, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "conversation_building", + "demographic_factor": "Education", + "score_range": 4.8, + "min_level": "College", + "max_level": "No College", + "min_score": 88.5, + "max_score": 93.3, + "se_min": 2.7, + "se_max": 2.7, + "effect_size": 1.1096054205, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.3258620689, + "level_score_std_dev": 2.4, + "se_difference": 3.8183766184, + "z_score": 1.2570787221, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.6838806515, + "gap_confidence_interval_95_upper": 12.2838806515, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "conversation_building", + "demographic_factor": "Ethnicity", + "score_range": 3.3, + "min_level": "African American", + "max_level": "Hispanic", + "min_score": 91.3, + "max_score": 94.6, + "se_min": 3.2, + "se_max": 2.6, + "effect_size": 0.7628537266, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 4.3258620689, + "level_score_std_dev": 1.3720422734, + "se_difference": 4.1231056256, + "z_score": 0.8003675626, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.7811385307, + "gap_confidence_interval_95_upper": 11.3811385307, + "raw_n_min_group": 0, + "raw_n_max_group": 34, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "conversation_building", + "demographic_factor": "Politics", + "score_range": 1.7, + "min_level": "Republican", + "max_level": "Independent", + "min_score": 91.2, + "max_score": 92.9, + "se_min": 2.7, + "se_max": 2.4, + "effect_size": 0.3929852531, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 4.3258620689, + "level_score_std_dev": 0.7257180352, + "se_difference": 3.6124783736, + "z_score": 0.4705910525, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.3803275073, + "gap_confidence_interval_95_upper": 8.7803275073, + "raw_n_min_group": 163, + "raw_n_max_group": 185, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "conversation_building", + "demographic_factor": "Sex", + "score_range": 0.6, + "min_level": "Female", + "max_level": "Male", + "min_score": 91.4, + "max_score": 92.0, + "se_min": 2.8, + "se_max": 2.6, + "effect_size": 0.1387006776, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 4.3258620689, + "level_score_std_dev": 0.3, + "se_difference": 3.8209946349, + "z_score": 0.1570271768, + "p_value": 0.9640111876, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.8890118695, + "gap_confidence_interval_95_upper": 8.0890118695, + "raw_n_min_group": 258, + "raw_n_max_group": 253, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "conversation_building", + "demographic_factor": "Urbanicity", + "score_range": 3.4, + "min_level": "Suburban", + "max_level": "Rural", + "min_score": 90.6, + "max_score": 94.0, + "se_min": 2.8, + "se_max": 2.3, + "effect_size": 0.7859705062, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 4.3258620689, + "level_score_std_dev": 1.5173075569, + "se_difference": 3.6235341864, + "z_score": 0.9383104519, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.7019965021, + "gap_confidence_interval_95_upper": 10.5019965021, + "raw_n_min_group": 253, + "raw_n_max_group": 88, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "conversation_flow", + "demographic_factor": "Age", + "score_range": 6.8, + "min_level": "35-44", + "max_level": "65+", + "min_score": 85.7, + "max_score": 92.5, + "se_min": 3.4, + "se_max": 2.6, + "effect_size": 1.3682108134, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.969994341, + "level_score_std_dev": 2.337852766, + "se_difference": 4.2801869118, + "z_score": 1.588715666, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -1.5890121942, + "gap_confidence_interval_95_upper": 15.1890121942, + "raw_n_min_group": 93, + "raw_n_max_group": 65, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "conversation_flow", + "demographic_factor": "Education", + "score_range": 7.1, + "min_level": "College", + "max_level": "No College", + "min_score": 84.4, + "max_score": 91.5, + "se_min": 3.0, + "se_max": 3.0, + "effect_size": 1.4285730552, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.969994341, + "level_score_std_dev": 3.55, + "se_difference": 4.2426406871, + "z_score": 1.6734860488, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -1.2154229461, + "gap_confidence_interval_95_upper": 15.4154229461, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "conversation_flow", + "demographic_factor": "Ethnicity", + "score_range": 4.8, + "min_level": "Asian", + "max_level": "Hispanic", + "min_score": 87.7, + "max_score": 92.5, + "se_min": 3.8, + "se_max": 3.2, + "effect_size": 0.9657958683, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.969994341, + "level_score_std_dev": 1.800520758, + "se_difference": 4.9678969393, + "z_score": 0.9662036187, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.93689908, + "gap_confidence_interval_95_upper": 14.53689908, + "raw_n_min_group": 40, + "raw_n_max_group": 34, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "conversation_flow", + "demographic_factor": "Politics", + "score_range": 1.0, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 88.6, + "max_score": 89.6, + "se_min": 3.1, + "se_max": 2.9, + "effect_size": 0.2012074726, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 4.969994341, + "level_score_std_dev": 0.4496912521, + "se_difference": 4.2449970554, + "z_score": 0.2355714237, + "p_value": 0.9570722129, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.320041343, + "gap_confidence_interval_95_upper": 9.320041343, + "raw_n_min_group": 168, + "raw_n_max_group": 185, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "conversation_flow", + "demographic_factor": "Sex", + "score_range": 0.2, + "min_level": "Female", + "max_level": "Male", + "min_score": 89.0, + "max_score": 89.2, + "se_min": 3.0, + "se_max": 3.0, + "effect_size": 0.0402414945, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 4.969994341, + "level_score_std_dev": 0.1, + "se_difference": 4.2426406871, + "z_score": 0.0471404521, + "p_value": 0.9872735163, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.1154229461, + "gap_confidence_interval_95_upper": 8.5154229461, + "raw_n_min_group": 258, + "raw_n_max_group": 253, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "conversation_flow", + "demographic_factor": "Urbanicity", + "score_range": 2.1, + "min_level": "Suburban", + "max_level": "Rural", + "min_score": 88.5, + "max_score": 90.6, + "se_min": 3.0, + "se_max": 3.0, + "effect_size": 0.4225356924, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 4.969994341, + "level_score_std_dev": 0.9899494937, + "se_difference": 4.2426406871, + "z_score": 0.4949747468, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.2154229461, + "gap_confidence_interval_95_upper": 10.4154229461, + "raw_n_min_group": 253, + "raw_n_max_group": 88, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "cultural_awareness", + "demographic_factor": "Age", + "score_range": 7.9, + "min_level": "45-54", + "max_level": "65+", + "min_score": 74.1, + "max_score": 82.0, + "se_min": 4.6, + "se_max": 4.4, + "effect_size": 2.1271764033, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.7138433783, + "level_score_std_dev": 2.843950695, + "se_difference": 6.3655321851, + "z_score": 1.2410588416, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.5762138253, + "gap_confidence_interval_95_upper": 20.3762138253, + "raw_n_min_group": 82, + "raw_n_max_group": 65, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "cultural_awareness", + "demographic_factor": "Education", + "score_range": 6.1, + "min_level": "College", + "max_level": "No College", + "min_score": 74.2, + "max_score": 80.3, + "se_min": 3.8, + "se_max": 4.8, + "effect_size": 1.6425032988, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.7138433783, + "level_score_std_dev": 3.05, + "se_difference": 6.122091146, + "z_score": 0.9963915686, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.8990781562, + "gap_confidence_interval_95_upper": 18.0990781562, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "cultural_awareness", + "demographic_factor": "Ethnicity", + "score_range": 6.9, + "min_level": "Hispanic", + "max_level": "African American", + "min_score": 74.3, + "max_score": 81.2, + "se_min": 5.9, + "se_max": 4.6, + "effect_size": 1.8579135674, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.7138433783, + "level_score_std_dev": 2.669269563, + "se_difference": 7.4813100457, + "z_score": 0.9222983619, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.7630982468, + "gap_confidence_interval_95_upper": 21.5630982468, + "raw_n_min_group": 34, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "cultural_awareness", + "demographic_factor": "Politics", + "score_range": 3.2, + "min_level": "Independent", + "max_level": "Republican", + "min_score": 76.6, + "max_score": 79.8, + "se_min": 4.6, + "se_max": 4.2, + "effect_size": 0.8616410748, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.7138433783, + "level_score_std_dev": 1.3366625104, + "se_difference": 6.228964601, + "z_score": 0.5137290393, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.0085462789, + "gap_confidence_interval_95_upper": 15.4085462789, + "raw_n_min_group": 185, + "raw_n_max_group": 163, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "cultural_awareness", + "demographic_factor": "Sex", + "score_range": 1.0, + "min_level": "Male", + "max_level": "Female", + "min_score": 77.7, + "max_score": 78.7, + "se_min": 4.5, + "se_max": 4.4, + "effect_size": 0.2692628359, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.7138433783, + "level_score_std_dev": 0.5, + "se_difference": 6.293647591, + "z_score": 0.1588903709, + "p_value": 0.9636162902, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -11.3353226098, + "gap_confidence_interval_95_upper": 13.3353226098, + "raw_n_min_group": 253, + "raw_n_max_group": 258, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "cultural_awareness", + "demographic_factor": "Urbanicity", + "score_range": 5.8, + "min_level": "Suburban", + "max_level": "Rural", + "min_score": 75.8, + "max_score": 81.6, + "se_min": 4.4, + "se_max": 4.5, + "effect_size": 1.561724448, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.7138433783, + "level_score_std_dev": 2.3795424397, + "se_difference": 6.293647591, + "z_score": 0.9215641512, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.5353226098, + "gap_confidence_interval_95_upper": 18.1353226098, + "raw_n_min_group": 253, + "raw_n_max_group": 88, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "detail_and_technical_language", + "demographic_factor": "Age", + "score_range": 5.8, + "min_level": "45-54", + "max_level": "35-44", + "min_score": 83.9, + "max_score": 89.7, + "se_min": 4.0, + "se_max": 3.1, + "effect_size": 1.8535213985, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1291788726, + "level_score_std_dev": 2.1382755253, + "se_difference": 5.0606323716, + "z_score": 1.1461018257, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.1186571872, + "gap_confidence_interval_95_upper": 15.7186571872, + "raw_n_min_group": 82, + "raw_n_max_group": 93, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "detail_and_technical_language", + "demographic_factor": "Education", + "score_range": 3.8, + "min_level": "College", + "max_level": "No College", + "min_score": 85.3, + "max_score": 89.1, + "se_min": 3.2, + "se_max": 3.8, + "effect_size": 1.2143760887, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1291788726, + "level_score_std_dev": 1.9, + "se_difference": 4.9678969393, + "z_score": 0.7649111981, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.93689908, + "gap_confidence_interval_95_upper": 13.53689908, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "detail_and_technical_language", + "demographic_factor": "Ethnicity", + "score_range": 2.6, + "min_level": "African American", + "max_level": "Hispanic", + "min_score": 87.0, + "max_score": 89.6, + "se_min": 4.2, + "se_max": 4.0, + "effect_size": 0.8308889028, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1291788726, + "level_score_std_dev": 0.9522998477, + "se_difference": 5.8, + "z_score": 0.4482758621, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.7677911103, + "gap_confidence_interval_95_upper": 13.9677911103, + "raw_n_min_group": 0, + "raw_n_max_group": 34, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "detail_and_technical_language", + "demographic_factor": "Politics", + "score_range": 1.8, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 87.4, + "max_score": 89.2, + "se_min": 3.8, + "se_max": 3.3, + "effect_size": 0.5752307788, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.1291788726, + "level_score_std_dev": 0.8055363982, + "se_difference": 5.0328918129, + "z_score": 0.3576472666, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.0642866913, + "gap_confidence_interval_95_upper": 11.6642866913, + "raw_n_min_group": 168, + "raw_n_max_group": 185, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "detail_and_technical_language", + "demographic_factor": "Sex", + "score_range": 1.3, + "min_level": "Female", + "max_level": "Male", + "min_score": 87.2, + "max_score": 88.5, + "se_min": 3.7, + "se_max": 3.5, + "effect_size": 0.4154444514, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.1291788726, + "level_score_std_dev": 0.65, + "se_difference": 5.0931326313, + "z_score": 0.25524566, + "p_value": 0.9570722129, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.6823565258, + "gap_confidence_interval_95_upper": 11.2823565258, + "raw_n_min_group": 258, + "raw_n_max_group": 253, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "detail_and_technical_language", + "demographic_factor": "Urbanicity", + "score_range": 4.5, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 86.4, + "max_score": 90.9, + "se_min": 3.9, + "se_max": 3.3, + "effect_size": 1.4380769471, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1291788726, + "level_score_std_dev": 2.0757863303, + "se_difference": 5.1088159098, + "z_score": 0.8808303293, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.5130951868, + "gap_confidence_interval_95_upper": 14.5130951868, + "raw_n_min_group": 175, + "raw_n_max_group": 88, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "distinct_personality", + "demographic_factor": "Age", + "score_range": 9.1, + "min_level": "35-44", + "max_level": "65+", + "min_score": 72.8, + "max_score": 81.9, + "se_min": 4.7, + "se_max": 4.2, + "effect_size": 2.1048506377, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.3233471472, + "level_score_std_dev": 2.7061657993, + "se_difference": 6.3031738037, + "z_score": 1.443717131, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.2539936436, + "gap_confidence_interval_95_upper": 21.4539936436, + "raw_n_min_group": 93, + "raw_n_max_group": 65, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "distinct_personality", + "demographic_factor": "Education", + "score_range": 4.9, + "min_level": "College", + "max_level": "No College", + "min_score": 75.1, + "max_score": 80.0, + "se_min": 3.7, + "se_max": 4.7, + "effect_size": 1.1333811126, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.3233471472, + "level_score_std_dev": 2.45, + "se_difference": 5.9816385715, + "z_score": 0.8191735327, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.8237961687, + "gap_confidence_interval_95_upper": 16.6237961687, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "distinct_personality", + "demographic_factor": "Ethnicity", + "score_range": 7.5, + "min_level": "White", + "max_level": "African American", + "min_score": 76.5, + "max_score": 84.0, + "se_min": 4.3, + "se_max": 4.3, + "effect_size": 1.7347670091, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.3233471472, + "level_score_std_dev": 3.043332877, + "se_difference": 6.0811183182, + "z_score": 1.2333257811, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.4187728894, + "gap_confidence_interval_95_upper": 19.4187728894, + "raw_n_min_group": 345, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "distinct_personality", + "demographic_factor": "Politics", + "score_range": 5.3, + "min_level": "Independent", + "max_level": "Republican", + "min_score": 74.8, + "max_score": 80.1, + "se_min": 4.6, + "se_max": 4.1, + "effect_size": 1.2259020198, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.3233471472, + "level_score_std_dev": 2.2005049925, + "se_difference": 6.1619802012, + "z_score": 0.8601131174, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.7772592678, + "gap_confidence_interval_95_upper": 17.3772592678, + "raw_n_min_group": 185, + "raw_n_max_group": 163, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "distinct_personality", + "demographic_factor": "Sex", + "score_range": 0.5, + "min_level": "Male", + "max_level": "Female", + "min_score": 78.1, + "max_score": 78.6, + "se_min": 4.4, + "se_max": 4.3, + "effect_size": 0.1156511339, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 4.3233471472, + "level_score_std_dev": 0.25, + "se_difference": 6.1522353661, + "z_score": 0.081271273, + "p_value": 0.981376128, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -11.558159742, + "gap_confidence_interval_95_upper": 12.558159742, + "raw_n_min_group": 253, + "raw_n_max_group": 258, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "distinct_personality", + "demographic_factor": "Urbanicity", + "score_range": 3.3, + "min_level": "Suburban", + "max_level": "Urban", + "min_score": 76.8, + "max_score": 80.1, + "se_min": 4.3, + "se_max": 4.2, + "effect_size": 0.763297484, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 4.3233471472, + "level_score_std_dev": 1.3490737563, + "se_difference": 6.0108235709, + "z_score": 0.5490096259, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.4809977163, + "gap_confidence_interval_95_upper": 15.0809977163, + "raw_n_min_group": 253, + "raw_n_max_group": 175, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "effectiveness", + "demographic_factor": "Age", + "score_range": 2.5, + "min_level": "45-54", + "max_level": "65+", + "min_score": 92.6, + "max_score": 95.1, + "se_min": 2.4, + "se_max": 2.0, + "effect_size": 0.6312936741, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.960122052, + "level_score_std_dev": 0.7868714423, + "se_difference": 3.1240998704, + "z_score": 0.8002304996, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.62312323, + "gap_confidence_interval_95_upper": 8.62312323, + "raw_n_min_group": 82, + "raw_n_max_group": 65, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "effectiveness", + "demographic_factor": "Education", + "score_range": 4.9, + "min_level": "College", + "max_level": "No College", + "min_score": 91.0, + "max_score": 95.9, + "se_min": 2.4, + "se_max": 2.0, + "effect_size": 1.2373356012, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.960122052, + "level_score_std_dev": 2.45, + "se_difference": 3.1240998704, + "z_score": 1.5684517792, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -1.22312323, + "gap_confidence_interval_95_upper": 11.02312323, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "effectiveness", + "demographic_factor": "Ethnicity", + "score_range": 1.9, + "min_level": "Asian", + "max_level": "Hispanic", + "min_score": 93.0, + "max_score": 94.9, + "se_min": 2.8, + "se_max": 2.5, + "effect_size": 0.4797831923, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.960122052, + "level_score_std_dev": 0.7582875444, + "se_difference": 3.7536648758, + "z_score": 0.5061719847, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.4570479666, + "gap_confidence_interval_95_upper": 9.2570479666, + "raw_n_min_group": 40, + "raw_n_max_group": 34, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "effectiveness", + "demographic_factor": "Politics", + "score_range": 1.2, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 94.0, + "max_score": 95.2, + "se_min": 2.3, + "se_max": 1.9, + "effect_size": 0.3030209636, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.960122052, + "level_score_std_dev": 0.5656854249, + "se_difference": 2.983286778, + "z_score": 0.4022409139, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.6471346405, + "gap_confidence_interval_95_upper": 7.0471346405, + "raw_n_min_group": 168, + "raw_n_max_group": 185, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "effectiveness", + "demographic_factor": "Sex", + "score_range": 0.2, + "min_level": "Male", + "max_level": "Female", + "min_score": 94.1, + "max_score": 94.3, + "se_min": 2.2, + "se_max": 2.1, + "effect_size": 0.0505034939, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 3.960122052, + "level_score_std_dev": 0.1, + "se_difference": 3.0413812651, + "z_score": 0.0657595949, + "p_value": 0.9836763944, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.7609977429, + "gap_confidence_interval_95_upper": 6.1609977429, + "raw_n_min_group": 253, + "raw_n_max_group": 258, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "effectiveness", + "demographic_factor": "Urbanicity", + "score_range": 3.5, + "min_level": "Suburban", + "max_level": "Rural", + "min_score": 92.9, + "max_score": 96.4, + "se_min": 2.4, + "se_max": 1.7, + "effect_size": 0.8838111437, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.960122052, + "level_score_std_dev": 1.4613540145, + "se_difference": 2.941088234, + "z_score": 1.1900357016, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.2644270139, + "gap_confidence_interval_95_upper": 9.2644270139, + "raw_n_min_group": 253, + "raw_n_max_group": 88, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "ethical_alignment", + "demographic_factor": "Age", + "score_range": 11.2, + "min_level": "45-54", + "max_level": "65+", + "min_score": 75.1, + "max_score": 86.3, + "se_min": 4.2, + "se_max": 3.6, + "effect_size": 2.9790149818, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.7596319818, + "level_score_std_dev": 3.6003086287, + "se_difference": 5.5317266744, + "z_score": 2.024684273, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": 0.3580149459, + "gap_confidence_interval_95_upper": 22.0419850541, + "raw_n_min_group": 82, + "raw_n_max_group": 65, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "ethical_alignment", + "demographic_factor": "Education", + "score_range": 5.2, + "min_level": "College", + "max_level": "No College", + "min_score": 77.1, + "max_score": 82.3, + "se_min": 3.3, + "se_max": 4.2, + "effect_size": 1.3831140987, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.7596319818, + "level_score_std_dev": 2.6, + "se_difference": 5.3413481444, + "z_score": 0.9735369909, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.268849992, + "gap_confidence_interval_95_upper": 15.668849992, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "ethical_alignment", + "demographic_factor": "Ethnicity", + "score_range": 5.8, + "min_level": "Asian", + "max_level": "African American", + "min_score": 77.0, + "max_score": 82.8, + "se_min": 5.0, + "se_max": 4.1, + "effect_size": 1.542704187, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.7596319818, + "level_score_std_dev": 2.1182244924, + "se_difference": 6.4660652641, + "z_score": 0.896990637, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.8732550394, + "gap_confidence_interval_95_upper": 18.4732550394, + "raw_n_min_group": 40, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "ethical_alignment", + "demographic_factor": "Politics", + "score_range": 3.3, + "min_level": "Democrat", + "max_level": "Republican", + "min_score": 79.0, + "max_score": 82.3, + "se_min": 4.1, + "se_max": 3.7, + "effect_size": 0.8777454857, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.7596319818, + "level_score_std_dev": 1.3490737563, + "se_difference": 5.5226805086, + "z_score": 0.5975359239, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.524254895, + "gap_confidence_interval_95_upper": 14.124254895, + "raw_n_min_group": 168, + "raw_n_max_group": 163, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "ethical_alignment", + "demographic_factor": "Sex", + "score_range": 0.1, + "min_level": "Female", + "max_level": "Male", + "min_score": 80.5, + "max_score": 80.6, + "se_min": 3.9, + "se_max": 4.0, + "effect_size": 0.0265983481, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 3.7596319818, + "level_score_std_dev": 0.05, + "se_difference": 5.5865910894, + "z_score": 0.0179000035, + "p_value": 0.9926117635, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -10.8495173315, + "gap_confidence_interval_95_upper": 11.0495173315, + "raw_n_min_group": 258, + "raw_n_max_group": 253, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "ethical_alignment", + "demographic_factor": "Urbanicity", + "score_range": 3.2, + "min_level": "Suburban", + "max_level": "Rural", + "min_score": 79.5, + "max_score": 82.7, + "se_min": 3.9, + "se_max": 4.0, + "effect_size": 0.8511471377, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.7596319818, + "level_score_std_dev": 1.4236104336, + "se_difference": 5.5865910894, + "z_score": 0.5728001117, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.7495173315, + "gap_confidence_interval_95_upper": 14.1495173315, + "raw_n_min_group": 253, + "raw_n_max_group": 88, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "flexibility", + "demographic_factor": "Age", + "score_range": 3.0, + "min_level": "45-54", + "max_level": "65+", + "min_score": 92.3, + "max_score": 95.3, + "se_min": 2.4, + "se_max": 1.9, + "effect_size": 0.8783201298, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.4156111174, + "level_score_std_dev": 0.9251126058, + "se_difference": 3.061045573, + "z_score": 0.9800572806, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.9995390781, + "gap_confidence_interval_95_upper": 8.9995390781, + "raw_n_min_group": 82, + "raw_n_max_group": 65, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "flexibility", + "demographic_factor": "Education", + "score_range": 4.6, + "min_level": "College", + "max_level": "No College", + "min_score": 90.9, + "max_score": 95.5, + "se_min": 2.3, + "se_max": 2.1, + "effect_size": 1.3467575324, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.4156111174, + "level_score_std_dev": 2.3, + "se_difference": 3.1144823005, + "z_score": 1.4769709879, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -1.5042731394, + "gap_confidence_interval_95_upper": 10.7042731394, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "flexibility", + "demographic_factor": "Ethnicity", + "score_range": 2.5, + "min_level": "White", + "max_level": "Hispanic", + "min_score": 93.5, + "max_score": 96.0, + "se_min": 2.2, + "se_max": 2.2, + "effect_size": 0.7319334415, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.4156111174, + "level_score_std_dev": 0.93641604, + "se_difference": 3.1112698372, + "z_score": 0.8035304332, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.5979768271, + "gap_confidence_interval_95_upper": 8.5979768271, + "raw_n_min_group": 345, + "raw_n_max_group": 34, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "flexibility", + "demographic_factor": "Politics", + "score_range": 2.2, + "min_level": "Republican", + "max_level": "Democrat", + "min_score": 92.7, + "max_score": 94.9, + "se_min": 2.4, + "se_max": 2.0, + "effect_size": 0.6441014285, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.4156111174, + "level_score_std_dev": 0.956846673, + "se_difference": 3.1240998704, + "z_score": 0.7042028396, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.92312323, + "gap_confidence_interval_95_upper": 8.32312323, + "raw_n_min_group": 163, + "raw_n_max_group": 168, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "flexibility", + "demographic_factor": "Sex", + "score_range": 0.6, + "min_level": "Male", + "max_level": "Female", + "min_score": 93.7, + "max_score": 94.3, + "se_min": 2.3, + "se_max": 2.1, + "effect_size": 0.175664026, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 3.4156111174, + "level_score_std_dev": 0.3, + "se_difference": 3.1144823005, + "z_score": 0.1926483897, + "p_value": 0.9597457904, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.5042731394, + "gap_confidence_interval_95_upper": 6.7042731394, + "raw_n_min_group": 253, + "raw_n_max_group": 258, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "flexibility", + "demographic_factor": "Urbanicity", + "score_range": 1.8, + "min_level": "Suburban", + "max_level": "Rural", + "min_score": 93.0, + "max_score": 94.8, + "se_min": 2.3, + "se_max": 2.1, + "effect_size": 0.5269920779, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.4156111174, + "level_score_std_dev": 0.7874007874, + "se_difference": 3.1144823005, + "z_score": 0.5779451692, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.3042731394, + "gap_confidence_interval_95_upper": 7.9042731394, + "raw_n_min_group": 253, + "raw_n_max_group": 88, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "helpfulness", + "demographic_factor": "Age", + "score_range": 5.1, + "min_level": "25-34", + "max_level": "65+", + "min_score": 87.5, + "max_score": 92.6, + "se_min": 3.6, + "se_max": 2.6, + "effect_size": 1.4158257237, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.6021382539, + "level_score_std_dev": 1.501573249, + "se_difference": 4.4407206622, + "z_score": 1.1484622402, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.6036525634, + "gap_confidence_interval_95_upper": 13.8036525634, + "raw_n_min_group": 106, + "raw_n_max_group": 65, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "helpfulness", + "demographic_factor": "Education", + "score_range": 5.4, + "min_level": "College", + "max_level": "No College", + "min_score": 87.2, + "max_score": 92.6, + "se_min": 2.8, + "se_max": 2.9, + "effect_size": 1.4991095898, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.6021382539, + "level_score_std_dev": 2.7, + "se_difference": 4.0311288741, + "z_score": 1.3395751336, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.5008674104, + "gap_confidence_interval_95_upper": 13.3008674104, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "helpfulness", + "demographic_factor": "Ethnicity", + "score_range": 7.0, + "min_level": "Asian", + "max_level": "Hispanic", + "min_score": 88.0, + "max_score": 95.0, + "se_min": 3.9, + "se_max": 2.5, + "effect_size": 1.9432902089, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.6021382539, + "level_score_std_dev": 2.5262373206, + "se_difference": 4.6324939288, + "z_score": 1.5110651212, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.079521259, + "gap_confidence_interval_95_upper": 16.079521259, + "raw_n_min_group": 40, + "raw_n_max_group": 34, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "helpfulness", + "demographic_factor": "Politics", + "score_range": 2.1, + "min_level": "Democrat", + "max_level": "Republican", + "min_score": 89.7, + "max_score": 91.8, + "se_min": 3.1, + "se_max": 2.5, + "effect_size": 0.5829870627, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.6021382539, + "level_score_std_dev": 0.8730533902, + "se_difference": 3.9824615503, + "z_score": 0.527312059, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.7054812085, + "gap_confidence_interval_95_upper": 9.9054812085, + "raw_n_min_group": 168, + "raw_n_max_group": 163, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "helpfulness", + "demographic_factor": "Sex", + "score_range": 0.4, + "min_level": "Female", + "max_level": "Male", + "min_score": 90.6, + "max_score": 91.0, + "se_min": 2.9, + "se_max": 2.8, + "effect_size": 0.1110451548, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 3.6021382539, + "level_score_std_dev": 0.2, + "se_difference": 4.0311288741, + "z_score": 0.0992277877, + "p_value": 0.9792458562, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.5008674104, + "gap_confidence_interval_95_upper": 8.3008674104, + "raw_n_min_group": 258, + "raw_n_max_group": 253, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "helpfulness", + "demographic_factor": "Urbanicity", + "score_range": 7.0, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 88.0, + "max_score": 95.0, + "se_min": 3.3, + "se_max": 2.0, + "effect_size": 1.9432902089, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.6021382539, + "level_score_std_dev": 2.9810512687, + "se_difference": 3.8587562763, + "z_score": 1.8140560063, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -0.5630233267, + "gap_confidence_interval_95_upper": 14.5630233267, + "raw_n_min_group": 175, + "raw_n_max_group": 88, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "honesty_empathy_fairness", + "demographic_factor": "Age", + "score_range": 6.5, + "min_level": "45-54", + "max_level": "65+", + "min_score": 85.7, + "max_score": 92.2, + "se_min": 3.2, + "se_max": 2.5, + "effect_size": 1.7097424442, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.8017421993, + "level_score_std_dev": 2.1035023704, + "se_difference": 4.0607881008, + "z_score": 1.6006745091, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -1.4589984265, + "gap_confidence_interval_95_upper": 14.4589984265, + "raw_n_min_group": 82, + "raw_n_max_group": 65, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "honesty_empathy_fairness", + "demographic_factor": "Education", + "score_range": 6.5, + "min_level": "College", + "max_level": "No College", + "min_score": 84.7, + "max_score": 91.2, + "se_min": 2.9, + "se_max": 2.9, + "effect_size": 1.7097424442, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.8017421993, + "level_score_std_dev": 3.25, + "se_difference": 4.1012193309, + "z_score": 1.5848945096, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -1.5382421812, + "gap_confidence_interval_95_upper": 14.5382421812, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "honesty_empathy_fairness", + "demographic_factor": "Ethnicity", + "score_range": 2.8, + "min_level": "Hispanic", + "max_level": "African American", + "min_score": 88.5, + "max_score": 91.3, + "se_min": 3.8, + "se_max": 2.8, + "effect_size": 0.7365044375, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.8017421993, + "level_score_std_dev": 1.1712706775, + "se_difference": 4.7201694885, + "z_score": 0.593199038, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.4513621984, + "gap_confidence_interval_95_upper": 12.0513621984, + "raw_n_min_group": 34, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "honesty_empathy_fairness", + "demographic_factor": "Politics", + "score_range": 1.7, + "min_level": "Democrat", + "max_level": "Republican", + "min_score": 88.3, + "max_score": 90.0, + "se_min": 3.1, + "se_max": 2.7, + "effect_size": 0.4471634085, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.8017421993, + "level_score_std_dev": 0.7257180352, + "se_difference": 4.1109609582, + "z_score": 0.4135286171, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.35733542, + "gap_confidence_interval_95_upper": 9.75733542, + "raw_n_min_group": 168, + "raw_n_max_group": 163, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "honesty_empathy_fairness", + "demographic_factor": "Sex", + "score_range": 0.2, + "min_level": "Female", + "max_level": "Male", + "min_score": 88.9, + "max_score": 89.1, + "se_min": 2.9, + "se_max": 2.9, + "effect_size": 0.0526074598, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 3.8017421993, + "level_score_std_dev": 0.1, + "se_difference": 4.1012193309, + "z_score": 0.0487659849, + "p_value": 0.9872735163, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.8382421812, + "gap_confidence_interval_95_upper": 8.2382421812, + "raw_n_min_group": 258, + "raw_n_max_group": 253, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "honesty_empathy_fairness", + "demographic_factor": "Urbanicity", + "score_range": 4.0, + "min_level": "Suburban", + "max_level": "Rural", + "min_score": 87.6, + "max_score": 91.6, + "se_min": 3.0, + "se_max": 2.6, + "effect_size": 1.0521491964, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.8017421993, + "level_score_std_dev": 1.7281975196, + "se_difference": 3.9698866483, + "z_score": 1.0075854437, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.7808348533, + "gap_confidence_interval_95_upper": 11.7808348533, + "raw_n_min_group": 253, + "raw_n_max_group": 88, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "intuitiveness", + "demographic_factor": "Age", + "score_range": 2.3, + "min_level": "25-34", + "max_level": "45-54", + "min_score": 85.0, + "max_score": 87.3, + "se_min": 3.9, + "se_max": 3.4, + "effect_size": 0.7423701678, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.0981848404, + "level_score_std_dev": 0.8193832369, + "se_difference": 5.1739733281, + "z_score": 0.4445326356, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.8408013801, + "gap_confidence_interval_95_upper": 12.4408013801, + "raw_n_min_group": 106, + "raw_n_max_group": 82, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "intuitiveness", + "demographic_factor": "Education", + "score_range": 2.1, + "min_level": "College", + "max_level": "No College", + "min_score": 85.3, + "max_score": 87.4, + "se_min": 3.0, + "se_max": 3.8, + "effect_size": 0.6778162402, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.0981848404, + "level_score_std_dev": 1.05, + "se_difference": 4.8414873748, + "z_score": 0.433751002, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.3891408861, + "gap_confidence_interval_95_upper": 11.5891408861, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "intuitiveness", + "demographic_factor": "Ethnicity", + "score_range": 3.9, + "min_level": "Hispanic", + "max_level": "African American", + "min_score": 84.1, + "max_score": 88.0, + "se_min": 4.8, + "se_max": 3.7, + "effect_size": 1.2588015889, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.0981848404, + "level_score_std_dev": 1.4807092895, + "se_difference": 6.0605280298, + "z_score": 0.6435082852, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.9784166657, + "gap_confidence_interval_95_upper": 15.7784166657, + "raw_n_min_group": 34, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "intuitiveness", + "demographic_factor": "Politics", + "score_range": 0.2, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 86.6, + "max_score": 86.8, + "se_min": 3.6, + "se_max": 3.5, + "effect_size": 0.0645539276, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 3.0981848404, + "level_score_std_dev": 0.0942809042, + "se_difference": 5.0209560843, + "z_score": 0.039833051, + "p_value": 0.9872735163, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.6408930931, + "gap_confidence_interval_95_upper": 10.0408930931, + "raw_n_min_group": 168, + "raw_n_max_group": 185, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "intuitiveness", + "demographic_factor": "Sex", + "score_range": 1.7, + "min_level": "Male", + "max_level": "Female", + "min_score": 85.8, + "max_score": 87.5, + "se_min": 3.7, + "se_max": 3.4, + "effect_size": 0.5487083849, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.0981848404, + "level_score_std_dev": 0.85, + "se_difference": 5.0249378106, + "z_score": 0.3383126447, + "p_value": 0.9429935514, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.1486971333, + "gap_confidence_interval_95_upper": 11.5486971333, + "raw_n_min_group": 253, + "raw_n_max_group": 258, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "intuitiveness", + "demographic_factor": "Urbanicity", + "score_range": 7.3, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 83.9, + "max_score": 91.2, + "se_min": 4.0, + "se_max": 3.0, + "effect_size": 2.3562183588, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.0981848404, + "level_score_std_dev": 3.1329787033, + "se_difference": 5.0, + "z_score": 1.46, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.4998199227, + "gap_confidence_interval_95_upper": 17.0998199227, + "raw_n_min_group": 175, + "raw_n_max_group": 88, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "personality", + "demographic_factor": "Age", + "score_range": 2.4, + "min_level": "18-24", + "max_level": "65+", + "min_score": 77.0, + "max_score": 79.4, + "se_min": 3.7, + "se_max": 3.6, + "effect_size": 0.7131587653, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.365309545, + "level_score_std_dev": 0.894427191, + "se_difference": 5.1623637997, + "z_score": 0.4649033065, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.7180471224, + "gap_confidence_interval_95_upper": 12.5180471224, + "raw_n_min_group": 60, + "raw_n_max_group": 65, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "personality", + "demographic_factor": "Education", + "score_range": 2.7, + "min_level": "College", + "max_level": "No College", + "min_score": 76.9, + "max_score": 79.6, + "se_min": 2.7, + "se_max": 3.7, + "effect_size": 0.802303611, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.365309545, + "level_score_std_dev": 1.35, + "se_difference": 4.5803929962, + "z_score": 0.5894690701, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.2774053077, + "gap_confidence_interval_95_upper": 11.6774053077, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "personality", + "demographic_factor": "Ethnicity", + "score_range": 3.5, + "min_level": "White", + "max_level": "Hispanic", + "min_score": 78.0, + "max_score": 81.5, + "se_min": 3.2, + "se_max": 4.1, + "effect_size": 1.0400231994, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.365309545, + "level_score_std_dev": 1.2891373084, + "se_difference": 5.2009614496, + "z_score": 0.6729524981, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.6936971262, + "gap_confidence_interval_95_upper": 13.6936971262, + "raw_n_min_group": 345, + "raw_n_max_group": 34, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "personality", + "demographic_factor": "Politics", + "score_range": 3.5, + "min_level": "Independent", + "max_level": "Republican", + "min_score": 76.9, + "max_score": 80.4, + "se_min": 3.4, + "se_max": 3.3, + "effect_size": 1.0400231994, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.365309545, + "level_score_std_dev": 1.4613540145, + "se_difference": 4.7381430962, + "z_score": 0.7386860061, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.7865898221, + "gap_confidence_interval_95_upper": 12.7865898221, + "raw_n_min_group": 185, + "raw_n_max_group": 163, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "personality", + "demographic_factor": "Sex", + "score_range": 1.6, + "min_level": "Male", + "max_level": "Female", + "min_score": 77.9, + "max_score": 79.5, + "se_min": 3.4, + "se_max": 3.4, + "effect_size": 0.4754391769, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.365309545, + "level_score_std_dev": 0.8, + "se_difference": 4.8083261121, + "z_score": 0.3327561323, + "p_value": 0.9429935514, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.8241460056, + "gap_confidence_interval_95_upper": 11.0241460056, + "raw_n_min_group": 253, + "raw_n_max_group": 258, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "personality", + "demographic_factor": "Urbanicity", + "score_range": 1.2, + "min_level": "Suburban", + "max_level": "Rural", + "min_score": 78.0, + "max_score": 79.2, + "se_min": 3.3, + "se_max": 3.5, + "effect_size": 0.3565793827, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.365309545, + "level_score_std_dev": 0.5656854249, + "se_difference": 4.8104053883, + "z_score": 0.249459225, + "p_value": 0.9570722129, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.2282213121, + "gap_confidence_interval_95_upper": 10.6282213121, + "raw_n_min_group": 253, + "raw_n_max_group": 88, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "personality_consistency", + "demographic_factor": "Age", + "score_range": 3.4, + "min_level": "65+", + "max_level": "35-44", + "min_score": 81.9, + "max_score": 85.3, + "se_min": 4.7, + "se_max": 3.8, + "effect_size": 1.0651128236, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1921500939, + "level_score_std_dev": 1.1126794487, + "se_difference": 6.0440052945, + "z_score": 0.5625408706, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.4460326996, + "gap_confidence_interval_95_upper": 15.2460326996, + "raw_n_min_group": 65, + "raw_n_max_group": 93, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "personality_consistency", + "demographic_factor": "Education", + "score_range": 6.1, + "min_level": "College", + "max_level": "No College", + "min_score": 79.5, + "max_score": 85.6, + "se_min": 3.9, + "se_max": 4.5, + "effect_size": 1.910937713, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1921500939, + "level_score_std_dev": 3.05, + "se_difference": 5.9548299724, + "z_score": 1.0243785344, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.5712522799, + "gap_confidence_interval_95_upper": 17.7712522799, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "personality_consistency", + "demographic_factor": "Ethnicity", + "score_range": 5.6, + "min_level": "Asian", + "max_level": "Hispanic", + "min_score": 80.3, + "max_score": 85.9, + "se_min": 5.5, + "se_max": 5.0, + "effect_size": 1.7543034742, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1921500939, + "level_score_std_dev": 2.0796634343, + "se_difference": 7.4330343737, + "z_score": 0.7533935293, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.9684796682, + "gap_confidence_interval_95_upper": 20.1684796682, + "raw_n_min_group": 40, + "raw_n_max_group": 34, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "personality_consistency", + "demographic_factor": "Politics", + "score_range": 1.5, + "min_level": "Democrat", + "max_level": "Republican", + "min_score": 82.9, + "max_score": 84.4, + "se_min": 4.4, + "se_max": 4.0, + "effect_size": 0.4699027163, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.1921500939, + "level_score_std_dev": 0.6649979114, + "se_difference": 5.9464274989, + "z_score": 0.2522522977, + "p_value": 0.9570722129, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -10.1547837346, + "gap_confidence_interval_95_upper": 13.1547837346, + "raw_n_min_group": 168, + "raw_n_max_group": 163, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "personality_consistency", + "demographic_factor": "Sex", + "score_range": 0.3, + "min_level": "Male", + "max_level": "Female", + "min_score": 83.4, + "max_score": 83.7, + "se_min": 4.3, + "se_max": 4.2, + "effect_size": 0.0939805433, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 3.1921500939, + "level_score_std_dev": 0.15, + "se_difference": 6.0108235709, + "z_score": 0.049909966, + "p_value": 0.9872735163, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -11.4809977163, + "gap_confidence_interval_95_upper": 12.0809977163, + "raw_n_min_group": 253, + "raw_n_max_group": 258, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "personality_consistency", + "demographic_factor": "Urbanicity", + "score_range": 6.0, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 81.5, + "max_score": 87.5, + "se_min": 4.6, + "se_max": 3.9, + "effect_size": 1.8796108652, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1921500939, + "level_score_std_dev": 2.678722747, + "se_difference": 6.0307545133, + "z_score": 0.9949003871, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.8200616457, + "gap_confidence_interval_95_upper": 17.8200616457, + "raw_n_min_group": 175, + "raw_n_max_group": 88, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "tone_and_language_style", + "demographic_factor": "Age", + "score_range": 6.2, + "min_level": "45-54", + "max_level": "65+", + "min_score": 83.9, + "max_score": 90.1, + "se_min": 4.0, + "se_max": 3.3, + "effect_size": 1.8564677177, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.339675633, + "level_score_std_dev": 2.0747824518, + "se_difference": 5.185556865, + "z_score": 1.1956285817, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.9635046952, + "gap_confidence_interval_95_upper": 16.3635046952, + "raw_n_min_group": 82, + "raw_n_max_group": 65, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "tone_and_language_style", + "demographic_factor": "Education", + "score_range": 5.3, + "min_level": "College", + "max_level": "No College", + "min_score": 84.2, + "max_score": 89.5, + "se_min": 3.4, + "se_max": 3.7, + "effect_size": 1.5869804683, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.339675633, + "level_score_std_dev": 2.65, + "se_difference": 5.0249378106, + "z_score": 1.0547394216, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.5486971333, + "gap_confidence_interval_95_upper": 15.1486971333, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "tone_and_language_style", + "demographic_factor": "Ethnicity", + "score_range": 2.0, + "min_level": "White", + "max_level": "African American", + "min_score": 87.3, + "max_score": 89.3, + "se_min": 3.4, + "se_max": 3.8, + "effect_size": 0.5988605541, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.339675633, + "level_score_std_dev": 0.8645808233, + "se_difference": 5.0990195136, + "z_score": 0.3922322703, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.9938946031, + "gap_confidence_interval_95_upper": 11.9938946031, + "raw_n_min_group": 345, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "tone_and_language_style", + "demographic_factor": "Politics", + "score_range": 2.0, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 87.2, + "max_score": 89.2, + "se_min": 3.8, + "se_max": 3.3, + "effect_size": 0.5988605541, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.339675633, + "level_score_std_dev": 0.8640987598, + "se_difference": 5.0328918129, + "z_score": 0.3973858518, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.8642866913, + "gap_confidence_interval_95_upper": 11.8642866913, + "raw_n_min_group": 168, + "raw_n_max_group": 185, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "tone_and_language_style", + "demographic_factor": "Sex", + "score_range": 0.4, + "min_level": "Female", + "max_level": "Male", + "min_score": 87.5, + "max_score": 87.9, + "se_min": 3.6, + "se_max": 3.6, + "effect_size": 0.1197721108, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 3.339675633, + "level_score_std_dev": 0.2, + "se_difference": 5.0911688245, + "z_score": 0.0785674201, + "p_value": 0.9818838748, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.5785075353, + "gap_confidence_interval_95_upper": 10.3785075353, + "raw_n_min_group": 258, + "raw_n_max_group": 253, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "tone_and_language_style", + "demographic_factor": "Urbanicity", + "score_range": 2.3, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 86.9, + "max_score": 89.2, + "se_min": 3.8, + "se_max": 3.6, + "effect_size": 0.6886896372, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.339675633, + "level_score_std_dev": 1.0208928554, + "se_difference": 5.2345009313, + "z_score": 0.4393924139, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.9594333024, + "gap_confidence_interval_95_upper": 12.5594333024, + "raw_n_min_group": 175, + "raw_n_max_group": 88, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "transparency", + "demographic_factor": "Age", + "score_range": 7.1, + "min_level": "35-44", + "max_level": "18-24", + "min_score": 75.0, + "max_score": 82.1, + "se_min": 4.9, + "se_max": 4.9, + "effect_size": 1.5729640036, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.5137714429, + "level_score_std_dev": 2.974007771, + "se_difference": 6.9296464556, + "z_score": 1.0245832952, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.4818574786, + "gap_confidence_interval_95_upper": 20.6818574786, + "raw_n_min_group": 93, + "raw_n_max_group": 60, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "transparency", + "demographic_factor": "Education", + "score_range": 6.4, + "min_level": "College", + "max_level": "No College", + "min_score": 74.2, + "max_score": 80.6, + "se_min": 4.2, + "se_max": 5.1, + "effect_size": 1.4178830455, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.5137714429, + "level_score_std_dev": 3.2, + "se_difference": 6.6068146637, + "z_score": 0.9686967663, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.5491187933, + "gap_confidence_interval_95_upper": 19.3491187933, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "transparency", + "demographic_factor": "Ethnicity", + "score_range": 7.6, + "min_level": "Hispanic", + "max_level": "African American", + "min_score": 76.5, + "max_score": 84.1, + "se_min": 6.2, + "se_max": 4.6, + "effect_size": 1.6837361165, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.5137714429, + "level_score_std_dev": 2.9427877939, + "se_difference": 7.7201036262, + "z_score": 0.9844427443, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.5311250644, + "gap_confidence_interval_95_upper": 22.7311250644, + "raw_n_min_group": 34, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "transparency", + "demographic_factor": "Politics", + "score_range": 6.3, + "min_level": "Independent", + "max_level": "Republican", + "min_score": 74.2, + "max_score": 80.5, + "se_min": 5.2, + "se_max": 4.4, + "effect_size": 1.3957286229, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.5137714429, + "level_score_std_dev": 2.6106618999, + "se_difference": 6.8117545464, + "z_score": 0.9248718457, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.0507935824, + "gap_confidence_interval_95_upper": 19.6507935824, + "raw_n_min_group": 185, + "raw_n_max_group": 163, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "transparency", + "demographic_factor": "Sex", + "score_range": 2.7, + "min_level": "Male", + "max_level": "Female", + "min_score": 77.0, + "max_score": 79.7, + "se_min": 5.0, + "se_max": 4.7, + "effect_size": 0.5981694098, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 4.5137714429, + "level_score_std_dev": 1.35, + "se_difference": 6.8622153857, + "z_score": 0.3934589412, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -10.7496950102, + "gap_confidence_interval_95_upper": 16.1496950102, + "raw_n_min_group": 253, + "raw_n_max_group": 258, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "transparency", + "demographic_factor": "Urbanicity", + "score_range": 5.1, + "min_level": "Suburban", + "max_level": "Urban", + "min_score": 76.0, + "max_score": 81.1, + "se_min": 4.8, + "se_max": 4.6, + "effect_size": 1.1298755519, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.5137714429, + "level_score_std_dev": 2.0928449536, + "se_difference": 6.6483080554, + "z_score": 0.7671124679, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.9304443468, + "gap_confidence_interval_95_upper": 18.1304443468, + "raw_n_min_group": 253, + "raw_n_max_group": 175, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "trustworthiness", + "demographic_factor": "Age", + "score_range": 6.5, + "min_level": "18-24", + "max_level": "65+", + "min_score": 86.6, + "max_score": 93.1, + "se_min": 0.0, + "se_max": 0.0, + "effect_size": 2.0541867489, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1642692679, + "level_score_std_dev": 2.1100289624, + "se_difference": 0.0, + "z_score": 9999.9, + "p_value": 0.0, + "is_statistically_significant": true, + "gap_confidence_interval_95_lower": 6.5, + "gap_confidence_interval_95_upper": 6.5, + "raw_n_min_group": 60, + "raw_n_max_group": 65, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": true + }, + { + "model": "claude-3.7-sonnet", + "category": "trustworthiness", + "demographic_factor": "Education", + "score_range": 3.9, + "min_level": "College", + "max_level": "No College", + "min_score": 88.5, + "max_score": 92.4, + "se_min": 0.0, + "se_max": 0.0, + "effect_size": 1.2325120493, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1642692679, + "level_score_std_dev": 1.95, + "se_difference": 0.0, + "z_score": 9999.9, + "p_value": 0.0, + "is_statistically_significant": true, + "gap_confidence_interval_95_lower": 3.9, + "gap_confidence_interval_95_upper": 3.9, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": true + }, + { + "model": "claude-3.7-sonnet", + "category": "trustworthiness", + "demographic_factor": "Ethnicity", + "score_range": 1.4, + "min_level": "Hispanic", + "max_level": "African American", + "min_score": 90.4, + "max_score": 91.8, + "se_min": 0.0, + "se_max": 0.0, + "effect_size": 0.4424402228, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.1642692679, + "level_score_std_dev": 0.5309190145, + "se_difference": 0.0, + "z_score": 9999.9, + "p_value": 0.0, + "is_statistically_significant": true, + "gap_confidence_interval_95_lower": 1.4, + "gap_confidence_interval_95_upper": 1.4, + "raw_n_min_group": 34, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "trustworthiness", + "demographic_factor": "Politics", + "score_range": 1.1, + "min_level": "Democrat", + "max_level": "Republican", + "min_score": 90.5, + "max_score": 91.6, + "se_min": 0.0, + "se_max": 0.0, + "effect_size": 0.3476316037, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.1642692679, + "level_score_std_dev": 0.4784233365, + "se_difference": 0.0, + "z_score": 9999.9, + "p_value": 0.0, + "is_statistically_significant": true, + "gap_confidence_interval_95_lower": 1.1, + "gap_confidence_interval_95_upper": 1.1, + "raw_n_min_group": 168, + "raw_n_max_group": 163, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "trustworthiness", + "demographic_factor": "Sex", + "score_range": 1.8, + "min_level": "Male", + "max_level": "Female", + "min_score": 90.1, + "max_score": 91.9, + "se_min": 0.0, + "se_max": 0.0, + "effect_size": 0.5688517151, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.1642692679, + "level_score_std_dev": 0.9, + "se_difference": 0.0, + "z_score": 9999.9, + "p_value": 0.0, + "is_statistically_significant": true, + "gap_confidence_interval_95_lower": 1.8, + "gap_confidence_interval_95_upper": 1.8, + "raw_n_min_group": 253, + "raw_n_max_group": 258, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "trustworthiness", + "demographic_factor": "Urbanicity", + "score_range": 4.0, + "min_level": "Suburban", + "max_level": "Rural", + "min_score": 89.4, + "max_score": 93.4, + "se_min": 0.0, + "se_max": 0.0, + "effect_size": 1.2641149224, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1642692679, + "level_score_std_dev": 1.6438437341, + "se_difference": 0.0, + "z_score": 9999.9, + "p_value": 0.0, + "is_statistically_significant": true, + "gap_confidence_interval_95_lower": 4.0, + "gap_confidence_interval_95_upper": 4.0, + "raw_n_min_group": 253, + "raw_n_max_group": 88, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": true + }, + { + "model": "claude-3.7-sonnet", + "category": "understanding", + "demographic_factor": "Age", + "score_range": 5.4, + "min_level": "18-24", + "max_level": "35-44", + "min_score": 86.0, + "max_score": 91.4, + "se_min": 3.4, + "se_max": 2.3, + "effect_size": 1.8198252905, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9673178124, + "level_score_std_dev": 1.8469945557, + "se_difference": 4.1048751504, + "z_score": 1.3155089503, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.6454074557, + "gap_confidence_interval_95_upper": 13.4454074557, + "raw_n_min_group": 60, + "raw_n_max_group": 93, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "understanding", + "demographic_factor": "Education", + "score_range": 3.4, + "min_level": "College", + "max_level": "No College", + "min_score": 88.0, + "max_score": 91.4, + "se_min": 2.3, + "se_max": 2.8, + "effect_size": 1.1458159236, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9673178124, + "level_score_std_dev": 1.7, + "se_difference": 3.6235341864, + "z_score": 0.9383104519, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.7019965021, + "gap_confidence_interval_95_upper": 10.5019965021, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "understanding", + "demographic_factor": "Ethnicity", + "score_range": 4.4, + "min_level": "Asian", + "max_level": "Hispanic", + "min_score": 89.2, + "max_score": 93.6, + "se_min": 3.3, + "se_max": 2.6, + "effect_size": 1.4828206071, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9673178124, + "level_score_std_dev": 1.6881943016, + "se_difference": 4.2011903075, + "z_score": 1.0473222296, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.8341816949, + "gap_confidence_interval_95_upper": 12.6341816949, + "raw_n_min_group": 40, + "raw_n_max_group": 34, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "understanding", + "demographic_factor": "Politics", + "score_range": 3.9, + "min_level": "Republican", + "max_level": "Independent", + "min_score": 88.9, + "max_score": 92.8, + "se_min": 2.8, + "se_max": 2.2, + "effect_size": 1.3143182654, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9673178124, + "level_score_std_dev": 1.6062378404, + "se_difference": 3.560898763, + "z_score": 1.0952291148, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.079233328, + "gap_confidence_interval_95_upper": 10.879233328, + "raw_n_min_group": 163, + "raw_n_max_group": 185, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "understanding", + "demographic_factor": "Sex", + "score_range": 1.1, + "min_level": "Male", + "max_level": "Female", + "min_score": 89.7, + "max_score": 90.8, + "se_min": 2.7, + "se_max": 2.5, + "effect_size": 0.3707051518, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 2.9673178124, + "level_score_std_dev": 0.55, + "se_difference": 3.6796738986, + "z_score": 0.2989395339, + "p_value": 0.9469805126, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.1120283161, + "gap_confidence_interval_95_upper": 8.3120283161, + "raw_n_min_group": 253, + "raw_n_max_group": 258, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "understanding", + "demographic_factor": "Urbanicity", + "score_range": 3.5, + "min_level": "Suburban", + "max_level": "Rural", + "min_score": 89.0, + "max_score": 92.5, + "se_min": 2.7, + "se_max": 2.4, + "effect_size": 1.179516392, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9673178124, + "level_score_std_dev": 1.497405163, + "se_difference": 3.6124783736, + "z_score": 0.9688639316, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.5803275073, + "gap_confidence_interval_95_upper": 10.5803275073, + "raw_n_min_group": 253, + "raw_n_max_group": 88, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "usefulness", + "demographic_factor": "Age", + "score_range": 4.9, + "min_level": "45-54", + "max_level": "65+", + "min_score": 87.6, + "max_score": 92.5, + "se_min": 3.8, + "se_max": 3.0, + "effect_size": 1.4578287815, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.3611628899, + "level_score_std_dev": 1.5485656446, + "se_difference": 4.8414873748, + "z_score": 1.0120856713, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.5891408861, + "gap_confidence_interval_95_upper": 14.3891408861, + "raw_n_min_group": 82, + "raw_n_max_group": 65, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "usefulness", + "demographic_factor": "Education", + "score_range": 6.4, + "min_level": "College", + "max_level": "No College", + "min_score": 86.7, + "max_score": 93.1, + "se_min": 3.4, + "se_max": 3.2, + "effect_size": 1.9041028982, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.3611628899, + "level_score_std_dev": 3.2, + "se_difference": 4.669047012, + "z_score": 1.370729398, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.7511639856, + "gap_confidence_interval_95_upper": 15.5511639856, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "usefulness", + "demographic_factor": "Ethnicity", + "score_range": 2.2, + "min_level": "Asian", + "max_level": "African American", + "min_score": 89.6, + "max_score": 91.8, + "se_min": 4.1, + "se_max": 3.4, + "effect_size": 0.6545353713, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.3611628899, + "level_score_std_dev": 0.8166241486, + "se_difference": 5.3263495942, + "z_score": 0.4130408568, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.2394533738, + "gap_confidence_interval_95_upper": 12.6394533738, + "raw_n_min_group": 40, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "usefulness", + "demographic_factor": "Politics", + "score_range": 3.0, + "min_level": "Republican", + "max_level": "Independent", + "min_score": 89.4, + "max_score": 92.4, + "se_min": 3.5, + "se_max": 3.0, + "effect_size": 0.8925482335, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.3611628899, + "level_score_std_dev": 1.2814921858, + "se_difference": 4.6097722286, + "z_score": 0.6507913735, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.0349875451, + "gap_confidence_interval_95_upper": 12.0349875451, + "raw_n_min_group": 163, + "raw_n_max_group": 185, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "usefulness", + "demographic_factor": "Sex", + "score_range": 1.0, + "min_level": "Male", + "max_level": "Female", + "min_score": 90.4, + "max_score": 91.4, + "se_min": 3.4, + "se_max": 3.1, + "effect_size": 0.2975160778, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.3611628899, + "level_score_std_dev": 0.5, + "se_difference": 4.6010868281, + "z_score": 0.2173399541, + "p_value": 0.9570722129, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.0179644729, + "gap_confidence_interval_95_upper": 10.0179644729, + "raw_n_min_group": 253, + "raw_n_max_group": 258, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "claude-3.7-sonnet", + "category": "usefulness", + "demographic_factor": "Urbanicity", + "score_range": 5.3, + "min_level": "Suburban", + "max_level": "Rural", + "min_score": 89.1, + "max_score": 94.4, + "se_min": 3.5, + "se_max": 2.6, + "effect_size": 1.5768352126, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.3611628899, + "level_score_std_dev": 2.3156472577, + "se_difference": 4.3600458713, + "z_score": 1.2155835412, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.2455328787, + "gap_confidence_interval_95_upper": 13.8455328787, + "raw_n_min_group": 253, + "raw_n_max_group": 88, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "accuracy", + "demographic_factor": "Age", + "score_range": 11.1, + "min_level": "45-54", + "max_level": "65+", + "min_score": 80.0, + "max_score": 91.1, + "se_min": 5.1, + "se_max": 3.3, + "effect_size": 3.7780718708, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9380065757, + "level_score_std_dev": 3.3264929814, + "se_difference": 6.0745370194, + "z_score": 1.8272997538, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -0.8058737808, + "gap_confidence_interval_95_upper": 23.0058737808, + "raw_n_min_group": 82, + "raw_n_max_group": 65, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "accuracy", + "demographic_factor": "Education", + "score_range": 12.2, + "min_level": "College", + "max_level": "No College", + "min_score": 79.1, + "max_score": 91.3, + "se_min": 4.6, + "se_max": 3.7, + "effect_size": 4.1524753896, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9380065757, + "level_score_std_dev": 6.1, + "se_difference": 5.9033888573, + "z_score": 2.0666095856, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": 0.629570453, + "gap_confidence_interval_95_upper": 23.770429547, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "accuracy", + "demographic_factor": "Ethnicity", + "score_range": 2.1, + "min_level": "Hispanic", + "max_level": "Asian", + "min_score": 85.6, + "max_score": 87.7, + "se_min": 5.5, + "se_max": 4.4, + "effect_size": 0.7147703539, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 2.9380065757, + "level_score_std_dev": 0.8031189202, + "se_difference": 7.0434366612, + "z_score": 0.2981499091, + "p_value": 0.9469805126, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -11.7048821833, + "gap_confidence_interval_95_upper": 15.9048821833, + "raw_n_min_group": 34, + "raw_n_max_group": 40, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "accuracy", + "demographic_factor": "Politics", + "score_range": 1.2, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 86.5, + "max_score": 87.7, + "se_min": 4.3, + "se_max": 3.9, + "effect_size": 0.4084402023, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 2.9380065757, + "level_score_std_dev": 0.5656854249, + "se_difference": 5.8051701095, + "z_score": 0.2067122888, + "p_value": 0.9597457904, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -10.1779243387, + "gap_confidence_interval_95_upper": 12.5779243387, + "raw_n_min_group": 168, + "raw_n_max_group": 185, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "accuracy", + "demographic_factor": "Sex", + "score_range": 0.2, + "min_level": "Female", + "max_level": "Male", + "min_score": 87.1, + "max_score": 87.3, + "se_min": 4.0, + "se_max": 4.0, + "effect_size": 0.068073367, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 2.9380065757, + "level_score_std_dev": 0.1, + "se_difference": 5.6568542495, + "z_score": 0.0353553391, + "p_value": 0.9894654221, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -10.8872305948, + "gap_confidence_interval_95_upper": 11.2872305948, + "raw_n_min_group": 263, + "raw_n_max_group": 252, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "accuracy", + "demographic_factor": "Urbanicity", + "score_range": 4.9, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 85.4, + "max_score": 90.3, + "se_min": 4.4, + "se_max": 3.6, + "effect_size": 1.6677974925, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9380065757, + "level_score_std_dev": 2.1638443156, + "se_difference": 5.6850681614, + "z_score": 0.8619069923, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.242528846, + "gap_confidence_interval_95_upper": 16.042528846, + "raw_n_min_group": 176, + "raw_n_max_group": 89, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "adaptiveness", + "demographic_factor": "Age", + "score_range": 5.5, + "min_level": "18-24", + "max_level": "65+", + "min_score": 76.4, + "max_score": 81.9, + "se_min": 3.7, + "se_max": 3.4, + "effect_size": 1.4035060825, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.9187575093, + "level_score_std_dev": 1.7339742405, + "se_difference": 5.0249378106, + "z_score": 1.0945409092, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.3486971333, + "gap_confidence_interval_95_upper": 15.3486971333, + "raw_n_min_group": 60, + "raw_n_max_group": 65, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "adaptiveness", + "demographic_factor": "Education", + "score_range": 3.1, + "min_level": "College", + "max_level": "No College", + "min_score": 77.8, + "max_score": 80.9, + "se_min": 2.8, + "se_max": 3.7, + "effect_size": 0.7910670647, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.9187575093, + "level_score_std_dev": 1.55, + "se_difference": 4.6400431032, + "z_score": 0.668097242, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.9943173691, + "gap_confidence_interval_95_upper": 12.1943173691, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "adaptiveness", + "demographic_factor": "Ethnicity", + "score_range": 3.8, + "min_level": "White", + "max_level": "Asian", + "min_score": 79.4, + "max_score": 83.2, + "se_min": 3.2, + "se_max": 3.9, + "effect_size": 0.9696951115, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.9187575093, + "level_score_std_dev": 1.4212670404, + "se_difference": 5.0447993023, + "z_score": 0.7532509764, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.0876249416, + "gap_confidence_interval_95_upper": 13.6876249416, + "raw_n_min_group": 349, + "raw_n_max_group": 40, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "adaptiveness", + "demographic_factor": "Politics", + "score_range": 1.7, + "min_level": "Republican", + "max_level": "Independent", + "min_score": 79.0, + "max_score": 80.7, + "se_min": 3.3, + "se_max": 3.4, + "effect_size": 0.4338109709, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.9187575093, + "level_score_std_dev": 0.7133644853, + "se_difference": 4.7381430962, + "z_score": 0.3587903458, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.5865898221, + "gap_confidence_interval_95_upper": 10.9865898221, + "raw_n_min_group": 167, + "raw_n_max_group": 185, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "adaptiveness", + "demographic_factor": "Sex", + "score_range": 0.1, + "min_level": "Male", + "max_level": "Female", + "min_score": 79.8, + "max_score": 79.9, + "se_min": 3.4, + "se_max": 3.4, + "effect_size": 0.0255182924, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 3.9187575093, + "level_score_std_dev": 0.05, + "se_difference": 4.8083261121, + "z_score": 0.0207972583, + "p_value": 0.9924215291, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.3241460056, + "gap_confidence_interval_95_upper": 9.5241460056, + "raw_n_min_group": 252, + "raw_n_max_group": 263, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "adaptiveness", + "demographic_factor": "Urbanicity", + "score_range": 2.9, + "min_level": "Suburban", + "max_level": "Rural", + "min_score": 78.6, + "max_score": 81.5, + "se_min": 3.3, + "se_max": 3.5, + "effect_size": 0.7400304798, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.9187575093, + "level_score_std_dev": 1.1897712198, + "se_difference": 4.8104053883, + "z_score": 0.6028597937, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.5282213121, + "gap_confidence_interval_95_upper": 12.3282213121, + "raw_n_min_group": 255, + "raw_n_max_group": 89, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "background_and_culture", + "demographic_factor": "Age", + "score_range": 9.7, + "min_level": "18-24", + "max_level": "55-64", + "min_score": 65.9, + "max_score": 75.6, + "se_min": 3.3, + "se_max": 3.7, + "effect_size": 2.8042377298, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.459050528, + "level_score_std_dev": 3.3483412145, + "se_difference": 4.9578221025, + "z_score": 1.9565042471, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -0.0171527626, + "gap_confidence_interval_95_upper": 19.4171527626, + "raw_n_min_group": 60, + "raw_n_max_group": 113, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "background_and_culture", + "demographic_factor": "Education", + "score_range": 5.7, + "min_level": "College", + "max_level": "No College", + "min_score": 69.8, + "max_score": 75.5, + "se_min": 2.9, + "se_max": 4.3, + "effect_size": 1.6478510371, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.459050528, + "level_score_std_dev": 2.85, + "se_difference": 5.186520992, + "z_score": 1.0990025894, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.4653943493, + "gap_confidence_interval_95_upper": 15.8653943493, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "background_and_culture", + "demographic_factor": "Ethnicity", + "score_range": 2.7, + "min_level": "Hispanic", + "max_level": "African American", + "min_score": 73.2, + "max_score": 75.9, + "se_min": 4.7, + "se_max": 4.5, + "effect_size": 0.7805610176, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.459050528, + "level_score_std_dev": 1.0825317547, + "se_difference": 6.506919394, + "z_score": 0.4149428995, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -10.0533276625, + "gap_confidence_interval_95_upper": 15.4533276625, + "raw_n_min_group": 34, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "background_and_culture", + "demographic_factor": "Politics", + "score_range": 0.5, + "min_level": "Independent", + "max_level": "Republican", + "min_score": 73.3, + "max_score": 73.8, + "se_min": 3.8, + "se_max": 3.8, + "effect_size": 0.1445483366, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 3.459050528, + "level_score_std_dev": 0.2054804668, + "se_difference": 5.374011537, + "z_score": 0.0930403659, + "p_value": 0.9801302996, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -10.0328690651, + "gap_confidence_interval_95_upper": 11.0328690651, + "raw_n_min_group": 185, + "raw_n_max_group": 167, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "background_and_culture", + "demographic_factor": "Sex", + "score_range": 0.3, + "min_level": "Male", + "max_level": "Female", + "min_score": 73.4, + "max_score": 73.7, + "se_min": 3.9, + "se_max": 3.9, + "effect_size": 0.086729002, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 3.459050528, + "level_score_std_dev": 0.15, + "se_difference": 5.5154328933, + "z_score": 0.0543928293, + "p_value": 0.9869756083, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -10.5100498299, + "gap_confidence_interval_95_upper": 11.1100498299, + "raw_n_min_group": 252, + "raw_n_max_group": 263, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "background_and_culture", + "demographic_factor": "Urbanicity", + "score_range": 2.0, + "min_level": "Suburban", + "max_level": "Urban", + "min_score": 72.5, + "max_score": 74.5, + "se_min": 3.7, + "se_max": 4.0, + "effect_size": 0.5781933463, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.459050528, + "level_score_std_dev": 0.8640987598, + "se_difference": 5.4488530903, + "z_score": 0.3670497198, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.6795558141, + "gap_confidence_interval_95_upper": 12.6795558141, + "raw_n_min_group": 255, + "raw_n_max_group": 176, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "bias_and_stereotypes", + "demographic_factor": "Age", + "score_range": 9.6, + "min_level": "18-24", + "max_level": "55-64", + "min_score": 78.6, + "max_score": 88.2, + "se_min": 5.9, + "se_max": 3.3, + "effect_size": 3.4481029568, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.7841396038, + "level_score_std_dev": 3.5374190215, + "se_difference": 6.7601775125, + "z_score": 1.4200810529, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.6497044535, + "gap_confidence_interval_95_upper": 22.8497044535, + "raw_n_min_group": 60, + "raw_n_max_group": 113, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "bias_and_stereotypes", + "demographic_factor": "Education", + "score_range": 10.7, + "min_level": "College", + "max_level": "No College", + "min_score": 79.3, + "max_score": 90.0, + "se_min": 4.2, + "se_max": 3.8, + "effect_size": 3.8431980873, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.7841396038, + "level_score_std_dev": 5.35, + "se_difference": 5.6639209034, + "z_score": 1.8891506754, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -0.401080982, + "gap_confidence_interval_95_upper": 21.801080982, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "bias_and_stereotypes", + "demographic_factor": "Ethnicity", + "score_range": 4.6, + "min_level": "Asian", + "max_level": "African American", + "min_score": 83.4, + "max_score": 88.0, + "se_min": 5.2, + "se_max": 4.2, + "effect_size": 1.6522160001, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.7841396038, + "level_score_std_dev": 1.957677195, + "se_difference": 6.6843099868, + "z_score": 0.6881787363, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.5010068357, + "gap_confidence_interval_95_upper": 17.7010068357, + "raw_n_min_group": 40, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "bias_and_stereotypes", + "demographic_factor": "Politics", + "score_range": 1.2, + "min_level": "Republican", + "max_level": "Independent", + "min_score": 86.1, + "max_score": 87.3, + "se_min": 3.9, + "se_max": 3.8, + "effect_size": 0.4310128696, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 2.7841396038, + "level_score_std_dev": 0.5436502143, + "se_difference": 5.445181356, + "z_score": 0.2203783348, + "p_value": 0.9570722129, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.4723593471, + "gap_confidence_interval_95_upper": 11.8723593471, + "raw_n_min_group": 167, + "raw_n_max_group": 185, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "bias_and_stereotypes", + "demographic_factor": "Sex", + "score_range": 0.4, + "min_level": "Male", + "max_level": "Female", + "min_score": 86.2, + "max_score": 86.6, + "se_min": 4.0, + "se_max": 3.9, + "effect_size": 0.1436709565, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 2.7841396038, + "level_score_std_dev": 0.2, + "se_difference": 5.5865910894, + "z_score": 0.071600014, + "p_value": 0.9818838748, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -10.5495173315, + "gap_confidence_interval_95_upper": 11.3495173315, + "raw_n_min_group": 252, + "raw_n_max_group": 263, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "bias_and_stereotypes", + "demographic_factor": "Urbanicity", + "score_range": 3.4, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 85.1, + "max_score": 88.5, + "se_min": 4.2, + "se_max": 3.8, + "effect_size": 1.2212031305, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.7841396038, + "level_score_std_dev": 1.4817407181, + "se_difference": 5.6639209034, + "z_score": 0.6002908688, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.701080982, + "gap_confidence_interval_95_upper": 14.501080982, + "raw_n_min_group": 176, + "raw_n_max_group": 89, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "clarity", + "demographic_factor": "Age", + "score_range": 8.5, + "min_level": "45-54", + "max_level": "65+", + "min_score": 70.6, + "max_score": 79.1, + "se_min": 5.8, + "se_max": 5.3, + "effect_size": 1.8825926355, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.5150500643, + "level_score_std_dev": 3.0208534482, + "se_difference": 7.8568441502, + "z_score": 1.0818593111, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.8991315665, + "gap_confidence_interval_95_upper": 23.8991315665, + "raw_n_min_group": 82, + "raw_n_max_group": 65, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "clarity", + "demographic_factor": "Education", + "score_range": 3.6, + "min_level": "College", + "max_level": "No College", + "min_score": 74.0, + "max_score": 77.6, + "se_min": 4.6, + "se_max": 5.8, + "effect_size": 0.7973333515, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 4.5150500643, + "level_score_std_dev": 1.8, + "se_difference": 7.4027022093, + "z_score": 0.4863089043, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -10.9090297186, + "gap_confidence_interval_95_upper": 18.1090297186, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "clarity", + "demographic_factor": "Ethnicity", + "score_range": 9.6, + "min_level": "Hispanic", + "max_level": "Asian", + "min_score": 68.6, + "max_score": 78.2, + "se_min": 7.2, + "se_max": 6.2, + "effect_size": 2.1262222707, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.5150500643, + "level_score_std_dev": 3.9452503089, + "se_difference": 9.5015788162, + "z_score": 1.0103584031, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.022752276, + "gap_confidence_interval_95_upper": 28.222752276, + "raw_n_min_group": 34, + "raw_n_max_group": 40, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "clarity", + "demographic_factor": "Politics", + "score_range": 0.6, + "min_level": "Independent", + "max_level": "Democrat", + "min_score": 76.0, + "max_score": 76.6, + "se_min": 5.5, + "se_max": 5.6, + "effect_size": 0.1328888919, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 4.5150500643, + "level_score_std_dev": 0.2494438258, + "se_difference": 7.8492037813, + "z_score": 0.0764408744, + "p_value": 0.9818838748, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -14.7841567186, + "gap_confidence_interval_95_upper": 15.9841567186, + "raw_n_min_group": 185, + "raw_n_max_group": 168, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "clarity", + "demographic_factor": "Sex", + "score_range": 1.6, + "min_level": "Male", + "max_level": "Female", + "min_score": 75.5, + "max_score": 77.1, + "se_min": 5.6, + "se_max": 5.3, + "effect_size": 0.3543703784, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 4.5150500643, + "level_score_std_dev": 0.8, + "se_difference": 7.7103826105, + "z_score": 0.2075123999, + "p_value": 0.9597457904, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -13.5120722236, + "gap_confidence_interval_95_upper": 16.7120722236, + "raw_n_min_group": 252, + "raw_n_max_group": 263, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "clarity", + "demographic_factor": "Urbanicity", + "score_range": 2.3, + "min_level": "Suburban", + "max_level": "Rural", + "min_score": 75.0, + "max_score": 77.3, + "se_min": 5.4, + "se_max": 5.6, + "effect_size": 0.509407419, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 4.5150500643, + "level_score_std_dev": 1.0402991023, + "se_difference": 7.7794601355, + "z_score": 0.2956503356, + "p_value": 0.9469805126, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -12.9474616848, + "gap_confidence_interval_95_upper": 17.5474616848, + "raw_n_min_group": 255, + "raw_n_max_group": 89, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "communication", + "demographic_factor": "Age", + "score_range": 7.7, + "min_level": "18-24", + "max_level": "65+", + "min_score": 75.2, + "max_score": 82.9, + "se_min": 3.8, + "se_max": 3.5, + "effect_size": 1.9048867713, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.0422350116, + "level_score_std_dev": 2.6606703416, + "se_difference": 5.1662365412, + "z_score": 1.4904466605, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.4256375564, + "gap_confidence_interval_95_upper": 17.8256375564, + "raw_n_min_group": 60, + "raw_n_max_group": 65, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "communication", + "demographic_factor": "Education", + "score_range": 5.1, + "min_level": "College", + "max_level": "No College", + "min_score": 76.8, + "max_score": 81.9, + "se_min": 2.9, + "se_max": 3.8, + "effect_size": 1.2616782511, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.0422350116, + "level_score_std_dev": 2.55, + "se_difference": 4.7801673611, + "z_score": 1.0669082513, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.2689558678, + "gap_confidence_interval_95_upper": 14.4689558678, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "communication", + "demographic_factor": "Ethnicity", + "score_range": 2.8, + "min_level": "Hispanic", + "max_level": "African American", + "min_score": 79.3, + "max_score": 82.1, + "se_min": 4.2, + "se_max": 3.8, + "effect_size": 0.6926860986, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 4.0422350116, + "level_score_std_dev": 1.1779218989, + "se_difference": 5.6639209034, + "z_score": 0.4943571861, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.301080982, + "gap_confidence_interval_95_upper": 13.901080982, + "raw_n_min_group": 34, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "communication", + "demographic_factor": "Politics", + "score_range": 1.4, + "min_level": "Independent", + "max_level": "Democrat", + "min_score": 79.2, + "max_score": 80.6, + "se_min": 3.5, + "se_max": 3.6, + "effect_size": 0.3463430493, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 4.0422350116, + "level_score_std_dev": 0.5792715732, + "se_difference": 5.0209560843, + "z_score": 0.2788313573, + "p_value": 0.9523211171, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.4408930931, + "gap_confidence_interval_95_upper": 11.2408930931, + "raw_n_min_group": 185, + "raw_n_max_group": 168, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "communication", + "demographic_factor": "Sex", + "score_range": 1.3, + "min_level": "Male", + "max_level": "Female", + "min_score": 79.5, + "max_score": 80.8, + "se_min": 3.5, + "se_max": 3.5, + "effect_size": 0.3216042601, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 4.0422350116, + "level_score_std_dev": 0.65, + "se_difference": 4.9497474683, + "z_score": 0.2626396616, + "p_value": 0.9570722129, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.4013267704, + "gap_confidence_interval_95_upper": 11.0013267704, + "raw_n_min_group": 252, + "raw_n_max_group": 263, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "communication", + "demographic_factor": "Urbanicity", + "score_range": 5.3, + "min_level": "Suburban", + "max_level": "Rural", + "min_score": 78.1, + "max_score": 83.4, + "se_min": 3.4, + "se_max": 3.6, + "effect_size": 1.3111558296, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.0422350116, + "level_score_std_dev": 2.2196095753, + "se_difference": 4.9517673613, + "z_score": 1.070324919, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.4052856879, + "gap_confidence_interval_95_upper": 15.0052856879, + "raw_n_min_group": 255, + "raw_n_max_group": 89, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "comprehensiveness", + "demographic_factor": "Age", + "score_range": 9.8, + "min_level": "18-24", + "max_level": "65+", + "min_score": 75.3, + "max_score": 85.1, + "se_min": 6.1, + "se_max": 4.4, + "effect_size": 2.7357585934, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.5821874137, + "level_score_std_dev": 3.7495184876, + "se_difference": 7.5213030786, + "z_score": 1.3029657092, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.9414831508, + "gap_confidence_interval_95_upper": 24.5414831508, + "raw_n_min_group": 60, + "raw_n_max_group": 65, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "comprehensiveness", + "demographic_factor": "Education", + "score_range": 4.8, + "min_level": "College", + "max_level": "No College", + "min_score": 79.4, + "max_score": 84.2, + "se_min": 4.1, + "se_max": 4.8, + "effect_size": 1.3399633927, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.5821874137, + "level_score_std_dev": 2.4, + "se_difference": 6.3126856408, + "z_score": 0.7603736782, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.5726365018, + "gap_confidence_interval_95_upper": 17.1726365018, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "comprehensiveness", + "demographic_factor": "Ethnicity", + "score_range": 3.6, + "min_level": "Hispanic", + "max_level": "Asian", + "min_score": 80.1, + "max_score": 83.7, + "se_min": 6.0, + "se_max": 5.6, + "effect_size": 1.0049725445, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.5821874137, + "level_score_std_dev": 1.338609353, + "se_difference": 8.2073138115, + "z_score": 0.4386331609, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -12.4860394803, + "gap_confidence_interval_95_upper": 19.6860394803, + "raw_n_min_group": 34, + "raw_n_max_group": 40, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "comprehensiveness", + "demographic_factor": "Politics", + "score_range": 2.8, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 81.5, + "max_score": 84.3, + "se_min": 4.8, + "se_max": 4.3, + "effect_size": 0.7816453124, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.5821874137, + "level_score_std_dev": 1.1440668201, + "se_difference": 6.4443773943, + "z_score": 0.4344872792, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.8307475956, + "gap_confidence_interval_95_upper": 15.4307475956, + "raw_n_min_group": 168, + "raw_n_max_group": 185, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "comprehensiveness", + "demographic_factor": "Sex", + "score_range": 0.6, + "min_level": "Female", + "max_level": "Male", + "min_score": 82.3, + "max_score": 82.9, + "se_min": 4.6, + "se_max": 4.5, + "effect_size": 0.1674954241, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 3.5821874137, + "level_score_std_dev": 0.3, + "se_difference": 6.4350602173, + "z_score": 0.0932392207, + "p_value": 0.9801302996, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -12.0124862642, + "gap_confidence_interval_95_upper": 13.2124862642, + "raw_n_min_group": 263, + "raw_n_max_group": 252, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "comprehensiveness", + "demographic_factor": "Urbanicity", + "score_range": 4.8, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 80.7, + "max_score": 85.5, + "se_min": 4.9, + "se_max": 4.3, + "effect_size": 1.3399633927, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.5821874137, + "level_score_std_dev": 2.0531818126, + "se_difference": 6.5192024052, + "z_score": 0.7362863893, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.9774019221, + "gap_confidence_interval_95_upper": 17.5774019221, + "raw_n_min_group": 176, + "raw_n_max_group": 89, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "confidence", + "demographic_factor": "Age", + "score_range": 7.1, + "min_level": "45-54", + "max_level": "65+", + "min_score": 79.7, + "max_score": 86.8, + "se_min": 4.7, + "se_max": 3.9, + "effect_size": 2.1592422488, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.2881905696, + "level_score_std_dev": 2.4866309202, + "se_difference": 6.1073725938, + "z_score": 1.1625293677, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.8702303241, + "gap_confidence_interval_95_upper": 19.0702303241, + "raw_n_min_group": 82, + "raw_n_max_group": 65, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "confidence", + "demographic_factor": "Education", + "score_range": 7.0, + "min_level": "College", + "max_level": "No College", + "min_score": 79.0, + "max_score": 86.0, + "se_min": 4.0, + "se_max": 4.4, + "effect_size": 2.1288303861, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.2881905696, + "level_score_std_dev": 3.5, + "se_difference": 5.9464274989, + "z_score": 1.1771773895, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.6547837346, + "gap_confidence_interval_95_upper": 18.6547837346, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "confidence", + "demographic_factor": "Ethnicity", + "score_range": 9.4, + "min_level": "Hispanic", + "max_level": "Asian", + "min_score": 78.7, + "max_score": 88.1, + "se_min": 6.0, + "se_max": 4.3, + "effect_size": 2.8587150899, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.2881905696, + "level_score_std_dev": 3.3662850444, + "se_difference": 7.3817342136, + "z_score": 1.2734134999, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.0679332021, + "gap_confidence_interval_95_upper": 23.8679332021, + "raw_n_min_group": 34, + "raw_n_max_group": 40, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "confidence", + "demographic_factor": "Politics", + "score_range": 1.8, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 83.3, + "max_score": 85.1, + "se_min": 4.5, + "se_max": 4.1, + "effect_size": 0.5474135279, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.2881905696, + "level_score_std_dev": 0.8259674462, + "se_difference": 6.0876925021, + "z_score": 0.2956785349, + "p_value": 0.9469805126, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -10.1316580531, + "gap_confidence_interval_95_upper": 13.7316580531, + "raw_n_min_group": 168, + "raw_n_max_group": 185, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "confidence", + "demographic_factor": "Sex", + "score_range": 1.3, + "min_level": "Male", + "max_level": "Female", + "min_score": 83.0, + "max_score": 84.3, + "se_min": 4.4, + "se_max": 4.2, + "effect_size": 0.3953542146, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.2881905696, + "level_score_std_dev": 0.65, + "se_difference": 6.0827625303, + "z_score": 0.2137186835, + "p_value": 0.9581379544, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -10.6219954859, + "gap_confidence_interval_95_upper": 13.2219954859, + "raw_n_min_group": 252, + "raw_n_max_group": 263, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "confidence", + "demographic_factor": "Urbanicity", + "score_range": 6.2, + "min_level": "Suburban", + "max_level": "Rural", + "min_score": 81.3, + "max_score": 87.5, + "se_min": 4.4, + "se_max": 3.9, + "effect_size": 1.8855354849, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.2881905696, + "level_score_std_dev": 2.6042699979, + "se_difference": 5.8796258384, + "z_score": 1.0544888689, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.3238548859, + "gap_confidence_interval_95_upper": 17.7238548859, + "raw_n_min_group": 255, + "raw_n_max_group": 89, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "consistency", + "demographic_factor": "Age", + "score_range": 7.9, + "min_level": "18-24", + "max_level": "65+", + "min_score": 83.0, + "max_score": 90.9, + "se_min": 5.0, + "se_max": 3.2, + "effect_size": 2.6536326379, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9770511137, + "level_score_std_dev": 2.6750389405, + "se_difference": 5.9363288319, + "z_score": 1.330788813, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.7349907108, + "gap_confidence_interval_95_upper": 19.5349907108, + "raw_n_min_group": 60, + "raw_n_max_group": 65, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "consistency", + "demographic_factor": "Education", + "score_range": 7.3, + "min_level": "College", + "max_level": "No College", + "min_score": 82.5, + "max_score": 89.8, + "se_min": 3.9, + "se_max": 3.9, + "effect_size": 2.4520909186, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9770511137, + "level_score_std_dev": 3.65, + "se_difference": 5.5154328933, + "z_score": 1.3235588468, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.5100498299, + "gap_confidence_interval_95_upper": 18.1100498299, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "consistency", + "demographic_factor": "Ethnicity", + "score_range": 6.7, + "min_level": "Hispanic", + "max_level": "Asian", + "min_score": 82.4, + "max_score": 89.1, + "se_min": 5.8, + "se_max": 4.0, + "effect_size": 2.2505491993, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9770511137, + "level_score_std_dev": 2.5460754113, + "se_difference": 7.0455659815, + "z_score": 0.950952701, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.1090555745, + "gap_confidence_interval_95_upper": 20.5090555745, + "raw_n_min_group": 34, + "raw_n_max_group": 40, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "consistency", + "demographic_factor": "Politics", + "score_range": 2.2, + "min_level": "Republican", + "max_level": "Independent", + "min_score": 86.6, + "max_score": 88.8, + "se_min": 3.9, + "se_max": 3.6, + "effect_size": 0.7389863042, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 2.9770511137, + "level_score_std_dev": 0.9177266599, + "se_difference": 5.3075418039, + "z_score": 0.4145045072, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.2025907821, + "gap_confidence_interval_95_upper": 12.6025907821, + "raw_n_min_group": 167, + "raw_n_max_group": 185, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "consistency", + "demographic_factor": "Sex", + "score_range": 0.5, + "min_level": "Female", + "max_level": "Male", + "min_score": 87.1, + "max_score": 87.6, + "se_min": 3.9, + "se_max": 3.8, + "effect_size": 0.1679514328, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 2.9770511137, + "level_score_std_dev": 0.25, + "se_difference": 5.445181356, + "z_score": 0.0918243062, + "p_value": 0.9801302996, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -10.1723593471, + "gap_confidence_interval_95_upper": 11.1723593471, + "raw_n_min_group": 263, + "raw_n_max_group": 252, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "consistency", + "demographic_factor": "Urbanicity", + "score_range": 3.3, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 86.2, + "max_score": 89.5, + "se_min": 4.1, + "se_max": 3.6, + "effect_size": 1.1084794564, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9770511137, + "level_score_std_dev": 1.4704496667, + "se_difference": 5.4561891463, + "z_score": 0.6048177421, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.3939342196, + "gap_confidence_interval_95_upper": 13.9939342196, + "raw_n_min_group": 176, + "raw_n_max_group": 89, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "context_memory", + "demographic_factor": "Age", + "score_range": 5.8, + "min_level": "45-54", + "max_level": "65+", + "min_score": 82.2, + "max_score": 88.0, + "se_min": 4.6, + "se_max": 3.9, + "effect_size": 1.7115032893, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.3888336858, + "level_score_std_dev": 1.9652537297, + "se_difference": 6.0307545133, + "z_score": 0.9617370409, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.0200616457, + "gap_confidence_interval_95_upper": 17.6200616457, + "raw_n_min_group": 82, + "raw_n_max_group": 65, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "context_memory", + "demographic_factor": "Education", + "score_range": 8.0, + "min_level": "College", + "max_level": "No College", + "min_score": 80.7, + "max_score": 88.7, + "se_min": 4.2, + "se_max": 4.1, + "effect_size": 2.3606941921, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.3888336858, + "level_score_std_dev": 4.0, + "se_difference": 5.8694122363, + "z_score": 1.3629984874, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.5038365936, + "gap_confidence_interval_95_upper": 19.5038365936, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "context_memory", + "demographic_factor": "Ethnicity", + "score_range": 7.3, + "min_level": "Hispanic", + "max_level": "Asian", + "min_score": 81.5, + "max_score": 88.8, + "se_min": 5.9, + "se_max": 4.2, + "effect_size": 2.1541334503, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.3888336858, + "level_score_std_dev": 2.7095894523, + "se_difference": 7.2422372234, + "z_score": 1.0079758195, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.8945241254, + "gap_confidence_interval_95_upper": 21.4945241254, + "raw_n_min_group": 34, + "raw_n_max_group": 40, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "context_memory", + "demographic_factor": "Politics", + "score_range": 4.0, + "min_level": "Republican", + "max_level": "Independent", + "min_score": 84.2, + "max_score": 88.2, + "se_min": 4.3, + "se_max": 3.8, + "effect_size": 1.180347096, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.3888336858, + "level_score_std_dev": 1.6438437341, + "se_difference": 5.7384666942, + "z_score": 0.6970503121, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.247188047, + "gap_confidence_interval_95_upper": 15.247188047, + "raw_n_min_group": 167, + "raw_n_max_group": 185, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "context_memory", + "demographic_factor": "Sex", + "score_range": 0.9, + "min_level": "Male", + "max_level": "Female", + "min_score": 85.5, + "max_score": 86.4, + "se_min": 4.2, + "se_max": 4.0, + "effect_size": 0.2655780966, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.3888336858, + "level_score_std_dev": 0.45, + "se_difference": 5.8, + "z_score": 0.1551724138, + "p_value": 0.9640111876, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -10.4677911103, + "gap_confidence_interval_95_upper": 12.2677911103, + "raw_n_min_group": 252, + "raw_n_max_group": 263, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "context_memory", + "demographic_factor": "Urbanicity", + "score_range": 2.3, + "min_level": "Suburban", + "max_level": "Rural", + "min_score": 85.1, + "max_score": 87.4, + "se_min": 4.1, + "se_max": 4.1, + "effect_size": 0.6786995802, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.3888336858, + "level_score_std_dev": 0.9626352719, + "se_difference": 5.7982756057, + "z_score": 0.3966696577, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.0644113597, + "gap_confidence_interval_95_upper": 13.6644113597, + "raw_n_min_group": 255, + "raw_n_max_group": 89, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "conversation_building", + "demographic_factor": "Age", + "score_range": 14.0, + "min_level": "18-24", + "max_level": "65+", + "min_score": 74.8, + "max_score": 88.8, + "se_min": 6.2, + "se_max": 3.8, + "effect_size": 3.2363491432, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.3258620689, + "level_score_std_dev": 4.7370877129, + "se_difference": 7.2718635851, + "z_score": 1.9252286345, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -0.2525907272, + "gap_confidence_interval_95_upper": 28.2525907272, + "raw_n_min_group": 60, + "raw_n_max_group": 65, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "conversation_building", + "demographic_factor": "Education", + "score_range": 4.4, + "min_level": "College", + "max_level": "No College", + "min_score": 80.9, + "max_score": 85.3, + "se_min": 4.0, + "se_max": 4.7, + "effect_size": 1.0171383021, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.3258620689, + "level_score_std_dev": 2.2, + "se_difference": 6.1717096497, + "z_score": 0.7129304925, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.6963286364, + "gap_confidence_interval_95_upper": 16.4963286364, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "conversation_building", + "demographic_factor": "Ethnicity", + "score_range": 8.6, + "min_level": "Hispanic", + "max_level": "Asian", + "min_score": 78.8, + "max_score": 87.4, + "se_min": 6.4, + "se_max": 4.5, + "effect_size": 1.9880430451, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.3258620689, + "level_score_std_dev": 3.0881831228, + "se_difference": 7.8236819976, + "z_score": 1.0992266816, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.7341349418, + "gap_confidence_interval_95_upper": 23.9341349418, + "raw_n_min_group": 34, + "raw_n_max_group": 40, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "conversation_building", + "demographic_factor": "Politics", + "score_range": 4.0, + "min_level": "Republican", + "max_level": "Independent", + "min_score": 82.2, + "max_score": 86.2, + "se_min": 4.6, + "se_max": 4.1, + "effect_size": 0.9246711838, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.3258620689, + "level_score_std_dev": 1.633673434, + "se_difference": 6.1619802012, + "z_score": 0.6491419754, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.0772592678, + "gap_confidence_interval_95_upper": 16.0772592678, + "raw_n_min_group": 167, + "raw_n_max_group": 185, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "conversation_building", + "demographic_factor": "Sex", + "score_range": 0.8, + "min_level": "Male", + "max_level": "Female", + "min_score": 83.4, + "max_score": 84.2, + "se_min": 4.6, + "se_max": 4.4, + "effect_size": 0.1849342368, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 4.3258620689, + "level_score_std_dev": 0.4, + "se_difference": 6.3655321851, + "z_score": 0.1256768447, + "p_value": 0.9772792279, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -11.6762138253, + "gap_confidence_interval_95_upper": 13.2762138253, + "raw_n_min_group": 252, + "raw_n_max_group": 263, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "conversation_building", + "demographic_factor": "Urbanicity", + "score_range": 2.9, + "min_level": "Suburban", + "max_level": "Rural", + "min_score": 82.4, + "max_score": 85.3, + "se_min": 4.6, + "se_max": 4.4, + "effect_size": 0.6703866082, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 4.3258620689, + "level_score_std_dev": 1.1897712198, + "se_difference": 6.3655321851, + "z_score": 0.4555785621, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.5762138253, + "gap_confidence_interval_95_upper": 15.3762138253, + "raw_n_min_group": 255, + "raw_n_max_group": 89, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "conversation_flow", + "demographic_factor": "Age", + "score_range": 11.5, + "min_level": "18-24", + "max_level": "55-64", + "min_score": 69.0, + "max_score": 80.5, + "se_min": 6.4, + "se_max": 4.7, + "effect_size": 2.3138859344, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.969994341, + "level_score_std_dev": 3.9143965052, + "se_difference": 7.9404030124, + "z_score": 1.4482892092, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.0629039271, + "gap_confidence_interval_95_upper": 27.0629039271, + "raw_n_min_group": 60, + "raw_n_max_group": 113, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "conversation_flow", + "demographic_factor": "Education", + "score_range": 3.3, + "min_level": "College", + "max_level": "No College", + "min_score": 74.1, + "max_score": 77.4, + "se_min": 4.5, + "se_max": 5.9, + "effect_size": 0.6639846594, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 4.969994341, + "level_score_std_dev": 1.65, + "se_difference": 7.4202425836, + "z_score": 0.4447293957, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -11.2434082205, + "gap_confidence_interval_95_upper": 17.8434082205, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "conversation_flow", + "demographic_factor": "Ethnicity", + "score_range": 10.5, + "min_level": "Hispanic", + "max_level": "Asian", + "min_score": 71.2, + "max_score": 81.7, + "se_min": 7.5, + "se_max": 5.7, + "effect_size": 2.1126784619, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.969994341, + "level_score_std_dev": 3.7251677815, + "se_difference": 9.4201910809, + "z_score": 1.1146270718, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.963235246, + "gap_confidence_interval_95_upper": 28.963235246, + "raw_n_min_group": 34, + "raw_n_max_group": 40, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "conversation_flow", + "demographic_factor": "Politics", + "score_range": 4.2, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 74.3, + "max_score": 78.5, + "se_min": 5.8, + "se_max": 5.3, + "effect_size": 0.8450713848, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.969994341, + "level_score_std_dev": 1.791337179, + "se_difference": 7.8568441502, + "z_score": 0.5345657773, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -11.1991315665, + "gap_confidence_interval_95_upper": 19.5991315665, + "raw_n_min_group": 168, + "raw_n_max_group": 185, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "conversation_flow", + "demographic_factor": "Sex", + "score_range": 0.8, + "min_level": "Male", + "max_level": "Female", + "min_score": 75.9, + "max_score": 76.7, + "se_min": 5.5, + "se_max": 5.4, + "effect_size": 0.160965978, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 4.969994341, + "level_score_std_dev": 0.4, + "se_difference": 7.707788269, + "z_score": 0.103791123, + "p_value": 0.9792458562, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -14.3069874077, + "gap_confidence_interval_95_upper": 15.9069874077, + "raw_n_min_group": 252, + "raw_n_max_group": 263, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "conversation_flow", + "demographic_factor": "Urbanicity", + "score_range": 4.5, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 74.6, + "max_score": 79.1, + "se_min": 5.8, + "se_max": 5.3, + "effect_size": 0.9054336265, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.969994341, + "level_score_std_dev": 1.9293061505, + "se_difference": 7.8568441502, + "z_score": 0.5727490471, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -10.8991315665, + "gap_confidence_interval_95_upper": 19.8991315665, + "raw_n_min_group": 176, + "raw_n_max_group": 89, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "cultural_awareness", + "demographic_factor": "Age", + "score_range": 11.5, + "min_level": "18-24", + "max_level": "65+", + "min_score": 62.9, + "max_score": 74.4, + "se_min": 6.7, + "se_max": 5.7, + "effect_size": 3.0965226124, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.7138433783, + "level_score_std_dev": 4.3403788877, + "se_difference": 8.7965902485, + "z_score": 1.3073247332, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.7410000738, + "gap_confidence_interval_95_upper": 28.7410000738, + "raw_n_min_group": 60, + "raw_n_max_group": 65, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "cultural_awareness", + "demographic_factor": "Education", + "score_range": 6.9, + "min_level": "College", + "max_level": "No College", + "min_score": 67.2, + "max_score": 74.1, + "se_min": 4.8, + "se_max": 6.0, + "effect_size": 1.8579135674, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.7138433783, + "level_score_std_dev": 3.45, + "se_difference": 7.6837490849, + "z_score": 0.8979991309, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.1598714727, + "gap_confidence_interval_95_upper": 21.9598714727, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "cultural_awareness", + "demographic_factor": "Ethnicity", + "score_range": 12.1, + "min_level": "Hispanic", + "max_level": "African American", + "min_score": 63.4, + "max_score": 75.5, + "se_min": 7.6, + "se_max": 6.0, + "effect_size": 3.2580803139, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.7138433783, + "level_score_std_dev": 4.607805877, + "se_difference": 9.6829747495, + "z_score": 1.249615982, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.8782817723, + "gap_confidence_interval_95_upper": 31.0782817723, + "raw_n_min_group": 34, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "cultural_awareness", + "demographic_factor": "Politics", + "score_range": 3.0, + "min_level": "Democrat", + "max_level": "Republican", + "min_score": 70.3, + "max_score": 73.3, + "se_min": 5.9, + "se_max": 5.3, + "effect_size": 0.8077885076, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.7138433783, + "level_score_std_dev": 1.2256517541, + "se_difference": 7.9309520236, + "z_score": 0.3782648024, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -12.5443803293, + "gap_confidence_interval_95_upper": 18.5443803293, + "raw_n_min_group": 168, + "raw_n_max_group": 167, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "cultural_awareness", + "demographic_factor": "Sex", + "score_range": 0.7, + "min_level": "Female", + "max_level": "Male", + "min_score": 71.4, + "max_score": 72.1, + "se_min": 5.6, + "se_max": 5.6, + "effect_size": 0.1884839851, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 3.7138433783, + "level_score_std_dev": 0.35, + "se_difference": 7.9195959493, + "z_score": 0.0883883476, + "p_value": 0.9801302996, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -14.8221228327, + "gap_confidence_interval_95_upper": 16.2221228327, + "raw_n_min_group": 263, + "raw_n_max_group": 252, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "cultural_awareness", + "demographic_factor": "Urbanicity", + "score_range": 2.9, + "min_level": "Suburban", + "max_level": "Urban", + "min_score": 70.6, + "max_score": 73.5, + "se_min": 5.5, + "se_max": 5.5, + "effect_size": 0.780862224, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.7138433783, + "level_score_std_dev": 1.2119772642, + "se_difference": 7.7781745931, + "z_score": 0.372838121, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -12.3449420678, + "gap_confidence_interval_95_upper": 18.1449420678, + "raw_n_min_group": 255, + "raw_n_max_group": 176, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "detail_and_technical_language", + "demographic_factor": "Age", + "score_range": 9.9, + "min_level": "45-54", + "max_level": "55-64", + "min_score": 79.5, + "max_score": 89.4, + "se_min": 5.1, + "se_max": 3.3, + "effect_size": 3.1637692836, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1291788726, + "level_score_std_dev": 4.1354833118, + "se_difference": 6.0745370194, + "z_score": 1.6297538345, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.0058737808, + "gap_confidence_interval_95_upper": 21.8058737808, + "raw_n_min_group": 82, + "raw_n_max_group": 113, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "detail_and_technical_language", + "demographic_factor": "Education", + "score_range": 6.1, + "min_level": "College", + "max_level": "No College", + "min_score": 82.5, + "max_score": 88.6, + "se_min": 3.9, + "se_max": 4.2, + "effect_size": 1.9493931949, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1291788726, + "level_score_std_dev": 3.05, + "se_difference": 5.7314919524, + "z_score": 1.0642953093, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.1335178043, + "gap_confidence_interval_95_upper": 17.3335178043, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "detail_and_technical_language", + "demographic_factor": "Ethnicity", + "score_range": 5.6, + "min_level": "Hispanic", + "max_level": "Asian", + "min_score": 83.8, + "max_score": 89.4, + "se_min": 5.6, + "se_max": 4.1, + "effect_size": 1.7896068675, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1291788726, + "level_score_std_dev": 2.2431841208, + "se_difference": 6.9404610798, + "z_score": 0.8068628202, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.0030537525, + "gap_confidence_interval_95_upper": 19.2030537525, + "raw_n_min_group": 34, + "raw_n_max_group": 40, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "detail_and_technical_language", + "demographic_factor": "Politics", + "score_range": 3.8, + "min_level": "Republican", + "max_level": "Independent", + "min_score": 84.8, + "max_score": 88.6, + "se_min": 4.3, + "se_max": 3.7, + "effect_size": 1.2143760887, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1291788726, + "level_score_std_dev": 1.5691469728, + "se_difference": 5.6727418415, + "z_score": 0.6698700745, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.3183697029, + "gap_confidence_interval_95_upper": 14.9183697029, + "raw_n_min_group": 167, + "raw_n_max_group": 185, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "detail_and_technical_language", + "demographic_factor": "Sex", + "score_range": 3.0, + "min_level": "Male", + "max_level": "Female", + "min_score": 85.0, + "max_score": 88.0, + "se_min": 4.5, + "se_max": 3.8, + "effect_size": 0.9587179647, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1291788726, + "level_score_std_dev": 1.5, + "se_difference": 5.8898217291, + "z_score": 0.5093532772, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.5438384643, + "gap_confidence_interval_95_upper": 14.5438384643, + "raw_n_min_group": 252, + "raw_n_max_group": 263, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "detail_and_technical_language", + "demographic_factor": "Urbanicity", + "score_range": 2.5, + "min_level": "Suburban", + "max_level": "Urban", + "min_score": 85.2, + "max_score": 87.7, + "se_min": 4.2, + "se_max": 4.0, + "effect_size": 0.7989316373, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.1291788726, + "level_score_std_dev": 1.0964589469, + "se_difference": 5.8, + "z_score": 0.4310344828, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.8677911103, + "gap_confidence_interval_95_upper": 13.8677911103, + "raw_n_min_group": 255, + "raw_n_max_group": 176, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "distinct_personality", + "demographic_factor": "Age", + "score_range": 8.0, + "min_level": "45-54", + "max_level": "55-64", + "min_score": 67.3, + "max_score": 75.3, + "se_min": 5.7, + "se_max": 5.0, + "effect_size": 1.8504181431, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.3233471472, + "level_score_std_dev": 2.8964058187, + "se_difference": 7.5822160349, + "z_score": 1.0551005093, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.8608703514, + "gap_confidence_interval_95_upper": 22.8608703514, + "raw_n_min_group": 82, + "raw_n_max_group": 113, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "distinct_personality", + "demographic_factor": "Education", + "score_range": 8.8, + "min_level": "College", + "max_level": "No College", + "min_score": 66.9, + "max_score": 75.7, + "se_min": 4.7, + "se_max": 5.8, + "effect_size": 2.0354599574, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.3233471472, + "level_score_std_dev": 4.4, + "se_difference": 7.4652528423, + "z_score": 1.1787946351, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.8316267065, + "gap_confidence_interval_95_upper": 23.4316267065, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "distinct_personality", + "demographic_factor": "Ethnicity", + "score_range": 17.7, + "min_level": "Hispanic", + "max_level": "Asian", + "min_score": 60.8, + "max_score": 78.5, + "se_min": 7.7, + "se_max": 5.7, + "effect_size": 4.0940501415, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.3233471472, + "level_score_std_dev": 7.1173643296, + "se_difference": 9.5801878896, + "z_score": 1.8475629292, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -1.0768232287, + "gap_confidence_interval_95_upper": 36.4768232287, + "raw_n_min_group": 34, + "raw_n_max_group": 40, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "distinct_personality", + "demographic_factor": "Politics", + "score_range": 5.1, + "min_level": "Independent", + "max_level": "Republican", + "min_score": 70.6, + "max_score": 75.7, + "se_min": 5.6, + "se_max": 5.0, + "effect_size": 1.1796415662, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.3233471472, + "level_score_std_dev": 2.3366642891, + "se_difference": 7.5073297516, + "z_score": 0.6793360847, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.6140959333, + "gap_confidence_interval_95_upper": 19.8140959333, + "raw_n_min_group": 185, + "raw_n_max_group": 167, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "distinct_personality", + "demographic_factor": "Sex", + "score_range": 3.3, + "min_level": "Male", + "max_level": "Female", + "min_score": 71.0, + "max_score": 74.3, + "se_min": 5.6, + "se_max": 5.3, + "effect_size": 0.763297484, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 4.3233471472, + "level_score_std_dev": 1.65, + "se_difference": 7.7103826105, + "z_score": 0.4279943249, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -11.8120722236, + "gap_confidence_interval_95_upper": 18.4120722236, + "raw_n_min_group": 252, + "raw_n_max_group": 263, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "distinct_personality", + "demographic_factor": "Urbanicity", + "score_range": 1.9, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 71.6, + "max_score": 73.5, + "se_min": 5.7, + "se_max": 5.5, + "effect_size": 0.439474309, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 4.3233471472, + "level_score_std_dev": 0.8041558721, + "se_difference": 7.9208585393, + "z_score": 0.2398729873, + "p_value": 0.9570722129, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -13.6245974637, + "gap_confidence_interval_95_upper": 17.4245974637, + "raw_n_min_group": 176, + "raw_n_max_group": 89, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "effectiveness", + "demographic_factor": "Age", + "score_range": 11.4, + "min_level": "45-54", + "max_level": "65+", + "min_score": 75.8, + "max_score": 87.2, + "se_min": 5.2, + "se_max": 3.9, + "effect_size": 2.8786991538, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.960122052, + "level_score_std_dev": 3.7184226046, + "se_difference": 6.5, + "z_score": 1.7538461538, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -1.3397658995, + "gap_confidence_interval_95_upper": 24.1397658995, + "raw_n_min_group": 82, + "raw_n_max_group": 65, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "effectiveness", + "demographic_factor": "Education", + "score_range": 8.4, + "min_level": "College", + "max_level": "No College", + "min_score": 78.4, + "max_score": 86.8, + "se_min": 4.2, + "se_max": 4.4, + "effect_size": 2.1211467449, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.960122052, + "level_score_std_dev": 4.2, + "se_difference": 6.0827625303, + "z_score": 1.3809514934, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.5219954859, + "gap_confidence_interval_95_upper": 20.3219954859, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "effectiveness", + "demographic_factor": "Ethnicity", + "score_range": 5.8, + "min_level": "Hispanic", + "max_level": "Asian", + "min_score": 80.2, + "max_score": 86.0, + "se_min": 6.0, + "se_max": 4.7, + "effect_size": 1.4646013239, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.960122052, + "level_score_std_dev": 2.2763732119, + "se_difference": 7.6216796049, + "z_score": 0.7609871184, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.1382175273, + "gap_confidence_interval_95_upper": 20.7382175273, + "raw_n_min_group": 34, + "raw_n_max_group": 40, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "effectiveness", + "demographic_factor": "Politics", + "score_range": 2.6, + "min_level": "Republican", + "max_level": "Independent", + "min_score": 82.9, + "max_score": 85.5, + "se_min": 4.4, + "se_max": 4.1, + "effect_size": 0.656545421, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.960122052, + "level_score_std_dev": 1.06249183, + "se_difference": 6.0141499815, + "z_score": 0.4323137946, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.1875173614, + "gap_confidence_interval_95_upper": 14.3875173614, + "raw_n_min_group": 167, + "raw_n_max_group": 185, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "effectiveness", + "demographic_factor": "Sex", + "score_range": 0.8, + "min_level": "Female", + "max_level": "Male", + "min_score": 83.6, + "max_score": 84.4, + "se_min": 4.4, + "se_max": 4.3, + "effect_size": 0.2020139757, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.960122052, + "level_score_std_dev": 0.4, + "se_difference": 6.1522353661, + "z_score": 0.1300340368, + "p_value": 0.9769857447, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -11.258159742, + "gap_confidence_interval_95_upper": 12.858159742, + "raw_n_min_group": 263, + "raw_n_max_group": 252, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "effectiveness", + "demographic_factor": "Urbanicity", + "score_range": 4.3, + "min_level": "Suburban", + "max_level": "Rural", + "min_score": 82.0, + "max_score": 86.3, + "se_min": 4.4, + "se_max": 4.2, + "effect_size": 1.0858251194, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.960122052, + "level_score_std_dev": 1.7568911937, + "se_difference": 6.0827625303, + "z_score": 0.7069156454, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.6219954859, + "gap_confidence_interval_95_upper": 16.2219954859, + "raw_n_min_group": 255, + "raw_n_max_group": 89, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "ethical_alignment", + "demographic_factor": "Age", + "score_range": 9.5, + "min_level": "18-24", + "max_level": "55-64", + "min_score": 66.6, + "max_score": 76.1, + "se_min": 6.2, + "se_max": 4.8, + "effect_size": 2.5268430649, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.7596319818, + "level_score_std_dev": 3.6122092347, + "se_difference": 7.8409183136, + "z_score": 1.2115927778, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.8679175003, + "gap_confidence_interval_95_upper": 24.8679175003, + "raw_n_min_group": 60, + "raw_n_max_group": 113, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "ethical_alignment", + "demographic_factor": "Education", + "score_range": 7.3, + "min_level": "College", + "max_level": "No College", + "min_score": 69.6, + "max_score": 76.9, + "se_min": 4.4, + "se_max": 5.6, + "effect_size": 1.9416794078, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.7596319818, + "level_score_std_dev": 3.65, + "se_difference": 7.1217975259, + "z_score": 1.0250221203, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.658466656, + "gap_confidence_interval_95_upper": 21.258466656, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "ethical_alignment", + "demographic_factor": "Ethnicity", + "score_range": 14.4, + "min_level": "Hispanic", + "max_level": "African American", + "min_score": 67.0, + "max_score": 81.4, + "se_min": 6.9, + "se_max": 5.1, + "effect_size": 3.8301621195, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.7596319818, + "level_score_std_dev": 5.1244511901, + "se_difference": 8.5802097876, + "z_score": 1.6782806431, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.4169021636, + "gap_confidence_interval_95_upper": 31.2169021636, + "raw_n_min_group": 34, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "ethical_alignment", + "demographic_factor": "Politics", + "score_range": 1.7, + "min_level": "Democrat", + "max_level": "Republican", + "min_score": 73.5, + "max_score": 75.2, + "se_min": 5.4, + "se_max": 5.1, + "effect_size": 0.4521719169, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.7596319818, + "level_score_std_dev": 0.740870359, + "se_difference": 7.4276510419, + "z_score": 0.228874511, + "p_value": 0.9570722129, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -12.8579285318, + "gap_confidence_interval_95_upper": 16.2579285318, + "raw_n_min_group": 168, + "raw_n_max_group": 167, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "ethical_alignment", + "demographic_factor": "Sex", + "score_range": 0.9, + "min_level": "Female", + "max_level": "Male", + "min_score": 74.0, + "max_score": 74.9, + "se_min": 5.2, + "se_max": 5.3, + "effect_size": 0.2393851325, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.7596319818, + "level_score_std_dev": 0.45, + "se_difference": 7.4249579123, + "z_score": 0.1212128083, + "p_value": 0.9772792279, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -13.6526500949, + "gap_confidence_interval_95_upper": 15.4526500949, + "raw_n_min_group": 263, + "raw_n_max_group": 252, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "ethical_alignment", + "demographic_factor": "Urbanicity", + "score_range": 1.4, + "min_level": "Rural", + "max_level": "Urban", + "min_score": 73.7, + "max_score": 75.1, + "se_min": 5.6, + "se_max": 5.2, + "effect_size": 0.3723768727, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.7596319818, + "level_score_std_dev": 0.5715476066, + "se_difference": 7.6419892698, + "z_score": 0.1831983729, + "p_value": 0.9597457904, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -13.5780237391, + "gap_confidence_interval_95_upper": 16.3780237391, + "raw_n_min_group": 89, + "raw_n_max_group": 176, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "flexibility", + "demographic_factor": "Age", + "score_range": 13.0, + "min_level": "45-54", + "max_level": "65+", + "min_score": 77.8, + "max_score": 90.8, + "se_min": 5.0, + "se_max": 3.1, + "effect_size": 3.8060538958, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.4156111174, + "level_score_std_dev": 4.0982381038, + "se_difference": 5.883026432, + "z_score": 2.2097469985, + "p_value": 0.8243926526, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": 1.4694800731, + "gap_confidence_interval_95_upper": 24.5305199269, + "raw_n_min_group": 82, + "raw_n_max_group": 65, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "flexibility", + "demographic_factor": "Education", + "score_range": 10.4, + "min_level": "College", + "max_level": "No College", + "min_score": 80.3, + "max_score": 90.7, + "se_min": 4.0, + "se_max": 3.6, + "effect_size": 3.0448431166, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.4156111174, + "level_score_std_dev": 5.2, + "se_difference": 5.3814496188, + "z_score": 1.9325647802, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -0.1474474375, + "gap_confidence_interval_95_upper": 20.9474474375, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "flexibility", + "demographic_factor": "Ethnicity", + "score_range": 8.3, + "min_level": "Hispanic", + "max_level": "Asian", + "min_score": 81.0, + "max_score": 89.3, + "se_min": 5.7, + "se_max": 3.9, + "effect_size": 2.4300190258, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.4156111174, + "level_score_std_dev": 3.2825295124, + "se_difference": 6.9065186599, + "z_score": 1.201763205, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.236527832, + "gap_confidence_interval_95_upper": 21.836527832, + "raw_n_min_group": 34, + "raw_n_max_group": 40, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "flexibility", + "demographic_factor": "Politics", + "score_range": 3.4, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 86.3, + "max_score": 89.7, + "se_min": 4.0, + "se_max": 3.2, + "effect_size": 0.9954294804, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.4156111174, + "level_score_std_dev": 1.4817407181, + "se_difference": 5.1224993899, + "z_score": 0.663738488, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.6399143151, + "gap_confidence_interval_95_upper": 13.4399143151, + "raw_n_min_group": 168, + "raw_n_max_group": 185, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "flexibility", + "demographic_factor": "Sex", + "score_range": 3.3, + "min_level": "Male", + "max_level": "Female", + "min_score": 85.4, + "max_score": 88.7, + "se_min": 4.0, + "se_max": 3.4, + "effect_size": 0.9661521428, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.4156111174, + "level_score_std_dev": 1.65, + "se_difference": 5.2497618994, + "z_score": 0.6285999372, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.9893442502, + "gap_confidence_interval_95_upper": 13.5893442502, + "raw_n_min_group": 252, + "raw_n_max_group": 263, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "flexibility", + "demographic_factor": "Urbanicity", + "score_range": 3.2, + "min_level": "Suburban", + "max_level": "Rural", + "min_score": 86.2, + "max_score": 89.4, + "se_min": 3.7, + "se_max": 3.5, + "effect_size": 0.9368748051, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.4156111174, + "level_score_std_dev": 1.4854853303, + "se_difference": 5.0931326313, + "z_score": 0.6282970093, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.7823565258, + "gap_confidence_interval_95_upper": 13.1823565258, + "raw_n_min_group": 255, + "raw_n_max_group": 89, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "helpfulness", + "demographic_factor": "Age", + "score_range": 4.7, + "min_level": "18-24", + "max_level": "55-64", + "min_score": 79.0, + "max_score": 83.7, + "se_min": 3.9, + "se_max": 3.2, + "effect_size": 1.3047805689, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.6021382539, + "level_score_std_dev": 1.8226964153, + "se_difference": 5.0447993023, + "z_score": 0.9316525234, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.1876249416, + "gap_confidence_interval_95_upper": 14.5876249416, + "raw_n_min_group": 60, + "raw_n_max_group": 113, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "helpfulness", + "demographic_factor": "Education", + "score_range": 4.4, + "min_level": "College", + "max_level": "No College", + "min_score": 79.1, + "max_score": 83.5, + "se_min": 2.8, + "se_max": 3.7, + "effect_size": 1.2214967028, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.6021382539, + "level_score_std_dev": 2.2, + "se_difference": 4.6400431032, + "z_score": 0.9482670531, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.6943173691, + "gap_confidence_interval_95_upper": 13.4943173691, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "helpfulness", + "demographic_factor": "Ethnicity", + "score_range": 3.1, + "min_level": "African American", + "max_level": "Asian", + "min_score": 80.9, + "max_score": 84.0, + "se_min": 3.9, + "se_max": 3.8, + "effect_size": 0.8605999497, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.6021382539, + "level_score_std_dev": 1.112148821, + "se_difference": 5.445181356, + "z_score": 0.5693106983, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.5723593471, + "gap_confidence_interval_95_upper": 13.7723593471, + "raw_n_min_group": 0, + "raw_n_max_group": 40, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "helpfulness", + "demographic_factor": "Politics", + "score_range": 0.4, + "min_level": "Independent", + "max_level": "Democrat", + "min_score": 81.8, + "max_score": 82.2, + "se_min": 3.4, + "se_max": 3.5, + "effect_size": 0.1110451548, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 3.6021382539, + "level_score_std_dev": 0.1885618083, + "se_difference": 4.8795491595, + "z_score": 0.0819747864, + "p_value": 0.981376128, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.1637406134, + "gap_confidence_interval_95_upper": 9.9637406134, + "raw_n_min_group": 185, + "raw_n_max_group": 168, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "helpfulness", + "demographic_factor": "Sex", + "score_range": 1.1, + "min_level": "Male", + "max_level": "Female", + "min_score": 81.4, + "max_score": 82.5, + "se_min": 3.4, + "se_max": 3.4, + "effect_size": 0.3053741757, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.6021382539, + "level_score_std_dev": 0.55, + "se_difference": 4.8083261121, + "z_score": 0.228769841, + "p_value": 0.9570722129, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.3241460056, + "gap_confidence_interval_95_upper": 10.5241460056, + "raw_n_min_group": 252, + "raw_n_max_group": 263, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "helpfulness", + "demographic_factor": "Urbanicity", + "score_range": 4.1, + "min_level": "Suburban", + "max_level": "Rural", + "min_score": 79.9, + "max_score": 84.0, + "se_min": 3.4, + "se_max": 3.4, + "effect_size": 1.1382128367, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.6021382539, + "level_score_std_dev": 1.7326921891, + "se_difference": 4.8083261121, + "z_score": 0.8526875891, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.3241460056, + "gap_confidence_interval_95_upper": 13.5241460056, + "raw_n_min_group": 255, + "raw_n_max_group": 89, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "honesty_empathy_fairness", + "demographic_factor": "Age", + "score_range": 9.2, + "min_level": "45-54", + "max_level": "65+", + "min_score": 75.5, + "max_score": 84.7, + "se_min": 5.1, + "se_max": 4.3, + "effect_size": 2.4199431518, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.8017421993, + "level_score_std_dev": 3.1374086972, + "se_difference": 6.6708320321, + "z_score": 1.3791383078, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.8745905298, + "gap_confidence_interval_95_upper": 22.2745905298, + "raw_n_min_group": 82, + "raw_n_max_group": 65, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "honesty_empathy_fairness", + "demographic_factor": "Education", + "score_range": 10.2, + "min_level": "College", + "max_level": "No College", + "min_score": 75.8, + "max_score": 86.0, + "se_min": 4.2, + "se_max": 4.5, + "effect_size": 2.6829804509, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.8017421993, + "level_score_std_dev": 5.1, + "se_difference": 6.1554853586, + "z_score": 1.657058608, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -1.8645296102, + "gap_confidence_interval_95_upper": 22.2645296102, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "honesty_empathy_fairness", + "demographic_factor": "Ethnicity", + "score_range": 10.0, + "min_level": "Hispanic", + "max_level": "Asian", + "min_score": 77.2, + "max_score": 87.2, + "se_min": 6.4, + "se_max": 4.3, + "effect_size": 2.6303729911, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.8017421993, + "level_score_std_dev": 3.694844381, + "se_difference": 7.7103826105, + "z_score": 1.2969524997, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.1120722236, + "gap_confidence_interval_95_upper": 25.1120722236, + "raw_n_min_group": 34, + "raw_n_max_group": 40, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "honesty_empathy_fairness", + "demographic_factor": "Politics", + "score_range": 2.2, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 81.7, + "max_score": 83.9, + "se_min": 4.7, + "se_max": 4.3, + "effect_size": 0.578682058, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.8017421993, + "level_score_std_dev": 0.898146239, + "se_difference": 6.3702433235, + "z_score": 0.3453557248, + "p_value": 0.9429935514, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -10.2854474867, + "gap_confidence_interval_95_upper": 14.6854474867, + "raw_n_min_group": 168, + "raw_n_max_group": 185, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "honesty_empathy_fairness", + "demographic_factor": "Sex", + "score_range": 0.7, + "min_level": "Male", + "max_level": "Female", + "min_score": 82.2, + "max_score": 82.9, + "se_min": 4.5, + "se_max": 4.3, + "effect_size": 0.1841261094, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 3.8017421993, + "level_score_std_dev": 0.35, + "se_difference": 6.2241465278, + "z_score": 0.1124652186, + "p_value": 0.977356957, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -11.4991030291, + "gap_confidence_interval_95_upper": 12.8991030291, + "raw_n_min_group": 252, + "raw_n_max_group": 263, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "honesty_empathy_fairness", + "demographic_factor": "Urbanicity", + "score_range": 1.2, + "min_level": "Rural", + "max_level": "Urban", + "min_score": 82.0, + "max_score": 83.2, + "se_min": 4.7, + "se_max": 4.4, + "effect_size": 0.3156447589, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.8017421993, + "level_score_std_dev": 0.4988876516, + "se_difference": 6.4381674411, + "z_score": 0.1863884422, + "p_value": 0.9597457904, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -11.418576311, + "gap_confidence_interval_95_upper": 13.818576311, + "raw_n_min_group": 89, + "raw_n_max_group": 176, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "intuitiveness", + "demographic_factor": "Age", + "score_range": 15.1, + "min_level": "45-54", + "max_level": "65+", + "min_score": 72.5, + "max_score": 87.6, + "se_min": 5.6, + "se_max": 3.9, + "effect_size": 4.8738215367, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.0981848404, + "level_score_std_dev": 5.1224993899, + "se_difference": 6.8242215673, + "z_score": 2.2127065851, + "p_value": 0.8243926526, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": 1.7247715055, + "gap_confidence_interval_95_upper": 28.4752284945, + "raw_n_min_group": 82, + "raw_n_max_group": 65, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "intuitiveness", + "demographic_factor": "Education", + "score_range": 12.8, + "min_level": "College", + "max_level": "No College", + "min_score": 75.6, + "max_score": 88.4, + "se_min": 4.5, + "se_max": 4.3, + "effect_size": 4.1314513688, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.0981848404, + "level_score_std_dev": 6.4, + "se_difference": 6.2241465278, + "z_score": 2.0565068548, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": 0.6008969709, + "gap_confidence_interval_95_upper": 24.9991030291, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "intuitiveness", + "demographic_factor": "Ethnicity", + "score_range": 6.8, + "min_level": "Hispanic", + "max_level": "Asian", + "min_score": 78.4, + "max_score": 85.2, + "se_min": 6.5, + "se_max": 4.7, + "effect_size": 2.1948335397, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.0981848404, + "level_score_std_dev": 2.8346075566, + "se_difference": 8.0212218521, + "z_score": 0.8477511438, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.921305942, + "gap_confidence_interval_95_upper": 22.521305942, + "raw_n_min_group": 34, + "raw_n_max_group": 40, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "intuitiveness", + "demographic_factor": "Politics", + "score_range": 3.2, + "min_level": "Republican", + "max_level": "Independent", + "min_score": 82.5, + "max_score": 85.7, + "se_min": 4.4, + "se_max": 4.1, + "effect_size": 1.0328628422, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.0981848404, + "level_score_std_dev": 1.3366625104, + "se_difference": 6.0141499815, + "z_score": 0.5320785165, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.5875173614, + "gap_confidence_interval_95_upper": 14.9875173614, + "raw_n_min_group": 167, + "raw_n_max_group": 185, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "intuitiveness", + "demographic_factor": "Sex", + "score_range": 1.1, + "min_level": "Male", + "max_level": "Female", + "min_score": 83.5, + "max_score": 84.6, + "se_min": 4.5, + "se_max": 4.2, + "effect_size": 0.355046602, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.0981848404, + "level_score_std_dev": 0.55, + "se_difference": 6.1554853586, + "z_score": 0.1787023989, + "p_value": 0.9597457904, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -10.9645296102, + "gap_confidence_interval_95_upper": 13.1645296102, + "raw_n_min_group": 252, + "raw_n_max_group": 263, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "intuitiveness", + "demographic_factor": "Urbanicity", + "score_range": 2.1, + "min_level": "Suburban", + "max_level": "Rural", + "min_score": 82.9, + "max_score": 85.0, + "se_min": 4.3, + "se_max": 4.4, + "effect_size": 0.6778162402, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.0981848404, + "level_score_std_dev": 0.9104333522, + "se_difference": 6.1522353661, + "z_score": 0.3413393466, + "p_value": 0.9429935514, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.958159742, + "gap_confidence_interval_95_upper": 14.158159742, + "raw_n_min_group": 255, + "raw_n_max_group": 89, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "personality", + "demographic_factor": "Age", + "score_range": 5.5, + "min_level": "45-54", + "max_level": "55-64", + "min_score": 67.3, + "max_score": 72.8, + "se_min": 3.4, + "se_max": 3.5, + "effect_size": 1.6343221705, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.365309545, + "level_score_std_dev": 2.1192897133, + "se_difference": 4.8795491595, + "z_score": 1.1271533128, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.0637406134, + "gap_confidence_interval_95_upper": 15.0637406134, + "raw_n_min_group": 82, + "raw_n_max_group": 113, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "personality", + "demographic_factor": "Education", + "score_range": 4.4, + "min_level": "College", + "max_level": "No College", + "min_score": 68.1, + "max_score": 72.5, + "se_min": 2.7, + "se_max": 4.0, + "effect_size": 1.3074577364, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.365309545, + "level_score_std_dev": 2.2, + "se_difference": 4.8259714048, + "z_score": 0.9117335415, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.0587301438, + "gap_confidence_interval_95_upper": 13.8587301438, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "personality", + "demographic_factor": "Ethnicity", + "score_range": 5.9, + "min_level": "Hispanic", + "max_level": "African American", + "min_score": 68.3, + "max_score": 74.2, + "se_min": 4.3, + "se_max": 4.1, + "effect_size": 1.7531819647, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.365309545, + "level_score_std_dev": 2.3005434141, + "se_difference": 5.941380311, + "z_score": 0.9930352361, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.744891428, + "gap_confidence_interval_95_upper": 17.544891428, + "raw_n_min_group": 34, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "personality", + "demographic_factor": "Politics", + "score_range": 0.6, + "min_level": "Independent", + "max_level": "Democrat", + "min_score": 70.7, + "max_score": 71.3, + "se_min": 3.6, + "se_max": 3.7, + "effect_size": 0.1782896913, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 3.365309545, + "level_score_std_dev": 0.2494438258, + "se_difference": 5.1623637997, + "z_score": 0.1162258266, + "p_value": 0.9772792279, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.5180471224, + "gap_confidence_interval_95_upper": 10.7180471224, + "raw_n_min_group": 185, + "raw_n_max_group": 168, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "personality", + "demographic_factor": "Sex", + "score_range": 2.3, + "min_level": "Male", + "max_level": "Female", + "min_score": 69.8, + "max_score": 72.1, + "se_min": 3.6, + "se_max": 3.6, + "effect_size": 0.6834438168, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.365309545, + "level_score_std_dev": 1.15, + "se_difference": 5.0911688245, + "z_score": 0.4517626658, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.6785075353, + "gap_confidence_interval_95_upper": 12.2785075353, + "raw_n_min_group": 252, + "raw_n_max_group": 263, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "personality", + "demographic_factor": "Urbanicity", + "score_range": 0.6, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 70.6, + "max_score": 71.2, + "se_min": 3.6, + "se_max": 3.8, + "effect_size": 0.1782896913, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 3.365309545, + "level_score_std_dev": 0.2828427125, + "se_difference": 5.2345009313, + "z_score": 0.114624108, + "p_value": 0.977356957, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.6594333024, + "gap_confidence_interval_95_upper": 10.8594333024, + "raw_n_min_group": 176, + "raw_n_max_group": 89, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "personality_consistency", + "demographic_factor": "Age", + "score_range": 7.3, + "min_level": "18-24", + "max_level": "65+", + "min_score": 76.2, + "max_score": 83.5, + "se_min": 5.9, + "se_max": 4.6, + "effect_size": 2.286859886, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1921500939, + "level_score_std_dev": 2.6113002296, + "se_difference": 7.4813100457, + "z_score": 0.9757649336, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.3630982468, + "gap_confidence_interval_95_upper": 21.9630982468, + "raw_n_min_group": 60, + "raw_n_max_group": 65, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "personality_consistency", + "demographic_factor": "Education", + "score_range": 5.9, + "min_level": "College", + "max_level": "No College", + "min_score": 77.4, + "max_score": 83.3, + "se_min": 4.2, + "se_max": 5.0, + "effect_size": 1.8482840175, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1921500939, + "level_score_std_dev": 2.95, + "se_difference": 6.5299310869, + "z_score": 0.9035317405, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.8984297519, + "gap_confidence_interval_95_upper": 18.6984297519, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "personality_consistency", + "demographic_factor": "Ethnicity", + "score_range": 8.5, + "min_level": "Hispanic", + "max_level": "Asian", + "min_score": 74.3, + "max_score": 82.8, + "se_min": 6.7, + "se_max": 5.3, + "effect_size": 2.6627820591, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1921500939, + "level_score_std_dev": 3.422718218, + "se_difference": 8.5428332537, + "z_score": 0.99498606, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.2436455031, + "gap_confidence_interval_95_upper": 25.2436455031, + "raw_n_min_group": 34, + "raw_n_max_group": 40, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "personality_consistency", + "demographic_factor": "Politics", + "score_range": 2.0, + "min_level": "Democrat", + "max_level": "Republican", + "min_score": 80.3, + "max_score": 82.3, + "se_min": 5.0, + "se_max": 4.4, + "effect_size": 0.6265369551, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.1921500939, + "level_score_std_dev": 0.8286535263, + "se_difference": 6.6603303221, + "z_score": 0.3002854068, + "p_value": 0.9469805126, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -11.0540075565, + "gap_confidence_interval_95_upper": 15.0540075565, + "raw_n_min_group": 168, + "raw_n_max_group": 167, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "personality_consistency", + "demographic_factor": "Sex", + "score_range": 1.0, + "min_level": "Female", + "max_level": "Male", + "min_score": 80.8, + "max_score": 81.8, + "se_min": 4.8, + "se_max": 4.7, + "effect_size": 0.3132684775, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.1921500939, + "level_score_std_dev": 0.5, + "se_difference": 6.7178865724, + "z_score": 0.1488563389, + "p_value": 0.9660130308, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -12.1668157342, + "gap_confidence_interval_95_upper": 14.1668157342, + "raw_n_min_group": 263, + "raw_n_max_group": 252, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "personality_consistency", + "demographic_factor": "Urbanicity", + "score_range": 2.5, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 80.4, + "max_score": 82.9, + "se_min": 5.0, + "se_max": 4.6, + "effect_size": 0.7831711938, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.1921500939, + "level_score_std_dev": 1.0964589469, + "se_difference": 6.7941151006, + "z_score": 0.3679655059, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -10.816220904, + "gap_confidence_interval_95_upper": 15.816220904, + "raw_n_min_group": 176, + "raw_n_max_group": 89, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "tone_and_language_style", + "demographic_factor": "Age", + "score_range": 13.4, + "min_level": "18-24", + "max_level": "65+", + "min_score": 72.6, + "max_score": 86.0, + "se_min": 6.3, + "se_max": 4.3, + "effect_size": 4.0123657123, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.339675633, + "level_score_std_dev": 4.5059220292, + "se_difference": 7.6275815302, + "z_score": 1.7567822706, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -1.5497850884, + "gap_confidence_interval_95_upper": 28.3497850884, + "raw_n_min_group": 60, + "raw_n_max_group": 65, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "tone_and_language_style", + "demographic_factor": "Education", + "score_range": 6.4, + "min_level": "College", + "max_level": "No College", + "min_score": 78.9, + "max_score": 85.3, + "se_min": 4.2, + "se_max": 4.7, + "effect_size": 1.9163537731, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.339675633, + "level_score_std_dev": 3.2, + "se_difference": 6.3031738037, + "z_score": 1.0153614987, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.9539936436, + "gap_confidence_interval_95_upper": 18.7539936436, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "tone_and_language_style", + "demographic_factor": "Ethnicity", + "score_range": 12.0, + "min_level": "Hispanic", + "max_level": "Asian", + "min_score": 77.0, + "max_score": 89.0, + "se_min": 6.4, + "se_max": 4.2, + "effect_size": 3.5931633245, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.339675633, + "level_score_std_dev": 4.2534544784, + "se_difference": 7.6550636836, + "z_score": 1.5675898328, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.0036491192, + "gap_confidence_interval_95_upper": 27.0036491192, + "raw_n_min_group": 34, + "raw_n_max_group": 40, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "tone_and_language_style", + "demographic_factor": "Politics", + "score_range": 2.5, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 82.1, + "max_score": 84.6, + "se_min": 4.8, + "se_max": 4.3, + "effect_size": 0.7485756926, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.339675633, + "level_score_std_dev": 1.033870828, + "se_difference": 6.4443773943, + "z_score": 0.3879350707, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -10.1307475956, + "gap_confidence_interval_95_upper": 15.1307475956, + "raw_n_min_group": 168, + "raw_n_max_group": 185, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "tone_and_language_style", + "demographic_factor": "Sex", + "score_range": 1.2, + "min_level": "Female", + "max_level": "Male", + "min_score": 82.6, + "max_score": 83.8, + "se_min": 4.6, + "se_max": 4.4, + "effect_size": 0.3593163324, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.339675633, + "level_score_std_dev": 0.6, + "se_difference": 6.3655321851, + "z_score": 0.1885152671, + "p_value": 0.9597457904, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -11.2762138253, + "gap_confidence_interval_95_upper": 13.6762138253, + "raw_n_min_group": 263, + "raw_n_max_group": 252, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "tone_and_language_style", + "demographic_factor": "Urbanicity", + "score_range": 4.3, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 81.2, + "max_score": 85.5, + "se_min": 4.9, + "se_max": 4.3, + "effect_size": 1.2875501913, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.339675633, + "level_score_std_dev": 1.7682382946, + "se_difference": 6.5192024052, + "z_score": 0.6595898904, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.4774019221, + "gap_confidence_interval_95_upper": 17.0774019221, + "raw_n_min_group": 176, + "raw_n_max_group": 89, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "transparency", + "demographic_factor": "Age", + "score_range": 10.6, + "min_level": "45-54", + "max_level": "65+", + "min_score": 62.7, + "max_score": 73.3, + "se_min": 6.1, + "se_max": 6.0, + "effect_size": 2.348368794, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.5137714429, + "level_score_std_dev": 3.4869598347, + "se_difference": 8.5562842403, + "z_score": 1.2388555245, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.1700089524, + "gap_confidence_interval_95_upper": 27.3700089524, + "raw_n_min_group": 82, + "raw_n_max_group": 65, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "transparency", + "demographic_factor": "Education", + "score_range": 8.1, + "min_level": "College", + "max_level": "No College", + "min_score": 65.2, + "max_score": 73.3, + "se_min": 5.0, + "se_max": 6.3, + "effect_size": 1.7945082294, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.5137714429, + "level_score_std_dev": 4.05, + "se_difference": 8.043009387, + "z_score": 1.0070857325, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.6640087259, + "gap_confidence_interval_95_upper": 23.8640087259, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "transparency", + "demographic_factor": "Ethnicity", + "score_range": 14.1, + "min_level": "Hispanic", + "max_level": "African American", + "min_score": 60.8, + "max_score": 74.9, + "se_min": 8.0, + "se_max": 6.1, + "effect_size": 3.1237735845, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.5137714429, + "level_score_std_dev": 5.7312193293, + "se_difference": 10.0603180864, + "z_score": 1.4015461419, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.6178611224, + "gap_confidence_interval_95_upper": 33.8178611224, + "raw_n_min_group": 34, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "transparency", + "demographic_factor": "Politics", + "score_range": 4.8, + "min_level": "Independent", + "max_level": "Republican", + "min_score": 68.6, + "max_score": 73.4, + "se_min": 6.0, + "se_max": 5.5, + "effect_size": 1.0634122841, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.5137714429, + "level_score_std_dev": 2.19544984, + "se_difference": 8.139410298, + "z_score": 0.5897233122, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -11.1529510396, + "gap_confidence_interval_95_upper": 20.7529510396, + "raw_n_min_group": 185, + "raw_n_max_group": 167, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "transparency", + "demographic_factor": "Sex", + "score_range": 1.5, + "min_level": "Male", + "max_level": "Female", + "min_score": 69.8, + "max_score": 71.3, + "se_min": 6.0, + "se_max": 5.8, + "effect_size": 0.3323163388, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 4.5137714429, + "level_score_std_dev": 0.75, + "se_difference": 8.345058418, + "z_score": 0.179747094, + "p_value": 0.9597457904, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -14.8560139482, + "gap_confidence_interval_95_upper": 17.8560139482, + "raw_n_min_group": 252, + "raw_n_max_group": 263, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "transparency", + "demographic_factor": "Urbanicity", + "score_range": 2.8, + "min_level": "Suburban", + "max_level": "Rural", + "min_score": 69.4, + "max_score": 72.2, + "se_min": 5.7, + "se_max": 6.1, + "effect_size": 0.6203238324, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 4.5137714429, + "level_score_std_dev": 1.1518101695, + "se_difference": 8.3486525859, + "z_score": 0.3353834611, + "p_value": 0.9429935514, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -13.5630583878, + "gap_confidence_interval_95_upper": 19.1630583878, + "raw_n_min_group": 255, + "raw_n_max_group": 89, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "trustworthiness", + "demographic_factor": "Age", + "score_range": 4.4, + "min_level": "18-24", + "max_level": "35-44", + "min_score": 80.4, + "max_score": 84.8, + "se_min": 4.0, + "se_max": 3.2, + "effect_size": 1.3905264146, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1642692679, + "level_score_std_dev": 1.7416467303, + "se_difference": 5.1224993899, + "z_score": 0.8589556904, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.6399143151, + "gap_confidence_interval_95_upper": 14.4399143151, + "raw_n_min_group": 60, + "raw_n_max_group": 93, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "trustworthiness", + "demographic_factor": "Education", + "score_range": 7.3, + "min_level": "College", + "max_level": "No College", + "min_score": 78.4, + "max_score": 85.7, + "se_min": 3.0, + "se_max": 3.6, + "effect_size": 2.3070097333, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1642692679, + "level_score_std_dev": 3.65, + "se_difference": 4.6861498055, + "z_score": 1.5577820392, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -1.884684845, + "gap_confidence_interval_95_upper": 16.484684845, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "trustworthiness", + "demographic_factor": "Ethnicity", + "score_range": 4.1, + "min_level": "Hispanic", + "max_level": "Asian", + "min_score": 81.5, + "max_score": 85.6, + "se_min": 4.4, + "se_max": 3.8, + "effect_size": 1.2957177954, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1642692679, + "level_score_std_dev": 1.5564382416, + "se_difference": 5.8137767415, + "z_score": 0.7052214391, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.2947930275, + "gap_confidence_interval_95_upper": 15.4947930275, + "raw_n_min_group": 34, + "raw_n_max_group": 40, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "trustworthiness", + "demographic_factor": "Politics", + "score_range": 1.6, + "min_level": "Republican", + "max_level": "Independent", + "min_score": 82.3, + "max_score": 83.9, + "se_min": 3.4, + "se_max": 3.4, + "effect_size": 0.5056459689, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.1642692679, + "level_score_std_dev": 0.7318166133, + "se_difference": 4.8083261121, + "z_score": 0.3327561323, + "p_value": 0.9429935514, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.8241460056, + "gap_confidence_interval_95_upper": 11.0241460056, + "raw_n_min_group": 167, + "raw_n_max_group": 185, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "trustworthiness", + "demographic_factor": "Sex", + "score_range": 0.8, + "min_level": "Female", + "max_level": "Male", + "min_score": 82.9, + "max_score": 83.7, + "se_min": 3.4, + "se_max": 3.4, + "effect_size": 0.2528229845, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.1642692679, + "level_score_std_dev": 0.4, + "se_difference": 4.8083261121, + "z_score": 0.1663780662, + "p_value": 0.9610477633, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.6241460056, + "gap_confidence_interval_95_upper": 10.2241460056, + "raw_n_min_group": 263, + "raw_n_max_group": 252, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "trustworthiness", + "demographic_factor": "Urbanicity", + "score_range": 3.8, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 81.5, + "max_score": 85.3, + "se_min": 3.6, + "se_max": 3.4, + "effect_size": 1.2009091763, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1642692679, + "level_score_std_dev": 1.5627610893, + "se_difference": 4.9517673613, + "z_score": 0.7674027721, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.9052856879, + "gap_confidence_interval_95_upper": 13.5052856879, + "raw_n_min_group": 176, + "raw_n_max_group": 89, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "understanding", + "demographic_factor": "Age", + "score_range": 5.0, + "min_level": "18-24", + "max_level": "65+", + "min_score": 80.8, + "max_score": 85.8, + "se_min": 3.6, + "se_max": 3.2, + "effect_size": 1.6850234171, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9673178124, + "level_score_std_dev": 1.6108141489, + "se_difference": 4.8166378315, + "z_score": 1.0380684982, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.4404366763, + "gap_confidence_interval_95_upper": 14.4404366763, + "raw_n_min_group": 60, + "raw_n_max_group": 65, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "understanding", + "demographic_factor": "Education", + "score_range": 6.2, + "min_level": "College", + "max_level": "No College", + "min_score": 80.5, + "max_score": 86.7, + "se_min": 2.9, + "se_max": 3.4, + "effect_size": 2.0894290372, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9673178124, + "level_score_std_dev": 3.1, + "se_difference": 4.4687805943, + "z_score": 1.3874030889, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.5586490196, + "gap_confidence_interval_95_upper": 14.9586490196, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "understanding", + "demographic_factor": "Ethnicity", + "score_range": 3.0, + "min_level": "White", + "max_level": "Asian", + "min_score": 84.4, + "max_score": 87.4, + "se_min": 3.0, + "se_max": 3.6, + "effect_size": 1.0110140503, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9673178124, + "level_score_std_dev": 1.2070107705, + "se_difference": 4.6861498055, + "z_score": 0.6401843997, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.184684845, + "gap_confidence_interval_95_upper": 12.184684845, + "raw_n_min_group": 349, + "raw_n_max_group": 40, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "understanding", + "demographic_factor": "Politics", + "score_range": 1.6, + "min_level": "Republican", + "max_level": "Independent", + "min_score": 84.0, + "max_score": 85.6, + "se_min": 3.2, + "se_max": 3.1, + "effect_size": 0.5392074935, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 2.9673178124, + "level_score_std_dev": 0.6548960901, + "se_difference": 4.455333882, + "z_score": 0.3591201114, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.1322939478, + "gap_confidence_interval_95_upper": 10.3322939478, + "raw_n_min_group": 167, + "raw_n_max_group": 185, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "understanding", + "demographic_factor": "Sex", + "score_range": 0.2, + "min_level": "Female", + "max_level": "Male", + "min_score": 84.5, + "max_score": 84.7, + "se_min": 3.2, + "se_max": 3.2, + "effect_size": 0.0674009367, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 2.9673178124, + "level_score_std_dev": 0.1, + "se_difference": 4.5254833996, + "z_score": 0.0441941738, + "p_value": 0.9872735163, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.6697844758, + "gap_confidence_interval_95_upper": 9.0697844758, + "raw_n_min_group": 263, + "raw_n_max_group": 252, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "understanding", + "demographic_factor": "Urbanicity", + "score_range": 3.9, + "min_level": "Suburban", + "max_level": "Rural", + "min_score": 83.0, + "max_score": 86.9, + "se_min": 3.2, + "se_max": 3.1, + "effect_size": 1.3143182654, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9673178124, + "level_score_std_dev": 1.6131404843, + "se_difference": 4.455333882, + "z_score": 0.8753552715, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.8322939478, + "gap_confidence_interval_95_upper": 12.6322939478, + "raw_n_min_group": 255, + "raw_n_max_group": 89, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "usefulness", + "demographic_factor": "Age", + "score_range": 10.2, + "min_level": "45-54", + "max_level": "65+", + "min_score": 78.5, + "max_score": 88.7, + "se_min": 5.1, + "se_max": 3.8, + "effect_size": 3.034663994, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.3611628899, + "level_score_std_dev": 3.3365401241, + "se_difference": 6.3600314465, + "z_score": 1.6037656552, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.2654325756, + "gap_confidence_interval_95_upper": 22.6654325756, + "raw_n_min_group": 82, + "raw_n_max_group": 65, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "usefulness", + "demographic_factor": "Education", + "score_range": 6.7, + "min_level": "College", + "max_level": "No College", + "min_score": 80.8, + "max_score": 87.5, + "se_min": 4.1, + "se_max": 4.4, + "effect_size": 1.9933577216, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.3611628899, + "level_score_std_dev": 3.35, + "se_difference": 6.0141499815, + "z_score": 1.1140393939, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.0875173614, + "gap_confidence_interval_95_upper": 18.4875173614, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "usefulness", + "demographic_factor": "Ethnicity", + "score_range": 10.7, + "min_level": "Hispanic", + "max_level": "Asian", + "min_score": 79.5, + "max_score": 90.2, + "se_min": 6.3, + "se_max": 3.8, + "effect_size": 3.183422033, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.3611628899, + "level_score_std_dev": 3.9359719257, + "se_difference": 7.3573092908, + "z_score": 1.454336032, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.720061233, + "gap_confidence_interval_95_upper": 25.120061233, + "raw_n_min_group": 34, + "raw_n_max_group": 40, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "usefulness", + "demographic_factor": "Politics", + "score_range": 1.9, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 84.8, + "max_score": 86.7, + "se_min": 4.5, + "se_max": 4.0, + "effect_size": 0.5652805479, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.3611628899, + "level_score_std_dev": 0.8339997335, + "se_difference": 6.0207972894, + "z_score": 0.3155728234, + "p_value": 0.9469805126, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.9005458454, + "gap_confidence_interval_95_upper": 13.7005458454, + "raw_n_min_group": 168, + "raw_n_max_group": 185, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "usefulness", + "demographic_factor": "Sex", + "score_range": 1.8, + "min_level": "Male", + "max_level": "Female", + "min_score": 84.3, + "max_score": 86.1, + "se_min": 4.5, + "se_max": 4.1, + "effect_size": 0.5355289401, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.3611628899, + "level_score_std_dev": 0.9, + "se_difference": 6.0876925021, + "z_score": 0.2956785349, + "p_value": 0.9469805126, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -10.1316580531, + "gap_confidence_interval_95_upper": 13.7316580531, + "raw_n_min_group": 252, + "raw_n_max_group": 263, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "deepseek-r1", + "category": "usefulness", + "demographic_factor": "Urbanicity", + "score_range": 2.3, + "min_level": "Suburban", + "max_level": "Rural", + "min_score": 84.5, + "max_score": 86.8, + "se_min": 4.2, + "se_max": 4.2, + "effect_size": 0.684286979, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.3611628899, + "level_score_std_dev": 1.0402991023, + "se_difference": 5.939696962, + "z_score": 0.3872251421, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.3415921245, + "gap_confidence_interval_95_upper": 13.9415921245, + "raw_n_min_group": 255, + "raw_n_max_group": 89, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "accuracy", + "demographic_factor": "Age", + "score_range": 3.1, + "min_level": "35-44", + "max_level": "65+", + "min_score": 88.9, + "max_score": 92.0, + "se_min": 3.2, + "se_max": 2.9, + "effect_size": 1.0551371892, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9380065757, + "level_score_std_dev": 0.9956851354, + "se_difference": 4.3185645763, + "z_score": 0.7178311092, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.3642310345, + "gap_confidence_interval_95_upper": 11.5642310345, + "raw_n_min_group": 91, + "raw_n_max_group": 61, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "accuracy", + "demographic_factor": "Education", + "score_range": 2.3, + "min_level": "College", + "max_level": "No College", + "min_score": 88.9, + "max_score": 91.2, + "se_min": 2.7, + "se_max": 3.3, + "effect_size": 0.782843721, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 2.9380065757, + "level_score_std_dev": 1.15, + "se_difference": 4.2638011211, + "z_score": 0.5394247843, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.0568966345, + "gap_confidence_interval_95_upper": 10.6568966345, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "accuracy", + "demographic_factor": "Ethnicity", + "score_range": 1.9, + "min_level": "Hispanic", + "max_level": "African American", + "min_score": 89.0, + "max_score": 90.9, + "se_min": 4.0, + "se_max": 3.3, + "effect_size": 0.6466969869, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 2.9380065757, + "level_score_std_dev": 0.7084313658, + "se_difference": 5.185556865, + "z_score": 0.3664023073, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.2635046952, + "gap_confidence_interval_95_upper": 12.0635046952, + "raw_n_min_group": 34, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "accuracy", + "demographic_factor": "Politics", + "score_range": 1.7, + "min_level": "Republican", + "max_level": "Independent", + "min_score": 89.6, + "max_score": 91.3, + "se_min": 3.2, + "se_max": 2.9, + "effect_size": 0.5786236199, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 2.9380065757, + "level_score_std_dev": 0.7039570694, + "se_difference": 4.3185645763, + "z_score": 0.393649318, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.7642310345, + "gap_confidence_interval_95_upper": 10.1642310345, + "raw_n_min_group": 160, + "raw_n_max_group": 177, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "accuracy", + "demographic_factor": "Sex", + "score_range": 0.4, + "min_level": "Male", + "max_level": "Female", + "min_score": 90.2, + "max_score": 90.6, + "se_min": 3.1, + "se_max": 3.1, + "effect_size": 0.1361467341, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 2.9380065757, + "level_score_std_dev": 0.2, + "se_difference": 4.3840620434, + "z_score": 0.0912395847, + "p_value": 0.9801302996, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.192603711, + "gap_confidence_interval_95_upper": 8.992603711, + "raw_n_min_group": 246, + "raw_n_max_group": 251, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "accuracy", + "demographic_factor": "Urbanicity", + "score_range": 3.7, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 88.4, + "max_score": 92.1, + "se_min": 3.5, + "se_max": 2.9, + "effect_size": 1.2593572903, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9380065757, + "level_score_std_dev": 1.5195028427, + "se_difference": 4.5453272709, + "z_score": 0.814022793, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.208677749, + "gap_confidence_interval_95_upper": 12.608677749, + "raw_n_min_group": 171, + "raw_n_max_group": 86, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "adaptiveness", + "demographic_factor": "Age", + "score_range": 3.8, + "min_level": "35-44", + "max_level": "25-34", + "min_score": 87.1, + "max_score": 90.9, + "se_min": 2.9, + "se_max": 2.7, + "effect_size": 0.9696951115, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.9187575093, + "level_score_std_dev": 1.359432071, + "se_difference": 3.9623225512, + "z_score": 0.9590334837, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.9660094955, + "gap_confidence_interval_95_upper": 11.5660094955, + "raw_n_min_group": 91, + "raw_n_max_group": 105, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "adaptiveness", + "demographic_factor": "Education", + "score_range": 6.3, + "min_level": "College", + "max_level": "No College", + "min_score": 85.2, + "max_score": 91.5, + "se_min": 2.7, + "se_max": 2.9, + "effect_size": 1.6076524217, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.9187575093, + "level_score_std_dev": 3.15, + "se_difference": 3.9623225512, + "z_score": 1.5899765651, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -1.4660094955, + "gap_confidence_interval_95_upper": 14.0660094955, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "adaptiveness", + "demographic_factor": "Ethnicity", + "score_range": 3.3, + "min_level": "Asian", + "max_level": "Hispanic", + "min_score": 87.0, + "max_score": 90.3, + "se_min": 3.7, + "se_max": 3.4, + "effect_size": 0.8421036495, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.9187575093, + "level_score_std_dev": 1.2437342964, + "se_difference": 5.0249378106, + "z_score": 0.6567245455, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.5486971333, + "gap_confidence_interval_95_upper": 13.1486971333, + "raw_n_min_group": 38, + "raw_n_max_group": 34, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "adaptiveness", + "demographic_factor": "Politics", + "score_range": 3.0, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 88.8, + "max_score": 91.8, + "se_min": 3.0, + "se_max": 2.5, + "effect_size": 0.7655487723, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.9187575093, + "level_score_std_dev": 1.3695092389, + "se_difference": 3.905124838, + "z_score": 0.7682212796, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.6539040375, + "gap_confidence_interval_95_upper": 10.6539040375, + "raw_n_min_group": 164, + "raw_n_max_group": 177, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "adaptiveness", + "demographic_factor": "Sex", + "score_range": 0.9, + "min_level": "Female", + "max_level": "Male", + "min_score": 89.0, + "max_score": 89.9, + "se_min": 2.9, + "se_max": 2.8, + "effect_size": 0.2296646317, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.9187575093, + "level_score_std_dev": 0.45, + "se_difference": 4.0311288741, + "z_score": 0.2232625223, + "p_value": 0.9570722129, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.0008674104, + "gap_confidence_interval_95_upper": 8.8008674104, + "raw_n_min_group": 251, + "raw_n_max_group": 246, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "adaptiveness", + "demographic_factor": "Urbanicity", + "score_range": 5.3, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 86.7, + "max_score": 92.0, + "se_min": 3.2, + "se_max": 2.6, + "effect_size": 1.3524694976, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.9187575093, + "level_score_std_dev": 2.1648710508, + "se_difference": 4.1231056256, + "z_score": 1.2854388127, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.7811385307, + "gap_confidence_interval_95_upper": 13.3811385307, + "raw_n_min_group": 171, + "raw_n_max_group": 86, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "background_and_culture", + "demographic_factor": "Age", + "score_range": 7.8, + "min_level": "18-24", + "max_level": "65+", + "min_score": 75.9, + "max_score": 83.7, + "se_min": 3.8, + "se_max": 3.5, + "effect_size": 2.2549540508, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.459050528, + "level_score_std_dev": 2.8854038808, + "se_difference": 5.1662365412, + "z_score": 1.5098031106, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.3256375564, + "gap_confidence_interval_95_upper": 17.9256375564, + "raw_n_min_group": 60, + "raw_n_max_group": 61, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "background_and_culture", + "demographic_factor": "Education", + "score_range": 5.6, + "min_level": "College", + "max_level": "No College", + "min_score": 76.2, + "max_score": 81.8, + "se_min": 2.8, + "se_max": 3.8, + "effect_size": 1.6189413698, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.459050528, + "level_score_std_dev": 2.8, + "se_difference": 4.7201694885, + "z_score": 1.1863980761, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.6513621984, + "gap_confidence_interval_95_upper": 14.8513621984, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "background_and_culture", + "demographic_factor": "Ethnicity", + "score_range": 8.5, + "min_level": "Asian", + "max_level": "African American", + "min_score": 75.2, + "max_score": 83.7, + "se_min": 4.1, + "se_max": 3.6, + "effect_size": 2.457321722, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.459050528, + "level_score_std_dev": 3.1751968443, + "se_difference": 5.4561891463, + "z_score": 1.5578638812, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.1939342196, + "gap_confidence_interval_95_upper": 19.1939342196, + "raw_n_min_group": 38, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "background_and_culture", + "demographic_factor": "Politics", + "score_range": 1.2, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 79.6, + "max_score": 80.8, + "se_min": 3.5, + "se_max": 3.4, + "effect_size": 0.3469160078, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.459050528, + "level_score_std_dev": 0.5099019514, + "se_difference": 4.8795491595, + "z_score": 0.2459243592, + "p_value": 0.9570722129, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.3637406134, + "gap_confidence_interval_95_upper": 10.7637406134, + "raw_n_min_group": 164, + "raw_n_max_group": 177, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "background_and_culture", + "demographic_factor": "Sex", + "score_range": 0.2, + "min_level": "Female", + "max_level": "Male", + "min_score": 79.8, + "max_score": 80.0, + "se_min": 3.4, + "se_max": 3.5, + "effect_size": 0.0578193346, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 3.459050528, + "level_score_std_dev": 0.1, + "se_difference": 4.8795491595, + "z_score": 0.0409873932, + "p_value": 0.9872735163, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.3637406134, + "gap_confidence_interval_95_upper": 9.7637406134, + "raw_n_min_group": 251, + "raw_n_max_group": 246, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "background_and_culture", + "demographic_factor": "Urbanicity", + "score_range": 2.4, + "min_level": "Rural", + "max_level": "Suburban", + "min_score": 78.9, + "max_score": 81.3, + "se_min": 3.6, + "se_max": 3.3, + "effect_size": 0.6938320156, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.459050528, + "level_score_std_dev": 1.1085526099, + "se_difference": 4.8836461788, + "z_score": 0.4914360935, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.1717706237, + "gap_confidence_interval_95_upper": 11.9717706237, + "raw_n_min_group": 86, + "raw_n_max_group": 244, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "bias_and_stereotypes", + "demographic_factor": "Age", + "score_range": 5.9, + "min_level": "18-24", + "max_level": "55-64", + "min_score": 83.1, + "max_score": 89.0, + "se_min": 5.0, + "se_max": 3.3, + "effect_size": 2.1191466089, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.7841396038, + "level_score_std_dev": 2.0981473309, + "se_difference": 5.9908263203, + "z_score": 0.9848390997, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.8418038254, + "gap_confidence_interval_95_upper": 17.6418038254, + "raw_n_min_group": 60, + "raw_n_max_group": 111, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "bias_and_stereotypes", + "demographic_factor": "Education", + "score_range": 4.3, + "min_level": "College", + "max_level": "No College", + "min_score": 84.7, + "max_score": 89.0, + "se_min": 3.4, + "se_max": 4.0, + "effect_size": 1.5444627827, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.7841396038, + "level_score_std_dev": 2.15, + "se_difference": 5.2497618994, + "z_score": 0.8190847666, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.9893442502, + "gap_confidence_interval_95_upper": 14.5893442502, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "bias_and_stereotypes", + "demographic_factor": "Ethnicity", + "score_range": 4.7, + "min_level": "Hispanic", + "max_level": "African American", + "min_score": 84.5, + "max_score": 89.2, + "se_min": 5.2, + "se_max": 3.9, + "effect_size": 1.6881337393, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.7841396038, + "level_score_std_dev": 1.7073371079, + "se_difference": 6.5, + "z_score": 0.7230769231, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.0397658995, + "gap_confidence_interval_95_upper": 17.4397658995, + "raw_n_min_group": 34, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "bias_and_stereotypes", + "demographic_factor": "Politics", + "score_range": 2.0, + "min_level": "Republican", + "max_level": "Democrat", + "min_score": 86.4, + "max_score": 88.4, + "se_min": 3.9, + "se_max": 3.7, + "effect_size": 0.7183547827, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 2.7841396038, + "level_score_std_dev": 0.8806563209, + "se_difference": 5.3758720223, + "z_score": 0.3720326659, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.5365155492, + "gap_confidence_interval_95_upper": 12.5365155492, + "raw_n_min_group": 160, + "raw_n_max_group": 164, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "bias_and_stereotypes", + "demographic_factor": "Sex", + "score_range": 0.3, + "min_level": "Male", + "max_level": "Female", + "min_score": 87.4, + "max_score": 87.7, + "se_min": 3.9, + "se_max": 3.8, + "effect_size": 0.1077532174, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 2.7841396038, + "level_score_std_dev": 0.15, + "se_difference": 5.445181356, + "z_score": 0.0550945837, + "p_value": 0.9869756083, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -10.3723593471, + "gap_confidence_interval_95_upper": 10.9723593471, + "raw_n_min_group": 246, + "raw_n_max_group": 251, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "bias_and_stereotypes", + "demographic_factor": "Urbanicity", + "score_range": 3.8, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 85.1, + "max_score": 88.9, + "se_min": 4.3, + "se_max": 3.8, + "effect_size": 1.3648740871, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.7841396038, + "level_score_std_dev": 1.7048949137, + "se_difference": 5.7384666942, + "z_score": 0.6621977965, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.447188047, + "gap_confidence_interval_95_upper": 15.047188047, + "raw_n_min_group": 171, + "raw_n_max_group": 86, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "clarity", + "demographic_factor": "Age", + "score_range": 5.7, + "min_level": "35-44", + "max_level": "65+", + "min_score": 82.7, + "max_score": 88.4, + "se_min": 3.9, + "se_max": 3.5, + "effect_size": 1.2624444732, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.5150500643, + "level_score_std_dev": 2.0456593612, + "se_difference": 5.2402290026, + "z_score": 1.0877387223, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.5706601159, + "gap_confidence_interval_95_upper": 15.9706601159, + "raw_n_min_group": 91, + "raw_n_max_group": 61, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "clarity", + "demographic_factor": "Education", + "score_range": 5.2, + "min_level": "College", + "max_level": "No College", + "min_score": 82.1, + "max_score": 87.3, + "se_min": 3.4, + "se_max": 3.9, + "effect_size": 1.15170373, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.5150500643, + "level_score_std_dev": 2.6, + "se_difference": 5.1739733281, + "z_score": 1.0050303065, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.9408013801, + "gap_confidence_interval_95_upper": 15.3408013801, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "clarity", + "demographic_factor": "Ethnicity", + "score_range": 5.5, + "min_level": "Hispanic", + "max_level": "African American", + "min_score": 84.2, + "max_score": 89.7, + "se_min": 4.9, + "se_max": 3.3, + "effect_size": 1.2181481759, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.5150500643, + "level_score_std_dev": 2.2237355958, + "se_difference": 5.9076221951, + "z_score": 0.9310006325, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.0787267367, + "gap_confidence_interval_95_upper": 17.0787267367, + "raw_n_min_group": 34, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "clarity", + "demographic_factor": "Politics", + "score_range": 1.6, + "min_level": "Independent", + "max_level": "Democrat", + "min_score": 84.2, + "max_score": 85.8, + "se_min": 3.9, + "se_max": 3.7, + "effect_size": 0.3543703784, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 4.5150500643, + "level_score_std_dev": 0.7542472333, + "se_difference": 5.3758720223, + "z_score": 0.2976261327, + "p_value": 0.9469805126, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.9365155492, + "gap_confidence_interval_95_upper": 12.1365155492, + "raw_n_min_group": 177, + "raw_n_max_group": 164, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "clarity", + "demographic_factor": "Sex", + "score_range": 0.7, + "min_level": "Female", + "max_level": "Male", + "min_score": 85.2, + "max_score": 85.9, + "se_min": 3.7, + "se_max": 3.7, + "effect_size": 0.1550370406, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 4.5150500643, + "level_score_std_dev": 0.35, + "se_difference": 5.2325901808, + "z_score": 0.1337769586, + "p_value": 0.9758695334, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.5556883002, + "gap_confidence_interval_95_upper": 10.9556883002, + "raw_n_min_group": 251, + "raw_n_max_group": 246, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "clarity", + "demographic_factor": "Urbanicity", + "score_range": 1.6, + "min_level": "Suburban", + "max_level": "Rural", + "min_score": 84.9, + "max_score": 86.5, + "se_min": 3.6, + "se_max": 3.7, + "effect_size": 0.3543703784, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 4.5150500643, + "level_score_std_dev": 0.6683312552, + "se_difference": 5.1623637997, + "z_score": 0.3099355377, + "p_value": 0.9469805126, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.5180471224, + "gap_confidence_interval_95_upper": 11.7180471224, + "raw_n_min_group": 244, + "raw_n_max_group": 86, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "communication", + "demographic_factor": "Age", + "score_range": 3.7, + "min_level": "18-24", + "max_level": "55-64", + "min_score": 86.1, + "max_score": 89.8, + "se_min": 3.6, + "se_max": 2.5, + "effect_size": 0.9153352018, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.0422350116, + "level_score_std_dev": 1.1460075625, + "se_difference": 4.3829214002, + "z_score": 0.8441857981, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.8903680914, + "gap_confidence_interval_95_upper": 12.2903680914, + "raw_n_min_group": 60, + "raw_n_max_group": 111, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "communication", + "demographic_factor": "Education", + "score_range": 5.6, + "min_level": "College", + "max_level": "No College", + "min_score": 85.2, + "max_score": 90.8, + "se_min": 2.7, + "se_max": 3.0, + "effect_size": 1.3853721973, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.0422350116, + "level_score_std_dev": 2.8, + "se_difference": 4.0360872141, + "z_score": 1.3874824063, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.3105855781, + "gap_confidence_interval_95_upper": 13.5105855781, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "communication", + "demographic_factor": "Ethnicity", + "score_range": 1.7, + "min_level": "Hispanic", + "max_level": "White", + "min_score": 87.5, + "max_score": 89.2, + "se_min": 3.7, + "se_max": 2.7, + "effect_size": 0.420559417, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 4.0422350116, + "level_score_std_dev": 0.66473679, + "se_difference": 4.5803929962, + "z_score": 0.3711471923, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.2774053077, + "gap_confidence_interval_95_upper": 10.6774053077, + "raw_n_min_group": 34, + "raw_n_max_group": 336, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "communication", + "demographic_factor": "Politics", + "score_range": 3.9, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 87.5, + "max_score": 91.4, + "se_min": 3.2, + "se_max": 2.5, + "effect_size": 0.9648127803, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.0422350116, + "level_score_std_dev": 1.5923427883, + "se_difference": 4.0607881008, + "z_score": 0.9604047055, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.0589984265, + "gap_confidence_interval_95_upper": 11.8589984265, + "raw_n_min_group": 164, + "raw_n_max_group": 177, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "communication", + "demographic_factor": "Sex", + "score_range": 1.5, + "min_level": "Female", + "max_level": "Male", + "min_score": 88.2, + "max_score": 89.7, + "se_min": 3.0, + "se_max": 2.8, + "effect_size": 0.3710818386, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 4.0422350116, + "level_score_std_dev": 0.75, + "se_difference": 4.1036569057, + "z_score": 0.3655276341, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.5430197402, + "gap_confidence_interval_95_upper": 9.5430197402, + "raw_n_min_group": 251, + "raw_n_max_group": 246, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "communication", + "demographic_factor": "Urbanicity", + "score_range": 4.6, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 86.2, + "max_score": 90.8, + "se_min": 3.3, + "se_max": 2.8, + "effect_size": 1.1379843049, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.0422350116, + "level_score_std_dev": 1.9362047642, + "se_difference": 4.3278170017, + "z_score": 1.062891522, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.882365455, + "gap_confidence_interval_95_upper": 13.082365455, + "raw_n_min_group": 171, + "raw_n_max_group": 86, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "comprehensiveness", + "demographic_factor": "Age", + "score_range": 5.4, + "min_level": "25-34", + "max_level": "65+", + "min_score": 84.9, + "max_score": 90.3, + "se_min": 4.5, + "se_max": 3.6, + "effect_size": 1.5074588168, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.5821874137, + "level_score_std_dev": 1.7123732719, + "se_difference": 5.7628118137, + "z_score": 0.9370425713, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.8949036045, + "gap_confidence_interval_95_upper": 16.6949036045, + "raw_n_min_group": 105, + "raw_n_max_group": 61, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "comprehensiveness", + "demographic_factor": "Education", + "score_range": 2.4, + "min_level": "College", + "max_level": "No College", + "min_score": 86.8, + "max_score": 89.2, + "se_min": 3.3, + "se_max": 4.0, + "effect_size": 0.6699816963, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.5821874137, + "level_score_std_dev": 1.2, + "se_difference": 5.185556865, + "z_score": 0.4628239671, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.7635046952, + "gap_confidence_interval_95_upper": 12.5635046952, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "comprehensiveness", + "demographic_factor": "Ethnicity", + "score_range": 1.1, + "min_level": "White", + "max_level": "African American", + "min_score": 88.1, + "max_score": 89.2, + "se_min": 3.6, + "se_max": 4.1, + "effect_size": 0.3070749442, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.5821874137, + "level_score_std_dev": 0.4387482194, + "se_difference": 5.4561891463, + "z_score": 0.201605914, + "p_value": 0.9597457904, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.5939342196, + "gap_confidence_interval_95_upper": 11.7939342196, + "raw_n_min_group": 336, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "comprehensiveness", + "demographic_factor": "Politics", + "score_range": 3.0, + "min_level": "Republican", + "max_level": "Independent", + "min_score": 87.8, + "max_score": 90.8, + "se_min": 3.9, + "se_max": 3.2, + "effect_size": 0.8374771204, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.5821874137, + "level_score_std_dev": 1.3912424503, + "se_difference": 5.0447993023, + "z_score": 0.5946718234, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.8876249416, + "gap_confidence_interval_95_upper": 12.8876249416, + "raw_n_min_group": 160, + "raw_n_max_group": 177, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "comprehensiveness", + "demographic_factor": "Sex", + "score_range": 0.4, + "min_level": "Female", + "max_level": "Male", + "min_score": 88.2, + "max_score": 88.6, + "se_min": 3.8, + "se_max": 3.8, + "effect_size": 0.1116636161, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 3.5821874137, + "level_score_std_dev": 0.2, + "se_difference": 5.374011537, + "z_score": 0.0744322928, + "p_value": 0.9818838748, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -10.1328690651, + "gap_confidence_interval_95_upper": 10.9328690651, + "raw_n_min_group": 251, + "raw_n_max_group": 246, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "comprehensiveness", + "demographic_factor": "Urbanicity", + "score_range": 5.0, + "min_level": "Suburban", + "max_level": "Rural", + "min_score": 86.9, + "max_score": 91.9, + "se_min": 3.9, + "se_max": 3.3, + "effect_size": 1.3957952007, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.5821874137, + "level_score_std_dev": 2.357022604, + "se_difference": 5.1088159098, + "z_score": 0.9787003659, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.0130951868, + "gap_confidence_interval_95_upper": 15.0130951868, + "raw_n_min_group": 244, + "raw_n_max_group": 86, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "confidence", + "demographic_factor": "Age", + "score_range": 4.8, + "min_level": "55-64", + "max_level": "65+", + "min_score": 88.8, + "max_score": 93.6, + "se_min": 0.0, + "se_max": 0.0, + "effect_size": 1.4597694076, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.2881905696, + "level_score_std_dev": 1.4395215254, + "se_difference": 0.0, + "z_score": 9999.9, + "p_value": 0.0, + "is_statistically_significant": true, + "gap_confidence_interval_95_lower": 4.8, + "gap_confidence_interval_95_upper": 4.8, + "raw_n_min_group": 111, + "raw_n_max_group": 61, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": true + }, + { + "model": "gemini-2.0-flash-001", + "category": "confidence", + "demographic_factor": "Education", + "score_range": 6.1, + "min_level": "College", + "max_level": "No College", + "min_score": 87.5, + "max_score": 93.6, + "se_min": 0.0, + "se_max": 0.0, + "effect_size": 1.8551236222, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.2881905696, + "level_score_std_dev": 3.05, + "se_difference": 0.0, + "z_score": 9999.9, + "p_value": 0.0, + "is_statistically_significant": true, + "gap_confidence_interval_95_lower": 6.1, + "gap_confidence_interval_95_upper": 6.1, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": true + }, + { + "model": "gemini-2.0-flash-001", + "category": "confidence", + "demographic_factor": "Ethnicity", + "score_range": 2.7, + "min_level": "African American", + "max_level": "Asian", + "min_score": 89.7, + "max_score": 92.4, + "se_min": 0.0, + "se_max": 0.0, + "effect_size": 0.8211202918, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.2881905696, + "level_score_std_dev": 1.0606601718, + "se_difference": 0.0, + "z_score": 9999.9, + "p_value": 0.0, + "is_statistically_significant": true, + "gap_confidence_interval_95_lower": 2.7, + "gap_confidence_interval_95_upper": 2.7, + "raw_n_min_group": 0, + "raw_n_max_group": 38, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": true + }, + { + "model": "gemini-2.0-flash-001", + "category": "confidence", + "demographic_factor": "Politics", + "score_range": 1.9, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 90.6, + "max_score": 92.5, + "se_min": 0.0, + "se_max": 0.0, + "effect_size": 0.5778253905, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.2881905696, + "level_score_std_dev": 0.8041558721, + "se_difference": 0.0, + "z_score": 9999.9, + "p_value": 0.0, + "is_statistically_significant": true, + "gap_confidence_interval_95_lower": 1.9, + "gap_confidence_interval_95_upper": 1.9, + "raw_n_min_group": 164, + "raw_n_max_group": 177, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "confidence", + "demographic_factor": "Sex", + "score_range": 0.8, + "min_level": "Male", + "max_level": "Female", + "min_score": 91.1, + "max_score": 91.9, + "se_min": 0.0, + "se_max": 0.0, + "effect_size": 0.2432949013, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.2881905696, + "level_score_std_dev": 0.4, + "se_difference": 0.0, + "z_score": 9999.9, + "p_value": 0.0, + "is_statistically_significant": true, + "gap_confidence_interval_95_lower": 0.8, + "gap_confidence_interval_95_upper": 0.8, + "raw_n_min_group": 246, + "raw_n_max_group": 251, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "confidence", + "demographic_factor": "Urbanicity", + "score_range": 3.6, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 89.1, + "max_score": 92.7, + "se_min": 0.0, + "se_max": 0.0, + "effect_size": 1.0948270557, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.2881905696, + "level_score_std_dev": 1.630950643, + "se_difference": 0.0, + "z_score": 9999.9, + "p_value": 0.0, + "is_statistically_significant": true, + "gap_confidence_interval_95_lower": 3.6, + "gap_confidence_interval_95_upper": 3.6, + "raw_n_min_group": 171, + "raw_n_max_group": 86, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": true + }, + { + "model": "gemini-2.0-flash-001", + "category": "consistency", + "demographic_factor": "Age", + "score_range": 2.8, + "min_level": "45-54", + "max_level": "65+", + "min_score": 90.5, + "max_score": 93.3, + "se_min": 2.8, + "se_max": 2.5, + "effect_size": 0.9405280236, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9770511137, + "level_score_std_dev": 0.9, + "se_difference": 3.7536648758, + "z_score": 0.7459376616, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.5570479666, + "gap_confidence_interval_95_upper": 10.1570479666, + "raw_n_min_group": 73, + "raw_n_max_group": 61, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "consistency", + "demographic_factor": "Education", + "score_range": 4.9, + "min_level": "College", + "max_level": "No College", + "min_score": 89.3, + "max_score": 94.2, + "se_min": 2.6, + "se_max": 2.5, + "effect_size": 1.6459240412, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9770511137, + "level_score_std_dev": 2.45, + "se_difference": 3.6069377594, + "z_score": 1.3584930839, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.1694681029, + "gap_confidence_interval_95_upper": 11.9694681029, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "consistency", + "demographic_factor": "Ethnicity", + "score_range": 2.9, + "min_level": "Asian", + "max_level": "Hispanic", + "min_score": 91.8, + "max_score": 94.7, + "se_min": 3.1, + "se_max": 2.6, + "effect_size": 0.9741183101, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9770511137, + "level_score_std_dev": 1.1979148551, + "se_difference": 4.0459856648, + "z_score": 0.7167598307, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.029986185, + "gap_confidence_interval_95_upper": 10.829986185, + "raw_n_min_group": 38, + "raw_n_max_group": 34, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "consistency", + "demographic_factor": "Politics", + "score_range": 1.9, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 92.2, + "max_score": 94.1, + "se_min": 2.6, + "se_max": 2.2, + "effect_size": 0.6382154446, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 2.9770511137, + "level_score_std_dev": 0.8956685895, + "se_difference": 3.4058772732, + "z_score": 0.5578592085, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.7753967912, + "gap_confidence_interval_95_upper": 8.5753967912, + "raw_n_min_group": 164, + "raw_n_max_group": 177, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "consistency", + "demographic_factor": "Sex", + "score_range": 0.4, + "min_level": "Female", + "max_level": "Male", + "min_score": 92.3, + "max_score": 92.7, + "se_min": 2.5, + "se_max": 2.5, + "effect_size": 0.1343611462, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 2.9770511137, + "level_score_std_dev": 0.2, + "se_difference": 3.5355339059, + "z_score": 0.113137085, + "p_value": 0.977356957, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.5295191217, + "gap_confidence_interval_95_upper": 7.3295191217, + "raw_n_min_group": 251, + "raw_n_max_group": 246, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "consistency", + "demographic_factor": "Urbanicity", + "score_range": 2.9, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 91.3, + "max_score": 94.2, + "se_min": 2.8, + "se_max": 2.3, + "effect_size": 0.9741183101, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9770511137, + "level_score_std_dev": 1.2119772642, + "se_difference": 3.6235341864, + "z_score": 0.8003236208, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.2019965021, + "gap_confidence_interval_95_upper": 10.0019965021, + "raw_n_min_group": 171, + "raw_n_max_group": 86, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "context_memory", + "demographic_factor": "Age", + "score_range": 3.8, + "min_level": "18-24", + "max_level": "65+", + "min_score": 90.9, + "max_score": 94.7, + "se_min": 3.3, + "se_max": 2.2, + "effect_size": 1.1213297412, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.3888336858, + "level_score_std_dev": 1.3009611831, + "se_difference": 3.966106403, + "z_score": 0.9581185208, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.9734257088, + "gap_confidence_interval_95_upper": 11.5734257088, + "raw_n_min_group": 60, + "raw_n_max_group": 61, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "context_memory", + "demographic_factor": "Education", + "score_range": 4.2, + "min_level": "College", + "max_level": "No College", + "min_score": 90.8, + "max_score": 95.0, + "se_min": 2.4, + "se_max": 2.3, + "effect_size": 1.2393644508, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.3888336858, + "level_score_std_dev": 2.1, + "se_difference": 3.3241540277, + "z_score": 1.263479359, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.3152221734, + "gap_confidence_interval_95_upper": 10.7152221734, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "context_memory", + "demographic_factor": "Ethnicity", + "score_range": 0.6, + "min_level": "Asian", + "max_level": "Hispanic", + "min_score": 93.1, + "max_score": 93.7, + "se_min": 2.8, + "se_max": 2.9, + "effect_size": 0.1770520644, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 3.3888336858, + "level_score_std_dev": 0.2549509757, + "se_difference": 4.0311288741, + "z_score": 0.1488416815, + "p_value": 0.9660130308, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.3008674104, + "gap_confidence_interval_95_upper": 8.5008674104, + "raw_n_min_group": 38, + "raw_n_max_group": 34, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "context_memory", + "demographic_factor": "Politics", + "score_range": 0.8, + "min_level": "Republican", + "max_level": "Democrat", + "min_score": 93.2, + "max_score": 94.0, + "se_min": 2.4, + "se_max": 2.3, + "effect_size": 0.2360694192, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.3888336858, + "level_score_std_dev": 0.3399346342, + "se_difference": 3.3241540277, + "z_score": 0.240662735, + "p_value": 0.9570722129, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.7152221734, + "gap_confidence_interval_95_upper": 7.3152221734, + "raw_n_min_group": 160, + "raw_n_max_group": 164, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "context_memory", + "demographic_factor": "Sex", + "score_range": 0.0, + "min_level": "Female", + "max_level": "Female", + "min_score": 93.6, + "max_score": 93.6, + "se_min": 2.3, + "se_max": 2.3, + "effect_size": 0.0, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 3.3888336858, + "level_score_std_dev": 0.0, + "se_difference": 3.2526911935, + "z_score": 0.0, + "p_value": 1.0, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.375157592, + "gap_confidence_interval_95_upper": 6.375157592, + "raw_n_min_group": 251, + "raw_n_max_group": 251, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "context_memory", + "demographic_factor": "Urbanicity", + "score_range": 2.1, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 92.3, + "max_score": 94.4, + "se_min": 2.6, + "se_max": 2.3, + "effect_size": 0.6196822254, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.3888336858, + "level_score_std_dev": 0.8956685895, + "se_difference": 3.4713109915, + "z_score": 0.604958762, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.7036445226, + "gap_confidence_interval_95_upper": 8.9036445226, + "raw_n_min_group": 171, + "raw_n_max_group": 86, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "conversation_building", + "demographic_factor": "Age", + "score_range": 6.8, + "min_level": "45-54", + "max_level": "65+", + "min_score": 81.1, + "max_score": 87.9, + "se_min": 4.7, + "se_max": 4.0, + "effect_size": 1.5719410124, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.3258620689, + "level_score_std_dev": 2.4664414312, + "se_difference": 6.1717096497, + "z_score": 1.1018016702, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.2963286364, + "gap_confidence_interval_95_upper": 18.8963286364, + "raw_n_min_group": 73, + "raw_n_max_group": 61, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "conversation_building", + "demographic_factor": "Education", + "score_range": 3.1, + "min_level": "College", + "max_level": "No College", + "min_score": 83.0, + "max_score": 86.1, + "se_min": 3.7, + "se_max": 4.5, + "effect_size": 0.7166201674, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 4.3258620689, + "level_score_std_dev": 1.55, + "se_difference": 5.8258046655, + "z_score": 0.5321153348, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.3183673253, + "gap_confidence_interval_95_upper": 14.5183673253, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "conversation_building", + "demographic_factor": "Ethnicity", + "score_range": 3.4, + "min_level": "White", + "max_level": "African American", + "min_score": 84.5, + "max_score": 87.9, + "se_min": 4.1, + "se_max": 4.1, + "effect_size": 0.7859705062, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 4.3258620689, + "level_score_std_dev": 1.2247448714, + "se_difference": 5.7982756057, + "z_score": 0.5863812332, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.9644113597, + "gap_confidence_interval_95_upper": 14.7644113597, + "raw_n_min_group": 336, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "conversation_building", + "demographic_factor": "Politics", + "score_range": 2.4, + "min_level": "Republican", + "max_level": "Independent", + "min_score": 83.9, + "max_score": 86.3, + "se_min": 4.3, + "se_max": 4.1, + "effect_size": 0.5548027103, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 4.3258620689, + "level_score_std_dev": 0.9977753031, + "se_difference": 5.941380311, + "z_score": 0.4039465367, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.244891428, + "gap_confidence_interval_95_upper": 14.044891428, + "raw_n_min_group": 160, + "raw_n_max_group": 177, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "conversation_building", + "demographic_factor": "Sex", + "score_range": 0.1, + "min_level": "Female", + "max_level": "Male", + "min_score": 85.0, + "max_score": 85.1, + "se_min": 4.2, + "se_max": 4.3, + "effect_size": 0.0231167796, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 4.3258620689, + "level_score_std_dev": 0.05, + "se_difference": 6.0108235709, + "z_score": 0.0166366553, + "p_value": 0.9926350237, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -11.6809977163, + "gap_confidence_interval_95_upper": 11.8809977163, + "raw_n_min_group": 251, + "raw_n_max_group": 246, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "conversation_building", + "demographic_factor": "Urbanicity", + "score_range": 3.9, + "min_level": "Urban", + "max_level": "Suburban", + "min_score": 82.7, + "max_score": 86.6, + "se_min": 4.6, + "se_max": 3.9, + "effect_size": 0.9015544042, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.3258620689, + "level_score_std_dev": 1.630950643, + "se_difference": 6.0307545133, + "z_score": 0.6466852516, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.9200616457, + "gap_confidence_interval_95_upper": 15.7200616457, + "raw_n_min_group": 171, + "raw_n_max_group": 244, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "conversation_flow", + "demographic_factor": "Age", + "score_range": 4.6, + "min_level": "35-44", + "max_level": "55-64", + "min_score": 81.8, + "max_score": 86.4, + "se_min": 4.1, + "se_max": 3.4, + "effect_size": 0.9255543738, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.969994341, + "level_score_std_dev": 1.8526257642, + "se_difference": 5.3263495942, + "z_score": 0.8636308824, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.8394533738, + "gap_confidence_interval_95_upper": 15.0394533738, + "raw_n_min_group": 91, + "raw_n_max_group": 111, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "conversation_flow", + "demographic_factor": "Education", + "score_range": 4.8, + "min_level": "College", + "max_level": "No College", + "min_score": 81.5, + "max_score": 86.3, + "se_min": 3.6, + "se_max": 4.2, + "effect_size": 0.9657958683, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.969994341, + "level_score_std_dev": 2.4, + "se_difference": 5.5317266744, + "z_score": 0.8677218313, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.0419850541, + "gap_confidence_interval_95_upper": 15.6419850541, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "conversation_flow", + "demographic_factor": "Ethnicity", + "score_range": 7.9, + "min_level": "Hispanic", + "max_level": "African American", + "min_score": 80.4, + "max_score": 88.3, + "se_min": 5.5, + "se_max": 3.8, + "effect_size": 1.5895390332, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.969994341, + "level_score_std_dev": 2.8744564704, + "se_difference": 6.6850579653, + "z_score": 1.1817399402, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.2024728466, + "gap_confidence_interval_95_upper": 21.0024728466, + "raw_n_min_group": 34, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "conversation_flow", + "demographic_factor": "Politics", + "score_range": 4.9, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 83.0, + "max_score": 87.9, + "se_min": 4.3, + "se_max": 3.5, + "effect_size": 0.9859166155, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.969994341, + "level_score_std_dev": 2.007209229, + "se_difference": 5.5443665103, + "z_score": 0.8837799577, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.9667586772, + "gap_confidence_interval_95_upper": 15.7667586772, + "raw_n_min_group": 164, + "raw_n_max_group": 177, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "conversation_flow", + "demographic_factor": "Sex", + "score_range": 1.6, + "min_level": "Female", + "max_level": "Male", + "min_score": 83.9, + "max_score": 85.5, + "se_min": 4.0, + "se_max": 3.9, + "effect_size": 0.3219319561, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 4.969994341, + "level_score_std_dev": 0.8, + "se_difference": 5.5865910894, + "z_score": 0.2864000558, + "p_value": 0.9486856692, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.3495173315, + "gap_confidence_interval_95_upper": 12.5495173315, + "raw_n_min_group": 251, + "raw_n_max_group": 246, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "conversation_flow", + "demographic_factor": "Urbanicity", + "score_range": 3.1, + "min_level": "Urban", + "max_level": "Suburban", + "min_score": 83.1, + "max_score": 86.2, + "se_min": 4.2, + "se_max": 3.6, + "effect_size": 0.6237431649, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 4.969994341, + "level_score_std_dev": 1.291854825, + "se_difference": 5.5317266744, + "z_score": 0.5604036827, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.7419850541, + "gap_confidence_interval_95_upper": 13.9419850541, + "raw_n_min_group": 171, + "raw_n_max_group": 244, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "cultural_awareness", + "demographic_factor": "Age", + "score_range": 5.0, + "min_level": "18-24", + "max_level": "65+", + "min_score": 72.2, + "max_score": 77.2, + "se_min": 5.1, + "se_max": 4.8, + "effect_size": 1.3463141793, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.7138433783, + "level_score_std_dev": 1.8016196417, + "se_difference": 7.003570518, + "z_score": 0.7139215615, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.7267459784, + "gap_confidence_interval_95_upper": 18.7267459784, + "raw_n_min_group": 60, + "raw_n_max_group": 61, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "cultural_awareness", + "demographic_factor": "Education", + "score_range": 1.4, + "min_level": "College", + "max_level": "No College", + "min_score": 74.4, + "max_score": 75.8, + "se_min": 3.7, + "se_max": 5.1, + "effect_size": 0.3769679702, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.7138433783, + "level_score_std_dev": 0.7, + "se_difference": 6.3007936008, + "z_score": 0.2221942328, + "p_value": 0.9570722129, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -10.9493285316, + "gap_confidence_interval_95_upper": 13.7493285316, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "cultural_awareness", + "demographic_factor": "Ethnicity", + "score_range": 9.9, + "min_level": "Asian", + "max_level": "African American", + "min_score": 70.6, + "max_score": 80.5, + "se_min": 5.5, + "se_max": 4.8, + "effect_size": 2.665702075, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.7138433783, + "level_score_std_dev": 3.7262581768, + "se_difference": 7.3, + "z_score": 1.3561643836, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.4077370871, + "gap_confidence_interval_95_upper": 24.2077370871, + "raw_n_min_group": 38, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "cultural_awareness", + "demographic_factor": "Politics", + "score_range": 4.0, + "min_level": "Independent", + "max_level": "Republican", + "min_score": 73.6, + "max_score": 77.6, + "se_min": 4.7, + "se_max": 4.5, + "effect_size": 1.0770513434, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.7138433783, + "level_score_std_dev": 1.7987650084, + "se_difference": 6.506919394, + "z_score": 0.6147302214, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.7533276625, + "gap_confidence_interval_95_upper": 16.7533276625, + "raw_n_min_group": 177, + "raw_n_max_group": 160, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "cultural_awareness", + "demographic_factor": "Sex", + "score_range": 2.1, + "min_level": "Female", + "max_level": "Male", + "min_score": 74.3, + "max_score": 76.4, + "se_min": 4.7, + "se_max": 4.7, + "effect_size": 0.5654519553, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.7138433783, + "level_score_std_dev": 1.05, + "se_difference": 6.6468037432, + "z_score": 0.3159413278, + "p_value": 0.9469805126, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -10.9274959489, + "gap_confidence_interval_95_upper": 15.1274959489, + "raw_n_min_group": 251, + "raw_n_max_group": 246, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "cultural_awareness", + "demographic_factor": "Urbanicity", + "score_range": 3.7, + "min_level": "Urban", + "max_level": "Suburban", + "min_score": 73.7, + "max_score": 77.4, + "se_min": 4.8, + "se_max": 4.4, + "effect_size": 0.9962724927, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.7138433783, + "level_score_std_dev": 1.6579773487, + "se_difference": 6.5115282384, + "z_score": 0.5682229831, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.0623608317, + "gap_confidence_interval_95_upper": 16.4623608317, + "raw_n_min_group": 171, + "raw_n_max_group": 244, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "detail_and_technical_language", + "demographic_factor": "Age", + "score_range": 5.5, + "min_level": "45-54", + "max_level": "65+", + "min_score": 88.1, + "max_score": 93.6, + "se_min": 3.4, + "se_max": 2.6, + "effect_size": 1.757649602, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1291788726, + "level_score_std_dev": 1.8068542338, + "se_difference": 4.2801869118, + "z_score": 1.2849906122, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.8890121942, + "gap_confidence_interval_95_upper": 13.8890121942, + "raw_n_min_group": 73, + "raw_n_max_group": 61, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "detail_and_technical_language", + "demographic_factor": "Education", + "score_range": 5.5, + "min_level": "College", + "max_level": "No College", + "min_score": 87.5, + "max_score": 93.0, + "se_min": 3.0, + "se_max": 2.9, + "effect_size": 1.757649602, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1291788726, + "level_score_std_dev": 2.75, + "se_difference": 4.172529209, + "z_score": 1.3181453561, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.6780069741, + "gap_confidence_interval_95_upper": 13.6780069741, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "detail_and_technical_language", + "demographic_factor": "Ethnicity", + "score_range": 1.2, + "min_level": "Asian", + "max_level": "African American", + "min_score": 90.4, + "max_score": 91.6, + "se_min": 3.5, + "se_max": 3.1, + "effect_size": 0.3834871859, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.1291788726, + "level_score_std_dev": 0.4769696007, + "se_difference": 4.675467891, + "z_score": 0.2566588046, + "p_value": 0.9570722129, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.9637486773, + "gap_confidence_interval_95_upper": 10.3637486773, + "raw_n_min_group": 38, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "detail_and_technical_language", + "demographic_factor": "Politics", + "score_range": 2.1, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 90.6, + "max_score": 92.7, + "se_min": 3.1, + "se_max": 2.6, + "effect_size": 0.6711025753, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.1291788726, + "level_score_std_dev": 0.9104333522, + "se_difference": 4.0459856648, + "z_score": 0.5190329808, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.829986185, + "gap_confidence_interval_95_upper": 10.029986185, + "raw_n_min_group": 164, + "raw_n_max_group": 177, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "detail_and_technical_language", + "demographic_factor": "Sex", + "score_range": 0.7, + "min_level": "Female", + "max_level": "Male", + "min_score": 90.8, + "max_score": 91.5, + "se_min": 3.0, + "se_max": 2.9, + "effect_size": 0.2237008584, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.1291788726, + "level_score_std_dev": 0.35, + "se_difference": 4.172529209, + "z_score": 0.1677639544, + "p_value": 0.9610477633, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.4780069741, + "gap_confidence_interval_95_upper": 8.8780069741, + "raw_n_min_group": 251, + "raw_n_max_group": 246, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "detail_and_technical_language", + "demographic_factor": "Urbanicity", + "score_range": 3.9, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 89.2, + "max_score": 93.1, + "se_min": 3.3, + "se_max": 2.7, + "effect_size": 1.2463333541, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1291788726, + "level_score_std_dev": 1.5923427883, + "se_difference": 4.2638011211, + "z_score": 0.9146768081, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.4568966345, + "gap_confidence_interval_95_upper": 12.2568966345, + "raw_n_min_group": 171, + "raw_n_max_group": 86, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "distinct_personality", + "demographic_factor": "Age", + "score_range": 4.6, + "min_level": "35-44", + "max_level": "65+", + "min_score": 74.3, + "max_score": 78.9, + "se_min": 4.5, + "se_max": 4.7, + "effect_size": 1.0639904323, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.3233471472, + "level_score_std_dev": 1.7092070936, + "se_difference": 6.506919394, + "z_score": 0.7069397547, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.1533276625, + "gap_confidence_interval_95_upper": 17.3533276625, + "raw_n_min_group": 91, + "raw_n_max_group": 61, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "distinct_personality", + "demographic_factor": "Education", + "score_range": 1.4, + "min_level": "College", + "max_level": "No College", + "min_score": 75.9, + "max_score": 77.3, + "se_min": 3.8, + "se_max": 5.0, + "effect_size": 0.323823175, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 4.3233471472, + "level_score_std_dev": 0.7, + "se_difference": 6.2801273872, + "z_score": 0.2229254144, + "p_value": 0.9570722129, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -10.9088234973, + "gap_confidence_interval_95_upper": 13.7088234973, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "distinct_personality", + "demographic_factor": "Ethnicity", + "score_range": 6.7, + "min_level": "White", + "max_level": "African American", + "min_score": 75.6, + "max_score": 82.3, + "se_min": 4.4, + "se_max": 4.7, + "effect_size": 1.5497251948, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.3233471472, + "level_score_std_dev": 2.50049995, + "se_difference": 6.4381674411, + "z_score": 1.0406688023, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.918576311, + "gap_confidence_interval_95_upper": 19.318576311, + "raw_n_min_group": 336, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "distinct_personality", + "demographic_factor": "Politics", + "score_range": 5.0, + "min_level": "Independent", + "max_level": "Republican", + "min_score": 74.8, + "max_score": 79.8, + "se_min": 4.7, + "se_max": 4.3, + "effect_size": 1.1565113394, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.3233471472, + "level_score_std_dev": 2.3113247764, + "se_difference": 6.3702433235, + "z_score": 0.7848993745, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.4854474867, + "gap_confidence_interval_95_upper": 17.4854474867, + "raw_n_min_group": 177, + "raw_n_max_group": 160, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "distinct_personality", + "demographic_factor": "Sex", + "score_range": 0.2, + "min_level": "Female", + "max_level": "Male", + "min_score": 76.7, + "max_score": 76.9, + "se_min": 4.6, + "se_max": 4.7, + "effect_size": 0.0462604536, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 4.3233471472, + "level_score_std_dev": 0.1, + "se_difference": 6.576473219, + "z_score": 0.0304114369, + "p_value": 0.9914766484, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -12.6896506545, + "gap_confidence_interval_95_upper": 13.0896506545, + "raw_n_min_group": 251, + "raw_n_max_group": 246, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "distinct_personality", + "demographic_factor": "Urbanicity", + "score_range": 5.1, + "min_level": "Rural", + "max_level": "Suburban", + "min_score": 74.3, + "max_score": 79.4, + "se_min": 5.0, + "se_max": 4.2, + "effect_size": 1.1796415662, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.3233471472, + "level_score_std_dev": 2.1400934559, + "se_difference": 6.5299310869, + "z_score": 0.7810189621, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.6984297519, + "gap_confidence_interval_95_upper": 17.8984297519, + "raw_n_min_group": 86, + "raw_n_max_group": 244, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "effectiveness", + "demographic_factor": "Age", + "score_range": 4.2, + "min_level": "45-54", + "max_level": "65+", + "min_score": 90.1, + "max_score": 94.3, + "se_min": 2.9, + "se_max": 2.3, + "effect_size": 1.0605733725, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.960122052, + "level_score_std_dev": 1.6007810594, + "se_difference": 3.7013511047, + "z_score": 1.1347207766, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.0545148593, + "gap_confidence_interval_95_upper": 11.4545148593, + "raw_n_min_group": 73, + "raw_n_max_group": 61, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "effectiveness", + "demographic_factor": "Education", + "score_range": 5.1, + "min_level": "College", + "max_level": "No College", + "min_score": 89.0, + "max_score": 94.1, + "se_min": 2.7, + "se_max": 2.5, + "effect_size": 1.2878390951, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.960122052, + "level_score_std_dev": 2.55, + "se_difference": 3.6796738986, + "z_score": 1.3859923843, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.1120283161, + "gap_confidence_interval_95_upper": 12.3120283161, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "effectiveness", + "demographic_factor": "Ethnicity", + "score_range": 2.1, + "min_level": "Asian", + "max_level": "Hispanic", + "min_score": 90.9, + "max_score": 93.0, + "se_min": 3.4, + "se_max": 3.0, + "effect_size": 0.5302866862, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.960122052, + "level_score_std_dev": 0.764852927, + "se_difference": 4.5343136195, + "z_score": 0.463135146, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.7870913888, + "gap_confidence_interval_95_upper": 10.9870913888, + "raw_n_min_group": 38, + "raw_n_max_group": 34, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "effectiveness", + "demographic_factor": "Politics", + "score_range": 1.6, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 91.5, + "max_score": 93.1, + "se_min": 2.8, + "se_max": 2.4, + "effect_size": 0.4040279514, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.960122052, + "level_score_std_dev": 0.7118052168, + "se_difference": 3.6878177829, + "z_score": 0.4338609156, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.6279900361, + "gap_confidence_interval_95_upper": 8.8279900361, + "raw_n_min_group": 164, + "raw_n_max_group": 177, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "effectiveness", + "demographic_factor": "Sex", + "score_range": 1.0, + "min_level": "Female", + "max_level": "Male", + "min_score": 91.9, + "max_score": 92.9, + "se_min": 2.6, + "se_max": 2.4, + "effect_size": 0.2525174696, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.960122052, + "level_score_std_dev": 0.5, + "se_difference": 3.5383612026, + "z_score": 0.2826167095, + "p_value": 0.9499278005, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.9350605214, + "gap_confidence_interval_95_upper": 7.9350605214, + "raw_n_min_group": 251, + "raw_n_max_group": 246, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "effectiveness", + "demographic_factor": "Urbanicity", + "score_range": 4.8, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 89.7, + "max_score": 94.5, + "se_min": 3.1, + "se_max": 2.2, + "effect_size": 1.2120838542, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.960122052, + "level_score_std_dev": 1.9798989873, + "se_difference": 3.8013155617, + "z_score": 1.2627207402, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.6504415949, + "gap_confidence_interval_95_upper": 12.2504415949, + "raw_n_min_group": 171, + "raw_n_max_group": 86, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "ethical_alignment", + "demographic_factor": "Age", + "score_range": 6.7, + "min_level": "18-24", + "max_level": "65+", + "min_score": 73.0, + "max_score": 79.7, + "se_min": 5.3, + "se_max": 4.9, + "effect_size": 1.7820893195, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.7596319818, + "level_score_std_dev": 2.2395436043, + "se_difference": 7.218032973, + "z_score": 0.9282307278, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.4470846664, + "gap_confidence_interval_95_upper": 20.8470846664, + "raw_n_min_group": 60, + "raw_n_max_group": 61, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "ethical_alignment", + "demographic_factor": "Education", + "score_range": 0.3, + "min_level": "College", + "max_level": "No College", + "min_score": 77.7, + "max_score": 78.0, + "se_min": 3.8, + "se_max": 5.3, + "effect_size": 0.0797950442, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 3.7596319818, + "level_score_std_dev": 0.15, + "se_difference": 6.5215028943, + "z_score": 0.0460016663, + "p_value": 0.9872735163, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -12.4819107978, + "gap_confidence_interval_95_upper": 13.0819107978, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "ethical_alignment", + "demographic_factor": "Ethnicity", + "score_range": 9.4, + "min_level": "Asian", + "max_level": "African American", + "min_score": 73.6, + "max_score": 83.0, + "se_min": 5.8, + "se_max": 4.8, + "effect_size": 2.5002447169, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.7596319818, + "level_score_std_dev": 3.7249161064, + "se_difference": 7.5286120899, + "z_score": 1.2485701067, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.3558085498, + "gap_confidence_interval_95_upper": 24.1558085498, + "raw_n_min_group": 38, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "ethical_alignment", + "demographic_factor": "Politics", + "score_range": 3.2, + "min_level": "Independent", + "max_level": "Republican", + "min_score": 76.4, + "max_score": 79.6, + "se_min": 4.9, + "se_max": 4.6, + "effect_size": 0.8511471377, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.7596319818, + "level_score_std_dev": 1.3888444437, + "se_difference": 6.7208630398, + "z_score": 0.4761293276, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.9726495031, + "gap_confidence_interval_95_upper": 16.3726495031, + "raw_n_min_group": 177, + "raw_n_max_group": 160, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "ethical_alignment", + "demographic_factor": "Sex", + "score_range": 1.5, + "min_level": "Female", + "max_level": "Male", + "min_score": 77.2, + "max_score": 78.7, + "se_min": 4.8, + "se_max": 4.8, + "effect_size": 0.3989752208, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.7596319818, + "level_score_std_dev": 0.75, + "se_difference": 6.7882250994, + "z_score": 0.2209708691, + "p_value": 0.9570722129, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -11.8046767138, + "gap_confidence_interval_95_upper": 14.8046767138, + "raw_n_min_group": 251, + "raw_n_max_group": 246, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "ethical_alignment", + "demographic_factor": "Urbanicity", + "score_range": 2.5, + "min_level": "Rural", + "max_level": "Suburban", + "min_score": 76.2, + "max_score": 78.7, + "se_min": 5.1, + "se_max": 4.6, + "effect_size": 0.6649587013, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.7596319818, + "level_score_std_dev": 1.1145502332, + "se_difference": 6.8680419335, + "z_score": 0.3640047665, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -10.9611148339, + "gap_confidence_interval_95_upper": 15.9611148339, + "raw_n_min_group": 86, + "raw_n_max_group": 244, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "flexibility", + "demographic_factor": "Age", + "score_range": 3.0, + "min_level": "45-54", + "max_level": "25-34", + "min_score": 86.5, + "max_score": 89.5, + "se_min": 4.2, + "se_max": 3.9, + "effect_size": 0.8783201298, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.4156111174, + "level_score_std_dev": 1.198146717, + "se_difference": 5.7314919524, + "z_score": 0.5234239226, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.2335178043, + "gap_confidence_interval_95_upper": 14.2335178043, + "raw_n_min_group": 73, + "raw_n_max_group": 105, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "flexibility", + "demographic_factor": "Education", + "score_range": 6.6, + "min_level": "College", + "max_level": "No College", + "min_score": 84.0, + "max_score": 90.6, + "se_min": 3.9, + "se_max": 4.0, + "effect_size": 1.9323042856, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.4156111174, + "level_score_std_dev": 3.3, + "se_difference": 5.5865910894, + "z_score": 1.1814002304, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.3495173315, + "gap_confidence_interval_95_upper": 17.5495173315, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "flexibility", + "demographic_factor": "Ethnicity", + "score_range": 4.3, + "min_level": "Asian", + "max_level": "African American", + "min_score": 86.7, + "max_score": 91.0, + "se_min": 5.1, + "se_max": 3.6, + "effect_size": 1.2589255194, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.4156111174, + "level_score_std_dev": 1.71664644, + "se_difference": 6.242595614, + "z_score": 0.6888160416, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.9352625735, + "gap_confidence_interval_95_upper": 16.5352625735, + "raw_n_min_group": 38, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "flexibility", + "demographic_factor": "Politics", + "score_range": 2.5, + "min_level": "Republican", + "max_level": "Independent", + "min_score": 87.7, + "max_score": 90.2, + "se_min": 4.0, + "se_max": 3.6, + "effect_size": 0.7319334415, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.4156111174, + "level_score_std_dev": 1.0801234497, + "se_difference": 5.3814496188, + "z_score": 0.4645588414, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.0474474375, + "gap_confidence_interval_95_upper": 13.0474474375, + "raw_n_min_group": 160, + "raw_n_max_group": 177, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "flexibility", + "demographic_factor": "Sex", + "score_range": 0.6, + "min_level": "Female", + "max_level": "Male", + "min_score": 88.1, + "max_score": 88.7, + "se_min": 4.0, + "se_max": 3.9, + "effect_size": 0.175664026, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 3.4156111174, + "level_score_std_dev": 0.3, + "se_difference": 5.5865910894, + "z_score": 0.1074000209, + "p_value": 0.9775052078, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -10.3495173315, + "gap_confidence_interval_95_upper": 11.5495173315, + "raw_n_min_group": 251, + "raw_n_max_group": 246, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "flexibility", + "demographic_factor": "Urbanicity", + "score_range": 5.2, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 85.5, + "max_score": 90.7, + "se_min": 4.6, + "se_max": 3.6, + "effect_size": 1.5224215583, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.4156111174, + "level_score_std_dev": 2.1483844059, + "se_difference": 5.8412327466, + "z_score": 0.8902230446, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.2486058087, + "gap_confidence_interval_95_upper": 16.6486058087, + "raw_n_min_group": 171, + "raw_n_max_group": 86, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "helpfulness", + "demographic_factor": "Age", + "score_range": 4.6, + "min_level": "18-24", + "max_level": "65+", + "min_score": 87.9, + "max_score": 92.5, + "se_min": 3.3, + "se_max": 2.5, + "effect_size": 1.2770192802, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.6021382539, + "level_score_std_dev": 1.3960261061, + "se_difference": 4.1400483089, + "z_score": 1.1110981459, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.5143455797, + "gap_confidence_interval_95_upper": 12.7143455797, + "raw_n_min_group": 60, + "raw_n_max_group": 61, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "helpfulness", + "demographic_factor": "Education", + "score_range": 3.4, + "min_level": "College", + "max_level": "No College", + "min_score": 88.7, + "max_score": 92.1, + "se_min": 2.5, + "se_max": 2.7, + "effect_size": 0.9438838158, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.6021382539, + "level_score_std_dev": 1.7, + "se_difference": 3.6796738986, + "z_score": 0.9239949228, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.8120283161, + "gap_confidence_interval_95_upper": 10.6120283161, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "helpfulness", + "demographic_factor": "Ethnicity", + "score_range": 4.1, + "min_level": "Asian", + "max_level": "Hispanic", + "min_score": 88.5, + "max_score": 92.6, + "se_min": 3.5, + "se_max": 2.9, + "effect_size": 1.1382128367, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.6021382539, + "level_score_std_dev": 1.5122417135, + "se_difference": 4.5453272709, + "z_score": 0.9020252571, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.808677749, + "gap_confidence_interval_95_upper": 13.008677749, + "raw_n_min_group": 38, + "raw_n_max_group": 34, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "helpfulness", + "demographic_factor": "Politics", + "score_range": 0.8, + "min_level": "Republican", + "max_level": "Independent", + "min_score": 90.7, + "max_score": 91.5, + "se_min": 2.6, + "se_max": 2.6, + "effect_size": 0.2220903096, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.6021382539, + "level_score_std_dev": 0.3299831646, + "se_difference": 3.6769552622, + "z_score": 0.2175713173, + "p_value": 0.9570722129, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.4066998866, + "gap_confidence_interval_95_upper": 8.0066998866, + "raw_n_min_group": 160, + "raw_n_max_group": 177, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "helpfulness", + "demographic_factor": "Sex", + "score_range": 1.1, + "min_level": "Female", + "max_level": "Male", + "min_score": 90.4, + "max_score": 91.5, + "se_min": 2.7, + "se_max": 2.5, + "effect_size": 0.3053741757, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.6021382539, + "level_score_std_dev": 0.55, + "se_difference": 3.6796738986, + "z_score": 0.2989395339, + "p_value": 0.9469805126, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.1120283161, + "gap_confidence_interval_95_upper": 8.3120283161, + "raw_n_min_group": 251, + "raw_n_max_group": 246, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "helpfulness", + "demographic_factor": "Urbanicity", + "score_range": 3.4, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 88.9, + "max_score": 92.3, + "se_min": 3.0, + "se_max": 2.5, + "effect_size": 0.9438838158, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.6021382539, + "level_score_std_dev": 1.4514360705, + "se_difference": 3.905124838, + "z_score": 0.8706507835, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.2539040375, + "gap_confidence_interval_95_upper": 11.0539040375, + "raw_n_min_group": 171, + "raw_n_max_group": 86, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "honesty_empathy_fairness", + "demographic_factor": "Age", + "score_range": 2.7, + "min_level": "18-24", + "max_level": "65+", + "min_score": 79.1, + "max_score": 81.8, + "se_min": 5.7, + "se_max": 5.1, + "effect_size": 0.7102007076, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.8017421993, + "level_score_std_dev": 0.8939612221, + "se_difference": 7.6485292704, + "z_score": 0.3530090432, + "p_value": 0.9405596833, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -12.2908419047, + "gap_confidence_interval_95_upper": 17.6908419047, + "raw_n_min_group": 60, + "raw_n_max_group": 61, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "honesty_empathy_fairness", + "demographic_factor": "Education", + "score_range": 3.7, + "min_level": "College", + "max_level": "No College", + "min_score": 78.5, + "max_score": 82.2, + "se_min": 4.3, + "se_max": 5.3, + "effect_size": 0.9732380067, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.8017421993, + "level_score_std_dev": 1.85, + "se_difference": 6.8249542123, + "z_score": 0.5421281792, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.6766644522, + "gap_confidence_interval_95_upper": 17.0766644522, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "honesty_empathy_fairness", + "demographic_factor": "Ethnicity", + "score_range": 11.0, + "min_level": "Hispanic", + "max_level": "African American", + "min_score": 72.7, + "max_score": 83.7, + "se_min": 6.9, + "se_max": 5.1, + "effect_size": 2.8934102902, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.8017421993, + "level_score_std_dev": 4.3464784596, + "se_difference": 8.5802097876, + "z_score": 1.2820199357, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.8169021636, + "gap_confidence_interval_95_upper": 27.8169021636, + "raw_n_min_group": 34, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "honesty_empathy_fairness", + "demographic_factor": "Politics", + "score_range": 1.7, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 80.2, + "max_score": 81.9, + "se_min": 5.1, + "se_max": 4.9, + "effect_size": 0.4471634085, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.8017421993, + "level_score_std_dev": 0.7133644853, + "se_difference": 7.072481884, + "z_score": 0.2403682368, + "p_value": 0.9570722129, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -12.161809774, + "gap_confidence_interval_95_upper": 15.561809774, + "raw_n_min_group": 164, + "raw_n_max_group": 177, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "honesty_empathy_fairness", + "demographic_factor": "Sex", + "score_range": 3.7, + "min_level": "Female", + "max_level": "Male", + "min_score": 79.2, + "max_score": 82.9, + "se_min": 5.2, + "se_max": 4.7, + "effect_size": 0.9732380067, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.8017421993, + "level_score_std_dev": 1.85, + "se_difference": 7.0092795636, + "z_score": 0.5278716545, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -10.0379355021, + "gap_confidence_interval_95_upper": 17.4379355021, + "raw_n_min_group": 251, + "raw_n_max_group": 246, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "honesty_empathy_fairness", + "demographic_factor": "Urbanicity", + "score_range": 4.8, + "min_level": "Urban", + "max_level": "Suburban", + "min_score": 78.2, + "max_score": 83.0, + "se_min": 5.3, + "se_max": 4.5, + "effect_size": 1.2625790357, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.8017421993, + "level_score_std_dev": 1.9686430747, + "se_difference": 6.9526973183, + "z_score": 0.6903795434, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.8270363393, + "gap_confidence_interval_95_upper": 18.4270363393, + "raw_n_min_group": 171, + "raw_n_max_group": 244, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "intuitiveness", + "demographic_factor": "Age", + "score_range": 6.3, + "min_level": "45-54", + "max_level": "65+", + "min_score": 83.5, + "max_score": 89.8, + "se_min": 3.8, + "se_max": 3.2, + "effect_size": 2.0334487206, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.0981848404, + "level_score_std_dev": 2.2617593938, + "se_difference": 4.9678969393, + "z_score": 1.2681422495, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.43689908, + "gap_confidence_interval_95_upper": 16.03689908, + "raw_n_min_group": 73, + "raw_n_max_group": 61, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "intuitiveness", + "demographic_factor": "Education", + "score_range": 4.9, + "min_level": "College", + "max_level": "No College", + "min_score": 83.9, + "max_score": 88.8, + "se_min": 3.2, + "se_max": 3.5, + "effect_size": 1.5815712271, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.0981848404, + "level_score_std_dev": 2.45, + "se_difference": 4.7423622806, + "z_score": 1.0332403368, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.3948592715, + "gap_confidence_interval_95_upper": 14.1948592715, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "intuitiveness", + "demographic_factor": "Ethnicity", + "score_range": 2.4, + "min_level": "White", + "max_level": "Asian", + "min_score": 86.9, + "max_score": 89.3, + "se_min": 3.2, + "se_max": 3.7, + "effect_size": 0.7746471317, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.0981848404, + "level_score_std_dev": 0.916515139, + "se_difference": 4.8918299235, + "z_score": 0.4906139497, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.1878104685, + "gap_confidence_interval_95_upper": 11.9878104685, + "raw_n_min_group": 336, + "raw_n_max_group": 38, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "intuitiveness", + "demographic_factor": "Politics", + "score_range": 3.0, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 85.8, + "max_score": 88.8, + "se_min": 3.7, + "se_max": 3.2, + "effect_size": 0.9683089146, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.0981848404, + "level_score_std_dev": 1.256980509, + "se_difference": 4.8918299235, + "z_score": 0.6132674371, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.5878104685, + "gap_confidence_interval_95_upper": 12.5878104685, + "raw_n_min_group": 164, + "raw_n_max_group": 177, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "intuitiveness", + "demographic_factor": "Sex", + "score_range": 0.6, + "min_level": "Female", + "max_level": "Male", + "min_score": 86.9, + "max_score": 87.5, + "se_min": 3.4, + "se_max": 3.4, + "effect_size": 0.1936617829, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 3.0981848404, + "level_score_std_dev": 0.3, + "se_difference": 4.8083261121, + "z_score": 0.1247835496, + "p_value": 0.9772792279, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.8241460056, + "gap_confidence_interval_95_upper": 10.0241460056, + "raw_n_min_group": 251, + "raw_n_max_group": 246, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "intuitiveness", + "demographic_factor": "Urbanicity", + "score_range": 2.9, + "min_level": "Urban", + "max_level": "Suburban", + "min_score": 85.4, + "max_score": 88.3, + "se_min": 3.7, + "se_max": 3.1, + "effect_size": 0.9360319508, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.0981848404, + "level_score_std_dev": 1.2119772642, + "se_difference": 4.8270073545, + "z_score": 0.6007863231, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.5607605678, + "gap_confidence_interval_95_upper": 12.3607605678, + "raw_n_min_group": 171, + "raw_n_max_group": 244, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "personality", + "demographic_factor": "Age", + "score_range": 4.9, + "min_level": "35-44", + "max_level": "55-64", + "min_score": 74.1, + "max_score": 79.0, + "se_min": 3.3, + "se_max": 3.3, + "effect_size": 1.4560324792, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.365309545, + "level_score_std_dev": 1.8175074507, + "se_difference": 4.6669047558, + "z_score": 1.0499464327, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.2469652407, + "gap_confidence_interval_95_upper": 14.0469652407, + "raw_n_min_group": 91, + "raw_n_max_group": 111, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "personality", + "demographic_factor": "Education", + "score_range": 0.2, + "min_level": "College", + "max_level": "No College", + "min_score": 76.6, + "max_score": 76.8, + "se_min": 2.8, + "se_max": 3.8, + "effect_size": 0.0594298971, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 3.365309545, + "level_score_std_dev": 0.1, + "se_difference": 4.7201694885, + "z_score": 0.0423713599, + "p_value": 0.9872735163, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.0513621984, + "gap_confidence_interval_95_upper": 9.4513621984, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "personality", + "demographic_factor": "Ethnicity", + "score_range": 4.8, + "min_level": "Hispanic", + "max_level": "African American", + "min_score": 73.4, + "max_score": 78.2, + "se_min": 4.1, + "se_max": 3.9, + "effect_size": 1.4263175306, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.365309545, + "level_score_std_dev": 1.9638928178, + "se_difference": 5.6586217403, + "z_score": 0.8482630966, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.2906948132, + "gap_confidence_interval_95_upper": 15.8906948132, + "raw_n_min_group": 34, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "personality", + "demographic_factor": "Politics", + "score_range": 3.6, + "min_level": "Democrat", + "max_level": "Republican", + "min_score": 74.9, + "max_score": 78.5, + "se_min": 3.5, + "se_max": 3.4, + "effect_size": 1.069738148, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.365309545, + "level_score_std_dev": 1.4966629547, + "se_difference": 4.8795491595, + "z_score": 0.7377730775, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.9637406134, + "gap_confidence_interval_95_upper": 13.1637406134, + "raw_n_min_group": 164, + "raw_n_max_group": 160, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "personality", + "demographic_factor": "Sex", + "score_range": 1.7, + "min_level": "Female", + "max_level": "Male", + "min_score": 75.9, + "max_score": 77.6, + "se_min": 3.4, + "se_max": 3.5, + "effect_size": 0.5051541254, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.365309545, + "level_score_std_dev": 0.85, + "se_difference": 4.8795491595, + "z_score": 0.3483928421, + "p_value": 0.9426292272, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.8637406134, + "gap_confidence_interval_95_upper": 11.2637406134, + "raw_n_min_group": 251, + "raw_n_max_group": 246, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "personality", + "demographic_factor": "Urbanicity", + "score_range": 4.2, + "min_level": "Urban", + "max_level": "Suburban", + "min_score": 74.6, + "max_score": 78.8, + "se_min": 3.5, + "se_max": 3.3, + "effect_size": 1.2480278393, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.365309545, + "level_score_std_dev": 1.7461067805, + "se_difference": 4.8104053883, + "z_score": 0.8731072874, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.2282213121, + "gap_confidence_interval_95_upper": 13.6282213121, + "raw_n_min_group": 171, + "raw_n_max_group": 244, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "personality_consistency", + "demographic_factor": "Age", + "score_range": 1.8, + "min_level": "18-24", + "max_level": "45-54", + "min_score": 86.7, + "max_score": 88.5, + "se_min": 3.8, + "se_max": 3.1, + "effect_size": 0.5638832596, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.1921500939, + "level_score_std_dev": 0.5497474167, + "se_difference": 4.9040799341, + "z_score": 0.3670413256, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.8118200481, + "gap_confidence_interval_95_upper": 11.4118200481, + "raw_n_min_group": 60, + "raw_n_max_group": 73, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "personality_consistency", + "demographic_factor": "Education", + "score_range": 4.2, + "min_level": "College", + "max_level": "No College", + "min_score": 84.9, + "max_score": 89.1, + "se_min": 2.9, + "se_max": 3.6, + "effect_size": 1.3157276057, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1921500939, + "level_score_std_dev": 2.1, + "se_difference": 4.6227697325, + "z_score": 0.9085462273, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.8604621846, + "gap_confidence_interval_95_upper": 13.2604621846, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "personality_consistency", + "demographic_factor": "Ethnicity", + "score_range": 4.9, + "min_level": "Hispanic", + "max_level": "African American", + "min_score": 84.2, + "max_score": 89.1, + "se_min": 4.5, + "se_max": 3.5, + "effect_size": 1.5350155399, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1921500939, + "level_score_std_dev": 1.8261982368, + "se_difference": 5.7008771255, + "z_score": 0.8595168589, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.2735138463, + "gap_confidence_interval_95_upper": 16.0735138463, + "raw_n_min_group": 34, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "personality_consistency", + "demographic_factor": "Politics", + "score_range": 1.1, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 87.3, + "max_score": 88.4, + "se_min": 3.5, + "se_max": 3.3, + "effect_size": 0.3445953253, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.1921500939, + "level_score_std_dev": 0.4496912521, + "se_difference": 4.8104053883, + "z_score": 0.2286709562, + "p_value": 0.9570722129, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.3282213121, + "gap_confidence_interval_95_upper": 10.5282213121, + "raw_n_min_group": 164, + "raw_n_max_group": 177, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "personality_consistency", + "demographic_factor": "Sex", + "score_range": 0.2, + "min_level": "Female", + "max_level": "Male", + "min_score": 87.6, + "max_score": 87.8, + "se_min": 3.4, + "se_max": 3.4, + "effect_size": 0.0626536955, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 3.1921500939, + "level_score_std_dev": 0.1, + "se_difference": 4.8083261121, + "z_score": 0.0415945165, + "p_value": 0.9872735163, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.2241460056, + "gap_confidence_interval_95_upper": 9.6241460056, + "raw_n_min_group": 251, + "raw_n_max_group": 246, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "personality_consistency", + "demographic_factor": "Urbanicity", + "score_range": 3.9, + "min_level": "Urban", + "max_level": "Suburban", + "min_score": 85.1, + "max_score": 89.0, + "se_min": 3.7, + "se_max": 3.0, + "effect_size": 1.2217470624, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1921500939, + "level_score_std_dev": 1.7326921891, + "se_difference": 4.7634021455, + "z_score": 0.8187425459, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.4360966491, + "gap_confidence_interval_95_upper": 13.2360966491, + "raw_n_min_group": 171, + "raw_n_max_group": 244, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "tone_and_language_style", + "demographic_factor": "Age", + "score_range": 6.3, + "min_level": "18-24", + "max_level": "65+", + "min_score": 82.0, + "max_score": 88.3, + "se_min": 5.1, + "se_max": 3.9, + "effect_size": 1.8864107454, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.339675633, + "level_score_std_dev": 2.0950338104, + "se_difference": 6.4202803677, + "z_score": 0.9812655584, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.2835182914, + "gap_confidence_interval_95_upper": 18.8835182914, + "raw_n_min_group": 60, + "raw_n_max_group": 61, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "tone_and_language_style", + "demographic_factor": "Education", + "score_range": 4.9, + "min_level": "College", + "max_level": "No College", + "min_score": 82.2, + "max_score": 87.1, + "se_min": 3.8, + "se_max": 4.4, + "effect_size": 1.4672083575, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.339675633, + "level_score_std_dev": 2.45, + "se_difference": 5.8137767415, + "z_score": 0.8428256223, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.4947930275, + "gap_confidence_interval_95_upper": 16.2947930275, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "tone_and_language_style", + "demographic_factor": "Ethnicity", + "score_range": 1.5, + "min_level": "Asian", + "max_level": "African American", + "min_score": 84.2, + "max_score": 85.7, + "se_min": 5.2, + "se_max": 4.6, + "effect_size": 0.4491454156, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.339675633, + "level_score_std_dev": 0.6417748826, + "se_difference": 6.9426219831, + "z_score": 0.2160567007, + "p_value": 0.9571306973, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -12.1072890451, + "gap_confidence_interval_95_upper": 15.1072890451, + "raw_n_min_group": 38, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "tone_and_language_style", + "demographic_factor": "Politics", + "score_range": 5.9, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 83.0, + "max_score": 88.9, + "se_min": 4.7, + "se_max": 3.5, + "effect_size": 1.7666386345, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.339675633, + "level_score_std_dev": 2.4280765135, + "se_difference": 5.8600341296, + "z_score": 1.0068200747, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.5854558422, + "gap_confidence_interval_95_upper": 17.3854558422, + "raw_n_min_group": 164, + "raw_n_max_group": 177, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "tone_and_language_style", + "demographic_factor": "Sex", + "score_range": 2.8, + "min_level": "Female", + "max_level": "Male", + "min_score": 84.1, + "max_score": 86.9, + "se_min": 4.4, + "se_max": 4.0, + "effect_size": 0.8384047757, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.339675633, + "level_score_std_dev": 1.4, + "se_difference": 5.9464274989, + "z_score": 0.4708709558, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.8547837346, + "gap_confidence_interval_95_upper": 14.4547837346, + "raw_n_min_group": 251, + "raw_n_max_group": 246, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "tone_and_language_style", + "demographic_factor": "Urbanicity", + "score_range": 4.2, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 82.9, + "max_score": 87.1, + "se_min": 4.7, + "se_max": 4.1, + "effect_size": 1.2576071636, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.339675633, + "level_score_std_dev": 1.791337179, + "se_difference": 6.2369864518, + "z_score": 0.6734021362, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.0242688176, + "gap_confidence_interval_95_upper": 16.4242688176, + "raw_n_min_group": 171, + "raw_n_max_group": 86, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "transparency", + "demographic_factor": "Age", + "score_range": 3.8, + "min_level": "45-54", + "max_level": "25-34", + "min_score": 74.7, + "max_score": 78.5, + "se_min": 5.0, + "se_max": 5.0, + "effect_size": 0.8418680582, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.5137714429, + "level_score_std_dev": 1.5173990905, + "se_difference": 7.0710678119, + "z_score": 0.5374011537, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -10.0590382435, + "gap_confidence_interval_95_upper": 17.6590382435, + "raw_n_min_group": 73, + "raw_n_max_group": 105, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "transparency", + "demographic_factor": "Education", + "score_range": 2.7, + "min_level": "College", + "max_level": "No College", + "min_score": 74.3, + "max_score": 77.0, + "se_min": 4.5, + "se_max": 5.6, + "effect_size": 0.5981694098, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 4.5137714429, + "level_score_std_dev": 1.35, + "se_difference": 7.1840100223, + "z_score": 0.375834665, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -11.3804009082, + "gap_confidence_interval_95_upper": 16.7804009082, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "transparency", + "demographic_factor": "Ethnicity", + "score_range": 7.8, + "min_level": "White", + "max_level": "African American", + "min_score": 75.1, + "max_score": 82.9, + "se_min": 5.1, + "se_max": 5.0, + "effect_size": 1.7280449617, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.5137714429, + "level_score_std_dev": 3.1547583109, + "se_difference": 7.1421285343, + "z_score": 1.092111401, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.1983147001, + "gap_confidence_interval_95_upper": 21.7983147001, + "raw_n_min_group": 336, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "transparency", + "demographic_factor": "Politics", + "score_range": 1.9, + "min_level": "Independent", + "max_level": "Republican", + "min_score": 75.0, + "max_score": 76.9, + "se_min": 5.3, + "se_max": 5.1, + "effect_size": 0.4209340291, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 4.5137714429, + "level_score_std_dev": 0.7788880964, + "se_difference": 7.3552702194, + "z_score": 0.2583181778, + "p_value": 0.9570722129, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -12.5160647265, + "gap_confidence_interval_95_upper": 16.3160647265, + "raw_n_min_group": 177, + "raw_n_max_group": 160, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "transparency", + "demographic_factor": "Sex", + "score_range": 0.8, + "min_level": "Male", + "max_level": "Female", + "min_score": 75.7, + "max_score": 76.5, + "se_min": 5.3, + "se_max": 5.2, + "effect_size": 0.1772353807, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 4.5137714429, + "level_score_std_dev": 0.4, + "se_difference": 7.4249579123, + "z_score": 0.1077447185, + "p_value": 0.9775052078, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -13.7526500949, + "gap_confidence_interval_95_upper": 15.3526500949, + "raw_n_min_group": 246, + "raw_n_max_group": 251, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "transparency", + "demographic_factor": "Urbanicity", + "score_range": 2.2, + "min_level": "Suburban", + "max_level": "Rural", + "min_score": 74.8, + "max_score": 77.0, + "se_min": 5.2, + "se_max": 5.4, + "effect_size": 0.4873972969, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 4.5137714429, + "level_score_std_dev": 1.0143416036, + "se_difference": 7.4966659256, + "z_score": 0.2934637907, + "p_value": 0.9469805126, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -12.4931952183, + "gap_confidence_interval_95_upper": 16.8931952183, + "raw_n_min_group": 244, + "raw_n_max_group": 86, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "trustworthiness", + "demographic_factor": "Age", + "score_range": 6.5, + "min_level": "18-24", + "max_level": "65+", + "min_score": 81.2, + "max_score": 87.7, + "se_min": 4.4, + "se_max": 3.4, + "effect_size": 2.0541867489, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1642692679, + "level_score_std_dev": 1.9941581348, + "se_difference": 5.5605755098, + "z_score": 1.1689437521, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.3985277325, + "gap_confidence_interval_95_upper": 17.3985277325, + "raw_n_min_group": 60, + "raw_n_max_group": 61, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "trustworthiness", + "demographic_factor": "Education", + "score_range": 8.5, + "min_level": "College", + "max_level": "No College", + "min_score": 80.3, + "max_score": 88.8, + "se_min": 3.3, + "se_max": 3.5, + "effect_size": 2.68624421, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1642692679, + "level_score_std_dev": 4.25, + "se_difference": 4.8104053883, + "z_score": 1.7670028436, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -0.9282213121, + "gap_confidence_interval_95_upper": 17.9282213121, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "trustworthiness", + "demographic_factor": "Ethnicity", + "score_range": 2.7, + "min_level": "Asian", + "max_level": "African American", + "min_score": 84.4, + "max_score": 87.1, + "se_min": 4.3, + "se_max": 3.6, + "effect_size": 0.8532775726, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1642692679, + "level_score_std_dev": 0.9575359001, + "se_difference": 5.6080299571, + "z_score": 0.4814524923, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.2915367402, + "gap_confidence_interval_95_upper": 13.6915367402, + "raw_n_min_group": 38, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "trustworthiness", + "demographic_factor": "Politics", + "score_range": 1.9, + "min_level": "Republican", + "max_level": "Independent", + "min_score": 85.3, + "max_score": 87.2, + "se_min": 3.4, + "se_max": 3.3, + "effect_size": 0.6004545881, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.1642692679, + "level_score_std_dev": 0.7845734864, + "se_difference": 4.7381430962, + "z_score": 0.4010009747, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.3865898221, + "gap_confidence_interval_95_upper": 11.1865898221, + "raw_n_min_group": 160, + "raw_n_max_group": 177, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "trustworthiness", + "demographic_factor": "Sex", + "score_range": 1.1, + "min_level": "Female", + "max_level": "Male", + "min_score": 85.4, + "max_score": 86.5, + "se_min": 3.5, + "se_max": 3.4, + "effect_size": 0.3476316037, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.1642692679, + "level_score_std_dev": 0.55, + "se_difference": 4.8795491595, + "z_score": 0.2254306626, + "p_value": 0.9570722129, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.4637406134, + "gap_confidence_interval_95_upper": 10.6637406134, + "raw_n_min_group": 251, + "raw_n_max_group": 246, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "trustworthiness", + "demographic_factor": "Urbanicity", + "score_range": 4.4, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 83.9, + "max_score": 88.3, + "se_min": 3.7, + "se_max": 3.3, + "effect_size": 1.3905264146, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1642692679, + "level_score_std_dev": 1.8018509002, + "se_difference": 4.9578221025, + "z_score": 0.8874864626, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.3171527626, + "gap_confidence_interval_95_upper": 14.1171527626, + "raw_n_min_group": 171, + "raw_n_max_group": 86, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "understanding", + "demographic_factor": "Age", + "score_range": 4.9, + "min_level": "18-24", + "max_level": "65+", + "min_score": 85.7, + "max_score": 90.6, + "se_min": 3.5, + "se_max": 2.8, + "effect_size": 1.6513229488, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9673178124, + "level_score_std_dev": 1.4784564022, + "se_difference": 4.4821869662, + "z_score": 1.0932163332, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.8849250257, + "gap_confidence_interval_95_upper": 13.6849250257, + "raw_n_min_group": 60, + "raw_n_max_group": 61, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "understanding", + "demographic_factor": "Education", + "score_range": 3.5, + "min_level": "College", + "max_level": "No College", + "min_score": 86.9, + "max_score": 90.4, + "se_min": 2.5, + "se_max": 2.9, + "effect_size": 1.179516392, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9673178124, + "level_score_std_dev": 1.75, + "se_difference": 3.8288379438, + "z_score": 0.9141154709, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.0043844725, + "gap_confidence_interval_95_upper": 11.0043844725, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "understanding", + "demographic_factor": "Ethnicity", + "score_range": 1.4, + "min_level": "Asian", + "max_level": "Hispanic", + "min_score": 87.9, + "max_score": 89.3, + "se_min": 3.5, + "se_max": 3.4, + "effect_size": 0.4718065568, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 2.9673178124, + "level_score_std_dev": 0.5760859311, + "se_difference": 4.8795491595, + "z_score": 0.2869117523, + "p_value": 0.9486856692, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.1637406134, + "gap_confidence_interval_95_upper": 10.9637406134, + "raw_n_min_group": 38, + "raw_n_max_group": 34, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "understanding", + "demographic_factor": "Politics", + "score_range": 1.7, + "min_level": "Republican", + "max_level": "Independent", + "min_score": 88.8, + "max_score": 90.5, + "se_min": 2.8, + "se_max": 2.6, + "effect_size": 0.5729079618, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 2.9673178124, + "level_score_std_dev": 0.7586537784, + "se_difference": 3.8209946349, + "z_score": 0.4449103342, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.7890118695, + "gap_confidence_interval_95_upper": 9.1890118695, + "raw_n_min_group": 160, + "raw_n_max_group": 177, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "understanding", + "demographic_factor": "Sex", + "score_range": 0.9, + "min_level": "Female", + "max_level": "Male", + "min_score": 88.8, + "max_score": 89.7, + "se_min": 2.9, + "se_max": 2.7, + "effect_size": 0.3033042151, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 2.9673178124, + "level_score_std_dev": 0.45, + "se_difference": 3.9623225512, + "z_score": 0.2271395093, + "p_value": 0.9570722129, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.8660094955, + "gap_confidence_interval_95_upper": 8.6660094955, + "raw_n_min_group": 251, + "raw_n_max_group": 246, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "understanding", + "demographic_factor": "Urbanicity", + "score_range": 4.5, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 87.4, + "max_score": 91.9, + "se_min": 3.1, + "se_max": 2.5, + "effect_size": 1.5165210754, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9673178124, + "level_score_std_dev": 1.902629759, + "se_difference": 3.9824615503, + "z_score": 1.1299544121, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.3054812085, + "gap_confidence_interval_95_upper": 12.3054812085, + "raw_n_min_group": 171, + "raw_n_max_group": 86, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "usefulness", + "demographic_factor": "Age", + "score_range": 4.4, + "min_level": "35-44", + "max_level": "65+", + "min_score": 88.7, + "max_score": 93.1, + "se_min": 3.1, + "se_max": 2.6, + "effect_size": 1.3090707425, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.3611628899, + "level_score_std_dev": 1.4372040758, + "se_difference": 4.0459856648, + "z_score": 1.0874976741, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.529986185, + "gap_confidence_interval_95_upper": 12.329986185, + "raw_n_min_group": 91, + "raw_n_max_group": 61, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "usefulness", + "demographic_factor": "Education", + "score_range": 5.1, + "min_level": "College", + "max_level": "No College", + "min_score": 87.9, + "max_score": 93.0, + "se_min": 2.8, + "se_max": 2.8, + "effect_size": 1.517331997, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.3611628899, + "level_score_std_dev": 2.55, + "se_difference": 3.9597979746, + "z_score": 1.2879444943, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.6610614164, + "gap_confidence_interval_95_upper": 12.8610614164, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "usefulness", + "demographic_factor": "Ethnicity", + "score_range": 2.0, + "min_level": "White", + "max_level": "African American", + "min_score": 91.0, + "max_score": 93.0, + "se_min": 2.7, + "se_max": 2.7, + "effect_size": 0.5950321557, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.3611628899, + "level_score_std_dev": 0.7854139036, + "se_difference": 3.8183766184, + "z_score": 0.5237828009, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.4838806515, + "gap_confidence_interval_95_upper": 9.4838806515, + "raw_n_min_group": 336, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "usefulness", + "demographic_factor": "Politics", + "score_range": 1.9, + "min_level": "Republican", + "max_level": "Independent", + "min_score": 90.4, + "max_score": 92.3, + "se_min": 3.0, + "se_max": 2.6, + "effect_size": 0.5652805479, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.3611628899, + "level_score_std_dev": 0.7930251502, + "se_difference": 3.9698866483, + "z_score": 0.4786030858, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.8808348533, + "gap_confidence_interval_95_upper": 9.6808348533, + "raw_n_min_group": 160, + "raw_n_max_group": 177, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "usefulness", + "demographic_factor": "Sex", + "score_range": 0.0, + "min_level": "Female", + "max_level": "Female", + "min_score": 91.3, + "max_score": 91.3, + "se_min": 2.8, + "se_max": 2.8, + "effect_size": 0.0, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 3.3611628899, + "level_score_std_dev": 0.0, + "se_difference": 3.9597979746, + "z_score": 0.0, + "p_value": 1.0, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.7610614164, + "gap_confidence_interval_95_upper": 7.7610614164, + "raw_n_min_group": 251, + "raw_n_max_group": 251, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gemini-2.0-flash-001", + "category": "usefulness", + "demographic_factor": "Urbanicity", + "score_range": 3.8, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 89.7, + "max_score": 93.5, + "se_min": 3.1, + "se_max": 2.5, + "effect_size": 1.1305610958, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.3611628899, + "level_score_std_dev": 1.5860503004, + "se_difference": 3.9824615503, + "z_score": 0.9541837258, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.0054812085, + "gap_confidence_interval_95_upper": 11.6054812085, + "raw_n_min_group": 171, + "raw_n_max_group": 86, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "accuracy", + "demographic_factor": "Age", + "score_range": 2.3, + "min_level": "18-24", + "max_level": "25-34", + "min_score": 90.0, + "max_score": 92.3, + "se_min": 3.6, + "se_max": 2.7, + "effect_size": 0.782843721, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 2.9380065757, + "level_score_std_dev": 0.956846673, + "se_difference": 4.5, + "z_score": 0.5111111111, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.5198379304, + "gap_confidence_interval_95_upper": 11.1198379304, + "raw_n_min_group": 60, + "raw_n_max_group": 104, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "accuracy", + "demographic_factor": "Education", + "score_range": 2.8, + "min_level": "College", + "max_level": "No College", + "min_score": 89.9, + "max_score": 92.7, + "se_min": 2.6, + "se_max": 2.9, + "effect_size": 0.9530271386, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9380065757, + "level_score_std_dev": 1.4, + "se_difference": 3.8948684188, + "z_score": 0.7188946323, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.8338018254, + "gap_confidence_interval_95_upper": 10.4338018254, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "accuracy", + "demographic_factor": "Ethnicity", + "score_range": 0.3, + "min_level": "Hispanic", + "max_level": "Asian", + "min_score": 91.5, + "max_score": 91.8, + "se_min": 3.5, + "se_max": 3.3, + "effect_size": 0.1021100506, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 2.9380065757, + "level_score_std_dev": 0.1224744871, + "se_difference": 4.8104053883, + "z_score": 0.0623648062, + "p_value": 0.9844547785, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.1282213121, + "gap_confidence_interval_95_upper": 9.7282213121, + "raw_n_min_group": 33, + "raw_n_max_group": 40, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "accuracy", + "demographic_factor": "Politics", + "score_range": 2.6, + "min_level": "Republican", + "max_level": "Democrat", + "min_score": 90.2, + "max_score": 92.8, + "se_min": 3.1, + "se_max": 2.6, + "effect_size": 0.8849537715, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9380065757, + "level_score_std_dev": 1.1614167593, + "se_difference": 4.0459856648, + "z_score": 0.642612262, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.329986185, + "gap_confidence_interval_95_upper": 10.529986185, + "raw_n_min_group": 160, + "raw_n_max_group": 168, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "accuracy", + "demographic_factor": "Sex", + "score_range": 2.0, + "min_level": "Male", + "max_level": "Female", + "min_score": 90.7, + "max_score": 92.7, + "se_min": 3.0, + "se_max": 2.6, + "effect_size": 0.6807336704, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 2.9380065757, + "level_score_std_dev": 1.0, + "se_difference": 3.9698866483, + "z_score": 0.5037927219, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.7808348533, + "gap_confidence_interval_95_upper": 9.7808348533, + "raw_n_min_group": 250, + "raw_n_max_group": 256, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "accuracy", + "demographic_factor": "Urbanicity", + "score_range": 3.1, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 90.4, + "max_score": 93.5, + "se_min": 3.1, + "se_max": 2.5, + "effect_size": 1.0551371892, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9380065757, + "level_score_std_dev": 1.291854825, + "se_difference": 3.9824615503, + "z_score": 0.7784130395, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.7054812085, + "gap_confidence_interval_95_upper": 10.9054812085, + "raw_n_min_group": 173, + "raw_n_max_group": 86, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "adaptiveness", + "demographic_factor": "Age", + "score_range": 3.8, + "min_level": "18-24", + "max_level": "55-64", + "min_score": 87.9, + "max_score": 91.7, + "se_min": 2.8, + "se_max": 2.0, + "effect_size": 0.9696951115, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.9187575093, + "level_score_std_dev": 1.1930353445, + "se_difference": 3.4409301068, + "z_score": 1.1043525681, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.9440990827, + "gap_confidence_interval_95_upper": 10.5440990827, + "raw_n_min_group": 60, + "raw_n_max_group": 109, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "adaptiveness", + "demographic_factor": "Education", + "score_range": 3.6, + "min_level": "College", + "max_level": "No College", + "min_score": 88.4, + "max_score": 92.0, + "se_min": 2.1, + "se_max": 2.4, + "effect_size": 0.9186585267, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.9187575093, + "level_score_std_dev": 1.8, + "se_difference": 3.1890437438, + "z_score": 1.1288650421, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.650410883, + "gap_confidence_interval_95_upper": 9.850410883, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "adaptiveness", + "demographic_factor": "Ethnicity", + "score_range": 1.7, + "min_level": "Asian", + "max_level": "White", + "min_score": 89.2, + "max_score": 90.9, + "se_min": 3.0, + "se_max": 2.2, + "effect_size": 0.4338109709, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.9187575093, + "level_score_std_dev": 0.6519202405, + "se_difference": 3.7202150475, + "z_score": 0.456962831, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.5914875079, + "gap_confidence_interval_95_upper": 8.9914875079, + "raw_n_min_group": 40, + "raw_n_max_group": 342, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "adaptiveness", + "demographic_factor": "Politics", + "score_range": 1.3, + "min_level": "Republican", + "max_level": "Independent", + "min_score": 90.4, + "max_score": 91.7, + "se_min": 2.3, + "se_max": 2.2, + "effect_size": 0.3317378013, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.9187575093, + "level_score_std_dev": 0.5557777334, + "se_difference": 3.1827660926, + "z_score": 0.4084497453, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.9381069126, + "gap_confidence_interval_95_upper": 7.5381069126, + "raw_n_min_group": 160, + "raw_n_max_group": 183, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "adaptiveness", + "demographic_factor": "Sex", + "score_range": 0.2, + "min_level": "Male", + "max_level": "Female", + "min_score": 90.7, + "max_score": 90.9, + "se_min": 2.4, + "se_max": 2.3, + "effect_size": 0.0510365848, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 3.9187575093, + "level_score_std_dev": 0.1, + "se_difference": 3.3241540277, + "z_score": 0.0601656838, + "p_value": 0.9852565359, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.3152221734, + "gap_confidence_interval_95_upper": 6.7152221734, + "raw_n_min_group": 250, + "raw_n_max_group": 256, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "adaptiveness", + "demographic_factor": "Urbanicity", + "score_range": 2.4, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 89.9, + "max_score": 92.3, + "se_min": 2.4, + "se_max": 2.2, + "effect_size": 0.6124390178, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.9187575093, + "level_score_std_dev": 1.0498677165, + "se_difference": 3.2557641192, + "z_score": 0.7371541402, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.9811804158, + "gap_confidence_interval_95_upper": 8.7811804158, + "raw_n_min_group": 173, + "raw_n_max_group": 86, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "background_and_culture", + "demographic_factor": "Age", + "score_range": 6.4, + "min_level": "18-24", + "max_level": "55-64", + "min_score": 74.1, + "max_score": 80.5, + "se_min": 3.7, + "se_max": 3.5, + "effect_size": 1.8502187083, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.459050528, + "level_score_std_dev": 2.2683817041, + "se_difference": 5.0931326313, + "z_score": 1.2565940185, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.5823565258, + "gap_confidence_interval_95_upper": 16.3823565258, + "raw_n_min_group": 60, + "raw_n_max_group": 109, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "background_and_culture", + "demographic_factor": "Education", + "score_range": 2.1, + "min_level": "College", + "max_level": "No College", + "min_score": 77.4, + "max_score": 79.5, + "se_min": 2.9, + "se_max": 4.0, + "effect_size": 0.6071030137, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.459050528, + "level_score_std_dev": 1.05, + "se_difference": 4.9406477308, + "z_score": 0.4250454828, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.5834916127, + "gap_confidence_interval_95_upper": 11.7834916127, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "background_and_culture", + "demographic_factor": "Ethnicity", + "score_range": 2.2, + "min_level": "Asian", + "max_level": "African American", + "min_score": 77.7, + "max_score": 79.9, + "se_min": 4.2, + "se_max": 4.0, + "effect_size": 0.636012681, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.459050528, + "level_score_std_dev": 0.8986100378, + "se_difference": 5.8, + "z_score": 0.3793103448, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.1677911103, + "gap_confidence_interval_95_upper": 13.5677911103, + "raw_n_min_group": 40, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "background_and_culture", + "demographic_factor": "Politics", + "score_range": 1.3, + "min_level": "Democrat", + "max_level": "Republican", + "min_score": 78.1, + "max_score": 79.4, + "se_min": 3.7, + "se_max": 3.6, + "effect_size": 0.3758256751, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.459050528, + "level_score_std_dev": 0.5557777334, + "se_difference": 5.1623637997, + "z_score": 0.2518226244, + "p_value": 0.9570722129, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.8180471224, + "gap_confidence_interval_95_upper": 11.4180471224, + "raw_n_min_group": 168, + "raw_n_max_group": 160, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "background_and_culture", + "demographic_factor": "Sex", + "score_range": 0.6, + "min_level": "Female", + "max_level": "Male", + "min_score": 78.5, + "max_score": 79.1, + "se_min": 3.6, + "se_max": 3.7, + "effect_size": 0.1734580039, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 3.459050528, + "level_score_std_dev": 0.3, + "se_difference": 5.1623637997, + "z_score": 0.1162258266, + "p_value": 0.9772792279, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.5180471224, + "gap_confidence_interval_95_upper": 10.7180471224, + "raw_n_min_group": 256, + "raw_n_max_group": 250, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "background_and_culture", + "demographic_factor": "Urbanicity", + "score_range": 0.9, + "min_level": "Suburban", + "max_level": "Urban", + "min_score": 78.3, + "max_score": 79.2, + "se_min": 3.5, + "se_max": 3.7, + "effect_size": 0.2601870059, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.459050528, + "level_score_std_dev": 0.4027681991, + "se_difference": 5.0931326313, + "z_score": 0.1767085339, + "p_value": 0.9597457904, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.0823565258, + "gap_confidence_interval_95_upper": 10.8823565258, + "raw_n_min_group": 252, + "raw_n_max_group": 173, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "bias_and_stereotypes", + "demographic_factor": "Age", + "score_range": 7.7, + "min_level": "18-24", + "max_level": "35-44", + "min_score": 83.0, + "max_score": 90.7, + "se_min": 4.8, + "se_max": 2.8, + "effect_size": 2.7656659133, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.7841396038, + "level_score_std_dev": 2.8111385594, + "se_difference": 5.5569775958, + "z_score": 1.3856453202, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.1914759506, + "gap_confidence_interval_95_upper": 18.5914759506, + "raw_n_min_group": 60, + "raw_n_max_group": 93, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "bias_and_stereotypes", + "demographic_factor": "Education", + "score_range": 5.1, + "min_level": "College", + "max_level": "No College", + "min_score": 85.1, + "max_score": 90.2, + "se_min": 3.2, + "se_max": 3.5, + "effect_size": 1.8318046958, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.7841396038, + "level_score_std_dev": 2.55, + "se_difference": 4.7423622806, + "z_score": 1.0754134118, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.1948592715, + "gap_confidence_interval_95_upper": 14.3948592715, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "bias_and_stereotypes", + "demographic_factor": "Ethnicity", + "score_range": 1.4, + "min_level": "Asian", + "max_level": "White", + "min_score": 87.3, + "max_score": 88.7, + "se_min": 4.2, + "se_max": 3.1, + "effect_size": 0.5028483479, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 2.7841396038, + "level_score_std_dev": 0.5261891295, + "se_difference": 5.2201532545, + "z_score": 0.2681913599, + "p_value": 0.955704864, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.8313123725, + "gap_confidence_interval_95_upper": 11.6313123725, + "raw_n_min_group": 40, + "raw_n_max_group": 342, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "bias_and_stereotypes", + "demographic_factor": "Politics", + "score_range": 2.9, + "min_level": "Republican", + "max_level": "Independent", + "min_score": 87.5, + "max_score": 90.4, + "se_min": 3.5, + "se_max": 3.1, + "effect_size": 1.0416144349, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.7841396038, + "level_score_std_dev": 1.1953614051, + "se_difference": 4.675467891, + "z_score": 0.6202587779, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.2637486773, + "gap_confidence_interval_95_upper": 12.0637486773, + "raw_n_min_group": 160, + "raw_n_max_group": 183, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "bias_and_stereotypes", + "demographic_factor": "Sex", + "score_range": 1.0, + "min_level": "Male", + "max_level": "Female", + "min_score": 88.0, + "max_score": 89.0, + "se_min": 3.5, + "se_max": 3.3, + "effect_size": 0.3591773913, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 2.7841396038, + "level_score_std_dev": 0.5, + "se_difference": 4.8104053883, + "z_score": 0.2078826875, + "p_value": 0.9597457904, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.4282213121, + "gap_confidence_interval_95_upper": 10.4282213121, + "raw_n_min_group": 250, + "raw_n_max_group": 256, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "bias_and_stereotypes", + "demographic_factor": "Urbanicity", + "score_range": 4.7, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 86.5, + "max_score": 91.2, + "se_min": 3.8, + "se_max": 3.1, + "effect_size": 1.6881337393, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.7841396038, + "level_score_std_dev": 1.9510680836, + "se_difference": 4.9040799341, + "z_score": 0.9583856836, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.9118200481, + "gap_confidence_interval_95_upper": 14.3118200481, + "raw_n_min_group": 173, + "raw_n_max_group": 86, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "clarity", + "demographic_factor": "Age", + "score_range": 7.3, + "min_level": "45-54", + "max_level": "65+", + "min_score": 81.7, + "max_score": 89.0, + "se_min": 4.0, + "se_max": 3.3, + "effect_size": 1.6168148517, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.5150500643, + "level_score_std_dev": 2.737649032, + "se_difference": 5.185556865, + "z_score": 1.4077562333, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.8635046952, + "gap_confidence_interval_95_upper": 17.4635046952, + "raw_n_min_group": 81, + "raw_n_max_group": 64, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "clarity", + "demographic_factor": "Education", + "score_range": 6.5, + "min_level": "College", + "max_level": "No College", + "min_score": 82.4, + "max_score": 88.9, + "se_min": 3.3, + "se_max": 3.6, + "effect_size": 1.4396296624, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.5150500643, + "level_score_std_dev": 3.25, + "se_difference": 4.8836461788, + "z_score": 1.3309727531, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.0717706237, + "gap_confidence_interval_95_upper": 16.0717706237, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "clarity", + "demographic_factor": "Ethnicity", + "score_range": 2.5, + "min_level": "Asian", + "max_level": "White", + "min_score": 84.6, + "max_score": 87.1, + "se_min": 4.4, + "se_max": 3.2, + "effect_size": 0.5537037163, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 4.5150500643, + "level_score_std_dev": 0.9120718173, + "se_difference": 5.4405882035, + "z_score": 0.4595091388, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.1633569336, + "gap_confidence_interval_95_upper": 13.1633569336, + "raw_n_min_group": 40, + "raw_n_max_group": 342, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "clarity", + "demographic_factor": "Politics", + "score_range": 2.7, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 85.3, + "max_score": 88.0, + "se_min": 3.7, + "se_max": 3.3, + "effect_size": 0.5980000136, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 4.5150500643, + "level_score_std_dev": 1.1897712198, + "se_difference": 4.9578221025, + "z_score": 0.5445939657, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.0171527626, + "gap_confidence_interval_95_upper": 12.4171527626, + "raw_n_min_group": 168, + "raw_n_max_group": 183, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "clarity", + "demographic_factor": "Sex", + "score_range": 0.0, + "min_level": "Female", + "max_level": "Female", + "min_score": 86.7, + "max_score": 86.7, + "se_min": 3.5, + "se_max": 3.5, + "effect_size": 0.0, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 4.5150500643, + "level_score_std_dev": 0.0, + "se_difference": 4.9497474683, + "z_score": 0.0, + "p_value": 1.0, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.7013267704, + "gap_confidence_interval_95_upper": 9.7013267704, + "raw_n_min_group": 256, + "raw_n_max_group": 256, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "clarity", + "demographic_factor": "Urbanicity", + "score_range": 4.7, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 85.0, + "max_score": 89.7, + "se_min": 3.7, + "se_max": 3.2, + "effect_size": 1.0409629867, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.5150500643, + "level_score_std_dev": 2.0704266871, + "se_difference": 4.8918299235, + "z_score": 0.9607856515, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.8878104685, + "gap_confidence_interval_95_upper": 14.2878104685, + "raw_n_min_group": 173, + "raw_n_max_group": 86, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "communication", + "demographic_factor": "Age", + "score_range": 4.6, + "min_level": "45-54", + "max_level": "65+", + "min_score": 87.3, + "max_score": 91.9, + "se_min": 3.0, + "se_max": 2.6, + "effect_size": 1.1379843049, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.0422350116, + "level_score_std_dev": 1.631972222, + "se_difference": 3.9698866483, + "z_score": 1.1587232603, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.1808348533, + "gap_confidence_interval_95_upper": 12.3808348533, + "raw_n_min_group": 81, + "raw_n_max_group": 64, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "communication", + "demographic_factor": "Education", + "score_range": 3.2, + "min_level": "College", + "max_level": "No College", + "min_score": 88.3, + "max_score": 91.5, + "se_min": 2.4, + "se_max": 2.8, + "effect_size": 0.7916412556, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 4.0422350116, + "level_score_std_dev": 1.6, + "se_difference": 3.6878177829, + "z_score": 0.8677218313, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.0279900361, + "gap_confidence_interval_95_upper": 10.4279900361, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "communication", + "demographic_factor": "Ethnicity", + "score_range": 1.3, + "min_level": "African American", + "max_level": "Asian", + "min_score": 89.5, + "max_score": 90.8, + "se_min": 3.1, + "se_max": 3.1, + "effect_size": 0.3216042601, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 4.0422350116, + "level_score_std_dev": 0.5117372373, + "se_difference": 4.3840620434, + "z_score": 0.2965286502, + "p_value": 0.9469805126, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.292603711, + "gap_confidence_interval_95_upper": 9.892603711, + "raw_n_min_group": 0, + "raw_n_max_group": 40, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "communication", + "demographic_factor": "Politics", + "score_range": 1.8, + "min_level": "Republican", + "max_level": "Independent", + "min_score": 90.0, + "max_score": 91.8, + "se_min": 2.7, + "se_max": 2.5, + "effect_size": 0.4452982063, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 4.0422350116, + "level_score_std_dev": 0.7874007874, + "se_difference": 3.6796738986, + "z_score": 0.4891737827, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.4120283161, + "gap_confidence_interval_95_upper": 9.0120283161, + "raw_n_min_group": 160, + "raw_n_max_group": 183, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "communication", + "demographic_factor": "Sex", + "score_range": 0.1, + "min_level": "Male", + "max_level": "Female", + "min_score": 90.4, + "max_score": 90.5, + "se_min": 2.7, + "se_max": 2.7, + "effect_size": 0.0247387892, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 4.0422350116, + "level_score_std_dev": 0.05, + "se_difference": 3.8183766184, + "z_score": 0.02618914, + "p_value": 0.9924215291, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.3838806515, + "gap_confidence_interval_95_upper": 7.5838806515, + "raw_n_min_group": 250, + "raw_n_max_group": 256, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "communication", + "demographic_factor": "Urbanicity", + "score_range": 3.2, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 89.2, + "max_score": 92.4, + "se_min": 2.9, + "se_max": 2.5, + "effect_size": 0.7916412556, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 4.0422350116, + "level_score_std_dev": 1.3735598519, + "se_difference": 3.8288379438, + "z_score": 0.8357627162, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.3043844725, + "gap_confidence_interval_95_upper": 10.7043844725, + "raw_n_min_group": 173, + "raw_n_max_group": 86, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "comprehensiveness", + "demographic_factor": "Age", + "score_range": 5.6, + "min_level": "45-54", + "max_level": "55-64", + "min_score": 87.3, + "max_score": 92.9, + "se_min": 3.4, + "se_max": 2.3, + "effect_size": 1.5632906248, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.5821874137, + "level_score_std_dev": 1.9102065043, + "se_difference": 4.1048751504, + "z_score": 1.364231504, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.4454074557, + "gap_confidence_interval_95_upper": 13.6454074557, + "raw_n_min_group": 81, + "raw_n_max_group": 109, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "comprehensiveness", + "demographic_factor": "Education", + "score_range": 3.4, + "min_level": "College", + "max_level": "No College", + "min_score": 89.2, + "max_score": 92.6, + "se_min": 2.6, + "se_max": 3.0, + "effect_size": 0.9491407365, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.5821874137, + "level_score_std_dev": 1.7, + "se_difference": 3.9698866483, + "z_score": 0.8564476272, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.3808348533, + "gap_confidence_interval_95_upper": 11.1808348533, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "comprehensiveness", + "demographic_factor": "Ethnicity", + "score_range": 2.6, + "min_level": "Asian", + "max_level": "White", + "min_score": 89.4, + "max_score": 92.0, + "se_min": 3.7, + "se_max": 2.5, + "effect_size": 0.7258135044, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.5821874137, + "level_score_std_dev": 1.0735455277, + "se_difference": 4.4654227124, + "z_score": 0.5822517077, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.152067692, + "gap_confidence_interval_95_upper": 11.352067692, + "raw_n_min_group": 40, + "raw_n_max_group": 342, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "comprehensiveness", + "demographic_factor": "Politics", + "score_range": 2.7, + "min_level": "Republican", + "max_level": "Independent", + "min_score": 90.2, + "max_score": 92.9, + "se_min": 3.0, + "se_max": 2.5, + "effect_size": 0.7537294084, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.5821874137, + "level_score_std_dev": 1.1145502332, + "se_difference": 3.905124838, + "z_score": 0.6913991516, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.9539040375, + "gap_confidence_interval_95_upper": 10.3539040375, + "raw_n_min_group": 160, + "raw_n_max_group": 183, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "comprehensiveness", + "demographic_factor": "Sex", + "score_range": 1.4, + "min_level": "Male", + "max_level": "Female", + "min_score": 90.7, + "max_score": 92.1, + "se_min": 3.0, + "se_max": 2.7, + "effect_size": 0.3908226562, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.5821874137, + "level_score_std_dev": 0.7, + "se_difference": 4.0360872141, + "z_score": 0.3468706016, + "p_value": 0.9428986416, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.5105855781, + "gap_confidence_interval_95_upper": 9.3105855781, + "raw_n_min_group": 250, + "raw_n_max_group": 256, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "comprehensiveness", + "demographic_factor": "Urbanicity", + "score_range": 2.7, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 90.5, + "max_score": 93.2, + "se_min": 3.1, + "se_max": 2.6, + "effect_size": 0.7537294084, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.5821874137, + "level_score_std_dev": 1.2083045974, + "se_difference": 4.0459856648, + "z_score": 0.6673281182, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.229986185, + "gap_confidence_interval_95_upper": 10.629986185, + "raw_n_min_group": 173, + "raw_n_max_group": 86, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "confidence", + "demographic_factor": "Age", + "score_range": 4.0, + "min_level": "45-54", + "max_level": "35-44", + "min_score": 86.2, + "max_score": 90.2, + "se_min": 3.4, + "se_max": 2.8, + "effect_size": 1.2164745064, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.2881905696, + "level_score_std_dev": 1.6750621879, + "se_difference": 4.4045431091, + "z_score": 0.9081532184, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.6327458622, + "gap_confidence_interval_95_upper": 12.6327458622, + "raw_n_min_group": 81, + "raw_n_max_group": 93, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "confidence", + "demographic_factor": "Education", + "score_range": 3.5, + "min_level": "College", + "max_level": "No College", + "min_score": 87.0, + "max_score": 90.5, + "se_min": 2.7, + "se_max": 3.3, + "effect_size": 1.0644151931, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.2881905696, + "level_score_std_dev": 1.75, + "se_difference": 4.2638011211, + "z_score": 0.8208638022, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.8568966345, + "gap_confidence_interval_95_upper": 11.8568966345, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "confidence", + "demographic_factor": "Ethnicity", + "score_range": 3.1, + "min_level": "Hispanic", + "max_level": "African American", + "min_score": 87.8, + "max_score": 90.9, + "se_min": 4.1, + "se_max": 3.1, + "effect_size": 0.9427677424, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.2881905696, + "level_score_std_dev": 1.2257650672, + "se_difference": 5.1400389104, + "z_score": 0.6031082749, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.9742911434, + "gap_confidence_interval_95_upper": 13.1742911434, + "raw_n_min_group": 33, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "confidence", + "demographic_factor": "Politics", + "score_range": 2.5, + "min_level": "Republican", + "max_level": "Independent", + "min_score": 88.1, + "max_score": 90.6, + "se_min": 3.2, + "se_max": 2.9, + "effect_size": 0.7602965665, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.2881905696, + "level_score_std_dev": 1.0530379333, + "se_difference": 4.3185645763, + "z_score": 0.5788960558, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.9642310345, + "gap_confidence_interval_95_upper": 10.9642310345, + "raw_n_min_group": 160, + "raw_n_max_group": 183, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "confidence", + "demographic_factor": "Sex", + "score_range": 1.1, + "min_level": "Male", + "max_level": "Female", + "min_score": 88.8, + "max_score": 89.9, + "se_min": 3.2, + "se_max": 3.0, + "effect_size": 0.3345304892, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.2881905696, + "level_score_std_dev": 0.55, + "se_difference": 4.3863424399, + "z_score": 0.2507784139, + "p_value": 0.9570722129, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.497073206, + "gap_confidence_interval_95_upper": 9.697073206, + "raw_n_min_group": 250, + "raw_n_max_group": 256, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "confidence", + "demographic_factor": "Urbanicity", + "score_range": 2.1, + "min_level": "Suburban", + "max_level": "Rural", + "min_score": 88.6, + "max_score": 90.7, + "se_min": 3.1, + "se_max": 3.0, + "effect_size": 0.6386491158, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.2881905696, + "level_score_std_dev": 0.9104333522, + "se_difference": 4.313930922, + "z_score": 0.4867949993, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.355149239, + "gap_confidence_interval_95_upper": 10.555149239, + "raw_n_min_group": 252, + "raw_n_max_group": 86, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "consistency", + "demographic_factor": "Age", + "score_range": 5.3, + "min_level": "45-54", + "max_level": "55-64", + "min_score": 88.4, + "max_score": 93.7, + "se_min": 3.1, + "se_max": 2.1, + "effect_size": 1.7802851875, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9770511137, + "level_score_std_dev": 1.9516374891, + "se_difference": 3.7443290454, + "z_score": 1.4154738902, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.0387500752, + "gap_confidence_interval_95_upper": 12.6387500752, + "raw_n_min_group": 81, + "raw_n_max_group": 109, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "consistency", + "demographic_factor": "Education", + "score_range": 4.7, + "min_level": "College", + "max_level": "No College", + "min_score": 89.0, + "max_score": 93.7, + "se_min": 2.5, + "se_max": 2.6, + "effect_size": 1.5787434681, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9770511137, + "level_score_std_dev": 2.35, + "se_difference": 3.6069377594, + "z_score": 1.3030443866, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.3694681029, + "gap_confidence_interval_95_upper": 11.7694681029, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "consistency", + "demographic_factor": "Ethnicity", + "score_range": 1.2, + "min_level": "Asian", + "max_level": "African American", + "min_score": 91.4, + "max_score": 92.6, + "se_min": 3.1, + "se_max": 2.7, + "effect_size": 0.4030834387, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 2.9770511137, + "level_score_std_dev": 0.4322904116, + "se_difference": 4.1109609582, + "z_score": 0.2919025532, + "p_value": 0.9469805126, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.85733542, + "gap_confidence_interval_95_upper": 9.25733542, + "raw_n_min_group": 40, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "consistency", + "demographic_factor": "Politics", + "score_range": 2.9, + "min_level": "Republican", + "max_level": "Independent", + "min_score": 91.2, + "max_score": 94.1, + "se_min": 2.7, + "se_max": 2.1, + "effect_size": 0.9741183101, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9770511137, + "level_score_std_dev": 1.2229290885, + "se_difference": 3.4205262753, + "z_score": 0.847822752, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.8041083078, + "gap_confidence_interval_95_upper": 9.6041083078, + "raw_n_min_group": 160, + "raw_n_max_group": 183, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "consistency", + "demographic_factor": "Sex", + "score_range": 0.6, + "min_level": "Male", + "max_level": "Female", + "min_score": 91.8, + "max_score": 92.4, + "se_min": 2.7, + "se_max": 2.5, + "effect_size": 0.2015417193, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 2.9770511137, + "level_score_std_dev": 0.3, + "se_difference": 3.6796738986, + "z_score": 0.1630579276, + "p_value": 0.9610477633, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.6120283161, + "gap_confidence_interval_95_upper": 7.8120283161, + "raw_n_min_group": 250, + "raw_n_max_group": 256, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "consistency", + "demographic_factor": "Urbanicity", + "score_range": 2.4, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 90.8, + "max_score": 93.2, + "se_min": 2.9, + "se_max": 2.5, + "effect_size": 0.8061668773, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9770511137, + "level_score_std_dev": 0.9899494937, + "se_difference": 3.8288379438, + "z_score": 0.6268220372, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.1043844725, + "gap_confidence_interval_95_upper": 9.9043844725, + "raw_n_min_group": 173, + "raw_n_max_group": 86, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "context_memory", + "demographic_factor": "Age", + "score_range": 3.1, + "min_level": "45-54", + "max_level": "25-34", + "min_score": 93.2, + "max_score": 96.3, + "se_min": 2.3, + "se_max": 1.8, + "effect_size": 0.9147689994, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.3888336858, + "level_score_std_dev": 1.1086778913, + "se_difference": 2.9206163733, + "z_score": 1.061419784, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.6243029043, + "gap_confidence_interval_95_upper": 8.8243029043, + "raw_n_min_group": 81, + "raw_n_max_group": 104, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "context_memory", + "demographic_factor": "Education", + "score_range": 7.2, + "min_level": "College", + "max_level": "No College", + "min_score": 90.4, + "max_score": 97.6, + "se_min": 2.4, + "se_max": 1.6, + "effect_size": 2.1246247729, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.3888336858, + "level_score_std_dev": 3.6, + "se_difference": 2.8844410204, + "z_score": 2.496150883, + "p_value": 0.4082373531, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": 1.5465994845, + "gap_confidence_interval_95_upper": 12.8534005155, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "context_memory", + "demographic_factor": "Ethnicity", + "score_range": 2.0, + "min_level": "Asian", + "max_level": "Hispanic", + "min_score": 94.1, + "max_score": 96.1, + "se_min": 2.5, + "se_max": 2.2, + "effect_size": 0.590173548, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.3888336858, + "level_score_std_dev": 0.7949056548, + "se_difference": 3.3301651611, + "z_score": 0.6005708135, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.5270037783, + "gap_confidence_interval_95_upper": 8.5270037783, + "raw_n_min_group": 40, + "raw_n_max_group": 33, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "context_memory", + "demographic_factor": "Politics", + "score_range": 2.3, + "min_level": "Republican", + "max_level": "Independent", + "min_score": 94.4, + "max_score": 96.7, + "se_min": 2.1, + "se_max": 1.5, + "effect_size": 0.6786995802, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.3888336858, + "level_score_std_dev": 0.9626352719, + "se_difference": 2.5806975801, + "z_score": 0.891231897, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.758074312, + "gap_confidence_interval_95_upper": 7.358074312, + "raw_n_min_group": 160, + "raw_n_max_group": 183, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "context_memory", + "demographic_factor": "Sex", + "score_range": 0.4, + "min_level": "Male", + "max_level": "Female", + "min_score": 94.9, + "max_score": 95.3, + "se_min": 2.0, + "se_max": 1.9, + "effect_size": 0.1180347096, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 3.3888336858, + "level_score_std_dev": 0.2, + "se_difference": 2.7586228448, + "z_score": 0.1449998867, + "p_value": 0.9672328079, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.0068014228, + "gap_confidence_interval_95_upper": 5.8068014228, + "raw_n_min_group": 250, + "raw_n_max_group": 256, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "context_memory", + "demographic_factor": "Urbanicity", + "score_range": 2.4, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 94.4, + "max_score": 96.8, + "se_min": 2.1, + "se_max": 1.6, + "effect_size": 0.7082082576, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.3888336858, + "level_score_std_dev": 1.1085526099, + "se_difference": 2.6400757565, + "z_score": 0.9090648229, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.7744533992, + "gap_confidence_interval_95_upper": 7.5744533992, + "raw_n_min_group": 173, + "raw_n_max_group": 86, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "conversation_building", + "demographic_factor": "Age", + "score_range": 5.4, + "min_level": "45-54", + "max_level": "25-34", + "min_score": 89.5, + "max_score": 94.9, + "se_min": 3.0, + "se_max": 2.1, + "effect_size": 1.2483060981, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.3258620689, + "level_score_std_dev": 1.889811866, + "se_difference": 3.6619666847, + "z_score": 1.4746174569, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -1.7773228146, + "gap_confidence_interval_95_upper": 12.5773228146, + "raw_n_min_group": 81, + "raw_n_max_group": 104, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "conversation_building", + "demographic_factor": "Education", + "score_range": 7.7, + "min_level": "College", + "max_level": "No College", + "min_score": 88.0, + "max_score": 95.7, + "se_min": 2.8, + "se_max": 2.1, + "effect_size": 1.7799920287, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.3258620689, + "level_score_std_dev": 3.85, + "se_difference": 3.5, + "z_score": 2.2, + "p_value": 0.8243926526, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": 0.8401260541, + "gap_confidence_interval_95_upper": 14.5598739459, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "conversation_building", + "demographic_factor": "Ethnicity", + "score_range": 3.1, + "min_level": "Asian", + "max_level": "Hispanic", + "min_score": 90.7, + "max_score": 93.8, + "se_min": 3.3, + "se_max": 3.0, + "effect_size": 0.7166201674, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 4.3258620689, + "level_score_std_dev": 1.2509996003, + "se_difference": 4.4598206242, + "z_score": 0.6950952205, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.6410878009, + "gap_confidence_interval_95_upper": 11.8410878009, + "raw_n_min_group": 40, + "raw_n_max_group": 33, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "conversation_building", + "demographic_factor": "Politics", + "score_range": 3.8, + "min_level": "Republican", + "max_level": "Independent", + "min_score": 91.7, + "max_score": 95.5, + "se_min": 2.6, + "se_max": 1.8, + "effect_size": 0.8784376246, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.3258620689, + "level_score_std_dev": 1.5577761927, + "se_difference": 3.1622776602, + "z_score": 1.2016655109, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.397950323, + "gap_confidence_interval_95_upper": 9.997950323, + "raw_n_min_group": 160, + "raw_n_max_group": 183, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "conversation_building", + "demographic_factor": "Sex", + "score_range": 1.0, + "min_level": "Female", + "max_level": "Male", + "min_score": 92.6, + "max_score": 93.6, + "se_min": 2.5, + "se_max": 2.3, + "effect_size": 0.2311677959, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 4.3258620689, + "level_score_std_dev": 0.5, + "se_difference": 3.3970575503, + "z_score": 0.2943724047, + "p_value": 0.9469805126, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.658110452, + "gap_confidence_interval_95_upper": 7.658110452, + "raw_n_min_group": 256, + "raw_n_max_group": 250, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "conversation_building", + "demographic_factor": "Urbanicity", + "score_range": 2.7, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 92.1, + "max_score": 94.8, + "se_min": 2.6, + "se_max": 2.1, + "effect_size": 0.624153049, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 4.3258620689, + "level_score_std_dev": 1.1728408057, + "se_difference": 3.3421549934, + "z_score": 0.8078619948, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.8505034178, + "gap_confidence_interval_95_upper": 9.2505034178, + "raw_n_min_group": 173, + "raw_n_max_group": 86, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "conversation_flow", + "demographic_factor": "Age", + "score_range": 7.7, + "min_level": "18-24", + "max_level": "65+", + "min_score": 83.4, + "max_score": 91.1, + "se_min": 4.4, + "se_max": 3.0, + "effect_size": 1.5492975387, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.969994341, + "level_score_std_dev": 2.5199206337, + "se_difference": 5.3254107823, + "z_score": 1.4458978499, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.7376133361, + "gap_confidence_interval_95_upper": 18.1376133361, + "raw_n_min_group": 60, + "raw_n_max_group": 64, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "conversation_flow", + "demographic_factor": "Education", + "score_range": 3.7, + "min_level": "College", + "max_level": "No College", + "min_score": 86.1, + "max_score": 89.8, + "se_min": 2.9, + "se_max": 3.4, + "effect_size": 0.7444676485, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 4.969994341, + "level_score_std_dev": 1.85, + "se_difference": 4.4687805943, + "z_score": 0.8279663595, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.0586490196, + "gap_confidence_interval_95_upper": 12.4586490196, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "conversation_flow", + "demographic_factor": "Ethnicity", + "score_range": 3.5, + "min_level": "Hispanic", + "max_level": "Asian", + "min_score": 85.6, + "max_score": 89.1, + "se_min": 4.5, + "se_max": 3.7, + "effect_size": 0.704226154, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 4.969994341, + "level_score_std_dev": 1.4359230481, + "se_difference": 5.8258046655, + "z_score": 0.600775378, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.9183673253, + "gap_confidence_interval_95_upper": 14.9183673253, + "raw_n_min_group": 33, + "raw_n_max_group": 40, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "conversation_flow", + "demographic_factor": "Politics", + "score_range": 0.9, + "min_level": "Republican", + "max_level": "Independent", + "min_score": 88.3, + "max_score": 89.2, + "se_min": 3.2, + "se_max": 3.1, + "effect_size": 0.1810867253, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 4.969994341, + "level_score_std_dev": 0.4027681991, + "se_difference": 4.455333882, + "z_score": 0.2020050627, + "p_value": 0.9597457904, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.8322939478, + "gap_confidence_interval_95_upper": 9.6322939478, + "raw_n_min_group": 160, + "raw_n_max_group": 183, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "conversation_flow", + "demographic_factor": "Sex", + "score_range": 0.1, + "min_level": "Male", + "max_level": "Female", + "min_score": 88.5, + "max_score": 88.6, + "se_min": 3.3, + "se_max": 3.2, + "effect_size": 0.0201207473, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 4.969994341, + "level_score_std_dev": 0.05, + "se_difference": 4.5967379738, + "z_score": 0.0217545574, + "p_value": 0.9924215291, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.9094408751, + "gap_confidence_interval_95_upper": 9.1094408751, + "raw_n_min_group": 250, + "raw_n_max_group": 256, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "conversation_flow", + "demographic_factor": "Urbanicity", + "score_range": 2.6, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 87.5, + "max_score": 90.1, + "se_min": 3.5, + "se_max": 3.1, + "effect_size": 0.5231394287, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 4.969994341, + "level_score_std_dev": 1.0984838036, + "se_difference": 4.675467891, + "z_score": 0.5560940767, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.5637486773, + "gap_confidence_interval_95_upper": 11.7637486773, + "raw_n_min_group": 173, + "raw_n_max_group": 86, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "cultural_awareness", + "demographic_factor": "Age", + "score_range": 5.5, + "min_level": "18-24", + "max_level": "65+", + "min_score": 72.5, + "max_score": 78.0, + "se_min": 5.3, + "se_max": 4.8, + "effect_size": 1.4809455972, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.7138433783, + "level_score_std_dev": 1.77263144, + "se_difference": 7.1505244563, + "z_score": 0.7691743499, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.5147704049, + "gap_confidence_interval_95_upper": 19.5147704049, + "raw_n_min_group": 60, + "raw_n_max_group": 64, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "cultural_awareness", + "demographic_factor": "Education", + "score_range": 2.1, + "min_level": "College", + "max_level": "No College", + "min_score": 75.7, + "max_score": 77.8, + "se_min": 3.7, + "se_max": 5.0, + "effect_size": 0.5654519553, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.7138433783, + "level_score_std_dev": 1.05, + "se_difference": 6.220128616, + "z_score": 0.3376135977, + "p_value": 0.9429935514, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -10.0912280666, + "gap_confidence_interval_95_upper": 14.2912280666, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "cultural_awareness", + "demographic_factor": "Ethnicity", + "score_range": 6.7, + "min_level": "Hispanic", + "max_level": "African American", + "min_score": 73.5, + "max_score": 80.2, + "se_min": 6.1, + "se_max": 4.9, + "effect_size": 1.8040610003, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.7138433783, + "level_score_std_dev": 2.5597851472, + "se_difference": 7.8243210568, + "z_score": 0.85630433, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.6353874748, + "gap_confidence_interval_95_upper": 22.0353874748, + "raw_n_min_group": 33, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "cultural_awareness", + "demographic_factor": "Politics", + "score_range": 2.3, + "min_level": "Independent", + "max_level": "Republican", + "min_score": 75.7, + "max_score": 78.0, + "se_min": 4.7, + "se_max": 4.4, + "effect_size": 0.6193045225, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.7138433783, + "level_score_std_dev": 0.9392668536, + "se_difference": 6.4381674411, + "z_score": 0.3572445142, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -10.318576311, + "gap_confidence_interval_95_upper": 14.918576311, + "raw_n_min_group": 183, + "raw_n_max_group": 160, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "cultural_awareness", + "demographic_factor": "Sex", + "score_range": 1.2, + "min_level": "Female", + "max_level": "Male", + "min_score": 76.5, + "max_score": 77.7, + "se_min": 4.7, + "se_max": 4.6, + "effect_size": 0.323115403, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.7138433783, + "level_score_std_dev": 0.6, + "se_difference": 6.576473219, + "z_score": 0.1824686211, + "p_value": 0.9597457904, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -11.6896506545, + "gap_confidence_interval_95_upper": 14.0896506545, + "raw_n_min_group": 256, + "raw_n_max_group": 250, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "cultural_awareness", + "demographic_factor": "Urbanicity", + "score_range": 1.8, + "min_level": "Rural", + "max_level": "Urban", + "min_score": 76.0, + "max_score": 77.8, + "se_min": 4.9, + "se_max": 4.6, + "effect_size": 0.4846731046, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.7138433783, + "level_score_std_dev": 0.7586537784, + "se_difference": 6.7208630398, + "z_score": 0.2678227468, + "p_value": 0.955704864, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -11.3726495031, + "gap_confidence_interval_95_upper": 14.9726495031, + "raw_n_min_group": 86, + "raw_n_max_group": 173, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "detail_and_technical_language", + "demographic_factor": "Age", + "score_range": 3.3, + "min_level": "45-54", + "max_level": "25-34", + "min_score": 89.6, + "max_score": 92.9, + "se_min": 3.0, + "se_max": 2.5, + "effect_size": 1.0545897612, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1291788726, + "level_score_std_dev": 1.1319844915, + "se_difference": 3.905124838, + "z_score": 0.8450434076, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.3539040375, + "gap_confidence_interval_95_upper": 10.9539040375, + "raw_n_min_group": 81, + "raw_n_max_group": 104, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "detail_and_technical_language", + "demographic_factor": "Education", + "score_range": 3.4, + "min_level": "College", + "max_level": "No College", + "min_score": 89.0, + "max_score": 92.4, + "se_min": 2.6, + "se_max": 2.9, + "effect_size": 1.0865470267, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1291788726, + "level_score_std_dev": 1.7, + "se_difference": 3.8948684188, + "z_score": 0.8729434821, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.2338018254, + "gap_confidence_interval_95_upper": 11.0338018254, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "detail_and_technical_language", + "demographic_factor": "Ethnicity", + "score_range": 4.8, + "min_level": "Asian", + "max_level": "Hispanic", + "min_score": 89.6, + "max_score": 94.4, + "se_min": 3.7, + "se_max": 2.8, + "effect_size": 1.5339487436, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1291788726, + "level_score_std_dev": 1.9057478847, + "se_difference": 4.6400431032, + "z_score": 1.0344731489, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.2943173691, + "gap_confidence_interval_95_upper": 13.8943173691, + "raw_n_min_group": 40, + "raw_n_max_group": 33, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "detail_and_technical_language", + "demographic_factor": "Politics", + "score_range": 1.7, + "min_level": "Republican", + "max_level": "Democrat", + "min_score": 90.3, + "max_score": 92.0, + "se_min": 3.0, + "se_max": 2.7, + "effect_size": 0.5432735133, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.1291788726, + "level_score_std_dev": 0.6976149845, + "se_difference": 4.0360872141, + "z_score": 0.4212000162, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.2105855781, + "gap_confidence_interval_95_upper": 9.6105855781, + "raw_n_min_group": 160, + "raw_n_max_group": 168, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "detail_and_technical_language", + "demographic_factor": "Sex", + "score_range": 0.8, + "min_level": "Female", + "max_level": "Male", + "min_score": 90.8, + "max_score": 91.6, + "se_min": 2.9, + "se_max": 2.8, + "effect_size": 0.2556581239, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.1291788726, + "level_score_std_dev": 0.4, + "se_difference": 4.0311288741, + "z_score": 0.1984555753, + "p_value": 0.9597457904, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.1008674104, + "gap_confidence_interval_95_upper": 8.7008674104, + "raw_n_min_group": 256, + "raw_n_max_group": 250, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "detail_and_technical_language", + "demographic_factor": "Urbanicity", + "score_range": 2.6, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 90.2, + "max_score": 92.8, + "se_min": 3.1, + "se_max": 2.7, + "effect_size": 0.8308889028, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1291788726, + "level_score_std_dev": 1.1115554667, + "se_difference": 4.1109609582, + "z_score": 0.632455532, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.45733542, + "gap_confidence_interval_95_upper": 10.65733542, + "raw_n_min_group": 173, + "raw_n_max_group": 86, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "distinct_personality", + "demographic_factor": "Age", + "score_range": 5.1, + "min_level": "35-44", + "max_level": "65+", + "min_score": 77.9, + "max_score": 83.0, + "se_min": 4.0, + "se_max": 4.0, + "effect_size": 1.1796415662, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.3233471472, + "level_score_std_dev": 1.7808393776, + "se_difference": 5.6568542495, + "z_score": 0.901561146, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.9872305948, + "gap_confidence_interval_95_upper": 16.1872305948, + "raw_n_min_group": 93, + "raw_n_max_group": 64, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "distinct_personality", + "demographic_factor": "Education", + "score_range": 5.0, + "min_level": "College", + "max_level": "No College", + "min_score": 78.2, + "max_score": 83.2, + "se_min": 3.4, + "se_max": 4.2, + "effect_size": 1.1565113394, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.3233471472, + "level_score_std_dev": 2.5, + "se_difference": 5.4037024344, + "z_score": 0.9252915127, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.5910621547, + "gap_confidence_interval_95_upper": 15.5910621547, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "distinct_personality", + "demographic_factor": "Ethnicity", + "score_range": 4.1, + "min_level": "White", + "max_level": "African American", + "min_score": 80.9, + "max_score": 85.0, + "se_min": 3.7, + "se_max": 3.9, + "effect_size": 0.9483392983, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.3233471472, + "level_score_std_dev": 1.5039531243, + "se_difference": 5.3758720223, + "z_score": 0.7626669651, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.4365155492, + "gap_confidence_interval_95_upper": 14.6365155492, + "raw_n_min_group": 342, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "distinct_personality", + "demographic_factor": "Politics", + "score_range": 6.5, + "min_level": "Independent", + "max_level": "Republican", + "min_score": 77.6, + "max_score": 84.1, + "se_min": 4.3, + "se_max": 3.6, + "effect_size": 1.5034647412, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.3233471472, + "level_score_std_dev": 2.653718565, + "se_difference": 5.6080299571, + "z_score": 1.1590522964, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.4915367402, + "gap_confidence_interval_95_upper": 17.4915367402, + "raw_n_min_group": 183, + "raw_n_max_group": 160, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "distinct_personality", + "demographic_factor": "Sex", + "score_range": 0.6, + "min_level": "Male", + "max_level": "Female", + "min_score": 81.2, + "max_score": 81.8, + "se_min": 4.0, + "se_max": 3.9, + "effect_size": 0.1387813607, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 4.3233471472, + "level_score_std_dev": 0.3, + "se_difference": 5.5865910894, + "z_score": 0.1074000209, + "p_value": 0.9775052078, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -10.3495173315, + "gap_confidence_interval_95_upper": 11.5495173315, + "raw_n_min_group": 250, + "raw_n_max_group": 256, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "distinct_personality", + "demographic_factor": "Urbanicity", + "score_range": 3.0, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 79.9, + "max_score": 82.9, + "se_min": 4.1, + "se_max": 4.0, + "effect_size": 0.6939068036, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 4.3233471472, + "level_score_std_dev": 1.2328828006, + "se_difference": 5.7280013966, + "z_score": 0.5237428891, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.2266764408, + "gap_confidence_interval_95_upper": 14.2266764408, + "raw_n_min_group": 173, + "raw_n_max_group": 86, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "effectiveness", + "demographic_factor": "Age", + "score_range": 4.4, + "min_level": "45-54", + "max_level": "55-64", + "min_score": 89.6, + "max_score": 94.0, + "se_min": 3.1, + "se_max": 2.2, + "effect_size": 1.1110768664, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.960122052, + "level_score_std_dev": 1.619070378, + "se_difference": 3.8013155617, + "z_score": 1.1574940119, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.0504415949, + "gap_confidence_interval_95_upper": 11.8504415949, + "raw_n_min_group": 81, + "raw_n_max_group": 109, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "effectiveness", + "demographic_factor": "Education", + "score_range": 2.7, + "min_level": "College", + "max_level": "No College", + "min_score": 90.9, + "max_score": 93.6, + "se_min": 2.4, + "se_max": 2.7, + "effect_size": 0.681797168, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.960122052, + "level_score_std_dev": 1.35, + "se_difference": 3.6124783736, + "z_score": 0.7474093187, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.3803275073, + "gap_confidence_interval_95_upper": 9.7803275073, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "effectiveness", + "demographic_factor": "Ethnicity", + "score_range": 0.8, + "min_level": "Hispanic", + "max_level": "White", + "min_score": 92.0, + "max_score": 92.8, + "se_min": 3.3, + "se_max": 2.4, + "effect_size": 0.2020139757, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.960122052, + "level_score_std_dev": 0.334477204, + "se_difference": 4.0804411526, + "z_score": 0.1960572326, + "p_value": 0.9597457904, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.1975177002, + "gap_confidence_interval_95_upper": 8.7975177002, + "raw_n_min_group": 33, + "raw_n_max_group": 342, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "effectiveness", + "demographic_factor": "Politics", + "score_range": 1.8, + "min_level": "Republican", + "max_level": "Independent", + "min_score": 91.9, + "max_score": 93.7, + "se_min": 2.7, + "se_max": 2.4, + "effect_size": 0.4545314453, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.960122052, + "level_score_std_dev": 0.740870359, + "se_difference": 3.6124783736, + "z_score": 0.4982728791, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.2803275073, + "gap_confidence_interval_95_upper": 8.8803275073, + "raw_n_min_group": 160, + "raw_n_max_group": 183, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "effectiveness", + "demographic_factor": "Sex", + "score_range": 1.5, + "min_level": "Male", + "max_level": "Female", + "min_score": 91.9, + "max_score": 93.4, + "se_min": 2.8, + "se_max": 2.4, + "effect_size": 0.3787762044, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.960122052, + "level_score_std_dev": 0.75, + "se_difference": 3.6878177829, + "z_score": 0.4067446084, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.7279900361, + "gap_confidence_interval_95_upper": 8.7279900361, + "raw_n_min_group": 250, + "raw_n_max_group": 256, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "effectiveness", + "demographic_factor": "Urbanicity", + "score_range": 4.3, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 90.9, + "max_score": 95.2, + "se_min": 3.0, + "se_max": 2.1, + "effect_size": 1.0858251194, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.960122052, + "level_score_std_dev": 1.8006171781, + "se_difference": 3.6619666847, + "z_score": 1.1742324194, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.8773228146, + "gap_confidence_interval_95_upper": 11.4773228146, + "raw_n_min_group": 173, + "raw_n_max_group": 86, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "ethical_alignment", + "demographic_factor": "Age", + "score_range": 11.5, + "min_level": "18-24", + "max_level": "65+", + "min_score": 71.5, + "max_score": 83.0, + "se_min": 5.0, + "se_max": 4.3, + "effect_size": 3.058810026, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.7596319818, + "level_score_std_dev": 3.7477400598, + "se_difference": 6.5946948375, + "z_score": 1.7438259515, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -1.4253643706, + "gap_confidence_interval_95_upper": 24.4253643706, + "raw_n_min_group": 60, + "raw_n_max_group": 64, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "ethical_alignment", + "demographic_factor": "Education", + "score_range": 2.8, + "min_level": "College", + "max_level": "No College", + "min_score": 78.6, + "max_score": 81.4, + "se_min": 3.5, + "se_max": 4.6, + "effect_size": 0.7447537455, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.7596319818, + "level_score_std_dev": 1.4, + "se_difference": 5.7801384066, + "z_score": 0.484417466, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.5288631027, + "gap_confidence_interval_95_upper": 14.1288631027, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "ethical_alignment", + "demographic_factor": "Ethnicity", + "score_range": 9.3, + "min_level": "Asian", + "max_level": "African American", + "min_score": 74.9, + "max_score": 84.2, + "se_min": 5.3, + "se_max": 4.3, + "effect_size": 2.4736463688, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.7596319818, + "level_score_std_dev": 3.6683613508, + "se_difference": 6.8249542123, + "z_score": 1.3626465044, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.0766644522, + "gap_confidence_interval_95_upper": 22.6766644522, + "raw_n_min_group": 40, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "ethical_alignment", + "demographic_factor": "Politics", + "score_range": 2.6, + "min_level": "Independent", + "max_level": "Republican", + "min_score": 79.2, + "max_score": 81.8, + "se_min": 4.4, + "se_max": 4.0, + "effect_size": 0.6915570493, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.7596319818, + "level_score_std_dev": 1.1264496832, + "se_difference": 5.9464274989, + "z_score": 0.4372373161, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.0547837346, + "gap_confidence_interval_95_upper": 14.2547837346, + "raw_n_min_group": 183, + "raw_n_max_group": 160, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "ethical_alignment", + "demographic_factor": "Sex", + "score_range": 1.5, + "min_level": "Female", + "max_level": "Male", + "min_score": 79.7, + "max_score": 81.2, + "se_min": 4.3, + "se_max": 4.2, + "effect_size": 0.3989752208, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.7596319818, + "level_score_std_dev": 0.75, + "se_difference": 6.0108235709, + "z_score": 0.24954983, + "p_value": 0.9570722129, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -10.2809977163, + "gap_confidence_interval_95_upper": 13.2809977163, + "raw_n_min_group": 256, + "raw_n_max_group": 250, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "ethical_alignment", + "demographic_factor": "Urbanicity", + "score_range": 0.6, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 80.2, + "max_score": 80.8, + "se_min": 4.3, + "se_max": 4.4, + "effect_size": 0.1595900883, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 3.7596319818, + "level_score_std_dev": 0.2494438258, + "se_difference": 6.1522353661, + "z_score": 0.0975255276, + "p_value": 0.9796496683, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -11.458159742, + "gap_confidence_interval_95_upper": 12.658159742, + "raw_n_min_group": 173, + "raw_n_max_group": 86, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "flexibility", + "demographic_factor": "Age", + "score_range": 5.5, + "min_level": "45-54", + "max_level": "25-34", + "min_score": 88.7, + "max_score": 94.2, + "se_min": 3.3, + "se_max": 2.4, + "effect_size": 1.6102535713, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.4156111174, + "level_score_std_dev": 1.8266545012, + "se_difference": 4.0804411526, + "z_score": 1.3478934738, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.4975177002, + "gap_confidence_interval_95_upper": 13.4975177002, + "raw_n_min_group": 81, + "raw_n_max_group": 104, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "flexibility", + "demographic_factor": "Education", + "score_range": 5.6, + "min_level": "College", + "max_level": "No College", + "min_score": 89.0, + "max_score": 94.6, + "se_min": 2.7, + "se_max": 2.6, + "effect_size": 1.639530909, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.4156111174, + "level_score_std_dev": 2.8, + "se_difference": 3.7483329628, + "z_score": 1.4939974798, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -1.7465976091, + "gap_confidence_interval_95_upper": 12.9465976091, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "flexibility", + "demographic_factor": "Ethnicity", + "score_range": 3.2, + "min_level": "Asian", + "max_level": "African American", + "min_score": 90.7, + "max_score": 93.9, + "se_min": 3.5, + "se_max": 2.6, + "effect_size": 0.9368748051, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.4156111174, + "level_score_std_dev": 1.1388041974, + "se_difference": 4.3600458713, + "z_score": 0.7339372324, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.3455328787, + "gap_confidence_interval_95_upper": 11.7455328787, + "raw_n_min_group": 40, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "flexibility", + "demographic_factor": "Politics", + "score_range": 2.4, + "min_level": "Republican", + "max_level": "Independent", + "min_score": 91.5, + "max_score": 93.9, + "se_min": 2.8, + "se_max": 2.3, + "effect_size": 0.7026561038, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.4156111174, + "level_score_std_dev": 1.0077477639, + "se_difference": 3.6235341864, + "z_score": 0.6623367896, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.7019965021, + "gap_confidence_interval_95_upper": 9.5019965021, + "raw_n_min_group": 160, + "raw_n_max_group": 183, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "flexibility", + "demographic_factor": "Sex", + "score_range": 0.7, + "min_level": "Male", + "max_level": "Female", + "min_score": 92.3, + "max_score": 93.0, + "se_min": 2.7, + "se_max": 2.5, + "effect_size": 0.2049413636, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.4156111174, + "level_score_std_dev": 0.35, + "se_difference": 3.6796738986, + "z_score": 0.1902342488, + "p_value": 0.9597457904, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.5120283161, + "gap_confidence_interval_95_upper": 7.9120283161, + "raw_n_min_group": 250, + "raw_n_max_group": 256, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "flexibility", + "demographic_factor": "Urbanicity", + "score_range": 2.3, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 91.8, + "max_score": 94.1, + "se_min": 2.9, + "se_max": 2.4, + "effect_size": 0.6733787662, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.4156111174, + "level_score_std_dev": 0.9877021593, + "se_difference": 3.7643060449, + "z_score": 0.6110023926, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.0779042749, + "gap_confidence_interval_95_upper": 9.6779042749, + "raw_n_min_group": 173, + "raw_n_max_group": 86, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "helpfulness", + "demographic_factor": "Age", + "score_range": 5.1, + "min_level": "45-54", + "max_level": "55-64", + "min_score": 88.0, + "max_score": 93.1, + "se_min": 3.0, + "se_max": 2.1, + "effect_size": 1.4158257237, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.6021382539, + "level_score_std_dev": 1.59521855, + "se_difference": 3.6619666847, + "z_score": 1.3926942649, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.0773228146, + "gap_confidence_interval_95_upper": 12.2773228146, + "raw_n_min_group": 81, + "raw_n_max_group": 109, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "helpfulness", + "demographic_factor": "Education", + "score_range": 3.9, + "min_level": "College", + "max_level": "No College", + "min_score": 88.6, + "max_score": 92.5, + "se_min": 2.5, + "se_max": 2.7, + "effect_size": 1.0826902593, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.6021382539, + "level_score_std_dev": 1.95, + "se_difference": 3.6796738986, + "z_score": 1.0598765291, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.3120283161, + "gap_confidence_interval_95_upper": 11.1120283161, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "helpfulness", + "demographic_factor": "Ethnicity", + "score_range": 2.2, + "min_level": "Asian", + "max_level": "White", + "min_score": 89.2, + "max_score": 91.4, + "se_min": 3.5, + "se_max": 2.4, + "effect_size": 0.6107483514, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.6021382539, + "level_score_std_dev": 0.7949056548, + "se_difference": 4.2438190348, + "z_score": 0.5184009926, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.1177324651, + "gap_confidence_interval_95_upper": 10.5177324651, + "raw_n_min_group": 40, + "raw_n_max_group": 342, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "helpfulness", + "demographic_factor": "Politics", + "score_range": 2.4, + "min_level": "Republican", + "max_level": "Independent", + "min_score": 90.5, + "max_score": 92.9, + "se_min": 2.7, + "se_max": 2.3, + "effect_size": 0.6662709288, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.6021382539, + "level_score_std_dev": 1.0198039027, + "se_difference": 3.5468295702, + "z_score": 0.6766606493, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.5516582169, + "gap_confidence_interval_95_upper": 9.3516582169, + "raw_n_min_group": 160, + "raw_n_max_group": 183, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "helpfulness", + "demographic_factor": "Sex", + "score_range": 0.1, + "min_level": "Female", + "max_level": "Male", + "min_score": 91.1, + "max_score": 91.2, + "se_min": 2.6, + "se_max": 2.6, + "effect_size": 0.0277612887, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 3.6021382539, + "level_score_std_dev": 0.05, + "se_difference": 3.6769552622, + "z_score": 0.0271964147, + "p_value": 0.9924215291, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.1066998866, + "gap_confidence_interval_95_upper": 7.3066998866, + "raw_n_min_group": 256, + "raw_n_max_group": 250, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "helpfulness", + "demographic_factor": "Urbanicity", + "score_range": 2.1, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 90.3, + "max_score": 92.4, + "se_min": 2.8, + "se_max": 2.5, + "effect_size": 0.5829870627, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.6021382539, + "level_score_std_dev": 0.8831760866, + "se_difference": 3.7536648758, + "z_score": 0.5594532462, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.2570479666, + "gap_confidence_interval_95_upper": 9.4570479666, + "raw_n_min_group": 173, + "raw_n_max_group": 86, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "honesty_empathy_fairness", + "demographic_factor": "Age", + "score_range": 4.9, + "min_level": "18-24", + "max_level": "55-64", + "min_score": 83.8, + "max_score": 88.7, + "se_min": 4.2, + "se_max": 3.0, + "effect_size": 1.2888827656, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.8017421993, + "level_score_std_dev": 1.9720970226, + "se_difference": 5.1613951602, + "z_score": 0.9493557164, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.216148624, + "gap_confidence_interval_95_upper": 15.016148624, + "raw_n_min_group": 60, + "raw_n_max_group": 109, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "honesty_empathy_fairness", + "demographic_factor": "Education", + "score_range": 5.4, + "min_level": "College", + "max_level": "No College", + "min_score": 83.5, + "max_score": 88.9, + "se_min": 3.2, + "se_max": 3.5, + "effect_size": 1.4204014152, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.8017421993, + "level_score_std_dev": 2.7, + "se_difference": 4.7423622806, + "z_score": 1.1386730242, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.8948592715, + "gap_confidence_interval_95_upper": 14.6948592715, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "honesty_empathy_fairness", + "demographic_factor": "Ethnicity", + "score_range": 5.5, + "min_level": "Hispanic", + "max_level": "Asian", + "min_score": 82.8, + "max_score": 88.3, + "se_min": 4.8, + "se_max": 3.8, + "effect_size": 1.4467051451, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.8017421993, + "level_score_std_dev": 2.2726636355, + "se_difference": 6.122091146, + "z_score": 0.8983858405, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.4990781562, + "gap_confidence_interval_95_upper": 17.4990781562, + "raw_n_min_group": 33, + "raw_n_max_group": 40, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "honesty_empathy_fairness", + "demographic_factor": "Politics", + "score_range": 3.0, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 86.3, + "max_score": 89.3, + "se_min": 3.6, + "se_max": 3.1, + "effect_size": 0.7891118973, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.8017421993, + "level_score_std_dev": 1.2961481397, + "se_difference": 4.7507894081, + "z_score": 0.631474002, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.311376138, + "gap_confidence_interval_95_upper": 12.311376138, + "raw_n_min_group": 168, + "raw_n_max_group": 183, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "honesty_empathy_fairness", + "demographic_factor": "Sex", + "score_range": 0.0, + "min_level": "Female", + "max_level": "Female", + "min_score": 87.1, + "max_score": 87.1, + "se_min": 3.4, + "se_max": 3.4, + "effect_size": 0.0, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 3.8017421993, + "level_score_std_dev": 0.0, + "se_difference": 4.8083261121, + "z_score": 0.0, + "p_value": 1.0, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.4241460056, + "gap_confidence_interval_95_upper": 9.4241460056, + "raw_n_min_group": 256, + "raw_n_max_group": 256, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "honesty_empathy_fairness", + "demographic_factor": "Urbanicity", + "score_range": 1.9, + "min_level": "Urban", + "max_level": "Suburban", + "min_score": 86.1, + "max_score": 88.0, + "se_min": 3.6, + "se_max": 3.1, + "effect_size": 0.4997708683, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.8017421993, + "level_score_std_dev": 0.7788880964, + "se_difference": 4.7507894081, + "z_score": 0.3999335346, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.411376138, + "gap_confidence_interval_95_upper": 11.211376138, + "raw_n_min_group": 173, + "raw_n_max_group": 252, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "intuitiveness", + "demographic_factor": "Age", + "score_range": 3.8, + "min_level": "45-54", + "max_level": "65+", + "min_score": 85.0, + "max_score": 88.8, + "se_min": 3.7, + "se_max": 3.6, + "effect_size": 1.2265246251, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.0981848404, + "level_score_std_dev": 1.2445436468, + "se_difference": 5.1623637997, + "z_score": 0.736096902, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.3180471224, + "gap_confidence_interval_95_upper": 13.9180471224, + "raw_n_min_group": 81, + "raw_n_max_group": 64, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "intuitiveness", + "demographic_factor": "Education", + "score_range": 4.2, + "min_level": "College", + "max_level": "No College", + "min_score": 85.0, + "max_score": 89.2, + "se_min": 3.2, + "se_max": 3.6, + "effect_size": 1.3556324804, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.0981848404, + "level_score_std_dev": 2.1, + "se_difference": 4.8166378315, + "z_score": 0.8719775385, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.2404366763, + "gap_confidence_interval_95_upper": 13.6404366763, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "intuitiveness", + "demographic_factor": "Ethnicity", + "score_range": 3.1, + "min_level": "Asian", + "max_level": "African American", + "min_score": 85.1, + "max_score": 88.2, + "se_min": 4.6, + "se_max": 3.7, + "effect_size": 1.0005858784, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.0981848404, + "level_score_std_dev": 1.4159802259, + "se_difference": 5.9033888573, + "z_score": 0.5251221078, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.470429547, + "gap_confidence_interval_95_upper": 14.670429547, + "raw_n_min_group": 40, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "intuitiveness", + "demographic_factor": "Politics", + "score_range": 2.5, + "min_level": "Republican", + "max_level": "Independent", + "min_score": 86.8, + "max_score": 89.3, + "se_min": 3.6, + "se_max": 3.2, + "effect_size": 0.8069240955, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.0981848404, + "level_score_std_dev": 1.0208928554, + "se_difference": 4.8166378315, + "z_score": 0.5190342491, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.9404366763, + "gap_confidence_interval_95_upper": 11.9404366763, + "raw_n_min_group": 160, + "raw_n_max_group": 183, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "intuitiveness", + "demographic_factor": "Sex", + "score_range": 1.5, + "min_level": "Male", + "max_level": "Female", + "min_score": 87.0, + "max_score": 88.5, + "se_min": 3.7, + "se_max": 3.3, + "effect_size": 0.4841544573, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.0981848404, + "level_score_std_dev": 0.75, + "se_difference": 4.9578221025, + "z_score": 0.3025522032, + "p_value": 0.9469805126, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.2171527626, + "gap_confidence_interval_95_upper": 11.2171527626, + "raw_n_min_group": 250, + "raw_n_max_group": 256, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "intuitiveness", + "demographic_factor": "Urbanicity", + "score_range": 2.4, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 86.3, + "max_score": 88.7, + "se_min": 3.8, + "se_max": 3.5, + "effect_size": 0.7746471317, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.0981848404, + "level_score_std_dev": 1.033870828, + "se_difference": 5.1662365412, + "z_score": 0.4645548033, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.7256375564, + "gap_confidence_interval_95_upper": 12.5256375564, + "raw_n_min_group": 173, + "raw_n_max_group": 86, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "personality", + "demographic_factor": "Age", + "score_range": 5.7, + "min_level": "35-44", + "max_level": "65+", + "min_score": 76.7, + "max_score": 82.4, + "se_min": 3.9, + "se_max": 3.9, + "effect_size": 1.6937520676, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.365309545, + "level_score_std_dev": 1.9267128022, + "se_difference": 5.5154328933, + "z_score": 1.0334637571, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.1100498299, + "gap_confidence_interval_95_upper": 16.5100498299, + "raw_n_min_group": 93, + "raw_n_max_group": 64, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "personality", + "demographic_factor": "Education", + "score_range": 6.8, + "min_level": "College", + "max_level": "No College", + "min_score": 76.0, + "max_score": 82.8, + "se_min": 3.3, + "se_max": 4.1, + "effect_size": 2.0206165017, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.365309545, + "level_score_std_dev": 3.4, + "se_difference": 5.2630789468, + "z_score": 1.2920193804, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.5154451835, + "gap_confidence_interval_95_upper": 17.1154451835, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "personality", + "demographic_factor": "Ethnicity", + "score_range": 3.9, + "min_level": "Asian", + "max_level": "African American", + "min_score": 79.1, + "max_score": 83.0, + "se_min": 4.6, + "se_max": 4.1, + "effect_size": 1.1588829936, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.365309545, + "level_score_std_dev": 1.5346009253, + "se_difference": 6.1619802012, + "z_score": 0.632913426, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.1772592678, + "gap_confidence_interval_95_upper": 15.9772592678, + "raw_n_min_group": 40, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "personality", + "demographic_factor": "Politics", + "score_range": 2.6, + "min_level": "Independent", + "max_level": "Republican", + "min_score": 79.0, + "max_score": 81.6, + "se_min": 4.0, + "se_max": 3.7, + "effect_size": 0.7725886624, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.365309545, + "level_score_std_dev": 1.0656244909, + "se_difference": 5.4488530903, + "z_score": 0.4771646357, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.0795558141, + "gap_confidence_interval_95_upper": 13.2795558141, + "raw_n_min_group": 183, + "raw_n_max_group": 160, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "personality", + "demographic_factor": "Sex", + "score_range": 0.9, + "min_level": "Male", + "max_level": "Female", + "min_score": 80.0, + "max_score": 80.9, + "se_min": 3.9, + "se_max": 3.8, + "effect_size": 0.267434537, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.365309545, + "level_score_std_dev": 0.45, + "se_difference": 5.445181356, + "z_score": 0.1652837511, + "p_value": 0.9610477633, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.7723593471, + "gap_confidence_interval_95_upper": 11.5723593471, + "raw_n_min_group": 250, + "raw_n_max_group": 256, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "personality", + "demographic_factor": "Urbanicity", + "score_range": 1.0, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 79.8, + "max_score": 80.8, + "se_min": 3.9, + "se_max": 4.0, + "effect_size": 0.2971494856, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.365309545, + "level_score_std_dev": 0.4714045208, + "se_difference": 5.5865910894, + "z_score": 0.1790000349, + "p_value": 0.9597457904, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.9495173315, + "gap_confidence_interval_95_upper": 11.9495173315, + "raw_n_min_group": 173, + "raw_n_max_group": 86, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "personality_consistency", + "demographic_factor": "Age", + "score_range": 1.9, + "min_level": "35-44", + "max_level": "55-64", + "min_score": 87.1, + "max_score": 89.0, + "se_min": 3.1, + "se_max": 2.8, + "effect_size": 0.5952101073, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.1921500939, + "level_score_std_dev": 0.5934831272, + "se_difference": 4.1773197148, + "z_score": 0.4548371036, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.287396193, + "gap_confidence_interval_95_upper": 10.087396193, + "raw_n_min_group": 93, + "raw_n_max_group": 109, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "personality_consistency", + "demographic_factor": "Education", + "score_range": 3.2, + "min_level": "College", + "max_level": "No College", + "min_score": 85.8, + "max_score": 89.0, + "se_min": 2.8, + "se_max": 3.3, + "effect_size": 1.0024591281, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1921500939, + "level_score_std_dev": 1.6, + "se_difference": 4.3278170017, + "z_score": 0.7394027979, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.282365455, + "gap_confidence_interval_95_upper": 11.682365455, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "personality_consistency", + "demographic_factor": "Ethnicity", + "score_range": 2.1, + "min_level": "Asian", + "max_level": "Hispanic", + "min_score": 86.6, + "max_score": 88.7, + "se_min": 3.9, + "se_max": 3.8, + "effect_size": 0.6578638028, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.1921500939, + "level_score_std_dev": 0.7495832175, + "se_difference": 5.445181356, + "z_score": 0.3856620859, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.5723593471, + "gap_confidence_interval_95_upper": 12.7723593471, + "raw_n_min_group": 40, + "raw_n_max_group": 33, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "personality_consistency", + "demographic_factor": "Politics", + "score_range": 0.9, + "min_level": "Democrat", + "max_level": "Republican", + "min_score": 87.6, + "max_score": 88.5, + "se_min": 3.3, + "se_max": 3.0, + "effect_size": 0.2819416298, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.1921500939, + "level_score_std_dev": 0.4027681991, + "se_difference": 4.4598206242, + "z_score": 0.2018018382, + "p_value": 0.9597457904, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.8410878009, + "gap_confidence_interval_95_upper": 9.6410878009, + "raw_n_min_group": 168, + "raw_n_max_group": 160, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "personality_consistency", + "demographic_factor": "Sex", + "score_range": 0.1, + "min_level": "Female", + "max_level": "Male", + "min_score": 87.9, + "max_score": 88.0, + "se_min": 3.1, + "se_max": 3.2, + "effect_size": 0.0313268478, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 3.1921500939, + "level_score_std_dev": 0.05, + "se_difference": 4.455333882, + "z_score": 0.022445007, + "p_value": 0.9924215291, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.6322939478, + "gap_confidence_interval_95_upper": 8.8322939478, + "raw_n_min_group": 256, + "raw_n_max_group": 250, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "personality_consistency", + "demographic_factor": "Urbanicity", + "score_range": 2.0, + "min_level": "Urban", + "max_level": "Suburban", + "min_score": 86.9, + "max_score": 88.9, + "se_min": 3.3, + "se_max": 2.9, + "effect_size": 0.6265369551, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.1921500939, + "level_score_std_dev": 0.8178562764, + "se_difference": 4.3931765273, + "z_score": 0.4552514536, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.6104677712, + "gap_confidence_interval_95_upper": 10.6104677712, + "raw_n_min_group": 173, + "raw_n_max_group": 252, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "tone_and_language_style", + "demographic_factor": "Age", + "score_range": 8.4, + "min_level": "45-54", + "max_level": "55-64", + "min_score": 79.4, + "max_score": 87.8, + "se_min": 4.7, + "se_max": 3.5, + "effect_size": 2.5152143271, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.339675633, + "level_score_std_dev": 2.4964975466, + "se_difference": 5.8600341296, + "z_score": 1.4334387504, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.0854558422, + "gap_confidence_interval_95_upper": 19.8854558422, + "raw_n_min_group": 81, + "raw_n_max_group": 109, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "tone_and_language_style", + "demographic_factor": "Education", + "score_range": 0.9, + "min_level": "College", + "max_level": "No College", + "min_score": 83.9, + "max_score": 84.8, + "se_min": 3.5, + "se_max": 4.7, + "effect_size": 0.2694872493, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.339675633, + "level_score_std_dev": 0.45, + "se_difference": 5.8600341296, + "z_score": 0.1535827233, + "p_value": 0.9640111876, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -10.5854558422, + "gap_confidence_interval_95_upper": 12.3854558422, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "tone_and_language_style", + "demographic_factor": "Ethnicity", + "score_range": 1.9, + "min_level": "Asian", + "max_level": "White", + "min_score": 82.7, + "max_score": 84.6, + "se_min": 5.2, + "se_max": 4.0, + "effect_size": 0.5689175264, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.339675633, + "level_score_std_dev": 0.772576857, + "se_difference": 6.5604877867, + "z_score": 0.2896126114, + "p_value": 0.9479774893, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -10.958319783, + "gap_confidence_interval_95_upper": 14.758319783, + "raw_n_min_group": 40, + "raw_n_max_group": 342, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "tone_and_language_style", + "demographic_factor": "Politics", + "score_range": 3.9, + "min_level": "Republican", + "max_level": "Independent", + "min_score": 82.6, + "max_score": 86.5, + "se_min": 4.5, + "se_max": 4.0, + "effect_size": 1.1677780805, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.339675633, + "level_score_std_dev": 1.6418147141, + "se_difference": 6.0207972894, + "z_score": 0.6477547429, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.9005458454, + "gap_confidence_interval_95_upper": 15.7005458454, + "raw_n_min_group": 160, + "raw_n_max_group": 183, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "tone_and_language_style", + "demographic_factor": "Sex", + "score_range": 0.2, + "min_level": "Male", + "max_level": "Female", + "min_score": 84.4, + "max_score": 84.6, + "se_min": 4.4, + "se_max": 4.3, + "effect_size": 0.0598860554, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 3.339675633, + "level_score_std_dev": 0.1, + "se_difference": 6.1522353661, + "z_score": 0.0325085092, + "p_value": 0.9907760469, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -11.858159742, + "gap_confidence_interval_95_upper": 12.258159742, + "raw_n_min_group": 250, + "raw_n_max_group": 256, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "tone_and_language_style", + "demographic_factor": "Urbanicity", + "score_range": 2.2, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 83.4, + "max_score": 85.6, + "se_min": 4.5, + "se_max": 4.3, + "effect_size": 0.6587466095, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.339675633, + "level_score_std_dev": 0.898146239, + "se_difference": 6.2241465278, + "z_score": 0.3534621157, + "p_value": 0.9405596833, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.9991030291, + "gap_confidence_interval_95_upper": 14.3991030291, + "raw_n_min_group": 173, + "raw_n_max_group": 86, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "transparency", + "demographic_factor": "Age", + "score_range": 5.3, + "min_level": "18-24", + "max_level": "35-44", + "min_score": 69.0, + "max_score": 74.3, + "se_min": 6.0, + "se_max": 5.1, + "effect_size": 1.174184397, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.5137714429, + "level_score_std_dev": 1.8254375427, + "se_difference": 7.874642849, + "z_score": 0.6730463974, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -10.1340163752, + "gap_confidence_interval_95_upper": 20.7340163752, + "raw_n_min_group": 60, + "raw_n_max_group": 93, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "transparency", + "demographic_factor": "Education", + "score_range": 1.1, + "min_level": "No College", + "max_level": "College", + "min_score": 73.0, + "max_score": 74.1, + "se_min": 5.8, + "se_max": 4.2, + "effect_size": 0.2436986484, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 4.5137714429, + "level_score_std_dev": 0.55, + "se_difference": 7.161005516, + "z_score": 0.1536097127, + "p_value": 0.9640111876, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -12.9353129044, + "gap_confidence_interval_95_upper": 15.1353129044, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "transparency", + "demographic_factor": "Ethnicity", + "score_range": 11.5, + "min_level": "Hispanic", + "max_level": "African American", + "min_score": 68.3, + "max_score": 79.8, + "se_min": 7.2, + "se_max": 5.2, + "effect_size": 2.5477585973, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.5137714429, + "level_score_std_dev": 4.2663655493, + "se_difference": 8.8814413245, + "z_score": 1.2948348787, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.9073051268, + "gap_confidence_interval_95_upper": 28.9073051268, + "raw_n_min_group": 33, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "transparency", + "demographic_factor": "Politics", + "score_range": 4.4, + "min_level": "Independent", + "max_level": "Republican", + "min_score": 70.8, + "max_score": 75.2, + "se_min": 5.6, + "se_max": 5.0, + "effect_size": 0.9747945938, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.5137714429, + "level_score_std_dev": 1.7969109296, + "se_difference": 7.5073297516, + "z_score": 0.586093877, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -10.3140959333, + "gap_confidence_interval_95_upper": 19.1140959333, + "raw_n_min_group": 183, + "raw_n_max_group": 160, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "transparency", + "demographic_factor": "Sex", + "score_range": 2.8, + "min_level": "Male", + "max_level": "Female", + "min_score": 71.9, + "max_score": 74.7, + "se_min": 5.5, + "se_max": 5.2, + "effect_size": 0.6203238324, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 4.5137714429, + "level_score_std_dev": 1.4, + "se_difference": 7.5690157881, + "z_score": 0.3699292059, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -12.034998343, + "gap_confidence_interval_95_upper": 17.634998343, + "raw_n_min_group": 250, + "raw_n_max_group": 256, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "transparency", + "demographic_factor": "Urbanicity", + "score_range": 2.6, + "min_level": "Rural", + "max_level": "Urban", + "min_score": 72.0, + "max_score": 74.6, + "se_min": 5.7, + "se_max": 5.3, + "effect_size": 0.5760149872, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 4.5137714429, + "level_score_std_dev": 1.0656244909, + "se_difference": 7.7833154889, + "z_score": 0.3340478751, + "p_value": 0.9429935514, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -12.6550180386, + "gap_confidence_interval_95_upper": 17.8550180386, + "raw_n_min_group": 86, + "raw_n_max_group": 173, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "trustworthiness", + "demographic_factor": "Age", + "score_range": 4.3, + "min_level": "18-24", + "max_level": "55-64", + "min_score": 87.3, + "max_score": 91.6, + "se_min": 3.0, + "se_max": 2.1, + "effect_size": 1.3589235415, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1642692679, + "level_score_std_dev": 1.6569718029, + "se_difference": 3.6619666847, + "z_score": 1.1742324194, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.8773228146, + "gap_confidence_interval_95_upper": 11.4773228146, + "raw_n_min_group": 60, + "raw_n_max_group": 109, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "trustworthiness", + "demographic_factor": "Education", + "score_range": 3.3, + "min_level": "College", + "max_level": "No College", + "min_score": 88.0, + "max_score": 91.3, + "se_min": 2.2, + "se_max": 2.5, + "effect_size": 1.042894811, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1642692679, + "level_score_std_dev": 1.65, + "se_difference": 3.3301651611, + "z_score": 0.9909418423, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.2270037783, + "gap_confidence_interval_95_upper": 9.8270037783, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "trustworthiness", + "demographic_factor": "Ethnicity", + "score_range": 3.7, + "min_level": "Hispanic", + "max_level": "African American", + "min_score": 87.1, + "max_score": 90.8, + "se_min": 3.2, + "se_max": 2.6, + "effect_size": 1.1693063032, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1642692679, + "level_score_std_dev": 1.47203091, + "se_difference": 4.1231056256, + "z_score": 0.8973818126, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.3811385307, + "gap_confidence_interval_95_upper": 11.7811385307, + "raw_n_min_group": 33, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "trustworthiness", + "demographic_factor": "Politics", + "score_range": 2.2, + "min_level": "Republican", + "max_level": "Independent", + "min_score": 89.1, + "max_score": 91.3, + "se_min": 2.4, + "se_max": 2.2, + "effect_size": 0.6952632073, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.1642692679, + "level_score_std_dev": 0.9285592185, + "se_difference": 3.2557641192, + "z_score": 0.6757246285, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.1811804158, + "gap_confidence_interval_95_upper": 8.5811804158, + "raw_n_min_group": 160, + "raw_n_max_group": 183, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "trustworthiness", + "demographic_factor": "Sex", + "score_range": 0.6, + "min_level": "Male", + "max_level": "Female", + "min_score": 89.9, + "max_score": 90.5, + "se_min": 2.4, + "se_max": 2.3, + "effect_size": 0.1896172384, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 3.1642692679, + "level_score_std_dev": 0.3, + "se_difference": 3.3241540277, + "z_score": 0.1804970513, + "p_value": 0.9597457904, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.9152221734, + "gap_confidence_interval_95_upper": 7.1152221734, + "raw_n_min_group": 250, + "raw_n_max_group": 256, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "trustworthiness", + "demographic_factor": "Urbanicity", + "score_range": 0.8, + "min_level": "Suburban", + "max_level": "Rural", + "min_score": 89.9, + "max_score": 90.7, + "se_min": 2.3, + "se_max": 2.4, + "effect_size": 0.2528229845, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.1642692679, + "level_score_std_dev": 0.3399346342, + "se_difference": 3.3241540277, + "z_score": 0.240662735, + "p_value": 0.9570722129, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.7152221734, + "gap_confidence_interval_95_upper": 7.3152221734, + "raw_n_min_group": 252, + "raw_n_max_group": 86, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "understanding", + "demographic_factor": "Age", + "score_range": 5.1, + "min_level": "18-24", + "max_level": "25-34", + "min_score": 89.1, + "max_score": 94.2, + "se_min": 0.0, + "se_max": 0.0, + "effect_size": 1.7187238855, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9673178124, + "level_score_std_dev": 2.0221688027, + "se_difference": 0.0, + "z_score": 9999.9, + "p_value": 0.0, + "is_statistically_significant": true, + "gap_confidence_interval_95_lower": 5.1, + "gap_confidence_interval_95_upper": 5.1, + "raw_n_min_group": 60, + "raw_n_max_group": 104, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": true + }, + { + "model": "gpt-4o", + "category": "understanding", + "demographic_factor": "Education", + "score_range": 4.2, + "min_level": "College", + "max_level": "No College", + "min_score": 90.1, + "max_score": 94.3, + "se_min": 0.0, + "se_max": 0.0, + "effect_size": 1.4154196704, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9673178124, + "level_score_std_dev": 2.1, + "se_difference": 0.0, + "z_score": 9999.9, + "p_value": 0.0, + "is_statistically_significant": true, + "gap_confidence_interval_95_lower": 4.2, + "gap_confidence_interval_95_upper": 4.2, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": true + }, + { + "model": "gpt-4o", + "category": "understanding", + "demographic_factor": "Ethnicity", + "score_range": 2.8, + "min_level": "Asian", + "max_level": "White", + "min_score": 90.4, + "max_score": 93.2, + "se_min": 0.0, + "se_max": 0.0, + "effect_size": 0.9436131136, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9673178124, + "level_score_std_dev": 1.01334841, + "se_difference": 0.0, + "z_score": 9999.9, + "p_value": 0.0, + "is_statistically_significant": true, + "gap_confidence_interval_95_lower": 2.8, + "gap_confidence_interval_95_upper": 2.8, + "raw_n_min_group": 40, + "raw_n_max_group": 342, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": true + }, + { + "model": "gpt-4o", + "category": "understanding", + "demographic_factor": "Politics", + "score_range": 2.3, + "min_level": "Republican", + "max_level": "Independent", + "min_score": 92.3, + "max_score": 94.6, + "se_min": 0.0, + "se_max": 0.0, + "effect_size": 0.7751107719, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 2.9673178124, + "level_score_std_dev": 1.0208928554, + "se_difference": 0.0, + "z_score": 9999.9, + "p_value": 0.0, + "is_statistically_significant": true, + "gap_confidence_interval_95_lower": 2.3, + "gap_confidence_interval_95_upper": 2.3, + "raw_n_min_group": 160, + "raw_n_max_group": 183, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "understanding", + "demographic_factor": "Sex", + "score_range": 1.4, + "min_level": "Male", + "max_level": "Female", + "min_score": 92.1, + "max_score": 93.5, + "se_min": 0.0, + "se_max": 0.0, + "effect_size": 0.4718065568, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 2.9673178124, + "level_score_std_dev": 0.7, + "se_difference": 0.0, + "z_score": 9999.9, + "p_value": 0.0, + "is_statistically_significant": true, + "gap_confidence_interval_95_lower": 1.4, + "gap_confidence_interval_95_upper": 1.4, + "raw_n_min_group": 250, + "raw_n_max_group": 256, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "understanding", + "demographic_factor": "Urbanicity", + "score_range": 2.0, + "min_level": "Suburban", + "max_level": "Rural", + "min_score": 92.1, + "max_score": 94.1, + "se_min": 0.0, + "se_max": 0.0, + "effect_size": 0.6740093668, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 2.9673178124, + "level_score_std_dev": 0.837987006, + "se_difference": 0.0, + "z_score": 9999.9, + "p_value": 0.0, + "is_statistically_significant": true, + "gap_confidence_interval_95_lower": 2.0, + "gap_confidence_interval_95_upper": 2.0, + "raw_n_min_group": 252, + "raw_n_max_group": 86, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "usefulness", + "demographic_factor": "Age", + "score_range": 5.0, + "min_level": "45-54", + "max_level": "55-64", + "min_score": 89.3, + "max_score": 94.3, + "se_min": 3.1, + "se_max": 2.1, + "effect_size": 1.4875803892, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.3611628899, + "level_score_std_dev": 2.0047859404, + "se_difference": 3.7443290454, + "z_score": 1.3353527266, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.3387500752, + "gap_confidence_interval_95_upper": 12.3387500752, + "raw_n_min_group": 81, + "raw_n_max_group": 109, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "usefulness", + "demographic_factor": "Education", + "score_range": 4.9, + "min_level": "College", + "max_level": "No College", + "min_score": 89.7, + "max_score": 94.6, + "se_min": 2.7, + "se_max": 2.5, + "effect_size": 1.4578287815, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.3611628899, + "level_score_std_dev": 2.45, + "se_difference": 3.6796738986, + "z_score": 1.3316397417, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.3120283161, + "gap_confidence_interval_95_upper": 12.1120283161, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "usefulness", + "demographic_factor": "Ethnicity", + "score_range": 2.3, + "min_level": "Hispanic", + "max_level": "African American", + "min_score": 91.5, + "max_score": 93.8, + "se_min": 3.6, + "se_max": 2.5, + "effect_size": 0.684286979, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.3611628899, + "level_score_std_dev": 0.8276472679, + "se_difference": 4.3829214002, + "z_score": 0.5247641447, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.2903680914, + "gap_confidence_interval_95_upper": 10.8903680914, + "raw_n_min_group": 33, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "usefulness", + "demographic_factor": "Politics", + "score_range": 3.0, + "min_level": "Republican", + "max_level": "Independent", + "min_score": 91.4, + "max_score": 94.4, + "se_min": 2.8, + "se_max": 2.2, + "effect_size": 0.8925482335, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.3611628899, + "level_score_std_dev": 1.2684198394, + "se_difference": 3.560898763, + "z_score": 0.8424839345, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.979233328, + "gap_confidence_interval_95_upper": 9.979233328, + "raw_n_min_group": 160, + "raw_n_max_group": 183, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "usefulness", + "demographic_factor": "Sex", + "score_range": 0.3, + "min_level": "Female", + "max_level": "Male", + "min_score": 92.8, + "max_score": 93.1, + "se_min": 2.6, + "se_max": 2.5, + "effect_size": 0.0892548234, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 3.3611628899, + "level_score_std_dev": 0.15, + "se_difference": 3.6069377594, + "z_score": 0.083173046, + "p_value": 0.981376128, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.7694681029, + "gap_confidence_interval_95_upper": 7.3694681029, + "raw_n_min_group": 256, + "raw_n_max_group": 250, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "gpt-4o", + "category": "usefulness", + "demographic_factor": "Urbanicity", + "score_range": 3.9, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 91.6, + "max_score": 95.5, + "se_min": 2.9, + "se_max": 2.0, + "effect_size": 1.1603127036, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.3611628899, + "level_score_std_dev": 1.7518244458, + "se_difference": 3.5227829908, + "z_score": 1.1070792638, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.0045277872, + "gap_confidence_interval_95_upper": 10.8045277872, + "raw_n_min_group": 173, + "raw_n_max_group": 86, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "accuracy", + "demographic_factor": "Age", + "score_range": 3.0, + "min_level": "45-54", + "max_level": "65+", + "min_score": 91.7, + "max_score": 94.7, + "se_min": 2.6, + "se_max": 2.0, + "effect_size": 1.0211005056, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9380065757, + "level_score_std_dev": 0.9142392101, + "se_difference": 3.2802438934, + "z_score": 0.9145661413, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.4291598915, + "gap_confidence_interval_95_upper": 9.4291598915, + "raw_n_min_group": 83, + "raw_n_max_group": 64, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "accuracy", + "demographic_factor": "Education", + "score_range": 6.1, + "min_level": "College", + "max_level": "No College", + "min_score": 89.5, + "max_score": 95.6, + "se_min": 2.6, + "se_max": 2.2, + "effect_size": 2.0762376948, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9380065757, + "level_score_std_dev": 3.05, + "se_difference": 3.4058772732, + "z_score": 1.7910216695, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -0.5753967912, + "gap_confidence_interval_95_upper": 12.7753967912, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "accuracy", + "demographic_factor": "Ethnicity", + "score_range": 4.5, + "min_level": "Asian", + "max_level": "Hispanic", + "min_score": 92.0, + "max_score": 96.5, + "se_min": 3.1, + "se_max": 2.1, + "effect_size": 1.5316507584, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9380065757, + "level_score_std_dev": 1.7455300055, + "se_difference": 3.7443290454, + "z_score": 1.2018174539, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.8387500752, + "gap_confidence_interval_95_upper": 11.8387500752, + "raw_n_min_group": 40, + "raw_n_max_group": 34, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "accuracy", + "demographic_factor": "Politics", + "score_range": 2.6, + "min_level": "Republican", + "max_level": "Independent", + "min_score": 92.8, + "max_score": 95.4, + "se_min": 2.4, + "se_max": 1.8, + "effect_size": 0.8849537715, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9380065757, + "level_score_std_dev": 1.1264496832, + "se_difference": 3.0, + "z_score": 0.8666666667, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.2798919536, + "gap_confidence_interval_95_upper": 8.4798919536, + "raw_n_min_group": 163, + "raw_n_max_group": 185, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "accuracy", + "demographic_factor": "Sex", + "score_range": 0.4, + "min_level": "Male", + "max_level": "Female", + "min_score": 93.3, + "max_score": 93.7, + "se_min": 2.4, + "se_max": 2.3, + "effect_size": 0.1361467341, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 2.9380065757, + "level_score_std_dev": 0.2, + "se_difference": 3.3241540277, + "z_score": 0.1203313675, + "p_value": 0.9772792279, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.1152221734, + "gap_confidence_interval_95_upper": 6.9152221734, + "raw_n_min_group": 251, + "raw_n_max_group": 262, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "accuracy", + "demographic_factor": "Urbanicity", + "score_range": 3.2, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 92.4, + "max_score": 95.6, + "se_min": 2.6, + "se_max": 1.9, + "effect_size": 1.0891738727, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9380065757, + "level_score_std_dev": 1.4429907215, + "se_difference": 3.2202484376, + "z_score": 0.9937121505, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.111570959, + "gap_confidence_interval_95_upper": 9.511570959, + "raw_n_min_group": 179, + "raw_n_max_group": 87, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "adaptiveness", + "demographic_factor": "Age", + "score_range": 5.3, + "min_level": "45-54", + "max_level": "65+", + "min_score": 84.1, + "max_score": 89.4, + "se_min": 3.1, + "se_max": 2.7, + "effect_size": 1.3524694976, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.9187575093, + "level_score_std_dev": 2.0188693205, + "se_difference": 4.1109609582, + "z_score": 1.2892362768, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.75733542, + "gap_confidence_interval_95_upper": 13.35733542, + "raw_n_min_group": 83, + "raw_n_max_group": 64, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "adaptiveness", + "demographic_factor": "Education", + "score_range": 2.4, + "min_level": "College", + "max_level": "No College", + "min_score": 85.7, + "max_score": 88.1, + "se_min": 2.4, + "se_max": 3.1, + "effect_size": 0.6124390178, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.9187575093, + "level_score_std_dev": 1.2, + "se_difference": 3.9204591568, + "z_score": 0.612173193, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.2839587502, + "gap_confidence_interval_95_upper": 10.0839587502, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "adaptiveness", + "demographic_factor": "Ethnicity", + "score_range": 2.1, + "min_level": "Asian", + "max_level": "Hispanic", + "min_score": 85.9, + "max_score": 88.0, + "se_min": 3.4, + "se_max": 3.6, + "effect_size": 0.5358841406, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.9187575093, + "level_score_std_dev": 0.764852927, + "se_difference": 4.9517673613, + "z_score": 0.4240910057, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.6052856879, + "gap_confidence_interval_95_upper": 11.8052856879, + "raw_n_min_group": 40, + "raw_n_max_group": 34, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "adaptiveness", + "demographic_factor": "Politics", + "score_range": 3.0, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 86.5, + "max_score": 89.5, + "se_min": 3.0, + "se_max": 2.6, + "effect_size": 0.7655487723, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.9187575093, + "level_score_std_dev": 1.3123346457, + "se_difference": 3.9698866483, + "z_score": 0.7556890828, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.7808348533, + "gap_confidence_interval_95_upper": 10.7808348533, + "raw_n_min_group": 170, + "raw_n_max_group": 185, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "adaptiveness", + "demographic_factor": "Sex", + "score_range": 2.5, + "min_level": "Male", + "max_level": "Female", + "min_score": 86.0, + "max_score": 88.5, + "se_min": 3.0, + "se_max": 2.7, + "effect_size": 0.6379573102, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.9187575093, + "level_score_std_dev": 1.25, + "se_difference": 4.0360872141, + "z_score": 0.6194117885, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.4105855781, + "gap_confidence_interval_95_upper": 10.4105855781, + "raw_n_min_group": 251, + "raw_n_max_group": 262, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "adaptiveness", + "demographic_factor": "Urbanicity", + "score_range": 3.1, + "min_level": "Suburban", + "max_level": "Rural", + "min_score": 86.3, + "max_score": 89.4, + "se_min": 2.8, + "se_max": 2.8, + "effect_size": 0.7910670647, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.9187575093, + "level_score_std_dev": 1.4165686241, + "se_difference": 3.9597979746, + "z_score": 0.782868222, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.6610614164, + "gap_confidence_interval_95_upper": 10.8610614164, + "raw_n_min_group": 252, + "raw_n_max_group": 87, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "background_and_culture", + "demographic_factor": "Age", + "score_range": 6.7, + "min_level": "45-54", + "max_level": "65+", + "min_score": 73.5, + "max_score": 80.2, + "se_min": 3.4, + "se_max": 3.8, + "effect_size": 1.9369477103, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.459050528, + "level_score_std_dev": 2.3144473782, + "se_difference": 5.0990195136, + "z_score": 1.3139781054, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.2938946031, + "gap_confidence_interval_95_upper": 16.6938946031, + "raw_n_min_group": 83, + "raw_n_max_group": 64, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "background_and_culture", + "demographic_factor": "Education", + "score_range": 4.0, + "min_level": "College", + "max_level": "No College", + "min_score": 75.0, + "max_score": 79.0, + "se_min": 2.8, + "se_max": 4.0, + "effect_size": 1.1563866927, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.459050528, + "level_score_std_dev": 2.0, + "se_difference": 4.8826222463, + "z_score": 0.8192319205, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.5697637528, + "gap_confidence_interval_95_upper": 13.5697637528, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "background_and_culture", + "demographic_factor": "Ethnicity", + "score_range": 3.1, + "min_level": "Asian", + "max_level": "African American", + "min_score": 76.1, + "max_score": 79.2, + "se_min": 4.1, + "se_max": 4.0, + "effect_size": 0.8961996868, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.459050528, + "level_score_std_dev": 1.1011357773, + "se_difference": 5.7280013966, + "z_score": 0.5412009854, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.1266764408, + "gap_confidence_interval_95_upper": 14.3266764408, + "raw_n_min_group": 40, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "background_and_culture", + "demographic_factor": "Politics", + "score_range": 3.6, + "min_level": "Democrat", + "max_level": "Republican", + "min_score": 75.9, + "max_score": 79.5, + "se_min": 3.7, + "se_max": 3.5, + "effect_size": 1.0407480234, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.459050528, + "level_score_std_dev": 1.4704496667, + "se_difference": 5.0931326313, + "z_score": 0.7068341354, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.3823565258, + "gap_confidence_interval_95_upper": 13.5823565258, + "raw_n_min_group": 170, + "raw_n_max_group": 163, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "background_and_culture", + "demographic_factor": "Sex", + "score_range": 0.1, + "min_level": "Male", + "max_level": "Female", + "min_score": 77.6, + "max_score": 77.7, + "se_min": 3.7, + "se_max": 3.6, + "effect_size": 0.0289096673, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 3.459050528, + "level_score_std_dev": 0.05, + "se_difference": 5.1623637997, + "z_score": 0.0193709711, + "p_value": 0.9924215291, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -10.0180471224, + "gap_confidence_interval_95_upper": 10.2180471224, + "raw_n_min_group": 251, + "raw_n_max_group": 262, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "background_and_culture", + "demographic_factor": "Urbanicity", + "score_range": 2.1, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 76.8, + "max_score": 78.9, + "se_min": 3.7, + "se_max": 3.8, + "effect_size": 0.6071030137, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.459050528, + "level_score_std_dev": 0.8956685895, + "se_difference": 5.3037722425, + "z_score": 0.395944604, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.2952025774, + "gap_confidence_interval_95_upper": 12.4952025774, + "raw_n_min_group": 179, + "raw_n_max_group": 87, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "bias_and_stereotypes", + "demographic_factor": "Age", + "score_range": 7.8, + "min_level": "18-24", + "max_level": "65+", + "min_score": 81.4, + "max_score": 89.2, + "se_min": 5.0, + "se_max": 3.6, + "effect_size": 2.8015836524, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.7841396038, + "level_score_std_dev": 2.984217746, + "se_difference": 6.1611687203, + "z_score": 1.2659935727, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.2756687945, + "gap_confidence_interval_95_upper": 19.8756687945, + "raw_n_min_group": 59, + "raw_n_max_group": 64, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "bias_and_stereotypes", + "demographic_factor": "Education", + "score_range": 1.2, + "min_level": "College", + "max_level": "No College", + "min_score": 86.0, + "max_score": 87.2, + "se_min": 3.2, + "se_max": 4.1, + "effect_size": 0.4310128696, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 2.7841396038, + "level_score_std_dev": 0.6, + "se_difference": 5.2009614496, + "z_score": 0.2307265708, + "p_value": 0.9570722129, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.9936971262, + "gap_confidence_interval_95_upper": 11.3936971262, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "bias_and_stereotypes", + "demographic_factor": "Ethnicity", + "score_range": 1.1, + "min_level": "Asian", + "max_level": "African American", + "min_score": 86.0, + "max_score": 87.1, + "se_min": 4.5, + "se_max": 4.2, + "effect_size": 0.3950951305, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 2.7841396038, + "level_score_std_dev": 0.420565096, + "se_difference": 6.1554853586, + "z_score": 0.1787023989, + "p_value": 0.9597457904, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -10.9645296102, + "gap_confidence_interval_95_upper": 13.1645296102, + "raw_n_min_group": 40, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "bias_and_stereotypes", + "demographic_factor": "Politics", + "score_range": 2.0, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 86.3, + "max_score": 88.3, + "se_min": 4.0, + "se_max": 3.5, + "effect_size": 0.7183547827, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 2.7841396038, + "level_score_std_dev": 0.8640987598, + "se_difference": 5.3150729064, + "z_score": 0.3762883474, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.4173514717, + "gap_confidence_interval_95_upper": 12.4173514717, + "raw_n_min_group": 170, + "raw_n_max_group": 185, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "bias_and_stereotypes", + "demographic_factor": "Sex", + "score_range": 1.2, + "min_level": "Male", + "max_level": "Female", + "min_score": 86.2, + "max_score": 87.4, + "se_min": 4.0, + "se_max": 3.7, + "effect_size": 0.4310128696, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 2.7841396038, + "level_score_std_dev": 0.6, + "se_difference": 5.4488530903, + "z_score": 0.2202298319, + "p_value": 0.9570722129, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.4795558141, + "gap_confidence_interval_95_upper": 11.8795558141, + "raw_n_min_group": 251, + "raw_n_max_group": 262, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "bias_and_stereotypes", + "demographic_factor": "Urbanicity", + "score_range": 3.2, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 85.4, + "max_score": 88.6, + "se_min": 4.1, + "se_max": 3.7, + "effect_size": 1.1493676523, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.7841396038, + "level_score_std_dev": 1.3199326582, + "se_difference": 5.5226805086, + "z_score": 0.5794287747, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.624254895, + "gap_confidence_interval_95_upper": 14.024254895, + "raw_n_min_group": 179, + "raw_n_max_group": 87, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "clarity", + "demographic_factor": "Age", + "score_range": 3.7, + "min_level": "45-54", + "max_level": "55-64", + "min_score": 81.8, + "max_score": 85.5, + "se_min": 4.1, + "se_max": 3.6, + "effect_size": 0.8194815002, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.5150500643, + "level_score_std_dev": 1.2203141672, + "se_difference": 5.4561891463, + "z_score": 0.6781289836, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.9939342196, + "gap_confidence_interval_95_upper": 14.3939342196, + "raw_n_min_group": 83, + "raw_n_max_group": 113, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "clarity", + "demographic_factor": "Education", + "score_range": 3.7, + "min_level": "College", + "max_level": "No College", + "min_score": 82.1, + "max_score": 85.8, + "se_min": 3.3, + "se_max": 4.2, + "effect_size": 0.8194815002, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.5150500643, + "level_score_std_dev": 1.85, + "se_difference": 5.3413481444, + "z_score": 0.6927090128, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.768849992, + "gap_confidence_interval_95_upper": 14.168849992, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "clarity", + "demographic_factor": "Ethnicity", + "score_range": 3.5, + "min_level": "White", + "max_level": "African American", + "min_score": 83.9, + "max_score": 87.4, + "se_min": 3.8, + "se_max": 3.9, + "effect_size": 0.7751852029, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 4.5150500643, + "level_score_std_dev": 1.5660459763, + "se_difference": 5.445181356, + "z_score": 0.6427701432, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.1723593471, + "gap_confidence_interval_95_upper": 14.1723593471, + "raw_n_min_group": 347, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "clarity", + "demographic_factor": "Politics", + "score_range": 2.8, + "min_level": "Republican", + "max_level": "Democrat", + "min_score": 82.9, + "max_score": 85.7, + "se_min": 4.1, + "se_max": 3.8, + "effect_size": 0.6201481623, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 4.5150500643, + "level_score_std_dev": 1.2364824661, + "se_difference": 5.5901699437, + "z_score": 0.500879227, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.1565317572, + "gap_confidence_interval_95_upper": 13.7565317572, + "raw_n_min_group": 163, + "raw_n_max_group": 170, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "clarity", + "demographic_factor": "Sex", + "score_range": 0.6, + "min_level": "Female", + "max_level": "Male", + "min_score": 84.3, + "max_score": 84.9, + "se_min": 3.9, + "se_max": 3.9, + "effect_size": 0.1328888919, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 4.5150500643, + "level_score_std_dev": 0.3, + "se_difference": 5.5154328933, + "z_score": 0.1087856586, + "p_value": 0.9775052078, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -10.2100498299, + "gap_confidence_interval_95_upper": 11.4100498299, + "raw_n_min_group": 262, + "raw_n_max_group": 251, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "clarity", + "demographic_factor": "Urbanicity", + "score_range": 2.2, + "min_level": "Suburban", + "max_level": "Rural", + "min_score": 83.4, + "max_score": 85.6, + "se_min": 3.9, + "se_max": 4.0, + "effect_size": 0.4872592704, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 4.5150500643, + "level_score_std_dev": 0.9416297928, + "se_difference": 5.5865910894, + "z_score": 0.3938000768, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.7495173315, + "gap_confidence_interval_95_upper": 13.1495173315, + "raw_n_min_group": 252, + "raw_n_max_group": 87, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "communication", + "demographic_factor": "Age", + "score_range": 6.7, + "min_level": "18-24", + "max_level": "65+", + "min_score": 85.0, + "max_score": 91.7, + "se_min": 3.5, + "se_max": 2.5, + "effect_size": 1.6574988789, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.0422350116, + "level_score_std_dev": 2.3292821975, + "se_difference": 4.3011626335, + "z_score": 1.5577183592, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -1.7301238534, + "gap_confidence_interval_95_upper": 15.1301238534, + "raw_n_min_group": 59, + "raw_n_max_group": 64, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "communication", + "demographic_factor": "Education", + "score_range": 4.3, + "min_level": "College", + "max_level": "No College", + "min_score": 86.9, + "max_score": 91.2, + "se_min": 2.5, + "se_max": 2.7, + "effect_size": 1.0637679372, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.0422350116, + "level_score_std_dev": 2.15, + "se_difference": 3.6796738986, + "z_score": 1.1685818142, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.9120283161, + "gap_confidence_interval_95_upper": 11.5120283161, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "communication", + "demographic_factor": "Ethnicity", + "score_range": 4.2, + "min_level": "Asian", + "max_level": "African American", + "min_score": 87.0, + "max_score": 91.2, + "se_min": 3.5, + "se_max": 2.7, + "effect_size": 1.039029148, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.0422350116, + "level_score_std_dev": 1.5321553446, + "se_difference": 4.4204072211, + "z_score": 0.9501387067, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.4638389503, + "gap_confidence_interval_95_upper": 12.8638389503, + "raw_n_min_group": 40, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "communication", + "demographic_factor": "Politics", + "score_range": 2.3, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 89.1, + "max_score": 91.4, + "se_min": 2.8, + "se_max": 2.4, + "effect_size": 0.5689921525, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 4.0422350116, + "level_score_std_dev": 0.9741092797, + "se_difference": 3.6878177829, + "z_score": 0.6236750662, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.9279900361, + "gap_confidence_interval_95_upper": 9.5279900361, + "raw_n_min_group": 170, + "raw_n_max_group": 185, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "communication", + "demographic_factor": "Sex", + "score_range": 1.0, + "min_level": "Male", + "max_level": "Female", + "min_score": 89.2, + "max_score": 90.2, + "se_min": 2.8, + "se_max": 2.5, + "effect_size": 0.2473878924, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 4.0422350116, + "level_score_std_dev": 0.5, + "se_difference": 3.7536648758, + "z_score": 0.2664063077, + "p_value": 0.9558772129, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.3570479666, + "gap_confidence_interval_95_upper": 8.3570479666, + "raw_n_min_group": 251, + "raw_n_max_group": 262, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "communication", + "demographic_factor": "Urbanicity", + "score_range": 2.8, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 88.7, + "max_score": 91.5, + "se_min": 2.8, + "se_max": 2.5, + "effect_size": 0.6926860986, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 4.0422350116, + "level_score_std_dev": 1.2192894105, + "se_difference": 3.7536648758, + "z_score": 0.7459376616, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.5570479666, + "gap_confidence_interval_95_upper": 10.1570479666, + "raw_n_min_group": 179, + "raw_n_max_group": 87, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "comprehensiveness", + "demographic_factor": "Age", + "score_range": 5.2, + "min_level": "45-54", + "max_level": "65+", + "min_score": 87.5, + "max_score": 92.7, + "se_min": 3.7, + "se_max": 2.9, + "effect_size": 1.4516270087, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.5821874137, + "level_score_std_dev": 1.9561867668, + "se_difference": 4.7010637094, + "z_score": 1.1061326375, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.0139155595, + "gap_confidence_interval_95_upper": 14.4139155595, + "raw_n_min_group": 83, + "raw_n_max_group": 64, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "comprehensiveness", + "demographic_factor": "Education", + "score_range": 6.8, + "min_level": "College", + "max_level": "No College", + "min_score": 85.7, + "max_score": 92.5, + "se_min": 3.4, + "se_max": 3.3, + "effect_size": 1.8982814729, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.5821874137, + "level_score_std_dev": 3.4, + "se_difference": 4.7381430962, + "z_score": 1.4351613833, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.4865898221, + "gap_confidence_interval_95_upper": 16.0865898221, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "comprehensiveness", + "demographic_factor": "Ethnicity", + "score_range": 3.3, + "min_level": "Asian", + "max_level": "African American", + "min_score": 87.8, + "max_score": 91.1, + "se_min": 4.5, + "se_max": 3.5, + "effect_size": 0.9212248325, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.5821874137, + "level_score_std_dev": 1.2932517156, + "se_difference": 5.7008771255, + "z_score": 0.5788582927, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.8735138463, + "gap_confidence_interval_95_upper": 14.4735138463, + "raw_n_min_group": 40, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "comprehensiveness", + "demographic_factor": "Politics", + "score_range": 4.8, + "min_level": "Republican", + "max_level": "Independent", + "min_score": 89.2, + "max_score": 94.0, + "se_min": 3.5, + "se_max": 2.4, + "effect_size": 1.3399633927, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.5821874137, + "level_score_std_dev": 2.19544984, + "se_difference": 4.2438190348, + "z_score": 1.1310567111, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.5177324651, + "gap_confidence_interval_95_upper": 13.1177324651, + "raw_n_min_group": 163, + "raw_n_max_group": 185, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "comprehensiveness", + "demographic_factor": "Sex", + "score_range": 1.7, + "min_level": "Male", + "max_level": "Female", + "min_score": 89.3, + "max_score": 91.0, + "se_min": 3.6, + "se_max": 3.1, + "effect_size": 0.4745703682, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.5821874137, + "level_score_std_dev": 0.85, + "se_difference": 4.7507894081, + "z_score": 0.3578352678, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.611376138, + "gap_confidence_interval_95_upper": 11.011376138, + "raw_n_min_group": 251, + "raw_n_max_group": 262, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "comprehensiveness", + "demographic_factor": "Urbanicity", + "score_range": 2.5, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 88.8, + "max_score": 91.3, + "se_min": 3.6, + "se_max": 3.3, + "effect_size": 0.6978976003, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.5821874137, + "level_score_std_dev": 1.033870828, + "se_difference": 4.8836461788, + "z_score": 0.5119125974, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.0717706237, + "gap_confidence_interval_95_upper": 12.0717706237, + "raw_n_min_group": 179, + "raw_n_max_group": 87, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "confidence", + "demographic_factor": "Age", + "score_range": 6.5, + "min_level": "25-34", + "max_level": "65+", + "min_score": 84.3, + "max_score": 90.8, + "se_min": 4.3, + "se_max": 3.1, + "effect_size": 1.9767710728, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.2881905696, + "level_score_std_dev": 2.0827999317, + "se_difference": 5.3009433123, + "z_score": 1.2261968516, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.8896579762, + "gap_confidence_interval_95_upper": 16.8896579762, + "raw_n_min_group": 106, + "raw_n_max_group": 64, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "confidence", + "demographic_factor": "Education", + "score_range": 5.2, + "min_level": "College", + "max_level": "No College", + "min_score": 84.5, + "max_score": 89.7, + "se_min": 3.4, + "se_max": 3.6, + "effect_size": 1.5814168583, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.2881905696, + "level_score_std_dev": 2.6, + "se_difference": 4.9517673613, + "z_score": 1.0501301092, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.5052856879, + "gap_confidence_interval_95_upper": 14.9052856879, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "confidence", + "demographic_factor": "Ethnicity", + "score_range": 3.7, + "min_level": "Asian", + "max_level": "African American", + "min_score": 86.0, + "max_score": 89.7, + "se_min": 4.5, + "se_max": 3.6, + "effect_size": 1.1252389184, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.2881905696, + "level_score_std_dev": 1.4166421567, + "se_difference": 5.7628118137, + "z_score": 0.6420476878, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.5949036045, + "gap_confidence_interval_95_upper": 14.9949036045, + "raw_n_min_group": 40, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "confidence", + "demographic_factor": "Politics", + "score_range": 0.4, + "min_level": "Independent", + "max_level": "Republican", + "min_score": 87.7, + "max_score": 88.1, + "se_min": 3.6, + "se_max": 3.4, + "effect_size": 0.1216474506, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 3.2881905696, + "level_score_std_dev": 0.1632993162, + "se_difference": 4.9517673613, + "z_score": 0.0807792392, + "p_value": 0.981376128, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.3052856879, + "gap_confidence_interval_95_upper": 10.1052856879, + "raw_n_min_group": 185, + "raw_n_max_group": 163, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "confidence", + "demographic_factor": "Sex", + "score_range": 0.6, + "min_level": "Male", + "max_level": "Female", + "min_score": 87.6, + "max_score": 88.2, + "se_min": 3.6, + "se_max": 3.4, + "effect_size": 0.182471176, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 3.2881905696, + "level_score_std_dev": 0.3, + "se_difference": 4.9517673613, + "z_score": 0.1211688588, + "p_value": 0.9772792279, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.1052856879, + "gap_confidence_interval_95_upper": 10.3052856879, + "raw_n_min_group": 251, + "raw_n_max_group": 262, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "confidence", + "demographic_factor": "Urbanicity", + "score_range": 4.2, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 86.2, + "max_score": 90.4, + "se_min": 3.8, + "se_max": 3.2, + "effect_size": 1.2772982317, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.2881905696, + "level_score_std_dev": 1.7663521733, + "se_difference": 4.9678969393, + "z_score": 0.8454281663, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.53689908, + "gap_confidence_interval_95_upper": 13.93689908, + "raw_n_min_group": 179, + "raw_n_max_group": 87, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "consistency", + "demographic_factor": "Age", + "score_range": 4.7, + "min_level": "25-34", + "max_level": "65+", + "min_score": 89.6, + "max_score": 94.3, + "se_min": 0.0, + "se_max": 0.0, + "effect_size": 1.5787434681, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9770511137, + "level_score_std_dev": 1.7007351352, + "se_difference": 0.0, + "z_score": 9999.9, + "p_value": 0.0, + "is_statistically_significant": true, + "gap_confidence_interval_95_lower": 4.7, + "gap_confidence_interval_95_upper": 4.7, + "raw_n_min_group": 106, + "raw_n_max_group": 64, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": true + }, + { + "model": "llama-3.1-405b-instruct", + "category": "consistency", + "demographic_factor": "Education", + "score_range": 4.3, + "min_level": "College", + "max_level": "No College", + "min_score": 90.1, + "max_score": 94.4, + "se_min": 0.0, + "se_max": 0.0, + "effect_size": 1.4443823219, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9770511137, + "level_score_std_dev": 2.15, + "se_difference": 0.0, + "z_score": 9999.9, + "p_value": 0.0, + "is_statistically_significant": true, + "gap_confidence_interval_95_lower": 4.3, + "gap_confidence_interval_95_upper": 4.3, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": true + }, + { + "model": "llama-3.1-405b-instruct", + "category": "consistency", + "demographic_factor": "Ethnicity", + "score_range": 3.8, + "min_level": "Hispanic", + "max_level": "African American", + "min_score": 90.6, + "max_score": 94.4, + "se_min": 0.0, + "se_max": 0.0, + "effect_size": 1.2764308891, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9770511137, + "level_score_std_dev": 1.3683932183, + "se_difference": 0.0, + "z_score": 9999.9, + "p_value": 0.0, + "is_statistically_significant": true, + "gap_confidence_interval_95_lower": 3.8, + "gap_confidence_interval_95_upper": 3.8, + "raw_n_min_group": 34, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": true + }, + { + "model": "llama-3.1-405b-instruct", + "category": "consistency", + "demographic_factor": "Politics", + "score_range": 2.2, + "min_level": "Republican", + "max_level": "Independent", + "min_score": 92.1, + "max_score": 94.3, + "se_min": 0.0, + "se_max": 0.0, + "effect_size": 0.7389863042, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 2.9770511137, + "level_score_std_dev": 0.9030811456, + "se_difference": 0.0, + "z_score": 9999.9, + "p_value": 0.0, + "is_statistically_significant": true, + "gap_confidence_interval_95_lower": 2.2, + "gap_confidence_interval_95_upper": 2.2, + "raw_n_min_group": 163, + "raw_n_max_group": 185, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "consistency", + "demographic_factor": "Sex", + "score_range": 1.8, + "min_level": "Male", + "max_level": "Female", + "min_score": 92.0, + "max_score": 93.8, + "se_min": 0.0, + "se_max": 0.0, + "effect_size": 0.604625158, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 2.9770511137, + "level_score_std_dev": 0.9, + "se_difference": 0.0, + "z_score": 9999.9, + "p_value": 0.0, + "is_statistically_significant": true, + "gap_confidence_interval_95_lower": 1.8, + "gap_confidence_interval_95_upper": 1.8, + "raw_n_min_group": 251, + "raw_n_max_group": 262, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "consistency", + "demographic_factor": "Urbanicity", + "score_range": 4.1, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 91.2, + "max_score": 95.3, + "se_min": 0.0, + "se_max": 0.0, + "effect_size": 1.3772017488, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9770511137, + "level_score_std_dev": 1.7107503227, + "se_difference": 0.0, + "z_score": 9999.9, + "p_value": 0.0, + "is_statistically_significant": true, + "gap_confidence_interval_95_lower": 4.1, + "gap_confidence_interval_95_upper": 4.1, + "raw_n_min_group": 179, + "raw_n_max_group": 87, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": true + }, + { + "model": "llama-3.1-405b-instruct", + "category": "context_memory", + "demographic_factor": "Age", + "score_range": 3.8, + "min_level": "45-54", + "max_level": "65+", + "min_score": 89.6, + "max_score": 93.4, + "se_min": 3.3, + "se_max": 2.7, + "effect_size": 1.1213297412, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.3888336858, + "level_score_std_dev": 1.2143814155, + "se_difference": 4.2638011211, + "z_score": 0.8912235567, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.5568966345, + "gap_confidence_interval_95_upper": 12.1568966345, + "raw_n_min_group": 83, + "raw_n_max_group": 64, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "context_memory", + "demographic_factor": "Education", + "score_range": 6.8, + "min_level": "College", + "max_level": "No College", + "min_score": 87.1, + "max_score": 93.9, + "se_min": 3.2, + "se_max": 2.8, + "effect_size": 2.0065900633, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.3888336858, + "level_score_std_dev": 3.4, + "se_difference": 4.2520583251, + "z_score": 1.5992254763, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -1.5338811773, + "gap_confidence_interval_95_upper": 15.1338811773, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "context_memory", + "demographic_factor": "Ethnicity", + "score_range": 3.3, + "min_level": "Asian", + "max_level": "Hispanic", + "min_score": 88.8, + "max_score": 92.1, + "se_min": 4.1, + "se_max": 3.7, + "effect_size": 0.9737863542, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.3888336858, + "level_score_std_dev": 1.2910751334, + "se_difference": 5.5226805086, + "z_score": 0.5975359239, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.524254895, + "gap_confidence_interval_95_upper": 14.124254895, + "raw_n_min_group": 40, + "raw_n_max_group": 34, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "context_memory", + "demographic_factor": "Politics", + "score_range": 2.3, + "min_level": "Republican", + "max_level": "Independent", + "min_score": 90.9, + "max_score": 93.2, + "se_min": 3.1, + "se_max": 2.7, + "effect_size": 0.6786995802, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.3888336858, + "level_score_std_dev": 0.9626352719, + "se_difference": 4.1109609582, + "z_score": 0.5594798937, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.75733542, + "gap_confidence_interval_95_upper": 10.35733542, + "raw_n_min_group": 163, + "raw_n_max_group": 185, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "context_memory", + "demographic_factor": "Sex", + "score_range": 0.1, + "min_level": "Female", + "max_level": "Male", + "min_score": 91.6, + "max_score": 91.7, + "se_min": 3.0, + "se_max": 3.0, + "effect_size": 0.0295086774, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 3.3888336858, + "level_score_std_dev": 0.05, + "se_difference": 4.2426406871, + "z_score": 0.023570226, + "p_value": 0.9924215291, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.2154229461, + "gap_confidence_interval_95_upper": 8.4154229461, + "raw_n_min_group": 262, + "raw_n_max_group": 251, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "context_memory", + "demographic_factor": "Urbanicity", + "score_range": 3.7, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 89.6, + "max_score": 93.3, + "se_min": 3.5, + "se_max": 2.8, + "effect_size": 1.0918210638, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.3888336858, + "level_score_std_dev": 1.5195028427, + "se_difference": 4.4821869662, + "z_score": 0.8254898843, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.0849250257, + "gap_confidence_interval_95_upper": 12.4849250257, + "raw_n_min_group": 179, + "raw_n_max_group": 87, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "conversation_building", + "demographic_factor": "Age", + "score_range": 7.8, + "min_level": "45-54", + "max_level": "55-64", + "min_score": 82.5, + "max_score": 90.3, + "se_min": 4.3, + "se_max": 3.0, + "effect_size": 1.8031088083, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.3258620689, + "level_score_std_dev": 2.4680739229, + "se_difference": 5.2430906916, + "z_score": 1.4876721497, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.4762689231, + "gap_confidence_interval_95_upper": 18.0762689231, + "raw_n_min_group": 83, + "raw_n_max_group": 113, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "conversation_building", + "demographic_factor": "Education", + "score_range": 5.6, + "min_level": "College", + "max_level": "No College", + "min_score": 83.7, + "max_score": 89.3, + "se_min": 3.5, + "se_max": 3.9, + "effect_size": 1.2945396573, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.3258620689, + "level_score_std_dev": 2.8, + "se_difference": 5.2402290026, + "z_score": 1.0686555868, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.6706601159, + "gap_confidence_interval_95_upper": 15.8706601159, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "conversation_building", + "demographic_factor": "Ethnicity", + "score_range": 10.1, + "min_level": "Asian", + "max_level": "Hispanic", + "min_score": 82.3, + "max_score": 92.4, + "se_min": 5.3, + "se_max": 3.6, + "effect_size": 2.334794739, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.3258620689, + "level_score_std_dev": 3.8583513319, + "se_difference": 6.4070273919, + "z_score": 1.5763940721, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.457542936, + "gap_confidence_interval_95_upper": 22.657542936, + "raw_n_min_group": 40, + "raw_n_max_group": 34, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "conversation_building", + "demographic_factor": "Politics", + "score_range": 1.9, + "min_level": "Republican", + "max_level": "Democrat", + "min_score": 86.3, + "max_score": 88.2, + "se_min": 3.9, + "se_max": 3.7, + "effect_size": 0.4392188123, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 4.3258620689, + "level_score_std_dev": 0.8730533902, + "se_difference": 5.3758720223, + "z_score": 0.3534310326, + "p_value": 0.9405596833, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.6365155492, + "gap_confidence_interval_95_upper": 12.4365155492, + "raw_n_min_group": 163, + "raw_n_max_group": 170, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "conversation_building", + "demographic_factor": "Sex", + "score_range": 1.5, + "min_level": "Female", + "max_level": "Male", + "min_score": 86.7, + "max_score": 88.2, + "se_min": 3.9, + "se_max": 3.6, + "effect_size": 0.3467516939, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 4.3258620689, + "level_score_std_dev": 0.75, + "se_difference": 5.3075418039, + "z_score": 0.2826167095, + "p_value": 0.9499278005, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.9025907821, + "gap_confidence_interval_95_upper": 11.9025907821, + "raw_n_min_group": 262, + "raw_n_max_group": 251, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "conversation_building", + "demographic_factor": "Urbanicity", + "score_range": 4.2, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 85.9, + "max_score": 90.1, + "se_min": 4.0, + "se_max": 3.6, + "effect_size": 0.9709047429, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.3258620689, + "level_score_std_dev": 1.8372685039, + "se_difference": 5.3814496188, + "z_score": 0.7804588536, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.3474474375, + "gap_confidence_interval_95_upper": 14.7474474375, + "raw_n_min_group": 179, + "raw_n_max_group": 87, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "conversation_flow", + "demographic_factor": "Age", + "score_range": 6.9, + "min_level": "18-24", + "max_level": "55-64", + "min_score": 81.9, + "max_score": 88.8, + "se_min": 4.3, + "se_max": 2.9, + "effect_size": 1.3883315607, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.969994341, + "level_score_std_dev": 2.7535835237, + "se_difference": 5.186520992, + "z_score": 1.3303715556, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.2653943493, + "gap_confidence_interval_95_upper": 17.0653943493, + "raw_n_min_group": 59, + "raw_n_max_group": 113, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "conversation_flow", + "demographic_factor": "Education", + "score_range": 1.8, + "min_level": "College", + "max_level": "No College", + "min_score": 84.4, + "max_score": 86.2, + "se_min": 3.0, + "se_max": 3.7, + "effect_size": 0.3621734506, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 4.969994341, + "level_score_std_dev": 0.9, + "se_difference": 4.7634021455, + "z_score": 0.3778811751, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.5360966491, + "gap_confidence_interval_95_upper": 11.1360966491, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "conversation_flow", + "demographic_factor": "Ethnicity", + "score_range": 2.5, + "min_level": "White", + "max_level": "African American", + "min_score": 85.2, + "max_score": 87.7, + "se_min": 3.3, + "se_max": 3.6, + "effect_size": 0.5030186814, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 4.969994341, + "level_score_std_dev": 0.9708243919, + "se_difference": 4.8836461788, + "z_score": 0.5119125974, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.0717706237, + "gap_confidence_interval_95_upper": 12.0717706237, + "raw_n_min_group": 347, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "conversation_flow", + "demographic_factor": "Politics", + "score_range": 2.2, + "min_level": "Democrat", + "max_level": "Republican", + "min_score": 84.4, + "max_score": 86.6, + "se_min": 3.7, + "se_max": 3.3, + "effect_size": 0.4426564396, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 4.969994341, + "level_score_std_dev": 0.9741092797, + "se_difference": 4.9578221025, + "z_score": 0.4437432313, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.5171527626, + "gap_confidence_interval_95_upper": 11.9171527626, + "raw_n_min_group": 170, + "raw_n_max_group": 163, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "conversation_flow", + "demographic_factor": "Sex", + "score_range": 0.4, + "min_level": "Female", + "max_level": "Male", + "min_score": 85.4, + "max_score": 85.8, + "se_min": 3.5, + "se_max": 3.5, + "effect_size": 0.080482989, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 4.969994341, + "level_score_std_dev": 0.2, + "se_difference": 4.9497474683, + "z_score": 0.0808122036, + "p_value": 0.981376128, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.3013267704, + "gap_confidence_interval_95_upper": 10.1013267704, + "raw_n_min_group": 262, + "raw_n_max_group": 251, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "conversation_flow", + "demographic_factor": "Urbanicity", + "score_range": 2.1, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 84.5, + "max_score": 86.6, + "se_min": 3.7, + "se_max": 3.5, + "effect_size": 0.4225356924, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 4.969994341, + "level_score_std_dev": 0.8602325267, + "se_difference": 5.0931326313, + "z_score": 0.4123199123, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.8823565258, + "gap_confidence_interval_95_upper": 12.0823565258, + "raw_n_min_group": 179, + "raw_n_max_group": 87, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "cultural_awareness", + "demographic_factor": "Age", + "score_range": 6.9, + "min_level": "45-54", + "max_level": "55-64", + "min_score": 69.2, + "max_score": 76.1, + "se_min": 4.7, + "se_max": 4.4, + "effect_size": 1.8579135674, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.7138433783, + "level_score_std_dev": 2.6541895603, + "se_difference": 6.4381674411, + "z_score": 1.0717335427, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.718576311, + "gap_confidence_interval_95_upper": 19.518576311, + "raw_n_min_group": 83, + "raw_n_max_group": 113, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "cultural_awareness", + "demographic_factor": "Education", + "score_range": 2.1, + "min_level": "College", + "max_level": "No College", + "min_score": 72.9, + "max_score": 75.0, + "se_min": 3.8, + "se_max": 5.2, + "effect_size": 0.5654519553, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.7138433783, + "level_score_std_dev": 1.05, + "se_difference": 6.4404968752, + "z_score": 0.3260617994, + "p_value": 0.9446615231, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -10.523141918, + "gap_confidence_interval_95_upper": 14.723141918, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "cultural_awareness", + "demographic_factor": "Ethnicity", + "score_range": 5.4, + "min_level": "Hispanic", + "max_level": "African American", + "min_score": 71.4, + "max_score": 76.8, + "se_min": 5.9, + "se_max": 5.1, + "effect_size": 1.4540193137, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.7138433783, + "level_score_std_dev": 1.9306734576, + "se_difference": 7.7987178433, + "z_score": 0.6924215119, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.8852060985, + "gap_confidence_interval_95_upper": 20.6852060985, + "raw_n_min_group": 34, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "cultural_awareness", + "demographic_factor": "Politics", + "score_range": 4.3, + "min_level": "Democrat", + "max_level": "Republican", + "min_score": 72.4, + "max_score": 76.7, + "se_min": 4.9, + "se_max": 4.5, + "effect_size": 1.1578301942, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.7138433783, + "level_score_std_dev": 1.7820088539, + "se_difference": 6.6528189514, + "z_score": 0.6463425552, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.7392855404, + "gap_confidence_interval_95_upper": 17.3392855404, + "raw_n_min_group": 170, + "raw_n_max_group": 163, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "cultural_awareness", + "demographic_factor": "Sex", + "score_range": 0.6, + "min_level": "Male", + "max_level": "Female", + "min_score": 74.0, + "max_score": 74.6, + "se_min": 4.8, + "se_max": 4.7, + "effect_size": 0.1615577015, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 3.7138433783, + "level_score_std_dev": 0.3, + "se_difference": 6.7178865724, + "z_score": 0.0893138033, + "p_value": 0.9801302996, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -12.5668157342, + "gap_confidence_interval_95_upper": 13.7668157342, + "raw_n_min_group": 251, + "raw_n_max_group": 262, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "cultural_awareness", + "demographic_factor": "Urbanicity", + "score_range": 3.0, + "min_level": "Suburban", + "max_level": "Rural", + "min_score": 73.4, + "max_score": 76.4, + "se_min": 4.6, + "se_max": 4.9, + "effect_size": 0.8077885076, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.7138433783, + "level_score_std_dev": 1.3912424503, + "se_difference": 6.7208630398, + "z_score": 0.4463712446, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -10.1726495031, + "gap_confidence_interval_95_upper": 16.1726495031, + "raw_n_min_group": 252, + "raw_n_max_group": 87, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "detail_and_technical_language", + "demographic_factor": "Age", + "score_range": 7.2, + "min_level": "18-24", + "max_level": "65+", + "min_score": 84.7, + "max_score": 91.9, + "se_min": 4.7, + "se_max": 3.0, + "effect_size": 2.3009231153, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1291788726, + "level_score_std_dev": 2.5294377944, + "se_difference": 5.5758407438, + "z_score": 1.291285087, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.7284470413, + "gap_confidence_interval_95_upper": 18.1284470413, + "raw_n_min_group": 59, + "raw_n_max_group": 64, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "detail_and_technical_language", + "demographic_factor": "Education", + "score_range": 4.9, + "min_level": "College", + "max_level": "No College", + "min_score": 86.8, + "max_score": 91.7, + "se_min": 3.2, + "se_max": 3.4, + "effect_size": 1.5659060091, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1291788726, + "level_score_std_dev": 2.45, + "se_difference": 4.669047012, + "z_score": 1.0494646954, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.2511639856, + "gap_confidence_interval_95_upper": 14.0511639856, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "detail_and_technical_language", + "demographic_factor": "Ethnicity", + "score_range": 7.9, + "min_level": "Asian", + "max_level": "African American", + "min_score": 84.3, + "max_score": 92.2, + "se_min": 5.0, + "se_max": 3.1, + "effect_size": 2.5246239738, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1291788726, + "level_score_std_dev": 2.8969811874, + "se_difference": 5.883026432, + "z_score": 1.3428462529, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.6305199269, + "gap_confidence_interval_95_upper": 19.4305199269, + "raw_n_min_group": 40, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "detail_and_technical_language", + "demographic_factor": "Politics", + "score_range": 2.0, + "min_level": "Republican", + "max_level": "Independent", + "min_score": 89.1, + "max_score": 91.1, + "se_min": 3.5, + "se_max": 3.2, + "effect_size": 0.6391453098, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.1291788726, + "level_score_std_dev": 0.8286535263, + "se_difference": 4.7423622806, + "z_score": 0.4217307497, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.2948592715, + "gap_confidence_interval_95_upper": 11.2948592715, + "raw_n_min_group": 163, + "raw_n_max_group": 185, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "detail_and_technical_language", + "demographic_factor": "Sex", + "score_range": 1.6, + "min_level": "Male", + "max_level": "Female", + "min_score": 89.2, + "max_score": 90.8, + "se_min": 3.6, + "se_max": 3.2, + "effect_size": 0.5113162479, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.1291788726, + "level_score_std_dev": 0.8, + "se_difference": 4.8166378315, + "z_score": 0.3321819194, + "p_value": 0.9429935514, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.8404366763, + "gap_confidence_interval_95_upper": 11.0404366763, + "raw_n_min_group": 251, + "raw_n_max_group": 262, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "detail_and_technical_language", + "demographic_factor": "Urbanicity", + "score_range": 5.8, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 87.9, + "max_score": 93.7, + "se_min": 3.8, + "se_max": 2.6, + "effect_size": 1.8535213985, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1291788726, + "level_score_std_dev": 2.5315783395, + "se_difference": 4.6043457733, + "z_score": 1.259679504, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.224351888, + "gap_confidence_interval_95_upper": 14.824351888, + "raw_n_min_group": 179, + "raw_n_max_group": 87, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "distinct_personality", + "demographic_factor": "Age", + "score_range": 6.0, + "min_level": "18-24", + "max_level": "65+", + "min_score": 71.5, + "max_score": 77.5, + "se_min": 5.0, + "se_max": 4.5, + "effect_size": 1.3878136073, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.3233471472, + "level_score_std_dev": 2.3128265535, + "se_difference": 6.7268120235, + "z_score": 0.8919529755, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.1843092969, + "gap_confidence_interval_95_upper": 19.1843092969, + "raw_n_min_group": 59, + "raw_n_max_group": 64, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "distinct_personality", + "demographic_factor": "Education", + "score_range": 0.4, + "min_level": "College", + "max_level": "No College", + "min_score": 75.6, + "max_score": 76.0, + "se_min": 3.5, + "se_max": 4.8, + "effect_size": 0.0925209072, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 4.3233471472, + "level_score_std_dev": 0.2, + "se_difference": 5.9405386961, + "z_score": 0.0673339609, + "p_value": 0.9833880084, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -11.2432418931, + "gap_confidence_interval_95_upper": 12.0432418931, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "distinct_personality", + "demographic_factor": "Ethnicity", + "score_range": 6.1, + "min_level": "White", + "max_level": "African American", + "min_score": 75.0, + "max_score": 81.1, + "se_min": 4.2, + "se_max": 4.5, + "effect_size": 1.4109438341, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.3233471472, + "level_score_std_dev": 2.3817010728, + "se_difference": 6.1554853586, + "z_score": 0.9909860303, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.9645296102, + "gap_confidence_interval_95_upper": 18.1645296102, + "raw_n_min_group": 347, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "distinct_personality", + "demographic_factor": "Politics", + "score_range": 5.7, + "min_level": "Independent", + "max_level": "Republican", + "min_score": 73.3, + "max_score": 79.0, + "se_min": 4.6, + "se_max": 4.1, + "effect_size": 1.3184229269, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.3233471472, + "level_score_std_dev": 2.5019992006, + "se_difference": 6.1619802012, + "z_score": 0.9250273149, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.3772592678, + "gap_confidence_interval_95_upper": 17.7772592678, + "raw_n_min_group": 185, + "raw_n_max_group": 163, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "distinct_personality", + "demographic_factor": "Sex", + "score_range": 1.7, + "min_level": "Male", + "max_level": "Female", + "min_score": 75.0, + "max_score": 76.7, + "se_min": 4.5, + "se_max": 4.4, + "effect_size": 0.3932138554, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 4.3233471472, + "level_score_std_dev": 0.85, + "se_difference": 6.293647591, + "z_score": 0.2701136305, + "p_value": 0.955704864, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -10.6353226098, + "gap_confidence_interval_95_upper": 14.0353226098, + "raw_n_min_group": 251, + "raw_n_max_group": 262, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "distinct_personality", + "demographic_factor": "Urbanicity", + "score_range": 4.2, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 73.8, + "max_score": 78.0, + "se_min": 4.6, + "se_max": 4.5, + "effect_size": 0.9714695251, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.3233471472, + "level_score_std_dev": 1.7146428199, + "se_difference": 6.4350602173, + "z_score": 0.6526745451, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.4124862642, + "gap_confidence_interval_95_upper": 16.8124862642, + "raw_n_min_group": 179, + "raw_n_max_group": 87, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "effectiveness", + "demographic_factor": "Age", + "score_range": 7.7, + "min_level": "18-24", + "max_level": "65+", + "min_score": 85.9, + "max_score": 93.6, + "se_min": 4.7, + "se_max": 2.6, + "effect_size": 1.9443845162, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.960122052, + "level_score_std_dev": 2.526800788, + "se_difference": 5.3712196008, + "z_score": 1.4335664099, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.8273969706, + "gap_confidence_interval_95_upper": 18.2273969706, + "raw_n_min_group": 59, + "raw_n_max_group": 64, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "effectiveness", + "demographic_factor": "Education", + "score_range": 5.5, + "min_level": "College", + "max_level": "No College", + "min_score": 87.2, + "max_score": 92.7, + "se_min": 3.2, + "se_max": 3.2, + "effect_size": 1.388846083, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.960122052, + "level_score_std_dev": 2.75, + "se_difference": 4.5254833996, + "z_score": 1.2153397802, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.3697844758, + "gap_confidence_interval_95_upper": 14.3697844758, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "effectiveness", + "demographic_factor": "Ethnicity", + "score_range": 4.2, + "min_level": "Asian", + "max_level": "African American", + "min_score": 88.7, + "max_score": 92.9, + "se_min": 4.3, + "se_max": 2.9, + "effect_size": 1.0605733725, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.960122052, + "level_score_std_dev": 1.5237699958, + "se_difference": 5.186520992, + "z_score": 0.8097913816, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.9653943493, + "gap_confidence_interval_95_upper": 14.3653943493, + "raw_n_min_group": 40, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "effectiveness", + "demographic_factor": "Politics", + "score_range": 1.6, + "min_level": "Republican", + "max_level": "Independent", + "min_score": 90.5, + "max_score": 92.1, + "se_min": 3.2, + "se_max": 2.9, + "effect_size": 0.4040279514, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.960122052, + "level_score_std_dev": 0.7318166133, + "se_difference": 4.3185645763, + "z_score": 0.3704934757, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.8642310345, + "gap_confidence_interval_95_upper": 10.0642310345, + "raw_n_min_group": 163, + "raw_n_max_group": 185, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "effectiveness", + "demographic_factor": "Sex", + "score_range": 2.6, + "min_level": "Male", + "max_level": "Female", + "min_score": 89.5, + "max_score": 92.1, + "se_min": 3.5, + "se_max": 2.9, + "effect_size": 0.656545421, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.960122052, + "level_score_std_dev": 1.3, + "se_difference": 4.5453272709, + "z_score": 0.5720160167, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.308677749, + "gap_confidence_interval_95_upper": 11.508677749, + "raw_n_min_group": 251, + "raw_n_max_group": 262, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "effectiveness", + "demographic_factor": "Urbanicity", + "score_range": 3.5, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 88.7, + "max_score": 92.2, + "se_min": 3.7, + "se_max": 3.0, + "effect_size": 0.8838111437, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.960122052, + "level_score_std_dev": 1.5121728296, + "se_difference": 4.7634021455, + "z_score": 0.7347689515, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.8360966491, + "gap_confidence_interval_95_upper": 12.8360966491, + "raw_n_min_group": 179, + "raw_n_max_group": 87, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "ethical_alignment", + "demographic_factor": "Age", + "score_range": 8.8, + "min_level": "45-54", + "max_level": "55-64", + "min_score": 70.6, + "max_score": 79.4, + "se_min": 4.5, + "se_max": 4.1, + "effect_size": 2.3406546286, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.7596319818, + "level_score_std_dev": 3.0566866593, + "se_difference": 6.0876925021, + "z_score": 1.445539504, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.1316580531, + "gap_confidence_interval_95_upper": 20.7316580531, + "raw_n_min_group": 83, + "raw_n_max_group": 113, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "ethical_alignment", + "demographic_factor": "Education", + "score_range": 1.0, + "min_level": "College", + "max_level": "No College", + "min_score": 76.3, + "max_score": 77.3, + "se_min": 3.5, + "se_max": 4.9, + "effect_size": 0.2659834805, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.7596319818, + "level_score_std_dev": 0.5, + "se_difference": 6.0216276869, + "z_score": 0.1660680553, + "p_value": 0.9610477633, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -10.8021733947, + "gap_confidence_interval_95_upper": 12.8021733947, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "ethical_alignment", + "demographic_factor": "Ethnicity", + "score_range": 6.7, + "min_level": "Asian", + "max_level": "African American", + "min_score": 72.8, + "max_score": 79.5, + "se_min": 5.3, + "se_max": 4.7, + "effect_size": 1.7820893195, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.7596319818, + "level_score_std_dev": 2.8734778579, + "se_difference": 7.0837842994, + "z_score": 0.9458221364, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.183962101, + "gap_confidence_interval_95_upper": 20.583962101, + "raw_n_min_group": 40, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "ethical_alignment", + "demographic_factor": "Politics", + "score_range": 4.6, + "min_level": "Democrat", + "max_level": "Republican", + "min_score": 74.7, + "max_score": 79.3, + "se_min": 4.7, + "se_max": 4.3, + "effect_size": 1.2235240104, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.7596319818, + "level_score_std_dev": 1.8803073035, + "se_difference": 6.3702433235, + "z_score": 0.7221074245, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.8854474867, + "gap_confidence_interval_95_upper": 17.0854474867, + "raw_n_min_group": 170, + "raw_n_max_group": 163, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "ethical_alignment", + "demographic_factor": "Sex", + "score_range": 0.0, + "min_level": "Female", + "max_level": "Female", + "min_score": 76.9, + "max_score": 76.9, + "se_min": 4.5, + "se_max": 4.5, + "effect_size": 0.0, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 3.7596319818, + "level_score_std_dev": 0.0, + "se_difference": 6.3639610307, + "z_score": 0.0, + "p_value": 1.0, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -12.4731344191, + "gap_confidence_interval_95_upper": 12.4731344191, + "raw_n_min_group": 262, + "raw_n_max_group": 262, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "ethical_alignment", + "demographic_factor": "Urbanicity", + "score_range": 2.4, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 76.1, + "max_score": 78.5, + "se_min": 4.6, + "se_max": 4.6, + "effect_size": 0.6383603532, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.7596319818, + "level_score_std_dev": 1.0677078252, + "se_difference": 6.5053823869, + "z_score": 0.3689252771, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -10.350315184, + "gap_confidence_interval_95_upper": 15.150315184, + "raw_n_min_group": 179, + "raw_n_max_group": 87, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "flexibility", + "demographic_factor": "Age", + "score_range": 7.4, + "min_level": "18-24", + "max_level": "65+", + "min_score": 85.0, + "max_score": 92.4, + "se_min": 4.3, + "se_max": 2.8, + "effect_size": 2.1665229868, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.4156111174, + "level_score_std_dev": 2.8812130933, + "se_difference": 5.1312766443, + "z_score": 1.4421362388, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.6571174175, + "gap_confidence_interval_95_upper": 17.4571174175, + "raw_n_min_group": 59, + "raw_n_max_group": 64, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "flexibility", + "demographic_factor": "Education", + "score_range": 2.1, + "min_level": "College", + "max_level": "No College", + "min_score": 89.1, + "max_score": 91.2, + "se_min": 2.6, + "se_max": 3.2, + "effect_size": 0.6148240909, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.4156111174, + "level_score_std_dev": 1.05, + "se_difference": 4.1231056256, + "z_score": 0.5093248126, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.9811385307, + "gap_confidence_interval_95_upper": 10.1811385307, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "flexibility", + "demographic_factor": "Ethnicity", + "score_range": 4.5, + "min_level": "Asian", + "max_level": "African American", + "min_score": 87.9, + "max_score": 92.4, + "se_min": 3.9, + "se_max": 3.0, + "effect_size": 1.3174801947, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.4156111174, + "level_score_std_dev": 1.8255136264, + "se_difference": 4.9203658401, + "z_score": 0.9145661413, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.1437398373, + "gap_confidence_interval_95_upper": 14.1437398373, + "raw_n_min_group": 40, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "flexibility", + "demographic_factor": "Politics", + "score_range": 3.7, + "min_level": "Republican", + "max_level": "Independent", + "min_score": 88.9, + "max_score": 92.6, + "se_min": 3.2, + "se_max": 2.5, + "effect_size": 1.0832614934, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.4156111174, + "level_score_std_dev": 1.5121728296, + "se_difference": 4.0607881008, + "z_score": 0.9111531821, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.2589984265, + "gap_confidence_interval_95_upper": 11.6589984265, + "raw_n_min_group": 163, + "raw_n_max_group": 185, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "flexibility", + "demographic_factor": "Sex", + "score_range": 1.3, + "min_level": "Male", + "max_level": "Female", + "min_score": 89.8, + "max_score": 91.1, + "se_min": 3.1, + "se_max": 2.8, + "effect_size": 0.3806053896, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.4156111174, + "level_score_std_dev": 0.65, + "se_difference": 4.1773197148, + "z_score": 0.3112043341, + "p_value": 0.9469805126, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.887396193, + "gap_confidence_interval_95_upper": 9.487396193, + "raw_n_min_group": 251, + "raw_n_max_group": 262, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "flexibility", + "demographic_factor": "Urbanicity", + "score_range": 0.3, + "min_level": "Suburban", + "max_level": "Rural", + "min_score": 90.3, + "max_score": 90.6, + "se_min": 2.9, + "se_max": 3.2, + "effect_size": 0.087832013, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 3.4156111174, + "level_score_std_dev": 0.1414213562, + "se_difference": 4.3185645763, + "z_score": 0.0694675267, + "p_value": 0.9826361403, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.1642310345, + "gap_confidence_interval_95_upper": 8.7642310345, + "raw_n_min_group": 252, + "raw_n_max_group": 87, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "helpfulness", + "demographic_factor": "Age", + "score_range": 8.2, + "min_level": "18-24", + "max_level": "65+", + "min_score": 82.6, + "max_score": 90.8, + "se_min": 4.5, + "se_max": 3.0, + "effect_size": 2.2764256733, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.6021382539, + "level_score_std_dev": 2.9844039047, + "se_difference": 5.4083269132, + "z_score": 1.5161805363, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.4001259665, + "gap_confidence_interval_95_upper": 18.8001259665, + "raw_n_min_group": 59, + "raw_n_max_group": 64, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "helpfulness", + "demographic_factor": "Education", + "score_range": 4.2, + "min_level": "College", + "max_level": "No College", + "min_score": 85.0, + "max_score": 89.2, + "se_min": 3.0, + "se_max": 3.5, + "effect_size": 1.1659741254, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.6021382539, + "level_score_std_dev": 2.1, + "se_difference": 4.6097722286, + "z_score": 0.9111079228, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.8349875451, + "gap_confidence_interval_95_upper": 13.2349875451, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "helpfulness", + "demographic_factor": "Ethnicity", + "score_range": 5.2, + "min_level": "Asian", + "max_level": "Hispanic", + "min_score": 84.2, + "max_score": 89.4, + "se_min": 4.5, + "se_max": 3.8, + "effect_size": 1.4435870124, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.6021382539, + "level_score_std_dev": 1.8991774535, + "se_difference": 5.8898217291, + "z_score": 0.8828790139, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.3438384643, + "gap_confidence_interval_95_upper": 16.7438384643, + "raw_n_min_group": 40, + "raw_n_max_group": 34, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "helpfulness", + "demographic_factor": "Politics", + "score_range": 3.3, + "min_level": "Republican", + "max_level": "Independent", + "min_score": 87.1, + "max_score": 90.4, + "se_min": 3.4, + "se_max": 2.9, + "effect_size": 0.9161225271, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.6021382539, + "level_score_std_dev": 1.5107025591, + "se_difference": 4.4687805943, + "z_score": 0.7384564828, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.4586490196, + "gap_confidence_interval_95_upper": 12.0586490196, + "raw_n_min_group": 163, + "raw_n_max_group": 185, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "helpfulness", + "demographic_factor": "Sex", + "score_range": 2.2, + "min_level": "Male", + "max_level": "Female", + "min_score": 86.6, + "max_score": 88.8, + "se_min": 3.6, + "se_max": 3.1, + "effect_size": 0.6107483514, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.6021382539, + "level_score_std_dev": 1.1, + "se_difference": 4.7507894081, + "z_score": 0.4630809348, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.111376138, + "gap_confidence_interval_95_upper": 11.511376138, + "raw_n_min_group": 251, + "raw_n_max_group": 262, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "helpfulness", + "demographic_factor": "Urbanicity", + "score_range": 1.6, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 87.2, + "max_score": 88.8, + "se_min": 3.4, + "se_max": 3.4, + "effect_size": 0.4441806192, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.6021382539, + "level_score_std_dev": 0.7118052168, + "se_difference": 4.8083261121, + "z_score": 0.3327561323, + "p_value": 0.9429935514, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.8241460056, + "gap_confidence_interval_95_upper": 11.0241460056, + "raw_n_min_group": 179, + "raw_n_max_group": 87, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "honesty_empathy_fairness", + "demographic_factor": "Age", + "score_range": 5.3, + "min_level": "45-54", + "max_level": "65+", + "min_score": 84.3, + "max_score": 89.6, + "se_min": 3.7, + "se_max": 3.2, + "effect_size": 1.3940976853, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.8017421993, + "level_score_std_dev": 1.5993922457, + "se_difference": 4.8918299235, + "z_score": 1.0834391389, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.2878104685, + "gap_confidence_interval_95_upper": 14.8878104685, + "raw_n_min_group": 83, + "raw_n_max_group": 64, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "honesty_empathy_fairness", + "demographic_factor": "Education", + "score_range": 7.3, + "min_level": "College", + "max_level": "No College", + "min_score": 83.1, + "max_score": 90.4, + "se_min": 3.2, + "se_max": 3.4, + "effect_size": 1.9201722835, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.8017421993, + "level_score_std_dev": 3.65, + "se_difference": 4.669047012, + "z_score": 1.5634882196, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -1.8511639856, + "gap_confidence_interval_95_upper": 16.4511639856, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "honesty_empathy_fairness", + "demographic_factor": "Ethnicity", + "score_range": 4.3, + "min_level": "Hispanic", + "max_level": "African American", + "min_score": 84.9, + "max_score": 89.2, + "se_min": 4.7, + "se_max": 3.5, + "effect_size": 1.1310603862, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.8017421993, + "level_score_std_dev": 1.6813313177, + "se_difference": 5.8600341296, + "z_score": 0.7337841222, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.1854558422, + "gap_confidence_interval_95_upper": 15.7854558422, + "raw_n_min_group": 34, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "honesty_empathy_fairness", + "demographic_factor": "Politics", + "score_range": 1.7, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 87.4, + "max_score": 89.1, + "se_min": 3.5, + "se_max": 3.2, + "effect_size": 0.4471634085, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.8017421993, + "level_score_std_dev": 0.7039570694, + "se_difference": 4.7423622806, + "z_score": 0.3584711373, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.5948592715, + "gap_confidence_interval_95_upper": 10.9948592715, + "raw_n_min_group": 170, + "raw_n_max_group": 185, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "honesty_empathy_fairness", + "demographic_factor": "Sex", + "score_range": 0.3, + "min_level": "Male", + "max_level": "Female", + "min_score": 87.8, + "max_score": 88.1, + "se_min": 3.4, + "se_max": 3.3, + "effect_size": 0.0789111897, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 3.8017421993, + "level_score_std_dev": 0.15, + "se_difference": 4.7381430962, + "z_score": 0.0633159434, + "p_value": 0.9844547785, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.9865898221, + "gap_confidence_interval_95_upper": 9.5865898221, + "raw_n_min_group": 251, + "raw_n_max_group": 262, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "honesty_empathy_fairness", + "demographic_factor": "Urbanicity", + "score_range": 2.7, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 86.8, + "max_score": 89.5, + "se_min": 3.6, + "se_max": 3.3, + "effect_size": 0.7102007076, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.8017421993, + "level_score_std_dev": 1.122497216, + "se_difference": 4.8836461788, + "z_score": 0.5528656052, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.8717706237, + "gap_confidence_interval_95_upper": 12.2717706237, + "raw_n_min_group": 179, + "raw_n_max_group": 87, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "intuitiveness", + "demographic_factor": "Age", + "score_range": 4.8, + "min_level": "45-54", + "max_level": "55-64", + "min_score": 85.4, + "max_score": 90.2, + "se_min": 3.6, + "se_max": 2.8, + "effect_size": 1.5492942633, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.0981848404, + "level_score_std_dev": 1.6224124698, + "se_difference": 4.5607017004, + "z_score": 1.0524696232, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.138811077, + "gap_confidence_interval_95_upper": 13.738811077, + "raw_n_min_group": 83, + "raw_n_max_group": 113, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "intuitiveness", + "demographic_factor": "Education", + "score_range": 6.4, + "min_level": "College", + "max_level": "No College", + "min_score": 84.0, + "max_score": 90.4, + "se_min": 3.2, + "se_max": 3.4, + "effect_size": 2.0657256844, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.0981848404, + "level_score_std_dev": 3.2, + "se_difference": 4.669047012, + "z_score": 1.370729398, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.7511639856, + "gap_confidence_interval_95_upper": 15.5511639856, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "intuitiveness", + "demographic_factor": "Ethnicity", + "score_range": 5.3, + "min_level": "Asian", + "max_level": "African American", + "min_score": 86.6, + "max_score": 91.9, + "se_min": 4.1, + "se_max": 3.0, + "effect_size": 1.7106790824, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.0981848404, + "level_score_std_dev": 1.9836834425, + "se_difference": 5.0803543184, + "z_score": 1.0432343234, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.6573114927, + "gap_confidence_interval_95_upper": 15.2573114927, + "raw_n_min_group": 40, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "intuitiveness", + "demographic_factor": "Politics", + "score_range": 1.7, + "min_level": "Republican", + "max_level": "Independent", + "min_score": 87.8, + "max_score": 89.5, + "se_min": 3.3, + "se_max": 3.1, + "effect_size": 0.5487083849, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.0981848404, + "level_score_std_dev": 0.740870359, + "se_difference": 4.5276925691, + "z_score": 0.3754671887, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.1741143684, + "gap_confidence_interval_95_upper": 10.5741143684, + "raw_n_min_group": 163, + "raw_n_max_group": 185, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "intuitiveness", + "demographic_factor": "Sex", + "score_range": 1.4, + "min_level": "Male", + "max_level": "Female", + "min_score": 87.5, + "max_score": 88.9, + "se_min": 3.5, + "se_max": 3.2, + "effect_size": 0.4518774935, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.0981848404, + "level_score_std_dev": 0.7, + "se_difference": 4.7423622806, + "z_score": 0.2952115248, + "p_value": 0.9469805126, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.8948592715, + "gap_confidence_interval_95_upper": 10.6948592715, + "raw_n_min_group": 251, + "raw_n_max_group": 262, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "intuitiveness", + "demographic_factor": "Urbanicity", + "score_range": 5.0, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 86.2, + "max_score": 91.2, + "se_min": 3.7, + "se_max": 3.0, + "effect_size": 1.613848191, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.0981848404, + "level_score_std_dev": 2.094967515, + "se_difference": 4.7634021455, + "z_score": 1.0496699307, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.3360966491, + "gap_confidence_interval_95_upper": 14.3360966491, + "raw_n_min_group": 179, + "raw_n_max_group": 87, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "personality", + "demographic_factor": "Age", + "score_range": 3.2, + "min_level": "18-24", + "max_level": "65+", + "min_score": 74.8, + "max_score": 78.0, + "se_min": 3.4, + "se_max": 3.3, + "effect_size": 0.9508783538, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.365309545, + "level_score_std_dev": 1.1653564071, + "se_difference": 4.7381430962, + "z_score": 0.6753700627, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.0865898221, + "gap_confidence_interval_95_upper": 12.4865898221, + "raw_n_min_group": 59, + "raw_n_max_group": 64, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "personality", + "demographic_factor": "Education", + "score_range": 1.1, + "min_level": "No College", + "max_level": "College", + "min_score": 76.8, + "max_score": 77.9, + "se_min": 3.5, + "se_max": 2.6, + "effect_size": 0.3268644341, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.365309545, + "level_score_std_dev": 0.55, + "se_difference": 4.3600458713, + "z_score": 0.2522909236, + "p_value": 0.9570722129, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.4455328787, + "gap_confidence_interval_95_upper": 9.6455328787, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "personality", + "demographic_factor": "Ethnicity", + "score_range": 3.7, + "min_level": "Hispanic", + "max_level": "African American", + "min_score": 76.4, + "max_score": 80.1, + "se_min": 3.9, + "se_max": 3.6, + "effect_size": 1.0994530965, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.365309545, + "level_score_std_dev": 1.4300349646, + "se_difference": 5.3075418039, + "z_score": 0.6971212167, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.7025907821, + "gap_confidence_interval_95_upper": 14.1025907821, + "raw_n_min_group": 34, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "personality", + "demographic_factor": "Politics", + "score_range": 2.9, + "min_level": "Independent", + "max_level": "Republican", + "min_score": 75.5, + "max_score": 78.4, + "se_min": 3.2, + "se_max": 3.1, + "effect_size": 0.8617335081, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.365309545, + "level_score_std_dev": 1.1860297916, + "se_difference": 4.455333882, + "z_score": 0.6509052019, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.8322939478, + "gap_confidence_interval_95_upper": 11.6322939478, + "raw_n_min_group": 185, + "raw_n_max_group": 163, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "personality", + "demographic_factor": "Sex", + "score_range": 0.9, + "min_level": "Male", + "max_level": "Female", + "min_score": 76.7, + "max_score": 77.6, + "se_min": 3.2, + "se_max": 3.2, + "effect_size": 0.267434537, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.365309545, + "level_score_std_dev": 0.45, + "se_difference": 4.5254833996, + "z_score": 0.1988737822, + "p_value": 0.9597457904, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.9697844758, + "gap_confidence_interval_95_upper": 9.7697844758, + "raw_n_min_group": 251, + "raw_n_max_group": 262, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "personality", + "demographic_factor": "Urbanicity", + "score_range": 1.1, + "min_level": "Rural", + "max_level": "Suburban", + "min_score": 76.5, + "max_score": 77.6, + "se_min": 3.3, + "se_max": 3.1, + "effect_size": 0.3268644341, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.365309545, + "level_score_std_dev": 0.4642796092, + "se_difference": 4.5276925691, + "z_score": 0.2429493574, + "p_value": 0.9570722129, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.7741143684, + "gap_confidence_interval_95_upper": 9.9741143684, + "raw_n_min_group": 87, + "raw_n_max_group": 252, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "personality_consistency", + "demographic_factor": "Age", + "score_range": 4.5, + "min_level": "25-34", + "max_level": "55-64", + "min_score": 85.4, + "max_score": 89.9, + "se_min": 3.7, + "se_max": 2.7, + "effect_size": 1.4097081489, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1921500939, + "level_score_std_dev": 1.4313940369, + "se_difference": 4.5803929962, + "z_score": 0.9824484501, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.4774053077, + "gap_confidence_interval_95_upper": 13.4774053077, + "raw_n_min_group": 106, + "raw_n_max_group": 113, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "personality_consistency", + "demographic_factor": "Education", + "score_range": 6.1, + "min_level": "College", + "max_level": "No College", + "min_score": 83.0, + "max_score": 89.1, + "se_min": 3.1, + "se_max": 3.5, + "effect_size": 1.910937713, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1921500939, + "level_score_std_dev": 3.05, + "se_difference": 4.675467891, + "z_score": 1.3046822569, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.0637486773, + "gap_confidence_interval_95_upper": 15.2637486773, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "personality_consistency", + "demographic_factor": "Ethnicity", + "score_range": 1.3, + "min_level": "Asian", + "max_level": "White", + "min_score": 85.9, + "max_score": 87.2, + "se_min": 4.0, + "se_max": 3.1, + "effect_size": 0.4072490208, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.1921500939, + "level_score_std_dev": 0.5402545696, + "se_difference": 5.0606323716, + "z_score": 0.256884892, + "p_value": 0.9570722129, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.6186571872, + "gap_confidence_interval_95_upper": 11.2186571872, + "raw_n_min_group": 40, + "raw_n_max_group": 347, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "personality_consistency", + "demographic_factor": "Politics", + "score_range": 2.5, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 86.4, + "max_score": 88.9, + "se_min": 3.5, + "se_max": 3.1, + "effect_size": 0.7831711938, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.1921500939, + "level_score_std_dev": 1.0964589469, + "se_difference": 4.675467891, + "z_score": 0.534705843, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.6637486773, + "gap_confidence_interval_95_upper": 11.6637486773, + "raw_n_min_group": 170, + "raw_n_max_group": 185, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "personality_consistency", + "demographic_factor": "Sex", + "score_range": 0.9, + "min_level": "Male", + "max_level": "Female", + "min_score": 86.6, + "max_score": 87.5, + "se_min": 3.4, + "se_max": 3.3, + "effect_size": 0.2819416298, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.1921500939, + "level_score_std_dev": 0.45, + "se_difference": 4.7381430962, + "z_score": 0.1899478301, + "p_value": 0.9597457904, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.3865898221, + "gap_confidence_interval_95_upper": 10.1865898221, + "raw_n_min_group": 251, + "raw_n_max_group": 262, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "personality_consistency", + "demographic_factor": "Urbanicity", + "score_range": 4.4, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 85.1, + "max_score": 89.5, + "se_min": 3.6, + "se_max": 3.1, + "effect_size": 1.3783813012, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1921500939, + "level_score_std_dev": 1.8184242263, + "se_difference": 4.7507894081, + "z_score": 0.9261618695, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.911376138, + "gap_confidence_interval_95_upper": 13.711376138, + "raw_n_min_group": 179, + "raw_n_max_group": 87, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "tone_and_language_style", + "demographic_factor": "Age", + "score_range": 9.1, + "min_level": "45-54", + "max_level": "65+", + "min_score": 84.8, + "max_score": 93.9, + "se_min": 0.0, + "se_max": 0.0, + "effect_size": 2.7248155211, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.339675633, + "level_score_std_dev": 3.367326668, + "se_difference": 0.0, + "z_score": 9999.9, + "p_value": 0.0, + "is_statistically_significant": true, + "gap_confidence_interval_95_lower": 9.1, + "gap_confidence_interval_95_upper": 9.1, + "raw_n_min_group": 83, + "raw_n_max_group": 64, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": true + }, + { + "model": "llama-3.1-405b-instruct", + "category": "tone_and_language_style", + "demographic_factor": "Education", + "score_range": 4.0, + "min_level": "College", + "max_level": "No College", + "min_score": 87.7, + "max_score": 91.7, + "se_min": 0.0, + "se_max": 0.0, + "effect_size": 1.1977211082, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.339675633, + "level_score_std_dev": 2.0, + "se_difference": 0.0, + "z_score": 9999.9, + "p_value": 0.0, + "is_statistically_significant": true, + "gap_confidence_interval_95_lower": 4.0, + "gap_confidence_interval_95_upper": 4.0, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": true + }, + { + "model": "llama-3.1-405b-instruct", + "category": "tone_and_language_style", + "demographic_factor": "Ethnicity", + "score_range": 4.7, + "min_level": "Asian", + "max_level": "African American", + "min_score": 87.6, + "max_score": 92.3, + "se_min": 0.0, + "se_max": 0.0, + "effect_size": 1.4073223021, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.339675633, + "level_score_std_dev": 2.016649449, + "se_difference": 0.0, + "z_score": 9999.9, + "p_value": 0.0, + "is_statistically_significant": true, + "gap_confidence_interval_95_lower": 4.7, + "gap_confidence_interval_95_upper": 4.7, + "raw_n_min_group": 40, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": true + }, + { + "model": "llama-3.1-405b-instruct", + "category": "tone_and_language_style", + "demographic_factor": "Politics", + "score_range": 1.1, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 89.8, + "max_score": 90.9, + "se_min": 0.0, + "se_max": 0.0, + "effect_size": 0.3293733047, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.339675633, + "level_score_std_dev": 0.4642796092, + "se_difference": 0.0, + "z_score": 9999.9, + "p_value": 0.0, + "is_statistically_significant": true, + "gap_confidence_interval_95_lower": 1.1, + "gap_confidence_interval_95_upper": 1.1, + "raw_n_min_group": 170, + "raw_n_max_group": 185, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "tone_and_language_style", + "demographic_factor": "Sex", + "score_range": 0.2, + "min_level": "Male", + "max_level": "Female", + "min_score": 90.2, + "max_score": 90.4, + "se_min": 0.0, + "se_max": 0.0, + "effect_size": 0.0598860554, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 3.339675633, + "level_score_std_dev": 0.1, + "se_difference": 0.0, + "z_score": 9999.9, + "p_value": 0.0, + "is_statistically_significant": true, + "gap_confidence_interval_95_lower": 0.2, + "gap_confidence_interval_95_upper": 0.2, + "raw_n_min_group": 251, + "raw_n_max_group": 262, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "tone_and_language_style", + "demographic_factor": "Urbanicity", + "score_range": 1.8, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 89.1, + "max_score": 90.9, + "se_min": 0.0, + "se_max": 0.0, + "effect_size": 0.5389744987, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.339675633, + "level_score_std_dev": 0.8259674462, + "se_difference": 0.0, + "z_score": 9999.9, + "p_value": 0.0, + "is_statistically_significant": true, + "gap_confidence_interval_95_lower": 1.8, + "gap_confidence_interval_95_upper": 1.8, + "raw_n_min_group": 179, + "raw_n_max_group": 87, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "transparency", + "demographic_factor": "Age", + "score_range": 7.1, + "min_level": "18-24", + "max_level": "25-34", + "min_score": 74.7, + "max_score": 81.8, + "se_min": 5.3, + "se_max": 4.4, + "effect_size": 1.5729640036, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.5137714429, + "level_score_std_dev": 2.3589663461, + "se_difference": 6.8883960397, + "z_score": 1.0307189016, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.4010081491, + "gap_confidence_interval_95_upper": 20.6010081491, + "raw_n_min_group": 59, + "raw_n_max_group": 106, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "transparency", + "demographic_factor": "Education", + "score_range": 7.4, + "min_level": "College", + "max_level": "No College", + "min_score": 74.6, + "max_score": 82.0, + "se_min": 4.1, + "se_max": 4.7, + "effect_size": 1.6394272713, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.5137714429, + "level_score_std_dev": 3.7, + "se_difference": 6.2369864518, + "z_score": 1.1864704304, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.8242688176, + "gap_confidence_interval_95_upper": 19.6242688176, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "transparency", + "demographic_factor": "Ethnicity", + "score_range": 9.8, + "min_level": "Hispanic", + "max_level": "African American", + "min_score": 77.1, + "max_score": 86.9, + "se_min": 6.1, + "se_max": 3.9, + "effect_size": 2.1711334134, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.5137714429, + "level_score_std_dev": 3.7543308325, + "se_difference": 7.240165744, + "z_score": 1.3535601734, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.3904641003, + "gap_confidence_interval_95_upper": 23.9904641003, + "raw_n_min_group": 34, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "transparency", + "demographic_factor": "Politics", + "score_range": 3.7, + "min_level": "Independent", + "max_level": "Democrat", + "min_score": 77.3, + "max_score": 81.0, + "se_min": 4.8, + "se_max": 4.4, + "effect_size": 0.8197136357, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.5137714429, + "level_score_std_dev": 1.5253414918, + "se_difference": 6.5115282384, + "z_score": 0.5682229831, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.0623608317, + "gap_confidence_interval_95_upper": 16.4623608317, + "raw_n_min_group": 185, + "raw_n_max_group": 170, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "transparency", + "demographic_factor": "Sex", + "score_range": 1.6, + "min_level": "Male", + "max_level": "Female", + "min_score": 78.6, + "max_score": 80.2, + "se_min": 4.7, + "se_max": 4.4, + "effect_size": 0.3544707614, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 4.5137714429, + "level_score_std_dev": 0.8, + "se_difference": 6.4381674411, + "z_score": 0.2485179229, + "p_value": 0.9570722129, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -11.018576311, + "gap_confidence_interval_95_upper": 14.218576311, + "raw_n_min_group": 251, + "raw_n_max_group": 262, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "transparency", + "demographic_factor": "Urbanicity", + "score_range": 4.2, + "min_level": "Suburban", + "max_level": "Rural", + "min_score": 77.4, + "max_score": 81.6, + "se_min": 4.5, + "se_max": 4.5, + "effect_size": 0.9304857486, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.5137714429, + "level_score_std_dev": 1.7378147197, + "se_difference": 6.3639610307, + "z_score": 0.6599663291, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.2731344191, + "gap_confidence_interval_95_upper": 16.6731344191, + "raw_n_min_group": 252, + "raw_n_max_group": 87, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "trustworthiness", + "demographic_factor": "Age", + "score_range": 6.0, + "min_level": "18-24", + "max_level": "65+", + "min_score": 83.6, + "max_score": 89.6, + "se_min": 3.9, + "se_max": 3.0, + "effect_size": 1.8961723836, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1642692679, + "level_score_std_dev": 2.3021728866, + "se_difference": 4.9203658401, + "z_score": 1.2194215217, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.6437398373, + "gap_confidence_interval_95_upper": 15.6437398373, + "raw_n_min_group": 59, + "raw_n_max_group": 64, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "trustworthiness", + "demographic_factor": "Education", + "score_range": 3.4, + "min_level": "College", + "max_level": "No College", + "min_score": 84.6, + "max_score": 88.0, + "se_min": 2.8, + "se_max": 3.4, + "effect_size": 1.074497684, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1642692679, + "level_score_std_dev": 1.7, + "se_difference": 4.4045431091, + "z_score": 0.7719302356, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.2327458622, + "gap_confidence_interval_95_upper": 12.0327458622, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "trustworthiness", + "demographic_factor": "Ethnicity", + "score_range": 2.6, + "min_level": "Hispanic", + "max_level": "African American", + "min_score": 84.7, + "max_score": 87.3, + "se_min": 4.3, + "se_max": 3.5, + "effect_size": 0.8216746995, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1642692679, + "level_score_std_dev": 1.1453711189, + "se_difference": 5.5443665103, + "z_score": 0.4689444674, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.2667586772, + "gap_confidence_interval_95_upper": 13.4667586772, + "raw_n_min_group": 34, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "trustworthiness", + "demographic_factor": "Politics", + "score_range": 3.0, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 85.8, + "max_score": 88.8, + "se_min": 3.4, + "se_max": 2.9, + "effect_size": 0.9480861918, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1642692679, + "level_score_std_dev": 1.2283683848, + "se_difference": 4.4687805943, + "z_score": 0.6713240753, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.7586490196, + "gap_confidence_interval_95_upper": 11.7586490196, + "raw_n_min_group": 170, + "raw_n_max_group": 185, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "trustworthiness", + "demographic_factor": "Sex", + "score_range": 1.0, + "min_level": "Male", + "max_level": "Female", + "min_score": 86.3, + "max_score": 87.3, + "se_min": 3.3, + "se_max": 3.1, + "effect_size": 0.3160287306, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.1642692679, + "level_score_std_dev": 0.5, + "se_difference": 4.5276925691, + "z_score": 0.2208630521, + "p_value": 0.9570722129, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.8741143684, + "gap_confidence_interval_95_upper": 9.8741143684, + "raw_n_min_group": 251, + "raw_n_max_group": 262, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "trustworthiness", + "demographic_factor": "Urbanicity", + "score_range": 3.3, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 85.3, + "max_score": 88.6, + "se_min": 3.4, + "se_max": 3.1, + "effect_size": 1.042894811, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1642692679, + "level_score_std_dev": 1.352364185, + "se_difference": 4.6010868281, + "z_score": 0.7172218485, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.7179644729, + "gap_confidence_interval_95_upper": 12.3179644729, + "raw_n_min_group": 179, + "raw_n_max_group": 87, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "understanding", + "demographic_factor": "Age", + "score_range": 7.6, + "min_level": "18-24", + "max_level": "55-64", + "min_score": 83.4, + "max_score": 91.0, + "se_min": 3.6, + "se_max": 2.3, + "effect_size": 2.561235594, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9673178124, + "level_score_std_dev": 2.5983434894, + "se_difference": 4.2720018727, + "z_score": 1.7790254374, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -0.7729698123, + "gap_confidence_interval_95_upper": 15.9729698123, + "raw_n_min_group": 59, + "raw_n_max_group": 113, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "understanding", + "demographic_factor": "Education", + "score_range": 2.1, + "min_level": "College", + "max_level": "No College", + "min_score": 87.4, + "max_score": 89.5, + "se_min": 2.4, + "se_max": 3.0, + "effect_size": 0.7077098352, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 2.9673178124, + "level_score_std_dev": 1.05, + "se_difference": 3.8418745425, + "z_score": 0.5466081666, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.4299357363, + "gap_confidence_interval_95_upper": 9.6299357363, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "understanding", + "demographic_factor": "Ethnicity", + "score_range": 4.3, + "min_level": "Asian", + "max_level": "Hispanic", + "min_score": 86.9, + "max_score": 91.2, + "se_min": 3.5, + "se_max": 3.1, + "effect_size": 1.4491201387, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9673178124, + "level_score_std_dev": 1.5540270268, + "se_difference": 4.675467891, + "z_score": 0.9196940499, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.8637486773, + "gap_confidence_interval_95_upper": 13.4637486773, + "raw_n_min_group": 40, + "raw_n_max_group": 34, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "understanding", + "demographic_factor": "Politics", + "score_range": 2.7, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 88.2, + "max_score": 90.9, + "se_min": 2.9, + "se_max": 2.5, + "effect_size": 0.9099126452, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9673178124, + "level_score_std_dev": 1.2283683848, + "se_difference": 3.8288379438, + "z_score": 0.7051747918, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.8043844725, + "gap_confidence_interval_95_upper": 10.2043844725, + "raw_n_min_group": 170, + "raw_n_max_group": 185, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "understanding", + "demographic_factor": "Sex", + "score_range": 0.7, + "min_level": "Male", + "max_level": "Female", + "min_score": 88.4, + "max_score": 89.1, + "se_min": 2.9, + "se_max": 2.7, + "effect_size": 0.2359032784, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 2.9673178124, + "level_score_std_dev": 0.35, + "se_difference": 3.9623225512, + "z_score": 0.1766640628, + "p_value": 0.9597457904, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.0660094955, + "gap_confidence_interval_95_upper": 8.4660094955, + "raw_n_min_group": 251, + "raw_n_max_group": 262, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "understanding", + "demographic_factor": "Urbanicity", + "score_range": 3.8, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 87.5, + "max_score": 91.3, + "se_min": 3.0, + "se_max": 2.6, + "effect_size": 1.280617797, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9673178124, + "level_score_std_dev": 1.7048949137, + "se_difference": 3.9698866483, + "z_score": 0.9572061715, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.9808348533, + "gap_confidence_interval_95_upper": 11.5808348533, + "raw_n_min_group": 179, + "raw_n_max_group": 87, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "usefulness", + "demographic_factor": "Age", + "score_range": 7.7, + "min_level": "45-54", + "max_level": "65+", + "min_score": 86.4, + "max_score": 94.1, + "se_min": 3.9, + "se_max": 2.5, + "effect_size": 2.2908737994, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.3611628899, + "level_score_std_dev": 2.4383509911, + "se_difference": 4.6324939288, + "z_score": 1.6621716333, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -1.379521259, + "gap_confidence_interval_95_upper": 16.779521259, + "raw_n_min_group": 83, + "raw_n_max_group": 64, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "usefulness", + "demographic_factor": "Education", + "score_range": 5.1, + "min_level": "College", + "max_level": "No College", + "min_score": 87.2, + "max_score": 92.3, + "se_min": 3.2, + "se_max": 3.2, + "effect_size": 1.517331997, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.3611628899, + "level_score_std_dev": 2.55, + "se_difference": 4.5254833996, + "z_score": 1.1269514325, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.7697844758, + "gap_confidence_interval_95_upper": 13.9697844758, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "usefulness", + "demographic_factor": "Ethnicity", + "score_range": 7.2, + "min_level": "Asian", + "max_level": "Hispanic", + "min_score": 85.8, + "max_score": 93.0, + "se_min": 4.7, + "se_max": 3.4, + "effect_size": 2.1421157605, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.3611628899, + "level_score_std_dev": 2.8262165522, + "se_difference": 5.8008620049, + "z_score": 1.2411948421, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.1694806089, + "gap_confidence_interval_95_upper": 18.5694806089, + "raw_n_min_group": 40, + "raw_n_max_group": 34, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "usefulness", + "demographic_factor": "Politics", + "score_range": 1.4, + "min_level": "Republican", + "max_level": "Independent", + "min_score": 90.1, + "max_score": 91.5, + "se_min": 3.2, + "se_max": 3.0, + "effect_size": 0.416522509, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.3611628899, + "level_score_std_dev": 0.5792715732, + "se_difference": 4.3863424399, + "z_score": 0.3191725268, + "p_value": 0.9469805126, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.197073206, + "gap_confidence_interval_95_upper": 9.997073206, + "raw_n_min_group": 163, + "raw_n_max_group": 185, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "usefulness", + "demographic_factor": "Sex", + "score_range": 1.5, + "min_level": "Male", + "max_level": "Female", + "min_score": 89.8, + "max_score": 91.3, + "se_min": 3.4, + "se_max": 3.1, + "effect_size": 0.4462741168, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.3611628899, + "level_score_std_dev": 0.75, + "se_difference": 4.6010868281, + "z_score": 0.3260099311, + "p_value": 0.9446615231, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.5179644729, + "gap_confidence_interval_95_upper": 10.5179644729, + "raw_n_min_group": 251, + "raw_n_max_group": 262, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "llama-3.1-405b-instruct", + "category": "usefulness", + "demographic_factor": "Urbanicity", + "score_range": 2.2, + "min_level": "Suburban", + "max_level": "Rural", + "min_score": 89.8, + "max_score": 92.0, + "se_min": 3.2, + "se_max": 3.1, + "effect_size": 0.6545353713, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.3611628899, + "level_score_std_dev": 0.956846673, + "se_difference": 4.455333882, + "z_score": 0.4937901532, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.5322939478, + "gap_confidence_interval_95_upper": 10.9322939478, + "raw_n_min_group": 252, + "raw_n_max_group": 87, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "accuracy", + "demographic_factor": "Age", + "score_range": 7.3, + "min_level": "45-54", + "max_level": "35-44", + "min_score": 84.1, + "max_score": 91.4, + "se_min": 4.0, + "se_max": 2.9, + "effect_size": 2.484677897, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9380065757, + "level_score_std_dev": 2.2662377045, + "se_difference": 4.9406477308, + "z_score": 1.4775390592, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.3834916127, + "gap_confidence_interval_95_upper": 16.9834916127, + "raw_n_min_group": 69, + "raw_n_max_group": 72, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "accuracy", + "demographic_factor": "Education", + "score_range": 1.6, + "min_level": "College", + "max_level": "No College", + "min_score": 86.8, + "max_score": 88.4, + "se_min": 3.0, + "se_max": 3.9, + "effect_size": 0.5445869363, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 2.9380065757, + "level_score_std_dev": 0.8, + "se_difference": 4.9203658401, + "z_score": 0.3251790725, + "p_value": 0.9446615231, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.0437398373, + "gap_confidence_interval_95_upper": 11.2437398373, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "accuracy", + "demographic_factor": "Ethnicity", + "score_range": 5.9, + "min_level": "Hispanic", + "max_level": "White", + "min_score": 82.7, + "max_score": 88.6, + "se_min": 5.2, + "se_max": 3.2, + "effect_size": 2.0081643277, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9380065757, + "level_score_std_dev": 2.3054283767, + "se_difference": 6.105735009, + "z_score": 0.9663046286, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.0670207168, + "gap_confidence_interval_95_upper": 17.8670207168, + "raw_n_min_group": 30, + "raw_n_max_group": 285, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "accuracy", + "demographic_factor": "Politics", + "score_range": 2.4, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 87.0, + "max_score": 89.4, + "se_min": 3.8, + "se_max": 3.4, + "effect_size": 0.8168804045, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9380065757, + "level_score_std_dev": 0.9809292646, + "se_difference": 5.0990195136, + "z_score": 0.4706787243, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.5938946031, + "gap_confidence_interval_95_upper": 12.3938946031, + "raw_n_min_group": 137, + "raw_n_max_group": 154, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "accuracy", + "demographic_factor": "Sex", + "score_range": 1.5, + "min_level": "Male", + "max_level": "Female", + "min_score": 87.1, + "max_score": 88.6, + "se_min": 3.8, + "se_max": 3.4, + "effect_size": 0.5105502528, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 2.9380065757, + "level_score_std_dev": 0.75, + "se_difference": 5.0990195136, + "z_score": 0.2941742027, + "p_value": 0.9469805126, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.4938946031, + "gap_confidence_interval_95_upper": 11.4938946031, + "raw_n_min_group": 207, + "raw_n_max_group": 222, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "accuracy", + "demographic_factor": "Urbanicity", + "score_range": 3.4, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 86.6, + "max_score": 90.0, + "se_min": 3.9, + "se_max": 3.4, + "effect_size": 1.1572472397, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9380065757, + "level_score_std_dev": 1.4817407181, + "se_difference": 5.1739733281, + "z_score": 0.6571352004, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.7408013801, + "gap_confidence_interval_95_upper": 13.5408013801, + "raw_n_min_group": 155, + "raw_n_max_group": 74, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "adaptiveness", + "demographic_factor": "Age", + "score_range": 4.4, + "min_level": "18-24", + "max_level": "55-64", + "min_score": 81.4, + "max_score": 85.8, + "se_min": 4.0, + "se_max": 3.3, + "effect_size": 1.122804866, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.9187575093, + "level_score_std_dev": 1.8892238265, + "se_difference": 5.185556865, + "z_score": 0.8485106064, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.7635046952, + "gap_confidence_interval_95_upper": 14.5635046952, + "raw_n_min_group": 52, + "raw_n_max_group": 95, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "adaptiveness", + "demographic_factor": "Education", + "score_range": 3.6, + "min_level": "College", + "max_level": "No College", + "min_score": 82.0, + "max_score": 85.6, + "se_min": 3.1, + "se_max": 3.8, + "effect_size": 0.9186585267, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.9187575093, + "level_score_std_dev": 1.8, + "se_difference": 4.9040799341, + "z_score": 0.7340826513, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.0118200481, + "gap_confidence_interval_95_upper": 13.2118200481, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "adaptiveness", + "demographic_factor": "Ethnicity", + "score_range": 5.9, + "min_level": "Asian", + "max_level": "African American", + "min_score": 80.3, + "max_score": 86.2, + "se_min": 4.6, + "se_max": 3.8, + "effect_size": 1.5055792521, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.9187575093, + "level_score_std_dev": 2.1505813168, + "se_difference": 5.9665735561, + "z_score": 0.9888422467, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.794269281, + "gap_confidence_interval_95_upper": 17.594269281, + "raw_n_min_group": 34, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "adaptiveness", + "demographic_factor": "Politics", + "score_range": 5.2, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 82.3, + "max_score": 87.5, + "se_min": 3.9, + "se_max": 3.3, + "effect_size": 1.3269512052, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.9187575093, + "level_score_std_dev": 2.1312489818, + "se_difference": 5.1088159098, + "z_score": 1.0178483805, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.8130951868, + "gap_confidence_interval_95_upper": 15.2130951868, + "raw_n_min_group": 137, + "raw_n_max_group": 154, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "adaptiveness", + "demographic_factor": "Sex", + "score_range": 1.4, + "min_level": "Male", + "max_level": "Female", + "min_score": 83.7, + "max_score": 85.1, + "se_min": 3.7, + "se_max": 3.5, + "effect_size": 0.3572560937, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.9187575093, + "level_score_std_dev": 0.7, + "se_difference": 5.0931326313, + "z_score": 0.2748799416, + "p_value": 0.9548678911, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.5823565258, + "gap_confidence_interval_95_upper": 11.3823565258, + "raw_n_min_group": 207, + "raw_n_max_group": 222, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "adaptiveness", + "demographic_factor": "Urbanicity", + "score_range": 3.5, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 83.3, + "max_score": 86.8, + "se_min": 3.8, + "se_max": 3.5, + "effect_size": 0.8931402343, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.9187575093, + "level_score_std_dev": 1.604853749, + "se_difference": 5.1662365412, + "z_score": 0.6774757548, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.6256375564, + "gap_confidence_interval_95_upper": 13.6256375564, + "raw_n_min_group": 155, + "raw_n_max_group": 74, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "background_and_culture", + "demographic_factor": "Age", + "score_range": 5.6, + "min_level": "18-24", + "max_level": "65+", + "min_score": 69.2, + "max_score": 74.8, + "se_min": 3.7, + "se_max": 3.9, + "effect_size": 1.6189413698, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.459050528, + "level_score_std_dev": 2.1414558911, + "se_difference": 5.3758720223, + "z_score": 1.0416914645, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.9365155492, + "gap_confidence_interval_95_upper": 16.1365155492, + "raw_n_min_group": 52, + "raw_n_max_group": 54, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "background_and_culture", + "demographic_factor": "Education", + "score_range": 1.0, + "min_level": "College", + "max_level": "No College", + "min_score": 72.1, + "max_score": 73.1, + "se_min": 3.0, + "se_max": 4.0, + "effect_size": 0.2890966732, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.459050528, + "level_score_std_dev": 0.5, + "se_difference": 5.0, + "z_score": 0.2, + "p_value": 0.9597457904, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.7998199227, + "gap_confidence_interval_95_upper": 10.7998199227, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "background_and_culture", + "demographic_factor": "Ethnicity", + "score_range": 9.2, + "min_level": "Asian", + "max_level": "African American", + "min_score": 67.6, + "max_score": 76.8, + "se_min": 3.8, + "se_max": 4.3, + "effect_size": 2.6596893932, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.459050528, + "level_score_std_dev": 3.8661996844, + "se_difference": 5.7384666942, + "z_score": 1.6032157178, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.047188047, + "gap_confidence_interval_95_upper": 20.447188047, + "raw_n_min_group": 34, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "background_and_culture", + "demographic_factor": "Politics", + "score_range": 3.9, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 71.7, + "max_score": 75.6, + "se_min": 3.7, + "se_max": 3.8, + "effect_size": 1.1274770254, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.459050528, + "level_score_std_dev": 1.6673332001, + "se_difference": 5.3037722425, + "z_score": 0.7353256931, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.4952025774, + "gap_confidence_interval_95_upper": 14.2952025774, + "raw_n_min_group": 137, + "raw_n_max_group": 154, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "background_and_culture", + "demographic_factor": "Sex", + "score_range": 0.0, + "min_level": "Female", + "max_level": "Female", + "min_score": 72.8, + "max_score": 72.8, + "se_min": 3.7, + "se_max": 3.7, + "effect_size": 0.0, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 3.459050528, + "level_score_std_dev": 0.0, + "se_difference": 5.2325901808, + "z_score": 0.0, + "p_value": 1.0, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -10.2556883002, + "gap_confidence_interval_95_upper": 10.2556883002, + "raw_n_min_group": 222, + "raw_n_max_group": 222, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "background_and_culture", + "demographic_factor": "Urbanicity", + "score_range": 2.2, + "min_level": "Suburban", + "max_level": "Urban", + "min_score": 72.1, + "max_score": 74.3, + "se_min": 3.5, + "se_max": 3.8, + "effect_size": 0.636012681, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.459050528, + "level_score_std_dev": 1.0143416036, + "se_difference": 5.1662365412, + "z_score": 0.425841903, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.9256375564, + "gap_confidence_interval_95_upper": 12.3256375564, + "raw_n_min_group": 204, + "raw_n_max_group": 155, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "bias_and_stereotypes", + "demographic_factor": "Age", + "score_range": 8.5, + "min_level": "18-24", + "max_level": "35-44", + "min_score": 81.0, + "max_score": 89.5, + "se_min": 5.4, + "se_max": 3.4, + "effect_size": 3.0530078264, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.7841396038, + "level_score_std_dev": 3.0953818648, + "se_difference": 6.3812224534, + "z_score": 1.3320331742, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.006966186, + "gap_confidence_interval_95_upper": 21.006966186, + "raw_n_min_group": 52, + "raw_n_max_group": 72, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "bias_and_stereotypes", + "demographic_factor": "Education", + "score_range": 4.2, + "min_level": "College", + "max_level": "No College", + "min_score": 85.0, + "max_score": 89.2, + "se_min": 3.6, + "se_max": 4.0, + "effect_size": 1.5085450436, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.7841396038, + "level_score_std_dev": 2.1, + "se_difference": 5.3814496188, + "z_score": 0.7804588536, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.3474474375, + "gap_confidence_interval_95_upper": 14.7474474375, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "bias_and_stereotypes", + "demographic_factor": "Ethnicity", + "score_range": 3.4, + "min_level": "Hispanic", + "max_level": "African American", + "min_score": 85.3, + "max_score": 88.7, + "se_min": 5.2, + "se_max": 4.1, + "effect_size": 1.2212031305, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.7841396038, + "level_score_std_dev": 1.2930100541, + "se_difference": 6.6219332525, + "z_score": 0.5134452237, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.5787506828, + "gap_confidence_interval_95_upper": 16.3787506828, + "raw_n_min_group": 30, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "bias_and_stereotypes", + "demographic_factor": "Politics", + "score_range": 0.7, + "min_level": "Republican", + "max_level": "Independent", + "min_score": 87.6, + "max_score": 88.3, + "se_min": 3.8, + "se_max": 3.8, + "effect_size": 0.2514241739, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 2.7841396038, + "level_score_std_dev": 0.2943920289, + "se_difference": 5.374011537, + "z_score": 0.1302565123, + "p_value": 0.9769857447, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.8328690651, + "gap_confidence_interval_95_upper": 11.2328690651, + "raw_n_min_group": 142, + "raw_n_max_group": 154, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "bias_and_stereotypes", + "demographic_factor": "Sex", + "score_range": 0.8, + "min_level": "Female", + "max_level": "Male", + "min_score": 87.4, + "max_score": 88.2, + "se_min": 3.9, + "se_max": 3.8, + "effect_size": 0.2873419131, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 2.7841396038, + "level_score_std_dev": 0.4, + "se_difference": 5.445181356, + "z_score": 0.1469188899, + "p_value": 0.9666249838, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.8723593471, + "gap_confidence_interval_95_upper": 11.4723593471, + "raw_n_min_group": 222, + "raw_n_max_group": 207, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "bias_and_stereotypes", + "demographic_factor": "Urbanicity", + "score_range": 5.1, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 85.8, + "max_score": 90.9, + "se_min": 4.3, + "se_max": 3.4, + "effect_size": 1.8318046958, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.7841396038, + "level_score_std_dev": 2.1771541057, + "se_difference": 5.4817880295, + "z_score": 0.930353376, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.6441071086, + "gap_confidence_interval_95_upper": 15.8441071086, + "raw_n_min_group": 155, + "raw_n_max_group": 74, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "clarity", + "demographic_factor": "Age", + "score_range": 7.7, + "min_level": "45-54", + "max_level": "65+", + "min_score": 77.5, + "max_score": 85.2, + "se_min": 4.9, + "se_max": 4.4, + "effect_size": 1.7054074463, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.5150500643, + "level_score_std_dev": 2.8738282636, + "se_difference": 6.5855903304, + "z_score": 1.1692194038, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.2075198645, + "gap_confidence_interval_95_upper": 20.6075198645, + "raw_n_min_group": 69, + "raw_n_max_group": 54, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "clarity", + "demographic_factor": "Education", + "score_range": 2.5, + "min_level": "College", + "max_level": "No College", + "min_score": 80.6, + "max_score": 83.1, + "se_min": 3.9, + "se_max": 4.8, + "effect_size": 0.5537037163, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 4.5150500643, + "level_score_std_dev": 1.25, + "se_difference": 6.1846584384, + "z_score": 0.4042260417, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.621707796, + "gap_confidence_interval_95_upper": 14.621707796, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "clarity", + "demographic_factor": "Ethnicity", + "score_range": 9.7, + "min_level": "Hispanic", + "max_level": "African American", + "min_score": 75.9, + "max_score": 85.6, + "se_min": 6.4, + "se_max": 4.6, + "effect_size": 2.1483704193, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.5150500643, + "level_score_std_dev": 3.6002604072, + "se_difference": 7.8816241981, + "z_score": 1.2307107972, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.7476995679, + "gap_confidence_interval_95_upper": 25.1476995679, + "raw_n_min_group": 30, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "clarity", + "demographic_factor": "Politics", + "score_range": 1.4, + "min_level": "Democrat", + "max_level": "Republican", + "min_score": 81.5, + "max_score": 82.9, + "se_min": 4.7, + "se_max": 4.4, + "effect_size": 0.3100740811, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 4.5150500643, + "level_score_std_dev": 0.5887840578, + "se_difference": 6.4381674411, + "z_score": 0.2174531826, + "p_value": 0.9570722129, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -11.218576311, + "gap_confidence_interval_95_upper": 14.018576311, + "raw_n_min_group": 137, + "raw_n_max_group": 142, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "clarity", + "demographic_factor": "Sex", + "score_range": 1.6, + "min_level": "Male", + "max_level": "Female", + "min_score": 81.4, + "max_score": 83.0, + "se_min": 4.7, + "se_max": 4.4, + "effect_size": 0.3543703784, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 4.5150500643, + "level_score_std_dev": 0.8, + "se_difference": 6.4381674411, + "z_score": 0.2485179229, + "p_value": 0.9570722129, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -11.018576311, + "gap_confidence_interval_95_upper": 14.218576311, + "raw_n_min_group": 207, + "raw_n_max_group": 222, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "clarity", + "demographic_factor": "Urbanicity", + "score_range": 5.3, + "min_level": "Suburban", + "max_level": "Rural", + "min_score": 80.4, + "max_score": 85.7, + "se_min": 4.6, + "se_max": 4.3, + "effect_size": 1.1738518786, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.5150500643, + "level_score_std_dev": 2.3156472577, + "se_difference": 6.2968245966, + "z_score": 0.8416940823, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.0415494262, + "gap_confidence_interval_95_upper": 17.6415494262, + "raw_n_min_group": 204, + "raw_n_max_group": 74, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "communication", + "demographic_factor": "Age", + "score_range": 5.8, + "min_level": "25-34", + "max_level": "65+", + "min_score": 81.6, + "max_score": 87.4, + "se_min": 4.0, + "se_max": 3.5, + "effect_size": 1.4348497758, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.0422350116, + "level_score_std_dev": 2.3134870266, + "se_difference": 5.3150729064, + "z_score": 1.0912362073, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.6173514717, + "gap_confidence_interval_95_upper": 16.2173514717, + "raw_n_min_group": 91, + "raw_n_max_group": 54, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "communication", + "demographic_factor": "Education", + "score_range": 4.3, + "min_level": "College", + "max_level": "No College", + "min_score": 82.5, + "max_score": 86.8, + "se_min": 3.2, + "se_max": 3.8, + "effect_size": 1.0637679372, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.0422350116, + "level_score_std_dev": 2.15, + "se_difference": 4.9678969393, + "z_score": 0.8655574084, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.43689908, + "gap_confidence_interval_95_upper": 14.03689908, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "communication", + "demographic_factor": "Ethnicity", + "score_range": 8.3, + "min_level": "Hispanic", + "max_level": "African American", + "min_score": 80.5, + "max_score": 88.8, + "se_min": 5.1, + "se_max": 3.4, + "effect_size": 2.0533195067, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.0422350116, + "level_score_std_dev": 3.4039499115, + "se_difference": 6.1294371683, + "z_score": 1.3541210673, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.7134760953, + "gap_confidence_interval_95_upper": 20.3134760953, + "raw_n_min_group": 30, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "communication", + "demographic_factor": "Politics", + "score_range": 5.3, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 83.0, + "max_score": 88.3, + "se_min": 3.9, + "se_max": 3.2, + "effect_size": 1.3111558296, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.0422350116, + "level_score_std_dev": 2.2095751226, + "se_difference": 5.0447993023, + "z_score": 1.0505868881, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.5876249416, + "gap_confidence_interval_95_upper": 15.1876249416, + "raw_n_min_group": 137, + "raw_n_max_group": 154, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "communication", + "demographic_factor": "Sex", + "score_range": 0.1, + "min_level": "Male", + "max_level": "Female", + "min_score": 85.3, + "max_score": 85.4, + "se_min": 3.6, + "se_max": 3.6, + "effect_size": 0.0247387892, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 4.0422350116, + "level_score_std_dev": 0.05, + "se_difference": 5.0911688245, + "z_score": 0.019641855, + "p_value": 0.9924215291, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.8785075353, + "gap_confidence_interval_95_upper": 10.0785075353, + "raw_n_min_group": 207, + "raw_n_max_group": 222, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "communication", + "demographic_factor": "Urbanicity", + "score_range": 7.4, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 82.7, + "max_score": 90.1, + "se_min": 3.9, + "se_max": 3.1, + "effect_size": 1.8306704036, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.0422350116, + "level_score_std_dev": 3.2601976764, + "se_difference": 4.9819674828, + "z_score": 1.4853569449, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.3644768385, + "gap_confidence_interval_95_upper": 17.1644768385, + "raw_n_min_group": 155, + "raw_n_max_group": 74, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "comprehensiveness", + "demographic_factor": "Age", + "score_range": 5.0, + "min_level": "25-34", + "max_level": "55-64", + "min_score": 84.6, + "max_score": 89.6, + "se_min": 4.4, + "se_max": 3.2, + "effect_size": 1.3957952007, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.5821874137, + "level_score_std_dev": 1.9629909152, + "se_difference": 5.4405882035, + "z_score": 0.9190182776, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.6633569336, + "gap_confidence_interval_95_upper": 15.6633569336, + "raw_n_min_group": 91, + "raw_n_max_group": 95, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "comprehensiveness", + "demographic_factor": "Education", + "score_range": 1.0, + "min_level": "College", + "max_level": "No College", + "min_score": 87.4, + "max_score": 88.4, + "se_min": 3.2, + "se_max": 4.0, + "effect_size": 0.2791590401, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.5821874137, + "level_score_std_dev": 0.5, + "se_difference": 5.1224993899, + "z_score": 0.1952172024, + "p_value": 0.9597457904, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.0399143151, + "gap_confidence_interval_95_upper": 11.0399143151, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "comprehensiveness", + "demographic_factor": "Ethnicity", + "score_range": 5.3, + "min_level": "Hispanic", + "max_level": "African American", + "min_score": 83.4, + "max_score": 88.7, + "se_min": 5.5, + "se_max": 3.9, + "effect_size": 1.4795429127, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.5821874137, + "level_score_std_dev": 2.1637640814, + "se_difference": 6.7424031324, + "z_score": 0.7860698769, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.9148673088, + "gap_confidence_interval_95_upper": 18.5148673088, + "raw_n_min_group": 30, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "comprehensiveness", + "demographic_factor": "Politics", + "score_range": 2.6, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 86.6, + "max_score": 89.2, + "se_min": 4.1, + "se_max": 3.5, + "effect_size": 0.7258135044, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.5821874137, + "level_score_std_dev": 1.2027745702, + "se_difference": 5.3907327888, + "z_score": 0.4823091965, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.9656421163, + "gap_confidence_interval_95_upper": 13.1656421163, + "raw_n_min_group": 137, + "raw_n_max_group": 154, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "comprehensiveness", + "demographic_factor": "Sex", + "score_range": 2.2, + "min_level": "Male", + "max_level": "Female", + "min_score": 86.9, + "max_score": 89.1, + "se_min": 4.0, + "se_max": 3.5, + "effect_size": 0.6141498883, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.5821874137, + "level_score_std_dev": 1.1, + "se_difference": 5.3150729064, + "z_score": 0.4139171821, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.2173514717, + "gap_confidence_interval_95_upper": 12.6173514717, + "raw_n_min_group": 207, + "raw_n_max_group": 222, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "comprehensiveness", + "demographic_factor": "Urbanicity", + "score_range": 4.4, + "min_level": "Suburban", + "max_level": "Rural", + "min_score": 86.7, + "max_score": 91.1, + "se_min": 3.9, + "se_max": 3.3, + "effect_size": 1.2282997766, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.5821874137, + "level_score_std_dev": 2.0510160084, + "se_difference": 5.1088159098, + "z_score": 0.861256322, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.6130951868, + "gap_confidence_interval_95_upper": 14.4130951868, + "raw_n_min_group": 204, + "raw_n_max_group": 74, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "confidence", + "demographic_factor": "Age", + "score_range": 4.0, + "min_level": "25-34", + "max_level": "35-44", + "min_score": 85.3, + "max_score": 89.3, + "se_min": 4.1, + "se_max": 3.3, + "effect_size": 1.2164745064, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.2881905696, + "level_score_std_dev": 1.4839886193, + "se_difference": 5.2630789468, + "z_score": 0.7600114003, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.3154451835, + "gap_confidence_interval_95_upper": 14.3154451835, + "raw_n_min_group": 91, + "raw_n_max_group": 72, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "confidence", + "demographic_factor": "Education", + "score_range": 4.7, + "min_level": "College", + "max_level": "No College", + "min_score": 84.8, + "max_score": 89.5, + "se_min": 3.4, + "se_max": 3.8, + "effect_size": 1.429357545, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.2881905696, + "level_score_std_dev": 2.35, + "se_difference": 5.0990195136, + "z_score": 0.9217458351, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.2938946031, + "gap_confidence_interval_95_upper": 14.6938946031, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "confidence", + "demographic_factor": "Ethnicity", + "score_range": 10.2, + "min_level": "Hispanic", + "max_level": "African American", + "min_score": 81.6, + "max_score": 91.8, + "se_min": 5.6, + "se_max": 3.2, + "effect_size": 3.1020099912, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.2881905696, + "level_score_std_dev": 3.7312028891, + "se_difference": 6.4498061986, + "z_score": 1.581442866, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.4413878566, + "gap_confidence_interval_95_upper": 22.8413878566, + "raw_n_min_group": 30, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "confidence", + "demographic_factor": "Politics", + "score_range": 1.6, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 87.6, + "max_score": 89.2, + "se_min": 3.8, + "se_max": 3.4, + "effect_size": 0.4865898025, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.2881905696, + "level_score_std_dev": 0.7318166133, + "se_difference": 5.0990195136, + "z_score": 0.3137858162, + "p_value": 0.9469805126, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.3938946031, + "gap_confidence_interval_95_upper": 11.5938946031, + "raw_n_min_group": 137, + "raw_n_max_group": 154, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "confidence", + "demographic_factor": "Sex", + "score_range": 1.2, + "min_level": "Male", + "max_level": "Female", + "min_score": 87.3, + "max_score": 88.5, + "se_min": 3.8, + "se_max": 3.5, + "effect_size": 0.3649423519, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.2881905696, + "level_score_std_dev": 0.6, + "se_difference": 5.1662365412, + "z_score": 0.2322774016, + "p_value": 0.9570722129, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.9256375564, + "gap_confidence_interval_95_upper": 11.3256375564, + "raw_n_min_group": 207, + "raw_n_max_group": 222, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "confidence", + "demographic_factor": "Urbanicity", + "score_range": 1.6, + "min_level": "Suburban", + "max_level": "Urban", + "min_score": 87.4, + "max_score": 89.0, + "se_min": 3.5, + "se_max": 3.4, + "effect_size": 0.4865898025, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.2881905696, + "level_score_std_dev": 0.7318166133, + "se_difference": 4.8795491595, + "z_score": 0.3278991455, + "p_value": 0.9446615231, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.9637406134, + "gap_confidence_interval_95_upper": 11.1637406134, + "raw_n_min_group": 204, + "raw_n_max_group": 155, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "consistency", + "demographic_factor": "Age", + "score_range": 5.3, + "min_level": "25-34", + "max_level": "35-44", + "min_score": 85.4, + "max_score": 90.7, + "se_min": 4.3, + "se_max": 3.2, + "effect_size": 1.7802851875, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9770511137, + "level_score_std_dev": 1.8797162906, + "se_difference": 5.3600373133, + "z_score": 0.9887990867, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.2054800899, + "gap_confidence_interval_95_upper": 15.8054800899, + "raw_n_min_group": 91, + "raw_n_max_group": 72, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "consistency", + "demographic_factor": "Education", + "score_range": 0.5, + "min_level": "College", + "max_level": "No College", + "min_score": 88.2, + "max_score": 88.7, + "se_min": 3.0, + "se_max": 4.0, + "effect_size": 0.1679514328, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 2.9770511137, + "level_score_std_dev": 0.25, + "se_difference": 5.0, + "z_score": 0.1, + "p_value": 0.9792458562, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.2998199227, + "gap_confidence_interval_95_upper": 10.2998199227, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "consistency", + "demographic_factor": "Ethnicity", + "score_range": 2.8, + "min_level": "Hispanic", + "max_level": "Asian", + "min_score": 87.0, + "max_score": 89.8, + "se_min": 4.9, + "se_max": 4.0, + "effect_size": 0.9405280236, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9770511137, + "level_score_std_dev": 1.0755812382, + "se_difference": 6.3253458403, + "z_score": 0.4426635429, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.5974500368, + "gap_confidence_interval_95_upper": 15.1974500368, + "raw_n_min_group": 30, + "raw_n_max_group": 34, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "consistency", + "demographic_factor": "Politics", + "score_range": 2.4, + "min_level": "Republican", + "max_level": "Independent", + "min_score": 88.0, + "max_score": 90.4, + "se_min": 3.7, + "se_max": 3.3, + "effect_size": 0.8061668773, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9770511137, + "level_score_std_dev": 1.1085526099, + "se_difference": 4.9578221025, + "z_score": 0.4840835251, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.3171527626, + "gap_confidence_interval_95_upper": 12.1171527626, + "raw_n_min_group": 142, + "raw_n_max_group": 154, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "consistency", + "demographic_factor": "Sex", + "score_range": 0.4, + "min_level": "Male", + "max_level": "Female", + "min_score": 88.3, + "max_score": 88.7, + "se_min": 3.7, + "se_max": 3.7, + "effect_size": 0.1343611462, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 2.9770511137, + "level_score_std_dev": 0.2, + "se_difference": 5.2325901808, + "z_score": 0.0764439763, + "p_value": 0.9818838748, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.8556883002, + "gap_confidence_interval_95_upper": 10.6556883002, + "raw_n_min_group": 207, + "raw_n_max_group": 222, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "consistency", + "demographic_factor": "Urbanicity", + "score_range": 0.9, + "min_level": "Suburban", + "max_level": "Rural", + "min_score": 88.1, + "max_score": 89.0, + "se_min": 3.7, + "se_max": 3.7, + "effect_size": 0.302312579, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 2.9770511137, + "level_score_std_dev": 0.3681787006, + "se_difference": 5.2325901808, + "z_score": 0.1719989468, + "p_value": 0.9610477633, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.3556883002, + "gap_confidence_interval_95_upper": 11.1556883002, + "raw_n_min_group": 204, + "raw_n_max_group": 74, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "context_memory", + "demographic_factor": "Age", + "score_range": 6.5, + "min_level": "45-54", + "max_level": "65+", + "min_score": 87.5, + "max_score": 94.0, + "se_min": 3.5, + "se_max": 2.5, + "effect_size": 1.9180640311, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.3888336858, + "level_score_std_dev": 1.9953417975, + "se_difference": 4.3011626335, + "z_score": 1.5112193037, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -1.9301238534, + "gap_confidence_interval_95_upper": 14.9301238534, + "raw_n_min_group": 69, + "raw_n_max_group": 54, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "context_memory", + "demographic_factor": "Education", + "score_range": 4.9, + "min_level": "College", + "max_level": "No College", + "min_score": 88.6, + "max_score": 93.5, + "se_min": 2.8, + "se_max": 2.8, + "effect_size": 1.4459251927, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.3888336858, + "level_score_std_dev": 2.45, + "se_difference": 3.9597979746, + "z_score": 1.2374368671, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.8610614164, + "gap_confidence_interval_95_upper": 12.6610614164, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "context_memory", + "demographic_factor": "Ethnicity", + "score_range": 7.3, + "min_level": "Hispanic", + "max_level": "African American", + "min_score": 86.9, + "max_score": 94.2, + "se_min": 4.5, + "se_max": 2.5, + "effect_size": 2.1541334503, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.3888336858, + "level_score_std_dev": 2.7913930214, + "se_difference": 5.1478150705, + "z_score": 1.418077359, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.7895321372, + "gap_confidence_interval_95_upper": 17.3895321372, + "raw_n_min_group": 30, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "context_memory", + "demographic_factor": "Politics", + "score_range": 2.8, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 91.2, + "max_score": 94.0, + "se_min": 3.0, + "se_max": 2.4, + "effect_size": 0.8262429672, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.3888336858, + "level_score_std_dev": 1.2364824661, + "se_difference": 3.8418745425, + "z_score": 0.7288108888, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.7299357363, + "gap_confidence_interval_95_upper": 10.3299357363, + "raw_n_min_group": 137, + "raw_n_max_group": 154, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "context_memory", + "demographic_factor": "Sex", + "score_range": 0.2, + "min_level": "Female", + "max_level": "Male", + "min_score": 91.8, + "max_score": 92.0, + "se_min": 2.8, + "se_max": 2.8, + "effect_size": 0.0590173548, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 3.3888336858, + "level_score_std_dev": 0.1, + "se_difference": 3.9597979746, + "z_score": 0.0505076272, + "p_value": 0.9872735163, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.5610614164, + "gap_confidence_interval_95_upper": 7.9610614164, + "raw_n_min_group": 222, + "raw_n_max_group": 207, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "context_memory", + "demographic_factor": "Urbanicity", + "score_range": 3.2, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 90.5, + "max_score": 93.7, + "se_min": 3.1, + "se_max": 2.6, + "effect_size": 0.9442776768, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.3888336858, + "level_score_std_dev": 1.3366625104, + "se_difference": 4.0459856648, + "z_score": 0.7909073994, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.729986185, + "gap_confidence_interval_95_upper": 11.129986185, + "raw_n_min_group": 155, + "raw_n_max_group": 74, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "conversation_building", + "demographic_factor": "Age", + "score_range": 7.1, + "min_level": "45-54", + "max_level": "55-64", + "min_score": 80.5, + "max_score": 87.6, + "se_min": 4.4, + "se_max": 3.4, + "effect_size": 1.6412913512, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.3258620689, + "level_score_std_dev": 2.2146607465, + "se_difference": 5.5605755098, + "z_score": 1.2768462522, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.7985277325, + "gap_confidence_interval_95_upper": 17.9985277325, + "raw_n_min_group": 69, + "raw_n_max_group": 95, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "conversation_building", + "demographic_factor": "Education", + "score_range": 4.0, + "min_level": "College", + "max_level": "No College", + "min_score": 82.4, + "max_score": 86.4, + "se_min": 3.6, + "se_max": 4.3, + "effect_size": 0.9246711838, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.3258620689, + "level_score_std_dev": 2.0, + "se_difference": 5.6080299571, + "z_score": 0.7132629516, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.9915367402, + "gap_confidence_interval_95_upper": 14.9915367402, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "conversation_building", + "demographic_factor": "Ethnicity", + "score_range": 8.5, + "min_level": "Hispanic", + "max_level": "African American", + "min_score": 77.7, + "max_score": 86.2, + "se_min": 6.1, + "se_max": 4.3, + "effect_size": 1.9649262655, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.3258620689, + "level_score_std_dev": 3.4368408459, + "se_difference": 7.4632432628, + "z_score": 1.1389150401, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.127688003, + "gap_confidence_interval_95_upper": 23.127688003, + "raw_n_min_group": 30, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "conversation_building", + "demographic_factor": "Politics", + "score_range": 3.9, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 83.8, + "max_score": 87.7, + "se_min": 4.4, + "se_max": 3.7, + "effect_size": 0.9015544042, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.3258620689, + "level_score_std_dev": 1.6131404843, + "se_difference": 5.7489129407, + "z_score": 0.6783891216, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.3676623141, + "gap_confidence_interval_95_upper": 15.1676623141, + "raw_n_min_group": 137, + "raw_n_max_group": 154, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "conversation_building", + "demographic_factor": "Sex", + "score_range": 1.0, + "min_level": "Male", + "max_level": "Female", + "min_score": 84.5, + "max_score": 85.5, + "se_min": 4.2, + "se_max": 4.0, + "effect_size": 0.2311677959, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 4.3258620689, + "level_score_std_dev": 0.5, + "se_difference": 5.8, + "z_score": 0.1724137931, + "p_value": 0.9610477633, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -10.3677911103, + "gap_confidence_interval_95_upper": 12.3677911103, + "raw_n_min_group": 207, + "raw_n_max_group": 222, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "conversation_building", + "demographic_factor": "Urbanicity", + "score_range": 5.0, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 82.7, + "max_score": 87.7, + "se_min": 4.5, + "se_max": 3.8, + "effect_size": 1.1558389797, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.3258620689, + "level_score_std_dev": 2.0499322482, + "se_difference": 5.8898217291, + "z_score": 0.8489221287, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.5438384643, + "gap_confidence_interval_95_upper": 16.5438384643, + "raw_n_min_group": 155, + "raw_n_max_group": 74, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "conversation_flow", + "demographic_factor": "Age", + "score_range": 8.1, + "min_level": "45-54", + "max_level": "65+", + "min_score": 78.8, + "max_score": 86.9, + "se_min": 4.7, + "se_max": 4.1, + "effect_size": 1.6297805277, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.969994341, + "level_score_std_dev": 2.7303337199, + "se_difference": 6.2369864518, + "z_score": 1.2987041198, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.1242688176, + "gap_confidence_interval_95_upper": 20.3242688176, + "raw_n_min_group": 69, + "raw_n_max_group": 54, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "conversation_flow", + "demographic_factor": "Education", + "score_range": 5.4, + "min_level": "College", + "max_level": "No College", + "min_score": 79.4, + "max_score": 84.8, + "se_min": 4.0, + "se_max": 4.6, + "effect_size": 1.0865203518, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.969994341, + "level_score_std_dev": 2.7, + "se_difference": 6.0959002617, + "z_score": 0.8858412651, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.5477449662, + "gap_confidence_interval_95_upper": 17.3477449662, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "conversation_flow", + "demographic_factor": "Ethnicity", + "score_range": 16.8, + "min_level": "Hispanic", + "max_level": "African American", + "min_score": 70.4, + "max_score": 87.2, + "se_min": 6.8, + "se_max": 4.1, + "effect_size": 3.380285539, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.969994341, + "level_score_std_dev": 6.4394778515, + "se_difference": 7.9404030124, + "z_score": 2.1157616274, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": 1.2370960729, + "gap_confidence_interval_95_upper": 32.3629039271, + "raw_n_min_group": 30, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "conversation_flow", + "demographic_factor": "Politics", + "score_range": 5.6, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 80.1, + "max_score": 85.7, + "se_min": 4.8, + "se_max": 4.1, + "effect_size": 1.1267618463, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.969994341, + "level_score_std_dev": 2.4729649321, + "se_difference": 6.3126856408, + "z_score": 0.8871026246, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.7726365018, + "gap_confidence_interval_95_upper": 17.9726365018, + "raw_n_min_group": 137, + "raw_n_max_group": 154, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "conversation_flow", + "demographic_factor": "Sex", + "score_range": 2.4, + "min_level": "Male", + "max_level": "Female", + "min_score": 81.7, + "max_score": 84.1, + "se_min": 4.6, + "se_max": 4.2, + "effect_size": 0.4828979341, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 4.969994341, + "level_score_std_dev": 1.2, + "se_difference": 6.228964601, + "z_score": 0.3852967794, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.8085462789, + "gap_confidence_interval_95_upper": 14.6085462789, + "raw_n_min_group": 207, + "raw_n_max_group": 222, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "conversation_flow", + "demographic_factor": "Urbanicity", + "score_range": 6.4, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 80.1, + "max_score": 86.5, + "se_min": 4.8, + "se_max": 4.1, + "effect_size": 1.2877278244, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.969994341, + "level_score_std_dev": 2.6398653164, + "se_difference": 6.3126856408, + "z_score": 1.0138315709, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.9726365018, + "gap_confidence_interval_95_upper": 18.7726365018, + "raw_n_min_group": 155, + "raw_n_max_group": 74, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "cultural_awareness", + "demographic_factor": "Age", + "score_range": 5.8, + "min_level": "18-24", + "max_level": "55-64", + "min_score": 67.8, + "max_score": 73.6, + "se_min": 5.9, + "se_max": 4.8, + "effect_size": 1.561724448, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.7138433783, + "level_score_std_dev": 2.4842280267, + "se_difference": 7.6059187479, + "z_score": 0.7625640231, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.1073268153, + "gap_confidence_interval_95_upper": 20.7073268153, + "raw_n_min_group": 52, + "raw_n_max_group": 95, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "cultural_awareness", + "demographic_factor": "Education", + "score_range": 4.0, + "min_level": "College", + "max_level": "No College", + "min_score": 69.4, + "max_score": 73.4, + "se_min": 4.2, + "se_max": 5.6, + "effect_size": 1.0770513434, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.7138433783, + "level_score_std_dev": 2.0, + "se_difference": 7.0, + "z_score": 0.5714285714, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.7197478918, + "gap_confidence_interval_95_upper": 17.7197478918, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "cultural_awareness", + "demographic_factor": "Ethnicity", + "score_range": 14.8, + "min_level": "Hispanic", + "max_level": "African American", + "min_score": 60.5, + "max_score": 75.3, + "se_min": 6.8, + "se_max": 5.5, + "effect_size": 3.9850899708, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.7138433783, + "level_score_std_dev": 6.3833278938, + "se_difference": 8.7458561616, + "z_score": 1.6922299803, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.3415630908, + "gap_confidence_interval_95_upper": 31.9415630908, + "raw_n_min_group": 30, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "cultural_awareness", + "demographic_factor": "Politics", + "score_range": 3.8, + "min_level": "Democrat", + "max_level": "Republican", + "min_score": 70.1, + "max_score": 73.9, + "se_min": 5.3, + "se_max": 5.0, + "effect_size": 1.0231987763, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.7138433783, + "level_score_std_dev": 1.6213848676, + "se_difference": 7.2862884928, + "z_score": 0.5215275244, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -10.4808630268, + "gap_confidence_interval_95_upper": 18.0808630268, + "raw_n_min_group": 137, + "raw_n_max_group": 142, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "cultural_awareness", + "demographic_factor": "Sex", + "score_range": 0.4, + "min_level": "Female", + "max_level": "Male", + "min_score": 71.9, + "max_score": 72.3, + "se_min": 5.1, + "se_max": 5.2, + "effect_size": 0.1077051343, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 3.7138433783, + "level_score_std_dev": 0.2, + "se_difference": 7.2835430939, + "z_score": 0.0549183268, + "p_value": 0.9869756083, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -13.8754821438, + "gap_confidence_interval_95_upper": 14.6754821438, + "raw_n_min_group": 222, + "raw_n_max_group": 207, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "cultural_awareness", + "demographic_factor": "Urbanicity", + "score_range": 4.9, + "min_level": "Suburban", + "max_level": "Rural", + "min_score": 70.0, + "max_score": 74.9, + "se_min": 5.1, + "se_max": 5.2, + "effect_size": 1.3193878957, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.7138433783, + "level_score_std_dev": 2.007209229, + "se_difference": 7.2835430939, + "z_score": 0.6727495035, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.3754821438, + "gap_confidence_interval_95_upper": 19.1754821438, + "raw_n_min_group": 204, + "raw_n_max_group": 74, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "detail_and_technical_language", + "demographic_factor": "Age", + "score_range": 9.1, + "min_level": "45-54", + "max_level": "35-44", + "min_score": 80.2, + "max_score": 89.3, + "se_min": 5.0, + "se_max": 3.6, + "effect_size": 2.9081111597, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1291788726, + "level_score_std_dev": 3.0450323844, + "se_difference": 6.1611687203, + "z_score": 1.4769925014, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.9756687945, + "gap_confidence_interval_95_upper": 21.1756687945, + "raw_n_min_group": 69, + "raw_n_max_group": 72, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "detail_and_technical_language", + "demographic_factor": "Education", + "score_range": 9.8, + "min_level": "College", + "max_level": "No College", + "min_score": 80.4, + "max_score": 90.2, + "se_min": 4.3, + "se_max": 4.1, + "effect_size": 3.1318120181, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1291788726, + "level_score_std_dev": 4.9, + "se_difference": 5.941380311, + "z_score": 1.6494483583, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -1.844891428, + "gap_confidence_interval_95_upper": 21.444891428, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "detail_and_technical_language", + "demographic_factor": "Ethnicity", + "score_range": 13.6, + "min_level": "Hispanic", + "max_level": "African American", + "min_score": 76.4, + "max_score": 90.0, + "se_min": 6.9, + "se_max": 3.9, + "effect_size": 4.3461881068, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1291788726, + "level_score_std_dev": 5.2919750566, + "se_difference": 7.9259068882, + "z_score": 1.715891972, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -1.9344920456, + "gap_confidence_interval_95_upper": 29.1344920456, + "raw_n_min_group": 30, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "detail_and_technical_language", + "demographic_factor": "Politics", + "score_range": 5.5, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 85.0, + "max_score": 90.5, + "se_min": 4.6, + "se_max": 3.5, + "effect_size": 1.757649602, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1291788726, + "level_score_std_dev": 2.2602851344, + "se_difference": 5.7801384066, + "z_score": 0.9515343082, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.8288631027, + "gap_confidence_interval_95_upper": 16.8288631027, + "raw_n_min_group": 137, + "raw_n_max_group": 154, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "detail_and_technical_language", + "demographic_factor": "Sex", + "score_range": 0.7, + "min_level": "Male", + "max_level": "Female", + "min_score": 86.5, + "max_score": 87.2, + "se_min": 4.3, + "se_max": 4.1, + "effect_size": 0.2237008584, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.1291788726, + "level_score_std_dev": 0.35, + "se_difference": 5.941380311, + "z_score": 0.1178177399, + "p_value": 0.9772792279, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -10.944891428, + "gap_confidence_interval_95_upper": 12.344891428, + "raw_n_min_group": 207, + "raw_n_max_group": 222, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "detail_and_technical_language", + "demographic_factor": "Urbanicity", + "score_range": 6.9, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 83.6, + "max_score": 90.5, + "se_min": 4.8, + "se_max": 3.7, + "effect_size": 2.2050513189, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1291788726, + "level_score_std_dev": 2.8248893784, + "se_difference": 6.0605280298, + "z_score": 1.1385146585, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.9784166657, + "gap_confidence_interval_95_upper": 18.7784166657, + "raw_n_min_group": 155, + "raw_n_max_group": 74, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "distinct_personality", + "demographic_factor": "Age", + "score_range": 9.9, + "min_level": "45-54", + "max_level": "65+", + "min_score": 66.9, + "max_score": 76.8, + "se_min": 5.6, + "se_max": 5.3, + "effect_size": 2.289892452, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.3233471472, + "level_score_std_dev": 3.3836206775, + "se_difference": 7.7103826105, + "z_score": 1.2839829747, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.2120722236, + "gap_confidence_interval_95_upper": 25.0120722236, + "raw_n_min_group": 69, + "raw_n_max_group": 54, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "distinct_personality", + "demographic_factor": "Education", + "score_range": 1.2, + "min_level": "College", + "max_level": "No College", + "min_score": 72.1, + "max_score": 73.3, + "se_min": 4.4, + "se_max": 5.8, + "effect_size": 0.2775627215, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 4.3233471472, + "level_score_std_dev": 0.6, + "se_difference": 7.2801098893, + "z_score": 0.1648326767, + "p_value": 0.9610477633, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -13.0687531865, + "gap_confidence_interval_95_upper": 15.4687531865, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "distinct_personality", + "demographic_factor": "Ethnicity", + "score_range": 13.4, + "min_level": "Hispanic", + "max_level": "African American", + "min_score": 66.6, + "max_score": 80.0, + "se_min": 7.2, + "se_max": 5.2, + "effect_size": 3.0994503896, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.3233471472, + "level_score_std_dev": 4.8052055107, + "se_difference": 8.8814413245, + "z_score": 1.5087641195, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.0073051268, + "gap_confidence_interval_95_upper": 30.8073051268, + "raw_n_min_group": 30, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "distinct_personality", + "demographic_factor": "Politics", + "score_range": 7.9, + "min_level": "Independent", + "max_level": "Republican", + "min_score": 69.8, + "max_score": 77.7, + "se_min": 5.6, + "se_max": 4.9, + "effect_size": 1.8272879163, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.3233471472, + "level_score_std_dev": 3.7007506746, + "se_difference": 7.4411020689, + "z_score": 1.0616706943, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.6842920604, + "gap_confidence_interval_95_upper": 22.4842920604, + "raw_n_min_group": 154, + "raw_n_max_group": 142, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "distinct_personality", + "demographic_factor": "Sex", + "score_range": 0.3, + "min_level": "Female", + "max_level": "Male", + "min_score": 72.7, + "max_score": 73.0, + "se_min": 5.4, + "se_max": 5.4, + "effect_size": 0.0693906804, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 4.3233471472, + "level_score_std_dev": 0.15, + "se_difference": 7.6367532368, + "z_score": 0.0392837101, + "p_value": 0.9872735163, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -14.667761303, + "gap_confidence_interval_95_upper": 15.267761303, + "raw_n_min_group": 222, + "raw_n_max_group": 207, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "distinct_personality", + "demographic_factor": "Urbanicity", + "score_range": 4.6, + "min_level": "Suburban", + "max_level": "Rural", + "min_score": 70.7, + "max_score": 75.3, + "se_min": 5.3, + "se_max": 5.4, + "effect_size": 1.0639904323, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.3233471472, + "level_score_std_dev": 1.8873850223, + "se_difference": 7.5663729752, + "z_score": 0.6079531124, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -10.229818525, + "gap_confidence_interval_95_upper": 19.429818525, + "raw_n_min_group": 204, + "raw_n_max_group": 74, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "effectiveness", + "demographic_factor": "Age", + "score_range": 4.0, + "min_level": "45-54", + "max_level": "65+", + "min_score": 85.2, + "max_score": 89.2, + "se_min": 4.0, + "se_max": 3.7, + "effect_size": 1.0100698785, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.960122052, + "level_score_std_dev": 1.5937377451, + "se_difference": 5.4488530903, + "z_score": 0.7340994396, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.6795558141, + "gap_confidence_interval_95_upper": 14.6795558141, + "raw_n_min_group": 69, + "raw_n_max_group": 54, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "effectiveness", + "demographic_factor": "Education", + "score_range": 1.7, + "min_level": "College", + "max_level": "No College", + "min_score": 86.8, + "max_score": 88.5, + "se_min": 3.2, + "se_max": 4.0, + "effect_size": 0.4292796984, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.960122052, + "level_score_std_dev": 0.85, + "se_difference": 5.1224993899, + "z_score": 0.331869244, + "p_value": 0.9429935514, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.3399143151, + "gap_confidence_interval_95_upper": 11.7399143151, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "effectiveness", + "demographic_factor": "Ethnicity", + "score_range": 4.3, + "min_level": "Asian", + "max_level": "African American", + "min_score": 86.5, + "max_score": 90.8, + "se_min": 4.7, + "se_max": 3.6, + "effect_size": 1.0858251194, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.960122052, + "level_score_std_dev": 1.6315253599, + "se_difference": 5.9203040462, + "z_score": 0.7263140485, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.3035827082, + "gap_confidence_interval_95_upper": 15.9035827082, + "raw_n_min_group": 34, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "effectiveness", + "demographic_factor": "Politics", + "score_range": 2.9, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 86.5, + "max_score": 89.4, + "se_min": 4.0, + "se_max": 3.4, + "effect_size": 0.7323006619, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.960122052, + "level_score_std_dev": 1.249888884, + "se_difference": 5.2497618994, + "z_score": 0.5524060054, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.3893442502, + "gap_confidence_interval_95_upper": 13.1893442502, + "raw_n_min_group": 137, + "raw_n_max_group": 154, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "effectiveness", + "demographic_factor": "Sex", + "score_range": 2.0, + "min_level": "Male", + "max_level": "Female", + "min_score": 86.9, + "max_score": 88.9, + "se_min": 3.9, + "se_max": 3.5, + "effect_size": 0.5050349393, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.960122052, + "level_score_std_dev": 1.0, + "se_difference": 5.2402290026, + "z_score": 0.3816627096, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.2706601159, + "gap_confidence_interval_95_upper": 12.2706601159, + "raw_n_min_group": 207, + "raw_n_max_group": 222, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "effectiveness", + "demographic_factor": "Urbanicity", + "score_range": 3.6, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 86.8, + "max_score": 90.4, + "se_min": 4.0, + "se_max": 3.4, + "effect_size": 0.9090628907, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.960122052, + "level_score_std_dev": 1.6519348924, + "se_difference": 5.2497618994, + "z_score": 0.685745386, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.6893442502, + "gap_confidence_interval_95_upper": 13.8893442502, + "raw_n_min_group": 155, + "raw_n_max_group": 74, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "ethical_alignment", + "demographic_factor": "Age", + "score_range": 8.2, + "min_level": "18-24", + "max_level": "65+", + "min_score": 68.5, + "max_score": 76.7, + "se_min": 5.6, + "se_max": 5.2, + "effect_size": 2.1810645403, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.7596319818, + "level_score_std_dev": 3.2770243955, + "se_difference": 7.6419892698, + "z_score": 1.0730190413, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.7780237391, + "gap_confidence_interval_95_upper": 23.1780237391, + "raw_n_min_group": 52, + "raw_n_max_group": 54, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "ethical_alignment", + "demographic_factor": "Education", + "score_range": 2.0, + "min_level": "College", + "max_level": "No College", + "min_score": 73.6, + "max_score": 75.6, + "se_min": 4.1, + "se_max": 5.4, + "effect_size": 0.531966961, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.7596319818, + "level_score_std_dev": 1.0, + "se_difference": 6.7801179931, + "z_score": 0.2949801172, + "p_value": 0.9469805126, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -11.2887870774, + "gap_confidence_interval_95_upper": 15.2887870774, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "ethical_alignment", + "demographic_factor": "Ethnicity", + "score_range": 15.7, + "min_level": "Hispanic", + "max_level": "African American", + "min_score": 64.7, + "max_score": 80.4, + "se_min": 6.6, + "se_max": 5.1, + "effect_size": 4.1759406441, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.7596319818, + "level_score_std_dev": 6.5411677092, + "se_difference": 8.3408632647, + "z_score": 1.88229917, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -0.6477915987, + "gap_confidence_interval_95_upper": 32.0477915987, + "raw_n_min_group": 30, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "ethical_alignment", + "demographic_factor": "Politics", + "score_range": 3.5, + "min_level": "Democrat", + "max_level": "Republican", + "min_score": 73.0, + "max_score": 76.5, + "se_min": 5.2, + "se_max": 4.8, + "effect_size": 0.9309421818, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.7596319818, + "level_score_std_dev": 1.5282524515, + "se_difference": 7.0767224052, + "z_score": 0.4945792416, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -10.3701210427, + "gap_confidence_interval_95_upper": 17.3701210427, + "raw_n_min_group": 137, + "raw_n_max_group": 142, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "ethical_alignment", + "demographic_factor": "Sex", + "score_range": 3.2, + "min_level": "Female", + "max_level": "Male", + "min_score": 73.4, + "max_score": 76.6, + "se_min": 5.1, + "se_max": 4.9, + "effect_size": 0.8511471377, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.7596319818, + "level_score_std_dev": 1.6, + "se_difference": 7.072481884, + "z_score": 0.4524578574, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -10.661809774, + "gap_confidence_interval_95_upper": 17.061809774, + "raw_n_min_group": 222, + "raw_n_max_group": 207, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "ethical_alignment", + "demographic_factor": "Urbanicity", + "score_range": 1.3, + "min_level": "Suburban", + "max_level": "Urban", + "min_score": 74.2, + "max_score": 75.5, + "se_min": 4.8, + "se_max": 5.0, + "effect_size": 0.3457785247, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.7596319818, + "level_score_std_dev": 0.5557777334, + "se_difference": 6.9310893805, + "z_score": 0.1875607035, + "p_value": 0.9597457904, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -12.2846855593, + "gap_confidence_interval_95_upper": 14.8846855593, + "raw_n_min_group": 204, + "raw_n_max_group": 155, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "flexibility", + "demographic_factor": "Age", + "score_range": 7.1, + "min_level": "25-34", + "max_level": "65+", + "min_score": 84.2, + "max_score": 91.3, + "se_min": 4.6, + "se_max": 3.3, + "effect_size": 2.0786909739, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.4156111174, + "level_score_std_dev": 2.5499455332, + "se_difference": 5.6612719419, + "z_score": 1.254135126, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.9958891128, + "gap_confidence_interval_95_upper": 18.1958891128, + "raw_n_min_group": 91, + "raw_n_max_group": 54, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "flexibility", + "demographic_factor": "Education", + "score_range": 2.8, + "min_level": "College", + "max_level": "No College", + "min_score": 86.6, + "max_score": 89.4, + "se_min": 3.4, + "se_max": 3.9, + "effect_size": 0.8197654545, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.4156111174, + "level_score_std_dev": 1.4, + "se_difference": 5.1739733281, + "z_score": 0.5411701651, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.3408013801, + "gap_confidence_interval_95_upper": 12.9408013801, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "flexibility", + "demographic_factor": "Ethnicity", + "score_range": 10.4, + "min_level": "Hispanic", + "max_level": "African American", + "min_score": 81.3, + "max_score": 91.7, + "se_min": 5.9, + "se_max": 3.3, + "effect_size": 3.0448431166, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.4156111174, + "level_score_std_dev": 3.8402473879, + "se_difference": 6.7601775125, + "z_score": 1.5384211407, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.8497044535, + "gap_confidence_interval_95_upper": 23.6497044535, + "raw_n_min_group": 30, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "flexibility", + "demographic_factor": "Politics", + "score_range": 2.4, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 87.7, + "max_score": 90.1, + "se_min": 4.0, + "se_max": 3.4, + "effect_size": 0.7026561038, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.4156111174, + "level_score_std_dev": 0.9899494937, + "se_difference": 5.2497618994, + "z_score": 0.4571635907, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.8893442502, + "gap_confidence_interval_95_upper": 12.6893442502, + "raw_n_min_group": 137, + "raw_n_max_group": 154, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "flexibility", + "demographic_factor": "Sex", + "score_range": 4.3, + "min_level": "Male", + "max_level": "Female", + "min_score": 86.2, + "max_score": 90.5, + "se_min": 4.2, + "se_max": 3.3, + "effect_size": 1.2589255194, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.4156111174, + "level_score_std_dev": 2.15, + "se_difference": 5.3413481444, + "z_score": 0.805040204, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.168849992, + "gap_confidence_interval_95_upper": 14.768849992, + "raw_n_min_group": 207, + "raw_n_max_group": 222, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "flexibility", + "demographic_factor": "Urbanicity", + "score_range": 1.9, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 87.4, + "max_score": 89.3, + "se_min": 4.0, + "se_max": 3.7, + "effect_size": 0.5562694155, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.4156111174, + "level_score_std_dev": 0.7845734864, + "se_difference": 5.4488530903, + "z_score": 0.3486972338, + "p_value": 0.9426292272, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.7795558141, + "gap_confidence_interval_95_upper": 12.5795558141, + "raw_n_min_group": 155, + "raw_n_max_group": 74, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "helpfulness", + "demographic_factor": "Age", + "score_range": 3.5, + "min_level": "25-34", + "max_level": "55-64", + "min_score": 85.8, + "max_score": 89.3, + "se_min": 3.2, + "se_max": 2.6, + "effect_size": 0.9716451045, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.6021382539, + "level_score_std_dev": 1.2772583485, + "se_difference": 4.1231056256, + "z_score": 0.8488746876, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.5811385307, + "gap_confidence_interval_95_upper": 11.5811385307, + "raw_n_min_group": 91, + "raw_n_max_group": 95, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "helpfulness", + "demographic_factor": "Education", + "score_range": 0.4, + "min_level": "College", + "max_level": "No College", + "min_score": 87.7, + "max_score": 88.1, + "se_min": 2.4, + "se_max": 3.2, + "effect_size": 0.1110451548, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 3.6021382539, + "level_score_std_dev": 0.2, + "se_difference": 4.0, + "z_score": 0.1, + "p_value": 0.9792458562, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.4398559382, + "gap_confidence_interval_95_upper": 8.2398559382, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "helpfulness", + "demographic_factor": "Ethnicity", + "score_range": 6.1, + "min_level": "Asian", + "max_level": "African American", + "min_score": 84.5, + "max_score": 90.6, + "se_min": 3.8, + "se_max": 2.9, + "effect_size": 1.6934386106, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.6021382539, + "level_score_std_dev": 2.1787324296, + "se_difference": 4.7801673611, + "z_score": 1.2761059476, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.2689558678, + "gap_confidence_interval_95_upper": 15.4689558678, + "raw_n_min_group": 34, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "helpfulness", + "demographic_factor": "Politics", + "score_range": 1.6, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 87.3, + "max_score": 88.9, + "se_min": 3.1, + "se_max": 2.9, + "effect_size": 0.4441806192, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.6021382539, + "level_score_std_dev": 0.6548960901, + "se_difference": 4.2449970554, + "z_score": 0.3769142779, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.720041343, + "gap_confidence_interval_95_upper": 9.920041343, + "raw_n_min_group": 137, + "raw_n_max_group": 154, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "helpfulness", + "demographic_factor": "Sex", + "score_range": 2.4, + "min_level": "Male", + "max_level": "Female", + "min_score": 86.7, + "max_score": 89.1, + "se_min": 3.1, + "se_max": 2.8, + "effect_size": 0.6662709288, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.6021382539, + "level_score_std_dev": 1.2, + "se_difference": 4.1773197148, + "z_score": 0.5745310783, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.787396193, + "gap_confidence_interval_95_upper": 10.587396193, + "raw_n_min_group": 207, + "raw_n_max_group": 222, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "helpfulness", + "demographic_factor": "Urbanicity", + "score_range": 1.7, + "min_level": "Suburban", + "max_level": "Rural", + "min_score": 87.1, + "max_score": 88.8, + "se_min": 2.9, + "se_max": 3.0, + "effect_size": 0.4719419079, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.6021382539, + "level_score_std_dev": 0.7133644853, + "se_difference": 4.172529209, + "z_score": 0.4074267464, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.4780069741, + "gap_confidence_interval_95_upper": 9.8780069741, + "raw_n_min_group": 204, + "raw_n_max_group": 74, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "honesty_empathy_fairness", + "demographic_factor": "Age", + "score_range": 7.4, + "min_level": "45-54", + "max_level": "35-44", + "min_score": 79.3, + "max_score": 86.7, + "se_min": 4.6, + "se_max": 3.8, + "effect_size": 1.9464760134, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.8017421993, + "level_score_std_dev": 2.931675668, + "se_difference": 5.9665735561, + "z_score": 1.2402428178, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.294269281, + "gap_confidence_interval_95_upper": 19.094269281, + "raw_n_min_group": 69, + "raw_n_max_group": 72, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "honesty_empathy_fairness", + "demographic_factor": "Education", + "score_range": 6.3, + "min_level": "College", + "max_level": "No College", + "min_score": 80.5, + "max_score": 86.8, + "se_min": 3.9, + "se_max": 4.3, + "effect_size": 1.6571349844, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.8017421993, + "level_score_std_dev": 3.15, + "se_difference": 5.8051701095, + "z_score": 1.085239516, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.0779243387, + "gap_confidence_interval_95_upper": 17.6779243387, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "honesty_empathy_fairness", + "demographic_factor": "Ethnicity", + "score_range": 13.4, + "min_level": "Hispanic", + "max_level": "African American", + "min_score": 75.8, + "max_score": 89.2, + "se_min": 6.4, + "se_max": 3.8, + "effect_size": 3.524699808, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.8017421993, + "level_score_std_dev": 4.9147736469, + "se_difference": 7.4431176264, + "z_score": 1.8003208699, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -1.1882424804, + "gap_confidence_interval_95_upper": 27.9882424804, + "raw_n_min_group": 30, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "honesty_empathy_fairness", + "demographic_factor": "Politics", + "score_range": 4.2, + "min_level": "Democrat", + "max_level": "Republican", + "min_score": 82.8, + "max_score": 87.0, + "se_min": 4.5, + "se_max": 3.7, + "effect_size": 1.1047566562, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.8017421993, + "level_score_std_dev": 1.7378147197, + "se_difference": 5.8258046655, + "z_score": 0.7209304536, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.2183673253, + "gap_confidence_interval_95_upper": 15.6183673253, + "raw_n_min_group": 137, + "raw_n_max_group": 142, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "honesty_empathy_fairness", + "demographic_factor": "Sex", + "score_range": 0.3, + "min_level": "Female", + "max_level": "Male", + "min_score": 84.5, + "max_score": 84.8, + "se_min": 4.2, + "se_max": 4.2, + "effect_size": 0.0789111897, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 3.8017421993, + "level_score_std_dev": 0.15, + "se_difference": 5.939696962, + "z_score": 0.0505076272, + "p_value": 0.9872735163, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -11.3415921245, + "gap_confidence_interval_95_upper": 11.9415921245, + "raw_n_min_group": 222, + "raw_n_max_group": 207, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "honesty_empathy_fairness", + "demographic_factor": "Urbanicity", + "score_range": 5.2, + "min_level": "Suburban", + "max_level": "Rural", + "min_score": 83.1, + "max_score": 88.3, + "se_min": 4.3, + "se_max": 3.8, + "effect_size": 1.3677939553, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.8017421993, + "level_score_std_dev": 2.4280765135, + "se_difference": 5.7384666942, + "z_score": 0.9061654057, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.047188047, + "gap_confidence_interval_95_upper": 16.447188047, + "raw_n_min_group": 204, + "raw_n_max_group": 74, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "intuitiveness", + "demographic_factor": "Age", + "score_range": 8.3, + "min_level": "18-24", + "max_level": "35-44", + "min_score": 78.2, + "max_score": 86.5, + "se_min": 5.3, + "se_max": 3.9, + "effect_size": 2.678987997, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.0981848404, + "level_score_std_dev": 2.8854038808, + "se_difference": 6.5802735505, + "z_score": 1.2613457383, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.5970991675, + "gap_confidence_interval_95_upper": 21.1970991675, + "raw_n_min_group": 52, + "raw_n_max_group": 72, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "intuitiveness", + "demographic_factor": "Education", + "score_range": 4.5, + "min_level": "College", + "max_level": "No College", + "min_score": 80.9, + "max_score": 85.4, + "se_min": 3.8, + "se_max": 4.5, + "effect_size": 1.4524633719, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.0981848404, + "level_score_std_dev": 2.25, + "se_difference": 5.8898217291, + "z_score": 0.7640299158, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.0438384643, + "gap_confidence_interval_95_upper": 16.0438384643, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "intuitiveness", + "demographic_factor": "Ethnicity", + "score_range": 13.8, + "min_level": "Hispanic", + "max_level": "African American", + "min_score": 74.0, + "max_score": 87.8, + "se_min": 6.4, + "se_max": 4.1, + "effect_size": 4.454221007, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.0981848404, + "level_score_std_dev": 5.2461295257, + "se_difference": 7.6006578663, + "z_score": 1.8156323101, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -1.0970156767, + "gap_confidence_interval_95_upper": 28.6970156767, + "raw_n_min_group": 30, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "intuitiveness", + "demographic_factor": "Politics", + "score_range": 3.2, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 82.6, + "max_score": 85.8, + "se_min": 4.5, + "se_max": 4.1, + "effect_size": 1.0328628422, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.0981848404, + "level_score_std_dev": 1.3072447701, + "se_difference": 6.0876925021, + "z_score": 0.5256507287, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.7316580531, + "gap_confidence_interval_95_upper": 15.1316580531, + "raw_n_min_group": 137, + "raw_n_max_group": 154, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "intuitiveness", + "demographic_factor": "Sex", + "score_range": 2.6, + "min_level": "Male", + "max_level": "Female", + "min_score": 82.5, + "max_score": 85.1, + "se_min": 4.5, + "se_max": 4.1, + "effect_size": 0.8392010593, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.0981848404, + "level_score_std_dev": 1.3, + "se_difference": 6.0876925021, + "z_score": 0.4270912171, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.3316580531, + "gap_confidence_interval_95_upper": 14.5316580531, + "raw_n_min_group": 207, + "raw_n_max_group": 222, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "intuitiveness", + "demographic_factor": "Urbanicity", + "score_range": 5.1, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 81.5, + "max_score": 86.6, + "se_min": 4.6, + "se_max": 4.1, + "effect_size": 1.6461251548, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.0981848404, + "level_score_std_dev": 2.0928449536, + "se_difference": 6.1619802012, + "z_score": 0.8276560186, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.9772592678, + "gap_confidence_interval_95_upper": 17.1772592678, + "raw_n_min_group": 155, + "raw_n_max_group": 74, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "personality", + "demographic_factor": "Age", + "score_range": 2.9, + "min_level": "55-64", + "max_level": "65+", + "min_score": 73.6, + "max_score": 76.5, + "se_min": 3.4, + "se_max": 3.8, + "effect_size": 0.8617335081, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.365309545, + "level_score_std_dev": 0.922707369, + "se_difference": 5.0990195136, + "z_score": 0.5687367919, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.0938946031, + "gap_confidence_interval_95_upper": 12.8938946031, + "raw_n_min_group": 95, + "raw_n_max_group": 54, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "personality", + "demographic_factor": "Education", + "score_range": 1.9, + "min_level": "College", + "max_level": "No College", + "min_score": 73.8, + "max_score": 75.7, + "se_min": 3.0, + "se_max": 3.9, + "effect_size": 0.5645840225, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.365309545, + "level_score_std_dev": 0.95, + "se_difference": 4.9203658401, + "z_score": 0.3861501485, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.7437398373, + "gap_confidence_interval_95_upper": 11.5437398373, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "personality", + "demographic_factor": "Ethnicity", + "score_range": 5.4, + "min_level": "Hispanic", + "max_level": "African American", + "min_score": 72.8, + "max_score": 78.2, + "se_min": 4.4, + "se_max": 4.1, + "effect_size": 1.604607222, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.365309545, + "level_score_std_dev": 1.9369757355, + "se_difference": 6.0141499815, + "z_score": 0.8978824965, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.3875173614, + "gap_confidence_interval_95_upper": 17.1875173614, + "raw_n_min_group": 30, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "personality", + "demographic_factor": "Politics", + "score_range": 2.4, + "min_level": "Independent", + "max_level": "Republican", + "min_score": 73.8, + "max_score": 76.2, + "se_min": 3.6, + "se_max": 3.6, + "effect_size": 0.7131587653, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.365309545, + "level_score_std_dev": 1.0077477639, + "se_difference": 5.0911688245, + "z_score": 0.4714045208, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.5785075353, + "gap_confidence_interval_95_upper": 12.3785075353, + "raw_n_min_group": 154, + "raw_n_max_group": 142, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "personality", + "demographic_factor": "Sex", + "score_range": 2.4, + "min_level": "Female", + "max_level": "Male", + "min_score": 73.9, + "max_score": 76.3, + "se_min": 3.6, + "se_max": 3.6, + "effect_size": 0.7131587653, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.365309545, + "level_score_std_dev": 1.2, + "se_difference": 5.0911688245, + "z_score": 0.4714045208, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.5785075353, + "gap_confidence_interval_95_upper": 12.3785075353, + "raw_n_min_group": 222, + "raw_n_max_group": 207, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "personality", + "demographic_factor": "Urbanicity", + "score_range": 2.7, + "min_level": "Suburban", + "max_level": "Urban", + "min_score": 73.6, + "max_score": 76.3, + "se_min": 3.4, + "se_max": 3.7, + "effect_size": 0.802303611, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.365309545, + "level_score_std_dev": 1.1728408057, + "se_difference": 5.0249378106, + "z_score": 0.5373200827, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.1486971333, + "gap_confidence_interval_95_upper": 12.5486971333, + "raw_n_min_group": 204, + "raw_n_max_group": 155, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "personality_consistency", + "demographic_factor": "Age", + "score_range": 8.9, + "min_level": "18-24", + "max_level": "35-44", + "min_score": 80.2, + "max_score": 89.1, + "se_min": 5.1, + "se_max": 3.3, + "effect_size": 2.7880894501, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1921500939, + "level_score_std_dev": 3.2617735462, + "se_difference": 6.0745370194, + "z_score": 1.465132235, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.0058737808, + "gap_confidence_interval_95_upper": 20.8058737808, + "raw_n_min_group": 52, + "raw_n_max_group": 72, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "personality_consistency", + "demographic_factor": "Education", + "score_range": 3.2, + "min_level": "College", + "max_level": "No College", + "min_score": 83.1, + "max_score": 86.3, + "se_min": 3.5, + "se_max": 4.2, + "effect_size": 1.0024591281, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1921500939, + "level_score_std_dev": 1.6, + "se_difference": 5.4671747731, + "z_score": 0.5853114511, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.5154656525, + "gap_confidence_interval_95_upper": 13.9154656525, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "personality_consistency", + "demographic_factor": "Ethnicity", + "score_range": 9.3, + "min_level": "Hispanic", + "max_level": "African American", + "min_score": 79.3, + "max_score": 88.6, + "se_min": 5.8, + "se_max": 3.7, + "effect_size": 2.9133968411, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1921500939, + "level_score_std_dev": 3.4456494308, + "se_difference": 6.8796802251, + "z_score": 1.3518070166, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.1839254664, + "gap_confidence_interval_95_upper": 22.7839254664, + "raw_n_min_group": 30, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "personality_consistency", + "demographic_factor": "Politics", + "score_range": 2.5, + "min_level": "Democrat", + "max_level": "Republican", + "min_score": 83.9, + "max_score": 86.4, + "se_min": 4.2, + "se_max": 3.7, + "effect_size": 0.7831711938, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.1921500939, + "level_score_std_dev": 1.0964589469, + "se_difference": 5.5973207877, + "z_score": 0.4466422588, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.4705471537, + "gap_confidence_interval_95_upper": 13.4705471537, + "raw_n_min_group": 137, + "raw_n_max_group": 142, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "personality_consistency", + "demographic_factor": "Sex", + "score_range": 0.5, + "min_level": "Male", + "max_level": "Female", + "min_score": 85.0, + "max_score": 85.5, + "se_min": 4.0, + "se_max": 3.9, + "effect_size": 0.1566342388, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 3.1921500939, + "level_score_std_dev": 0.25, + "se_difference": 5.5865910894, + "z_score": 0.0895000175, + "p_value": 0.9801302996, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -10.4495173315, + "gap_confidence_interval_95_upper": 11.4495173315, + "raw_n_min_group": 207, + "raw_n_max_group": 222, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "personality_consistency", + "demographic_factor": "Urbanicity", + "score_range": 4.4, + "min_level": "Suburban", + "max_level": "Rural", + "min_score": 83.4, + "max_score": 87.8, + "se_min": 4.0, + "se_max": 3.8, + "effect_size": 1.3783813012, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1921500939, + "level_score_std_dev": 1.8061622912, + "se_difference": 5.5172456897, + "z_score": 0.797499377, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.4136028456, + "gap_confidence_interval_95_upper": 15.2136028456, + "raw_n_min_group": 204, + "raw_n_max_group": 74, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "tone_and_language_style", + "demographic_factor": "Age", + "score_range": 7.9, + "min_level": "45-54", + "max_level": "65+", + "min_score": 83.1, + "max_score": 91.0, + "se_min": 4.1, + "se_max": 3.1, + "effect_size": 2.3654991886, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.339675633, + "level_score_std_dev": 2.5733678754, + "se_difference": 5.1400389104, + "z_score": 1.5369533456, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.1742911434, + "gap_confidence_interval_95_upper": 17.9742911434, + "raw_n_min_group": 69, + "raw_n_max_group": 54, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "tone_and_language_style", + "demographic_factor": "Education", + "score_range": 4.6, + "min_level": "College", + "max_level": "No College", + "min_score": 83.9, + "max_score": 88.5, + "se_min": 3.4, + "se_max": 3.9, + "effect_size": 1.3773792744, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.339675633, + "level_score_std_dev": 2.3, + "se_difference": 5.1739733281, + "z_score": 0.8890652712, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.5408013801, + "gap_confidence_interval_95_upper": 14.7408013801, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "tone_and_language_style", + "demographic_factor": "Ethnicity", + "score_range": 9.9, + "min_level": "Hispanic", + "max_level": "Asian", + "min_score": 81.6, + "max_score": 91.5, + "se_min": 5.4, + "se_max": 3.4, + "effect_size": 2.9643597427, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.339675633, + "level_score_std_dev": 3.7532485929, + "se_difference": 6.3812224534, + "z_score": 1.5514268735, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.606966186, + "gap_confidence_interval_95_upper": 22.406966186, + "raw_n_min_group": 30, + "raw_n_max_group": 34, + "raw_n_confidence_heuristic": "Medium", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "tone_and_language_style", + "demographic_factor": "Politics", + "score_range": 3.3, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 85.4, + "max_score": 88.7, + "se_min": 4.0, + "se_max": 3.4, + "effect_size": 0.9881199142, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.339675633, + "level_score_std_dev": 1.3928388277, + "se_difference": 5.2497618994, + "z_score": 0.6285999372, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.9893442502, + "gap_confidence_interval_95_upper": 13.5893442502, + "raw_n_min_group": 137, + "raw_n_max_group": 154, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "tone_and_language_style", + "demographic_factor": "Sex", + "score_range": 2.0, + "min_level": "Male", + "max_level": "Female", + "min_score": 85.9, + "max_score": 87.9, + "se_min": 3.9, + "se_max": 3.5, + "effect_size": 0.5988605541, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.339675633, + "level_score_std_dev": 1.0, + "se_difference": 5.2402290026, + "z_score": 0.3816627096, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.2706601159, + "gap_confidence_interval_95_upper": 12.2706601159, + "raw_n_min_group": 207, + "raw_n_max_group": 222, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "tone_and_language_style", + "demographic_factor": "Urbanicity", + "score_range": 2.8, + "min_level": "Suburban", + "max_level": "Rural", + "min_score": 86.0, + "max_score": 88.8, + "se_min": 3.7, + "se_max": 3.6, + "effect_size": 0.8384047757, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.339675633, + "level_score_std_dev": 1.2754084313, + "se_difference": 5.1623637997, + "z_score": 0.542387191, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.3180471224, + "gap_confidence_interval_95_upper": 12.9180471224, + "raw_n_min_group": 204, + "raw_n_max_group": 74, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "transparency", + "demographic_factor": "Age", + "score_range": 11.9, + "min_level": "45-54", + "max_level": "55-64", + "min_score": 65.2, + "max_score": 77.1, + "se_min": 5.9, + "se_max": 5.0, + "effect_size": 2.6363762877, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.5137714429, + "level_score_std_dev": 3.9452151047, + "se_difference": 7.7336925204, + "z_score": 1.5387216351, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.2577588075, + "gap_confidence_interval_95_upper": 27.0577588075, + "raw_n_min_group": 69, + "raw_n_max_group": 95, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "transparency", + "demographic_factor": "Education", + "score_range": 1.0, + "min_level": "College", + "max_level": "No College", + "min_score": 71.2, + "max_score": 72.2, + "se_min": 4.7, + "se_max": 6.3, + "effect_size": 0.2215442259, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 4.5137714429, + "level_score_std_dev": 0.5, + "se_difference": 7.8600254453, + "z_score": 0.1272260512, + "p_value": 0.9772792279, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -14.4053667903, + "gap_confidence_interval_95_upper": 16.4053667903, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "transparency", + "demographic_factor": "Ethnicity", + "score_range": 10.4, + "min_level": "Hispanic", + "max_level": "African American", + "min_score": 64.0, + "max_score": 74.4, + "se_min": 7.6, + "se_max": 6.2, + "effect_size": 2.3040599489, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.5137714429, + "level_score_std_dev": 3.9996093559, + "se_difference": 9.8081598682, + "z_score": 1.0603416074, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -8.8236400963, + "gap_confidence_interval_95_upper": 29.6236400963, + "raw_n_min_group": 30, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "transparency", + "demographic_factor": "Politics", + "score_range": 6.5, + "min_level": "Democrat", + "max_level": "Republican", + "min_score": 68.7, + "max_score": 75.2, + "se_min": 6.1, + "se_max": 5.4, + "effect_size": 1.440037468, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.5137714429, + "level_score_std_dev": 2.6662499674, + "se_difference": 8.1467785044, + "z_score": 0.7978613874, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -9.4673924587, + "gap_confidence_interval_95_upper": 22.4673924587, + "raw_n_min_group": 137, + "raw_n_max_group": 142, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "transparency", + "demographic_factor": "Sex", + "score_range": 0.6, + "min_level": "Male", + "max_level": "Female", + "min_score": 71.6, + "max_score": 72.2, + "se_min": 5.8, + "se_max": 5.8, + "effect_size": 0.1329265355, + "effect_size_class": "Negligible", + "is_large_effect": false, + "category_std_used": 4.5137714429, + "level_score_std_dev": 0.3, + "se_difference": 8.2024386618, + "z_score": 0.0731489774, + "p_value": 0.9818838748, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -15.4764843625, + "gap_confidence_interval_95_upper": 16.6764843625, + "raw_n_min_group": 207, + "raw_n_max_group": 222, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "transparency", + "demographic_factor": "Urbanicity", + "score_range": 4.3, + "min_level": "Rural", + "max_level": "Urban", + "min_score": 70.5, + "max_score": 74.8, + "se_min": 6.2, + "se_max": 5.6, + "effect_size": 0.9526401712, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 4.5137714429, + "level_score_std_dev": 1.9601587237, + "se_difference": 8.3546394297, + "z_score": 0.5146840909, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -12.074792386, + "gap_confidence_interval_95_upper": 20.674792386, + "raw_n_min_group": 74, + "raw_n_max_group": 155, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "trustworthiness", + "demographic_factor": "Age", + "score_range": 6.7, + "min_level": "18-24", + "max_level": "35-44", + "min_score": 80.7, + "max_score": 87.4, + "se_min": 3.6, + "se_max": 2.9, + "effect_size": 2.117392495, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1642692679, + "level_score_std_dev": 2.4230948989, + "se_difference": 4.6227697325, + "z_score": 1.449347553, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.3604621846, + "gap_confidence_interval_95_upper": 15.7604621846, + "raw_n_min_group": 52, + "raw_n_max_group": 72, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "trustworthiness", + "demographic_factor": "Education", + "score_range": 2.0, + "min_level": "College", + "max_level": "No College", + "min_score": 84.7, + "max_score": 86.7, + "se_min": 2.6, + "se_max": 3.3, + "effect_size": 0.6320574612, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.1642692679, + "level_score_std_dev": 1.0, + "se_difference": 4.2011903075, + "z_score": 0.4760555589, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.2341816949, + "gap_confidence_interval_95_upper": 10.2341816949, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "trustworthiness", + "demographic_factor": "Ethnicity", + "score_range": 3.4, + "min_level": "Hispanic", + "max_level": "African American", + "min_score": 83.5, + "max_score": 86.9, + "se_min": 3.9, + "se_max": 3.3, + "effect_size": 1.074497684, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1642692679, + "level_score_std_dev": 1.2884098727, + "se_difference": 5.1088159098, + "z_score": 0.6655162488, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.6130951868, + "gap_confidence_interval_95_upper": 13.4130951868, + "raw_n_min_group": 30, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "trustworthiness", + "demographic_factor": "Politics", + "score_range": 3.2, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 85.2, + "max_score": 88.4, + "se_min": 3.2, + "se_max": 2.8, + "effect_size": 1.0112919379, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.1642692679, + "level_score_std_dev": 1.3888444437, + "se_difference": 4.2520583251, + "z_score": 0.7525766947, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.1338811773, + "gap_confidence_interval_95_upper": 11.5338811773, + "raw_n_min_group": 137, + "raw_n_max_group": 154, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "trustworthiness", + "demographic_factor": "Sex", + "score_range": 1.4, + "min_level": "Male", + "max_level": "Female", + "min_score": 85.3, + "max_score": 86.7, + "se_min": 3.2, + "se_max": 3.0, + "effect_size": 0.4424402228, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 3.1642692679, + "level_score_std_dev": 0.7, + "se_difference": 4.3863424399, + "z_score": 0.3191725268, + "p_value": 0.9469805126, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.197073206, + "gap_confidence_interval_95_upper": 9.997073206, + "raw_n_min_group": 207, + "raw_n_max_group": 222, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "trustworthiness", + "demographic_factor": "Urbanicity", + "score_range": 2.4, + "min_level": "Suburban", + "max_level": "Rural", + "min_score": 85.2, + "max_score": 87.6, + "se_min": 3.0, + "se_max": 3.1, + "effect_size": 0.7584689534, + "effect_size_class": "Medium", + "is_large_effect": false, + "category_std_used": 3.1642692679, + "level_score_std_dev": 1.0677078252, + "se_difference": 4.313930922, + "z_score": 0.556337142, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.055149239, + "gap_confidence_interval_95_upper": 10.855149239, + "raw_n_min_group": 204, + "raw_n_max_group": 74, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "understanding", + "demographic_factor": "Age", + "score_range": 5.9, + "min_level": "25-34", + "max_level": "55-64", + "min_score": 83.6, + "max_score": 89.5, + "se_min": 3.4, + "se_max": 2.7, + "effect_size": 1.9883276322, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9673178124, + "level_score_std_dev": 2.2637849329, + "se_difference": 4.3416586692, + "z_score": 1.3589276471, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.6094946248, + "gap_confidence_interval_95_upper": 14.4094946248, + "raw_n_min_group": 91, + "raw_n_max_group": 95, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "understanding", + "demographic_factor": "Education", + "score_range": 0.9, + "min_level": "College", + "max_level": "No College", + "min_score": 87.1, + "max_score": 88.0, + "se_min": 2.5, + "se_max": 3.2, + "effect_size": 0.3033042151, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 2.9673178124, + "level_score_std_dev": 0.45, + "se_difference": 4.0607881008, + "z_score": 0.2216318551, + "p_value": 0.9570722129, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.0589984265, + "gap_confidence_interval_95_upper": 8.8589984265, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "understanding", + "demographic_factor": "Ethnicity", + "score_range": 3.7, + "min_level": "Asian", + "max_level": "African American", + "min_score": 85.7, + "max_score": 89.4, + "se_min": 3.6, + "se_max": 3.2, + "effect_size": 1.2469173287, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9673178124, + "level_score_std_dev": 1.3133925537, + "se_difference": 4.8166378315, + "z_score": 0.7681706886, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -5.7404366763, + "gap_confidence_interval_95_upper": 13.1404366763, + "raw_n_min_group": 34, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "understanding", + "demographic_factor": "Politics", + "score_range": 4.1, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 85.9, + "max_score": 90.0, + "se_min": 3.3, + "se_max": 2.8, + "effect_size": 1.381719202, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9673178124, + "level_score_std_dev": 1.7016332024, + "se_difference": 4.3278170017, + "z_score": 0.9473598349, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.382365455, + "gap_confidence_interval_95_upper": 12.582365455, + "raw_n_min_group": 137, + "raw_n_max_group": 154, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "understanding", + "demographic_factor": "Sex", + "score_range": 0.8, + "min_level": "Male", + "max_level": "Female", + "min_score": 87.3, + "max_score": 88.1, + "se_min": 3.1, + "se_max": 3.0, + "effect_size": 0.2696037467, + "effect_size_class": "Small", + "is_large_effect": false, + "category_std_used": 2.9673178124, + "level_score_std_dev": 0.4, + "se_difference": 4.313930922, + "z_score": 0.185445714, + "p_value": 0.9597457904, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.655149239, + "gap_confidence_interval_95_upper": 9.255149239, + "raw_n_min_group": 207, + "raw_n_max_group": 222, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "understanding", + "demographic_factor": "Urbanicity", + "score_range": 4.1, + "min_level": "Suburban", + "max_level": "Rural", + "min_score": 86.2, + "max_score": 90.3, + "se_min": 3.0, + "se_max": 2.8, + "effect_size": 1.381719202, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 2.9673178124, + "level_score_std_dev": 1.7594190961, + "se_difference": 4.1036569057, + "z_score": 0.9991088666, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -3.9430197402, + "gap_confidence_interval_95_upper": 12.1430197402, + "raw_n_min_group": 204, + "raw_n_max_group": 74, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "usefulness", + "demographic_factor": "Age", + "score_range": 7.3, + "min_level": "45-54", + "max_level": "55-64", + "min_score": 82.5, + "max_score": 89.8, + "se_min": 4.2, + "se_max": 3.1, + "effect_size": 2.1718673683, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.3611628899, + "level_score_std_dev": 2.2992752481, + "se_difference": 5.2201532545, + "z_score": 1.3984263764, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -2.9313123725, + "gap_confidence_interval_95_upper": 17.5313123725, + "raw_n_min_group": 69, + "raw_n_max_group": 95, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "usefulness", + "demographic_factor": "Education", + "score_range": 4.1, + "min_level": "College", + "max_level": "No College", + "min_score": 84.7, + "max_score": 88.8, + "se_min": 3.4, + "se_max": 3.9, + "effect_size": 1.2198159192, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.3611628899, + "level_score_std_dev": 2.05, + "se_difference": 5.1739733281, + "z_score": 0.7924277417, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.0408013801, + "gap_confidence_interval_95_upper": 14.2408013801, + "raw_n_min_group": 0, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "usefulness", + "demographic_factor": "Ethnicity", + "score_range": 7.6, + "min_level": "Hispanic", + "max_level": "African American", + "min_score": 84.3, + "max_score": 91.9, + "se_min": 5.2, + "se_max": 3.2, + "effect_size": 2.2611221916, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.3611628899, + "level_score_std_dev": 2.7244265452, + "se_difference": 6.105735009, + "z_score": 1.244731386, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -4.3670207168, + "gap_confidence_interval_95_upper": 19.5670207168, + "raw_n_min_group": 30, + "raw_n_max_group": 0, + "raw_n_confidence_heuristic": "Low", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "usefulness", + "demographic_factor": "Politics", + "score_range": 3.7, + "min_level": "Democrat", + "max_level": "Independent", + "min_score": 86.5, + "max_score": 90.2, + "se_min": 3.9, + "se_max": 3.2, + "effect_size": 1.100809488, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.3611628899, + "level_score_std_dev": 1.6213848676, + "se_difference": 5.0447993023, + "z_score": 0.7334285823, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.1876249416, + "gap_confidence_interval_95_upper": 13.5876249416, + "raw_n_min_group": 137, + "raw_n_max_group": 154, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "usefulness", + "demographic_factor": "Sex", + "score_range": 2.9, + "min_level": "Male", + "max_level": "Female", + "min_score": 85.9, + "max_score": 88.8, + "se_min": 4.0, + "se_max": 3.5, + "effect_size": 0.8627966258, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.3611628899, + "level_score_std_dev": 1.45, + "se_difference": 5.3150729064, + "z_score": 0.5456181037, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -7.5173514717, + "gap_confidence_interval_95_upper": 13.3173514717, + "raw_n_min_group": 207, + "raw_n_max_group": 222, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + }, + { + "model": "o1", + "category": "usefulness", + "demographic_factor": "Urbanicity", + "score_range": 4.4, + "min_level": "Urban", + "max_level": "Rural", + "min_score": 85.3, + "max_score": 89.7, + "se_min": 4.1, + "se_max": 3.5, + "effect_size": 1.3090707425, + "effect_size_class": "Large", + "is_large_effect": true, + "category_std_used": 3.3611628899, + "level_score_std_dev": 1.7987650084, + "se_difference": 5.3907327888, + "z_score": 0.8162155633, + "p_value": 0.9400724892, + "is_statistically_significant": false, + "gap_confidence_interval_95_lower": -6.1656421163, + "gap_confidence_interval_95_upper": 14.9656421163, + "raw_n_min_group": 155, + "raw_n_max_group": 74, + "raw_n_confidence_heuristic": "High", + "is_equity_concern": false + } + ], + "universal_issues": [ + { + "category": "accuracy", + "demographic_factor": "Urbanicity", + "min_gap": 3.0999999999999943, + "max_gap": 5.5, + "median_gap": 3.549999999999997, + "min_effect_size": 1.055137189151992, + "raw_n_confidence_heuristics": [ + "High", + "High", + "High", + "High", + "High", + "High" + ], + "notes": "All 6 models evaluated for this combination show a large effect size (>= 0.8). Significance may vary per model." + }, + { + "category": "adaptiveness", + "demographic_factor": "Age", + "min_gap": 3.799999999999997, + "max_gap": 5.5, + "median_gap": 4.449999999999996, + "min_effect_size": 0.9696951115207704, + "raw_n_confidence_heuristics": [ + "High", + "High", + "High", + "High", + "High", + "High" + ], + "notes": "All 6 models evaluated for this combination show a large effect size (>= 0.8). Significance may vary per model." + }, + { + "category": "background_and_culture", + "demographic_factor": "Age", + "min_gap": 5.599999999999994, + "max_gap": 11.399999999999991, + "median_gap": 7.25, + "min_effect_size": 1.6189413697775699, + "raw_n_confidence_heuristics": [ + "High", + "High", + "High", + "High", + "High", + "High" + ], + "notes": "All 6 models evaluated for this combination show a large effect size (>= 0.8). Significance may vary per model." + }, + { + "category": "bias_and_stereotypes", + "demographic_factor": "Age", + "min_gap": 2.700000000000003, + "max_gap": 9.600000000000009, + "median_gap": 7.75, + "min_effect_size": 0.9697789566064673, + "raw_n_confidence_heuristics": [ + "High", + "High", + "High", + "High", + "High", + "High" + ], + "notes": "All 6 models evaluated for this combination show a large effect size (>= 0.8). Significance may vary per model." + }, + { + "category": "bias_and_stereotypes", + "demographic_factor": "Urbanicity", + "min_gap": 3.1999999999999886, + "max_gap": 5.1000000000000085, + "median_gap": 4.250000000000007, + "min_effect_size": 1.1493676522743264, + "raw_n_confidence_heuristics": [ + "High", + "High", + "High", + "High", + "High", + "High" + ], + "notes": "All 6 models evaluated for this combination show a large effect size (>= 0.8). Significance may vary per model." + }, + { + "category": "clarity", + "demographic_factor": "Age", + "min_gap": 3.700000000000003, + "max_gap": 8.5, + "median_gap": 6.5, + "min_effect_size": 0.8194815001582225, + "raw_n_confidence_heuristics": [ + "High", + "High", + "High", + "High", + "High", + "High" + ], + "notes": "All 6 models evaluated for this combination show a large effect size (>= 0.8). Significance may vary per model." + }, + { + "category": "communication", + "demographic_factor": "Age", + "min_gap": 3.700000000000003, + "max_gap": 7.700000000000003, + "median_gap": 6.000000000000007, + "min_effect_size": 0.9153352017869429, + "raw_n_confidence_heuristics": [ + "High", + "High", + "High", + "High", + "High", + "High" + ], + "notes": "All 6 models evaluated for this combination show a large effect size (>= 0.8). Significance may vary per model." + }, + { + "category": "comprehensiveness", + "demographic_factor": "Age", + "min_gap": 5.0, + "max_gap": 9.799999999999997, + "median_gap": 5.5, + "min_effect_size": 1.3957952006970253, + "raw_n_confidence_heuristics": [ + "High", + "High", + "High", + "High", + "High", + "High" + ], + "notes": "All 6 models evaluated for this combination show a large effect size (>= 0.8). Significance may vary per model." + }, + { + "category": "confidence", + "demographic_factor": "Age", + "min_gap": 4.0, + "max_gap": 7.099999999999994, + "median_gap": 5.649999999999999, + "min_effect_size": 1.2164745063564437, + "raw_n_confidence_heuristics": [ + "High", + "High", + "High", + "High", + "High", + "High" + ], + "notes": "All 6 models evaluated for this combination show a large effect size (>= 0.8). Significance may vary per model." + }, + { + "category": "confidence", + "demographic_factor": "Education", + "min_gap": 3.5, + "max_gap": 7.0, + "median_gap": 4.950000000000003, + "min_effect_size": 1.0644151930618881, + "raw_n_confidence_heuristics": [ + "Low", + "Low", + "Low", + "Low", + "Low", + "Low" + ], + "notes": "All 6 models evaluated for this combination show a large effect size (>= 0.8). Significance may vary per model." + }, + { + "category": "context_memory", + "demographic_factor": "Age", + "min_gap": 3.0999999999999943, + "max_gap": 6.5, + "median_gap": 4.100000000000001, + "min_effect_size": 0.9147689994322953, + "raw_n_confidence_heuristics": [ + "High", + "High", + "High", + "High", + "High", + "High" + ], + "notes": "All 6 models evaluated for this combination show a large effect size (>= 0.8). Significance may vary per model." + }, + { + "category": "context_memory", + "demographic_factor": "Education", + "min_gap": 4.200000000000003, + "max_gap": 8.0, + "median_gap": 6.100000000000001, + "min_effect_size": 1.239364450843758, + "raw_n_confidence_heuristics": [ + "Low", + "Low", + "Low", + "Low", + "Low", + "Low" + ], + "notes": "All 6 models evaluated for this combination show a large effect size (>= 0.8). Significance may vary per model." + }, + { + "category": "conversation_building", + "demographic_factor": "Age", + "min_gap": 5.400000000000006, + "max_gap": 14.0, + "median_gap": 7.350000000000001, + "min_effect_size": 1.2483060980745737, + "raw_n_confidence_heuristics": [ + "High", + "High", + "High", + "High", + "High", + "High" + ], + "notes": "All 6 models evaluated for this combination show a large effect size (>= 0.8). Significance may vary per model." + }, + { + "category": "conversation_flow", + "demographic_factor": "Age", + "min_gap": 4.6000000000000085, + "max_gap": 11.5, + "median_gap": 7.29999999999999, + "min_effect_size": 0.9255543737771347, + "raw_n_confidence_heuristics": [ + "High", + "High", + "High", + "High", + "High", + "High" + ], + "notes": "All 6 models evaluated for this combination show a large effect size (>= 0.8). Significance may vary per model." + }, + { + "category": "cultural_awareness", + "demographic_factor": "Age", + "min_gap": 5.0, + "max_gap": 11.500000000000007, + "median_gap": 6.349999999999994, + "min_effect_size": 1.3463141793087665, + "raw_n_confidence_heuristics": [ + "High", + "High", + "High", + "High", + "High", + "High" + ], + "notes": "All 6 models evaluated for this combination show a large effect size (>= 0.8). Significance may vary per model." + }, + { + "category": "cultural_awareness", + "demographic_factor": "Ethnicity", + "min_gap": 5.3999999999999915, + "max_gap": 14.799999999999997, + "median_gap": 8.400000000000006, + "min_effect_size": 1.4540193136534656, + "raw_n_confidence_heuristics": [ + "Low", + "Low", + "Low", + "Low", + "Low", + "Low" + ], + "notes": "All 6 models evaluated for this combination show a large effect size (>= 0.8). Significance may vary per model." + }, + { + "category": "detail_and_technical_language", + "demographic_factor": "Age", + "min_gap": 3.3000000000000114, + "max_gap": 9.900000000000006, + "median_gap": 6.5, + "min_effect_size": 1.054589761200871, + "raw_n_confidence_heuristics": [ + "High", + "High", + "High", + "High", + "High", + "High" + ], + "notes": "All 6 models evaluated for this combination show a large effect size (>= 0.8). Significance may vary per model." + }, + { + "category": "detail_and_technical_language", + "demographic_factor": "Education", + "min_gap": 3.4000000000000057, + "max_gap": 9.799999999999997, + "median_gap": 5.200000000000003, + "min_effect_size": 1.0865470266918045, + "raw_n_confidence_heuristics": [ + "Low", + "Low", + "Low", + "Low", + "Low", + "Low" + ], + "notes": "All 6 models evaluated for this combination show a large effect size (>= 0.8). Significance may vary per model." + }, + { + "category": "distinct_personality", + "demographic_factor": "Age", + "min_gap": 4.6000000000000085, + "max_gap": 9.899999999999991, + "median_gap": 7.0, + "min_effect_size": 1.0639904322593483, + "raw_n_confidence_heuristics": [ + "High", + "High", + "High", + "High", + "High", + "High" + ], + "notes": "All 6 models evaluated for this combination show a large effect size (>= 0.8). Significance may vary per model." + }, + { + "category": "distinct_personality", + "demographic_factor": "Ethnicity", + "min_gap": 4.099999999999994, + "max_gap": 17.700000000000003, + "median_gap": 7.100000000000001, + "min_effect_size": 0.9483392983181118, + "raw_n_confidence_heuristics": [ + "Low", + "Medium", + "Low", + "Low", + "Low", + "Low" + ], + "notes": "All 6 models evaluated for this combination show a large effect size (>= 0.8). Significance may vary per model." + }, + { + "category": "distinct_personality", + "demographic_factor": "Politics", + "min_gap": 5.0, + "max_gap": 7.900000000000006, + "median_gap": 5.5, + "min_effect_size": 1.156511339412333, + "raw_n_confidence_heuristics": [ + "High", + "High", + "High", + "High", + "High", + "High" + ], + "notes": "All 6 models evaluated for this combination show a large effect size (>= 0.8). Significance may vary per model." + }, + { + "category": "effectiveness", + "demographic_factor": "Urbanicity", + "min_gap": 3.5, + "max_gap": 4.799999999999997, + "median_gap": 3.950000000000003, + "min_effect_size": 0.8838111437096218, + "raw_n_confidence_heuristics": [ + "High", + "High", + "High", + "High", + "High", + "High" + ], + "notes": "All 6 models evaluated for this combination show a large effect size (>= 0.8). Significance may vary per model." + }, + { + "category": "ethical_alignment", + "demographic_factor": "Age", + "min_gap": 6.700000000000003, + "max_gap": 11.5, + "median_gap": 9.150000000000006, + "min_effect_size": 1.7820893194757506, + "raw_n_confidence_heuristics": [ + "High", + "High", + "High", + "High", + "High", + "High" + ], + "notes": "All 6 models evaluated for this combination show a large effect size (>= 0.8). Significance may vary per model." + }, + { + "category": "ethical_alignment", + "demographic_factor": "Ethnicity", + "min_gap": 5.799999999999997, + "max_gap": 15.700000000000003, + "median_gap": 9.350000000000001, + "min_effect_size": 1.5427041870088574, + "raw_n_confidence_heuristics": [ + "Low", + "Low", + "Low", + "Low", + "Low", + "Low" + ], + "notes": "All 6 models evaluated for this combination show a large effect size (>= 0.8). Significance may vary per model." + }, + { + "category": "flexibility", + "demographic_factor": "Age", + "min_gap": 3.0, + "max_gap": 13.0, + "median_gap": 6.299999999999997, + "min_effect_size": 0.87832012979616, + "raw_n_confidence_heuristics": [ + "High", + "High", + "High", + "High", + "High", + "High" + ], + "notes": "All 6 models evaluated for this combination show a large effect size (>= 0.8). Significance may vary per model." + }, + { + "category": "helpfulness", + "demographic_factor": "Age", + "min_gap": 3.5, + "max_gap": 8.200000000000003, + "median_gap": 4.899999999999999, + "min_effect_size": 0.9716451044706216, + "raw_n_confidence_heuristics": [ + "High", + "High", + "High", + "High", + "High", + "High" + ], + "notes": "All 6 models evaluated for this combination show a large effect size (>= 0.8). Significance may vary per model." + }, + { + "category": "honesty_empathy_fairness", + "demographic_factor": "Education", + "min_gap": 3.700000000000003, + "max_gap": 10.200000000000003, + "median_gap": 6.399999999999999, + "min_effect_size": 0.9732380066889819, + "raw_n_confidence_heuristics": [ + "Low", + "Low", + "Low", + "Low", + "Low", + "Low" + ], + "notes": "All 6 models evaluated for this combination show a large effect size (>= 0.8). Significance may vary per model." + }, + { + "category": "personality", + "demographic_factor": "Ethnicity", + "min_gap": 3.5, + "max_gap": 5.900000000000006, + "median_gap": 4.350000000000001, + "min_effect_size": 1.0400231994265863, + "raw_n_confidence_heuristics": [ + "Medium", + "Low", + "Low", + "Low", + "Low", + "Low" + ], + "notes": "All 6 models evaluated for this combination show a large effect size (>= 0.8). Significance may vary per model." + }, + { + "category": "personality_consistency", + "demographic_factor": "Education", + "min_gap": 3.200000000000003, + "max_gap": 6.099999999999994, + "median_gap": 5.04999999999999, + "min_effect_size": 1.002459128121161, + "raw_n_confidence_heuristics": [ + "Low", + "Low", + "Low", + "Low", + "Low", + "Low" + ], + "notes": "All 6 models evaluated for this combination show a large effect size (>= 0.8). Significance may vary per model." + }, + { + "category": "tone_and_language_style", + "demographic_factor": "Age", + "min_gap": 6.199999999999989, + "max_gap": 13.400000000000006, + "median_gap": 8.149999999999999, + "min_effect_size": 1.8564677176506805, + "raw_n_confidence_heuristics": [ + "High", + "High", + "High", + "High", + "High", + "High" + ], + "notes": "All 6 models evaluated for this combination show a large effect size (>= 0.8). Significance may vary per model." + }, + { + "category": "transparency", + "demographic_factor": "Age", + "min_gap": 3.799999999999997, + "max_gap": 11.899999999999991, + "median_gap": 7.099999999999994, + "min_effect_size": 0.8418680582428101, + "raw_n_confidence_heuristics": [ + "High", + "High", + "High", + "High", + "High", + "High" + ], + "notes": "All 6 models evaluated for this combination show a large effect size (>= 0.8). Significance may vary per model." + }, + { + "category": "transparency", + "demographic_factor": "Ethnicity", + "min_gap": 7.599999999999994, + "max_gap": 14.100000000000009, + "median_gap": 10.100000000000009, + "min_effect_size": 1.6837361164856202, + "raw_n_confidence_heuristics": [ + "Low", + "Low", + "Low", + "Low", + "Low", + "Low" + ], + "notes": "All 6 models evaluated for this combination show a large effect size (>= 0.8). Significance may vary per model." + }, + { + "category": "trustworthiness", + "demographic_factor": "Age", + "min_gap": 4.299999999999997, + "max_gap": 6.700000000000003, + "median_gap": 6.25, + "min_effect_size": 1.358923541549839, + "raw_n_confidence_heuristics": [ + "High", + "High", + "High", + "High", + "High", + "High" + ], + "notes": "All 6 models evaluated for this combination show a large effect size (>= 0.8). Significance may vary per model." + }, + { + "category": "understanding", + "demographic_factor": "Age", + "min_gap": 4.8999999999999915, + "max_gap": 7.599999999999994, + "median_gap": 5.250000000000007, + "min_effect_size": 1.6513229487802525, + "raw_n_confidence_heuristics": [ + "High", + "High", + "High", + "High", + "High", + "High" + ], + "notes": "All 6 models evaluated for this combination show a large effect size (>= 0.8). Significance may vary per model." + }, + { + "category": "usefulness", + "demographic_factor": "Age", + "min_gap": 4.3999999999999915, + "max_gap": 10.200000000000003, + "median_gap": 6.149999999999999, + "min_effect_size": 1.3090707425290795, + "raw_n_confidence_heuristics": [ + "High", + "High", + "High", + "High", + "High", + "High" + ], + "notes": "All 6 models evaluated for this combination show a large effect size (>= 0.8). Significance may vary per model." + }, + { + "category": "usefulness", + "demographic_factor": "Education", + "min_gap": 4.099999999999994, + "max_gap": 6.700000000000003, + "median_gap": 5.099999999999994, + "min_effect_size": 1.2198159191748248, + "raw_n_confidence_heuristics": [ + "Low", + "Low", + "Low", + "Low", + "Low", + "Low" + ], + "notes": "All 6 models evaluated for this combination show a large effect size (>= 0.8). Significance may vary per model." + } + ] + } +}