Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Add GPT-4o
Browse files- external_models_results.json +23 -1
external_models_results.json
CHANGED
@@ -557,6 +557,28 @@
|
|
557 |
"result_metrics_average": 0.7648947194678011,
|
558 |
"result_metrics_npm": 0.6490441260447987
|
559 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
560 |
{
|
561 |
"model": "claude-3-7-sonnet-20250219",
|
562 |
"name": "Claude 3.7 Sonnet (2025-02-19)",
|
@@ -629,7 +651,7 @@
|
|
629 |
"model": "gemma-3-27b-it",
|
630 |
"name": "google/gemma-3-27b-it (GoogleAI API)",
|
631 |
"link": "https://huggingface.co/google/gemma-3-27b-it",
|
632 |
-
"date": "2025-04-
|
633 |
"status": "full",
|
634 |
"main_language": "English",
|
635 |
"model_type": "chat",
|
|
|
557 |
"result_metrics_average": 0.7648947194678011,
|
558 |
"result_metrics_npm": 0.6490441260447987
|
559 |
},
|
560 |
+
{
|
561 |
+
"model": "gpt-4o-2024-08-06",
|
562 |
+
"name": "GPT-4o (2024-08-06)",
|
563 |
+
"link": "https://www.openai.com/",
|
564 |
+
"date": "2025-04-09",
|
565 |
+
"status": "full",
|
566 |
+
"main_language": "English",
|
567 |
+
"model_type": "proprietary",
|
568 |
+
"result_metrics": {
|
569 |
+
"enem_challenge": 0.8530440867739678,
|
570 |
+
"bluex": 0.7969401947148818,
|
571 |
+
"oab_exams": 0.8200455580865603,
|
572 |
+
"assin2_sts": 0.8078677969518289,
|
573 |
+
"assin2_rte": 0.9407235712144604,
|
574 |
+
"faquad_nli": 0.8654396266184885,
|
575 |
+
"hatebr_offensive": 0.9320137873994456,
|
576 |
+
"portuguese_hate_speech": 0.7512552701451538,
|
577 |
+
"tweetsentbr": 0.7761054092302796
|
578 |
+
},
|
579 |
+
"result_metrics_average": 0.8381594779038962,
|
580 |
+
"result_metrics_npm": 0.7566365012704034
|
581 |
+
},
|
582 |
{
|
583 |
"model": "claude-3-7-sonnet-20250219",
|
584 |
"name": "Claude 3.7 Sonnet (2025-02-19)",
|
|
|
651 |
"model": "gemma-3-27b-it",
|
652 |
"name": "google/gemma-3-27b-it (GoogleAI API)",
|
653 |
"link": "https://huggingface.co/google/gemma-3-27b-it",
|
654 |
+
"date": "2025-04-08",
|
655 |
"status": "full",
|
656 |
"main_language": "English",
|
657 |
"model_type": "chat",
|