Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Commit
·
456a3cb
1
Parent(s):
78e9bc6
Add Claude 3.7 Sonnet to the leaderboard
Browse files- external_models_results.json +22 -0
external_models_results.json
CHANGED
@@ -531,5 +531,27 @@
|
|
531 |
},
|
532 |
"result_metrics_average": 0.7648947194678011,
|
533 |
"result_metrics_npm": 0.6490441260447987
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
534 |
}
|
535 |
]
|
|
|
531 |
},
|
532 |
"result_metrics_average": 0.7648947194678011,
|
533 |
"result_metrics_npm": 0.6490441260447987
|
534 |
+
},
|
535 |
+
{
|
536 |
+
"model": "claude-3-7-sonnet-20250219",
|
537 |
+
"name": "Claude 3.7 Sonnet (2025-02-19)",
|
538 |
+
"link": "https://www.anthropic.com/",
|
539 |
+
"date": "2025-04-03",
|
540 |
+
"status": "full",
|
541 |
+
"main_language": "English",
|
542 |
+
"model_type": "proprietary",
|
543 |
+
"result_metrics": {
|
544 |
+
"enem_challenge": 0.8901329601119664,
|
545 |
+
"bluex": 0.8456189151599444,
|
546 |
+
"oab_exams": 0.8355353075170843,
|
547 |
+
"assin2_sts": 0.8087979933117393,
|
548 |
+
"assin2_rte": 0.9472965253044003,
|
549 |
+
"faquad_nli": 0.8097848807348216,
|
550 |
+
"hatebr_offensive": 0.9125114739050616,
|
551 |
+
"portuguese_hate_speech": 0.7698524509742262,
|
552 |
+
"tweetsentbr": 0.7842080985659372
|
553 |
+
},
|
554 |
+
"result_metrics_average": 0.8448598450650201,
|
555 |
+
"result_metrics_npm": 0.7622301724524201
|
556 |
}
|
557 |
]
|