LLMArena committed on
Commit
1575c35
·
verified ·
1 Parent(s): 2f93ee6

Upload 2 files

Browse files
elo_results_20250312.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82c36bfa7b66417daa732213e8122a504756bd6db0154784bd0a5e951c294cc2
3
+ size 1744429
leaderboard_table_20250312.csv ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ rating,variance,rating_q975,rating_q025,num_battles,final_ranking,key,Model,License,Organization,Knowledge cutoff date,Link,MT-bench (score),MMLU
2
+ 1010.4073267601368,71.22524500717337,1027.2017348231059,994.5633297083411,1076,14,DeepSeek R1,DeepSeek R1,Open Source,DeepSeek,-,https://huggingface.co/deepseek-ai/DeepSeek-R1,-,-
3
+ 1120.2884330607344,279.34471348402525,1151.8699782512851,1088.9539657531527,331,1,gpt-4o-2024-11-20,gpt-4o-2024-11-20,Proprietary,OpenAI,11-2024,https://openai.com/api/,70.0,50.0
4
+ 1090.6568853777285,155.83121605291333,1114.502676376542,1065.2762490401103,590,1,Google: Gemini Pro 1.5,Google: Gemini Pro 1.5,Proprietary,Google,-,https://gemini.google.com/,-,-
5
+ 1027.0514759995394,56.24497588311864,1040.83261272251,1012.6188550504503,1639,10,claude-3-5-sonnet-20241022,claude-3-5-sonnet-20241022,Proprietary,Anthropic,06-2024,https://docs.anthropic.com/en/docs/intro-to-claude#claude-3-5-family,-,-
6
+ 894.7242001859272,76.50673521968903,911.5347323542308,877.5240604796911,1012,53,Google: Gemini Flash 2.0,Google: Gemini Flash 2.0,Proprietary,Google,-,https://gemini.google.com/,-,-
7
+ 965.3752668224683,205.3203347555349,992.2634546422362,935.4722438587037,369,25,DeepSeek V3,DeepSeek V3,Open Source,DeepSeek,-,https://github.com/deepseek-ai/DeepSeek-V3,-,-
8
+ 965.2126071653736,70.55185461622509,981.028993354862,948.1414737236586,1184,30,claude-3-5-sonnet-20240620,claude-3-5-sonnet-20240620,Proprietary,Anthropic,06-2024,https://docs.anthropic.com/en/docs/intro-to-claude#claude-3-5-family,-,-
9
+ 929.6930105403123,100.14166920930926,948.0140898808216,909.7147243866959,895,39,gpt-4o-2024-05-13,gpt-4o-2024-05-13,Proprietary,OpenAI,05-2024,https://openai.com/api/,70.0,50.0
10
+ 1039.396499875621,67.15412379067834,1053.9308654170454,1022.2865383541152,1261,9,gpt-4-turbo-2024-04-09,gpt-4-turbo-2024-04-09,Proprietary,OpenAI,04-2023,https://openai.com/api/,70.0,50.0
11
+ 882.0682234982544,78.30106748941029,897.5731708567979,864.5043162617403,1014,55,Llama 3.3 70B Instruct,Llama 3.3 70B Instruct,Open Source,Meta,-,https://www.llama.com/,-,-
12
+ 954.0051904361992,78.25167985399976,970.7822132882588,936.9353571968611,1009,35,Google: Gemini Flash 1.5,Google: Gemini Flash 1.5,Proprietary,Google,-,https://gemini.google.com/,-,-
13
+ 923.3945627140915,87.76822906406252,941.7625687324982,904.8167887045892,998,41,Llama 3.1 405B Instruct Turbo,Llama 3.1 405B Instruct Turbo,Proprietary,Meta,-,https://ai.meta.com/blog/meta-llama-3-1/,-,-
14
+ 986.1953978311567,243.3620286736586,1015.4492062018838,953.4053296602099,319,18,claude-3-5-haiku-20241022,claude-3-5-haiku-20241022,Proprietary,Anthropic,10-2024,https://docs.anthropic.com/en/docs/intro-to-claude#claude-3-5-family,-,-
15
+ 989.2853428376088,83.0787514638545,1006.5361779894004,971.237214021768,952,20,Qwen2.5 72B Instruct,Qwen2.5 72B Instruct,Open Source,Qwen,-,https://huggingface.co/Qwen/Qwen2.5-72B-Instruct,-,-
16
+ 943.1820861220498,91.96414131353896,962.248682850685,924.9738388311381,932,37,gpt-4-0613,gpt-4-0613,Proprietary,OpenAI,04-2023,https://openai.com/api/,70.0,50.0
17
+ 1049.5373744477897,63.451619023445126,1065.0658412919797,1034.6171896089954,1235,8,GigaChat-Max-preview 4.0.26.20,GigaChat-Max-preview 4.0.26.20,Proprietary,Sber,In training,https://www.sber-bank.by/new/gigachat-29102024,-,-
18
+ 1096.5137294637816,306.3109976140622,1130.588719621468,1062.4863209569019,297,1,gpt-4o-mini-2024-07-18,gpt-4o-mini-2024-07-18,Proprietary,OpenAI,07-2024,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/,70.0,50.0
19
+ 1116.6022849131039,75.19536059923723,1134.2704204606462,1101.1437571972338,1209,1,MiniMax: MiniMax-Text-01,MiniMax: MiniMax-Text-01,Open Source,MiniMaxAI,-,https://huggingface.co/MiniMaxAI/MiniMax-Text-01,-,-
20
+ 991.3790694979898,56.18694016995274,1006.4892733433867,976.6277865435887,1435,20,YandexGPT 5 Pro,YandexGPT 5 Pro,Proprietary,Yandex,In training,https://yandex.cloud/ru/blog/posts/2025/02/yandex-gpt-5-0,45.2,35.2
21
+ 930.0373996973137,47.67068212154111,943.5979166063132,916.9059817278331,1536,39,Gemma 2 27B,Gemma 2 27B,Proprietary,Google,-,https://blog.google/technology/developers/google-gemma-2/,-,-
22
+ 1048.681659990317,48.37379465450069,1061.806821480492,1035.9603067362846,1674,9,Llama 3.1 70B Instruct Turbo,Llama 3.1 70B Instruct Turbo,Proprietary,Meta,-,https://ai.meta.com/blog/meta-llama-3-1/,-,-
23
+ 1015.791005250039,52.12360889471421,1030.5324567338773,1002.4497882403325,1672,12,RefalMachine/RuadaptQwen2.5-32B-Pro-Beta,RefalMachine/RuadaptQwen2.5-32B-Pro-Beta,Open Source,RefalMachine,-,https://huggingface.co/RefalMachine/RuadaptQwen2.5-32B-Pro-Beta,-,-
24
+ 936.0285084097231,49.62483006930933,950.5046168033663,922.607615134579,1759,38,t-tech/T-pro-it-1.0,t-tech/T-pro-it-1.0,Open Source,t-bank-ai,-,https://huggingface.co/t-tech/T-pro-it-1.0,-,-
25
+ 943.3049958416082,66.76360153192905,959.5510649654458,927.3251169879088,1150,37,saiga_llama3_70b,saiga_llama3_70b,Open Source,Ilya Gusev,In training,https://huggingface.co/IlyaGusev/saiga_llama3_70b_sft_m1_d5_abliterated_awq_4bit,-,-
26
+ 1050.8574913871118,117.6222908895185,1070.9640562748386,1030.2012040856987,708,6,Cohere: Command R+ (08-2024),Cohere: Command R+ (08-2024),Open Source,Cohere,-,https://docs.cohere.com/v2/docs/command-r-plus,-,-
27
+ 885.5695238379504,183.96679504630083,911.8271412357735,858.4797261821544,458,53,claude-3-haiku-20240307,claude-3-haiku-20240307,Proprietary,Anthropic,03-2024,https://docs.anthropic.com/en/docs/intro-to-claude#claude-3-family,-,-
28
+ 961.6552432031702,143.07151469167857,985.8789247465296,938.3603035154537,538,27,YandexGPT Experimental,YandexGPT Experimental,Proprietary,Yandex,In training,https://ya.ru/ai/gpt-3,45.2,35.2
29
+ 1038.2925656376103,175.8339023892628,1064.197235964544,1013.2363271357882,459,8,Qwen 2 Instruct (72B),Qwen 2 Instruct (72B),Open Source,Qwen,12-2023,https://huggingface.co/Qwen/Qwen2-72B-Instruct,-,-
30
+ 1001.2745919363508,211.2978058939394,1030.0054304763707,974.0580630779652,339,13,Mistral Small 3,Mistral Small 3,Open Source,Mistral,-,https://mistral.ai/news/mistral-small-3,-,-
31
+ 1004.9393073910051,54.50094473029259,1019.1315965347984,989.7894563348777,1497,18,Vikhrmodels/Vikhr-Nemo-12B-Instruct-R-21-09-24,Vikhrmodels/Vikhr-Nemo-12B-Instruct-R-21-09-24,Open Source,Vikhrmodels,In training,https://huggingface.co/Vikhrmodels/Vikhr-Nemo-12B-Instruct-R-21-09-24,-,-
32
+ 1042.3834731892173,69.90235876698841,1057.9147752616295,1026.0005183426083,1109,9,YandexGPT 4 Pro,YandexGPT 4 Pro,Proprietary,Yandex,In training,https://ya.ru/ai/gpt-4,45.2,35.2
33
+ 992.4652676900004,96.44915701195883,1012.0587718035225,973.0141445051345,798,20,Qwen2.5 Coder 32B Instruct,Qwen2.5 Coder 32B Instruct,Open Source,Qwen,-,https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct,-,-
34
+ 1013.5525552908059,216.84979283489938,1044.4768520374669,984.0333783909465,368,10,LLaMA-3 Chat (70B),LLaMA-3 Chat (70B),Proprietary,Meta,12-2023,https://llama.meta.com/llama3/,-,-
35
+ 941.7149146995179,98.09116234947021,960.8347341932366,921.2529813757608,758,37,GigaChat-Pro 4.0.26.20,GigaChat-Pro 4.0.26.20,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
36
+ 940.0048049226119,73.2925637328862,956.7172135916235,923.0333730083152,1319,37,GigaChat-Pro 4.0.26.15,GigaChat-Pro 4.0.26.15,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
37
+ 999.0688076985209,109.6796291698614,1018.3958907046938,977.2166405287326,743,18,gpt-3.5-turbo-0125,gpt-3.5-turbo-0125,Proprietary,OpenAI,09-2021,https://openai.com/api/,65.2,45.2
38
+ 935.4778948834083,114.82634416979657,956.8518302545663,914.283785948659,740,37,YandexGPT 3 Pro,YandexGPT 3 Pro,Proprietary,Yandex,In training,https://ya.ru/ai/gpt-3,65.2,45.2
39
+ 938.4503309630044,42.92010247876116,951.8925701127134,925.6906330029655,1800,38,t-tech/T-lite-it-1.0,t-tech/T-lite-it-1.0,Open Source,t-bank-ai,-,https://huggingface.co/t-tech/T-lite-it-1.0,-,-
40
+ 976.483763415516,47.473384545131736,990.1772325393277,963.0427874078484,1786,26,GigaChat 4.0.26.15,GigaChat 4.0.26.15,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
41
+ 993.9683807990049,41.38466748913257,1006.5615900089268,981.3222823853195,1872,20,GigaChat 4.0.26.20,GigaChat 4.0.26.20,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
42
+ 1034.5975496514845,321.76876535913675,1070.9722229228912,1000.1269234138199,261,6,Microsoft: Phi 4,Microsoft: Phi 4,Open Source,Microsoft,-,https://huggingface.co/microsoft/phi-4,-,-
43
+ 1008.9204905720724,61.203165145556966,1024.1144158884392,993.2088230608326,1191,17,saiga_llama3_8b_v7,saiga_llama3_8b_v7,Open Source,Ilya Gusev,In training,https://huggingface.co/IlyaGusev/saiga_llama3_8b,-,-
44
+ 1046.6947345614783,104.84797536726725,1066.5868309719588,1027.9311391093731,767,7,GigaChat-Plus 4.0.26.15,GigaChat-Plus 4.0.26.15,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
45
+ 1090.2319426108288,58.77334025082285,1105.0779905607553,1075.1353337593275,1536,1,saiga_phi3_medium,saiga_phi3_medium,Open Source,Ilya Gusev,In training,https://huggingface.co/IlyaGusev/saiga_phi3_medium_sft_m1_d2_kto_m5_d7,-,-
46
+ 1110.767986872094,104.16129931035837,1130.4876237433998,1090.9392202249908,870,1,Llama 3.2 11B Instruct,Llama 3.2 11B Instruct,Open Source,Meta,-,https://www.llama.com/,-,-
47
+ 1009.677989050327,54.13632615846207,1024.4188024199461,995.112990176132,1476,17,GigaChat-Pro 4.0.26.8,GigaChat-Pro 4.0.26.8,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
48
+ 976.8637966722245,41.59226256237247,988.767643128392,963.8660263159662,1858,27,RefalMachine/ruadapt_llama3_instruct_lep_saiga_kto_ablitirated,RefalMachine/ruadapt_llama3_instruct_lep_saiga_kto_ablitirated,Open Source,RefalMachine,-,https://huggingface.co/RefalMachine/ruadapt_llama3_instruct_lep_saiga_kto_ablitirated,-,-
49
+ 1039.4150600119654,44.69489403195483,1052.597086401063,1026.4910052548862,1954,9,T-lite-instruct-0.1,T-lite-instruct-0.1,Open Source,t-bank-ai,In training,https://huggingface.co/AnatoliiPotapov/T-lite-instruct-0.1,-,-
50
+ 1061.4214988270683,40.848184414905575,1073.5254950726878,1048.2546026734915,1986,6,YandexGPT 3 Lite,YandexGPT 3 Lite,Proprietary,Yandex,In training,https://ya.ru/ai/gpt-3,45.2,35.2
51
+ 1082.2112417958133,42.226404914253,1095.0286951283763,1069.5481651569673,2019,3,Llama 3.1 8B Instruct Turbo,Llama 3.1 8B Instruct Turbo,Proprietary,Meta,-,https://ai.meta.com/blog/meta-llama-3-1/,-,-
52
+ 1119.4662922374557,122.82668639998877,1141.6674974445223,1097.4431880083982,704,1,Vikhrmodels/it-5.2-fp16-cp,Vikhrmodels/it-5.2-fp16-cp,Open Source,Vikhrmodels,In training,https://huggingface.co/Vikhrmodels/it-5.2-fp16-cp,-,-
53
+ 1038.4570307724723,47.0374608317294,1051.8198096503982,1025.078398457144,1748,9,LLaMA-3 Chat (8B),LLaMA-3 Chat (8B),Proprietary,Meta,03-2023,https://llama.meta.com/llama3/,-,-
54
+ 1012.4412940975857,109.66452139839825,1031.6540647585537,991.2203235800172,697,12,GigaChat 4.0.26.8,GigaChat 4.0.26.8,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
55
+ 911.4289778855225,251.1963203096412,944.2627056459994,880.686227647264,273,39,GigaChat-Pro 2.2.25.3,GigaChat-Pro 2.2.25.3,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
56
+ 957.8687244653629,48.409278574419154,971.2962404521836,943.410409283664,1449,34,saiga_llama3_8b_v6,saiga_llama3_8b_v6,Open Source,Ilya Gusev,In training,https://huggingface.co/IlyaGusev/saiga_llama3_8b,-,-
57
+ 950.7964205408416,147.45058163542055,975.4606037575222,927.4969354833961,517,32,GigaChat 3.1.25.3,GigaChat 3.1.25.3,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
58
+ 970.5706624615358,201.84384214900155,997.23699697184,941.9664699317148,360,22,MTSAIR/Cotype-Nano,MTSAIR/Cotype-Nano,Open Source,MTSAIR,-,https://huggingface.co/MTSAIR/Cotype-Nano,-,-
59
+ 1013.3150663828466,224.33544295762573,1041.9900629714239,982.8095972230813,282,10,GigaChat-Plus 3.1.25.3,GigaChat-Plus 3.1.25.3,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-