chatbot-arena-leaderboard / leaderboard_table_20250409.csv
LLMArena's picture
Upload 2 files
1d2fe42 verified
rating,variance,rating_q975,rating_q025,num_battles,final_ranking,key,Model,License,Organization,Knowledge cutoff date,Link,MT-bench (score),MMLU
1006.1259414348181,69.55288400465713,1023.5271380510519,990.3332152490776,1124,16,gpt-4o-2024-11-20,gpt-4o-2024-11-20,Proprietary,OpenAI,11-2024,https://openai.com/api/,70.0,50.0
1108.791547310796,191.41437544029145,1136.2253762075313,1082.3066013830446,509,1,Google: Gemini Pro 1.5,Google: Gemini Pro 1.5,Proprietary,Google,-,https://gemini.google.com/,-,-
1086.7275924868504,118.10928707904564,1109.388918927323,1066.4960173579577,710,1,DeepSeek R1,DeepSeek R1,Open Source,DeepSeek,-,https://huggingface.co/deepseek-ai/DeepSeek-R1,-,-
1025.3017939993736,54.43180614874445,1040.347534901134,1010.8203118304381,1673,11,claude-3-5-sonnet-20241022,claude-3-5-sonnet-20241022,Proprietary,Anthropic,06-2024,https://docs.anthropic.com/en/docs/intro-to-claude#claude-3-5-family,-,-
968.6064360138245,232.5635027436972,997.9667128540724,939.6631858333446,346,23,claude-3-7-sonnet-20250219,claude-3-7-sonnet-20250219,Proprietary,Anthropic,06-2024,https://docs.anthropic.com/en/docs/intro-to-claude#claude-3-5-family,-,-
1046.3732765022628,236.1047896971441,1074.7479513117644,1016.5649754526733,302,5,Google: Gemini Flash 2.0,Google: Gemini Flash 2.0,Proprietary,Google,-,https://gemini.google.com/,-,-
986.6648646636394,258.73018734400375,1015.8502881704742,953.859584259983,282,20,claude-3-5-sonnet-20240620,claude-3-5-sonnet-20240620,Proprietary,Anthropic,06-2024,https://docs.anthropic.com/en/docs/intro-to-claude#claude-3-5-family,-,-
891.6863542692658,88.17269771535372,909.5997257634833,873.5360506799598,984,57,DeepSeek V3,DeepSeek V3,Open Source,DeepSeek,-,https://github.com/deepseek-ai/DeepSeek-V3,-,-
962.8999520806199,198.21382152283775,991.3217673094504,936.1843358080105,369,24,gpt-4o-2024-05-13,gpt-4o-2024-05-13,Proprietary,OpenAI,05-2024,https://openai.com/api/,70.0,50.0
962.1719091541096,65.64855206581436,977.5853559986507,947.3731793830349,1231,33,gpt-4-turbo-2024-04-09,gpt-4-turbo-2024-04-09,Proprietary,OpenAI,04-2023,https://openai.com/api/,70.0,50.0
926.9466621658567,94.99696365779795,945.6366918540426,908.1777397513816,895,42,claude-3-5-haiku-20241022,claude-3-5-haiku-20241022,Proprietary,Anthropic,10-2024,https://docs.anthropic.com/en/docs/intro-to-claude#claude-3-5-family,-,-
1036.871921226108,57.33146117357404,1051.5836044764578,1021.5765567858792,1309,10,Google: Gemini Flash 1.5,Google: Gemini Flash 1.5,Proprietary,Google,-,https://gemini.google.com/,-,-
880.0627152303423,88.35857655087098,899.2754505481579,862.3950040771906,994,59,Llama 3.1 405B Instruct Turbo,Llama 3.1 405B Instruct Turbo,Proprietary,Meta*,-,https://ai.meta.com/blog/meta-llama-3-1/,-,-
954.889750179493,66.78916123732644,969.4598097023313,938.725902077859,1063,37,GigaChat 2 Max,GigaChat 2 Max,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
920.092318582865,101.04517109643774,939.6152374432452,901.6272688859958,981,45,Llama 3.3 70B Instruct,Llama 3.3 70B Instruct,Open Source,Meta*,-,https://www.llama.com/,-,-
983.7510514923747,226.81438927827375,1013.0476206645159,956.4863114647407,319,20,Qwen2.5 72B Instruct,Qwen2.5 72B Instruct,Open Source,Qwen,-,https://huggingface.co/Qwen/Qwen2.5-72B-Instruct,-,-
988.0112012289453,69.98133940889008,1004.3386842527016,971.697162464126,1000,22,GigaChat-Max-preview 4.0.26.20,GigaChat-Max-preview 4.0.26.20,Proprietary,Sber,In training,https://www.sber-bank.by/new/gigachat-29102024,-,-
940.395682356073,87.17949978872772,959.8336051984425,922.2712789217929,932,39,gpt-4-0613,gpt-4-0613,Proprietary,OpenAI,04-2023,https://openai.com/api/,70.0,50.0
1046.8865166899866,60.74545586760719,1062.2260525915092,1031.4053963283675,1280,10,MiniMax: MiniMax-Text-01,MiniMax: MiniMax-Text-01,Open Source,MiniMaxAI,-,https://huggingface.co/MiniMaxAI/MiniMax-Text-01,-,-
1091.768282975829,179.06283817656688,1118.3057220007972,1063.9770029003716,525,1,gpt-4o-mini-2024-07-18,gpt-4o-mini-2024-07-18,Proprietary,OpenAI,07-2024,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/,70.0,50.0
1111.0741688431015,67.9884155171522,1127.7076387593254,1096.516486460478,1253,1,Gemma 2 27B,Gemma 2 27B,Proprietary,Google,-,https://blog.google/technology/developers/google-gemma-2/,-,-
988.2323709432126,47.9047650406522,1001.8877952085592,974.4014448779643,1471,22,RefalMachine/RuadaptQwen2.5-32B-Pro-Beta,RefalMachine/RuadaptQwen2.5-32B-Pro-Beta,Open Source,RefalMachine,-,https://huggingface.co/RefalMachine/RuadaptQwen2.5-32B-Pro-Beta,-,-
929.3236653151272,44.53275090811206,941.7792110962093,915.9642466047993,1570,43,Llama 3.1 70B Instruct Turbo,Llama 3.1 70B Instruct Turbo,Proprietary,Meta*,-,https://ai.meta.com/blog/meta-llama-3-1/,-,-
1046.607058691627,50.232090467911966,1061.445068363775,1032.549422731957,1719,10,YandexGPT 5 Pro,YandexGPT 5 Pro,Proprietary,Yandex,In training,https://yandex.cloud/ru/blog/posts/2025/02/yandex-gpt-5-0,45.2,35.2
1013.6383554397992,47.486616083864476,1026.6584213516408,999.8136699447194,1724,14,t-tech/T-pro-it-1.0,t-tech/T-pro-it-1.0,Open Source,t-bank-ai,-,https://huggingface.co/t-tech/T-pro-it-1.0,-,-
932.0019630524619,44.210419310557235,944.5239212079074,917.9191202991792,1811,42,saiga_llama3_70b,saiga_llama3_70b,Open Source,Ilya Gusev,In training,https://huggingface.co/IlyaGusev/saiga_llama3_70b_sft_m1_d5_abliterated_awq_4bit,-,-
942.5646917927029,73.78461009770122,958.8684952815556,925.9531657081408,1192,39,Mistral Small 3,Mistral Small 3,Open Source,Mistral,-,https://mistral.ai/news/mistral-small-3,-,-
1043.1890007192787,96.66852196779347,1062.3073536931229,1024.2705352107012,834,10,YandexGPT Experimental,YandexGPT Experimental,Proprietary,Yandex,In training,https://ya.ru/ai/gpt-3,45.2,35.2
886.4633042006025,136.60905358182714,907.5483827768282,863.1659700454305,575,58,Cohere: Command R+ (08-2024),Cohere: Command R+ (08-2024),Open Source,Cohere,-,https://docs.cohere.com/v2/docs/command-r-plus,-,-
950.5941943248349,111.52917696768456,973.2799834846347,930.3586394062529,666,35,claude-3-haiku-20240307,claude-3-haiku-20240307,Proprietary,Anthropic,03-2024,https://docs.anthropic.com/en/docs/intro-to-claude#claude-3-family,-,-
1035.9983315745394,143.90146352477166,1058.6256121506533,1012.7229056995362,576,10,Qwen 2 Instruct (72B),Qwen 2 Instruct (72B),Open Source,Qwen,12-2023,https://llama.meta.com/llama3/,-,-
1007.54691652338,126.09796356832508,1030.3764414565796,985.5372240973011,552,14,Vikhrmodels/Vikhr-Nemo-12B-Instruct-R-21-09-24,Vikhrmodels/Vikhr-Nemo-12B-Instruct-R-21-09-24,Open Source,Vikhrmodels,In training,https://huggingface.co/Vikhrmodels/Vikhr-Nemo-12B-Instruct-R-21-09-24,-,-
1000.9782156891133,52.37743001815035,1015.2472205270283,988.0890809317389,1552,20,YandexGPT 4 Pro,YandexGPT 4 Pro,Proprietary,Yandex,In training,https://ya.ru/ai/gpt-4,45.2,35.2
1039.5183805166591,70.15855436345713,1056.287510858957,1022.9965667535146,1156,10,LLaMA-3 Chat (70B),LLaMA-3 Chat (70B),Proprietary,Meta*,12-2023,https://llama.meta.com/llama3/,-,-
987.9532371131388,82.8792648157104,1006.2947443875977,970.6909806218003,916,22,GigaChat-Pro 4.0.26.20,GigaChat-Pro 4.0.26.20,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
1018.5259583389198,139.6830604564245,1040.5187327160268,996.4753328290461,539,11,Qwen2.5 Coder 32B Instruct,Qwen2.5 Coder 32B Instruct,Open Source,Qwen,-,https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct,-,-
938.5213858311263,103.52868046785214,958.8970323800178,918.3180199124297,758,39,GigaChat 2 Pro,GigaChat 2 Pro,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
936.0536249335278,66.59682858069917,952.5559737631046,919.72046802144,1360,41,GigaChat-Pro 4.0.26.15,GigaChat-Pro 4.0.26.15,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
993.9187905178396,103.06949617380431,1013.9158213601428,974.2721143671192,788,20,gpt-3.5-turbo-0125,gpt-3.5-turbo-0125,Proprietary,OpenAI,09-2021,https://openai.com/api/,65.2,45.2
933.2643957739294,105.89669736661072,952.250788242389,913.1644790192257,765,41,YandexGPT 3 Pro,YandexGPT 3 Pro,Proprietary,Yandex,In training,https://ya.ru/ai/gpt-3,65.2,45.2
936.4914768035474,44.1785523050009,949.4865082899125,924.1285251294873,1780,41,GigaChat 2 Lite,GigaChat 2 Lite,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
973.8999512476831,43.62725245436129,987.0315222493833,961.0304314680603,1767,30,t-tech/T-lite-it-1.0,t-tech/T-lite-it-1.0,Open Source,t-bank-ai,-,https://huggingface.co/t-tech/T-lite-it-1.0,-,-
991.2215416562731,39.8515014082909,1004.0368537652042,979.0762193811977,1914,22,GigaChat 4.0.26.15,GigaChat 4.0.26.15,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
1012.6249052507358,134.96781707346435,1034.9434113538427,991.0702671617328,575,11,GigaChat 4.0.26.20,GigaChat 4.0.26.20,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
1006.3684468804726,64.83984705723331,1022.0079480363991,990.3523589130326,1185,17,GigaChat-Plus 4.0.26.15,GigaChat-Plus 4.0.26.15,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
1051.1662310492325,100.77670406495862,1070.4786178886336,1031.0452493308608,904,6,saiga_llama3_8b_v7,saiga_llama3_8b_v7,Open Source,Ilya Gusev,In training,https://huggingface.co/IlyaGusev/saiga_llama3_8b,-,-
1088.046398280536,55.85960887650237,1102.6800853326736,1074.0153322093015,1563,1,Microsoft: Phi 4,Microsoft: Phi 4,Open Source,Microsoft,-,https://huggingface.co/microsoft/phi-4,-,-
1104.8046617277482,89.12433190753156,1125.5441303137152,1087.781145508094,1006,1,saiga_phi3_medium,saiga_phi3_medium,Open Source,Ilya Gusev,In training,https://huggingface.co/IlyaGusev/saiga_phi3_medium_sft_m1_d2_kto_m5_d7,-,-
1095.2150658983212,222.12258660411507,1122.9886927023438,1064.777567793472,390,1,Llama 3.2 11B Instruct,Llama 3.2 11B Instruct,Open Source,Meta*,-,https://www.llama.com/,-,-
1006.0523656551303,54.53681938513538,1020.1712725347743,991.240029502129,1509,19,GigaChat-Pro 4.0.26.8,GigaChat-Pro 4.0.26.8,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
974.0856470988368,42.61492054130495,986.1413047726041,959.9271206502428,1884,30,RefalMachine/ruadapt_llama3_instruct_lep_saiga_kto_ablitirated,RefalMachine/ruadapt_llama3_instruct_lep_saiga_kto_ablitirated,Open Source,RefalMachine,-,https://huggingface.co/RefalMachine/ruadapt_llama3_instruct_lep_saiga_kto_ablitirated,-,-
1036.4725668414208,44.26188658411953,1049.7762951175819,1023.629845766662,1989,10,YandexGPT 3 Lite,YandexGPT 3 Lite,Proprietary,Yandex,In training,https://ya.ru/ai/gpt-3,45.2,35.2
1058.6879593749002,42.160892817713915,1071.095710156945,1045.6205361648413,2007,6,T-lite-instruct-0.1,T-lite-instruct-0.1,Open Source,t-bank-ai,In training,https://huggingface.co/AnatoliiPotapov/T-lite-instruct-0.1,-,-
1078.276445096233,41.05404589406256,1090.6903096403657,1066.0672783645552,2032,3,Vikhrmodels/it-5.2-fp16-cp,Vikhrmodels/it-5.2-fp16-cp,Open Source,Vikhrmodels,In training,https://huggingface.co/Vikhrmodels/it-5.2-fp16-cp,-,-
1113.8908310928305,112.15452796659514,1136.1585117232646,1093.260350596063,749,1,Llama 3.1 8B Instruct Turbo,Llama 3.1 8B Instruct Turbo,Proprietary,Meta*,-,https://ai.meta.com/blog/meta-llama-3-1/,-,-
1034.8312971889486,44.346822184953936,1047.8404784925256,1021.9804348450857,1795,10,LLaMA-3 Chat (8B),LLaMA-3 Chat (8B),Proprietary,Meta*,03-2023,https://llama.meta.com/llama3/,-,-
1008.3163690726083,105.00247372140102,1028.479998804267,987.6668091332642,676,14,GigaChat 4.0.26.8,GigaChat 4.0.26.8,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
954.3537811463326,51.77715547304934,968.3886440428691,940.6221220593008,1483,37,GigaChat-Pro 2.2.25.3,GigaChat-Pro 2.2.25.3,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
947.2222889827361,164.47887636388202,972.6722683351983,921.144716476206,497,35,GigaChat 3.1.25.3,GigaChat 3.1.25.3,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
967.6072131994962,157.42112958074418,991.6291011977595,943.3346460585232,449,24,MTSAIR/Cotype-Nano,MTSAIR/Cotype-Nano,Open Source,MTSAIR,-,https://huggingface.co/MTSAIR/Cotype-Nano,-,-
1011.2825390430002,168.34185662988236,1037.4559385016694,985.5702695542409,387,11,GigaChat-Plus 3.1.25.3,GigaChat-Plus 3.1.25.3,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-