chatbot-arena-leaderboard / leaderboard_table_20241104.csv
LLMArena's picture
update lb
1f74766 verified
rating,variance,rating_q975,rating_q025,num_battles,final_ranking,key,Model,License,Organization,Knowledge cutoff date,Link,MT-bench (score),MMLU
1020.0941337638236,266.79812389634293,1048.4714871714837,990.157272263124,389,6,Google: Gemini Pro 1.5,Google: Gemini Pro 1.5,Proprietary,Google,-,https://gemini.google.com/,-,-
1050.4891964460119,164.8566019121936,1070.4758304563127,1030.4526046986537,950,5,gpt-4o-2024-05-13,gpt-4o-2024-05-13,Proprietary,OpenAI,10-2023,https://openai.com/api/,70.0,50.0
907.1540779119522,152.44312233878705,924.8292787048002,888.5831381049169,966,31,claude-3-5-sonnet-20240620,claude-3-5-sonnet-20240620,Proprietary,Anthropic,06-2024,https://docs.anthropic.com/en/docs/intro-to-claude#claude-3-5-family,-,-
963.7540321695844,315.4811417431863,992.4294197495084,929.2336901178561,298,17,Google: Gemini Flash 1.5,Google: Gemini Flash 1.5,Proprietary,Google,-,https://gemini.google.com/,-,-
938.3500673452211,167.34970436668493,959.2617772252623,917.5310845866042,870,21,gpt-4-turbo-2024-04-09,gpt-4-turbo-2024-04-09,Proprietary,OpenAI,04-2023,https://openai.com/api/,70.0,50.0
895.933980228049,154.01351264006163,914.4238811231883,877.0843085172172,978,36,gpt-4-0613,gpt-4-0613,Proprietary,OpenAI,04-2023,https://openai.com/api/,70.0,50.0
952.1854601378284,299.1996605180205,982.9796880907819,916.5655701930941,332,19,Qwen2.5 72B Instruct,Qwen2.5 72B Instruct,Open Source,Qwen,-,https://huggingface.co/Qwen/Qwen2.5-72B-Instruct,-,-
933.5289712493295,161.05840171419112,952.064415468989,914.8729396004192,950,22,Llama 3.1 405B Instruct Turbo,Llama 3.1 405B Instruct Turbo,Proprietary,Meta,-,https://ai.meta.com/blog/meta-llama-3-1/,-,-
983.3850524316155,334.0926296624673,1014.6138397344725,948.3324666111489,238,11,gpt-4o-mini-2024-07-18,gpt-4o-mini-2024-07-18,Proprietary,OpenAI,07-2024,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/,70.0,50.0
953.8875162026574,155.46350601080604,970.8543168490975,933.3240200196258,905,21,Gemma 2 27B,Gemma 2 27B,Proprietary,Google,-,https://blog.google/technology/developers/google-gemma-2/,-,-
1100.9015626574596,270.55404691951156,1129.4929575183326,1071.694889373809,396,1,saiga_llama3_70b,saiga_llama3_70b,Open Source,Ilya Gusev,In training,https://huggingface.co/IlyaGusev/saiga_llama3_70b_sft_m1_d5_abliterated_awq_4bit,-,-
1133.6193389947878,332.6945486648912,1166.44133076706,1101.6242001831574,356,1,YandexGPT 4 Pro,YandexGPT 4 Pro,Proprietary,Yandex,In training,https://ya.ru/ai/gpt-4,45.2,35.2
1008.4955571330311,169.5668466091922,1028.1800382090728,986.730713859822,797,11,Llama 3.1 70B Instruct Turbo,Llama 3.1 70B Instruct Turbo,Proprietary,Meta,-,https://ai.meta.com/blog/meta-llama-3-1/,-,-
951.312998474229,152.24765882194694,969.7726145372793,930.4924520576633,820,21,YandexGPT Experimental,YandexGPT Experimental,Proprietary,Yandex,In training,https://ya.ru/ai/gpt-3,45.2,35.2
1061.7703080153135,149.62906437210688,1079.7055041468923,1043.0716952280472,979,4,Cohere: Command R+ (08-2024),Cohere: Command R+ (08-2024),Open Source,Cohere,-,https://docs.cohere.com/v2/docs/command-r-plus,-,-
1024.8753916951505,149.62487638126635,1043.5382073115381,1005.8739755282235,995,7,claude-3-haiku-20240307,claude-3-haiku-20240307,Proprietary,Anthropic,03-2024,https://docs.anthropic.com/en/docs/intro-to-claude#claude-3-family,-,-
950.3516767115293,147.4032889916814,967.4049958849685,929.9097475521759,1018,21,Qwen 2 Instruct (72B),Qwen 2 Instruct (72B),Open Source,Qwen,12-2023,https://llama.meta.com/llama3/,-,-
966.7759450452351,262.8182107437766,994.1637029450085,935.474415495778,381,17,LLaMA-3 Chat (70B),LLaMA-3 Chat (70B),Proprietary,Meta,12-2023,https://llama.meta.com/llama3/,-,-
1017.5355191893299,180.81807660147226,1037.1607008833687,994.7052412113591,813,9,gpt-3.5-turbo-0125,gpt-3.5-turbo-0125,Proprietary,OpenAI,09-2021,https://openai.com/api/,65.2,45.2
1062.4649735204273,299.03569628234584,1092.211382202106,1030.6266052156398,381,3,YandexGPT 3 Pro,YandexGPT 3 Pro,Proprietary,Yandex,In training,https://ya.ru/ai/gpt-3,65.2,45.2
942.5948037306767,325.4500264064562,972.5128013555731,908.0663108275484,310,21,Vikhrmodels/Vikhr-Nemo-12B-Instruct-R-21-09-24,Vikhrmodels/Vikhr-Nemo-12B-Instruct-R-21-09-24,Open Source,Vikhrmodels,In training,https://huggingface.co/Vikhrmodels/Vikhr-Nemo-12B-Instruct-R-21-09-24,-,-
952.6100543737643,184.3208375229143,972.2134616796886,930.2982204435816,930,21,GigaChat-Pro 4.0.26.15,GigaChat-Pro 4.0.26.15,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
985.8990151017026,243.33892162724038,1012.5603014418141,957.9666717260004,411,11,Llama 3.2 11B Instruct,Llama 3.2 11B Instruct,Open Source,Meta,-,https://www.llama.com/,-,-
948.8582592254037,234.38623001489805,974.4234257088974,921.3483474040983,450,20,saiga_llama3_8b_v7,saiga_llama3_8b_v7,Open Source,Ilya Gusev,In training,https://huggingface.co/IlyaGusev/saiga_llama3_8b,-,-
941.5827953242377,141.83798825976268,958.0098168058221,922.2463604285707,1117,21,GigaChat 4.0.26.15,GigaChat 4.0.26.15,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
991.0583972778235,133.3432963285682,1007.0465878297241,975.5250878450807,1081,11,saiga_phi3_medium,saiga_phi3_medium,Open Source,Ilya Gusev,In training,https://huggingface.co/IlyaGusev/saiga_phi3_medium_sft_m1_d2_kto_m5_d7,-,-
1025.0446698674752,192.94697376477063,1046.0371188029892,1000.9205606266152,592,7,GigaChat-Pro 4.0.26.8,GigaChat-Pro 4.0.26.8,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
1021.3122431715511,155.54992272155414,1039.970215543838,1001.4240352577766,836,9,T-lite-instruct-0.1,T-lite-instruct-0.1,Open Source,t-bank-ai,In training,https://huggingface.co/AnatoliiPotapov/T-lite-instruct-0.1,-,-
1106.308543816692,178.8242558868404,1128.7996392688226,1084.5442323851867,881,1,GigaChat-Plus 4.0.26.15,GigaChat-Plus 4.0.26.15,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
1017.8024832047015,173.25216212348474,1036.8265962249984,996.6655431938515,809,9,LLaMA-3 Chat (8B),LLaMA-3 Chat (8B),Proprietary,Meta,03-2023,https://llama.meta.com/llama3/,-,-
991.2509437638029,129.54016871578966,1007.1961532996846,973.1646793739749,1169,11,Llama 3.1 8B Instruct Turbo,Llama 3.1 8B Instruct Turbo,Proprietary,Meta,-,https://ai.meta.com/blog/meta-llama-3-1/,-,-
1062.5286294893106,136.49341097409925,1078.8139104084269,1046.465053838073,1221,4,Vikhrmodels/it-5.2-fp16-cp,Vikhrmodels/it-5.2-fp16-cp,Open Source,Vikhrmodels,In training,https://huggingface.co/Vikhrmodels/it-5.2-fp16-cp,-,-
1079.4763195876462,144.09405399528333,1095.6428290624463,1062.2829408914379,1253,2,RefalMachine/ruadapt_llama3_instruct_lep_saiga_kto_ablitirated,RefalMachine/ruadapt_llama3_instruct_lep_saiga_kto_ablitirated,Open Source,RefalMachine,-,https://huggingface.co/RefalMachine/ruadapt_llama3_instruct_lep_saiga_kto_ablitirated,-,-
1109.8938509046443,136.53052349755055,1125.9103376994174,1093.3023007575118,1299,1,YandexGPT 3 Lite,YandexGPT 3 Lite,Proprietary,Yandex,In training,https://ya.ru/ai/gpt-3,45.2,35.2
1060.612847947084,145.668401667651,1078.049475337932,1040.6996640517195,1069,4,GigaChat 4.0.26.8,GigaChat 4.0.26.8,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
1025.3199794146722,186.46791863420782,1045.403805372983,1002.9186802788619,665,7,GigaChat-Pro 2.2.25.3,GigaChat-Pro 2.2.25.3,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
926.5871917765131,363.0149305503841,959.0631717074187,890.4975595097276,257,21,saiga_llama3_8b_v6,saiga_llama3_8b_v6,Open Source,Ilya Gusev,In training,https://huggingface.co/IlyaGusev/saiga_llama3_8b,-,-
965.1822926529305,162.7619151723603,983.8070680431707,944.4186662601812,783,19,GigaChat 3.1.25.3,GigaChat 3.1.25.3,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
955.3928154793401,212.4478147817904,977.7411609075767,926.1110851867377,489,19,GigaChat-Plus 3.1.25.3,GigaChat-Plus 3.1.25.3,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-