[ { "model": "sabia-2-small", "name": "Sabiá-2 Small", "link": "https://www.maritaca.ai/", "date": "2024-04-12", "status": "full", "main_language": "Portuguese", "model_type": "proprietary", "result_metrics": { "enem_challenge": 0.7172848145556333, "bluex": 0.5549374130737135, "oab_exams": 0.6364464692482916, "assin2_sts": 0.7053302344881672, "assin2_rte": 0.9121728362223306, "faquad_nli": 0.7575848453041435, "hatebr_offensive": 0.753800795680591, "portuguese_hate_speech": 0.6975326368290793, "tweetsentbr": 0.7119699374276466 }, "result_metrics_average": 0.7163399980921773, "result_metrics_npm": 0.5744541501392351 }, { "model": "sabia-2-medium", "name": "Sabiá-2 Medium", "link": "https://www.maritaca.ai/", "date": "2024-04-13", "status": "full", "main_language": "Portuguese", "model_type": "proprietary", "result_metrics": { "enem_challenge": 0.8180545836249126, "bluex": 0.717663421418637, "oab_exams": 0.7321184510250569, "assin2_sts": 0.7804108376537757, "assin2_rte": 0.923459363368553, "faquad_nli": 0.7657657657657658, "hatebr_offensive": 0.8349989882997386, "portuguese_hate_speech": 0.7379326358571694, "tweetsentbr": 0.7269533040381798 }, "result_metrics_average": 0.7819285945613098, "result_metrics_npm": 0.6676121786922709 }, { "model": "gpt-3.5-turbo-0125", "name": "GPT-3.5 Turbo (0125)", "link": "https://www.openai.com/", "date": "2024-03-08", "status": "full", "main_language": "English", "model_type": "proprietary", "result_metrics": { "enem_challenge": 0.7214835549335199, "bluex": 0.6244784422809457, "oab_exams": 0.5430523917995445, "assin2_sts": 0.7378460201077941, "assin2_rte": 0.8823038414050672, "faquad_nli": 0.746353108609074, "hatebr_offensive": 0.8056205941193919, "portuguese_hate_speech": 0.7363692688971499, "tweetsentbr": 0.7028981330613626 }, "result_metrics_average": 0.7222672616904278, "result_metrics_npm": 0.5841504766165372 }, { "model": "claude-3-haiku-20240307", "name": "Claude-3 Haiku (20240307)", "link": "https://www.claude.ai/", "date": "2024-04-13", "status": "full", "main_language": "English", "model_type": "proprietary", "result_metrics": { "enem_challenge": 0.7718684394681595, "bluex": 0.6662030598052852, "oab_exams": 0.626879271070615, "assin2_sts": 0.7892124744168747, "assin2_rte": 0.9184462138121732, "faquad_nli": 0.6340996599941455, "hatebr_offensive": 0.8023698759439051, "portuguese_hate_speech": 0.7342166269560177, "tweetsentbr": 0.7303315733000207 }, "result_metrics_average": 0.7415141327519107, "result_metrics_npm": 0.6037151240886439 }, { "model": "gemini-1.0-pro", "name": "Gemini 1.0 Pro", "link": "https://ai.google.dev/", "date": "2024-03-08", "status": "full", "main_language": "English", "model_type": "proprietary", "result_metrics": { "enem_challenge": 0.7130860741777467, "bluex": 0.5869262865090403, "oab_exams": 0.4988610478359909, "assin2_sts": 0.7058831239763663, "assin2_rte": 0.8945993304651698, "faquad_nli": 0.7070913567220611, "hatebr_offensive": 0.8086330094493972, "portuguese_hate_speech": 0.699119105113102, "tweetsentbr": 0.6803240476660983 }, "result_metrics_average": 0.6993914868794414, "result_metrics_npm": 0.551208000273598 }, { "model": "gemini-1.5-pro-preview-0409", "name": "Gemini 1.5 Pro Preview (0409)", "link": "https://cloud.google.com/vertex-ai", "date": "2024-04-15", "status": "full", "main_language": "English", "model_type": "proprietary", "result_metrics": { "enem_challenge": 0.8509447165850245, "bluex": 0.7719054242002782, "oab_exams": 0.6888382687927107, "assin2_sts": 0.8159702278408203, "assin2_rte": 0.9328989988467518, 
"faquad_nli": 0.7290756302521009, "hatebr_offensive": 0.8697698647467024, "portuguese_hate_speech": 0.7539414414414414, "tweetsentbr": 0.772785080895884 }, "result_metrics_average": 0.7984588504001905, "result_metrics_npm": 0.6908188311933006 }, { "model": "deepseek-v2-chat", "name": "DeepSeek-V2 Chat (API)", "link": "https://www.deepseek.com/", "date": "2024-05-18", "status": "full", "main_language": "English", "model_type": "proprietary", "result_metrics": { "enem_challenge": 0.7844646606018194, "bluex": 0.6954102920723226, "oab_exams": 0.564009111617312, "assin2_sts": 0.8533174657651231, "assin2_rte": 0.9440170304568147, "faquad_nli": 0.7995469048381548, "hatebr_offensive": 0.8842986491071644, "portuguese_hate_speech": 0.7271736342651962, "tweetsentbr": 0.6835304759163984 }, "result_metrics_average": 0.7706409138489229, "result_metrics_npm": 0.655901521190756 }, { "model": "gemini-1.5-flash-preview-0514", "name": "Gemini 1.5 Flash Preview (0514)", "link": "https://cloud.google.com/vertex-ai", "date": "2024-05-18", "status": "full", "main_language": "English", "model_type": "proprietary", "result_metrics": { "enem_challenge": 0.8264520643806857, "bluex": 0.7482614742698191, "oab_exams": 0.6419134396355353, "assin2_sts": 0.841655158151231, "assin2_rte": 0.9362097477374545, "faquad_nli": 0.8092185592185592, "hatebr_offensive": 0.9099110141445836, "portuguese_hate_speech": 0.6875904275305673, "tweetsentbr": 0.7219800292667018 }, "result_metrics_average": 0.7914657682594597, "result_metrics_npm": 0.6834036936130392 }, { "model": "gemini-1.5-flash-001", "name": "Gemini 1.5 Flash (001)", "link": "https://cloud.google.com/vertex-ai", "date": "2024-08-09", "status": "full", "main_language": "English", "model_type": "proprietary", "result_metrics": { "enem_challenge": 0.8306508047585724, "bluex": 0.7579972183588317, "oab_exams": 0.6446469248291572, "assin2_sts": 0.838806085610371, "assin2_rte": 0.9366169973822607, "faquad_nli": 0.7963910785668922, "hatebr_offensive": 0.9092078461170015, "portuguese_hate_speech": 0.6932563987219857, "tweetsentbr": 0.7312948963367732 }, "result_metrics_average": 0.7932075834090939, "result_metrics_npm": 0.6855338135928848 }, { "model": "gpt-4o-mini-2024-07-18", "name": "GPT 4o Mini (2024-07-18)", "link": "https://www.openai.com/", "date": "2024-07-25", "status": "full", "main_language": "English", "model_type": "proprietary", "result_metrics": { "enem_challenge": 0.7669699090272918, "bluex": 0.6842837273991655, "oab_exams": 0.6013667425968109, "assin2_sts": 0.7259038954527597, "assin2_rte": 0.942809846745341, "faquad_nli": 0.819807735300693, "hatebr_offensive": 0.8682357029532165, "portuguese_hate_speech": 0.7501413502853012, "tweetsentbr": 0.7509303825869922 }, "result_metrics_average": 0.7678276991497301, "result_metrics_npm": 0.6595966999910003 }, { "model": "nemotron-4-340b-instruct", "name": "nvidia/Nemotron-4-340B-Instruct (Nvidia API)", "link": "https://huggingface.co/nvidia/Nemotron-4-340B-Instruct", "date": "2024-06-30", "status": "full", "main_language": "English", "model_type": "chat", "params": 340.0, "result_metrics": { "enem_challenge": 0.6648005598320503, "bluex": 0.6578581363004172, "oab_exams": 0.7020501138952164, "assin2_sts": 0.7857731021403329, "assin2_rte": 0.9489354458928496, "faquad_nli": 0.8194444444444444, "hatebr_offensive": 0.8641580001234928, "portuguese_hate_speech": 0.7761835184102864, "tweetsentbr": 0.780880021326841 }, "result_metrics_average": 0.7777870380406591, "result_metrics_npm": 0.6740728488043128 }, { "model": 
"llama_405b_instruct", "name": "meta-llama/Meta-Llama-3.1-405B-Instruct (Vertex AI)", "link": "https://huggingface.co/meta-llama/Meta-Llama-3.1-405B-Instruct", "date": "2024-08-20", "status": "full", "main_language": "English", "model_type": "chat", "params": 406.0, "result_metrics": { "enem_challenge": 0.8523442967109867, "bluex": 0.8011126564673157, "oab_exams": 0.7640091116173121, "assin2_sts": 0.7888441732870783, "assin2_rte": 0.9476445477916471, "faquad_nli": 0.825063276593557, "hatebr_offensive": 0.9073940659389119, "portuguese_hate_speech": 0.7191480935512969, "tweetsentbr": 0.7821434639106575 }, "result_metrics_average": 0.8208559650965292, "result_metrics_npm": 0.7286932366792048 }, { "model": "sabia-3", "name": "Sabiá-3", "link": "https://www.maritaca.ai/", "date": "2024-08-20", "status": "full", "main_language": "Portuguese", "model_type": "proprietary", "result_metrics": { "enem_challenge": 0.8789363191042687, "bluex": 0.7899860917941586, "oab_exams": 0.8391799544419134, "assin2_sts": 0.8253863689009022, "assin2_rte": 0.9477034821619312, "faquad_nli": 0.8243848812618203, "hatebr_offensive": 0.8278737774590023, "portuguese_hate_speech": 0.7241071428571428, "tweetsentbr": 0.7510613086648664 }, "result_metrics_average": 0.8231799251828895, "result_metrics_npm": 0.7241097388486535 }, { "model": "llama3_3_70b", "name": "meta-llama/Llama-3.3-70B-Instruct (Vertex AI)", "link": "https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct", "date": "2025-04-03", "status": "full", "main_language": "English", "model_type": "chat", "params": 70.6, "result_metrics": { "enem_challenge": 0.8320503848845346, "bluex": 0.7593880389429764, "oab_exams": 0.6733485193621868, "assin2_sts": 0.7275578599896508, "assin2_rte": 0.9407071010860484, "faquad_nli": 0.8787563033858187, "hatebr_offensive": 0.9024358249091997, "portuguese_hate_speech": 0.7042216543825339, "tweetsentbr": 0.7076749453899551 }, "result_metrics_average": 0.791793403592545, "result_metrics_npm": 0.6924788466103498 }, { "model": "llama3_2_90b", "name": "meta-llama/Llama-3.2-90B-Vision-Instruct (Vertex AI)", "link": "https://huggingface.co/meta-llama/Llama-3.2-90B-Vision-Instruct", "date": "2025-04-03", "status": "full", "main_language": "English", "model_type": "chat", "params": 88.6, "result_metrics": { "enem_challenge": 0.821553533939818, "bluex": 0.7482614742698191, "oab_exams": 0.7061503416856492, "assin2_sts": 0.7368518566379951, "assin2_rte": 0.9216548775103446, "faquad_nli": 0.8632015306122449, "hatebr_offensive": 0.8965270877302478, "portuguese_hate_speech": 0.7059127552081422, "tweetsentbr": 0.7352076218951984 }, "result_metrics_average": 0.7928134532766066, "result_metrics_npm": 0.6915070359785283 }, { "model": "gemini-1.5-flash-002", "name": "Gemini 1.5 Flash (002)", "link": "https://cloud.google.com/vertex-ai", "date": "2025-04-03", "status": "full", "main_language": "English", "model_type": "proprietary", "result_metrics": { "enem_challenge": 0.8327501749475158, "bluex": 0.760778859527121, "oab_exams": 0.6369020501138952, "assin2_sts": 0.8380176734291938, "assin2_rte": 0.941176117215237, "faquad_nli": 0.8360786822325283, "hatebr_offensive": 0.9046145161133335, "portuguese_hate_speech": 0.7406414313684444, "tweetsentbr": 0.6997509880131249 }, "result_metrics_average": 0.7989678325511549, "result_metrics_npm": 0.6979777100000177 }, { "model": "gemini-1.5-flash-8b-001", "name": "Gemini 1.5 Flash 8B (001)", "link": "https://aistudio.google.com", "date": "2025-04-03", "status": "full", "main_language": "English", "model_type": 
"proprietary", "result_metrics": { "enem_challenge": 0.7641707487753674, "bluex": 0.6467315716272601, "oab_exams": 0.5603644646924829, "assin2_sts": 0.7638946799836569, "assin2_rte": 0.9329452628161146, "faquad_nli": 0.7937022965448601, "hatebr_offensive": 0.850497640901663, "portuguese_hate_speech": 0.7391317606010173, "tweetsentbr": 0.7376684798923661 }, "result_metrics_average": 0.7543452117594209, "result_metrics_npm": 0.6359642422837162 }, { "model": "gemini-2.0-flash-001", "name": "Gemini 2.0 Flash (001)", "link": "https://cloud.google.com/vertex-ai", "date": "2025-04-03", "status": "full", "main_language": "English", "model_type": "proprietary", "result_metrics": { "enem_challenge": 0.8789363191042687, "bluex": 0.803894297635605, "oab_exams": 0.7767653758542141, "assin2_sts": 0.8440142633742483, "assin2_rte": 0.9305165510724053, "faquad_nli": 0.7533651260745065, "hatebr_offensive": 0.8890432813545366, "portuguese_hate_speech": 0.7655392938544128, "tweetsentbr": 0.7652542619451799 }, "result_metrics_average": 0.8230365300299308, "result_metrics_npm": 0.7253778946033657 }, { "model": "gemini-2.0-flash-lite-001", "name": "Gemini 2.0 Flash Lite (001)", "link": "https://cloud.google.com/vertex-ai", "date": "2025-04-03", "status": "full", "main_language": "English", "model_type": "proprietary", "result_metrics": { "enem_challenge": 0.8509447165850245, "bluex": 0.7872044506258693, "oab_exams": 0.7061503416856492, "assin2_sts": 0.8492479991621328, "assin2_rte": 0.9216548775103446, "faquad_nli": 0.7652777777777777, "hatebr_offensive": 0.8522499647780968, "portuguese_hate_speech": 0.7501387383201693, "tweetsentbr": 0.7675746509081982 }, "result_metrics_average": 0.8056048352614735, "result_metrics_npm": 0.6986042497176748 }, { "model": "gemini-2.5-pro-exp-03-25", "name": "Gemini 2.5 Pro Experimental (0325)", "link": "https://aistudio.google.com", "date": "2025-04-03", "status": "full", "main_language": "English", "model_type": "proprietary", "result_metrics": { "enem_challenge": 0.9769069279216235, "bluex": 0.9499304589707928, "oab_exams": 0.9216400911161731, "assin2_sts": 0.837785744915033, "assin2_rte": 0.9415510158830285, "faquad_nli": 0.8738735797309651, "hatebr_offensive": 0.9248478168290788, "portuguese_hate_speech": 0.7336133105156697, "tweetsentbr": 0.7928002469993594 }, "result_metrics_average": 0.8836610214313025, "result_metrics_npm": 0.8134610556797854 }, { "model": "qwen2-5-vl-72b-instruct", "name": "Qwen/Qwen2.5-VL-72B-Instruct (API)", "link": "https://huggingface.co/Qwen/Qwen2.5-VL-72B-Instruct", "date": "2025-04-03", "status": "full", "main_language": "English", "model_type": "chat", "params": 73.4, "result_metrics": { "enem_challenge": 0.8600419874037789, "bluex": 0.8052851182197497, "oab_exams": 0.6888382687927107, "assin2_sts": 0.7595538567467497, "assin2_rte": 0.9472975104201871, "faquad_nli": 0.8447190882122586, "hatebr_offensive": 0.8810695094657859, "portuguese_hate_speech": 0.769596419318135, "tweetsentbr": 0.5644757075411895 }, "result_metrics_average": 0.7912086073467273, "result_metrics_npm": 0.6888261361422966 }, { "model": "qwen2-5-72b-instruct", "name": "Qwen/Qwen2.5-72B-Instruct (API)", "link": "https://huggingface.co/Qwen/Qwen2.5-72B-Instruct", "date": "2025-04-03", "status": "full", "main_language": "English", "model_type": "chat", "params": 72.7, "result_metrics": { "enem_challenge": 0.8432470258922323, "bluex": 0.780250347705146, "oab_exams": 0.675626423690205, "assin2_sts": 0.8230708844558656, "assin2_rte": 0.9509720145268106, "faquad_nli": 
0.8194444444444444, "hatebr_offensive": 0.8810033427242816, "portuguese_hate_speech": 0.7601866578782712, "tweetsentbr": 0.7620172222071487 }, "result_metrics_average": 0.8106464848360451, "result_metrics_npm": 0.7142994872542282 }, { "model": "qwen2-5-vl-32b-instruct", "name": "Qwen/Qwen2.5-VL-32B-Instruct (API)", "link": "https://huggingface.co/Qwen/Qwen2.5-VL-32B-Instruct", "date": "2025-04-03", "status": "full", "main_language": "English", "model_type": "chat", "params": 33.5, "result_metrics": { "enem_challenge": 0.8600419874037789, "bluex": 0.8052851182197497, "oab_exams": 0.6888382687927107, "assin2_sts": 0.7780549055529008, "assin2_rte": 0.9472975104201871, "faquad_nli": 0.8447190882122586, "hatebr_offensive": 0.8810695094657859, "portuguese_hate_speech": 0.769596419318135, "tweetsentbr": 0.7027408707999051 }, "result_metrics_average": 0.8086270753539346, "result_metrics_npm": 0.7137431116807307 }, { "model": "qwen-turbo-2024-11-01", "name": "Qwen-Turbo (2024-11-01)", "link": "https://www.alibabacloud.com/en/product/modelstudio", "date": "2025-04-03", "status": "full", "main_language": "English", "model_type": "proprietary", "result_metrics": { "enem_challenge": 0.7795661301609517, "bluex": 0.7079276773296245, "oab_exams": 0.6091116173120729, "assin2_sts": 0.7640477700456898, "assin2_rte": 0.9260451969385788, "faquad_nli": 0.8128063725490196, "hatebr_offensive": 0.8567933277676292, "portuguese_hate_speech": 0.7239183383094245, "tweetsentbr": 0.7038360447972195 }, "result_metrics_average": 0.7648947194678011, "result_metrics_npm": 0.6490441260447987 }, { "model": "claude-3-7-sonnet-20250219", "name": "Claude 3.7 Sonnet (2025-02-19)", "link": "https://www.anthropic.com/", "date": "2025-04-03", "status": "full", "main_language": "English", "model_type": "proprietary", "result_metrics": { "enem_challenge": 0.8901329601119664, "bluex": 0.8456189151599444, "oab_exams": 0.8355353075170843, "assin2_sts": 0.8087979933117393, "assin2_rte": 0.9472965253044003, "faquad_nli": 0.8097848807348216, "hatebr_offensive": 0.9125114739050616, "portuguese_hate_speech": 0.7698524509742262, "tweetsentbr": 0.7842080985659372 }, "result_metrics_average": 0.8448598450650201, "result_metrics_npm": 0.7622301724524201 } ]