eduagarcia committed on
Commit
c2f342d
·
1 Parent(s): 405857a

Add Llama 4 and DeepSeek V3 results

Browse files
Files changed (3) hide show
  1. README.md +6 -0
  2. external_models_results.json +70 -1
  3. model_list.txt +6 -0
README.md CHANGED
@@ -56,6 +56,7 @@ models:
56
  - AI-Sweden-Models/gpt-sw3-40b
57
  - AI-Sweden-Models/gpt-sw3-6.7b
58
  - AI-Sweden-Models/gpt-sw3-6.7b-v2
 
59
  - AXCXEPT/EZO-Qwen2.5-32B-Instruct
60
  - AdaptLLM/finance-LLM
61
  - AdaptLLM/finance-LLM-13B
@@ -80,6 +81,8 @@ models:
80
  - BAAI/Infinity-Instruct-7M-0729-Llama3_1-8B
81
  - BAAI/Infinity-Instruct-7M-Gen-Llama3_1-8B
82
  - BAAI/OPI-Llama-3.1-8B-Instruct
 
 
83
  - Bruno/Caramelinho
84
  - Bruno/Caramelo_7B
85
  - CausalLM/34b-beta
@@ -548,6 +551,7 @@ models:
548
  - deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
549
  - deepseek-ai/DeepSeek-V2-Lite
550
  - deepseek-ai/DeepSeek-V2-Lite-Chat
 
551
  - deepseek-ai/deepseek-llm-7b-base
552
  - deepseek-ai/deepseek-moe-16b-base
553
  - deepseek-ai/deepseek-moe-16b-chat
@@ -744,6 +748,8 @@ models:
744
  - meta-llama/Llama-3.2-3B-Instruct
745
  - meta-llama/Llama-3.2-90B-Vision-Instruct
746
  - meta-llama/Llama-3.3-70B-Instruct
 
 
747
  - meta-llama/Meta-Llama-3-70B
748
  - meta-llama/Meta-Llama-3-70B-Instruct
749
  - meta-llama/Meta-Llama-3-8B
 
56
  - AI-Sweden-Models/gpt-sw3-40b
57
  - AI-Sweden-Models/gpt-sw3-6.7b
58
  - AI-Sweden-Models/gpt-sw3-6.7b-v2
59
+ - AIDC-AI/Marco-LLM-ES
60
  - AXCXEPT/EZO-Qwen2.5-32B-Instruct
61
  - AdaptLLM/finance-LLM
62
  - AdaptLLM/finance-LLM-13B
 
81
  - BAAI/Infinity-Instruct-7M-0729-Llama3_1-8B
82
  - BAAI/Infinity-Instruct-7M-Gen-Llama3_1-8B
83
  - BAAI/OPI-Llama-3.1-8B-Instruct
84
+ - BSC-LT/salamandra-2b
85
+ - BSC-LT/salamandra-7b
86
  - Bruno/Caramelinho
87
  - Bruno/Caramelo_7B
88
  - CausalLM/34b-beta
 
551
  - deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
552
  - deepseek-ai/DeepSeek-V2-Lite
553
  - deepseek-ai/DeepSeek-V2-Lite-Chat
554
+ - deepseek-ai/DeepSeek-V3-0324
555
  - deepseek-ai/deepseek-llm-7b-base
556
  - deepseek-ai/deepseek-moe-16b-base
557
  - deepseek-ai/deepseek-moe-16b-chat
 
748
  - meta-llama/Llama-3.2-3B-Instruct
749
  - meta-llama/Llama-3.2-90B-Vision-Instruct
750
  - meta-llama/Llama-3.3-70B-Instruct
751
+ - meta-llama/Llama-4-Maverick-17B-128E-Instruct
752
+ - meta-llama/Llama-4-Scout-17B-16E-Instruct
753
  - meta-llama/Meta-Llama-3-70B
754
  - meta-llama/Meta-Llama-3-70B-Instruct
755
  - meta-llama/Meta-Llama-3-8B
external_models_results.json CHANGED
@@ -443,6 +443,29 @@
443
  "result_metrics_average": 0.8836610214313025,
444
  "result_metrics_npm": 0.8134610556797854
445
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
446
  {
447
  "model": "qwen2-5-vl-72b-instruct",
448
  "name": "Qwen/Qwen2.5-VL-72B-Instruct (API)",
@@ -538,7 +561,7 @@
538
  "model": "claude-3-7-sonnet-20250219",
539
  "name": "Claude 3.7 Sonnet (2025-02-19)",
540
  "link": "https://www.anthropic.com/",
541
- "date": "2025-04-03",
542
  "status": "full",
543
  "main_language": "English",
544
  "model_type": "proprietary",
@@ -555,5 +578,51 @@
555
  },
556
  "result_metrics_average": 0.8448598450650201,
557
  "result_metrics_npm": 0.7622301724524201
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
558
  }
559
  ]
 
443
  "result_metrics_average": 0.8836610214313025,
444
  "result_metrics_npm": 0.8134610556797854
445
  },
446
+ {
447
+ "model": "deepSeek-v3-0324",
448
+ "name": "deepseek-ai/DeepSeek-V3-0324 (API)",
449
+ "link": "https://huggingface.co/deepseek-ai/DeepSeek-V3-0324",
450
+ "date": "2025-04-03",
451
+ "status": "full",
452
+ "main_language": "English",
453
+ "model_type": "chat",
454
+ "params": 685.0,
455
+ "result_metrics": {
456
+ "enem_challenge": 0.8901329601119664,
457
+ "bluex": 0.8414464534075105,
458
+ "oab_exams": 0.7148063781321184,
459
+ "assin2_sts": 0.8145997097875548,
460
+ "assin2_rte": 0.9421860387625551,
461
+ "faquad_nli": 0.796751127001399,
462
+ "hatebr_offensive": 0.9060129756724185,
463
+ "portuguese_hate_speech": 0.7262480672025753,
464
+ "tweetsentbr": 0.7037326638925795
465
+ },
466
+ "result_metrics_average": 0.8151018193300753,
467
+ "result_metrics_npm": 0.7165435243787625
468
+ },
469
  {
470
  "model": "qwen2-5-vl-72b-instruct",
471
  "name": "Qwen/Qwen2.5-VL-72B-Instruct (API)",
 
561
  "model": "claude-3-7-sonnet-20250219",
562
  "name": "Claude 3.7 Sonnet (2025-02-19)",
563
  "link": "https://www.anthropic.com/",
564
+ "date": "2025-04-04",
565
  "status": "full",
566
  "main_language": "English",
567
  "model_type": "proprietary",
 
578
  },
579
  "result_metrics_average": 0.8448598450650201,
580
  "result_metrics_npm": 0.7622301724524201
581
+ },
582
+ {
583
+ "model": "llama-4-scout-16e",
584
+ "name": "meta-llama/Llama-4-Scout-17B-16E-Instruct (Groq API)",
585
+ "link": "https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct",
586
+ "date": "2025-04-05",
587
+ "status": "full",
588
+ "main_language": "English",
589
+ "model_type": "chat",
590
+ "params": 109.0,
591
+ "result_metrics": {
592
+ "enem_challenge": 0.8054583624912526,
593
+ "bluex": 0.721835883171071,
594
+ "oab_exams": 0.6815489749430524,
595
+ "assin2_sts": 0.7741640227983941,
596
+ "assin2_rte": 0.9312877465954967,
597
+ "faquad_nli": 0.8567037452287072,
598
+ "hatebr_offensive": 0.8813700069483281,
599
+ "portuguese_hate_speech": 0.7009183720501475,
600
+ "tweetsentbr": 0.7277278145615887
601
+ },
602
+ "result_metrics_average": 0.7867794365320042,
603
+ "result_metrics_npm": 0.6811274967601382
604
+ },
605
+ {
606
+ "model": "llama-4-maverick-128e",
607
+ "name": "meta-llama/Llama-4-Maverick-17B-128E-Instruct (FireworksAI API)",
608
+ "link": "https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E-Instruct",
609
+ "date": "2025-04-05",
610
+ "status": "full",
611
+ "main_language": "English",
612
+ "model_type": "chat",
613
+ "params": 402.0,
614
+ "result_metrics": {
615
+ "enem_challenge": 0.8775367389783065,
616
+ "bluex": 0.8122392211404729,
617
+ "oab_exams": 0.7284738041002278,
618
+ "assin2_sts": 0.7333246903202654,
619
+ "assin2_rte": 0.9329419027588105,
620
+ "faquad_nli": 0.7823695413019562,
621
+ "hatebr_offensive": 0.9047550357833591,
622
+ "portuguese_hate_speech": 0.7231286908077994,
623
+ "tweetsentbr": 0.7165294511353842
624
+ },
625
+ "result_metrics_average": 0.8012554529251759,
626
+ "result_metrics_npm": 0.6997802853383734
627
  }
628
  ]
model_list.txt CHANGED
@@ -27,6 +27,7 @@
27
  - AI-Sweden-Models/gpt-sw3-40b
28
  - AI-Sweden-Models/gpt-sw3-6.7b
29
  - AI-Sweden-Models/gpt-sw3-6.7b-v2
 
30
  - AXCXEPT/EZO-Qwen2.5-32B-Instruct
31
  - AdaptLLM/finance-LLM
32
  - AdaptLLM/finance-LLM-13B
@@ -51,6 +52,8 @@
51
  - BAAI/Infinity-Instruct-7M-0729-Llama3_1-8B
52
  - BAAI/Infinity-Instruct-7M-Gen-Llama3_1-8B
53
  - BAAI/OPI-Llama-3.1-8B-Instruct
 
 
54
  - Bruno/Caramelinho
55
  - Bruno/Caramelo_7B
56
  - CausalLM/34b-beta
@@ -519,6 +522,7 @@
519
  - deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
520
  - deepseek-ai/DeepSeek-V2-Lite
521
  - deepseek-ai/DeepSeek-V2-Lite-Chat
 
522
  - deepseek-ai/deepseek-llm-7b-base
523
  - deepseek-ai/deepseek-moe-16b-base
524
  - deepseek-ai/deepseek-moe-16b-chat
@@ -715,6 +719,8 @@
715
  - meta-llama/Llama-3.2-3B-Instruct
716
  - meta-llama/Llama-3.2-90B-Vision-Instruct
717
  - meta-llama/Llama-3.3-70B-Instruct
 
 
718
  - meta-llama/Meta-Llama-3-70B
719
  - meta-llama/Meta-Llama-3-70B-Instruct
720
  - meta-llama/Meta-Llama-3-8B
 
27
  - AI-Sweden-Models/gpt-sw3-40b
28
  - AI-Sweden-Models/gpt-sw3-6.7b
29
  - AI-Sweden-Models/gpt-sw3-6.7b-v2
30
+ - AIDC-AI/Marco-LLM-ES
31
  - AXCXEPT/EZO-Qwen2.5-32B-Instruct
32
  - AdaptLLM/finance-LLM
33
  - AdaptLLM/finance-LLM-13B
 
52
  - BAAI/Infinity-Instruct-7M-0729-Llama3_1-8B
53
  - BAAI/Infinity-Instruct-7M-Gen-Llama3_1-8B
54
  - BAAI/OPI-Llama-3.1-8B-Instruct
55
+ - BSC-LT/salamandra-2b
56
+ - BSC-LT/salamandra-7b
57
  - Bruno/Caramelinho
58
  - Bruno/Caramelo_7B
59
  - CausalLM/34b-beta
 
522
  - deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
523
  - deepseek-ai/DeepSeek-V2-Lite
524
  - deepseek-ai/DeepSeek-V2-Lite-Chat
525
+ - deepseek-ai/DeepSeek-V3-0324
526
  - deepseek-ai/deepseek-llm-7b-base
527
  - deepseek-ai/deepseek-moe-16b-base
528
  - deepseek-ai/deepseek-moe-16b-chat
 
719
  - meta-llama/Llama-3.2-3B-Instruct
720
  - meta-llama/Llama-3.2-90B-Vision-Instruct
721
  - meta-llama/Llama-3.3-70B-Instruct
722
+ - meta-llama/Llama-4-Maverick-17B-128E-Instruct
723
+ - meta-llama/Llama-4-Scout-17B-16E-Instruct
724
  - meta-llama/Meta-Llama-3-70B
725
  - meta-llama/Meta-Llama-3-70B-Instruct
726
  - meta-llama/Meta-Llama-3-8B