Spaces:
Running
Running
Update entries.
Browse files- data/scores.jsonl +1 -1
data/scores.jsonl
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
{"225638": {"metadata": {"team_name": "YNU-HPCC", "email": "[email protected]", "submission_name": "LLaMA + MT", "submission_description": "try another llm", "uses_gold": true, "uses_rag": false, "uses_llm": false, "llm_name": "Llama-3.3-70B-Instruct", "is_finetuned": false}, "scores": {"ko_KR": {"meta_score": 88.17394726485635, "comet_score": 94.903170802095, "overall_score": 91.41488861007092}, "th_TH": {"meta_score": 86.5100087032202, "comet_score": 93.42989604871515, "overall_score": 89.83689450606285}, "de_DE": {"meta_score": 82.59019741320627, "comet_score": 94.37710679890023, "overall_score": 88.091118486679}, "fr_FR": {"meta_score": 86.58737419945105, "comet_score": 93.77212552771259, "overall_score": 90.03664497660041}, "tr_TR": {"meta_score": 79.27565392354124, "comet_score": 94.08626482706862, "overall_score": 86.04831122247953}, "ar_AE": {"meta_score": 88.78381350340884, "comet_score": 94.3301959197181, "overall_score": 91.47300688418572}, "it_IT": {"meta_score": 89.87838367987446, "comet_score": 94.96415760843087, "overall_score": 92.35130542869926}, "es_ES": {"meta_score": 88.42263019857624, "comet_score": 95.27658856609601, "overall_score": 91.72174616762133}, "zh_TW": {"meta_score": 80.6445387881127, "comet_score": 94.23986344620879, "overall_score": 86.91375818515075}, "ja_JP": {"meta_score": 87.74471417384495, "comet_score": 95.68072684348424, "overall_score": 91.5410423141258}, "overall": {"meta_score": 85.86112618480924, "comet_score": 94.50600963884295, "overall_score": 89.94287167816755}}}}
|
2 |
{"226803": {"metadata": {"team_name": "Lunar", "email": "[email protected]", "submission_name": "LLaMA-RAFT-Plus", "submission_description": "(I have already submitted the Google Form for this submission ID, but I was not completely sure if I submitted the correct content, so I am submitting it once more. I appreciate your understanding.) We trained LLaMA-3.1-8B-Instruct using a multi-turn dialogue setup that incorporates function calls to: (1) identify key entities in the source text that require lookup via function calls (e.g., [search(\"The Great Gatsby\")]) and (2) retrieve their corresponding entities in the target language using the Wikipedia API. These retrieved entities are then leveraged to generate the final translation. The system is capable of performing searches at test time, and this version utilizes real-time search to evaluate its performance in real-world conditions. Additionally, it integrates a validation dataset during training to further enhance accuracy.", "uses_gold": false, "uses_rag": true, "uses_llm": false, "llm_name": "Llama-3.1-8B-Instruct", "is_finetuned": true}, "scores": {"ko_KR": {"meta_score": 61.49153876426604, "comet_score": 92.78515101927606, "overall_score": 73.96453370441374}, "th_TH": {"meta_score": 67.21787061212649, "comet_score": 90.16974140353088, "overall_score": 77.02026777289785}, "de_DE": {"meta_score": 59.54731109598366, "comet_score": 91.40155507572416, "overall_score": 72.11338478766199}, "fr_FR": {"meta_score": 67.15462031107045, "comet_score": 91.3326877611881, "overall_score": 77.39940873745869}, "tr_TR": {"meta_score": 71.495640509725, "comet_score": 93.62873369659216, "overall_score": 81.07883912266462}, "ar_AE": {"meta_score": 67.53903672751265, "comet_score": 91.44903267900682, "overall_score": 77.6961390858889}, "it_IT": {"meta_score": 73.57787367595135, "comet_score": 93.30533824454761, "overall_score": 82.27560234063249}, "es_ES": {"meta_score": 66.57924316223304, "comet_score": 93.01946911362057, "overall_score": 77.60922083422801}, "zh_TW": {"meta_score": 38.788112697800074, "comet_score": 88.96667605739724, "overall_score": 54.023015354445704}, "ja_JP": {"meta_score": 55.657791699295224, "comet_score": 92.11579698051152, "overall_score": 69.38942048250435}, "overall": {"meta_score": 62.9049039255964, "comet_score": 91.8174182031395, "overall_score": 74.25698322227962}}}}
|
3 |
{"221913": {"metadata": {"team_name": "sakura", "email": "[email protected]", "submission_name": "Rakuten7b-PO10", "submission_description": "Rakuten7b with Preference Optmization on paranames", "uses_gold": false, "uses_rag": false, "uses_llm": false, "llm_name": "Rakuten/RakutenAI-7B-chat", "is_finetuned": true}, "scores": {"ja_JP": {"meta_score": 29.502740798747062, "comet_score": 90.73745774552985, "overall_score": 44.52759940541613}}}}
|
4 |
-
{"226408": {"metadata": {"team_name": "SHEF", "email": "[email protected]", "submission_name": "
|
5 |
{"226036": {"metadata": {"team_name": "YNU-HPCC", "email": "[email protected]", "submission_name": "Qwen2.5-32B", "submission_description": "Modified to improve performance", "uses_gold": true, "uses_rag": false, "uses_llm": false, "llm_name": "Qwen2.5-32B", "is_finetuned": true}, "scores": {"ko_KR": {"meta_score": 88.17394726485635, "comet_score": 94.903170802095, "overall_score": 91.41488861007092}, "th_TH": {"meta_score": 86.5100087032202, "comet_score": 93.42989604871515, "overall_score": 89.83689450606285}, "de_DE": {"meta_score": 82.59019741320627, "comet_score": 94.37710679890023, "overall_score": 88.091118486679}, "fr_FR": {"meta_score": 86.58737419945105, "comet_score": 93.77212552771259, "overall_score": 90.03664497660041}, "tr_TR": {"meta_score": 79.27565392354124, "comet_score": 94.08626482706862, "overall_score": 86.04831122247953}, "ar_AE": {"meta_score": 88.78381350340884, "comet_score": 94.3301959197181, "overall_score": 91.47300688418572}, "it_IT": {"meta_score": 89.87838367987446, "comet_score": 94.96415760843087, "overall_score": 92.35130542869926}, "es_ES": {"meta_score": 88.42263019857624, "comet_score": 95.27658856609601, "overall_score": 91.72174616762133}, "zh_TW": {"meta_score": 80.6445387881127, "comet_score": 94.23986344620879, "overall_score": 86.91375818515075}, "ja_JP": {"meta_score": 87.74471417384495, "comet_score": 95.68072684348424, "overall_score": 91.5410423141258}, "overall": {"meta_score": 85.86112618480924, "comet_score": 94.50600963884295, "overall_score": 89.94287167816755}}}}
|
6 |
{"226713": {"metadata": {"team_name": "UAlberta", "email": "[email protected]", "submission_name": "WikiGPT4o", "submission_description": "We prompt a state-of-the-art language model with instructions designed to increase the model's attention in the named entity. We incorporate information from WikiData into the prompt to suggest a translation of the entity, and also leverage in context learning.", "uses_gold": true, "uses_rag": true, "uses_llm": false, "llm_name": "GPT-4o", "is_finetuned": false}, "scores": {"ko_KR": {"meta_score": 90.33844942935852, "comet_score": 95.59958734436329, "overall_score": 92.8945861387825}, "th_TH": {"meta_score": 89.96228604583696, "comet_score": 94.11520353859669, "overall_score": 91.99189842403622}, "de_DE": {"meta_score": 85.05786249149081, "comet_score": 94.27934431802504, "overall_score": 89.43152006720092}, "fr_FR": {"meta_score": 89.67978042086003, "comet_score": 94.25645109010419, "overall_score": 91.91117777691568}, "tr_TR": {"meta_score": 81.64542812430136, "comet_score": 95.82169847469216, "overall_score": 88.16735522226375}, "ar_AE": {"meta_score": 91.6648339564548, "comet_score": 94.86057546627976, "overall_score": 93.23532837741573}, "it_IT": {"meta_score": 91.74185955276579, "comet_score": 95.92362562550335, "overall_score": 93.78615126342946}, "es_ES": {"meta_score": 89.34057699512927, "comet_score": 95.29729041435961, "overall_score": 92.22284714553352}, "zh_TW": {"meta_score": 81.22346584330374, "comet_score": 94.27579471799041, "overall_score": 87.26426273975798}, "ja_JP": {"meta_score": 90.32889584964762, "comet_score": 95.79453747675768, "overall_score": 92.98146551518195}, "overall": {"meta_score": 88.09834387091489, "comet_score": 95.02241084666721, "overall_score": 91.38865926705178}}}}
|
7 |
{"225867": {"metadata": {"team_name": "SALT \ud83e\uddc2", "email": "[email protected]", "submission_name": "Salt-MT-Pipeline", "submission_description": "See description for Submission ID 226303. This removes the LLM post-processing (i.e. only DB retrieval and NLLB translation).", "uses_gold": false, "uses_rag": true, "uses_llm": false, "llm_name": "N/A", "is_finetuned": true}, "scores": {"ko_KR": {"meta_score": 74.24242424242425, "comet_score": 92.96608721351399, "overall_score": 82.5559372183359}, "th_TH": {"meta_score": 65.5932695097186, "comet_score": 90.6400655477608, "overall_score": 76.10895902165163}, "de_DE": {"meta_score": 73.77467665078285, "comet_score": 92.34129760371252, "overall_score": 82.02040054005515}, "fr_FR": {"meta_score": 74.76669716376945, "comet_score": 91.84182100413813, "overall_score": 82.42927424713382}, "tr_TR": {"meta_score": 76.86116700201208, "comet_score": 94.46934204479471, "overall_score": 84.76043077058138}, "ar_AE": {"meta_score": 81.72421376731911, "comet_score": 93.20150630694839, "overall_score": 87.08633380650197}, "it_IT": {"meta_score": 77.61867398979993, "comet_score": 93.3607608500814, "overall_score": 84.7650299774166}, "es_ES": {"meta_score": 74.57849381790933, "comet_score": 93.59931963778203, "overall_score": 83.01328382778564}, "zh_TW": {"meta_score": 45.27209571593979, "comet_score": 89.75164376196369, "overall_score": 60.185638803456435}, "ja_JP": {"meta_score": 72.20046985121378, "comet_score": 93.0171020772416, "overall_score": 81.29738738786637}, "overall": {"meta_score": 71.66321817108891, "comet_score": 92.51889460479373, "overall_score": 80.4222675600785}}}}
|
|
|
1 |
{"225638": {"metadata": {"team_name": "YNU-HPCC", "email": "[email protected]", "submission_name": "LLaMA + MT", "submission_description": "try another llm", "uses_gold": true, "uses_rag": false, "uses_llm": false, "llm_name": "Llama-3.3-70B-Instruct", "is_finetuned": false}, "scores": {"ko_KR": {"meta_score": 88.17394726485635, "comet_score": 94.903170802095, "overall_score": 91.41488861007092}, "th_TH": {"meta_score": 86.5100087032202, "comet_score": 93.42989604871515, "overall_score": 89.83689450606285}, "de_DE": {"meta_score": 82.59019741320627, "comet_score": 94.37710679890023, "overall_score": 88.091118486679}, "fr_FR": {"meta_score": 86.58737419945105, "comet_score": 93.77212552771259, "overall_score": 90.03664497660041}, "tr_TR": {"meta_score": 79.27565392354124, "comet_score": 94.08626482706862, "overall_score": 86.04831122247953}, "ar_AE": {"meta_score": 88.78381350340884, "comet_score": 94.3301959197181, "overall_score": 91.47300688418572}, "it_IT": {"meta_score": 89.87838367987446, "comet_score": 94.96415760843087, "overall_score": 92.35130542869926}, "es_ES": {"meta_score": 88.42263019857624, "comet_score": 95.27658856609601, "overall_score": 91.72174616762133}, "zh_TW": {"meta_score": 80.6445387881127, "comet_score": 94.23986344620879, "overall_score": 86.91375818515075}, "ja_JP": {"meta_score": 87.74471417384495, "comet_score": 95.68072684348424, "overall_score": 91.5410423141258}, "overall": {"meta_score": 85.86112618480924, "comet_score": 94.50600963884295, "overall_score": 89.94287167816755}}}}
|
2 |
{"226803": {"metadata": {"team_name": "Lunar", "email": "[email protected]", "submission_name": "LLaMA-RAFT-Plus", "submission_description": "(I have already submitted the Google Form for this submission ID, but I was not completely sure if I submitted the correct content, so I am submitting it once more. I appreciate your understanding.) We trained LLaMA-3.1-8B-Instruct using a multi-turn dialogue setup that incorporates function calls to: (1) identify key entities in the source text that require lookup via function calls (e.g., [search(\"The Great Gatsby\")]) and (2) retrieve their corresponding entities in the target language using the Wikipedia API. These retrieved entities are then leveraged to generate the final translation. The system is capable of performing searches at test time, and this version utilizes real-time search to evaluate its performance in real-world conditions. Additionally, it integrates a validation dataset during training to further enhance accuracy.", "uses_gold": false, "uses_rag": true, "uses_llm": false, "llm_name": "Llama-3.1-8B-Instruct", "is_finetuned": true}, "scores": {"ko_KR": {"meta_score": 61.49153876426604, "comet_score": 92.78515101927606, "overall_score": 73.96453370441374}, "th_TH": {"meta_score": 67.21787061212649, "comet_score": 90.16974140353088, "overall_score": 77.02026777289785}, "de_DE": {"meta_score": 59.54731109598366, "comet_score": 91.40155507572416, "overall_score": 72.11338478766199}, "fr_FR": {"meta_score": 67.15462031107045, "comet_score": 91.3326877611881, "overall_score": 77.39940873745869}, "tr_TR": {"meta_score": 71.495640509725, "comet_score": 93.62873369659216, "overall_score": 81.07883912266462}, "ar_AE": {"meta_score": 67.53903672751265, "comet_score": 91.44903267900682, "overall_score": 77.6961390858889}, "it_IT": {"meta_score": 73.57787367595135, "comet_score": 93.30533824454761, "overall_score": 82.27560234063249}, "es_ES": {"meta_score": 66.57924316223304, "comet_score": 93.01946911362057, "overall_score": 77.60922083422801}, "zh_TW": {"meta_score": 38.788112697800074, "comet_score": 88.96667605739724, "overall_score": 54.023015354445704}, "ja_JP": {"meta_score": 55.657791699295224, "comet_score": 92.11579698051152, "overall_score": 69.38942048250435}, "overall": {"meta_score": 62.9049039255964, "comet_score": 91.8174182031395, "overall_score": 74.25698322227962}}}}
|
3 |
{"221913": {"metadata": {"team_name": "sakura", "email": "[email protected]", "submission_name": "Rakuten7b-PO10", "submission_description": "Rakuten7b with Preference Optmization on paranames", "uses_gold": false, "uses_rag": false, "uses_llm": false, "llm_name": "Rakuten/RakutenAI-7B-chat", "is_finetuned": true}, "scores": {"ja_JP": {"meta_score": 29.502740798747062, "comet_score": 90.73745774552985, "overall_score": 44.52759940541613}}}}
|
4 |
+
{"226408": {"metadata": {"team_name": "SHEF", "email": "[email protected]", "submission_name": "Llama-Wiki-DeepSeek", "submission_description": "A three-stage pipeline incorporating Chain-of-Thought (CoT) reasoning that extracts entities, generates queries for Wikidata search, and leverages returned candidate entity information to enhance LLM translation.", "uses_gold": false, "uses_rag": false, "uses_llm": false, "llm_name": "Llama-3.3-70B-Instruct + DeepSeek-R1", "is_finetuned": true}, "scores": {"de_DE": {"meta_score": 85.56841388699796, "comet_score": 92.82443383879772, "overall_score": 89.04885677651487}, "fr_FR": {"meta_score": 90.04574565416286, "comet_score": 91.92607256434756, "overall_score": 90.97619433769394}, "it_IT": {"meta_score": 93.01686936053353, "comet_score": 94.68152872449326, "overall_score": 93.84181727786786}, "es_ES": {"meta_score": 90.50206069689023, "comet_score": 93.90829785368602, "overall_score": 92.173721032759}}}}
|
5 |
{"226036": {"metadata": {"team_name": "YNU-HPCC", "email": "[email protected]", "submission_name": "Qwen2.5-32B", "submission_description": "Modified to improve performance", "uses_gold": true, "uses_rag": false, "uses_llm": false, "llm_name": "Qwen2.5-32B", "is_finetuned": true}, "scores": {"ko_KR": {"meta_score": 88.17394726485635, "comet_score": 94.903170802095, "overall_score": 91.41488861007092}, "th_TH": {"meta_score": 86.5100087032202, "comet_score": 93.42989604871515, "overall_score": 89.83689450606285}, "de_DE": {"meta_score": 82.59019741320627, "comet_score": 94.37710679890023, "overall_score": 88.091118486679}, "fr_FR": {"meta_score": 86.58737419945105, "comet_score": 93.77212552771259, "overall_score": 90.03664497660041}, "tr_TR": {"meta_score": 79.27565392354124, "comet_score": 94.08626482706862, "overall_score": 86.04831122247953}, "ar_AE": {"meta_score": 88.78381350340884, "comet_score": 94.3301959197181, "overall_score": 91.47300688418572}, "it_IT": {"meta_score": 89.87838367987446, "comet_score": 94.96415760843087, "overall_score": 92.35130542869926}, "es_ES": {"meta_score": 88.42263019857624, "comet_score": 95.27658856609601, "overall_score": 91.72174616762133}, "zh_TW": {"meta_score": 80.6445387881127, "comet_score": 94.23986344620879, "overall_score": 86.91375818515075}, "ja_JP": {"meta_score": 87.74471417384495, "comet_score": 95.68072684348424, "overall_score": 91.5410423141258}, "overall": {"meta_score": 85.86112618480924, "comet_score": 94.50600963884295, "overall_score": 89.94287167816755}}}}
|
6 |
{"226713": {"metadata": {"team_name": "UAlberta", "email": "[email protected]", "submission_name": "WikiGPT4o", "submission_description": "We prompt a state-of-the-art language model with instructions designed to increase the model's attention in the named entity. We incorporate information from WikiData into the prompt to suggest a translation of the entity, and also leverage in context learning.", "uses_gold": true, "uses_rag": true, "uses_llm": false, "llm_name": "GPT-4o", "is_finetuned": false}, "scores": {"ko_KR": {"meta_score": 90.33844942935852, "comet_score": 95.59958734436329, "overall_score": 92.8945861387825}, "th_TH": {"meta_score": 89.96228604583696, "comet_score": 94.11520353859669, "overall_score": 91.99189842403622}, "de_DE": {"meta_score": 85.05786249149081, "comet_score": 94.27934431802504, "overall_score": 89.43152006720092}, "fr_FR": {"meta_score": 89.67978042086003, "comet_score": 94.25645109010419, "overall_score": 91.91117777691568}, "tr_TR": {"meta_score": 81.64542812430136, "comet_score": 95.82169847469216, "overall_score": 88.16735522226375}, "ar_AE": {"meta_score": 91.6648339564548, "comet_score": 94.86057546627976, "overall_score": 93.23532837741573}, "it_IT": {"meta_score": 91.74185955276579, "comet_score": 95.92362562550335, "overall_score": 93.78615126342946}, "es_ES": {"meta_score": 89.34057699512927, "comet_score": 95.29729041435961, "overall_score": 92.22284714553352}, "zh_TW": {"meta_score": 81.22346584330374, "comet_score": 94.27579471799041, "overall_score": 87.26426273975798}, "ja_JP": {"meta_score": 90.32889584964762, "comet_score": 95.79453747675768, "overall_score": 92.98146551518195}, "overall": {"meta_score": 88.09834387091489, "comet_score": 95.02241084666721, "overall_score": 91.38865926705178}}}}
|
7 |
{"225867": {"metadata": {"team_name": "SALT \ud83e\uddc2", "email": "[email protected]", "submission_name": "Salt-MT-Pipeline", "submission_description": "See description for Submission ID 226303. This removes the LLM post-processing (i.e. only DB retrieval and NLLB translation).", "uses_gold": false, "uses_rag": true, "uses_llm": false, "llm_name": "N/A", "is_finetuned": true}, "scores": {"ko_KR": {"meta_score": 74.24242424242425, "comet_score": 92.96608721351399, "overall_score": 82.5559372183359}, "th_TH": {"meta_score": 65.5932695097186, "comet_score": 90.6400655477608, "overall_score": 76.10895902165163}, "de_DE": {"meta_score": 73.77467665078285, "comet_score": 92.34129760371252, "overall_score": 82.02040054005515}, "fr_FR": {"meta_score": 74.76669716376945, "comet_score": 91.84182100413813, "overall_score": 82.42927424713382}, "tr_TR": {"meta_score": 76.86116700201208, "comet_score": 94.46934204479471, "overall_score": 84.76043077058138}, "ar_AE": {"meta_score": 81.72421376731911, "comet_score": 93.20150630694839, "overall_score": 87.08633380650197}, "it_IT": {"meta_score": 77.61867398979993, "comet_score": 93.3607608500814, "overall_score": 84.7650299774166}, "es_ES": {"meta_score": 74.57849381790933, "comet_score": 93.59931963778203, "overall_score": 83.01328382778564}, "zh_TW": {"meta_score": 45.27209571593979, "comet_score": 89.75164376196369, "overall_score": 60.185638803456435}, "ja_JP": {"meta_score": 72.20046985121378, "comet_score": 93.0171020772416, "overall_score": 81.29738738786637}, "overall": {"meta_score": 71.66321817108891, "comet_score": 92.51889460479373, "overall_score": 80.4222675600785}}}}
|