Bofeng Huang
committed on
auto -> yue
Browse files
- added_tokens.json +1 -1
- generation_config.json +2 -2
- special_tokens_map.json +1 -1
- tokenizer.json +1 -1
- tokenizer_config.json +2 -2
added_tokens.json
CHANGED
@@ -1504,7 +1504,6 @@
|
|
1504 |
"<|am|>": 50334,
|
1505 |
"<|ar|>": 50272,
|
1506 |
"<|as|>": 50350,
|
1507 |
-
"<|auto|>": 50358,
|
1508 |
"<|az|>": 50304,
|
1509 |
"<|ba|>": 50355,
|
1510 |
"<|be|>": 50330,
|
@@ -1607,5 +1606,6 @@
|
|
1607 |
"<|vi|>": 50278,
|
1608 |
"<|yi|>": 50335,
|
1609 |
"<|yo|>": 50325,
|
|
|
1610 |
"<|zh|>": 50260
|
1611 |
}
|
|
|
1504 |
"<|am|>": 50334,
|
1505 |
"<|ar|>": 50272,
|
1506 |
"<|as|>": 50350,
|
|
|
1507 |
"<|az|>": 50304,
|
1508 |
"<|ba|>": 50355,
|
1509 |
"<|be|>": 50330,
|
|
|
1606 |
"<|vi|>": 50278,
|
1607 |
"<|yi|>": 50335,
|
1608 |
"<|yo|>": 50325,
|
1609 |
+
"<|yue|>": 50358,
|
1610 |
"<|zh|>": 50260
|
1611 |
}
|
generation_config.json
CHANGED
@@ -104,7 +104,6 @@
|
|
104 |
"<|am|>": 50334,
|
105 |
"<|ar|>": 50272,
|
106 |
"<|as|>": 50350,
|
107 |
-
"<|auto|>": 50358,
|
108 |
"<|az|>": 50304,
|
109 |
"<|ba|>": 50355,
|
110 |
"<|be|>": 50330,
|
@@ -199,9 +198,10 @@
|
|
199 |
"<|vi|>": 50278,
|
200 |
"<|yi|>": 50335,
|
201 |
"<|yo|>": 50325,
|
|
|
202 |
"<|zh|>": 50260
|
203 |
},
|
204 |
-
"language": "
|
205 |
"max_initial_timestamp_index": 50,
|
206 |
"max_length": 448,
|
207 |
"no_timestamps_token_id": 50364,
|
|
|
104 |
"<|am|>": 50334,
|
105 |
"<|ar|>": 50272,
|
106 |
"<|as|>": 50350,
|
|
|
107 |
"<|az|>": 50304,
|
108 |
"<|ba|>": 50355,
|
109 |
"<|be|>": 50330,
|
|
|
198 |
"<|vi|>": 50278,
|
199 |
"<|yi|>": 50335,
|
200 |
"<|yo|>": 50325,
|
201 |
+
"<|yue|>": 50358,
|
202 |
"<|zh|>": 50260
|
203 |
},
|
204 |
+
"language": "cantonese",
|
205 |
"max_initial_timestamp_index": 50,
|
206 |
"max_length": 448,
|
207 |
"no_timestamps_token_id": 50364,
|
special_tokens_map.json
CHANGED
@@ -100,7 +100,7 @@
|
|
100 |
"<|ba|>",
|
101 |
"<|jw|>",
|
102 |
"<|su|>",
|
103 |
-
"<|auto|>",
|
104 |
"<|translate|>",
|
105 |
"<|transcribe|>",
|
106 |
"<|startoflm|>",
|
|
|
100 |
"<|ba|>",
|
101 |
"<|jw|>",
|
102 |
"<|su|>",
|
103 |
+
"<|yue|>",
|
104 |
"<|translate|>",
|
105 |
"<|transcribe|>",
|
106 |
"<|startoflm|>",
|
tokenizer.json
CHANGED
@@ -914,7 +914,7 @@
|
|
914 |
},
|
915 |
{
|
916 |
"id": 50358,
|
917 |
-
"content": "<|auto|>",
|
918 |
"single_word": false,
|
919 |
"lstrip": false,
|
920 |
"rstrip": false,
|
|
|
914 |
},
|
915 |
{
|
916 |
"id": 50358,
|
917 |
+
"content": "<|yue|>",
|
918 |
"single_word": false,
|
919 |
"lstrip": false,
|
920 |
"rstrip": false,
|
tokenizer_config.json
CHANGED
@@ -810,7 +810,7 @@
|
|
810 |
"special": true
|
811 |
},
|
812 |
"50358": {
|
813 |
-
"content": "<|auto|>",
|
814 |
"lstrip": false,
|
815 |
"normalized": false,
|
816 |
"rstrip": false,
|
@@ -12975,7 +12975,7 @@
|
|
12975 |
"<|ba|>",
|
12976 |
"<|jw|>",
|
12977 |
"<|su|>",
|
12978 |
-
"<|auto|>",
|
12979 |
"<|translate|>",
|
12980 |
"<|transcribe|>",
|
12981 |
"<|startoflm|>",
|
|
|
810 |
"special": true
|
811 |
},
|
812 |
"50358": {
|
813 |
+
"content": "<|yue|>",
|
814 |
"lstrip": false,
|
815 |
"normalized": false,
|
816 |
"rstrip": false,
|
|
|
12975 |
"<|ba|>",
|
12976 |
"<|jw|>",
|
12977 |
"<|su|>",
|
12978 |
+
"<|yue|>",
|
12979 |
"<|translate|>",
|
12980 |
"<|transcribe|>",
|
12981 |
"<|startoflm|>",
|