Bofeng Huang committed on
Commit
d17bcd3
·
unverified ·
1 Parent(s): f2c0457

auto -> yue

Browse files
added_tokens.json CHANGED
@@ -1504,7 +1504,6 @@
1504
  "<|am|>": 50334,
1505
  "<|ar|>": 50272,
1506
  "<|as|>": 50350,
1507
- "<|auto|>": 50358,
1508
  "<|az|>": 50304,
1509
  "<|ba|>": 50355,
1510
  "<|be|>": 50330,
@@ -1607,5 +1606,6 @@
1607
  "<|vi|>": 50278,
1608
  "<|yi|>": 50335,
1609
  "<|yo|>": 50325,
 
1610
  "<|zh|>": 50260
1611
  }
 
1504
  "<|am|>": 50334,
1505
  "<|ar|>": 50272,
1506
  "<|as|>": 50350,
 
1507
  "<|az|>": 50304,
1508
  "<|ba|>": 50355,
1509
  "<|be|>": 50330,
 
1606
  "<|vi|>": 50278,
1607
  "<|yi|>": 50335,
1608
  "<|yo|>": 50325,
1609
+ "<|yue|>": 50358,
1610
  "<|zh|>": 50260
1611
  }
generation_config.json CHANGED
@@ -104,7 +104,6 @@
104
  "<|am|>": 50334,
105
  "<|ar|>": 50272,
106
  "<|as|>": 50350,
107
- "<|auto|>": 50358,
108
  "<|az|>": 50304,
109
  "<|ba|>": 50355,
110
  "<|be|>": 50330,
@@ -199,9 +198,10 @@
199
  "<|vi|>": 50278,
200
  "<|yi|>": 50335,
201
  "<|yo|>": 50325,
 
202
  "<|zh|>": 50260
203
  },
204
- "language": "auto",
205
  "max_initial_timestamp_index": 50,
206
  "max_length": 448,
207
  "no_timestamps_token_id": 50364,
 
104
  "<|am|>": 50334,
105
  "<|ar|>": 50272,
106
  "<|as|>": 50350,
 
107
  "<|az|>": 50304,
108
  "<|ba|>": 50355,
109
  "<|be|>": 50330,
 
198
  "<|vi|>": 50278,
199
  "<|yi|>": 50335,
200
  "<|yo|>": 50325,
201
+ "<|yue|>": 50358,
202
  "<|zh|>": 50260
203
  },
204
+ "language": "cantonese",
205
  "max_initial_timestamp_index": 50,
206
  "max_length": 448,
207
  "no_timestamps_token_id": 50364,
special_tokens_map.json CHANGED
@@ -100,7 +100,7 @@
100
  "<|ba|>",
101
  "<|jw|>",
102
  "<|su|>",
103
- "<|auto|>",
104
  "<|translate|>",
105
  "<|transcribe|>",
106
  "<|startoflm|>",
 
100
  "<|ba|>",
101
  "<|jw|>",
102
  "<|su|>",
103
+ "<|yue|>",
104
  "<|translate|>",
105
  "<|transcribe|>",
106
  "<|startoflm|>",
tokenizer.json CHANGED
@@ -914,7 +914,7 @@
914
  },
915
  {
916
  "id": 50358,
917
- "content": "<|auto|>",
918
  "single_word": false,
919
  "lstrip": false,
920
  "rstrip": false,
 
914
  },
915
  {
916
  "id": 50358,
917
+ "content": "<|yue|>",
918
  "single_word": false,
919
  "lstrip": false,
920
  "rstrip": false,
tokenizer_config.json CHANGED
@@ -810,7 +810,7 @@
810
  "special": true
811
  },
812
  "50358": {
813
- "content": "<|auto|>",
814
  "lstrip": false,
815
  "normalized": false,
816
  "rstrip": false,
@@ -12975,7 +12975,7 @@
12975
  "<|ba|>",
12976
  "<|jw|>",
12977
  "<|su|>",
12978
- "<|auto|>",
12979
  "<|translate|>",
12980
  "<|transcribe|>",
12981
  "<|startoflm|>",
 
810
  "special": true
811
  },
812
  "50358": {
813
+ "content": "<|yue|>",
814
  "lstrip": false,
815
  "normalized": false,
816
  "rstrip": false,
 
12975
  "<|ba|>",
12976
  "<|jw|>",
12977
  "<|su|>",
12978
+ "<|yue|>",
12979
  "<|translate|>",
12980
  "<|transcribe|>",
12981
  "<|startoflm|>",