tokenizer can decode tensor for vllm test (#14)
- tokenizer can decode tensor for vllm test (da9637b13c17a1c828afd4c8d010df0fd081fd19)
- tokenization_moonshot.py +3 -0
tokenization_moonshot.py
CHANGED
@@ -16,6 +16,7 @@ from shutil import copyfile
 from tiktoken.load import load_tiktoken_bpe
 from tokenizers import AddedToken
 from transformers.tokenization_utils import PreTrainedTokenizer
+from transformers.utils import to_py_obj
 from transformers.models.gpt2.tokenization_gpt2 import bytes_to_unicode
 
 
@@ -229,6 +230,8 @@ class TikTokenTokenizer(PreTrainedTokenizer):
         if len(kwargs) > 0:
             return super().decode(token_ids, **kwargs)
 
+        token_ids = to_py_obj(token_ids)
+
         if type(token_ids) is int:
             token_ids = [token_ids]
 
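For context, a minimal sketch of why the to_py_obj call matters; this is not the repo's actual vLLM test, and PyTorch tensors are assumed purely for illustration. to_py_obj is the transformers helper that converts framework tensors, NumPy arrays, and nested lists into plain Python objects, so the int/list handling that follows it in decode() keeps working when vLLM passes a tensor of token ids.

# Sketch only: shows what to_py_obj does to the inputs decode() may receive.
import torch
from transformers.utils import to_py_obj

ids_tensor = torch.tensor([1, 2, 3])
assert to_py_obj(ids_tensor) == [1, 2, 3]   # 1-d tensor -> plain list of ints

assert to_py_obj(torch.tensor(7)) == 7      # 0-d tensor -> plain int, which the
                                            # `type(token_ids) is int` branch
                                            # below then wraps into a list

assert to_py_obj([4, 5, 6]) == [4, 5, 6]    # plain lists pass through unchanged

With the input normalized up front, the existing int/list logic in decode() is untouched and tensor inputs from the vLLM test decode the same way as lists.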