zhouzaida committed on
Commit
4dea07c
·
verified ·
1 Parent(s): b13cf2f

tokenizer can decode tensor for vllm test (#14)

Browse files

- tokenizer can decode tensor for vllm test (da9637b13c17a1c828afd4c8d010df0fd081fd19)

Files changed (1) hide show
  1. tokenization_moonshot.py +3 -0
tokenization_moonshot.py CHANGED
@@ -16,6 +16,7 @@ from shutil import copyfile
16
  from tiktoken.load import load_tiktoken_bpe
17
  from tokenizers import AddedToken
18
  from transformers.tokenization_utils import PreTrainedTokenizer
 
19
  from transformers.models.gpt2.tokenization_gpt2 import bytes_to_unicode
20
 
21
 
@@ -229,6 +230,8 @@ class TikTokenTokenizer(PreTrainedTokenizer):
229
  if len(kwargs) > 0:
230
  return super().decode(token_ids, **kwargs)
231
 
 
 
232
  if type(token_ids) is int:
233
  token_ids = [token_ids]
234
 
 
16
  from tiktoken.load import load_tiktoken_bpe
17
  from tokenizers import AddedToken
18
  from transformers.tokenization_utils import PreTrainedTokenizer
19
+ from transformers.utils import to_py_obj
20
  from transformers.models.gpt2.tokenization_gpt2 import bytes_to_unicode
21
 
22
 
 
230
  if len(kwargs) > 0:
231
  return super().decode(token_ids, **kwargs)
232
 
233
+ token_ids = to_py_obj(token_ids)
234
+
235
  if type(token_ids) is int:
236
  token_ids = [token_ids]
237