hugosilva664 committed on
Commit
d47afed
·
verified ·
1 Parent(s): 5f8963d

Upload processor

Browse files
chat_template.jinja ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {%set seps=['
2
+
3
+ ','<|end▁of▁sentence|>']%}{%set i=0%}{%for message in messages%}{%if message['role']|lower=='user'%}<|User|>: {%elif message['role']|lower=='assistant'%}<|Assistant|>:{%if not (loop.last and not add_generation_prompt and message['content'][0]['type']=='text' and message['content'][0]['text']=='')%} {%endif%}{%else%}{{message['role'].capitalize()}}: {%endif%}{%for content in message['content']%}{%if content['type']=='image'%}{%if not loop.first%}{{'
4
+ '}}{%endif%}<image_placeholder>{%if not loop.last%}{{'
5
+ '}}{%endif%}{%elif content['type']=='text'%}{%set text=content['text']%}{%if loop.first%}{%set text=text.lstrip()%}{%endif%}{%if loop.last%}{%set text=text.rstrip()%}{%endif%}{%if not loop.first and message['content'][loop.index0-1]['type']=='text'%}{{' '+text}}{%else%}{{text}}{%endif%}{%endif%}{%endfor%}{%if not loop.last or add_generation_prompt%}{%if message['role']|lower=='user'%}{{seps[0]}}{%else%}{{seps[1]}}{%endif%}{%endif%}{%endfor%}{%if add_generation_prompt%}<|Assistant|>:{%endif%}
special_tokens_map.json CHANGED
@@ -10,6 +10,7 @@
10
  "<|User|>",
11
  "<|Assistant|>"
12
  ],
 
13
  "bos_token": {
14
  "content": "<|begin▁of▁sentence|>",
15
  "lstrip": false,
@@ -17,6 +18,7 @@
17
  "rstrip": false,
18
  "single_word": false
19
  },
 
20
  "eos_token": {
21
  "content": "<|end▁of▁sentence|>",
22
  "lstrip": false,
@@ -24,6 +26,7 @@
24
  "rstrip": false,
25
  "single_word": false
26
  },
 
27
  "pad_token": {
28
  "content": "<|▁pad▁|>",
29
  "lstrip": false,
 
10
  "<|User|>",
11
  "<|Assistant|>"
12
  ],
13
+ "boi_token": "<begin_of_image>",
14
  "bos_token": {
15
  "content": "<|begin▁of▁sentence|>",
16
  "lstrip": false,
 
18
  "rstrip": false,
19
  "single_word": false
20
  },
21
+ "eoi_token": "<end_of_image>",
22
  "eos_token": {
23
  "content": "<|end▁of▁sentence|>",
24
  "lstrip": false,
 
26
  "rstrip": false,
27
  "single_word": false
28
  },
29
+ "image_token": "<image_placeholder>",
30
  "pad_token": {
31
  "content": "<|▁pad▁|>",
32
  "lstrip": false,
tokenizer_config.json CHANGED
@@ -4839,10 +4839,17 @@
4839
  "<|User|>",
4840
  "<|Assistant|>"
4841
  ],
 
4842
  "bos_token": "<|begin▁of▁sentence|>",
4843
  "clean_up_tokenization_spaces": false,
 
4844
  "eos_token": "<|end▁of▁sentence|>",
4845
- "extra_special_tokens": {},
 
 
 
 
 
4846
  "legacy": true,
4847
  "model_max_length": 16384,
4848
  "pad_token": "<|▁pad▁|>",
 
4839
  "<|User|>",
4840
  "<|Assistant|>"
4841
  ],
4842
+ "boi_token": "<begin_of_image>",
4843
  "bos_token": "<|begin▁of▁sentence|>",
4844
  "clean_up_tokenization_spaces": false,
4845
+ "eoi_token": "<end_of_image>",
4846
  "eos_token": "<|end▁of▁sentence|>",
4847
+ "extra_special_tokens": {
4848
+ "boi_token": "<begin_of_image>",
4849
+ "eoi_token": "<end_of_image>",
4850
+ "image_token": "<image_placeholder>"
4851
+ },
4852
+ "image_token": "<image_placeholder>",
4853
  "legacy": true,
4854
  "model_max_length": 16384,
4855
  "pad_token": "<|▁pad▁|>",