danielhanchen committed · verified
Commit 6b5acae · 1 Parent(s): 5cfe0cb

Add files using upload-large-folder tool

README.md CHANGED
@@ -1,31 +1,21 @@
  ---
- base_model: Qwen/Qwen2.5-VL-32B-Instruct
+ base_model:
+ - Qwen/Qwen2.5-VL-32B-Instruct
  license: apache-2.0
  language:
  - en
  pipeline_tag: image-text-to-text
  tags:
  - multimodal
+ - unsloth
  library_name: transformers
  ---
- <div>
- <p style="margin-bottom: 0; margin-top: 0;">
-   <strong>See <a href="https://huggingface.co/collections/unsloth/qwen25-vl-all-versions-679ca6c784fad5bd976a05a1">our collection</a> for versions of Qwen2.5-VL including 4-bit & dynamic formats.</strong>
- </p>
- <div style="display: flex; gap: 5px; align-items: center; ">
- <a href="https://github.com/unslothai/unsloth/">
-   <img src="https://github.com/unslothai/unsloth/raw/main/images/unsloth%20new%20logo.png" width="133">
- </a>
- <a href="https://discord.gg/unsloth">
-   <img src="https://github.com/unslothai/unsloth/raw/main/images/Discord%20button.png" width="173">
- </a>
- <a href="https://docs.unsloth.ai/">
-   <img src="https://raw.githubusercontent.com/unslothai/unsloth/refs/heads/main/images/documentation%20green%20button.png" width="143">
- </a>
- </div>
- </div>

  # Qwen2.5-VL-32B-Instruct
+ <a href="https://chat.qwenlm.ai/" target="_blank" style="margin: 2px;">
+   <img alt="Chat" src="https://img.shields.io/badge/%F0%9F%92%9C%EF%B8%8F%20Qwen%20Chat%20-536af5" style="display: inline-block; vertical-align: middle;"/>
+ </a>
+

  ## Latest Updates:
  In addition to the original formula, we have further enhanced Qwen2.5-VL-32B's mathematical and problem-solving abilities through reinforcement learning. This has also significantly improved the model's subjective user experience, with response styles adjusted to better align with human preferences. Particularly for objective queries such as mathematics, logical reasoning, and knowledge-based Q&A, the level of detail in responses and the clarity of formatting have been noticeably enhanced.

@@ -62,7 +52,7 @@ We extend dynamic resolution to the temporal dimension by adopting dynamic FPS s
  We enhance both training and inference speeds by strategically implementing window attention into the ViT. The ViT architecture is further optimized with SwiGLU and RMSNorm, aligning it with the structure of the Qwen2.5 LLM.


- We have three models with 3, 7 and 72 billion parameters. This repo contains the instruction-tuned 32B Qwen2.5-VL model. For more information, visit our [Blog](https://qwenlm.github.io/blog/qwen2.5-vl/) and [GitHub](https://github.com/QwenLM/Qwen2.5-VL).
+ We have four models with 3, 7, 32 and 72 billion parameters. This repo contains the instruction-tuned 32B Qwen2.5-VL model. For more information, visit our [Blog](https://qwenlm.github.io/blog/qwen2.5-vl/) and [GitHub](https://github.com/QwenLM/Qwen2.5-VL).
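The updated model card keeps the `image-text-to-text` pipeline tag with `transformers` as the library, so the checkpoint is meant to be loaded through the standard Qwen2.5-VL classes. A minimal loading sketch follows; the Hub id, dtype, and device placement are assumptions, and a recent transformers release with Qwen2.5-VL support (the config was saved with 4.52.0.dev0) is required.

```python
import torch
from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration

repo_id = "unsloth/Qwen2.5-VL-32B-Instruct"  # assumed Hub id of this repository

# bfloat16 matches the checkpoint's "torch_dtype"; device_map="auto" is an assumption.
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    repo_id, torch_dtype=torch.bfloat16, device_map="auto"
)
processor = AutoProcessor.from_pretrained(repo_id)
```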
 
chat_template.jinja ADDED
@@ -0,0 +1,7 @@
+ {% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system
+ You are a helpful assistant.<|im_end|>
+ {% endif %}<|im_start|>{{ message['role'] }}
+ {% if message['content'] is string %}{{ message['content'] }}<|im_end|>
+ {% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>
+ {% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant
+ {% endif %}
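The added chat_template.jinja counts images and videos with Jinja namespaces, injects <|vision_start|><|image_pad|><|vision_end|> / <|vision_start|><|video_pad|><|vision_end|> placeholders for each visual input, and prefixes "Picture N:" / "Video N:" labels only when the template variable `add_vision_id` is set. A short rendering sketch, assuming the Hub id and image path are placeholders:

```python
from transformers import AutoProcessor

processor = AutoProcessor.from_pretrained("unsloth/Qwen2.5-VL-32B-Instruct")  # assumed Hub id

messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "image": "file:///path/to/example.jpg"},  # illustrative path
            {"type": "text", "text": "Describe this image."},
        ],
    }
]

# Extra kwargs such as add_vision_id are forwarded to the template as variables.
prompt = processor.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True, add_vision_id=True
)
print(prompt)  # system block, "Picture 1: <|vision_start|><|image_pad|><|vision_end|>", then the question
```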
config.json CHANGED
@@ -9,7 +9,7 @@
  "image_token_id": 151655,
  "initializer_range": 0.02,
  "intermediate_size": 27648,
- "max_position_embeddings": 32768,
+ "max_position_embeddings": 128000,
  "max_window_layers": 64,
  "model_type": "qwen2_5_vl",
  "num_attention_heads": 40,
@@ -48,9 +48,48 @@
  },
  "rope_theta": 1000000.0,
  "sliding_window": 32768,
+ "text_config": {
+   "architectures": [
+     "Qwen2_5_VLForConditionalGeneration"
+   ],
+   "attention_dropout": 0.0,
+   "eos_token_id": 151645,
+   "hidden_act": "silu",
+   "hidden_size": 5120,
+   "image_token_id": null,
+   "initializer_range": 0.02,
+   "intermediate_size": 27648,
+   "max_position_embeddings": 128000,
+   "max_window_layers": 64,
+   "model_type": "qwen2_5_vl_text",
+   "num_attention_heads": 40,
+   "num_hidden_layers": 64,
+   "num_key_value_heads": 8,
+   "pad_token_id": 151643,
+   "rms_norm_eps": 1e-06,
+   "rope_scaling": {
+     "mrope_section": [
+       16,
+       24,
+       24
+     ],
+     "rope_type": "default",
+     "type": "default"
+   },
+   "rope_theta": 1000000.0,
+   "sliding_window": 32768,
+   "torch_dtype": "bfloat16",
+   "use_cache": true,
+   "use_sliding_window": false,
+   "video_token_id": null,
+   "vision_end_token_id": 151653,
+   "vision_start_token_id": 151652,
+   "vision_token_id": 151654,
+   "vocab_size": 152064
+ },
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
- "transformers_version": "4.50.3",
+ "transformers_version": "4.52.0.dev0",
  "unsloth_fixed": true,
  "use_cache": true,
  "use_sliding_window": false,
@@ -67,6 +106,7 @@
  "hidden_size": 1280,
  "in_channels": 3,
  "in_chans": 3,
+ "initializer_range": 0.02,
  "intermediate_size": 3456,
  "model_type": "qwen2_5_vl",
  "num_heads": 16,
generation_config.json CHANGED
@@ -5,9 +5,9 @@
    151645,
    151643
  ],
- "max_length": 32768,
+ "max_length": 128000,
  "pad_token_id": 151654,
  "repetition_penalty": 1.05,
  "temperature": 1e-06,
- "transformers_version": "4.50.3"
+ "transformers_version": "4.52.0.dev0"
  }
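The generation defaults keep near-greedy decoding (temperature 1e-06, repetition_penalty 1.05) and only raise max_length to 128000 so generation is not cut off before the extended context is used. A sketch for reading the shipped defaults back, Hub id assumed:

```python
from transformers import GenerationConfig

gen_cfg = GenerationConfig.from_pretrained("unsloth/Qwen2.5-VL-32B-Instruct")  # assumed Hub id

print(gen_cfg.max_length)          # 128000 (was 32768)
print(gen_cfg.temperature)         # 1e-06, effectively greedy
print(gen_cfg.repetition_penalty)  # 1.05
print(gen_cfg.eos_token_id)        # [151645, 151643]
```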
tokenizer_config.json CHANGED
@@ -195,16 +195,16 @@
    "<|video_pad|>"
  ],
  "bos_token": null,
- "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
  "clean_up_tokenization_spaces": false,
  "eos_token": "<|im_end|>",
  "errors": "replace",
  "extra_special_tokens": {},
- "model_max_length": 32768,
+ "model_max_length": 128000,
  "pad_token": "<|vision_pad|>",
  "padding_side": "left",
  "processor_class": "Qwen2_5_VLProcessor",
  "split_special_tokens": false,
  "tokenizer_class": "Qwen2Tokenizer",
- "unk_token": null
- }
+ "unk_token": null,
+ "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
+ }
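On the tokenizer side, model_max_length is raised to 128000 and the chat template stored in tokenizer_config.json is swapped from the text-only tool-calling template to the multimodal template that matches chat_template.jinja. A verification sketch, Hub id assumed:

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("unsloth/Qwen2.5-VL-32B-Instruct")  # assumed Hub id

print(tok.model_max_length)          # 128000 after this change
print(tok.pad_token, tok.eos_token)  # <|vision_pad|> <|im_end|>
print(tok.chat_template[:40])        # multimodal template with namespace counters
```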