Upload folder using huggingface_hub

- README.md +17 -14
- config.json +1 -0
- figures/new_logo2.png +0 -0
- generation_config.json +4 -1
- tokenizer_config.json +1 -1
README.md
CHANGED
@@ -14,7 +14,7 @@ language:
 # dots1
 
 <p align="center">
-    <img src="figures/
+    <img src="figures/new_logo2.png" width="300"/>
 <p>
 
 <p align="center">
@@ -24,8 +24,6 @@ language:
 </p>
 
 
-
-
 Visit our Hugging Face (click links above), search checkpoints with names starting with `dots.llm1` or visit the [dots1 collection](https://huggingface.co/collections/rednote-hilab/dotsllm1-68246aaaaba3363374a8aa7c), and you will find all you need! Enjoy!
 
 
@@ -117,6 +115,8 @@ curl http://localhost:8000/v1/chat/completions \
 
 ### Inference with huggingface
 
+We are working to merge it into Transformers ([PR #38143](https://github.com/huggingface/transformers/pull/38143)).
+
 #### Text Completion
 
 ```python
@@ -126,8 +126,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
 model_name = "rednote-hilab/dots.llm1.base"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 
-model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype=torch.bfloat16
-model.generation_config = GenerationConfig.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype=torch.bfloat16)
 
 text = "An attention function can be described as mapping a query and a set of key-value pairs to an output, where the query, keys, values, and output are all vectors. The output is"
 inputs = tokenizer(text, return_tensors="pt")
@@ -145,8 +144,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
 model_name = "rednote-hilab/dots.llm1.inst"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 
-model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype=torch.bfloat16
-model.generation_config = GenerationConfig.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype=torch.bfloat16)
 
 messages = [
     {"role": "user", "content": "Write a piece of quicksort code in C++"}
@@ -158,21 +156,26 @@ result = tokenizer.decode(outputs[0][input_tensor.shape[1]:], skip_special_token
 print(result)
 ```
 
+### Inference with vllm
 
-
-[SGLang](https://github.com/sgl-project/sglang) is a fast serving framework for large language models and vision language models. SGLang could be used to launch a server with OpenAI-compatible API service. `sglang>=***` is required. It is as easy as
+[vLLM](https://github.com/vllm-project/vllm) is a high-throughput and memory-efficient inference and serving engine for LLMs. Official support for this feature is covered in [PR #18254](https://github.com/vllm-project/vllm/pull/18254).
 
 ```shell
-
+vllm serve dots.llm1.inst --port 8000 --tensor-parallel-size 8
 ```
+
 An OpenAI-compatible API will be available at `http://localhost:8000/v1`.
 
-### Inference with
-
+### Inference with sglang
+
+[SGLang](https://github.com/sgl-project/sglang) is a fast serving framework for large language models and vision language models. SGLang could be used to launch a server with OpenAI-compatible API service. Official support for this feature is covered in [PR #6471](https://github.com/sgl-project/sglang/pull/6471).
+
+Getting started is as simple as running:
 
 ```shell
-
+python -m sglang.launch_server --model-path dots.llm1.inst --tp 8 --host 0.0.0.0 --port 8000
 ```
+
 An OpenAI-compatible API will be available at `http://localhost:8000/v1`.
 
 ## 4. Evaluation Results
@@ -190,4 +193,4 @@ If you find `dots.llm1` is useful or want to use in your projects, please kindly
 journal={arXiv preprint arXiv:TBD},
 year={2025}
 }
-```
+```
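
For reference, the corrected text-completion snippet from the README diff above, assembled into a runnable whole (`max_new_tokens` is an illustrative choice, not part of this commit):

```python
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

model_name = "rednote-hilab/dots.llm1.base"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# The fixed line from the diff: closing parenthesis restored, and the
# explicit GenerationConfig assignment dropped (generation defaults are
# loaded from generation_config.json automatically).
# Until PR #38143 is merged upstream, loading may additionally require
# trust_remote_code=True, depending on your transformers version.
model = AutoModelForCausalLM.from_pretrained(
    model_name, device_map="auto", torch_dtype=torch.bfloat16
)

text = "An attention function can be described as mapping a query and a set of key-value pairs to an output, where the query, keys, values, and output are all vectors. The output is"
inputs = tokenizer(text, return_tensors="pt").to(model.device)

outputs = model.generate(**inputs, max_new_tokens=100)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```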
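
Likewise, a sketch of the chat-completion flow the second hunk fixes, using the variable names from the README hunk (`input_tensor`, `outputs`, `result`); `max_new_tokens` is again an illustrative choice:

```python
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

model_name = "rednote-hilab/dots.llm1.inst"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name, device_map="auto", torch_dtype=torch.bfloat16
)

messages = [
    {"role": "user", "content": "Write a piece of quicksort code in C++"}
]
# apply_chat_template renders the template shown in tokenizer_config.json
# below, ending with <|response|> so the model continues as the assistant.
input_tensor = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

outputs = model.generate(input_tensor, max_new_tokens=512)
result = tokenizer.decode(outputs[0][input_tensor.shape[1]:], skip_special_tokens=True)
print(result)
```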
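
Since both the vLLM and SGLang servers expose the same OpenAI-compatible endpoint, one client snippet covers either; a minimal sketch using the `openai` Python package (the model name echoes the serve commands above, and the `api_key` value is a placeholder since neither server requires a real key by default):

```python
from openai import OpenAI

# Point the client at whichever server is running on port 8000.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

response = client.chat.completions.create(
    model="dots.llm1.inst",
    messages=[{"role": "user", "content": "Write a piece of quicksort code in C++"}],
)
print(response.choices[0].message.content)
```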
config.json
CHANGED
@@ -28,6 +28,7 @@
   "rope_theta": 10000000,
   "routed_scaling_factor": 2.5,
   "sliding_window": null,
+  "scoring_func": "noaux_tc",
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.46.3",
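
The new `"scoring_func": "noaux_tc"` key selects an auxiliary-loss-free top-k router scoring scheme of the kind introduced by DeepSeek-V3, where a learned per-expert bias balances expert load instead of an auxiliary loss. A rough sketch of the idea under that assumption (names and shapes are hypothetical, not this repo's implementation):

```python
import torch

def noaux_tc_route(router_logits, correction_bias, top_k, routed_scaling_factor=2.5):
    """Hypothetical sketch of "noaux_tc" routing.

    The per-expert correction_bias influences only *which* experts are
    picked; the combine weights come from the uncorrected sigmoid scores.
    """
    scores = torch.sigmoid(router_logits)                      # (tokens, n_experts)
    _, topk_idx = torch.topk(scores + correction_bias, top_k, dim=-1)
    topk_w = scores.gather(-1, topk_idx)
    topk_w = topk_w / topk_w.sum(dim=-1, keepdim=True)
    return topk_idx, topk_w * routed_scaling_factor            # scaled as per config
```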
figures/new_logo2.png
ADDED
(binary image: new logo used in the README header)
generation_config.json
CHANGED
@@ -1,6 +1,9 @@
 {
   "_from_model_config": true,
-  "
+  "do_sample": true,
+  "temperature": 0.7,
+  "top_p": 0.8,
+  "bos_token_id": 151643,
   "eos_token_id": 151645,
   "transformers_version": "4.46.3"
 }
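
These new defaults (sampling enabled, temperature 0.7, top-p 0.8) are what `model.generate()` now picks up automatically, which is why the README hunks above could drop the explicit `GenerationConfig` assignment. A quick way to verify:

```python
from transformers import GenerationConfig

# Loads the defaults shipped in generation_config.json.
gen_cfg = GenerationConfig.from_pretrained("rednote-hilab/dots.llm1.inst")
print(gen_cfg.do_sample, gen_cfg.temperature, gen_cfg.top_p)
# Expected after this commit: True 0.7 0.8
```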
tokenizer_config.json
CHANGED
@@ -132,7 +132,7 @@
   },
   "additional_special_tokens": ["<|im_start|>", "<|im_end|>", "<|userprompt|>", "<|endofuserprompt|>", "<|response|>", "<|endofresponse|>", "<|system|>", "<|endofsystem|>", "<|observation|>", "<|endofobservation|>", "<|execution|>", "<|endofexecution|>", "<|reject-unknown|>", "<|sec-cot|>", "<|sec-end-cot|>"],
   "bos_token": null,
-  "chat_template": "{% if messages[0]['role'] == 'system' %}<|system|>{{ messages[0]['content'] }}<|endofsystem|>{% set start_idx = 1 %}{% else %}<|system|>You are a helpful assistant
+  "chat_template": "{% if messages[0]['role'] == 'system' %}<|system|>{{ messages[0]['content'] }}<|endofsystem|>{% set start_idx = 1 %}{% else %}<|system|>You are a helpful assistant.<|endofsystem|>{% set start_idx = 0 %}{% endif %}{% for idx in range(start_idx, messages|length) %}{% if messages[idx]['role'] == 'user' %}<|userprompt|>{{ messages[idx]['content'] }}<|endofuserprompt|>{% elif messages[idx]['role'] == 'assistant' %}<|response|>{{ messages[idx]['content'] }}<|endofresponse|>{% endif %}{% endfor %}{% if add_generation_prompt and messages[-1]['role'] == 'user' %}<|response|>{% endif %}",
   "clean_up_tokenization_spaces": false,
   "eos_token": "<|endofresponse|>",
   "errors": "replace",
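
The completed template can be sanity-checked by rendering a message list without tokenizing; a minimal sketch:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("rednote-hilab/dots.llm1.inst")

messages = [{"role": "user", "content": "Hello!"}]
rendered = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
print(rendered)
# Expected, per the template above:
# <|system|>You are a helpful assistant.<|endofsystem|><|userprompt|>Hello!<|endofuserprompt|><|response|>
```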