redmoe-ai-v1 committed on
Commit 8a162c1 · verified · 1 Parent(s): fd74e6e

Upload folder using huggingface_hub

README.md CHANGED
@@ -14,7 +14,7 @@ language:
 # dots1
 
 <p align="center">
- <img src="figures/new_logo.png" width="200"/>
+ <img src="figures/new_logo2.png" width="300"/>
 <p>
 
 <p align="center">
@@ -24,8 +24,6 @@ language:
 </p>
 
 
-
-
 Visit our Hugging Face (click links above), search checkpoints with names starting with `dots.llm1` or visit the [dots1 collection](https://huggingface.co/collections/rednote-hilab/dotsllm1-68246aaaaba3363374a8aa7c), and you will find all you need! Enjoy!
 
 
@@ -117,6 +115,8 @@ curl http://localhost:8000/v1/chat/completions \
 
 ### Inference with huggingface
 
+ We are working to merge support for this model into Transformers ([PR #38143](https://github.com/huggingface/transformers/pull/38143)).
+
 #### Text Completion
 
 ```python
@@ -126,8 +126,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
 model_name = "rednote-hilab/dots.llm1.base"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 
- model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype=torch.bfloat16, attn_implementation="eager")
- model.generation_config = GenerationConfig.from_pretrained(model_name)
+ model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype=torch.bfloat16)
 
 text = "An attention function can be described as mapping a query and a set of key-value pairs to an output, where the query, keys, values, and output are all vectors. The output is"
 inputs = tokenizer(text, return_tensors="pt")
@@ -145,8 +144,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
 model_name = "rednote-hilab/dots.llm1.inst"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 
- model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype=torch.bfloat16, attn_implementation="eager")
- model.generation_config = GenerationConfig.from_pretrained(model_name)
+ model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype=torch.bfloat16)
 
 messages = [
     {"role": "user", "content": "Write a piece of quicksort code in C++"}
@@ -158,21 +156,26 @@ result = tokenizer.decode(outputs[0][input_tensor.shape[1]:], skip_special_token
 print(result)
 ```
 
- ### Inference with sglang
- [SGLang](https://github.com/sgl-project/sglang) is a fast serving framework for large language models and vision language models. SGLang can be used to launch a server with an OpenAI-compatible API service. `sglang>=***` is required. It is as easy as
+ ### Inference with vllm
 
+ [vLLM](https://github.com/vllm-project/vllm) is a high-throughput and memory-efficient inference and serving engine for LLMs. Official support for this model is covered in [PR #18254](https://github.com/vllm-project/vllm/pull/18254).
 
 ```shell
- python -m sglang.launch_server --model-path dots.llm1.inst --tp 8 --host 0.0.0.0 --port 8000
+ vllm serve dots.llm1.inst --port 8000 --tensor-parallel-size 8
 ```
+
 An OpenAI-compatible API will be available at `http://localhost:8000/v1`.
 
- ### Inference with vllm
- [vLLM](https://github.com/vllm-project/vllm) is a high-throughput and memory-efficient inference and serving engine for LLMs. `vllm>=***` is recommended.
+ ### Inference with sglang
+
+ [SGLang](https://github.com/sgl-project/sglang) is a fast serving framework for large language models and vision language models. SGLang can be used to launch a server with an OpenAI-compatible API service. Official support for this model is covered in [PR #6471](https://github.com/sgl-project/sglang/pull/6471).
+
+ Getting started is as simple as running:
 
 ```shell
- vllm serve dots.llm1.inst --port 8000 --tensor-parallel-size 8
+ python -m sglang.launch_server --model-path dots.llm1.inst --tp 8 --host 0.0.0.0 --port 8000
 ```
+
 An OpenAI-compatible API will be available at `http://localhost:8000/v1`.
 
 ## 4. Evaluation Results
@@ -190,4 +193,4 @@ If you find `dots.llm1` is useful or want to use in your projects, please kindly
 journal={arXiv preprint arXiv:TBD},
 year={2025}
 }
- ```
+ ```
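
For readers skimming the hunks above: with the sampling defaults moving into `generation_config.json` (see below), the instruct example assembles into the following complete program. A minimal sketch, filling in the context lines the diff elides; `max_new_tokens=200` is an illustrative choice, not taken from the diff.

```python
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

model_name = "rednote-hilab/dots.llm1.inst"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype=torch.bfloat16)

messages = [
    {"role": "user", "content": "Write a piece of quicksort code in C++"}
]
# Render the conversation with the chat template; add_generation_prompt appends <|response|>.
input_tensor = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

# Sampling settings (do_sample, temperature, top_p) are now read from
# generation_config.json, so no explicit GenerationConfig is needed.
outputs = model.generate(input_tensor, max_new_tokens=200)
result = tokenizer.decode(outputs[0][input_tensor.shape[1]:], skip_special_tokens=True)
print(result)
```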
config.json CHANGED
@@ -28,6 +28,7 @@
 "rope_theta": 10000000,
 "routed_scaling_factor": 2.5,
 "sliding_window": null,
+ "scoring_func": "noaux_tc",
 "tie_word_embeddings": false,
 "torch_dtype": "bfloat16",
 "transformers_version": "4.46.3",
figures/new_logo2.png ADDED
generation_config.json CHANGED
@@ -1,6 +1,9 @@
 {
 "_from_model_config": true,
- "bos_token_id": null,
+ "do_sample": true,
+ "temperature": 0.7,
+ "top_p": 0.8,
+ "bos_token_id": 151643,
 "eos_token_id": 151645,
 "transformers_version": "4.46.3"
 }
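
These defaults are what `generate` falls back to when the caller passes no sampling arguments, which is why the explicit `GenerationConfig.from_pretrained` line could be dropped from the README above. A quick check (a sketch, assuming the instruct repo id from the README):

```python
from transformers import GenerationConfig

gen_cfg = GenerationConfig.from_pretrained("rednote-hilab/dots.llm1.inst")
print(gen_cfg.do_sample, gen_cfg.temperature, gen_cfg.top_p)  # expected: True 0.7 0.8
print(gen_cfg.bos_token_id, gen_cfg.eos_token_id)             # expected: 151643 151645
```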
tokenizer_config.json CHANGED
@@ -132,7 +132,7 @@
 },
 "additional_special_tokens": ["<|im_start|>", "<|im_end|>", "<|userprompt|>", "<|endofuserprompt|>", "<|response|>", "<|endofresponse|>", "<|system|>", "<|endofsystem|>", "<|observation|>", "<|endofobservation|>", "<|execution|>", "<|endofexecution|>", "<|reject-unknown|>", "<|sec-cot|>", "<|sec-end-cot|>"],
 "bos_token": null,
- "chat_template": "{% if messages[0]['role'] == 'system' %}<|system|>{{ messages[0]['content'] }}<|endofsystem|>{% set start_idx = 1 %}{% else %}<|system|>You are a helpful assistant<|endofsystem|>{% set start_idx = 0 %}{% endif %}{% for idx in range(start_idx, messages|length) %}{% if messages[idx]['role'] == 'user' %}<|userprompt|>{{ messages[idx]['content'] }}<|endofuserprompt|>{% elif messages[idx]['role'] == 'assistant' %}<|response|>{{ messages[idx]['content'] }}<|endofresponse|>{% endif %}{% endfor %}{% if add_generation_prompt and messages[-1]['role'] == 'user' %}<|response|>{% endif %}",
+ "chat_template": "{% if messages[0]['role'] == 'system' %}<|system|>{{ messages[0]['content'] }}<|endofsystem|>{% set start_idx = 1 %}{% else %}<|system|>You are a helpful assistant.<|endofsystem|>{% set start_idx = 0 %}{% endif %}{% for idx in range(start_idx, messages|length) %}{% if messages[idx]['role'] == 'user' %}<|userprompt|>{{ messages[idx]['content'] }}<|endofuserprompt|>{% elif messages[idx]['role'] == 'assistant' %}<|response|>{{ messages[idx]['content'] }}<|endofresponse|>{% endif %}{% endfor %}{% if add_generation_prompt and messages[-1]['role'] == 'user' %}<|response|>{% endif %}",
 "clean_up_tokenization_spaces": false,
 "eos_token": "<|endofresponse|>",
 "errors": "replace",