Felladrin committed
Commit 4f19407 · 0 Parent(s)

Initial commit

Files changed (6)
  1. .gitattributes +35 -0
  2. README.md +80 -0
  3. config.json +27 -0
  4. model.safetensors +3 -0
  5. tokenizer.json +0 -0
  6. tokenizer_config.json +40 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,80 @@
+ ---
+ library_name: transformers
+ license: apache-2.0
+ datasets:
+ - WizardLMTeam/WizardLM_evol_instruct_V2_196k
+ language:
+ - en
+ base_model:
+ - Felladrin/Minueza-2-96M
+ tags:
+ - llama-factory
+ ---
+
+ # Minueza-2-96M-Instruct (Variant 06)
+
+ This model is a fine-tuned version of [Felladrin/Minueza-2-96M](https://huggingface.co/Felladrin/Minueza-2-96M) on the English [WizardLMTeam/WizardLM_evol_instruct_V2_196k](https://huggingface.co/datasets/WizardLMTeam/WizardLM_evol_instruct_V2_196k) dataset.
+
+ ## Usage
+
+ ```sh
+ pip install transformers==4.51.1 torch==2.6.0
+ ```
+
+ ```python
+ from transformers import pipeline, TextStreamer
+ import torch
+
+ generate_text = pipeline(
+     "text-generation",
+     model="Felladrin/Minueza-2-96M-Instruct-Variant-06",
+     device=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
+ )
+
+ messages = [
+     {
+         "role": "user",
+         "content": "Summarize the advantages of internet marketing.",
+     },
+ ]
+
+ generate_text(
+     generate_text.tokenizer.apply_chat_template(
+         messages, tokenize=False, add_generation_prompt=True
+     ),
+     streamer=TextStreamer(generate_text.tokenizer, skip_special_tokens=True),
+     max_new_tokens=512,
+     do_sample=True,
+     temperature=0.7,
+     top_p=0.9,
+     top_k=0,
+     min_p=0.1,
+     repetition_penalty=1.17,
+ )
+ ```
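+
+ If you prefer to manage the model and tokenizer directly rather than through the pipeline, the sketch below is an equivalent lower-level call. It reuses only the repository name and sampling values from the example above; everything else is standard `transformers` API.
+
+ ```python
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ import torch
+
+ model_id = "Felladrin/Minueza-2-96M-Instruct-Variant-06"
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
+ model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16)
+
+ messages = [{"role": "user", "content": "Summarize the advantages of internet marketing."}]
+
+ # Tokenize the chat prompt and generate with the same sampling settings as above.
+ inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt")
+ outputs = model.generate(
+     inputs,
+     max_new_tokens=512,
+     do_sample=True,
+     temperature=0.7,
+     top_p=0.9,
+     top_k=0,
+     min_p=0.1,
+     repetition_penalty=1.17,
+ )
+ print(tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True))
+ ```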
+
+ ## Training hyperparameters
+
+ The following hyperparameters were used during training (an equivalent `TrainingArguments` sketch follows the list):
+
+ - learning_rate: 5.8e-05
+ - train_batch_size: 4
+ - eval_batch_size: 4
+ - seed: 42
+ - gradient_accumulation_steps: 32
+ - total_train_batch_size: 128 (train_batch_size × gradient_accumulation_steps)
+ - optimizer: adamw_torch with betas=(0.9, 0.95) and epsilon=1e-08 (no additional optimizer arguments)
+ - lr_scheduler_type: cosine
+ - lr_scheduler_warmup_ratio: 0.1
+ - num_epochs: 2
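+
+ The `llama-factory` tag indicates the fine-tune was run with LLaMA-Factory, so the original training config lives in that tool's own format. As a rough reference, the settings above map onto `transformers.TrainingArguments` as sketched below; `output_dir` is a hypothetical placeholder, not a value from the original run.
+
+ ```python
+ from transformers import TrainingArguments
+
+ # A sketch of the hyperparameters above; not the original LLaMA-Factory config.
+ training_args = TrainingArguments(
+     output_dir="minueza-2-96m-instruct-variant-06",  # hypothetical path
+     learning_rate=5.8e-5,
+     per_device_train_batch_size=4,
+     per_device_eval_batch_size=4,
+     seed=42,
+     gradient_accumulation_steps=32,  # 4 * 32 = 128 effective batch size
+     optim="adamw_torch",
+     adam_beta1=0.9,
+     adam_beta2=0.95,
+     adam_epsilon=1e-8,
+     lr_scheduler_type="cosine",
+     warmup_ratio=0.1,
+     num_train_epochs=2,
+ )
+ ```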
+
+ ## Framework versions
+
+ - Transformers 4.51.1
+ - PyTorch 2.6.0+cu124
+ - Datasets 3.5.0
+ - Tokenizers 0.21.0
+
+ ## License
+
+ This model is licensed under the Apache License 2.0.
config.json ADDED
@@ -0,0 +1,27 @@
+ {
+   "architectures": ["LlamaForCausalLM"],
+   "attention_bias": false,
+   "attention_dropout": 0.1,
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "head_dim": 56,
+   "hidden_act": "silu",
+   "hidden_size": 672,
+   "initializer_range": 0.02,
+   "intermediate_size": 2688,
+   "max_position_embeddings": 4096,
+   "mlp_bias": false,
+   "model_type": "llama",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 8,
+   "num_key_value_heads": 4,
+   "pretraining_tp": 1,
+   "rms_norm_eps": 1e-6,
+   "rope_scaling": null,
+   "rope_theta": 500000.0,
+   "tie_word_embeddings": false,
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.51.1",
+   "use_cache": true,
+   "vocab_size": 32000
+ }
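The configuration describes a compact Llama-style decoder: 8 hidden layers, hidden size 672 split across 12 attention heads (head_dim = 672 / 12 = 56), grouped-query attention with 4 key/value heads, untied embeddings over a 32,000-token vocabulary, and a 4,096-token context window. A minimal sketch for loading and sanity-checking it, assuming only the public repository name:

```python
from transformers import AutoConfig

# Load the published config and verify the head geometry (a sketch).
config = AutoConfig.from_pretrained("Felladrin/Minueza-2-96M-Instruct-Variant-06")
assert config.head_dim == config.hidden_size // config.num_attention_heads  # 56 == 672 // 12
print(config.num_hidden_layers, config.num_key_value_heads)  # 8 layers, 4 KV heads (GQA)
```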
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8707447a0c4011828153d893ee88a164d58f94166ca3530f486bcc7de5ebac91
+ size 192018920
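The entry above is a Git LFS pointer, not the weights themselves; it records only the object hash and the payload size. That size is consistent with the model name: roughly 96M parameters stored in bfloat16 (2 bytes each) come to about 192 MB. A back-of-the-envelope check from the config.json values above (a sketch that ignores the RMSNorm weights and the safetensors file header, which likely account for the small remainder):

```python
# Approximate parameter count from config.json (sketch; omits norm weights).
vocab, hidden, inter, layers, heads, kv_heads, head_dim = 32000, 672, 2688, 8, 12, 4, 56

embeddings = vocab * hidden              # input embeddings
lm_head = vocab * hidden                 # untied output head (tie_word_embeddings: false)
attention = layers * (
    hidden * heads * head_dim            # q_proj
    + 2 * hidden * kv_heads * head_dim   # k_proj and v_proj
    + heads * head_dim * hidden          # o_proj
)
mlp = layers * 3 * hidden * inter        # gate, up, and down projections

total = embeddings + lm_head + attention + mlp
print(total, total * 2)  # ~95.99M params, ~192,000,000 bytes in bfloat16
```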
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,40 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<|im_start|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "<|im_end|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "<|im_start|>",
+   "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a highly knowledgeable and friendly assistant. Your goal is to understand and respond to user inquiries with clarity. Your interactions are always respectful, helpful, and focused on delivering the most accurate information to the user.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "<|im_end|>",
+   "extra_special_tokens": {},
+   "model_max_length": 4096,
+   "pad_token": "<|im_end|>",
+   "padding_side": "right",
+   "split_special_tokens": false,
+   "tokenizer_class": "PreTrainedTokenizerFast",
+   "truncation_side": "right",
+   "unk_token": "<unk>"
+ }
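The `chat_template` implements ChatML-style formatting: when the conversation does not begin with a system message, the default system prompt above is prepended, and every message is wrapped in `<|im_start|>role ... <|im_end|>` markers (matching the bos/eos token ids 1 and 2 in config.json). A minimal sketch showing the rendered prompt, assuming only the public repository name:

```python
from transformers import AutoTokenizer

# Render the chat template to inspect the ChatML prompt it produces (a sketch).
tokenizer = AutoTokenizer.from_pretrained("Felladrin/Minueza-2-96M-Instruct-Variant-06")
prompt = tokenizer.apply_chat_template(
    [{"role": "user", "content": "Hello!"}],
    tokenize=False,
    add_generation_prompt=True,
)
print(prompt)
# Expected shape of the output (system prompt abbreviated):
# <|im_start|>system
# You are a highly knowledgeable and friendly assistant. [...]<|im_end|>
# <|im_start|>user
# Hello!<|im_end|>
# <|im_start|>assistant
```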