PocketDoc committed
Commit 64beab3 · verified · Parent(s): 246012e

Model save

Files changed (1): README.md (+31, -31)
README.md CHANGED
```diff
@@ -7,7 +7,7 @@ tags:
 datasets:
 - Dans-DiscountModels/pretokenization-test-2
 model-index:
-- name: 7b-m-dans-personalityengine-v1.2.1-rc-4
+- name: 7b-m-dans-personalityengine-v1.2.1-rc-5
   results: []
 ---
 
@@ -29,11 +29,11 @@ trust_remote_code:
 wandb_project: 7b-m-dans-personalityengine
 wandb_watch:
 
-wandb_run_id: V1.2.1-3-1 # V{Version}-{Run Number}-{Attempt Number}
+wandb_run_id: V1.2.1-4-1 # V{Version}-{Run Number}-{Attempt Number}
 wandb_log_model:
 
 # push checkpoints to hub
-hub_model_id: Dans-DiscountModels/7b-m-dans-personalityengine-v1.2.1-rc-4
+hub_model_id: Dans-DiscountModels/7b-m-dans-personalityengine-v1.2.1-rc-5
 # how to push checkpoints to hub
 # https://huggingface.co/docs/transformers/v4.31.0/en/main_classes/trainer#transformers.TrainingArguments.hub_strategy
 hub_strategy: "every_save"
```
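Annotation on this hunk: `hub_strategy: "every_save"` is passed through to the `transformers` `TrainingArguments.hub_strategy` option linked in the config. A minimal sketch of the equivalent plain-`transformers` setup (`output_dir` here is a placeholder, not taken from this repo):

```python
from transformers import TrainingArguments

# Sketch: how the axolotl hub-push settings above map onto
# transformers TrainingArguments. Only hub_model_id and
# hub_strategy come from this diff; output_dir is a placeholder.
args = TrainingArguments(
    output_dir="out",
    push_to_hub=True,  # enable checkpoint uploads during training
    hub_model_id="Dans-DiscountModels/7b-m-dans-personalityengine-v1.2.1-rc-5",
    hub_strategy="every_save",  # push each saved checkpoint, not just the final one
)
```

With `"every_save"`, each checkpoint folder is uploaded as it is written, which is what lets intermediate rc checkpoints appear on the Hub mid-run.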
```diff
@@ -87,7 +87,7 @@ micro_batch_size: 2
 num_epochs: 1
 
 optimizer: ademamix_8bit
-optim_args: "beta1=0.9,beta2=0.999,beta3=0.999,alpha=5"
+optim_args: "beta1=0.9,beta2=0.999,beta3=0.999,alpha=10"
 
 lr_scheduler: rex
 learning_rate: 0.00000015
```
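The functional change here is `alpha`: 5 to 10. In AdEMAMix, `alpha` scales a slow gradient EMA (decay `beta3`) that is mixed into the usual Adam-style update, so raising it leans harder on older gradients. A scalar sketch of one step, following the AdEMAMix paper's update rule with the values from this commit, and ignoring the paper's alpha/beta3 warmup schedulers (this is not the bitsandbytes 8-bit kernel itself):

```python
import math

def ademamix_step(p, g, m1, m2, nu, t,
                  lr=1.5e-7, beta1=0.9, beta2=0.999, beta3=0.999,
                  alpha=10.0, eps=1e-8):
    """One scalar AdEMAMix step using the optim_args from this commit."""
    m1 = beta1 * m1 + (1 - beta1) * g      # fast gradient EMA
    m2 = beta3 * m2 + (1 - beta3) * g      # slow gradient EMA (not bias-corrected)
    nu = beta2 * nu + (1 - beta2) * g * g  # second-moment EMA
    m1_hat = m1 / (1 - beta1 ** t)         # bias-correct the fast EMA
    nu_hat = nu / (1 - beta2 ** t)
    p -= lr * (m1_hat + alpha * m2) / (math.sqrt(nu_hat) + eps)
    return p, m1, m2, nu
```

With `beta3 = 0.999` the slow EMA averages gradients on the order of the last 1,000 steps, so `alpha=10` lets that long history dominate the numerator of the update.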
```diff
@@ -135,11 +135,11 @@ special_tokens:
 
 </details><br>
 
-# 7b-m-dans-personalityengine-v1.2.1-rc-4
+# 7b-m-dans-personalityengine-v1.2.1-rc-5
 
 This model is a fine-tuned version of [Dans-DiscountModels/mistral-7b-v0.3-ChatML](https://huggingface.co/Dans-DiscountModels/mistral-7b-v0.3-ChatML) on the Dans-DiscountModels/pretokenization-test-2 dataset.
 It achieves the following results on the evaluation set:
-- Loss: 1.4136
+- Loss: 1.4047
 
 ## Model description
 
@@ -168,7 +168,7 @@ The following hyperparameters were used during training:
 - total_train_batch_size: 32
 - total_eval_batch_size: 16
 - optimizer: Use ademamix_8bit and the args are:
-beta1=0.9,beta2=0.999,beta3=0.999,alpha=5
+beta1=0.9,beta2=0.999,beta3=0.999,alpha=10
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_steps: 43
 - num_epochs: 1.0
```
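One derived number worth checking in this hunk: with `micro_batch_size: 2` (visible in the hunk header above) and `total_train_batch_size: 32`, the product of gradient-accumulation steps and data-parallel world size must be 16; how that factor splits between the two is not in the diff:

```python
micro_batch_size = 2          # per-device batch, from the axolotl config
total_train_batch_size = 32   # reported in the generated README

factor = total_train_batch_size // micro_batch_size
assert factor == 16           # grad_accum_steps * world_size
print(f"grad_accum_steps * world_size = {factor}")
```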
```diff
@@ -178,30 +178,30 @@ beta1=0.9,beta2=0.999,beta3=0.999,alpha=5
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:------:|:----:|:---------------:|
 | 1.5957 | 0.0007 | 1 | 1.5418 |
-| 1.4896 | 0.0417 | 61 | 1.5008 |
-| 1.5882 | 0.0833 | 122 | 1.4755 |
-| 1.3739 | 0.125 | 183 | 1.4632 |
-| 1.5317 | 0.1667 | 244 | 1.4558 |
-| 1.4852 | 0.2083 | 305 | 1.4504 |
-| 1.3851 | 0.25 | 366 | 1.4460 |
-| 1.514 | 0.2917 | 427 | 1.4423 |
-| 1.5015 | 0.3333 | 488 | 1.4390 |
-| 1.5083 | 0.375 | 549 | 1.4361 |
-| 1.3896 | 0.4167 | 610 | 1.4336 |
-| 1.4243 | 0.4583 | 671 | 1.4313 |
-| 1.3101 | 0.5 | 732 | 1.4291 |
-| 1.5724 | 0.5417 | 793 | 1.4271 |
-| 1.4305 | 0.5833 | 854 | 1.4253 |
-| 1.4534 | 0.625 | 915 | 1.4235 |
-| 1.4756 | 0.6667 | 976 | 1.4219 |
-| 1.4429 | 0.7083 | 1037 | 1.4205 |
-| 1.4753 | 0.75 | 1098 | 1.4191 |
-| 1.473 | 0.7917 | 1159 | 1.4179 |
-| 1.4314 | 0.8333 | 1220 | 1.4167 |
-| 1.3473 | 0.875 | 1281 | 1.4157 |
-| 1.4458 | 0.9167 | 1342 | 1.4148 |
-| 1.4309 | 0.9583 | 1403 | 1.4140 |
-| 1.4304 | 1.0 | 1464 | 1.4136 |
+| 1.487 | 0.0417 | 61 | 1.4982 |
+| 1.5851 | 0.0833 | 122 | 1.4720 |
+| 1.3702 | 0.125 | 183 | 1.4596 |
+| 1.5285 | 0.1667 | 244 | 1.4519 |
+| 1.4809 | 0.2083 | 305 | 1.4461 |
+| 1.3806 | 0.25 | 366 | 1.4414 |
+| 1.5097 | 0.2917 | 427 | 1.4373 |
+| 1.497 | 0.3333 | 488 | 1.4338 |
+| 1.503 | 0.375 | 549 | 1.4306 |
+| 1.384 | 0.4167 | 610 | 1.4278 |
+| 1.4191 | 0.4583 | 671 | 1.4252 |
+| 1.3042 | 0.5 | 732 | 1.4228 |
+| 1.5669 | 0.5417 | 793 | 1.4206 |
+| 1.4239 | 0.5833 | 854 | 1.4185 |
+| 1.4472 | 0.625 | 915 | 1.4165 |
+| 1.4692 | 0.6667 | 976 | 1.4147 |
+| 1.4358 | 0.7083 | 1037 | 1.4130 |
+| 1.4676 | 0.75 | 1098 | 1.4114 |
+| 1.4657 | 0.7917 | 1159 | 1.4099 |
+| 1.424 | 0.8333 | 1220 | 1.4085 |
+| 1.3385 | 0.875 | 1281 | 1.4072 |
+| 1.4373 | 0.9167 | 1342 | 1.4061 |
+| 1.4226 | 0.9583 | 1403 | 1.4052 |
+| 1.4225 | 1.0 | 1464 | 1.4047 |
 
 
 ### Framework versions
```
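Since the diff carries both runs' eval curves, the net effect of the only config change visible here (`alpha` 5 to 10) can be read off the final table rows; a quick delta:

```python
rc4_loss = 1.4136  # rc-4 final validation loss (step 1464, old table)
rc5_loss = 1.4047  # rc-5 final validation loss (step 1464, new table)

delta = rc4_loss - rc5_loss
print(f"absolute improvement: {delta:.4f}")                    # 0.0089
print(f"relative improvement: {100 * delta / rc4_loss:.2f}%")  # ~0.63%
```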
 