yakazimir committed on
Commit
61ed84e
·
verified ·
1 Parent(s): 172616c

Model save

Browse files
Files changed (4) hide show
  1. README.md +2 -14
  2. all_results.json +4 -18
  3. train_results.json +4 -4
  4. trainer_state.json +119 -119
README.md CHANGED
@@ -3,32 +3,20 @@ library_name: transformers
3
  license: other
4
  base_model: trl-lib/qwen1.5-0.5b-sft
5
  tags:
6
- - alignment-handbook
7
  - trl
8
  - simpo
9
  - generated_from_trainer
10
  model-index:
11
- - name: trl-lib/qwen1.5-0.5b-sft
12
  results: []
13
  ---
14
 
15
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
16
  should probably proofread and complete it, then remove this comment. -->
17
 
18
- # trl-lib/qwen1.5-0.5b-sft
19
 
20
  This model is a fine-tuned version of [trl-lib/qwen1.5-0.5b-sft](https://huggingface.co/trl-lib/qwen1.5-0.5b-sft) on an unknown dataset.
21
- It achieves the following results on the evaluation set:
22
- - Loss: 0.7799
23
- - Rewards/chosen: -1.7874
24
- - Rewards/rejected: -1.8737
25
- - Rewards/accuracies: 0.5215
26
- - Rewards/margins: 0.0863
27
- - Logps/rejected: -1.8737
28
- - Logps/chosen: -1.7874
29
- - Logits/rejected: -0.3295
30
- - Logits/chosen: -0.3597
31
- - Semantic Entropy: 0.6459
32
 
33
  ## Model description
34
 
 
3
  license: other
4
  base_model: trl-lib/qwen1.5-0.5b-sft
5
  tags:
 
6
  - trl
7
  - simpo
8
  - generated_from_trainer
9
  model-index:
10
+ - name: simpo-exps_qwen05b
11
  results: []
12
  ---
13
 
14
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
  should probably proofread and complete it, then remove this comment. -->
16
 
17
+ # simpo-exps_qwen05b
18
 
19
  This model is a fine-tuned version of [trl-lib/qwen1.5-0.5b-sft](https://huggingface.co/trl-lib/qwen1.5-0.5b-sft) on an unknown dataset.
 
 
 
 
 
 
 
 
 
 
 
20
 
21
  ## Model description
22
 
all_results.json CHANGED
@@ -1,22 +1,8 @@
1
  {
2
  "epoch": 32.0,
3
- "eval_logits/chosen": -0.3597286641597748,
4
- "eval_logits/rejected": -0.32949814200401306,
5
- "eval_logps/chosen": -1.787438988685608,
6
- "eval_logps/rejected": -1.8736913204193115,
7
- "eval_loss": 0.7798751592636108,
8
- "eval_rewards/accuracies": 0.5215133428573608,
9
- "eval_rewards/chosen": -1.787438988685608,
10
- "eval_rewards/margins": 0.08625216037034988,
11
- "eval_rewards/rejected": -1.8736913204193115,
12
- "eval_runtime": 60.3848,
13
- "eval_samples": 1345,
14
- "eval_samples_per_second": 22.274,
15
- "eval_semantic_entropy": 0.6458982825279236,
16
- "eval_steps_per_second": 5.581,
17
  "total_flos": 0.0,
18
- "train_loss": 0.6878796736399333,
19
- "train_runtime": 138.6013,
20
- "train_samples_per_second": 6.926,
21
- "train_steps_per_second": 0.433
22
  }
 
1
  {
2
  "epoch": 32.0,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  "total_flos": 0.0,
4
+ "train_loss": 0.6880494674046834,
5
+ "train_runtime": 138.3641,
6
+ "train_samples_per_second": 6.938,
7
+ "train_steps_per_second": 0.434
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 32.0,
3
  "total_flos": 0.0,
4
- "train_loss": 0.6878796736399333,
5
- "train_runtime": 138.6013,
6
- "train_samples_per_second": 6.926,
7
- "train_steps_per_second": 0.433
8
  }
 
1
  {
2
  "epoch": 32.0,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.6880494674046834,
5
+ "train_runtime": 138.3641,
6
+ "train_samples_per_second": 6.938,
7
+ "train_steps_per_second": 0.434
8
  }
trainer_state.json CHANGED
@@ -10,204 +10,204 @@
10
  "log_history": [
11
  {
12
  "epoch": 2.6666666666666665,
13
- "grad_norm": 7.84375,
14
  "learning_rate": 6.666666666666667e-08,
15
- "logits/chosen": -0.6204631328582764,
16
- "logits/rejected": -0.6167532205581665,
17
- "logps/chosen": -1.6595089435577393,
18
- "logps/rejected": -1.862159013748169,
19
- "loss": 0.7095,
20
  "rewards/accuracies": 0.4749999940395355,
21
- "rewards/chosen": -1.6595089435577393,
22
- "rewards/margins": 0.20264975726604462,
23
- "rewards/rejected": -1.862159013748169,
24
- "semantic_entropy": 0.6520633697509766,
25
  "step": 5
26
  },
27
  {
28
  "epoch": 5.333333333333333,
29
- "grad_norm": 6.6875,
30
  "learning_rate": 7.892179482319296e-08,
31
- "logits/chosen": -0.6442058682441711,
32
- "logits/rejected": -0.5976763367652893,
33
- "logps/chosen": -1.6531527042388916,
34
- "logps/rejected": -2.0306272506713867,
35
- "loss": 0.6574,
36
  "rewards/accuracies": 0.5249999761581421,
37
- "rewards/chosen": -1.6531527042388916,
38
- "rewards/margins": 0.37747469544410706,
39
- "rewards/rejected": -2.0306272506713867,
40
- "semantic_entropy": 0.6555213928222656,
41
  "step": 10
42
  },
43
  {
44
  "epoch": 8.0,
45
- "grad_norm": 8.5,
46
  "learning_rate": 7.464101615137755e-08,
47
- "logits/chosen": -0.6110937595367432,
48
- "logits/rejected": -0.6190561056137085,
49
- "logps/chosen": -1.653534173965454,
50
- "logps/rejected": -1.8783153295516968,
51
- "loss": 0.6976,
52
  "rewards/accuracies": 0.5,
53
- "rewards/chosen": -1.653534173965454,
54
- "rewards/margins": 0.22478139400482178,
55
- "rewards/rejected": -1.8783153295516968,
56
- "semantic_entropy": 0.652945876121521,
57
  "step": 15
58
  },
59
  {
60
  "epoch": 10.666666666666666,
61
  "grad_norm": 8.625,
62
  "learning_rate": 6.744966551474935e-08,
63
- "logits/chosen": -0.6272696852684021,
64
- "logits/rejected": -0.6002117395401001,
65
- "logps/chosen": -1.6610969305038452,
66
- "logps/rejected": -1.9501771926879883,
67
- "loss": 0.6824,
68
  "rewards/accuracies": 0.512499988079071,
69
- "rewards/chosen": -1.6610969305038452,
70
- "rewards/margins": 0.28908008337020874,
71
- "rewards/rejected": -1.9501771926879883,
72
- "semantic_entropy": 0.6478284597396851,
73
  "step": 20
74
  },
75
  {
76
  "epoch": 13.333333333333334,
77
  "grad_norm": 8.875,
78
  "learning_rate": 5.7951967208018495e-08,
79
- "logits/chosen": -0.6546803712844849,
80
- "logits/rejected": -0.6259561777114868,
81
- "logps/chosen": -1.686605453491211,
82
- "logps/rejected": -1.9335895776748657,
83
- "loss": 0.7138,
84
  "rewards/accuracies": 0.4749999940395355,
85
- "rewards/chosen": -1.686605453491211,
86
- "rewards/margins": 0.24698403477668762,
87
- "rewards/rejected": -1.9335895776748657,
88
- "semantic_entropy": 0.6518786549568176,
89
  "step": 25
90
  },
91
  {
92
  "epoch": 16.0,
93
- "grad_norm": 6.71875,
94
  "learning_rate": 4.6945927106677224e-08,
95
- "logits/chosen": -0.5952478647232056,
96
- "logits/rejected": -0.60741126537323,
97
- "logps/chosen": -1.6205002069473267,
98
- "logps/rejected": -1.8895899057388306,
99
- "loss": 0.6683,
100
  "rewards/accuracies": 0.512499988079071,
101
- "rewards/chosen": -1.6205002069473267,
102
- "rewards/margins": 0.26908960938453674,
103
- "rewards/rejected": -1.8895899057388306,
104
- "semantic_entropy": 0.6604139804840088,
105
  "step": 30
106
  },
107
  {
108
  "epoch": 18.666666666666668,
109
- "grad_norm": 9.125,
110
  "learning_rate": 3.535628343499079e-08,
111
- "logits/chosen": -0.62486732006073,
112
- "logits/rejected": -0.625863254070282,
113
- "logps/chosen": -1.7008514404296875,
114
- "logps/rejected": -1.9429662227630615,
115
- "loss": 0.7077,
116
  "rewards/accuracies": 0.48750001192092896,
117
- "rewards/chosen": -1.7008514404296875,
118
- "rewards/margins": 0.24211446940898895,
119
- "rewards/rejected": -1.9429662227630615,
120
- "semantic_entropy": 0.640709638595581,
121
  "step": 35
122
  },
123
  {
124
  "epoch": 21.333333333333332,
125
- "grad_norm": 8.25,
126
  "learning_rate": 2.4156809358433726e-08,
127
- "logits/chosen": -0.615136444568634,
128
- "logits/rejected": -0.5828085541725159,
129
- "logps/chosen": -1.583477258682251,
130
- "logps/rejected": -1.8761383295059204,
131
  "loss": 0.6656,
132
  "rewards/accuracies": 0.5249999761581421,
133
- "rewards/chosen": -1.583477258682251,
134
- "rewards/margins": 0.29266101121902466,
135
- "rewards/rejected": -1.8761383295059204,
136
- "semantic_entropy": 0.6757807731628418,
137
  "step": 40
138
  },
139
  {
140
  "epoch": 24.0,
141
  "grad_norm": 8.75,
142
  "learning_rate": 1.4288495612538426e-08,
143
- "logits/chosen": -0.6335859894752502,
144
- "logits/rejected": -0.6234641671180725,
145
- "logps/chosen": -1.683307409286499,
146
- "logps/rejected": -1.9576094150543213,
147
- "loss": 0.6899,
148
  "rewards/accuracies": 0.48750001192092896,
149
- "rewards/chosen": -1.683307409286499,
150
- "rewards/margins": 0.27430176734924316,
151
- "rewards/rejected": -1.9576094150543213,
152
- "semantic_entropy": 0.6437331438064575,
153
  "step": 45
154
  },
155
  {
156
  "epoch": 26.666666666666668,
157
- "grad_norm": 8.125,
158
  "learning_rate": 6.58048754348255e-09,
159
- "logits/chosen": -0.6407667398452759,
160
- "logits/rejected": -0.6134266257286072,
161
- "logps/chosen": -1.6093534231185913,
162
- "logps/rejected": -1.8319820165634155,
163
- "loss": 0.6942,
164
  "rewards/accuracies": 0.4749999940395355,
165
- "rewards/chosen": -1.6093534231185913,
166
- "rewards/margins": 0.22262856364250183,
167
- "rewards/rejected": -1.8319820165634155,
168
- "semantic_entropy": 0.6697005033493042,
169
  "step": 50
170
  },
171
  {
172
  "epoch": 29.333333333333332,
173
- "grad_norm": 7.9375,
174
  "learning_rate": 1.680419507380444e-09,
175
- "logits/chosen": -0.6017393469810486,
176
- "logits/rejected": -0.5887783765792847,
177
- "logps/chosen": -1.6890275478363037,
178
- "logps/rejected": -2.0059025287628174,
179
- "loss": 0.6753,
180
  "rewards/accuracies": 0.550000011920929,
181
- "rewards/chosen": -1.6890275478363037,
182
- "rewards/margins": 0.31687501072883606,
183
- "rewards/rejected": -2.0059025287628174,
184
- "semantic_entropy": 0.6429846882820129,
185
  "step": 55
186
  },
187
  {
188
  "epoch": 32.0,
189
  "grad_norm": 8.4375,
190
  "learning_rate": 0.0,
191
- "logits/chosen": -0.6315954327583313,
192
- "logits/rejected": -0.6313327550888062,
193
- "logps/chosen": -1.6676785945892334,
194
- "logps/rejected": -1.9383472204208374,
195
- "loss": 0.6928,
196
  "rewards/accuracies": 0.4749999940395355,
197
- "rewards/chosen": -1.6676785945892334,
198
- "rewards/margins": 0.2706685960292816,
199
- "rewards/rejected": -1.9383472204208374,
200
- "semantic_entropy": 0.6480283141136169,
201
  "step": 60
202
  },
203
  {
204
  "epoch": 32.0,
205
  "step": 60,
206
  "total_flos": 0.0,
207
- "train_loss": 0.6878796736399333,
208
- "train_runtime": 138.6013,
209
- "train_samples_per_second": 6.926,
210
- "train_steps_per_second": 0.433
211
  }
212
  ],
213
  "logging_steps": 5,
 
10
  "log_history": [
11
  {
12
  "epoch": 2.6666666666666665,
13
+ "grad_norm": 7.78125,
14
  "learning_rate": 6.666666666666667e-08,
15
+ "logits/chosen": -0.6206714510917664,
16
+ "logits/rejected": -0.6167551279067993,
17
+ "logps/chosen": -1.6594607830047607,
18
+ "logps/rejected": -1.8626664876937866,
19
+ "loss": 0.7092,
20
  "rewards/accuracies": 0.4749999940395355,
21
+ "rewards/chosen": -1.6594607830047607,
22
+ "rewards/margins": 0.2032059133052826,
23
+ "rewards/rejected": -1.8626664876937866,
24
+ "semantic_entropy": 0.6521000862121582,
25
  "step": 5
26
  },
27
  {
28
  "epoch": 5.333333333333333,
29
+ "grad_norm": 6.78125,
30
  "learning_rate": 7.892179482319296e-08,
31
+ "logits/chosen": -0.644172191619873,
32
+ "logits/rejected": -0.5970994234085083,
33
+ "logps/chosen": -1.6529489755630493,
34
+ "logps/rejected": -2.02937912940979,
35
+ "loss": 0.6576,
36
  "rewards/accuracies": 0.5249999761581421,
37
+ "rewards/chosen": -1.6529489755630493,
38
+ "rewards/margins": 0.3764302134513855,
39
+ "rewards/rejected": -2.02937912940979,
40
+ "semantic_entropy": 0.6556634902954102,
41
  "step": 10
42
  },
43
  {
44
  "epoch": 8.0,
45
+ "grad_norm": 8.6875,
46
  "learning_rate": 7.464101615137755e-08,
47
+ "logits/chosen": -0.6107379794120789,
48
+ "logits/rejected": -0.6173809766769409,
49
+ "logps/chosen": -1.6537139415740967,
50
+ "logps/rejected": -1.878178596496582,
51
+ "loss": 0.6978,
52
  "rewards/accuracies": 0.5,
53
+ "rewards/chosen": -1.6537139415740967,
54
+ "rewards/margins": 0.22446465492248535,
55
+ "rewards/rejected": -1.878178596496582,
56
+ "semantic_entropy": 0.6528152227401733,
57
  "step": 15
58
  },
59
  {
60
  "epoch": 10.666666666666666,
61
  "grad_norm": 8.625,
62
  "learning_rate": 6.744966551474935e-08,
63
+ "logits/chosen": -0.6249920129776001,
64
+ "logits/rejected": -0.5978578925132751,
65
+ "logps/chosen": -1.661948561668396,
66
+ "logps/rejected": -1.9520155191421509,
67
+ "loss": 0.6819,
68
  "rewards/accuracies": 0.512499988079071,
69
+ "rewards/chosen": -1.661948561668396,
70
+ "rewards/margins": 0.2900669574737549,
71
+ "rewards/rejected": -1.9520155191421509,
72
+ "semantic_entropy": 0.6477808952331543,
73
  "step": 20
74
  },
75
  {
76
  "epoch": 13.333333333333334,
77
  "grad_norm": 8.875,
78
  "learning_rate": 5.7951967208018495e-08,
79
+ "logits/chosen": -0.6556390523910522,
80
+ "logits/rejected": -0.6272687911987305,
81
+ "logps/chosen": -1.6880241632461548,
82
+ "logps/rejected": -1.9340057373046875,
83
+ "loss": 0.7142,
84
  "rewards/accuracies": 0.4749999940395355,
85
+ "rewards/chosen": -1.6880241632461548,
86
+ "rewards/margins": 0.24598172307014465,
87
+ "rewards/rejected": -1.9340057373046875,
88
+ "semantic_entropy": 0.6515553593635559,
89
  "step": 25
90
  },
91
  {
92
  "epoch": 16.0,
93
+ "grad_norm": 6.875,
94
  "learning_rate": 4.6945927106677224e-08,
95
+ "logits/chosen": -0.5940297842025757,
96
+ "logits/rejected": -0.6073416471481323,
97
+ "logps/chosen": -1.6197277307510376,
98
+ "logps/rejected": -1.888943076133728,
99
+ "loss": 0.6684,
100
  "rewards/accuracies": 0.512499988079071,
101
+ "rewards/chosen": -1.6197277307510376,
102
+ "rewards/margins": 0.26921549439430237,
103
+ "rewards/rejected": -1.888943076133728,
104
+ "semantic_entropy": 0.6606020927429199,
105
  "step": 30
106
  },
107
  {
108
  "epoch": 18.666666666666668,
109
+ "grad_norm": 9.1875,
110
  "learning_rate": 3.535628343499079e-08,
111
+ "logits/chosen": -0.6252874732017517,
112
+ "logits/rejected": -0.6270566582679749,
113
+ "logps/chosen": -1.6999114751815796,
114
+ "logps/rejected": -1.9409929513931274,
115
+ "loss": 0.7074,
116
  "rewards/accuracies": 0.48750001192092896,
117
+ "rewards/chosen": -1.6999114751815796,
118
+ "rewards/margins": 0.24108140170574188,
119
+ "rewards/rejected": -1.9409929513931274,
120
+ "semantic_entropy": 0.6408571004867554,
121
  "step": 35
122
  },
123
  {
124
  "epoch": 21.333333333333332,
125
+ "grad_norm": 8.125,
126
  "learning_rate": 2.4156809358433726e-08,
127
+ "logits/chosen": -0.6158267259597778,
128
+ "logits/rejected": -0.5839654803276062,
129
+ "logps/chosen": -1.583505392074585,
130
+ "logps/rejected": -1.8752552270889282,
131
  "loss": 0.6656,
132
  "rewards/accuracies": 0.5249999761581421,
133
+ "rewards/chosen": -1.583505392074585,
134
+ "rewards/margins": 0.29174983501434326,
135
+ "rewards/rejected": -1.8752552270889282,
136
+ "semantic_entropy": 0.6757909059524536,
137
  "step": 40
138
  },
139
  {
140
  "epoch": 24.0,
141
  "grad_norm": 8.75,
142
  "learning_rate": 1.4288495612538426e-08,
143
+ "logits/chosen": -0.6340750455856323,
144
+ "logits/rejected": -0.6243816018104553,
145
+ "logps/chosen": -1.68179452419281,
146
+ "logps/rejected": -1.9539234638214111,
147
+ "loss": 0.6902,
148
  "rewards/accuracies": 0.48750001192092896,
149
+ "rewards/chosen": -1.68179452419281,
150
+ "rewards/margins": 0.2721291184425354,
151
+ "rewards/rejected": -1.9539234638214111,
152
+ "semantic_entropy": 0.6442986130714417,
153
  "step": 45
154
  },
155
  {
156
  "epoch": 26.666666666666668,
157
+ "grad_norm": 8.1875,
158
  "learning_rate": 6.58048754348255e-09,
159
+ "logits/chosen": -0.6418130397796631,
160
+ "logits/rejected": -0.6143754720687866,
161
+ "logps/chosen": -1.6074516773223877,
162
+ "logps/rejected": -1.830583930015564,
163
+ "loss": 0.694,
164
  "rewards/accuracies": 0.4749999940395355,
165
+ "rewards/chosen": -1.6074516773223877,
166
+ "rewards/margins": 0.22313210368156433,
167
+ "rewards/rejected": -1.830583930015564,
168
+ "semantic_entropy": 0.6700640916824341,
169
  "step": 50
170
  },
171
  {
172
  "epoch": 29.333333333333332,
173
+ "grad_norm": 8.0625,
174
  "learning_rate": 1.680419507380444e-09,
175
+ "logits/chosen": -0.6013139486312866,
176
+ "logits/rejected": -0.5867229700088501,
177
+ "logps/chosen": -1.689805030822754,
178
+ "logps/rejected": -2.002596616744995,
179
+ "loss": 0.6768,
180
  "rewards/accuracies": 0.550000011920929,
181
+ "rewards/chosen": -1.689805030822754,
182
+ "rewards/margins": 0.3127916753292084,
183
+ "rewards/rejected": -2.002596616744995,
184
+ "semantic_entropy": 0.6430121064186096,
185
  "step": 55
186
  },
187
  {
188
  "epoch": 32.0,
189
  "grad_norm": 8.4375,
190
  "learning_rate": 0.0,
191
+ "logits/chosen": -0.6301103830337524,
192
+ "logits/rejected": -0.6289348006248474,
193
+ "logps/chosen": -1.665967345237732,
194
+ "logps/rejected": -1.932885766029358,
195
+ "loss": 0.6937,
196
  "rewards/accuracies": 0.4749999940395355,
197
+ "rewards/chosen": -1.665967345237732,
198
+ "rewards/margins": 0.2669183611869812,
199
+ "rewards/rejected": -1.932885766029358,
200
+ "semantic_entropy": 0.6483110785484314,
201
  "step": 60
202
  },
203
  {
204
  "epoch": 32.0,
205
  "step": 60,
206
  "total_flos": 0.0,
207
+ "train_loss": 0.6880494674046834,
208
+ "train_runtime": 138.3641,
209
+ "train_samples_per_second": 6.938,
210
+ "train_steps_per_second": 0.434
211
  }
212
  ],
213
  "logging_steps": 5,