yakazimir committed on
Commit
cc36d85
·
verified ·
1 Parent(s): 6d2308a

Model save

Browse files
Files changed (4) hide show
  1. README.md +2 -13
  2. all_results.json +4 -17
  3. train_results.json +4 -4
  4. trainer_state.json +43 -39
README.md CHANGED
@@ -3,31 +3,20 @@ library_name: transformers
3
  license: other
4
  base_model: trl-lib/qwen1.5-0.5b-sft
5
  tags:
6
- - alignment-handbook
7
  - trl
8
  - simpo
9
  - generated_from_trainer
10
  model-index:
11
- - name: trl-lib/qwen1.5-0.5b-sft
12
  results: []
13
  ---
14
 
15
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
16
  should probably proofread and complete it, then remove this comment. -->
17
 
18
- # trl-lib/qwen1.5-0.5b-sft
19
 
20
  This model is a fine-tuned version of [trl-lib/qwen1.5-0.5b-sft](https://huggingface.co/trl-lib/qwen1.5-0.5b-sft) on an unknown dataset.
21
- It achieves the following results on the evaluation set:
22
- - Loss: 5.8364
23
- - Rewards/chosen: -16.6952
24
- - Rewards/rejected: -17.6561
25
- - Rewards/accuracies: 0.5254
26
- - Rewards/margins: 0.9610
27
- - Logps/rejected: -1.7656
28
- - Logps/chosen: -1.6695
29
- - Logits/rejected: -0.3813
30
- - Logits/chosen: -0.4435
31
 
32
  ## Model description
33
 
 
3
  license: other
4
  base_model: trl-lib/qwen1.5-0.5b-sft
5
  tags:
 
6
  - trl
7
  - simpo
8
  - generated_from_trainer
9
  model-index:
10
+ - name: simpo-exps_qwen05b
11
  results: []
12
  ---
13
 
14
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
  should probably proofread and complete it, then remove this comment. -->
16
 
17
+ # simpo-exps_qwen05b
18
 
19
  This model is a fine-tuned version of [trl-lib/qwen1.5-0.5b-sft](https://huggingface.co/trl-lib/qwen1.5-0.5b-sft) on an unknown dataset.
 
 
 
 
 
 
 
 
 
 
20
 
21
  ## Model description
22
 
all_results.json CHANGED
@@ -1,21 +1,8 @@
1
  {
2
  "epoch": 0.004531679270399638,
3
- "eval_logits/chosen": -0.44351547956466675,
4
- "eval_logits/rejected": -0.3813219964504242,
5
- "eval_logps/chosen": -1.669515609741211,
6
- "eval_logps/rejected": -1.7656141519546509,
7
- "eval_loss": 5.836363315582275,
8
- "eval_rewards/accuracies": 0.5254348516464233,
9
- "eval_rewards/chosen": -16.69515609741211,
10
- "eval_rewards/margins": 0.9609846472740173,
11
- "eval_rewards/rejected": -17.65614128112793,
12
- "eval_runtime": 292.82,
13
- "eval_samples": 5595,
14
- "eval_samples_per_second": 19.107,
15
- "eval_steps_per_second": 4.778,
16
  "total_flos": 0.0,
17
- "train_loss": 104.63558349609374,
18
- "train_runtime": 57.4491,
19
- "train_samples_per_second": 5.57,
20
- "train_steps_per_second": 0.348
21
  }
 
1
  {
2
  "epoch": 0.004531679270399638,
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  "total_flos": 0.0,
4
+ "train_loss": 104.54919738769532,
5
+ "train_runtime": 57.8361,
6
+ "train_samples_per_second": 5.533,
7
+ "train_steps_per_second": 0.346
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 0.004531679270399638,
3
  "total_flos": 0.0,
4
- "train_loss": 104.63558349609374,
5
- "train_runtime": 57.4491,
6
- "train_samples_per_second": 5.57,
7
- "train_steps_per_second": 0.348
8
  }
 
1
  {
2
  "epoch": 0.004531679270399638,
3
  "total_flos": 0.0,
4
+ "train_loss": 104.54919738769532,
5
+ "train_runtime": 57.8361,
6
+ "train_samples_per_second": 5.533,
7
+ "train_steps_per_second": 0.346
8
  }
trainer_state.json CHANGED
@@ -10,72 +10,76 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.0011329198175999095,
13
- "grad_norm": 1640.0,
14
  "learning_rate": 7.464101615137755e-08,
15
- "logits/chosen": -0.7980253100395203,
16
- "logits/rejected": -0.6524801850318909,
17
- "logps/chosen": -1.7504403591156006,
18
- "logps/rejected": -1.832793951034546,
19
- "loss": 106.451,
20
  "rewards/accuracies": 0.48750001192092896,
21
- "rewards/chosen": -17.504404067993164,
22
- "rewards/margins": 0.8235357999801636,
23
- "rewards/rejected": -18.327938079833984,
 
24
  "step": 5
25
  },
26
  {
27
  "epoch": 0.002265839635199819,
28
- "grad_norm": 1176.0,
29
  "learning_rate": 4.6945927106677224e-08,
30
- "logits/chosen": -0.8232762217521667,
31
- "logits/rejected": -0.795711874961853,
32
- "logps/chosen": -1.730991005897522,
33
- "logps/rejected": -1.7889397144317627,
34
- "loss": 103.937,
35
  "rewards/accuracies": 0.5,
36
- "rewards/chosen": -17.30990982055664,
37
- "rewards/margins": 0.5794881582260132,
38
- "rewards/rejected": -17.8893985748291,
 
39
  "step": 10
40
  },
41
  {
42
  "epoch": 0.003398759452799728,
43
- "grad_norm": 2336.0,
44
  "learning_rate": 1.4288495612538426e-08,
45
- "logits/chosen": -0.7294623851776123,
46
- "logits/rejected": -0.7721316814422607,
47
- "logps/chosen": -1.7250877618789673,
48
- "logps/rejected": -1.7949330806732178,
49
- "loss": 98.7826,
50
  "rewards/accuracies": 0.5,
51
- "rewards/chosen": -17.250879287719727,
52
- "rewards/margins": 0.6984527707099915,
53
- "rewards/rejected": -17.949331283569336,
 
54
  "step": 15
55
  },
56
  {
57
  "epoch": 0.004531679270399638,
58
  "grad_norm": 1768.0,
59
  "learning_rate": 0.0,
60
- "logits/chosen": -0.724795937538147,
61
- "logits/rejected": -0.6882869601249695,
62
- "logps/chosen": -1.827897310256958,
63
- "logps/rejected": -1.8333686590194702,
64
- "loss": 109.3717,
65
  "rewards/accuracies": 0.5249999761581421,
66
- "rewards/chosen": -18.278972625732422,
67
- "rewards/margins": 0.054715633392333984,
68
- "rewards/rejected": -18.333688735961914,
 
69
  "step": 20
70
  },
71
  {
72
  "epoch": 0.004531679270399638,
73
  "step": 20,
74
  "total_flos": 0.0,
75
- "train_loss": 104.63558349609374,
76
- "train_runtime": 57.4491,
77
- "train_samples_per_second": 5.57,
78
- "train_steps_per_second": 0.348
79
  }
80
  ],
81
  "logging_steps": 5,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.0011329198175999095,
13
+ "grad_norm": 1648.0,
14
  "learning_rate": 7.464101615137755e-08,
15
+ "logits/chosen": -0.7977954149246216,
16
+ "logits/rejected": -0.6530444622039795,
17
+ "logps/chosen": -1.7494233846664429,
18
+ "logps/rejected": -1.8329025506973267,
19
+ "loss": 106.2595,
20
  "rewards/accuracies": 0.48750001192092896,
21
+ "rewards/chosen": -17.49423599243164,
22
+ "rewards/margins": 0.834790825843811,
23
+ "rewards/rejected": -18.329025268554688,
24
+ "semantic_entropy": 0.6561636328697205,
25
  "step": 5
26
  },
27
  {
28
  "epoch": 0.002265839635199819,
29
+ "grad_norm": 1192.0,
30
  "learning_rate": 4.6945927106677224e-08,
31
+ "logits/chosen": -0.8223685026168823,
32
+ "logits/rejected": -0.7962485551834106,
33
+ "logps/chosen": -1.7305948734283447,
34
+ "logps/rejected": -1.790734052658081,
35
+ "loss": 103.7675,
36
  "rewards/accuracies": 0.5,
37
+ "rewards/chosen": -17.305950164794922,
38
+ "rewards/margins": 0.6013931035995483,
39
+ "rewards/rejected": -17.90734100341797,
40
+ "semantic_entropy": 0.6715449094772339,
41
  "step": 10
42
  },
43
  {
44
  "epoch": 0.003398759452799728,
45
+ "grad_norm": 2352.0,
46
  "learning_rate": 1.4288495612538426e-08,
47
+ "logits/chosen": -0.7286165952682495,
48
+ "logits/rejected": -0.7718230485916138,
49
+ "logps/chosen": -1.7242145538330078,
50
+ "logps/rejected": -1.7961229085922241,
51
+ "loss": 98.6901,
52
  "rewards/accuracies": 0.5,
53
+ "rewards/chosen": -17.242145538330078,
54
+ "rewards/margins": 0.7190819382667542,
55
+ "rewards/rejected": -17.96122932434082,
56
+ "semantic_entropy": 0.6638566851615906,
57
  "step": 15
58
  },
59
  {
60
  "epoch": 0.004531679270399638,
61
  "grad_norm": 1768.0,
62
  "learning_rate": 0.0,
63
+ "logits/chosen": -0.7242414355278015,
64
+ "logits/rejected": -0.6874409914016724,
65
+ "logps/chosen": -1.828495979309082,
66
+ "logps/rejected": -1.8325235843658447,
67
+ "loss": 109.4797,
68
  "rewards/accuracies": 0.5249999761581421,
69
+ "rewards/chosen": -18.284961700439453,
70
+ "rewards/margins": 0.040274858474731445,
71
+ "rewards/rejected": -18.32523536682129,
72
+ "semantic_entropy": 0.6504115462303162,
73
  "step": 20
74
  },
75
  {
76
  "epoch": 0.004531679270399638,
77
  "step": 20,
78
  "total_flos": 0.0,
79
+ "train_loss": 104.54919738769532,
80
+ "train_runtime": 57.8361,
81
+ "train_samples_per_second": 5.533,
82
+ "train_steps_per_second": 0.346
83
  }
84
  ],
85
  "logging_steps": 5,