yakazimir committed on
Commit
76625da
·
verified ·
1 Parent(s): 5188e2d

Model save

Browse files
README.md CHANGED
@@ -3,31 +3,20 @@ library_name: transformers
3
  license: other
4
  base_model: trl-lib/qwen1.5-0.5b-sft
5
  tags:
6
- - alignment-handbook
7
  - trl
8
  - simpo
9
  - generated_from_trainer
10
  model-index:
11
- - name: trl-lib/qwen1.5-0.5b-sft
12
  results: []
13
  ---
14
 
15
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
16
  should probably proofread and complete it, then remove this comment. -->
17
 
18
- # trl-lib/qwen1.5-0.5b-sft
19
 
20
  This model is a fine-tuned version of [trl-lib/qwen1.5-0.5b-sft](https://huggingface.co/trl-lib/qwen1.5-0.5b-sft) on an unknown dataset.
21
- It achieves the following results on the evaluation set:
22
- - Loss: 5.8350
23
- - Rewards/chosen: -16.6895
24
- - Rewards/rejected: -17.6490
25
- - Rewards/accuracies: 0.5245
26
- - Rewards/margins: 0.9596
27
- - Logps/rejected: -1.7649
28
- - Logps/chosen: -1.6689
29
- - Logits/rejected: -0.3783
30
- - Logits/chosen: -0.4409
31
 
32
  ## Model description
33
 
@@ -47,12 +36,12 @@ More information needed
47
 
48
  The following hyperparameters were used during training:
49
  - learning_rate: 8e-08
50
- - train_batch_size: 2
51
  - eval_batch_size: 4
52
  - seed: 42
53
  - gradient_accumulation_steps: 16
54
- - total_train_batch_size: 32
55
- - optimizer: Use adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
56
  - lr_scheduler_type: cosine
57
  - lr_scheduler_warmup_ratio: 0.1
58
  - training_steps: 20
@@ -63,7 +52,7 @@ The following hyperparameters were used during training:
63
 
64
  ### Framework versions
65
 
66
- - Transformers 4.48.3
67
  - Pytorch 2.4.0+cu121
68
- - Datasets 2.18.0
69
- - Tokenizers 0.21.0
 
3
  license: other
4
  base_model: trl-lib/qwen1.5-0.5b-sft
5
  tags:
 
6
  - trl
7
  - simpo
8
  - generated_from_trainer
9
  model-index:
10
+ - name: simpo-exps_qwen05b
11
  results: []
12
  ---
13
 
14
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
  should probably proofread and complete it, then remove this comment. -->
16
 
17
+ # simpo-exps_qwen05b
18
 
19
  This model is a fine-tuned version of [trl-lib/qwen1.5-0.5b-sft](https://huggingface.co/trl-lib/qwen1.5-0.5b-sft) on an unknown dataset.
 
 
 
 
 
 
 
 
 
 
20
 
21
  ## Model description
22
 
 
36
 
37
  The following hyperparameters were used during training:
38
  - learning_rate: 8e-08
39
+ - train_batch_size: 1
40
  - eval_batch_size: 4
41
  - seed: 42
42
  - gradient_accumulation_steps: 16
43
+ - total_train_batch_size: 16
44
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
45
  - lr_scheduler_type: cosine
46
  - lr_scheduler_warmup_ratio: 0.1
47
  - training_steps: 20
 
52
 
53
  ### Framework versions
54
 
55
+ - Transformers 4.44.2
56
  - Pytorch 2.4.0+cu121
57
+ - Datasets 3.0.1
58
+ - Tokenizers 0.19.1
all_results.json CHANGED
@@ -1,21 +1,8 @@
1
  {
2
- "epoch": 0.64,
3
- "eval_logits/chosen": -0.4409240782260895,
4
- "eval_logits/rejected": -0.3783179819583893,
5
- "eval_logps/chosen": -1.6689454317092896,
6
- "eval_logps/rejected": -1.7649023532867432,
7
- "eval_loss": 5.835046768188477,
8
- "eval_rewards/accuracies": 0.5245413780212402,
9
- "eval_rewards/chosen": -16.689455032348633,
10
- "eval_rewards/margins": 0.9595676064491272,
11
- "eval_rewards/rejected": -17.649023056030273,
12
- "eval_runtime": 294.9751,
13
- "eval_samples": 5595,
14
- "eval_samples_per_second": 18.968,
15
- "eval_steps_per_second": 4.743,
16
  "total_flos": 0.0,
17
- "train_loss": 98.40630645751953,
18
- "train_runtime": 107.1933,
19
- "train_samples_per_second": 5.971,
20
- "train_steps_per_second": 0.187
21
  }
 
1
  {
2
+ "epoch": 0.005352065562803144,
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  "total_flos": 0.0,
4
+ "train_loss": 0.7357763648033142,
5
+ "train_runtime": 51.0237,
6
+ "train_samples_per_second": 6.272,
7
+ "train_steps_per_second": 0.392
8
  }
generation_config.json CHANGED
@@ -3,5 +3,5 @@
3
  "eos_token_id": 151645,
4
  "max_new_tokens": 2048,
5
  "pad_token_id": 151645,
6
- "transformers_version": "4.48.3"
7
  }
 
3
  "eos_token_id": 151645,
4
  "max_new_tokens": 2048,
5
  "pad_token_id": 151645,
6
+ "transformers_version": "4.44.2"
7
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 0.64,
3
  "total_flos": 0.0,
4
- "train_loss": 98.40630645751953,
5
- "train_runtime": 107.1933,
6
- "train_samples_per_second": 5.971,
7
- "train_steps_per_second": 0.187
8
  }
 
1
  {
2
+ "epoch": 0.005352065562803144,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.7357763648033142,
5
+ "train_runtime": 51.0237,
6
+ "train_samples_per_second": 6.272,
7
+ "train_steps_per_second": 0.392
8
  }
trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.64,
5
  "eval_steps": 400,
6
  "global_step": 20,
7
  "is_hyper_param_search": false,
@@ -9,73 +9,77 @@
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.16,
13
- "grad_norm": 3744.0,
14
  "learning_rate": 7.464101615137755e-08,
15
- "logits/chosen": -0.5232290029525757,
16
- "logits/rejected": -0.4501457214355469,
17
- "logps/chosen": -1.740447998046875,
18
- "logps/rejected": -1.7914221286773682,
19
- "loss": 105.9691,
20
- "rewards/accuracies": 0.45625001192092896,
21
- "rewards/chosen": -17.404481887817383,
22
- "rewards/margins": 0.5097407698631287,
23
- "rewards/rejected": -17.91421890258789,
 
24
  "step": 5
25
  },
26
  {
27
- "epoch": 0.32,
28
- "grad_norm": 1224.0,
29
  "learning_rate": 4.6945927106677224e-08,
30
- "logits/chosen": -0.4762292802333832,
31
- "logits/rejected": -0.37752610445022583,
32
- "logps/chosen": -1.6145172119140625,
33
- "logps/rejected": -1.8529552221298218,
34
- "loss": 87.966,
35
- "rewards/accuracies": 0.543749988079071,
36
- "rewards/chosen": -16.145172119140625,
37
- "rewards/margins": 2.384378671646118,
38
- "rewards/rejected": -18.529550552368164,
 
39
  "step": 10
40
  },
41
  {
42
- "epoch": 0.48,
43
- "grad_norm": 1464.0,
44
  "learning_rate": 1.4288495612538426e-08,
45
- "logits/chosen": -0.5665463209152222,
46
- "logits/rejected": -0.5252197980880737,
47
- "logps/chosen": -1.716834306716919,
48
- "logps/rejected": -1.7479000091552734,
49
- "loss": 98.5938,
50
- "rewards/accuracies": 0.512499988079071,
51
- "rewards/chosen": -17.16834259033203,
52
- "rewards/margins": 0.310658723115921,
53
- "rewards/rejected": -17.479000091552734,
 
54
  "step": 15
55
  },
56
  {
57
- "epoch": 0.64,
58
- "grad_norm": 4800.0,
59
  "learning_rate": 0.0,
60
- "logits/chosen": -0.5845416784286499,
61
- "logits/rejected": -0.5405458211898804,
62
- "logps/chosen": -1.8133652210235596,
63
- "logps/rejected": -1.8326694965362549,
64
- "loss": 101.0963,
65
- "rewards/accuracies": 0.5562499761581421,
66
- "rewards/chosen": -18.133655548095703,
67
- "rewards/margins": 0.19304139912128448,
68
- "rewards/rejected": -18.326696395874023,
 
69
  "step": 20
70
  },
71
  {
72
- "epoch": 0.64,
73
  "step": 20,
74
  "total_flos": 0.0,
75
- "train_loss": 98.40630645751953,
76
- "train_runtime": 107.1933,
77
- "train_samples_per_second": 5.971,
78
- "train_steps_per_second": 0.187
79
  }
80
  ],
81
  "logging_steps": 5,
@@ -96,7 +100,7 @@
96
  }
97
  },
98
  "total_flos": 0.0,
99
- "train_batch_size": 2,
100
  "trial_name": null,
101
  "trial_params": null
102
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.005352065562803144,
5
  "eval_steps": 400,
6
  "global_step": 20,
7
  "is_hyper_param_search": false,
 
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.001338016390700786,
13
+ "grad_norm": 6.0,
14
  "learning_rate": 7.464101615137755e-08,
15
+ "logits/chosen": -0.8066712617874146,
16
+ "logits/rejected": -0.6942030787467957,
17
+ "logps/chosen": -1.797594428062439,
18
+ "logps/rejected": -1.9124763011932373,
19
+ "loss": 0.6903,
20
+ "rewards/accuracies": 0.512499988079071,
21
+ "rewards/chosen": -1.797594428062439,
22
+ "rewards/margins": 0.11488206684589386,
23
+ "rewards/rejected": -1.9124763011932373,
24
+ "semantic_entropy": 0.6421751379966736,
25
  "step": 5
26
  },
27
  {
28
+ "epoch": 0.002676032781401572,
29
+ "grad_norm": 6.53125,
30
  "learning_rate": 4.6945927106677224e-08,
31
+ "logits/chosen": -0.6600304841995239,
32
+ "logits/rejected": -0.5782949328422546,
33
+ "logps/chosen": -1.6283073425292969,
34
+ "logps/rejected": -1.860073447227478,
35
+ "loss": 0.6998,
36
+ "rewards/accuracies": 0.612500011920929,
37
+ "rewards/chosen": -1.6283073425292969,
38
+ "rewards/margins": 0.23176583647727966,
39
+ "rewards/rejected": -1.860073447227478,
40
+ "semantic_entropy": 0.6761103868484497,
41
  "step": 10
42
  },
43
  {
44
+ "epoch": 0.004014049172102358,
45
+ "grad_norm": 10.9375,
46
  "learning_rate": 1.4288495612538426e-08,
47
+ "logits/chosen": -0.7018337249755859,
48
+ "logits/rejected": -0.6630809903144836,
49
+ "logps/chosen": -1.814099907875061,
50
+ "logps/rejected": -1.7862392663955688,
51
+ "loss": 0.8097,
52
+ "rewards/accuracies": 0.550000011920929,
53
+ "rewards/chosen": -1.814099907875061,
54
+ "rewards/margins": -0.027860689908266068,
55
+ "rewards/rejected": -1.7862392663955688,
56
+ "semantic_entropy": 0.6300801038742065,
57
  "step": 15
58
  },
59
  {
60
+ "epoch": 0.005352065562803144,
61
+ "grad_norm": 12.5625,
62
  "learning_rate": 0.0,
63
+ "logits/chosen": -0.6895834803581238,
64
+ "logits/rejected": -0.6498485207557678,
65
+ "logps/chosen": -1.7860157489776611,
66
+ "logps/rejected": -1.9019441604614258,
67
+ "loss": 0.7433,
68
+ "rewards/accuracies": 0.5625,
69
+ "rewards/chosen": -1.7860157489776611,
70
+ "rewards/margins": 0.11592836678028107,
71
+ "rewards/rejected": -1.9019441604614258,
72
+ "semantic_entropy": 0.6502264738082886,
73
  "step": 20
74
  },
75
  {
76
+ "epoch": 0.005352065562803144,
77
  "step": 20,
78
  "total_flos": 0.0,
79
+ "train_loss": 0.7357763648033142,
80
+ "train_runtime": 51.0237,
81
+ "train_samples_per_second": 6.272,
82
+ "train_steps_per_second": 0.392
83
  }
84
  ],
85
  "logging_steps": 5,
 
100
  }
101
  },
102
  "total_flos": 0.0,
103
+ "train_batch_size": 1,
104
  "trial_name": null,
105
  "trial_params": null
106
  }