BleachNick committed on
Commit
36bb64e
·
verified ·
1 Parent(s): 20fdd02

Model save

Browse files
README.md CHANGED
@@ -27,7 +27,7 @@ print(output["generated_text"])
27
 
28
  ## Training procedure
29
 
30
- [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/haozhezhao/huggingface/runs/n8eembxg)
31
 
32
 
33
  This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
 
27
 
28
  ## Training procedure
29
 
30
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/haozhezhao/huggingface/runs/5s7qcp5j)
31
 
32
 
33
  This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 0.06262696608901024,
4
- "train_runtime": 3770.358,
5
  "train_samples": 4500,
6
- "train_samples_per_second": 1.194,
7
- "train_steps_per_second": 0.011
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 0.17294886400923132,
4
+ "train_runtime": 6524.5156,
5
  "train_samples": 4500,
6
+ "train_samples_per_second": 0.69,
7
+ "train_steps_per_second": 0.006
8
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:542c6e5e58a3ff54e2c927aa29bc7c34556ed1718c1236da2349845bbd39b149
3
  size 4943274328
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da302febe17c06a48f95d6c992ed2f3edfb5c9ba10326eaaa7dde97149e5f2c3
3
  size 4943274328
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 0.06262696608901024,
4
- "train_runtime": 3770.358,
5
  "train_samples": 4500,
6
- "train_samples_per_second": 1.194,
7
- "train_steps_per_second": 0.011
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 0.17294886400923132,
4
+ "train_runtime": 6524.5156,
5
  "train_samples": 4500,
6
+ "train_samples_per_second": 0.69,
7
+ "train_steps_per_second": 0.006
8
  }
trainer_state.json CHANGED
@@ -9,65 +9,65 @@
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "completion_length": 103.06786165237426,
13
  "epoch": 0.24883359253499224,
14
- "grad_norm": 10.89356517791748,
15
- "kl": 3.0970688341651114,
16
  "learning_rate": 1.866025403784439e-05,
17
- "loss": 0.1239,
18
- "reward": 0.61015628001187,
19
- "reward_std": 0.41818763511255386,
20
- "rewards/accuracy_reward": 0.15479911494767293,
21
- "rewards/format_reward": 0.45535716295707973,
22
  "step": 10
23
  },
24
  {
25
- "completion_length": 31.39263536930084,
26
  "epoch": 0.4976671850699845,
27
- "grad_norm": 4.730869293212891,
28
- "kl": 1.1419519972056151,
29
  "learning_rate": 1.1736481776669307e-05,
30
- "loss": 0.0457,
31
- "reward": 1.5758929274976254,
32
- "reward_std": 0.25283683626912534,
33
- "rewards/accuracy_reward": 0.6338169906288386,
34
- "rewards/format_reward": 0.9420759212225676,
35
  "step": 20
36
  },
37
  {
38
- "completion_length": 31.978237950801848,
39
  "epoch": 0.7465007776049767,
40
- "grad_norm": 1.0211782455444336,
41
- "kl": 1.0043069496750832,
42
  "learning_rate": 3.5721239031346067e-06,
43
- "loss": 0.0402,
44
- "reward": 1.6506697192788125,
45
- "reward_std": 0.21753951534628868,
46
- "rewards/accuracy_reward": 0.6888393165543676,
47
- "rewards/format_reward": 0.9618303880095482,
48
  "step": 30
49
  },
50
  {
51
- "completion_length": 26.995983374118804,
52
  "epoch": 0.995334370139969,
53
- "grad_norm": 0.7562219500541687,
54
- "kl": 1.0193692460656165,
55
  "learning_rate": 0.0,
56
- "loss": 0.0408,
57
- "reward": 1.732700977474451,
58
- "reward_std": 0.14281816640868783,
59
- "rewards/accuracy_reward": 0.7460937837138772,
60
- "rewards/format_reward": 0.986607164517045,
61
  "step": 40
62
  },
63
  {
64
  "epoch": 0.995334370139969,
65
  "step": 40,
66
  "total_flos": 0.0,
67
- "train_loss": 0.06262696608901024,
68
- "train_runtime": 3770.358,
69
- "train_samples_per_second": 1.194,
70
- "train_steps_per_second": 0.011
71
  }
72
  ],
73
  "logging_steps": 10,
 
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "completion_length": 355.3971033751965,
13
  "epoch": 0.24883359253499224,
14
+ "grad_norm": 0.5323885083198547,
15
+ "kl": 14.018265570513904,
16
  "learning_rate": 1.866025403784439e-05,
17
+ "loss": 0.5607,
18
+ "reward": 0.26941965536680074,
19
+ "reward_std": 0.20019021732732653,
20
+ "rewards/accuracy_reward": 0.05513393102446571,
21
+ "rewards/format_reward": 0.2142857245518826,
22
  "step": 10
23
  },
24
  {
25
+ "completion_length": 997.3060455322266,
26
  "epoch": 0.4976671850699845,
27
+ "grad_norm": 0.22486089169979095,
28
+ "kl": 0.11681123820599168,
29
  "learning_rate": 1.1736481776669307e-05,
30
+ "loss": 0.0047,
31
+ "reward": 0.0003348214435391128,
32
+ "reward_std": 0.000947018014267087,
33
+ "rewards/accuracy_reward": 0.0003348214435391128,
34
+ "rewards/format_reward": 0.0,
35
  "step": 20
36
  },
37
  {
38
+ "completion_length": 104.72723671197892,
39
  "epoch": 0.7465007776049767,
40
+ "grad_norm": 0.828610360622406,
41
+ "kl": 1.0758276525884867,
42
  "learning_rate": 3.5721239031346067e-06,
43
+ "loss": 0.043,
44
+ "reward": 0.0027901787078008057,
45
+ "reward_std": 0.006452280096709728,
46
+ "rewards/accuracy_reward": 0.0027901787078008057,
47
+ "rewards/format_reward": 0.0,
48
  "step": 30
49
  },
50
  {
51
+ "completion_length": 14.610380122065544,
52
  "epoch": 0.995334370139969,
53
+ "grad_norm": 7.8437886238098145,
54
+ "kl": 2.0839827720075847,
55
  "learning_rate": 0.0,
56
+ "loss": 0.0834,
57
+ "reward": 0.29776787203736604,
58
+ "reward_std": 0.21829486889764665,
59
+ "rewards/accuracy_reward": 0.29776787203736604,
60
+ "rewards/format_reward": 0.0,
61
  "step": 40
62
  },
63
  {
64
  "epoch": 0.995334370139969,
65
  "step": 40,
66
  "total_flos": 0.0,
67
+ "train_loss": 0.17294886400923132,
68
+ "train_runtime": 6524.5156,
69
+ "train_samples_per_second": 0.69,
70
+ "train_steps_per_second": 0.006
71
  }
72
  ],
73
  "logging_steps": 10,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9e8e55c660570ec0380b7529a2de772884e55dc7a177165e2c159b6d15a06687
3
  size 5816
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:543ce3ffbd1d4499efd6ed9f0e96de7df8f454f6e71e07a58ee9e7071c23436c
3
  size 5816