lesso14 commited on
Commit
ec8d834
·
verified ·
1 Parent(s): d447db2

Training in progress, step 25500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d1a3176dab0c716b2cb9e2228acd9a8189902ef7d0de3f1a8887cac5395af2b
3
  size 338298
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb834eab847e8e28dec407e59bb0d8e4fb19cd54d9f3d073943c433681e78580
3
  size 338298
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:117b69449b80d22018e7a1446e9587a0c72614a77188fa42a8403de5c5a99beb
3
  size 424342
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a58bfbea0ba319ef812042f985398f0d1c4c4574768319cbb4404df7554a97c
3
  size 424342
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4860adba5ee7478bb07b9927ff6a669b0d3afd76a7de26d7eed63b2c70717d92
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b275d981db52339dd3e5b625c97d2fc32266d9e8bfbf53825cc6f3a487e5c1c4
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c47f6988562d95347ca3617a47b67af7109f8b9dc1ff18e8837397b25c7fb155
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a7a8143394efd02a858ec1c71378ac1441217f91088ab489b87b43340e54b14
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 6.737992286682129,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-24000",
4
- "epoch": 9.080167052521675,
5
  "eval_steps": 500,
6
- "global_step": 25000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3915,6 +3915,84 @@
3915
  "eval_samples_per_second": 490.552,
3916
  "eval_steps_per_second": 122.691,
3917
  "step": 25000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3918
  }
3919
  ],
3920
  "logging_steps": 50,
@@ -3929,7 +4007,7 @@
3929
  "early_stopping_threshold": 0.0
3930
  },
3931
  "attributes": {
3932
- "early_stopping_patience_counter": 2
3933
  }
3934
  },
3935
  "TrainerControl": {
@@ -3938,12 +4016,12 @@
3938
  "should_evaluate": false,
3939
  "should_log": false,
3940
  "should_save": true,
3941
- "should_training_stop": false
3942
  },
3943
  "attributes": {}
3944
  }
3945
  },
3946
- "total_flos": 480564019200000.0,
3947
  "train_batch_size": 4,
3948
  "trial_name": null,
3949
  "trial_params": null
 
1
  {
2
  "best_metric": 6.737992286682129,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-24000",
4
+ "epoch": 9.261745880430341,
5
  "eval_steps": 500,
6
+ "global_step": 25500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3915
  "eval_samples_per_second": 490.552,
3916
  "eval_steps_per_second": 122.691,
3917
  "step": 25000
3918
+ },
3919
+ {
3920
+ "epoch": 9.098324935312542,
3921
+ "grad_norm": 0.32145312428474426,
3922
+ "learning_rate": 4.3060546058765975e-06,
3923
+ "loss": 6.7494,
3924
+ "step": 25050
3925
+ },
3926
+ {
3927
+ "epoch": 9.116482818103409,
3928
+ "grad_norm": 0.32838886976242065,
3929
+ "learning_rate": 4.1356977178935615e-06,
3930
+ "loss": 6.7581,
3931
+ "step": 25100
3932
+ },
3933
+ {
3934
+ "epoch": 9.134640700894275,
3935
+ "grad_norm": 0.2937028408050537,
3936
+ "learning_rate": 3.9687126288057616e-06,
3937
+ "loss": 6.7556,
3938
+ "step": 25150
3939
+ },
3940
+ {
3941
+ "epoch": 9.152798583685142,
3942
+ "grad_norm": 0.3333373963832855,
3943
+ "learning_rate": 3.8051048122335777e-06,
3944
+ "loss": 6.7596,
3945
+ "step": 25200
3946
+ },
3947
+ {
3948
+ "epoch": 9.170956466476008,
3949
+ "grad_norm": 0.29934102296829224,
3950
+ "learning_rate": 3.644879631093374e-06,
3951
+ "loss": 6.7524,
3952
+ "step": 25250
3953
+ },
3954
+ {
3955
+ "epoch": 9.189114349266875,
3956
+ "grad_norm": 0.3680090606212616,
3957
+ "learning_rate": 3.488042337421889e-06,
3958
+ "loss": 6.7547,
3959
+ "step": 25300
3960
+ },
3961
+ {
3962
+ "epoch": 9.207272232057742,
3963
+ "grad_norm": 0.3357902467250824,
3964
+ "learning_rate": 3.3345980722038647e-06,
3965
+ "loss": 6.7557,
3966
+ "step": 25350
3967
+ },
3968
+ {
3969
+ "epoch": 9.225430114848608,
3970
+ "grad_norm": 0.31992048025131226,
3971
+ "learning_rate": 3.184551865203717e-06,
3972
+ "loss": 6.7545,
3973
+ "step": 25400
3974
+ },
3975
+ {
3976
+ "epoch": 9.243587997639475,
3977
+ "grad_norm": 0.2933006286621094,
3978
+ "learning_rate": 3.0379086348005164e-06,
3979
+ "loss": 6.7502,
3980
+ "step": 25450
3981
+ },
3982
+ {
3983
+ "epoch": 9.261745880430341,
3984
+ "grad_norm": 0.32160452008247375,
3985
+ "learning_rate": 2.8946731878269183e-06,
3986
+ "loss": 6.7559,
3987
+ "step": 25500
3988
+ },
3989
+ {
3990
+ "epoch": 9.261745880430341,
3991
+ "eval_loss": 6.738137722015381,
3992
+ "eval_runtime": 9.4312,
3993
+ "eval_samples_per_second": 491.773,
3994
+ "eval_steps_per_second": 122.996,
3995
+ "step": 25500
3996
  }
3997
  ],
3998
  "logging_steps": 50,
 
4007
  "early_stopping_threshold": 0.0
4008
  },
4009
  "attributes": {
4010
+ "early_stopping_patience_counter": 3
4011
  }
4012
  },
4013
  "TrainerControl": {
 
4016
  "should_evaluate": false,
4017
  "should_log": false,
4018
  "should_save": true,
4019
+ "should_training_stop": true
4020
  },
4021
  "attributes": {}
4022
  }
4023
  },
4024
+ "total_flos": 490175299584000.0,
4025
  "train_batch_size": 4,
4026
  "trial_name": null,
4027
  "trial_params": null