Training in progress, step 25500, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 338298
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cb834eab847e8e28dec407e59bb0d8e4fb19cd54d9f3d073943c433681e78580
|
3 |
size 338298
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 424342
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6a58bfbea0ba319ef812042f985398f0d1c4c4574768319cbb4404df7554a97c
|
3 |
size 424342
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b275d981db52339dd3e5b625c97d2fc32266d9e8bfbf53825cc6f3a487e5c1c4
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1a7a8143394efd02a858ec1c71378ac1441217f91088ab489b87b43340e54b14
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": 6.737992286682129,
|
3 |
"best_model_checkpoint": "miner_id_24/checkpoint-24000",
|
4 |
-
"epoch": 9.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -3915,6 +3915,84 @@
|
|
3915 |
"eval_samples_per_second": 490.552,
|
3916 |
"eval_steps_per_second": 122.691,
|
3917 |
"step": 25000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3918 |
}
|
3919 |
],
|
3920 |
"logging_steps": 50,
|
@@ -3929,7 +4007,7 @@
|
|
3929 |
"early_stopping_threshold": 0.0
|
3930 |
},
|
3931 |
"attributes": {
|
3932 |
-
"early_stopping_patience_counter":
|
3933 |
}
|
3934 |
},
|
3935 |
"TrainerControl": {
|
@@ -3938,12 +4016,12 @@
|
|
3938 |
"should_evaluate": false,
|
3939 |
"should_log": false,
|
3940 |
"should_save": true,
|
3941 |
-
"should_training_stop":
|
3942 |
},
|
3943 |
"attributes": {}
|
3944 |
}
|
3945 |
},
|
3946 |
-
"total_flos":
|
3947 |
"train_batch_size": 4,
|
3948 |
"trial_name": null,
|
3949 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": 6.737992286682129,
|
3 |
"best_model_checkpoint": "miner_id_24/checkpoint-24000",
|
4 |
+
"epoch": 9.261745880430341,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 25500,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
3915 |
"eval_samples_per_second": 490.552,
|
3916 |
"eval_steps_per_second": 122.691,
|
3917 |
"step": 25000
|
3918 |
+
},
|
3919 |
+
{
|
3920 |
+
"epoch": 9.098324935312542,
|
3921 |
+
"grad_norm": 0.32145312428474426,
|
3922 |
+
"learning_rate": 4.3060546058765975e-06,
|
3923 |
+
"loss": 6.7494,
|
3924 |
+
"step": 25050
|
3925 |
+
},
|
3926 |
+
{
|
3927 |
+
"epoch": 9.116482818103409,
|
3928 |
+
"grad_norm": 0.32838886976242065,
|
3929 |
+
"learning_rate": 4.1356977178935615e-06,
|
3930 |
+
"loss": 6.7581,
|
3931 |
+
"step": 25100
|
3932 |
+
},
|
3933 |
+
{
|
3934 |
+
"epoch": 9.134640700894275,
|
3935 |
+
"grad_norm": 0.2937028408050537,
|
3936 |
+
"learning_rate": 3.9687126288057616e-06,
|
3937 |
+
"loss": 6.7556,
|
3938 |
+
"step": 25150
|
3939 |
+
},
|
3940 |
+
{
|
3941 |
+
"epoch": 9.152798583685142,
|
3942 |
+
"grad_norm": 0.3333373963832855,
|
3943 |
+
"learning_rate": 3.8051048122335777e-06,
|
3944 |
+
"loss": 6.7596,
|
3945 |
+
"step": 25200
|
3946 |
+
},
|
3947 |
+
{
|
3948 |
+
"epoch": 9.170956466476008,
|
3949 |
+
"grad_norm": 0.29934102296829224,
|
3950 |
+
"learning_rate": 3.644879631093374e-06,
|
3951 |
+
"loss": 6.7524,
|
3952 |
+
"step": 25250
|
3953 |
+
},
|
3954 |
+
{
|
3955 |
+
"epoch": 9.189114349266875,
|
3956 |
+
"grad_norm": 0.3680090606212616,
|
3957 |
+
"learning_rate": 3.488042337421889e-06,
|
3958 |
+
"loss": 6.7547,
|
3959 |
+
"step": 25300
|
3960 |
+
},
|
3961 |
+
{
|
3962 |
+
"epoch": 9.207272232057742,
|
3963 |
+
"grad_norm": 0.3357902467250824,
|
3964 |
+
"learning_rate": 3.3345980722038647e-06,
|
3965 |
+
"loss": 6.7557,
|
3966 |
+
"step": 25350
|
3967 |
+
},
|
3968 |
+
{
|
3969 |
+
"epoch": 9.225430114848608,
|
3970 |
+
"grad_norm": 0.31992048025131226,
|
3971 |
+
"learning_rate": 3.184551865203717e-06,
|
3972 |
+
"loss": 6.7545,
|
3973 |
+
"step": 25400
|
3974 |
+
},
|
3975 |
+
{
|
3976 |
+
"epoch": 9.243587997639475,
|
3977 |
+
"grad_norm": 0.2933006286621094,
|
3978 |
+
"learning_rate": 3.0379086348005164e-06,
|
3979 |
+
"loss": 6.7502,
|
3980 |
+
"step": 25450
|
3981 |
+
},
|
3982 |
+
{
|
3983 |
+
"epoch": 9.261745880430341,
|
3984 |
+
"grad_norm": 0.32160452008247375,
|
3985 |
+
"learning_rate": 2.8946731878269183e-06,
|
3986 |
+
"loss": 6.7559,
|
3987 |
+
"step": 25500
|
3988 |
+
},
|
3989 |
+
{
|
3990 |
+
"epoch": 9.261745880430341,
|
3991 |
+
"eval_loss": 6.738137722015381,
|
3992 |
+
"eval_runtime": 9.4312,
|
3993 |
+
"eval_samples_per_second": 491.773,
|
3994 |
+
"eval_steps_per_second": 122.996,
|
3995 |
+
"step": 25500
|
3996 |
}
|
3997 |
],
|
3998 |
"logging_steps": 50,
|
|
|
4007 |
"early_stopping_threshold": 0.0
|
4008 |
},
|
4009 |
"attributes": {
|
4010 |
+
"early_stopping_patience_counter": 3
|
4011 |
}
|
4012 |
},
|
4013 |
"TrainerControl": {
|
|
|
4016 |
"should_evaluate": false,
|
4017 |
"should_log": false,
|
4018 |
"should_save": true,
|
4019 |
+
"should_training_stop": true
|
4020 |
},
|
4021 |
"attributes": {}
|
4022 |
}
|
4023 |
},
|
4024 |
+
"total_flos": 490175299584000.0,
|
4025 |
"train_batch_size": 4,
|
4026 |
"trial_name": null,
|
4027 |
"trial_params": null
|