Text-Style-Transfer-Finetuning-Llama-3.2-3B-Instruct-bnb-4bit-unsloth-4/last-checkpoint/trainer_state.json
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 10.0,
  "eval_steps": 100,
  "global_step": 5000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.2,
      "grad_norm": 0.1518353819847107,
      "learning_rate": 5e-05,
      "loss": 1.1279,
      "step": 100
    },
    {
      "epoch": 0.2,
      "eval_loss": 1.0322937965393066,
      "eval_runtime": 186.6896,
      "eval_samples_per_second": 0.761,
      "eval_steps_per_second": 0.761,
      "step": 100
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.3351062834262848,
      "learning_rate": 4.89795918367347e-05,
      "loss": 1.0369,
      "step": 200
    },
    {
      "epoch": 0.4,
      "eval_loss": 0.9710885882377625,
      "eval_runtime": 182.1459,
      "eval_samples_per_second": 0.78,
      "eval_steps_per_second": 0.78,
      "step": 200
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.3858621120452881,
      "learning_rate": 4.795918367346939e-05,
      "loss": 1.004,
      "step": 300
    },
    {
      "epoch": 0.6,
      "eval_loss": 0.9358227252960205,
      "eval_runtime": 181.6505,
      "eval_samples_per_second": 0.782,
      "eval_steps_per_second": 0.782,
      "step": 300
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.40071049332618713,
      "learning_rate": 4.6938775510204086e-05,
      "loss": 0.955,
      "step": 400
    },
    {
      "epoch": 0.8,
      "eval_loss": 0.9108649492263794,
      "eval_runtime": 181.8409,
      "eval_samples_per_second": 0.781,
      "eval_steps_per_second": 0.781,
      "step": 400
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.41050222516059875,
      "learning_rate": 4.591836734693878e-05,
      "loss": 0.9817,
      "step": 500
    },
    {
      "epoch": 1.0,
      "eval_loss": 0.8939587473869324,
      "eval_runtime": 181.68,
      "eval_samples_per_second": 0.782,
      "eval_steps_per_second": 0.782,
      "step": 500
    },
    {
      "epoch": 1.2,
      "grad_norm": 0.5476531982421875,
      "learning_rate": 4.4897959183673474e-05,
      "loss": 0.8819,
      "step": 600
    },
    {
      "epoch": 1.2,
      "eval_loss": 0.8783901333808899,
      "eval_runtime": 182.0958,
      "eval_samples_per_second": 0.78,
      "eval_steps_per_second": 0.78,
      "step": 600
    },
    {
      "epoch": 1.4,
      "grad_norm": 0.5108929872512817,
      "learning_rate": 4.387755102040816e-05,
      "loss": 0.8373,
      "step": 700
    },
    {
      "epoch": 1.4,
      "eval_loss": 0.8652149438858032,
      "eval_runtime": 181.7715,
      "eval_samples_per_second": 0.781,
      "eval_steps_per_second": 0.781,
      "step": 700
    },
    {
      "epoch": 1.6,
      "grad_norm": 0.5909234881401062,
      "learning_rate": 4.2857142857142856e-05,
      "loss": 0.8588,
      "step": 800
    },
    {
      "epoch": 1.6,
      "eval_loss": 0.8540464639663696,
      "eval_runtime": 181.5147,
      "eval_samples_per_second": 0.782,
      "eval_steps_per_second": 0.782,
      "step": 800
    },
    {
      "epoch": 1.8,
      "grad_norm": 0.5984646081924438,
      "learning_rate": 4.183673469387756e-05,
      "loss": 0.8656,
      "step": 900
    },
    {
      "epoch": 1.8,
      "eval_loss": 0.8421266674995422,
      "eval_runtime": 181.6623,
      "eval_samples_per_second": 0.782,
      "eval_steps_per_second": 0.782,
      "step": 900
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.6141483783721924,
      "learning_rate": 4.0816326530612245e-05,
      "loss": 0.8625,
      "step": 1000
    },
    {
      "epoch": 2.0,
      "eval_loss": 0.8279427289962769,
      "eval_runtime": 181.7062,
      "eval_samples_per_second": 0.781,
      "eval_steps_per_second": 0.781,
      "step": 1000
    },
    {
      "epoch": 2.2,
      "grad_norm": 0.6167280077934265,
      "learning_rate": 3.979591836734694e-05,
      "loss": 0.8022,
      "step": 1100
    },
    {
      "epoch": 2.2,
      "eval_loss": 0.8208828568458557,
      "eval_runtime": 182.0493,
      "eval_samples_per_second": 0.78,
      "eval_steps_per_second": 0.78,
      "step": 1100
    },
    {
      "epoch": 2.4,
      "grad_norm": 0.6359805464744568,
      "learning_rate": 3.8775510204081634e-05,
      "loss": 0.7596,
      "step": 1200
    },
    {
      "epoch": 2.4,
      "eval_loss": 0.814860463142395,
      "eval_runtime": 181.8235,
      "eval_samples_per_second": 0.781,
      "eval_steps_per_second": 0.781,
      "step": 1200
    },
    {
      "epoch": 2.6,
      "grad_norm": 0.723619818687439,
      "learning_rate": 3.775510204081633e-05,
      "loss": 0.7715,
      "step": 1300
    },
    {
      "epoch": 2.6,
      "eval_loss": 0.7978885173797607,
      "eval_runtime": 182.0693,
      "eval_samples_per_second": 0.78,
      "eval_steps_per_second": 0.78,
      "step": 1300
    },
    {
      "epoch": 2.8,
      "grad_norm": 0.7059823274612427,
      "learning_rate": 3.673469387755102e-05,
      "loss": 0.7314,
      "step": 1400
    },
    {
      "epoch": 2.8,
      "eval_loss": 0.7922654747962952,
      "eval_runtime": 182.558,
      "eval_samples_per_second": 0.778,
      "eval_steps_per_second": 0.778,
      "step": 1400
    },
    {
      "epoch": 3.0,
      "grad_norm": 0.5519229173660278,
      "learning_rate": 3.571428571428572e-05,
      "loss": 0.7402,
      "step": 1500
    },
    {
      "epoch": 3.0,
      "eval_loss": 0.782792329788208,
      "eval_runtime": 182.1181,
      "eval_samples_per_second": 0.78,
      "eval_steps_per_second": 0.78,
      "step": 1500
    },
    {
      "epoch": 3.2,
      "grad_norm": 0.6199519634246826,
      "learning_rate": 3.469387755102041e-05,
      "loss": 0.6767,
      "step": 1600
    },
    {
      "epoch": 3.2,
      "eval_loss": 0.7840178608894348,
      "eval_runtime": 178.9485,
      "eval_samples_per_second": 0.794,
      "eval_steps_per_second": 0.794,
      "step": 1600
    },
    {
      "epoch": 3.4,
      "grad_norm": 0.642126202583313,
      "learning_rate": 3.36734693877551e-05,
      "loss": 0.6481,
      "step": 1700
    },
    {
      "epoch": 3.4,
      "eval_loss": 0.7741957306861877,
      "eval_runtime": 179.1517,
      "eval_samples_per_second": 0.793,
      "eval_steps_per_second": 0.793,
      "step": 1700
    },
    {
      "epoch": 3.6,
      "grad_norm": 0.7174199819564819,
      "learning_rate": 3.265306122448979e-05,
      "loss": 0.6689,
      "step": 1800
    },
    {
      "epoch": 3.6,
      "eval_loss": 0.7609220743179321,
      "eval_runtime": 180.4754,
      "eval_samples_per_second": 0.787,
      "eval_steps_per_second": 0.787,
      "step": 1800
    },
    {
      "epoch": 3.8,
      "grad_norm": 0.6541227698326111,
      "learning_rate": 3.1632653061224494e-05,
      "loss": 0.6652,
      "step": 1900
    },
    {
      "epoch": 3.8,
      "eval_loss": 0.75916987657547,
      "eval_runtime": 179.9759,
      "eval_samples_per_second": 0.789,
      "eval_steps_per_second": 0.789,
      "step": 1900
    },
    {
      "epoch": 4.0,
      "grad_norm": 0.889131486415863,
      "learning_rate": 3.061224489795919e-05,
      "loss": 0.6938,
      "step": 2000
    },
    {
      "epoch": 4.0,
      "eval_loss": 0.7402585744857788,
      "eval_runtime": 181.8785,
      "eval_samples_per_second": 0.781,
      "eval_steps_per_second": 0.781,
      "step": 2000
    },
    {
      "epoch": 4.2,
      "grad_norm": 0.9711707234382629,
      "learning_rate": 2.959183673469388e-05,
      "loss": 0.5935,
      "step": 2100
    },
    {
      "epoch": 4.2,
      "eval_loss": 0.7453898191452026,
      "eval_runtime": 181.9504,
      "eval_samples_per_second": 0.78,
      "eval_steps_per_second": 0.78,
      "step": 2100
    },
    {
      "epoch": 4.4,
      "grad_norm": 1.0045188665390015,
      "learning_rate": 2.857142857142857e-05,
      "loss": 0.5731,
      "step": 2200
    },
    {
      "epoch": 4.4,
      "eval_loss": 0.7436273097991943,
      "eval_runtime": 182.1074,
      "eval_samples_per_second": 0.78,
      "eval_steps_per_second": 0.78,
      "step": 2200
    },
    {
      "epoch": 4.6,
      "grad_norm": 0.7766691446304321,
      "learning_rate": 2.7551020408163265e-05,
      "loss": 0.6094,
      "step": 2300
    },
    {
      "epoch": 4.6,
      "eval_loss": 0.7243757843971252,
      "eval_runtime": 182.5282,
      "eval_samples_per_second": 0.778,
      "eval_steps_per_second": 0.778,
      "step": 2300
    },
    {
      "epoch": 4.8,
      "grad_norm": 0.8756657242774963,
      "learning_rate": 2.6530612244897963e-05,
      "loss": 0.5904,
      "step": 2400
    },
    {
      "epoch": 4.8,
      "eval_loss": 0.7280852794647217,
      "eval_runtime": 182.6182,
      "eval_samples_per_second": 0.778,
      "eval_steps_per_second": 0.778,
      "step": 2400
    },
    {
      "epoch": 5.0,
      "grad_norm": 0.9650371670722961,
      "learning_rate": 2.5510204081632654e-05,
      "loss": 0.5649,
      "step": 2500
    },
    {
      "epoch": 5.0,
      "eval_loss": 0.714751124382019,
      "eval_runtime": 182.1779,
      "eval_samples_per_second": 0.779,
      "eval_steps_per_second": 0.779,
      "step": 2500
    },
    {
      "epoch": 5.2,
      "grad_norm": 0.9919518828392029,
      "learning_rate": 2.448979591836735e-05,
      "loss": 0.5211,
      "step": 2600
    },
    {
      "epoch": 5.2,
      "eval_loss": 0.719607949256897,
      "eval_runtime": 182.2031,
      "eval_samples_per_second": 0.779,
      "eval_steps_per_second": 0.779,
      "step": 2600
    },
    {
      "epoch": 5.4,
      "grad_norm": 0.979714572429657,
      "learning_rate": 2.3469387755102043e-05,
      "loss": 0.536,
      "step": 2700
    },
    {
      "epoch": 5.4,
      "eval_loss": 0.7172472476959229,
      "eval_runtime": 182.2624,
      "eval_samples_per_second": 0.779,
      "eval_steps_per_second": 0.779,
      "step": 2700
    },
    {
      "epoch": 5.6,
      "grad_norm": 0.9355886578559875,
      "learning_rate": 2.2448979591836737e-05,
      "loss": 0.4957,
      "step": 2800
    },
    {
      "epoch": 5.6,
      "eval_loss": 0.7044922113418579,
      "eval_runtime": 182.1756,
      "eval_samples_per_second": 0.779,
      "eval_steps_per_second": 0.779,
      "step": 2800
    },
    {
      "epoch": 5.8,
      "grad_norm": 0.907577633857727,
      "learning_rate": 2.1428571428571428e-05,
      "loss": 0.4935,
      "step": 2900
    },
    {
      "epoch": 5.8,
      "eval_loss": 0.6986051201820374,
      "eval_runtime": 182.1701,
      "eval_samples_per_second": 0.779,
      "eval_steps_per_second": 0.779,
      "step": 2900
    },
    {
      "epoch": 6.0,
      "grad_norm": 0.6769922971725464,
      "learning_rate": 2.0408163265306123e-05,
      "loss": 0.5038,
      "step": 3000
    },
    {
      "epoch": 6.0,
      "eval_loss": 0.6920154690742493,
      "eval_runtime": 182.1079,
      "eval_samples_per_second": 0.78,
      "eval_steps_per_second": 0.78,
      "step": 3000
    },
    {
      "epoch": 6.2,
      "grad_norm": 0.7743176817893982,
      "learning_rate": 1.9387755102040817e-05,
      "loss": 0.4357,
      "step": 3100
    },
    {
      "epoch": 6.2,
      "eval_loss": 0.7011306285858154,
      "eval_runtime": 182.6215,
      "eval_samples_per_second": 0.778,
      "eval_steps_per_second": 0.778,
      "step": 3100
    },
    {
      "epoch": 6.4,
      "grad_norm": 0.9698778986930847,
      "learning_rate": 1.836734693877551e-05,
      "loss": 0.435,
      "step": 3200
    },
    {
      "epoch": 6.4,
      "eval_loss": 0.6926498413085938,
      "eval_runtime": 182.1258,
      "eval_samples_per_second": 0.78,
      "eval_steps_per_second": 0.78,
      "step": 3200
    },
    {
      "epoch": 6.6,
      "grad_norm": 1.2019530534744263,
      "learning_rate": 1.7346938775510206e-05,
      "loss": 0.4521,
      "step": 3300
    },
    {
      "epoch": 6.6,
      "eval_loss": 0.6842972636222839,
      "eval_runtime": 182.0796,
      "eval_samples_per_second": 0.78,
      "eval_steps_per_second": 0.78,
      "step": 3300
    },
    {
      "epoch": 6.8,
      "grad_norm": 1.1843819618225098,
      "learning_rate": 1.6326530612244897e-05,
      "loss": 0.4559,
      "step": 3400
    },
    {
      "epoch": 6.8,
      "eval_loss": 0.6817460656166077,
      "eval_runtime": 182.1245,
      "eval_samples_per_second": 0.78,
      "eval_steps_per_second": 0.78,
      "step": 3400
    },
    {
      "epoch": 7.0,
      "grad_norm": 0.9009637832641602,
      "learning_rate": 1.5306122448979594e-05,
      "loss": 0.447,
      "step": 3500
    },
    {
      "epoch": 7.0,
      "eval_loss": 0.6794432401657104,
      "eval_runtime": 182.0907,
      "eval_samples_per_second": 0.78,
      "eval_steps_per_second": 0.78,
      "step": 3500
    },
    {
      "epoch": 7.2,
      "grad_norm": 1.0701977014541626,
      "learning_rate": 1.4285714285714285e-05,
      "loss": 0.3809,
      "step": 3600
    },
    {
      "epoch": 7.2,
      "eval_loss": 0.6871351599693298,
      "eval_runtime": 182.0665,
      "eval_samples_per_second": 0.78,
      "eval_steps_per_second": 0.78,
      "step": 3600
    },
    {
      "epoch": 7.4,
      "grad_norm": 0.9625453352928162,
      "learning_rate": 1.3265306122448982e-05,
      "loss": 0.3836,
      "step": 3700
    },
    {
      "epoch": 7.4,
      "eval_loss": 0.6862939596176147,
      "eval_runtime": 182.0265,
      "eval_samples_per_second": 0.78,
      "eval_steps_per_second": 0.78,
      "step": 3700
    },
    {
      "epoch": 7.6,
      "grad_norm": 1.1868125200271606,
      "learning_rate": 1.2244897959183674e-05,
      "loss": 0.3889,
      "step": 3800
    },
    {
      "epoch": 7.6,
      "eval_loss": 0.6783468723297119,
      "eval_runtime": 182.5364,
      "eval_samples_per_second": 0.778,
      "eval_steps_per_second": 0.778,
      "step": 3800
    },
    {
      "epoch": 7.8,
      "grad_norm": 0.9025134444236755,
      "learning_rate": 1.1224489795918369e-05,
      "loss": 0.4134,
      "step": 3900
    },
    {
      "epoch": 7.8,
      "eval_loss": 0.6690346002578735,
      "eval_runtime": 181.8517,
      "eval_samples_per_second": 0.781,
      "eval_steps_per_second": 0.781,
      "step": 3900
    },
    {
      "epoch": 8.0,
      "grad_norm": 1.2734243869781494,
      "learning_rate": 1.0204081632653061e-05,
      "loss": 0.4018,
      "step": 4000
    },
    {
      "epoch": 8.0,
      "eval_loss": 0.6694031953811646,
      "eval_runtime": 182.0072,
      "eval_samples_per_second": 0.78,
      "eval_steps_per_second": 0.78,
      "step": 4000
    },
    {
      "epoch": 8.2,
      "grad_norm": 0.974176824092865,
      "learning_rate": 9.183673469387756e-06,
      "loss": 0.3526,
      "step": 4100
    },
    {
      "epoch": 8.2,
      "eval_loss": 0.6763675212860107,
      "eval_runtime": 182.0229,
      "eval_samples_per_second": 0.78,
      "eval_steps_per_second": 0.78,
      "step": 4100
    },
    {
      "epoch": 8.4,
      "grad_norm": 0.9596600532531738,
      "learning_rate": 8.163265306122448e-06,
      "loss": 0.3373,
      "step": 4200
    },
    {
      "epoch": 8.4,
      "eval_loss": 0.6745020747184753,
      "eval_runtime": 181.8856,
      "eval_samples_per_second": 0.781,
      "eval_steps_per_second": 0.781,
      "step": 4200
    },
    {
      "epoch": 8.6,
      "grad_norm": 0.9683770537376404,
      "learning_rate": 7.142857142857143e-06,
      "loss": 0.3704,
      "step": 4300
    },
    {
      "epoch": 8.6,
      "eval_loss": 0.6758388876914978,
      "eval_runtime": 181.9951,
      "eval_samples_per_second": 0.78,
      "eval_steps_per_second": 0.78,
      "step": 4300
    },
    {
      "epoch": 8.8,
      "grad_norm": 1.0410691499710083,
      "learning_rate": 6.122448979591837e-06,
      "loss": 0.3482,
      "step": 4400
    },
    {
      "epoch": 8.8,
      "eval_loss": 0.6700472831726074,
      "eval_runtime": 181.8829,
      "eval_samples_per_second": 0.781,
      "eval_steps_per_second": 0.781,
      "step": 4400
    },
    {
      "epoch": 9.0,
      "grad_norm": 1.0724315643310547,
      "learning_rate": 5.102040816326531e-06,
      "loss": 0.3621,
      "step": 4500
    },
    {
      "epoch": 9.0,
      "eval_loss": 0.66424161195755,
      "eval_runtime": 182.2196,
      "eval_samples_per_second": 0.779,
      "eval_steps_per_second": 0.779,
      "step": 4500
    },
    {
      "epoch": 9.2,
      "grad_norm": 1.2040272951126099,
      "learning_rate": 4.081632653061224e-06,
      "loss": 0.3353,
      "step": 4600
    },
    {
      "epoch": 9.2,
      "eval_loss": 0.6743778586387634,
      "eval_runtime": 182.2041,
      "eval_samples_per_second": 0.779,
      "eval_steps_per_second": 0.779,
      "step": 4600
    },
    {
      "epoch": 9.4,
      "grad_norm": 1.0811737775802612,
      "learning_rate": 3.0612244897959185e-06,
      "loss": 0.3265,
      "step": 4700
    },
    {
      "epoch": 9.4,
      "eval_loss": 0.6780717372894287,
      "eval_runtime": 182.1426,
      "eval_samples_per_second": 0.78,
      "eval_steps_per_second": 0.78,
      "step": 4700
    },
    {
      "epoch": 9.6,
      "grad_norm": 0.8568335771560669,
      "learning_rate": 2.040816326530612e-06,
      "loss": 0.3247,
      "step": 4800
    },
    {
      "epoch": 9.6,
      "eval_loss": 0.6788098216056824,
      "eval_runtime": 182.2171,
      "eval_samples_per_second": 0.779,
      "eval_steps_per_second": 0.779,
      "step": 4800
    },
    {
      "epoch": 9.8,
      "grad_norm": 1.1256827116012573,
      "learning_rate": 1.020408163265306e-06,
      "loss": 0.3355,
      "step": 4900
    },
    {
      "epoch": 9.8,
      "eval_loss": 0.6737083196640015,
      "eval_runtime": 182.1568,
      "eval_samples_per_second": 0.78,
      "eval_steps_per_second": 0.78,
      "step": 4900
    },
    {
      "epoch": 10.0,
      "grad_norm": 1.1654198169708252,
      "learning_rate": 0.0,
      "loss": 0.3152,
      "step": 5000
    },
    {
      "epoch": 10.0,
      "eval_loss": 0.6746455430984497,
      "eval_runtime": 182.1302,
      "eval_samples_per_second": 0.78,
      "eval_steps_per_second": 0.78,
      "step": 5000
    }
  ],
  "logging_steps": 100,
  "max_steps": 5000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5.670208882951987e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}