|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.875912408759124, |
|
"eval_steps": 100, |
|
"global_step": 51, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"completion_length": 17.3368057012558, |
|
"epoch": 0.058394160583941604, |
|
"grad_norm": 0.5516418814659119, |
|
"kl": 0.0, |
|
"learning_rate": 3.333333333333333e-07, |
|
"loss": 0.0, |
|
"reward": 0.635416679084301, |
|
"reward_std": 0.33513265289366245, |
|
"rewards/semantic_entropy": 0.635416679084301, |
|
"step": 1 |
|
}, |
|
{ |
|
"completion_length": 17.217013835906982, |
|
"epoch": 0.11678832116788321, |
|
"grad_norm": 0.4640360474586487, |
|
"kl": 0.0, |
|
"learning_rate": 6.666666666666666e-07, |
|
"loss": 0.0, |
|
"reward": 0.725694440305233, |
|
"reward_std": 0.29016363993287086, |
|
"rewards/semantic_entropy": 0.725694440305233, |
|
"step": 2 |
|
}, |
|
{ |
|
"completion_length": 18.128472328186035, |
|
"epoch": 0.17518248175182483, |
|
"grad_norm": 0.540382981300354, |
|
"kl": 0.0012841224670410156, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0001, |
|
"reward": 0.6006944552063942, |
|
"reward_std": 0.3786292914301157, |
|
"rewards/semantic_entropy": 0.6006944552063942, |
|
"step": 3 |
|
}, |
|
{ |
|
"completion_length": 17.270833373069763, |
|
"epoch": 0.23357664233576642, |
|
"grad_norm": 0.5315675139427185, |
|
"kl": 0.0009038448333740234, |
|
"learning_rate": 1.3333333333333332e-06, |
|
"loss": 0.0, |
|
"reward": 0.666666679084301, |
|
"reward_std": 0.2968092504888773, |
|
"rewards/semantic_entropy": 0.666666679084301, |
|
"step": 4 |
|
}, |
|
{ |
|
"completion_length": 18.501736402511597, |
|
"epoch": 0.291970802919708, |
|
"grad_norm": 0.5848979353904724, |
|
"kl": 0.0011067390441894531, |
|
"learning_rate": 1.6666666666666667e-06, |
|
"loss": 0.0, |
|
"reward": 0.642361119389534, |
|
"reward_std": 0.37005409598350525, |
|
"rewards/semantic_entropy": 0.642361119389534, |
|
"step": 5 |
|
}, |
|
{ |
|
"completion_length": 19.102431058883667, |
|
"epoch": 0.35036496350364965, |
|
"grad_norm": 0.8071303367614746, |
|
"kl": 0.001129150390625, |
|
"learning_rate": 2e-06, |
|
"loss": 0.0, |
|
"reward": 0.5868055522441864, |
|
"reward_std": 0.40071484073996544, |
|
"rewards/semantic_entropy": 0.5868055522441864, |
|
"step": 6 |
|
}, |
|
{ |
|
"completion_length": 18.661458492279053, |
|
"epoch": 0.40875912408759124, |
|
"grad_norm": 0.4785407781600952, |
|
"kl": 0.0016431808471679688, |
|
"learning_rate": 1.997564050259824e-06, |
|
"loss": 0.0001, |
|
"reward": 0.6614583432674408, |
|
"reward_std": 0.35613277927041054, |
|
"rewards/semantic_entropy": 0.6614583432674408, |
|
"step": 7 |
|
}, |
|
{ |
|
"completion_length": 18.498263835906982, |
|
"epoch": 0.46715328467153283, |
|
"grad_norm": 0.7530333995819092, |
|
"kl": 0.0045261383056640625, |
|
"learning_rate": 1.99026806874157e-06, |
|
"loss": 0.0002, |
|
"reward": 0.6145833469927311, |
|
"reward_std": 0.35138164833188057, |
|
"rewards/semantic_entropy": 0.6145833469927311, |
|
"step": 8 |
|
}, |
|
{ |
|
"completion_length": 19.208333611488342, |
|
"epoch": 0.5255474452554745, |
|
"grad_norm": 0.741835355758667, |
|
"kl": 0.00656890869140625, |
|
"learning_rate": 1.9781476007338054e-06, |
|
"loss": 0.0003, |
|
"reward": 0.5364583395421505, |
|
"reward_std": 0.39881302043795586, |
|
"rewards/semantic_entropy": 0.5364583395421505, |
|
"step": 9 |
|
}, |
|
{ |
|
"completion_length": 18.463541746139526, |
|
"epoch": 0.583941605839416, |
|
"grad_norm": 0.8095004558563232, |
|
"kl": 0.0157012939453125, |
|
"learning_rate": 1.9612616959383188e-06, |
|
"loss": 0.0006, |
|
"reward": 0.6388888880610466, |
|
"reward_std": 0.3762888126075268, |
|
"rewards/semantic_entropy": 0.6388888880610466, |
|
"step": 10 |
|
}, |
|
{ |
|
"completion_length": 16.302083492279053, |
|
"epoch": 0.6423357664233577, |
|
"grad_norm": 1.0500741004943848, |
|
"kl": 0.05213165283203125, |
|
"learning_rate": 1.9396926207859082e-06, |
|
"loss": 0.0021, |
|
"reward": 0.7083333432674408, |
|
"reward_std": 0.3477053064852953, |
|
"rewards/semantic_entropy": 0.7083333432674408, |
|
"step": 11 |
|
}, |
|
{ |
|
"completion_length": 15.480902791023254, |
|
"epoch": 0.7007299270072993, |
|
"grad_norm": 0.6965835690498352, |
|
"kl": 0.107666015625, |
|
"learning_rate": 1.9135454576426007e-06, |
|
"loss": 0.0043, |
|
"reward": 0.6979166641831398, |
|
"reward_std": 0.3180003799498081, |
|
"rewards/semantic_entropy": 0.6979166641831398, |
|
"step": 12 |
|
}, |
|
{ |
|
"completion_length": 16.611111283302307, |
|
"epoch": 0.7591240875912408, |
|
"grad_norm": 0.8703776001930237, |
|
"kl": 0.080535888671875, |
|
"learning_rate": 1.8829475928589268e-06, |
|
"loss": 0.0032, |
|
"reward": 0.6857638955116272, |
|
"reward_std": 0.3688342422246933, |
|
"rewards/semantic_entropy": 0.6857638955116272, |
|
"step": 13 |
|
}, |
|
{ |
|
"completion_length": 14.387152791023254, |
|
"epoch": 0.8175182481751825, |
|
"grad_norm": 0.7894781827926636, |
|
"kl": 0.4075927734375, |
|
"learning_rate": 1.8480480961564257e-06, |
|
"loss": 0.0163, |
|
"reward": 0.6805555745959282, |
|
"reward_std": 0.31897793617099524, |
|
"rewards/semantic_entropy": 0.6805555745959282, |
|
"step": 14 |
|
}, |
|
{ |
|
"completion_length": 14.901041746139526, |
|
"epoch": 0.8759124087591241, |
|
"grad_norm": 0.8611342906951904, |
|
"kl": 0.185882568359375, |
|
"learning_rate": 1.8090169943749474e-06, |
|
"loss": 0.0074, |
|
"reward": 0.7274305671453476, |
|
"reward_std": 0.27765000611543655, |
|
"rewards/semantic_entropy": 0.7274305671453476, |
|
"step": 15 |
|
}, |
|
{ |
|
"completion_length": 13.159722089767456, |
|
"epoch": 0.9343065693430657, |
|
"grad_norm": 0.9914915561676025, |
|
"kl": 0.35858154296875, |
|
"learning_rate": 1.766044443118978e-06, |
|
"loss": 0.0143, |
|
"reward": 0.7239583432674408, |
|
"reward_std": 0.34174920059740543, |
|
"rewards/semantic_entropy": 0.7239583432674408, |
|
"step": 16 |
|
}, |
|
{ |
|
"completion_length": 15.265625238418579, |
|
"epoch": 0.9927007299270073, |
|
"grad_norm": 0.7431650757789612, |
|
"kl": 0.18798828125, |
|
"learning_rate": 1.719339800338651e-06, |
|
"loss": 0.0075, |
|
"reward": 0.7552083358168602, |
|
"reward_std": 0.2897039409726858, |
|
"rewards/semantic_entropy": 0.7552083358168602, |
|
"step": 17 |
|
}, |
|
{ |
|
"completion_length": 2.0, |
|
"epoch": 1.0, |
|
"grad_norm": 0.7431650757789612, |
|
"kl": 1.125, |
|
"learning_rate": 1.669130606358858e-06, |
|
"loss": 0.0012, |
|
"reward": 1.0, |
|
"reward_std": 0.38924944400787354, |
|
"rewards/semantic_entropy": 1.0, |
|
"step": 18 |
|
}, |
|
{ |
|
"completion_length": 15.090277791023254, |
|
"epoch": 1.0583941605839415, |
|
"grad_norm": 0.8040208220481873, |
|
"kl": 0.3365478515625, |
|
"learning_rate": 1.615661475325658e-06, |
|
"loss": 0.0135, |
|
"reward": 0.7135416716337204, |
|
"reward_std": 0.3099258504807949, |
|
"rewards/semantic_entropy": 0.7135416716337204, |
|
"step": 19 |
|
}, |
|
{ |
|
"completion_length": 15.520833373069763, |
|
"epoch": 1.1167883211678833, |
|
"grad_norm": 0.8632144927978516, |
|
"kl": 0.32586669921875, |
|
"learning_rate": 1.5591929034707466e-06, |
|
"loss": 0.0131, |
|
"reward": 0.737847238779068, |
|
"reward_std": 0.28588614612817764, |
|
"rewards/semantic_entropy": 0.737847238779068, |
|
"step": 20 |
|
}, |
|
{ |
|
"completion_length": 16.050347328186035, |
|
"epoch": 1.1751824817518248, |
|
"grad_norm": 0.74057936668396, |
|
"kl": 0.1895751953125, |
|
"learning_rate": 1.5e-06, |
|
"loss": 0.0076, |
|
"reward": 0.75, |
|
"reward_std": 0.31531847827136517, |
|
"rewards/semantic_entropy": 0.75, |
|
"step": 21 |
|
}, |
|
{ |
|
"completion_length": 16.63194465637207, |
|
"epoch": 1.2335766423357664, |
|
"grad_norm": 0.4329465627670288, |
|
"kl": 0.22442626953125, |
|
"learning_rate": 1.4383711467890773e-06, |
|
"loss": 0.009, |
|
"reward": 0.734375, |
|
"reward_std": 0.2730935662984848, |
|
"rewards/semantic_entropy": 0.734375, |
|
"step": 22 |
|
}, |
|
{ |
|
"completion_length": 19.901041984558105, |
|
"epoch": 1.2919708029197081, |
|
"grad_norm": 0.652396023273468, |
|
"kl": 0.12689208984375, |
|
"learning_rate": 1.374606593415912e-06, |
|
"loss": 0.0051, |
|
"reward": 0.7239583507180214, |
|
"reward_std": 0.33322223369032145, |
|
"rewards/semantic_entropy": 0.7239583507180214, |
|
"step": 23 |
|
}, |
|
{ |
|
"completion_length": 17.720486402511597, |
|
"epoch": 1.3503649635036497, |
|
"grad_norm": 0.5013155937194824, |
|
"kl": 0.1468505859375, |
|
"learning_rate": 1.3090169943749473e-06, |
|
"loss": 0.0059, |
|
"reward": 0.75, |
|
"reward_std": 0.29686133936047554, |
|
"rewards/semantic_entropy": 0.75, |
|
"step": 24 |
|
}, |
|
{ |
|
"completion_length": 19.574653148651123, |
|
"epoch": 1.4087591240875912, |
|
"grad_norm": 0.5545840263366699, |
|
"kl": 0.14691162109375, |
|
"learning_rate": 1.2419218955996676e-06, |
|
"loss": 0.0059, |
|
"reward": 0.7378472313284874, |
|
"reward_std": 0.29906335659325123, |
|
"rewards/semantic_entropy": 0.7378472313284874, |
|
"step": 25 |
|
}, |
|
{ |
|
"completion_length": 15.946180701255798, |
|
"epoch": 1.4671532846715327, |
|
"grad_norm": 0.5206867456436157, |
|
"kl": 0.1771240234375, |
|
"learning_rate": 1.1736481776669305e-06, |
|
"loss": 0.0071, |
|
"reward": 0.8107638955116272, |
|
"reward_std": 0.24001463688910007, |
|
"rewards/semantic_entropy": 0.8107638955116272, |
|
"step": 26 |
|
}, |
|
{ |
|
"completion_length": 18.86805558204651, |
|
"epoch": 1.5255474452554745, |
|
"grad_norm": 0.7857072949409485, |
|
"kl": 0.1768798828125, |
|
"learning_rate": 1.1045284632676535e-06, |
|
"loss": 0.0071, |
|
"reward": 0.7552083432674408, |
|
"reward_std": 0.3070409968495369, |
|
"rewards/semantic_entropy": 0.7552083432674408, |
|
"step": 27 |
|
}, |
|
{ |
|
"completion_length": 19.182291865348816, |
|
"epoch": 1.583941605839416, |
|
"grad_norm": 0.6400216221809387, |
|
"kl": 0.2479248046875, |
|
"learning_rate": 1.034899496702501e-06, |
|
"loss": 0.0099, |
|
"reward": 0.7534722238779068, |
|
"reward_std": 0.2666480904445052, |
|
"rewards/semantic_entropy": 0.7534722238779068, |
|
"step": 28 |
|
}, |
|
{ |
|
"completion_length": 16.279513955116272, |
|
"epoch": 1.6423357664233578, |
|
"grad_norm": 0.6639309525489807, |
|
"kl": 0.14581298828125, |
|
"learning_rate": 9.651005032974993e-07, |
|
"loss": 0.0058, |
|
"reward": 0.8368055522441864, |
|
"reward_std": 0.19957617949694395, |
|
"rewards/semantic_entropy": 0.8368055522441864, |
|
"step": 29 |
|
}, |
|
{ |
|
"completion_length": 19.109375, |
|
"epoch": 1.7007299270072993, |
|
"grad_norm": 0.6287054419517517, |
|
"kl": 0.17852783203125, |
|
"learning_rate": 8.954715367323466e-07, |
|
"loss": 0.0071, |
|
"reward": 0.798611119389534, |
|
"reward_std": 0.2921114172786474, |
|
"rewards/semantic_entropy": 0.798611119389534, |
|
"step": 30 |
|
}, |
|
{ |
|
"completion_length": 16.519097566604614, |
|
"epoch": 1.7591240875912408, |
|
"grad_norm": 0.6585462689399719, |
|
"kl": 0.157318115234375, |
|
"learning_rate": 8.263518223330696e-07, |
|
"loss": 0.0063, |
|
"reward": 0.7708333358168602, |
|
"reward_std": 0.2721500750631094, |
|
"rewards/semantic_entropy": 0.7708333358168602, |
|
"step": 31 |
|
}, |
|
{ |
|
"completion_length": 18.239583730697632, |
|
"epoch": 1.8175182481751824, |
|
"grad_norm": 0.6048464775085449, |
|
"kl": 0.147125244140625, |
|
"learning_rate": 7.580781044003324e-07, |
|
"loss": 0.0059, |
|
"reward": 0.7326388955116272, |
|
"reward_std": 0.29634279757738113, |
|
"rewards/semantic_entropy": 0.7326388955116272, |
|
"step": 32 |
|
}, |
|
{ |
|
"completion_length": 19.44270896911621, |
|
"epoch": 1.8759124087591241, |
|
"grad_norm": 0.430084228515625, |
|
"kl": 0.1063232421875, |
|
"learning_rate": 6.909830056250526e-07, |
|
"loss": 0.0043, |
|
"reward": 0.774305559694767, |
|
"reward_std": 0.27460889145731926, |
|
"rewards/semantic_entropy": 0.774305559694767, |
|
"step": 33 |
|
}, |
|
{ |
|
"completion_length": 16.817708730697632, |
|
"epoch": 1.9343065693430657, |
|
"grad_norm": 0.40789568424224854, |
|
"kl": 0.070526123046875, |
|
"learning_rate": 6.253934065840879e-07, |
|
"loss": 0.0028, |
|
"reward": 0.8107639029622078, |
|
"reward_std": 0.2299627624452114, |
|
"rewards/semantic_entropy": 0.8107639029622078, |
|
"step": 34 |
|
}, |
|
{ |
|
"completion_length": 20.5625, |
|
"epoch": 1.9927007299270074, |
|
"grad_norm": 0.4874630868434906, |
|
"kl": 0.13616943359375, |
|
"learning_rate": 5.616288532109224e-07, |
|
"loss": 0.0054, |
|
"reward": 0.7361111044883728, |
|
"reward_std": 0.3129718992859125, |
|
"rewards/semantic_entropy": 0.7361111044883728, |
|
"step": 35 |
|
}, |
|
{ |
|
"completion_length": 34.0, |
|
"epoch": 2.0, |
|
"grad_norm": 0.4874630868434906, |
|
"kl": 0.036865234375, |
|
"learning_rate": 5.000000000000002e-07, |
|
"loss": 0.0023, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy": 1.0, |
|
"step": 36 |
|
}, |
|
{ |
|
"completion_length": 18.72743058204651, |
|
"epoch": 2.0583941605839415, |
|
"grad_norm": 0.5846592783927917, |
|
"kl": 0.12078857421875, |
|
"learning_rate": 4.408070965292533e-07, |
|
"loss": 0.0048, |
|
"reward": 0.7465277835726738, |
|
"reward_std": 0.31662504002451897, |
|
"rewards/semantic_entropy": 0.7465277835726738, |
|
"step": 37 |
|
}, |
|
{ |
|
"completion_length": 17.776041984558105, |
|
"epoch": 2.116788321167883, |
|
"grad_norm": 0.6230023503303528, |
|
"kl": 0.21380615234375, |
|
"learning_rate": 3.843385246743417e-07, |
|
"loss": 0.0085, |
|
"reward": 0.7482638880610466, |
|
"reward_std": 0.28513461723923683, |
|
"rewards/semantic_entropy": 0.7482638880610466, |
|
"step": 38 |
|
}, |
|
{ |
|
"completion_length": 19.468750476837158, |
|
"epoch": 2.1751824817518246, |
|
"grad_norm": 0.6272074580192566, |
|
"kl": 0.096343994140625, |
|
"learning_rate": 3.308693936411421e-07, |
|
"loss": 0.0039, |
|
"reward": 0.7291666716337204, |
|
"reward_std": 0.32780924811959267, |
|
"rewards/semantic_entropy": 0.7291666716337204, |
|
"step": 39 |
|
}, |
|
{ |
|
"completion_length": 17.072916984558105, |
|
"epoch": 2.2335766423357666, |
|
"grad_norm": 0.5045897960662842, |
|
"kl": 0.077362060546875, |
|
"learning_rate": 2.8066019966134904e-07, |
|
"loss": 0.0031, |
|
"reward": 0.8090277835726738, |
|
"reward_std": 0.18912154575809836, |
|
"rewards/semantic_entropy": 0.8090277835726738, |
|
"step": 40 |
|
}, |
|
{ |
|
"completion_length": 18.598958492279053, |
|
"epoch": 2.291970802919708, |
|
"grad_norm": 0.48655831813812256, |
|
"kl": 0.09051513671875, |
|
"learning_rate": 2.339555568810221e-07, |
|
"loss": 0.0036, |
|
"reward": 0.7760416641831398, |
|
"reward_std": 0.26947965286672115, |
|
"rewards/semantic_entropy": 0.7760416641831398, |
|
"step": 41 |
|
}, |
|
{ |
|
"completion_length": 17.996527791023254, |
|
"epoch": 2.3503649635036497, |
|
"grad_norm": 0.5561981797218323, |
|
"kl": 0.0677490234375, |
|
"learning_rate": 1.9098300562505264e-07, |
|
"loss": 0.0027, |
|
"reward": 0.7638888955116272, |
|
"reward_std": 0.2775236200541258, |
|
"rewards/semantic_entropy": 0.7638888955116272, |
|
"step": 42 |
|
}, |
|
{ |
|
"completion_length": 17.697916746139526, |
|
"epoch": 2.408759124087591, |
|
"grad_norm": 0.5540634989738464, |
|
"kl": 0.15765380859375, |
|
"learning_rate": 1.5195190384357404e-07, |
|
"loss": 0.0063, |
|
"reward": 0.774305559694767, |
|
"reward_std": 0.2512203995138407, |
|
"rewards/semantic_entropy": 0.774305559694767, |
|
"step": 43 |
|
}, |
|
{ |
|
"completion_length": 18.682291865348816, |
|
"epoch": 2.4671532846715327, |
|
"grad_norm": 0.4448810815811157, |
|
"kl": 0.11761474609375, |
|
"learning_rate": 1.1705240714107301e-07, |
|
"loss": 0.0047, |
|
"reward": 0.7447916716337204, |
|
"reward_std": 0.2556060552597046, |
|
"rewards/semantic_entropy": 0.7447916716337204, |
|
"step": 44 |
|
}, |
|
{ |
|
"completion_length": 16.9149307012558, |
|
"epoch": 2.5255474452554747, |
|
"grad_norm": 0.5861647725105286, |
|
"kl": 0.09136962890625, |
|
"learning_rate": 8.645454235739902e-08, |
|
"loss": 0.0037, |
|
"reward": 0.798611119389534, |
|
"reward_std": 0.2555408189073205, |
|
"rewards/semantic_entropy": 0.798611119389534, |
|
"step": 45 |
|
}, |
|
{ |
|
"completion_length": 17.83506965637207, |
|
"epoch": 2.5839416058394162, |
|
"grad_norm": 0.4424433708190918, |
|
"kl": 0.1358642578125, |
|
"learning_rate": 6.030737921409168e-08, |
|
"loss": 0.0054, |
|
"reward": 0.7934027835726738, |
|
"reward_std": 0.27376995235681534, |
|
"rewards/semantic_entropy": 0.7934027835726738, |
|
"step": 46 |
|
}, |
|
{ |
|
"completion_length": 18.23263943195343, |
|
"epoch": 2.6423357664233578, |
|
"grad_norm": 0.4687785804271698, |
|
"kl": 0.10284423828125, |
|
"learning_rate": 3.87383040616811e-08, |
|
"loss": 0.0041, |
|
"reward": 0.7934027761220932, |
|
"reward_std": 0.2595429290086031, |
|
"rewards/semantic_entropy": 0.7934027761220932, |
|
"step": 47 |
|
}, |
|
{ |
|
"completion_length": 18.30381965637207, |
|
"epoch": 2.7007299270072993, |
|
"grad_norm": 0.5063730478286743, |
|
"kl": 0.10589599609375, |
|
"learning_rate": 2.185239926619431e-08, |
|
"loss": 0.0042, |
|
"reward": 0.758680559694767, |
|
"reward_std": 0.29030087031424046, |
|
"rewards/semantic_entropy": 0.758680559694767, |
|
"step": 48 |
|
}, |
|
{ |
|
"completion_length": 18.715277910232544, |
|
"epoch": 2.759124087591241, |
|
"grad_norm": 0.5857909321784973, |
|
"kl": 0.117889404296875, |
|
"learning_rate": 9.731931258429638e-09, |
|
"loss": 0.0047, |
|
"reward": 0.7777777835726738, |
|
"reward_std": 0.2577416365966201, |
|
"rewards/semantic_entropy": 0.7777777835726738, |
|
"step": 49 |
|
}, |
|
{ |
|
"completion_length": 17.322916626930237, |
|
"epoch": 2.8175182481751824, |
|
"grad_norm": 0.4949776828289032, |
|
"kl": 0.09075927734375, |
|
"learning_rate": 2.435949740175802e-09, |
|
"loss": 0.0036, |
|
"reward": 0.760416679084301, |
|
"reward_std": 0.2918264754116535, |
|
"rewards/semantic_entropy": 0.760416679084301, |
|
"step": 50 |
|
}, |
|
{ |
|
"completion_length": 19.02256977558136, |
|
"epoch": 2.875912408759124, |
|
"grad_norm": 0.554519772529602, |
|
"kl": 0.09710693359375, |
|
"learning_rate": 0.0, |
|
"loss": 0.0039, |
|
"reward": 0.7395833358168602, |
|
"reward_std": 0.2774972226470709, |
|
"rewards/semantic_entropy": 0.7395833358168602, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 2.875912408759124, |
|
"step": 51, |
|
"total_flos": 0.0, |
|
"train_loss": 0.004883308103913604, |
|
"train_runtime": 5205.8109, |
|
"train_samples_per_second": 0.471, |
|
"train_steps_per_second": 0.01 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 51, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 10, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|