|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.875912408759124, |
|
"eval_steps": 100, |
|
"global_step": 51, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"completion_length": 29.870370149612427, |
|
"epoch": 0.058394160583941604, |
|
"grad_norm": 0.5967049598693848, |
|
"kl": 0.0, |
|
"learning_rate": 3.333333333333333e-07, |
|
"loss": 0.0, |
|
"reward": 0.526620376855135, |
|
"reward_std": 0.372432217001915, |
|
"rewards/semantic_entropy": 0.526620376855135, |
|
"step": 1 |
|
}, |
|
{ |
|
"completion_length": 26.355324506759644, |
|
"epoch": 0.11678832116788321, |
|
"grad_norm": 0.27608758211135864, |
|
"kl": 0.0, |
|
"learning_rate": 6.666666666666666e-07, |
|
"loss": 0.0, |
|
"reward": 0.6192129664123058, |
|
"reward_std": 0.3255546223372221, |
|
"rewards/semantic_entropy": 0.6192129664123058, |
|
"step": 2 |
|
}, |
|
{ |
|
"completion_length": 31.85300898551941, |
|
"epoch": 0.17518248175182483, |
|
"grad_norm": 0.3493832051753998, |
|
"kl": 0.0005927085876464844, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.49884259700775146, |
|
"reward_std": 0.36885398998856544, |
|
"rewards/semantic_entropy": 0.49884259700775146, |
|
"step": 3 |
|
}, |
|
{ |
|
"completion_length": 27.081018686294556, |
|
"epoch": 0.23357664233576642, |
|
"grad_norm": 0.8119116425514221, |
|
"kl": 0.0008134841918945312, |
|
"learning_rate": 1.3333333333333332e-06, |
|
"loss": 0.0, |
|
"reward": 0.6111111231148243, |
|
"reward_std": 0.3842464517802, |
|
"rewards/semantic_entropy": 0.6111111231148243, |
|
"step": 4 |
|
}, |
|
{ |
|
"completion_length": 30.222222089767456, |
|
"epoch": 0.291970802919708, |
|
"grad_norm": 0.5014259815216064, |
|
"kl": 0.0007073879241943359, |
|
"learning_rate": 1.6666666666666667e-06, |
|
"loss": 0.0, |
|
"reward": 0.6006944514811039, |
|
"reward_std": 0.3485470600426197, |
|
"rewards/semantic_entropy": 0.6006944514811039, |
|
"step": 5 |
|
}, |
|
{ |
|
"completion_length": 29.46759271621704, |
|
"epoch": 0.35036496350364965, |
|
"grad_norm": 0.5462167859077454, |
|
"kl": 0.0006594657897949219, |
|
"learning_rate": 2e-06, |
|
"loss": 0.0, |
|
"reward": 0.5115740709006786, |
|
"reward_std": 0.3930557183921337, |
|
"rewards/semantic_entropy": 0.5115740709006786, |
|
"step": 6 |
|
}, |
|
{ |
|
"completion_length": 30.0775465965271, |
|
"epoch": 0.40875912408759124, |
|
"grad_norm": 0.3974973261356354, |
|
"kl": 0.0009469985961914062, |
|
"learning_rate": 1.997564050259824e-06, |
|
"loss": 0.0, |
|
"reward": 0.6053240820765495, |
|
"reward_std": 0.3901033569127321, |
|
"rewards/semantic_entropy": 0.6053240820765495, |
|
"step": 7 |
|
}, |
|
{ |
|
"completion_length": 30.643518686294556, |
|
"epoch": 0.46715328467153283, |
|
"grad_norm": 0.4026181399822235, |
|
"kl": 0.0013263225555419922, |
|
"learning_rate": 1.99026806874157e-06, |
|
"loss": 0.0001, |
|
"reward": 0.5821759402751923, |
|
"reward_std": 0.3471612483263016, |
|
"rewards/semantic_entropy": 0.5821759402751923, |
|
"step": 8 |
|
}, |
|
{ |
|
"completion_length": 36.761574029922485, |
|
"epoch": 0.5255474452554745, |
|
"grad_norm": 0.46980857849121094, |
|
"kl": 0.0016498565673828125, |
|
"learning_rate": 1.9781476007338054e-06, |
|
"loss": 0.0001, |
|
"reward": 0.4861111231148243, |
|
"reward_std": 0.43300675973296165, |
|
"rewards/semantic_entropy": 0.4861111231148243, |
|
"step": 9 |
|
}, |
|
{ |
|
"completion_length": 32.00000071525574, |
|
"epoch": 0.583941605839416, |
|
"grad_norm": 0.3003987669944763, |
|
"kl": 0.0032224655151367188, |
|
"learning_rate": 1.9612616959383188e-06, |
|
"loss": 0.0001, |
|
"reward": 0.5763888917863369, |
|
"reward_std": 0.3751811906695366, |
|
"rewards/semantic_entropy": 0.5763888917863369, |
|
"step": 10 |
|
}, |
|
{ |
|
"completion_length": 26.78703737258911, |
|
"epoch": 0.6423357664233577, |
|
"grad_norm": 0.6452711224555969, |
|
"kl": 0.007064342498779297, |
|
"learning_rate": 1.9396926207859082e-06, |
|
"loss": 0.0003, |
|
"reward": 0.6087963059544563, |
|
"reward_std": 0.3837307542562485, |
|
"rewards/semantic_entropy": 0.6087963059544563, |
|
"step": 11 |
|
}, |
|
{ |
|
"completion_length": 28.517361164093018, |
|
"epoch": 0.7007299270072993, |
|
"grad_norm": 0.5347678065299988, |
|
"kl": 0.011034965515136719, |
|
"learning_rate": 1.9135454576426007e-06, |
|
"loss": 0.0004, |
|
"reward": 0.5949074178934097, |
|
"reward_std": 0.38593913801014423, |
|
"rewards/semantic_entropy": 0.5949074178934097, |
|
"step": 12 |
|
}, |
|
{ |
|
"completion_length": 32.70833349227905, |
|
"epoch": 0.7591240875912408, |
|
"grad_norm": 0.3103489875793457, |
|
"kl": 0.006297111511230469, |
|
"learning_rate": 1.8829475928589268e-06, |
|
"loss": 0.0003, |
|
"reward": 0.5405092723667622, |
|
"reward_std": 0.3787935618311167, |
|
"rewards/semantic_entropy": 0.5405092723667622, |
|
"step": 13 |
|
}, |
|
{ |
|
"completion_length": 28.361111402511597, |
|
"epoch": 0.8175182481751825, |
|
"grad_norm": 0.341845840215683, |
|
"kl": 0.011461257934570312, |
|
"learning_rate": 1.8480480961564257e-06, |
|
"loss": 0.0005, |
|
"reward": 0.5798611156642437, |
|
"reward_std": 0.37119201570749283, |
|
"rewards/semantic_entropy": 0.5798611156642437, |
|
"step": 14 |
|
}, |
|
{ |
|
"completion_length": 27.28703737258911, |
|
"epoch": 0.8759124087591241, |
|
"grad_norm": 0.3116164803504944, |
|
"kl": 0.00528717041015625, |
|
"learning_rate": 1.8090169943749474e-06, |
|
"loss": 0.0002, |
|
"reward": 0.5891203731298447, |
|
"reward_std": 0.39476561546325684, |
|
"rewards/semantic_entropy": 0.5891203731298447, |
|
"step": 15 |
|
}, |
|
{ |
|
"completion_length": 27.549768924713135, |
|
"epoch": 0.9343065693430657, |
|
"grad_norm": 0.4672207236289978, |
|
"kl": 0.012537002563476562, |
|
"learning_rate": 1.766044443118978e-06, |
|
"loss": 0.0005, |
|
"reward": 0.6006944589316845, |
|
"reward_std": 0.3798612989485264, |
|
"rewards/semantic_entropy": 0.6006944589316845, |
|
"step": 16 |
|
}, |
|
{ |
|
"completion_length": 27.44560170173645, |
|
"epoch": 0.9927007299270073, |
|
"grad_norm": 0.4451679587364197, |
|
"kl": 0.026611328125, |
|
"learning_rate": 1.719339800338651e-06, |
|
"loss": 0.0011, |
|
"reward": 0.6435185112059116, |
|
"reward_std": 0.36070936545729637, |
|
"rewards/semantic_entropy": 0.6435185112059116, |
|
"step": 17 |
|
}, |
|
{ |
|
"completion_length": 16.0, |
|
"epoch": 1.0, |
|
"grad_norm": 0.4451679587364197, |
|
"kl": 0.08837890625, |
|
"learning_rate": 1.669130606358858e-06, |
|
"loss": 0.0001, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy": 1.0, |
|
"step": 18 |
|
}, |
|
{ |
|
"completion_length": 24.4120374917984, |
|
"epoch": 1.0583941605839415, |
|
"grad_norm": 0.3066134452819824, |
|
"kl": 0.04150390625, |
|
"learning_rate": 1.615661475325658e-06, |
|
"loss": 0.0017, |
|
"reward": 0.6423611082136631, |
|
"reward_std": 0.3426077160984278, |
|
"rewards/semantic_entropy": 0.6423611082136631, |
|
"step": 19 |
|
}, |
|
{ |
|
"completion_length": 24.349537134170532, |
|
"epoch": 1.1167883211678833, |
|
"grad_norm": 0.4213350713253021, |
|
"kl": 0.0197906494140625, |
|
"learning_rate": 1.5591929034707466e-06, |
|
"loss": 0.0008, |
|
"reward": 0.6168981604278088, |
|
"reward_std": 0.35933491215109825, |
|
"rewards/semantic_entropy": 0.6168981604278088, |
|
"step": 20 |
|
}, |
|
{ |
|
"completion_length": 24.63078737258911, |
|
"epoch": 1.1751824817518248, |
|
"grad_norm": 0.35187050700187683, |
|
"kl": 0.03948211669921875, |
|
"learning_rate": 1.5e-06, |
|
"loss": 0.0016, |
|
"reward": 0.6712963134050369, |
|
"reward_std": 0.3001057803630829, |
|
"rewards/semantic_entropy": 0.6712963134050369, |
|
"step": 21 |
|
}, |
|
{ |
|
"completion_length": 23.54745364189148, |
|
"epoch": 1.2335766423357664, |
|
"grad_norm": 0.30250656604766846, |
|
"kl": 0.02053070068359375, |
|
"learning_rate": 1.4383711467890773e-06, |
|
"loss": 0.0008, |
|
"reward": 0.7118055671453476, |
|
"reward_std": 0.2657315619289875, |
|
"rewards/semantic_entropy": 0.7118055671453476, |
|
"step": 22 |
|
}, |
|
{ |
|
"completion_length": 29.81944465637207, |
|
"epoch": 1.2919708029197081, |
|
"grad_norm": 0.5680757164955139, |
|
"kl": 0.0273590087890625, |
|
"learning_rate": 1.374606593415912e-06, |
|
"loss": 0.0011, |
|
"reward": 0.5763888992369175, |
|
"reward_std": 0.38675259053707123, |
|
"rewards/semantic_entropy": 0.5763888992369175, |
|
"step": 23 |
|
}, |
|
{ |
|
"completion_length": 24.825231790542603, |
|
"epoch": 1.3503649635036497, |
|
"grad_norm": 0.4038240611553192, |
|
"kl": 0.02518463134765625, |
|
"learning_rate": 1.3090169943749473e-06, |
|
"loss": 0.001, |
|
"reward": 0.6053240820765495, |
|
"reward_std": 0.3557727001607418, |
|
"rewards/semantic_entropy": 0.6053240820765495, |
|
"step": 24 |
|
}, |
|
{ |
|
"completion_length": 31.438657999038696, |
|
"epoch": 1.4087591240875912, |
|
"grad_norm": 0.3054307699203491, |
|
"kl": 0.01131439208984375, |
|
"learning_rate": 1.2419218955996676e-06, |
|
"loss": 0.0005, |
|
"reward": 0.5127314850687981, |
|
"reward_std": 0.41082172095775604, |
|
"rewards/semantic_entropy": 0.5127314850687981, |
|
"step": 25 |
|
}, |
|
{ |
|
"completion_length": 27.61805558204651, |
|
"epoch": 1.4671532846715327, |
|
"grad_norm": 0.3926026523113251, |
|
"kl": 0.017902374267578125, |
|
"learning_rate": 1.1736481776669305e-06, |
|
"loss": 0.0007, |
|
"reward": 0.6412037126719952, |
|
"reward_std": 0.3502417653799057, |
|
"rewards/semantic_entropy": 0.6412037126719952, |
|
"step": 26 |
|
}, |
|
{ |
|
"completion_length": 28.688657522201538, |
|
"epoch": 1.5255474452554745, |
|
"grad_norm": 0.31289467215538025, |
|
"kl": 0.018411636352539062, |
|
"learning_rate": 1.1045284632676535e-06, |
|
"loss": 0.0007, |
|
"reward": 0.6319444552063942, |
|
"reward_std": 0.36655068024992943, |
|
"rewards/semantic_entropy": 0.6319444552063942, |
|
"step": 27 |
|
}, |
|
{ |
|
"completion_length": 30.806713581085205, |
|
"epoch": 1.583941605839416, |
|
"grad_norm": 0.2388666570186615, |
|
"kl": 0.015939712524414062, |
|
"learning_rate": 1.034899496702501e-06, |
|
"loss": 0.0006, |
|
"reward": 0.6053240783512592, |
|
"reward_std": 0.3488955218344927, |
|
"rewards/semantic_entropy": 0.6053240783512592, |
|
"step": 28 |
|
}, |
|
{ |
|
"completion_length": 26.402778148651123, |
|
"epoch": 1.6423357664233578, |
|
"grad_norm": 0.6465526819229126, |
|
"kl": 0.016387939453125, |
|
"learning_rate": 9.651005032974993e-07, |
|
"loss": 0.0007, |
|
"reward": 0.6932870522141457, |
|
"reward_std": 0.30794387497007847, |
|
"rewards/semantic_entropy": 0.6932870522141457, |
|
"step": 29 |
|
}, |
|
{ |
|
"completion_length": 30.527778148651123, |
|
"epoch": 1.7007299270072993, |
|
"grad_norm": 0.2623353600502014, |
|
"kl": 0.020229339599609375, |
|
"learning_rate": 8.954715367323466e-07, |
|
"loss": 0.0008, |
|
"reward": 0.634259257465601, |
|
"reward_std": 0.304907551035285, |
|
"rewards/semantic_entropy": 0.634259257465601, |
|
"step": 30 |
|
}, |
|
{ |
|
"completion_length": 28.02430558204651, |
|
"epoch": 1.7591240875912408, |
|
"grad_norm": 0.26452869176864624, |
|
"kl": 0.01360321044921875, |
|
"learning_rate": 8.263518223330696e-07, |
|
"loss": 0.0005, |
|
"reward": 0.6238426044583321, |
|
"reward_std": 0.3165153060108423, |
|
"rewards/semantic_entropy": 0.6238426044583321, |
|
"step": 31 |
|
}, |
|
{ |
|
"completion_length": 29.64814805984497, |
|
"epoch": 1.8175182481751824, |
|
"grad_norm": 0.6390478014945984, |
|
"kl": 0.03284645080566406, |
|
"learning_rate": 7.580781044003324e-07, |
|
"loss": 0.0013, |
|
"reward": 0.6192129626870155, |
|
"reward_std": 0.3468264602124691, |
|
"rewards/semantic_entropy": 0.6192129626870155, |
|
"step": 32 |
|
}, |
|
{ |
|
"completion_length": 31.30439829826355, |
|
"epoch": 1.8759124087591241, |
|
"grad_norm": 0.24422068893909454, |
|
"kl": 0.010921478271484375, |
|
"learning_rate": 6.909830056250526e-07, |
|
"loss": 0.0004, |
|
"reward": 0.6435185223817825, |
|
"reward_std": 0.33870384842157364, |
|
"rewards/semantic_entropy": 0.6435185223817825, |
|
"step": 33 |
|
}, |
|
{ |
|
"completion_length": 29.41319465637207, |
|
"epoch": 1.9343065693430657, |
|
"grad_norm": 0.2262941151857376, |
|
"kl": 0.016880035400390625, |
|
"learning_rate": 6.253934065840879e-07, |
|
"loss": 0.0007, |
|
"reward": 0.6273148208856583, |
|
"reward_std": 0.3323148675262928, |
|
"rewards/semantic_entropy": 0.6273148208856583, |
|
"step": 34 |
|
}, |
|
{ |
|
"completion_length": 28.489583492279053, |
|
"epoch": 1.9927007299270074, |
|
"grad_norm": 0.276526540517807, |
|
"kl": 0.01245880126953125, |
|
"learning_rate": 5.616288532109224e-07, |
|
"loss": 0.0005, |
|
"reward": 0.6238425932824612, |
|
"reward_std": 0.3457994442433119, |
|
"rewards/semantic_entropy": 0.6238425932824612, |
|
"step": 35 |
|
}, |
|
{ |
|
"completion_length": 43.0, |
|
"epoch": 2.0, |
|
"grad_norm": 0.276526540517807, |
|
"kl": 0.00799560546875, |
|
"learning_rate": 5.000000000000002e-07, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.4608885943889618, |
|
"rewards/semantic_entropy": 0.0, |
|
"step": 36 |
|
}, |
|
{ |
|
"completion_length": 29.586805820465088, |
|
"epoch": 2.0583941605839415, |
|
"grad_norm": 0.2671959400177002, |
|
"kl": 0.018100738525390625, |
|
"learning_rate": 4.408070965292533e-07, |
|
"loss": 0.0007, |
|
"reward": 0.6157407388091087, |
|
"reward_std": 0.36300016567111015, |
|
"rewards/semantic_entropy": 0.6157407388091087, |
|
"step": 37 |
|
}, |
|
{ |
|
"completion_length": 25.49074101448059, |
|
"epoch": 2.116788321167883, |
|
"grad_norm": 0.2453327476978302, |
|
"kl": 0.0186920166015625, |
|
"learning_rate": 3.843385246743417e-07, |
|
"loss": 0.0007, |
|
"reward": 0.6689814887940884, |
|
"reward_std": 0.2830717135220766, |
|
"rewards/semantic_entropy": 0.6689814887940884, |
|
"step": 38 |
|
}, |
|
{ |
|
"completion_length": 30.723379850387573, |
|
"epoch": 2.1751824817518246, |
|
"grad_norm": 0.44519904255867004, |
|
"kl": 0.02108001708984375, |
|
"learning_rate": 3.308693936411421e-07, |
|
"loss": 0.0008, |
|
"reward": 0.6018518693745136, |
|
"reward_std": 0.32297887466847897, |
|
"rewards/semantic_entropy": 0.6018518693745136, |
|
"step": 39 |
|
}, |
|
{ |
|
"completion_length": 24.181713104248047, |
|
"epoch": 2.2335766423357666, |
|
"grad_norm": 0.29531192779541016, |
|
"kl": 0.020915985107421875, |
|
"learning_rate": 2.8066019966134904e-07, |
|
"loss": 0.0008, |
|
"reward": 0.6793981567025185, |
|
"reward_std": 0.3183311792090535, |
|
"rewards/semantic_entropy": 0.6793981567025185, |
|
"step": 40 |
|
}, |
|
{ |
|
"completion_length": 27.152777671813965, |
|
"epoch": 2.291970802919708, |
|
"grad_norm": 0.3067900836467743, |
|
"kl": 0.01100921630859375, |
|
"learning_rate": 2.339555568810221e-07, |
|
"loss": 0.0004, |
|
"reward": 0.6238426119089127, |
|
"reward_std": 0.36597814224660397, |
|
"rewards/semantic_entropy": 0.6238426119089127, |
|
"step": 41 |
|
}, |
|
{ |
|
"completion_length": 28.2928249835968, |
|
"epoch": 2.3503649635036497, |
|
"grad_norm": 0.27559277415275574, |
|
"kl": 0.012683868408203125, |
|
"learning_rate": 1.9098300562505264e-07, |
|
"loss": 0.0005, |
|
"reward": 0.6967592611908913, |
|
"reward_std": 0.29866353049874306, |
|
"rewards/semantic_entropy": 0.6967592611908913, |
|
"step": 42 |
|
}, |
|
{ |
|
"completion_length": 26.355324029922485, |
|
"epoch": 2.408759124087591, |
|
"grad_norm": 0.3306453227996826, |
|
"kl": 0.02254486083984375, |
|
"learning_rate": 1.5195190384357404e-07, |
|
"loss": 0.0009, |
|
"reward": 0.6990740746259689, |
|
"reward_std": 0.30733868665993214, |
|
"rewards/semantic_entropy": 0.6990740746259689, |
|
"step": 43 |
|
}, |
|
{ |
|
"completion_length": 27.85185170173645, |
|
"epoch": 2.4671532846715327, |
|
"grad_norm": 0.37372887134552, |
|
"kl": 0.03179931640625, |
|
"learning_rate": 1.1705240714107301e-07, |
|
"loss": 0.0013, |
|
"reward": 0.6562500074505806, |
|
"reward_std": 0.28820149786770344, |
|
"rewards/semantic_entropy": 0.6562500074505806, |
|
"step": 44 |
|
}, |
|
{ |
|
"completion_length": 24.635416746139526, |
|
"epoch": 2.5255474452554747, |
|
"grad_norm": 0.3697076439857483, |
|
"kl": 0.015590667724609375, |
|
"learning_rate": 8.645454235739902e-08, |
|
"loss": 0.0006, |
|
"reward": 0.7037037089467049, |
|
"reward_std": 0.2801394369453192, |
|
"rewards/semantic_entropy": 0.7037037089467049, |
|
"step": 45 |
|
}, |
|
{ |
|
"completion_length": 26.35185217857361, |
|
"epoch": 2.5839416058394162, |
|
"grad_norm": 0.31710338592529297, |
|
"kl": 0.03106689453125, |
|
"learning_rate": 6.030737921409168e-08, |
|
"loss": 0.0012, |
|
"reward": 0.6574074104428291, |
|
"reward_std": 0.33865234442055225, |
|
"rewards/semantic_entropy": 0.6574074104428291, |
|
"step": 46 |
|
}, |
|
{ |
|
"completion_length": 26.200231552124023, |
|
"epoch": 2.6423357664233578, |
|
"grad_norm": 0.2386447936296463, |
|
"kl": 0.021045684814453125, |
|
"learning_rate": 3.87383040616811e-08, |
|
"loss": 0.0008, |
|
"reward": 0.6574074104428291, |
|
"reward_std": 0.30992276407778263, |
|
"rewards/semantic_entropy": 0.6574074104428291, |
|
"step": 47 |
|
}, |
|
{ |
|
"completion_length": 26.50810217857361, |
|
"epoch": 2.7007299270072993, |
|
"grad_norm": 0.5202280282974243, |
|
"kl": 0.02829742431640625, |
|
"learning_rate": 2.185239926619431e-08, |
|
"loss": 0.0011, |
|
"reward": 0.7141203731298447, |
|
"reward_std": 0.28790368139743805, |
|
"rewards/semantic_entropy": 0.7141203731298447, |
|
"step": 48 |
|
}, |
|
{ |
|
"completion_length": 29.20138907432556, |
|
"epoch": 2.759124087591241, |
|
"grad_norm": 0.30392101407051086, |
|
"kl": 0.036502838134765625, |
|
"learning_rate": 9.731931258429638e-09, |
|
"loss": 0.0015, |
|
"reward": 0.6631944477558136, |
|
"reward_std": 0.31401310954242945, |
|
"rewards/semantic_entropy": 0.6631944477558136, |
|
"step": 49 |
|
}, |
|
{ |
|
"completion_length": 24.504629611968994, |
|
"epoch": 2.8175182481751824, |
|
"grad_norm": 0.2555871307849884, |
|
"kl": 0.017333984375, |
|
"learning_rate": 2.435949740175802e-09, |
|
"loss": 0.0007, |
|
"reward": 0.6504629701375961, |
|
"reward_std": 0.3291088491678238, |
|
"rewards/semantic_entropy": 0.6504629701375961, |
|
"step": 50 |
|
}, |
|
{ |
|
"completion_length": 30.652778148651123, |
|
"epoch": 2.875912408759124, |
|
"grad_norm": 0.3172709345817566, |
|
"kl": 0.014835357666015625, |
|
"learning_rate": 0.0, |
|
"loss": 0.0006, |
|
"reward": 0.5914351791143417, |
|
"reward_std": 0.3534049801528454, |
|
"rewards/semantic_entropy": 0.5914351791143417, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 2.875912408759124, |
|
"step": 51, |
|
"total_flos": 0.0, |
|
"train_loss": 0.0006088586881692043, |
|
"train_runtime": 10241.9671, |
|
"train_samples_per_second": 0.239, |
|
"train_steps_per_second": 0.005 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 51, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 10, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|