|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9927007299270073, |
|
"eval_steps": 100, |
|
"global_step": 17, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"completion_length": 31.543402910232544, |
|
"epoch": 0.058394160583941604, |
|
"grad_norm": 0.6463247537612915, |
|
"kl": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.4809027835726738, |
|
"reward_std": 0.4057047627866268, |
|
"rewards/semantic_entropy": 0.4809027835726738, |
|
"step": 1 |
|
}, |
|
{ |
|
"completion_length": 27.753472566604614, |
|
"epoch": 0.11678832116788321, |
|
"grad_norm": 0.31535470485687256, |
|
"kl": 0.0, |
|
"learning_rate": 2e-06, |
|
"loss": 0.0, |
|
"reward": 0.5850694514811039, |
|
"reward_std": 0.3451368249952793, |
|
"rewards/semantic_entropy": 0.5850694514811039, |
|
"step": 2 |
|
}, |
|
{ |
|
"completion_length": 32.34895849227905, |
|
"epoch": 0.17518248175182483, |
|
"grad_norm": 0.38025209307670593, |
|
"kl": 0.0008878707885742188, |
|
"learning_rate": 1.9781476007338054e-06, |
|
"loss": 0.0, |
|
"reward": 0.4791666679084301, |
|
"reward_std": 0.38540373742580414, |
|
"rewards/semantic_entropy": 0.4791666679084301, |
|
"step": 3 |
|
}, |
|
{ |
|
"completion_length": 27.75607681274414, |
|
"epoch": 0.23357664233576642, |
|
"grad_norm": 0.7206792235374451, |
|
"kl": 0.0020771026611328125, |
|
"learning_rate": 1.9135454576426007e-06, |
|
"loss": 0.0, |
|
"reward": 0.5998263880610466, |
|
"reward_std": 0.348458593711257, |
|
"rewards/semantic_entropy": 0.5998263880610466, |
|
"step": 4 |
|
}, |
|
{ |
|
"completion_length": 29.860243558883667, |
|
"epoch": 0.291970802919708, |
|
"grad_norm": 0.5087810754776001, |
|
"kl": 0.0018243789672851562, |
|
"learning_rate": 1.8090169943749474e-06, |
|
"loss": 0.0, |
|
"reward": 0.5034722238779068, |
|
"reward_std": 0.39532990381121635, |
|
"rewards/semantic_entropy": 0.5034722238779068, |
|
"step": 5 |
|
}, |
|
{ |
|
"completion_length": 28.811631679534912, |
|
"epoch": 0.35036496350364965, |
|
"grad_norm": 0.4511905610561371, |
|
"kl": 0.0026292800903320312, |
|
"learning_rate": 1.669130606358858e-06, |
|
"loss": 0.0, |
|
"reward": 0.5295138992369175, |
|
"reward_std": 0.3658079691231251, |
|
"rewards/semantic_entropy": 0.5295138992369175, |
|
"step": 6 |
|
}, |
|
{ |
|
"completion_length": 31.047743320465088, |
|
"epoch": 0.40875912408759124, |
|
"grad_norm": 0.46206873655319214, |
|
"kl": 0.003326416015625, |
|
"learning_rate": 1.5e-06, |
|
"loss": 0.0, |
|
"reward": 0.4973958395421505, |
|
"reward_std": 0.39973679929971695, |
|
"rewards/semantic_entropy": 0.4973958395421505, |
|
"step": 7 |
|
}, |
|
{ |
|
"completion_length": 30.878472328186035, |
|
"epoch": 0.46715328467153283, |
|
"grad_norm": 0.4796462655067444, |
|
"kl": 0.004016876220703125, |
|
"learning_rate": 1.3090169943749473e-06, |
|
"loss": 0.0, |
|
"reward": 0.5217013955116272, |
|
"reward_std": 0.36933426558971405, |
|
"rewards/semantic_entropy": 0.5217013955116272, |
|
"step": 8 |
|
}, |
|
{ |
|
"completion_length": 34.5590283870697, |
|
"epoch": 0.5255474452554745, |
|
"grad_norm": 0.43365350365638733, |
|
"kl": 0.0037174224853515625, |
|
"learning_rate": 1.1045284632676535e-06, |
|
"loss": 0.0, |
|
"reward": 0.4444444589316845, |
|
"reward_std": 0.4142540544271469, |
|
"rewards/semantic_entropy": 0.4444444589316845, |
|
"step": 9 |
|
}, |
|
{ |
|
"completion_length": 31.54600763320923, |
|
"epoch": 0.583941605839416, |
|
"grad_norm": 0.3461940586566925, |
|
"kl": 0.0054531097412109375, |
|
"learning_rate": 8.954715367323466e-07, |
|
"loss": 0.0, |
|
"reward": 0.5312499962747097, |
|
"reward_std": 0.33972141705453396, |
|
"rewards/semantic_entropy": 0.5312499962747097, |
|
"step": 10 |
|
}, |
|
{ |
|
"completion_length": 25.934895992279053, |
|
"epoch": 0.6423357664233577, |
|
"grad_norm": 0.7275694012641907, |
|
"kl": 0.0059986114501953125, |
|
"learning_rate": 6.909830056250526e-07, |
|
"loss": 0.0, |
|
"reward": 0.5425347350537777, |
|
"reward_std": 0.39345845952630043, |
|
"rewards/semantic_entropy": 0.5425347350537777, |
|
"step": 11 |
|
}, |
|
{ |
|
"completion_length": 27.58420157432556, |
|
"epoch": 0.7007299270072993, |
|
"grad_norm": 0.578926146030426, |
|
"kl": 0.010272979736328125, |
|
"learning_rate": 5.000000000000002e-07, |
|
"loss": 0.0, |
|
"reward": 0.5564236119389534, |
|
"reward_std": 0.38245424441993237, |
|
"rewards/semantic_entropy": 0.5564236119389534, |
|
"step": 12 |
|
}, |
|
{ |
|
"completion_length": 32.76302146911621, |
|
"epoch": 0.7591240875912408, |
|
"grad_norm": 0.3081968128681183, |
|
"kl": 0.0044574737548828125, |
|
"learning_rate": 3.308693936411421e-07, |
|
"loss": 0.0, |
|
"reward": 0.4583333395421505, |
|
"reward_std": 0.39592672139406204, |
|
"rewards/semantic_entropy": 0.4583333395421505, |
|
"step": 13 |
|
}, |
|
{ |
|
"completion_length": 28.50086808204651, |
|
"epoch": 0.8175182481751825, |
|
"grad_norm": 0.4480704367160797, |
|
"kl": 0.006603240966796875, |
|
"learning_rate": 1.9098300562505264e-07, |
|
"loss": 0.0, |
|
"reward": 0.4782986231148243, |
|
"reward_std": 0.3709658682346344, |
|
"rewards/semantic_entropy": 0.4782986231148243, |
|
"step": 14 |
|
}, |
|
{ |
|
"completion_length": 28.36545181274414, |
|
"epoch": 0.8759124087591241, |
|
"grad_norm": 0.2726985514163971, |
|
"kl": 0.00714111328125, |
|
"learning_rate": 8.645454235739902e-08, |
|
"loss": 0.0, |
|
"reward": 0.572048619389534, |
|
"reward_std": 0.3576664440333843, |
|
"rewards/semantic_entropy": 0.572048619389534, |
|
"step": 15 |
|
}, |
|
{ |
|
"completion_length": 29.555555820465088, |
|
"epoch": 0.9343065693430657, |
|
"grad_norm": 0.5542200207710266, |
|
"kl": 0.011875152587890625, |
|
"learning_rate": 2.185239926619431e-08, |
|
"loss": 0.0, |
|
"reward": 0.5442708358168602, |
|
"reward_std": 0.4052053317427635, |
|
"rewards/semantic_entropy": 0.5442708358168602, |
|
"step": 16 |
|
}, |
|
{ |
|
"completion_length": 28.62326431274414, |
|
"epoch": 0.9927007299270073, |
|
"grad_norm": 0.4658753275871277, |
|
"kl": 0.010679244995117188, |
|
"learning_rate": 0.0, |
|
"loss": 0.0, |
|
"reward": 0.564236119389534, |
|
"reward_std": 0.3762592002749443, |
|
"rewards/semantic_entropy": 0.564236119389534, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.9927007299270073, |
|
"step": 17, |
|
"total_flos": 0.0, |
|
"train_loss": 2.8922741848572622e-08, |
|
"train_runtime": 6158.5867, |
|
"train_samples_per_second": 0.133, |
|
"train_steps_per_second": 0.003 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 17, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 10, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|