NicholasCorrado's picture
Model save
33c0f9c verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9963369963369964,
"eval_steps": 1000,
"global_step": 204,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.004884004884004884,
"grad_norm": 2.3243459220572165,
"learning_rate": 2.3809523809523807e-08,
"logits/chosen": -2.550273895263672,
"logits/rejected": -2.5806894302368164,
"logps/chosen": -424.7008056640625,
"logps/rejected": -390.49554443359375,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.04884004884004884,
"grad_norm": 2.4712584301903604,
"learning_rate": 2.3809523809523806e-07,
"logits/chosen": -2.4481005668640137,
"logits/rejected": -2.474926471710205,
"logps/chosen": -395.8595886230469,
"logps/rejected": -384.5038146972656,
"loss": 0.6931,
"rewards/accuracies": 0.4635416567325592,
"rewards/chosen": 0.00013807932555209845,
"rewards/margins": 0.0004333473916631192,
"rewards/rejected": -0.00029526810976676643,
"step": 10
},
{
"epoch": 0.09768009768009768,
"grad_norm": 2.318089253747947,
"learning_rate": 4.761904761904761e-07,
"logits/chosen": -2.445664167404175,
"logits/rejected": -2.4723546504974365,
"logps/chosen": -393.4665222167969,
"logps/rejected": -377.8502197265625,
"loss": 0.6905,
"rewards/accuracies": 0.67578125,
"rewards/chosen": 0.00608012406155467,
"rewards/margins": 0.005808630492538214,
"rewards/rejected": 0.00027149339439347386,
"step": 20
},
{
"epoch": 0.14652014652014653,
"grad_norm": 2.5283415680520225,
"learning_rate": 4.970219740227693e-07,
"logits/chosen": -2.494197368621826,
"logits/rejected": -2.5383658409118652,
"logps/chosen": -393.2831115722656,
"logps/rejected": -383.0456237792969,
"loss": 0.6754,
"rewards/accuracies": 0.807812511920929,
"rewards/chosen": 0.03202961012721062,
"rewards/margins": 0.037289537489414215,
"rewards/rejected": -0.005259926896542311,
"step": 30
},
{
"epoch": 0.19536019536019536,
"grad_norm": 2.5364879915405267,
"learning_rate": 4.868186180746791e-07,
"logits/chosen": -2.5153324604034424,
"logits/rejected": -2.5360398292541504,
"logps/chosen": -387.1582946777344,
"logps/rejected": -379.3692932128906,
"loss": 0.647,
"rewards/accuracies": 0.8218749761581421,
"rewards/chosen": 0.0628650039434433,
"rewards/margins": 0.09538714587688446,
"rewards/rejected": -0.032522134482860565,
"step": 40
},
{
"epoch": 0.2442002442002442,
"grad_norm": 2.2699251495146964,
"learning_rate": 4.6965306126428705e-07,
"logits/chosen": -2.539130449295044,
"logits/rejected": -2.5619969367980957,
"logps/chosen": -404.4756774902344,
"logps/rejected": -406.6902770996094,
"loss": 0.5925,
"rewards/accuracies": 0.82421875,
"rewards/chosen": -0.0003643702657427639,
"rewards/margins": 0.2586084008216858,
"rewards/rejected": -0.25897279381752014,
"step": 50
},
{
"epoch": 0.29304029304029305,
"grad_norm": 2.4145672311111994,
"learning_rate": 4.460299516441776e-07,
"logits/chosen": -2.550515651702881,
"logits/rejected": -2.577197551727295,
"logps/chosen": -413.14947509765625,
"logps/rejected": -442.47674560546875,
"loss": 0.5417,
"rewards/accuracies": 0.801562488079071,
"rewards/chosen": -0.15052883327007294,
"rewards/margins": 0.4667808413505554,
"rewards/rejected": -0.6173096895217896,
"step": 60
},
{
"epoch": 0.3418803418803419,
"grad_norm": 2.7821486929434056,
"learning_rate": 4.166437820523908e-07,
"logits/chosen": -2.5242340564727783,
"logits/rejected": -2.5425312519073486,
"logps/chosen": -446.6337890625,
"logps/rejected": -487.9483337402344,
"loss": 0.5011,
"rewards/accuracies": 0.8023437261581421,
"rewards/chosen": -0.4636126160621643,
"rewards/margins": 0.6593486070632935,
"rewards/rejected": -1.1229612827301025,
"step": 70
},
{
"epoch": 0.3907203907203907,
"grad_norm": 2.754390105851781,
"learning_rate": 3.8235847280454626e-07,
"logits/chosen": -2.4175186157226562,
"logits/rejected": -2.449018955230713,
"logps/chosen": -507.10357666015625,
"logps/rejected": -592.0707397460938,
"loss": 0.4575,
"rewards/accuracies": 0.80078125,
"rewards/chosen": -1.1133525371551514,
"rewards/margins": 0.9770663380622864,
"rewards/rejected": -2.090418815612793,
"step": 80
},
{
"epoch": 0.43956043956043955,
"grad_norm": 2.6724675179161568,
"learning_rate": 3.4418197340879627e-07,
"logits/chosen": -2.409747838973999,
"logits/rejected": -2.4172959327697754,
"logps/chosen": -507.91876220703125,
"logps/rejected": -625.10205078125,
"loss": 0.4121,
"rewards/accuracies": 0.835156261920929,
"rewards/chosen": -1.0922380685806274,
"rewards/margins": 1.3621976375579834,
"rewards/rejected": -2.4544358253479004,
"step": 90
},
{
"epoch": 0.4884004884004884,
"grad_norm": 3.151335337005712,
"learning_rate": 3.032366299846039e-07,
"logits/chosen": -2.4340109825134277,
"logits/rejected": -2.4465105533599854,
"logps/chosen": -532.0742797851562,
"logps/rejected": -683.5274658203125,
"loss": 0.3892,
"rewards/accuracies": 0.8335937261581421,
"rewards/chosen": -1.2753849029541016,
"rewards/margins": 1.6482696533203125,
"rewards/rejected": -2.923654794692993,
"step": 100
},
{
"epoch": 0.5372405372405372,
"grad_norm": 2.717335654672678,
"learning_rate": 2.6072618954988863e-07,
"logits/chosen": -2.4394848346710205,
"logits/rejected": -2.442568778991699,
"logps/chosen": -518.7210693359375,
"logps/rejected": -677.293701171875,
"loss": 0.3834,
"rewards/accuracies": 0.827343761920929,
"rewards/chosen": -1.2031551599502563,
"rewards/margins": 1.6999378204345703,
"rewards/rejected": -2.903092861175537,
"step": 110
},
{
"epoch": 0.5860805860805861,
"grad_norm": 3.1603927594667005,
"learning_rate": 2.1790041121336222e-07,
"logits/chosen": -2.4521875381469727,
"logits/rejected": -2.460845470428467,
"logps/chosen": -532.1248168945312,
"logps/rejected": -704.0490112304688,
"loss": 0.3613,
"rewards/accuracies": 0.8414062261581421,
"rewards/chosen": -1.3992774486541748,
"rewards/margins": 1.8053524494171143,
"rewards/rejected": -3.204629898071289,
"step": 120
},
{
"epoch": 0.6349206349206349,
"grad_norm": 2.877029930356179,
"learning_rate": 1.7601832466317766e-07,
"logits/chosen": -2.4438443183898926,
"logits/rejected": -2.462118148803711,
"logps/chosen": -540.3773193359375,
"logps/rejected": -711.51416015625,
"loss": 0.3581,
"rewards/accuracies": 0.8453124761581421,
"rewards/chosen": -1.4280272722244263,
"rewards/margins": 1.8352330923080444,
"rewards/rejected": -3.2632603645324707,
"step": 130
},
{
"epoch": 0.6837606837606838,
"grad_norm": 2.8365028089984454,
"learning_rate": 1.3631121611097362e-07,
"logits/chosen": -2.4740078449249268,
"logits/rejected": -2.487417697906494,
"logps/chosen": -546.05859375,
"logps/rejected": -727.7886962890625,
"loss": 0.3495,
"rewards/accuracies": 0.8609374761581421,
"rewards/chosen": -1.4827759265899658,
"rewards/margins": 1.9239017963409424,
"rewards/rejected": -3.4066779613494873,
"step": 140
},
{
"epoch": 0.7326007326007326,
"grad_norm": 3.083741716442478,
"learning_rate": 9.9946429862908e-08,
"logits/chosen": -2.462756633758545,
"logits/rejected": -2.4654526710510254,
"logps/chosen": -549.0475463867188,
"logps/rejected": -722.2012329101562,
"loss": 0.3454,
"rewards/accuracies": 0.8492187261581421,
"rewards/chosen": -1.5003674030303955,
"rewards/margins": 1.9490848779678345,
"rewards/rejected": -3.4494519233703613,
"step": 150
},
{
"epoch": 0.7814407814407814,
"grad_norm": 3.086204461780561,
"learning_rate": 6.799304971075381e-08,
"logits/chosen": -2.4620633125305176,
"logits/rejected": -2.4670565128326416,
"logps/chosen": -539.4750366210938,
"logps/rejected": -717.6760864257812,
"loss": 0.3426,
"rewards/accuracies": 0.859375,
"rewards/chosen": -1.4655063152313232,
"rewards/margins": 1.9486806392669678,
"rewards/rejected": -3.41418719291687,
"step": 160
},
{
"epoch": 0.8302808302808303,
"grad_norm": 3.152043777770028,
"learning_rate": 4.1390469071538175e-08,
"logits/chosen": -2.4839229583740234,
"logits/rejected": -2.4979355335235596,
"logps/chosen": -547.6788940429688,
"logps/rejected": -723.7260131835938,
"loss": 0.3417,
"rewards/accuracies": 0.839062511920929,
"rewards/chosen": -1.532496690750122,
"rewards/margins": 1.8789927959442139,
"rewards/rejected": -3.411489486694336,
"step": 170
},
{
"epoch": 0.8791208791208791,
"grad_norm": 3.34422267800285,
"learning_rate": 2.0920773878248837e-08,
"logits/chosen": -2.4812464714050293,
"logits/rejected": -2.49059796333313,
"logps/chosen": -556.6783447265625,
"logps/rejected": -743.0768432617188,
"loss": 0.3413,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -1.5343992710113525,
"rewards/margins": 2.0101265907287598,
"rewards/rejected": -3.5445258617401123,
"step": 180
},
{
"epoch": 0.927960927960928,
"grad_norm": 3.2655151670502574,
"learning_rate": 7.185750133542168e-09,
"logits/chosen": -2.473402500152588,
"logits/rejected": -2.471391201019287,
"logps/chosen": -554.5660400390625,
"logps/rejected": -741.9495849609375,
"loss": 0.3343,
"rewards/accuracies": 0.8539062738418579,
"rewards/chosen": -1.5347990989685059,
"rewards/margins": 2.0656068325042725,
"rewards/rejected": -3.6004059314727783,
"step": 190
},
{
"epoch": 0.9768009768009768,
"grad_norm": 4.051209744645471,
"learning_rate": 5.891920784984184e-10,
"logits/chosen": -2.456406354904175,
"logits/rejected": -2.4556210041046143,
"logps/chosen": -550.1729125976562,
"logps/rejected": -737.451904296875,
"loss": 0.3391,
"rewards/accuracies": 0.875,
"rewards/chosen": -1.517017126083374,
"rewards/margins": 2.044365406036377,
"rewards/rejected": -3.561382293701172,
"step": 200
},
{
"epoch": 0.9963369963369964,
"step": 204,
"total_flos": 0.0,
"train_loss": 0.4527332771058176,
"train_runtime": 5381.1516,
"train_samples_per_second": 38.96,
"train_steps_per_second": 0.038
}
],
"logging_steps": 10,
"max_steps": 204,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}