{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9963369963369964,
  "eval_steps": 1000,
  "global_step": 204,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.004884004884004884,
      "grad_norm": 2.3243459220572165,
      "learning_rate": 2.3809523809523807e-08,
      "logits/chosen": -2.550273895263672,
      "logits/rejected": -2.5806894302368164,
      "logps/chosen": -424.7008056640625,
      "logps/rejected": -390.49554443359375,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.04884004884004884,
      "grad_norm": 2.4712584301903604,
      "learning_rate": 2.3809523809523806e-07,
      "logits/chosen": -2.4481005668640137,
      "logits/rejected": -2.474926471710205,
      "logps/chosen": -395.8595886230469,
      "logps/rejected": -384.5038146972656,
      "loss": 0.6931,
      "rewards/accuracies": 0.4635416567325592,
      "rewards/chosen": 0.00013807932555209845,
      "rewards/margins": 0.0004333473916631192,
      "rewards/rejected": -0.00029526810976676643,
      "step": 10
    },
    {
      "epoch": 0.09768009768009768,
      "grad_norm": 2.318089253747947,
      "learning_rate": 4.761904761904761e-07,
      "logits/chosen": -2.445664167404175,
      "logits/rejected": -2.4723546504974365,
      "logps/chosen": -393.4665222167969,
      "logps/rejected": -377.8502197265625,
      "loss": 0.6905,
      "rewards/accuracies": 0.67578125,
      "rewards/chosen": 0.00608012406155467,
      "rewards/margins": 0.005808630492538214,
      "rewards/rejected": 0.00027149339439347386,
      "step": 20
    },
    {
      "epoch": 0.14652014652014653,
      "grad_norm": 2.5283415680520225,
      "learning_rate": 4.970219740227693e-07,
      "logits/chosen": -2.494197368621826,
      "logits/rejected": -2.5383658409118652,
      "logps/chosen": -393.2831115722656,
      "logps/rejected": -383.0456237792969,
      "loss": 0.6754,
      "rewards/accuracies": 0.807812511920929,
      "rewards/chosen": 0.03202961012721062,
      "rewards/margins": 0.037289537489414215,
      "rewards/rejected": -0.005259926896542311,
      "step": 30
    },
    {
      "epoch": 0.19536019536019536,
      "grad_norm": 2.5364879915405267,
      "learning_rate": 4.868186180746791e-07,
      "logits/chosen": -2.5153324604034424,
      "logits/rejected": -2.5360398292541504,
      "logps/chosen": -387.1582946777344,
      "logps/rejected": -379.3692932128906,
      "loss": 0.647,
      "rewards/accuracies": 0.8218749761581421,
      "rewards/chosen": 0.0628650039434433,
      "rewards/margins": 0.09538714587688446,
      "rewards/rejected": -0.032522134482860565,
      "step": 40
    },
    {
      "epoch": 0.2442002442002442,
      "grad_norm": 2.2699251495146964,
      "learning_rate": 4.6965306126428705e-07,
      "logits/chosen": -2.539130449295044,
      "logits/rejected": -2.5619969367980957,
      "logps/chosen": -404.4756774902344,
      "logps/rejected": -406.6902770996094,
      "loss": 0.5925,
      "rewards/accuracies": 0.82421875,
      "rewards/chosen": -0.0003643702657427639,
      "rewards/margins": 0.2586084008216858,
      "rewards/rejected": -0.25897279381752014,
      "step": 50
    },
    {
      "epoch": 0.29304029304029305,
      "grad_norm": 2.4145672311111994,
      "learning_rate": 4.460299516441776e-07,
      "logits/chosen": -2.550515651702881,
      "logits/rejected": -2.577197551727295,
      "logps/chosen": -413.14947509765625,
      "logps/rejected": -442.47674560546875,
      "loss": 0.5417,
      "rewards/accuracies": 0.801562488079071,
      "rewards/chosen": -0.15052883327007294,
      "rewards/margins": 0.4667808413505554,
      "rewards/rejected": -0.6173096895217896,
      "step": 60
    },
    {
      "epoch": 0.3418803418803419,
      "grad_norm": 2.7821486929434056,
      "learning_rate": 4.166437820523908e-07,
      "logits/chosen": -2.5242340564727783,
      "logits/rejected": -2.5425312519073486,
      "logps/chosen": -446.6337890625,
      "logps/rejected": -487.9483337402344,
      "loss": 0.5011,
      "rewards/accuracies": 0.8023437261581421,
      "rewards/chosen": -0.4636126160621643,
      "rewards/margins": 0.6593486070632935,
      "rewards/rejected": -1.1229612827301025,
      "step": 70
    },
    {
      "epoch": 0.3907203907203907,
      "grad_norm": 2.754390105851781,
      "learning_rate": 3.8235847280454626e-07,
      "logits/chosen": -2.4175186157226562,
      "logits/rejected": -2.449018955230713,
      "logps/chosen": -507.10357666015625,
      "logps/rejected": -592.0707397460938,
      "loss": 0.4575,
      "rewards/accuracies": 0.80078125,
      "rewards/chosen": -1.1133525371551514,
      "rewards/margins": 0.9770663380622864,
      "rewards/rejected": -2.090418815612793,
      "step": 80
    },
    {
      "epoch": 0.43956043956043955,
      "grad_norm": 2.6724675179161568,
      "learning_rate": 3.4418197340879627e-07,
      "logits/chosen": -2.409747838973999,
      "logits/rejected": -2.4172959327697754,
      "logps/chosen": -507.91876220703125,
      "logps/rejected": -625.10205078125,
      "loss": 0.4121,
      "rewards/accuracies": 0.835156261920929,
      "rewards/chosen": -1.0922380685806274,
      "rewards/margins": 1.3621976375579834,
      "rewards/rejected": -2.4544358253479004,
      "step": 90
    },
    {
      "epoch": 0.4884004884004884,
      "grad_norm": 3.151335337005712,
      "learning_rate": 3.032366299846039e-07,
      "logits/chosen": -2.4340109825134277,
      "logits/rejected": -2.4465105533599854,
      "logps/chosen": -532.0742797851562,
      "logps/rejected": -683.5274658203125,
      "loss": 0.3892,
      "rewards/accuracies": 0.8335937261581421,
      "rewards/chosen": -1.2753849029541016,
      "rewards/margins": 1.6482696533203125,
      "rewards/rejected": -2.923654794692993,
      "step": 100
    },
    {
      "epoch": 0.5372405372405372,
      "grad_norm": 2.717335654672678,
      "learning_rate": 2.6072618954988863e-07,
      "logits/chosen": -2.4394848346710205,
      "logits/rejected": -2.442568778991699,
      "logps/chosen": -518.7210693359375,
      "logps/rejected": -677.293701171875,
      "loss": 0.3834,
      "rewards/accuracies": 0.827343761920929,
      "rewards/chosen": -1.2031551599502563,
      "rewards/margins": 1.6999378204345703,
      "rewards/rejected": -2.903092861175537,
      "step": 110
    },
    {
      "epoch": 0.5860805860805861,
      "grad_norm": 3.1603927594667005,
      "learning_rate": 2.1790041121336222e-07,
      "logits/chosen": -2.4521875381469727,
      "logits/rejected": -2.460845470428467,
      "logps/chosen": -532.1248168945312,
      "logps/rejected": -704.0490112304688,
      "loss": 0.3613,
      "rewards/accuracies": 0.8414062261581421,
      "rewards/chosen": -1.3992774486541748,
      "rewards/margins": 1.8053524494171143,
      "rewards/rejected": -3.204629898071289,
      "step": 120
    },
    {
      "epoch": 0.6349206349206349,
      "grad_norm": 2.877029930356179,
      "learning_rate": 1.7601832466317766e-07,
      "logits/chosen": -2.4438443183898926,
      "logits/rejected": -2.462118148803711,
      "logps/chosen": -540.3773193359375,
      "logps/rejected": -711.51416015625,
      "loss": 0.3581,
      "rewards/accuracies": 0.8453124761581421,
      "rewards/chosen": -1.4280272722244263,
      "rewards/margins": 1.8352330923080444,
      "rewards/rejected": -3.2632603645324707,
      "step": 130
    },
    {
      "epoch": 0.6837606837606838,
      "grad_norm": 2.8365028089984454,
      "learning_rate": 1.3631121611097362e-07,
      "logits/chosen": -2.4740078449249268,
      "logits/rejected": -2.487417697906494,
      "logps/chosen": -546.05859375,
      "logps/rejected": -727.7886962890625,
      "loss": 0.3495,
      "rewards/accuracies": 0.8609374761581421,
      "rewards/chosen": -1.4827759265899658,
      "rewards/margins": 1.9239017963409424,
      "rewards/rejected": -3.4066779613494873,
      "step": 140
    },
    {
      "epoch": 0.7326007326007326,
      "grad_norm": 3.083741716442478,
      "learning_rate": 9.9946429862908e-08,
      "logits/chosen": -2.462756633758545,
      "logits/rejected": -2.4654526710510254,
      "logps/chosen": -549.0475463867188,
      "logps/rejected": -722.2012329101562,
      "loss": 0.3454,
      "rewards/accuracies": 0.8492187261581421,
      "rewards/chosen": -1.5003674030303955,
      "rewards/margins": 1.9490848779678345,
      "rewards/rejected": -3.4494519233703613,
      "step": 150
    },
    {
      "epoch": 0.7814407814407814,
      "grad_norm": 3.086204461780561,
      "learning_rate": 6.799304971075381e-08,
      "logits/chosen": -2.4620633125305176,
      "logits/rejected": -2.4670565128326416,
      "logps/chosen": -539.4750366210938,
      "logps/rejected": -717.6760864257812,
      "loss": 0.3426,
      "rewards/accuracies": 0.859375,
      "rewards/chosen": -1.4655063152313232,
      "rewards/margins": 1.9486806392669678,
      "rewards/rejected": -3.41418719291687,
      "step": 160
    },
    {
      "epoch": 0.8302808302808303,
      "grad_norm": 3.152043777770028,
      "learning_rate": 4.1390469071538175e-08,
      "logits/chosen": -2.4839229583740234,
      "logits/rejected": -2.4979355335235596,
      "logps/chosen": -547.6788940429688,
      "logps/rejected": -723.7260131835938,
      "loss": 0.3417,
      "rewards/accuracies": 0.839062511920929,
      "rewards/chosen": -1.532496690750122,
      "rewards/margins": 1.8789927959442139,
      "rewards/rejected": -3.411489486694336,
      "step": 170
    },
    {
      "epoch": 0.8791208791208791,
      "grad_norm": 3.34422267800285,
      "learning_rate": 2.0920773878248837e-08,
      "logits/chosen": -2.4812464714050293,
      "logits/rejected": -2.49059796333313,
      "logps/chosen": -556.6783447265625,
      "logps/rejected": -743.0768432617188,
      "loss": 0.3413,
      "rewards/accuracies": 0.8500000238418579,
      "rewards/chosen": -1.5343992710113525,
      "rewards/margins": 2.0101265907287598,
      "rewards/rejected": -3.5445258617401123,
      "step": 180
    },
    {
      "epoch": 0.927960927960928,
      "grad_norm": 3.2655151670502574,
      "learning_rate": 7.185750133542168e-09,
      "logits/chosen": -2.473402500152588,
      "logits/rejected": -2.471391201019287,
      "logps/chosen": -554.5660400390625,
      "logps/rejected": -741.9495849609375,
      "loss": 0.3343,
      "rewards/accuracies": 0.8539062738418579,
      "rewards/chosen": -1.5347990989685059,
      "rewards/margins": 2.0656068325042725,
      "rewards/rejected": -3.6004059314727783,
      "step": 190
    },
    {
      "epoch": 0.9768009768009768,
      "grad_norm": 4.051209744645471,
      "learning_rate": 5.891920784984184e-10,
      "logits/chosen": -2.456406354904175,
      "logits/rejected": -2.4556210041046143,
      "logps/chosen": -550.1729125976562,
      "logps/rejected": -737.451904296875,
      "loss": 0.3391,
      "rewards/accuracies": 0.875,
      "rewards/chosen": -1.517017126083374,
      "rewards/margins": 2.044365406036377,
      "rewards/rejected": -3.561382293701172,
      "step": 200
    },
    {
      "epoch": 0.9963369963369964,
      "step": 204,
      "total_flos": 0.0,
      "train_loss": 0.4527332771058176,
      "train_runtime": 5381.1516,
      "train_samples_per_second": 38.96,
      "train_steps_per_second": 0.038
    }
  ],
  "logging_steps": 10,
  "max_steps": 204,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}