|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.998667732480682, |
|
"eval_steps": 500, |
|
"global_step": 1876, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.010658140154543033, |
|
"grad_norm": 0.5099978778638334, |
|
"learning_rate": 2.6595744680851066e-07, |
|
"logits/chosen": -0.9605575799942017, |
|
"logits/rejected": -0.8107998967170715, |
|
"logps/chosen": -72.61903381347656, |
|
"logps/rejected": -73.83064270019531, |
|
"loss": 0.6935, |
|
"rewards/accuracies": 0.40937501192092896, |
|
"rewards/chosen": -0.0005006835563108325, |
|
"rewards/margins": -0.0005729668191634119, |
|
"rewards/rejected": 7.228340837173164e-05, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.021316280309086066, |
|
"grad_norm": 0.45829133018801976, |
|
"learning_rate": 5.319148936170213e-07, |
|
"logits/chosen": -0.8250175714492798, |
|
"logits/rejected": -0.631248414516449, |
|
"logps/chosen": -73.33503723144531, |
|
"logps/rejected": -74.5386962890625, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.534375011920929, |
|
"rewards/chosen": 0.0001330056693404913, |
|
"rewards/margins": 0.0018458422273397446, |
|
"rewards/rejected": -0.001712836674414575, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0319744204636291, |
|
"grad_norm": 0.44986142629280784, |
|
"learning_rate": 7.97872340425532e-07, |
|
"logits/chosen": -0.9430956840515137, |
|
"logits/rejected": -0.723763644695282, |
|
"logps/chosen": -72.37335968017578, |
|
"logps/rejected": -74.72418975830078, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": 0.00010007549280999228, |
|
"rewards/margins": -0.00021309514704626054, |
|
"rewards/rejected": 0.00031317074899561703, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04263256061817213, |
|
"grad_norm": 0.5062020560848992, |
|
"learning_rate": 1.0638297872340427e-06, |
|
"logits/chosen": -1.0635473728179932, |
|
"logits/rejected": -0.8027804493904114, |
|
"logps/chosen": -74.05973815917969, |
|
"logps/rejected": -77.31621551513672, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 0.0025473046116530895, |
|
"rewards/margins": 0.001502807135693729, |
|
"rewards/rejected": 0.0010444973595440388, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05329070077271516, |
|
"grad_norm": 0.4603926634495011, |
|
"learning_rate": 1.3297872340425533e-06, |
|
"logits/chosen": -0.9184844493865967, |
|
"logits/rejected": -0.6910719871520996, |
|
"logps/chosen": -70.87031555175781, |
|
"logps/rejected": -70.7765121459961, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.5437500476837158, |
|
"rewards/chosen": 0.005035719368606806, |
|
"rewards/margins": 0.0017978404648602009, |
|
"rewards/rejected": 0.0032378786709159613, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0639488409272582, |
|
"grad_norm": 0.46432596513533986, |
|
"learning_rate": 1.595744680851064e-06, |
|
"logits/chosen": -0.7245146632194519, |
|
"logits/rejected": -0.6810291409492493, |
|
"logps/chosen": -73.48963928222656, |
|
"logps/rejected": -71.0315933227539, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.503125011920929, |
|
"rewards/chosen": 0.004202081356197596, |
|
"rewards/margins": 0.003526979358866811, |
|
"rewards/rejected": 0.0006751015898771584, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07460698108180122, |
|
"grad_norm": 0.4690984594634577, |
|
"learning_rate": 1.8617021276595745e-06, |
|
"logits/chosen": -0.8976808786392212, |
|
"logits/rejected": -0.6568697690963745, |
|
"logps/chosen": -73.86611938476562, |
|
"logps/rejected": -73.49287414550781, |
|
"loss": 0.6897, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.008846514858305454, |
|
"rewards/margins": 0.0072571514174342155, |
|
"rewards/rejected": 0.0015893635572865605, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.08526512123634426, |
|
"grad_norm": 0.4096155378072253, |
|
"learning_rate": 2.1276595744680853e-06, |
|
"logits/chosen": -0.841995894908905, |
|
"logits/rejected": -0.6835945844650269, |
|
"logps/chosen": -73.09089660644531, |
|
"logps/rejected": -73.23654174804688, |
|
"loss": 0.6876, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": 0.0191436018794775, |
|
"rewards/margins": 0.011691140942275524, |
|
"rewards/rejected": 0.0074524604715406895, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.09592326139088729, |
|
"grad_norm": 0.47209263752224684, |
|
"learning_rate": 2.393617021276596e-06, |
|
"logits/chosen": -0.7625155448913574, |
|
"logits/rejected": -0.584552526473999, |
|
"logps/chosen": -72.62481689453125, |
|
"logps/rejected": -76.51739501953125, |
|
"loss": 0.6848, |
|
"rewards/accuracies": 0.621874988079071, |
|
"rewards/chosen": 0.03340023756027222, |
|
"rewards/margins": 0.01788860373198986, |
|
"rewards/rejected": 0.015511633828282356, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.10658140154543032, |
|
"grad_norm": 0.48099658567207737, |
|
"learning_rate": 2.6595744680851065e-06, |
|
"logits/chosen": -0.8015834093093872, |
|
"logits/rejected": -0.6126965880393982, |
|
"logps/chosen": -73.71533203125, |
|
"logps/rejected": -72.98583221435547, |
|
"loss": 0.68, |
|
"rewards/accuracies": 0.628125011920929, |
|
"rewards/chosen": 0.04279342293739319, |
|
"rewards/margins": 0.02851303480565548, |
|
"rewards/rejected": 0.014280391857028008, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.11723954169997336, |
|
"grad_norm": 0.5108226295974576, |
|
"learning_rate": 2.9255319148936174e-06, |
|
"logits/chosen": -0.8261767625808716, |
|
"logits/rejected": -0.6343762874603271, |
|
"logps/chosen": -71.81920623779297, |
|
"logps/rejected": -71.51973724365234, |
|
"loss": 0.6765, |
|
"rewards/accuracies": 0.6218750476837158, |
|
"rewards/chosen": 0.05313969403505325, |
|
"rewards/margins": 0.03731786087155342, |
|
"rewards/rejected": 0.01582183688879013, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.1278976818545164, |
|
"grad_norm": 0.5891809419007947, |
|
"learning_rate": 3.191489361702128e-06, |
|
"logits/chosen": -1.0786638259887695, |
|
"logits/rejected": -0.8459432125091553, |
|
"logps/chosen": -69.84176635742188, |
|
"logps/rejected": -72.5257568359375, |
|
"loss": 0.6677, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": 0.1408819854259491, |
|
"rewards/margins": 0.06051182374358177, |
|
"rewards/rejected": 0.08037016540765762, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.13855582200905942, |
|
"grad_norm": 0.5309539832117165, |
|
"learning_rate": 3.457446808510639e-06, |
|
"logits/chosen": -1.1783032417297363, |
|
"logits/rejected": -0.9523464441299438, |
|
"logps/chosen": -72.51424407958984, |
|
"logps/rejected": -75.75682067871094, |
|
"loss": 0.6542, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.08351622521877289, |
|
"rewards/margins": 0.09955398738384247, |
|
"rewards/rejected": -0.016037756577134132, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.14921396216360244, |
|
"grad_norm": 0.7247117044746004, |
|
"learning_rate": 3.723404255319149e-06, |
|
"logits/chosen": -1.2606769800186157, |
|
"logits/rejected": -0.9969131946563721, |
|
"logps/chosen": -81.59758758544922, |
|
"logps/rejected": -87.05096435546875, |
|
"loss": 0.6594, |
|
"rewards/accuracies": 0.6406250596046448, |
|
"rewards/chosen": -0.23210671544075012, |
|
"rewards/margins": 0.12517394125461578, |
|
"rewards/rejected": -0.3572806119918823, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.15987210231814547, |
|
"grad_norm": 0.9229972536259996, |
|
"learning_rate": 3.98936170212766e-06, |
|
"logits/chosen": -1.614499568939209, |
|
"logits/rejected": -1.2407586574554443, |
|
"logps/chosen": -88.99467468261719, |
|
"logps/rejected": -97.67521667480469, |
|
"loss": 0.6289, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -0.5146691203117371, |
|
"rewards/margins": 0.1912478804588318, |
|
"rewards/rejected": -0.7059170007705688, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.17053024247268853, |
|
"grad_norm": 1.693580004178548, |
|
"learning_rate": 4.255319148936171e-06, |
|
"logits/chosen": -1.3985371589660645, |
|
"logits/rejected": -1.2293888330459595, |
|
"logps/chosen": -89.80670928955078, |
|
"logps/rejected": -99.3499755859375, |
|
"loss": 0.6201, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.522789716720581, |
|
"rewards/margins": 0.2634372115135193, |
|
"rewards/rejected": -0.7862269282341003, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.18118838262723155, |
|
"grad_norm": 0.7848494213232622, |
|
"learning_rate": 4.521276595744681e-06, |
|
"logits/chosen": -1.1521015167236328, |
|
"logits/rejected": -0.989128589630127, |
|
"logps/chosen": -80.37559509277344, |
|
"logps/rejected": -84.92858123779297, |
|
"loss": 0.6527, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.21056941151618958, |
|
"rewards/margins": 0.1813703328371048, |
|
"rewards/rejected": -0.3919397294521332, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.19184652278177458, |
|
"grad_norm": 0.7201873638245726, |
|
"learning_rate": 4.787234042553192e-06, |
|
"logits/chosen": -1.1590619087219238, |
|
"logits/rejected": -0.9973791241645813, |
|
"logps/chosen": -80.75967407226562, |
|
"logps/rejected": -88.94229888916016, |
|
"loss": 0.6232, |
|
"rewards/accuracies": 0.6531250476837158, |
|
"rewards/chosen": -0.20347803831100464, |
|
"rewards/margins": 0.22162552177906036, |
|
"rewards/rejected": -0.4251035451889038, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.2025046629363176, |
|
"grad_norm": 0.7587079633120346, |
|
"learning_rate": 4.999982680938129e-06, |
|
"logits/chosen": -1.5915894508361816, |
|
"logits/rejected": -1.2486015558242798, |
|
"logps/chosen": -100.40947723388672, |
|
"logps/rejected": -107.34376525878906, |
|
"loss": 0.6573, |
|
"rewards/accuracies": 0.590624988079071, |
|
"rewards/chosen": -0.8035547733306885, |
|
"rewards/margins": 0.14539355039596558, |
|
"rewards/rejected": -0.948948323726654, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.21316280309086064, |
|
"grad_norm": 0.7531059201322188, |
|
"learning_rate": 4.999376538968061e-06, |
|
"logits/chosen": -1.907850980758667, |
|
"logits/rejected": -1.534518837928772, |
|
"logps/chosen": -95.59463500976562, |
|
"logps/rejected": -104.54216766357422, |
|
"loss": 0.6258, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.6918821930885315, |
|
"rewards/margins": 0.1947452574968338, |
|
"rewards/rejected": -0.8866274356842041, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22382094324540366, |
|
"grad_norm": 0.9103047569938635, |
|
"learning_rate": 4.997904683849418e-06, |
|
"logits/chosen": -2.1851470470428467, |
|
"logits/rejected": -1.981560468673706, |
|
"logps/chosen": -120.08112335205078, |
|
"logps/rejected": -130.74111938476562, |
|
"loss": 0.622, |
|
"rewards/accuracies": 0.628125011920929, |
|
"rewards/chosen": -1.4317903518676758, |
|
"rewards/margins": 0.2795388698577881, |
|
"rewards/rejected": -1.7113292217254639, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.23447908339994672, |
|
"grad_norm": 1.0393133794007514, |
|
"learning_rate": 4.9955676253910716e-06, |
|
"logits/chosen": -2.359365940093994, |
|
"logits/rejected": -2.082740306854248, |
|
"logps/chosen": -95.99958801269531, |
|
"logps/rejected": -109.64250183105469, |
|
"loss": 0.6036, |
|
"rewards/accuracies": 0.6656249761581421, |
|
"rewards/chosen": -0.6478599309921265, |
|
"rewards/margins": 0.39253899455070496, |
|
"rewards/rejected": -1.0403989553451538, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.24513722355448975, |
|
"grad_norm": 0.8489330437505648, |
|
"learning_rate": 4.992366173083788e-06, |
|
"logits/chosen": -2.580554723739624, |
|
"logits/rejected": -2.2353944778442383, |
|
"logps/chosen": -89.865234375, |
|
"logps/rejected": -101.31944274902344, |
|
"loss": 0.6068, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.4718262255191803, |
|
"rewards/margins": 0.3274495005607605, |
|
"rewards/rejected": -0.7992757558822632, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.2557953637090328, |
|
"grad_norm": 0.7366259121737208, |
|
"learning_rate": 4.988301435819853e-06, |
|
"logits/chosen": -2.4100265502929688, |
|
"logits/rejected": -2.114309549331665, |
|
"logps/chosen": -89.16217803955078, |
|
"logps/rejected": -95.29388427734375, |
|
"loss": 0.6553, |
|
"rewards/accuracies": 0.6031249761581421, |
|
"rewards/chosen": -0.4490286111831665, |
|
"rewards/margins": 0.18656983971595764, |
|
"rewards/rejected": -0.6355984210968018, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.2664535038635758, |
|
"grad_norm": 0.941627417718458, |
|
"learning_rate": 4.983374821508974e-06, |
|
"logits/chosen": -2.4510676860809326, |
|
"logits/rejected": -2.0803287029266357, |
|
"logps/chosen": -80.80522918701172, |
|
"logps/rejected": -90.66275787353516, |
|
"loss": 0.623, |
|
"rewards/accuracies": 0.684374988079071, |
|
"rewards/chosen": -0.29358971118927, |
|
"rewards/margins": 0.21200178563594818, |
|
"rewards/rejected": -0.5055915117263794, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.27711164401811883, |
|
"grad_norm": 1.1446943232849316, |
|
"learning_rate": 4.977588036590625e-06, |
|
"logits/chosen": -2.885624885559082, |
|
"logits/rejected": -2.4689621925354004, |
|
"logps/chosen": -95.2115249633789, |
|
"logps/rejected": -109.21052551269531, |
|
"loss": 0.5669, |
|
"rewards/accuracies": 0.7156250476837158, |
|
"rewards/chosen": -0.6023179292678833, |
|
"rewards/margins": 0.49607741832733154, |
|
"rewards/rejected": -1.0983953475952148, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.28776978417266186, |
|
"grad_norm": 1.1908008252723357, |
|
"learning_rate": 4.970943085442984e-06, |
|
"logits/chosen": -2.8998217582702637, |
|
"logits/rejected": -2.76627516746521, |
|
"logps/chosen": -96.44219970703125, |
|
"logps/rejected": -111.35501861572266, |
|
"loss": 0.6231, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.7342670559883118, |
|
"rewards/margins": 0.44367682933807373, |
|
"rewards/rejected": -1.1779439449310303, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.2984279243272049, |
|
"grad_norm": 0.9268614082784732, |
|
"learning_rate": 4.963442269688671e-06, |
|
"logits/chosen": -3.221184730529785, |
|
"logits/rejected": -2.929337739944458, |
|
"logps/chosen": -103.54019165039062, |
|
"logps/rejected": -114.9659652709961, |
|
"loss": 0.6445, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.9210506081581116, |
|
"rewards/margins": 0.28211480379104614, |
|
"rewards/rejected": -1.2031654119491577, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.3090860644817479, |
|
"grad_norm": 0.894031360936913, |
|
"learning_rate": 4.955088187397535e-06, |
|
"logits/chosen": -3.3140995502471924, |
|
"logits/rejected": -2.8253672122955322, |
|
"logps/chosen": -103.28987121582031, |
|
"logps/rejected": -117.38426208496094, |
|
"loss": 0.5879, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.9232074618339539, |
|
"rewards/margins": 0.3413289785385132, |
|
"rewards/rejected": -1.2645364999771118, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.31974420463629094, |
|
"grad_norm": 1.0644981421218993, |
|
"learning_rate": 4.9458837321867515e-06, |
|
"logits/chosen": -3.37444806098938, |
|
"logits/rejected": -2.863433599472046, |
|
"logps/chosen": -102.99623107910156, |
|
"logps/rejected": -115.40965270996094, |
|
"loss": 0.5798, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -0.8800393342971802, |
|
"rewards/margins": 0.4223412871360779, |
|
"rewards/rejected": -1.3023805618286133, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.330402344790834, |
|
"grad_norm": 0.9946022978074295, |
|
"learning_rate": 4.935832092218559e-06, |
|
"logits/chosen": -3.430798053741455, |
|
"logits/rejected": -2.994150400161743, |
|
"logps/chosen": -105.30961608886719, |
|
"logps/rejected": -122.96747589111328, |
|
"loss": 0.5732, |
|
"rewards/accuracies": 0.684374988079071, |
|
"rewards/chosen": -0.9647340774536133, |
|
"rewards/margins": 0.5575865507125854, |
|
"rewards/rejected": -1.5223206281661987, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.34106048494537705, |
|
"grad_norm": 0.9432002221069283, |
|
"learning_rate": 4.92493674909597e-06, |
|
"logits/chosen": -3.490004062652588, |
|
"logits/rejected": -3.0869531631469727, |
|
"logps/chosen": -104.39641571044922, |
|
"logps/rejected": -123.97718048095703, |
|
"loss": 0.5751, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9189853668212891, |
|
"rewards/margins": 0.5349130630493164, |
|
"rewards/rejected": -1.4538984298706055, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.3517186250999201, |
|
"grad_norm": 0.9967970600797661, |
|
"learning_rate": 4.913201476656839e-06, |
|
"logits/chosen": -3.481781482696533, |
|
"logits/rejected": -3.0183358192443848, |
|
"logps/chosen": -106.99278259277344, |
|
"logps/rejected": -125.36199188232422, |
|
"loss": 0.5474, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.0029829740524292, |
|
"rewards/margins": 0.5545629858970642, |
|
"rewards/rejected": -1.5575461387634277, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.3623767652544631, |
|
"grad_norm": 1.194344610404915, |
|
"learning_rate": 4.900630339666718e-06, |
|
"logits/chosen": -3.7093286514282227, |
|
"logits/rejected": -3.320272207260132, |
|
"logps/chosen": -117.7208023071289, |
|
"logps/rejected": -139.9761962890625, |
|
"loss": 0.5318, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -1.3209178447723389, |
|
"rewards/margins": 0.6359946727752686, |
|
"rewards/rejected": -1.9569125175476074, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.37303490540900613, |
|
"grad_norm": 1.301800611342356, |
|
"learning_rate": 4.887227692410931e-06, |
|
"logits/chosen": -3.6813101768493652, |
|
"logits/rejected": -3.4629955291748047, |
|
"logps/chosen": -122.62342071533203, |
|
"logps/rejected": -139.7255401611328, |
|
"loss": 0.6146, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.4802918434143066, |
|
"rewards/margins": 0.5190926790237427, |
|
"rewards/rejected": -1.9993845224380493, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.38369304556354916, |
|
"grad_norm": 1.0976161154770012, |
|
"learning_rate": 4.8729981771863754e-06, |
|
"logits/chosen": -4.064372539520264, |
|
"logits/rejected": -3.575026750564575, |
|
"logps/chosen": -108.59566497802734, |
|
"logps/rejected": -126.73535919189453, |
|
"loss": 0.5883, |
|
"rewards/accuracies": 0.7156250476837158, |
|
"rewards/chosen": -1.061449646949768, |
|
"rewards/margins": 0.5058751106262207, |
|
"rewards/rejected": -1.5673246383666992, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.3943511857180922, |
|
"grad_norm": 0.9250963996730401, |
|
"learning_rate": 4.857946722693566e-06, |
|
"logits/chosen": -3.8002986907958984, |
|
"logits/rejected": -3.390702962875366, |
|
"logps/chosen": -104.60211944580078, |
|
"logps/rejected": -119.37008666992188, |
|
"loss": 0.5682, |
|
"rewards/accuracies": 0.7000000476837158, |
|
"rewards/chosen": -0.9179290533065796, |
|
"rewards/margins": 0.509262204170227, |
|
"rewards/rejected": -1.4271912574768066, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.4050093258726352, |
|
"grad_norm": 0.9271957532989479, |
|
"learning_rate": 4.8420785423294645e-06, |
|
"logits/chosen": -3.765083074569702, |
|
"logits/rejected": -3.4443726539611816, |
|
"logps/chosen": -106.34342956542969, |
|
"logps/rejected": -127.36721801757812, |
|
"loss": 0.5421, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -1.0203157663345337, |
|
"rewards/margins": 0.6243284940719604, |
|
"rewards/rejected": -1.6446441411972046, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.41566746602717825, |
|
"grad_norm": 0.8975038528515316, |
|
"learning_rate": 4.825399132381714e-06, |
|
"logits/chosen": -3.862760066986084, |
|
"logits/rejected": -3.4624404907226562, |
|
"logps/chosen": -117.69804382324219, |
|
"logps/rejected": -138.2783966064453, |
|
"loss": 0.5662, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": -1.380739688873291, |
|
"rewards/margins": 0.5420045256614685, |
|
"rewards/rejected": -1.9227441549301147, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.4263256061817213, |
|
"grad_norm": 0.9348408243690974, |
|
"learning_rate": 4.807914270124877e-06, |
|
"logits/chosen": -4.159409523010254, |
|
"logits/rejected": -3.677629232406616, |
|
"logps/chosen": -130.88888549804688, |
|
"logps/rejected": -144.55130004882812, |
|
"loss": 0.5967, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.741608738899231, |
|
"rewards/margins": 0.40564244985580444, |
|
"rewards/rejected": -2.1472513675689697, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.4369837463362643, |
|
"grad_norm": 0.9171395308867525, |
|
"learning_rate": 4.789630011819355e-06, |
|
"logits/chosen": -3.878075122833252, |
|
"logits/rejected": -3.4123120307922363, |
|
"logps/chosen": -121.80657958984375, |
|
"logps/rejected": -136.29861450195312, |
|
"loss": 0.562, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.4467453956604004, |
|
"rewards/margins": 0.5153086185455322, |
|
"rewards/rejected": -1.9620540142059326, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.44764188649080733, |
|
"grad_norm": 1.1834069705037211, |
|
"learning_rate": 4.770552690613665e-06, |
|
"logits/chosen": -3.595890998840332, |
|
"logits/rejected": -3.097224473953247, |
|
"logps/chosen": -124.76722717285156, |
|
"logps/rejected": -145.12998962402344, |
|
"loss": 0.5591, |
|
"rewards/accuracies": 0.7062500715255737, |
|
"rewards/chosen": -1.5228891372680664, |
|
"rewards/margins": 0.5909878611564636, |
|
"rewards/rejected": -2.1138768196105957, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.4583000266453504, |
|
"grad_norm": 0.9693559755065526, |
|
"learning_rate": 4.750688914350825e-06, |
|
"logits/chosen": -3.4637410640716553, |
|
"logits/rejected": -3.0509443283081055, |
|
"logps/chosen": -118.1113510131836, |
|
"logps/rejected": -143.81488037109375, |
|
"loss": 0.5378, |
|
"rewards/accuracies": 0.7031250596046448, |
|
"rewards/chosen": -1.3217973709106445, |
|
"rewards/margins": 0.7465630769729614, |
|
"rewards/rejected": -2.0683603286743164, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.46895816679989344, |
|
"grad_norm": 0.9987488654555671, |
|
"learning_rate": 4.7300455632795774e-06, |
|
"logits/chosen": -3.6906847953796387, |
|
"logits/rejected": -3.1110119819641113, |
|
"logps/chosen": -110.44822692871094, |
|
"logps/rejected": -127.99951171875, |
|
"loss": 0.5869, |
|
"rewards/accuracies": 0.6781250238418579, |
|
"rewards/chosen": -1.138585090637207, |
|
"rewards/margins": 0.5512310862541199, |
|
"rewards/rejected": -1.6898162364959717, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.47961630695443647, |
|
"grad_norm": 1.0974588840874206, |
|
"learning_rate": 4.708629787671268e-06, |
|
"logits/chosen": -3.850926399230957, |
|
"logits/rejected": -3.47334623336792, |
|
"logps/chosen": -110.14234924316406, |
|
"logps/rejected": -132.25430297851562, |
|
"loss": 0.5314, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.022322177886963, |
|
"rewards/margins": 0.6396123170852661, |
|
"rewards/rejected": -1.6619343757629395, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.4902744471089795, |
|
"grad_norm": 1.7157017329202113, |
|
"learning_rate": 4.6864490053432e-06, |
|
"logits/chosen": -3.5702788829803467, |
|
"logits/rejected": -3.0868241786956787, |
|
"logps/chosen": -121.15917205810547, |
|
"logps/rejected": -142.2877960205078, |
|
"loss": 0.5613, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.488118052482605, |
|
"rewards/margins": 0.5989449620246887, |
|
"rewards/rejected": -2.0870630741119385, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.5009325872635225, |
|
"grad_norm": 1.1271739801805507, |
|
"learning_rate": 4.663510899089304e-06, |
|
"logits/chosen": -3.6561179161071777, |
|
"logits/rejected": -3.205963611602783, |
|
"logps/chosen": -122.10357666015625, |
|
"logps/rejected": -149.9842529296875, |
|
"loss": 0.5057, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.465712308883667, |
|
"rewards/margins": 0.8217430114746094, |
|
"rewards/rejected": -2.2874553203582764, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.5115907274180655, |
|
"grad_norm": 1.3294571708308955, |
|
"learning_rate": 4.639823414019042e-06, |
|
"logits/chosen": -3.6346356868743896, |
|
"logits/rejected": -3.384951591491699, |
|
"logps/chosen": -126.30377197265625, |
|
"logps/rejected": -147.95111083984375, |
|
"loss": 0.5665, |
|
"rewards/accuracies": 0.6968749761581421, |
|
"rewards/chosen": -1.6428841352462769, |
|
"rewards/margins": 0.6418995261192322, |
|
"rewards/rejected": -2.2847836017608643, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.5222488675726086, |
|
"grad_norm": 1.210270705347686, |
|
"learning_rate": 4.615394754805444e-06, |
|
"logits/chosen": -4.3083062171936035, |
|
"logits/rejected": -3.9710230827331543, |
|
"logps/chosen": -140.5081024169922, |
|
"logps/rejected": -160.8867645263672, |
|
"loss": 0.5575, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.9924941062927246, |
|
"rewards/margins": 0.5862021446228027, |
|
"rewards/rejected": -2.5786962509155273, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.5329070077271516, |
|
"grad_norm": 1.4030504016316763, |
|
"learning_rate": 4.590233382843242e-06, |
|
"logits/chosen": -4.16569709777832, |
|
"logits/rejected": -3.7832789421081543, |
|
"logps/chosen": -140.76181030273438, |
|
"logps/rejected": -161.51876831054688, |
|
"loss": 0.5867, |
|
"rewards/accuracies": 0.690625011920929, |
|
"rewards/chosen": -2.0134429931640625, |
|
"rewards/margins": 0.6146745085716248, |
|
"rewards/rejected": -2.628117561340332, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.5329070077271516, |
|
"eval_logits/chosen": -4.173105239868164, |
|
"eval_logits/rejected": -3.9948394298553467, |
|
"eval_logps/chosen": -130.56838989257812, |
|
"eval_logps/rejected": -151.851806640625, |
|
"eval_loss": 0.5392829775810242, |
|
"eval_rewards/accuracies": 0.7138157486915588, |
|
"eval_rewards/chosen": -1.7259799242019653, |
|
"eval_rewards/margins": 0.6580372452735901, |
|
"eval_rewards/rejected": -2.384016990661621, |
|
"eval_runtime": 16.194, |
|
"eval_samples_per_second": 18.772, |
|
"eval_steps_per_second": 2.347, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.5435651478816946, |
|
"grad_norm": 1.1859820718511664, |
|
"learning_rate": 4.564348013318086e-06, |
|
"logits/chosen": -4.147817611694336, |
|
"logits/rejected": -3.7964394092559814, |
|
"logps/chosen": -130.77149963378906, |
|
"logps/rejected": -151.90524291992188, |
|
"loss": 0.54, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.7467769384384155, |
|
"rewards/margins": 0.6529412865638733, |
|
"rewards/rejected": -2.3997185230255127, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.5542232880362377, |
|
"grad_norm": 1.2910647547180532, |
|
"learning_rate": 4.537747612187848e-06, |
|
"logits/chosen": -3.901210069656372, |
|
"logits/rejected": -3.5390820503234863, |
|
"logps/chosen": -127.16810607910156, |
|
"logps/rejected": -149.90931701660156, |
|
"loss": 0.5293, |
|
"rewards/accuracies": 0.7156250476837158, |
|
"rewards/chosen": -1.5799282789230347, |
|
"rewards/margins": 0.780663013458252, |
|
"rewards/rejected": -2.360591411590576, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.5648814281907807, |
|
"grad_norm": 1.020301177007009, |
|
"learning_rate": 4.51044139307707e-06, |
|
"logits/chosen": -3.2883031368255615, |
|
"logits/rejected": -2.9629526138305664, |
|
"logps/chosen": -119.66636657714844, |
|
"logps/rejected": -143.79095458984375, |
|
"loss": 0.5617, |
|
"rewards/accuracies": 0.7000000476837158, |
|
"rewards/chosen": -1.354878306388855, |
|
"rewards/margins": 0.7017435431480408, |
|
"rewards/rejected": -2.056621789932251, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.5755395683453237, |
|
"grad_norm": 1.0930589659350993, |
|
"learning_rate": 4.48243881408562e-06, |
|
"logits/chosen": -3.6414079666137695, |
|
"logits/rejected": -3.373316764831543, |
|
"logps/chosen": -130.3306884765625, |
|
"logps/rejected": -160.5707550048828, |
|
"loss": 0.4848, |
|
"rewards/accuracies": 0.7625000476837158, |
|
"rewards/chosen": -1.7106578350067139, |
|
"rewards/margins": 0.832088828086853, |
|
"rewards/rejected": -2.5427465438842773, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.5861977084998667, |
|
"grad_norm": 1.0938647642620174, |
|
"learning_rate": 4.453749574512686e-06, |
|
"logits/chosen": -4.139334678649902, |
|
"logits/rejected": -3.621802568435669, |
|
"logps/chosen": -141.89561462402344, |
|
"logps/rejected": -166.62277221679688, |
|
"loss": 0.5297, |
|
"rewards/accuracies": 0.7593750357627869, |
|
"rewards/chosen": -1.9777443408966064, |
|
"rewards/margins": 0.792668879032135, |
|
"rewards/rejected": -2.770413398742676, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.5968558486544098, |
|
"grad_norm": 1.317920209654218, |
|
"learning_rate": 4.424383611497201e-06, |
|
"logits/chosen": -4.6707000732421875, |
|
"logits/rejected": -4.201042175292969, |
|
"logps/chosen": -133.927734375, |
|
"logps/rejected": -164.15513610839844, |
|
"loss": 0.494, |
|
"rewards/accuracies": 0.7687500715255737, |
|
"rewards/chosen": -1.8915112018585205, |
|
"rewards/margins": 0.8977586030960083, |
|
"rewards/rejected": -2.7892699241638184, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.6075139888089528, |
|
"grad_norm": 1.233574210237788, |
|
"learning_rate": 4.3943510965759115e-06, |
|
"logits/chosen": -4.614037990570068, |
|
"logits/rejected": -4.108269691467285, |
|
"logps/chosen": -118.55348205566406, |
|
"logps/rejected": -151.23236083984375, |
|
"loss": 0.5092, |
|
"rewards/accuracies": 0.7281250357627869, |
|
"rewards/chosen": -1.3849703073501587, |
|
"rewards/margins": 0.9483410716056824, |
|
"rewards/rejected": -2.3333113193511963, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.6181721289634958, |
|
"grad_norm": 1.2029391432297971, |
|
"learning_rate": 4.363662432160236e-06, |
|
"logits/chosen": -4.777067184448242, |
|
"logits/rejected": -4.468045711517334, |
|
"logps/chosen": -128.49325561523438, |
|
"logps/rejected": -155.1313018798828, |
|
"loss": 0.5473, |
|
"rewards/accuracies": 0.7249999642372131, |
|
"rewards/chosen": -1.643248200416565, |
|
"rewards/margins": 0.7391076683998108, |
|
"rewards/rejected": -2.3823559284210205, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.6288302691180389, |
|
"grad_norm": 1.2318649100315033, |
|
"learning_rate": 4.332328247933172e-06, |
|
"logits/chosen": -4.571714878082275, |
|
"logits/rejected": -4.174932479858398, |
|
"logps/chosen": -128.60409545898438, |
|
"logps/rejected": -155.87367248535156, |
|
"loss": 0.5137, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -1.6886957883834839, |
|
"rewards/margins": 0.8426259756088257, |
|
"rewards/rejected": -2.5313217639923096, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.6394884092725819, |
|
"grad_norm": 1.0027557276860497, |
|
"learning_rate": 4.30035939716747e-06, |
|
"logits/chosen": -4.877194881439209, |
|
"logits/rejected": -4.134712219238281, |
|
"logps/chosen": -129.9324951171875, |
|
"logps/rejected": -164.78619384765625, |
|
"loss": 0.4583, |
|
"rewards/accuracies": 0.7562499642372131, |
|
"rewards/chosen": -1.7581331729888916, |
|
"rewards/margins": 1.0519375801086426, |
|
"rewards/rejected": -2.810070753097534, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.650146549427125, |
|
"grad_norm": 1.260211310119496, |
|
"learning_rate": 4.267766952966369e-06, |
|
"logits/chosen": -4.862697601318359, |
|
"logits/rejected": -4.233713626861572, |
|
"logps/chosen": -126.78631591796875, |
|
"logps/rejected": -154.99343872070312, |
|
"loss": 0.5508, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -1.581892967224121, |
|
"rewards/margins": 0.8831641674041748, |
|
"rewards/rejected": -2.465056896209717, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.660804689581668, |
|
"grad_norm": 1.3842421440000163, |
|
"learning_rate": 4.234562204428192e-06, |
|
"logits/chosen": -4.9121599197387695, |
|
"logits/rejected": -4.271299839019775, |
|
"logps/chosen": -123.48585510253906, |
|
"logps/rejected": -156.510986328125, |
|
"loss": 0.4985, |
|
"rewards/accuracies": 0.778124988079071, |
|
"rewards/chosen": -1.5398130416870117, |
|
"rewards/margins": 0.9628331661224365, |
|
"rewards/rejected": -2.5026462078094482, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.6714628297362111, |
|
"grad_norm": 1.145408529867159, |
|
"learning_rate": 4.200756652736115e-06, |
|
"logits/chosen": -4.937037944793701, |
|
"logits/rejected": -4.558259010314941, |
|
"logps/chosen": -144.7303466796875, |
|
"logps/rejected": -171.3979034423828, |
|
"loss": 0.5229, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.126035690307617, |
|
"rewards/margins": 0.8396495580673218, |
|
"rewards/rejected": -2.9656853675842285, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.6821209698907541, |
|
"grad_norm": 1.2268030164614083, |
|
"learning_rate": 4.16636200717449e-06, |
|
"logits/chosen": -4.850823879241943, |
|
"logits/rejected": -4.447197437286377, |
|
"logps/chosen": -151.80709838867188, |
|
"logps/rejected": -181.2040252685547, |
|
"loss": 0.5288, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": -2.3810770511627197, |
|
"rewards/margins": 0.8073012232780457, |
|
"rewards/rejected": -3.18837833404541, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.6927791100452971, |
|
"grad_norm": 1.2392007745119231, |
|
"learning_rate": 4.131390181073076e-06, |
|
"logits/chosen": -4.989903926849365, |
|
"logits/rejected": -4.669846534729004, |
|
"logps/chosen": -141.72982788085938, |
|
"logps/rejected": -170.2518768310547, |
|
"loss": 0.5322, |
|
"rewards/accuracies": 0.753125011920929, |
|
"rewards/chosen": -2.090695381164551, |
|
"rewards/margins": 0.7966994047164917, |
|
"rewards/rejected": -2.887394905090332, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.7034372501998402, |
|
"grad_norm": 1.2995182746265004, |
|
"learning_rate": 4.095853287680604e-06, |
|
"logits/chosen": -5.424099445343018, |
|
"logits/rejected": -5.02023458480835, |
|
"logps/chosen": -131.36595153808594, |
|
"logps/rejected": -160.21595764160156, |
|
"loss": 0.5081, |
|
"rewards/accuracies": 0.7500000596046448, |
|
"rewards/chosen": -1.7348756790161133, |
|
"rewards/margins": 0.9147437810897827, |
|
"rewards/rejected": -2.6496193408966064, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.7140953903543832, |
|
"grad_norm": 1.4784913661173649, |
|
"learning_rate": 4.059763635969086e-06, |
|
"logits/chosen": -5.402804374694824, |
|
"logits/rejected": -4.898547649383545, |
|
"logps/chosen": -148.7879638671875, |
|
"logps/rejected": -180.22988891601562, |
|
"loss": 0.498, |
|
"rewards/accuracies": 0.7312500476837158, |
|
"rewards/chosen": -2.28507137298584, |
|
"rewards/margins": 0.9331786036491394, |
|
"rewards/rejected": -3.218249797821045, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.7247535305089262, |
|
"grad_norm": 1.3209008467287613, |
|
"learning_rate": 4.023133726370342e-06, |
|
"logits/chosen": -5.838848114013672, |
|
"logits/rejected": -5.197572231292725, |
|
"logps/chosen": -157.23619079589844, |
|
"logps/rejected": -188.14352416992188, |
|
"loss": 0.4888, |
|
"rewards/accuracies": 0.7500000596046448, |
|
"rewards/chosen": -2.5044658184051514, |
|
"rewards/margins": 0.9292387962341309, |
|
"rewards/rejected": -3.4337046146392822, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.7354116706634692, |
|
"grad_norm": 1.186441973591312, |
|
"learning_rate": 3.985976246446199e-06, |
|
"logits/chosen": -5.651508808135986, |
|
"logits/rejected": -5.234190464019775, |
|
"logps/chosen": -145.10397338867188, |
|
"logps/rejected": -181.2830352783203, |
|
"loss": 0.483, |
|
"rewards/accuracies": 0.7656250596046448, |
|
"rewards/chosen": -2.1236777305603027, |
|
"rewards/margins": 1.0213332176208496, |
|
"rewards/rejected": -3.1450109481811523, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.7460698108180123, |
|
"grad_norm": 1.5388521698096325, |
|
"learning_rate": 3.948304066493885e-06, |
|
"logits/chosen": -5.722045421600342, |
|
"logits/rejected": -5.271629333496094, |
|
"logps/chosen": -148.8587646484375, |
|
"logps/rejected": -177.77627563476562, |
|
"loss": 0.5577, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.1708545684814453, |
|
"rewards/margins": 0.8769198656082153, |
|
"rewards/rejected": -3.04777455329895, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.7567279509725553, |
|
"grad_norm": 1.4544680343837397, |
|
"learning_rate": 3.910130235088119e-06, |
|
"logits/chosen": -5.55647611618042, |
|
"logits/rejected": -5.154372692108154, |
|
"logps/chosen": -152.96279907226562, |
|
"logps/rejected": -185.05718994140625, |
|
"loss": 0.5002, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.3269407749176025, |
|
"rewards/margins": 0.9755868911743164, |
|
"rewards/rejected": -3.302527666091919, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.7673860911270983, |
|
"grad_norm": 1.416968092425065, |
|
"learning_rate": 3.871467974561456e-06, |
|
"logits/chosen": -5.580058574676514, |
|
"logits/rejected": -5.080539703369141, |
|
"logps/chosen": -145.49124145507812, |
|
"logps/rejected": -180.27259826660156, |
|
"loss": 0.4679, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -2.189512252807617, |
|
"rewards/margins": 1.0660803318023682, |
|
"rewards/rejected": -3.2555925846099854, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.7780442312816414, |
|
"grad_norm": 1.0569629294386793, |
|
"learning_rate": 3.832330676424445e-06, |
|
"logits/chosen": -5.299814224243164, |
|
"logits/rejected": -4.900491714477539, |
|
"logps/chosen": -145.76690673828125, |
|
"logps/rejected": -178.74806213378906, |
|
"loss": 0.5222, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.251197576522827, |
|
"rewards/margins": 0.9395439624786377, |
|
"rewards/rejected": -3.190741539001465, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.7887023714361844, |
|
"grad_norm": 1.371721265725712, |
|
"learning_rate": 3.7927318967271966e-06, |
|
"logits/chosen": -5.235317230224609, |
|
"logits/rejected": -4.782592296600342, |
|
"logps/chosen": -146.36451721191406, |
|
"logps/rejected": -177.93173217773438, |
|
"loss": 0.5222, |
|
"rewards/accuracies": 0.715624988079071, |
|
"rewards/chosen": -2.1911604404449463, |
|
"rewards/margins": 0.9466503858566284, |
|
"rewards/rejected": -3.137810707092285, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.7993605115907274, |
|
"grad_norm": 0.9758943877529455, |
|
"learning_rate": 3.752685351363938e-06, |
|
"logits/chosen": -5.0233473777771, |
|
"logits/rejected": -4.740903377532959, |
|
"logps/chosen": -144.844482421875, |
|
"logps/rejected": -172.91390991210938, |
|
"loss": 0.5075, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.116410255432129, |
|
"rewards/margins": 0.8748281002044678, |
|
"rewards/rejected": -2.9912383556365967, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.8100186517452704, |
|
"grad_norm": 1.1025411234268572, |
|
"learning_rate": 3.7122049113222286e-06, |
|
"logits/chosen": -4.975042343139648, |
|
"logits/rejected": -4.748147010803223, |
|
"logps/chosen": -134.8345947265625, |
|
"logps/rejected": -167.31817626953125, |
|
"loss": 0.5072, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -1.8849825859069824, |
|
"rewards/margins": 0.9475346803665161, |
|
"rewards/rejected": -2.832517147064209, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.8206767918998135, |
|
"grad_norm": 1.4871160539681358, |
|
"learning_rate": 3.671304597878438e-06, |
|
"logits/chosen": -5.481166839599609, |
|
"logits/rejected": -4.848423957824707, |
|
"logps/chosen": -136.3662872314453, |
|
"logps/rejected": -169.13623046875, |
|
"loss": 0.5012, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.8726308345794678, |
|
"rewards/margins": 0.9870717525482178, |
|
"rewards/rejected": -2.8597025871276855, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.8313349320543565, |
|
"grad_norm": 1.1730010160533477, |
|
"learning_rate": 3.6299985777411744e-06, |
|
"logits/chosen": -5.724527835845947, |
|
"logits/rejected": -5.357831954956055, |
|
"logps/chosen": -148.54917907714844, |
|
"logps/rejected": -179.8152313232422, |
|
"loss": 0.5271, |
|
"rewards/accuracies": 0.746874988079071, |
|
"rewards/chosen": -2.2520809173583984, |
|
"rewards/margins": 0.8587377071380615, |
|
"rewards/rejected": -3.110818386077881, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.8419930722088995, |
|
"grad_norm": 1.5791377643643896, |
|
"learning_rate": 3.5883011581443384e-06, |
|
"logits/chosen": -5.6962690353393555, |
|
"logits/rejected": -5.153504848480225, |
|
"logps/chosen": -151.4555206298828, |
|
"logps/rejected": -182.0592041015625, |
|
"loss": 0.5205, |
|
"rewards/accuracies": 0.753125011920929, |
|
"rewards/chosen": -2.274034261703491, |
|
"rewards/margins": 0.9063214063644409, |
|
"rewards/rejected": -3.1803555488586426, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.8526512123634425, |
|
"grad_norm": 1.3271848720832946, |
|
"learning_rate": 3.5462267818915014e-06, |
|
"logits/chosen": -5.364205837249756, |
|
"logits/rejected": -4.819432258605957, |
|
"logps/chosen": -152.04446411132812, |
|
"logps/rejected": -186.48834228515625, |
|
"loss": 0.4845, |
|
"rewards/accuracies": 0.7781250476837158, |
|
"rewards/chosen": -2.3229737281799316, |
|
"rewards/margins": 1.0416414737701416, |
|
"rewards/rejected": -3.3646154403686523, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.8633093525179856, |
|
"grad_norm": 1.6557136471716132, |
|
"learning_rate": 3.503790022353333e-06, |
|
"logits/chosen": -5.417389869689941, |
|
"logits/rejected": -4.862929344177246, |
|
"logps/chosen": -158.570556640625, |
|
"logps/rejected": -195.6390380859375, |
|
"loss": 0.4796, |
|
"rewards/accuracies": 0.7593750357627869, |
|
"rewards/chosen": -2.546135663986206, |
|
"rewards/margins": 1.111317753791809, |
|
"rewards/rejected": -3.6574535369873047, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.8739674926725286, |
|
"grad_norm": 1.7264872598952354, |
|
"learning_rate": 3.4610055784197917e-06, |
|
"logits/chosen": -5.47420072555542, |
|
"logits/rejected": -4.85552978515625, |
|
"logps/chosen": -162.07437133789062, |
|
"logps/rejected": -202.63998413085938, |
|
"loss": 0.4763, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.674922466278076, |
|
"rewards/margins": 1.202065110206604, |
|
"rewards/rejected": -3.8769874572753906, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.8846256328270716, |
|
"grad_norm": 1.5313320687290009, |
|
"learning_rate": 3.417888269408851e-06, |
|
"logits/chosen": -5.532134532928467, |
|
"logits/rejected": -4.891984939575195, |
|
"logps/chosen": -155.36102294921875, |
|
"logps/rejected": -193.2767333984375, |
|
"loss": 0.5057, |
|
"rewards/accuracies": 0.7312500476837158, |
|
"rewards/chosen": -2.442742109298706, |
|
"rewards/margins": 1.0950744152069092, |
|
"rewards/rejected": -3.5378167629241943, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.8952837729816147, |
|
"grad_norm": 1.4375521640367483, |
|
"learning_rate": 3.3744530299335093e-06, |
|
"logits/chosen": -5.205890655517578, |
|
"logits/rejected": -4.822669506072998, |
|
"logps/chosen": -162.70623779296875, |
|
"logps/rejected": -196.24661254882812, |
|
"loss": 0.4856, |
|
"rewards/accuracies": 0.7468750476837158, |
|
"rewards/chosen": -2.6811633110046387, |
|
"rewards/margins": 0.998574435710907, |
|
"rewards/rejected": -3.6797378063201904, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.9059419131361578, |
|
"grad_norm": 1.1393303534575105, |
|
"learning_rate": 3.330714904728858e-06, |
|
"logits/chosen": -5.357090950012207, |
|
"logits/rejected": -4.730193138122559, |
|
"logps/chosen": -168.93630981445312, |
|
"logps/rejected": -205.3738250732422, |
|
"loss": 0.4603, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -2.843959331512451, |
|
"rewards/margins": 1.0875535011291504, |
|
"rewards/rejected": -3.9315128326416016, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.9166000532907008, |
|
"grad_norm": 1.2445671435879222, |
|
"learning_rate": 3.2866890434410153e-06, |
|
"logits/chosen": -4.958594799041748, |
|
"logits/rejected": -4.384560585021973, |
|
"logps/chosen": -156.94627380371094, |
|
"logps/rejected": -191.2472686767578, |
|
"loss": 0.5225, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -2.4854142665863037, |
|
"rewards/margins": 0.981643795967102, |
|
"rewards/rejected": -3.4670581817626953, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.9272581934452439, |
|
"grad_norm": 1.3013892660404491, |
|
"learning_rate": 3.2423906953797214e-06, |
|
"logits/chosen": -4.9950361251831055, |
|
"logits/rejected": -4.418999671936035, |
|
"logps/chosen": -146.83462524414062, |
|
"logps/rejected": -178.1900634765625, |
|
"loss": 0.5126, |
|
"rewards/accuracies": 0.753125011920929, |
|
"rewards/chosen": -2.2267892360687256, |
|
"rewards/margins": 0.9049115180969238, |
|
"rewards/rejected": -3.1317005157470703, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.9379163335997869, |
|
"grad_norm": 1.0095452349351663, |
|
"learning_rate": 3.1978352042364026e-06, |
|
"logits/chosen": -5.063623428344727, |
|
"logits/rejected": -4.299590587615967, |
|
"logps/chosen": -142.35137939453125, |
|
"logps/rejected": -178.93995666503906, |
|
"loss": 0.4621, |
|
"rewards/accuracies": 0.7718750238418579, |
|
"rewards/chosen": -2.0139267444610596, |
|
"rewards/margins": 1.1270688772201538, |
|
"rewards/rejected": -3.140995502471924, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.9485744737543299, |
|
"grad_norm": 1.357019936194069, |
|
"learning_rate": 3.1530380027695584e-06, |
|
"logits/chosen": -4.841828346252441, |
|
"logits/rejected": -4.388213157653809, |
|
"logps/chosen": -136.35086059570312, |
|
"logps/rejected": -170.498046875, |
|
"loss": 0.5007, |
|
"rewards/accuracies": 0.7593749761581421, |
|
"rewards/chosen": -1.939600944519043, |
|
"rewards/margins": 1.0859730243682861, |
|
"rewards/rejected": -3.02557373046875, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.9592326139088729, |
|
"grad_norm": 1.2753443613616648, |
|
"learning_rate": 3.1080146074592882e-06, |
|
"logits/chosen": -5.0070672035217285, |
|
"logits/rejected": -4.423335552215576, |
|
"logps/chosen": -138.63717651367188, |
|
"logps/rejected": -173.17710876464844, |
|
"loss": 0.4835, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.9490740299224854, |
|
"rewards/margins": 1.0117491483688354, |
|
"rewards/rejected": -2.9608232975006104, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.969890754063416, |
|
"grad_norm": 1.6255311820111107, |
|
"learning_rate": 3.0627806131328254e-06, |
|
"logits/chosen": -4.868027687072754, |
|
"logits/rejected": -4.526212692260742, |
|
"logps/chosen": -152.69790649414062, |
|
"logps/rejected": -185.13641357421875, |
|
"loss": 0.529, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -2.4021925926208496, |
|
"rewards/margins": 0.9701516628265381, |
|
"rewards/rejected": -3.3723440170288086, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.980548894217959, |
|
"grad_norm": 1.1225767355162044, |
|
"learning_rate": 3.0173516875629282e-06, |
|
"logits/chosen": -4.990843296051025, |
|
"logits/rejected": -4.554540634155273, |
|
"logps/chosen": -170.96319580078125, |
|
"logps/rejected": -207.29257202148438, |
|
"loss": 0.4982, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -2.926543951034546, |
|
"rewards/margins": 1.032052755355835, |
|
"rewards/rejected": -3.958596706390381, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.991207034372502, |
|
"grad_norm": 1.1933201256441803, |
|
"learning_rate": 2.9717435660410092e-06, |
|
"logits/chosen": -4.947135925292969, |
|
"logits/rejected": -4.488518238067627, |
|
"logps/chosen": -173.5876007080078, |
|
"logps/rejected": -206.2366485595703, |
|
"loss": 0.4984, |
|
"rewards/accuracies": 0.746874988079071, |
|
"rewards/chosen": -2.969851016998291, |
|
"rewards/margins": 0.915156900882721, |
|
"rewards/rejected": -3.885007858276367, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.0010658140154542, |
|
"grad_norm": 0.7624876376172102, |
|
"learning_rate": 2.9259720459268782e-06, |
|
"logits/chosen": -5.2453413009643555, |
|
"logits/rejected": -4.815248489379883, |
|
"logps/chosen": -159.5972900390625, |
|
"logps/rejected": -195.76258850097656, |
|
"loss": 0.4402, |
|
"rewards/accuracies": 0.7736486196517944, |
|
"rewards/chosen": -2.6124117374420166, |
|
"rewards/margins": 1.0251028537750244, |
|
"rewards/rejected": -3.637514591217041, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.0117239541699974, |
|
"grad_norm": 0.4056472805178364, |
|
"learning_rate": 2.8800529811769797e-06, |
|
"logits/chosen": -5.477859020233154, |
|
"logits/rejected": -4.702430725097656, |
|
"logps/chosen": -133.4419403076172, |
|
"logps/rejected": -211.37484741210938, |
|
"loss": 0.157, |
|
"rewards/accuracies": 0.9781249761581421, |
|
"rewards/chosen": -1.7449127435684204, |
|
"rewards/margins": 2.377041816711426, |
|
"rewards/rejected": -4.121954441070557, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.0223820943245403, |
|
"grad_norm": 0.5996457030018431, |
|
"learning_rate": 2.8340022768530406e-06, |
|
"logits/chosen": -6.234375, |
|
"logits/rejected": -5.426927089691162, |
|
"logps/chosen": -134.3699188232422, |
|
"logps/rejected": -221.4859619140625, |
|
"loss": 0.1387, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -1.8363261222839355, |
|
"rewards/margins": 2.615605115890503, |
|
"rewards/rejected": -4.451931953430176, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.0330402344790834, |
|
"grad_norm": 0.8046398681032393, |
|
"learning_rate": 2.7878358836129986e-06, |
|
"logits/chosen": -7.514087200164795, |
|
"logits/rejected": -6.6742777824401855, |
|
"logps/chosen": -138.34054565429688, |
|
"logps/rejected": -236.50970458984375, |
|
"loss": 0.1503, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -1.9301979541778564, |
|
"rewards/margins": 2.93267560005188, |
|
"rewards/rejected": -4.862873554229736, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.0436983746336264, |
|
"grad_norm": 0.65663529891454, |
|
"learning_rate": 2.7415697921861527e-06, |
|
"logits/chosen": -8.443697929382324, |
|
"logits/rejected": -7.523745536804199, |
|
"logps/chosen": -146.04627990722656, |
|
"logps/rejected": -256.7216796875, |
|
"loss": 0.1402, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -2.07312273979187, |
|
"rewards/margins": 3.327838182449341, |
|
"rewards/rejected": -5.400960922241211, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.0543565147881695, |
|
"grad_norm": 0.6828949649500631, |
|
"learning_rate": 2.695220027834426e-06, |
|
"logits/chosen": -9.587047576904297, |
|
"logits/rejected": -8.562227249145508, |
|
"logps/chosen": -141.90069580078125, |
|
"logps/rejected": -258.0894470214844, |
|
"loss": 0.1348, |
|
"rewards/accuracies": 0.9625000357627869, |
|
"rewards/chosen": -2.0652272701263428, |
|
"rewards/margins": 3.4861674308776855, |
|
"rewards/rejected": -5.551394462585449, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.0650146549427124, |
|
"grad_norm": 1.945112160995172, |
|
"learning_rate": 2.648802644801669e-06, |
|
"logits/chosen": -11.020868301391602, |
|
"logits/rejected": -9.675145149230957, |
|
"logps/chosen": -165.42596435546875, |
|
"logps/rejected": -274.6050720214844, |
|
"loss": 0.1708, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -2.7763314247131348, |
|
"rewards/margins": 3.271212100982666, |
|
"rewards/rejected": -6.047543525695801, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.0650146549427124, |
|
"eval_logits/chosen": -11.360437393188477, |
|
"eval_logits/rejected": -10.917423248291016, |
|
"eval_logps/chosen": -209.72885131835938, |
|
"eval_logps/rejected": -257.6840515136719, |
|
"eval_loss": 0.5134132504463196, |
|
"eval_rewards/accuracies": 0.7532894611358643, |
|
"eval_rewards/chosen": -4.100793838500977, |
|
"eval_rewards/margins": 1.4581912755966187, |
|
"eval_rewards/rejected": -5.558984756469727, |
|
"eval_runtime": 15.8401, |
|
"eval_samples_per_second": 19.192, |
|
"eval_steps_per_second": 2.399, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.0756727950972556, |
|
"grad_norm": 1.9473221053359875, |
|
"learning_rate": 2.6023337207529276e-06, |
|
"logits/chosen": -11.351781845092773, |
|
"logits/rejected": -10.451410293579102, |
|
"logps/chosen": -176.87155151367188, |
|
"logps/rejected": -294.4244384765625, |
|
"loss": 0.1212, |
|
"rewards/accuracies": 0.9593750238418579, |
|
"rewards/chosen": -3.0807976722717285, |
|
"rewards/margins": 3.5374972820281982, |
|
"rewards/rejected": -6.618295192718506, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.0863309352517985, |
|
"grad_norm": 0.8302351181171278, |
|
"learning_rate": 2.5558293512055927e-06, |
|
"logits/chosen": -11.646150588989258, |
|
"logits/rejected": -10.414688110351562, |
|
"logps/chosen": -161.24465942382812, |
|
"logps/rejected": -285.68719482421875, |
|
"loss": 0.0932, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -2.6145663261413574, |
|
"rewards/margins": 3.715557098388672, |
|
"rewards/rejected": -6.330122947692871, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.0969890754063416, |
|
"grad_norm": 0.40660215131904015, |
|
"learning_rate": 2.509305643954369e-06, |
|
"logits/chosen": -11.821858406066895, |
|
"logits/rejected": -10.551958084106445, |
|
"logps/chosen": -150.50186157226562, |
|
"logps/rejected": -271.88592529296875, |
|
"loss": 0.1374, |
|
"rewards/accuracies": 0.9593750238418579, |
|
"rewards/chosen": -2.3635170459747314, |
|
"rewards/margins": 3.640512466430664, |
|
"rewards/rejected": -6.004029273986816, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.1076472155608845, |
|
"grad_norm": 1.3222034620943741, |
|
"learning_rate": 2.462778713491995e-06, |
|
"logits/chosen": -12.25080680847168, |
|
"logits/rejected": -11.237726211547852, |
|
"logps/chosen": -160.06948852539062, |
|
"logps/rejected": -284.7348327636719, |
|
"loss": 0.1105, |
|
"rewards/accuracies": 0.9718749523162842, |
|
"rewards/chosen": -2.6440482139587402, |
|
"rewards/margins": 3.7125158309936523, |
|
"rewards/rejected": -6.356563568115234, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.1183053557154277, |
|
"grad_norm": 0.7217042259715399, |
|
"learning_rate": 2.41626467542764e-06, |
|
"logits/chosen": -14.390276908874512, |
|
"logits/rejected": -13.4714994430542, |
|
"logps/chosen": -166.0021514892578, |
|
"logps/rejected": -302.72137451171875, |
|
"loss": 0.1004, |
|
"rewards/accuracies": 0.9687500596046448, |
|
"rewards/chosen": -2.8179616928100586, |
|
"rewards/margins": 3.9968204498291016, |
|
"rewards/rejected": -6.81478214263916, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.1289634958699706, |
|
"grad_norm": 1.3401299204542447, |
|
"learning_rate": 2.3697796409049094e-06, |
|
"logits/chosen": -14.64445972442627, |
|
"logits/rejected": -13.307265281677246, |
|
"logps/chosen": -171.0238037109375, |
|
"logps/rejected": -300.96624755859375, |
|
"loss": 0.1212, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -2.9149460792541504, |
|
"rewards/margins": 3.849693775177002, |
|
"rewards/rejected": -6.764639854431152, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.1396216360245137, |
|
"grad_norm": 1.7629406615643344, |
|
"learning_rate": 2.323339711021405e-06, |
|
"logits/chosen": -14.861509323120117, |
|
"logits/rejected": -13.231218338012695, |
|
"logps/chosen": -164.38311767578125, |
|
"logps/rejected": -303.0757751464844, |
|
"loss": 0.1031, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -2.7439169883728027, |
|
"rewards/margins": 4.13311767578125, |
|
"rewards/rejected": -6.877034664154053, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.1502797761790569, |
|
"grad_norm": 0.42850783100293965, |
|
"learning_rate": 2.2769609712517606e-06, |
|
"logits/chosen": -14.08121109008789, |
|
"logits/rejected": -12.674466133117676, |
|
"logps/chosen": -169.23577880859375, |
|
"logps/rejected": -297.57794189453125, |
|
"loss": 0.0929, |
|
"rewards/accuracies": 0.9812500476837158, |
|
"rewards/chosen": -2.7998509407043457, |
|
"rewards/margins": 3.883415699005127, |
|
"rewards/rejected": -6.683266639709473, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.1609379163335998, |
|
"grad_norm": 1.7461108510928145, |
|
"learning_rate": 2.2306594858760904e-06, |
|
"logits/chosen": -13.571136474609375, |
|
"logits/rejected": -12.342116355895996, |
|
"logps/chosen": -164.53713989257812, |
|
"logps/rejected": -292.97515869140625, |
|
"loss": 0.1015, |
|
"rewards/accuracies": 0.9625000357627869, |
|
"rewards/chosen": -2.7703428268432617, |
|
"rewards/margins": 3.8357138633728027, |
|
"rewards/rejected": -6.6060566902160645, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.171596056488143, |
|
"grad_norm": 1.6098843103193679, |
|
"learning_rate": 2.1844512924157783e-06, |
|
"logits/chosen": -13.091669082641602, |
|
"logits/rejected": -12.027458190917969, |
|
"logps/chosen": -166.10980224609375, |
|
"logps/rejected": -290.114013671875, |
|
"loss": 0.1184, |
|
"rewards/accuracies": 0.9656250476837158, |
|
"rewards/chosen": -2.7894127368927, |
|
"rewards/margins": 3.748467206954956, |
|
"rewards/rejected": -6.537879943847656, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.1822541966426858, |
|
"grad_norm": 1.7184541838344305, |
|
"learning_rate": 2.1383523960785346e-06, |
|
"logits/chosen": -13.56182861328125, |
|
"logits/rejected": -12.549522399902344, |
|
"logps/chosen": -173.02137756347656, |
|
"logps/rejected": -307.5762634277344, |
|
"loss": 0.1063, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -3.0361173152923584, |
|
"rewards/margins": 3.9708454608917236, |
|
"rewards/rejected": -7.006962299346924, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.192912336797229, |
|
"grad_norm": 0.9754061829929829, |
|
"learning_rate": 2.0923787642146438e-06, |
|
"logits/chosen": -14.200406074523926, |
|
"logits/rejected": -13.405488967895508, |
|
"logps/chosen": -180.35836791992188, |
|
"logps/rejected": -316.1576232910156, |
|
"loss": 0.1211, |
|
"rewards/accuracies": 0.9500000476837158, |
|
"rewards/chosen": -3.203021287918091, |
|
"rewards/margins": 4.0161824226379395, |
|
"rewards/rejected": -7.219203472137451, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.2035704769517719, |
|
"grad_norm": 1.1476742916797582, |
|
"learning_rate": 2.0465463207863313e-06, |
|
"logits/chosen": -14.588091850280762, |
|
"logits/rejected": -13.462714195251465, |
|
"logps/chosen": -177.90089416503906, |
|
"logps/rejected": -309.41363525390625, |
|
"loss": 0.0996, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -3.214080810546875, |
|
"rewards/margins": 3.925442934036255, |
|
"rewards/rejected": -7.139523983001709, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.214228617106315, |
|
"grad_norm": 1.6294366891498737, |
|
"learning_rate": 2.000870940852151e-06, |
|
"logits/chosen": -14.483333587646484, |
|
"logits/rejected": -13.924131393432617, |
|
"logps/chosen": -167.20855712890625, |
|
"logps/rejected": -300.50897216796875, |
|
"loss": 0.1323, |
|
"rewards/accuracies": 0.9468750357627869, |
|
"rewards/chosen": -2.83001708984375, |
|
"rewards/margins": 3.9456992149353027, |
|
"rewards/rejected": -6.7757158279418945, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.224886757260858, |
|
"grad_norm": 0.5507170463345659, |
|
"learning_rate": 1.9553684450683197e-06, |
|
"logits/chosen": -14.890935897827148, |
|
"logits/rejected": -13.537836074829102, |
|
"logps/chosen": -163.0977020263672, |
|
"logps/rejected": -289.84246826171875, |
|
"loss": 0.105, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -2.639470338821411, |
|
"rewards/margins": 3.860311508178711, |
|
"rewards/rejected": -6.499781131744385, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.235544897415401, |
|
"grad_norm": 0.9937617450962005, |
|
"learning_rate": 1.910054594208885e-06, |
|
"logits/chosen": -14.66843318939209, |
|
"logits/rejected": -13.91612434387207, |
|
"logps/chosen": -153.71194458007812, |
|
"logps/rejected": -289.9057312011719, |
|
"loss": 0.1175, |
|
"rewards/accuracies": 0.9656250476837158, |
|
"rewards/chosen": -2.3898086547851562, |
|
"rewards/margins": 4.002299785614014, |
|
"rewards/rejected": -6.392107963562012, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.246203037569944, |
|
"grad_norm": 0.5792493776036048, |
|
"learning_rate": 1.8649450837066445e-06, |
|
"logits/chosen": -14.934555053710938, |
|
"logits/rejected": -13.174097061157227, |
|
"logps/chosen": -157.51803588867188, |
|
"logps/rejected": -295.1328430175781, |
|
"loss": 0.0935, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -2.5683436393737793, |
|
"rewards/margins": 4.121357440948486, |
|
"rewards/rejected": -6.689700603485107, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.2568611777244871, |
|
"grad_norm": 1.8617648927131405, |
|
"learning_rate": 1.8200555382166901e-06, |
|
"logits/chosen": -13.945772171020508, |
|
"logits/rejected": -12.839581489562988, |
|
"logps/chosen": -167.31436157226562, |
|
"logps/rejected": -310.24676513671875, |
|
"loss": 0.11, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/chosen": -2.808560609817505, |
|
"rewards/margins": 4.2302565574646, |
|
"rewards/rejected": -7.038817405700684, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.26751931787903, |
|
"grad_norm": 0.49691302028863615, |
|
"learning_rate": 1.7754015062044722e-06, |
|
"logits/chosen": -13.814282417297363, |
|
"logits/rejected": -12.593766212463379, |
|
"logps/chosen": -161.9113006591797, |
|
"logps/rejected": -298.634033203125, |
|
"loss": 0.1028, |
|
"rewards/accuracies": 0.9656250476837158, |
|
"rewards/chosen": -2.725048542022705, |
|
"rewards/margins": 4.082304000854492, |
|
"rewards/rejected": -6.8073530197143555, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.2781774580335732, |
|
"grad_norm": 1.0425364838187585, |
|
"learning_rate": 1.7309984545602531e-06, |
|
"logits/chosen": -13.85971450805664, |
|
"logits/rejected": -12.746572494506836, |
|
"logps/chosen": -158.88441467285156, |
|
"logps/rejected": -298.05419921875, |
|
"loss": 0.0953, |
|
"rewards/accuracies": 0.971875011920929, |
|
"rewards/chosen": -2.5261361598968506, |
|
"rewards/margins": 4.170439720153809, |
|
"rewards/rejected": -6.696575164794922, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.288835598188116, |
|
"grad_norm": 1.3056904575579198, |
|
"learning_rate": 1.6868617632418116e-06, |
|
"logits/chosen": -14.009490966796875, |
|
"logits/rejected": -12.925272941589355, |
|
"logps/chosen": -174.1493682861328, |
|
"logps/rejected": -311.3201904296875, |
|
"loss": 0.1227, |
|
"rewards/accuracies": 0.9625000357627869, |
|
"rewards/chosen": -3.0230090618133545, |
|
"rewards/margins": 4.062224864959717, |
|
"rewards/rejected": -7.08523416519165, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.2994937383426592, |
|
"grad_norm": 0.8576806410178053, |
|
"learning_rate": 1.643006719947266e-06, |
|
"logits/chosen": -14.417228698730469, |
|
"logits/rejected": -13.120880126953125, |
|
"logps/chosen": -176.02964782714844, |
|
"logps/rejected": -315.8590393066406, |
|
"loss": 0.0673, |
|
"rewards/accuracies": 0.9812500476837158, |
|
"rewards/chosen": -3.0250649452209473, |
|
"rewards/margins": 4.23886251449585, |
|
"rewards/rejected": -7.263926982879639, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.3101518784972022, |
|
"grad_norm": 0.9062197003108285, |
|
"learning_rate": 1.5994485148198441e-06, |
|
"logits/chosen": -14.617992401123047, |
|
"logits/rejected": -13.239477157592773, |
|
"logps/chosen": -182.43719482421875, |
|
"logps/rejected": -315.89288330078125, |
|
"loss": 0.0954, |
|
"rewards/accuracies": 0.971875011920929, |
|
"rewards/chosen": -3.2150237560272217, |
|
"rewards/margins": 4.0571136474609375, |
|
"rewards/rejected": -7.272137641906738, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.3208100186517453, |
|
"grad_norm": 0.6422526705939566, |
|
"learning_rate": 1.5562022351864535e-06, |
|
"logits/chosen": -14.779105186462402, |
|
"logits/rejected": -13.467339515686035, |
|
"logps/chosen": -160.6179962158203, |
|
"logps/rejected": -299.6285705566406, |
|
"loss": 0.091, |
|
"rewards/accuracies": 0.9781249761581421, |
|
"rewards/chosen": -2.6850295066833496, |
|
"rewards/margins": 4.171126842498779, |
|
"rewards/rejected": -6.856156349182129, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.3314681588062882, |
|
"grad_norm": 2.1899788286413404, |
|
"learning_rate": 1.5132828603318578e-06, |
|
"logits/chosen": -15.219317436218262, |
|
"logits/rejected": -14.239025115966797, |
|
"logps/chosen": -166.39230346679688, |
|
"logps/rejected": -313.3610534667969, |
|
"loss": 0.087, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -2.8128161430358887, |
|
"rewards/margins": 4.378149509429932, |
|
"rewards/rejected": -7.19096565246582, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.3421262989608314, |
|
"grad_norm": 0.8006482066535761, |
|
"learning_rate": 1.470705256310275e-06, |
|
"logits/chosen": -14.706860542297363, |
|
"logits/rejected": -13.879858016967773, |
|
"logps/chosen": -182.82217407226562, |
|
"logps/rejected": -324.1910400390625, |
|
"loss": 0.1034, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -3.3051517009735107, |
|
"rewards/margins": 4.219384670257568, |
|
"rewards/rejected": -7.524536609649658, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.3527844391153745, |
|
"grad_norm": 1.0659186495075663, |
|
"learning_rate": 1.4284841707961988e-06, |
|
"logits/chosen": -14.331737518310547, |
|
"logits/rejected": -13.09138298034668, |
|
"logps/chosen": -175.8817901611328, |
|
"logps/rejected": -323.72662353515625, |
|
"loss": 0.1028, |
|
"rewards/accuracies": 0.9656250476837158, |
|
"rewards/chosen": -3.0889313220977783, |
|
"rewards/margins": 4.40924072265625, |
|
"rewards/rejected": -7.498172283172607, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.3634425792699174, |
|
"grad_norm": 1.7527883126979154, |
|
"learning_rate": 1.386634227976224e-06, |
|
"logits/chosen": -14.577993392944336, |
|
"logits/rejected": -13.916585922241211, |
|
"logps/chosen": -171.74612426757812, |
|
"logps/rejected": -326.1202392578125, |
|
"loss": 0.1074, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -2.9879088401794434, |
|
"rewards/margins": 4.579015254974365, |
|
"rewards/rejected": -7.56692361831665, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.3741007194244603, |
|
"grad_norm": 0.8123193508447394, |
|
"learning_rate": 1.3451699234836422e-06, |
|
"logits/chosen": -14.181510925292969, |
|
"logits/rejected": -13.42455768585205, |
|
"logps/chosen": -175.9627685546875, |
|
"logps/rejected": -315.4035339355469, |
|
"loss": 0.1252, |
|
"rewards/accuracies": 0.9500000476837158, |
|
"rewards/chosen": -3.1158289909362793, |
|
"rewards/margins": 4.13937520980835, |
|
"rewards/rejected": -7.255204200744629, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.3847588595790035, |
|
"grad_norm": 1.70328515299468, |
|
"learning_rate": 1.3041056193775667e-06, |
|
"logits/chosen": -14.202183723449707, |
|
"logits/rejected": -13.206469535827637, |
|
"logps/chosen": -162.6019287109375, |
|
"logps/rejected": -311.1362609863281, |
|
"loss": 0.069, |
|
"rewards/accuracies": 0.9874999523162842, |
|
"rewards/chosen": -2.7707481384277344, |
|
"rewards/margins": 4.362418174743652, |
|
"rewards/rejected": -7.133166790008545, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.3954169997335466, |
|
"grad_norm": 1.090464523472744, |
|
"learning_rate": 1.263455539168319e-06, |
|
"logits/chosen": -14.497480392456055, |
|
"logits/rejected": -13.267162322998047, |
|
"logps/chosen": -170.33209228515625, |
|
"logps/rejected": -302.6817321777344, |
|
"loss": 0.1738, |
|
"rewards/accuracies": 0.940625011920929, |
|
"rewards/chosen": -2.9318127632141113, |
|
"rewards/margins": 4.033266544342041, |
|
"rewards/rejected": -6.965079307556152, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.4060751398880895, |
|
"grad_norm": 1.3607176939111092, |
|
"learning_rate": 1.2232337628908106e-06, |
|
"logits/chosen": -14.672266006469727, |
|
"logits/rejected": -13.312570571899414, |
|
"logps/chosen": -169.37527465820312, |
|
"logps/rejected": -312.13958740234375, |
|
"loss": 0.103, |
|
"rewards/accuracies": 0.965624988079071, |
|
"rewards/chosen": -2.876102924346924, |
|
"rewards/margins": 4.249708652496338, |
|
"rewards/rejected": -7.1258111000061035, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.4167332800426324, |
|
"grad_norm": 1.48594626665491, |
|
"learning_rate": 1.1834542222276208e-06, |
|
"logits/chosen": -14.105111122131348, |
|
"logits/rejected": -12.860082626342773, |
|
"logps/chosen": -175.06655883789062, |
|
"logps/rejected": -312.02178955078125, |
|
"loss": 0.1194, |
|
"rewards/accuracies": 0.9625000357627869, |
|
"rewards/chosen": -3.08951997756958, |
|
"rewards/margins": 4.081744194030762, |
|
"rewards/rejected": -7.171264171600342, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.4273914201971756, |
|
"grad_norm": 0.9683836132983067, |
|
"learning_rate": 1.1441306956834506e-06, |
|
"logits/chosen": -14.080924987792969, |
|
"logits/rejected": -13.577337265014648, |
|
"logps/chosen": -177.63699340820312, |
|
"logps/rejected": -315.77606201171875, |
|
"loss": 0.1005, |
|
"rewards/accuracies": 0.9687500596046448, |
|
"rewards/chosen": -3.154977798461914, |
|
"rewards/margins": 4.118321418762207, |
|
"rewards/rejected": -7.273299217224121, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.4380495603517187, |
|
"grad_norm": 0.6083203947646434, |
|
"learning_rate": 1.1052768038126466e-06, |
|
"logits/chosen": -14.510137557983398, |
|
"logits/rejected": -12.803991317749023, |
|
"logps/chosen": -168.365478515625, |
|
"logps/rejected": -315.189208984375, |
|
"loss": 0.0767, |
|
"rewards/accuracies": 0.9781249761581421, |
|
"rewards/chosen": -2.8253591060638428, |
|
"rewards/margins": 4.4181013107299805, |
|
"rewards/rejected": -7.243459701538086, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.4487077005062616, |
|
"grad_norm": 1.693615706841822, |
|
"learning_rate": 1.0669060045014216e-06, |
|
"logits/chosen": -14.174700736999512, |
|
"logits/rejected": -12.93868637084961, |
|
"logps/chosen": -178.14138793945312, |
|
"logps/rejected": -318.397216796875, |
|
"loss": 0.1127, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -3.125239133834839, |
|
"rewards/margins": 4.248797416687012, |
|
"rewards/rejected": -7.3740363121032715, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.4593658406608048, |
|
"grad_norm": 1.1144006447099362, |
|
"learning_rate": 1.0290315883064259e-06, |
|
"logits/chosen": -14.330987930297852, |
|
"logits/rejected": -13.16196060180664, |
|
"logps/chosen": -173.17727661132812, |
|
"logps/rejected": -318.8533020019531, |
|
"loss": 0.0949, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -2.957810878753662, |
|
"rewards/margins": 4.391844272613525, |
|
"rewards/rejected": -7.3496551513671875, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 1.4700239808153477, |
|
"grad_norm": 0.620350891987726, |
|
"learning_rate": 9.9166667385128e-07, |
|
"logits/chosen": -14.554768562316895, |
|
"logits/rejected": -12.75456428527832, |
|
"logps/chosen": -168.19522094726562, |
|
"logps/rejected": -313.51776123046875, |
|
"loss": 0.0847, |
|
"rewards/accuracies": 0.9812500476837158, |
|
"rewards/chosen": -2.827397584915161, |
|
"rewards/margins": 4.38813591003418, |
|
"rewards/rejected": -7.215533256530762, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.4806821209698908, |
|
"grad_norm": 1.523915571386192, |
|
"learning_rate": 9.54824203282647e-07, |
|
"logits/chosen": -14.233976364135742, |
|
"logits/rejected": -13.190291404724121, |
|
"logps/chosen": -165.82955932617188, |
|
"logps/rejected": -306.906494140625, |
|
"loss": 0.1361, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -2.7462000846862793, |
|
"rewards/margins": 4.224640369415283, |
|
"rewards/rejected": -6.970839977264404, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.4913402611244337, |
|
"grad_norm": 0.6459650039596092, |
|
"learning_rate": 9.185169377874489e-07, |
|
"logits/chosen": -13.95530891418457, |
|
"logits/rejected": -13.07752799987793, |
|
"logps/chosen": -173.64149475097656, |
|
"logps/rejected": -318.71051025390625, |
|
"loss": 0.0922, |
|
"rewards/accuracies": 0.971875011920929, |
|
"rewards/chosen": -2.9675002098083496, |
|
"rewards/margins": 4.352604866027832, |
|
"rewards/rejected": -7.320105075836182, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.5019984012789767, |
|
"grad_norm": 0.32915272846922006, |
|
"learning_rate": 8.827574531727454e-07, |
|
"logits/chosen": -14.128362655639648, |
|
"logits/rejected": -13.55349349975586, |
|
"logps/chosen": -171.53123474121094, |
|
"logps/rejected": -323.4970703125, |
|
"loss": 0.0715, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -2.9670920372009277, |
|
"rewards/margins": 4.4901957511901855, |
|
"rewards/rejected": -7.4572882652282715, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.5126565414335198, |
|
"grad_norm": 2.6792359915138606, |
|
"learning_rate": 8.47558135509838e-07, |
|
"logits/chosen": -14.46495246887207, |
|
"logits/rejected": -14.040555953979492, |
|
"logps/chosen": -175.18417358398438, |
|
"logps/rejected": -331.18267822265625, |
|
"loss": 0.1299, |
|
"rewards/accuracies": 0.9656250476837158, |
|
"rewards/chosen": -3.0891642570495605, |
|
"rewards/margins": 4.518985748291016, |
|
"rewards/rejected": -7.608150005340576, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.523314681588063, |
|
"grad_norm": 1.1443797266153013, |
|
"learning_rate": 8.129311768440809e-07, |
|
"logits/chosen": -14.043274879455566, |
|
"logits/rejected": -13.293193817138672, |
|
"logps/chosen": -175.87310791015625, |
|
"logps/rejected": -328.38958740234375, |
|
"loss": 0.0901, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -3.1222434043884277, |
|
"rewards/margins": 4.512340068817139, |
|
"rewards/rejected": -7.634582996368408, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.5339728217426059, |
|
"grad_norm": 0.7215222510349094, |
|
"learning_rate": 7.788885709719035e-07, |
|
"logits/chosen": -14.091695785522461, |
|
"logits/rejected": -13.335174560546875, |
|
"logps/chosen": -177.4180450439453, |
|
"logps/rejected": -313.2870788574219, |
|
"loss": 0.0951, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -3.0618085861206055, |
|
"rewards/margins": 4.144283771514893, |
|
"rewards/rejected": -7.206092357635498, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.544630961897149, |
|
"grad_norm": 0.5075522317916121, |
|
"learning_rate": 7.454421092865039e-07, |
|
"logits/chosen": -14.293548583984375, |
|
"logits/rejected": -12.690837860107422, |
|
"logps/chosen": -169.62765502929688, |
|
"logps/rejected": -311.6443786621094, |
|
"loss": 0.103, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -2.9085817337036133, |
|
"rewards/margins": 4.2476348876953125, |
|
"rewards/rejected": -7.156217098236084, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.5552891020516921, |
|
"grad_norm": 0.6905845223275571, |
|
"learning_rate": 7.126033766936366e-07, |
|
"logits/chosen": -13.802377700805664, |
|
"logits/rejected": -12.326967239379883, |
|
"logps/chosen": -167.22052001953125, |
|
"logps/rejected": -309.58551025390625, |
|
"loss": 0.0856, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -2.816124677658081, |
|
"rewards/margins": 4.280933856964111, |
|
"rewards/rejected": -7.097058296203613, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.565947242206235, |
|
"grad_norm": 0.6979019362080532, |
|
"learning_rate": 6.80383747598938e-07, |
|
"logits/chosen": -13.710118293762207, |
|
"logits/rejected": -12.690275192260742, |
|
"logps/chosen": -163.21290588378906, |
|
"logps/rejected": -304.0994567871094, |
|
"loss": 0.1007, |
|
"rewards/accuracies": 0.971875011920929, |
|
"rewards/chosen": -2.7480688095092773, |
|
"rewards/margins": 4.1758503913879395, |
|
"rewards/rejected": -6.923918724060059, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.576605382360778, |
|
"grad_norm": 1.0552434575799825, |
|
"learning_rate": 6.487943819681489e-07, |
|
"logits/chosen": -13.67576789855957, |
|
"logits/rejected": -12.636192321777344, |
|
"logps/chosen": -166.77676391601562, |
|
"logps/rejected": -312.1618347167969, |
|
"loss": 0.1048, |
|
"rewards/accuracies": 0.9687500596046448, |
|
"rewards/chosen": -2.8698370456695557, |
|
"rewards/margins": 4.286244869232178, |
|
"rewards/rejected": -7.156081676483154, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.587263522515321, |
|
"grad_norm": 0.6949617730879493, |
|
"learning_rate": 6.178462214616205e-07, |
|
"logits/chosen": -13.830046653747559, |
|
"logits/rejected": -13.129570007324219, |
|
"logps/chosen": -169.82052612304688, |
|
"logps/rejected": -319.1707763671875, |
|
"loss": 0.0789, |
|
"rewards/accuracies": 0.9781249761581421, |
|
"rewards/chosen": -2.8928017616271973, |
|
"rewards/margins": 4.442351818084717, |
|
"rewards/rejected": -7.335153579711914, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 1.5979216626698642, |
|
"grad_norm": 1.5654631211094208, |
|
"learning_rate": 5.875499856444358e-07, |
|
"logits/chosen": -14.716095924377441, |
|
"logits/rejected": -13.551712989807129, |
|
"logps/chosen": -175.95899963378906, |
|
"logps/rejected": -321.272216796875, |
|
"loss": 0.1112, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -3.0603079795837402, |
|
"rewards/margins": 4.3986101150512695, |
|
"rewards/rejected": -7.45891809463501, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.5979216626698642, |
|
"eval_logits/chosen": -14.05460262298584, |
|
"eval_logits/rejected": -13.562302589416504, |
|
"eval_logps/chosen": -214.49014282226562, |
|
"eval_logps/rejected": -270.72174072265625, |
|
"eval_loss": 0.5428091883659363, |
|
"eval_rewards/accuracies": 0.7598684430122375, |
|
"eval_rewards/chosen": -4.2436323165893555, |
|
"eval_rewards/margins": 1.7064825296401978, |
|
"eval_rewards/rejected": -5.950114727020264, |
|
"eval_runtime": 15.8423, |
|
"eval_samples_per_second": 19.189, |
|
"eval_steps_per_second": 2.399, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.6085798028244072, |
|
"grad_norm": 0.9516478324052884, |
|
"learning_rate": 5.579161682734549e-07, |
|
"logits/chosen": -14.098213195800781, |
|
"logits/rejected": -12.930061340332031, |
|
"logps/chosen": -178.26329040527344, |
|
"logps/rejected": -320.6363220214844, |
|
"loss": 0.0932, |
|
"rewards/accuracies": 0.971875011920929, |
|
"rewards/chosen": -3.134605646133423, |
|
"rewards/margins": 4.266073226928711, |
|
"rewards/rejected": -7.400678634643555, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 1.61923794297895, |
|
"grad_norm": 1.603241108722834, |
|
"learning_rate": 5.289550336625732e-07, |
|
"logits/chosen": -14.05746841430664, |
|
"logits/rejected": -12.830437660217285, |
|
"logps/chosen": -179.49627685546875, |
|
"logps/rejected": -328.4458923339844, |
|
"loss": 0.0746, |
|
"rewards/accuracies": 0.9874999523162842, |
|
"rewards/chosen": -3.172783136367798, |
|
"rewards/margins": 4.395094394683838, |
|
"rewards/rejected": -7.567877769470215, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.6298960831334932, |
|
"grad_norm": 0.694466819318522, |
|
"learning_rate": 5.00676613127456e-07, |
|
"logits/chosen": -14.262409210205078, |
|
"logits/rejected": -12.783060073852539, |
|
"logps/chosen": -170.38018798828125, |
|
"logps/rejected": -322.17864990234375, |
|
"loss": 0.0753, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -2.8670201301574707, |
|
"rewards/margins": 4.609315872192383, |
|
"rewards/rejected": -7.476335525512695, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.6405542232880364, |
|
"grad_norm": 1.0784837637350384, |
|
"learning_rate": 4.730907015109759e-07, |
|
"logits/chosen": -14.312047958374023, |
|
"logits/rejected": -13.048952102661133, |
|
"logps/chosen": -167.04627990722656, |
|
"logps/rejected": -322.6005859375, |
|
"loss": 0.0689, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -2.757124900817871, |
|
"rewards/margins": 4.649479866027832, |
|
"rewards/rejected": -7.406603813171387, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.6512123634425793, |
|
"grad_norm": 0.8480919582335199, |
|
"learning_rate": 4.462068537905559e-07, |
|
"logits/chosen": -14.234856605529785, |
|
"logits/rejected": -13.1453218460083, |
|
"logps/chosen": -167.7684326171875, |
|
"logps/rejected": -317.6662902832031, |
|
"loss": 0.0919, |
|
"rewards/accuracies": 0.9781250357627869, |
|
"rewards/chosen": -2.8255579471588135, |
|
"rewards/margins": 4.447301387786865, |
|
"rewards/rejected": -7.272860050201416, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.6618705035971222, |
|
"grad_norm": 1.7501770911535344, |
|
"learning_rate": 4.200343817685981e-07, |
|
"logits/chosen": -14.393514633178711, |
|
"logits/rejected": -13.625202178955078, |
|
"logps/chosen": -167.9121551513672, |
|
"logps/rejected": -309.9030456542969, |
|
"loss": 0.1161, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -2.890589475631714, |
|
"rewards/margins": 4.238266944885254, |
|
"rewards/rejected": -7.128856182098389, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.6725286437516653, |
|
"grad_norm": 0.8402305008718728, |
|
"learning_rate": 3.9458235084713526e-07, |
|
"logits/chosen": -13.896049499511719, |
|
"logits/rejected": -12.816546440124512, |
|
"logps/chosen": -180.0813751220703, |
|
"logps/rejected": -333.249755859375, |
|
"loss": 0.0914, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -3.1385228633880615, |
|
"rewards/margins": 4.571475982666016, |
|
"rewards/rejected": -7.70999813079834, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.6831867839062085, |
|
"grad_norm": 0.9474634681564772, |
|
"learning_rate": 3.698595768878363e-07, |
|
"logits/chosen": -14.134432792663574, |
|
"logits/rejected": -13.108856201171875, |
|
"logps/chosen": -174.18850708007812, |
|
"logps/rejected": -320.171875, |
|
"loss": 0.0908, |
|
"rewards/accuracies": 0.9749999642372131, |
|
"rewards/chosen": -2.9747958183288574, |
|
"rewards/margins": 4.407635688781738, |
|
"rewards/rejected": -7.3824310302734375, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.6938449240607514, |
|
"grad_norm": 1.0555420214028237, |
|
"learning_rate": 3.4587462315844143e-07, |
|
"logits/chosen": -13.897963523864746, |
|
"logits/rejected": -13.152566909790039, |
|
"logps/chosen": -173.00729370117188, |
|
"logps/rejected": -322.86187744140625, |
|
"loss": 0.1218, |
|
"rewards/accuracies": 0.9593750238418579, |
|
"rewards/chosen": -3.067225694656372, |
|
"rewards/margins": 4.399234771728516, |
|
"rewards/rejected": -7.466460227966309, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.7045030642152943, |
|
"grad_norm": 1.3255479560822947, |
|
"learning_rate": 3.226357973666888e-07, |
|
"logits/chosen": -13.98996353149414, |
|
"logits/rejected": -12.819202423095703, |
|
"logps/chosen": -176.08932495117188, |
|
"logps/rejected": -324.403564453125, |
|
"loss": 0.0948, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -3.055687189102173, |
|
"rewards/margins": 4.420422077178955, |
|
"rewards/rejected": -7.476109027862549, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.7151612043698374, |
|
"grad_norm": 1.354122557294898, |
|
"learning_rate": 3.0015114878275827e-07, |
|
"logits/chosen": -13.95250415802002, |
|
"logits/rejected": -13.117683410644531, |
|
"logps/chosen": -177.43582153320312, |
|
"logps/rejected": -326.236572265625, |
|
"loss": 0.0717, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -3.1647706031799316, |
|
"rewards/margins": 4.427828788757324, |
|
"rewards/rejected": -7.592598915100098, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.7258193445243806, |
|
"grad_norm": 0.5089299905557714, |
|
"learning_rate": 2.784284654512351e-07, |
|
"logits/chosen": -13.614534378051758, |
|
"logits/rejected": -12.581478118896484, |
|
"logps/chosen": -170.75653076171875, |
|
"logps/rejected": -316.6766357421875, |
|
"loss": 0.1034, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -2.96988844871521, |
|
"rewards/margins": 4.376916408538818, |
|
"rewards/rejected": -7.346804618835449, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.7364774846789235, |
|
"grad_norm": 1.2106293779004877, |
|
"learning_rate": 2.574752714935502e-07, |
|
"logits/chosen": -13.51029109954834, |
|
"logits/rejected": -12.730785369873047, |
|
"logps/chosen": -177.49710083007812, |
|
"logps/rejected": -317.45697021484375, |
|
"loss": 0.1123, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -3.138115882873535, |
|
"rewards/margins": 4.164592742919922, |
|
"rewards/rejected": -7.302708625793457, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.7471356248334664, |
|
"grad_norm": 0.6426563250757653, |
|
"learning_rate": 2.372988245018401e-07, |
|
"logits/chosen": -13.782270431518555, |
|
"logits/rejected": -12.812226295471191, |
|
"logps/chosen": -172.2945556640625, |
|
"logps/rejected": -303.7276306152344, |
|
"loss": 0.139, |
|
"rewards/accuracies": 0.940625011920929, |
|
"rewards/chosen": -3.019486665725708, |
|
"rewards/margins": 3.9754087924957275, |
|
"rewards/rejected": -6.994894981384277, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.7577937649880095, |
|
"grad_norm": 1.2845177208550855, |
|
"learning_rate": 2.1790611302512115e-07, |
|
"logits/chosen": -13.354096412658691, |
|
"logits/rejected": -12.3941068649292, |
|
"logps/chosen": -179.09466552734375, |
|
"logps/rejected": -320.859375, |
|
"loss": 0.0803, |
|
"rewards/accuracies": 0.9812500476837158, |
|
"rewards/chosen": -3.057070016860962, |
|
"rewards/margins": 4.323160171508789, |
|
"rewards/rejected": -7.380229949951172, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.7684519051425527, |
|
"grad_norm": 1.2899678999572404, |
|
"learning_rate": 1.9930385414865388e-07, |
|
"logits/chosen": -13.521492004394531, |
|
"logits/rejected": -12.732352256774902, |
|
"logps/chosen": -171.2978515625, |
|
"logps/rejected": -315.2615051269531, |
|
"loss": 0.1062, |
|
"rewards/accuracies": 0.9562500715255737, |
|
"rewards/chosen": -2.953364849090576, |
|
"rewards/margins": 4.318905830383301, |
|
"rewards/rejected": -7.272271156311035, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.7791100452970956, |
|
"grad_norm": 0.7690835635352199, |
|
"learning_rate": 1.8149849116733674e-07, |
|
"logits/chosen": -13.898114204406738, |
|
"logits/rejected": -12.968473434448242, |
|
"logps/chosen": -167.1031036376953, |
|
"logps/rejected": -312.3338623046875, |
|
"loss": 0.1, |
|
"rewards/accuracies": 0.9687500596046448, |
|
"rewards/chosen": -2.8572001457214355, |
|
"rewards/margins": 4.320859432220459, |
|
"rewards/rejected": -7.1780595779418945, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.7897681854516387, |
|
"grad_norm": 1.2141716098002162, |
|
"learning_rate": 1.6449619135393086e-07, |
|
"logits/chosen": -13.510313034057617, |
|
"logits/rejected": -12.92930793762207, |
|
"logps/chosen": -178.10476684570312, |
|
"logps/rejected": -315.1727600097656, |
|
"loss": 0.0969, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -3.1379470825195312, |
|
"rewards/margins": 4.095012664794922, |
|
"rewards/rejected": -7.232959270477295, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.8004263256061819, |
|
"grad_norm": 0.9443691245981232, |
|
"learning_rate": 1.4830284382289146e-07, |
|
"logits/chosen": -13.978818893432617, |
|
"logits/rejected": -12.687828063964844, |
|
"logps/chosen": -171.1253204345703, |
|
"logps/rejected": -315.90771484375, |
|
"loss": 0.0959, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -2.9089090824127197, |
|
"rewards/margins": 4.360543727874756, |
|
"rewards/rejected": -7.2694525718688965, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 1.8110844657607248, |
|
"grad_norm": 2.0824770673503536, |
|
"learning_rate": 1.329240574905452e-07, |
|
"logits/chosen": -13.543804168701172, |
|
"logits/rejected": -12.867536544799805, |
|
"logps/chosen": -180.95199584960938, |
|
"logps/rejected": -316.64752197265625, |
|
"loss": 0.1557, |
|
"rewards/accuracies": 0.9406250715255737, |
|
"rewards/chosen": -3.1708712577819824, |
|
"rewards/margins": 4.100353240966797, |
|
"rewards/rejected": -7.271224021911621, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.8217426059152677, |
|
"grad_norm": 0.94408152816753, |
|
"learning_rate": 1.1836515913232178e-07, |
|
"logits/chosen": -13.966662406921387, |
|
"logits/rejected": -12.552648544311523, |
|
"logps/chosen": -183.93409729003906, |
|
"logps/rejected": -319.31768798828125, |
|
"loss": 0.1111, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -3.2507832050323486, |
|
"rewards/margins": 4.125080108642578, |
|
"rewards/rejected": -7.375863075256348, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.8324007460698108, |
|
"grad_norm": 1.2906115190646992, |
|
"learning_rate": 1.0463119153770989e-07, |
|
"logits/chosen": -13.794037818908691, |
|
"logits/rejected": -13.117594718933105, |
|
"logps/chosen": -172.34042358398438, |
|
"logps/rejected": -326.85455322265625, |
|
"loss": 0.0818, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -2.9182324409484863, |
|
"rewards/margins": 4.589561462402344, |
|
"rewards/rejected": -7.50779390335083, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.843058886224354, |
|
"grad_norm": 0.7585344477278878, |
|
"learning_rate": 9.172691176357635e-08, |
|
"logits/chosen": -13.552767753601074, |
|
"logits/rejected": -12.8858642578125, |
|
"logps/chosen": -172.93157958984375, |
|
"logps/rejected": -315.7481689453125, |
|
"loss": 0.1076, |
|
"rewards/accuracies": 0.9656250476837158, |
|
"rewards/chosen": -2.9512555599212646, |
|
"rewards/margins": 4.305943965911865, |
|
"rewards/rejected": -7.257199287414551, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.853717026378897, |
|
"grad_norm": 0.9566362031211434, |
|
"learning_rate": 7.965678948645833e-08, |
|
"logits/chosen": -14.06009292602539, |
|
"logits/rejected": -12.98459529876709, |
|
"logps/chosen": -168.14500427246094, |
|
"logps/rejected": -314.2690124511719, |
|
"loss": 0.0933, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -2.8941597938537598, |
|
"rewards/margins": 4.334750175476074, |
|
"rewards/rejected": -7.228910446166992, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.8643751665334398, |
|
"grad_norm": 2.140843667836219, |
|
"learning_rate": 6.84250054543928e-08, |
|
"logits/chosen": -13.609771728515625, |
|
"logits/rejected": -12.870636940002441, |
|
"logps/chosen": -172.33584594726562, |
|
"logps/rejected": -312.53826904296875, |
|
"loss": 0.1044, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -2.9526524543762207, |
|
"rewards/margins": 4.216845989227295, |
|
"rewards/rejected": -7.169497966766357, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.875033306687983, |
|
"grad_norm": 0.42670747932048897, |
|
"learning_rate": 5.803545003882555e-08, |
|
"logits/chosen": -13.57308578491211, |
|
"logits/rejected": -12.556329727172852, |
|
"logps/chosen": -168.01043701171875, |
|
"logps/rejected": -317.0450134277344, |
|
"loss": 0.0811, |
|
"rewards/accuracies": 0.9781250357627869, |
|
"rewards/chosen": -2.7719998359680176, |
|
"rewards/margins": 4.521597862243652, |
|
"rewards/rejected": -7.293597221374512, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.885691446842526, |
|
"grad_norm": 1.090062179029512, |
|
"learning_rate": 4.849172188709589e-08, |
|
"logits/chosen": -13.583359718322754, |
|
"logits/rejected": -12.551968574523926, |
|
"logps/chosen": -174.449462890625, |
|
"logps/rejected": -310.62396240234375, |
|
"loss": 0.1112, |
|
"rewards/accuracies": 0.9531250596046448, |
|
"rewards/chosen": -3.072873592376709, |
|
"rewards/margins": 4.093639373779297, |
|
"rewards/rejected": -7.166513442993164, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.896349586997069, |
|
"grad_norm": 0.7616055944003208, |
|
"learning_rate": 3.979712667596669e-08, |
|
"logits/chosen": -13.73027229309082, |
|
"logits/rejected": -12.876806259155273, |
|
"logps/chosen": -170.3317413330078, |
|
"logps/rejected": -314.7184753417969, |
|
"loss": 0.1202, |
|
"rewards/accuracies": 0.9625000357627869, |
|
"rewards/chosen": -2.9569485187530518, |
|
"rewards/margins": 4.27473783493042, |
|
"rewards/rejected": -7.231686592102051, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.907007727151612, |
|
"grad_norm": 1.3771322117176021, |
|
"learning_rate": 3.195467596663254e-08, |
|
"logits/chosen": -13.845719337463379, |
|
"logits/rejected": -12.682126998901367, |
|
"logps/chosen": -168.68157958984375, |
|
"logps/rejected": -307.2587890625, |
|
"loss": 0.1194, |
|
"rewards/accuracies": 0.965624988079071, |
|
"rewards/chosen": -2.8327126502990723, |
|
"rewards/margins": 4.253602504730225, |
|
"rewards/rejected": -7.086314678192139, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 1.917665867306155, |
|
"grad_norm": 1.4518895560997447, |
|
"learning_rate": 2.496708616160082e-08, |
|
"logits/chosen": -13.437941551208496, |
|
"logits/rejected": -12.930294036865234, |
|
"logps/chosen": -167.45205688476562, |
|
"logps/rejected": -315.0567932128906, |
|
"loss": 0.0802, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -2.8053293228149414, |
|
"rewards/margins": 4.396106719970703, |
|
"rewards/rejected": -7.2014360427856445, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.9283240074606982, |
|
"grad_norm": 1.0123274736000365, |
|
"learning_rate": 1.883677756380542e-08, |
|
"logits/chosen": -13.56306266784668, |
|
"logits/rejected": -12.563457489013672, |
|
"logps/chosen": -176.3988037109375, |
|
"logps/rejected": -324.07843017578125, |
|
"loss": 0.1006, |
|
"rewards/accuracies": 0.9531250596046448, |
|
"rewards/chosen": -3.069662094116211, |
|
"rewards/margins": 4.434683799743652, |
|
"rewards/rejected": -7.504345417022705, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 1.9389821476152411, |
|
"grad_norm": 0.5960069867557609, |
|
"learning_rate": 1.3565873538283758e-08, |
|
"logits/chosen": -13.973889350891113, |
|
"logits/rejected": -12.849482536315918, |
|
"logps/chosen": -171.85963439941406, |
|
"logps/rejected": -319.53265380859375, |
|
"loss": 0.0858, |
|
"rewards/accuracies": 0.9781250357627869, |
|
"rewards/chosen": -2.9035487174987793, |
|
"rewards/margins": 4.421201705932617, |
|
"rewards/rejected": -7.324750900268555, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.949640287769784, |
|
"grad_norm": 1.337606416453446, |
|
"learning_rate": 9.156199776702567e-09, |
|
"logits/chosen": -13.685622215270996, |
|
"logits/rejected": -12.832694053649902, |
|
"logps/chosen": -172.3668975830078, |
|
"logps/rejected": -326.0023193359375, |
|
"loss": 0.0814, |
|
"rewards/accuracies": 0.9812500476837158, |
|
"rewards/chosen": -2.9439876079559326, |
|
"rewards/margins": 4.572345733642578, |
|
"rewards/rejected": -7.516333103179932, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.9602984279243272, |
|
"grad_norm": 1.2290642069022464, |
|
"learning_rate": 5.609283664990694e-09, |
|
"logits/chosen": -13.59455680847168, |
|
"logits/rejected": -12.687222480773926, |
|
"logps/chosen": -170.38259887695312, |
|
"logps/rejected": -317.81622314453125, |
|
"loss": 0.1013, |
|
"rewards/accuracies": 0.9656250476837158, |
|
"rewards/chosen": -2.909156560897827, |
|
"rewards/margins": 4.408141613006592, |
|
"rewards/rejected": -7.31729793548584, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.9709565680788703, |
|
"grad_norm": 1.94956327776971, |
|
"learning_rate": 2.9263537542958962e-09, |
|
"logits/chosen": -13.74908447265625, |
|
"logits/rejected": -12.846452713012695, |
|
"logps/chosen": -171.89051818847656, |
|
"logps/rejected": -321.7201843261719, |
|
"loss": 0.1104, |
|
"rewards/accuracies": 0.9562500715255737, |
|
"rewards/chosen": -3.0273971557617188, |
|
"rewards/margins": 4.435695648193359, |
|
"rewards/rejected": -7.463093280792236, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.9816147082334132, |
|
"grad_norm": 0.8638582388170514, |
|
"learning_rate": 1.1083393354488492e-09, |
|
"logits/chosen": -13.83696174621582, |
|
"logits/rejected": -12.580759048461914, |
|
"logps/chosen": -177.7276611328125, |
|
"logps/rejected": -320.6488952636719, |
|
"loss": 0.1063, |
|
"rewards/accuracies": 0.9593749642372131, |
|
"rewards/chosen": -3.055222511291504, |
|
"rewards/margins": 4.306619167327881, |
|
"rewards/rejected": -7.361841678619385, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.9922728483879562, |
|
"grad_norm": 0.5499522542587156, |
|
"learning_rate": 1.5587011708340093e-10, |
|
"logits/chosen": -13.817309379577637, |
|
"logits/rejected": -12.837597846984863, |
|
"logps/chosen": -166.30062866210938, |
|
"logps/rejected": -316.75640869140625, |
|
"loss": 0.0837, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -2.823861598968506, |
|
"rewards/margins": 4.503476142883301, |
|
"rewards/rejected": -7.327337741851807, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.998667732480682, |
|
"step": 1876, |
|
"total_flos": 174218888085504.0, |
|
"train_loss": 0.33488652056087054, |
|
"train_runtime": 7850.5465, |
|
"train_samples_per_second": 7.649, |
|
"train_steps_per_second": 0.239 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1876, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 20000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 174218888085504.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|