{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 20,
"global_step": 8786,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0022765430693491933,
"grad_norm": 0.469247430562973,
"learning_rate": 0.0002,
"loss": 1.9469,
"step": 20
},
{
"epoch": 0.004553086138698387,
"grad_norm": 0.6239348649978638,
"learning_rate": 0.0002,
"loss": 1.556,
"step": 40
},
{
"epoch": 0.006829629208047579,
"grad_norm": 0.4587397277355194,
"learning_rate": 0.0002,
"loss": 1.4108,
"step": 60
},
{
"epoch": 0.009106172277396773,
"grad_norm": 0.42919760942459106,
"learning_rate": 0.0002,
"loss": 1.3352,
"step": 80
},
{
"epoch": 0.011382715346745967,
"grad_norm": 0.46492573618888855,
"learning_rate": 0.0002,
"loss": 1.3388,
"step": 100
},
{
"epoch": 0.013659258416095159,
"grad_norm": 0.453070729970932,
"learning_rate": 0.0002,
"loss": 1.2295,
"step": 120
},
{
"epoch": 0.015935801485444354,
"grad_norm": 0.4760678708553314,
"learning_rate": 0.0002,
"loss": 1.2493,
"step": 140
},
{
"epoch": 0.018212344554793546,
"grad_norm": 0.4545675814151764,
"learning_rate": 0.0002,
"loss": 1.215,
"step": 160
},
{
"epoch": 0.020488887624142738,
"grad_norm": 0.4772235155105591,
"learning_rate": 0.0002,
"loss": 1.2173,
"step": 180
},
{
"epoch": 0.022765430693491934,
"grad_norm": 0.4403541088104248,
"learning_rate": 0.0002,
"loss": 1.1058,
"step": 200
},
{
"epoch": 0.025041973762841126,
"grad_norm": 0.511401355266571,
"learning_rate": 0.0002,
"loss": 1.1049,
"step": 220
},
{
"epoch": 0.027318516832190318,
"grad_norm": 0.3809013366699219,
"learning_rate": 0.0002,
"loss": 1.0498,
"step": 240
},
{
"epoch": 0.029595059901539513,
"grad_norm": 0.3980010449886322,
"learning_rate": 0.0002,
"loss": 0.9842,
"step": 260
},
{
"epoch": 0.03187160297088871,
"grad_norm": 0.5747793316841125,
"learning_rate": 0.0002,
"loss": 1.0988,
"step": 280
},
{
"epoch": 0.0341481460402379,
"grad_norm": 0.46827971935272217,
"learning_rate": 0.0002,
"loss": 1.0367,
"step": 300
},
{
"epoch": 0.03642468910958709,
"grad_norm": 0.4702209532260895,
"learning_rate": 0.0002,
"loss": 1.066,
"step": 320
},
{
"epoch": 0.038701232178936285,
"grad_norm": 0.5084996223449707,
"learning_rate": 0.0002,
"loss": 1.0652,
"step": 340
},
{
"epoch": 0.040977775248285477,
"grad_norm": 0.3944012522697449,
"learning_rate": 0.0002,
"loss": 0.9642,
"step": 360
},
{
"epoch": 0.04325431831763467,
"grad_norm": 0.40287718176841736,
"learning_rate": 0.0002,
"loss": 0.9431,
"step": 380
},
{
"epoch": 0.04553086138698387,
"grad_norm": 0.4629077613353729,
"learning_rate": 0.0002,
"loss": 0.9615,
"step": 400
},
{
"epoch": 0.04780740445633306,
"grad_norm": 0.44827452301979065,
"learning_rate": 0.0002,
"loss": 0.9434,
"step": 420
},
{
"epoch": 0.05008394752568225,
"grad_norm": 0.41644710302352905,
"learning_rate": 0.0002,
"loss": 0.9241,
"step": 440
},
{
"epoch": 0.05236049059503144,
"grad_norm": 0.4760611057281494,
"learning_rate": 0.0002,
"loss": 0.8475,
"step": 460
},
{
"epoch": 0.054637033664380635,
"grad_norm": 0.45987364649772644,
"learning_rate": 0.0002,
"loss": 0.898,
"step": 480
},
{
"epoch": 0.056913576733729834,
"grad_norm": 0.4840068817138672,
"learning_rate": 0.0002,
"loss": 0.9611,
"step": 500
},
{
"epoch": 0.059190119803079026,
"grad_norm": 0.40314286947250366,
"learning_rate": 0.0002,
"loss": 0.8884,
"step": 520
},
{
"epoch": 0.06146666287242822,
"grad_norm": 0.5458106398582458,
"learning_rate": 0.0002,
"loss": 0.8939,
"step": 540
},
{
"epoch": 0.06374320594177742,
"grad_norm": 0.5420896410942078,
"learning_rate": 0.0002,
"loss": 0.8265,
"step": 560
},
{
"epoch": 0.0660197490111266,
"grad_norm": 0.5356529355049133,
"learning_rate": 0.0002,
"loss": 0.8432,
"step": 580
},
{
"epoch": 0.0682962920804758,
"grad_norm": 0.5064826011657715,
"learning_rate": 0.0002,
"loss": 0.8272,
"step": 600
},
{
"epoch": 0.07057283514982499,
"grad_norm": 0.4143005311489105,
"learning_rate": 0.0002,
"loss": 0.7854,
"step": 620
},
{
"epoch": 0.07284937821917419,
"grad_norm": 0.3817225396633148,
"learning_rate": 0.0002,
"loss": 0.8219,
"step": 640
},
{
"epoch": 0.07512592128852338,
"grad_norm": 0.5336936712265015,
"learning_rate": 0.0002,
"loss": 0.7977,
"step": 660
},
{
"epoch": 0.07740246435787257,
"grad_norm": 0.5397001504898071,
"learning_rate": 0.0002,
"loss": 0.8117,
"step": 680
},
{
"epoch": 0.07967900742722177,
"grad_norm": 0.4968530535697937,
"learning_rate": 0.0002,
"loss": 0.7527,
"step": 700
},
{
"epoch": 0.08195555049657095,
"grad_norm": 0.4084935784339905,
"learning_rate": 0.0002,
"loss": 0.651,
"step": 720
},
{
"epoch": 0.08423209356592015,
"grad_norm": 0.48406732082366943,
"learning_rate": 0.0002,
"loss": 0.7352,
"step": 740
},
{
"epoch": 0.08650863663526934,
"grad_norm": 0.5246301293373108,
"learning_rate": 0.0002,
"loss": 0.7785,
"step": 760
},
{
"epoch": 0.08878517970461854,
"grad_norm": 0.5729619264602661,
"learning_rate": 0.0002,
"loss": 0.7646,
"step": 780
},
{
"epoch": 0.09106172277396773,
"grad_norm": 0.5675190687179565,
"learning_rate": 0.0002,
"loss": 0.7784,
"step": 800
},
{
"epoch": 0.09333826584331692,
"grad_norm": 0.4682878255844116,
"learning_rate": 0.0002,
"loss": 0.7284,
"step": 820
},
{
"epoch": 0.09561480891266612,
"grad_norm": 0.5388545393943787,
"learning_rate": 0.0002,
"loss": 0.6959,
"step": 840
},
{
"epoch": 0.0978913519820153,
"grad_norm": 0.48806509375572205,
"learning_rate": 0.0002,
"loss": 0.7585,
"step": 860
},
{
"epoch": 0.1001678950513645,
"grad_norm": 0.4149261713027954,
"learning_rate": 0.0002,
"loss": 0.6978,
"step": 880
},
{
"epoch": 0.1024444381207137,
"grad_norm": 0.4971105754375458,
"learning_rate": 0.0002,
"loss": 0.7103,
"step": 900
},
{
"epoch": 0.10472098119006289,
"grad_norm": 0.5066735744476318,
"learning_rate": 0.0002,
"loss": 0.6854,
"step": 920
},
{
"epoch": 0.10699752425941209,
"grad_norm": 0.4922661781311035,
"learning_rate": 0.0002,
"loss": 0.6231,
"step": 940
},
{
"epoch": 0.10927406732876127,
"grad_norm": 0.5949555039405823,
"learning_rate": 0.0002,
"loss": 0.6813,
"step": 960
},
{
"epoch": 0.11155061039811047,
"grad_norm": 0.581446647644043,
"learning_rate": 0.0002,
"loss": 0.6174,
"step": 980
},
{
"epoch": 0.11382715346745967,
"grad_norm": 0.6152529716491699,
"learning_rate": 0.0002,
"loss": 0.6405,
"step": 1000
},
{
"epoch": 0.11610369653680885,
"grad_norm": 0.5986836552619934,
"learning_rate": 0.0002,
"loss": 0.5776,
"step": 1020
},
{
"epoch": 0.11838023960615805,
"grad_norm": 0.4255094528198242,
"learning_rate": 0.0002,
"loss": 0.6576,
"step": 1040
},
{
"epoch": 0.12065678267550724,
"grad_norm": 0.4563849866390228,
"learning_rate": 0.0002,
"loss": 0.6647,
"step": 1060
},
{
"epoch": 0.12293332574485644,
"grad_norm": 0.593227744102478,
"learning_rate": 0.0002,
"loss": 0.6043,
"step": 1080
},
{
"epoch": 0.12520986881420562,
"grad_norm": 0.47059598565101624,
"learning_rate": 0.0002,
"loss": 0.591,
"step": 1100
},
{
"epoch": 0.12748641188355483,
"grad_norm": 0.5013225674629211,
"learning_rate": 0.0002,
"loss": 0.5947,
"step": 1120
},
{
"epoch": 0.12976295495290402,
"grad_norm": 0.46772757172584534,
"learning_rate": 0.0002,
"loss": 0.6292,
"step": 1140
},
{
"epoch": 0.1320394980222532,
"grad_norm": 0.5844313502311707,
"learning_rate": 0.0002,
"loss": 0.6128,
"step": 1160
},
{
"epoch": 0.1343160410916024,
"grad_norm": 0.5295489430427551,
"learning_rate": 0.0002,
"loss": 0.6064,
"step": 1180
},
{
"epoch": 0.1365925841609516,
"grad_norm": 0.4482004642486572,
"learning_rate": 0.0002,
"loss": 0.5899,
"step": 1200
},
{
"epoch": 0.1388691272303008,
"grad_norm": 0.6281692981719971,
"learning_rate": 0.0002,
"loss": 0.6109,
"step": 1220
},
{
"epoch": 0.14114567029964997,
"grad_norm": 0.4718242585659027,
"learning_rate": 0.0002,
"loss": 0.5857,
"step": 1240
},
{
"epoch": 0.14342221336899919,
"grad_norm": 0.5219341516494751,
"learning_rate": 0.0002,
"loss": 0.5581,
"step": 1260
},
{
"epoch": 0.14569875643834837,
"grad_norm": 0.47050580382347107,
"learning_rate": 0.0002,
"loss": 0.6368,
"step": 1280
},
{
"epoch": 0.14797529950769756,
"grad_norm": 0.5425338745117188,
"learning_rate": 0.0002,
"loss": 0.5626,
"step": 1300
},
{
"epoch": 0.15025184257704677,
"grad_norm": 0.4944934844970703,
"learning_rate": 0.0002,
"loss": 0.5337,
"step": 1320
},
{
"epoch": 0.15252838564639595,
"grad_norm": 0.5921599864959717,
"learning_rate": 0.0002,
"loss": 0.5672,
"step": 1340
},
{
"epoch": 0.15480492871574514,
"grad_norm": 0.4866751730442047,
"learning_rate": 0.0002,
"loss": 0.5305,
"step": 1360
},
{
"epoch": 0.15708147178509432,
"grad_norm": 0.62166827917099,
"learning_rate": 0.0002,
"loss": 0.5737,
"step": 1380
},
{
"epoch": 0.15935801485444354,
"grad_norm": 0.5006982684135437,
"learning_rate": 0.0002,
"loss": 0.5542,
"step": 1400
},
{
"epoch": 0.16163455792379272,
"grad_norm": 0.6090095043182373,
"learning_rate": 0.0002,
"loss": 0.5215,
"step": 1420
},
{
"epoch": 0.1639111009931419,
"grad_norm": 0.4260309636592865,
"learning_rate": 0.0002,
"loss": 0.5535,
"step": 1440
},
{
"epoch": 0.16618764406249112,
"grad_norm": 0.48657718300819397,
"learning_rate": 0.0002,
"loss": 0.5441,
"step": 1460
},
{
"epoch": 0.1684641871318403,
"grad_norm": 0.43275007605552673,
"learning_rate": 0.0002,
"loss": 0.5161,
"step": 1480
},
{
"epoch": 0.1707407302011895,
"grad_norm": 0.4225006699562073,
"learning_rate": 0.0002,
"loss": 0.512,
"step": 1500
},
{
"epoch": 0.17301727327053867,
"grad_norm": 0.5176346302032471,
"learning_rate": 0.0002,
"loss": 0.5384,
"step": 1520
},
{
"epoch": 0.1752938163398879,
"grad_norm": 0.6492679715156555,
"learning_rate": 0.0002,
"loss": 0.4981,
"step": 1540
},
{
"epoch": 0.17757035940923707,
"grad_norm": 0.5511758327484131,
"learning_rate": 0.0002,
"loss": 0.5289,
"step": 1560
},
{
"epoch": 0.17984690247858626,
"grad_norm": 0.5211341977119446,
"learning_rate": 0.0002,
"loss": 0.5002,
"step": 1580
},
{
"epoch": 0.18212344554793547,
"grad_norm": 0.5488260984420776,
"learning_rate": 0.0002,
"loss": 0.5178,
"step": 1600
},
{
"epoch": 0.18439998861728465,
"grad_norm": 0.6779264211654663,
"learning_rate": 0.0002,
"loss": 0.5155,
"step": 1620
},
{
"epoch": 0.18667653168663384,
"grad_norm": 0.502919614315033,
"learning_rate": 0.0002,
"loss": 0.4923,
"step": 1640
},
{
"epoch": 0.18895307475598305,
"grad_norm": 0.4989205300807953,
"learning_rate": 0.0002,
"loss": 0.4825,
"step": 1660
},
{
"epoch": 0.19122961782533224,
"grad_norm": 0.5155315399169922,
"learning_rate": 0.0002,
"loss": 0.4796,
"step": 1680
},
{
"epoch": 0.19350616089468142,
"grad_norm": 0.5648865699768066,
"learning_rate": 0.0002,
"loss": 0.4985,
"step": 1700
},
{
"epoch": 0.1957827039640306,
"grad_norm": 0.606176495552063,
"learning_rate": 0.0002,
"loss": 0.4819,
"step": 1720
},
{
"epoch": 0.19805924703337982,
"grad_norm": 0.5440786480903625,
"learning_rate": 0.0002,
"loss": 0.5213,
"step": 1740
},
{
"epoch": 0.200335790102729,
"grad_norm": 0.43152502179145813,
"learning_rate": 0.0002,
"loss": 0.4429,
"step": 1760
},
{
"epoch": 0.2026123331720782,
"grad_norm": 0.5701313614845276,
"learning_rate": 0.0002,
"loss": 0.4486,
"step": 1780
},
{
"epoch": 0.2048888762414274,
"grad_norm": 0.565666913986206,
"learning_rate": 0.0002,
"loss": 0.4561,
"step": 1800
},
{
"epoch": 0.2071654193107766,
"grad_norm": 0.5725598931312561,
"learning_rate": 0.0002,
"loss": 0.4757,
"step": 1820
},
{
"epoch": 0.20944196238012577,
"grad_norm": 0.4642520248889923,
"learning_rate": 0.0002,
"loss": 0.438,
"step": 1840
},
{
"epoch": 0.21171850544947496,
"grad_norm": 0.6077229976654053,
"learning_rate": 0.0002,
"loss": 0.4295,
"step": 1860
},
{
"epoch": 0.21399504851882417,
"grad_norm": 0.6314090490341187,
"learning_rate": 0.0002,
"loss": 0.449,
"step": 1880
},
{
"epoch": 0.21627159158817336,
"grad_norm": 0.4416756331920624,
"learning_rate": 0.0002,
"loss": 0.4554,
"step": 1900
},
{
"epoch": 0.21854813465752254,
"grad_norm": 0.5278882384300232,
"learning_rate": 0.0002,
"loss": 0.4554,
"step": 1920
},
{
"epoch": 0.22082467772687175,
"grad_norm": 0.45619043707847595,
"learning_rate": 0.0002,
"loss": 0.4868,
"step": 1940
},
{
"epoch": 0.22310122079622094,
"grad_norm": 0.5881581902503967,
"learning_rate": 0.0002,
"loss": 0.4672,
"step": 1960
},
{
"epoch": 0.22537776386557012,
"grad_norm": 0.5379284024238586,
"learning_rate": 0.0002,
"loss": 0.4531,
"step": 1980
},
{
"epoch": 0.22765430693491934,
"grad_norm": 0.5562624931335449,
"learning_rate": 0.0002,
"loss": 0.464,
"step": 2000
},
{
"epoch": 0.22993085000426852,
"grad_norm": 0.554499626159668,
"learning_rate": 0.0002,
"loss": 0.446,
"step": 2020
},
{
"epoch": 0.2322073930736177,
"grad_norm": 0.509219229221344,
"learning_rate": 0.0002,
"loss": 0.4417,
"step": 2040
},
{
"epoch": 0.2344839361429669,
"grad_norm": 0.5206849575042725,
"learning_rate": 0.0002,
"loss": 0.4118,
"step": 2060
},
{
"epoch": 0.2367604792123161,
"grad_norm": 0.548729658126831,
"learning_rate": 0.0002,
"loss": 0.4067,
"step": 2080
},
{
"epoch": 0.2390370222816653,
"grad_norm": 0.4220084846019745,
"learning_rate": 0.0002,
"loss": 0.428,
"step": 2100
},
{
"epoch": 0.24131356535101448,
"grad_norm": 0.5507292747497559,
"learning_rate": 0.0002,
"loss": 0.4176,
"step": 2120
},
{
"epoch": 0.2435901084203637,
"grad_norm": 0.5605701208114624,
"learning_rate": 0.0002,
"loss": 0.4661,
"step": 2140
},
{
"epoch": 0.24586665148971287,
"grad_norm": 0.43142881989479065,
"learning_rate": 0.0002,
"loss": 0.4197,
"step": 2160
},
{
"epoch": 0.24814319455906206,
"grad_norm": 0.47790080308914185,
"learning_rate": 0.0002,
"loss": 0.4568,
"step": 2180
},
{
"epoch": 0.25041973762841124,
"grad_norm": 0.6048968434333801,
"learning_rate": 0.0002,
"loss": 0.4199,
"step": 2200
},
{
"epoch": 0.25269628069776046,
"grad_norm": 0.4925907850265503,
"learning_rate": 0.0002,
"loss": 0.4325,
"step": 2220
},
{
"epoch": 0.25497282376710967,
"grad_norm": 0.5463051199913025,
"learning_rate": 0.0002,
"loss": 0.4549,
"step": 2240
},
{
"epoch": 0.2572493668364588,
"grad_norm": 0.4631319046020508,
"learning_rate": 0.0002,
"loss": 0.3977,
"step": 2260
},
{
"epoch": 0.25952590990580804,
"grad_norm": 0.4965234398841858,
"learning_rate": 0.0002,
"loss": 0.4285,
"step": 2280
},
{
"epoch": 0.2618024529751572,
"grad_norm": 0.5436238646507263,
"learning_rate": 0.0002,
"loss": 0.4039,
"step": 2300
},
{
"epoch": 0.2640789960445064,
"grad_norm": 0.5218191742897034,
"learning_rate": 0.0002,
"loss": 0.4092,
"step": 2320
},
{
"epoch": 0.2663555391138556,
"grad_norm": 0.5417261719703674,
"learning_rate": 0.0002,
"loss": 0.3825,
"step": 2340
},
{
"epoch": 0.2686320821832048,
"grad_norm": 0.6126281023025513,
"learning_rate": 0.0002,
"loss": 0.4391,
"step": 2360
},
{
"epoch": 0.270908625252554,
"grad_norm": 0.4734433889389038,
"learning_rate": 0.0002,
"loss": 0.4151,
"step": 2380
},
{
"epoch": 0.2731851683219032,
"grad_norm": 0.4501429796218872,
"learning_rate": 0.0002,
"loss": 0.4178,
"step": 2400
},
{
"epoch": 0.27546171139125236,
"grad_norm": 0.5258509516716003,
"learning_rate": 0.0002,
"loss": 0.4007,
"step": 2420
},
{
"epoch": 0.2777382544606016,
"grad_norm": 0.47874951362609863,
"learning_rate": 0.0002,
"loss": 0.4245,
"step": 2440
},
{
"epoch": 0.2800147975299508,
"grad_norm": 0.528533399105072,
"learning_rate": 0.0002,
"loss": 0.3794,
"step": 2460
},
{
"epoch": 0.28229134059929994,
"grad_norm": 0.46465063095092773,
"learning_rate": 0.0002,
"loss": 0.4019,
"step": 2480
},
{
"epoch": 0.28456788366864916,
"grad_norm": 0.5217177867889404,
"learning_rate": 0.0002,
"loss": 0.4104,
"step": 2500
},
{
"epoch": 0.28684442673799837,
"grad_norm": 0.510036289691925,
"learning_rate": 0.0002,
"loss": 0.389,
"step": 2520
},
{
"epoch": 0.2891209698073475,
"grad_norm": 0.6968228220939636,
"learning_rate": 0.0002,
"loss": 0.4152,
"step": 2540
},
{
"epoch": 0.29139751287669674,
"grad_norm": 0.4529867470264435,
"learning_rate": 0.0002,
"loss": 0.3987,
"step": 2560
},
{
"epoch": 0.29367405594604595,
"grad_norm": 0.5680263638496399,
"learning_rate": 0.0002,
"loss": 0.3828,
"step": 2580
},
{
"epoch": 0.2959505990153951,
"grad_norm": 0.4892405867576599,
"learning_rate": 0.0002,
"loss": 0.4006,
"step": 2600
},
{
"epoch": 0.2982271420847443,
"grad_norm": 0.47588276863098145,
"learning_rate": 0.0002,
"loss": 0.4197,
"step": 2620
},
{
"epoch": 0.30050368515409354,
"grad_norm": 0.5624070167541504,
"learning_rate": 0.0002,
"loss": 0.3997,
"step": 2640
},
{
"epoch": 0.3027802282234427,
"grad_norm": 0.5434039831161499,
"learning_rate": 0.0002,
"loss": 0.3977,
"step": 2660
},
{
"epoch": 0.3050567712927919,
"grad_norm": 0.5572277903556824,
"learning_rate": 0.0002,
"loss": 0.3966,
"step": 2680
},
{
"epoch": 0.30733331436214106,
"grad_norm": 0.5533374547958374,
"learning_rate": 0.0002,
"loss": 0.3803,
"step": 2700
},
{
"epoch": 0.3096098574314903,
"grad_norm": 0.40596967935562134,
"learning_rate": 0.0002,
"loss": 0.3682,
"step": 2720
},
{
"epoch": 0.3118864005008395,
"grad_norm": 0.4737823009490967,
"learning_rate": 0.0002,
"loss": 0.3761,
"step": 2740
},
{
"epoch": 0.31416294357018865,
"grad_norm": 0.4295174777507782,
"learning_rate": 0.0002,
"loss": 0.4035,
"step": 2760
},
{
"epoch": 0.31643948663953786,
"grad_norm": 0.5348454713821411,
"learning_rate": 0.0002,
"loss": 0.404,
"step": 2780
},
{
"epoch": 0.31871602970888707,
"grad_norm": 0.4819965362548828,
"learning_rate": 0.0002,
"loss": 0.3929,
"step": 2800
},
{
"epoch": 0.32099257277823623,
"grad_norm": 0.5920088291168213,
"learning_rate": 0.0002,
"loss": 0.3798,
"step": 2820
},
{
"epoch": 0.32326911584758544,
"grad_norm": 0.4936531186103821,
"learning_rate": 0.0002,
"loss": 0.3995,
"step": 2840
},
{
"epoch": 0.32554565891693465,
"grad_norm": 0.5252315998077393,
"learning_rate": 0.0002,
"loss": 0.3842,
"step": 2860
},
{
"epoch": 0.3278222019862838,
"grad_norm": 0.5818414688110352,
"learning_rate": 0.0002,
"loss": 0.3533,
"step": 2880
},
{
"epoch": 0.330098745055633,
"grad_norm": 0.44053876399993896,
"learning_rate": 0.0002,
"loss": 0.3402,
"step": 2900
},
{
"epoch": 0.33237528812498224,
"grad_norm": 0.5421345233917236,
"learning_rate": 0.0002,
"loss": 0.3542,
"step": 2920
},
{
"epoch": 0.3346518311943314,
"grad_norm": 0.4642751216888428,
"learning_rate": 0.0002,
"loss": 0.3755,
"step": 2940
},
{
"epoch": 0.3369283742636806,
"grad_norm": 0.5137833952903748,
"learning_rate": 0.0002,
"loss": 0.3602,
"step": 2960
},
{
"epoch": 0.3392049173330298,
"grad_norm": 0.5032792687416077,
"learning_rate": 0.0002,
"loss": 0.3451,
"step": 2980
},
{
"epoch": 0.341481460402379,
"grad_norm": 0.4932720363140106,
"learning_rate": 0.0002,
"loss": 0.384,
"step": 3000
},
{
"epoch": 0.3437580034717282,
"grad_norm": 0.49986231327056885,
"learning_rate": 0.0002,
"loss": 0.3826,
"step": 3020
},
{
"epoch": 0.34603454654107735,
"grad_norm": 0.6325618624687195,
"learning_rate": 0.0002,
"loss": 0.3582,
"step": 3040
},
{
"epoch": 0.34831108961042656,
"grad_norm": 0.5402369499206543,
"learning_rate": 0.0002,
"loss": 0.3706,
"step": 3060
},
{
"epoch": 0.3505876326797758,
"grad_norm": 0.4967012107372284,
"learning_rate": 0.0002,
"loss": 0.3456,
"step": 3080
},
{
"epoch": 0.35286417574912493,
"grad_norm": 0.4491735100746155,
"learning_rate": 0.0002,
"loss": 0.347,
"step": 3100
},
{
"epoch": 0.35514071881847414,
"grad_norm": 0.9062516093254089,
"learning_rate": 0.0002,
"loss": 0.3617,
"step": 3120
},
{
"epoch": 0.35741726188782336,
"grad_norm": 0.5253359079360962,
"learning_rate": 0.0002,
"loss": 0.3512,
"step": 3140
},
{
"epoch": 0.3596938049571725,
"grad_norm": 0.4836867153644562,
"learning_rate": 0.0002,
"loss": 0.3585,
"step": 3160
},
{
"epoch": 0.3619703480265217,
"grad_norm": 0.49537473917007446,
"learning_rate": 0.0002,
"loss": 0.364,
"step": 3180
},
{
"epoch": 0.36424689109587094,
"grad_norm": 0.6098095178604126,
"learning_rate": 0.0002,
"loss": 0.3455,
"step": 3200
},
{
"epoch": 0.3665234341652201,
"grad_norm": 0.5926884412765503,
"learning_rate": 0.0002,
"loss": 0.3406,
"step": 3220
},
{
"epoch": 0.3687999772345693,
"grad_norm": 0.5868669152259827,
"learning_rate": 0.0002,
"loss": 0.3643,
"step": 3240
},
{
"epoch": 0.3710765203039185,
"grad_norm": 0.42670106887817383,
"learning_rate": 0.0002,
"loss": 0.344,
"step": 3260
},
{
"epoch": 0.3733530633732677,
"grad_norm": 0.5992838740348816,
"learning_rate": 0.0002,
"loss": 0.3588,
"step": 3280
},
{
"epoch": 0.3756296064426169,
"grad_norm": 0.4388341009616852,
"learning_rate": 0.0002,
"loss": 0.3375,
"step": 3300
},
{
"epoch": 0.3779061495119661,
"grad_norm": 0.596488893032074,
"learning_rate": 0.0002,
"loss": 0.3425,
"step": 3320
},
{
"epoch": 0.38018269258131526,
"grad_norm": 0.4572538137435913,
"learning_rate": 0.0002,
"loss": 0.3711,
"step": 3340
},
{
"epoch": 0.3824592356506645,
"grad_norm": 0.5661656856536865,
"learning_rate": 0.0002,
"loss": 0.3415,
"step": 3360
},
{
"epoch": 0.38473577872001363,
"grad_norm": 0.45082923769950867,
"learning_rate": 0.0002,
"loss": 0.3495,
"step": 3380
},
{
"epoch": 0.38701232178936285,
"grad_norm": 0.4995211660861969,
"learning_rate": 0.0002,
"loss": 0.3311,
"step": 3400
},
{
"epoch": 0.38928886485871206,
"grad_norm": 0.5004004240036011,
"learning_rate": 0.0002,
"loss": 0.3506,
"step": 3420
},
{
"epoch": 0.3915654079280612,
"grad_norm": 0.5676460266113281,
"learning_rate": 0.0002,
"loss": 0.3383,
"step": 3440
},
{
"epoch": 0.39384195099741043,
"grad_norm": 0.4805515706539154,
"learning_rate": 0.0002,
"loss": 0.3382,
"step": 3460
},
{
"epoch": 0.39611849406675964,
"grad_norm": 0.47675764560699463,
"learning_rate": 0.0002,
"loss": 0.3021,
"step": 3480
},
{
"epoch": 0.3983950371361088,
"grad_norm": 0.6285260915756226,
"learning_rate": 0.0002,
"loss": 0.3467,
"step": 3500
},
{
"epoch": 0.400671580205458,
"grad_norm": 0.5657575130462646,
"learning_rate": 0.0002,
"loss": 0.3382,
"step": 3520
},
{
"epoch": 0.4029481232748072,
"grad_norm": 0.6148316860198975,
"learning_rate": 0.0002,
"loss": 0.3396,
"step": 3540
},
{
"epoch": 0.4052246663441564,
"grad_norm": 0.5819992423057556,
"learning_rate": 0.0002,
"loss": 0.3373,
"step": 3560
},
{
"epoch": 0.4075012094135056,
"grad_norm": 0.6080338954925537,
"learning_rate": 0.0002,
"loss": 0.3463,
"step": 3580
},
{
"epoch": 0.4097777524828548,
"grad_norm": 0.6103864312171936,
"learning_rate": 0.0002,
"loss": 0.3441,
"step": 3600
},
{
"epoch": 0.41205429555220396,
"grad_norm": 0.5234800577163696,
"learning_rate": 0.0002,
"loss": 0.3272,
"step": 3620
},
{
"epoch": 0.4143308386215532,
"grad_norm": 0.5393822193145752,
"learning_rate": 0.0002,
"loss": 0.3308,
"step": 3640
},
{
"epoch": 0.4166073816909024,
"grad_norm": 0.4853431284427643,
"learning_rate": 0.0002,
"loss": 0.3152,
"step": 3660
},
{
"epoch": 0.41888392476025155,
"grad_norm": 0.5507264733314514,
"learning_rate": 0.0002,
"loss": 0.3229,
"step": 3680
},
{
"epoch": 0.42116046782960076,
"grad_norm": 0.44306129217147827,
"learning_rate": 0.0002,
"loss": 0.3389,
"step": 3700
},
{
"epoch": 0.4234370108989499,
"grad_norm": 0.4574294984340668,
"learning_rate": 0.0002,
"loss": 0.3516,
"step": 3720
},
{
"epoch": 0.42571355396829913,
"grad_norm": 0.5367994904518127,
"learning_rate": 0.0002,
"loss": 0.3576,
"step": 3740
},
{
"epoch": 0.42799009703764834,
"grad_norm": 0.5044491291046143,
"learning_rate": 0.0002,
"loss": 0.3449,
"step": 3760
},
{
"epoch": 0.4302666401069975,
"grad_norm": 0.41715556383132935,
"learning_rate": 0.0002,
"loss": 0.3128,
"step": 3780
},
{
"epoch": 0.4325431831763467,
"grad_norm": 0.4355817437171936,
"learning_rate": 0.0002,
"loss": 0.3131,
"step": 3800
},
{
"epoch": 0.4348197262456959,
"grad_norm": 0.5237382650375366,
"learning_rate": 0.0002,
"loss": 0.3281,
"step": 3820
},
{
"epoch": 0.4370962693150451,
"grad_norm": 0.6210081577301025,
"learning_rate": 0.0002,
"loss": 0.3195,
"step": 3840
},
{
"epoch": 0.4393728123843943,
"grad_norm": 0.5145352482795715,
"learning_rate": 0.0002,
"loss": 0.3107,
"step": 3860
},
{
"epoch": 0.4416493554537435,
"grad_norm": 0.5554608106613159,
"learning_rate": 0.0002,
"loss": 0.3418,
"step": 3880
},
{
"epoch": 0.44392589852309267,
"grad_norm": 0.4971628487110138,
"learning_rate": 0.0002,
"loss": 0.3293,
"step": 3900
},
{
"epoch": 0.4462024415924419,
"grad_norm": 0.49732130765914917,
"learning_rate": 0.0002,
"loss": 0.3138,
"step": 3920
},
{
"epoch": 0.4484789846617911,
"grad_norm": 0.5883257985115051,
"learning_rate": 0.0002,
"loss": 0.3357,
"step": 3940
},
{
"epoch": 0.45075552773114025,
"grad_norm": 0.5349528193473816,
"learning_rate": 0.0002,
"loss": 0.3381,
"step": 3960
},
{
"epoch": 0.45303207080048946,
"grad_norm": 0.5360047221183777,
"learning_rate": 0.0002,
"loss": 0.3116,
"step": 3980
},
{
"epoch": 0.4553086138698387,
"grad_norm": 0.4889732003211975,
"learning_rate": 0.0002,
"loss": 0.3154,
"step": 4000
},
{
"epoch": 0.45758515693918783,
"grad_norm": 0.4912421703338623,
"learning_rate": 0.0002,
"loss": 0.3054,
"step": 4020
},
{
"epoch": 0.45986170000853704,
"grad_norm": 0.4449983835220337,
"learning_rate": 0.0002,
"loss": 0.3079,
"step": 4040
},
{
"epoch": 0.46213824307788626,
"grad_norm": 0.4488675892353058,
"learning_rate": 0.0002,
"loss": 0.3027,
"step": 4060
},
{
"epoch": 0.4644147861472354,
"grad_norm": 0.5412561893463135,
"learning_rate": 0.0002,
"loss": 0.2932,
"step": 4080
},
{
"epoch": 0.4666913292165846,
"grad_norm": 0.41218650341033936,
"learning_rate": 0.0002,
"loss": 0.3087,
"step": 4100
},
{
"epoch": 0.4689678722859338,
"grad_norm": 0.5233949422836304,
"learning_rate": 0.0002,
"loss": 0.3157,
"step": 4120
},
{
"epoch": 0.471244415355283,
"grad_norm": 0.5676075220108032,
"learning_rate": 0.0002,
"loss": 0.3267,
"step": 4140
},
{
"epoch": 0.4735209584246322,
"grad_norm": 0.5336834788322449,
"learning_rate": 0.0002,
"loss": 0.3185,
"step": 4160
},
{
"epoch": 0.47579750149398137,
"grad_norm": 0.5505925416946411,
"learning_rate": 0.0002,
"loss": 0.3116,
"step": 4180
},
{
"epoch": 0.4780740445633306,
"grad_norm": 0.5440223813056946,
"learning_rate": 0.0002,
"loss": 0.3234,
"step": 4200
},
{
"epoch": 0.4803505876326798,
"grad_norm": 0.46334293484687805,
"learning_rate": 0.0002,
"loss": 0.3209,
"step": 4220
},
{
"epoch": 0.48262713070202895,
"grad_norm": 0.452364444732666,
"learning_rate": 0.0002,
"loss": 0.3056,
"step": 4240
},
{
"epoch": 0.48490367377137816,
"grad_norm": 0.5037956833839417,
"learning_rate": 0.0002,
"loss": 0.3141,
"step": 4260
},
{
"epoch": 0.4871802168407274,
"grad_norm": 0.4308939278125763,
"learning_rate": 0.0002,
"loss": 0.2948,
"step": 4280
},
{
"epoch": 0.48945675991007653,
"grad_norm": 0.45019960403442383,
"learning_rate": 0.0002,
"loss": 0.3142,
"step": 4300
},
{
"epoch": 0.49173330297942575,
"grad_norm": 0.4351404011249542,
"learning_rate": 0.0002,
"loss": 0.31,
"step": 4320
},
{
"epoch": 0.49400984604877496,
"grad_norm": 0.38306841254234314,
"learning_rate": 0.0002,
"loss": 0.2889,
"step": 4340
},
{
"epoch": 0.4962863891181241,
"grad_norm": 0.545360803604126,
"learning_rate": 0.0002,
"loss": 0.311,
"step": 4360
},
{
"epoch": 0.49856293218747333,
"grad_norm": 0.44942232966423035,
"learning_rate": 0.0002,
"loss": 0.2899,
"step": 4380
},
{
"epoch": 0.5008394752568225,
"grad_norm": 0.46564239263534546,
"learning_rate": 0.0002,
"loss": 0.3013,
"step": 4400
},
{
"epoch": 0.5031160183261717,
"grad_norm": 0.5398554801940918,
"learning_rate": 0.0002,
"loss": 0.3104,
"step": 4420
},
{
"epoch": 0.5053925613955209,
"grad_norm": 0.47367504239082336,
"learning_rate": 0.0002,
"loss": 0.2945,
"step": 4440
},
{
"epoch": 0.5076691044648701,
"grad_norm": 0.45659711956977844,
"learning_rate": 0.0002,
"loss": 0.304,
"step": 4460
},
{
"epoch": 0.5099456475342193,
"grad_norm": 0.4942033290863037,
"learning_rate": 0.0002,
"loss": 0.2969,
"step": 4480
},
{
"epoch": 0.5122221906035684,
"grad_norm": 0.46578243374824524,
"learning_rate": 0.0002,
"loss": 0.2935,
"step": 4500
},
{
"epoch": 0.5144987336729177,
"grad_norm": 0.6523891687393188,
"learning_rate": 0.0002,
"loss": 0.2823,
"step": 4520
},
{
"epoch": 0.5167752767422669,
"grad_norm": 0.4787238538265228,
"learning_rate": 0.0002,
"loss": 0.3148,
"step": 4540
},
{
"epoch": 0.5190518198116161,
"grad_norm": 0.46825891733169556,
"learning_rate": 0.0002,
"loss": 0.3089,
"step": 4560
},
{
"epoch": 0.5213283628809653,
"grad_norm": 0.46605536341667175,
"learning_rate": 0.0002,
"loss": 0.3012,
"step": 4580
},
{
"epoch": 0.5236049059503144,
"grad_norm": 0.5826888680458069,
"learning_rate": 0.0002,
"loss": 0.3043,
"step": 4600
},
{
"epoch": 0.5258814490196636,
"grad_norm": 0.48641151189804077,
"learning_rate": 0.0002,
"loss": 0.2952,
"step": 4620
},
{
"epoch": 0.5281579920890128,
"grad_norm": 0.5396175384521484,
"learning_rate": 0.0002,
"loss": 0.2926,
"step": 4640
},
{
"epoch": 0.530434535158362,
"grad_norm": 0.5584241151809692,
"learning_rate": 0.0002,
"loss": 0.3048,
"step": 4660
},
{
"epoch": 0.5327110782277112,
"grad_norm": 0.5832685232162476,
"learning_rate": 0.0002,
"loss": 0.2948,
"step": 4680
},
{
"epoch": 0.5349876212970605,
"grad_norm": 0.4676337242126465,
"learning_rate": 0.0002,
"loss": 0.3043,
"step": 4700
},
{
"epoch": 0.5372641643664096,
"grad_norm": 0.4440428614616394,
"learning_rate": 0.0002,
"loss": 0.288,
"step": 4720
},
{
"epoch": 0.5395407074357588,
"grad_norm": 0.49934279918670654,
"learning_rate": 0.0002,
"loss": 0.2882,
"step": 4740
},
{
"epoch": 0.541817250505108,
"grad_norm": 0.5172054171562195,
"learning_rate": 0.0002,
"loss": 0.3225,
"step": 4760
},
{
"epoch": 0.5440937935744572,
"grad_norm": 0.4527619183063507,
"learning_rate": 0.0002,
"loss": 0.2869,
"step": 4780
},
{
"epoch": 0.5463703366438064,
"grad_norm": 0.548918604850769,
"learning_rate": 0.0002,
"loss": 0.3105,
"step": 4800
},
{
"epoch": 0.5486468797131556,
"grad_norm": 0.48801419138908386,
"learning_rate": 0.0002,
"loss": 0.2835,
"step": 4820
},
{
"epoch": 0.5509234227825047,
"grad_norm": 0.49810609221458435,
"learning_rate": 0.0002,
"loss": 0.3227,
"step": 4840
},
{
"epoch": 0.5531999658518539,
"grad_norm": 0.49763086438179016,
"learning_rate": 0.0002,
"loss": 0.2786,
"step": 4860
},
{
"epoch": 0.5554765089212031,
"grad_norm": 0.48815059661865234,
"learning_rate": 0.0002,
"loss": 0.2802,
"step": 4880
},
{
"epoch": 0.5577530519905524,
"grad_norm": 0.3571115732192993,
"learning_rate": 0.0002,
"loss": 0.2796,
"step": 4900
},
{
"epoch": 0.5600295950599016,
"grad_norm": 0.6448425650596619,
"learning_rate": 0.0002,
"loss": 0.2844,
"step": 4920
},
{
"epoch": 0.5623061381292508,
"grad_norm": 0.49660468101501465,
"learning_rate": 0.0002,
"loss": 0.2892,
"step": 4940
},
{
"epoch": 0.5645826811985999,
"grad_norm": 0.47702720761299133,
"learning_rate": 0.0002,
"loss": 0.3111,
"step": 4960
},
{
"epoch": 0.5668592242679491,
"grad_norm": 0.5281921029090881,
"learning_rate": 0.0002,
"loss": 0.2908,
"step": 4980
},
{
"epoch": 0.5691357673372983,
"grad_norm": 0.6427987813949585,
"learning_rate": 0.0002,
"loss": 0.2848,
"step": 5000
},
{
"epoch": 0.5714123104066475,
"grad_norm": 0.5437233448028564,
"learning_rate": 0.0002,
"loss": 0.3023,
"step": 5020
},
{
"epoch": 0.5736888534759967,
"grad_norm": 0.517444372177124,
"learning_rate": 0.0002,
"loss": 0.2876,
"step": 5040
},
{
"epoch": 0.5759653965453458,
"grad_norm": 0.5197298526763916,
"learning_rate": 0.0002,
"loss": 0.304,
"step": 5060
},
{
"epoch": 0.578241939614695,
"grad_norm": 0.3452152907848358,
"learning_rate": 0.0002,
"loss": 0.2794,
"step": 5080
},
{
"epoch": 0.5805184826840443,
"grad_norm": 0.5630306601524353,
"learning_rate": 0.0002,
"loss": 0.2979,
"step": 5100
},
{
"epoch": 0.5827950257533935,
"grad_norm": 0.5696737170219421,
"learning_rate": 0.0002,
"loss": 0.3035,
"step": 5120
},
{
"epoch": 0.5850715688227427,
"grad_norm": 0.5024551153182983,
"learning_rate": 0.0002,
"loss": 0.2717,
"step": 5140
},
{
"epoch": 0.5873481118920919,
"grad_norm": 0.4166383147239685,
"learning_rate": 0.0002,
"loss": 0.3065,
"step": 5160
},
{
"epoch": 0.589624654961441,
"grad_norm": 0.36780408024787903,
"learning_rate": 0.0002,
"loss": 0.2864,
"step": 5180
},
{
"epoch": 0.5919011980307902,
"grad_norm": 0.436526894569397,
"learning_rate": 0.0002,
"loss": 0.2764,
"step": 5200
},
{
"epoch": 0.5941777411001394,
"grad_norm": 0.43115249276161194,
"learning_rate": 0.0002,
"loss": 0.2791,
"step": 5220
},
{
"epoch": 0.5964542841694886,
"grad_norm": 0.359739750623703,
"learning_rate": 0.0002,
"loss": 0.3108,
"step": 5240
},
{
"epoch": 0.5987308272388379,
"grad_norm": 0.4555259644985199,
"learning_rate": 0.0002,
"loss": 0.2623,
"step": 5260
},
{
"epoch": 0.6010073703081871,
"grad_norm": 0.4587076008319855,
"learning_rate": 0.0002,
"loss": 0.293,
"step": 5280
},
{
"epoch": 0.6032839133775362,
"grad_norm": 0.5236973166465759,
"learning_rate": 0.0002,
"loss": 0.2888,
"step": 5300
},
{
"epoch": 0.6055604564468854,
"grad_norm": 0.46685513854026794,
"learning_rate": 0.0002,
"loss": 0.2731,
"step": 5320
},
{
"epoch": 0.6078369995162346,
"grad_norm": 0.5701884627342224,
"learning_rate": 0.0002,
"loss": 0.28,
"step": 5340
},
{
"epoch": 0.6101135425855838,
"grad_norm": 0.5002717971801758,
"learning_rate": 0.0002,
"loss": 0.2777,
"step": 5360
},
{
"epoch": 0.612390085654933,
"grad_norm": 0.5896885395050049,
"learning_rate": 0.0002,
"loss": 0.3048,
"step": 5380
},
{
"epoch": 0.6146666287242821,
"grad_norm": 0.49014943838119507,
"learning_rate": 0.0002,
"loss": 0.2642,
"step": 5400
},
{
"epoch": 0.6169431717936313,
"grad_norm": 0.5924846529960632,
"learning_rate": 0.0002,
"loss": 0.2943,
"step": 5420
},
{
"epoch": 0.6192197148629806,
"grad_norm": 0.49827829003334045,
"learning_rate": 0.0002,
"loss": 0.2879,
"step": 5440
},
{
"epoch": 0.6214962579323298,
"grad_norm": 0.45312178134918213,
"learning_rate": 0.0002,
"loss": 0.2728,
"step": 5460
},
{
"epoch": 0.623772801001679,
"grad_norm": 0.3595191538333893,
"learning_rate": 0.0002,
"loss": 0.2713,
"step": 5480
},
{
"epoch": 0.6260493440710282,
"grad_norm": 0.6547619104385376,
"learning_rate": 0.0002,
"loss": 0.2855,
"step": 5500
},
{
"epoch": 0.6283258871403773,
"grad_norm": 0.4659534692764282,
"learning_rate": 0.0002,
"loss": 0.2908,
"step": 5520
},
{
"epoch": 0.6306024302097265,
"grad_norm": 0.4027460813522339,
"learning_rate": 0.0002,
"loss": 0.2651,
"step": 5540
},
{
"epoch": 0.6328789732790757,
"grad_norm": 0.36129653453826904,
"learning_rate": 0.0002,
"loss": 0.2915,
"step": 5560
},
{
"epoch": 0.6351555163484249,
"grad_norm": 0.5963912010192871,
"learning_rate": 0.0002,
"loss": 0.2968,
"step": 5580
},
{
"epoch": 0.6374320594177741,
"grad_norm": 0.49669450521469116,
"learning_rate": 0.0002,
"loss": 0.2965,
"step": 5600
},
{
"epoch": 0.6397086024871234,
"grad_norm": 0.5784302353858948,
"learning_rate": 0.0002,
"loss": 0.2626,
"step": 5620
},
{
"epoch": 0.6419851455564725,
"grad_norm": 0.5651645660400391,
"learning_rate": 0.0002,
"loss": 0.2738,
"step": 5640
},
{
"epoch": 0.6442616886258217,
"grad_norm": 0.45475292205810547,
"learning_rate": 0.0002,
"loss": 0.2653,
"step": 5660
},
{
"epoch": 0.6465382316951709,
"grad_norm": 0.4691898822784424,
"learning_rate": 0.0002,
"loss": 0.2634,
"step": 5680
},
{
"epoch": 0.6488147747645201,
"grad_norm": 0.4604431092739105,
"learning_rate": 0.0002,
"loss": 0.2838,
"step": 5700
},
{
"epoch": 0.6510913178338693,
"grad_norm": 0.506804883480072,
"learning_rate": 0.0002,
"loss": 0.2657,
"step": 5720
},
{
"epoch": 0.6533678609032184,
"grad_norm": 0.5051881670951843,
"learning_rate": 0.0002,
"loss": 0.2976,
"step": 5740
},
{
"epoch": 0.6556444039725676,
"grad_norm": 0.4780672788619995,
"learning_rate": 0.0002,
"loss": 0.2828,
"step": 5760
},
{
"epoch": 0.6579209470419168,
"grad_norm": 0.4695095121860504,
"learning_rate": 0.0002,
"loss": 0.2685,
"step": 5780
},
{
"epoch": 0.660197490111266,
"grad_norm": 0.4259052276611328,
"learning_rate": 0.0002,
"loss": 0.2635,
"step": 5800
},
{
"epoch": 0.6624740331806153,
"grad_norm": 0.5684182643890381,
"learning_rate": 0.0002,
"loss": 0.2879,
"step": 5820
},
{
"epoch": 0.6647505762499645,
"grad_norm": 0.42193594574928284,
"learning_rate": 0.0002,
"loss": 0.2678,
"step": 5840
},
{
"epoch": 0.6670271193193136,
"grad_norm": 0.5095034241676331,
"learning_rate": 0.0002,
"loss": 0.2677,
"step": 5860
},
{
"epoch": 0.6693036623886628,
"grad_norm": 0.46626052260398865,
"learning_rate": 0.0002,
"loss": 0.2906,
"step": 5880
},
{
"epoch": 0.671580205458012,
"grad_norm": 0.5086765289306641,
"learning_rate": 0.0002,
"loss": 0.2775,
"step": 5900
},
{
"epoch": 0.6738567485273612,
"grad_norm": 0.44444966316223145,
"learning_rate": 0.0002,
"loss": 0.2764,
"step": 5920
},
{
"epoch": 0.6761332915967104,
"grad_norm": 0.4477381706237793,
"learning_rate": 0.0002,
"loss": 0.2729,
"step": 5940
},
{
"epoch": 0.6784098346660596,
"grad_norm": 0.46984028816223145,
"learning_rate": 0.0002,
"loss": 0.273,
"step": 5960
},
{
"epoch": 0.6806863777354087,
"grad_norm": 0.417084276676178,
"learning_rate": 0.0002,
"loss": 0.2744,
"step": 5980
},
{
"epoch": 0.682962920804758,
"grad_norm": 0.4144213795661926,
"learning_rate": 0.0002,
"loss": 0.2704,
"step": 6000
},
{
"epoch": 0.6852394638741072,
"grad_norm": 0.5844799876213074,
"learning_rate": 0.0002,
"loss": 0.2635,
"step": 6020
},
{
"epoch": 0.6875160069434564,
"grad_norm": 0.39512693881988525,
"learning_rate": 0.0002,
"loss": 0.2471,
"step": 6040
},
{
"epoch": 0.6897925500128056,
"grad_norm": 0.5299990773200989,
"learning_rate": 0.0002,
"loss": 0.2648,
"step": 6060
},
{
"epoch": 0.6920690930821547,
"grad_norm": 0.4980265498161316,
"learning_rate": 0.0002,
"loss": 0.2725,
"step": 6080
},
{
"epoch": 0.6943456361515039,
"grad_norm": 0.4003869891166687,
"learning_rate": 0.0002,
"loss": 0.2768,
"step": 6100
},
{
"epoch": 0.6966221792208531,
"grad_norm": 0.5103460550308228,
"learning_rate": 0.0002,
"loss": 0.2638,
"step": 6120
},
{
"epoch": 0.6988987222902023,
"grad_norm": 0.737101137638092,
"learning_rate": 0.0002,
"loss": 0.2779,
"step": 6140
},
{
"epoch": 0.7011752653595515,
"grad_norm": 0.4731826186180115,
"learning_rate": 0.0002,
"loss": 0.2691,
"step": 6160
},
{
"epoch": 0.7034518084289008,
"grad_norm": 0.5234053730964661,
"learning_rate": 0.0002,
"loss": 0.2739,
"step": 6180
},
{
"epoch": 0.7057283514982499,
"grad_norm": 0.5235525369644165,
"learning_rate": 0.0002,
"loss": 0.2754,
"step": 6200
},
{
"epoch": 0.7080048945675991,
"grad_norm": 0.4453619122505188,
"learning_rate": 0.0002,
"loss": 0.2833,
"step": 6220
},
{
"epoch": 0.7102814376369483,
"grad_norm": 0.4025666117668152,
"learning_rate": 0.0002,
"loss": 0.2713,
"step": 6240
},
{
"epoch": 0.7125579807062975,
"grad_norm": 0.35240331292152405,
"learning_rate": 0.0002,
"loss": 0.2786,
"step": 6260
},
{
"epoch": 0.7148345237756467,
"grad_norm": 0.4521905779838562,
"learning_rate": 0.0002,
"loss": 0.2639,
"step": 6280
},
{
"epoch": 0.7171110668449959,
"grad_norm": 0.5230519771575928,
"learning_rate": 0.0002,
"loss": 0.2517,
"step": 6300
},
{
"epoch": 0.719387609914345,
"grad_norm": 0.5415637493133545,
"learning_rate": 0.0002,
"loss": 0.2739,
"step": 6320
},
{
"epoch": 0.7216641529836942,
"grad_norm": 0.4067966341972351,
"learning_rate": 0.0002,
"loss": 0.2751,
"step": 6340
},
{
"epoch": 0.7239406960530435,
"grad_norm": 0.4670214354991913,
"learning_rate": 0.0002,
"loss": 0.2644,
"step": 6360
},
{
"epoch": 0.7262172391223927,
"grad_norm": 0.5316203236579895,
"learning_rate": 0.0002,
"loss": 0.2746,
"step": 6380
},
{
"epoch": 0.7284937821917419,
"grad_norm": 0.46312493085861206,
"learning_rate": 0.0002,
"loss": 0.2539,
"step": 6400
},
{
"epoch": 0.730770325261091,
"grad_norm": 0.465279221534729,
"learning_rate": 0.0002,
"loss": 0.2742,
"step": 6420
},
{
"epoch": 0.7330468683304402,
"grad_norm": 0.5096962451934814,
"learning_rate": 0.0002,
"loss": 0.2546,
"step": 6440
},
{
"epoch": 0.7353234113997894,
"grad_norm": 0.4525590240955353,
"learning_rate": 0.0002,
"loss": 0.2694,
"step": 6460
},
{
"epoch": 0.7375999544691386,
"grad_norm": 0.5033881664276123,
"learning_rate": 0.0002,
"loss": 0.2627,
"step": 6480
},
{
"epoch": 0.7398764975384878,
"grad_norm": 0.44053900241851807,
"learning_rate": 0.0002,
"loss": 0.258,
"step": 6500
},
{
"epoch": 0.742153040607837,
"grad_norm": 0.4677462875843048,
"learning_rate": 0.0002,
"loss": 0.2659,
"step": 6520
},
{
"epoch": 0.7444295836771861,
"grad_norm": 0.5687553882598877,
"learning_rate": 0.0002,
"loss": 0.271,
"step": 6540
},
{
"epoch": 0.7467061267465354,
"grad_norm": 0.4980468451976776,
"learning_rate": 0.0002,
"loss": 0.265,
"step": 6560
},
{
"epoch": 0.7489826698158846,
"grad_norm": 0.5155619382858276,
"learning_rate": 0.0002,
"loss": 0.2491,
"step": 6580
},
{
"epoch": 0.7512592128852338,
"grad_norm": 0.5364673733711243,
"learning_rate": 0.0002,
"loss": 0.2564,
"step": 6600
},
{
"epoch": 0.753535755954583,
"grad_norm": 0.421838641166687,
"learning_rate": 0.0002,
"loss": 0.267,
"step": 6620
},
{
"epoch": 0.7558122990239322,
"grad_norm": 0.46299833059310913,
"learning_rate": 0.0002,
"loss": 0.2461,
"step": 6640
},
{
"epoch": 0.7580888420932813,
"grad_norm": 0.3832832872867584,
"learning_rate": 0.0002,
"loss": 0.265,
"step": 6660
},
{
"epoch": 0.7603653851626305,
"grad_norm": 0.5560947060585022,
"learning_rate": 0.0002,
"loss": 0.253,
"step": 6680
},
{
"epoch": 0.7626419282319797,
"grad_norm": 0.4832628667354584,
"learning_rate": 0.0002,
"loss": 0.2515,
"step": 6700
},
{
"epoch": 0.764918471301329,
"grad_norm": 0.44354599714279175,
"learning_rate": 0.0002,
"loss": 0.2687,
"step": 6720
},
{
"epoch": 0.7671950143706782,
"grad_norm": 0.3746070861816406,
"learning_rate": 0.0002,
"loss": 0.2481,
"step": 6740
},
{
"epoch": 0.7694715574400273,
"grad_norm": 0.3048388659954071,
"learning_rate": 0.0002,
"loss": 0.269,
"step": 6760
},
{
"epoch": 0.7717481005093765,
"grad_norm": 0.46471843123435974,
"learning_rate": 0.0002,
"loss": 0.2642,
"step": 6780
},
{
"epoch": 0.7740246435787257,
"grad_norm": 0.44309428334236145,
"learning_rate": 0.0002,
"loss": 0.2565,
"step": 6800
},
{
"epoch": 0.7763011866480749,
"grad_norm": 0.4174291789531708,
"learning_rate": 0.0002,
"loss": 0.262,
"step": 6820
},
{
"epoch": 0.7785777297174241,
"grad_norm": 0.42592549324035645,
"learning_rate": 0.0002,
"loss": 0.2608,
"step": 6840
},
{
"epoch": 0.7808542727867733,
"grad_norm": 0.4378054141998291,
"learning_rate": 0.0002,
"loss": 0.2765,
"step": 6860
},
{
"epoch": 0.7831308158561224,
"grad_norm": 0.4560708701610565,
"learning_rate": 0.0002,
"loss": 0.2381,
"step": 6880
},
{
"epoch": 0.7854073589254716,
"grad_norm": 0.4595545828342438,
"learning_rate": 0.0002,
"loss": 0.2561,
"step": 6900
},
{
"epoch": 0.7876839019948209,
"grad_norm": 0.45213592052459717,
"learning_rate": 0.0002,
"loss": 0.2645,
"step": 6920
},
{
"epoch": 0.7899604450641701,
"grad_norm": 0.4857342839241028,
"learning_rate": 0.0002,
"loss": 0.2687,
"step": 6940
},
{
"epoch": 0.7922369881335193,
"grad_norm": 0.4939437508583069,
"learning_rate": 0.0002,
"loss": 0.2642,
"step": 6960
},
{
"epoch": 0.7945135312028685,
"grad_norm": 0.46244382858276367,
"learning_rate": 0.0002,
"loss": 0.2536,
"step": 6980
},
{
"epoch": 0.7967900742722176,
"grad_norm": 0.5876993536949158,
"learning_rate": 0.0002,
"loss": 0.2492,
"step": 7000
},
{
"epoch": 0.7990666173415668,
"grad_norm": 0.5170072913169861,
"learning_rate": 0.0002,
"loss": 0.2548,
"step": 7020
},
{
"epoch": 0.801343160410916,
"grad_norm": 0.394380658864975,
"learning_rate": 0.0002,
"loss": 0.2524,
"step": 7040
},
{
"epoch": 0.8036197034802652,
"grad_norm": 0.4716455340385437,
"learning_rate": 0.0002,
"loss": 0.2573,
"step": 7060
},
{
"epoch": 0.8058962465496144,
"grad_norm": 0.34525179862976074,
"learning_rate": 0.0002,
"loss": 0.246,
"step": 7080
},
{
"epoch": 0.8081727896189635,
"grad_norm": 0.5030418038368225,
"learning_rate": 0.0002,
"loss": 0.2596,
"step": 7100
},
{
"epoch": 0.8104493326883128,
"grad_norm": 0.5586132407188416,
"learning_rate": 0.0002,
"loss": 0.2568,
"step": 7120
},
{
"epoch": 0.812725875757662,
"grad_norm": 0.47025129199028015,
"learning_rate": 0.0002,
"loss": 0.265,
"step": 7140
},
{
"epoch": 0.8150024188270112,
"grad_norm": 0.5654832720756531,
"learning_rate": 0.0002,
"loss": 0.2468,
"step": 7160
},
{
"epoch": 0.8172789618963604,
"grad_norm": 0.4701017141342163,
"learning_rate": 0.0002,
"loss": 0.2538,
"step": 7180
},
{
"epoch": 0.8195555049657096,
"grad_norm": 0.47270438075065613,
"learning_rate": 0.0002,
"loss": 0.2529,
"step": 7200
},
{
"epoch": 0.8218320480350587,
"grad_norm": 0.39433714747428894,
"learning_rate": 0.0002,
"loss": 0.2445,
"step": 7220
},
{
"epoch": 0.8241085911044079,
"grad_norm": 0.4521467685699463,
"learning_rate": 0.0002,
"loss": 0.2556,
"step": 7240
},
{
"epoch": 0.8263851341737571,
"grad_norm": 0.28483667969703674,
"learning_rate": 0.0002,
"loss": 0.2451,
"step": 7260
},
{
"epoch": 0.8286616772431064,
"grad_norm": 0.4298310875892639,
"learning_rate": 0.0002,
"loss": 0.2599,
"step": 7280
},
{
"epoch": 0.8309382203124556,
"grad_norm": 0.39677906036376953,
"learning_rate": 0.0002,
"loss": 0.2539,
"step": 7300
},
{
"epoch": 0.8332147633818048,
"grad_norm": 0.5800175666809082,
"learning_rate": 0.0002,
"loss": 0.2463,
"step": 7320
},
{
"epoch": 0.8354913064511539,
"grad_norm": 0.42742472887039185,
"learning_rate": 0.0002,
"loss": 0.2593,
"step": 7340
},
{
"epoch": 0.8377678495205031,
"grad_norm": 0.5521807670593262,
"learning_rate": 0.0002,
"loss": 0.253,
"step": 7360
},
{
"epoch": 0.8400443925898523,
"grad_norm": 0.5068047046661377,
"learning_rate": 0.0002,
"loss": 0.2503,
"step": 7380
},
{
"epoch": 0.8423209356592015,
"grad_norm": 0.4325120151042938,
"learning_rate": 0.0002,
"loss": 0.2466,
"step": 7400
},
{
"epoch": 0.8445974787285507,
"grad_norm": 0.5130394101142883,
"learning_rate": 0.0002,
"loss": 0.2521,
"step": 7420
},
{
"epoch": 0.8468740217978998,
"grad_norm": 0.5091120600700378,
"learning_rate": 0.0002,
"loss": 0.2429,
"step": 7440
},
{
"epoch": 0.849150564867249,
"grad_norm": 0.4635036289691925,
"learning_rate": 0.0002,
"loss": 0.235,
"step": 7460
},
{
"epoch": 0.8514271079365983,
"grad_norm": 0.3827108144760132,
"learning_rate": 0.0002,
"loss": 0.2487,
"step": 7480
},
{
"epoch": 0.8537036510059475,
"grad_norm": 0.3880899250507355,
"learning_rate": 0.0002,
"loss": 0.2469,
"step": 7500
},
{
"epoch": 0.8559801940752967,
"grad_norm": 0.408933162689209,
"learning_rate": 0.0002,
"loss": 0.2499,
"step": 7520
},
{
"epoch": 0.8582567371446459,
"grad_norm": 0.5049706101417542,
"learning_rate": 0.0002,
"loss": 0.2418,
"step": 7540
},
{
"epoch": 0.860533280213995,
"grad_norm": 0.43551701307296753,
"learning_rate": 0.0002,
"loss": 0.2478,
"step": 7560
},
{
"epoch": 0.8628098232833442,
"grad_norm": 0.5024411678314209,
"learning_rate": 0.0002,
"loss": 0.2538,
"step": 7580
},
{
"epoch": 0.8650863663526934,
"grad_norm": 0.36361223459243774,
"learning_rate": 0.0002,
"loss": 0.2536,
"step": 7600
},
{
"epoch": 0.8673629094220426,
"grad_norm": 0.4526277482509613,
"learning_rate": 0.0002,
"loss": 0.242,
"step": 7620
},
{
"epoch": 0.8696394524913919,
"grad_norm": 0.5677676200866699,
"learning_rate": 0.0002,
"loss": 0.2572,
"step": 7640
},
{
"epoch": 0.8719159955607411,
"grad_norm": 0.4915711283683777,
"learning_rate": 0.0002,
"loss": 0.2562,
"step": 7660
},
{
"epoch": 0.8741925386300902,
"grad_norm": 0.36850452423095703,
"learning_rate": 0.0002,
"loss": 0.2523,
"step": 7680
},
{
"epoch": 0.8764690816994394,
"grad_norm": 0.38313761353492737,
"learning_rate": 0.0002,
"loss": 0.2596,
"step": 7700
},
{
"epoch": 0.8787456247687886,
"grad_norm": 0.5384640097618103,
"learning_rate": 0.0002,
"loss": 0.2455,
"step": 7720
},
{
"epoch": 0.8810221678381378,
"grad_norm": 0.5308900475502014,
"learning_rate": 0.0002,
"loss": 0.2439,
"step": 7740
},
{
"epoch": 0.883298710907487,
"grad_norm": 0.5488154292106628,
"learning_rate": 0.0002,
"loss": 0.2428,
"step": 7760
},
{
"epoch": 0.8855752539768362,
"grad_norm": 0.5271242260932922,
"learning_rate": 0.0002,
"loss": 0.2372,
"step": 7780
},
{
"epoch": 0.8878517970461853,
"grad_norm": 0.46171802282333374,
"learning_rate": 0.0002,
"loss": 0.2506,
"step": 7800
},
{
"epoch": 0.8901283401155345,
"grad_norm": 0.45436665415763855,
"learning_rate": 0.0002,
"loss": 0.2414,
"step": 7820
},
{
"epoch": 0.8924048831848838,
"grad_norm": 0.4920847415924072,
"learning_rate": 0.0002,
"loss": 0.2669,
"step": 7840
},
{
"epoch": 0.894681426254233,
"grad_norm": 0.5913518071174622,
"learning_rate": 0.0002,
"loss": 0.2552,
"step": 7860
},
{
"epoch": 0.8969579693235822,
"grad_norm": 0.6011972427368164,
"learning_rate": 0.0002,
"loss": 0.2533,
"step": 7880
},
{
"epoch": 0.8992345123929313,
"grad_norm": 0.4650927186012268,
"learning_rate": 0.0002,
"loss": 0.2448,
"step": 7900
},
{
"epoch": 0.9015110554622805,
"grad_norm": 0.5828790664672852,
"learning_rate": 0.0002,
"loss": 0.2381,
"step": 7920
},
{
"epoch": 0.9037875985316297,
"grad_norm": 0.5178338885307312,
"learning_rate": 0.0002,
"loss": 0.2619,
"step": 7940
},
{
"epoch": 0.9060641416009789,
"grad_norm": 0.5147708058357239,
"learning_rate": 0.0002,
"loss": 0.258,
"step": 7960
},
{
"epoch": 0.9083406846703281,
"grad_norm": 0.45790836215019226,
"learning_rate": 0.0002,
"loss": 0.2474,
"step": 7980
},
{
"epoch": 0.9106172277396773,
"grad_norm": 0.3837074935436249,
"learning_rate": 0.0002,
"loss": 0.2356,
"step": 8000
},
{
"epoch": 0.9128937708090265,
"grad_norm": 0.4466090500354767,
"learning_rate": 0.0002,
"loss": 0.237,
"step": 8020
},
{
"epoch": 0.9151703138783757,
"grad_norm": 0.5893344283103943,
"learning_rate": 0.0002,
"loss": 0.2399,
"step": 8040
},
{
"epoch": 0.9174468569477249,
"grad_norm": 0.49547362327575684,
"learning_rate": 0.0002,
"loss": 0.2526,
"step": 8060
},
{
"epoch": 0.9197234000170741,
"grad_norm": 0.47068551182746887,
"learning_rate": 0.0002,
"loss": 0.2631,
"step": 8080
},
{
"epoch": 0.9219999430864233,
"grad_norm": 0.3512951135635376,
"learning_rate": 0.0002,
"loss": 0.2395,
"step": 8100
},
{
"epoch": 0.9242764861557725,
"grad_norm": 0.3996793031692505,
"learning_rate": 0.0002,
"loss": 0.2424,
"step": 8120
},
{
"epoch": 0.9265530292251216,
"grad_norm": 0.5782022476196289,
"learning_rate": 0.0002,
"loss": 0.2549,
"step": 8140
},
{
"epoch": 0.9288295722944708,
"grad_norm": 0.450860857963562,
"learning_rate": 0.0002,
"loss": 0.2465,
"step": 8160
},
{
"epoch": 0.93110611536382,
"grad_norm": 0.4679816663265228,
"learning_rate": 0.0002,
"loss": 0.2326,
"step": 8180
},
{
"epoch": 0.9333826584331693,
"grad_norm": 0.5497337579727173,
"learning_rate": 0.0002,
"loss": 0.2457,
"step": 8200
},
{
"epoch": 0.9356592015025185,
"grad_norm": 0.3775748312473297,
"learning_rate": 0.0002,
"loss": 0.2331,
"step": 8220
},
{
"epoch": 0.9379357445718676,
"grad_norm": 0.5428327918052673,
"learning_rate": 0.0002,
"loss": 0.2399,
"step": 8240
},
{
"epoch": 0.9402122876412168,
"grad_norm": 0.4089830219745636,
"learning_rate": 0.0002,
"loss": 0.246,
"step": 8260
},
{
"epoch": 0.942488830710566,
"grad_norm": 0.5781340003013611,
"learning_rate": 0.0002,
"loss": 0.2451,
"step": 8280
},
{
"epoch": 0.9447653737799152,
"grad_norm": 0.5869989395141602,
"learning_rate": 0.0002,
"loss": 0.2541,
"step": 8300
},
{
"epoch": 0.9470419168492644,
"grad_norm": 0.47708019614219666,
"learning_rate": 0.0002,
"loss": 0.2559,
"step": 8320
},
{
"epoch": 0.9493184599186136,
"grad_norm": 0.5445525050163269,
"learning_rate": 0.0002,
"loss": 0.2466,
"step": 8340
},
{
"epoch": 0.9515950029879627,
"grad_norm": 0.480214387178421,
"learning_rate": 0.0002,
"loss": 0.236,
"step": 8360
},
{
"epoch": 0.953871546057312,
"grad_norm": 0.5392053127288818,
"learning_rate": 0.0002,
"loss": 0.2383,
"step": 8380
},
{
"epoch": 0.9561480891266612,
"grad_norm": 0.4515858292579651,
"learning_rate": 0.0002,
"loss": 0.238,
"step": 8400
},
{
"epoch": 0.9584246321960104,
"grad_norm": 0.5461826324462891,
"learning_rate": 0.0002,
"loss": 0.2442,
"step": 8420
},
{
"epoch": 0.9607011752653596,
"grad_norm": 0.44309332966804504,
"learning_rate": 0.0002,
"loss": 0.2622,
"step": 8440
},
{
"epoch": 0.9629777183347088,
"grad_norm": 0.5409505367279053,
"learning_rate": 0.0002,
"loss": 0.2303,
"step": 8460
},
{
"epoch": 0.9652542614040579,
"grad_norm": 0.3868342638015747,
"learning_rate": 0.0002,
"loss": 0.2624,
"step": 8480
},
{
"epoch": 0.9675308044734071,
"grad_norm": 0.38888975977897644,
"learning_rate": 0.0002,
"loss": 0.246,
"step": 8500
},
{
"epoch": 0.9698073475427563,
"grad_norm": 0.38946032524108887,
"learning_rate": 0.0002,
"loss": 0.2503,
"step": 8520
},
{
"epoch": 0.9720838906121055,
"grad_norm": 0.42425817251205444,
"learning_rate": 0.0002,
"loss": 0.2556,
"step": 8540
},
{
"epoch": 0.9743604336814548,
"grad_norm": 0.41515296697616577,
"learning_rate": 0.0002,
"loss": 0.2437,
"step": 8560
},
{
"epoch": 0.9766369767508039,
"grad_norm": 0.4085826575756073,
"learning_rate": 0.0002,
"loss": 0.2293,
"step": 8580
},
{
"epoch": 0.9789135198201531,
"grad_norm": 0.3404542803764343,
"learning_rate": 0.0002,
"loss": 0.242,
"step": 8600
},
{
"epoch": 0.9811900628895023,
"grad_norm": 0.43266579508781433,
"learning_rate": 0.0002,
"loss": 0.2513,
"step": 8620
},
{
"epoch": 0.9834666059588515,
"grad_norm": 0.42724549770355225,
"learning_rate": 0.0002,
"loss": 0.2384,
"step": 8640
},
{
"epoch": 0.9857431490282007,
"grad_norm": 0.5089221596717834,
"learning_rate": 0.0002,
"loss": 0.2409,
"step": 8660
},
{
"epoch": 0.9880196920975499,
"grad_norm": 0.519223690032959,
"learning_rate": 0.0002,
"loss": 0.2353,
"step": 8680
},
{
"epoch": 0.990296235166899,
"grad_norm": 0.5701056122779846,
"learning_rate": 0.0002,
"loss": 0.2486,
"step": 8700
},
{
"epoch": 0.9925727782362482,
"grad_norm": 0.4519595503807068,
"learning_rate": 0.0002,
"loss": 0.2374,
"step": 8720
},
{
"epoch": 0.9948493213055974,
"grad_norm": 0.4883946180343628,
"learning_rate": 0.0002,
"loss": 0.2441,
"step": 8740
},
{
"epoch": 0.9971258643749467,
"grad_norm": 0.6918900012969971,
"learning_rate": 0.0002,
"loss": 0.2403,
"step": 8760
},
{
"epoch": 0.9994024074442959,
"grad_norm": 0.4810091555118561,
"learning_rate": 0.0002,
"loss": 0.2334,
"step": 8780
},
{
"epoch": 1.0,
"eval_loss": 0.30941203236579895,
"eval_runtime": 408.7196,
"eval_samples_per_second": 7.083,
"eval_steps_per_second": 0.886,
"step": 8786
}
],
"logging_steps": 20,
"max_steps": 13000,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 77,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 2.923169198364426e+18,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}