|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 20, |
|
"global_step": 8786, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0022765430693491933, |
|
"grad_norm": 0.469247430562973, |
|
"learning_rate": 0.0002, |
|
"loss": 1.9469, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.004553086138698387, |
|
"grad_norm": 0.6239348649978638, |
|
"learning_rate": 0.0002, |
|
"loss": 1.556, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.006829629208047579, |
|
"grad_norm": 0.4587397277355194, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4108, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.009106172277396773, |
|
"grad_norm": 0.42919760942459106, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3352, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.011382715346745967, |
|
"grad_norm": 0.46492573618888855, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3388, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.013659258416095159, |
|
"grad_norm": 0.453070729970932, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2295, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.015935801485444354, |
|
"grad_norm": 0.4760678708553314, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2493, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.018212344554793546, |
|
"grad_norm": 0.4545675814151764, |
|
"learning_rate": 0.0002, |
|
"loss": 1.215, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.020488887624142738, |
|
"grad_norm": 0.4772235155105591, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2173, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.022765430693491934, |
|
"grad_norm": 0.4403541088104248, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1058, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.025041973762841126, |
|
"grad_norm": 0.511401355266571, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1049, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.027318516832190318, |
|
"grad_norm": 0.3809013366699219, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0498, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.029595059901539513, |
|
"grad_norm": 0.3980010449886322, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9842, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.03187160297088871, |
|
"grad_norm": 0.5747793316841125, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0988, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.0341481460402379, |
|
"grad_norm": 0.46827971935272217, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0367, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.03642468910958709, |
|
"grad_norm": 0.4702209532260895, |
|
"learning_rate": 0.0002, |
|
"loss": 1.066, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.038701232178936285, |
|
"grad_norm": 0.5084996223449707, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0652, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.040977775248285477, |
|
"grad_norm": 0.3944012522697449, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9642, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.04325431831763467, |
|
"grad_norm": 0.40287718176841736, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9431, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.04553086138698387, |
|
"grad_norm": 0.4629077613353729, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9615, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.04780740445633306, |
|
"grad_norm": 0.44827452301979065, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9434, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.05008394752568225, |
|
"grad_norm": 0.41644710302352905, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9241, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.05236049059503144, |
|
"grad_norm": 0.4760611057281494, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8475, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.054637033664380635, |
|
"grad_norm": 0.45987364649772644, |
|
"learning_rate": 0.0002, |
|
"loss": 0.898, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.056913576733729834, |
|
"grad_norm": 0.4840068817138672, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9611, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.059190119803079026, |
|
"grad_norm": 0.40314286947250366, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8884, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.06146666287242822, |
|
"grad_norm": 0.5458106398582458, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8939, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.06374320594177742, |
|
"grad_norm": 0.5420896410942078, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8265, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.0660197490111266, |
|
"grad_norm": 0.5356529355049133, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8432, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.0682962920804758, |
|
"grad_norm": 0.5064826011657715, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8272, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.07057283514982499, |
|
"grad_norm": 0.4143005311489105, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7854, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.07284937821917419, |
|
"grad_norm": 0.3817225396633148, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8219, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.07512592128852338, |
|
"grad_norm": 0.5336936712265015, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7977, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.07740246435787257, |
|
"grad_norm": 0.5397001504898071, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8117, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.07967900742722177, |
|
"grad_norm": 0.4968530535697937, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7527, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.08195555049657095, |
|
"grad_norm": 0.4084935784339905, |
|
"learning_rate": 0.0002, |
|
"loss": 0.651, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.08423209356592015, |
|
"grad_norm": 0.48406732082366943, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7352, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.08650863663526934, |
|
"grad_norm": 0.5246301293373108, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7785, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.08878517970461854, |
|
"grad_norm": 0.5729619264602661, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7646, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.09106172277396773, |
|
"grad_norm": 0.5675190687179565, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7784, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.09333826584331692, |
|
"grad_norm": 0.4682878255844116, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7284, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.09561480891266612, |
|
"grad_norm": 0.5388545393943787, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6959, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.0978913519820153, |
|
"grad_norm": 0.48806509375572205, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7585, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.1001678950513645, |
|
"grad_norm": 0.4149261713027954, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6978, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.1024444381207137, |
|
"grad_norm": 0.4971105754375458, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7103, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.10472098119006289, |
|
"grad_norm": 0.5066735744476318, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6854, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.10699752425941209, |
|
"grad_norm": 0.4922661781311035, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6231, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.10927406732876127, |
|
"grad_norm": 0.5949555039405823, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6813, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.11155061039811047, |
|
"grad_norm": 0.581446647644043, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6174, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.11382715346745967, |
|
"grad_norm": 0.6152529716491699, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6405, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.11610369653680885, |
|
"grad_norm": 0.5986836552619934, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5776, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.11838023960615805, |
|
"grad_norm": 0.4255094528198242, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6576, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.12065678267550724, |
|
"grad_norm": 0.4563849866390228, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6647, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.12293332574485644, |
|
"grad_norm": 0.593227744102478, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6043, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.12520986881420562, |
|
"grad_norm": 0.47059598565101624, |
|
"learning_rate": 0.0002, |
|
"loss": 0.591, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.12748641188355483, |
|
"grad_norm": 0.5013225674629211, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5947, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.12976295495290402, |
|
"grad_norm": 0.46772757172584534, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6292, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.1320394980222532, |
|
"grad_norm": 0.5844313502311707, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6128, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.1343160410916024, |
|
"grad_norm": 0.5295489430427551, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6064, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.1365925841609516, |
|
"grad_norm": 0.4482004642486572, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5899, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.1388691272303008, |
|
"grad_norm": 0.6281692981719971, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6109, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.14114567029964997, |
|
"grad_norm": 0.4718242585659027, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5857, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.14342221336899919, |
|
"grad_norm": 0.5219341516494751, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5581, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.14569875643834837, |
|
"grad_norm": 0.47050580382347107, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6368, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.14797529950769756, |
|
"grad_norm": 0.5425338745117188, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5626, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.15025184257704677, |
|
"grad_norm": 0.4944934844970703, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5337, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.15252838564639595, |
|
"grad_norm": 0.5921599864959717, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5672, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.15480492871574514, |
|
"grad_norm": 0.4866751730442047, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5305, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.15708147178509432, |
|
"grad_norm": 0.62166827917099, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5737, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.15935801485444354, |
|
"grad_norm": 0.5006982684135437, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5542, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.16163455792379272, |
|
"grad_norm": 0.6090095043182373, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5215, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.1639111009931419, |
|
"grad_norm": 0.4260309636592865, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5535, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.16618764406249112, |
|
"grad_norm": 0.48657718300819397, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5441, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.1684641871318403, |
|
"grad_norm": 0.43275007605552673, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5161, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.1707407302011895, |
|
"grad_norm": 0.4225006699562073, |
|
"learning_rate": 0.0002, |
|
"loss": 0.512, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.17301727327053867, |
|
"grad_norm": 0.5176346302032471, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5384, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.1752938163398879, |
|
"grad_norm": 0.6492679715156555, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4981, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.17757035940923707, |
|
"grad_norm": 0.5511758327484131, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5289, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.17984690247858626, |
|
"grad_norm": 0.5211341977119446, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5002, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.18212344554793547, |
|
"grad_norm": 0.5488260984420776, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5178, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.18439998861728465, |
|
"grad_norm": 0.6779264211654663, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5155, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.18667653168663384, |
|
"grad_norm": 0.502919614315033, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4923, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.18895307475598305, |
|
"grad_norm": 0.4989205300807953, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4825, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.19122961782533224, |
|
"grad_norm": 0.5155315399169922, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4796, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.19350616089468142, |
|
"grad_norm": 0.5648865699768066, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4985, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.1957827039640306, |
|
"grad_norm": 0.606176495552063, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4819, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.19805924703337982, |
|
"grad_norm": 0.5440786480903625, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5213, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.200335790102729, |
|
"grad_norm": 0.43152502179145813, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4429, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.2026123331720782, |
|
"grad_norm": 0.5701313614845276, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4486, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.2048888762414274, |
|
"grad_norm": 0.565666913986206, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4561, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.2071654193107766, |
|
"grad_norm": 0.5725598931312561, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4757, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.20944196238012577, |
|
"grad_norm": 0.4642520248889923, |
|
"learning_rate": 0.0002, |
|
"loss": 0.438, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.21171850544947496, |
|
"grad_norm": 0.6077229976654053, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4295, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.21399504851882417, |
|
"grad_norm": 0.6314090490341187, |
|
"learning_rate": 0.0002, |
|
"loss": 0.449, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.21627159158817336, |
|
"grad_norm": 0.4416756331920624, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4554, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.21854813465752254, |
|
"grad_norm": 0.5278882384300232, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4554, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.22082467772687175, |
|
"grad_norm": 0.45619043707847595, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4868, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.22310122079622094, |
|
"grad_norm": 0.5881581902503967, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4672, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.22537776386557012, |
|
"grad_norm": 0.5379284024238586, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4531, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.22765430693491934, |
|
"grad_norm": 0.5562624931335449, |
|
"learning_rate": 0.0002, |
|
"loss": 0.464, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.22993085000426852, |
|
"grad_norm": 0.554499626159668, |
|
"learning_rate": 0.0002, |
|
"loss": 0.446, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.2322073930736177, |
|
"grad_norm": 0.509219229221344, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4417, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.2344839361429669, |
|
"grad_norm": 0.5206849575042725, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4118, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.2367604792123161, |
|
"grad_norm": 0.548729658126831, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4067, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.2390370222816653, |
|
"grad_norm": 0.4220084846019745, |
|
"learning_rate": 0.0002, |
|
"loss": 0.428, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.24131356535101448, |
|
"grad_norm": 0.5507292747497559, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4176, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.2435901084203637, |
|
"grad_norm": 0.5605701208114624, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4661, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.24586665148971287, |
|
"grad_norm": 0.43142881989479065, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4197, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.24814319455906206, |
|
"grad_norm": 0.47790080308914185, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4568, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.25041973762841124, |
|
"grad_norm": 0.6048968434333801, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4199, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.25269628069776046, |
|
"grad_norm": 0.4925907850265503, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4325, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.25497282376710967, |
|
"grad_norm": 0.5463051199913025, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4549, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.2572493668364588, |
|
"grad_norm": 0.4631319046020508, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3977, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.25952590990580804, |
|
"grad_norm": 0.4965234398841858, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4285, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.2618024529751572, |
|
"grad_norm": 0.5436238646507263, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4039, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.2640789960445064, |
|
"grad_norm": 0.5218191742897034, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4092, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.2663555391138556, |
|
"grad_norm": 0.5417261719703674, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3825, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.2686320821832048, |
|
"grad_norm": 0.6126281023025513, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4391, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.270908625252554, |
|
"grad_norm": 0.4734433889389038, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4151, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.2731851683219032, |
|
"grad_norm": 0.4501429796218872, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4178, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.27546171139125236, |
|
"grad_norm": 0.5258509516716003, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4007, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.2777382544606016, |
|
"grad_norm": 0.47874951362609863, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4245, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.2800147975299508, |
|
"grad_norm": 0.528533399105072, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3794, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.28229134059929994, |
|
"grad_norm": 0.46465063095092773, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4019, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.28456788366864916, |
|
"grad_norm": 0.5217177867889404, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4104, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.28684442673799837, |
|
"grad_norm": 0.510036289691925, |
|
"learning_rate": 0.0002, |
|
"loss": 0.389, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.2891209698073475, |
|
"grad_norm": 0.6968228220939636, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4152, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.29139751287669674, |
|
"grad_norm": 0.4529867470264435, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3987, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.29367405594604595, |
|
"grad_norm": 0.5680263638496399, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3828, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.2959505990153951, |
|
"grad_norm": 0.4892405867576599, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4006, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.2982271420847443, |
|
"grad_norm": 0.47588276863098145, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4197, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.30050368515409354, |
|
"grad_norm": 0.5624070167541504, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3997, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.3027802282234427, |
|
"grad_norm": 0.5434039831161499, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3977, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.3050567712927919, |
|
"grad_norm": 0.5572277903556824, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3966, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.30733331436214106, |
|
"grad_norm": 0.5533374547958374, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3803, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.3096098574314903, |
|
"grad_norm": 0.40596967935562134, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3682, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.3118864005008395, |
|
"grad_norm": 0.4737823009490967, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3761, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.31416294357018865, |
|
"grad_norm": 0.4295174777507782, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4035, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.31643948663953786, |
|
"grad_norm": 0.5348454713821411, |
|
"learning_rate": 0.0002, |
|
"loss": 0.404, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.31871602970888707, |
|
"grad_norm": 0.4819965362548828, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3929, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.32099257277823623, |
|
"grad_norm": 0.5920088291168213, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3798, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.32326911584758544, |
|
"grad_norm": 0.4936531186103821, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3995, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.32554565891693465, |
|
"grad_norm": 0.5252315998077393, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3842, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.3278222019862838, |
|
"grad_norm": 0.5818414688110352, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3533, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.330098745055633, |
|
"grad_norm": 0.44053876399993896, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3402, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.33237528812498224, |
|
"grad_norm": 0.5421345233917236, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3542, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.3346518311943314, |
|
"grad_norm": 0.4642751216888428, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3755, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.3369283742636806, |
|
"grad_norm": 0.5137833952903748, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3602, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.3392049173330298, |
|
"grad_norm": 0.5032792687416077, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3451, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.341481460402379, |
|
"grad_norm": 0.4932720363140106, |
|
"learning_rate": 0.0002, |
|
"loss": 0.384, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.3437580034717282, |
|
"grad_norm": 0.49986231327056885, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3826, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.34603454654107735, |
|
"grad_norm": 0.6325618624687195, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3582, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.34831108961042656, |
|
"grad_norm": 0.5402369499206543, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3706, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.3505876326797758, |
|
"grad_norm": 0.4967012107372284, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3456, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.35286417574912493, |
|
"grad_norm": 0.4491735100746155, |
|
"learning_rate": 0.0002, |
|
"loss": 0.347, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.35514071881847414, |
|
"grad_norm": 0.9062516093254089, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3617, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.35741726188782336, |
|
"grad_norm": 0.5253359079360962, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3512, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.3596938049571725, |
|
"grad_norm": 0.4836867153644562, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3585, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.3619703480265217, |
|
"grad_norm": 0.49537473917007446, |
|
"learning_rate": 0.0002, |
|
"loss": 0.364, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.36424689109587094, |
|
"grad_norm": 0.6098095178604126, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3455, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.3665234341652201, |
|
"grad_norm": 0.5926884412765503, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3406, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.3687999772345693, |
|
"grad_norm": 0.5868669152259827, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3643, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.3710765203039185, |
|
"grad_norm": 0.42670106887817383, |
|
"learning_rate": 0.0002, |
|
"loss": 0.344, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.3733530633732677, |
|
"grad_norm": 0.5992838740348816, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3588, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.3756296064426169, |
|
"grad_norm": 0.4388341009616852, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3375, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.3779061495119661, |
|
"grad_norm": 0.596488893032074, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3425, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.38018269258131526, |
|
"grad_norm": 0.4572538137435913, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3711, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.3824592356506645, |
|
"grad_norm": 0.5661656856536865, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3415, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.38473577872001363, |
|
"grad_norm": 0.45082923769950867, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3495, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.38701232178936285, |
|
"grad_norm": 0.4995211660861969, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3311, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.38928886485871206, |
|
"grad_norm": 0.5004004240036011, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3506, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.3915654079280612, |
|
"grad_norm": 0.5676460266113281, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3383, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.39384195099741043, |
|
"grad_norm": 0.4805515706539154, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3382, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.39611849406675964, |
|
"grad_norm": 0.47675764560699463, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3021, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.3983950371361088, |
|
"grad_norm": 0.6285260915756226, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3467, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.400671580205458, |
|
"grad_norm": 0.5657575130462646, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3382, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.4029481232748072, |
|
"grad_norm": 0.6148316860198975, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3396, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.4052246663441564, |
|
"grad_norm": 0.5819992423057556, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3373, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.4075012094135056, |
|
"grad_norm": 0.6080338954925537, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3463, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.4097777524828548, |
|
"grad_norm": 0.6103864312171936, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3441, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.41205429555220396, |
|
"grad_norm": 0.5234800577163696, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3272, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.4143308386215532, |
|
"grad_norm": 0.5393822193145752, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3308, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.4166073816909024, |
|
"grad_norm": 0.4853431284427643, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3152, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.41888392476025155, |
|
"grad_norm": 0.5507264733314514, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3229, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.42116046782960076, |
|
"grad_norm": 0.44306129217147827, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3389, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.4234370108989499, |
|
"grad_norm": 0.4574294984340668, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3516, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.42571355396829913, |
|
"grad_norm": 0.5367994904518127, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3576, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.42799009703764834, |
|
"grad_norm": 0.5044491291046143, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3449, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.4302666401069975, |
|
"grad_norm": 0.41715556383132935, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3128, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.4325431831763467, |
|
"grad_norm": 0.4355817437171936, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3131, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.4348197262456959, |
|
"grad_norm": 0.5237382650375366, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3281, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.4370962693150451, |
|
"grad_norm": 0.6210081577301025, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3195, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.4393728123843943, |
|
"grad_norm": 0.5145352482795715, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3107, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.4416493554537435, |
|
"grad_norm": 0.5554608106613159, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3418, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.44392589852309267, |
|
"grad_norm": 0.4971628487110138, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3293, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.4462024415924419, |
|
"grad_norm": 0.49732130765914917, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3138, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.4484789846617911, |
|
"grad_norm": 0.5883257985115051, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3357, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.45075552773114025, |
|
"grad_norm": 0.5349528193473816, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3381, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.45303207080048946, |
|
"grad_norm": 0.5360047221183777, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3116, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.4553086138698387, |
|
"grad_norm": 0.4889732003211975, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3154, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.45758515693918783, |
|
"grad_norm": 0.4912421703338623, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3054, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.45986170000853704, |
|
"grad_norm": 0.4449983835220337, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3079, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.46213824307788626, |
|
"grad_norm": 0.4488675892353058, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3027, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.4644147861472354, |
|
"grad_norm": 0.5412561893463135, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2932, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.4666913292165846, |
|
"grad_norm": 0.41218650341033936, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3087, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.4689678722859338, |
|
"grad_norm": 0.5233949422836304, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3157, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.471244415355283, |
|
"grad_norm": 0.5676075220108032, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3267, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.4735209584246322, |
|
"grad_norm": 0.5336834788322449, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3185, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.47579750149398137, |
|
"grad_norm": 0.5505925416946411, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3116, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.4780740445633306, |
|
"grad_norm": 0.5440223813056946, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3234, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.4803505876326798, |
|
"grad_norm": 0.46334293484687805, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3209, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.48262713070202895, |
|
"grad_norm": 0.452364444732666, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3056, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.48490367377137816, |
|
"grad_norm": 0.5037956833839417, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3141, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.4871802168407274, |
|
"grad_norm": 0.4308939278125763, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2948, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.48945675991007653, |
|
"grad_norm": 0.45019960403442383, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3142, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.49173330297942575, |
|
"grad_norm": 0.4351404011249542, |
|
"learning_rate": 0.0002, |
|
"loss": 0.31, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.49400984604877496, |
|
"grad_norm": 0.38306841254234314, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2889, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.4962863891181241, |
|
"grad_norm": 0.545360803604126, |
|
"learning_rate": 0.0002, |
|
"loss": 0.311, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.49856293218747333, |
|
"grad_norm": 0.44942232966423035, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2899, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.5008394752568225, |
|
"grad_norm": 0.46564239263534546, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3013, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.5031160183261717, |
|
"grad_norm": 0.5398554801940918, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3104, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.5053925613955209, |
|
"grad_norm": 0.47367504239082336, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2945, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.5076691044648701, |
|
"grad_norm": 0.45659711956977844, |
|
"learning_rate": 0.0002, |
|
"loss": 0.304, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.5099456475342193, |
|
"grad_norm": 0.4942033290863037, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2969, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.5122221906035684, |
|
"grad_norm": 0.46578243374824524, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2935, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.5144987336729177, |
|
"grad_norm": 0.6523891687393188, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2823, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.5167752767422669, |
|
"grad_norm": 0.4787238538265228, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3148, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.5190518198116161, |
|
"grad_norm": 0.46825891733169556, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3089, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.5213283628809653, |
|
"grad_norm": 0.46605536341667175, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3012, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.5236049059503144, |
|
"grad_norm": 0.5826888680458069, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3043, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.5258814490196636, |
|
"grad_norm": 0.48641151189804077, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2952, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 0.5281579920890128, |
|
"grad_norm": 0.5396175384521484, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2926, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.530434535158362, |
|
"grad_norm": 0.5584241151809692, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3048, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 0.5327110782277112, |
|
"grad_norm": 0.5832685232162476, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2948, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 0.5349876212970605, |
|
"grad_norm": 0.4676337242126465, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3043, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.5372641643664096, |
|
"grad_norm": 0.4440428614616394, |
|
"learning_rate": 0.0002, |
|
"loss": 0.288, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 0.5395407074357588, |
|
"grad_norm": 0.49934279918670654, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2882, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 0.541817250505108, |
|
"grad_norm": 0.5172054171562195, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3225, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 0.5440937935744572, |
|
"grad_norm": 0.4527619183063507, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2869, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 0.5463703366438064, |
|
"grad_norm": 0.548918604850769, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3105, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.5486468797131556, |
|
"grad_norm": 0.48801419138908386, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2835, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 0.5509234227825047, |
|
"grad_norm": 0.49810609221458435, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3227, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 0.5531999658518539, |
|
"grad_norm": 0.49763086438179016, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2786, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 0.5554765089212031, |
|
"grad_norm": 0.48815059661865234, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2802, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 0.5577530519905524, |
|
"grad_norm": 0.3571115732192993, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2796, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.5600295950599016, |
|
"grad_norm": 0.6448425650596619, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2844, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 0.5623061381292508, |
|
"grad_norm": 0.49660468101501465, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2892, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 0.5645826811985999, |
|
"grad_norm": 0.47702720761299133, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3111, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.5668592242679491, |
|
"grad_norm": 0.5281921029090881, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2908, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 0.5691357673372983, |
|
"grad_norm": 0.6427987813949585, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2848, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.5714123104066475, |
|
"grad_norm": 0.5437233448028564, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3023, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 0.5736888534759967, |
|
"grad_norm": 0.517444372177124, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2876, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 0.5759653965453458, |
|
"grad_norm": 0.5197298526763916, |
|
"learning_rate": 0.0002, |
|
"loss": 0.304, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 0.578241939614695, |
|
"grad_norm": 0.3452152907848358, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2794, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 0.5805184826840443, |
|
"grad_norm": 0.5630306601524353, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2979, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.5827950257533935, |
|
"grad_norm": 0.5696737170219421, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3035, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.5850715688227427, |
|
"grad_norm": 0.5024551153182983, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2717, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 0.5873481118920919, |
|
"grad_norm": 0.4166383147239685, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3065, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 0.589624654961441, |
|
"grad_norm": 0.36780408024787903, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2864, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 0.5919011980307902, |
|
"grad_norm": 0.436526894569397, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2764, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.5941777411001394, |
|
"grad_norm": 0.43115249276161194, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2791, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 0.5964542841694886, |
|
"grad_norm": 0.359739750623703, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3108, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 0.5987308272388379, |
|
"grad_norm": 0.4555259644985199, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2623, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 0.6010073703081871, |
|
"grad_norm": 0.4587076008319855, |
|
"learning_rate": 0.0002, |
|
"loss": 0.293, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 0.6032839133775362, |
|
"grad_norm": 0.5236973166465759, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2888, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.6055604564468854, |
|
"grad_norm": 0.46685513854026794, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2731, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 0.6078369995162346, |
|
"grad_norm": 0.5701884627342224, |
|
"learning_rate": 0.0002, |
|
"loss": 0.28, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 0.6101135425855838, |
|
"grad_norm": 0.5002717971801758, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2777, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 0.612390085654933, |
|
"grad_norm": 0.5896885395050049, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3048, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 0.6146666287242821, |
|
"grad_norm": 0.49014943838119507, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2642, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.6169431717936313, |
|
"grad_norm": 0.5924846529960632, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2943, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 0.6192197148629806, |
|
"grad_norm": 0.49827829003334045, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2879, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.6214962579323298, |
|
"grad_norm": 0.45312178134918213, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2728, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 0.623772801001679, |
|
"grad_norm": 0.3595191538333893, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2713, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 0.6260493440710282, |
|
"grad_norm": 0.6547619104385376, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2855, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.6283258871403773, |
|
"grad_norm": 0.4659534692764282, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2908, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 0.6306024302097265, |
|
"grad_norm": 0.4027460813522339, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2651, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 0.6328789732790757, |
|
"grad_norm": 0.36129653453826904, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2915, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 0.6351555163484249, |
|
"grad_norm": 0.5963912010192871, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2968, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 0.6374320594177741, |
|
"grad_norm": 0.49669450521469116, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2965, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.6397086024871234, |
|
"grad_norm": 0.5784302353858948, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2626, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 0.6419851455564725, |
|
"grad_norm": 0.5651645660400391, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2738, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 0.6442616886258217, |
|
"grad_norm": 0.45475292205810547, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2653, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 0.6465382316951709, |
|
"grad_norm": 0.4691898822784424, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2634, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 0.6488147747645201, |
|
"grad_norm": 0.4604431092739105, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2838, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.6510913178338693, |
|
"grad_norm": 0.506804883480072, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2657, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 0.6533678609032184, |
|
"grad_norm": 0.5051881670951843, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2976, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 0.6556444039725676, |
|
"grad_norm": 0.4780672788619995, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2828, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.6579209470419168, |
|
"grad_norm": 0.4695095121860504, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2685, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 0.660197490111266, |
|
"grad_norm": 0.4259052276611328, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2635, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.6624740331806153, |
|
"grad_norm": 0.5684182643890381, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2879, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 0.6647505762499645, |
|
"grad_norm": 0.42193594574928284, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2678, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 0.6670271193193136, |
|
"grad_norm": 0.5095034241676331, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2677, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 0.6693036623886628, |
|
"grad_norm": 0.46626052260398865, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2906, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 0.671580205458012, |
|
"grad_norm": 0.5086765289306641, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2775, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.6738567485273612, |
|
"grad_norm": 0.44444966316223145, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2764, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 0.6761332915967104, |
|
"grad_norm": 0.4477381706237793, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2729, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 0.6784098346660596, |
|
"grad_norm": 0.46984028816223145, |
|
"learning_rate": 0.0002, |
|
"loss": 0.273, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 0.6806863777354087, |
|
"grad_norm": 0.417084276676178, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2744, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 0.682962920804758, |
|
"grad_norm": 0.4144213795661926, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2704, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.6852394638741072, |
|
"grad_norm": 0.5844799876213074, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2635, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 0.6875160069434564, |
|
"grad_norm": 0.39512693881988525, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2471, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 0.6897925500128056, |
|
"grad_norm": 0.5299990773200989, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2648, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 0.6920690930821547, |
|
"grad_norm": 0.4980265498161316, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2725, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.6943456361515039, |
|
"grad_norm": 0.4003869891166687, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2768, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.6966221792208531, |
|
"grad_norm": 0.5103460550308228, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2638, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 0.6988987222902023, |
|
"grad_norm": 0.737101137638092, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2779, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 0.7011752653595515, |
|
"grad_norm": 0.4731826186180115, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2691, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 0.7034518084289008, |
|
"grad_norm": 0.5234053730964661, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2739, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 0.7057283514982499, |
|
"grad_norm": 0.5235525369644165, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2754, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.7080048945675991, |
|
"grad_norm": 0.4453619122505188, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2833, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 0.7102814376369483, |
|
"grad_norm": 0.4025666117668152, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2713, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 0.7125579807062975, |
|
"grad_norm": 0.35240331292152405, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2786, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 0.7148345237756467, |
|
"grad_norm": 0.4521905779838562, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2639, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 0.7171110668449959, |
|
"grad_norm": 0.5230519771575928, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2517, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.719387609914345, |
|
"grad_norm": 0.5415637493133545, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2739, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 0.7216641529836942, |
|
"grad_norm": 0.4067966341972351, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2751, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 0.7239406960530435, |
|
"grad_norm": 0.4670214354991913, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2644, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 0.7262172391223927, |
|
"grad_norm": 0.5316203236579895, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2746, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 0.7284937821917419, |
|
"grad_norm": 0.46312493085861206, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2539, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.730770325261091, |
|
"grad_norm": 0.465279221534729, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2742, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 0.7330468683304402, |
|
"grad_norm": 0.5096962451934814, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2546, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 0.7353234113997894, |
|
"grad_norm": 0.4525590240955353, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2694, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 0.7375999544691386, |
|
"grad_norm": 0.5033881664276123, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2627, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 0.7398764975384878, |
|
"grad_norm": 0.44053900241851807, |
|
"learning_rate": 0.0002, |
|
"loss": 0.258, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.742153040607837, |
|
"grad_norm": 0.4677462875843048, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2659, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 0.7444295836771861, |
|
"grad_norm": 0.5687553882598877, |
|
"learning_rate": 0.0002, |
|
"loss": 0.271, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 0.7467061267465354, |
|
"grad_norm": 0.4980468451976776, |
|
"learning_rate": 0.0002, |
|
"loss": 0.265, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 0.7489826698158846, |
|
"grad_norm": 0.5155619382858276, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2491, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 0.7512592128852338, |
|
"grad_norm": 0.5364673733711243, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2564, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.753535755954583, |
|
"grad_norm": 0.421838641166687, |
|
"learning_rate": 0.0002, |
|
"loss": 0.267, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 0.7558122990239322, |
|
"grad_norm": 0.46299833059310913, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2461, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 0.7580888420932813, |
|
"grad_norm": 0.3832832872867584, |
|
"learning_rate": 0.0002, |
|
"loss": 0.265, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 0.7603653851626305, |
|
"grad_norm": 0.5560947060585022, |
|
"learning_rate": 0.0002, |
|
"loss": 0.253, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 0.7626419282319797, |
|
"grad_norm": 0.4832628667354584, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2515, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.764918471301329, |
|
"grad_norm": 0.44354599714279175, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2687, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 0.7671950143706782, |
|
"grad_norm": 0.3746070861816406, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2481, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 0.7694715574400273, |
|
"grad_norm": 0.3048388659954071, |
|
"learning_rate": 0.0002, |
|
"loss": 0.269, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 0.7717481005093765, |
|
"grad_norm": 0.46471843123435974, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2642, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 0.7740246435787257, |
|
"grad_norm": 0.44309428334236145, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2565, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.7763011866480749, |
|
"grad_norm": 0.4174291789531708, |
|
"learning_rate": 0.0002, |
|
"loss": 0.262, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 0.7785777297174241, |
|
"grad_norm": 0.42592549324035645, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2608, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 0.7808542727867733, |
|
"grad_norm": 0.4378054141998291, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2765, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 0.7831308158561224, |
|
"grad_norm": 0.4560708701610565, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2381, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 0.7854073589254716, |
|
"grad_norm": 0.4595545828342438, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2561, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.7876839019948209, |
|
"grad_norm": 0.45213592052459717, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2645, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 0.7899604450641701, |
|
"grad_norm": 0.4857342839241028, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2687, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 0.7922369881335193, |
|
"grad_norm": 0.4939437508583069, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2642, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 0.7945135312028685, |
|
"grad_norm": 0.46244382858276367, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2536, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 0.7967900742722176, |
|
"grad_norm": 0.5876993536949158, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2492, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.7990666173415668, |
|
"grad_norm": 0.5170072913169861, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2548, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 0.801343160410916, |
|
"grad_norm": 0.394380658864975, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2524, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 0.8036197034802652, |
|
"grad_norm": 0.4716455340385437, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2573, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 0.8058962465496144, |
|
"grad_norm": 0.34525179862976074, |
|
"learning_rate": 0.0002, |
|
"loss": 0.246, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 0.8081727896189635, |
|
"grad_norm": 0.5030418038368225, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2596, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.8104493326883128, |
|
"grad_norm": 0.5586132407188416, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2568, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 0.812725875757662, |
|
"grad_norm": 0.47025129199028015, |
|
"learning_rate": 0.0002, |
|
"loss": 0.265, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 0.8150024188270112, |
|
"grad_norm": 0.5654832720756531, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2468, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 0.8172789618963604, |
|
"grad_norm": 0.4701017141342163, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2538, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 0.8195555049657096, |
|
"grad_norm": 0.47270438075065613, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2529, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.8218320480350587, |
|
"grad_norm": 0.39433714747428894, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2445, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 0.8241085911044079, |
|
"grad_norm": 0.4521467685699463, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2556, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 0.8263851341737571, |
|
"grad_norm": 0.28483667969703674, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2451, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 0.8286616772431064, |
|
"grad_norm": 0.4298310875892639, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2599, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 0.8309382203124556, |
|
"grad_norm": 0.39677906036376953, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2539, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.8332147633818048, |
|
"grad_norm": 0.5800175666809082, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2463, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 0.8354913064511539, |
|
"grad_norm": 0.42742472887039185, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2593, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 0.8377678495205031, |
|
"grad_norm": 0.5521807670593262, |
|
"learning_rate": 0.0002, |
|
"loss": 0.253, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 0.8400443925898523, |
|
"grad_norm": 0.5068047046661377, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2503, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 0.8423209356592015, |
|
"grad_norm": 0.4325120151042938, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2466, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.8445974787285507, |
|
"grad_norm": 0.5130394101142883, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2521, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 0.8468740217978998, |
|
"grad_norm": 0.5091120600700378, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2429, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 0.849150564867249, |
|
"grad_norm": 0.4635036289691925, |
|
"learning_rate": 0.0002, |
|
"loss": 0.235, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 0.8514271079365983, |
|
"grad_norm": 0.3827108144760132, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2487, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 0.8537036510059475, |
|
"grad_norm": 0.3880899250507355, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2469, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.8559801940752967, |
|
"grad_norm": 0.408933162689209, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2499, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 0.8582567371446459, |
|
"grad_norm": 0.5049706101417542, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2418, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 0.860533280213995, |
|
"grad_norm": 0.43551701307296753, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2478, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 0.8628098232833442, |
|
"grad_norm": 0.5024411678314209, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2538, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 0.8650863663526934, |
|
"grad_norm": 0.36361223459243774, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2536, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.8673629094220426, |
|
"grad_norm": 0.4526277482509613, |
|
"learning_rate": 0.0002, |
|
"loss": 0.242, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 0.8696394524913919, |
|
"grad_norm": 0.5677676200866699, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2572, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 0.8719159955607411, |
|
"grad_norm": 0.4915711283683777, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2562, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 0.8741925386300902, |
|
"grad_norm": 0.36850452423095703, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2523, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 0.8764690816994394, |
|
"grad_norm": 0.38313761353492737, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2596, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.8787456247687886, |
|
"grad_norm": 0.5384640097618103, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2455, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 0.8810221678381378, |
|
"grad_norm": 0.5308900475502014, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2439, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 0.883298710907487, |
|
"grad_norm": 0.5488154292106628, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2428, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 0.8855752539768362, |
|
"grad_norm": 0.5271242260932922, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2372, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 0.8878517970461853, |
|
"grad_norm": 0.46171802282333374, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2506, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.8901283401155345, |
|
"grad_norm": 0.45436665415763855, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2414, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 0.8924048831848838, |
|
"grad_norm": 0.4920847415924072, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2669, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 0.894681426254233, |
|
"grad_norm": 0.5913518071174622, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2552, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 0.8969579693235822, |
|
"grad_norm": 0.6011972427368164, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2533, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 0.8992345123929313, |
|
"grad_norm": 0.4650927186012268, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2448, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.9015110554622805, |
|
"grad_norm": 0.5828790664672852, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2381, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 0.9037875985316297, |
|
"grad_norm": 0.5178338885307312, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2619, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 0.9060641416009789, |
|
"grad_norm": 0.5147708058357239, |
|
"learning_rate": 0.0002, |
|
"loss": 0.258, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 0.9083406846703281, |
|
"grad_norm": 0.45790836215019226, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2474, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 0.9106172277396773, |
|
"grad_norm": 0.3837074935436249, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2356, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.9128937708090265, |
|
"grad_norm": 0.4466090500354767, |
|
"learning_rate": 0.0002, |
|
"loss": 0.237, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 0.9151703138783757, |
|
"grad_norm": 0.5893344283103943, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2399, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 0.9174468569477249, |
|
"grad_norm": 0.49547362327575684, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2526, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 0.9197234000170741, |
|
"grad_norm": 0.47068551182746887, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2631, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 0.9219999430864233, |
|
"grad_norm": 0.3512951135635376, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2395, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.9242764861557725, |
|
"grad_norm": 0.3996793031692505, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2424, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 0.9265530292251216, |
|
"grad_norm": 0.5782022476196289, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2549, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 0.9288295722944708, |
|
"grad_norm": 0.450860857963562, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2465, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 0.93110611536382, |
|
"grad_norm": 0.4679816663265228, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2326, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 0.9333826584331693, |
|
"grad_norm": 0.5497337579727173, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2457, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.9356592015025185, |
|
"grad_norm": 0.3775748312473297, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2331, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 0.9379357445718676, |
|
"grad_norm": 0.5428327918052673, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2399, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 0.9402122876412168, |
|
"grad_norm": 0.4089830219745636, |
|
"learning_rate": 0.0002, |
|
"loss": 0.246, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 0.942488830710566, |
|
"grad_norm": 0.5781340003013611, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2451, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 0.9447653737799152, |
|
"grad_norm": 0.5869989395141602, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2541, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.9470419168492644, |
|
"grad_norm": 0.47708019614219666, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2559, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 0.9493184599186136, |
|
"grad_norm": 0.5445525050163269, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2466, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 0.9515950029879627, |
|
"grad_norm": 0.480214387178421, |
|
"learning_rate": 0.0002, |
|
"loss": 0.236, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 0.953871546057312, |
|
"grad_norm": 0.5392053127288818, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2383, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 0.9561480891266612, |
|
"grad_norm": 0.4515858292579651, |
|
"learning_rate": 0.0002, |
|
"loss": 0.238, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.9584246321960104, |
|
"grad_norm": 0.5461826324462891, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2442, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 0.9607011752653596, |
|
"grad_norm": 0.44309332966804504, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2622, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 0.9629777183347088, |
|
"grad_norm": 0.5409505367279053, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2303, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 0.9652542614040579, |
|
"grad_norm": 0.3868342638015747, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2624, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 0.9675308044734071, |
|
"grad_norm": 0.38888975977897644, |
|
"learning_rate": 0.0002, |
|
"loss": 0.246, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.9698073475427563, |
|
"grad_norm": 0.38946032524108887, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2503, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 0.9720838906121055, |
|
"grad_norm": 0.42425817251205444, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2556, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 0.9743604336814548, |
|
"grad_norm": 0.41515296697616577, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2437, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 0.9766369767508039, |
|
"grad_norm": 0.4085826575756073, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2293, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 0.9789135198201531, |
|
"grad_norm": 0.3404542803764343, |
|
"learning_rate": 0.0002, |
|
"loss": 0.242, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.9811900628895023, |
|
"grad_norm": 0.43266579508781433, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2513, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 0.9834666059588515, |
|
"grad_norm": 0.42724549770355225, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2384, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 0.9857431490282007, |
|
"grad_norm": 0.5089221596717834, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2409, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 0.9880196920975499, |
|
"grad_norm": 0.519223690032959, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2353, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 0.990296235166899, |
|
"grad_norm": 0.5701056122779846, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2486, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.9925727782362482, |
|
"grad_norm": 0.4519595503807068, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2374, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 0.9948493213055974, |
|
"grad_norm": 0.4883946180343628, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2441, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 0.9971258643749467, |
|
"grad_norm": 0.6918900012969971, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2403, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 0.9994024074442959, |
|
"grad_norm": 0.4810091555118561, |
|
"learning_rate": 0.0002, |
|
"loss": 0.2334, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.30941203236579895, |
|
"eval_runtime": 408.7196, |
|
"eval_samples_per_second": 7.083, |
|
"eval_steps_per_second": 0.886, |
|
"step": 8786 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 13000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 77, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.923169198364426e+18, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|