{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.8399789338170753, "eval_steps": 500, "global_step": 17943, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 4.681373983264088e-05, "grad_norm": 9.625, "learning_rate": 2e-05, "loss": 10.5587, "step": 1 }, { "epoch": 9.362747966528175e-05, "grad_norm": 9.0625, "learning_rate": 4e-05, "loss": 10.5005, "step": 2 }, { "epoch": 0.00014044121949792265, "grad_norm": 9.1875, "learning_rate": 6e-05, "loss": 10.3841, "step": 3 }, { "epoch": 0.0001872549593305635, "grad_norm": 7.71875, "learning_rate": 8e-05, "loss": 10.1555, "step": 4 }, { "epoch": 0.0002340686991632044, "grad_norm": 5.34375, "learning_rate": 0.0001, "loss": 9.83, "step": 5 }, { "epoch": 0.0002808824389958453, "grad_norm": 5.03125, "learning_rate": 0.00012, "loss": 9.4883, "step": 6 }, { "epoch": 0.0003276961788284862, "grad_norm": 4.0625, "learning_rate": 0.00014, "loss": 9.2042, "step": 7 }, { "epoch": 0.000374509918661127, "grad_norm": 3.015625, "learning_rate": 0.00016, "loss": 9.2225, "step": 8 }, { "epoch": 0.0004213236584937679, "grad_norm": 3.5, "learning_rate": 0.00018, "loss": 8.9115, "step": 9 }, { "epoch": 0.0004681373983264088, "grad_norm": 2.53125, "learning_rate": 0.0002, "loss": 8.7852, "step": 10 }, { "epoch": 0.0005149511381590497, "grad_norm": 2.796875, "learning_rate": 0.00019999999982962788, "loss": 8.5903, "step": 11 }, { "epoch": 0.0005617648779916906, "grad_norm": 2.21875, "learning_rate": 0.00019999999931851156, "loss": 8.4898, "step": 12 }, { "epoch": 0.0006085786178243314, "grad_norm": 1.6953125, "learning_rate": 0.00019999999846665104, "loss": 8.3986, "step": 13 }, { "epoch": 0.0006553923576569724, "grad_norm": 1.8203125, "learning_rate": 0.00019999999727404627, "loss": 8.3349, "step": 14 }, { "epoch": 0.0007022060974896132, "grad_norm": 1.875, "learning_rate": 0.00019999999574069727, "loss": 7.9546, "step": 15 }, { "epoch": 0.000749019837322254, "grad_norm": 1.8359375, "learning_rate": 0.00019999999386660414, "loss": 7.9107, "step": 16 }, { "epoch": 0.000795833577154895, "grad_norm": 1.4140625, "learning_rate": 0.00019999999165176674, "loss": 7.8724, "step": 17 }, { "epoch": 0.0008426473169875358, "grad_norm": 2.015625, "learning_rate": 0.00019999998909618518, "loss": 7.8727, "step": 18 }, { "epoch": 0.0008894610568201767, "grad_norm": 1.5546875, "learning_rate": 0.00019999998619985943, "loss": 7.7074, "step": 19 }, { "epoch": 0.0009362747966528176, "grad_norm": 1.28125, "learning_rate": 0.0001999999829627895, "loss": 7.7303, "step": 20 }, { "epoch": 0.0009830885364854585, "grad_norm": 1.5390625, "learning_rate": 0.00019999997938497543, "loss": 7.6192, "step": 21 }, { "epoch": 0.0010299022763180993, "grad_norm": 1.7421875, "learning_rate": 0.0001999999754664172, "loss": 7.4217, "step": 22 }, { "epoch": 0.0010767160161507402, "grad_norm": 1.84375, "learning_rate": 0.00019999997120711483, "loss": 7.4604, "step": 23 }, { "epoch": 0.0011235297559833812, "grad_norm": 1.359375, "learning_rate": 0.00019999996660706833, "loss": 7.2903, "step": 24 }, { "epoch": 0.001170343495816022, "grad_norm": 1.265625, "learning_rate": 0.00019999996166627772, "loss": 7.3114, "step": 25 }, { "epoch": 0.0012171572356486629, "grad_norm": 1.5078125, "learning_rate": 0.00019999995638474304, "loss": 7.6625, "step": 26 }, { "epoch": 0.0012639709754813037, "grad_norm": 1.3984375, "learning_rate": 0.00019999995076246428, "loss": 7.2903, "step": 27 }, { "epoch": 0.0013107847153139447, "grad_norm": 1.6953125, "learning_rate": 0.00019999994479944148, "loss": 7.6075, "step": 28 }, { "epoch": 0.0013575984551465856, "grad_norm": 1.109375, "learning_rate": 0.00019999993849567464, "loss": 7.4221, "step": 29 }, { "epoch": 0.0014044121949792264, "grad_norm": 1.78125, "learning_rate": 0.0001999999318511638, "loss": 7.5448, "step": 30 }, { "epoch": 0.0014512259348118672, "grad_norm": 1.4609375, "learning_rate": 0.00019999992486590895, "loss": 7.3255, "step": 31 }, { "epoch": 0.001498039674644508, "grad_norm": 1.3671875, "learning_rate": 0.00019999991753991015, "loss": 7.2797, "step": 32 }, { "epoch": 0.0015448534144771491, "grad_norm": 1.421875, "learning_rate": 0.0001999999098731674, "loss": 7.2972, "step": 33 }, { "epoch": 0.00159166715430979, "grad_norm": 1.1796875, "learning_rate": 0.00019999990186568076, "loss": 7.4253, "step": 34 }, { "epoch": 0.0016384808941424308, "grad_norm": 1.203125, "learning_rate": 0.0001999998935174502, "loss": 7.5086, "step": 35 }, { "epoch": 0.0016852946339750716, "grad_norm": 1.4375, "learning_rate": 0.00019999988482847584, "loss": 7.5712, "step": 36 }, { "epoch": 0.0017321083738077126, "grad_norm": 1.5390625, "learning_rate": 0.00019999987579875762, "loss": 7.2869, "step": 37 }, { "epoch": 0.0017789221136403535, "grad_norm": 2.03125, "learning_rate": 0.00019999986642829559, "loss": 7.3813, "step": 38 }, { "epoch": 0.0018257358534729943, "grad_norm": 1.6484375, "learning_rate": 0.00019999985671708981, "loss": 7.2332, "step": 39 }, { "epoch": 0.0018725495933056351, "grad_norm": 1.1484375, "learning_rate": 0.00019999984666514025, "loss": 7.3536, "step": 40 }, { "epoch": 0.0019193633331382762, "grad_norm": 1.078125, "learning_rate": 0.00019999983627244706, "loss": 7.2352, "step": 41 }, { "epoch": 0.001966177072970917, "grad_norm": 1.4609375, "learning_rate": 0.00019999982553901017, "loss": 7.4369, "step": 42 }, { "epoch": 0.002012990812803558, "grad_norm": 1.21875, "learning_rate": 0.0001999998144648297, "loss": 7.3666, "step": 43 }, { "epoch": 0.0020598045526361987, "grad_norm": 1.5234375, "learning_rate": 0.0001999998030499056, "loss": 7.2945, "step": 44 }, { "epoch": 0.0021066182924688397, "grad_norm": 1.1953125, "learning_rate": 0.00019999979129423795, "loss": 7.4709, "step": 45 }, { "epoch": 0.0021534320323014803, "grad_norm": 1.3984375, "learning_rate": 0.0001999997791978268, "loss": 7.2908, "step": 46 }, { "epoch": 0.0022002457721341214, "grad_norm": 1.453125, "learning_rate": 0.0001999997667606722, "loss": 7.2221, "step": 47 }, { "epoch": 0.0022470595119667624, "grad_norm": 1.390625, "learning_rate": 0.00019999975398277419, "loss": 7.1293, "step": 48 }, { "epoch": 0.002293873251799403, "grad_norm": 1.3828125, "learning_rate": 0.00019999974086413276, "loss": 7.3055, "step": 49 }, { "epoch": 0.002340686991632044, "grad_norm": 1.2890625, "learning_rate": 0.000199999727404748, "loss": 7.0999, "step": 50 }, { "epoch": 0.0023875007314646847, "grad_norm": 1.53125, "learning_rate": 0.00019999971360461994, "loss": 7.2172, "step": 51 }, { "epoch": 0.0024343144712973257, "grad_norm": 1.6171875, "learning_rate": 0.00019999969946374866, "loss": 7.0868, "step": 52 }, { "epoch": 0.002481128211129967, "grad_norm": 2.53125, "learning_rate": 0.00019999968498213419, "loss": 7.2289, "step": 53 }, { "epoch": 0.0025279419509626074, "grad_norm": 1.8125, "learning_rate": 0.00019999967015977657, "loss": 7.405, "step": 54 }, { "epoch": 0.0025747556907952484, "grad_norm": 2.296875, "learning_rate": 0.00019999965499667584, "loss": 7.2212, "step": 55 }, { "epoch": 0.0026215694306278895, "grad_norm": 1.3828125, "learning_rate": 0.00019999963949283205, "loss": 6.9824, "step": 56 }, { "epoch": 0.00266838317046053, "grad_norm": 1.359375, "learning_rate": 0.00019999962364824526, "loss": 7.0745, "step": 57 }, { "epoch": 0.002715196910293171, "grad_norm": 1.6484375, "learning_rate": 0.00019999960746291558, "loss": 6.9829, "step": 58 }, { "epoch": 0.0027620106501258118, "grad_norm": 1.25, "learning_rate": 0.00019999959093684302, "loss": 7.0456, "step": 59 }, { "epoch": 0.002808824389958453, "grad_norm": 1.7265625, "learning_rate": 0.00019999957407002762, "loss": 6.9504, "step": 60 }, { "epoch": 0.002855638129791094, "grad_norm": 1.640625, "learning_rate": 0.00019999955686246943, "loss": 6.7784, "step": 61 }, { "epoch": 0.0029024518696237345, "grad_norm": 1.7890625, "learning_rate": 0.00019999953931416854, "loss": 6.9617, "step": 62 }, { "epoch": 0.0029492656094563755, "grad_norm": 1.71875, "learning_rate": 0.000199999521425125, "loss": 6.7434, "step": 63 }, { "epoch": 0.002996079349289016, "grad_norm": 1.4140625, "learning_rate": 0.00019999950319533888, "loss": 6.9401, "step": 64 }, { "epoch": 0.003042893089121657, "grad_norm": 2.578125, "learning_rate": 0.00019999948462481022, "loss": 7.0247, "step": 65 }, { "epoch": 0.0030897068289542982, "grad_norm": 1.7265625, "learning_rate": 0.00019999946571353912, "loss": 6.7464, "step": 66 }, { "epoch": 0.003136520568786939, "grad_norm": 1.6015625, "learning_rate": 0.00019999944646152557, "loss": 6.8974, "step": 67 }, { "epoch": 0.00318333430861958, "grad_norm": 1.5625, "learning_rate": 0.00019999942686876974, "loss": 6.7462, "step": 68 }, { "epoch": 0.003230148048452221, "grad_norm": 2.1875, "learning_rate": 0.0001999994069352716, "loss": 6.9558, "step": 69 }, { "epoch": 0.0032769617882848615, "grad_norm": 2.125, "learning_rate": 0.0001999993866610313, "loss": 6.9781, "step": 70 }, { "epoch": 0.0033237755281175026, "grad_norm": 1.921875, "learning_rate": 0.00019999936604604885, "loss": 8.5057, "step": 71 }, { "epoch": 0.003370589267950143, "grad_norm": 1.25, "learning_rate": 0.00019999934509032437, "loss": 6.7316, "step": 72 }, { "epoch": 0.0034174030077827842, "grad_norm": 1.421875, "learning_rate": 0.00019999932379385785, "loss": 7.0338, "step": 73 }, { "epoch": 0.0034642167476154253, "grad_norm": 1.703125, "learning_rate": 0.00019999930215664947, "loss": 6.8349, "step": 74 }, { "epoch": 0.003511030487448066, "grad_norm": 2.046875, "learning_rate": 0.00019999928017869922, "loss": 6.6431, "step": 75 }, { "epoch": 0.003557844227280707, "grad_norm": 1.8671875, "learning_rate": 0.00019999925786000718, "loss": 6.7677, "step": 76 }, { "epoch": 0.0036046579671133476, "grad_norm": 1.46875, "learning_rate": 0.00019999923520057348, "loss": 6.6546, "step": 77 }, { "epoch": 0.0036514717069459886, "grad_norm": 1.4140625, "learning_rate": 0.00019999921220039817, "loss": 6.5876, "step": 78 }, { "epoch": 0.0036982854467786297, "grad_norm": 1.90625, "learning_rate": 0.0001999991888594813, "loss": 6.819, "step": 79 }, { "epoch": 0.0037450991866112703, "grad_norm": 1.1171875, "learning_rate": 0.000199999165177823, "loss": 6.718, "step": 80 }, { "epoch": 0.0037919129264439113, "grad_norm": 1.3984375, "learning_rate": 0.0001999991411554233, "loss": 6.7555, "step": 81 }, { "epoch": 0.0038387266662765524, "grad_norm": 1.7265625, "learning_rate": 0.00019999911679228233, "loss": 6.6067, "step": 82 }, { "epoch": 0.003885540406109193, "grad_norm": 1.671875, "learning_rate": 0.00019999909208840016, "loss": 6.6304, "step": 83 }, { "epoch": 0.003932354145941834, "grad_norm": 1.6796875, "learning_rate": 0.00019999906704377683, "loss": 6.6464, "step": 84 }, { "epoch": 0.003979167885774475, "grad_norm": 1.8828125, "learning_rate": 0.00019999904165841244, "loss": 6.7784, "step": 85 }, { "epoch": 0.004025981625607116, "grad_norm": 1.5703125, "learning_rate": 0.00019999901593230717, "loss": 6.5869, "step": 86 }, { "epoch": 0.004072795365439756, "grad_norm": 1.765625, "learning_rate": 0.00019999898986546098, "loss": 6.6631, "step": 87 }, { "epoch": 0.004119609105272397, "grad_norm": 1.234375, "learning_rate": 0.000199998963457874, "loss": 6.5766, "step": 88 }, { "epoch": 0.004166422845105038, "grad_norm": 1.0078125, "learning_rate": 0.00019999893670954636, "loss": 8.2825, "step": 89 }, { "epoch": 0.0042132365849376794, "grad_norm": 1.7890625, "learning_rate": 0.00019999890962047815, "loss": 6.4126, "step": 90 }, { "epoch": 0.0042600503247703205, "grad_norm": 1.671875, "learning_rate": 0.0001999988821906694, "loss": 6.701, "step": 91 }, { "epoch": 0.004306864064602961, "grad_norm": 1.515625, "learning_rate": 0.00019999885442012022, "loss": 6.5346, "step": 92 }, { "epoch": 0.004353677804435602, "grad_norm": 1.5, "learning_rate": 0.00019999882630883077, "loss": 6.4892, "step": 93 }, { "epoch": 0.004400491544268243, "grad_norm": 1.71875, "learning_rate": 0.00019999879785680106, "loss": 6.465, "step": 94 }, { "epoch": 0.004447305284100884, "grad_norm": 1.578125, "learning_rate": 0.00019999876906403127, "loss": 6.5992, "step": 95 }, { "epoch": 0.004494119023933525, "grad_norm": 1.375, "learning_rate": 0.00019999873993052143, "loss": 6.429, "step": 96 }, { "epoch": 0.004540932763766165, "grad_norm": 1.765625, "learning_rate": 0.00019999871045627164, "loss": 6.6151, "step": 97 }, { "epoch": 0.004587746503598806, "grad_norm": 1.5625, "learning_rate": 0.00019999868064128206, "loss": 6.7383, "step": 98 }, { "epoch": 0.004634560243431447, "grad_norm": 1.6953125, "learning_rate": 0.00019999865048555274, "loss": 6.3578, "step": 99 }, { "epoch": 0.004681373983264088, "grad_norm": 1.3203125, "learning_rate": 0.0001999986199890838, "loss": 6.5973, "step": 100 }, { "epoch": 0.004728187723096729, "grad_norm": 1.2421875, "learning_rate": 0.00019999858915187532, "loss": 6.4356, "step": 101 }, { "epoch": 0.004775001462929369, "grad_norm": 1.2109375, "learning_rate": 0.00019999855797392746, "loss": 9.9672, "step": 102 }, { "epoch": 0.0048218152027620104, "grad_norm": 1.7265625, "learning_rate": 0.00019999852645524027, "loss": 6.7742, "step": 103 }, { "epoch": 0.0048686289425946515, "grad_norm": 1.703125, "learning_rate": 0.00019999849459581394, "loss": 6.3794, "step": 104 }, { "epoch": 0.0049154426824272925, "grad_norm": 1.8828125, "learning_rate": 0.00019999846239564847, "loss": 6.6451, "step": 105 }, { "epoch": 0.004962256422259934, "grad_norm": 2.015625, "learning_rate": 0.000199998429854744, "loss": 6.3639, "step": 106 }, { "epoch": 0.005009070162092574, "grad_norm": 1.4609375, "learning_rate": 0.0001999983969731007, "loss": 6.5005, "step": 107 }, { "epoch": 0.005055883901925215, "grad_norm": 1.3125, "learning_rate": 0.00019999836375071865, "loss": 6.2923, "step": 108 }, { "epoch": 0.005102697641757856, "grad_norm": 1.59375, "learning_rate": 0.00019999833018759792, "loss": 6.5189, "step": 109 }, { "epoch": 0.005149511381590497, "grad_norm": 1.5390625, "learning_rate": 0.00019999829628373868, "loss": 6.3003, "step": 110 }, { "epoch": 0.005196325121423138, "grad_norm": 1.625, "learning_rate": 0.00019999826203914101, "loss": 6.2618, "step": 111 }, { "epoch": 0.005243138861255779, "grad_norm": 1.4765625, "learning_rate": 0.00019999822745380506, "loss": 6.3522, "step": 112 }, { "epoch": 0.005289952601088419, "grad_norm": 1.546875, "learning_rate": 0.0001999981925277309, "loss": 6.2944, "step": 113 }, { "epoch": 0.00533676634092106, "grad_norm": 1.546875, "learning_rate": 0.00019999815726091873, "loss": 6.4022, "step": 114 }, { "epoch": 0.005383580080753701, "grad_norm": 1.5078125, "learning_rate": 0.00019999812165336856, "loss": 6.4656, "step": 115 }, { "epoch": 0.005430393820586342, "grad_norm": 1.6875, "learning_rate": 0.00019999808570508062, "loss": 6.2839, "step": 116 }, { "epoch": 0.005477207560418983, "grad_norm": 1.5546875, "learning_rate": 0.00019999804941605497, "loss": 6.2601, "step": 117 }, { "epoch": 0.0055240213002516235, "grad_norm": 1.5703125, "learning_rate": 0.00019999801278629175, "loss": 6.653, "step": 118 }, { "epoch": 0.005570835040084265, "grad_norm": 1.3828125, "learning_rate": 0.00019999797581579106, "loss": 6.4137, "step": 119 }, { "epoch": 0.005617648779916906, "grad_norm": 1.53125, "learning_rate": 0.0001999979385045531, "loss": 6.6399, "step": 120 }, { "epoch": 0.005664462519749547, "grad_norm": 1.4296875, "learning_rate": 0.00019999790085257788, "loss": 6.3265, "step": 121 }, { "epoch": 0.005711276259582188, "grad_norm": 1.7109375, "learning_rate": 0.0001999978628598656, "loss": 6.3171, "step": 122 }, { "epoch": 0.005758089999414828, "grad_norm": 1.5, "learning_rate": 0.00019999782452641643, "loss": 6.4172, "step": 123 }, { "epoch": 0.005804903739247469, "grad_norm": 1.671875, "learning_rate": 0.00019999778585223042, "loss": 6.3674, "step": 124 }, { "epoch": 0.00585171747908011, "grad_norm": 1.3671875, "learning_rate": 0.00019999774683730773, "loss": 6.1829, "step": 125 }, { "epoch": 0.005898531218912751, "grad_norm": 1.796875, "learning_rate": 0.0001999977074816485, "loss": 6.6371, "step": 126 }, { "epoch": 0.005945344958745392, "grad_norm": 1.7421875, "learning_rate": 0.0001999976677852529, "loss": 6.0517, "step": 127 }, { "epoch": 0.005992158698578032, "grad_norm": 1.625, "learning_rate": 0.000199997627748121, "loss": 6.2581, "step": 128 }, { "epoch": 0.006038972438410673, "grad_norm": 1.3046875, "learning_rate": 0.00019999758737025293, "loss": 6.1489, "step": 129 }, { "epoch": 0.006085786178243314, "grad_norm": 1.46875, "learning_rate": 0.0001999975466516489, "loss": 6.193, "step": 130 }, { "epoch": 0.006132599918075955, "grad_norm": 1.6484375, "learning_rate": 0.00019999750559230898, "loss": 6.0299, "step": 131 }, { "epoch": 0.0061794136579085964, "grad_norm": 1.515625, "learning_rate": 0.0001999974641922334, "loss": 6.0145, "step": 132 }, { "epoch": 0.006226227397741237, "grad_norm": 1.5078125, "learning_rate": 0.0001999974224514222, "loss": 6.2877, "step": 133 }, { "epoch": 0.006273041137573878, "grad_norm": 1.515625, "learning_rate": 0.00019999738036987553, "loss": 6.4387, "step": 134 }, { "epoch": 0.006319854877406519, "grad_norm": 1.5625, "learning_rate": 0.00019999733794759365, "loss": 6.3178, "step": 135 }, { "epoch": 0.00636666861723916, "grad_norm": 1.3046875, "learning_rate": 0.00019999729518457656, "loss": 6.4039, "step": 136 }, { "epoch": 0.006413482357071801, "grad_norm": 1.5078125, "learning_rate": 0.00019999725208082447, "loss": 6.3216, "step": 137 }, { "epoch": 0.006460296096904442, "grad_norm": 1.3671875, "learning_rate": 0.00019999720863633753, "loss": 6.117, "step": 138 }, { "epoch": 0.006507109836737082, "grad_norm": 1.8125, "learning_rate": 0.00019999716485111587, "loss": 6.1492, "step": 139 }, { "epoch": 0.006553923576569723, "grad_norm": 1.3515625, "learning_rate": 0.00019999712072515968, "loss": 6.0111, "step": 140 }, { "epoch": 0.006600737316402364, "grad_norm": 1.625, "learning_rate": 0.00019999707625846907, "loss": 6.1752, "step": 141 }, { "epoch": 0.006647551056235005, "grad_norm": 1.5078125, "learning_rate": 0.0001999970314510442, "loss": 6.1281, "step": 142 }, { "epoch": 0.006694364796067646, "grad_norm": 1.703125, "learning_rate": 0.0001999969863028852, "loss": 6.1796, "step": 143 }, { "epoch": 0.006741178535900286, "grad_norm": 2.171875, "learning_rate": 0.0001999969408139923, "loss": 6.4784, "step": 144 }, { "epoch": 0.0067879922757329274, "grad_norm": 1.9453125, "learning_rate": 0.0001999968949843656, "loss": 6.1207, "step": 145 }, { "epoch": 0.0068348060155655685, "grad_norm": 1.65625, "learning_rate": 0.0001999968488140052, "loss": 5.9694, "step": 146 }, { "epoch": 0.0068816197553982095, "grad_norm": 1.3125, "learning_rate": 0.00019999680230291136, "loss": 6.0903, "step": 147 }, { "epoch": 0.006928433495230851, "grad_norm": 1.453125, "learning_rate": 0.0001999967554510842, "loss": 6.1947, "step": 148 }, { "epoch": 0.006975247235063491, "grad_norm": 1.40625, "learning_rate": 0.00019999670825852385, "loss": 6.1563, "step": 149 }, { "epoch": 0.007022060974896132, "grad_norm": 1.40625, "learning_rate": 0.00019999666072523052, "loss": 6.1762, "step": 150 }, { "epoch": 0.007068874714728773, "grad_norm": 1.71875, "learning_rate": 0.00019999661285120436, "loss": 6.1234, "step": 151 }, { "epoch": 0.007115688454561414, "grad_norm": 1.4609375, "learning_rate": 0.00019999656463644548, "loss": 5.96, "step": 152 }, { "epoch": 0.007162502194394055, "grad_norm": 1.796875, "learning_rate": 0.00019999651608095412, "loss": 5.917, "step": 153 }, { "epoch": 0.007209315934226695, "grad_norm": 1.609375, "learning_rate": 0.00019999646718473038, "loss": 6.1153, "step": 154 }, { "epoch": 0.007256129674059336, "grad_norm": 2.03125, "learning_rate": 0.0001999964179477745, "loss": 5.6466, "step": 155 }, { "epoch": 0.007302943413891977, "grad_norm": 1.3515625, "learning_rate": 0.0001999963683700866, "loss": 6.0026, "step": 156 }, { "epoch": 0.007349757153724618, "grad_norm": 1.2734375, "learning_rate": 0.00019999631845166684, "loss": 5.9503, "step": 157 }, { "epoch": 0.007396570893557259, "grad_norm": 1.2890625, "learning_rate": 0.00019999626819251542, "loss": 6.0213, "step": 158 }, { "epoch": 0.0074433846333899, "grad_norm": 1.484375, "learning_rate": 0.00019999621759263247, "loss": 6.1855, "step": 159 }, { "epoch": 0.0074901983732225405, "grad_norm": 1.6484375, "learning_rate": 0.00019999616665201822, "loss": 5.7041, "step": 160 }, { "epoch": 0.007537012113055182, "grad_norm": 1.6953125, "learning_rate": 0.0001999961153706728, "loss": 5.9845, "step": 161 }, { "epoch": 0.007583825852887823, "grad_norm": 1.6796875, "learning_rate": 0.00019999606374859638, "loss": 5.833, "step": 162 }, { "epoch": 0.007630639592720464, "grad_norm": 1.5703125, "learning_rate": 0.00019999601178578917, "loss": 6.0725, "step": 163 }, { "epoch": 0.007677453332553105, "grad_norm": 1.59375, "learning_rate": 0.0001999959594822513, "loss": 6.3727, "step": 164 }, { "epoch": 0.007724267072385745, "grad_norm": 1.265625, "learning_rate": 0.00019999590683798303, "loss": 6.027, "step": 165 }, { "epoch": 0.007771080812218386, "grad_norm": 1.703125, "learning_rate": 0.00019999585385298442, "loss": 6.2445, "step": 166 }, { "epoch": 0.007817894552051027, "grad_norm": 1.5078125, "learning_rate": 0.0001999958005272558, "loss": 5.622, "step": 167 }, { "epoch": 0.007864708291883668, "grad_norm": 1.546875, "learning_rate": 0.0001999957468607972, "loss": 6.0816, "step": 168 }, { "epoch": 0.007911522031716309, "grad_norm": 1.6328125, "learning_rate": 0.00019999569285360892, "loss": 6.0777, "step": 169 }, { "epoch": 0.00795833577154895, "grad_norm": 1.59375, "learning_rate": 0.00019999563850569104, "loss": 6.0072, "step": 170 }, { "epoch": 0.008005149511381591, "grad_norm": 1.4921875, "learning_rate": 0.00019999558381704387, "loss": 6.03, "step": 171 }, { "epoch": 0.008051963251214232, "grad_norm": 1.6640625, "learning_rate": 0.0001999955287876675, "loss": 6.0384, "step": 172 }, { "epoch": 0.008098776991046872, "grad_norm": 1.4921875, "learning_rate": 0.00019999547341756214, "loss": 5.9649, "step": 173 }, { "epoch": 0.008145590730879513, "grad_norm": 1.4375, "learning_rate": 0.00019999541770672797, "loss": 5.9329, "step": 174 }, { "epoch": 0.008192404470712154, "grad_norm": 1.3046875, "learning_rate": 0.00019999536165516522, "loss": 5.8939, "step": 175 }, { "epoch": 0.008239218210544795, "grad_norm": 1.6640625, "learning_rate": 0.00019999530526287405, "loss": 5.939, "step": 176 }, { "epoch": 0.008286031950377436, "grad_norm": 1.46875, "learning_rate": 0.00019999524852985464, "loss": 6.0945, "step": 177 }, { "epoch": 0.008332845690210077, "grad_norm": 1.5234375, "learning_rate": 0.00019999519145610722, "loss": 5.9449, "step": 178 }, { "epoch": 0.008379659430042718, "grad_norm": 1.4296875, "learning_rate": 0.00019999513404163195, "loss": 5.7187, "step": 179 }, { "epoch": 0.008426473169875359, "grad_norm": 1.71875, "learning_rate": 0.00019999507628642906, "loss": 5.9706, "step": 180 }, { "epoch": 0.008473286909708, "grad_norm": 1.671875, "learning_rate": 0.00019999501819049873, "loss": 6.1844, "step": 181 }, { "epoch": 0.008520100649540641, "grad_norm": 1.265625, "learning_rate": 0.00019999495975384112, "loss": 5.9518, "step": 182 }, { "epoch": 0.00856691438937328, "grad_norm": 2.109375, "learning_rate": 0.0001999949009764565, "loss": 5.9487, "step": 183 }, { "epoch": 0.008613728129205921, "grad_norm": 1.7109375, "learning_rate": 0.000199994841858345, "loss": 5.6932, "step": 184 }, { "epoch": 0.008660541869038562, "grad_norm": 1.3125, "learning_rate": 0.0001999947823995069, "loss": 5.9287, "step": 185 }, { "epoch": 0.008707355608871203, "grad_norm": 1.546875, "learning_rate": 0.00019999472259994234, "loss": 5.9712, "step": 186 }, { "epoch": 0.008754169348703844, "grad_norm": 1.515625, "learning_rate": 0.00019999466245965156, "loss": 6.1483, "step": 187 }, { "epoch": 0.008800983088536486, "grad_norm": 1.6953125, "learning_rate": 0.0001999946019786347, "loss": 5.8761, "step": 188 }, { "epoch": 0.008847796828369127, "grad_norm": 2.9375, "learning_rate": 0.00019999454115689205, "loss": 4.3387, "step": 189 }, { "epoch": 0.008894610568201768, "grad_norm": 1.6484375, "learning_rate": 0.0001999944799944238, "loss": 5.9423, "step": 190 }, { "epoch": 0.008941424308034409, "grad_norm": 1.4921875, "learning_rate": 0.00019999441849123008, "loss": 5.7921, "step": 191 }, { "epoch": 0.00898823804786705, "grad_norm": 1.5, "learning_rate": 0.00019999435664731118, "loss": 6.1127, "step": 192 }, { "epoch": 0.009035051787699689, "grad_norm": 1.5, "learning_rate": 0.0001999942944626673, "loss": 5.7331, "step": 193 }, { "epoch": 0.00908186552753233, "grad_norm": 1.7265625, "learning_rate": 0.00019999423193729865, "loss": 5.6567, "step": 194 }, { "epoch": 0.009128679267364971, "grad_norm": 1.421875, "learning_rate": 0.00019999416907120544, "loss": 5.733, "step": 195 }, { "epoch": 0.009175493007197612, "grad_norm": 1.703125, "learning_rate": 0.00019999410586438784, "loss": 5.9106, "step": 196 }, { "epoch": 0.009222306747030253, "grad_norm": 1.515625, "learning_rate": 0.00019999404231684612, "loss": 5.538, "step": 197 }, { "epoch": 0.009269120486862894, "grad_norm": 1.46875, "learning_rate": 0.00019999397842858051, "loss": 5.9058, "step": 198 }, { "epoch": 0.009315934226695535, "grad_norm": 1.4296875, "learning_rate": 0.0001999939141995912, "loss": 5.9867, "step": 199 }, { "epoch": 0.009362747966528176, "grad_norm": 1.5703125, "learning_rate": 0.00019999384962987837, "loss": 6.0503, "step": 200 }, { "epoch": 0.009409561706360817, "grad_norm": 1.8203125, "learning_rate": 0.00019999378471944227, "loss": 5.768, "step": 201 }, { "epoch": 0.009456375446193458, "grad_norm": 1.7265625, "learning_rate": 0.0001999937194682832, "loss": 5.9755, "step": 202 }, { "epoch": 0.0095031891860261, "grad_norm": 1.4921875, "learning_rate": 0.00019999365387640125, "loss": 5.728, "step": 203 }, { "epoch": 0.009550002925858739, "grad_norm": 1.6484375, "learning_rate": 0.0001999935879437967, "loss": 5.8359, "step": 204 }, { "epoch": 0.00959681666569138, "grad_norm": 1.4296875, "learning_rate": 0.0001999935216704698, "loss": 5.8741, "step": 205 }, { "epoch": 0.009643630405524021, "grad_norm": 1.546875, "learning_rate": 0.00019999345505642075, "loss": 5.6061, "step": 206 }, { "epoch": 0.009690444145356662, "grad_norm": 1.5703125, "learning_rate": 0.0001999933881016498, "loss": 5.8311, "step": 207 }, { "epoch": 0.009737257885189303, "grad_norm": 2.21875, "learning_rate": 0.00019999332080615712, "loss": 5.7866, "step": 208 }, { "epoch": 0.009784071625021944, "grad_norm": 1.7890625, "learning_rate": 0.00019999325316994303, "loss": 5.6044, "step": 209 }, { "epoch": 0.009830885364854585, "grad_norm": 1.2265625, "learning_rate": 0.00019999318519300764, "loss": 5.6633, "step": 210 }, { "epoch": 0.009877699104687226, "grad_norm": 1.2734375, "learning_rate": 0.0001999931168753513, "loss": 5.8487, "step": 211 }, { "epoch": 0.009924512844519867, "grad_norm": 1.3203125, "learning_rate": 0.0001999930482169742, "loss": 5.7113, "step": 212 }, { "epoch": 0.009971326584352508, "grad_norm": 2.796875, "learning_rate": 0.00019999297921787656, "loss": 6.1899, "step": 213 }, { "epoch": 0.010018140324185148, "grad_norm": 2.140625, "learning_rate": 0.00019999290987805863, "loss": 5.8806, "step": 214 }, { "epoch": 0.010064954064017789, "grad_norm": 2.359375, "learning_rate": 0.00019999284019752063, "loss": 5.7144, "step": 215 }, { "epoch": 0.01011176780385043, "grad_norm": 1.8828125, "learning_rate": 0.0001999927701762628, "loss": 5.7326, "step": 216 }, { "epoch": 0.01015858154368307, "grad_norm": 1.59375, "learning_rate": 0.00019999269981428537, "loss": 5.348, "step": 217 }, { "epoch": 0.010205395283515712, "grad_norm": 1.5859375, "learning_rate": 0.00019999262911158863, "loss": 5.7748, "step": 218 }, { "epoch": 0.010252209023348353, "grad_norm": 1.90625, "learning_rate": 0.00019999255806817277, "loss": 5.7285, "step": 219 }, { "epoch": 0.010299022763180994, "grad_norm": 1.375, "learning_rate": 0.00019999248668403806, "loss": 5.6715, "step": 220 }, { "epoch": 0.010345836503013635, "grad_norm": 7.25, "learning_rate": 0.0001999924149591847, "loss": 6.5481, "step": 221 }, { "epoch": 0.010392650242846276, "grad_norm": 19.625, "learning_rate": 0.000199992342893613, "loss": 6.7156, "step": 222 }, { "epoch": 0.010439463982678917, "grad_norm": 1.7734375, "learning_rate": 0.00019999227048732315, "loss": 5.7585, "step": 223 }, { "epoch": 0.010486277722511558, "grad_norm": 1.6328125, "learning_rate": 0.00019999219774031543, "loss": 5.6336, "step": 224 }, { "epoch": 0.010533091462344197, "grad_norm": 1.625, "learning_rate": 0.00019999212465259008, "loss": 5.5619, "step": 225 }, { "epoch": 0.010579905202176838, "grad_norm": 1.796875, "learning_rate": 0.00019999205122414734, "loss": 5.8248, "step": 226 }, { "epoch": 0.01062671894200948, "grad_norm": 1.3671875, "learning_rate": 0.00019999197745498745, "loss": 5.7192, "step": 227 }, { "epoch": 0.01067353268184212, "grad_norm": 1.8125, "learning_rate": 0.0001999919033451107, "loss": 5.7953, "step": 228 }, { "epoch": 0.010720346421674761, "grad_norm": 1.625, "learning_rate": 0.00019999182889451732, "loss": 6.0322, "step": 229 }, { "epoch": 0.010767160161507403, "grad_norm": 1.8203125, "learning_rate": 0.00019999175410320754, "loss": 5.8805, "step": 230 }, { "epoch": 0.010813973901340044, "grad_norm": 1.65625, "learning_rate": 0.00019999167897118164, "loss": 5.9994, "step": 231 }, { "epoch": 0.010860787641172685, "grad_norm": 1.4609375, "learning_rate": 0.0001999916034984399, "loss": 5.4595, "step": 232 }, { "epoch": 0.010907601381005326, "grad_norm": 1.3984375, "learning_rate": 0.00019999152768498252, "loss": 5.5491, "step": 233 }, { "epoch": 0.010954415120837967, "grad_norm": 1.4921875, "learning_rate": 0.00019999145153080982, "loss": 5.679, "step": 234 }, { "epoch": 0.011001228860670606, "grad_norm": 1.4140625, "learning_rate": 0.000199991375035922, "loss": 5.8745, "step": 235 }, { "epoch": 0.011048042600503247, "grad_norm": 1.5390625, "learning_rate": 0.00019999129820031937, "loss": 5.5245, "step": 236 }, { "epoch": 0.011094856340335888, "grad_norm": 1.203125, "learning_rate": 0.00019999122102400216, "loss": 5.6852, "step": 237 }, { "epoch": 0.01114167008016853, "grad_norm": 1.5390625, "learning_rate": 0.00019999114350697063, "loss": 6.0116, "step": 238 }, { "epoch": 0.01118848382000117, "grad_norm": 1.4140625, "learning_rate": 0.00019999106564922507, "loss": 5.5504, "step": 239 }, { "epoch": 0.011235297559833811, "grad_norm": 2.375, "learning_rate": 0.00019999098745076576, "loss": 5.5171, "step": 240 }, { "epoch": 0.011282111299666452, "grad_norm": 1.7109375, "learning_rate": 0.00019999090891159292, "loss": 5.9129, "step": 241 }, { "epoch": 0.011328925039499093, "grad_norm": 1.59375, "learning_rate": 0.00019999083003170682, "loss": 5.391, "step": 242 }, { "epoch": 0.011375738779331734, "grad_norm": 1.15625, "learning_rate": 0.00019999075081110777, "loss": 5.6115, "step": 243 }, { "epoch": 0.011422552519164375, "grad_norm": 1.3125, "learning_rate": 0.000199990671249796, "loss": 5.6588, "step": 244 }, { "epoch": 0.011469366258997016, "grad_norm": 1.265625, "learning_rate": 0.00019999059134777182, "loss": 5.7302, "step": 245 }, { "epoch": 0.011516179998829656, "grad_norm": 1.4609375, "learning_rate": 0.00019999051110503545, "loss": 5.6356, "step": 246 }, { "epoch": 0.011562993738662297, "grad_norm": 1.515625, "learning_rate": 0.00019999043052158723, "loss": 5.6781, "step": 247 }, { "epoch": 0.011609807478494938, "grad_norm": 1.4921875, "learning_rate": 0.00019999034959742736, "loss": 5.7263, "step": 248 }, { "epoch": 0.011656621218327579, "grad_norm": 1.25, "learning_rate": 0.00019999026833255617, "loss": 5.7457, "step": 249 }, { "epoch": 0.01170343495816022, "grad_norm": 2.265625, "learning_rate": 0.0001999901867269739, "loss": 5.5089, "step": 250 }, { "epoch": 0.011750248697992861, "grad_norm": 1.3828125, "learning_rate": 0.0001999901047806809, "loss": 5.4488, "step": 251 }, { "epoch": 0.011797062437825502, "grad_norm": 1.1796875, "learning_rate": 0.00019999002249367735, "loss": 5.8238, "step": 252 }, { "epoch": 0.011843876177658143, "grad_norm": 1.421875, "learning_rate": 0.00019998993986596362, "loss": 5.8415, "step": 253 }, { "epoch": 0.011890689917490784, "grad_norm": 1.5625, "learning_rate": 0.0001999898568975399, "loss": 5.4131, "step": 254 }, { "epoch": 0.011937503657323425, "grad_norm": 1.3984375, "learning_rate": 0.00019998977358840655, "loss": 5.8528, "step": 255 }, { "epoch": 0.011984317397156065, "grad_norm": 1.2734375, "learning_rate": 0.00019998968993856383, "loss": 5.665, "step": 256 }, { "epoch": 0.012031131136988706, "grad_norm": 1.484375, "learning_rate": 0.000199989605948012, "loss": 5.6992, "step": 257 }, { "epoch": 0.012077944876821347, "grad_norm": 1.1640625, "learning_rate": 0.0001999895216167514, "loss": 5.6625, "step": 258 }, { "epoch": 0.012124758616653988, "grad_norm": 1.4765625, "learning_rate": 0.00019998943694478225, "loss": 5.2503, "step": 259 }, { "epoch": 0.012171572356486629, "grad_norm": 1.4296875, "learning_rate": 0.0001999893519321049, "loss": 5.4384, "step": 260 }, { "epoch": 0.01221838609631927, "grad_norm": 1.59375, "learning_rate": 0.0001999892665787196, "loss": 5.7071, "step": 261 }, { "epoch": 0.01226519983615191, "grad_norm": 1.6328125, "learning_rate": 0.00019998918088462667, "loss": 5.4274, "step": 262 }, { "epoch": 0.012312013575984552, "grad_norm": 1.640625, "learning_rate": 0.00019998909484982637, "loss": 5.8272, "step": 263 }, { "epoch": 0.012358827315817193, "grad_norm": 1.6171875, "learning_rate": 0.000199989008474319, "loss": 5.5326, "step": 264 }, { "epoch": 0.012405641055649834, "grad_norm": 1.640625, "learning_rate": 0.00019998892175810489, "loss": 5.6172, "step": 265 }, { "epoch": 0.012452454795482473, "grad_norm": 1.546875, "learning_rate": 0.00019998883470118427, "loss": 5.7846, "step": 266 }, { "epoch": 0.012499268535315114, "grad_norm": 1.8359375, "learning_rate": 0.0001999887473035575, "loss": 5.6456, "step": 267 }, { "epoch": 0.012546082275147755, "grad_norm": 1.390625, "learning_rate": 0.00019998865956522485, "loss": 5.344, "step": 268 }, { "epoch": 0.012592896014980396, "grad_norm": 1.2890625, "learning_rate": 0.00019998857148618664, "loss": 5.6041, "step": 269 }, { "epoch": 0.012639709754813037, "grad_norm": 1.296875, "learning_rate": 0.00019998848306644314, "loss": 5.5013, "step": 270 }, { "epoch": 0.012686523494645678, "grad_norm": 1.96875, "learning_rate": 0.00019998839430599466, "loss": 5.8627, "step": 271 }, { "epoch": 0.01273333723447832, "grad_norm": 1.34375, "learning_rate": 0.0001999883052048415, "loss": 5.1601, "step": 272 }, { "epoch": 0.01278015097431096, "grad_norm": 1.546875, "learning_rate": 0.00019998821576298396, "loss": 5.7255, "step": 273 }, { "epoch": 0.012826964714143602, "grad_norm": 1.40625, "learning_rate": 0.0001999881259804224, "loss": 5.7827, "step": 274 }, { "epoch": 0.012873778453976243, "grad_norm": 1.484375, "learning_rate": 0.00019998803585715704, "loss": 5.5887, "step": 275 }, { "epoch": 0.012920592193808884, "grad_norm": 1.9140625, "learning_rate": 0.00019998794539318824, "loss": 5.6204, "step": 276 }, { "epoch": 0.012967405933641523, "grad_norm": 1.421875, "learning_rate": 0.00019998785458851627, "loss": 5.7183, "step": 277 }, { "epoch": 0.013014219673474164, "grad_norm": 2.328125, "learning_rate": 0.0001999877634431415, "loss": 5.48, "step": 278 }, { "epoch": 0.013061033413306805, "grad_norm": 1.4609375, "learning_rate": 0.00019998767195706418, "loss": 5.6046, "step": 279 }, { "epoch": 0.013107847153139446, "grad_norm": 1.265625, "learning_rate": 0.00019998758013028468, "loss": 5.445, "step": 280 }, { "epoch": 0.013154660892972087, "grad_norm": 22.625, "learning_rate": 0.00019998748796280324, "loss": 7.4453, "step": 281 }, { "epoch": 0.013201474632804728, "grad_norm": 1.8984375, "learning_rate": 0.00019998739545462027, "loss": 5.0412, "step": 282 }, { "epoch": 0.01324828837263737, "grad_norm": 1.328125, "learning_rate": 0.00019998730260573598, "loss": 5.196, "step": 283 }, { "epoch": 0.01329510211247001, "grad_norm": 1.828125, "learning_rate": 0.00019998720941615076, "loss": 5.8159, "step": 284 }, { "epoch": 0.013341915852302651, "grad_norm": 2.4375, "learning_rate": 0.00019998711588586486, "loss": 5.4469, "step": 285 }, { "epoch": 0.013388729592135292, "grad_norm": 3.25, "learning_rate": 0.00019998702201487868, "loss": 5.4213, "step": 286 }, { "epoch": 0.013435543331967932, "grad_norm": 1.84375, "learning_rate": 0.00019998692780319252, "loss": 5.6564, "step": 287 }, { "epoch": 0.013482357071800573, "grad_norm": 1.7265625, "learning_rate": 0.00019998683325080668, "loss": 5.6905, "step": 288 }, { "epoch": 0.013529170811633214, "grad_norm": 1.71875, "learning_rate": 0.00019998673835772146, "loss": 5.4385, "step": 289 }, { "epoch": 0.013575984551465855, "grad_norm": 1.4609375, "learning_rate": 0.0001999866431239372, "loss": 5.3975, "step": 290 }, { "epoch": 0.013622798291298496, "grad_norm": 1.5390625, "learning_rate": 0.00019998654754945424, "loss": 5.3125, "step": 291 }, { "epoch": 0.013669612031131137, "grad_norm": 1.96875, "learning_rate": 0.00019998645163427293, "loss": 5.8996, "step": 292 }, { "epoch": 0.013716425770963778, "grad_norm": 1.328125, "learning_rate": 0.00019998635537839352, "loss": 5.6022, "step": 293 }, { "epoch": 0.013763239510796419, "grad_norm": 1.3671875, "learning_rate": 0.00019998625878181643, "loss": 7.1401, "step": 294 }, { "epoch": 0.01381005325062906, "grad_norm": 2.0625, "learning_rate": 0.0001999861618445419, "loss": 5.1922, "step": 295 }, { "epoch": 0.013856866990461701, "grad_norm": 1.171875, "learning_rate": 0.00019998606456657033, "loss": 4.1116, "step": 296 }, { "epoch": 0.013903680730294342, "grad_norm": 1.8671875, "learning_rate": 0.00019998596694790203, "loss": 5.5061, "step": 297 }, { "epoch": 0.013950494470126982, "grad_norm": 1.3984375, "learning_rate": 0.00019998586898853731, "loss": 5.5977, "step": 298 }, { "epoch": 0.013997308209959623, "grad_norm": 1.40625, "learning_rate": 0.00019998577068847651, "loss": 5.5054, "step": 299 }, { "epoch": 0.014044121949792264, "grad_norm": 1.375, "learning_rate": 0.00019998567204772, "loss": 5.6511, "step": 300 }, { "epoch": 0.014090935689624905, "grad_norm": 1.3671875, "learning_rate": 0.0001999855730662681, "loss": 5.3209, "step": 301 }, { "epoch": 0.014137749429457546, "grad_norm": 1.4765625, "learning_rate": 0.0001999854737441211, "loss": 5.3897, "step": 302 }, { "epoch": 0.014184563169290187, "grad_norm": 1.359375, "learning_rate": 0.0001999853740812794, "loss": 5.5082, "step": 303 }, { "epoch": 0.014231376909122828, "grad_norm": 1.359375, "learning_rate": 0.0001999852740777433, "loss": 5.5015, "step": 304 }, { "epoch": 0.014278190648955469, "grad_norm": 1.59375, "learning_rate": 0.0001999851737335132, "loss": 5.455, "step": 305 }, { "epoch": 0.01432500438878811, "grad_norm": 1.6484375, "learning_rate": 0.00019998507304858936, "loss": 5.6728, "step": 306 }, { "epoch": 0.014371818128620751, "grad_norm": 2.015625, "learning_rate": 0.00019998497202297218, "loss": 5.9225, "step": 307 }, { "epoch": 0.01441863186845339, "grad_norm": 1.421875, "learning_rate": 0.00019998487065666198, "loss": 5.3483, "step": 308 }, { "epoch": 0.014465445608286031, "grad_norm": 1.4765625, "learning_rate": 0.00019998476894965913, "loss": 5.1686, "step": 309 }, { "epoch": 0.014512259348118672, "grad_norm": 1.3046875, "learning_rate": 0.00019998466690196394, "loss": 5.7067, "step": 310 }, { "epoch": 0.014559073087951313, "grad_norm": 1.6171875, "learning_rate": 0.00019998456451357675, "loss": 5.8564, "step": 311 }, { "epoch": 0.014605886827783954, "grad_norm": 1.9609375, "learning_rate": 0.000199984461784498, "loss": 5.8359, "step": 312 }, { "epoch": 0.014652700567616595, "grad_norm": 1.7421875, "learning_rate": 0.00019998435871472793, "loss": 5.4853, "step": 313 }, { "epoch": 0.014699514307449237, "grad_norm": 1.3984375, "learning_rate": 0.00019998425530426696, "loss": 5.2414, "step": 314 }, { "epoch": 0.014746328047281878, "grad_norm": 1.6640625, "learning_rate": 0.00019998415155311544, "loss": 5.412, "step": 315 }, { "epoch": 0.014793141787114519, "grad_norm": 1.546875, "learning_rate": 0.00019998404746127366, "loss": 5.3779, "step": 316 }, { "epoch": 0.01483995552694716, "grad_norm": 1.625, "learning_rate": 0.00019998394302874202, "loss": 5.3131, "step": 317 }, { "epoch": 0.0148867692667798, "grad_norm": 1.7578125, "learning_rate": 0.0001999838382555209, "loss": 5.2973, "step": 318 }, { "epoch": 0.01493358300661244, "grad_norm": 1.8046875, "learning_rate": 0.0001999837331416106, "loss": 5.7516, "step": 319 }, { "epoch": 0.014980396746445081, "grad_norm": 1.765625, "learning_rate": 0.00019998362768701154, "loss": 5.4642, "step": 320 }, { "epoch": 0.015027210486277722, "grad_norm": 1.796875, "learning_rate": 0.00019998352189172402, "loss": 5.4079, "step": 321 }, { "epoch": 0.015074024226110363, "grad_norm": 1.7109375, "learning_rate": 0.00019998341575574845, "loss": 5.4297, "step": 322 }, { "epoch": 0.015120837965943004, "grad_norm": 1.7578125, "learning_rate": 0.00019998330927908516, "loss": 5.4244, "step": 323 }, { "epoch": 0.015167651705775645, "grad_norm": 1.484375, "learning_rate": 0.0001999832024617345, "loss": 5.4015, "step": 324 }, { "epoch": 0.015214465445608286, "grad_norm": 1.828125, "learning_rate": 0.00019998309530369689, "loss": 5.3066, "step": 325 }, { "epoch": 0.015261279185440927, "grad_norm": 1.4765625, "learning_rate": 0.00019998298780497266, "loss": 5.3409, "step": 326 }, { "epoch": 0.015308092925273568, "grad_norm": 1.609375, "learning_rate": 0.00019998287996556217, "loss": 5.5476, "step": 327 }, { "epoch": 0.01535490666510621, "grad_norm": 1.3046875, "learning_rate": 0.00019998277178546577, "loss": 5.4817, "step": 328 }, { "epoch": 0.015401720404938849, "grad_norm": 1.546875, "learning_rate": 0.00019998266326468392, "loss": 5.2955, "step": 329 }, { "epoch": 0.01544853414477149, "grad_norm": 1.6953125, "learning_rate": 0.00019998255440321688, "loss": 5.5896, "step": 330 }, { "epoch": 0.01549534788460413, "grad_norm": 1.578125, "learning_rate": 0.00019998244520106506, "loss": 5.1052, "step": 331 }, { "epoch": 0.015542161624436772, "grad_norm": 1.78125, "learning_rate": 0.00019998233565822886, "loss": 5.7136, "step": 332 }, { "epoch": 0.015588975364269413, "grad_norm": 1.5625, "learning_rate": 0.00019998222577470863, "loss": 5.3505, "step": 333 }, { "epoch": 0.015635789104102054, "grad_norm": 1.3671875, "learning_rate": 0.00019998211555050474, "loss": 5.4243, "step": 334 }, { "epoch": 0.015682602843934693, "grad_norm": 1.609375, "learning_rate": 0.00019998200498561755, "loss": 5.2985, "step": 335 }, { "epoch": 0.015729416583767336, "grad_norm": 1.3828125, "learning_rate": 0.00019998189408004747, "loss": 5.3413, "step": 336 }, { "epoch": 0.015776230323599975, "grad_norm": 1.6875, "learning_rate": 0.00019998178283379487, "loss": 5.5021, "step": 337 }, { "epoch": 0.015823044063432618, "grad_norm": 1.984375, "learning_rate": 0.0001999816712468601, "loss": 5.26, "step": 338 }, { "epoch": 0.015869857803265257, "grad_norm": 1.7578125, "learning_rate": 0.0001999815593192436, "loss": 5.5412, "step": 339 }, { "epoch": 0.0159166715430979, "grad_norm": 1.265625, "learning_rate": 0.0001999814470509457, "loss": 5.3342, "step": 340 }, { "epoch": 0.01596348528293054, "grad_norm": 1.453125, "learning_rate": 0.0001999813344419668, "loss": 5.2879, "step": 341 }, { "epoch": 0.016010299022763182, "grad_norm": 1.375, "learning_rate": 0.00019998122149230728, "loss": 5.3041, "step": 342 }, { "epoch": 0.01605711276259582, "grad_norm": 1.5703125, "learning_rate": 0.00019998110820196755, "loss": 5.2675, "step": 343 }, { "epoch": 0.016103926502428464, "grad_norm": 1.265625, "learning_rate": 0.00019998099457094792, "loss": 4.9953, "step": 344 }, { "epoch": 0.016150740242261104, "grad_norm": 1.359375, "learning_rate": 0.00019998088059924887, "loss": 5.2627, "step": 345 }, { "epoch": 0.016197553982093743, "grad_norm": 1.4609375, "learning_rate": 0.00019998076628687076, "loss": 5.2395, "step": 346 }, { "epoch": 0.016244367721926386, "grad_norm": 1.9765625, "learning_rate": 0.00019998065163381393, "loss": 5.1161, "step": 347 }, { "epoch": 0.016291181461759025, "grad_norm": 1.7421875, "learning_rate": 0.00019998053664007883, "loss": 5.6272, "step": 348 }, { "epoch": 0.016337995201591668, "grad_norm": 1.71875, "learning_rate": 0.00019998042130566583, "loss": 5.4451, "step": 349 }, { "epoch": 0.016384808941424307, "grad_norm": 1.578125, "learning_rate": 0.0001999803056305753, "loss": 5.4485, "step": 350 }, { "epoch": 0.01643162268125695, "grad_norm": 1.5078125, "learning_rate": 0.00019998018961480768, "loss": 5.3714, "step": 351 }, { "epoch": 0.01647843642108959, "grad_norm": 1.515625, "learning_rate": 0.00019998007325836334, "loss": 5.2577, "step": 352 }, { "epoch": 0.016525250160922232, "grad_norm": 1.734375, "learning_rate": 0.0001999799565612427, "loss": 5.3031, "step": 353 }, { "epoch": 0.01657206390075487, "grad_norm": 1.296875, "learning_rate": 0.0001999798395234461, "loss": 5.3284, "step": 354 }, { "epoch": 0.01661887764058751, "grad_norm": 1.65625, "learning_rate": 0.000199979722144974, "loss": 5.3929, "step": 355 }, { "epoch": 0.016665691380420154, "grad_norm": 1.6171875, "learning_rate": 0.00019997960442582678, "loss": 5.3935, "step": 356 }, { "epoch": 0.016712505120252793, "grad_norm": 1.7734375, "learning_rate": 0.00019997948636600482, "loss": 5.2139, "step": 357 }, { "epoch": 0.016759318860085436, "grad_norm": 1.5859375, "learning_rate": 0.00019997936796550854, "loss": 5.482, "step": 358 }, { "epoch": 0.016806132599918075, "grad_norm": 1.421875, "learning_rate": 0.00019997924922433838, "loss": 5.0737, "step": 359 }, { "epoch": 0.016852946339750718, "grad_norm": 1.484375, "learning_rate": 0.00019997913014249466, "loss": 4.975, "step": 360 }, { "epoch": 0.016899760079583357, "grad_norm": 1.7265625, "learning_rate": 0.00019997901071997783, "loss": 5.5198, "step": 361 }, { "epoch": 0.016946573819416, "grad_norm": 1.46875, "learning_rate": 0.00019997889095678834, "loss": 5.3114, "step": 362 }, { "epoch": 0.01699338755924864, "grad_norm": 1.515625, "learning_rate": 0.00019997877085292654, "loss": 5.3758, "step": 363 }, { "epoch": 0.017040201299081282, "grad_norm": 1.375, "learning_rate": 0.00019997865040839285, "loss": 5.1013, "step": 364 }, { "epoch": 0.01708701503891392, "grad_norm": 1.28125, "learning_rate": 0.00019997852962318772, "loss": 5.1975, "step": 365 }, { "epoch": 0.01713382877874656, "grad_norm": 1.1953125, "learning_rate": 0.00019997840849731149, "loss": 5.4591, "step": 366 }, { "epoch": 0.017180642518579203, "grad_norm": 6.3125, "learning_rate": 0.00019997828703076462, "loss": 5.4764, "step": 367 }, { "epoch": 0.017227456258411843, "grad_norm": 1.7265625, "learning_rate": 0.0001999781652235475, "loss": 5.6935, "step": 368 }, { "epoch": 0.017274269998244485, "grad_norm": 1.546875, "learning_rate": 0.0001999780430756606, "loss": 5.1146, "step": 369 }, { "epoch": 0.017321083738077125, "grad_norm": 1.4296875, "learning_rate": 0.00019997792058710427, "loss": 5.3632, "step": 370 }, { "epoch": 0.017367897477909768, "grad_norm": 1.59375, "learning_rate": 0.00019997779775787896, "loss": 5.1254, "step": 371 }, { "epoch": 0.017414711217742407, "grad_norm": 1.6328125, "learning_rate": 0.0001999776745879851, "loss": 5.4194, "step": 372 }, { "epoch": 0.01746152495757505, "grad_norm": 1.8203125, "learning_rate": 0.00019997755107742306, "loss": 5.7491, "step": 373 }, { "epoch": 0.01750833869740769, "grad_norm": 1.3359375, "learning_rate": 0.00019997742722619333, "loss": 5.4095, "step": 374 }, { "epoch": 0.01755515243724033, "grad_norm": 1.7265625, "learning_rate": 0.00019997730303429628, "loss": 4.9951, "step": 375 }, { "epoch": 0.01760196617707297, "grad_norm": 1.65625, "learning_rate": 0.00019997717850173233, "loss": 5.6246, "step": 376 }, { "epoch": 0.01764877991690561, "grad_norm": 1.6875, "learning_rate": 0.00019997705362850195, "loss": 5.3259, "step": 377 }, { "epoch": 0.017695593656738253, "grad_norm": 1.3203125, "learning_rate": 0.00019997692841460554, "loss": 4.8992, "step": 378 }, { "epoch": 0.017742407396570892, "grad_norm": 1.3828125, "learning_rate": 0.0001999768028600435, "loss": 5.341, "step": 379 }, { "epoch": 0.017789221136403535, "grad_norm": 1.484375, "learning_rate": 0.0001999766769648163, "loss": 5.2761, "step": 380 }, { "epoch": 0.017836034876236175, "grad_norm": 1.4765625, "learning_rate": 0.00019997655072892436, "loss": 5.5082, "step": 381 }, { "epoch": 0.017882848616068817, "grad_norm": 1.828125, "learning_rate": 0.00019997642415236808, "loss": 5.1171, "step": 382 }, { "epoch": 0.017929662355901457, "grad_norm": 1.609375, "learning_rate": 0.00019997629723514794, "loss": 5.1707, "step": 383 }, { "epoch": 0.0179764760957341, "grad_norm": 1.8046875, "learning_rate": 0.00019997616997726434, "loss": 5.3273, "step": 384 }, { "epoch": 0.01802328983556674, "grad_norm": 1.984375, "learning_rate": 0.0001999760423787177, "loss": 5.1976, "step": 385 }, { "epoch": 0.018070103575399378, "grad_norm": 2.265625, "learning_rate": 0.00019997591443950849, "loss": 5.5286, "step": 386 }, { "epoch": 0.01811691731523202, "grad_norm": 1.6328125, "learning_rate": 0.00019997578615963711, "loss": 5.2601, "step": 387 }, { "epoch": 0.01816373105506466, "grad_norm": 1.7578125, "learning_rate": 0.00019997565753910403, "loss": 4.881, "step": 388 }, { "epoch": 0.018210544794897303, "grad_norm": 1.578125, "learning_rate": 0.00019997552857790968, "loss": 5.5452, "step": 389 }, { "epoch": 0.018257358534729942, "grad_norm": 2.953125, "learning_rate": 0.00019997539927605452, "loss": 4.6265, "step": 390 }, { "epoch": 0.018304172274562585, "grad_norm": 1.765625, "learning_rate": 0.00019997526963353893, "loss": 4.995, "step": 391 }, { "epoch": 0.018350986014395224, "grad_norm": 1.6328125, "learning_rate": 0.0001999751396503634, "loss": 4.8525, "step": 392 }, { "epoch": 0.018397799754227867, "grad_norm": 1.578125, "learning_rate": 0.00019997500932652837, "loss": 4.9451, "step": 393 }, { "epoch": 0.018444613494060506, "grad_norm": 1.5859375, "learning_rate": 0.00019997487866203425, "loss": 5.2397, "step": 394 }, { "epoch": 0.01849142723389315, "grad_norm": 1.5703125, "learning_rate": 0.00019997474765688154, "loss": 4.9642, "step": 395 }, { "epoch": 0.01853824097372579, "grad_norm": 1.078125, "learning_rate": 0.00019997461631107065, "loss": 5.1864, "step": 396 }, { "epoch": 0.018585054713558428, "grad_norm": 1.4921875, "learning_rate": 0.000199974484624602, "loss": 5.1419, "step": 397 }, { "epoch": 0.01863186845339107, "grad_norm": 2.109375, "learning_rate": 0.00019997435259747607, "loss": 5.1942, "step": 398 }, { "epoch": 0.01867868219322371, "grad_norm": 1.9296875, "learning_rate": 0.00019997422022969338, "loss": 5.3292, "step": 399 }, { "epoch": 0.018725495933056353, "grad_norm": 1.6171875, "learning_rate": 0.00019997408752125425, "loss": 5.114, "step": 400 }, { "epoch": 0.018772309672888992, "grad_norm": 1.1796875, "learning_rate": 0.00019997395447215923, "loss": 5.0294, "step": 401 }, { "epoch": 0.018819123412721635, "grad_norm": 1.53125, "learning_rate": 0.0001999738210824087, "loss": 5.1231, "step": 402 }, { "epoch": 0.018865937152554274, "grad_norm": 1.546875, "learning_rate": 0.0001999736873520032, "loss": 5.335, "step": 403 }, { "epoch": 0.018912750892386917, "grad_norm": 1.1953125, "learning_rate": 0.0001999735532809431, "loss": 5.9695, "step": 404 }, { "epoch": 0.018959564632219556, "grad_norm": 3.953125, "learning_rate": 0.00019997341886922893, "loss": 5.2445, "step": 405 }, { "epoch": 0.0190063783720522, "grad_norm": 1.2109375, "learning_rate": 0.0001999732841168611, "loss": 8.2573, "step": 406 }, { "epoch": 0.019053192111884838, "grad_norm": 1.734375, "learning_rate": 0.00019997314902384008, "loss": 4.8997, "step": 407 }, { "epoch": 0.019100005851717478, "grad_norm": 1.53125, "learning_rate": 0.00019997301359016634, "loss": 5.2774, "step": 408 }, { "epoch": 0.01914681959155012, "grad_norm": 1.7578125, "learning_rate": 0.00019997287781584034, "loss": 5.1038, "step": 409 }, { "epoch": 0.01919363333138276, "grad_norm": 1.6015625, "learning_rate": 0.00019997274170086251, "loss": 5.2223, "step": 410 }, { "epoch": 0.019240447071215402, "grad_norm": 2.171875, "learning_rate": 0.00019997260524523336, "loss": 5.3441, "step": 411 }, { "epoch": 0.019287260811048042, "grad_norm": 1.75, "learning_rate": 0.00019997246844895332, "loss": 5.3061, "step": 412 }, { "epoch": 0.019334074550880685, "grad_norm": 2.203125, "learning_rate": 0.00019997233131202288, "loss": 4.9575, "step": 413 }, { "epoch": 0.019380888290713324, "grad_norm": 1.34375, "learning_rate": 0.00019997219383444256, "loss": 5.1263, "step": 414 }, { "epoch": 0.019427702030545967, "grad_norm": 1.3515625, "learning_rate": 0.0001999720560162127, "loss": 4.8964, "step": 415 }, { "epoch": 0.019474515770378606, "grad_norm": 1.46875, "learning_rate": 0.00019997191785733383, "loss": 5.2671, "step": 416 }, { "epoch": 0.01952132951021125, "grad_norm": 1.8515625, "learning_rate": 0.00019997177935780647, "loss": 4.67, "step": 417 }, { "epoch": 0.019568143250043888, "grad_norm": 1.5, "learning_rate": 0.00019997164051763103, "loss": 5.1398, "step": 418 }, { "epoch": 0.019614956989876527, "grad_norm": 1.7109375, "learning_rate": 0.000199971501336808, "loss": 5.1441, "step": 419 }, { "epoch": 0.01966177072970917, "grad_norm": 1.2578125, "learning_rate": 0.00019997136181533788, "loss": 7.6637, "step": 420 }, { "epoch": 0.01970858446954181, "grad_norm": 1.7265625, "learning_rate": 0.0001999712219532211, "loss": 4.9191, "step": 421 }, { "epoch": 0.019755398209374452, "grad_norm": 1.546875, "learning_rate": 0.00019997108175045817, "loss": 5.2672, "step": 422 }, { "epoch": 0.01980221194920709, "grad_norm": 1.515625, "learning_rate": 0.00019997094120704953, "loss": 5.2711, "step": 423 }, { "epoch": 0.019849025689039734, "grad_norm": 1.703125, "learning_rate": 0.00019997080032299573, "loss": 5.0509, "step": 424 }, { "epoch": 0.019895839428872374, "grad_norm": 1.7265625, "learning_rate": 0.00019997065909829718, "loss": 5.5497, "step": 425 }, { "epoch": 0.019942653168705016, "grad_norm": 1.4375, "learning_rate": 0.0001999705175329544, "loss": 5.2375, "step": 426 }, { "epoch": 0.019989466908537656, "grad_norm": 1.9609375, "learning_rate": 0.00019997037562696784, "loss": 5.1471, "step": 427 }, { "epoch": 0.020036280648370295, "grad_norm": 1.203125, "learning_rate": 0.00019997023338033803, "loss": 5.2589, "step": 428 }, { "epoch": 0.020083094388202938, "grad_norm": 2.5625, "learning_rate": 0.00019997009079306542, "loss": 5.2862, "step": 429 }, { "epoch": 0.020129908128035577, "grad_norm": 1.8515625, "learning_rate": 0.00019996994786515047, "loss": 5.1315, "step": 430 }, { "epoch": 0.02017672186786822, "grad_norm": 1.4609375, "learning_rate": 0.00019996980459659376, "loss": 5.2024, "step": 431 }, { "epoch": 0.02022353560770086, "grad_norm": 1.71875, "learning_rate": 0.00019996966098739569, "loss": 4.9905, "step": 432 }, { "epoch": 0.020270349347533502, "grad_norm": 1.53125, "learning_rate": 0.00019996951703755677, "loss": 5.0235, "step": 433 }, { "epoch": 0.02031716308736614, "grad_norm": 1.3359375, "learning_rate": 0.00019996937274707752, "loss": 5.3094, "step": 434 }, { "epoch": 0.020363976827198784, "grad_norm": 1.453125, "learning_rate": 0.0001999692281159584, "loss": 5.3838, "step": 435 }, { "epoch": 0.020410790567031423, "grad_norm": 2.0, "learning_rate": 0.00019996908314419992, "loss": 4.5997, "step": 436 }, { "epoch": 0.020457604306864066, "grad_norm": 1.5625, "learning_rate": 0.00019996893783180255, "loss": 5.1547, "step": 437 }, { "epoch": 0.020504418046696705, "grad_norm": 1.78125, "learning_rate": 0.00019996879217876683, "loss": 5.3029, "step": 438 }, { "epoch": 0.020551231786529345, "grad_norm": 1.65625, "learning_rate": 0.0001999686461850932, "loss": 5.2961, "step": 439 }, { "epoch": 0.020598045526361988, "grad_norm": 1.421875, "learning_rate": 0.0001999684998507822, "loss": 4.6656, "step": 440 }, { "epoch": 0.020644859266194627, "grad_norm": 1.765625, "learning_rate": 0.00019996835317583433, "loss": 5.2848, "step": 441 }, { "epoch": 0.02069167300602727, "grad_norm": 1.2421875, "learning_rate": 0.00019996820616025005, "loss": 5.0927, "step": 442 }, { "epoch": 0.02073848674585991, "grad_norm": 1.171875, "learning_rate": 0.0001999680588040299, "loss": 3.8776, "step": 443 }, { "epoch": 0.020785300485692552, "grad_norm": 1.515625, "learning_rate": 0.00019996791110717433, "loss": 5.3751, "step": 444 }, { "epoch": 0.02083211422552519, "grad_norm": 1.7734375, "learning_rate": 0.00019996776306968394, "loss": 5.0779, "step": 445 }, { "epoch": 0.020878927965357834, "grad_norm": 1.7890625, "learning_rate": 0.00019996761469155916, "loss": 5.0293, "step": 446 }, { "epoch": 0.020925741705190473, "grad_norm": 1.59375, "learning_rate": 0.0001999674659728005, "loss": 5.0129, "step": 447 }, { "epoch": 0.020972555445023116, "grad_norm": 1.40625, "learning_rate": 0.00019996731691340847, "loss": 4.9326, "step": 448 }, { "epoch": 0.021019369184855755, "grad_norm": 1.6015625, "learning_rate": 0.00019996716751338362, "loss": 5.3143, "step": 449 }, { "epoch": 0.021066182924688395, "grad_norm": 1.53125, "learning_rate": 0.00019996701777272638, "loss": 5.1283, "step": 450 }, { "epoch": 0.021112996664521037, "grad_norm": 1.625, "learning_rate": 0.00019996686769143733, "loss": 4.9726, "step": 451 }, { "epoch": 0.021159810404353677, "grad_norm": 1.65625, "learning_rate": 0.00019996671726951694, "loss": 5.1439, "step": 452 }, { "epoch": 0.02120662414418632, "grad_norm": 1.6796875, "learning_rate": 0.00019996656650696578, "loss": 5.2888, "step": 453 }, { "epoch": 0.02125343788401896, "grad_norm": 1.4140625, "learning_rate": 0.0001999664154037843, "loss": 5.116, "step": 454 }, { "epoch": 0.0213002516238516, "grad_norm": 1.5, "learning_rate": 0.000199966263959973, "loss": 4.9857, "step": 455 }, { "epoch": 0.02134706536368424, "grad_norm": 1.7890625, "learning_rate": 0.0001999661121755325, "loss": 5.5331, "step": 456 }, { "epoch": 0.021393879103516884, "grad_norm": 1.1953125, "learning_rate": 0.00019996596005046323, "loss": 4.9981, "step": 457 }, { "epoch": 0.021440692843349523, "grad_norm": 1.625, "learning_rate": 0.0001999658075847657, "loss": 5.3661, "step": 458 }, { "epoch": 0.021487506583182162, "grad_norm": 1.5625, "learning_rate": 0.00019996565477844048, "loss": 5.1321, "step": 459 }, { "epoch": 0.021534320323014805, "grad_norm": 1.65625, "learning_rate": 0.00019996550163148806, "loss": 5.1056, "step": 460 }, { "epoch": 0.021581134062847444, "grad_norm": 1.5234375, "learning_rate": 0.000199965348143909, "loss": 5.0678, "step": 461 }, { "epoch": 0.021627947802680087, "grad_norm": 1.484375, "learning_rate": 0.00019996519431570375, "loss": 4.7282, "step": 462 }, { "epoch": 0.021674761542512726, "grad_norm": 1.765625, "learning_rate": 0.0001999650401468729, "loss": 5.0392, "step": 463 }, { "epoch": 0.02172157528234537, "grad_norm": 1.5859375, "learning_rate": 0.00019996488563741696, "loss": 5.3042, "step": 464 }, { "epoch": 0.02176838902217801, "grad_norm": 1.4296875, "learning_rate": 0.00019996473078733647, "loss": 5.3593, "step": 465 }, { "epoch": 0.02181520276201065, "grad_norm": 1.7890625, "learning_rate": 0.00019996457559663192, "loss": 5.4853, "step": 466 }, { "epoch": 0.02186201650184329, "grad_norm": 1.546875, "learning_rate": 0.00019996442006530382, "loss": 5.6614, "step": 467 }, { "epoch": 0.021908830241675933, "grad_norm": 1.4453125, "learning_rate": 0.00019996426419335278, "loss": 5.1283, "step": 468 }, { "epoch": 0.021955643981508573, "grad_norm": 1.59375, "learning_rate": 0.00019996410798077928, "loss": 5.0687, "step": 469 }, { "epoch": 0.022002457721341212, "grad_norm": 1.8515625, "learning_rate": 0.00019996395142758385, "loss": 5.4254, "step": 470 }, { "epoch": 0.022049271461173855, "grad_norm": 1.1015625, "learning_rate": 0.00019996379453376704, "loss": 7.8804, "step": 471 }, { "epoch": 0.022096085201006494, "grad_norm": 1.234375, "learning_rate": 0.00019996363729932938, "loss": 5.4523, "step": 472 }, { "epoch": 0.022142898940839137, "grad_norm": 1.4375, "learning_rate": 0.0001999634797242714, "loss": 5.0359, "step": 473 }, { "epoch": 0.022189712680671776, "grad_norm": 1.21875, "learning_rate": 0.00019996332180859363, "loss": 5.1355, "step": 474 }, { "epoch": 0.02223652642050442, "grad_norm": 1.390625, "learning_rate": 0.00019996316355229662, "loss": 5.0281, "step": 475 }, { "epoch": 0.02228334016033706, "grad_norm": 1.484375, "learning_rate": 0.00019996300495538094, "loss": 5.3582, "step": 476 }, { "epoch": 0.0223301539001697, "grad_norm": 1.5546875, "learning_rate": 0.0001999628460178471, "loss": 4.9056, "step": 477 }, { "epoch": 0.02237696764000234, "grad_norm": 1.328125, "learning_rate": 0.0001999626867396956, "loss": 5.1014, "step": 478 }, { "epoch": 0.022423781379834983, "grad_norm": 1.6796875, "learning_rate": 0.00019996252712092705, "loss": 5.0013, "step": 479 }, { "epoch": 0.022470595119667622, "grad_norm": 1.359375, "learning_rate": 0.00019996236716154195, "loss": 5.0501, "step": 480 }, { "epoch": 0.022517408859500262, "grad_norm": 1.6796875, "learning_rate": 0.00019996220686154087, "loss": 5.3184, "step": 481 }, { "epoch": 0.022564222599332905, "grad_norm": 1.484375, "learning_rate": 0.00019996204622092437, "loss": 5.1426, "step": 482 }, { "epoch": 0.022611036339165544, "grad_norm": 1.5703125, "learning_rate": 0.00019996188523969296, "loss": 5.3092, "step": 483 }, { "epoch": 0.022657850078998187, "grad_norm": 1.59375, "learning_rate": 0.00019996172391784718, "loss": 5.051, "step": 484 }, { "epoch": 0.022704663818830826, "grad_norm": 2.046875, "learning_rate": 0.0001999615622553876, "loss": 4.9005, "step": 485 }, { "epoch": 0.02275147755866347, "grad_norm": 1.4375, "learning_rate": 0.00019996140025231482, "loss": 4.8971, "step": 486 }, { "epoch": 0.022798291298496108, "grad_norm": 1.421875, "learning_rate": 0.00019996123790862934, "loss": 4.9744, "step": 487 }, { "epoch": 0.02284510503832875, "grad_norm": 1.2109375, "learning_rate": 0.0001999610752243317, "loss": 4.7906, "step": 488 }, { "epoch": 0.02289191877816139, "grad_norm": 1.6875, "learning_rate": 0.00019996091219942245, "loss": 5.0042, "step": 489 }, { "epoch": 0.022938732517994033, "grad_norm": 1.8046875, "learning_rate": 0.00019996074883390222, "loss": 5.0915, "step": 490 }, { "epoch": 0.022985546257826672, "grad_norm": 1.5859375, "learning_rate": 0.0001999605851277715, "loss": 4.5874, "step": 491 }, { "epoch": 0.02303235999765931, "grad_norm": 1.328125, "learning_rate": 0.00019996042108103086, "loss": 4.7597, "step": 492 }, { "epoch": 0.023079173737491954, "grad_norm": 1.5703125, "learning_rate": 0.00019996025669368085, "loss": 5.5063, "step": 493 }, { "epoch": 0.023125987477324594, "grad_norm": 1.390625, "learning_rate": 0.00019996009196572207, "loss": 5.1019, "step": 494 }, { "epoch": 0.023172801217157236, "grad_norm": 1.6953125, "learning_rate": 0.00019995992689715505, "loss": 4.6908, "step": 495 }, { "epoch": 0.023219614956989876, "grad_norm": 1.5625, "learning_rate": 0.00019995976148798033, "loss": 5.0174, "step": 496 }, { "epoch": 0.02326642869682252, "grad_norm": 1.8359375, "learning_rate": 0.0001999595957381985, "loss": 5.5257, "step": 497 }, { "epoch": 0.023313242436655158, "grad_norm": 1.5390625, "learning_rate": 0.00019995942964781016, "loss": 5.3618, "step": 498 }, { "epoch": 0.0233600561764878, "grad_norm": 1.4453125, "learning_rate": 0.00019995926321681582, "loss": 4.9597, "step": 499 }, { "epoch": 0.02340686991632044, "grad_norm": 1.515625, "learning_rate": 0.00019995909644521609, "loss": 4.6327, "step": 500 }, { "epoch": 0.02345368365615308, "grad_norm": 1.3671875, "learning_rate": 0.00019995892933301148, "loss": 4.937, "step": 501 }, { "epoch": 0.023500497395985722, "grad_norm": 1.4921875, "learning_rate": 0.00019995876188020262, "loss": 5.0713, "step": 502 }, { "epoch": 0.02354731113581836, "grad_norm": 1.609375, "learning_rate": 0.00019995859408679007, "loss": 5.1407, "step": 503 }, { "epoch": 0.023594124875651004, "grad_norm": 1.5, "learning_rate": 0.00019995842595277435, "loss": 5.0447, "step": 504 }, { "epoch": 0.023640938615483643, "grad_norm": 1.4296875, "learning_rate": 0.00019995825747815612, "loss": 4.8239, "step": 505 }, { "epoch": 0.023687752355316286, "grad_norm": 1.5859375, "learning_rate": 0.00019995808866293585, "loss": 4.6566, "step": 506 }, { "epoch": 0.023734566095148926, "grad_norm": 1.171875, "learning_rate": 0.00019995791950711423, "loss": 5.0372, "step": 507 }, { "epoch": 0.02378137983498157, "grad_norm": 1.75, "learning_rate": 0.00019995775001069172, "loss": 4.9537, "step": 508 }, { "epoch": 0.023828193574814208, "grad_norm": 2.015625, "learning_rate": 0.000199957580173669, "loss": 5.3744, "step": 509 }, { "epoch": 0.02387500731464685, "grad_norm": 1.6640625, "learning_rate": 0.00019995740999604656, "loss": 5.3062, "step": 510 }, { "epoch": 0.02392182105447949, "grad_norm": 1.6328125, "learning_rate": 0.00019995723947782509, "loss": 4.5041, "step": 511 }, { "epoch": 0.02396863479431213, "grad_norm": 1.3515625, "learning_rate": 0.00019995706861900502, "loss": 5.041, "step": 512 }, { "epoch": 0.024015448534144772, "grad_norm": 3.46875, "learning_rate": 0.00019995689741958707, "loss": 5.6153, "step": 513 }, { "epoch": 0.02406226227397741, "grad_norm": 1.640625, "learning_rate": 0.00019995672587957178, "loss": 4.8307, "step": 514 }, { "epoch": 0.024109076013810054, "grad_norm": 1.828125, "learning_rate": 0.00019995655399895968, "loss": 4.8836, "step": 515 }, { "epoch": 0.024155889753642693, "grad_norm": 1.4375, "learning_rate": 0.00019995638177775142, "loss": 4.5774, "step": 516 }, { "epoch": 0.024202703493475336, "grad_norm": 16.375, "learning_rate": 0.00019995620921594757, "loss": 6.2692, "step": 517 }, { "epoch": 0.024249517233307975, "grad_norm": 6.9375, "learning_rate": 0.00019995603631354873, "loss": 5.2057, "step": 518 }, { "epoch": 0.024296330973140618, "grad_norm": 1.5625, "learning_rate": 0.00019995586307055546, "loss": 3.7915, "step": 519 }, { "epoch": 0.024343144712973257, "grad_norm": 2.40625, "learning_rate": 0.00019995568948696834, "loss": 5.8104, "step": 520 }, { "epoch": 0.0243899584528059, "grad_norm": 1.890625, "learning_rate": 0.00019995551556278802, "loss": 5.0974, "step": 521 }, { "epoch": 0.02443677219263854, "grad_norm": 1.5546875, "learning_rate": 0.00019995534129801503, "loss": 5.0463, "step": 522 }, { "epoch": 0.02448358593247118, "grad_norm": 1.8671875, "learning_rate": 0.00019995516669265003, "loss": 5.4048, "step": 523 }, { "epoch": 0.02453039967230382, "grad_norm": 1.9140625, "learning_rate": 0.00019995499174669355, "loss": 5.1534, "step": 524 }, { "epoch": 0.02457721341213646, "grad_norm": 1.4375, "learning_rate": 0.00019995481646014623, "loss": 4.9296, "step": 525 }, { "epoch": 0.024624027151969104, "grad_norm": 1.6015625, "learning_rate": 0.00019995464083300864, "loss": 5.1405, "step": 526 }, { "epoch": 0.024670840891801743, "grad_norm": 2.0625, "learning_rate": 0.00019995446486528138, "loss": 4.6438, "step": 527 }, { "epoch": 0.024717654631634386, "grad_norm": 1.359375, "learning_rate": 0.00019995428855696504, "loss": 5.1218, "step": 528 }, { "epoch": 0.024764468371467025, "grad_norm": 1.5078125, "learning_rate": 0.0001999541119080603, "loss": 5.3445, "step": 529 }, { "epoch": 0.024811282111299668, "grad_norm": 1.765625, "learning_rate": 0.00019995393491856767, "loss": 5.4011, "step": 530 }, { "epoch": 0.024858095851132307, "grad_norm": 1.3984375, "learning_rate": 0.00019995375758848777, "loss": 5.0661, "step": 531 }, { "epoch": 0.024904909590964946, "grad_norm": 1.7421875, "learning_rate": 0.00019995357991782123, "loss": 5.2638, "step": 532 }, { "epoch": 0.02495172333079759, "grad_norm": 1.6640625, "learning_rate": 0.00019995340190656866, "loss": 4.6876, "step": 533 }, { "epoch": 0.02499853707063023, "grad_norm": 1.5, "learning_rate": 0.00019995322355473064, "loss": 5.1821, "step": 534 }, { "epoch": 0.02504535081046287, "grad_norm": 1.90625, "learning_rate": 0.00019995304486230777, "loss": 4.8834, "step": 535 }, { "epoch": 0.02509216455029551, "grad_norm": 1.390625, "learning_rate": 0.0001999528658293007, "loss": 4.7524, "step": 536 }, { "epoch": 0.025138978290128153, "grad_norm": 1.265625, "learning_rate": 0.00019995268645571002, "loss": 4.8354, "step": 537 }, { "epoch": 0.025185792029960793, "grad_norm": 1.7109375, "learning_rate": 0.0001999525067415363, "loss": 5.3256, "step": 538 }, { "epoch": 0.025232605769793436, "grad_norm": 1.3359375, "learning_rate": 0.00019995232668678023, "loss": 5.0837, "step": 539 }, { "epoch": 0.025279419509626075, "grad_norm": 1.578125, "learning_rate": 0.00019995214629144238, "loss": 4.5372, "step": 540 }, { "epoch": 0.025326233249458718, "grad_norm": 1.4609375, "learning_rate": 0.00019995196555552336, "loss": 4.8896, "step": 541 }, { "epoch": 0.025373046989291357, "grad_norm": 1.515625, "learning_rate": 0.00019995178447902378, "loss": 5.2592, "step": 542 }, { "epoch": 0.025419860729123996, "grad_norm": 1.5546875, "learning_rate": 0.0001999516030619443, "loss": 4.8137, "step": 543 }, { "epoch": 0.02546667446895664, "grad_norm": 1.2265625, "learning_rate": 0.0001999514213042855, "loss": 6.1245, "step": 544 }, { "epoch": 0.02551348820878928, "grad_norm": 1.8828125, "learning_rate": 0.00019995123920604798, "loss": 5.0661, "step": 545 }, { "epoch": 0.02556030194862192, "grad_norm": 1.5625, "learning_rate": 0.00019995105676723243, "loss": 5.0126, "step": 546 }, { "epoch": 0.02560711568845456, "grad_norm": 1.7734375, "learning_rate": 0.00019995087398783944, "loss": 5.4157, "step": 547 }, { "epoch": 0.025653929428287203, "grad_norm": 1.1640625, "learning_rate": 0.0001999506908678696, "loss": 4.8038, "step": 548 }, { "epoch": 0.025700743168119843, "grad_norm": 1.953125, "learning_rate": 0.00019995050740732355, "loss": 4.8134, "step": 549 }, { "epoch": 0.025747556907952485, "grad_norm": 1.59375, "learning_rate": 0.00019995032360620193, "loss": 5.1124, "step": 550 }, { "epoch": 0.025794370647785125, "grad_norm": 5.03125, "learning_rate": 0.00019995013946450536, "loss": 6.257, "step": 551 }, { "epoch": 0.025841184387617767, "grad_norm": 1.46875, "learning_rate": 0.00019994995498223448, "loss": 5.0985, "step": 552 }, { "epoch": 0.025887998127450407, "grad_norm": 1.4609375, "learning_rate": 0.0001999497701593899, "loss": 4.5259, "step": 553 }, { "epoch": 0.025934811867283046, "grad_norm": 1.2890625, "learning_rate": 0.00019994958499597228, "loss": 6.5457, "step": 554 }, { "epoch": 0.02598162560711569, "grad_norm": 1.3671875, "learning_rate": 0.00019994939949198218, "loss": 5.3396, "step": 555 }, { "epoch": 0.026028439346948328, "grad_norm": 1.2421875, "learning_rate": 0.0001999492136474203, "loss": 4.7243, "step": 556 }, { "epoch": 0.02607525308678097, "grad_norm": 1.3984375, "learning_rate": 0.00019994902746228724, "loss": 4.8813, "step": 557 }, { "epoch": 0.02612206682661361, "grad_norm": 1.3359375, "learning_rate": 0.00019994884093658366, "loss": 4.8851, "step": 558 }, { "epoch": 0.026168880566446253, "grad_norm": 3.265625, "learning_rate": 0.00019994865407031018, "loss": 7.1051, "step": 559 }, { "epoch": 0.026215694306278892, "grad_norm": 1.2265625, "learning_rate": 0.00019994846686346743, "loss": 5.3012, "step": 560 }, { "epoch": 0.026262508046111535, "grad_norm": 1.40625, "learning_rate": 0.00019994827931605607, "loss": 5.2501, "step": 561 }, { "epoch": 0.026309321785944174, "grad_norm": 1.4140625, "learning_rate": 0.00019994809142807673, "loss": 5.2258, "step": 562 }, { "epoch": 0.026356135525776817, "grad_norm": 1.3125, "learning_rate": 0.00019994790319953002, "loss": 5.0507, "step": 563 }, { "epoch": 0.026402949265609457, "grad_norm": 1.375, "learning_rate": 0.00019994771463041663, "loss": 4.7399, "step": 564 }, { "epoch": 0.026449763005442096, "grad_norm": 1.328125, "learning_rate": 0.00019994752572073717, "loss": 4.9829, "step": 565 }, { "epoch": 0.02649657674527474, "grad_norm": 1.5234375, "learning_rate": 0.0001999473364704923, "loss": 5.0284, "step": 566 }, { "epoch": 0.026543390485107378, "grad_norm": 1.4453125, "learning_rate": 0.00019994714687968267, "loss": 5.1692, "step": 567 }, { "epoch": 0.02659020422494002, "grad_norm": 1.140625, "learning_rate": 0.00019994695694830887, "loss": 5.5887, "step": 568 }, { "epoch": 0.02663701796477266, "grad_norm": 1.453125, "learning_rate": 0.0001999467666763716, "loss": 4.8597, "step": 569 }, { "epoch": 0.026683831704605303, "grad_norm": 2.125, "learning_rate": 0.00019994657606387154, "loss": 4.4814, "step": 570 }, { "epoch": 0.026730645444437942, "grad_norm": 1.578125, "learning_rate": 0.00019994638511080926, "loss": 4.8862, "step": 571 }, { "epoch": 0.026777459184270585, "grad_norm": 1.6953125, "learning_rate": 0.00019994619381718547, "loss": 4.8817, "step": 572 }, { "epoch": 0.026824272924103224, "grad_norm": 1.7265625, "learning_rate": 0.00019994600218300082, "loss": 5.08, "step": 573 }, { "epoch": 0.026871086663935864, "grad_norm": 1.6640625, "learning_rate": 0.0001999458102082559, "loss": 4.9554, "step": 574 }, { "epoch": 0.026917900403768506, "grad_norm": 1.515625, "learning_rate": 0.00019994561789295144, "loss": 5.1714, "step": 575 }, { "epoch": 0.026964714143601146, "grad_norm": 1.953125, "learning_rate": 0.0001999454252370881, "loss": 4.7401, "step": 576 }, { "epoch": 0.02701152788343379, "grad_norm": 1.421875, "learning_rate": 0.00019994523224066643, "loss": 4.7719, "step": 577 }, { "epoch": 0.027058341623266428, "grad_norm": 1.3046875, "learning_rate": 0.00019994503890368717, "loss": 4.9286, "step": 578 }, { "epoch": 0.02710515536309907, "grad_norm": 1.28125, "learning_rate": 0.00019994484522615097, "loss": 4.7161, "step": 579 }, { "epoch": 0.02715196910293171, "grad_norm": 1.109375, "learning_rate": 0.00019994465120805853, "loss": 4.8127, "step": 580 }, { "epoch": 0.027198782842764353, "grad_norm": 1.4921875, "learning_rate": 0.00019994445684941043, "loss": 4.8933, "step": 581 }, { "epoch": 0.027245596582596992, "grad_norm": 1.3203125, "learning_rate": 0.00019994426215020735, "loss": 5.2034, "step": 582 }, { "epoch": 0.027292410322429635, "grad_norm": 1.390625, "learning_rate": 0.00019994406711045003, "loss": 4.8938, "step": 583 }, { "epoch": 0.027339224062262274, "grad_norm": 1.4140625, "learning_rate": 0.00019994387173013904, "loss": 4.8231, "step": 584 }, { "epoch": 0.027386037802094913, "grad_norm": 1.578125, "learning_rate": 0.0001999436760092751, "loss": 4.9181, "step": 585 }, { "epoch": 0.027432851541927556, "grad_norm": 1.6875, "learning_rate": 0.00019994347994785886, "loss": 5.0359, "step": 586 }, { "epoch": 0.027479665281760195, "grad_norm": 2.046875, "learning_rate": 0.00019994328354589098, "loss": 5.0661, "step": 587 }, { "epoch": 0.027526479021592838, "grad_norm": 1.4765625, "learning_rate": 0.00019994308680337214, "loss": 5.0331, "step": 588 }, { "epoch": 0.027573292761425477, "grad_norm": 2.140625, "learning_rate": 0.000199942889720303, "loss": 5.0079, "step": 589 }, { "epoch": 0.02762010650125812, "grad_norm": 2.21875, "learning_rate": 0.00019994269229668427, "loss": 4.3951, "step": 590 }, { "epoch": 0.02766692024109076, "grad_norm": 1.5859375, "learning_rate": 0.00019994249453251657, "loss": 5.1709, "step": 591 }, { "epoch": 0.027713733980923402, "grad_norm": 1.90625, "learning_rate": 0.0001999422964278006, "loss": 5.072, "step": 592 }, { "epoch": 0.02776054772075604, "grad_norm": 1.5703125, "learning_rate": 0.00019994209798253706, "loss": 5.2214, "step": 593 }, { "epoch": 0.027807361460588684, "grad_norm": 1.5859375, "learning_rate": 0.00019994189919672655, "loss": 4.9795, "step": 594 }, { "epoch": 0.027854175200421324, "grad_norm": 1.5703125, "learning_rate": 0.00019994170007036984, "loss": 4.8748, "step": 595 }, { "epoch": 0.027900988940253963, "grad_norm": 1.2734375, "learning_rate": 0.0001999415006034675, "loss": 5.2401, "step": 596 }, { "epoch": 0.027947802680086606, "grad_norm": 1.8515625, "learning_rate": 0.00019994130079602034, "loss": 5.1199, "step": 597 }, { "epoch": 0.027994616419919245, "grad_norm": 1.734375, "learning_rate": 0.00019994110064802895, "loss": 4.5883, "step": 598 }, { "epoch": 0.028041430159751888, "grad_norm": 1.453125, "learning_rate": 0.00019994090015949402, "loss": 4.8967, "step": 599 }, { "epoch": 0.028088243899584527, "grad_norm": 1.421875, "learning_rate": 0.00019994069933041626, "loss": 4.6963, "step": 600 }, { "epoch": 0.02813505763941717, "grad_norm": 1.6484375, "learning_rate": 0.00019994049816079637, "loss": 4.835, "step": 601 }, { "epoch": 0.02818187137924981, "grad_norm": 1.4453125, "learning_rate": 0.00019994029665063495, "loss": 5.3728, "step": 602 }, { "epoch": 0.028228685119082452, "grad_norm": 1.328125, "learning_rate": 0.0001999400947999328, "loss": 5.1004, "step": 603 }, { "epoch": 0.02827549885891509, "grad_norm": 1.6328125, "learning_rate": 0.00019993989260869055, "loss": 5.5349, "step": 604 }, { "epoch": 0.02832231259874773, "grad_norm": 1.5, "learning_rate": 0.00019993969007690886, "loss": 5.286, "step": 605 }, { "epoch": 0.028369126338580374, "grad_norm": 1.4609375, "learning_rate": 0.0001999394872045885, "loss": 4.9586, "step": 606 }, { "epoch": 0.028415940078413013, "grad_norm": 1.7265625, "learning_rate": 0.0001999392839917301, "loss": 4.9011, "step": 607 }, { "epoch": 0.028462753818245656, "grad_norm": 1.2734375, "learning_rate": 0.00019993908043833435, "loss": 4.9296, "step": 608 }, { "epoch": 0.028509567558078295, "grad_norm": 1.8671875, "learning_rate": 0.00019993887654440196, "loss": 4.9703, "step": 609 }, { "epoch": 0.028556381297910938, "grad_norm": 1.3828125, "learning_rate": 0.00019993867230993362, "loss": 4.8879, "step": 610 }, { "epoch": 0.028603195037743577, "grad_norm": 1.6484375, "learning_rate": 0.00019993846773493003, "loss": 4.9733, "step": 611 }, { "epoch": 0.02865000877757622, "grad_norm": 1.4765625, "learning_rate": 0.0001999382628193919, "loss": 4.9993, "step": 612 }, { "epoch": 0.02869682251740886, "grad_norm": 1.5625, "learning_rate": 0.00019993805756331992, "loss": 4.7652, "step": 613 }, { "epoch": 0.028743636257241502, "grad_norm": 1.5859375, "learning_rate": 0.00019993785196671478, "loss": 4.8588, "step": 614 }, { "epoch": 0.02879044999707414, "grad_norm": 1.6171875, "learning_rate": 0.00019993764602957719, "loss": 4.7967, "step": 615 }, { "epoch": 0.02883726373690678, "grad_norm": 1.296875, "learning_rate": 0.00019993743975190784, "loss": 4.3697, "step": 616 }, { "epoch": 0.028884077476739423, "grad_norm": 1.5234375, "learning_rate": 0.00019993723313370746, "loss": 5.5459, "step": 617 }, { "epoch": 0.028930891216572063, "grad_norm": 1.59375, "learning_rate": 0.0001999370261749767, "loss": 4.9731, "step": 618 }, { "epoch": 0.028977704956404705, "grad_norm": 1.3671875, "learning_rate": 0.00019993681887571634, "loss": 4.6819, "step": 619 }, { "epoch": 0.029024518696237345, "grad_norm": 1.3046875, "learning_rate": 0.00019993661123592703, "loss": 5.0422, "step": 620 }, { "epoch": 0.029071332436069987, "grad_norm": 1.4140625, "learning_rate": 0.00019993640325560946, "loss": 4.9566, "step": 621 }, { "epoch": 0.029118146175902627, "grad_norm": 1.3828125, "learning_rate": 0.0001999361949347644, "loss": 5.0353, "step": 622 }, { "epoch": 0.02916495991573527, "grad_norm": 1.4140625, "learning_rate": 0.00019993598627339253, "loss": 5.0798, "step": 623 }, { "epoch": 0.02921177365556791, "grad_norm": 1.234375, "learning_rate": 0.00019993577727149458, "loss": 4.7382, "step": 624 }, { "epoch": 0.02925858739540055, "grad_norm": 1.703125, "learning_rate": 0.00019993556792907122, "loss": 4.7009, "step": 625 }, { "epoch": 0.02930540113523319, "grad_norm": 1.5, "learning_rate": 0.0001999353582461232, "loss": 5.0649, "step": 626 }, { "epoch": 0.02935221487506583, "grad_norm": 1.3515625, "learning_rate": 0.00019993514822265124, "loss": 4.7387, "step": 627 }, { "epoch": 0.029399028614898473, "grad_norm": 1.3359375, "learning_rate": 0.00019993493785865603, "loss": 5.415, "step": 628 }, { "epoch": 0.029445842354731112, "grad_norm": 1.21875, "learning_rate": 0.00019993472715413828, "loss": 7.1683, "step": 629 }, { "epoch": 0.029492656094563755, "grad_norm": 1.4765625, "learning_rate": 0.00019993451610909874, "loss": 5.0825, "step": 630 }, { "epoch": 0.029539469834396394, "grad_norm": 1.390625, "learning_rate": 0.00019993430472353812, "loss": 5.2372, "step": 631 }, { "epoch": 0.029586283574229037, "grad_norm": 1.5859375, "learning_rate": 0.0001999340929974571, "loss": 5.0636, "step": 632 }, { "epoch": 0.029633097314061677, "grad_norm": 1.5, "learning_rate": 0.00019993388093085646, "loss": 4.9153, "step": 633 }, { "epoch": 0.02967991105389432, "grad_norm": 1.3984375, "learning_rate": 0.00019993366852373687, "loss": 5.2183, "step": 634 }, { "epoch": 0.02972672479372696, "grad_norm": 1.453125, "learning_rate": 0.0001999334557760991, "loss": 4.9553, "step": 635 }, { "epoch": 0.0297735385335596, "grad_norm": 1.640625, "learning_rate": 0.0001999332426879439, "loss": 4.844, "step": 636 }, { "epoch": 0.02982035227339224, "grad_norm": 1.328125, "learning_rate": 0.00019993302925927188, "loss": 5.1969, "step": 637 }, { "epoch": 0.02986716601322488, "grad_norm": 1.6484375, "learning_rate": 0.00019993281549008388, "loss": 5.2812, "step": 638 }, { "epoch": 0.029913979753057523, "grad_norm": 1.2109375, "learning_rate": 0.00019993260138038058, "loss": 4.83, "step": 639 }, { "epoch": 0.029960793492890162, "grad_norm": 1.859375, "learning_rate": 0.0001999323869301627, "loss": 4.7881, "step": 640 }, { "epoch": 0.030007607232722805, "grad_norm": 1.421875, "learning_rate": 0.00019993217213943101, "loss": 4.8344, "step": 641 }, { "epoch": 0.030054420972555444, "grad_norm": 1.7421875, "learning_rate": 0.00019993195700818622, "loss": 5.186, "step": 642 }, { "epoch": 0.030101234712388087, "grad_norm": 1.3125, "learning_rate": 0.00019993174153642903, "loss": 4.6614, "step": 643 }, { "epoch": 0.030148048452220726, "grad_norm": 1.5078125, "learning_rate": 0.00019993152572416026, "loss": 4.4136, "step": 644 }, { "epoch": 0.03019486219205337, "grad_norm": 1.4296875, "learning_rate": 0.00019993130957138055, "loss": 4.6109, "step": 645 }, { "epoch": 0.03024167593188601, "grad_norm": 1.4609375, "learning_rate": 0.0001999310930780907, "loss": 5.0909, "step": 646 }, { "epoch": 0.030288489671718648, "grad_norm": 1.421875, "learning_rate": 0.00019993087624429142, "loss": 4.9574, "step": 647 }, { "epoch": 0.03033530341155129, "grad_norm": 1.9296875, "learning_rate": 0.00019993065906998345, "loss": 4.7684, "step": 648 }, { "epoch": 0.03038211715138393, "grad_norm": 1.5625, "learning_rate": 0.00019993044155516753, "loss": 5.1964, "step": 649 }, { "epoch": 0.030428930891216573, "grad_norm": 1.390625, "learning_rate": 0.00019993022369984442, "loss": 4.8954, "step": 650 }, { "epoch": 0.030475744631049212, "grad_norm": 1.3046875, "learning_rate": 0.00019993000550401485, "loss": 4.9541, "step": 651 }, { "epoch": 0.030522558370881855, "grad_norm": 1.640625, "learning_rate": 0.00019992978696767956, "loss": 5.0104, "step": 652 }, { "epoch": 0.030569372110714494, "grad_norm": 1.625, "learning_rate": 0.0001999295680908393, "loss": 4.7717, "step": 653 }, { "epoch": 0.030616185850547137, "grad_norm": 1.546875, "learning_rate": 0.0001999293488734948, "loss": 4.7767, "step": 654 }, { "epoch": 0.030662999590379776, "grad_norm": 1.203125, "learning_rate": 0.00019992912931564684, "loss": 4.7851, "step": 655 }, { "epoch": 0.03070981333021242, "grad_norm": 1.6796875, "learning_rate": 0.00019992890941729612, "loss": 4.9374, "step": 656 }, { "epoch": 0.030756627070045058, "grad_norm": 1.7578125, "learning_rate": 0.00019992868917844344, "loss": 4.9521, "step": 657 }, { "epoch": 0.030803440809877698, "grad_norm": 1.453125, "learning_rate": 0.0001999284685990895, "loss": 4.8679, "step": 658 }, { "epoch": 0.03085025454971034, "grad_norm": 1.15625, "learning_rate": 0.00019992824767923513, "loss": 4.9766, "step": 659 }, { "epoch": 0.03089706828954298, "grad_norm": 1.2890625, "learning_rate": 0.00019992802641888099, "loss": 4.8027, "step": 660 }, { "epoch": 0.030943882029375622, "grad_norm": 1.484375, "learning_rate": 0.0001999278048180279, "loss": 5.1964, "step": 661 }, { "epoch": 0.03099069576920826, "grad_norm": 2.0625, "learning_rate": 0.00019992758287667658, "loss": 5.1983, "step": 662 }, { "epoch": 0.031037509509040904, "grad_norm": 1.6171875, "learning_rate": 0.00019992736059482779, "loss": 4.8008, "step": 663 }, { "epoch": 0.031084323248873544, "grad_norm": 1.8203125, "learning_rate": 0.00019992713797248231, "loss": 4.8177, "step": 664 }, { "epoch": 0.031131136988706187, "grad_norm": 1.546875, "learning_rate": 0.00019992691500964087, "loss": 5.0452, "step": 665 }, { "epoch": 0.031177950728538826, "grad_norm": 1.515625, "learning_rate": 0.00019992669170630425, "loss": 5.0471, "step": 666 }, { "epoch": 0.03122476446837147, "grad_norm": 1.4921875, "learning_rate": 0.0001999264680624732, "loss": 4.4623, "step": 667 }, { "epoch": 0.03127157820820411, "grad_norm": 1.6328125, "learning_rate": 0.00019992624407814852, "loss": 5.1657, "step": 668 }, { "epoch": 0.03131839194803675, "grad_norm": 1.390625, "learning_rate": 0.0001999260197533309, "loss": 4.6344, "step": 669 }, { "epoch": 0.03136520568786939, "grad_norm": 1.4921875, "learning_rate": 0.00019992579508802113, "loss": 4.8231, "step": 670 }, { "epoch": 0.03141201942770203, "grad_norm": 1.0625, "learning_rate": 0.00019992557008222, "loss": 3.5082, "step": 671 }, { "epoch": 0.03145883316753467, "grad_norm": 1.28125, "learning_rate": 0.0001999253447359283, "loss": 4.4485, "step": 672 }, { "epoch": 0.03150564690736731, "grad_norm": 1.4921875, "learning_rate": 0.00019992511904914673, "loss": 5.2325, "step": 673 }, { "epoch": 0.03155246064719995, "grad_norm": 1.3046875, "learning_rate": 0.00019992489302187613, "loss": 4.5318, "step": 674 }, { "epoch": 0.0315992743870326, "grad_norm": 1.28125, "learning_rate": 0.0001999246666541172, "loss": 4.8821, "step": 675 }, { "epoch": 0.031646088126865236, "grad_norm": 1.3671875, "learning_rate": 0.00019992443994587075, "loss": 5.1526, "step": 676 }, { "epoch": 0.031692901866697876, "grad_norm": 1.296875, "learning_rate": 0.00019992421289713753, "loss": 4.8223, "step": 677 }, { "epoch": 0.031739715606530515, "grad_norm": 1.4140625, "learning_rate": 0.00019992398550791834, "loss": 4.9435, "step": 678 }, { "epoch": 0.031786529346363154, "grad_norm": 1.1796875, "learning_rate": 0.00019992375777821397, "loss": 4.6603, "step": 679 }, { "epoch": 0.0318333430861958, "grad_norm": 1.3671875, "learning_rate": 0.00019992352970802518, "loss": 4.7698, "step": 680 }, { "epoch": 0.03188015682602844, "grad_norm": 1.46875, "learning_rate": 0.0001999233012973527, "loss": 4.8116, "step": 681 }, { "epoch": 0.03192697056586108, "grad_norm": 1.5703125, "learning_rate": 0.00019992307254619736, "loss": 4.9606, "step": 682 }, { "epoch": 0.03197378430569372, "grad_norm": 1.3046875, "learning_rate": 0.0001999228434545599, "loss": 4.6745, "step": 683 }, { "epoch": 0.032020598045526365, "grad_norm": 1.453125, "learning_rate": 0.00019992261402244116, "loss": 4.5115, "step": 684 }, { "epoch": 0.032067411785359004, "grad_norm": 1.609375, "learning_rate": 0.00019992238424984188, "loss": 5.0388, "step": 685 }, { "epoch": 0.03211422552519164, "grad_norm": 1.578125, "learning_rate": 0.00019992215413676284, "loss": 4.9971, "step": 686 }, { "epoch": 0.03216103926502428, "grad_norm": 1.375, "learning_rate": 0.0001999219236832049, "loss": 4.8256, "step": 687 }, { "epoch": 0.03220785300485693, "grad_norm": 1.34375, "learning_rate": 0.0001999216928891687, "loss": 4.9003, "step": 688 }, { "epoch": 0.03225466674468957, "grad_norm": 1.7109375, "learning_rate": 0.00019992146175465514, "loss": 4.9959, "step": 689 }, { "epoch": 0.03230148048452221, "grad_norm": 1.8046875, "learning_rate": 0.00019992123027966495, "loss": 5.0499, "step": 690 }, { "epoch": 0.03234829422435485, "grad_norm": 1.2421875, "learning_rate": 0.00019992099846419899, "loss": 4.8137, "step": 691 }, { "epoch": 0.032395107964187486, "grad_norm": 1.6171875, "learning_rate": 0.00019992076630825798, "loss": 4.7969, "step": 692 }, { "epoch": 0.03244192170402013, "grad_norm": 1.796875, "learning_rate": 0.00019992053381184273, "loss": 4.7403, "step": 693 }, { "epoch": 0.03248873544385277, "grad_norm": 1.265625, "learning_rate": 0.00019992030097495405, "loss": 4.9453, "step": 694 }, { "epoch": 0.03253554918368541, "grad_norm": 1.34375, "learning_rate": 0.0001999200677975927, "loss": 4.3787, "step": 695 }, { "epoch": 0.03258236292351805, "grad_norm": 1.2109375, "learning_rate": 0.00019991983427975952, "loss": 4.8825, "step": 696 }, { "epoch": 0.0326291766633507, "grad_norm": 1.4921875, "learning_rate": 0.00019991960042145524, "loss": 5.0361, "step": 697 }, { "epoch": 0.032675990403183336, "grad_norm": 1.328125, "learning_rate": 0.00019991936622268072, "loss": 4.6849, "step": 698 }, { "epoch": 0.032722804143015975, "grad_norm": 1.484375, "learning_rate": 0.00019991913168343673, "loss": 5.1813, "step": 699 }, { "epoch": 0.032769617882848615, "grad_norm": 1.7890625, "learning_rate": 0.00019991889680372407, "loss": 5.4485, "step": 700 }, { "epoch": 0.032816431622681254, "grad_norm": 1.4609375, "learning_rate": 0.0001999186615835436, "loss": 4.6792, "step": 701 }, { "epoch": 0.0328632453625139, "grad_norm": 1.28125, "learning_rate": 0.00019991842602289602, "loss": 4.9281, "step": 702 }, { "epoch": 0.03291005910234654, "grad_norm": 1.5078125, "learning_rate": 0.00019991819012178216, "loss": 4.5923, "step": 703 }, { "epoch": 0.03295687284217918, "grad_norm": 1.609375, "learning_rate": 0.00019991795388020286, "loss": 4.4806, "step": 704 }, { "epoch": 0.03300368658201182, "grad_norm": 1.6875, "learning_rate": 0.00019991771729815892, "loss": 5.0019, "step": 705 }, { "epoch": 0.033050500321844464, "grad_norm": 1.2734375, "learning_rate": 0.00019991748037565113, "loss": 4.9531, "step": 706 }, { "epoch": 0.033097314061677104, "grad_norm": 1.578125, "learning_rate": 0.0001999172431126803, "loss": 4.8479, "step": 707 }, { "epoch": 0.03314412780150974, "grad_norm": 1.6015625, "learning_rate": 0.00019991700550924722, "loss": 4.731, "step": 708 }, { "epoch": 0.03319094154134238, "grad_norm": 1.1171875, "learning_rate": 0.00019991676756535274, "loss": 4.6406, "step": 709 }, { "epoch": 0.03323775528117502, "grad_norm": 1.28125, "learning_rate": 0.00019991652928099765, "loss": 4.9456, "step": 710 }, { "epoch": 0.03328456902100767, "grad_norm": 1.484375, "learning_rate": 0.00019991629065618276, "loss": 4.9957, "step": 711 }, { "epoch": 0.03333138276084031, "grad_norm": 1.1875, "learning_rate": 0.00019991605169090887, "loss": 5.0619, "step": 712 }, { "epoch": 0.033378196500672946, "grad_norm": 1.421875, "learning_rate": 0.00019991581238517684, "loss": 4.37, "step": 713 }, { "epoch": 0.033425010240505586, "grad_norm": 1.265625, "learning_rate": 0.00019991557273898742, "loss": 4.408, "step": 714 }, { "epoch": 0.03347182398033823, "grad_norm": 1.375, "learning_rate": 0.0001999153327523415, "loss": 4.5727, "step": 715 }, { "epoch": 0.03351863772017087, "grad_norm": 1.8203125, "learning_rate": 0.00019991509242523982, "loss": 5.0094, "step": 716 }, { "epoch": 0.03356545146000351, "grad_norm": 1.53125, "learning_rate": 0.00019991485175768325, "loss": 4.6497, "step": 717 }, { "epoch": 0.03361226519983615, "grad_norm": 1.671875, "learning_rate": 0.0001999146107496726, "loss": 5.1445, "step": 718 }, { "epoch": 0.033659078939668796, "grad_norm": 1.671875, "learning_rate": 0.00019991436940120872, "loss": 4.7715, "step": 719 }, { "epoch": 0.033705892679501435, "grad_norm": 1.4921875, "learning_rate": 0.00019991412771229236, "loss": 5.1969, "step": 720 }, { "epoch": 0.033752706419334075, "grad_norm": 1.890625, "learning_rate": 0.0001999138856829244, "loss": 4.5632, "step": 721 }, { "epoch": 0.033799520159166714, "grad_norm": 1.625, "learning_rate": 0.00019991364331310563, "loss": 4.8231, "step": 722 }, { "epoch": 0.03384633389899935, "grad_norm": 1.734375, "learning_rate": 0.00019991340060283692, "loss": 4.5766, "step": 723 }, { "epoch": 0.033893147638832, "grad_norm": 1.8828125, "learning_rate": 0.00019991315755211908, "loss": 5.0925, "step": 724 }, { "epoch": 0.03393996137866464, "grad_norm": 1.5234375, "learning_rate": 0.0001999129141609529, "loss": 4.6494, "step": 725 }, { "epoch": 0.03398677511849728, "grad_norm": 1.6015625, "learning_rate": 0.00019991267042933928, "loss": 4.4414, "step": 726 }, { "epoch": 0.03403358885832992, "grad_norm": 1.921875, "learning_rate": 0.00019991242635727897, "loss": 5.3646, "step": 727 }, { "epoch": 0.034080402598162564, "grad_norm": 1.40625, "learning_rate": 0.0001999121819447729, "loss": 5.1889, "step": 728 }, { "epoch": 0.0341272163379952, "grad_norm": 1.7734375, "learning_rate": 0.0001999119371918218, "loss": 5.0292, "step": 729 }, { "epoch": 0.03417403007782784, "grad_norm": 1.796875, "learning_rate": 0.00019991169209842652, "loss": 4.6456, "step": 730 }, { "epoch": 0.03422084381766048, "grad_norm": 1.453125, "learning_rate": 0.00019991144666458798, "loss": 4.8144, "step": 731 }, { "epoch": 0.03426765755749312, "grad_norm": 1.3671875, "learning_rate": 0.00019991120089030693, "loss": 4.7138, "step": 732 }, { "epoch": 0.03431447129732577, "grad_norm": 3.015625, "learning_rate": 0.00019991095477558425, "loss": 4.1102, "step": 733 }, { "epoch": 0.03436128503715841, "grad_norm": 1.546875, "learning_rate": 0.00019991070832042077, "loss": 5.0823, "step": 734 }, { "epoch": 0.034408098776991046, "grad_norm": 1.3203125, "learning_rate": 0.00019991046152481733, "loss": 5.0694, "step": 735 }, { "epoch": 0.034454912516823685, "grad_norm": 1.3046875, "learning_rate": 0.00019991021438877475, "loss": 4.8201, "step": 736 }, { "epoch": 0.03450172625665633, "grad_norm": 1.25, "learning_rate": 0.00019990996691229392, "loss": 4.7184, "step": 737 }, { "epoch": 0.03454853999648897, "grad_norm": 1.7734375, "learning_rate": 0.00019990971909537563, "loss": 4.3015, "step": 738 }, { "epoch": 0.03459535373632161, "grad_norm": 1.6015625, "learning_rate": 0.00019990947093802074, "loss": 4.9612, "step": 739 }, { "epoch": 0.03464216747615425, "grad_norm": 1.3125, "learning_rate": 0.0001999092224402301, "loss": 4.7189, "step": 740 }, { "epoch": 0.03468898121598689, "grad_norm": 1.421875, "learning_rate": 0.00019990897360200457, "loss": 4.7586, "step": 741 }, { "epoch": 0.034735794955819535, "grad_norm": 1.5546875, "learning_rate": 0.00019990872442334498, "loss": 4.4929, "step": 742 }, { "epoch": 0.034782608695652174, "grad_norm": 2.296875, "learning_rate": 0.0001999084749042522, "loss": 5.2744, "step": 743 }, { "epoch": 0.034829422435484814, "grad_norm": 1.4375, "learning_rate": 0.00019990822504472707, "loss": 5.0039, "step": 744 }, { "epoch": 0.03487623617531745, "grad_norm": 1.5546875, "learning_rate": 0.00019990797484477043, "loss": 4.6487, "step": 745 }, { "epoch": 0.0349230499151501, "grad_norm": 1.4765625, "learning_rate": 0.00019990772430438316, "loss": 4.5523, "step": 746 }, { "epoch": 0.03496986365498274, "grad_norm": 1.53125, "learning_rate": 0.00019990747342356607, "loss": 4.7854, "step": 747 }, { "epoch": 0.03501667739481538, "grad_norm": 1.6953125, "learning_rate": 0.00019990722220232003, "loss": 4.8674, "step": 748 }, { "epoch": 0.03506349113464802, "grad_norm": 1.328125, "learning_rate": 0.00019990697064064592, "loss": 4.7425, "step": 749 }, { "epoch": 0.03511030487448066, "grad_norm": 1.2734375, "learning_rate": 0.00019990671873854459, "loss": 4.9218, "step": 750 }, { "epoch": 0.0351571186143133, "grad_norm": 1.2890625, "learning_rate": 0.00019990646649601688, "loss": 5.2301, "step": 751 }, { "epoch": 0.03520393235414594, "grad_norm": 1.7421875, "learning_rate": 0.0001999062139130637, "loss": 4.9132, "step": 752 }, { "epoch": 0.03525074609397858, "grad_norm": 1.5078125, "learning_rate": 0.00019990596098968578, "loss": 4.609, "step": 753 }, { "epoch": 0.03529755983381122, "grad_norm": 1.4765625, "learning_rate": 0.00019990570772588414, "loss": 4.7612, "step": 754 }, { "epoch": 0.03534437357364387, "grad_norm": 1.390625, "learning_rate": 0.00019990545412165958, "loss": 4.632, "step": 755 }, { "epoch": 0.035391187313476506, "grad_norm": 1.53125, "learning_rate": 0.00019990520017701294, "loss": 5.1205, "step": 756 }, { "epoch": 0.035438001053309146, "grad_norm": 1.671875, "learning_rate": 0.00019990494589194508, "loss": 4.9568, "step": 757 }, { "epoch": 0.035484814793141785, "grad_norm": 1.6640625, "learning_rate": 0.00019990469126645693, "loss": 4.5879, "step": 758 }, { "epoch": 0.03553162853297443, "grad_norm": 1.84375, "learning_rate": 0.0001999044363005493, "loss": 5.0213, "step": 759 }, { "epoch": 0.03557844227280707, "grad_norm": 1.5234375, "learning_rate": 0.0001999041809942231, "loss": 5.0793, "step": 760 }, { "epoch": 0.03562525601263971, "grad_norm": 1.6953125, "learning_rate": 0.00019990392534747917, "loss": 4.2958, "step": 761 }, { "epoch": 0.03567206975247235, "grad_norm": 1.203125, "learning_rate": 0.00019990366936031837, "loss": 4.717, "step": 762 }, { "epoch": 0.03571888349230499, "grad_norm": 1.5625, "learning_rate": 0.0001999034130327416, "loss": 4.6932, "step": 763 }, { "epoch": 0.035765697232137635, "grad_norm": 1.421875, "learning_rate": 0.00019990315636474973, "loss": 4.8477, "step": 764 }, { "epoch": 0.035812510971970274, "grad_norm": 1.2734375, "learning_rate": 0.00019990289935634364, "loss": 4.6476, "step": 765 }, { "epoch": 0.03585932471180291, "grad_norm": 2.203125, "learning_rate": 0.0001999026420075242, "loss": 4.405, "step": 766 }, { "epoch": 0.03590613845163555, "grad_norm": 1.8359375, "learning_rate": 0.00019990238431829224, "loss": 4.2563, "step": 767 }, { "epoch": 0.0359529521914682, "grad_norm": 1.421875, "learning_rate": 0.00019990212628864874, "loss": 5.3606, "step": 768 }, { "epoch": 0.03599976593130084, "grad_norm": 1.4375, "learning_rate": 0.0001999018679185945, "loss": 4.4774, "step": 769 }, { "epoch": 0.03604657967113348, "grad_norm": 1.3515625, "learning_rate": 0.00019990160920813036, "loss": 4.6053, "step": 770 }, { "epoch": 0.03609339341096612, "grad_norm": 1.7109375, "learning_rate": 0.00019990135015725732, "loss": 5.2703, "step": 771 }, { "epoch": 0.036140207150798756, "grad_norm": 1.9375, "learning_rate": 0.0001999010907659762, "loss": 3.974, "step": 772 }, { "epoch": 0.0361870208906314, "grad_norm": 1.71875, "learning_rate": 0.00019990083103428786, "loss": 4.6998, "step": 773 }, { "epoch": 0.03623383463046404, "grad_norm": 1.4453125, "learning_rate": 0.00019990057096219322, "loss": 4.6743, "step": 774 }, { "epoch": 0.03628064837029668, "grad_norm": 1.375, "learning_rate": 0.00019990031054969317, "loss": 4.7411, "step": 775 }, { "epoch": 0.03632746211012932, "grad_norm": 1.25, "learning_rate": 0.00019990004979678863, "loss": 4.4763, "step": 776 }, { "epoch": 0.036374275849961966, "grad_norm": 1.4921875, "learning_rate": 0.0001998997887034804, "loss": 4.9488, "step": 777 }, { "epoch": 0.036421089589794606, "grad_norm": 1.2109375, "learning_rate": 0.00019989952726976942, "loss": 4.9847, "step": 778 }, { "epoch": 0.036467903329627245, "grad_norm": 2.328125, "learning_rate": 0.00019989926549565654, "loss": 5.1445, "step": 779 }, { "epoch": 0.036514717069459884, "grad_norm": 1.5625, "learning_rate": 0.00019989900338114274, "loss": 4.5144, "step": 780 }, { "epoch": 0.03656153080929253, "grad_norm": 1.984375, "learning_rate": 0.00019989874092622882, "loss": 4.4585, "step": 781 }, { "epoch": 0.03660834454912517, "grad_norm": 1.359375, "learning_rate": 0.00019989847813091575, "loss": 4.7481, "step": 782 }, { "epoch": 0.03665515828895781, "grad_norm": 1.375, "learning_rate": 0.00019989821499520437, "loss": 4.8575, "step": 783 }, { "epoch": 0.03670197202879045, "grad_norm": 1.90625, "learning_rate": 0.0001998979515190956, "loss": 5.0369, "step": 784 }, { "epoch": 0.03674878576862309, "grad_norm": 1.796875, "learning_rate": 0.00019989768770259034, "loss": 5.1513, "step": 785 }, { "epoch": 0.036795599508455734, "grad_norm": 1.5703125, "learning_rate": 0.00019989742354568945, "loss": 4.8713, "step": 786 }, { "epoch": 0.03684241324828837, "grad_norm": 1.40625, "learning_rate": 0.00019989715904839394, "loss": 4.5047, "step": 787 }, { "epoch": 0.03688922698812101, "grad_norm": 1.578125, "learning_rate": 0.00019989689421070456, "loss": 4.8304, "step": 788 }, { "epoch": 0.03693604072795365, "grad_norm": 2.171875, "learning_rate": 0.00019989662903262232, "loss": 4.4959, "step": 789 }, { "epoch": 0.0369828544677863, "grad_norm": 1.65625, "learning_rate": 0.00019989636351414806, "loss": 4.7573, "step": 790 }, { "epoch": 0.03702966820761894, "grad_norm": 1.7265625, "learning_rate": 0.00019989609765528276, "loss": 4.6362, "step": 791 }, { "epoch": 0.03707648194745158, "grad_norm": 1.6328125, "learning_rate": 0.00019989583145602725, "loss": 5.6069, "step": 792 }, { "epoch": 0.037123295687284216, "grad_norm": 1.5078125, "learning_rate": 0.0001998955649163825, "loss": 4.2933, "step": 793 }, { "epoch": 0.037170109427116856, "grad_norm": 1.3515625, "learning_rate": 0.00019989529803634935, "loss": 4.5968, "step": 794 }, { "epoch": 0.0372169231669495, "grad_norm": 1.34375, "learning_rate": 0.00019989503081592878, "loss": 4.6834, "step": 795 }, { "epoch": 0.03726373690678214, "grad_norm": 1.703125, "learning_rate": 0.00019989476325512167, "loss": 4.8039, "step": 796 }, { "epoch": 0.03731055064661478, "grad_norm": 1.4296875, "learning_rate": 0.00019989449535392888, "loss": 4.7068, "step": 797 }, { "epoch": 0.03735736438644742, "grad_norm": 1.6484375, "learning_rate": 0.0001998942271123514, "loss": 4.7417, "step": 798 }, { "epoch": 0.037404178126280066, "grad_norm": 1.5078125, "learning_rate": 0.00019989395853039013, "loss": 4.9799, "step": 799 }, { "epoch": 0.037450991866112705, "grad_norm": 1.515625, "learning_rate": 0.00019989368960804597, "loss": 4.612, "step": 800 }, { "epoch": 0.037497805605945345, "grad_norm": 1.3203125, "learning_rate": 0.0001998934203453198, "loss": 4.6746, "step": 801 }, { "epoch": 0.037544619345777984, "grad_norm": 1.765625, "learning_rate": 0.0001998931507422126, "loss": 5.1883, "step": 802 }, { "epoch": 0.03759143308561063, "grad_norm": 1.515625, "learning_rate": 0.00019989288079872526, "loss": 4.6499, "step": 803 }, { "epoch": 0.03763824682544327, "grad_norm": 1.4765625, "learning_rate": 0.0001998926105148587, "loss": 4.9738, "step": 804 }, { "epoch": 0.03768506056527591, "grad_norm": 1.9296875, "learning_rate": 0.00019989233989061385, "loss": 4.8912, "step": 805 }, { "epoch": 0.03773187430510855, "grad_norm": 1.46875, "learning_rate": 0.00019989206892599164, "loss": 4.9302, "step": 806 }, { "epoch": 0.03777868804494119, "grad_norm": 1.5, "learning_rate": 0.00019989179762099294, "loss": 4.5547, "step": 807 }, { "epoch": 0.037825501784773834, "grad_norm": 1.46875, "learning_rate": 0.00019989152597561875, "loss": 5.1403, "step": 808 }, { "epoch": 0.03787231552460647, "grad_norm": 1.703125, "learning_rate": 0.00019989125398986994, "loss": 4.6274, "step": 809 }, { "epoch": 0.03791912926443911, "grad_norm": 1.640625, "learning_rate": 0.00019989098166374745, "loss": 4.6729, "step": 810 }, { "epoch": 0.03796594300427175, "grad_norm": 1.5703125, "learning_rate": 0.00019989070899725225, "loss": 5.0307, "step": 811 }, { "epoch": 0.0380127567441044, "grad_norm": 2.046875, "learning_rate": 0.00019989043599038516, "loss": 5.3956, "step": 812 }, { "epoch": 0.03805957048393704, "grad_norm": 1.4765625, "learning_rate": 0.00019989016264314725, "loss": 4.6743, "step": 813 }, { "epoch": 0.038106384223769676, "grad_norm": 1.6875, "learning_rate": 0.00019988988895553938, "loss": 4.6816, "step": 814 }, { "epoch": 0.038153197963602316, "grad_norm": 2.46875, "learning_rate": 0.00019988961492756245, "loss": 5.0635, "step": 815 }, { "epoch": 0.038200011703434955, "grad_norm": 1.640625, "learning_rate": 0.0001998893405592175, "loss": 5.0767, "step": 816 }, { "epoch": 0.0382468254432676, "grad_norm": 1.3984375, "learning_rate": 0.0001998890658505053, "loss": 4.775, "step": 817 }, { "epoch": 0.03829363918310024, "grad_norm": 1.3515625, "learning_rate": 0.00019988879080142695, "loss": 4.7443, "step": 818 }, { "epoch": 0.03834045292293288, "grad_norm": 1.453125, "learning_rate": 0.0001998885154119833, "loss": 4.7973, "step": 819 }, { "epoch": 0.03838726666276552, "grad_norm": 1.1328125, "learning_rate": 0.00019988823968217532, "loss": 4.6744, "step": 820 }, { "epoch": 0.038434080402598166, "grad_norm": 1.8046875, "learning_rate": 0.0001998879636120039, "loss": 4.766, "step": 821 }, { "epoch": 0.038480894142430805, "grad_norm": 1.5390625, "learning_rate": 0.00019988768720147006, "loss": 4.2638, "step": 822 }, { "epoch": 0.038527707882263444, "grad_norm": 1.3984375, "learning_rate": 0.0001998874104505747, "loss": 4.7045, "step": 823 }, { "epoch": 0.038574521622096083, "grad_norm": 1.5390625, "learning_rate": 0.00019988713335931872, "loss": 4.9042, "step": 824 }, { "epoch": 0.03862133536192872, "grad_norm": 1.4296875, "learning_rate": 0.00019988685592770312, "loss": 4.8877, "step": 825 }, { "epoch": 0.03866814910176137, "grad_norm": 1.5234375, "learning_rate": 0.00019988657815572886, "loss": 4.8177, "step": 826 }, { "epoch": 0.03871496284159401, "grad_norm": 1.3515625, "learning_rate": 0.00019988630004339687, "loss": 4.7945, "step": 827 }, { "epoch": 0.03876177658142665, "grad_norm": 1.6640625, "learning_rate": 0.00019988602159070804, "loss": 4.8447, "step": 828 }, { "epoch": 0.03880859032125929, "grad_norm": 1.671875, "learning_rate": 0.0001998857427976634, "loss": 5.0944, "step": 829 }, { "epoch": 0.03885540406109193, "grad_norm": 1.1953125, "learning_rate": 0.00019988546366426385, "loss": 4.4111, "step": 830 }, { "epoch": 0.03890221780092457, "grad_norm": 1.8203125, "learning_rate": 0.00019988518419051035, "loss": 5.2838, "step": 831 }, { "epoch": 0.03894903154075721, "grad_norm": 1.2109375, "learning_rate": 0.00019988490437640387, "loss": 4.5825, "step": 832 }, { "epoch": 0.03899584528058985, "grad_norm": 1.453125, "learning_rate": 0.00019988462422194535, "loss": 4.4745, "step": 833 }, { "epoch": 0.0390426590204225, "grad_norm": 1.203125, "learning_rate": 0.00019988434372713573, "loss": 4.6076, "step": 834 }, { "epoch": 0.03908947276025514, "grad_norm": 1.171875, "learning_rate": 0.000199884062891976, "loss": 7.1992, "step": 835 }, { "epoch": 0.039136286500087776, "grad_norm": 1.4765625, "learning_rate": 0.0001998837817164671, "loss": 4.8558, "step": 836 }, { "epoch": 0.039183100239920415, "grad_norm": 1.265625, "learning_rate": 0.00019988350020060995, "loss": 4.8008, "step": 837 }, { "epoch": 0.039229913979753055, "grad_norm": 1.8125, "learning_rate": 0.0001998832183444056, "loss": 4.5893, "step": 838 }, { "epoch": 0.0392767277195857, "grad_norm": 2.953125, "learning_rate": 0.00019988293614785495, "loss": 4.7336, "step": 839 }, { "epoch": 0.03932354145941834, "grad_norm": 1.2578125, "learning_rate": 0.00019988265361095894, "loss": 4.6389, "step": 840 }, { "epoch": 0.03937035519925098, "grad_norm": 1.4765625, "learning_rate": 0.00019988237073371857, "loss": 4.7321, "step": 841 }, { "epoch": 0.03941716893908362, "grad_norm": 1.4453125, "learning_rate": 0.00019988208751613482, "loss": 4.9224, "step": 842 }, { "epoch": 0.039463982678916265, "grad_norm": 1.359375, "learning_rate": 0.00019988180395820858, "loss": 4.6328, "step": 843 }, { "epoch": 0.039510796418748904, "grad_norm": 1.2890625, "learning_rate": 0.00019988152005994092, "loss": 4.6747, "step": 844 }, { "epoch": 0.039557610158581544, "grad_norm": 1.5078125, "learning_rate": 0.0001998812358213327, "loss": 4.7951, "step": 845 }, { "epoch": 0.03960442389841418, "grad_norm": 1.53125, "learning_rate": 0.00019988095124238498, "loss": 4.875, "step": 846 }, { "epoch": 0.03965123763824682, "grad_norm": 2.0, "learning_rate": 0.0001998806663230987, "loss": 4.6214, "step": 847 }, { "epoch": 0.03969805137807947, "grad_norm": 1.3515625, "learning_rate": 0.0001998803810634748, "loss": 4.6325, "step": 848 }, { "epoch": 0.03974486511791211, "grad_norm": 1.4609375, "learning_rate": 0.00019988009546351428, "loss": 4.4966, "step": 849 }, { "epoch": 0.03979167885774475, "grad_norm": 1.4765625, "learning_rate": 0.00019987980952321813, "loss": 4.3231, "step": 850 }, { "epoch": 0.03983849259757739, "grad_norm": 1.6953125, "learning_rate": 0.00019987952324258728, "loss": 4.5497, "step": 851 }, { "epoch": 0.03988530633741003, "grad_norm": 1.8515625, "learning_rate": 0.00019987923662162274, "loss": 4.7293, "step": 852 }, { "epoch": 0.03993212007724267, "grad_norm": 1.328125, "learning_rate": 0.00019987894966032546, "loss": 4.5499, "step": 853 }, { "epoch": 0.03997893381707531, "grad_norm": 1.5078125, "learning_rate": 0.00019987866235869644, "loss": 4.4569, "step": 854 }, { "epoch": 0.04002574755690795, "grad_norm": 1.4921875, "learning_rate": 0.00019987837471673666, "loss": 4.6534, "step": 855 }, { "epoch": 0.04007256129674059, "grad_norm": 1.5, "learning_rate": 0.00019987808673444706, "loss": 4.5878, "step": 856 }, { "epoch": 0.040119375036573236, "grad_norm": 1.921875, "learning_rate": 0.0001998777984118287, "loss": 4.8028, "step": 857 }, { "epoch": 0.040166188776405876, "grad_norm": 1.4453125, "learning_rate": 0.00019987750974888248, "loss": 4.4677, "step": 858 }, { "epoch": 0.040213002516238515, "grad_norm": 2.703125, "learning_rate": 0.00019987722074560942, "loss": 3.6053, "step": 859 }, { "epoch": 0.040259816256071154, "grad_norm": 1.484375, "learning_rate": 0.0001998769314020105, "loss": 4.5523, "step": 860 }, { "epoch": 0.0403066299959038, "grad_norm": 1.3203125, "learning_rate": 0.00019987664171808675, "loss": 4.8903, "step": 861 }, { "epoch": 0.04035344373573644, "grad_norm": 1.265625, "learning_rate": 0.00019987635169383907, "loss": 4.1388, "step": 862 }, { "epoch": 0.04040025747556908, "grad_norm": 1.3671875, "learning_rate": 0.0001998760613292685, "loss": 4.7581, "step": 863 }, { "epoch": 0.04044707121540172, "grad_norm": 2.015625, "learning_rate": 0.00019987577062437605, "loss": 4.5211, "step": 864 }, { "epoch": 0.040493884955234365, "grad_norm": 1.453125, "learning_rate": 0.00019987547957916267, "loss": 4.7618, "step": 865 }, { "epoch": 0.040540698695067004, "grad_norm": 1.4296875, "learning_rate": 0.00019987518819362937, "loss": 4.65, "step": 866 }, { "epoch": 0.04058751243489964, "grad_norm": 2.0625, "learning_rate": 0.00019987489646777716, "loss": 4.479, "step": 867 }, { "epoch": 0.04063432617473228, "grad_norm": 1.234375, "learning_rate": 0.00019987460440160697, "loss": 4.3891, "step": 868 }, { "epoch": 0.04068113991456492, "grad_norm": 1.5625, "learning_rate": 0.00019987431199511985, "loss": 4.6761, "step": 869 }, { "epoch": 0.04072795365439757, "grad_norm": 1.765625, "learning_rate": 0.00019987401924831678, "loss": 4.9622, "step": 870 }, { "epoch": 0.04077476739423021, "grad_norm": 1.625, "learning_rate": 0.0001998737261611988, "loss": 4.8257, "step": 871 }, { "epoch": 0.04082158113406285, "grad_norm": 1.28125, "learning_rate": 0.00019987343273376683, "loss": 4.799, "step": 872 }, { "epoch": 0.040868394873895486, "grad_norm": 1.484375, "learning_rate": 0.00019987313896602195, "loss": 4.7085, "step": 873 }, { "epoch": 0.04091520861372813, "grad_norm": 1.609375, "learning_rate": 0.00019987284485796508, "loss": 4.5196, "step": 874 }, { "epoch": 0.04096202235356077, "grad_norm": 1.3359375, "learning_rate": 0.0001998725504095973, "loss": 4.7755, "step": 875 }, { "epoch": 0.04100883609339341, "grad_norm": 1.4453125, "learning_rate": 0.00019987225562091956, "loss": 4.8876, "step": 876 }, { "epoch": 0.04105564983322605, "grad_norm": 1.625, "learning_rate": 0.00019987196049193288, "loss": 5.0826, "step": 877 }, { "epoch": 0.04110246357305869, "grad_norm": 1.3828125, "learning_rate": 0.00019987166502263826, "loss": 4.0691, "step": 878 }, { "epoch": 0.041149277312891336, "grad_norm": 1.359375, "learning_rate": 0.00019987136921303673, "loss": 4.6226, "step": 879 }, { "epoch": 0.041196091052723975, "grad_norm": 1.3984375, "learning_rate": 0.0001998710730631293, "loss": 4.1228, "step": 880 }, { "epoch": 0.041242904792556614, "grad_norm": 1.7890625, "learning_rate": 0.00019987077657291692, "loss": 4.5224, "step": 881 }, { "epoch": 0.041289718532389254, "grad_norm": 1.5703125, "learning_rate": 0.00019987047974240064, "loss": 4.7483, "step": 882 }, { "epoch": 0.0413365322722219, "grad_norm": 1.3515625, "learning_rate": 0.0001998701825715815, "loss": 4.8497, "step": 883 }, { "epoch": 0.04138334601205454, "grad_norm": 1.4765625, "learning_rate": 0.00019986988506046047, "loss": 4.7039, "step": 884 }, { "epoch": 0.04143015975188718, "grad_norm": 3.78125, "learning_rate": 0.0001998695872090386, "loss": 5.7929, "step": 885 }, { "epoch": 0.04147697349171982, "grad_norm": 1.734375, "learning_rate": 0.00019986928901731687, "loss": 5.0761, "step": 886 }, { "epoch": 0.04152378723155246, "grad_norm": 1.6875, "learning_rate": 0.00019986899048529634, "loss": 4.5019, "step": 887 }, { "epoch": 0.041570600971385104, "grad_norm": 1.453125, "learning_rate": 0.00019986869161297795, "loss": 3.6201, "step": 888 }, { "epoch": 0.04161741471121774, "grad_norm": 1.6640625, "learning_rate": 0.0001998683924003628, "loss": 4.5424, "step": 889 }, { "epoch": 0.04166422845105038, "grad_norm": 1.625, "learning_rate": 0.00019986809284745186, "loss": 5.0528, "step": 890 }, { "epoch": 0.04171104219088302, "grad_norm": 1.3828125, "learning_rate": 0.00019986779295424616, "loss": 4.6761, "step": 891 }, { "epoch": 0.04175785593071567, "grad_norm": 1.40625, "learning_rate": 0.00019986749272074672, "loss": 4.7803, "step": 892 }, { "epoch": 0.04180466967054831, "grad_norm": 1.328125, "learning_rate": 0.0001998671921469546, "loss": 5.0041, "step": 893 }, { "epoch": 0.041851483410380946, "grad_norm": 1.265625, "learning_rate": 0.0001998668912328708, "loss": 4.4616, "step": 894 }, { "epoch": 0.041898297150213586, "grad_norm": 1.5078125, "learning_rate": 0.0001998665899784963, "loss": 4.4573, "step": 895 }, { "epoch": 0.04194511089004623, "grad_norm": 1.3046875, "learning_rate": 0.0001998662883838322, "loss": 4.8389, "step": 896 }, { "epoch": 0.04199192462987887, "grad_norm": 1.421875, "learning_rate": 0.00019986598644887947, "loss": 4.634, "step": 897 }, { "epoch": 0.04203873836971151, "grad_norm": 1.328125, "learning_rate": 0.0001998656841736392, "loss": 4.3696, "step": 898 }, { "epoch": 0.04208555210954415, "grad_norm": 1.234375, "learning_rate": 0.00019986538155811236, "loss": 4.3278, "step": 899 }, { "epoch": 0.04213236584937679, "grad_norm": 1.5234375, "learning_rate": 0.00019986507860230001, "loss": 4.8756, "step": 900 }, { "epoch": 0.042179179589209435, "grad_norm": 1.3203125, "learning_rate": 0.00019986477530620315, "loss": 4.5176, "step": 901 }, { "epoch": 0.042225993329042075, "grad_norm": 1.390625, "learning_rate": 0.0001998644716698229, "loss": 4.5216, "step": 902 }, { "epoch": 0.042272807068874714, "grad_norm": 1.5703125, "learning_rate": 0.00019986416769316018, "loss": 4.4821, "step": 903 }, { "epoch": 0.04231962080870735, "grad_norm": 1.5, "learning_rate": 0.0001998638633762161, "loss": 4.6752, "step": 904 }, { "epoch": 0.04236643454854, "grad_norm": 1.6640625, "learning_rate": 0.0001998635587189917, "loss": 4.5762, "step": 905 }, { "epoch": 0.04241324828837264, "grad_norm": 1.5390625, "learning_rate": 0.00019986325372148796, "loss": 4.6562, "step": 906 }, { "epoch": 0.04246006202820528, "grad_norm": 1.734375, "learning_rate": 0.000199862948383706, "loss": 4.8389, "step": 907 }, { "epoch": 0.04250687576803792, "grad_norm": 1.34375, "learning_rate": 0.00019986264270564676, "loss": 4.9152, "step": 908 }, { "epoch": 0.04255368950787056, "grad_norm": 1.6328125, "learning_rate": 0.00019986233668731136, "loss": 4.9042, "step": 909 }, { "epoch": 0.0426005032477032, "grad_norm": 1.2890625, "learning_rate": 0.00019986203032870085, "loss": 4.6545, "step": 910 }, { "epoch": 0.04264731698753584, "grad_norm": 1.5, "learning_rate": 0.0001998617236298162, "loss": 5.129, "step": 911 }, { "epoch": 0.04269413072736848, "grad_norm": 1.5234375, "learning_rate": 0.00019986141659065852, "loss": 4.6831, "step": 912 }, { "epoch": 0.04274094446720112, "grad_norm": 1.3828125, "learning_rate": 0.00019986110921122884, "loss": 4.5739, "step": 913 }, { "epoch": 0.04278775820703377, "grad_norm": 1.90625, "learning_rate": 0.0001998608014915282, "loss": 4.6836, "step": 914 }, { "epoch": 0.04283457194686641, "grad_norm": 1.59375, "learning_rate": 0.00019986049343155763, "loss": 5.0846, "step": 915 }, { "epoch": 0.042881385686699046, "grad_norm": 1.2890625, "learning_rate": 0.00019986018503131823, "loss": 4.4711, "step": 916 }, { "epoch": 0.042928199426531685, "grad_norm": 1.4140625, "learning_rate": 0.000199859876290811, "loss": 4.3599, "step": 917 }, { "epoch": 0.042975013166364325, "grad_norm": 1.796875, "learning_rate": 0.00019985956721003703, "loss": 4.3154, "step": 918 }, { "epoch": 0.04302182690619697, "grad_norm": 1.59375, "learning_rate": 0.00019985925778899735, "loss": 4.994, "step": 919 }, { "epoch": 0.04306864064602961, "grad_norm": 1.71875, "learning_rate": 0.00019985894802769302, "loss": 4.6047, "step": 920 }, { "epoch": 0.04311545438586225, "grad_norm": 1.4296875, "learning_rate": 0.0001998586379261251, "loss": 4.7863, "step": 921 }, { "epoch": 0.04316226812569489, "grad_norm": 1.1875, "learning_rate": 0.00019985832748429462, "loss": 4.6881, "step": 922 }, { "epoch": 0.043209081865527535, "grad_norm": 1.28125, "learning_rate": 0.0001998580167022027, "loss": 4.647, "step": 923 }, { "epoch": 0.043255895605360174, "grad_norm": 1.2265625, "learning_rate": 0.00019985770557985033, "loss": 4.3042, "step": 924 }, { "epoch": 0.043302709345192814, "grad_norm": 1.6953125, "learning_rate": 0.00019985739411723864, "loss": 5.3789, "step": 925 }, { "epoch": 0.04334952308502545, "grad_norm": 1.2734375, "learning_rate": 0.00019985708231436863, "loss": 4.5278, "step": 926 }, { "epoch": 0.0433963368248581, "grad_norm": 1.671875, "learning_rate": 0.00019985677017124136, "loss": 4.6452, "step": 927 }, { "epoch": 0.04344315056469074, "grad_norm": 1.5703125, "learning_rate": 0.00019985645768785793, "loss": 4.6879, "step": 928 }, { "epoch": 0.04348996430452338, "grad_norm": 1.609375, "learning_rate": 0.0001998561448642194, "loss": 4.672, "step": 929 }, { "epoch": 0.04353677804435602, "grad_norm": 1.5703125, "learning_rate": 0.00019985583170032683, "loss": 3.6971, "step": 930 }, { "epoch": 0.043583591784188656, "grad_norm": 1.90625, "learning_rate": 0.00019985551819618128, "loss": 4.5407, "step": 931 }, { "epoch": 0.0436304055240213, "grad_norm": 1.3046875, "learning_rate": 0.00019985520435178382, "loss": 4.6569, "step": 932 }, { "epoch": 0.04367721926385394, "grad_norm": 2.015625, "learning_rate": 0.00019985489016713555, "loss": 4.1209, "step": 933 }, { "epoch": 0.04372403300368658, "grad_norm": 1.8828125, "learning_rate": 0.00019985457564223745, "loss": 6.0386, "step": 934 }, { "epoch": 0.04377084674351922, "grad_norm": 1.4765625, "learning_rate": 0.00019985426077709072, "loss": 4.7749, "step": 935 }, { "epoch": 0.04381766048335187, "grad_norm": 1.6015625, "learning_rate": 0.00019985394557169635, "loss": 4.9989, "step": 936 }, { "epoch": 0.043864474223184506, "grad_norm": 1.0234375, "learning_rate": 0.00019985363002605544, "loss": 6.7876, "step": 937 }, { "epoch": 0.043911287963017145, "grad_norm": 1.1640625, "learning_rate": 0.000199853314140169, "loss": 4.4126, "step": 938 }, { "epoch": 0.043958101702849785, "grad_norm": 1.6796875, "learning_rate": 0.00019985299791403825, "loss": 5.0916, "step": 939 }, { "epoch": 0.044004915442682424, "grad_norm": 1.7578125, "learning_rate": 0.00019985268134766412, "loss": 4.7689, "step": 940 }, { "epoch": 0.04405172918251507, "grad_norm": 1.7265625, "learning_rate": 0.00019985236444104777, "loss": 4.7549, "step": 941 }, { "epoch": 0.04409854292234771, "grad_norm": 1.3515625, "learning_rate": 0.00019985204719419027, "loss": 4.7441, "step": 942 }, { "epoch": 0.04414535666218035, "grad_norm": 1.65625, "learning_rate": 0.00019985172960709267, "loss": 4.8585, "step": 943 }, { "epoch": 0.04419217040201299, "grad_norm": 1.2109375, "learning_rate": 0.0001998514116797561, "loss": 4.7233, "step": 944 }, { "epoch": 0.044238984141845634, "grad_norm": 1.328125, "learning_rate": 0.0001998510934121816, "loss": 4.138, "step": 945 }, { "epoch": 0.044285797881678274, "grad_norm": 1.4609375, "learning_rate": 0.00019985077480437027, "loss": 4.1307, "step": 946 }, { "epoch": 0.04433261162151091, "grad_norm": 1.25, "learning_rate": 0.0001998504558563232, "loss": 4.8172, "step": 947 }, { "epoch": 0.04437942536134355, "grad_norm": 1.8671875, "learning_rate": 0.00019985013656804147, "loss": 4.2123, "step": 948 }, { "epoch": 0.0444262391011762, "grad_norm": 1.4453125, "learning_rate": 0.0001998498169395262, "loss": 4.7561, "step": 949 }, { "epoch": 0.04447305284100884, "grad_norm": 1.828125, "learning_rate": 0.0001998494969707784, "loss": 4.918, "step": 950 }, { "epoch": 0.04451986658084148, "grad_norm": 1.4375, "learning_rate": 0.00019984917666179922, "loss": 4.2495, "step": 951 }, { "epoch": 0.04456668032067412, "grad_norm": 1.6953125, "learning_rate": 0.00019984885601258978, "loss": 4.4088, "step": 952 }, { "epoch": 0.044613494060506756, "grad_norm": 1.3203125, "learning_rate": 0.0001998485350231511, "loss": 4.4648, "step": 953 }, { "epoch": 0.0446603078003394, "grad_norm": 1.2890625, "learning_rate": 0.00019984821369348432, "loss": 4.6011, "step": 954 }, { "epoch": 0.04470712154017204, "grad_norm": 1.484375, "learning_rate": 0.00019984789202359053, "loss": 4.484, "step": 955 }, { "epoch": 0.04475393528000468, "grad_norm": 1.3203125, "learning_rate": 0.00019984757001347082, "loss": 4.4022, "step": 956 }, { "epoch": 0.04480074901983732, "grad_norm": 1.1953125, "learning_rate": 0.00019984724766312627, "loss": 4.5821, "step": 957 }, { "epoch": 0.044847562759669966, "grad_norm": 1.203125, "learning_rate": 0.000199846924972558, "loss": 5.6038, "step": 958 }, { "epoch": 0.044894376499502606, "grad_norm": 1.3671875, "learning_rate": 0.00019984660194176713, "loss": 6.3338, "step": 959 }, { "epoch": 0.044941190239335245, "grad_norm": 1.3515625, "learning_rate": 0.00019984627857075468, "loss": 4.547, "step": 960 }, { "epoch": 0.044988003979167884, "grad_norm": 1.3828125, "learning_rate": 0.00019984595485952186, "loss": 4.4812, "step": 961 }, { "epoch": 0.045034817719000524, "grad_norm": 1.3515625, "learning_rate": 0.00019984563080806971, "loss": 4.5458, "step": 962 }, { "epoch": 0.04508163145883317, "grad_norm": 1.5546875, "learning_rate": 0.00019984530641639932, "loss": 4.4752, "step": 963 }, { "epoch": 0.04512844519866581, "grad_norm": 1.3046875, "learning_rate": 0.00019984498168451183, "loss": 4.4879, "step": 964 }, { "epoch": 0.04517525893849845, "grad_norm": 1.8515625, "learning_rate": 0.00019984465661240835, "loss": 4.4885, "step": 965 }, { "epoch": 0.04522207267833109, "grad_norm": 1.3828125, "learning_rate": 0.00019984433120008997, "loss": 4.963, "step": 966 }, { "epoch": 0.045268886418163734, "grad_norm": 1.3515625, "learning_rate": 0.0001998440054475578, "loss": 4.6192, "step": 967 }, { "epoch": 0.04531570015799637, "grad_norm": 1.4375, "learning_rate": 0.00019984367935481294, "loss": 4.6093, "step": 968 }, { "epoch": 0.04536251389782901, "grad_norm": 4.25, "learning_rate": 0.0001998433529218565, "loss": 3.8403, "step": 969 }, { "epoch": 0.04540932763766165, "grad_norm": 1.4296875, "learning_rate": 0.00019984302614868966, "loss": 4.5532, "step": 970 }, { "epoch": 0.04545614137749429, "grad_norm": 1.2890625, "learning_rate": 0.00019984269903531345, "loss": 4.85, "step": 971 }, { "epoch": 0.04550295511732694, "grad_norm": 1.6796875, "learning_rate": 0.00019984237158172902, "loss": 3.8825, "step": 972 }, { "epoch": 0.04554976885715958, "grad_norm": 1.4296875, "learning_rate": 0.00019984204378793746, "loss": 4.5397, "step": 973 }, { "epoch": 0.045596582596992216, "grad_norm": 1.421875, "learning_rate": 0.00019984171565393992, "loss": 4.6376, "step": 974 }, { "epoch": 0.045643396336824855, "grad_norm": 1.390625, "learning_rate": 0.00019984138717973752, "loss": 4.5807, "step": 975 }, { "epoch": 0.0456902100766575, "grad_norm": 1.2734375, "learning_rate": 0.00019984105836533137, "loss": 4.7224, "step": 976 }, { "epoch": 0.04573702381649014, "grad_norm": 1.1953125, "learning_rate": 0.00019984072921072255, "loss": 4.454, "step": 977 }, { "epoch": 0.04578383755632278, "grad_norm": 1.34375, "learning_rate": 0.0001998403997159122, "loss": 4.5439, "step": 978 }, { "epoch": 0.04583065129615542, "grad_norm": 1.6953125, "learning_rate": 0.0001998400698809015, "loss": 5.163, "step": 979 }, { "epoch": 0.045877465035988066, "grad_norm": 1.3515625, "learning_rate": 0.00019983973970569152, "loss": 4.8874, "step": 980 }, { "epoch": 0.045924278775820705, "grad_norm": 1.3125, "learning_rate": 0.0001998394091902834, "loss": 4.6408, "step": 981 }, { "epoch": 0.045971092515653345, "grad_norm": 1.265625, "learning_rate": 0.00019983907833467828, "loss": 4.42, "step": 982 }, { "epoch": 0.046017906255485984, "grad_norm": 1.609375, "learning_rate": 0.00019983874713887728, "loss": 4.648, "step": 983 }, { "epoch": 0.04606471999531862, "grad_norm": 1.34375, "learning_rate": 0.0001998384156028815, "loss": 4.602, "step": 984 }, { "epoch": 0.04611153373515127, "grad_norm": 1.3515625, "learning_rate": 0.0001998380837266921, "loss": 4.6814, "step": 985 }, { "epoch": 0.04615834747498391, "grad_norm": 1.5078125, "learning_rate": 0.00019983775151031016, "loss": 5.0391, "step": 986 }, { "epoch": 0.04620516121481655, "grad_norm": 1.3984375, "learning_rate": 0.00019983741895373687, "loss": 4.905, "step": 987 }, { "epoch": 0.04625197495464919, "grad_norm": 1.328125, "learning_rate": 0.0001998370860569734, "loss": 4.3872, "step": 988 }, { "epoch": 0.046298788694481834, "grad_norm": 1.5703125, "learning_rate": 0.00019983675282002073, "loss": 4.371, "step": 989 }, { "epoch": 0.04634560243431447, "grad_norm": 1.328125, "learning_rate": 0.00019983641924288016, "loss": 4.7827, "step": 990 }, { "epoch": 0.04639241617414711, "grad_norm": 1.2109375, "learning_rate": 0.00019983608532555275, "loss": 4.2665, "step": 991 }, { "epoch": 0.04643922991397975, "grad_norm": 1.53125, "learning_rate": 0.00019983575106803967, "loss": 4.7434, "step": 992 }, { "epoch": 0.04648604365381239, "grad_norm": 1.375, "learning_rate": 0.00019983541647034203, "loss": 4.2945, "step": 993 }, { "epoch": 0.04653285739364504, "grad_norm": 1.4375, "learning_rate": 0.00019983508153246095, "loss": 4.6578, "step": 994 }, { "epoch": 0.046579671133477676, "grad_norm": 1.3671875, "learning_rate": 0.00019983474625439763, "loss": 4.5706, "step": 995 }, { "epoch": 0.046626484873310316, "grad_norm": 1.59375, "learning_rate": 0.00019983441063615317, "loss": 4.6284, "step": 996 }, { "epoch": 0.046673298613142955, "grad_norm": 1.4921875, "learning_rate": 0.00019983407467772873, "loss": 4.4343, "step": 997 }, { "epoch": 0.0467201123529756, "grad_norm": 2.796875, "learning_rate": 0.00019983373837912546, "loss": 3.8477, "step": 998 }, { "epoch": 0.04676692609280824, "grad_norm": 1.4296875, "learning_rate": 0.00019983340174034448, "loss": 4.8878, "step": 999 }, { "epoch": 0.04681373983264088, "grad_norm": 1.4765625, "learning_rate": 0.000199833064761387, "loss": 4.2363, "step": 1000 }, { "epoch": 0.04686055357247352, "grad_norm": 1.6171875, "learning_rate": 0.00019983272744225408, "loss": 4.3054, "step": 1001 }, { "epoch": 0.04690736731230616, "grad_norm": 1.3515625, "learning_rate": 0.0001998323897829469, "loss": 4.721, "step": 1002 }, { "epoch": 0.046954181052138805, "grad_norm": 1.8671875, "learning_rate": 0.00019983205178346664, "loss": 4.5095, "step": 1003 }, { "epoch": 0.047000994791971444, "grad_norm": 2.1875, "learning_rate": 0.00019983171344381445, "loss": 4.9735, "step": 1004 }, { "epoch": 0.04704780853180408, "grad_norm": 1.421875, "learning_rate": 0.00019983137476399146, "loss": 4.7178, "step": 1005 }, { "epoch": 0.04709462227163672, "grad_norm": 1.2578125, "learning_rate": 0.00019983103574399883, "loss": 4.5702, "step": 1006 }, { "epoch": 0.04714143601146937, "grad_norm": 1.53125, "learning_rate": 0.00019983069638383768, "loss": 4.2608, "step": 1007 }, { "epoch": 0.04718824975130201, "grad_norm": 1.5703125, "learning_rate": 0.00019983035668350924, "loss": 4.4859, "step": 1008 }, { "epoch": 0.04723506349113465, "grad_norm": 2.03125, "learning_rate": 0.00019983001664301462, "loss": 5.0905, "step": 1009 }, { "epoch": 0.04728187723096729, "grad_norm": 1.5390625, "learning_rate": 0.000199829676262355, "loss": 4.3662, "step": 1010 }, { "epoch": 0.04732869097079993, "grad_norm": 12.6875, "learning_rate": 0.00019982933554153154, "loss": 4.8988, "step": 1011 }, { "epoch": 0.04737550471063257, "grad_norm": 1.6640625, "learning_rate": 0.00019982899448054534, "loss": 4.8519, "step": 1012 }, { "epoch": 0.04742231845046521, "grad_norm": 1.515625, "learning_rate": 0.00019982865307939767, "loss": 4.2821, "step": 1013 }, { "epoch": 0.04746913219029785, "grad_norm": 1.171875, "learning_rate": 0.0001998283113380896, "loss": 4.5426, "step": 1014 }, { "epoch": 0.04751594593013049, "grad_norm": 1.7578125, "learning_rate": 0.00019982796925662233, "loss": 4.8208, "step": 1015 }, { "epoch": 0.04756275966996314, "grad_norm": 1.7734375, "learning_rate": 0.00019982762683499702, "loss": 4.9794, "step": 1016 }, { "epoch": 0.047609573409795776, "grad_norm": 1.5546875, "learning_rate": 0.00019982728407321484, "loss": 4.3352, "step": 1017 }, { "epoch": 0.047656387149628415, "grad_norm": 1.296875, "learning_rate": 0.00019982694097127698, "loss": 5.0963, "step": 1018 }, { "epoch": 0.047703200889461055, "grad_norm": 1.2421875, "learning_rate": 0.00019982659752918461, "loss": 4.6286, "step": 1019 }, { "epoch": 0.0477500146292937, "grad_norm": 1.609375, "learning_rate": 0.0001998262537469388, "loss": 4.5979, "step": 1020 }, { "epoch": 0.04779682836912634, "grad_norm": 1.4609375, "learning_rate": 0.00019982590962454088, "loss": 4.742, "step": 1021 }, { "epoch": 0.04784364210895898, "grad_norm": 1.7421875, "learning_rate": 0.0001998255651619919, "loss": 4.5855, "step": 1022 }, { "epoch": 0.04789045584879162, "grad_norm": 1.390625, "learning_rate": 0.0001998252203592931, "loss": 4.6202, "step": 1023 }, { "epoch": 0.04793726958862426, "grad_norm": 1.6875, "learning_rate": 0.00019982487521644565, "loss": 4.4254, "step": 1024 }, { "epoch": 0.047984083328456904, "grad_norm": 1.421875, "learning_rate": 0.00019982452973345068, "loss": 4.5054, "step": 1025 }, { "epoch": 0.048030897068289544, "grad_norm": 1.4765625, "learning_rate": 0.0001998241839103094, "loss": 4.6786, "step": 1026 }, { "epoch": 0.04807771080812218, "grad_norm": 1.5, "learning_rate": 0.00019982383774702294, "loss": 4.3143, "step": 1027 }, { "epoch": 0.04812452454795482, "grad_norm": 1.3984375, "learning_rate": 0.00019982349124359256, "loss": 4.7215, "step": 1028 }, { "epoch": 0.04817133828778747, "grad_norm": 1.4453125, "learning_rate": 0.0001998231444000194, "loss": 4.3089, "step": 1029 }, { "epoch": 0.04821815202762011, "grad_norm": 0.94921875, "learning_rate": 0.0001998227972163047, "loss": 3.5137, "step": 1030 }, { "epoch": 0.04826496576745275, "grad_norm": 1.640625, "learning_rate": 0.0001998224496924495, "loss": 4.8095, "step": 1031 }, { "epoch": 0.048311779507285386, "grad_norm": 1.5078125, "learning_rate": 0.00019982210182845514, "loss": 4.4471, "step": 1032 }, { "epoch": 0.048358593247118026, "grad_norm": 1.859375, "learning_rate": 0.0001998217536243227, "loss": 4.9278, "step": 1033 }, { "epoch": 0.04840540698695067, "grad_norm": 1.4453125, "learning_rate": 0.00019982140508005341, "loss": 4.5208, "step": 1034 }, { "epoch": 0.04845222072678331, "grad_norm": 1.4375, "learning_rate": 0.00019982105619564846, "loss": 4.4985, "step": 1035 }, { "epoch": 0.04849903446661595, "grad_norm": 1.640625, "learning_rate": 0.000199820706971109, "loss": 4.4892, "step": 1036 }, { "epoch": 0.04854584820644859, "grad_norm": 2.015625, "learning_rate": 0.0001998203574064363, "loss": 4.3691, "step": 1037 }, { "epoch": 0.048592661946281236, "grad_norm": 1.671875, "learning_rate": 0.00019982000750163147, "loss": 4.6198, "step": 1038 }, { "epoch": 0.048639475686113876, "grad_norm": 1.578125, "learning_rate": 0.00019981965725669577, "loss": 4.6617, "step": 1039 }, { "epoch": 0.048686289425946515, "grad_norm": 1.515625, "learning_rate": 0.00019981930667163034, "loss": 4.4368, "step": 1040 }, { "epoch": 0.048733103165779154, "grad_norm": 1.34375, "learning_rate": 0.00019981895574643636, "loss": 4.4055, "step": 1041 }, { "epoch": 0.0487799169056118, "grad_norm": 1.375, "learning_rate": 0.00019981860448111508, "loss": 4.5534, "step": 1042 }, { "epoch": 0.04882673064544444, "grad_norm": 1.3828125, "learning_rate": 0.00019981825287566772, "loss": 4.3428, "step": 1043 }, { "epoch": 0.04887354438527708, "grad_norm": 1.640625, "learning_rate": 0.0001998179009300954, "loss": 4.5028, "step": 1044 }, { "epoch": 0.04892035812510972, "grad_norm": 1.578125, "learning_rate": 0.0001998175486443993, "loss": 4.7842, "step": 1045 }, { "epoch": 0.04896717186494236, "grad_norm": 1.5234375, "learning_rate": 0.00019981719601858074, "loss": 4.7399, "step": 1046 }, { "epoch": 0.049013985604775004, "grad_norm": 1.1328125, "learning_rate": 0.0001998168430526408, "loss": 4.6428, "step": 1047 }, { "epoch": 0.04906079934460764, "grad_norm": 1.5390625, "learning_rate": 0.0001998164897465808, "loss": 4.6673, "step": 1048 }, { "epoch": 0.04910761308444028, "grad_norm": 1.6953125, "learning_rate": 0.00019981613610040185, "loss": 4.3171, "step": 1049 }, { "epoch": 0.04915442682427292, "grad_norm": 1.3359375, "learning_rate": 0.00019981578211410517, "loss": 4.0785, "step": 1050 }, { "epoch": 0.04920124056410557, "grad_norm": 1.3671875, "learning_rate": 0.000199815427787692, "loss": 4.6596, "step": 1051 }, { "epoch": 0.04924805430393821, "grad_norm": 1.3125, "learning_rate": 0.00019981507312116353, "loss": 4.4438, "step": 1052 }, { "epoch": 0.04929486804377085, "grad_norm": 1.4140625, "learning_rate": 0.00019981471811452095, "loss": 4.6984, "step": 1053 }, { "epoch": 0.049341681783603486, "grad_norm": 1.3203125, "learning_rate": 0.00019981436276776552, "loss": 4.5526, "step": 1054 }, { "epoch": 0.049388495523436125, "grad_norm": 1.6015625, "learning_rate": 0.0001998140070808984, "loss": 4.5915, "step": 1055 }, { "epoch": 0.04943530926326877, "grad_norm": 1.34375, "learning_rate": 0.0001998136510539208, "loss": 4.5895, "step": 1056 }, { "epoch": 0.04948212300310141, "grad_norm": 1.515625, "learning_rate": 0.00019981329468683398, "loss": 4.4976, "step": 1057 }, { "epoch": 0.04952893674293405, "grad_norm": 1.2890625, "learning_rate": 0.00019981293797963914, "loss": 4.7046, "step": 1058 }, { "epoch": 0.04957575048276669, "grad_norm": 1.4921875, "learning_rate": 0.00019981258093233745, "loss": 4.7181, "step": 1059 }, { "epoch": 0.049622564222599336, "grad_norm": 1.6640625, "learning_rate": 0.00019981222354493018, "loss": 4.775, "step": 1060 }, { "epoch": 0.049669377962431975, "grad_norm": 1.609375, "learning_rate": 0.00019981186581741854, "loss": 4.7097, "step": 1061 }, { "epoch": 0.049716191702264614, "grad_norm": 2.234375, "learning_rate": 0.0001998115077498037, "loss": 4.8148, "step": 1062 }, { "epoch": 0.049763005442097254, "grad_norm": 1.59375, "learning_rate": 0.00019981114934208694, "loss": 4.5235, "step": 1063 }, { "epoch": 0.04980981918192989, "grad_norm": 1.6015625, "learning_rate": 0.00019981079059426944, "loss": 4.8127, "step": 1064 }, { "epoch": 0.04985663292176254, "grad_norm": 1.3203125, "learning_rate": 0.00019981043150635249, "loss": 4.8259, "step": 1065 }, { "epoch": 0.04990344666159518, "grad_norm": 1.4296875, "learning_rate": 0.00019981007207833722, "loss": 4.5481, "step": 1066 }, { "epoch": 0.04995026040142782, "grad_norm": 1.453125, "learning_rate": 0.0001998097123102249, "loss": 4.6965, "step": 1067 }, { "epoch": 0.04999707414126046, "grad_norm": 1.34375, "learning_rate": 0.00019980935220201675, "loss": 4.4313, "step": 1068 }, { "epoch": 0.0500438878810931, "grad_norm": 1.3125, "learning_rate": 0.000199808991753714, "loss": 4.813, "step": 1069 }, { "epoch": 0.05009070162092574, "grad_norm": 1.2578125, "learning_rate": 0.0001998086309653179, "loss": 4.5004, "step": 1070 }, { "epoch": 0.05013751536075838, "grad_norm": 1.5234375, "learning_rate": 0.0001998082698368297, "loss": 4.5482, "step": 1071 }, { "epoch": 0.05018432910059102, "grad_norm": 1.5859375, "learning_rate": 0.00019980790836825053, "loss": 4.5339, "step": 1072 }, { "epoch": 0.05023114284042367, "grad_norm": 2.15625, "learning_rate": 0.00019980754655958167, "loss": 4.116, "step": 1073 }, { "epoch": 0.05027795658025631, "grad_norm": 1.78125, "learning_rate": 0.00019980718441082437, "loss": 4.6404, "step": 1074 }, { "epoch": 0.050324770320088946, "grad_norm": 1.8125, "learning_rate": 0.00019980682192197988, "loss": 4.8031, "step": 1075 }, { "epoch": 0.050371584059921586, "grad_norm": 1.3515625, "learning_rate": 0.00019980645909304943, "loss": 4.2362, "step": 1076 }, { "epoch": 0.050418397799754225, "grad_norm": 2.453125, "learning_rate": 0.0001998060959240342, "loss": 4.4638, "step": 1077 }, { "epoch": 0.05046521153958687, "grad_norm": 1.5625, "learning_rate": 0.0001998057324149355, "loss": 5.023, "step": 1078 }, { "epoch": 0.05051202527941951, "grad_norm": 1.5078125, "learning_rate": 0.00019980536856575452, "loss": 4.7134, "step": 1079 }, { "epoch": 0.05055883901925215, "grad_norm": 1.5859375, "learning_rate": 0.00019980500437649252, "loss": 4.6968, "step": 1080 }, { "epoch": 0.05060565275908479, "grad_norm": 1.6875, "learning_rate": 0.00019980463984715075, "loss": 4.3159, "step": 1081 }, { "epoch": 0.050652466498917435, "grad_norm": 1.1640625, "learning_rate": 0.00019980427497773043, "loss": 4.7636, "step": 1082 }, { "epoch": 0.050699280238750075, "grad_norm": 1.1796875, "learning_rate": 0.0001998039097682328, "loss": 4.7002, "step": 1083 }, { "epoch": 0.050746093978582714, "grad_norm": 1.484375, "learning_rate": 0.00019980354421865912, "loss": 4.6075, "step": 1084 }, { "epoch": 0.05079290771841535, "grad_norm": 1.125, "learning_rate": 0.00019980317832901063, "loss": 6.6866, "step": 1085 }, { "epoch": 0.05083972145824799, "grad_norm": 1.3359375, "learning_rate": 0.00019980281209928857, "loss": 4.4522, "step": 1086 }, { "epoch": 0.05088653519808064, "grad_norm": 1.515625, "learning_rate": 0.00019980244552949422, "loss": 4.5856, "step": 1087 }, { "epoch": 0.05093334893791328, "grad_norm": 1.3203125, "learning_rate": 0.00019980207861962877, "loss": 4.5774, "step": 1088 }, { "epoch": 0.05098016267774592, "grad_norm": 1.5234375, "learning_rate": 0.00019980171136969353, "loss": 5.0388, "step": 1089 }, { "epoch": 0.05102697641757856, "grad_norm": 1.328125, "learning_rate": 0.00019980134377968974, "loss": 4.5975, "step": 1090 }, { "epoch": 0.0510737901574112, "grad_norm": 1.578125, "learning_rate": 0.00019980097584961865, "loss": 4.5601, "step": 1091 }, { "epoch": 0.05112060389724384, "grad_norm": 1.2734375, "learning_rate": 0.00019980060757948147, "loss": 3.9614, "step": 1092 }, { "epoch": 0.05116741763707648, "grad_norm": 1.5234375, "learning_rate": 0.0001998002389692795, "loss": 4.5646, "step": 1093 }, { "epoch": 0.05121423137690912, "grad_norm": 1.2578125, "learning_rate": 0.00019979987001901396, "loss": 4.3984, "step": 1094 }, { "epoch": 0.05126104511674177, "grad_norm": 1.3828125, "learning_rate": 0.00019979950072868618, "loss": 4.5341, "step": 1095 }, { "epoch": 0.051307858856574406, "grad_norm": 1.453125, "learning_rate": 0.00019979913109829735, "loss": 4.6495, "step": 1096 }, { "epoch": 0.051354672596407046, "grad_norm": 1.8125, "learning_rate": 0.0001997987611278487, "loss": 4.4311, "step": 1097 }, { "epoch": 0.051401486336239685, "grad_norm": 1.2734375, "learning_rate": 0.00019979839081734162, "loss": 4.4643, "step": 1098 }, { "epoch": 0.051448300076072324, "grad_norm": 1.5, "learning_rate": 0.00019979802016677724, "loss": 4.4021, "step": 1099 }, { "epoch": 0.05149511381590497, "grad_norm": 1.359375, "learning_rate": 0.0001997976491761569, "loss": 4.618, "step": 1100 }, { "epoch": 0.05154192755573761, "grad_norm": 1.2578125, "learning_rate": 0.00019979727784548182, "loss": 4.3402, "step": 1101 }, { "epoch": 0.05158874129557025, "grad_norm": 1.8671875, "learning_rate": 0.00019979690617475334, "loss": 4.3651, "step": 1102 }, { "epoch": 0.05163555503540289, "grad_norm": 1.375, "learning_rate": 0.00019979653416397263, "loss": 4.8042, "step": 1103 }, { "epoch": 0.051682368775235535, "grad_norm": 1.65625, "learning_rate": 0.00019979616181314098, "loss": 4.4716, "step": 1104 }, { "epoch": 0.051729182515068174, "grad_norm": 1.6640625, "learning_rate": 0.00019979578912225968, "loss": 4.5524, "step": 1105 }, { "epoch": 0.051775996254900813, "grad_norm": 1.8828125, "learning_rate": 0.00019979541609133002, "loss": 4.8036, "step": 1106 }, { "epoch": 0.05182280999473345, "grad_norm": 1.1484375, "learning_rate": 0.00019979504272035322, "loss": 7.3304, "step": 1107 }, { "epoch": 0.05186962373456609, "grad_norm": 1.1328125, "learning_rate": 0.0001997946690093306, "loss": 4.5835, "step": 1108 }, { "epoch": 0.05191643747439874, "grad_norm": 1.84375, "learning_rate": 0.0001997942949582634, "loss": 4.4781, "step": 1109 }, { "epoch": 0.05196325121423138, "grad_norm": 1.1953125, "learning_rate": 0.00019979392056715295, "loss": 4.2529, "step": 1110 }, { "epoch": 0.05201006495406402, "grad_norm": 1.203125, "learning_rate": 0.00019979354583600042, "loss": 3.5285, "step": 1111 }, { "epoch": 0.052056878693896656, "grad_norm": 1.1484375, "learning_rate": 0.00019979317076480722, "loss": 6.077, "step": 1112 }, { "epoch": 0.0521036924337293, "grad_norm": 1.3671875, "learning_rate": 0.0001997927953535745, "loss": 4.5559, "step": 1113 }, { "epoch": 0.05215050617356194, "grad_norm": 1.1484375, "learning_rate": 0.00019979241960230363, "loss": 4.667, "step": 1114 }, { "epoch": 0.05219731991339458, "grad_norm": 2.5625, "learning_rate": 0.00019979204351099585, "loss": 4.5503, "step": 1115 }, { "epoch": 0.05224413365322722, "grad_norm": 1.3046875, "learning_rate": 0.00019979166707965245, "loss": 4.667, "step": 1116 }, { "epoch": 0.05229094739305986, "grad_norm": 1.6171875, "learning_rate": 0.00019979129030827473, "loss": 4.7207, "step": 1117 }, { "epoch": 0.052337761132892506, "grad_norm": 1.234375, "learning_rate": 0.00019979091319686392, "loss": 4.4253, "step": 1118 }, { "epoch": 0.052384574872725145, "grad_norm": 1.4453125, "learning_rate": 0.00019979053574542137, "loss": 4.5771, "step": 1119 }, { "epoch": 0.052431388612557785, "grad_norm": 1.1171875, "learning_rate": 0.00019979015795394832, "loss": 4.7428, "step": 1120 }, { "epoch": 0.052478202352390424, "grad_norm": 1.6015625, "learning_rate": 0.0001997897798224461, "loss": 4.4423, "step": 1121 }, { "epoch": 0.05252501609222307, "grad_norm": 1.3515625, "learning_rate": 0.00019978940135091593, "loss": 4.5562, "step": 1122 }, { "epoch": 0.05257182983205571, "grad_norm": 1.359375, "learning_rate": 0.00019978902253935918, "loss": 4.287, "step": 1123 }, { "epoch": 0.05261864357188835, "grad_norm": 1.4296875, "learning_rate": 0.0001997886433877771, "loss": 4.7804, "step": 1124 }, { "epoch": 0.05266545731172099, "grad_norm": 1.6328125, "learning_rate": 0.00019978826389617095, "loss": 4.5797, "step": 1125 }, { "epoch": 0.052712271051553634, "grad_norm": 2.875, "learning_rate": 0.0001997878840645421, "loss": 4.5806, "step": 1126 }, { "epoch": 0.052759084791386274, "grad_norm": 1.296875, "learning_rate": 0.0001997875038928918, "loss": 4.2506, "step": 1127 }, { "epoch": 0.05280589853121891, "grad_norm": 1.234375, "learning_rate": 0.00019978712338122135, "loss": 4.5784, "step": 1128 }, { "epoch": 0.05285271227105155, "grad_norm": 1.4296875, "learning_rate": 0.00019978674252953203, "loss": 4.3331, "step": 1129 }, { "epoch": 0.05289952601088419, "grad_norm": 1.7578125, "learning_rate": 0.00019978636133782517, "loss": 4.2298, "step": 1130 }, { "epoch": 0.05294633975071684, "grad_norm": 1.3359375, "learning_rate": 0.00019978597980610202, "loss": 4.3805, "step": 1131 }, { "epoch": 0.05299315349054948, "grad_norm": 1.4765625, "learning_rate": 0.0001997855979343639, "loss": 4.4189, "step": 1132 }, { "epoch": 0.053039967230382117, "grad_norm": 1.7578125, "learning_rate": 0.0001997852157226122, "loss": 4.6598, "step": 1133 }, { "epoch": 0.053086780970214756, "grad_norm": 2.125, "learning_rate": 0.00019978483317084807, "loss": 4.4953, "step": 1134 }, { "epoch": 0.0531335947100474, "grad_norm": 1.1953125, "learning_rate": 0.00019978445027907292, "loss": 4.3335, "step": 1135 }, { "epoch": 0.05318040844988004, "grad_norm": 1.46875, "learning_rate": 0.00019978406704728802, "loss": 4.691, "step": 1136 }, { "epoch": 0.05322722218971268, "grad_norm": 1.6484375, "learning_rate": 0.0001997836834754947, "loss": 4.5826, "step": 1137 }, { "epoch": 0.05327403592954532, "grad_norm": 1.2578125, "learning_rate": 0.00019978329956369423, "loss": 4.4478, "step": 1138 }, { "epoch": 0.05332084966937796, "grad_norm": 1.3828125, "learning_rate": 0.00019978291531188793, "loss": 4.2634, "step": 1139 }, { "epoch": 0.053367663409210606, "grad_norm": 1.2890625, "learning_rate": 0.0001997825307200771, "loss": 4.6536, "step": 1140 }, { "epoch": 0.053414477149043245, "grad_norm": 1.421875, "learning_rate": 0.00019978214578826307, "loss": 4.3062, "step": 1141 }, { "epoch": 0.053461290888875884, "grad_norm": 1.390625, "learning_rate": 0.00019978176051644716, "loss": 4.3864, "step": 1142 }, { "epoch": 0.053508104628708524, "grad_norm": 1.5390625, "learning_rate": 0.00019978137490463067, "loss": 4.114, "step": 1143 }, { "epoch": 0.05355491836854117, "grad_norm": 1.65625, "learning_rate": 0.00019978098895281492, "loss": 4.036, "step": 1144 }, { "epoch": 0.05360173210837381, "grad_norm": 1.421875, "learning_rate": 0.00019978060266100117, "loss": 4.0456, "step": 1145 }, { "epoch": 0.05364854584820645, "grad_norm": 1.4921875, "learning_rate": 0.00019978021602919083, "loss": 4.2978, "step": 1146 }, { "epoch": 0.05369535958803909, "grad_norm": 1.3125, "learning_rate": 0.00019977982905738516, "loss": 4.6331, "step": 1147 }, { "epoch": 0.05374217332787173, "grad_norm": 1.4609375, "learning_rate": 0.0001997794417455855, "loss": 4.0521, "step": 1148 }, { "epoch": 0.05378898706770437, "grad_norm": 1.46875, "learning_rate": 0.00019977905409379316, "loss": 4.3761, "step": 1149 }, { "epoch": 0.05383580080753701, "grad_norm": 1.7109375, "learning_rate": 0.00019977866610200944, "loss": 5.0324, "step": 1150 }, { "epoch": 0.05388261454736965, "grad_norm": 1.53125, "learning_rate": 0.00019977827777023567, "loss": 4.4265, "step": 1151 }, { "epoch": 0.05392942828720229, "grad_norm": 1.8828125, "learning_rate": 0.00019977788909847322, "loss": 4.9626, "step": 1152 }, { "epoch": 0.05397624202703494, "grad_norm": 1.515625, "learning_rate": 0.00019977750008672338, "loss": 4.3189, "step": 1153 }, { "epoch": 0.05402305576686758, "grad_norm": 1.203125, "learning_rate": 0.00019977711073498747, "loss": 4.3869, "step": 1154 }, { "epoch": 0.054069869506700216, "grad_norm": 1.6796875, "learning_rate": 0.00019977672104326682, "loss": 4.7115, "step": 1155 }, { "epoch": 0.054116683246532855, "grad_norm": 1.828125, "learning_rate": 0.00019977633101156276, "loss": 4.7655, "step": 1156 }, { "epoch": 0.0541634969863655, "grad_norm": 1.734375, "learning_rate": 0.0001997759406398766, "loss": 4.7069, "step": 1157 }, { "epoch": 0.05421031072619814, "grad_norm": 1.5, "learning_rate": 0.00019977554992820973, "loss": 3.9359, "step": 1158 }, { "epoch": 0.05425712446603078, "grad_norm": 1.3828125, "learning_rate": 0.00019977515887656343, "loss": 4.5473, "step": 1159 }, { "epoch": 0.05430393820586342, "grad_norm": 1.71875, "learning_rate": 0.00019977476748493902, "loss": 4.824, "step": 1160 }, { "epoch": 0.05435075194569606, "grad_norm": 1.3359375, "learning_rate": 0.00019977437575333787, "loss": 4.4835, "step": 1161 }, { "epoch": 0.054397565685528705, "grad_norm": 1.3203125, "learning_rate": 0.00019977398368176128, "loss": 4.5989, "step": 1162 }, { "epoch": 0.054444379425361344, "grad_norm": 1.25, "learning_rate": 0.00019977359127021065, "loss": 4.3505, "step": 1163 }, { "epoch": 0.054491193165193984, "grad_norm": 1.3828125, "learning_rate": 0.00019977319851868723, "loss": 4.5688, "step": 1164 }, { "epoch": 0.05453800690502662, "grad_norm": 1.5078125, "learning_rate": 0.00019977280542719244, "loss": 4.4679, "step": 1165 }, { "epoch": 0.05458482064485927, "grad_norm": 1.546875, "learning_rate": 0.00019977241199572757, "loss": 4.8508, "step": 1166 }, { "epoch": 0.05463163438469191, "grad_norm": 1.34375, "learning_rate": 0.00019977201822429398, "loss": 4.4077, "step": 1167 }, { "epoch": 0.05467844812452455, "grad_norm": 1.296875, "learning_rate": 0.000199771624112893, "loss": 4.7961, "step": 1168 }, { "epoch": 0.05472526186435719, "grad_norm": 1.1953125, "learning_rate": 0.00019977122966152594, "loss": 4.5459, "step": 1169 }, { "epoch": 0.05477207560418983, "grad_norm": 1.53125, "learning_rate": 0.00019977083487019424, "loss": 3.9573, "step": 1170 }, { "epoch": 0.05481888934402247, "grad_norm": 1.3359375, "learning_rate": 0.00019977043973889915, "loss": 4.491, "step": 1171 }, { "epoch": 0.05486570308385511, "grad_norm": 1.109375, "learning_rate": 0.00019977004426764206, "loss": 4.5687, "step": 1172 }, { "epoch": 0.05491251682368775, "grad_norm": 1.4609375, "learning_rate": 0.00019976964845642433, "loss": 4.5016, "step": 1173 }, { "epoch": 0.05495933056352039, "grad_norm": 1.2265625, "learning_rate": 0.00019976925230524728, "loss": 4.399, "step": 1174 }, { "epoch": 0.05500614430335304, "grad_norm": 1.2734375, "learning_rate": 0.00019976885581411227, "loss": 4.2116, "step": 1175 }, { "epoch": 0.055052958043185676, "grad_norm": 1.3359375, "learning_rate": 0.00019976845898302066, "loss": 4.6402, "step": 1176 }, { "epoch": 0.055099771783018316, "grad_norm": 1.4609375, "learning_rate": 0.00019976806181197377, "loss": 4.488, "step": 1177 }, { "epoch": 0.055146585522850955, "grad_norm": 2.296875, "learning_rate": 0.00019976766430097298, "loss": 4.7537, "step": 1178 }, { "epoch": 0.055193399262683594, "grad_norm": 1.28125, "learning_rate": 0.00019976726645001965, "loss": 4.5711, "step": 1179 }, { "epoch": 0.05524021300251624, "grad_norm": 1.8125, "learning_rate": 0.00019976686825911512, "loss": 4.4971, "step": 1180 }, { "epoch": 0.05528702674234888, "grad_norm": 1.171875, "learning_rate": 0.00019976646972826078, "loss": 5.5976, "step": 1181 }, { "epoch": 0.05533384048218152, "grad_norm": 1.390625, "learning_rate": 0.00019976607085745793, "loss": 4.6036, "step": 1182 }, { "epoch": 0.05538065422201416, "grad_norm": 1.3125, "learning_rate": 0.00019976567164670797, "loss": 4.7965, "step": 1183 }, { "epoch": 0.055427467961846805, "grad_norm": 1.359375, "learning_rate": 0.00019976527209601225, "loss": 4.2146, "step": 1184 }, { "epoch": 0.055474281701679444, "grad_norm": 1.3984375, "learning_rate": 0.00019976487220537212, "loss": 4.4539, "step": 1185 }, { "epoch": 0.05552109544151208, "grad_norm": 1.625, "learning_rate": 0.00019976447197478898, "loss": 4.96, "step": 1186 }, { "epoch": 0.05556790918134472, "grad_norm": 1.1484375, "learning_rate": 0.00019976407140426415, "loss": 4.6782, "step": 1187 }, { "epoch": 0.05561472292117737, "grad_norm": 1.9609375, "learning_rate": 0.00019976367049379904, "loss": 4.8021, "step": 1188 }, { "epoch": 0.05566153666101001, "grad_norm": 1.5390625, "learning_rate": 0.00019976326924339496, "loss": 4.6504, "step": 1189 }, { "epoch": 0.05570835040084265, "grad_norm": 1.1796875, "learning_rate": 0.00019976286765305333, "loss": 4.5497, "step": 1190 }, { "epoch": 0.05575516414067529, "grad_norm": 1.3359375, "learning_rate": 0.00019976246572277545, "loss": 4.6512, "step": 1191 }, { "epoch": 0.055801977880507926, "grad_norm": 2.03125, "learning_rate": 0.00019976206345256276, "loss": 4.6638, "step": 1192 }, { "epoch": 0.05584879162034057, "grad_norm": 1.4140625, "learning_rate": 0.00019976166084241664, "loss": 4.4816, "step": 1193 }, { "epoch": 0.05589560536017321, "grad_norm": 1.6953125, "learning_rate": 0.00019976125789233839, "loss": 4.3003, "step": 1194 }, { "epoch": 0.05594241910000585, "grad_norm": 1.390625, "learning_rate": 0.00019976085460232944, "loss": 3.0717, "step": 1195 }, { "epoch": 0.05598923283983849, "grad_norm": 1.2890625, "learning_rate": 0.0001997604509723911, "loss": 4.641, "step": 1196 }, { "epoch": 0.05603604657967114, "grad_norm": 1.546875, "learning_rate": 0.00019976004700252484, "loss": 4.4435, "step": 1197 }, { "epoch": 0.056082860319503776, "grad_norm": 1.390625, "learning_rate": 0.00019975964269273194, "loss": 4.3395, "step": 1198 }, { "epoch": 0.056129674059336415, "grad_norm": 1.5703125, "learning_rate": 0.00019975923804301389, "loss": 4.3152, "step": 1199 }, { "epoch": 0.056176487799169054, "grad_norm": 1.46875, "learning_rate": 0.00019975883305337197, "loss": 4.5379, "step": 1200 }, { "epoch": 0.056223301539001694, "grad_norm": 1.40625, "learning_rate": 0.00019975842772380757, "loss": 4.4521, "step": 1201 }, { "epoch": 0.05627011527883434, "grad_norm": 1.6328125, "learning_rate": 0.00019975802205432212, "loss": 4.0957, "step": 1202 }, { "epoch": 0.05631692901866698, "grad_norm": 1.7421875, "learning_rate": 0.00019975761604491696, "loss": 3.8602, "step": 1203 }, { "epoch": 0.05636374275849962, "grad_norm": 1.2421875, "learning_rate": 0.0001997572096955935, "loss": 4.6358, "step": 1204 }, { "epoch": 0.05641055649833226, "grad_norm": 1.3828125, "learning_rate": 0.0001997568030063531, "loss": 4.3062, "step": 1205 }, { "epoch": 0.056457370238164904, "grad_norm": 1.4296875, "learning_rate": 0.00019975639597719717, "loss": 4.6625, "step": 1206 }, { "epoch": 0.056504183977997544, "grad_norm": 1.3203125, "learning_rate": 0.0001997559886081271, "loss": 4.1793, "step": 1207 }, { "epoch": 0.05655099771783018, "grad_norm": 1.140625, "learning_rate": 0.00019975558089914424, "loss": 4.2056, "step": 1208 }, { "epoch": 0.05659781145766282, "grad_norm": 1.3671875, "learning_rate": 0.00019975517285025004, "loss": 4.6937, "step": 1209 }, { "epoch": 0.05664462519749546, "grad_norm": 1.2109375, "learning_rate": 0.00019975476446144582, "loss": 4.3804, "step": 1210 }, { "epoch": 0.05669143893732811, "grad_norm": 1.4296875, "learning_rate": 0.00019975435573273302, "loss": 4.4065, "step": 1211 }, { "epoch": 0.05673825267716075, "grad_norm": 1.3828125, "learning_rate": 0.000199753946664113, "loss": 4.4436, "step": 1212 }, { "epoch": 0.056785066416993386, "grad_norm": 1.296875, "learning_rate": 0.0001997535372555872, "loss": 4.4513, "step": 1213 }, { "epoch": 0.056831880156826026, "grad_norm": 1.2109375, "learning_rate": 0.000199753127507157, "loss": 4.3368, "step": 1214 }, { "epoch": 0.05687869389665867, "grad_norm": 1.6328125, "learning_rate": 0.00019975271741882376, "loss": 4.849, "step": 1215 }, { "epoch": 0.05692550763649131, "grad_norm": 1.6484375, "learning_rate": 0.00019975230699058888, "loss": 4.0726, "step": 1216 }, { "epoch": 0.05697232137632395, "grad_norm": 1.3515625, "learning_rate": 0.00019975189622245383, "loss": 4.9176, "step": 1217 }, { "epoch": 0.05701913511615659, "grad_norm": 1.1796875, "learning_rate": 0.0001997514851144199, "loss": 4.2845, "step": 1218 }, { "epoch": 0.057065948855989236, "grad_norm": 1.3359375, "learning_rate": 0.0001997510736664886, "loss": 4.605, "step": 1219 }, { "epoch": 0.057112762595821875, "grad_norm": 2.140625, "learning_rate": 0.00019975066187866128, "loss": 4.4735, "step": 1220 }, { "epoch": 0.057159576335654515, "grad_norm": 1.3125, "learning_rate": 0.00019975024975093933, "loss": 4.0359, "step": 1221 }, { "epoch": 0.057206390075487154, "grad_norm": 1.78125, "learning_rate": 0.00019974983728332415, "loss": 5.1669, "step": 1222 }, { "epoch": 0.05725320381531979, "grad_norm": 1.5546875, "learning_rate": 0.0001997494244758172, "loss": 4.2296, "step": 1223 }, { "epoch": 0.05730001755515244, "grad_norm": 1.53125, "learning_rate": 0.00019974901132841983, "loss": 4.353, "step": 1224 }, { "epoch": 0.05734683129498508, "grad_norm": 1.9453125, "learning_rate": 0.00019974859784113348, "loss": 4.7037, "step": 1225 }, { "epoch": 0.05739364503481772, "grad_norm": 1.25, "learning_rate": 0.00019974818401395953, "loss": 4.2431, "step": 1226 }, { "epoch": 0.05744045877465036, "grad_norm": 1.171875, "learning_rate": 0.00019974776984689943, "loss": 4.1929, "step": 1227 }, { "epoch": 0.057487272514483004, "grad_norm": 1.21875, "learning_rate": 0.00019974735533995457, "loss": 4.3112, "step": 1228 }, { "epoch": 0.05753408625431564, "grad_norm": 1.4296875, "learning_rate": 0.00019974694049312635, "loss": 5.2464, "step": 1229 }, { "epoch": 0.05758089999414828, "grad_norm": 1.5859375, "learning_rate": 0.00019974652530641623, "loss": 4.1207, "step": 1230 }, { "epoch": 0.05762771373398092, "grad_norm": 1.3671875, "learning_rate": 0.00019974610977982552, "loss": 4.136, "step": 1231 }, { "epoch": 0.05767452747381356, "grad_norm": 1.2890625, "learning_rate": 0.0001997456939133558, "loss": 3.9523, "step": 1232 }, { "epoch": 0.05772134121364621, "grad_norm": 1.25, "learning_rate": 0.0001997452777070083, "loss": 4.5245, "step": 1233 }, { "epoch": 0.05776815495347885, "grad_norm": 1.3984375, "learning_rate": 0.0001997448611607846, "loss": 4.7061, "step": 1234 }, { "epoch": 0.057814968693311486, "grad_norm": 1.671875, "learning_rate": 0.00019974444427468601, "loss": 4.542, "step": 1235 }, { "epoch": 0.057861782433144125, "grad_norm": 1.5078125, "learning_rate": 0.00019974402704871403, "loss": 4.5721, "step": 1236 }, { "epoch": 0.05790859617297677, "grad_norm": 1.1875, "learning_rate": 0.00019974360948287002, "loss": 4.6069, "step": 1237 }, { "epoch": 0.05795540991280941, "grad_norm": 1.515625, "learning_rate": 0.00019974319157715543, "loss": 4.311, "step": 1238 }, { "epoch": 0.05800222365264205, "grad_norm": 1.1328125, "learning_rate": 0.00019974277333157167, "loss": 4.5311, "step": 1239 }, { "epoch": 0.05804903739247469, "grad_norm": 1.7109375, "learning_rate": 0.0001997423547461202, "loss": 4.3009, "step": 1240 }, { "epoch": 0.058095851132307336, "grad_norm": 1.4453125, "learning_rate": 0.00019974193582080239, "loss": 4.3817, "step": 1241 }, { "epoch": 0.058142664872139975, "grad_norm": 1.515625, "learning_rate": 0.0001997415165556197, "loss": 4.1151, "step": 1242 }, { "epoch": 0.058189478611972614, "grad_norm": 1.25, "learning_rate": 0.00019974109695057357, "loss": 4.677, "step": 1243 }, { "epoch": 0.058236292351805254, "grad_norm": 1.40625, "learning_rate": 0.00019974067700566543, "loss": 4.4379, "step": 1244 }, { "epoch": 0.05828310609163789, "grad_norm": 1.6953125, "learning_rate": 0.0001997402567208967, "loss": 4.5243, "step": 1245 }, { "epoch": 0.05832991983147054, "grad_norm": 2.59375, "learning_rate": 0.0001997398360962688, "loss": 4.671, "step": 1246 }, { "epoch": 0.05837673357130318, "grad_norm": 1.2578125, "learning_rate": 0.00019973941513178316, "loss": 4.0769, "step": 1247 }, { "epoch": 0.05842354731113582, "grad_norm": 1.4375, "learning_rate": 0.00019973899382744125, "loss": 4.4595, "step": 1248 }, { "epoch": 0.05847036105096846, "grad_norm": 1.3671875, "learning_rate": 0.00019973857218324445, "loss": 4.6475, "step": 1249 }, { "epoch": 0.0585171747908011, "grad_norm": 1.28125, "learning_rate": 0.00019973815019919426, "loss": 4.5695, "step": 1250 }, { "epoch": 0.05856398853063374, "grad_norm": 1.4453125, "learning_rate": 0.0001997377278752921, "loss": 4.1085, "step": 1251 }, { "epoch": 0.05861080227046638, "grad_norm": 1.4296875, "learning_rate": 0.00019973730521153935, "loss": 4.2131, "step": 1252 }, { "epoch": 0.05865761601029902, "grad_norm": 2.046875, "learning_rate": 0.00019973688220793753, "loss": 5.2137, "step": 1253 }, { "epoch": 0.05870442975013166, "grad_norm": 1.359375, "learning_rate": 0.00019973645886448807, "loss": 4.5941, "step": 1254 }, { "epoch": 0.05875124348996431, "grad_norm": 1.515625, "learning_rate": 0.00019973603518119235, "loss": 4.2945, "step": 1255 }, { "epoch": 0.058798057229796946, "grad_norm": 1.4140625, "learning_rate": 0.00019973561115805186, "loss": 4.5484, "step": 1256 }, { "epoch": 0.058844870969629585, "grad_norm": 1.296875, "learning_rate": 0.00019973518679506809, "loss": 4.1607, "step": 1257 }, { "epoch": 0.058891684709462225, "grad_norm": 1.6640625, "learning_rate": 0.0001997347620922424, "loss": 3.9675, "step": 1258 }, { "epoch": 0.05893849844929487, "grad_norm": 1.3671875, "learning_rate": 0.00019973433704957626, "loss": 4.0567, "step": 1259 }, { "epoch": 0.05898531218912751, "grad_norm": 1.546875, "learning_rate": 0.00019973391166707113, "loss": 4.1209, "step": 1260 }, { "epoch": 0.05903212592896015, "grad_norm": 1.203125, "learning_rate": 0.0001997334859447285, "loss": 4.5946, "step": 1261 }, { "epoch": 0.05907893966879279, "grad_norm": 1.5234375, "learning_rate": 0.00019973305988254974, "loss": 4.4807, "step": 1262 }, { "epoch": 0.05912575340862543, "grad_norm": 1.2578125, "learning_rate": 0.00019973263348053636, "loss": 4.1706, "step": 1263 }, { "epoch": 0.059172567148458075, "grad_norm": 1.53125, "learning_rate": 0.0001997322067386898, "loss": 4.4415, "step": 1264 }, { "epoch": 0.059219380888290714, "grad_norm": 1.625, "learning_rate": 0.0001997317796570115, "loss": 4.4714, "step": 1265 }, { "epoch": 0.05926619462812335, "grad_norm": 1.3359375, "learning_rate": 0.00019973135223550292, "loss": 4.7225, "step": 1266 }, { "epoch": 0.05931300836795599, "grad_norm": 1.5, "learning_rate": 0.00019973092447416556, "loss": 4.5227, "step": 1267 }, { "epoch": 0.05935982210778864, "grad_norm": 1.53125, "learning_rate": 0.0001997304963730008, "loss": 4.2849, "step": 1268 }, { "epoch": 0.05940663584762128, "grad_norm": 1.7890625, "learning_rate": 0.00019973006793201013, "loss": 4.7399, "step": 1269 }, { "epoch": 0.05945344958745392, "grad_norm": 3.046875, "learning_rate": 0.00019972963915119505, "loss": 4.6449, "step": 1270 }, { "epoch": 0.05950026332728656, "grad_norm": 2.078125, "learning_rate": 0.000199729210030557, "loss": 4.8099, "step": 1271 }, { "epoch": 0.0595470770671192, "grad_norm": 1.2421875, "learning_rate": 0.0001997287805700974, "loss": 4.3601, "step": 1272 }, { "epoch": 0.05959389080695184, "grad_norm": 1.4609375, "learning_rate": 0.00019972835076981774, "loss": 4.4489, "step": 1273 }, { "epoch": 0.05964070454678448, "grad_norm": 1.5390625, "learning_rate": 0.0001997279206297195, "loss": 4.1733, "step": 1274 }, { "epoch": 0.05968751828661712, "grad_norm": 1.3203125, "learning_rate": 0.00019972749014980415, "loss": 4.4748, "step": 1275 }, { "epoch": 0.05973433202644976, "grad_norm": 1.234375, "learning_rate": 0.00019972705933007313, "loss": 4.0922, "step": 1276 }, { "epoch": 0.059781145766282406, "grad_norm": 1.203125, "learning_rate": 0.00019972662817052791, "loss": 4.5707, "step": 1277 }, { "epoch": 0.059827959506115046, "grad_norm": 1.390625, "learning_rate": 0.00019972619667117, "loss": 4.0362, "step": 1278 }, { "epoch": 0.059874773245947685, "grad_norm": 1.421875, "learning_rate": 0.0001997257648320008, "loss": 4.7732, "step": 1279 }, { "epoch": 0.059921586985780324, "grad_norm": 1.71875, "learning_rate": 0.00019972533265302184, "loss": 4.4551, "step": 1280 }, { "epoch": 0.05996840072561297, "grad_norm": 1.359375, "learning_rate": 0.00019972490013423457, "loss": 4.956, "step": 1281 }, { "epoch": 0.06001521446544561, "grad_norm": 1.6171875, "learning_rate": 0.00019972446727564046, "loss": 4.392, "step": 1282 }, { "epoch": 0.06006202820527825, "grad_norm": 1.3359375, "learning_rate": 0.000199724034077241, "loss": 3.3178, "step": 1283 }, { "epoch": 0.06010884194511089, "grad_norm": 1.234375, "learning_rate": 0.00019972360053903767, "loss": 4.4045, "step": 1284 }, { "epoch": 0.06015565568494353, "grad_norm": 1.4140625, "learning_rate": 0.0001997231666610319, "loss": 4.3936, "step": 1285 }, { "epoch": 0.060202469424776174, "grad_norm": 1.359375, "learning_rate": 0.0001997227324432253, "loss": 4.495, "step": 1286 }, { "epoch": 0.06024928316460881, "grad_norm": 1.4453125, "learning_rate": 0.00019972229788561916, "loss": 4.7401, "step": 1287 }, { "epoch": 0.06029609690444145, "grad_norm": 1.46875, "learning_rate": 0.00019972186298821508, "loss": 4.18, "step": 1288 }, { "epoch": 0.06034291064427409, "grad_norm": 1.5, "learning_rate": 0.0001997214277510145, "loss": 4.3242, "step": 1289 }, { "epoch": 0.06038972438410674, "grad_norm": 1.5859375, "learning_rate": 0.00019972099217401893, "loss": 4.0946, "step": 1290 }, { "epoch": 0.06043653812393938, "grad_norm": 1.8671875, "learning_rate": 0.00019972055625722986, "loss": 4.2768, "step": 1291 }, { "epoch": 0.06048335186377202, "grad_norm": 1.3203125, "learning_rate": 0.00019972012000064876, "loss": 4.0206, "step": 1292 }, { "epoch": 0.060530165603604656, "grad_norm": 2.046875, "learning_rate": 0.00019971968340427707, "loss": 4.2685, "step": 1293 }, { "epoch": 0.060576979343437296, "grad_norm": 1.1875, "learning_rate": 0.00019971924646811635, "loss": 3.5064, "step": 1294 }, { "epoch": 0.06062379308326994, "grad_norm": 1.6875, "learning_rate": 0.00019971880919216807, "loss": 4.2705, "step": 1295 }, { "epoch": 0.06067060682310258, "grad_norm": 1.3515625, "learning_rate": 0.0001997183715764337, "loss": 4.3403, "step": 1296 }, { "epoch": 0.06071742056293522, "grad_norm": 1.875, "learning_rate": 0.00019971793362091476, "loss": 4.3451, "step": 1297 }, { "epoch": 0.06076423430276786, "grad_norm": 0.98046875, "learning_rate": 0.00019971749532561272, "loss": 6.5128, "step": 1298 }, { "epoch": 0.060811048042600506, "grad_norm": 1.671875, "learning_rate": 0.00019971705669052906, "loss": 4.6694, "step": 1299 }, { "epoch": 0.060857861782433145, "grad_norm": 1.6875, "learning_rate": 0.0001997166177156653, "loss": 4.4922, "step": 1300 }, { "epoch": 0.060904675522265785, "grad_norm": 1.3671875, "learning_rate": 0.00019971617840102294, "loss": 4.5454, "step": 1301 }, { "epoch": 0.060951489262098424, "grad_norm": 1.3828125, "learning_rate": 0.00019971573874660346, "loss": 4.2486, "step": 1302 }, { "epoch": 0.06099830300193107, "grad_norm": 1.875, "learning_rate": 0.00019971529875240839, "loss": 4.3024, "step": 1303 }, { "epoch": 0.06104511674176371, "grad_norm": 1.3984375, "learning_rate": 0.00019971485841843915, "loss": 4.6674, "step": 1304 }, { "epoch": 0.06109193048159635, "grad_norm": 1.453125, "learning_rate": 0.00019971441774469732, "loss": 4.9717, "step": 1305 }, { "epoch": 0.06113874422142899, "grad_norm": 1.5, "learning_rate": 0.00019971397673118437, "loss": 4.3059, "step": 1306 }, { "epoch": 0.06118555796126163, "grad_norm": 1.4296875, "learning_rate": 0.00019971353537790183, "loss": 3.8027, "step": 1307 }, { "epoch": 0.061232371701094274, "grad_norm": 1.125, "learning_rate": 0.00019971309368485115, "loss": 4.1924, "step": 1308 }, { "epoch": 0.06127918544092691, "grad_norm": 1.6875, "learning_rate": 0.0001997126516520339, "loss": 4.7512, "step": 1309 }, { "epoch": 0.06132599918075955, "grad_norm": 1.40625, "learning_rate": 0.00019971220927945154, "loss": 4.5101, "step": 1310 }, { "epoch": 0.06137281292059219, "grad_norm": 1.7890625, "learning_rate": 0.0001997117665671056, "loss": 4.7869, "step": 1311 }, { "epoch": 0.06141962666042484, "grad_norm": 1.3125, "learning_rate": 0.00019971132351499757, "loss": 4.5011, "step": 1312 }, { "epoch": 0.06146644040025748, "grad_norm": 1.4609375, "learning_rate": 0.00019971088012312894, "loss": 4.4561, "step": 1313 }, { "epoch": 0.061513254140090116, "grad_norm": 1.46875, "learning_rate": 0.00019971043639150127, "loss": 4.487, "step": 1314 }, { "epoch": 0.061560067879922756, "grad_norm": 1.4609375, "learning_rate": 0.0001997099923201161, "loss": 4.4467, "step": 1315 }, { "epoch": 0.061606881619755395, "grad_norm": 1.2421875, "learning_rate": 0.00019970954790897482, "loss": 4.4102, "step": 1316 }, { "epoch": 0.06165369535958804, "grad_norm": 1.5390625, "learning_rate": 0.00019970910315807907, "loss": 4.9912, "step": 1317 }, { "epoch": 0.06170050909942068, "grad_norm": 1.3203125, "learning_rate": 0.0001997086580674303, "loss": 4.708, "step": 1318 }, { "epoch": 0.06174732283925332, "grad_norm": 1.6171875, "learning_rate": 0.00019970821263703002, "loss": 4.4066, "step": 1319 }, { "epoch": 0.06179413657908596, "grad_norm": 1.453125, "learning_rate": 0.00019970776686687976, "loss": 4.2984, "step": 1320 }, { "epoch": 0.061840950318918605, "grad_norm": 1.640625, "learning_rate": 0.00019970732075698112, "loss": 4.702, "step": 1321 }, { "epoch": 0.061887764058751245, "grad_norm": 1.3203125, "learning_rate": 0.00019970687430733548, "loss": 4.6159, "step": 1322 }, { "epoch": 0.061934577798583884, "grad_norm": 1.40625, "learning_rate": 0.00019970642751794446, "loss": 4.4265, "step": 1323 }, { "epoch": 0.06198139153841652, "grad_norm": 1.484375, "learning_rate": 0.00019970598038880952, "loss": 4.4272, "step": 1324 }, { "epoch": 0.06202820527824916, "grad_norm": 1.4453125, "learning_rate": 0.00019970553291993225, "loss": 4.1152, "step": 1325 }, { "epoch": 0.06207501901808181, "grad_norm": 1.4140625, "learning_rate": 0.00019970508511131413, "loss": 4.0327, "step": 1326 }, { "epoch": 0.06212183275791445, "grad_norm": 1.7890625, "learning_rate": 0.00019970463696295667, "loss": 4.5573, "step": 1327 }, { "epoch": 0.06216864649774709, "grad_norm": 1.3828125, "learning_rate": 0.00019970418847486146, "loss": 4.2334, "step": 1328 }, { "epoch": 0.06221546023757973, "grad_norm": 1.234375, "learning_rate": 0.00019970373964702998, "loss": 4.2951, "step": 1329 }, { "epoch": 0.06226227397741237, "grad_norm": 1.5, "learning_rate": 0.00019970329047946375, "loss": 4.219, "step": 1330 }, { "epoch": 0.06230908771724501, "grad_norm": 1.921875, "learning_rate": 0.00019970284097216434, "loss": 3.9734, "step": 1331 }, { "epoch": 0.06235590145707765, "grad_norm": 1.625, "learning_rate": 0.00019970239112513326, "loss": 4.2722, "step": 1332 }, { "epoch": 0.06240271519691029, "grad_norm": 1.7265625, "learning_rate": 0.00019970194093837205, "loss": 4.4883, "step": 1333 }, { "epoch": 0.06244952893674294, "grad_norm": 1.3125, "learning_rate": 0.0001997014904118822, "loss": 4.36, "step": 1334 }, { "epoch": 0.06249634267657558, "grad_norm": 11.4375, "learning_rate": 0.00019970103954566534, "loss": 6.0567, "step": 1335 }, { "epoch": 0.06254315641640822, "grad_norm": 1.109375, "learning_rate": 0.00019970058833972294, "loss": 3.9781, "step": 1336 }, { "epoch": 0.06258997015624086, "grad_norm": 1.6796875, "learning_rate": 0.00019970013679405653, "loss": 4.2658, "step": 1337 }, { "epoch": 0.0626367838960735, "grad_norm": 1.234375, "learning_rate": 0.0001996996849086677, "loss": 4.3043, "step": 1338 }, { "epoch": 0.06268359763590614, "grad_norm": 1.2890625, "learning_rate": 0.00019969923268355793, "loss": 4.4528, "step": 1339 }, { "epoch": 0.06273041137573877, "grad_norm": 1.703125, "learning_rate": 0.00019969878011872876, "loss": 4.5469, "step": 1340 }, { "epoch": 0.06277722511557142, "grad_norm": 1.3203125, "learning_rate": 0.00019969832721418177, "loss": 4.3547, "step": 1341 }, { "epoch": 0.06282403885540407, "grad_norm": 1.578125, "learning_rate": 0.00019969787396991855, "loss": 4.4232, "step": 1342 }, { "epoch": 0.0628708525952367, "grad_norm": 1.4765625, "learning_rate": 0.00019969742038594053, "loss": 4.1793, "step": 1343 }, { "epoch": 0.06291766633506934, "grad_norm": 1.34375, "learning_rate": 0.00019969696646224933, "loss": 3.9016, "step": 1344 }, { "epoch": 0.06296448007490199, "grad_norm": 1.7734375, "learning_rate": 0.00019969651219884648, "loss": 4.6872, "step": 1345 }, { "epoch": 0.06301129381473462, "grad_norm": 2.28125, "learning_rate": 0.00019969605759573354, "loss": 4.7922, "step": 1346 }, { "epoch": 0.06305810755456727, "grad_norm": 1.5703125, "learning_rate": 0.00019969560265291204, "loss": 4.8264, "step": 1347 }, { "epoch": 0.0631049212943999, "grad_norm": 1.5703125, "learning_rate": 0.0001996951473703835, "loss": 4.746, "step": 1348 }, { "epoch": 0.06315173503423255, "grad_norm": 1.4453125, "learning_rate": 0.00019969469174814955, "loss": 4.2508, "step": 1349 }, { "epoch": 0.0631985487740652, "grad_norm": 1.09375, "learning_rate": 0.0001996942357862117, "loss": 2.9023, "step": 1350 }, { "epoch": 0.06324536251389783, "grad_norm": 1.578125, "learning_rate": 0.00019969377948457148, "loss": 4.6651, "step": 1351 }, { "epoch": 0.06329217625373047, "grad_norm": 1.5, "learning_rate": 0.0001996933228432305, "loss": 4.589, "step": 1352 }, { "epoch": 0.0633389899935631, "grad_norm": 1.8828125, "learning_rate": 0.00019969286586219025, "loss": 4.5093, "step": 1353 }, { "epoch": 0.06338580373339575, "grad_norm": 1.484375, "learning_rate": 0.00019969240854145233, "loss": 4.793, "step": 1354 }, { "epoch": 0.0634326174732284, "grad_norm": 1.453125, "learning_rate": 0.00019969195088101832, "loss": 6.6285, "step": 1355 }, { "epoch": 0.06347943121306103, "grad_norm": 1.34375, "learning_rate": 0.00019969149288088972, "loss": 4.7468, "step": 1356 }, { "epoch": 0.06352624495289368, "grad_norm": 1.5625, "learning_rate": 0.00019969103454106814, "loss": 3.9194, "step": 1357 }, { "epoch": 0.06357305869272631, "grad_norm": 1.734375, "learning_rate": 0.00019969057586155512, "loss": 4.7089, "step": 1358 }, { "epoch": 0.06361987243255895, "grad_norm": 1.296875, "learning_rate": 0.0001996901168423522, "loss": 4.1545, "step": 1359 }, { "epoch": 0.0636666861723916, "grad_norm": 1.671875, "learning_rate": 0.00019968965748346098, "loss": 4.0637, "step": 1360 }, { "epoch": 0.06371349991222423, "grad_norm": 1.3515625, "learning_rate": 0.00019968919778488303, "loss": 4.4751, "step": 1361 }, { "epoch": 0.06376031365205688, "grad_norm": 1.296875, "learning_rate": 0.0001996887377466199, "loss": 3.923, "step": 1362 }, { "epoch": 0.06380712739188953, "grad_norm": 1.3984375, "learning_rate": 0.00019968827736867314, "loss": 4.1236, "step": 1363 }, { "epoch": 0.06385394113172216, "grad_norm": 1.265625, "learning_rate": 0.00019968781665104436, "loss": 5.9743, "step": 1364 }, { "epoch": 0.0639007548715548, "grad_norm": 1.34375, "learning_rate": 0.00019968735559373508, "loss": 4.5393, "step": 1365 }, { "epoch": 0.06394756861138744, "grad_norm": 1.4375, "learning_rate": 0.00019968689419674692, "loss": 4.4708, "step": 1366 }, { "epoch": 0.06399438235122008, "grad_norm": 1.1484375, "learning_rate": 0.00019968643246008144, "loss": 4.1284, "step": 1367 }, { "epoch": 0.06404119609105273, "grad_norm": 1.25, "learning_rate": 0.00019968597038374017, "loss": 4.1964, "step": 1368 }, { "epoch": 0.06408800983088536, "grad_norm": 1.515625, "learning_rate": 0.00019968550796772475, "loss": 4.1834, "step": 1369 }, { "epoch": 0.06413482357071801, "grad_norm": 1.265625, "learning_rate": 0.00019968504521203668, "loss": 4.7719, "step": 1370 }, { "epoch": 0.06418163731055064, "grad_norm": 1.265625, "learning_rate": 0.0001996845821166776, "loss": 4.5293, "step": 1371 }, { "epoch": 0.06422845105038329, "grad_norm": 1.2890625, "learning_rate": 0.0001996841186816491, "loss": 4.5266, "step": 1372 }, { "epoch": 0.06427526479021593, "grad_norm": 2.40625, "learning_rate": 0.00019968365490695268, "loss": 4.5621, "step": 1373 }, { "epoch": 0.06432207853004857, "grad_norm": 1.3828125, "learning_rate": 0.00019968319079259, "loss": 3.9713, "step": 1374 }, { "epoch": 0.06436889226988121, "grad_norm": 1.40625, "learning_rate": 0.0001996827263385626, "loss": 4.4185, "step": 1375 }, { "epoch": 0.06441570600971386, "grad_norm": 1.375, "learning_rate": 0.0001996822615448721, "loss": 4.1824, "step": 1376 }, { "epoch": 0.06446251974954649, "grad_norm": 0.98828125, "learning_rate": 0.00019968179641152004, "loss": 2.926, "step": 1377 }, { "epoch": 0.06450933348937914, "grad_norm": 1.2265625, "learning_rate": 0.000199681330938508, "loss": 4.2805, "step": 1378 }, { "epoch": 0.06455614722921177, "grad_norm": 1.3984375, "learning_rate": 0.0001996808651258376, "loss": 4.4914, "step": 1379 }, { "epoch": 0.06460296096904442, "grad_norm": 1.25, "learning_rate": 0.0001996803989735104, "loss": 4.2658, "step": 1380 }, { "epoch": 0.06464977470887706, "grad_norm": 1.421875, "learning_rate": 0.000199679932481528, "loss": 4.3584, "step": 1381 }, { "epoch": 0.0646965884487097, "grad_norm": 1.7109375, "learning_rate": 0.00019967946564989201, "loss": 4.3984, "step": 1382 }, { "epoch": 0.06474340218854234, "grad_norm": 1.4765625, "learning_rate": 0.000199678998478604, "loss": 4.686, "step": 1383 }, { "epoch": 0.06479021592837497, "grad_norm": 2.25, "learning_rate": 0.00019967853096766557, "loss": 4.1983, "step": 1384 }, { "epoch": 0.06483702966820762, "grad_norm": 1.3515625, "learning_rate": 0.0001996780631170783, "loss": 4.2543, "step": 1385 }, { "epoch": 0.06488384340804026, "grad_norm": 2.171875, "learning_rate": 0.00019967759492684376, "loss": 4.358, "step": 1386 }, { "epoch": 0.0649306571478729, "grad_norm": 1.4765625, "learning_rate": 0.00019967712639696363, "loss": 4.4254, "step": 1387 }, { "epoch": 0.06497747088770554, "grad_norm": 1.25, "learning_rate": 0.00019967665752743946, "loss": 4.4684, "step": 1388 }, { "epoch": 0.06502428462753818, "grad_norm": 1.203125, "learning_rate": 0.00019967618831827276, "loss": 4.4557, "step": 1389 }, { "epoch": 0.06507109836737082, "grad_norm": 1.2734375, "learning_rate": 0.0001996757187694653, "loss": 4.1192, "step": 1390 }, { "epoch": 0.06511791210720347, "grad_norm": 1.6015625, "learning_rate": 0.00019967524888101854, "loss": 4.1309, "step": 1391 }, { "epoch": 0.0651647258470361, "grad_norm": 1.421875, "learning_rate": 0.00019967477865293415, "loss": 4.2653, "step": 1392 }, { "epoch": 0.06521153958686875, "grad_norm": 1.453125, "learning_rate": 0.0001996743080852137, "loss": 2.9204, "step": 1393 }, { "epoch": 0.0652583533267014, "grad_norm": 1.6171875, "learning_rate": 0.0001996738371778588, "loss": 4.3362, "step": 1394 }, { "epoch": 0.06530516706653403, "grad_norm": 1.7578125, "learning_rate": 0.00019967336593087106, "loss": 4.1575, "step": 1395 }, { "epoch": 0.06535198080636667, "grad_norm": 1.2890625, "learning_rate": 0.0001996728943442521, "loss": 4.0051, "step": 1396 }, { "epoch": 0.0653987945461993, "grad_norm": 1.1875, "learning_rate": 0.00019967242241800354, "loss": 4.1554, "step": 1397 }, { "epoch": 0.06544560828603195, "grad_norm": 1.3203125, "learning_rate": 0.00019967195015212693, "loss": 4.6963, "step": 1398 }, { "epoch": 0.0654924220258646, "grad_norm": 1.3125, "learning_rate": 0.00019967147754662391, "loss": 4.3179, "step": 1399 }, { "epoch": 0.06553923576569723, "grad_norm": 1.4140625, "learning_rate": 0.0001996710046014961, "loss": 4.6975, "step": 1400 }, { "epoch": 0.06558604950552988, "grad_norm": 1.203125, "learning_rate": 0.00019967053131674512, "loss": 4.8795, "step": 1401 }, { "epoch": 0.06563286324536251, "grad_norm": 1.1953125, "learning_rate": 0.00019967005769237254, "loss": 4.0349, "step": 1402 }, { "epoch": 0.06567967698519515, "grad_norm": 1.609375, "learning_rate": 0.00019966958372838, "loss": 4.051, "step": 1403 }, { "epoch": 0.0657264907250278, "grad_norm": 1.65625, "learning_rate": 0.00019966910942476913, "loss": 4.821, "step": 1404 }, { "epoch": 0.06577330446486043, "grad_norm": 1.421875, "learning_rate": 0.00019966863478154153, "loss": 4.4196, "step": 1405 }, { "epoch": 0.06582011820469308, "grad_norm": 2.0, "learning_rate": 0.0001996681597986988, "loss": 4.7258, "step": 1406 }, { "epoch": 0.06586693194452573, "grad_norm": 1.265625, "learning_rate": 0.0001996676844762426, "loss": 3.9137, "step": 1407 }, { "epoch": 0.06591374568435836, "grad_norm": 1.1953125, "learning_rate": 0.0001996672088141745, "loss": 4.2248, "step": 1408 }, { "epoch": 0.065960559424191, "grad_norm": 1.359375, "learning_rate": 0.00019966673281249619, "loss": 4.1161, "step": 1409 }, { "epoch": 0.06600737316402364, "grad_norm": 1.5625, "learning_rate": 0.0001996662564712092, "loss": 4.1902, "step": 1410 }, { "epoch": 0.06605418690385628, "grad_norm": 1.25, "learning_rate": 0.00019966577979031524, "loss": 4.1945, "step": 1411 }, { "epoch": 0.06610100064368893, "grad_norm": 2.015625, "learning_rate": 0.00019966530276981589, "loss": 4.6642, "step": 1412 }, { "epoch": 0.06614781438352156, "grad_norm": 1.5390625, "learning_rate": 0.00019966482540971275, "loss": 4.3074, "step": 1413 }, { "epoch": 0.06619462812335421, "grad_norm": 1.25, "learning_rate": 0.0001996643477100075, "loss": 4.3181, "step": 1414 }, { "epoch": 0.06624144186318684, "grad_norm": 1.5078125, "learning_rate": 0.00019966386967070173, "loss": 4.4221, "step": 1415 }, { "epoch": 0.06628825560301949, "grad_norm": 1.203125, "learning_rate": 0.00019966339129179713, "loss": 4.0978, "step": 1416 }, { "epoch": 0.06633506934285213, "grad_norm": 2.28125, "learning_rate": 0.00019966291257329522, "loss": 4.3993, "step": 1417 }, { "epoch": 0.06638188308268476, "grad_norm": 1.7109375, "learning_rate": 0.00019966243351519775, "loss": 4.3362, "step": 1418 }, { "epoch": 0.06642869682251741, "grad_norm": 1.328125, "learning_rate": 0.0001996619541175063, "loss": 4.368, "step": 1419 }, { "epoch": 0.06647551056235004, "grad_norm": 1.359375, "learning_rate": 0.00019966147438022245, "loss": 4.4716, "step": 1420 }, { "epoch": 0.06652232430218269, "grad_norm": 1.6015625, "learning_rate": 0.00019966099430334793, "loss": 4.7362, "step": 1421 }, { "epoch": 0.06656913804201534, "grad_norm": 1.234375, "learning_rate": 0.00019966051388688432, "loss": 4.5471, "step": 1422 }, { "epoch": 0.06661595178184797, "grad_norm": 1.140625, "learning_rate": 0.00019966003313083328, "loss": 4.0185, "step": 1423 }, { "epoch": 0.06666276552168061, "grad_norm": 1.65625, "learning_rate": 0.00019965955203519643, "loss": 4.5793, "step": 1424 }, { "epoch": 0.06670957926151326, "grad_norm": 1.328125, "learning_rate": 0.00019965907059997542, "loss": 4.1484, "step": 1425 }, { "epoch": 0.06675639300134589, "grad_norm": 1.8203125, "learning_rate": 0.00019965858882517185, "loss": 4.4982, "step": 1426 }, { "epoch": 0.06680320674117854, "grad_norm": 1.421875, "learning_rate": 0.00019965810671078744, "loss": 4.3274, "step": 1427 }, { "epoch": 0.06685002048101117, "grad_norm": 1.71875, "learning_rate": 0.00019965762425682378, "loss": 4.2871, "step": 1428 }, { "epoch": 0.06689683422084382, "grad_norm": 1.59375, "learning_rate": 0.00019965714146328252, "loss": 4.4422, "step": 1429 }, { "epoch": 0.06694364796067646, "grad_norm": 1.4609375, "learning_rate": 0.00019965665833016533, "loss": 4.2604, "step": 1430 }, { "epoch": 0.0669904617005091, "grad_norm": 1.390625, "learning_rate": 0.00019965617485747384, "loss": 4.3759, "step": 1431 }, { "epoch": 0.06703727544034174, "grad_norm": 1.09375, "learning_rate": 0.0001996556910452097, "loss": 3.9497, "step": 1432 }, { "epoch": 0.06708408918017437, "grad_norm": 1.609375, "learning_rate": 0.0001996552068933745, "loss": 4.4628, "step": 1433 }, { "epoch": 0.06713090292000702, "grad_norm": 1.3046875, "learning_rate": 0.00019965472240196999, "loss": 4.5103, "step": 1434 }, { "epoch": 0.06717771665983967, "grad_norm": 6.8125, "learning_rate": 0.00019965423757099777, "loss": 4.6568, "step": 1435 }, { "epoch": 0.0672245303996723, "grad_norm": 1.4609375, "learning_rate": 0.0001996537524004595, "loss": 4.3677, "step": 1436 }, { "epoch": 0.06727134413950495, "grad_norm": 1.265625, "learning_rate": 0.0001996532668903568, "loss": 4.0385, "step": 1437 }, { "epoch": 0.06731815787933759, "grad_norm": 1.3359375, "learning_rate": 0.00019965278104069136, "loss": 6.0096, "step": 1438 }, { "epoch": 0.06736497161917022, "grad_norm": 1.4453125, "learning_rate": 0.00019965229485146484, "loss": 4.2288, "step": 1439 }, { "epoch": 0.06741178535900287, "grad_norm": 1.5859375, "learning_rate": 0.00019965180832267888, "loss": 3.8733, "step": 1440 }, { "epoch": 0.0674585990988355, "grad_norm": 1.546875, "learning_rate": 0.00019965132145433517, "loss": 4.3302, "step": 1441 }, { "epoch": 0.06750541283866815, "grad_norm": 1.453125, "learning_rate": 0.0001996508342464353, "loss": 6.4838, "step": 1442 }, { "epoch": 0.0675522265785008, "grad_norm": 1.1875, "learning_rate": 0.00019965034669898102, "loss": 4.2247, "step": 1443 }, { "epoch": 0.06759904031833343, "grad_norm": 1.171875, "learning_rate": 0.00019964985881197391, "loss": 4.329, "step": 1444 }, { "epoch": 0.06764585405816607, "grad_norm": 1.9140625, "learning_rate": 0.00019964937058541567, "loss": 4.2673, "step": 1445 }, { "epoch": 0.0676926677979987, "grad_norm": 1.296875, "learning_rate": 0.00019964888201930796, "loss": 4.5649, "step": 1446 }, { "epoch": 0.06773948153783135, "grad_norm": 1.2109375, "learning_rate": 0.00019964839311365244, "loss": 4.3282, "step": 1447 }, { "epoch": 0.067786295277664, "grad_norm": 1.3359375, "learning_rate": 0.00019964790386845077, "loss": 3.1298, "step": 1448 }, { "epoch": 0.06783310901749663, "grad_norm": 2.1875, "learning_rate": 0.00019964741428370465, "loss": 3.9852, "step": 1449 }, { "epoch": 0.06787992275732928, "grad_norm": 1.25, "learning_rate": 0.00019964692435941572, "loss": 4.0513, "step": 1450 }, { "epoch": 0.06792673649716191, "grad_norm": 1.4765625, "learning_rate": 0.00019964643409558564, "loss": 4.2741, "step": 1451 }, { "epoch": 0.06797355023699456, "grad_norm": 1.4296875, "learning_rate": 0.00019964594349221613, "loss": 4.5906, "step": 1452 }, { "epoch": 0.0680203639768272, "grad_norm": 1.4609375, "learning_rate": 0.00019964545254930882, "loss": 4.6553, "step": 1453 }, { "epoch": 0.06806717771665984, "grad_norm": 1.7734375, "learning_rate": 0.00019964496126686538, "loss": 4.3018, "step": 1454 }, { "epoch": 0.06811399145649248, "grad_norm": 1.1953125, "learning_rate": 0.0001996444696448875, "loss": 4.3697, "step": 1455 }, { "epoch": 0.06816080519632513, "grad_norm": 1.4140625, "learning_rate": 0.0001996439776833768, "loss": 4.4636, "step": 1456 }, { "epoch": 0.06820761893615776, "grad_norm": 1.4921875, "learning_rate": 0.00019964348538233507, "loss": 4.368, "step": 1457 }, { "epoch": 0.0682544326759904, "grad_norm": 1.34375, "learning_rate": 0.0001996429927417639, "loss": 4.3161, "step": 1458 }, { "epoch": 0.06830124641582304, "grad_norm": 1.3125, "learning_rate": 0.000199642499761665, "loss": 4.3314, "step": 1459 }, { "epoch": 0.06834806015565568, "grad_norm": 1.3984375, "learning_rate": 0.00019964200644204003, "loss": 4.2545, "step": 1460 }, { "epoch": 0.06839487389548833, "grad_norm": 1.5234375, "learning_rate": 0.0001996415127828907, "loss": 4.4316, "step": 1461 }, { "epoch": 0.06844168763532096, "grad_norm": 1.1171875, "learning_rate": 0.00019964101878421864, "loss": 4.3981, "step": 1462 }, { "epoch": 0.06848850137515361, "grad_norm": 1.46875, "learning_rate": 0.0001996405244460256, "loss": 4.1554, "step": 1463 }, { "epoch": 0.06853531511498624, "grad_norm": 1.34375, "learning_rate": 0.00019964002976831323, "loss": 4.1605, "step": 1464 }, { "epoch": 0.06858212885481889, "grad_norm": 2.25, "learning_rate": 0.0001996395347510832, "loss": 4.5722, "step": 1465 }, { "epoch": 0.06862894259465153, "grad_norm": 1.4765625, "learning_rate": 0.0001996390393943372, "loss": 4.6528, "step": 1466 }, { "epoch": 0.06867575633448417, "grad_norm": 1.3125, "learning_rate": 0.00019963854369807699, "loss": 4.6066, "step": 1467 }, { "epoch": 0.06872257007431681, "grad_norm": 1.3125, "learning_rate": 0.00019963804766230417, "loss": 4.2992, "step": 1468 }, { "epoch": 0.06876938381414946, "grad_norm": 1.328125, "learning_rate": 0.00019963755128702047, "loss": 4.4008, "step": 1469 }, { "epoch": 0.06881619755398209, "grad_norm": 1.59375, "learning_rate": 0.00019963705457222756, "loss": 4.48, "step": 1470 }, { "epoch": 0.06886301129381474, "grad_norm": 1.6796875, "learning_rate": 0.00019963655751792714, "loss": 4.2871, "step": 1471 }, { "epoch": 0.06890982503364737, "grad_norm": 1.4296875, "learning_rate": 0.0001996360601241209, "loss": 4.7162, "step": 1472 }, { "epoch": 0.06895663877348002, "grad_norm": 1.671875, "learning_rate": 0.00019963556239081058, "loss": 4.409, "step": 1473 }, { "epoch": 0.06900345251331266, "grad_norm": 1.578125, "learning_rate": 0.00019963506431799782, "loss": 4.5652, "step": 1474 }, { "epoch": 0.0690502662531453, "grad_norm": 1.3359375, "learning_rate": 0.00019963456590568437, "loss": 3.9566, "step": 1475 }, { "epoch": 0.06909707999297794, "grad_norm": 1.3984375, "learning_rate": 0.00019963406715387184, "loss": 4.093, "step": 1476 }, { "epoch": 0.06914389373281057, "grad_norm": 1.46875, "learning_rate": 0.00019963356806256203, "loss": 4.2526, "step": 1477 }, { "epoch": 0.06919070747264322, "grad_norm": 1.3984375, "learning_rate": 0.00019963306863175656, "loss": 4.2232, "step": 1478 }, { "epoch": 0.06923752121247587, "grad_norm": 1.34375, "learning_rate": 0.0001996325688614572, "loss": 4.2528, "step": 1479 }, { "epoch": 0.0692843349523085, "grad_norm": 1.3671875, "learning_rate": 0.0001996320687516656, "loss": 4.6054, "step": 1480 }, { "epoch": 0.06933114869214115, "grad_norm": 1.109375, "learning_rate": 0.0001996315683023835, "loss": 6.3833, "step": 1481 }, { "epoch": 0.06937796243197378, "grad_norm": 1.6171875, "learning_rate": 0.00019963106751361257, "loss": 4.4158, "step": 1482 }, { "epoch": 0.06942477617180642, "grad_norm": 1.203125, "learning_rate": 0.00019963056638535456, "loss": 4.307, "step": 1483 }, { "epoch": 0.06947158991163907, "grad_norm": 1.34375, "learning_rate": 0.00019963006491761115, "loss": 4.653, "step": 1484 }, { "epoch": 0.0695184036514717, "grad_norm": 1.2734375, "learning_rate": 0.00019962956311038403, "loss": 4.7592, "step": 1485 }, { "epoch": 0.06956521739130435, "grad_norm": 1.4765625, "learning_rate": 0.00019962906096367497, "loss": 4.2038, "step": 1486 }, { "epoch": 0.069612031131137, "grad_norm": 1.7421875, "learning_rate": 0.00019962855847748562, "loss": 4.5202, "step": 1487 }, { "epoch": 0.06965884487096963, "grad_norm": 1.5234375, "learning_rate": 0.00019962805565181772, "loss": 4.3609, "step": 1488 }, { "epoch": 0.06970565861080227, "grad_norm": 1.5234375, "learning_rate": 0.00019962755248667297, "loss": 4.3852, "step": 1489 }, { "epoch": 0.0697524723506349, "grad_norm": 1.3203125, "learning_rate": 0.00019962704898205308, "loss": 3.9263, "step": 1490 }, { "epoch": 0.06979928609046755, "grad_norm": 1.421875, "learning_rate": 0.00019962654513795981, "loss": 4.0843, "step": 1491 }, { "epoch": 0.0698460998303002, "grad_norm": 1.4765625, "learning_rate": 0.00019962604095439485, "loss": 4.2462, "step": 1492 }, { "epoch": 0.06989291357013283, "grad_norm": 2.0, "learning_rate": 0.0001996255364313599, "loss": 4.4966, "step": 1493 }, { "epoch": 0.06993972730996548, "grad_norm": 1.9375, "learning_rate": 0.00019962503156885667, "loss": 4.2746, "step": 1494 }, { "epoch": 0.06998654104979811, "grad_norm": 1.234375, "learning_rate": 0.00019962452636688695, "loss": 4.2718, "step": 1495 }, { "epoch": 0.07003335478963076, "grad_norm": 1.5859375, "learning_rate": 0.00019962402082545236, "loss": 4.2182, "step": 1496 }, { "epoch": 0.0700801685294634, "grad_norm": 3.359375, "learning_rate": 0.00019962351494455473, "loss": 4.4177, "step": 1497 }, { "epoch": 0.07012698226929603, "grad_norm": 1.6953125, "learning_rate": 0.0001996230087241957, "loss": 4.0534, "step": 1498 }, { "epoch": 0.07017379600912868, "grad_norm": 1.3828125, "learning_rate": 0.00019962250216437702, "loss": 4.504, "step": 1499 }, { "epoch": 0.07022060974896133, "grad_norm": 1.515625, "learning_rate": 0.00019962199526510048, "loss": 3.3218, "step": 1500 }, { "epoch": 0.07026742348879396, "grad_norm": 1.5078125, "learning_rate": 0.0001996214880263677, "loss": 4.2913, "step": 1501 }, { "epoch": 0.0703142372286266, "grad_norm": 1.2890625, "learning_rate": 0.00019962098044818048, "loss": 3.9903, "step": 1502 }, { "epoch": 0.07036105096845924, "grad_norm": 1.4140625, "learning_rate": 0.00019962047253054048, "loss": 4.3019, "step": 1503 }, { "epoch": 0.07040786470829188, "grad_norm": 1.484375, "learning_rate": 0.00019961996427344953, "loss": 6.0605, "step": 1504 }, { "epoch": 0.07045467844812453, "grad_norm": 1.2421875, "learning_rate": 0.0001996194556769093, "loss": 4.2847, "step": 1505 }, { "epoch": 0.07050149218795716, "grad_norm": 1.84375, "learning_rate": 0.0001996189467409215, "loss": 4.0119, "step": 1506 }, { "epoch": 0.07054830592778981, "grad_norm": 1.234375, "learning_rate": 0.00019961843746548795, "loss": 4.3006, "step": 1507 }, { "epoch": 0.07059511966762244, "grad_norm": 1.4375, "learning_rate": 0.00019961792785061028, "loss": 4.4513, "step": 1508 }, { "epoch": 0.07064193340745509, "grad_norm": 1.640625, "learning_rate": 0.00019961741789629032, "loss": 4.5926, "step": 1509 }, { "epoch": 0.07068874714728773, "grad_norm": 1.5, "learning_rate": 0.00019961690760252973, "loss": 4.264, "step": 1510 }, { "epoch": 0.07073556088712037, "grad_norm": 1.359375, "learning_rate": 0.00019961639696933033, "loss": 4.6667, "step": 1511 }, { "epoch": 0.07078237462695301, "grad_norm": 1.5625, "learning_rate": 0.00019961588599669378, "loss": 4.3425, "step": 1512 }, { "epoch": 0.07082918836678564, "grad_norm": 1.171875, "learning_rate": 0.0001996153746846219, "loss": 4.4269, "step": 1513 }, { "epoch": 0.07087600210661829, "grad_norm": 1.34375, "learning_rate": 0.00019961486303311633, "loss": 5.1198, "step": 1514 }, { "epoch": 0.07092281584645094, "grad_norm": 1.1015625, "learning_rate": 0.0001996143510421789, "loss": 4.137, "step": 1515 }, { "epoch": 0.07096962958628357, "grad_norm": 1.4921875, "learning_rate": 0.00019961383871181133, "loss": 4.2071, "step": 1516 }, { "epoch": 0.07101644332611622, "grad_norm": 1.5, "learning_rate": 0.00019961332604201534, "loss": 4.362, "step": 1517 }, { "epoch": 0.07106325706594886, "grad_norm": 2.140625, "learning_rate": 0.00019961281303279273, "loss": 4.4849, "step": 1518 }, { "epoch": 0.0711100708057815, "grad_norm": 1.3359375, "learning_rate": 0.0001996122996841452, "loss": 4.0368, "step": 1519 }, { "epoch": 0.07115688454561414, "grad_norm": 1.796875, "learning_rate": 0.00019961178599607452, "loss": 4.4886, "step": 1520 }, { "epoch": 0.07120369828544677, "grad_norm": 1.53125, "learning_rate": 0.00019961127196858246, "loss": 4.9324, "step": 1521 }, { "epoch": 0.07125051202527942, "grad_norm": 1.1328125, "learning_rate": 0.0001996107576016707, "loss": 4.3078, "step": 1522 }, { "epoch": 0.07129732576511207, "grad_norm": 1.5, "learning_rate": 0.0001996102428953411, "loss": 4.3101, "step": 1523 }, { "epoch": 0.0713441395049447, "grad_norm": 1.2734375, "learning_rate": 0.00019960972784959532, "loss": 4.5112, "step": 1524 }, { "epoch": 0.07139095324477734, "grad_norm": 1.3359375, "learning_rate": 0.00019960921246443513, "loss": 4.6397, "step": 1525 }, { "epoch": 0.07143776698460998, "grad_norm": 1.375, "learning_rate": 0.00019960869673986236, "loss": 4.8241, "step": 1526 }, { "epoch": 0.07148458072444262, "grad_norm": 1.6015625, "learning_rate": 0.00019960818067587868, "loss": 4.4199, "step": 1527 }, { "epoch": 0.07153139446427527, "grad_norm": 1.7578125, "learning_rate": 0.00019960766427248587, "loss": 3.7618, "step": 1528 }, { "epoch": 0.0715782082041079, "grad_norm": 1.2578125, "learning_rate": 0.00019960714752968574, "loss": 4.2135, "step": 1529 }, { "epoch": 0.07162502194394055, "grad_norm": 1.4140625, "learning_rate": 0.00019960663044748, "loss": 4.3197, "step": 1530 }, { "epoch": 0.0716718356837732, "grad_norm": 1.296875, "learning_rate": 0.0001996061130258704, "loss": 4.1577, "step": 1531 }, { "epoch": 0.07171864942360583, "grad_norm": 1.734375, "learning_rate": 0.00019960559526485872, "loss": 4.1992, "step": 1532 }, { "epoch": 0.07176546316343847, "grad_norm": 1.6640625, "learning_rate": 0.00019960507716444676, "loss": 4.8117, "step": 1533 }, { "epoch": 0.0718122769032711, "grad_norm": 1.25, "learning_rate": 0.00019960455872463625, "loss": 4.3215, "step": 1534 }, { "epoch": 0.07185909064310375, "grad_norm": 1.34375, "learning_rate": 0.00019960403994542897, "loss": 4.0782, "step": 1535 }, { "epoch": 0.0719059043829364, "grad_norm": 1.1796875, "learning_rate": 0.00019960352082682667, "loss": 4.1944, "step": 1536 }, { "epoch": 0.07195271812276903, "grad_norm": 1.453125, "learning_rate": 0.00019960300136883115, "loss": 3.8856, "step": 1537 }, { "epoch": 0.07199953186260168, "grad_norm": 1.265625, "learning_rate": 0.00019960248157144413, "loss": 4.1671, "step": 1538 }, { "epoch": 0.07204634560243431, "grad_norm": 1.390625, "learning_rate": 0.00019960196143466746, "loss": 4.0009, "step": 1539 }, { "epoch": 0.07209315934226695, "grad_norm": 1.2734375, "learning_rate": 0.0001996014409585028, "loss": 4.5447, "step": 1540 }, { "epoch": 0.0721399730820996, "grad_norm": 1.0234375, "learning_rate": 0.000199600920142952, "loss": 3.8556, "step": 1541 }, { "epoch": 0.07218678682193223, "grad_norm": 1.1875, "learning_rate": 0.00019960039898801686, "loss": 4.2028, "step": 1542 }, { "epoch": 0.07223360056176488, "grad_norm": 1.3125, "learning_rate": 0.0001995998774936991, "loss": 4.1697, "step": 1543 }, { "epoch": 0.07228041430159751, "grad_norm": 1.5078125, "learning_rate": 0.0001995993556600005, "loss": 4.4326, "step": 1544 }, { "epoch": 0.07232722804143016, "grad_norm": 1.7890625, "learning_rate": 0.00019959883348692287, "loss": 4.8434, "step": 1545 }, { "epoch": 0.0723740417812628, "grad_norm": 1.53125, "learning_rate": 0.00019959831097446796, "loss": 4.5456, "step": 1546 }, { "epoch": 0.07242085552109544, "grad_norm": 1.7578125, "learning_rate": 0.00019959778812263758, "loss": 3.8611, "step": 1547 }, { "epoch": 0.07246766926092808, "grad_norm": 1.1640625, "learning_rate": 0.0001995972649314335, "loss": 4.4541, "step": 1548 }, { "epoch": 0.07251448300076073, "grad_norm": 1.2578125, "learning_rate": 0.00019959674140085747, "loss": 4.4146, "step": 1549 }, { "epoch": 0.07256129674059336, "grad_norm": 1.3828125, "learning_rate": 0.0001995962175309113, "loss": 4.8947, "step": 1550 }, { "epoch": 0.07260811048042601, "grad_norm": 1.25, "learning_rate": 0.00019959569332159678, "loss": 4.4259, "step": 1551 }, { "epoch": 0.07265492422025864, "grad_norm": 1.1640625, "learning_rate": 0.0001995951687729157, "loss": 4.3455, "step": 1552 }, { "epoch": 0.07270173796009129, "grad_norm": 1.265625, "learning_rate": 0.00019959464388486983, "loss": 4.1333, "step": 1553 }, { "epoch": 0.07274855169992393, "grad_norm": 1.1875, "learning_rate": 0.000199594118657461, "loss": 4.1205, "step": 1554 }, { "epoch": 0.07279536543975657, "grad_norm": 1.515625, "learning_rate": 0.00019959359309069092, "loss": 4.0227, "step": 1555 }, { "epoch": 0.07284217917958921, "grad_norm": 1.5078125, "learning_rate": 0.0001995930671845615, "loss": 4.2296, "step": 1556 }, { "epoch": 0.07288899291942184, "grad_norm": 1.3984375, "learning_rate": 0.00019959254093907439, "loss": 4.4461, "step": 1557 }, { "epoch": 0.07293580665925449, "grad_norm": 1.453125, "learning_rate": 0.00019959201435423144, "loss": 4.3332, "step": 1558 }, { "epoch": 0.07298262039908714, "grad_norm": 1.5078125, "learning_rate": 0.0001995914874300345, "loss": 4.2531, "step": 1559 }, { "epoch": 0.07302943413891977, "grad_norm": 1.96875, "learning_rate": 0.00019959096016648533, "loss": 4.5192, "step": 1560 }, { "epoch": 0.07307624787875242, "grad_norm": 1.3828125, "learning_rate": 0.0001995904325635857, "loss": 4.1485, "step": 1561 }, { "epoch": 0.07312306161858506, "grad_norm": 1.25, "learning_rate": 0.0001995899046213374, "loss": 4.2029, "step": 1562 }, { "epoch": 0.0731698753584177, "grad_norm": 1.75, "learning_rate": 0.00019958937633974232, "loss": 3.9459, "step": 1563 }, { "epoch": 0.07321668909825034, "grad_norm": 1.328125, "learning_rate": 0.00019958884771880217, "loss": 4.3736, "step": 1564 }, { "epoch": 0.07326350283808297, "grad_norm": 1.7109375, "learning_rate": 0.00019958831875851877, "loss": 4.4313, "step": 1565 }, { "epoch": 0.07331031657791562, "grad_norm": 0.89453125, "learning_rate": 0.00019958778945889394, "loss": 5.4521, "step": 1566 }, { "epoch": 0.07335713031774826, "grad_norm": 1.390625, "learning_rate": 0.00019958725981992947, "loss": 4.0776, "step": 1567 }, { "epoch": 0.0734039440575809, "grad_norm": 1.2734375, "learning_rate": 0.00019958672984162715, "loss": 4.26, "step": 1568 }, { "epoch": 0.07345075779741354, "grad_norm": 1.609375, "learning_rate": 0.00019958619952398885, "loss": 4.2903, "step": 1569 }, { "epoch": 0.07349757153724618, "grad_norm": 1.4453125, "learning_rate": 0.00019958566886701628, "loss": 4.1635, "step": 1570 }, { "epoch": 0.07354438527707882, "grad_norm": 1.8046875, "learning_rate": 0.00019958513787071133, "loss": 4.6948, "step": 1571 }, { "epoch": 0.07359119901691147, "grad_norm": 1.7578125, "learning_rate": 0.00019958460653507576, "loss": 4.4976, "step": 1572 }, { "epoch": 0.0736380127567441, "grad_norm": 1.3203125, "learning_rate": 0.00019958407486011143, "loss": 4.5823, "step": 1573 }, { "epoch": 0.07368482649657675, "grad_norm": 1.1796875, "learning_rate": 0.0001995835428458201, "loss": 3.6952, "step": 1574 }, { "epoch": 0.07373164023640938, "grad_norm": 1.2890625, "learning_rate": 0.0001995830104922036, "loss": 4.1324, "step": 1575 }, { "epoch": 0.07377845397624203, "grad_norm": 1.3046875, "learning_rate": 0.00019958247779926376, "loss": 4.152, "step": 1576 }, { "epoch": 0.07382526771607467, "grad_norm": 1.609375, "learning_rate": 0.0001995819447670024, "loss": 4.1023, "step": 1577 }, { "epoch": 0.0738720814559073, "grad_norm": 1.28125, "learning_rate": 0.00019958141139542125, "loss": 4.1143, "step": 1578 }, { "epoch": 0.07391889519573995, "grad_norm": 1.3125, "learning_rate": 0.00019958087768452227, "loss": 4.4203, "step": 1579 }, { "epoch": 0.0739657089355726, "grad_norm": 1.1796875, "learning_rate": 0.00019958034363430716, "loss": 4.279, "step": 1580 }, { "epoch": 0.07401252267540523, "grad_norm": 1.765625, "learning_rate": 0.00019957980924477782, "loss": 4.198, "step": 1581 }, { "epoch": 0.07405933641523788, "grad_norm": 1.6328125, "learning_rate": 0.00019957927451593602, "loss": 4.4183, "step": 1582 }, { "epoch": 0.07410615015507051, "grad_norm": 1.1640625, "learning_rate": 0.00019957873944778358, "loss": 4.1097, "step": 1583 }, { "epoch": 0.07415296389490315, "grad_norm": 1.921875, "learning_rate": 0.00019957820404032238, "loss": 4.683, "step": 1584 }, { "epoch": 0.0741997776347358, "grad_norm": 1.5546875, "learning_rate": 0.00019957766829355417, "loss": 4.2457, "step": 1585 }, { "epoch": 0.07424659137456843, "grad_norm": 1.4609375, "learning_rate": 0.00019957713220748082, "loss": 3.9578, "step": 1586 }, { "epoch": 0.07429340511440108, "grad_norm": 1.546875, "learning_rate": 0.00019957659578210418, "loss": 4.1463, "step": 1587 }, { "epoch": 0.07434021885423371, "grad_norm": 1.6328125, "learning_rate": 0.00019957605901742598, "loss": 4.6912, "step": 1588 }, { "epoch": 0.07438703259406636, "grad_norm": 1.59375, "learning_rate": 0.00019957552191344818, "loss": 4.2062, "step": 1589 }, { "epoch": 0.074433846333899, "grad_norm": 1.234375, "learning_rate": 0.0001995749844701725, "loss": 4.2184, "step": 1590 }, { "epoch": 0.07448066007373164, "grad_norm": 1.875, "learning_rate": 0.00019957444668760086, "loss": 4.3618, "step": 1591 }, { "epoch": 0.07452747381356428, "grad_norm": 1.1953125, "learning_rate": 0.00019957390856573503, "loss": 4.3399, "step": 1592 }, { "epoch": 0.07457428755339693, "grad_norm": 1.671875, "learning_rate": 0.00019957337010457685, "loss": 4.1346, "step": 1593 }, { "epoch": 0.07462110129322956, "grad_norm": 1.21875, "learning_rate": 0.00019957283130412815, "loss": 3.1236, "step": 1594 }, { "epoch": 0.07466791503306221, "grad_norm": 1.1015625, "learning_rate": 0.00019957229216439082, "loss": 4.1792, "step": 1595 }, { "epoch": 0.07471472877289484, "grad_norm": 1.453125, "learning_rate": 0.00019957175268536665, "loss": 4.3685, "step": 1596 }, { "epoch": 0.07476154251272749, "grad_norm": 1.1484375, "learning_rate": 0.00019957121286705748, "loss": 4.3558, "step": 1597 }, { "epoch": 0.07480835625256013, "grad_norm": 1.2421875, "learning_rate": 0.00019957067270946513, "loss": 4.2528, "step": 1598 }, { "epoch": 0.07485516999239276, "grad_norm": 1.3984375, "learning_rate": 0.00019957013221259153, "loss": 4.2695, "step": 1599 }, { "epoch": 0.07490198373222541, "grad_norm": 1.390625, "learning_rate": 0.00019956959137643843, "loss": 4.432, "step": 1600 }, { "epoch": 0.07494879747205804, "grad_norm": 1.890625, "learning_rate": 0.00019956905020100768, "loss": 4.3367, "step": 1601 }, { "epoch": 0.07499561121189069, "grad_norm": 1.59375, "learning_rate": 0.00019956850868630118, "loss": 4.5927, "step": 1602 }, { "epoch": 0.07504242495172334, "grad_norm": 1.359375, "learning_rate": 0.00019956796683232074, "loss": 4.2196, "step": 1603 }, { "epoch": 0.07508923869155597, "grad_norm": 1.5, "learning_rate": 0.00019956742463906818, "loss": 4.6277, "step": 1604 }, { "epoch": 0.07513605243138861, "grad_norm": 1.3984375, "learning_rate": 0.00019956688210654542, "loss": 4.3052, "step": 1605 }, { "epoch": 0.07518286617122126, "grad_norm": 1.1640625, "learning_rate": 0.00019956633923475425, "loss": 4.1856, "step": 1606 }, { "epoch": 0.07522967991105389, "grad_norm": 1.21875, "learning_rate": 0.00019956579602369652, "loss": 4.282, "step": 1607 }, { "epoch": 0.07527649365088654, "grad_norm": 1.640625, "learning_rate": 0.00019956525247337407, "loss": 3.9687, "step": 1608 }, { "epoch": 0.07532330739071917, "grad_norm": 1.2421875, "learning_rate": 0.00019956470858378884, "loss": 4.238, "step": 1609 }, { "epoch": 0.07537012113055182, "grad_norm": 1.3828125, "learning_rate": 0.00019956416435494258, "loss": 4.0616, "step": 1610 }, { "epoch": 0.07541693487038446, "grad_norm": 1.5390625, "learning_rate": 0.00019956361978683718, "loss": 4.5828, "step": 1611 }, { "epoch": 0.0754637486102171, "grad_norm": 1.875, "learning_rate": 0.00019956307487947451, "loss": 3.9046, "step": 1612 }, { "epoch": 0.07551056235004974, "grad_norm": 1.2421875, "learning_rate": 0.00019956252963285645, "loss": 3.9659, "step": 1613 }, { "epoch": 0.07555737608988237, "grad_norm": 1.53125, "learning_rate": 0.00019956198404698475, "loss": 3.9043, "step": 1614 }, { "epoch": 0.07560418982971502, "grad_norm": 1.421875, "learning_rate": 0.00019956143812186142, "loss": 3.9079, "step": 1615 }, { "epoch": 0.07565100356954767, "grad_norm": 1.75, "learning_rate": 0.00019956089185748822, "loss": 3.8517, "step": 1616 }, { "epoch": 0.0756978173093803, "grad_norm": 1.390625, "learning_rate": 0.00019956034525386702, "loss": 4.2585, "step": 1617 }, { "epoch": 0.07574463104921295, "grad_norm": 1.5546875, "learning_rate": 0.00019955979831099972, "loss": 4.0788, "step": 1618 }, { "epoch": 0.07579144478904558, "grad_norm": 1.234375, "learning_rate": 0.00019955925102888814, "loss": 4.1957, "step": 1619 }, { "epoch": 0.07583825852887822, "grad_norm": 1.71875, "learning_rate": 0.00019955870340753417, "loss": 4.4448, "step": 1620 }, { "epoch": 0.07588507226871087, "grad_norm": 1.3046875, "learning_rate": 0.00019955815544693968, "loss": 4.4812, "step": 1621 }, { "epoch": 0.0759318860085435, "grad_norm": 1.546875, "learning_rate": 0.00019955760714710654, "loss": 4.2783, "step": 1622 }, { "epoch": 0.07597869974837615, "grad_norm": 1.09375, "learning_rate": 0.00019955705850803658, "loss": 3.3403, "step": 1623 }, { "epoch": 0.0760255134882088, "grad_norm": 1.203125, "learning_rate": 0.00019955650952973173, "loss": 4.1742, "step": 1624 }, { "epoch": 0.07607232722804143, "grad_norm": 1.3984375, "learning_rate": 0.00019955596021219382, "loss": 4.599, "step": 1625 }, { "epoch": 0.07611914096787407, "grad_norm": 1.796875, "learning_rate": 0.00019955541055542472, "loss": 4.0596, "step": 1626 }, { "epoch": 0.0761659547077067, "grad_norm": 1.375, "learning_rate": 0.00019955486055942633, "loss": 4.1577, "step": 1627 }, { "epoch": 0.07621276844753935, "grad_norm": 1.46875, "learning_rate": 0.00019955431022420048, "loss": 4.1659, "step": 1628 }, { "epoch": 0.076259582187372, "grad_norm": 1.4375, "learning_rate": 0.0001995537595497491, "loss": 4.5848, "step": 1629 }, { "epoch": 0.07630639592720463, "grad_norm": 1.28125, "learning_rate": 0.00019955320853607403, "loss": 3.9644, "step": 1630 }, { "epoch": 0.07635320966703728, "grad_norm": 1.53125, "learning_rate": 0.00019955265718317716, "loss": 4.379, "step": 1631 }, { "epoch": 0.07640002340686991, "grad_norm": 2.84375, "learning_rate": 0.00019955210549106033, "loss": 5.0249, "step": 1632 }, { "epoch": 0.07644683714670256, "grad_norm": 1.390625, "learning_rate": 0.0001995515534597255, "loss": 4.4164, "step": 1633 }, { "epoch": 0.0764936508865352, "grad_norm": 1.6015625, "learning_rate": 0.00019955100108917447, "loss": 4.3631, "step": 1634 }, { "epoch": 0.07654046462636784, "grad_norm": 1.8359375, "learning_rate": 0.0001995504483794092, "loss": 4.428, "step": 1635 }, { "epoch": 0.07658727836620048, "grad_norm": 1.4765625, "learning_rate": 0.00019954989533043152, "loss": 4.1747, "step": 1636 }, { "epoch": 0.07663409210603313, "grad_norm": 2.484375, "learning_rate": 0.0001995493419422433, "loss": 4.2197, "step": 1637 }, { "epoch": 0.07668090584586576, "grad_norm": 1.1015625, "learning_rate": 0.00019954878821484647, "loss": 3.9414, "step": 1638 }, { "epoch": 0.0767277195856984, "grad_norm": 1.3203125, "learning_rate": 0.0001995482341482429, "loss": 4.2227, "step": 1639 }, { "epoch": 0.07677453332553104, "grad_norm": 1.2265625, "learning_rate": 0.00019954767974243444, "loss": 4.0042, "step": 1640 }, { "epoch": 0.07682134706536368, "grad_norm": 1.2734375, "learning_rate": 0.00019954712499742306, "loss": 3.9691, "step": 1641 }, { "epoch": 0.07686816080519633, "grad_norm": 1.4375, "learning_rate": 0.00019954656991321058, "loss": 4.4957, "step": 1642 }, { "epoch": 0.07691497454502896, "grad_norm": 1.71875, "learning_rate": 0.00019954601448979894, "loss": 4.2444, "step": 1643 }, { "epoch": 0.07696178828486161, "grad_norm": 1.6171875, "learning_rate": 0.00019954545872719002, "loss": 4.4672, "step": 1644 }, { "epoch": 0.07700860202469424, "grad_norm": 1.2734375, "learning_rate": 0.0001995449026253857, "loss": 4.1914, "step": 1645 }, { "epoch": 0.07705541576452689, "grad_norm": 1.3359375, "learning_rate": 0.0001995443461843878, "loss": 4.484, "step": 1646 }, { "epoch": 0.07710222950435953, "grad_norm": 1.4375, "learning_rate": 0.00019954378940419836, "loss": 4.5305, "step": 1647 }, { "epoch": 0.07714904324419217, "grad_norm": 1.1796875, "learning_rate": 0.00019954323228481922, "loss": 4.4233, "step": 1648 }, { "epoch": 0.07719585698402481, "grad_norm": 1.4140625, "learning_rate": 0.00019954267482625226, "loss": 4.3982, "step": 1649 }, { "epoch": 0.07724267072385745, "grad_norm": 1.3125, "learning_rate": 0.0001995421170284994, "loss": 4.5802, "step": 1650 }, { "epoch": 0.07728948446369009, "grad_norm": 2.359375, "learning_rate": 0.00019954155889156253, "loss": 4.1848, "step": 1651 }, { "epoch": 0.07733629820352274, "grad_norm": 1.3125, "learning_rate": 0.00019954100041544352, "loss": 4.1589, "step": 1652 }, { "epoch": 0.07738311194335537, "grad_norm": 1.4765625, "learning_rate": 0.00019954044160014431, "loss": 4.108, "step": 1653 }, { "epoch": 0.07742992568318802, "grad_norm": 1.84375, "learning_rate": 0.00019953988244566682, "loss": 4.1862, "step": 1654 }, { "epoch": 0.07747673942302066, "grad_norm": 1.171875, "learning_rate": 0.0001995393229520129, "loss": 4.3147, "step": 1655 }, { "epoch": 0.0775235531628533, "grad_norm": 1.3828125, "learning_rate": 0.00019953876311918454, "loss": 3.9105, "step": 1656 }, { "epoch": 0.07757036690268594, "grad_norm": 1.6640625, "learning_rate": 0.00019953820294718358, "loss": 4.6309, "step": 1657 }, { "epoch": 0.07761718064251857, "grad_norm": 1.3359375, "learning_rate": 0.00019953764243601195, "loss": 4.3736, "step": 1658 }, { "epoch": 0.07766399438235122, "grad_norm": 1.2421875, "learning_rate": 0.00019953708158567152, "loss": 4.2349, "step": 1659 }, { "epoch": 0.07771080812218387, "grad_norm": 1.2890625, "learning_rate": 0.00019953652039616428, "loss": 4.1777, "step": 1660 }, { "epoch": 0.0777576218620165, "grad_norm": 2.15625, "learning_rate": 0.00019953595886749209, "loss": 4.5293, "step": 1661 }, { "epoch": 0.07780443560184915, "grad_norm": 1.2109375, "learning_rate": 0.00019953539699965687, "loss": 4.2007, "step": 1662 }, { "epoch": 0.07785124934168178, "grad_norm": 1.546875, "learning_rate": 0.00019953483479266053, "loss": 4.1508, "step": 1663 }, { "epoch": 0.07789806308151442, "grad_norm": 1.2109375, "learning_rate": 0.000199534272246505, "loss": 4.5331, "step": 1664 }, { "epoch": 0.07794487682134707, "grad_norm": 1.5546875, "learning_rate": 0.0001995337093611922, "loss": 4.1861, "step": 1665 }, { "epoch": 0.0779916905611797, "grad_norm": 1.3125, "learning_rate": 0.00019953314613672403, "loss": 3.1832, "step": 1666 }, { "epoch": 0.07803850430101235, "grad_norm": 1.5078125, "learning_rate": 0.0001995325825731024, "loss": 4.1763, "step": 1667 }, { "epoch": 0.078085318040845, "grad_norm": 1.6328125, "learning_rate": 0.0001995320186703293, "loss": 4.4109, "step": 1668 }, { "epoch": 0.07813213178067763, "grad_norm": 1.3984375, "learning_rate": 0.00019953145442840655, "loss": 4.4258, "step": 1669 }, { "epoch": 0.07817894552051027, "grad_norm": 1.2890625, "learning_rate": 0.00019953088984733615, "loss": 4.5424, "step": 1670 }, { "epoch": 0.0782257592603429, "grad_norm": 1.21875, "learning_rate": 0.00019953032492711997, "loss": 4.0085, "step": 1671 }, { "epoch": 0.07827257300017555, "grad_norm": 1.859375, "learning_rate": 0.00019952975966775998, "loss": 4.5076, "step": 1672 }, { "epoch": 0.0783193867400082, "grad_norm": 1.3125, "learning_rate": 0.00019952919406925808, "loss": 4.1624, "step": 1673 }, { "epoch": 0.07836620047984083, "grad_norm": 1.984375, "learning_rate": 0.00019952862813161622, "loss": 4.3441, "step": 1674 }, { "epoch": 0.07841301421967348, "grad_norm": 1.2421875, "learning_rate": 0.0001995280618548363, "loss": 4.4409, "step": 1675 }, { "epoch": 0.07845982795950611, "grad_norm": 1.4921875, "learning_rate": 0.00019952749523892026, "loss": 4.2786, "step": 1676 }, { "epoch": 0.07850664169933876, "grad_norm": 1.4453125, "learning_rate": 0.00019952692828387002, "loss": 4.5734, "step": 1677 }, { "epoch": 0.0785534554391714, "grad_norm": 1.078125, "learning_rate": 0.00019952636098968755, "loss": 4.3102, "step": 1678 }, { "epoch": 0.07860026917900403, "grad_norm": 1.8828125, "learning_rate": 0.00019952579335637475, "loss": 4.2409, "step": 1679 }, { "epoch": 0.07864708291883668, "grad_norm": 1.3125, "learning_rate": 0.00019952522538393356, "loss": 4.4147, "step": 1680 }, { "epoch": 0.07869389665866931, "grad_norm": 1.28125, "learning_rate": 0.00019952465707236591, "loss": 4.3155, "step": 1681 }, { "epoch": 0.07874071039850196, "grad_norm": 1.546875, "learning_rate": 0.00019952408842167376, "loss": 4.1399, "step": 1682 }, { "epoch": 0.0787875241383346, "grad_norm": 1.3984375, "learning_rate": 0.00019952351943185902, "loss": 4.6065, "step": 1683 }, { "epoch": 0.07883433787816724, "grad_norm": 1.578125, "learning_rate": 0.00019952295010292365, "loss": 4.6442, "step": 1684 }, { "epoch": 0.07888115161799988, "grad_norm": 1.4609375, "learning_rate": 0.00019952238043486958, "loss": 4.6386, "step": 1685 }, { "epoch": 0.07892796535783253, "grad_norm": 1.328125, "learning_rate": 0.00019952181042769872, "loss": 4.5032, "step": 1686 }, { "epoch": 0.07897477909766516, "grad_norm": 1.328125, "learning_rate": 0.00019952124008141309, "loss": 4.0384, "step": 1687 }, { "epoch": 0.07902159283749781, "grad_norm": 1.6171875, "learning_rate": 0.00019952066939601454, "loss": 4.2034, "step": 1688 }, { "epoch": 0.07906840657733044, "grad_norm": 1.2109375, "learning_rate": 0.0001995200983715051, "loss": 4.3161, "step": 1689 }, { "epoch": 0.07911522031716309, "grad_norm": 1.1875, "learning_rate": 0.00019951952700788668, "loss": 3.8893, "step": 1690 }, { "epoch": 0.07916203405699573, "grad_norm": 1.4140625, "learning_rate": 0.0001995189553051612, "loss": 4.3087, "step": 1691 }, { "epoch": 0.07920884779682837, "grad_norm": 1.4140625, "learning_rate": 0.00019951838326333064, "loss": 3.8462, "step": 1692 }, { "epoch": 0.07925566153666101, "grad_norm": 1.4296875, "learning_rate": 0.00019951781088239695, "loss": 4.1497, "step": 1693 }, { "epoch": 0.07930247527649364, "grad_norm": 1.5, "learning_rate": 0.00019951723816236206, "loss": 4.4111, "step": 1694 }, { "epoch": 0.07934928901632629, "grad_norm": 1.21875, "learning_rate": 0.00019951666510322792, "loss": 4.2822, "step": 1695 }, { "epoch": 0.07939610275615894, "grad_norm": 1.9609375, "learning_rate": 0.00019951609170499652, "loss": 4.0627, "step": 1696 }, { "epoch": 0.07944291649599157, "grad_norm": 1.40625, "learning_rate": 0.00019951551796766978, "loss": 4.0207, "step": 1697 }, { "epoch": 0.07948973023582422, "grad_norm": 1.515625, "learning_rate": 0.00019951494389124967, "loss": 3.968, "step": 1698 }, { "epoch": 0.07953654397565686, "grad_norm": 1.265625, "learning_rate": 0.00019951436947573815, "loss": 4.5268, "step": 1699 }, { "epoch": 0.0795833577154895, "grad_norm": 1.59375, "learning_rate": 0.00019951379472113714, "loss": 4.2061, "step": 1700 }, { "epoch": 0.07963017145532214, "grad_norm": 1.1875, "learning_rate": 0.00019951321962744865, "loss": 3.868, "step": 1701 }, { "epoch": 0.07967698519515477, "grad_norm": 1.53125, "learning_rate": 0.00019951264419467462, "loss": 4.0985, "step": 1702 }, { "epoch": 0.07972379893498742, "grad_norm": 1.7734375, "learning_rate": 0.00019951206842281697, "loss": 4.6774, "step": 1703 }, { "epoch": 0.07977061267482007, "grad_norm": 1.3359375, "learning_rate": 0.00019951149231187777, "loss": 3.8954, "step": 1704 }, { "epoch": 0.0798174264146527, "grad_norm": 1.203125, "learning_rate": 0.00019951091586185884, "loss": 4.3346, "step": 1705 }, { "epoch": 0.07986424015448534, "grad_norm": 1.4453125, "learning_rate": 0.00019951033907276224, "loss": 3.9927, "step": 1706 }, { "epoch": 0.07991105389431798, "grad_norm": 1.484375, "learning_rate": 0.00019950976194458992, "loss": 4.5253, "step": 1707 }, { "epoch": 0.07995786763415062, "grad_norm": 1.171875, "learning_rate": 0.00019950918447734383, "loss": 4.3777, "step": 1708 }, { "epoch": 0.08000468137398327, "grad_norm": 1.53125, "learning_rate": 0.00019950860667102596, "loss": 4.042, "step": 1709 }, { "epoch": 0.0800514951138159, "grad_norm": 1.2578125, "learning_rate": 0.00019950802852563825, "loss": 4.1731, "step": 1710 }, { "epoch": 0.08009830885364855, "grad_norm": 1.203125, "learning_rate": 0.0001995074500411827, "loss": 4.2395, "step": 1711 }, { "epoch": 0.08014512259348118, "grad_norm": 1.0625, "learning_rate": 0.00019950687121766123, "loss": 4.2033, "step": 1712 }, { "epoch": 0.08019193633331383, "grad_norm": 1.375, "learning_rate": 0.00019950629205507588, "loss": 4.325, "step": 1713 }, { "epoch": 0.08023875007314647, "grad_norm": 1.234375, "learning_rate": 0.00019950571255342858, "loss": 4.1192, "step": 1714 }, { "epoch": 0.0802855638129791, "grad_norm": 1.1328125, "learning_rate": 0.00019950513271272134, "loss": 4.3361, "step": 1715 }, { "epoch": 0.08033237755281175, "grad_norm": 1.59375, "learning_rate": 0.00019950455253295608, "loss": 3.8601, "step": 1716 }, { "epoch": 0.0803791912926444, "grad_norm": 1.3828125, "learning_rate": 0.00019950397201413483, "loss": 4.5227, "step": 1717 }, { "epoch": 0.08042600503247703, "grad_norm": 1.4921875, "learning_rate": 0.00019950339115625953, "loss": 4.2166, "step": 1718 }, { "epoch": 0.08047281877230968, "grad_norm": 1.328125, "learning_rate": 0.00019950280995933216, "loss": 4.3574, "step": 1719 }, { "epoch": 0.08051963251214231, "grad_norm": 1.2421875, "learning_rate": 0.00019950222842335475, "loss": 4.0889, "step": 1720 }, { "epoch": 0.08056644625197495, "grad_norm": 1.25, "learning_rate": 0.00019950164654832927, "loss": 4.3089, "step": 1721 }, { "epoch": 0.0806132599918076, "grad_norm": 1.3984375, "learning_rate": 0.00019950106433425763, "loss": 4.2592, "step": 1722 }, { "epoch": 0.08066007373164023, "grad_norm": 1.28125, "learning_rate": 0.0001995004817811419, "loss": 4.2371, "step": 1723 }, { "epoch": 0.08070688747147288, "grad_norm": 1.7109375, "learning_rate": 0.000199499898888984, "loss": 4.147, "step": 1724 }, { "epoch": 0.08075370121130551, "grad_norm": 1.734375, "learning_rate": 0.00019949931565778597, "loss": 4.5843, "step": 1725 }, { "epoch": 0.08080051495113816, "grad_norm": 1.875, "learning_rate": 0.00019949873208754975, "loss": 4.7118, "step": 1726 }, { "epoch": 0.0808473286909708, "grad_norm": 1.453125, "learning_rate": 0.00019949814817827737, "loss": 3.9458, "step": 1727 }, { "epoch": 0.08089414243080344, "grad_norm": 0.9765625, "learning_rate": 0.0001994975639299708, "loss": 4.9717, "step": 1728 }, { "epoch": 0.08094095617063608, "grad_norm": 1.4296875, "learning_rate": 0.00019949697934263202, "loss": 3.9344, "step": 1729 }, { "epoch": 0.08098776991046873, "grad_norm": 1.40625, "learning_rate": 0.00019949639441626307, "loss": 4.3616, "step": 1730 }, { "epoch": 0.08103458365030136, "grad_norm": 1.4140625, "learning_rate": 0.00019949580915086586, "loss": 4.4175, "step": 1731 }, { "epoch": 0.08108139739013401, "grad_norm": 1.8125, "learning_rate": 0.00019949522354644245, "loss": 4.0824, "step": 1732 }, { "epoch": 0.08112821112996664, "grad_norm": 1.0703125, "learning_rate": 0.00019949463760299483, "loss": 4.0507, "step": 1733 }, { "epoch": 0.08117502486979929, "grad_norm": 1.5234375, "learning_rate": 0.00019949405132052496, "loss": 4.1428, "step": 1734 }, { "epoch": 0.08122183860963193, "grad_norm": 1.140625, "learning_rate": 0.00019949346469903486, "loss": 4.0593, "step": 1735 }, { "epoch": 0.08126865234946457, "grad_norm": 2.03125, "learning_rate": 0.00019949287773852656, "loss": 4.495, "step": 1736 }, { "epoch": 0.08131546608929721, "grad_norm": 2.140625, "learning_rate": 0.000199492290439002, "loss": 3.8415, "step": 1737 }, { "epoch": 0.08136227982912984, "grad_norm": 1.265625, "learning_rate": 0.00019949170280046324, "loss": 4.2416, "step": 1738 }, { "epoch": 0.08140909356896249, "grad_norm": 1.3203125, "learning_rate": 0.00019949111482291224, "loss": 4.3844, "step": 1739 }, { "epoch": 0.08145590730879514, "grad_norm": 1.4296875, "learning_rate": 0.000199490526506351, "loss": 4.0648, "step": 1740 }, { "epoch": 0.08150272104862777, "grad_norm": 1.421875, "learning_rate": 0.00019948993785078156, "loss": 4.4911, "step": 1741 }, { "epoch": 0.08154953478846041, "grad_norm": 1.3046875, "learning_rate": 0.00019948934885620593, "loss": 4.0402, "step": 1742 }, { "epoch": 0.08159634852829305, "grad_norm": 1.7265625, "learning_rate": 0.00019948875952262605, "loss": 4.2618, "step": 1743 }, { "epoch": 0.0816431622681257, "grad_norm": 1.234375, "learning_rate": 0.000199488169850044, "loss": 4.0186, "step": 1744 }, { "epoch": 0.08168997600795834, "grad_norm": 1.765625, "learning_rate": 0.00019948757983846176, "loss": 4.0379, "step": 1745 }, { "epoch": 0.08173678974779097, "grad_norm": 1.2109375, "learning_rate": 0.00019948698948788133, "loss": 3.9761, "step": 1746 }, { "epoch": 0.08178360348762362, "grad_norm": 1.3203125, "learning_rate": 0.00019948639879830473, "loss": 3.7311, "step": 1747 }, { "epoch": 0.08183041722745626, "grad_norm": 1.453125, "learning_rate": 0.000199485807769734, "loss": 3.9275, "step": 1748 }, { "epoch": 0.0818772309672889, "grad_norm": 1.3125, "learning_rate": 0.00019948521640217112, "loss": 4.3417, "step": 1749 }, { "epoch": 0.08192404470712154, "grad_norm": 1.546875, "learning_rate": 0.00019948462469561809, "loss": 4.287, "step": 1750 }, { "epoch": 0.08197085844695418, "grad_norm": 1.5703125, "learning_rate": 0.00019948403265007697, "loss": 4.43, "step": 1751 }, { "epoch": 0.08201767218678682, "grad_norm": 1.5078125, "learning_rate": 0.00019948344026554977, "loss": 4.0256, "step": 1752 }, { "epoch": 0.08206448592661947, "grad_norm": 1.6484375, "learning_rate": 0.0001994828475420385, "loss": 4.3007, "step": 1753 }, { "epoch": 0.0821112996664521, "grad_norm": 1.65625, "learning_rate": 0.00019948225447954517, "loss": 4.5438, "step": 1754 }, { "epoch": 0.08215811340628475, "grad_norm": 1.7421875, "learning_rate": 0.0001994816610780718, "loss": 4.1808, "step": 1755 }, { "epoch": 0.08220492714611738, "grad_norm": 1.9375, "learning_rate": 0.00019948106733762045, "loss": 4.4482, "step": 1756 }, { "epoch": 0.08225174088595003, "grad_norm": 1.640625, "learning_rate": 0.00019948047325819306, "loss": 4.2734, "step": 1757 }, { "epoch": 0.08229855462578267, "grad_norm": 1.34375, "learning_rate": 0.00019947987883979176, "loss": 4.3639, "step": 1758 }, { "epoch": 0.0823453683656153, "grad_norm": 1.421875, "learning_rate": 0.0001994792840824185, "loss": 4.2892, "step": 1759 }, { "epoch": 0.08239218210544795, "grad_norm": 1.609375, "learning_rate": 0.0001994786889860753, "loss": 3.7472, "step": 1760 }, { "epoch": 0.0824389958452806, "grad_norm": 1.4765625, "learning_rate": 0.00019947809355076425, "loss": 4.2367, "step": 1761 }, { "epoch": 0.08248580958511323, "grad_norm": 1.4765625, "learning_rate": 0.00019947749777648736, "loss": 4.1837, "step": 1762 }, { "epoch": 0.08253262332494588, "grad_norm": 1.25, "learning_rate": 0.00019947690166324664, "loss": 4.4022, "step": 1763 }, { "epoch": 0.08257943706477851, "grad_norm": 1.578125, "learning_rate": 0.00019947630521104413, "loss": 4.6189, "step": 1764 }, { "epoch": 0.08262625080461115, "grad_norm": 1.171875, "learning_rate": 0.00019947570841988182, "loss": 4.1182, "step": 1765 }, { "epoch": 0.0826730645444438, "grad_norm": 1.1640625, "learning_rate": 0.0001994751112897618, "loss": 4.5098, "step": 1766 }, { "epoch": 0.08271987828427643, "grad_norm": 1.53125, "learning_rate": 0.0001994745138206861, "loss": 4.0658, "step": 1767 }, { "epoch": 0.08276669202410908, "grad_norm": 1.4375, "learning_rate": 0.00019947391601265676, "loss": 3.6775, "step": 1768 }, { "epoch": 0.08281350576394171, "grad_norm": 1.4296875, "learning_rate": 0.0001994733178656758, "loss": 3.533, "step": 1769 }, { "epoch": 0.08286031950377436, "grad_norm": 1.21875, "learning_rate": 0.00019947271937974524, "loss": 4.0988, "step": 1770 }, { "epoch": 0.082907133243607, "grad_norm": 1.4375, "learning_rate": 0.00019947212055486713, "loss": 4.1671, "step": 1771 }, { "epoch": 0.08295394698343964, "grad_norm": 1.46875, "learning_rate": 0.00019947152139104354, "loss": 4.4364, "step": 1772 }, { "epoch": 0.08300076072327228, "grad_norm": 1.5859375, "learning_rate": 0.00019947092188827648, "loss": 4.4394, "step": 1773 }, { "epoch": 0.08304757446310491, "grad_norm": 1.25, "learning_rate": 0.000199470322046568, "loss": 4.2268, "step": 1774 }, { "epoch": 0.08309438820293756, "grad_norm": 1.3046875, "learning_rate": 0.00019946972186592017, "loss": 3.8439, "step": 1775 }, { "epoch": 0.08314120194277021, "grad_norm": 1.328125, "learning_rate": 0.00019946912134633497, "loss": 4.537, "step": 1776 }, { "epoch": 0.08318801568260284, "grad_norm": 1.296875, "learning_rate": 0.00019946852048781453, "loss": 4.1445, "step": 1777 }, { "epoch": 0.08323482942243549, "grad_norm": 1.171875, "learning_rate": 0.00019946791929036084, "loss": 4.1729, "step": 1778 }, { "epoch": 0.08328164316226813, "grad_norm": 1.515625, "learning_rate": 0.00019946731775397596, "loss": 4.2356, "step": 1779 }, { "epoch": 0.08332845690210076, "grad_norm": 1.4296875, "learning_rate": 0.00019946671587866196, "loss": 4.6139, "step": 1780 }, { "epoch": 0.08337527064193341, "grad_norm": 1.84375, "learning_rate": 0.00019946611366442087, "loss": 4.1845, "step": 1781 }, { "epoch": 0.08342208438176604, "grad_norm": 1.4296875, "learning_rate": 0.00019946551111125473, "loss": 4.091, "step": 1782 }, { "epoch": 0.08346889812159869, "grad_norm": 1.34375, "learning_rate": 0.00019946490821916562, "loss": 4.0914, "step": 1783 }, { "epoch": 0.08351571186143134, "grad_norm": 1.265625, "learning_rate": 0.00019946430498815558, "loss": 4.5811, "step": 1784 }, { "epoch": 0.08356252560126397, "grad_norm": 1.46875, "learning_rate": 0.0001994637014182267, "loss": 4.4177, "step": 1785 }, { "epoch": 0.08360933934109661, "grad_norm": 1.6796875, "learning_rate": 0.00019946309750938098, "loss": 4.4947, "step": 1786 }, { "epoch": 0.08365615308092925, "grad_norm": 1.5859375, "learning_rate": 0.0001994624932616205, "loss": 3.9319, "step": 1787 }, { "epoch": 0.08370296682076189, "grad_norm": 1.4765625, "learning_rate": 0.00019946188867494733, "loss": 3.9959, "step": 1788 }, { "epoch": 0.08374978056059454, "grad_norm": 1.6015625, "learning_rate": 0.00019946128374936353, "loss": 4.6528, "step": 1789 }, { "epoch": 0.08379659430042717, "grad_norm": 1.3671875, "learning_rate": 0.00019946067848487114, "loss": 4.325, "step": 1790 }, { "epoch": 0.08384340804025982, "grad_norm": 1.4375, "learning_rate": 0.00019946007288147225, "loss": 4.2572, "step": 1791 }, { "epoch": 0.08389022178009246, "grad_norm": 1.421875, "learning_rate": 0.0001994594669391689, "loss": 4.2125, "step": 1792 }, { "epoch": 0.0839370355199251, "grad_norm": 1.453125, "learning_rate": 0.00019945886065796318, "loss": 4.5101, "step": 1793 }, { "epoch": 0.08398384925975774, "grad_norm": 1.4140625, "learning_rate": 0.00019945825403785712, "loss": 3.9409, "step": 1794 }, { "epoch": 0.08403066299959037, "grad_norm": 1.3125, "learning_rate": 0.00019945764707885285, "loss": 4.2455, "step": 1795 }, { "epoch": 0.08407747673942302, "grad_norm": 1.21875, "learning_rate": 0.00019945703978095235, "loss": 4.0174, "step": 1796 }, { "epoch": 0.08412429047925567, "grad_norm": 1.171875, "learning_rate": 0.00019945643214415773, "loss": 4.0623, "step": 1797 }, { "epoch": 0.0841711042190883, "grad_norm": 1.5703125, "learning_rate": 0.0001994558241684711, "loss": 4.5115, "step": 1798 }, { "epoch": 0.08421791795892095, "grad_norm": 1.3515625, "learning_rate": 0.0001994552158538945, "loss": 3.6403, "step": 1799 }, { "epoch": 0.08426473169875358, "grad_norm": 2.21875, "learning_rate": 0.00019945460720043002, "loss": 4.6772, "step": 1800 }, { "epoch": 0.08431154543858622, "grad_norm": 1.4609375, "learning_rate": 0.00019945399820807966, "loss": 4.1609, "step": 1801 }, { "epoch": 0.08435835917841887, "grad_norm": 1.671875, "learning_rate": 0.0001994533888768456, "loss": 4.6516, "step": 1802 }, { "epoch": 0.0844051729182515, "grad_norm": 1.515625, "learning_rate": 0.00019945277920672982, "loss": 4.2197, "step": 1803 }, { "epoch": 0.08445198665808415, "grad_norm": 1.359375, "learning_rate": 0.00019945216919773446, "loss": 4.2304, "step": 1804 }, { "epoch": 0.08449880039791678, "grad_norm": 1.359375, "learning_rate": 0.0001994515588498616, "loss": 4.4883, "step": 1805 }, { "epoch": 0.08454561413774943, "grad_norm": 1.2265625, "learning_rate": 0.00019945094816311326, "loss": 4.0711, "step": 1806 }, { "epoch": 0.08459242787758207, "grad_norm": 1.28125, "learning_rate": 0.0001994503371374916, "loss": 3.7202, "step": 1807 }, { "epoch": 0.0846392416174147, "grad_norm": 1.25, "learning_rate": 0.00019944972577299867, "loss": 4.1501, "step": 1808 }, { "epoch": 0.08468605535724735, "grad_norm": 1.21875, "learning_rate": 0.00019944911406963653, "loss": 3.7724, "step": 1809 }, { "epoch": 0.08473286909708, "grad_norm": 1.1953125, "learning_rate": 0.00019944850202740727, "loss": 4.166, "step": 1810 }, { "epoch": 0.08477968283691263, "grad_norm": 1.25, "learning_rate": 0.000199447889646313, "loss": 4.2814, "step": 1811 }, { "epoch": 0.08482649657674528, "grad_norm": 1.109375, "learning_rate": 0.00019944727692635582, "loss": 3.9333, "step": 1812 }, { "epoch": 0.08487331031657791, "grad_norm": 1.7578125, "learning_rate": 0.00019944666386753775, "loss": 4.5592, "step": 1813 }, { "epoch": 0.08492012405641056, "grad_norm": 1.046875, "learning_rate": 0.00019944605046986094, "loss": 3.7361, "step": 1814 }, { "epoch": 0.0849669377962432, "grad_norm": 1.28125, "learning_rate": 0.00019944543673332748, "loss": 4.167, "step": 1815 }, { "epoch": 0.08501375153607584, "grad_norm": 1.203125, "learning_rate": 0.00019944482265793942, "loss": 4.2197, "step": 1816 }, { "epoch": 0.08506056527590848, "grad_norm": 1.328125, "learning_rate": 0.00019944420824369888, "loss": 4.486, "step": 1817 }, { "epoch": 0.08510737901574111, "grad_norm": 1.75, "learning_rate": 0.00019944359349060794, "loss": 4.2691, "step": 1818 }, { "epoch": 0.08515419275557376, "grad_norm": 1.8828125, "learning_rate": 0.0001994429783986687, "loss": 4.7107, "step": 1819 }, { "epoch": 0.0852010064954064, "grad_norm": 1.2109375, "learning_rate": 0.00019944236296788327, "loss": 4.3362, "step": 1820 }, { "epoch": 0.08524782023523904, "grad_norm": 1.5078125, "learning_rate": 0.00019944174719825375, "loss": 4.2838, "step": 1821 }, { "epoch": 0.08529463397507168, "grad_norm": 1.5625, "learning_rate": 0.00019944113108978222, "loss": 4.1366, "step": 1822 }, { "epoch": 0.08534144771490433, "grad_norm": 1.171875, "learning_rate": 0.00019944051464247076, "loss": 4.2469, "step": 1823 }, { "epoch": 0.08538826145473696, "grad_norm": 1.859375, "learning_rate": 0.0001994398978563215, "loss": 3.881, "step": 1824 }, { "epoch": 0.08543507519456961, "grad_norm": 1.1875, "learning_rate": 0.00019943928073133655, "loss": 3.973, "step": 1825 }, { "epoch": 0.08548188893440224, "grad_norm": 1.4140625, "learning_rate": 0.000199438663267518, "loss": 3.8404, "step": 1826 }, { "epoch": 0.08552870267423489, "grad_norm": 1.3671875, "learning_rate": 0.00019943804546486795, "loss": 4.2076, "step": 1827 }, { "epoch": 0.08557551641406753, "grad_norm": 1.203125, "learning_rate": 0.00019943742732338852, "loss": 4.2493, "step": 1828 }, { "epoch": 0.08562233015390017, "grad_norm": 1.3984375, "learning_rate": 0.00019943680884308175, "loss": 4.2523, "step": 1829 }, { "epoch": 0.08566914389373281, "grad_norm": 1.6875, "learning_rate": 0.00019943619002394986, "loss": 4.4484, "step": 1830 }, { "epoch": 0.08571595763356545, "grad_norm": 1.53125, "learning_rate": 0.00019943557086599486, "loss": 4.0627, "step": 1831 }, { "epoch": 0.08576277137339809, "grad_norm": 1.4453125, "learning_rate": 0.00019943495136921893, "loss": 4.5532, "step": 1832 }, { "epoch": 0.08580958511323074, "grad_norm": 1.59375, "learning_rate": 0.00019943433153362414, "loss": 4.2099, "step": 1833 }, { "epoch": 0.08585639885306337, "grad_norm": 1.234375, "learning_rate": 0.00019943371135921264, "loss": 3.2455, "step": 1834 }, { "epoch": 0.08590321259289602, "grad_norm": 1.125, "learning_rate": 0.0001994330908459865, "loss": 3.6636, "step": 1835 }, { "epoch": 0.08595002633272865, "grad_norm": 1.421875, "learning_rate": 0.00019943246999394782, "loss": 4.3175, "step": 1836 }, { "epoch": 0.0859968400725613, "grad_norm": 1.4140625, "learning_rate": 0.00019943184880309877, "loss": 3.8209, "step": 1837 }, { "epoch": 0.08604365381239394, "grad_norm": 1.25, "learning_rate": 0.00019943122727344144, "loss": 3.0241, "step": 1838 }, { "epoch": 0.08609046755222657, "grad_norm": 1.3203125, "learning_rate": 0.00019943060540497795, "loss": 3.6718, "step": 1839 }, { "epoch": 0.08613728129205922, "grad_norm": 2.625, "learning_rate": 0.0001994299831977104, "loss": 3.9091, "step": 1840 }, { "epoch": 0.08618409503189187, "grad_norm": 1.5546875, "learning_rate": 0.00019942936065164095, "loss": 3.8772, "step": 1841 }, { "epoch": 0.0862309087717245, "grad_norm": 1.765625, "learning_rate": 0.00019942873776677172, "loss": 4.4213, "step": 1842 }, { "epoch": 0.08627772251155715, "grad_norm": 2.03125, "learning_rate": 0.00019942811454310477, "loss": 3.8077, "step": 1843 }, { "epoch": 0.08632453625138978, "grad_norm": 1.546875, "learning_rate": 0.0001994274909806423, "loss": 4.4257, "step": 1844 }, { "epoch": 0.08637134999122242, "grad_norm": 1.4296875, "learning_rate": 0.0001994268670793864, "loss": 4.0536, "step": 1845 }, { "epoch": 0.08641816373105507, "grad_norm": 1.5859375, "learning_rate": 0.00019942624283933918, "loss": 4.705, "step": 1846 }, { "epoch": 0.0864649774708877, "grad_norm": 1.3046875, "learning_rate": 0.0001994256182605028, "loss": 4.2332, "step": 1847 }, { "epoch": 0.08651179121072035, "grad_norm": 2.09375, "learning_rate": 0.00019942499334287935, "loss": 3.8992, "step": 1848 }, { "epoch": 0.08655860495055298, "grad_norm": 1.390625, "learning_rate": 0.000199424368086471, "loss": 4.024, "step": 1849 }, { "epoch": 0.08660541869038563, "grad_norm": 1.265625, "learning_rate": 0.0001994237424912799, "loss": 4.6253, "step": 1850 }, { "epoch": 0.08665223243021827, "grad_norm": 1.1171875, "learning_rate": 0.00019942311655730808, "loss": 4.2433, "step": 1851 }, { "epoch": 0.0866990461700509, "grad_norm": 1.140625, "learning_rate": 0.00019942249028455776, "loss": 5.0787, "step": 1852 }, { "epoch": 0.08674585990988355, "grad_norm": 1.1640625, "learning_rate": 0.00019942186367303104, "loss": 4.3922, "step": 1853 }, { "epoch": 0.0867926736497162, "grad_norm": 1.109375, "learning_rate": 0.0001994212367227301, "loss": 3.6125, "step": 1854 }, { "epoch": 0.08683948738954883, "grad_norm": 1.109375, "learning_rate": 0.00019942060943365703, "loss": 3.9832, "step": 1855 }, { "epoch": 0.08688630112938148, "grad_norm": 1.2421875, "learning_rate": 0.00019941998180581397, "loss": 4.1064, "step": 1856 }, { "epoch": 0.08693311486921411, "grad_norm": 1.5078125, "learning_rate": 0.00019941935383920308, "loss": 4.2146, "step": 1857 }, { "epoch": 0.08697992860904676, "grad_norm": 1.765625, "learning_rate": 0.00019941872553382647, "loss": 3.8757, "step": 1858 }, { "epoch": 0.0870267423488794, "grad_norm": 1.25, "learning_rate": 0.0001994180968896863, "loss": 4.1285, "step": 1859 }, { "epoch": 0.08707355608871203, "grad_norm": 1.2421875, "learning_rate": 0.00019941746790678473, "loss": 4.2135, "step": 1860 }, { "epoch": 0.08712036982854468, "grad_norm": 1.2109375, "learning_rate": 0.00019941683858512387, "loss": 4.3154, "step": 1861 }, { "epoch": 0.08716718356837731, "grad_norm": 1.6015625, "learning_rate": 0.00019941620892470588, "loss": 3.9445, "step": 1862 }, { "epoch": 0.08721399730820996, "grad_norm": 1.328125, "learning_rate": 0.00019941557892553293, "loss": 3.9124, "step": 1863 }, { "epoch": 0.0872608110480426, "grad_norm": 1.359375, "learning_rate": 0.00019941494858760712, "loss": 3.8485, "step": 1864 }, { "epoch": 0.08730762478787524, "grad_norm": 1.578125, "learning_rate": 0.00019941431791093062, "loss": 4.1088, "step": 1865 }, { "epoch": 0.08735443852770788, "grad_norm": 1.5, "learning_rate": 0.00019941368689550557, "loss": 3.6945, "step": 1866 }, { "epoch": 0.08740125226754052, "grad_norm": 1.296875, "learning_rate": 0.00019941305554133413, "loss": 3.9135, "step": 1867 }, { "epoch": 0.08744806600737316, "grad_norm": 1.4375, "learning_rate": 0.00019941242384841846, "loss": 4.0733, "step": 1868 }, { "epoch": 0.08749487974720581, "grad_norm": 1.4375, "learning_rate": 0.0001994117918167607, "loss": 4.0204, "step": 1869 }, { "epoch": 0.08754169348703844, "grad_norm": 1.359375, "learning_rate": 0.00019941115944636302, "loss": 4.2207, "step": 1870 }, { "epoch": 0.08758850722687109, "grad_norm": 1.1640625, "learning_rate": 0.00019941052673722753, "loss": 4.1941, "step": 1871 }, { "epoch": 0.08763532096670373, "grad_norm": 1.171875, "learning_rate": 0.00019940989368935643, "loss": 3.8352, "step": 1872 }, { "epoch": 0.08768213470653637, "grad_norm": 0.98828125, "learning_rate": 0.00019940926030275188, "loss": 3.6871, "step": 1873 }, { "epoch": 0.08772894844636901, "grad_norm": 1.3125, "learning_rate": 0.000199408626577416, "loss": 3.9434, "step": 1874 }, { "epoch": 0.08777576218620164, "grad_norm": 1.3125, "learning_rate": 0.00019940799251335096, "loss": 4.0476, "step": 1875 }, { "epoch": 0.08782257592603429, "grad_norm": 1.1328125, "learning_rate": 0.00019940735811055897, "loss": 4.049, "step": 1876 }, { "epoch": 0.08786938966586694, "grad_norm": 1.5390625, "learning_rate": 0.0001994067233690421, "loss": 3.3605, "step": 1877 }, { "epoch": 0.08791620340569957, "grad_norm": 1.703125, "learning_rate": 0.0001994060882888026, "loss": 4.028, "step": 1878 }, { "epoch": 0.08796301714553222, "grad_norm": 1.203125, "learning_rate": 0.0001994054528698426, "loss": 4.0629, "step": 1879 }, { "epoch": 0.08800983088536485, "grad_norm": 1.1328125, "learning_rate": 0.0001994048171121643, "loss": 4.1891, "step": 1880 }, { "epoch": 0.0880566446251975, "grad_norm": 1.2578125, "learning_rate": 0.00019940418101576976, "loss": 4.0581, "step": 1881 }, { "epoch": 0.08810345836503014, "grad_norm": 1.4609375, "learning_rate": 0.0001994035445806613, "loss": 4.0638, "step": 1882 }, { "epoch": 0.08815027210486277, "grad_norm": 1.3671875, "learning_rate": 0.00019940290780684096, "loss": 4.4492, "step": 1883 }, { "epoch": 0.08819708584469542, "grad_norm": 1.25, "learning_rate": 0.00019940227069431093, "loss": 3.9404, "step": 1884 }, { "epoch": 0.08824389958452807, "grad_norm": 1.3671875, "learning_rate": 0.00019940163324307344, "loss": 4.2707, "step": 1885 }, { "epoch": 0.0882907133243607, "grad_norm": 1.421875, "learning_rate": 0.00019940099545313062, "loss": 4.2895, "step": 1886 }, { "epoch": 0.08833752706419334, "grad_norm": 1.34375, "learning_rate": 0.00019940035732448467, "loss": 3.8552, "step": 1887 }, { "epoch": 0.08838434080402598, "grad_norm": 1.1953125, "learning_rate": 0.00019939971885713775, "loss": 4.2213, "step": 1888 }, { "epoch": 0.08843115454385862, "grad_norm": 1.125, "learning_rate": 0.000199399080051092, "loss": 4.1029, "step": 1889 }, { "epoch": 0.08847796828369127, "grad_norm": 1.6015625, "learning_rate": 0.00019939844090634967, "loss": 4.6895, "step": 1890 }, { "epoch": 0.0885247820235239, "grad_norm": 1.3671875, "learning_rate": 0.0001993978014229129, "loss": 3.8934, "step": 1891 }, { "epoch": 0.08857159576335655, "grad_norm": 1.59375, "learning_rate": 0.00019939716160078384, "loss": 4.0205, "step": 1892 }, { "epoch": 0.08861840950318918, "grad_norm": 1.6796875, "learning_rate": 0.00019939652143996468, "loss": 4.1611, "step": 1893 }, { "epoch": 0.08866522324302183, "grad_norm": 1.6171875, "learning_rate": 0.00019939588094045765, "loss": 4.0659, "step": 1894 }, { "epoch": 0.08871203698285447, "grad_norm": 1.2109375, "learning_rate": 0.00019939524010226492, "loss": 4.6465, "step": 1895 }, { "epoch": 0.0887588507226871, "grad_norm": 1.40625, "learning_rate": 0.00019939459892538862, "loss": 3.0875, "step": 1896 }, { "epoch": 0.08880566446251975, "grad_norm": 1.359375, "learning_rate": 0.000199393957409831, "loss": 4.1672, "step": 1897 }, { "epoch": 0.0888524782023524, "grad_norm": 1.265625, "learning_rate": 0.00019939331555559418, "loss": 3.8891, "step": 1898 }, { "epoch": 0.08889929194218503, "grad_norm": 1.203125, "learning_rate": 0.0001993926733626804, "loss": 4.0118, "step": 1899 }, { "epoch": 0.08894610568201768, "grad_norm": 2.0625, "learning_rate": 0.0001993920308310918, "loss": 4.2854, "step": 1900 }, { "epoch": 0.08899291942185031, "grad_norm": 1.4296875, "learning_rate": 0.00019939138796083063, "loss": 4.3214, "step": 1901 }, { "epoch": 0.08903973316168295, "grad_norm": 1.3046875, "learning_rate": 0.00019939074475189904, "loss": 4.4256, "step": 1902 }, { "epoch": 0.0890865469015156, "grad_norm": 1.3046875, "learning_rate": 0.00019939010120429925, "loss": 4.5893, "step": 1903 }, { "epoch": 0.08913336064134823, "grad_norm": 1.3203125, "learning_rate": 0.00019938945731803344, "loss": 3.7435, "step": 1904 }, { "epoch": 0.08918017438118088, "grad_norm": 1.015625, "learning_rate": 0.00019938881309310377, "loss": 3.338, "step": 1905 }, { "epoch": 0.08922698812101351, "grad_norm": 1.515625, "learning_rate": 0.0001993881685295125, "loss": 4.3328, "step": 1906 }, { "epoch": 0.08927380186084616, "grad_norm": 1.1015625, "learning_rate": 0.00019938752362726176, "loss": 3.4769, "step": 1907 }, { "epoch": 0.0893206156006788, "grad_norm": 1.4609375, "learning_rate": 0.0001993868783863538, "loss": 4.4928, "step": 1908 }, { "epoch": 0.08936742934051144, "grad_norm": 1.6640625, "learning_rate": 0.00019938623280679076, "loss": 3.8054, "step": 1909 }, { "epoch": 0.08941424308034408, "grad_norm": 1.5, "learning_rate": 0.0001993855868885749, "loss": 4.7426, "step": 1910 }, { "epoch": 0.08946105682017672, "grad_norm": 1.3515625, "learning_rate": 0.00019938494063170842, "loss": 4.2496, "step": 1911 }, { "epoch": 0.08950787056000936, "grad_norm": 1.3203125, "learning_rate": 0.00019938429403619347, "loss": 4.4885, "step": 1912 }, { "epoch": 0.08955468429984201, "grad_norm": 1.2109375, "learning_rate": 0.0001993836471020323, "loss": 3.9292, "step": 1913 }, { "epoch": 0.08960149803967464, "grad_norm": 1.03125, "learning_rate": 0.0001993829998292271, "loss": 2.8818, "step": 1914 }, { "epoch": 0.08964831177950729, "grad_norm": 1.4609375, "learning_rate": 0.00019938235221778004, "loss": 4.2952, "step": 1915 }, { "epoch": 0.08969512551933993, "grad_norm": 1.3359375, "learning_rate": 0.0001993817042676934, "loss": 4.1326, "step": 1916 }, { "epoch": 0.08974193925917257, "grad_norm": 1.3359375, "learning_rate": 0.0001993810559789693, "loss": 4.1972, "step": 1917 }, { "epoch": 0.08978875299900521, "grad_norm": 1.546875, "learning_rate": 0.00019938040735161002, "loss": 4.4584, "step": 1918 }, { "epoch": 0.08983556673883784, "grad_norm": 1.2421875, "learning_rate": 0.00019937975838561778, "loss": 3.9008, "step": 1919 }, { "epoch": 0.08988238047867049, "grad_norm": 1.71875, "learning_rate": 0.00019937910908099471, "loss": 3.7717, "step": 1920 }, { "epoch": 0.08992919421850314, "grad_norm": 1.4921875, "learning_rate": 0.0001993784594377431, "loss": 4.3167, "step": 1921 }, { "epoch": 0.08997600795833577, "grad_norm": 1.2578125, "learning_rate": 0.00019937780945586511, "loss": 3.2598, "step": 1922 }, { "epoch": 0.09002282169816841, "grad_norm": 1.765625, "learning_rate": 0.000199377159135363, "loss": 4.3056, "step": 1923 }, { "epoch": 0.09006963543800105, "grad_norm": 1.3046875, "learning_rate": 0.00019937650847623898, "loss": 4.1465, "step": 1924 }, { "epoch": 0.0901164491778337, "grad_norm": 1.359375, "learning_rate": 0.0001993758574784952, "loss": 4.1814, "step": 1925 }, { "epoch": 0.09016326291766634, "grad_norm": 1.1171875, "learning_rate": 0.00019937520614213397, "loss": 3.8016, "step": 1926 }, { "epoch": 0.09021007665749897, "grad_norm": 1.515625, "learning_rate": 0.00019937455446715746, "loss": 3.6991, "step": 1927 }, { "epoch": 0.09025689039733162, "grad_norm": 1.140625, "learning_rate": 0.00019937390245356788, "loss": 3.7915, "step": 1928 }, { "epoch": 0.09030370413716426, "grad_norm": 1.5390625, "learning_rate": 0.0001993732501013675, "loss": 4.0096, "step": 1929 }, { "epoch": 0.0903505178769969, "grad_norm": 1.46875, "learning_rate": 0.00019937259741055848, "loss": 4.1582, "step": 1930 }, { "epoch": 0.09039733161682954, "grad_norm": 1.3125, "learning_rate": 0.00019937194438114314, "loss": 4.0669, "step": 1931 }, { "epoch": 0.09044414535666218, "grad_norm": 1.140625, "learning_rate": 0.00019937129101312356, "loss": 4.3271, "step": 1932 }, { "epoch": 0.09049095909649482, "grad_norm": 1.8671875, "learning_rate": 0.00019937063730650212, "loss": 4.2702, "step": 1933 }, { "epoch": 0.09053777283632747, "grad_norm": 1.5234375, "learning_rate": 0.00019936998326128093, "loss": 4.1286, "step": 1934 }, { "epoch": 0.0905845865761601, "grad_norm": 1.3984375, "learning_rate": 0.00019936932887746229, "loss": 3.8953, "step": 1935 }, { "epoch": 0.09063140031599275, "grad_norm": 1.5625, "learning_rate": 0.00019936867415504842, "loss": 4.1823, "step": 1936 }, { "epoch": 0.09067821405582538, "grad_norm": 1.5234375, "learning_rate": 0.0001993680190940415, "loss": 3.8601, "step": 1937 }, { "epoch": 0.09072502779565803, "grad_norm": 1.015625, "learning_rate": 0.00019936736369444385, "loss": 3.1489, "step": 1938 }, { "epoch": 0.09077184153549067, "grad_norm": 1.6796875, "learning_rate": 0.0001993667079562576, "loss": 4.2848, "step": 1939 }, { "epoch": 0.0908186552753233, "grad_norm": 1.5546875, "learning_rate": 0.00019936605187948506, "loss": 4.4338, "step": 1940 }, { "epoch": 0.09086546901515595, "grad_norm": 1.40625, "learning_rate": 0.00019936539546412844, "loss": 4.2839, "step": 1941 }, { "epoch": 0.09091228275498858, "grad_norm": 1.2578125, "learning_rate": 0.00019936473871018997, "loss": 4.3166, "step": 1942 }, { "epoch": 0.09095909649482123, "grad_norm": 1.0625, "learning_rate": 0.0001993640816176719, "loss": 4.3479, "step": 1943 }, { "epoch": 0.09100591023465388, "grad_norm": 1.1796875, "learning_rate": 0.00019936342418657647, "loss": 4.0981, "step": 1944 }, { "epoch": 0.09105272397448651, "grad_norm": 1.75, "learning_rate": 0.0001993627664169059, "loss": 3.8913, "step": 1945 }, { "epoch": 0.09109953771431915, "grad_norm": 1.265625, "learning_rate": 0.0001993621083086625, "loss": 4.2365, "step": 1946 }, { "epoch": 0.0911463514541518, "grad_norm": 1.2890625, "learning_rate": 0.00019936144986184838, "loss": 4.238, "step": 1947 }, { "epoch": 0.09119316519398443, "grad_norm": 1.6015625, "learning_rate": 0.00019936079107646593, "loss": 3.7162, "step": 1948 }, { "epoch": 0.09123997893381708, "grad_norm": 1.4453125, "learning_rate": 0.00019936013195251731, "loss": 4.5719, "step": 1949 }, { "epoch": 0.09128679267364971, "grad_norm": 2.1875, "learning_rate": 0.00019935947249000475, "loss": 3.7328, "step": 1950 }, { "epoch": 0.09133360641348236, "grad_norm": 1.265625, "learning_rate": 0.00019935881268893057, "loss": 3.9697, "step": 1951 }, { "epoch": 0.091380420153315, "grad_norm": 1.234375, "learning_rate": 0.00019935815254929696, "loss": 3.8487, "step": 1952 }, { "epoch": 0.09142723389314764, "grad_norm": 1.25, "learning_rate": 0.0001993574920711062, "loss": 4.2574, "step": 1953 }, { "epoch": 0.09147404763298028, "grad_norm": 1.4296875, "learning_rate": 0.00019935683125436052, "loss": 3.5128, "step": 1954 }, { "epoch": 0.09152086137281291, "grad_norm": 1.890625, "learning_rate": 0.00019935617009906218, "loss": 3.946, "step": 1955 }, { "epoch": 0.09156767511264556, "grad_norm": 1.6953125, "learning_rate": 0.0001993555086052134, "loss": 4.0668, "step": 1956 }, { "epoch": 0.09161448885247821, "grad_norm": 1.4375, "learning_rate": 0.00019935484677281653, "loss": 4.2542, "step": 1957 }, { "epoch": 0.09166130259231084, "grad_norm": 1.7109375, "learning_rate": 0.00019935418460187375, "loss": 4.617, "step": 1958 }, { "epoch": 0.09170811633214349, "grad_norm": 1.421875, "learning_rate": 0.0001993535220923873, "loss": 4.366, "step": 1959 }, { "epoch": 0.09175493007197613, "grad_norm": 2.421875, "learning_rate": 0.00019935285924435947, "loss": 4.0128, "step": 1960 }, { "epoch": 0.09180174381180876, "grad_norm": 1.25, "learning_rate": 0.00019935219605779253, "loss": 4.0745, "step": 1961 }, { "epoch": 0.09184855755164141, "grad_norm": 2.109375, "learning_rate": 0.00019935153253268872, "loss": 4.2824, "step": 1962 }, { "epoch": 0.09189537129147404, "grad_norm": 1.28125, "learning_rate": 0.0001993508686690503, "loss": 3.9197, "step": 1963 }, { "epoch": 0.09194218503130669, "grad_norm": 1.7109375, "learning_rate": 0.00019935020446687956, "loss": 4.0082, "step": 1964 }, { "epoch": 0.09198899877113934, "grad_norm": 1.296875, "learning_rate": 0.0001993495399261787, "loss": 4.3894, "step": 1965 }, { "epoch": 0.09203581251097197, "grad_norm": 1.484375, "learning_rate": 0.00019934887504695006, "loss": 3.7681, "step": 1966 }, { "epoch": 0.09208262625080461, "grad_norm": 1.0859375, "learning_rate": 0.00019934820982919586, "loss": 2.8271, "step": 1967 }, { "epoch": 0.09212943999063725, "grad_norm": 2.34375, "learning_rate": 0.00019934754427291837, "loss": 3.8717, "step": 1968 }, { "epoch": 0.09217625373046989, "grad_norm": 1.3828125, "learning_rate": 0.0001993468783781199, "loss": 3.578, "step": 1969 }, { "epoch": 0.09222306747030254, "grad_norm": 1.1640625, "learning_rate": 0.00019934621214480266, "loss": 6.6154, "step": 1970 }, { "epoch": 0.09226988121013517, "grad_norm": 1.40625, "learning_rate": 0.0001993455455729689, "loss": 4.2379, "step": 1971 }, { "epoch": 0.09231669494996782, "grad_norm": 1.1953125, "learning_rate": 0.00019934487866262102, "loss": 3.7266, "step": 1972 }, { "epoch": 0.09236350868980045, "grad_norm": 1.59375, "learning_rate": 0.00019934421141376115, "loss": 4.2549, "step": 1973 }, { "epoch": 0.0924103224296331, "grad_norm": 1.21875, "learning_rate": 0.00019934354382639165, "loss": 3.6864, "step": 1974 }, { "epoch": 0.09245713616946574, "grad_norm": 1.5390625, "learning_rate": 0.00019934287590051475, "loss": 4.2047, "step": 1975 }, { "epoch": 0.09250394990929837, "grad_norm": 1.9375, "learning_rate": 0.00019934220763613274, "loss": 4.2246, "step": 1976 }, { "epoch": 0.09255076364913102, "grad_norm": 1.7578125, "learning_rate": 0.00019934153903324793, "loss": 4.2045, "step": 1977 }, { "epoch": 0.09259757738896367, "grad_norm": 1.21875, "learning_rate": 0.00019934087009186254, "loss": 4.1969, "step": 1978 }, { "epoch": 0.0926443911287963, "grad_norm": 5.53125, "learning_rate": 0.0001993402008119789, "loss": 4.7046, "step": 1979 }, { "epoch": 0.09269120486862895, "grad_norm": 1.5, "learning_rate": 0.00019933953119359924, "loss": 4.3879, "step": 1980 }, { "epoch": 0.09273801860846158, "grad_norm": 1.328125, "learning_rate": 0.00019933886123672586, "loss": 4.0064, "step": 1981 }, { "epoch": 0.09278483234829422, "grad_norm": 1.65625, "learning_rate": 0.0001993381909413611, "loss": 4.3047, "step": 1982 }, { "epoch": 0.09283164608812687, "grad_norm": 1.4296875, "learning_rate": 0.00019933752030750715, "loss": 4.2035, "step": 1983 }, { "epoch": 0.0928784598279595, "grad_norm": 1.4921875, "learning_rate": 0.0001993368493351664, "loss": 4.214, "step": 1984 }, { "epoch": 0.09292527356779215, "grad_norm": 1.6953125, "learning_rate": 0.00019933617802434103, "loss": 4.4395, "step": 1985 }, { "epoch": 0.09297208730762478, "grad_norm": 1.796875, "learning_rate": 0.00019933550637503338, "loss": 3.6059, "step": 1986 }, { "epoch": 0.09301890104745743, "grad_norm": 1.3359375, "learning_rate": 0.00019933483438724575, "loss": 4.0971, "step": 1987 }, { "epoch": 0.09306571478729007, "grad_norm": 1.3515625, "learning_rate": 0.00019933416206098037, "loss": 3.9921, "step": 1988 }, { "epoch": 0.0931125285271227, "grad_norm": 1.875, "learning_rate": 0.00019933348939623962, "loss": 4.1167, "step": 1989 }, { "epoch": 0.09315934226695535, "grad_norm": 1.2265625, "learning_rate": 0.00019933281639302573, "loss": 4.1734, "step": 1990 }, { "epoch": 0.093206156006788, "grad_norm": 1.359375, "learning_rate": 0.00019933214305134102, "loss": 4.0612, "step": 1991 }, { "epoch": 0.09325296974662063, "grad_norm": 1.3125, "learning_rate": 0.00019933146937118777, "loss": 3.8514, "step": 1992 }, { "epoch": 0.09329978348645328, "grad_norm": 2.109375, "learning_rate": 0.00019933079535256826, "loss": 4.0067, "step": 1993 }, { "epoch": 0.09334659722628591, "grad_norm": 1.46875, "learning_rate": 0.00019933012099548482, "loss": 4.2571, "step": 1994 }, { "epoch": 0.09339341096611856, "grad_norm": 1.796875, "learning_rate": 0.00019932944629993976, "loss": 4.1826, "step": 1995 }, { "epoch": 0.0934402247059512, "grad_norm": 1.53125, "learning_rate": 0.0001993287712659353, "loss": 4.9366, "step": 1996 }, { "epoch": 0.09348703844578383, "grad_norm": 1.3203125, "learning_rate": 0.00019932809589347383, "loss": 3.977, "step": 1997 }, { "epoch": 0.09353385218561648, "grad_norm": 1.609375, "learning_rate": 0.00019932742018255762, "loss": 3.9195, "step": 1998 }, { "epoch": 0.09358066592544911, "grad_norm": 1.2578125, "learning_rate": 0.00019932674413318898, "loss": 3.963, "step": 1999 }, { "epoch": 0.09362747966528176, "grad_norm": 1.609375, "learning_rate": 0.00019932606774537015, "loss": 4.0727, "step": 2000 }, { "epoch": 0.0936742934051144, "grad_norm": 1.375, "learning_rate": 0.00019932539101910352, "loss": 4.5644, "step": 2001 }, { "epoch": 0.09372110714494704, "grad_norm": 1.8984375, "learning_rate": 0.00019932471395439137, "loss": 3.9953, "step": 2002 }, { "epoch": 0.09376792088477968, "grad_norm": 1.390625, "learning_rate": 0.00019932403655123596, "loss": 3.8163, "step": 2003 }, { "epoch": 0.09381473462461232, "grad_norm": 1.328125, "learning_rate": 0.00019932335880963966, "loss": 4.2699, "step": 2004 }, { "epoch": 0.09386154836444496, "grad_norm": 1.6484375, "learning_rate": 0.00019932268072960475, "loss": 4.1963, "step": 2005 }, { "epoch": 0.09390836210427761, "grad_norm": 2.03125, "learning_rate": 0.00019932200231113356, "loss": 3.9763, "step": 2006 }, { "epoch": 0.09395517584411024, "grad_norm": 1.5390625, "learning_rate": 0.00019932132355422838, "loss": 4.3526, "step": 2007 }, { "epoch": 0.09400198958394289, "grad_norm": 1.6484375, "learning_rate": 0.00019932064445889153, "loss": 4.4826, "step": 2008 }, { "epoch": 0.09404880332377553, "grad_norm": 1.3125, "learning_rate": 0.00019931996502512534, "loss": 3.9159, "step": 2009 }, { "epoch": 0.09409561706360817, "grad_norm": 1.25, "learning_rate": 0.00019931928525293208, "loss": 4.036, "step": 2010 }, { "epoch": 0.09414243080344081, "grad_norm": 1.390625, "learning_rate": 0.0001993186051423141, "loss": 4.2489, "step": 2011 }, { "epoch": 0.09418924454327345, "grad_norm": 1.3046875, "learning_rate": 0.00019931792469327374, "loss": 4.3495, "step": 2012 }, { "epoch": 0.09423605828310609, "grad_norm": 1.515625, "learning_rate": 0.0001993172439058133, "loss": 4.0565, "step": 2013 }, { "epoch": 0.09428287202293874, "grad_norm": 1.5390625, "learning_rate": 0.00019931656277993507, "loss": 4.2946, "step": 2014 }, { "epoch": 0.09432968576277137, "grad_norm": 1.203125, "learning_rate": 0.00019931588131564135, "loss": 4.1743, "step": 2015 }, { "epoch": 0.09437649950260402, "grad_norm": 1.578125, "learning_rate": 0.0001993151995129346, "loss": 4.1085, "step": 2016 }, { "epoch": 0.09442331324243665, "grad_norm": 1.6484375, "learning_rate": 0.000199314517371817, "loss": 3.915, "step": 2017 }, { "epoch": 0.0944701269822693, "grad_norm": 1.3046875, "learning_rate": 0.0001993138348922909, "loss": 4.1011, "step": 2018 }, { "epoch": 0.09451694072210194, "grad_norm": 1.3828125, "learning_rate": 0.00019931315207435865, "loss": 3.575, "step": 2019 }, { "epoch": 0.09456375446193457, "grad_norm": 1.28125, "learning_rate": 0.00019931246891802262, "loss": 4.0268, "step": 2020 }, { "epoch": 0.09461056820176722, "grad_norm": 1.375, "learning_rate": 0.00019931178542328506, "loss": 4.1159, "step": 2021 }, { "epoch": 0.09465738194159987, "grad_norm": 1.2109375, "learning_rate": 0.00019931110159014832, "loss": 3.9805, "step": 2022 }, { "epoch": 0.0947041956814325, "grad_norm": 1.2421875, "learning_rate": 0.00019931041741861477, "loss": 4.0056, "step": 2023 }, { "epoch": 0.09475100942126514, "grad_norm": 1.3671875, "learning_rate": 0.0001993097329086867, "loss": 3.9956, "step": 2024 }, { "epoch": 0.09479782316109778, "grad_norm": 1.390625, "learning_rate": 0.00019930904806036645, "loss": 4.2399, "step": 2025 }, { "epoch": 0.09484463690093042, "grad_norm": 1.421875, "learning_rate": 0.00019930836287365637, "loss": 4.3276, "step": 2026 }, { "epoch": 0.09489145064076307, "grad_norm": 1.2109375, "learning_rate": 0.0001993076773485588, "loss": 4.1506, "step": 2027 }, { "epoch": 0.0949382643805957, "grad_norm": 1.296875, "learning_rate": 0.00019930699148507604, "loss": 3.8974, "step": 2028 }, { "epoch": 0.09498507812042835, "grad_norm": 1.109375, "learning_rate": 0.00019930630528321045, "loss": 3.9263, "step": 2029 }, { "epoch": 0.09503189186026098, "grad_norm": 1.203125, "learning_rate": 0.00019930561874296435, "loss": 3.9094, "step": 2030 }, { "epoch": 0.09507870560009363, "grad_norm": 1.265625, "learning_rate": 0.00019930493186434012, "loss": 4.1765, "step": 2031 }, { "epoch": 0.09512551933992627, "grad_norm": 1.1640625, "learning_rate": 0.00019930424464734007, "loss": 4.2806, "step": 2032 }, { "epoch": 0.0951723330797589, "grad_norm": 1.265625, "learning_rate": 0.00019930355709196654, "loss": 3.9838, "step": 2033 }, { "epoch": 0.09521914681959155, "grad_norm": 1.4375, "learning_rate": 0.00019930286919822186, "loss": 3.7503, "step": 2034 }, { "epoch": 0.09526596055942418, "grad_norm": 1.421875, "learning_rate": 0.00019930218096610841, "loss": 3.9799, "step": 2035 }, { "epoch": 0.09531277429925683, "grad_norm": 1.1796875, "learning_rate": 0.0001993014923956285, "loss": 4.0681, "step": 2036 }, { "epoch": 0.09535958803908948, "grad_norm": 1.265625, "learning_rate": 0.00019930080348678453, "loss": 4.0901, "step": 2037 }, { "epoch": 0.09540640177892211, "grad_norm": 1.3515625, "learning_rate": 0.00019930011423957878, "loss": 3.8981, "step": 2038 }, { "epoch": 0.09545321551875476, "grad_norm": 1.25, "learning_rate": 0.00019929942465401364, "loss": 3.8707, "step": 2039 }, { "epoch": 0.0955000292585874, "grad_norm": 1.2734375, "learning_rate": 0.00019929873473009145, "loss": 4.0182, "step": 2040 }, { "epoch": 0.09554684299842003, "grad_norm": 1.4453125, "learning_rate": 0.0001992980444678146, "loss": 3.6595, "step": 2041 }, { "epoch": 0.09559365673825268, "grad_norm": 1.078125, "learning_rate": 0.00019929735386718536, "loss": 4.0436, "step": 2042 }, { "epoch": 0.09564047047808531, "grad_norm": 1.3046875, "learning_rate": 0.00019929666292820613, "loss": 4.5974, "step": 2043 }, { "epoch": 0.09568728421791796, "grad_norm": 1.28125, "learning_rate": 0.00019929597165087927, "loss": 3.9717, "step": 2044 }, { "epoch": 0.0957340979577506, "grad_norm": 1.25, "learning_rate": 0.00019929528003520712, "loss": 3.9582, "step": 2045 }, { "epoch": 0.09578091169758324, "grad_norm": 1.6484375, "learning_rate": 0.00019929458808119207, "loss": 4.2203, "step": 2046 }, { "epoch": 0.09582772543741588, "grad_norm": 1.3359375, "learning_rate": 0.0001992938957888364, "loss": 3.5766, "step": 2047 }, { "epoch": 0.09587453917724852, "grad_norm": 1.40625, "learning_rate": 0.00019929320315814256, "loss": 4.0488, "step": 2048 }, { "epoch": 0.09592135291708116, "grad_norm": 1.4453125, "learning_rate": 0.00019929251018911286, "loss": 3.6503, "step": 2049 }, { "epoch": 0.09596816665691381, "grad_norm": 1.6171875, "learning_rate": 0.00019929181688174965, "loss": 3.9889, "step": 2050 }, { "epoch": 0.09601498039674644, "grad_norm": 1.1953125, "learning_rate": 0.00019929112323605535, "loss": 4.0955, "step": 2051 }, { "epoch": 0.09606179413657909, "grad_norm": 1.2890625, "learning_rate": 0.00019929042925203225, "loss": 3.7876, "step": 2052 }, { "epoch": 0.09610860787641173, "grad_norm": 1.265625, "learning_rate": 0.00019928973492968277, "loss": 5.5664, "step": 2053 }, { "epoch": 0.09615542161624437, "grad_norm": 1.4296875, "learning_rate": 0.00019928904026900925, "loss": 3.9903, "step": 2054 }, { "epoch": 0.09620223535607701, "grad_norm": 1.2265625, "learning_rate": 0.0001992883452700141, "loss": 3.8646, "step": 2055 }, { "epoch": 0.09624904909590964, "grad_norm": 1.3359375, "learning_rate": 0.00019928764993269962, "loss": 4.0339, "step": 2056 }, { "epoch": 0.09629586283574229, "grad_norm": 1.3125, "learning_rate": 0.0001992869542570682, "loss": 2.924, "step": 2057 }, { "epoch": 0.09634267657557494, "grad_norm": 1.4765625, "learning_rate": 0.00019928625824312224, "loss": 4.7141, "step": 2058 }, { "epoch": 0.09638949031540757, "grad_norm": 1.6015625, "learning_rate": 0.0001992855618908641, "loss": 4.1829, "step": 2059 }, { "epoch": 0.09643630405524022, "grad_norm": 1.3515625, "learning_rate": 0.00019928486520029616, "loss": 4.2302, "step": 2060 }, { "epoch": 0.09648311779507285, "grad_norm": 1.1796875, "learning_rate": 0.00019928416817142073, "loss": 4.5724, "step": 2061 }, { "epoch": 0.0965299315349055, "grad_norm": 1.28125, "learning_rate": 0.0001992834708042403, "loss": 4.0035, "step": 2062 }, { "epoch": 0.09657674527473814, "grad_norm": 1.34375, "learning_rate": 0.0001992827730987571, "loss": 4.0051, "step": 2063 }, { "epoch": 0.09662355901457077, "grad_norm": 1.1875, "learning_rate": 0.00019928207505497364, "loss": 3.9475, "step": 2064 }, { "epoch": 0.09667037275440342, "grad_norm": 1.2578125, "learning_rate": 0.00019928137667289225, "loss": 3.9074, "step": 2065 }, { "epoch": 0.09671718649423605, "grad_norm": 1.6484375, "learning_rate": 0.0001992806779525153, "loss": 4.1736, "step": 2066 }, { "epoch": 0.0967640002340687, "grad_norm": 1.0625, "learning_rate": 0.00019927997889384517, "loss": 4.1076, "step": 2067 }, { "epoch": 0.09681081397390134, "grad_norm": 1.453125, "learning_rate": 0.0001992792794968843, "loss": 3.9622, "step": 2068 }, { "epoch": 0.09685762771373398, "grad_norm": 1.84375, "learning_rate": 0.00019927857976163497, "loss": 3.7623, "step": 2069 }, { "epoch": 0.09690444145356662, "grad_norm": 1.109375, "learning_rate": 0.00019927787968809964, "loss": 4.1462, "step": 2070 }, { "epoch": 0.09695125519339927, "grad_norm": 1.7734375, "learning_rate": 0.00019927717927628065, "loss": 4.6957, "step": 2071 }, { "epoch": 0.0969980689332319, "grad_norm": 1.8984375, "learning_rate": 0.00019927647852618043, "loss": 4.4644, "step": 2072 }, { "epoch": 0.09704488267306455, "grad_norm": 1.5078125, "learning_rate": 0.00019927577743780134, "loss": 4.2938, "step": 2073 }, { "epoch": 0.09709169641289718, "grad_norm": 1.421875, "learning_rate": 0.0001992750760111458, "loss": 3.9194, "step": 2074 }, { "epoch": 0.09713851015272983, "grad_norm": 1.6328125, "learning_rate": 0.00019927437424621615, "loss": 4.2161, "step": 2075 }, { "epoch": 0.09718532389256247, "grad_norm": 1.5625, "learning_rate": 0.00019927367214301483, "loss": 4.6053, "step": 2076 }, { "epoch": 0.0972321376323951, "grad_norm": 1.34375, "learning_rate": 0.0001992729697015442, "loss": 4.5111, "step": 2077 }, { "epoch": 0.09727895137222775, "grad_norm": 1.3828125, "learning_rate": 0.00019927226692180664, "loss": 4.2799, "step": 2078 }, { "epoch": 0.09732576511206038, "grad_norm": 1.4375, "learning_rate": 0.0001992715638038046, "loss": 4.4057, "step": 2079 }, { "epoch": 0.09737257885189303, "grad_norm": 1.40625, "learning_rate": 0.00019927086034754042, "loss": 3.8675, "step": 2080 }, { "epoch": 0.09741939259172568, "grad_norm": 1.1171875, "learning_rate": 0.00019927015655301654, "loss": 2.9606, "step": 2081 }, { "epoch": 0.09746620633155831, "grad_norm": 1.4296875, "learning_rate": 0.00019926945242023533, "loss": 4.2219, "step": 2082 }, { "epoch": 0.09751302007139095, "grad_norm": 1.5546875, "learning_rate": 0.00019926874794919922, "loss": 3.6597, "step": 2083 }, { "epoch": 0.0975598338112236, "grad_norm": 1.921875, "learning_rate": 0.0001992680431399106, "loss": 4.2379, "step": 2084 }, { "epoch": 0.09760664755105623, "grad_norm": 1.0625, "learning_rate": 0.00019926733799237182, "loss": 3.8338, "step": 2085 }, { "epoch": 0.09765346129088888, "grad_norm": 1.3046875, "learning_rate": 0.00019926663250658533, "loss": 3.8872, "step": 2086 }, { "epoch": 0.09770027503072151, "grad_norm": 1.09375, "learning_rate": 0.00019926592668255354, "loss": 4.0288, "step": 2087 }, { "epoch": 0.09774708877055416, "grad_norm": 1.2890625, "learning_rate": 0.00019926522052027883, "loss": 4.1729, "step": 2088 }, { "epoch": 0.0977939025103868, "grad_norm": 1.2421875, "learning_rate": 0.00019926451401976362, "loss": 4.0079, "step": 2089 }, { "epoch": 0.09784071625021944, "grad_norm": 1.984375, "learning_rate": 0.00019926380718101032, "loss": 4.3885, "step": 2090 }, { "epoch": 0.09788752999005208, "grad_norm": 1.4375, "learning_rate": 0.00019926310000402135, "loss": 3.7482, "step": 2091 }, { "epoch": 0.09793434372988472, "grad_norm": 1.7578125, "learning_rate": 0.00019926239248879907, "loss": 4.1535, "step": 2092 }, { "epoch": 0.09798115746971736, "grad_norm": 1.46875, "learning_rate": 0.00019926168463534598, "loss": 2.8754, "step": 2093 }, { "epoch": 0.09802797120955001, "grad_norm": 1.4609375, "learning_rate": 0.00019926097644366438, "loss": 4.1904, "step": 2094 }, { "epoch": 0.09807478494938264, "grad_norm": 1.3984375, "learning_rate": 0.00019926026791375679, "loss": 4.3239, "step": 2095 }, { "epoch": 0.09812159868921529, "grad_norm": 1.4921875, "learning_rate": 0.00019925955904562554, "loss": 4.2164, "step": 2096 }, { "epoch": 0.09816841242904792, "grad_norm": 1.8046875, "learning_rate": 0.0001992588498392731, "loss": 4.0977, "step": 2097 }, { "epoch": 0.09821522616888057, "grad_norm": 1.5, "learning_rate": 0.00019925814029470184, "loss": 4.188, "step": 2098 }, { "epoch": 0.09826203990871321, "grad_norm": 1.109375, "learning_rate": 0.00019925743041191422, "loss": 5.9839, "step": 2099 }, { "epoch": 0.09830885364854584, "grad_norm": 1.5234375, "learning_rate": 0.00019925672019091266, "loss": 3.9791, "step": 2100 }, { "epoch": 0.09835566738837849, "grad_norm": 1.421875, "learning_rate": 0.00019925600963169955, "loss": 4.2538, "step": 2101 }, { "epoch": 0.09840248112821114, "grad_norm": 1.546875, "learning_rate": 0.00019925529873427732, "loss": 3.8396, "step": 2102 }, { "epoch": 0.09844929486804377, "grad_norm": 1.234375, "learning_rate": 0.00019925458749864842, "loss": 3.7084, "step": 2103 }, { "epoch": 0.09849610860787641, "grad_norm": 1.890625, "learning_rate": 0.00019925387592481522, "loss": 4.0217, "step": 2104 }, { "epoch": 0.09854292234770905, "grad_norm": 1.3984375, "learning_rate": 0.00019925316401278015, "loss": 4.2509, "step": 2105 }, { "epoch": 0.0985897360875417, "grad_norm": 2.0, "learning_rate": 0.00019925245176254572, "loss": 3.7459, "step": 2106 }, { "epoch": 0.09863654982737434, "grad_norm": 1.1484375, "learning_rate": 0.00019925173917411426, "loss": 3.5636, "step": 2107 }, { "epoch": 0.09868336356720697, "grad_norm": 1.2421875, "learning_rate": 0.00019925102624748827, "loss": 3.9102, "step": 2108 }, { "epoch": 0.09873017730703962, "grad_norm": 1.3046875, "learning_rate": 0.00019925031298267013, "loss": 4.0219, "step": 2109 }, { "epoch": 0.09877699104687225, "grad_norm": 1.171875, "learning_rate": 0.00019924959937966228, "loss": 3.7873, "step": 2110 }, { "epoch": 0.0988238047867049, "grad_norm": 1.640625, "learning_rate": 0.00019924888543846714, "loss": 3.8198, "step": 2111 }, { "epoch": 0.09887061852653754, "grad_norm": 1.2890625, "learning_rate": 0.0001992481711590872, "loss": 3.3631, "step": 2112 }, { "epoch": 0.09891743226637018, "grad_norm": 1.40625, "learning_rate": 0.00019924745654152483, "loss": 3.7652, "step": 2113 }, { "epoch": 0.09896424600620282, "grad_norm": 1.5, "learning_rate": 0.00019924674158578248, "loss": 4.0672, "step": 2114 }, { "epoch": 0.09901105974603547, "grad_norm": 1.5859375, "learning_rate": 0.0001992460262918626, "loss": 4.3266, "step": 2115 }, { "epoch": 0.0990578734858681, "grad_norm": 1.5625, "learning_rate": 0.0001992453106597676, "loss": 4.0789, "step": 2116 }, { "epoch": 0.09910468722570075, "grad_norm": 1.46875, "learning_rate": 0.0001992445946895, "loss": 4.3316, "step": 2117 }, { "epoch": 0.09915150096553338, "grad_norm": 1.671875, "learning_rate": 0.00019924387838106213, "loss": 4.0219, "step": 2118 }, { "epoch": 0.09919831470536603, "grad_norm": 1.390625, "learning_rate": 0.00019924316173445646, "loss": 4.1107, "step": 2119 }, { "epoch": 0.09924512844519867, "grad_norm": 1.1796875, "learning_rate": 0.0001992424447496855, "loss": 4.0608, "step": 2120 }, { "epoch": 0.0992919421850313, "grad_norm": 1.265625, "learning_rate": 0.00019924172742675163, "loss": 3.9819, "step": 2121 }, { "epoch": 0.09933875592486395, "grad_norm": 1.453125, "learning_rate": 0.00019924100976565734, "loss": 3.9319, "step": 2122 }, { "epoch": 0.09938556966469658, "grad_norm": 1.1328125, "learning_rate": 0.000199240291766405, "loss": 4.0924, "step": 2123 }, { "epoch": 0.09943238340452923, "grad_norm": 1.6171875, "learning_rate": 0.00019923957342899714, "loss": 4.5257, "step": 2124 }, { "epoch": 0.09947919714436187, "grad_norm": 1.265625, "learning_rate": 0.00019923885475343615, "loss": 3.5673, "step": 2125 }, { "epoch": 0.09952601088419451, "grad_norm": 1.40625, "learning_rate": 0.00019923813573972452, "loss": 3.1938, "step": 2126 }, { "epoch": 0.09957282462402715, "grad_norm": 1.34375, "learning_rate": 0.00019923741638786465, "loss": 3.9083, "step": 2127 }, { "epoch": 0.09961963836385979, "grad_norm": 1.734375, "learning_rate": 0.00019923669669785903, "loss": 4.3222, "step": 2128 }, { "epoch": 0.09966645210369243, "grad_norm": 1.2890625, "learning_rate": 0.0001992359766697101, "loss": 4.0289, "step": 2129 }, { "epoch": 0.09971326584352508, "grad_norm": 1.578125, "learning_rate": 0.00019923525630342033, "loss": 4.1619, "step": 2130 }, { "epoch": 0.09976007958335771, "grad_norm": 1.6171875, "learning_rate": 0.00019923453559899214, "loss": 3.8344, "step": 2131 }, { "epoch": 0.09980689332319036, "grad_norm": 1.390625, "learning_rate": 0.00019923381455642803, "loss": 4.119, "step": 2132 }, { "epoch": 0.099853707063023, "grad_norm": 1.2265625, "learning_rate": 0.0001992330931757304, "loss": 4.3112, "step": 2133 }, { "epoch": 0.09990052080285564, "grad_norm": 1.4375, "learning_rate": 0.00019923237145690176, "loss": 3.5671, "step": 2134 }, { "epoch": 0.09994733454268828, "grad_norm": 1.484375, "learning_rate": 0.00019923164939994456, "loss": 3.7147, "step": 2135 }, { "epoch": 0.09999414828252091, "grad_norm": 1.2890625, "learning_rate": 0.00019923092700486122, "loss": 4.0222, "step": 2136 }, { "epoch": 0.10004096202235356, "grad_norm": 1.1015625, "learning_rate": 0.00019923020427165425, "loss": 3.6611, "step": 2137 }, { "epoch": 0.1000877757621862, "grad_norm": 1.1015625, "learning_rate": 0.0001992294812003261, "loss": 3.7949, "step": 2138 }, { "epoch": 0.10013458950201884, "grad_norm": 0.9296875, "learning_rate": 0.00019922875779087925, "loss": 4.4883, "step": 2139 }, { "epoch": 0.10018140324185149, "grad_norm": 1.3203125, "learning_rate": 0.0001992280340433161, "loss": 4.0017, "step": 2140 }, { "epoch": 0.10022821698168412, "grad_norm": 1.4375, "learning_rate": 0.00019922730995763915, "loss": 4.2674, "step": 2141 }, { "epoch": 0.10027503072151676, "grad_norm": 1.1953125, "learning_rate": 0.00019922658553385092, "loss": 3.6673, "step": 2142 }, { "epoch": 0.10032184446134941, "grad_norm": 1.8125, "learning_rate": 0.00019922586077195385, "loss": 4.5653, "step": 2143 }, { "epoch": 0.10036865820118204, "grad_norm": 1.8515625, "learning_rate": 0.00019922513567195032, "loss": 4.098, "step": 2144 }, { "epoch": 0.10041547194101469, "grad_norm": 1.359375, "learning_rate": 0.00019922441023384295, "loss": 4.1272, "step": 2145 }, { "epoch": 0.10046228568084734, "grad_norm": 1.3671875, "learning_rate": 0.0001992236844576341, "loss": 4.1719, "step": 2146 }, { "epoch": 0.10050909942067997, "grad_norm": 1.234375, "learning_rate": 0.00019922295834332628, "loss": 3.5184, "step": 2147 }, { "epoch": 0.10055591316051261, "grad_norm": 1.5859375, "learning_rate": 0.000199222231890922, "loss": 4.4838, "step": 2148 }, { "epoch": 0.10060272690034525, "grad_norm": 1.5234375, "learning_rate": 0.00019922150510042367, "loss": 4.0864, "step": 2149 }, { "epoch": 0.10064954064017789, "grad_norm": 1.40625, "learning_rate": 0.0001992207779718338, "loss": 3.9496, "step": 2150 }, { "epoch": 0.10069635438001054, "grad_norm": 1.484375, "learning_rate": 0.00019922005050515486, "loss": 4.129, "step": 2151 }, { "epoch": 0.10074316811984317, "grad_norm": 1.3671875, "learning_rate": 0.00019921932270038935, "loss": 3.9846, "step": 2152 }, { "epoch": 0.10078998185967582, "grad_norm": 1.4296875, "learning_rate": 0.0001992185945575397, "loss": 4.6179, "step": 2153 }, { "epoch": 0.10083679559950845, "grad_norm": 1.2578125, "learning_rate": 0.00019921786607660847, "loss": 3.8177, "step": 2154 }, { "epoch": 0.1008836093393411, "grad_norm": 1.9921875, "learning_rate": 0.00019921713725759804, "loss": 4.4209, "step": 2155 }, { "epoch": 0.10093042307917374, "grad_norm": 1.1640625, "learning_rate": 0.000199216408100511, "loss": 3.6976, "step": 2156 }, { "epoch": 0.10097723681900637, "grad_norm": 1.125, "learning_rate": 0.00019921567860534977, "loss": 3.708, "step": 2157 }, { "epoch": 0.10102405055883902, "grad_norm": 1.265625, "learning_rate": 0.00019921494877211683, "loss": 4.3911, "step": 2158 }, { "epoch": 0.10107086429867165, "grad_norm": 1.4296875, "learning_rate": 0.00019921421860081472, "loss": 4.1068, "step": 2159 }, { "epoch": 0.1011176780385043, "grad_norm": 1.484375, "learning_rate": 0.00019921348809144587, "loss": 4.1685, "step": 2160 }, { "epoch": 0.10116449177833695, "grad_norm": 1.171875, "learning_rate": 0.0001992127572440128, "loss": 3.9217, "step": 2161 }, { "epoch": 0.10121130551816958, "grad_norm": 1.78125, "learning_rate": 0.00019921202605851802, "loss": 4.3494, "step": 2162 }, { "epoch": 0.10125811925800222, "grad_norm": 1.8125, "learning_rate": 0.00019921129453496396, "loss": 3.735, "step": 2163 }, { "epoch": 0.10130493299783487, "grad_norm": 1.265625, "learning_rate": 0.00019921056267335315, "loss": 4.1012, "step": 2164 }, { "epoch": 0.1013517467376675, "grad_norm": 1.359375, "learning_rate": 0.0001992098304736881, "loss": 3.4753, "step": 2165 }, { "epoch": 0.10139856047750015, "grad_norm": 1.515625, "learning_rate": 0.00019920909793597126, "loss": 4.2736, "step": 2166 }, { "epoch": 0.10144537421733278, "grad_norm": 1.578125, "learning_rate": 0.0001992083650602052, "loss": 4.3256, "step": 2167 }, { "epoch": 0.10149218795716543, "grad_norm": 1.4375, "learning_rate": 0.00019920763184639234, "loss": 4.2848, "step": 2168 }, { "epoch": 0.10153900169699807, "grad_norm": 1.5859375, "learning_rate": 0.0001992068982945352, "loss": 4.6915, "step": 2169 }, { "epoch": 0.1015858154368307, "grad_norm": 1.265625, "learning_rate": 0.0001992061644046363, "loss": 4.1848, "step": 2170 }, { "epoch": 0.10163262917666335, "grad_norm": 1.171875, "learning_rate": 0.00019920543017669812, "loss": 3.9001, "step": 2171 }, { "epoch": 0.10167944291649599, "grad_norm": 1.5, "learning_rate": 0.00019920469561072318, "loss": 4.0233, "step": 2172 }, { "epoch": 0.10172625665632863, "grad_norm": 1.875, "learning_rate": 0.00019920396070671398, "loss": 4.2978, "step": 2173 }, { "epoch": 0.10177307039616128, "grad_norm": 2.890625, "learning_rate": 0.00019920322546467304, "loss": 4.1238, "step": 2174 }, { "epoch": 0.10181988413599391, "grad_norm": 1.671875, "learning_rate": 0.00019920248988460282, "loss": 4.1696, "step": 2175 }, { "epoch": 0.10186669787582656, "grad_norm": 1.65625, "learning_rate": 0.00019920175396650585, "loss": 3.967, "step": 2176 }, { "epoch": 0.1019135116156592, "grad_norm": 1.359375, "learning_rate": 0.00019920101771038462, "loss": 4.1379, "step": 2177 }, { "epoch": 0.10196032535549183, "grad_norm": 1.421875, "learning_rate": 0.00019920028111624168, "loss": 3.9259, "step": 2178 }, { "epoch": 0.10200713909532448, "grad_norm": 1.1171875, "learning_rate": 0.0001991995441840795, "loss": 3.9631, "step": 2179 }, { "epoch": 0.10205395283515711, "grad_norm": 1.0703125, "learning_rate": 0.00019919880691390062, "loss": 3.7646, "step": 2180 }, { "epoch": 0.10210076657498976, "grad_norm": 2.796875, "learning_rate": 0.00019919806930570753, "loss": 3.3995, "step": 2181 }, { "epoch": 0.1021475803148224, "grad_norm": 1.1484375, "learning_rate": 0.00019919733135950277, "loss": 3.6986, "step": 2182 }, { "epoch": 0.10219439405465504, "grad_norm": 2.234375, "learning_rate": 0.0001991965930752888, "loss": 4.1778, "step": 2183 }, { "epoch": 0.10224120779448768, "grad_norm": 1.3046875, "learning_rate": 0.00019919585445306822, "loss": 4.1548, "step": 2184 }, { "epoch": 0.10228802153432032, "grad_norm": 1.2890625, "learning_rate": 0.00019919511549284347, "loss": 4.2415, "step": 2185 }, { "epoch": 0.10233483527415296, "grad_norm": 1.3515625, "learning_rate": 0.00019919437619461713, "loss": 3.78, "step": 2186 }, { "epoch": 0.10238164901398561, "grad_norm": 1.5390625, "learning_rate": 0.00019919363655839163, "loss": 3.8486, "step": 2187 }, { "epoch": 0.10242846275381824, "grad_norm": 2.125, "learning_rate": 0.00019919289658416956, "loss": 3.989, "step": 2188 }, { "epoch": 0.10247527649365089, "grad_norm": 1.734375, "learning_rate": 0.00019919215627195347, "loss": 4.0017, "step": 2189 }, { "epoch": 0.10252209023348353, "grad_norm": 1.5859375, "learning_rate": 0.00019919141562174583, "loss": 3.9715, "step": 2190 }, { "epoch": 0.10256890397331617, "grad_norm": 1.234375, "learning_rate": 0.00019919067463354913, "loss": 4.2194, "step": 2191 }, { "epoch": 0.10261571771314881, "grad_norm": 1.390625, "learning_rate": 0.000199189933307366, "loss": 3.8345, "step": 2192 }, { "epoch": 0.10266253145298145, "grad_norm": 1.515625, "learning_rate": 0.00019918919164319888, "loss": 3.9904, "step": 2193 }, { "epoch": 0.10270934519281409, "grad_norm": 2.359375, "learning_rate": 0.00019918844964105032, "loss": 3.5965, "step": 2194 }, { "epoch": 0.10275615893264674, "grad_norm": 1.609375, "learning_rate": 0.00019918770730092283, "loss": 4.3448, "step": 2195 }, { "epoch": 0.10280297267247937, "grad_norm": 1.3125, "learning_rate": 0.000199186964622819, "loss": 4.302, "step": 2196 }, { "epoch": 0.10284978641231202, "grad_norm": 1.5703125, "learning_rate": 0.0001991862216067413, "loss": 3.8785, "step": 2197 }, { "epoch": 0.10289660015214465, "grad_norm": 1.34375, "learning_rate": 0.0001991854782526923, "loss": 4.3682, "step": 2198 }, { "epoch": 0.1029434138919773, "grad_norm": 1.8125, "learning_rate": 0.0001991847345606745, "loss": 4.2704, "step": 2199 }, { "epoch": 0.10299022763180994, "grad_norm": 1.3515625, "learning_rate": 0.00019918399053069042, "loss": 4.1284, "step": 2200 }, { "epoch": 0.10303704137164257, "grad_norm": 1.875, "learning_rate": 0.0001991832461627427, "loss": 4.2373, "step": 2201 }, { "epoch": 0.10308385511147522, "grad_norm": 1.265625, "learning_rate": 0.00019918250145683374, "loss": 3.7452, "step": 2202 }, { "epoch": 0.10313066885130785, "grad_norm": 1.484375, "learning_rate": 0.00019918175641296616, "loss": 3.7427, "step": 2203 }, { "epoch": 0.1031774825911405, "grad_norm": 1.375, "learning_rate": 0.0001991810110311425, "loss": 4.0744, "step": 2204 }, { "epoch": 0.10322429633097314, "grad_norm": 1.6953125, "learning_rate": 0.00019918026531136524, "loss": 3.9821, "step": 2205 }, { "epoch": 0.10327111007080578, "grad_norm": 1.296875, "learning_rate": 0.00019917951925363698, "loss": 4.5012, "step": 2206 }, { "epoch": 0.10331792381063842, "grad_norm": 1.6796875, "learning_rate": 0.00019917877285796022, "loss": 3.8726, "step": 2207 }, { "epoch": 0.10336473755047107, "grad_norm": 1.296875, "learning_rate": 0.00019917802612433753, "loss": 4.3279, "step": 2208 }, { "epoch": 0.1034115512903037, "grad_norm": 1.578125, "learning_rate": 0.00019917727905277148, "loss": 4.1392, "step": 2209 }, { "epoch": 0.10345836503013635, "grad_norm": 1.203125, "learning_rate": 0.00019917653164326456, "loss": 4.0307, "step": 2210 }, { "epoch": 0.10350517876996898, "grad_norm": 1.234375, "learning_rate": 0.00019917578389581935, "loss": 4.0787, "step": 2211 }, { "epoch": 0.10355199250980163, "grad_norm": 1.484375, "learning_rate": 0.00019917503581043836, "loss": 4.5631, "step": 2212 }, { "epoch": 0.10359880624963427, "grad_norm": 1.2421875, "learning_rate": 0.0001991742873871242, "loss": 3.869, "step": 2213 }, { "epoch": 0.1036456199894669, "grad_norm": 1.1953125, "learning_rate": 0.00019917353862587938, "loss": 3.6063, "step": 2214 }, { "epoch": 0.10369243372929955, "grad_norm": 1.3828125, "learning_rate": 0.00019917278952670645, "loss": 4.1517, "step": 2215 }, { "epoch": 0.10373924746913218, "grad_norm": 1.5, "learning_rate": 0.000199172040089608, "loss": 4.2038, "step": 2216 }, { "epoch": 0.10378606120896483, "grad_norm": 4.21875, "learning_rate": 0.00019917129031458655, "loss": 4.8197, "step": 2217 }, { "epoch": 0.10383287494879748, "grad_norm": 1.1015625, "learning_rate": 0.00019917054020164465, "loss": 3.8331, "step": 2218 }, { "epoch": 0.10387968868863011, "grad_norm": 1.59375, "learning_rate": 0.00019916978975078483, "loss": 4.1272, "step": 2219 }, { "epoch": 0.10392650242846276, "grad_norm": 1.3203125, "learning_rate": 0.00019916903896200974, "loss": 3.9204, "step": 2220 }, { "epoch": 0.1039733161682954, "grad_norm": 1.1015625, "learning_rate": 0.00019916828783532188, "loss": 4.1678, "step": 2221 }, { "epoch": 0.10402012990812803, "grad_norm": 1.3125, "learning_rate": 0.00019916753637072375, "loss": 4.4764, "step": 2222 }, { "epoch": 0.10406694364796068, "grad_norm": 1.1484375, "learning_rate": 0.00019916678456821803, "loss": 4.4602, "step": 2223 }, { "epoch": 0.10411375738779331, "grad_norm": 1.9765625, "learning_rate": 0.0001991660324278072, "loss": 3.8323, "step": 2224 }, { "epoch": 0.10416057112762596, "grad_norm": 1.4140625, "learning_rate": 0.00019916527994949385, "loss": 3.8176, "step": 2225 }, { "epoch": 0.1042073848674586, "grad_norm": 1.2578125, "learning_rate": 0.00019916452713328057, "loss": 4.0378, "step": 2226 }, { "epoch": 0.10425419860729124, "grad_norm": 1.15625, "learning_rate": 0.00019916377397916985, "loss": 4.1181, "step": 2227 }, { "epoch": 0.10430101234712388, "grad_norm": 1.6328125, "learning_rate": 0.00019916302048716432, "loss": 4.0872, "step": 2228 }, { "epoch": 0.10434782608695652, "grad_norm": 1.578125, "learning_rate": 0.00019916226665726654, "loss": 3.9369, "step": 2229 }, { "epoch": 0.10439463982678916, "grad_norm": 1.3203125, "learning_rate": 0.00019916151248947905, "loss": 3.9196, "step": 2230 }, { "epoch": 0.10444145356662181, "grad_norm": 1.2109375, "learning_rate": 0.00019916075798380445, "loss": 6.1388, "step": 2231 }, { "epoch": 0.10448826730645444, "grad_norm": 1.578125, "learning_rate": 0.00019916000314024532, "loss": 4.2648, "step": 2232 }, { "epoch": 0.10453508104628709, "grad_norm": 1.3046875, "learning_rate": 0.00019915924795880417, "loss": 4.0937, "step": 2233 }, { "epoch": 0.10458189478611972, "grad_norm": 1.0625, "learning_rate": 0.00019915849243948365, "loss": 3.6253, "step": 2234 }, { "epoch": 0.10462870852595237, "grad_norm": 1.5234375, "learning_rate": 0.00019915773658228626, "loss": 4.0334, "step": 2235 }, { "epoch": 0.10467552226578501, "grad_norm": 1.421875, "learning_rate": 0.00019915698038721464, "loss": 4.3557, "step": 2236 }, { "epoch": 0.10472233600561764, "grad_norm": 1.2734375, "learning_rate": 0.00019915622385427133, "loss": 4.1372, "step": 2237 }, { "epoch": 0.10476914974545029, "grad_norm": 1.2578125, "learning_rate": 0.00019915546698345894, "loss": 3.9044, "step": 2238 }, { "epoch": 0.10481596348528294, "grad_norm": 1.46875, "learning_rate": 0.00019915470977478002, "loss": 4.3558, "step": 2239 }, { "epoch": 0.10486277722511557, "grad_norm": 1.2109375, "learning_rate": 0.00019915395222823713, "loss": 4.1087, "step": 2240 }, { "epoch": 0.10490959096494822, "grad_norm": 1.2890625, "learning_rate": 0.0001991531943438329, "loss": 3.931, "step": 2241 }, { "epoch": 0.10495640470478085, "grad_norm": 1.3515625, "learning_rate": 0.0001991524361215699, "loss": 3.4749, "step": 2242 }, { "epoch": 0.1050032184446135, "grad_norm": 1.4375, "learning_rate": 0.0001991516775614507, "loss": 3.8443, "step": 2243 }, { "epoch": 0.10505003218444614, "grad_norm": 1.09375, "learning_rate": 0.00019915091866347788, "loss": 3.896, "step": 2244 }, { "epoch": 0.10509684592427877, "grad_norm": 1.109375, "learning_rate": 0.00019915015942765404, "loss": 3.9701, "step": 2245 }, { "epoch": 0.10514365966411142, "grad_norm": 1.328125, "learning_rate": 0.00019914939985398177, "loss": 3.8788, "step": 2246 }, { "epoch": 0.10519047340394405, "grad_norm": 1.3359375, "learning_rate": 0.00019914863994246364, "loss": 4.2204, "step": 2247 }, { "epoch": 0.1052372871437767, "grad_norm": 1.1640625, "learning_rate": 0.00019914787969310224, "loss": 3.8155, "step": 2248 }, { "epoch": 0.10528410088360934, "grad_norm": 1.2421875, "learning_rate": 0.00019914711910590021, "loss": 4.1567, "step": 2249 }, { "epoch": 0.10533091462344198, "grad_norm": 1.4609375, "learning_rate": 0.0001991463581808601, "loss": 3.5989, "step": 2250 }, { "epoch": 0.10537772836327462, "grad_norm": 1.4921875, "learning_rate": 0.00019914559691798446, "loss": 4.2114, "step": 2251 }, { "epoch": 0.10542454210310727, "grad_norm": 1.25, "learning_rate": 0.00019914483531727596, "loss": 4.0209, "step": 2252 }, { "epoch": 0.1054713558429399, "grad_norm": 1.0390625, "learning_rate": 0.00019914407337873717, "loss": 4.0826, "step": 2253 }, { "epoch": 0.10551816958277255, "grad_norm": 1.6328125, "learning_rate": 0.00019914331110237066, "loss": 3.7501, "step": 2254 }, { "epoch": 0.10556498332260518, "grad_norm": 1.46875, "learning_rate": 0.00019914254848817905, "loss": 3.5623, "step": 2255 }, { "epoch": 0.10561179706243783, "grad_norm": 1.4609375, "learning_rate": 0.00019914178553616493, "loss": 4.0459, "step": 2256 }, { "epoch": 0.10565861080227047, "grad_norm": 1.6328125, "learning_rate": 0.00019914102224633093, "loss": 4.1796, "step": 2257 }, { "epoch": 0.1057054245421031, "grad_norm": 1.7265625, "learning_rate": 0.00019914025861867963, "loss": 4.3015, "step": 2258 }, { "epoch": 0.10575223828193575, "grad_norm": 1.2109375, "learning_rate": 0.00019913949465321363, "loss": 3.9036, "step": 2259 }, { "epoch": 0.10579905202176838, "grad_norm": 1.296875, "learning_rate": 0.00019913873034993552, "loss": 4.1493, "step": 2260 }, { "epoch": 0.10584586576160103, "grad_norm": 1.4453125, "learning_rate": 0.0001991379657088479, "loss": 4.2042, "step": 2261 }, { "epoch": 0.10589267950143368, "grad_norm": 1.328125, "learning_rate": 0.0001991372007299534, "loss": 3.602, "step": 2262 }, { "epoch": 0.10593949324126631, "grad_norm": 1.28125, "learning_rate": 0.00019913643541325464, "loss": 4.3965, "step": 2263 }, { "epoch": 0.10598630698109895, "grad_norm": 1.9140625, "learning_rate": 0.0001991356697587542, "loss": 4.0387, "step": 2264 }, { "epoch": 0.10603312072093159, "grad_norm": 1.375, "learning_rate": 0.0001991349037664547, "loss": 4.5794, "step": 2265 }, { "epoch": 0.10607993446076423, "grad_norm": 1.5625, "learning_rate": 0.00019913413743635874, "loss": 4.1066, "step": 2266 }, { "epoch": 0.10612674820059688, "grad_norm": 1.6953125, "learning_rate": 0.00019913337076846892, "loss": 3.9455, "step": 2267 }, { "epoch": 0.10617356194042951, "grad_norm": 1.4296875, "learning_rate": 0.0001991326037627879, "loss": 3.7794, "step": 2268 }, { "epoch": 0.10622037568026216, "grad_norm": 1.5, "learning_rate": 0.00019913183641931823, "loss": 3.7805, "step": 2269 }, { "epoch": 0.1062671894200948, "grad_norm": 1.2578125, "learning_rate": 0.00019913106873806257, "loss": 4.271, "step": 2270 }, { "epoch": 0.10631400315992744, "grad_norm": 1.359375, "learning_rate": 0.00019913030071902355, "loss": 3.9212, "step": 2271 }, { "epoch": 0.10636081689976008, "grad_norm": 1.34375, "learning_rate": 0.00019912953236220373, "loss": 4.1436, "step": 2272 }, { "epoch": 0.10640763063959272, "grad_norm": 1.7265625, "learning_rate": 0.00019912876366760576, "loss": 4.0965, "step": 2273 }, { "epoch": 0.10645444437942536, "grad_norm": 1.3203125, "learning_rate": 0.00019912799463523226, "loss": 4.2166, "step": 2274 }, { "epoch": 0.10650125811925801, "grad_norm": 1.09375, "learning_rate": 0.00019912722526508586, "loss": 3.1313, "step": 2275 }, { "epoch": 0.10654807185909064, "grad_norm": 1.3515625, "learning_rate": 0.00019912645555716915, "loss": 4.195, "step": 2276 }, { "epoch": 0.10659488559892329, "grad_norm": 1.109375, "learning_rate": 0.0001991256855114848, "loss": 3.8926, "step": 2277 }, { "epoch": 0.10664169933875592, "grad_norm": 1.0390625, "learning_rate": 0.00019912491512803538, "loss": 3.7703, "step": 2278 }, { "epoch": 0.10668851307858856, "grad_norm": 1.125, "learning_rate": 0.00019912414440682353, "loss": 3.9839, "step": 2279 }, { "epoch": 0.10673532681842121, "grad_norm": 1.2421875, "learning_rate": 0.00019912337334785193, "loss": 4.049, "step": 2280 }, { "epoch": 0.10678214055825384, "grad_norm": 1.2734375, "learning_rate": 0.00019912260195112312, "loss": 4.9437, "step": 2281 }, { "epoch": 0.10682895429808649, "grad_norm": 1.1640625, "learning_rate": 0.0001991218302166398, "loss": 3.7699, "step": 2282 }, { "epoch": 0.10687576803791914, "grad_norm": 1.3984375, "learning_rate": 0.0001991210581444046, "loss": 3.8897, "step": 2283 }, { "epoch": 0.10692258177775177, "grad_norm": 1.5390625, "learning_rate": 0.00019912028573442007, "loss": 3.8991, "step": 2284 }, { "epoch": 0.10696939551758441, "grad_norm": 2.46875, "learning_rate": 0.00019911951298668892, "loss": 3.9653, "step": 2285 }, { "epoch": 0.10701620925741705, "grad_norm": 1.5546875, "learning_rate": 0.00019911873990121376, "loss": 4.2266, "step": 2286 }, { "epoch": 0.1070630229972497, "grad_norm": 0.99609375, "learning_rate": 0.00019911796647799723, "loss": 4.1011, "step": 2287 }, { "epoch": 0.10710983673708234, "grad_norm": 1.3046875, "learning_rate": 0.00019911719271704193, "loss": 3.9165, "step": 2288 }, { "epoch": 0.10715665047691497, "grad_norm": 1.1796875, "learning_rate": 0.00019911641861835056, "loss": 3.6583, "step": 2289 }, { "epoch": 0.10720346421674762, "grad_norm": 1.3125, "learning_rate": 0.0001991156441819257, "loss": 3.6021, "step": 2290 }, { "epoch": 0.10725027795658025, "grad_norm": 1.40625, "learning_rate": 0.00019911486940777002, "loss": 4.3091, "step": 2291 }, { "epoch": 0.1072970916964129, "grad_norm": 1.2890625, "learning_rate": 0.00019911409429588617, "loss": 4.0635, "step": 2292 }, { "epoch": 0.10734390543624554, "grad_norm": 1.7109375, "learning_rate": 0.00019911331884627675, "loss": 3.9184, "step": 2293 }, { "epoch": 0.10739071917607818, "grad_norm": 1.3671875, "learning_rate": 0.00019911254305894443, "loss": 4.2524, "step": 2294 }, { "epoch": 0.10743753291591082, "grad_norm": 1.3203125, "learning_rate": 0.00019911176693389185, "loss": 3.9792, "step": 2295 }, { "epoch": 0.10748434665574345, "grad_norm": 1.5078125, "learning_rate": 0.00019911099047112166, "loss": 4.2279, "step": 2296 }, { "epoch": 0.1075311603955761, "grad_norm": 1.4140625, "learning_rate": 0.00019911021367063648, "loss": 3.6983, "step": 2297 }, { "epoch": 0.10757797413540875, "grad_norm": 1.421875, "learning_rate": 0.000199109436532439, "loss": 4.2834, "step": 2298 }, { "epoch": 0.10762478787524138, "grad_norm": 2.421875, "learning_rate": 0.00019910865905653182, "loss": 3.7675, "step": 2299 }, { "epoch": 0.10767160161507403, "grad_norm": 1.078125, "learning_rate": 0.0001991078812429176, "loss": 3.8129, "step": 2300 }, { "epoch": 0.10771841535490667, "grad_norm": 1.671875, "learning_rate": 0.00019910710309159902, "loss": 4.1486, "step": 2301 }, { "epoch": 0.1077652290947393, "grad_norm": 1.296875, "learning_rate": 0.00019910632460257873, "loss": 3.7366, "step": 2302 }, { "epoch": 0.10781204283457195, "grad_norm": 1.6171875, "learning_rate": 0.00019910554577585936, "loss": 4.1415, "step": 2303 }, { "epoch": 0.10785885657440458, "grad_norm": 1.3828125, "learning_rate": 0.00019910476661144357, "loss": 4.339, "step": 2304 }, { "epoch": 0.10790567031423723, "grad_norm": 1.2734375, "learning_rate": 0.00019910398710933403, "loss": 4.3024, "step": 2305 }, { "epoch": 0.10795248405406987, "grad_norm": 1.2109375, "learning_rate": 0.00019910320726953336, "loss": 4.2017, "step": 2306 }, { "epoch": 0.10799929779390251, "grad_norm": 1.71875, "learning_rate": 0.00019910242709204426, "loss": 3.8479, "step": 2307 }, { "epoch": 0.10804611153373515, "grad_norm": 1.609375, "learning_rate": 0.00019910164657686936, "loss": 4.3768, "step": 2308 }, { "epoch": 0.10809292527356779, "grad_norm": 1.515625, "learning_rate": 0.0001991008657240113, "loss": 4.1646, "step": 2309 }, { "epoch": 0.10813973901340043, "grad_norm": 1.8515625, "learning_rate": 0.0001991000845334728, "loss": 3.8772, "step": 2310 }, { "epoch": 0.10818655275323308, "grad_norm": 1.3046875, "learning_rate": 0.00019909930300525646, "loss": 3.967, "step": 2311 }, { "epoch": 0.10823336649306571, "grad_norm": 1.453125, "learning_rate": 0.000199098521139365, "loss": 3.9549, "step": 2312 }, { "epoch": 0.10828018023289836, "grad_norm": 1.359375, "learning_rate": 0.00019909773893580108, "loss": 4.3303, "step": 2313 }, { "epoch": 0.108326993972731, "grad_norm": 1.6796875, "learning_rate": 0.0001990969563945673, "loss": 3.7685, "step": 2314 }, { "epoch": 0.10837380771256364, "grad_norm": 1.28125, "learning_rate": 0.0001990961735156664, "loss": 3.9955, "step": 2315 }, { "epoch": 0.10842062145239628, "grad_norm": 1.25, "learning_rate": 0.00019909539029910097, "loss": 4.1165, "step": 2316 }, { "epoch": 0.10846743519222891, "grad_norm": 1.3359375, "learning_rate": 0.0001990946067448738, "loss": 3.9705, "step": 2317 }, { "epoch": 0.10851424893206156, "grad_norm": 1.2265625, "learning_rate": 0.00019909382285298742, "loss": 3.8833, "step": 2318 }, { "epoch": 0.1085610626718942, "grad_norm": 1.078125, "learning_rate": 0.0001990930386234446, "loss": 4.1285, "step": 2319 }, { "epoch": 0.10860787641172684, "grad_norm": 1.296875, "learning_rate": 0.00019909225405624796, "loss": 4.0967, "step": 2320 }, { "epoch": 0.10865469015155949, "grad_norm": 1.3046875, "learning_rate": 0.0001990914691514002, "loss": 4.0316, "step": 2321 }, { "epoch": 0.10870150389139212, "grad_norm": 1.2421875, "learning_rate": 0.000199090683908904, "loss": 3.8558, "step": 2322 }, { "epoch": 0.10874831763122476, "grad_norm": 1.3203125, "learning_rate": 0.000199089898328762, "loss": 3.3632, "step": 2323 }, { "epoch": 0.10879513137105741, "grad_norm": 1.3203125, "learning_rate": 0.0001990891124109769, "loss": 3.915, "step": 2324 }, { "epoch": 0.10884194511089004, "grad_norm": 1.125, "learning_rate": 0.0001990883261555514, "loss": 4.1764, "step": 2325 }, { "epoch": 0.10888875885072269, "grad_norm": 1.1875, "learning_rate": 0.00019908753956248814, "loss": 3.8678, "step": 2326 }, { "epoch": 0.10893557259055532, "grad_norm": 1.4375, "learning_rate": 0.0001990867526317898, "loss": 4.1357, "step": 2327 }, { "epoch": 0.10898238633038797, "grad_norm": 1.4140625, "learning_rate": 0.0001990859653634591, "loss": 4.0029, "step": 2328 }, { "epoch": 0.10902920007022061, "grad_norm": 1.2109375, "learning_rate": 0.00019908517775749868, "loss": 3.7574, "step": 2329 }, { "epoch": 0.10907601381005325, "grad_norm": 1.4375, "learning_rate": 0.0001990843898139113, "loss": 3.9179, "step": 2330 }, { "epoch": 0.10912282754988589, "grad_norm": 1.546875, "learning_rate": 0.0001990836015326995, "loss": 4.2075, "step": 2331 }, { "epoch": 0.10916964128971854, "grad_norm": 1.71875, "learning_rate": 0.00019908281291386613, "loss": 3.9561, "step": 2332 }, { "epoch": 0.10921645502955117, "grad_norm": 1.3671875, "learning_rate": 0.00019908202395741377, "loss": 4.2083, "step": 2333 }, { "epoch": 0.10926326876938382, "grad_norm": 1.1640625, "learning_rate": 0.00019908123466334514, "loss": 4.0471, "step": 2334 }, { "epoch": 0.10931008250921645, "grad_norm": 1.2265625, "learning_rate": 0.00019908044503166294, "loss": 4.0731, "step": 2335 }, { "epoch": 0.1093568962490491, "grad_norm": 1.390625, "learning_rate": 0.00019907965506236986, "loss": 3.8435, "step": 2336 }, { "epoch": 0.10940370998888174, "grad_norm": 0.98046875, "learning_rate": 0.0001990788647554686, "loss": 4.197, "step": 2337 }, { "epoch": 0.10945052372871437, "grad_norm": 1.71875, "learning_rate": 0.00019907807411096177, "loss": 3.6908, "step": 2338 }, { "epoch": 0.10949733746854702, "grad_norm": 1.25, "learning_rate": 0.00019907728312885216, "loss": 3.9843, "step": 2339 }, { "epoch": 0.10954415120837965, "grad_norm": 1.2265625, "learning_rate": 0.00019907649180914242, "loss": 4.0415, "step": 2340 }, { "epoch": 0.1095909649482123, "grad_norm": 1.4375, "learning_rate": 0.0001990757001518353, "loss": 4.1506, "step": 2341 }, { "epoch": 0.10963777868804495, "grad_norm": 1.6953125, "learning_rate": 0.00019907490815693346, "loss": 4.0812, "step": 2342 }, { "epoch": 0.10968459242787758, "grad_norm": 1.2890625, "learning_rate": 0.00019907411582443954, "loss": 4.3013, "step": 2343 }, { "epoch": 0.10973140616771022, "grad_norm": 1.21875, "learning_rate": 0.00019907332315435635, "loss": 4.0688, "step": 2344 }, { "epoch": 0.10977821990754287, "grad_norm": 1.1640625, "learning_rate": 0.00019907253014668651, "loss": 3.5586, "step": 2345 }, { "epoch": 0.1098250336473755, "grad_norm": 1.078125, "learning_rate": 0.00019907173680143276, "loss": 3.6583, "step": 2346 }, { "epoch": 0.10987184738720815, "grad_norm": 1.4921875, "learning_rate": 0.0001990709431185978, "loss": 3.931, "step": 2347 }, { "epoch": 0.10991866112704078, "grad_norm": 2.8125, "learning_rate": 0.00019907014909818432, "loss": 2.8422, "step": 2348 }, { "epoch": 0.10996547486687343, "grad_norm": 1.21875, "learning_rate": 0.00019906935474019505, "loss": 3.4908, "step": 2349 }, { "epoch": 0.11001228860670607, "grad_norm": 1.1328125, "learning_rate": 0.00019906856004463266, "loss": 3.9689, "step": 2350 }, { "epoch": 0.1100591023465387, "grad_norm": 1.234375, "learning_rate": 0.0001990677650114999, "loss": 4.1435, "step": 2351 }, { "epoch": 0.11010591608637135, "grad_norm": 1.21875, "learning_rate": 0.00019906696964079945, "loss": 3.8403, "step": 2352 }, { "epoch": 0.11015272982620399, "grad_norm": 1.609375, "learning_rate": 0.000199066173932534, "loss": 3.9528, "step": 2353 }, { "epoch": 0.11019954356603663, "grad_norm": 1.0546875, "learning_rate": 0.0001990653778867063, "loss": 4.3028, "step": 2354 }, { "epoch": 0.11024635730586928, "grad_norm": 1.5703125, "learning_rate": 0.00019906458150331907, "loss": 4.2979, "step": 2355 }, { "epoch": 0.11029317104570191, "grad_norm": 1.5859375, "learning_rate": 0.000199063784782375, "loss": 4.1646, "step": 2356 }, { "epoch": 0.11033998478553456, "grad_norm": 1.0546875, "learning_rate": 0.00019906298772387682, "loss": 4.0112, "step": 2357 }, { "epoch": 0.11038679852536719, "grad_norm": 1.515625, "learning_rate": 0.00019906219032782721, "loss": 3.8465, "step": 2358 }, { "epoch": 0.11043361226519983, "grad_norm": 1.609375, "learning_rate": 0.0001990613925942289, "loss": 4.343, "step": 2359 }, { "epoch": 0.11048042600503248, "grad_norm": 1.2890625, "learning_rate": 0.00019906059452308466, "loss": 3.8871, "step": 2360 }, { "epoch": 0.11052723974486511, "grad_norm": 1.484375, "learning_rate": 0.00019905979611439714, "loss": 4.4483, "step": 2361 }, { "epoch": 0.11057405348469776, "grad_norm": 1.5234375, "learning_rate": 0.0001990589973681691, "loss": 3.9423, "step": 2362 }, { "epoch": 0.1106208672245304, "grad_norm": 1.25, "learning_rate": 0.00019905819828440325, "loss": 4.087, "step": 2363 }, { "epoch": 0.11066768096436304, "grad_norm": 1.390625, "learning_rate": 0.00019905739886310234, "loss": 4.1428, "step": 2364 }, { "epoch": 0.11071449470419568, "grad_norm": 1.3515625, "learning_rate": 0.00019905659910426904, "loss": 4.1954, "step": 2365 }, { "epoch": 0.11076130844402832, "grad_norm": 1.4765625, "learning_rate": 0.00019905579900790613, "loss": 3.7682, "step": 2366 }, { "epoch": 0.11080812218386096, "grad_norm": 1.3359375, "learning_rate": 0.00019905499857401628, "loss": 3.5482, "step": 2367 }, { "epoch": 0.11085493592369361, "grad_norm": 1.2578125, "learning_rate": 0.00019905419780260226, "loss": 3.8859, "step": 2368 }, { "epoch": 0.11090174966352624, "grad_norm": 1.5625, "learning_rate": 0.00019905339669366677, "loss": 4.5176, "step": 2369 }, { "epoch": 0.11094856340335889, "grad_norm": 1.3515625, "learning_rate": 0.0001990525952472126, "loss": 3.867, "step": 2370 }, { "epoch": 0.11099537714319152, "grad_norm": 1.6015625, "learning_rate": 0.0001990517934632424, "loss": 3.8062, "step": 2371 }, { "epoch": 0.11104219088302417, "grad_norm": 1.390625, "learning_rate": 0.00019905099134175894, "loss": 3.9082, "step": 2372 }, { "epoch": 0.11108900462285681, "grad_norm": 1.640625, "learning_rate": 0.000199050188882765, "loss": 4.0453, "step": 2373 }, { "epoch": 0.11113581836268945, "grad_norm": 1.3125, "learning_rate": 0.00019904938608626322, "loss": 4.2154, "step": 2374 }, { "epoch": 0.11118263210252209, "grad_norm": 1.6015625, "learning_rate": 0.00019904858295225638, "loss": 3.957, "step": 2375 }, { "epoch": 0.11122944584235474, "grad_norm": 1.2890625, "learning_rate": 0.00019904777948074725, "loss": 3.9717, "step": 2376 }, { "epoch": 0.11127625958218737, "grad_norm": 1.9296875, "learning_rate": 0.0001990469756717385, "loss": 4.0133, "step": 2377 }, { "epoch": 0.11132307332202002, "grad_norm": 1.2890625, "learning_rate": 0.00019904617152523293, "loss": 3.7125, "step": 2378 }, { "epoch": 0.11136988706185265, "grad_norm": 1.6328125, "learning_rate": 0.00019904536704123325, "loss": 3.8152, "step": 2379 }, { "epoch": 0.1114167008016853, "grad_norm": 1.46875, "learning_rate": 0.00019904456221974222, "loss": 4.3149, "step": 2380 }, { "epoch": 0.11146351454151794, "grad_norm": 1.46875, "learning_rate": 0.00019904375706076256, "loss": 4.3199, "step": 2381 }, { "epoch": 0.11151032828135057, "grad_norm": 1.609375, "learning_rate": 0.000199042951564297, "loss": 3.7972, "step": 2382 }, { "epoch": 0.11155714202118322, "grad_norm": 1.796875, "learning_rate": 0.00019904214573034833, "loss": 4.6443, "step": 2383 }, { "epoch": 0.11160395576101585, "grad_norm": 1.84375, "learning_rate": 0.0001990413395589193, "loss": 4.1092, "step": 2384 }, { "epoch": 0.1116507695008485, "grad_norm": 1.40625, "learning_rate": 0.00019904053305001257, "loss": 3.7256, "step": 2385 }, { "epoch": 0.11169758324068114, "grad_norm": 1.9140625, "learning_rate": 0.00019903972620363098, "loss": 4.25, "step": 2386 }, { "epoch": 0.11174439698051378, "grad_norm": 1.4921875, "learning_rate": 0.00019903891901977726, "loss": 4.0672, "step": 2387 }, { "epoch": 0.11179121072034642, "grad_norm": 1.515625, "learning_rate": 0.00019903811149845412, "loss": 4.2005, "step": 2388 }, { "epoch": 0.11183802446017906, "grad_norm": 1.2578125, "learning_rate": 0.00019903730363966435, "loss": 3.6275, "step": 2389 }, { "epoch": 0.1118848382000117, "grad_norm": 1.8671875, "learning_rate": 0.0001990364954434107, "loss": 3.9537, "step": 2390 }, { "epoch": 0.11193165193984435, "grad_norm": 5.6875, "learning_rate": 0.00019903568690969592, "loss": 4.5395, "step": 2391 }, { "epoch": 0.11197846567967698, "grad_norm": 1.453125, "learning_rate": 0.00019903487803852275, "loss": 3.9381, "step": 2392 }, { "epoch": 0.11202527941950963, "grad_norm": 1.4453125, "learning_rate": 0.00019903406882989397, "loss": 4.0589, "step": 2393 }, { "epoch": 0.11207209315934227, "grad_norm": 1.3046875, "learning_rate": 0.00019903325928381232, "loss": 3.5992, "step": 2394 }, { "epoch": 0.1121189068991749, "grad_norm": 1.2109375, "learning_rate": 0.00019903244940028055, "loss": 3.954, "step": 2395 }, { "epoch": 0.11216572063900755, "grad_norm": 1.2421875, "learning_rate": 0.00019903163917930146, "loss": 3.8905, "step": 2396 }, { "epoch": 0.11221253437884018, "grad_norm": 1.2734375, "learning_rate": 0.00019903082862087777, "loss": 3.8284, "step": 2397 }, { "epoch": 0.11225934811867283, "grad_norm": 1.25, "learning_rate": 0.00019903001772501224, "loss": 3.9143, "step": 2398 }, { "epoch": 0.11230616185850548, "grad_norm": 1.2734375, "learning_rate": 0.00019902920649170767, "loss": 4.0744, "step": 2399 }, { "epoch": 0.11235297559833811, "grad_norm": 2.03125, "learning_rate": 0.0001990283949209668, "loss": 3.7415, "step": 2400 }, { "epoch": 0.11239978933817076, "grad_norm": 1.90625, "learning_rate": 0.00019902758301279237, "loss": 4.2852, "step": 2401 }, { "epoch": 0.11244660307800339, "grad_norm": 1.203125, "learning_rate": 0.0001990267707671872, "loss": 3.9165, "step": 2402 }, { "epoch": 0.11249341681783603, "grad_norm": 1.46875, "learning_rate": 0.00019902595818415404, "loss": 4.2472, "step": 2403 }, { "epoch": 0.11254023055766868, "grad_norm": 1.3125, "learning_rate": 0.00019902514526369566, "loss": 4.0463, "step": 2404 }, { "epoch": 0.11258704429750131, "grad_norm": 1.390625, "learning_rate": 0.00019902433200581478, "loss": 3.8996, "step": 2405 }, { "epoch": 0.11263385803733396, "grad_norm": 1.609375, "learning_rate": 0.00019902351841051425, "loss": 4.2722, "step": 2406 }, { "epoch": 0.1126806717771666, "grad_norm": 1.703125, "learning_rate": 0.0001990227044777968, "loss": 3.7018, "step": 2407 }, { "epoch": 0.11272748551699924, "grad_norm": 1.28125, "learning_rate": 0.00019902189020766517, "loss": 4.0577, "step": 2408 }, { "epoch": 0.11277429925683188, "grad_norm": 1.3515625, "learning_rate": 0.0001990210756001222, "loss": 3.8159, "step": 2409 }, { "epoch": 0.11282111299666452, "grad_norm": 1.515625, "learning_rate": 0.00019902026065517065, "loss": 3.6813, "step": 2410 }, { "epoch": 0.11286792673649716, "grad_norm": 1.578125, "learning_rate": 0.00019901944537281327, "loss": 3.9336, "step": 2411 }, { "epoch": 0.11291474047632981, "grad_norm": 1.1953125, "learning_rate": 0.00019901862975305287, "loss": 3.6021, "step": 2412 }, { "epoch": 0.11296155421616244, "grad_norm": 1.828125, "learning_rate": 0.0001990178137958922, "loss": 3.759, "step": 2413 }, { "epoch": 0.11300836795599509, "grad_norm": 1.4296875, "learning_rate": 0.00019901699750133408, "loss": 3.6029, "step": 2414 }, { "epoch": 0.11305518169582772, "grad_norm": 1.296875, "learning_rate": 0.0001990161808693812, "loss": 4.128, "step": 2415 }, { "epoch": 0.11310199543566037, "grad_norm": 1.28125, "learning_rate": 0.00019901536390003647, "loss": 3.9183, "step": 2416 }, { "epoch": 0.11314880917549301, "grad_norm": 1.6015625, "learning_rate": 0.0001990145465933026, "loss": 4.1799, "step": 2417 }, { "epoch": 0.11319562291532564, "grad_norm": 1.3125, "learning_rate": 0.00019901372894918235, "loss": 3.9733, "step": 2418 }, { "epoch": 0.11324243665515829, "grad_norm": 1.09375, "learning_rate": 0.00019901291096767858, "loss": 3.9045, "step": 2419 }, { "epoch": 0.11328925039499092, "grad_norm": 1.1796875, "learning_rate": 0.00019901209264879403, "loss": 3.7876, "step": 2420 }, { "epoch": 0.11333606413482357, "grad_norm": 1.4375, "learning_rate": 0.00019901127399253149, "loss": 3.7838, "step": 2421 }, { "epoch": 0.11338287787465622, "grad_norm": 1.3359375, "learning_rate": 0.00019901045499889374, "loss": 3.7686, "step": 2422 }, { "epoch": 0.11342969161448885, "grad_norm": 1.359375, "learning_rate": 0.0001990096356678836, "loss": 3.9163, "step": 2423 }, { "epoch": 0.1134765053543215, "grad_norm": 1.2109375, "learning_rate": 0.00019900881599950386, "loss": 3.9641, "step": 2424 }, { "epoch": 0.11352331909415414, "grad_norm": 1.03125, "learning_rate": 0.00019900799599375732, "loss": 3.757, "step": 2425 }, { "epoch": 0.11357013283398677, "grad_norm": 1.359375, "learning_rate": 0.00019900717565064674, "loss": 4.069, "step": 2426 }, { "epoch": 0.11361694657381942, "grad_norm": 1.234375, "learning_rate": 0.00019900635497017492, "loss": 3.7223, "step": 2427 }, { "epoch": 0.11366376031365205, "grad_norm": 1.609375, "learning_rate": 0.00019900553395234467, "loss": 4.2511, "step": 2428 }, { "epoch": 0.1137105740534847, "grad_norm": 1.390625, "learning_rate": 0.0001990047125971588, "loss": 4.0249, "step": 2429 }, { "epoch": 0.11375738779331734, "grad_norm": 1.3359375, "learning_rate": 0.00019900389090462007, "loss": 4.0213, "step": 2430 }, { "epoch": 0.11380420153314998, "grad_norm": 0.99609375, "learning_rate": 0.00019900306887473134, "loss": 4.496, "step": 2431 }, { "epoch": 0.11385101527298262, "grad_norm": 1.390625, "learning_rate": 0.00019900224650749534, "loss": 4.1175, "step": 2432 }, { "epoch": 0.11389782901281525, "grad_norm": 1.4453125, "learning_rate": 0.00019900142380291494, "loss": 3.958, "step": 2433 }, { "epoch": 0.1139446427526479, "grad_norm": 1.5546875, "learning_rate": 0.00019900060076099289, "loss": 3.8316, "step": 2434 }, { "epoch": 0.11399145649248055, "grad_norm": 1.6953125, "learning_rate": 0.000198999777381732, "loss": 4.008, "step": 2435 }, { "epoch": 0.11403827023231318, "grad_norm": 1.515625, "learning_rate": 0.00019899895366513512, "loss": 4.3304, "step": 2436 }, { "epoch": 0.11408508397214583, "grad_norm": 1.3046875, "learning_rate": 0.00019899812961120502, "loss": 3.4644, "step": 2437 }, { "epoch": 0.11413189771197847, "grad_norm": 1.203125, "learning_rate": 0.00019899730521994453, "loss": 4.0563, "step": 2438 }, { "epoch": 0.1141787114518111, "grad_norm": 1.078125, "learning_rate": 0.00019899648049135642, "loss": 2.7964, "step": 2439 }, { "epoch": 0.11422552519164375, "grad_norm": 1.421875, "learning_rate": 0.0001989956554254435, "loss": 3.9252, "step": 2440 }, { "epoch": 0.11427233893147638, "grad_norm": 1.484375, "learning_rate": 0.00019899483002220868, "loss": 4.1995, "step": 2441 }, { "epoch": 0.11431915267130903, "grad_norm": 1.40625, "learning_rate": 0.00019899400428165465, "loss": 4.143, "step": 2442 }, { "epoch": 0.11436596641114168, "grad_norm": 1.28125, "learning_rate": 0.00019899317820378429, "loss": 3.8884, "step": 2443 }, { "epoch": 0.11441278015097431, "grad_norm": 1.234375, "learning_rate": 0.0001989923517886004, "loss": 4.23, "step": 2444 }, { "epoch": 0.11445959389080695, "grad_norm": 2.140625, "learning_rate": 0.00019899152503610577, "loss": 3.3697, "step": 2445 }, { "epoch": 0.11450640763063959, "grad_norm": 1.3671875, "learning_rate": 0.00019899069794630325, "loss": 3.7376, "step": 2446 }, { "epoch": 0.11455322137047223, "grad_norm": 1.28125, "learning_rate": 0.00019898987051919563, "loss": 3.9834, "step": 2447 }, { "epoch": 0.11460003511030488, "grad_norm": 1.359375, "learning_rate": 0.0001989890427547858, "loss": 4.2466, "step": 2448 }, { "epoch": 0.11464684885013751, "grad_norm": 1.671875, "learning_rate": 0.0001989882146530765, "loss": 4.3699, "step": 2449 }, { "epoch": 0.11469366258997016, "grad_norm": 1.359375, "learning_rate": 0.00019898738621407057, "loss": 3.8997, "step": 2450 }, { "epoch": 0.11474047632980279, "grad_norm": 2.28125, "learning_rate": 0.00019898655743777086, "loss": 3.4607, "step": 2451 }, { "epoch": 0.11478729006963544, "grad_norm": 1.921875, "learning_rate": 0.00019898572832418015, "loss": 4.2744, "step": 2452 }, { "epoch": 0.11483410380946808, "grad_norm": 1.078125, "learning_rate": 0.00019898489887330132, "loss": 6.2768, "step": 2453 }, { "epoch": 0.11488091754930072, "grad_norm": 1.484375, "learning_rate": 0.00019898406908513717, "loss": 4.3795, "step": 2454 }, { "epoch": 0.11492773128913336, "grad_norm": 1.65625, "learning_rate": 0.0001989832389596905, "loss": 3.7986, "step": 2455 }, { "epoch": 0.11497454502896601, "grad_norm": 1.2265625, "learning_rate": 0.0001989824084969642, "loss": 4.3304, "step": 2456 }, { "epoch": 0.11502135876879864, "grad_norm": 1.46875, "learning_rate": 0.00019898157769696103, "loss": 3.983, "step": 2457 }, { "epoch": 0.11506817250863129, "grad_norm": 1.375, "learning_rate": 0.0001989807465596839, "loss": 4.0619, "step": 2458 }, { "epoch": 0.11511498624846392, "grad_norm": 1.7734375, "learning_rate": 0.00019897991508513555, "loss": 3.8563, "step": 2459 }, { "epoch": 0.11516179998829656, "grad_norm": 1.4765625, "learning_rate": 0.00019897908327331886, "loss": 4.38, "step": 2460 }, { "epoch": 0.11520861372812921, "grad_norm": 1.3515625, "learning_rate": 0.00019897825112423669, "loss": 3.674, "step": 2461 }, { "epoch": 0.11525542746796184, "grad_norm": 1.1953125, "learning_rate": 0.00019897741863789185, "loss": 4.2834, "step": 2462 }, { "epoch": 0.11530224120779449, "grad_norm": 1.234375, "learning_rate": 0.00019897658581428718, "loss": 4.346, "step": 2463 }, { "epoch": 0.11534905494762712, "grad_norm": 1.3515625, "learning_rate": 0.0001989757526534255, "loss": 4.0709, "step": 2464 }, { "epoch": 0.11539586868745977, "grad_norm": 1.5, "learning_rate": 0.00019897491915530968, "loss": 3.6841, "step": 2465 }, { "epoch": 0.11544268242729241, "grad_norm": 1.4921875, "learning_rate": 0.00019897408531994253, "loss": 3.8711, "step": 2466 }, { "epoch": 0.11548949616712505, "grad_norm": 1.3828125, "learning_rate": 0.0001989732511473269, "loss": 3.6557, "step": 2467 }, { "epoch": 0.1155363099069577, "grad_norm": 1.1796875, "learning_rate": 0.00019897241663746566, "loss": 4.2877, "step": 2468 }, { "epoch": 0.11558312364679034, "grad_norm": 1.953125, "learning_rate": 0.00019897158179036162, "loss": 4.3358, "step": 2469 }, { "epoch": 0.11562993738662297, "grad_norm": 1.0546875, "learning_rate": 0.00019897074660601762, "loss": 3.9991, "step": 2470 }, { "epoch": 0.11567675112645562, "grad_norm": 1.5, "learning_rate": 0.00019896991108443653, "loss": 3.7097, "step": 2471 }, { "epoch": 0.11572356486628825, "grad_norm": 1.1328125, "learning_rate": 0.0001989690752256212, "loss": 3.7726, "step": 2472 }, { "epoch": 0.1157703786061209, "grad_norm": 1.7109375, "learning_rate": 0.00019896823902957448, "loss": 3.6925, "step": 2473 }, { "epoch": 0.11581719234595354, "grad_norm": 1.5, "learning_rate": 0.00019896740249629918, "loss": 3.8888, "step": 2474 }, { "epoch": 0.11586400608578618, "grad_norm": 1.3125, "learning_rate": 0.00019896656562579817, "loss": 4.1726, "step": 2475 }, { "epoch": 0.11591081982561882, "grad_norm": 1.609375, "learning_rate": 0.00019896572841807432, "loss": 3.8194, "step": 2476 }, { "epoch": 0.11595763356545145, "grad_norm": 1.25, "learning_rate": 0.0001989648908731305, "loss": 3.4232, "step": 2477 }, { "epoch": 0.1160044473052841, "grad_norm": 1.8046875, "learning_rate": 0.00019896405299096948, "loss": 3.7991, "step": 2478 }, { "epoch": 0.11605126104511675, "grad_norm": 2.015625, "learning_rate": 0.0001989632147715942, "loss": 4.563, "step": 2479 }, { "epoch": 0.11609807478494938, "grad_norm": 1.390625, "learning_rate": 0.0001989623762150075, "loss": 4.1142, "step": 2480 }, { "epoch": 0.11614488852478203, "grad_norm": 1.2265625, "learning_rate": 0.0001989615373212122, "loss": 3.9255, "step": 2481 }, { "epoch": 0.11619170226461467, "grad_norm": 1.6796875, "learning_rate": 0.00019896069809021119, "loss": 4.0697, "step": 2482 }, { "epoch": 0.1162385160044473, "grad_norm": 1.2265625, "learning_rate": 0.00019895985852200733, "loss": 4.0525, "step": 2483 }, { "epoch": 0.11628532974427995, "grad_norm": 1.6015625, "learning_rate": 0.00019895901861660347, "loss": 3.8341, "step": 2484 }, { "epoch": 0.11633214348411258, "grad_norm": 1.5234375, "learning_rate": 0.00019895817837400245, "loss": 3.9821, "step": 2485 }, { "epoch": 0.11637895722394523, "grad_norm": 1.1953125, "learning_rate": 0.0001989573377942072, "loss": 4.0567, "step": 2486 }, { "epoch": 0.11642577096377787, "grad_norm": 1.4609375, "learning_rate": 0.0001989564968772205, "loss": 3.9777, "step": 2487 }, { "epoch": 0.11647258470361051, "grad_norm": 1.0859375, "learning_rate": 0.00019895565562304528, "loss": 3.8252, "step": 2488 }, { "epoch": 0.11651939844344315, "grad_norm": 1.6171875, "learning_rate": 0.00019895481403168436, "loss": 4.3981, "step": 2489 }, { "epoch": 0.11656621218327579, "grad_norm": 1.328125, "learning_rate": 0.00019895397210314066, "loss": 3.5262, "step": 2490 }, { "epoch": 0.11661302592310843, "grad_norm": 1.1796875, "learning_rate": 0.000198953129837417, "loss": 3.728, "step": 2491 }, { "epoch": 0.11665983966294108, "grad_norm": 1.296875, "learning_rate": 0.0001989522872345163, "loss": 3.7408, "step": 2492 }, { "epoch": 0.11670665340277371, "grad_norm": 1.3515625, "learning_rate": 0.00019895144429444135, "loss": 3.6426, "step": 2493 }, { "epoch": 0.11675346714260636, "grad_norm": 1.5390625, "learning_rate": 0.00019895060101719512, "loss": 4.0052, "step": 2494 }, { "epoch": 0.11680028088243899, "grad_norm": 1.0390625, "learning_rate": 0.0001989497574027804, "loss": 5.9419, "step": 2495 }, { "epoch": 0.11684709462227164, "grad_norm": 1.3515625, "learning_rate": 0.00019894891345120013, "loss": 3.6111, "step": 2496 }, { "epoch": 0.11689390836210428, "grad_norm": 2.34375, "learning_rate": 0.00019894806916245713, "loss": 3.5005, "step": 2497 }, { "epoch": 0.11694072210193691, "grad_norm": 1.2890625, "learning_rate": 0.00019894722453655433, "loss": 3.8159, "step": 2498 }, { "epoch": 0.11698753584176956, "grad_norm": 1.7265625, "learning_rate": 0.00019894637957349457, "loss": 4.2828, "step": 2499 }, { "epoch": 0.1170343495816022, "grad_norm": 1.3671875, "learning_rate": 0.00019894553427328074, "loss": 3.9295, "step": 2500 }, { "epoch": 0.11708116332143484, "grad_norm": 1.3203125, "learning_rate": 0.00019894468863591573, "loss": 3.9585, "step": 2501 }, { "epoch": 0.11712797706126749, "grad_norm": 1.5703125, "learning_rate": 0.0001989438426614024, "loss": 3.7111, "step": 2502 }, { "epoch": 0.11717479080110012, "grad_norm": 1.0546875, "learning_rate": 0.00019894299634974364, "loss": 3.7454, "step": 2503 }, { "epoch": 0.11722160454093276, "grad_norm": 1.2578125, "learning_rate": 0.0001989421497009424, "loss": 4.0403, "step": 2504 }, { "epoch": 0.11726841828076541, "grad_norm": 1.140625, "learning_rate": 0.00019894130271500143, "loss": 3.6793, "step": 2505 }, { "epoch": 0.11731523202059804, "grad_norm": 1.3671875, "learning_rate": 0.00019894045539192373, "loss": 3.8608, "step": 2506 }, { "epoch": 0.11736204576043069, "grad_norm": 1.1953125, "learning_rate": 0.00019893960773171212, "loss": 3.1301, "step": 2507 }, { "epoch": 0.11740885950026332, "grad_norm": 1.4609375, "learning_rate": 0.00019893875973436953, "loss": 4.0304, "step": 2508 }, { "epoch": 0.11745567324009597, "grad_norm": 1.3203125, "learning_rate": 0.00019893791139989883, "loss": 4.2865, "step": 2509 }, { "epoch": 0.11750248697992861, "grad_norm": 1.6875, "learning_rate": 0.00019893706272830294, "loss": 3.7813, "step": 2510 }, { "epoch": 0.11754930071976125, "grad_norm": 1.6328125, "learning_rate": 0.00019893621371958468, "loss": 4.0298, "step": 2511 }, { "epoch": 0.11759611445959389, "grad_norm": 1.5, "learning_rate": 0.00019893536437374701, "loss": 3.5515, "step": 2512 }, { "epoch": 0.11764292819942654, "grad_norm": 1.40625, "learning_rate": 0.00019893451469079284, "loss": 4.1483, "step": 2513 }, { "epoch": 0.11768974193925917, "grad_norm": 1.3671875, "learning_rate": 0.000198933664670725, "loss": 3.6199, "step": 2514 }, { "epoch": 0.11773655567909182, "grad_norm": 1.421875, "learning_rate": 0.0001989328143135464, "loss": 3.9237, "step": 2515 }, { "epoch": 0.11778336941892445, "grad_norm": 1.3359375, "learning_rate": 0.00019893196361925997, "loss": 3.9096, "step": 2516 }, { "epoch": 0.1178301831587571, "grad_norm": 1.53125, "learning_rate": 0.00019893111258786862, "loss": 3.9453, "step": 2517 }, { "epoch": 0.11787699689858974, "grad_norm": 1.21875, "learning_rate": 0.00019893026121937517, "loss": 3.8315, "step": 2518 }, { "epoch": 0.11792381063842237, "grad_norm": 1.3046875, "learning_rate": 0.00019892940951378262, "loss": 3.8772, "step": 2519 }, { "epoch": 0.11797062437825502, "grad_norm": 1.421875, "learning_rate": 0.0001989285574710938, "loss": 4.1951, "step": 2520 }, { "epoch": 0.11801743811808765, "grad_norm": 1.375, "learning_rate": 0.00019892770509131165, "loss": 4.0277, "step": 2521 }, { "epoch": 0.1180642518579203, "grad_norm": 1.4921875, "learning_rate": 0.00019892685237443906, "loss": 3.8544, "step": 2522 }, { "epoch": 0.11811106559775295, "grad_norm": 1.265625, "learning_rate": 0.00019892599932047893, "loss": 4.058, "step": 2523 }, { "epoch": 0.11815787933758558, "grad_norm": 1.203125, "learning_rate": 0.00019892514592943417, "loss": 3.6377, "step": 2524 }, { "epoch": 0.11820469307741822, "grad_norm": 2.078125, "learning_rate": 0.0001989242922013077, "loss": 4.3342, "step": 2525 }, { "epoch": 0.11825150681725086, "grad_norm": 1.7109375, "learning_rate": 0.00019892343813610242, "loss": 3.7147, "step": 2526 }, { "epoch": 0.1182983205570835, "grad_norm": 1.3515625, "learning_rate": 0.00019892258373382127, "loss": 3.8071, "step": 2527 }, { "epoch": 0.11834513429691615, "grad_norm": 1.4140625, "learning_rate": 0.0001989217289944671, "loss": 3.8286, "step": 2528 }, { "epoch": 0.11839194803674878, "grad_norm": 1.4375, "learning_rate": 0.00019892087391804288, "loss": 3.8616, "step": 2529 }, { "epoch": 0.11843876177658143, "grad_norm": 1.4609375, "learning_rate": 0.0001989200185045515, "loss": 4.1273, "step": 2530 }, { "epoch": 0.11848557551641407, "grad_norm": 1.5078125, "learning_rate": 0.00019891916275399587, "loss": 4.4542, "step": 2531 }, { "epoch": 0.1185323892562467, "grad_norm": 1.3046875, "learning_rate": 0.0001989183066663789, "loss": 4.0134, "step": 2532 }, { "epoch": 0.11857920299607935, "grad_norm": 1.421875, "learning_rate": 0.00019891745024170352, "loss": 3.8498, "step": 2533 }, { "epoch": 0.11862601673591198, "grad_norm": 1.5390625, "learning_rate": 0.00019891659347997265, "loss": 3.943, "step": 2534 }, { "epoch": 0.11867283047574463, "grad_norm": 1.4765625, "learning_rate": 0.0001989157363811892, "loss": 4.1106, "step": 2535 }, { "epoch": 0.11871964421557728, "grad_norm": 1.3125, "learning_rate": 0.0001989148789453561, "loss": 3.9921, "step": 2536 }, { "epoch": 0.11876645795540991, "grad_norm": 1.3984375, "learning_rate": 0.00019891402117247627, "loss": 4.0519, "step": 2537 }, { "epoch": 0.11881327169524256, "grad_norm": 1.2265625, "learning_rate": 0.00019891316306255263, "loss": 4.0343, "step": 2538 }, { "epoch": 0.11886008543507519, "grad_norm": 1.4453125, "learning_rate": 0.0001989123046155881, "loss": 4.3088, "step": 2539 }, { "epoch": 0.11890689917490783, "grad_norm": 1.484375, "learning_rate": 0.00019891144583158562, "loss": 4.1511, "step": 2540 }, { "epoch": 0.11895371291474048, "grad_norm": 1.59375, "learning_rate": 0.00019891058671054812, "loss": 4.1974, "step": 2541 }, { "epoch": 0.11900052665457311, "grad_norm": 1.21875, "learning_rate": 0.00019890972725247847, "loss": 4.105, "step": 2542 }, { "epoch": 0.11904734039440576, "grad_norm": 1.2421875, "learning_rate": 0.00019890886745737966, "loss": 3.8439, "step": 2543 }, { "epoch": 0.1190941541342384, "grad_norm": 1.5, "learning_rate": 0.0001989080073252546, "loss": 4.1044, "step": 2544 }, { "epoch": 0.11914096787407104, "grad_norm": 1.03125, "learning_rate": 0.00019890714685610625, "loss": 2.7406, "step": 2545 }, { "epoch": 0.11918778161390368, "grad_norm": 1.28125, "learning_rate": 0.00019890628604993748, "loss": 3.9054, "step": 2546 }, { "epoch": 0.11923459535373632, "grad_norm": 1.171875, "learning_rate": 0.00019890542490675128, "loss": 4.1054, "step": 2547 }, { "epoch": 0.11928140909356896, "grad_norm": 1.390625, "learning_rate": 0.00019890456342655055, "loss": 4.0368, "step": 2548 }, { "epoch": 0.11932822283340161, "grad_norm": 1.3984375, "learning_rate": 0.00019890370160933823, "loss": 3.8384, "step": 2549 }, { "epoch": 0.11937503657323424, "grad_norm": 1.4453125, "learning_rate": 0.00019890283945511725, "loss": 3.7535, "step": 2550 }, { "epoch": 0.11942185031306689, "grad_norm": 1.765625, "learning_rate": 0.0001989019769638906, "loss": 4.0065, "step": 2551 }, { "epoch": 0.11946866405289952, "grad_norm": 1.5625, "learning_rate": 0.00019890111413566115, "loss": 3.3341, "step": 2552 }, { "epoch": 0.11951547779273217, "grad_norm": 1.515625, "learning_rate": 0.0001989002509704319, "loss": 3.8758, "step": 2553 }, { "epoch": 0.11956229153256481, "grad_norm": 3.125, "learning_rate": 0.00019889938746820574, "loss": 3.6436, "step": 2554 }, { "epoch": 0.11960910527239745, "grad_norm": 1.5234375, "learning_rate": 0.00019889852362898564, "loss": 3.5532, "step": 2555 }, { "epoch": 0.11965591901223009, "grad_norm": 1.28125, "learning_rate": 0.00019889765945277455, "loss": 3.8557, "step": 2556 }, { "epoch": 0.11970273275206272, "grad_norm": 1.2578125, "learning_rate": 0.00019889679493957538, "loss": 4.0671, "step": 2557 }, { "epoch": 0.11974954649189537, "grad_norm": 1.2578125, "learning_rate": 0.0001988959300893911, "loss": 3.8673, "step": 2558 }, { "epoch": 0.11979636023172802, "grad_norm": 1.4453125, "learning_rate": 0.00019889506490222467, "loss": 3.9627, "step": 2559 }, { "epoch": 0.11984317397156065, "grad_norm": 1.1484375, "learning_rate": 0.00019889419937807898, "loss": 2.8467, "step": 2560 }, { "epoch": 0.1198899877113933, "grad_norm": 1.4453125, "learning_rate": 0.00019889333351695706, "loss": 3.4061, "step": 2561 }, { "epoch": 0.11993680145122594, "grad_norm": 2.5, "learning_rate": 0.00019889246731886182, "loss": 3.8806, "step": 2562 }, { "epoch": 0.11998361519105857, "grad_norm": 1.3984375, "learning_rate": 0.0001988916007837962, "loss": 3.8956, "step": 2563 }, { "epoch": 0.12003042893089122, "grad_norm": 1.5078125, "learning_rate": 0.00019889073391176322, "loss": 4.2947, "step": 2564 }, { "epoch": 0.12007724267072385, "grad_norm": 1.296875, "learning_rate": 0.0001988898667027657, "loss": 3.7339, "step": 2565 }, { "epoch": 0.1201240564105565, "grad_norm": 1.3359375, "learning_rate": 0.00019888899915680673, "loss": 3.3512, "step": 2566 }, { "epoch": 0.12017087015038914, "grad_norm": 1.171875, "learning_rate": 0.00019888813127388918, "loss": 2.8293, "step": 2567 }, { "epoch": 0.12021768389022178, "grad_norm": 1.4609375, "learning_rate": 0.0001988872630540161, "loss": 3.9382, "step": 2568 }, { "epoch": 0.12026449763005442, "grad_norm": 1.1015625, "learning_rate": 0.00019888639449719032, "loss": 3.7552, "step": 2569 }, { "epoch": 0.12031131136988706, "grad_norm": 1.2265625, "learning_rate": 0.00019888552560341487, "loss": 3.5578, "step": 2570 }, { "epoch": 0.1203581251097197, "grad_norm": 1.3515625, "learning_rate": 0.00019888465637269274, "loss": 3.7882, "step": 2571 }, { "epoch": 0.12040493884955235, "grad_norm": 1.3203125, "learning_rate": 0.00019888378680502685, "loss": 3.5156, "step": 2572 }, { "epoch": 0.12045175258938498, "grad_norm": 1.2109375, "learning_rate": 0.00019888291690042014, "loss": 3.5301, "step": 2573 }, { "epoch": 0.12049856632921763, "grad_norm": 1.3515625, "learning_rate": 0.00019888204665887565, "loss": 3.8753, "step": 2574 }, { "epoch": 0.12054538006905027, "grad_norm": 1.0859375, "learning_rate": 0.00019888117608039626, "loss": 3.6269, "step": 2575 }, { "epoch": 0.1205921938088829, "grad_norm": 1.2890625, "learning_rate": 0.00019888030516498502, "loss": 4.2107, "step": 2576 }, { "epoch": 0.12063900754871555, "grad_norm": 1.6484375, "learning_rate": 0.00019887943391264483, "loss": 4.0204, "step": 2577 }, { "epoch": 0.12068582128854818, "grad_norm": 1.1171875, "learning_rate": 0.0001988785623233787, "loss": 3.9251, "step": 2578 }, { "epoch": 0.12073263502838083, "grad_norm": 1.3359375, "learning_rate": 0.00019887769039718959, "loss": 3.8536, "step": 2579 }, { "epoch": 0.12077944876821348, "grad_norm": 1.2578125, "learning_rate": 0.00019887681813408043, "loss": 3.8311, "step": 2580 }, { "epoch": 0.12082626250804611, "grad_norm": 1.75, "learning_rate": 0.00019887594553405426, "loss": 3.3636, "step": 2581 }, { "epoch": 0.12087307624787876, "grad_norm": 1.53125, "learning_rate": 0.00019887507259711403, "loss": 4.1004, "step": 2582 }, { "epoch": 0.12091988998771139, "grad_norm": 1.46875, "learning_rate": 0.00019887419932326265, "loss": 4.2275, "step": 2583 }, { "epoch": 0.12096670372754403, "grad_norm": 1.4921875, "learning_rate": 0.00019887332571250318, "loss": 4.004, "step": 2584 }, { "epoch": 0.12101351746737668, "grad_norm": 1.3359375, "learning_rate": 0.0001988724517648386, "loss": 4.0581, "step": 2585 }, { "epoch": 0.12106033120720931, "grad_norm": 1.125, "learning_rate": 0.0001988715774802718, "loss": 3.9466, "step": 2586 }, { "epoch": 0.12110714494704196, "grad_norm": 1.2421875, "learning_rate": 0.00019887070285880588, "loss": 3.7477, "step": 2587 }, { "epoch": 0.12115395868687459, "grad_norm": 1.625, "learning_rate": 0.0001988698279004437, "loss": 4.2282, "step": 2588 }, { "epoch": 0.12120077242670724, "grad_norm": 1.6171875, "learning_rate": 0.00019886895260518833, "loss": 4.1349, "step": 2589 }, { "epoch": 0.12124758616653988, "grad_norm": 2.09375, "learning_rate": 0.0001988680769730427, "loss": 4.3011, "step": 2590 }, { "epoch": 0.12129439990637252, "grad_norm": 1.1484375, "learning_rate": 0.00019886720100400983, "loss": 3.6339, "step": 2591 }, { "epoch": 0.12134121364620516, "grad_norm": 1.28125, "learning_rate": 0.00019886632469809268, "loss": 4.2441, "step": 2592 }, { "epoch": 0.12138802738603781, "grad_norm": 3.34375, "learning_rate": 0.00019886544805529423, "loss": 3.706, "step": 2593 }, { "epoch": 0.12143484112587044, "grad_norm": 1.4140625, "learning_rate": 0.0001988645710756175, "loss": 4.0486, "step": 2594 }, { "epoch": 0.12148165486570309, "grad_norm": 1.203125, "learning_rate": 0.0001988636937590654, "loss": 3.838, "step": 2595 }, { "epoch": 0.12152846860553572, "grad_norm": 1.359375, "learning_rate": 0.00019886281610564103, "loss": 3.8013, "step": 2596 }, { "epoch": 0.12157528234536837, "grad_norm": 3.421875, "learning_rate": 0.00019886193811534736, "loss": 4.1639, "step": 2597 }, { "epoch": 0.12162209608520101, "grad_norm": 1.234375, "learning_rate": 0.0001988610597881873, "loss": 3.7948, "step": 2598 }, { "epoch": 0.12166890982503364, "grad_norm": 1.2421875, "learning_rate": 0.0001988601811241639, "loss": 3.9266, "step": 2599 }, { "epoch": 0.12171572356486629, "grad_norm": 1.5546875, "learning_rate": 0.00019885930212328016, "loss": 4.2304, "step": 2600 }, { "epoch": 0.12176253730469892, "grad_norm": 1.3203125, "learning_rate": 0.00019885842278553905, "loss": 3.8294, "step": 2601 }, { "epoch": 0.12180935104453157, "grad_norm": 1.453125, "learning_rate": 0.0001988575431109436, "loss": 3.8629, "step": 2602 }, { "epoch": 0.12185616478436422, "grad_norm": 1.6328125, "learning_rate": 0.00019885666309949677, "loss": 4.1494, "step": 2603 }, { "epoch": 0.12190297852419685, "grad_norm": 1.3125, "learning_rate": 0.0001988557827512016, "loss": 3.9215, "step": 2604 }, { "epoch": 0.1219497922640295, "grad_norm": 1.734375, "learning_rate": 0.000198854902066061, "loss": 4.4447, "step": 2605 }, { "epoch": 0.12199660600386214, "grad_norm": 2.40625, "learning_rate": 0.0001988540210440781, "loss": 3.8299, "step": 2606 }, { "epoch": 0.12204341974369477, "grad_norm": 1.5859375, "learning_rate": 0.0001988531396852558, "loss": 4.2263, "step": 2607 }, { "epoch": 0.12209023348352742, "grad_norm": 1.6796875, "learning_rate": 0.00019885225798959717, "loss": 4.3852, "step": 2608 }, { "epoch": 0.12213704722336005, "grad_norm": 1.2109375, "learning_rate": 0.00019885137595710518, "loss": 3.9097, "step": 2609 }, { "epoch": 0.1221838609631927, "grad_norm": 1.34375, "learning_rate": 0.0001988504935877828, "loss": 3.7092, "step": 2610 }, { "epoch": 0.12223067470302534, "grad_norm": 1.5078125, "learning_rate": 0.0001988496108816331, "loss": 3.934, "step": 2611 }, { "epoch": 0.12227748844285798, "grad_norm": 2.125, "learning_rate": 0.00019884872783865908, "loss": 3.6018, "step": 2612 }, { "epoch": 0.12232430218269062, "grad_norm": 1.25, "learning_rate": 0.00019884784445886372, "loss": 3.8644, "step": 2613 }, { "epoch": 0.12237111592252325, "grad_norm": 1.3046875, "learning_rate": 0.00019884696074225008, "loss": 4.0567, "step": 2614 }, { "epoch": 0.1224179296623559, "grad_norm": 1.6875, "learning_rate": 0.0001988460766888211, "loss": 3.3143, "step": 2615 }, { "epoch": 0.12246474340218855, "grad_norm": 1.53125, "learning_rate": 0.0001988451922985798, "loss": 3.8215, "step": 2616 }, { "epoch": 0.12251155714202118, "grad_norm": 1.2265625, "learning_rate": 0.00019884430757152928, "loss": 4.0052, "step": 2617 }, { "epoch": 0.12255837088185383, "grad_norm": 1.203125, "learning_rate": 0.00019884342250767246, "loss": 3.9014, "step": 2618 }, { "epoch": 0.12260518462168646, "grad_norm": 1.3359375, "learning_rate": 0.0001988425371070124, "loss": 3.7583, "step": 2619 }, { "epoch": 0.1226519983615191, "grad_norm": 1.0703125, "learning_rate": 0.0001988416513695521, "loss": 3.9037, "step": 2620 }, { "epoch": 0.12269881210135175, "grad_norm": 1.2578125, "learning_rate": 0.0001988407652952946, "loss": 3.981, "step": 2621 }, { "epoch": 0.12274562584118438, "grad_norm": 1.8515625, "learning_rate": 0.0001988398788842429, "loss": 3.9071, "step": 2622 }, { "epoch": 0.12279243958101703, "grad_norm": 1.203125, "learning_rate": 0.0001988389921364, "loss": 3.9464, "step": 2623 }, { "epoch": 0.12283925332084968, "grad_norm": 1.2265625, "learning_rate": 0.000198838105051769, "loss": 3.8853, "step": 2624 }, { "epoch": 0.12288606706068231, "grad_norm": 5.21875, "learning_rate": 0.00019883721763035283, "loss": 4.4647, "step": 2625 }, { "epoch": 0.12293288080051495, "grad_norm": 1.265625, "learning_rate": 0.00019883632987215458, "loss": 3.9778, "step": 2626 }, { "epoch": 0.12297969454034759, "grad_norm": 1.5234375, "learning_rate": 0.00019883544177717722, "loss": 3.9399, "step": 2627 }, { "epoch": 0.12302650828018023, "grad_norm": 1.46875, "learning_rate": 0.00019883455334542382, "loss": 3.8594, "step": 2628 }, { "epoch": 0.12307332202001288, "grad_norm": 1.5703125, "learning_rate": 0.00019883366457689738, "loss": 3.8539, "step": 2629 }, { "epoch": 0.12312013575984551, "grad_norm": 1.3046875, "learning_rate": 0.00019883277547160097, "loss": 3.8148, "step": 2630 }, { "epoch": 0.12316694949967816, "grad_norm": 1.296875, "learning_rate": 0.00019883188602953756, "loss": 3.9552, "step": 2631 }, { "epoch": 0.12321376323951079, "grad_norm": 2.265625, "learning_rate": 0.00019883099625071023, "loss": 4.6374, "step": 2632 }, { "epoch": 0.12326057697934344, "grad_norm": 1.46875, "learning_rate": 0.000198830106135122, "loss": 4.0926, "step": 2633 }, { "epoch": 0.12330739071917608, "grad_norm": 1.171875, "learning_rate": 0.00019882921568277587, "loss": 3.6716, "step": 2634 }, { "epoch": 0.12335420445900872, "grad_norm": 1.171875, "learning_rate": 0.0001988283248936749, "loss": 3.7558, "step": 2635 }, { "epoch": 0.12340101819884136, "grad_norm": 1.2734375, "learning_rate": 0.00019882743376782217, "loss": 4.0649, "step": 2636 }, { "epoch": 0.12344783193867401, "grad_norm": 1.453125, "learning_rate": 0.00019882654230522065, "loss": 3.9308, "step": 2637 }, { "epoch": 0.12349464567850664, "grad_norm": 1.2265625, "learning_rate": 0.0001988256505058734, "loss": 3.7247, "step": 2638 }, { "epoch": 0.12354145941833929, "grad_norm": 1.359375, "learning_rate": 0.0001988247583697834, "loss": 3.812, "step": 2639 }, { "epoch": 0.12358827315817192, "grad_norm": 1.5703125, "learning_rate": 0.00019882386589695382, "loss": 3.8266, "step": 2640 }, { "epoch": 0.12363508689800456, "grad_norm": 1.265625, "learning_rate": 0.0001988229730873876, "loss": 3.8943, "step": 2641 }, { "epoch": 0.12368190063783721, "grad_norm": 1.5, "learning_rate": 0.00019882207994108782, "loss": 3.7914, "step": 2642 }, { "epoch": 0.12372871437766984, "grad_norm": 1.140625, "learning_rate": 0.00019882118645805753, "loss": 3.7761, "step": 2643 }, { "epoch": 0.12377552811750249, "grad_norm": 1.078125, "learning_rate": 0.00019882029263829974, "loss": 3.6062, "step": 2644 }, { "epoch": 0.12382234185733512, "grad_norm": 1.484375, "learning_rate": 0.0001988193984818175, "loss": 4.4834, "step": 2645 }, { "epoch": 0.12386915559716777, "grad_norm": 1.2734375, "learning_rate": 0.0001988185039886139, "loss": 3.6698, "step": 2646 }, { "epoch": 0.12391596933700041, "grad_norm": 1.4453125, "learning_rate": 0.00019881760915869194, "loss": 3.9652, "step": 2647 }, { "epoch": 0.12396278307683305, "grad_norm": 1.078125, "learning_rate": 0.0001988167139920547, "loss": 3.7199, "step": 2648 }, { "epoch": 0.1240095968166657, "grad_norm": 1.953125, "learning_rate": 0.0001988158184887052, "loss": 4.251, "step": 2649 }, { "epoch": 0.12405641055649833, "grad_norm": 1.140625, "learning_rate": 0.00019881492264864654, "loss": 4.0412, "step": 2650 }, { "epoch": 0.12410322429633097, "grad_norm": 1.3828125, "learning_rate": 0.00019881402647188172, "loss": 4.2069, "step": 2651 }, { "epoch": 0.12415003803616362, "grad_norm": 1.3984375, "learning_rate": 0.00019881312995841384, "loss": 3.9212, "step": 2652 }, { "epoch": 0.12419685177599625, "grad_norm": 1.3828125, "learning_rate": 0.0001988122331082459, "loss": 4.0308, "step": 2653 }, { "epoch": 0.1242436655158289, "grad_norm": 0.9921875, "learning_rate": 0.000198811335921381, "loss": 3.1462, "step": 2654 }, { "epoch": 0.12429047925566154, "grad_norm": 1.125, "learning_rate": 0.0001988104383978222, "loss": 3.6548, "step": 2655 }, { "epoch": 0.12433729299549418, "grad_norm": 1.5, "learning_rate": 0.00019880954053757256, "loss": 3.7963, "step": 2656 }, { "epoch": 0.12438410673532682, "grad_norm": 1.0703125, "learning_rate": 0.00019880864234063505, "loss": 3.8869, "step": 2657 }, { "epoch": 0.12443092047515945, "grad_norm": 1.15625, "learning_rate": 0.00019880774380701287, "loss": 3.4943, "step": 2658 }, { "epoch": 0.1244777342149921, "grad_norm": 1.3203125, "learning_rate": 0.00019880684493670897, "loss": 3.6373, "step": 2659 }, { "epoch": 0.12452454795482475, "grad_norm": 2.140625, "learning_rate": 0.0001988059457297265, "loss": 3.8833, "step": 2660 }, { "epoch": 0.12457136169465738, "grad_norm": 1.1796875, "learning_rate": 0.00019880504618606843, "loss": 3.8638, "step": 2661 }, { "epoch": 0.12461817543449002, "grad_norm": 1.203125, "learning_rate": 0.0001988041463057379, "loss": 3.6353, "step": 2662 }, { "epoch": 0.12466498917432266, "grad_norm": 1.15625, "learning_rate": 0.00019880324608873798, "loss": 3.6438, "step": 2663 }, { "epoch": 0.1247118029141553, "grad_norm": 1.0, "learning_rate": 0.00019880234553507168, "loss": 2.8163, "step": 2664 }, { "epoch": 0.12475861665398795, "grad_norm": 1.421875, "learning_rate": 0.0001988014446447421, "loss": 4.1005, "step": 2665 }, { "epoch": 0.12480543039382058, "grad_norm": 1.4921875, "learning_rate": 0.00019880054341775232, "loss": 3.8858, "step": 2666 }, { "epoch": 0.12485224413365323, "grad_norm": 1.28125, "learning_rate": 0.0001987996418541054, "loss": 4.0019, "step": 2667 }, { "epoch": 0.12489905787348587, "grad_norm": 1.3671875, "learning_rate": 0.0001987987399538044, "loss": 3.6683, "step": 2668 }, { "epoch": 0.12494587161331851, "grad_norm": 1.03125, "learning_rate": 0.00019879783771685242, "loss": 4.113, "step": 2669 }, { "epoch": 0.12499268535315115, "grad_norm": 5.25, "learning_rate": 0.0001987969351432525, "loss": 5.7916, "step": 2670 }, { "epoch": 0.12503949909298379, "grad_norm": 1.234375, "learning_rate": 0.00019879603223300777, "loss": 4.04, "step": 2671 }, { "epoch": 0.12508631283281643, "grad_norm": 1.5859375, "learning_rate": 0.00019879512898612122, "loss": 3.9589, "step": 2672 }, { "epoch": 0.12513312657264908, "grad_norm": 1.8046875, "learning_rate": 0.000198794225402596, "loss": 3.966, "step": 2673 }, { "epoch": 0.12517994031248172, "grad_norm": 1.1328125, "learning_rate": 0.00019879332148243516, "loss": 4.0915, "step": 2674 }, { "epoch": 0.12522675405231434, "grad_norm": 1.71875, "learning_rate": 0.0001987924172256418, "loss": 3.7386, "step": 2675 }, { "epoch": 0.125273567792147, "grad_norm": 1.6328125, "learning_rate": 0.000198791512632219, "loss": 3.9213, "step": 2676 }, { "epoch": 0.12532038153197964, "grad_norm": 1.484375, "learning_rate": 0.00019879060770216982, "loss": 4.2616, "step": 2677 }, { "epoch": 0.12536719527181228, "grad_norm": 1.3515625, "learning_rate": 0.00019878970243549735, "loss": 4.0375, "step": 2678 }, { "epoch": 0.12541400901164493, "grad_norm": 1.578125, "learning_rate": 0.0001987887968322047, "loss": 3.6529, "step": 2679 }, { "epoch": 0.12546082275147755, "grad_norm": 1.484375, "learning_rate": 0.00019878789089229488, "loss": 3.4626, "step": 2680 }, { "epoch": 0.1255076364913102, "grad_norm": 1.40625, "learning_rate": 0.00019878698461577108, "loss": 3.9374, "step": 2681 }, { "epoch": 0.12555445023114284, "grad_norm": 1.3203125, "learning_rate": 0.00019878607800263634, "loss": 3.8787, "step": 2682 }, { "epoch": 0.12560126397097549, "grad_norm": 1.640625, "learning_rate": 0.0001987851710528937, "loss": 4.2952, "step": 2683 }, { "epoch": 0.12564807771080813, "grad_norm": 1.2109375, "learning_rate": 0.00019878426376654636, "loss": 4.6619, "step": 2684 }, { "epoch": 0.12569489145064075, "grad_norm": 1.9296875, "learning_rate": 0.00019878335614359734, "loss": 4.3377, "step": 2685 }, { "epoch": 0.1257417051904734, "grad_norm": 1.515625, "learning_rate": 0.0001987824481840497, "loss": 3.9132, "step": 2686 }, { "epoch": 0.12578851893030604, "grad_norm": 1.28125, "learning_rate": 0.00019878153988790663, "loss": 4.0987, "step": 2687 }, { "epoch": 0.1258353326701387, "grad_norm": 1.296875, "learning_rate": 0.00019878063125517117, "loss": 3.8276, "step": 2688 }, { "epoch": 0.12588214640997133, "grad_norm": 1.4296875, "learning_rate": 0.00019877972228584637, "loss": 4.1228, "step": 2689 }, { "epoch": 0.12592896014980398, "grad_norm": 1.15625, "learning_rate": 0.00019877881297993542, "loss": 3.832, "step": 2690 }, { "epoch": 0.1259757738896366, "grad_norm": 1.3984375, "learning_rate": 0.0001987779033374414, "loss": 3.6165, "step": 2691 }, { "epoch": 0.12602258762946925, "grad_norm": 1.2578125, "learning_rate": 0.00019877699335836733, "loss": 4.0593, "step": 2692 }, { "epoch": 0.1260694013693019, "grad_norm": 1.4609375, "learning_rate": 0.00019877608304271638, "loss": 3.9345, "step": 2693 }, { "epoch": 0.12611621510913454, "grad_norm": 1.2578125, "learning_rate": 0.00019877517239049163, "loss": 3.9836, "step": 2694 }, { "epoch": 0.12616302884896718, "grad_norm": 1.1796875, "learning_rate": 0.0001987742614016962, "loss": 4.2487, "step": 2695 }, { "epoch": 0.1262098425887998, "grad_norm": 1.3046875, "learning_rate": 0.00019877335007633317, "loss": 3.7416, "step": 2696 }, { "epoch": 0.12625665632863245, "grad_norm": 1.703125, "learning_rate": 0.00019877243841440566, "loss": 4.1271, "step": 2697 }, { "epoch": 0.1263034700684651, "grad_norm": 1.546875, "learning_rate": 0.0001987715264159168, "loss": 4.0804, "step": 2698 }, { "epoch": 0.12635028380829774, "grad_norm": 1.4609375, "learning_rate": 0.00019877061408086965, "loss": 3.8248, "step": 2699 }, { "epoch": 0.1263970975481304, "grad_norm": 1.2421875, "learning_rate": 0.00019876970140926737, "loss": 4.015, "step": 2700 }, { "epoch": 0.126443911287963, "grad_norm": 1.3984375, "learning_rate": 0.000198768788401113, "loss": 4.0403, "step": 2701 }, { "epoch": 0.12649072502779565, "grad_norm": 1.828125, "learning_rate": 0.00019876787505640971, "loss": 4.0424, "step": 2702 }, { "epoch": 0.1265375387676283, "grad_norm": 1.609375, "learning_rate": 0.0001987669613751606, "loss": 3.845, "step": 2703 }, { "epoch": 0.12658435250746095, "grad_norm": 1.1953125, "learning_rate": 0.0001987660473573688, "loss": 4.0208, "step": 2704 }, { "epoch": 0.1266311662472936, "grad_norm": 1.625, "learning_rate": 0.00019876513300303737, "loss": 2.8584, "step": 2705 }, { "epoch": 0.1266779799871262, "grad_norm": 1.7109375, "learning_rate": 0.00019876421831216946, "loss": 4.2726, "step": 2706 }, { "epoch": 0.12672479372695886, "grad_norm": 1.4453125, "learning_rate": 0.00019876330328476817, "loss": 3.7807, "step": 2707 }, { "epoch": 0.1267716074667915, "grad_norm": 1.2578125, "learning_rate": 0.00019876238792083668, "loss": 4.0276, "step": 2708 }, { "epoch": 0.12681842120662415, "grad_norm": 1.1640625, "learning_rate": 0.00019876147222037802, "loss": 3.4788, "step": 2709 }, { "epoch": 0.1268652349464568, "grad_norm": 1.2890625, "learning_rate": 0.0001987605561833954, "loss": 3.9531, "step": 2710 }, { "epoch": 0.1269120486862894, "grad_norm": 1.453125, "learning_rate": 0.00019875963980989182, "loss": 3.8392, "step": 2711 }, { "epoch": 0.12695886242612206, "grad_norm": 1.2734375, "learning_rate": 0.00019875872309987056, "loss": 3.8826, "step": 2712 }, { "epoch": 0.1270056761659547, "grad_norm": 1.234375, "learning_rate": 0.0001987578060533346, "loss": 3.7797, "step": 2713 }, { "epoch": 0.12705248990578735, "grad_norm": 1.4375, "learning_rate": 0.00019875688867028714, "loss": 3.6945, "step": 2714 }, { "epoch": 0.12709930364562, "grad_norm": 1.2421875, "learning_rate": 0.0001987559709507313, "loss": 3.2299, "step": 2715 }, { "epoch": 0.12714611738545262, "grad_norm": 1.3125, "learning_rate": 0.00019875505289467017, "loss": 4.0244, "step": 2716 }, { "epoch": 0.12719293112528526, "grad_norm": 1.8984375, "learning_rate": 0.00019875413450210695, "loss": 4.3442, "step": 2717 }, { "epoch": 0.1272397448651179, "grad_norm": 1.4375, "learning_rate": 0.0001987532157730447, "loss": 3.4975, "step": 2718 }, { "epoch": 0.12728655860495056, "grad_norm": 1.4375, "learning_rate": 0.00019875229670748658, "loss": 4.0891, "step": 2719 }, { "epoch": 0.1273333723447832, "grad_norm": 1.5234375, "learning_rate": 0.0001987513773054357, "loss": 5.0288, "step": 2720 }, { "epoch": 0.12738018608461585, "grad_norm": 1.1875, "learning_rate": 0.00019875045756689524, "loss": 3.8303, "step": 2721 }, { "epoch": 0.12742699982444847, "grad_norm": 1.3046875, "learning_rate": 0.00019874953749186828, "loss": 3.6886, "step": 2722 }, { "epoch": 0.1274738135642811, "grad_norm": 1.6875, "learning_rate": 0.00019874861708035798, "loss": 4.0817, "step": 2723 }, { "epoch": 0.12752062730411376, "grad_norm": 1.1171875, "learning_rate": 0.00019874769633236749, "loss": 4.9672, "step": 2724 }, { "epoch": 0.1275674410439464, "grad_norm": 1.9609375, "learning_rate": 0.00019874677524789992, "loss": 3.5263, "step": 2725 }, { "epoch": 0.12761425478377905, "grad_norm": 1.3203125, "learning_rate": 0.00019874585382695842, "loss": 3.778, "step": 2726 }, { "epoch": 0.12766106852361167, "grad_norm": 1.53125, "learning_rate": 0.00019874493206954613, "loss": 3.9686, "step": 2727 }, { "epoch": 0.12770788226344432, "grad_norm": 1.46875, "learning_rate": 0.0001987440099756662, "loss": 3.8909, "step": 2728 }, { "epoch": 0.12775469600327696, "grad_norm": 1.140625, "learning_rate": 0.00019874308754532177, "loss": 4.0388, "step": 2729 }, { "epoch": 0.1278015097431096, "grad_norm": 1.28125, "learning_rate": 0.00019874216477851596, "loss": 4.1795, "step": 2730 }, { "epoch": 0.12784832348294226, "grad_norm": 1.2578125, "learning_rate": 0.00019874124167525194, "loss": 3.7149, "step": 2731 }, { "epoch": 0.12789513722277487, "grad_norm": 1.3515625, "learning_rate": 0.00019874031823553285, "loss": 3.8552, "step": 2732 }, { "epoch": 0.12794195096260752, "grad_norm": 1.1015625, "learning_rate": 0.00019873939445936182, "loss": 3.9889, "step": 2733 }, { "epoch": 0.12798876470244017, "grad_norm": 1.640625, "learning_rate": 0.00019873847034674206, "loss": 4.3396, "step": 2734 }, { "epoch": 0.1280355784422728, "grad_norm": 1.375, "learning_rate": 0.0001987375458976766, "loss": 3.4114, "step": 2735 }, { "epoch": 0.12808239218210546, "grad_norm": 1.1171875, "learning_rate": 0.0001987366211121687, "loss": 3.848, "step": 2736 }, { "epoch": 0.12812920592193808, "grad_norm": 1.3515625, "learning_rate": 0.00019873569599022144, "loss": 3.4558, "step": 2737 }, { "epoch": 0.12817601966177072, "grad_norm": 1.3671875, "learning_rate": 0.00019873477053183804, "loss": 4.0999, "step": 2738 }, { "epoch": 0.12822283340160337, "grad_norm": 1.2890625, "learning_rate": 0.00019873384473702158, "loss": 3.6177, "step": 2739 }, { "epoch": 0.12826964714143602, "grad_norm": 1.578125, "learning_rate": 0.0001987329186057753, "loss": 3.9148, "step": 2740 }, { "epoch": 0.12831646088126866, "grad_norm": 1.421875, "learning_rate": 0.00019873199213810226, "loss": 3.7226, "step": 2741 }, { "epoch": 0.12836327462110128, "grad_norm": 1.328125, "learning_rate": 0.00019873106533400568, "loss": 3.5757, "step": 2742 }, { "epoch": 0.12841008836093393, "grad_norm": 1.3515625, "learning_rate": 0.00019873013819348868, "loss": 3.1828, "step": 2743 }, { "epoch": 0.12845690210076657, "grad_norm": 1.2890625, "learning_rate": 0.0001987292107165545, "loss": 3.6067, "step": 2744 }, { "epoch": 0.12850371584059922, "grad_norm": 1.15625, "learning_rate": 0.0001987282829032062, "loss": 3.6294, "step": 2745 }, { "epoch": 0.12855052958043187, "grad_norm": 1.6328125, "learning_rate": 0.000198727354753447, "loss": 3.6757, "step": 2746 }, { "epoch": 0.12859734332026448, "grad_norm": 1.484375, "learning_rate": 0.00019872642626728003, "loss": 3.6698, "step": 2747 }, { "epoch": 0.12864415706009713, "grad_norm": 1.6484375, "learning_rate": 0.00019872549744470846, "loss": 3.5787, "step": 2748 }, { "epoch": 0.12869097079992978, "grad_norm": 1.3125, "learning_rate": 0.0001987245682857355, "loss": 4.0896, "step": 2749 }, { "epoch": 0.12873778453976242, "grad_norm": 1.796875, "learning_rate": 0.00019872363879036423, "loss": 3.7771, "step": 2750 }, { "epoch": 0.12878459827959507, "grad_norm": 1.515625, "learning_rate": 0.0001987227089585979, "loss": 4.1492, "step": 2751 }, { "epoch": 0.12883141201942772, "grad_norm": 1.46875, "learning_rate": 0.00019872177879043962, "loss": 3.5856, "step": 2752 }, { "epoch": 0.12887822575926033, "grad_norm": 1.546875, "learning_rate": 0.00019872084828589261, "loss": 4.2352, "step": 2753 }, { "epoch": 0.12892503949909298, "grad_norm": 1.34375, "learning_rate": 0.00019871991744496002, "loss": 3.8251, "step": 2754 }, { "epoch": 0.12897185323892563, "grad_norm": 1.171875, "learning_rate": 0.00019871898626764502, "loss": 3.8559, "step": 2755 }, { "epoch": 0.12901866697875827, "grad_norm": 1.1328125, "learning_rate": 0.00019871805475395077, "loss": 3.8348, "step": 2756 }, { "epoch": 0.12906548071859092, "grad_norm": 1.3203125, "learning_rate": 0.00019871712290388046, "loss": 3.4638, "step": 2757 }, { "epoch": 0.12911229445842354, "grad_norm": 1.6875, "learning_rate": 0.00019871619071743724, "loss": 3.8615, "step": 2758 }, { "epoch": 0.12915910819825618, "grad_norm": 1.4609375, "learning_rate": 0.00019871525819462432, "loss": 3.972, "step": 2759 }, { "epoch": 0.12920592193808883, "grad_norm": 1.3515625, "learning_rate": 0.00019871432533544487, "loss": 3.7014, "step": 2760 }, { "epoch": 0.12925273567792148, "grad_norm": 1.5, "learning_rate": 0.00019871339213990203, "loss": 3.9511, "step": 2761 }, { "epoch": 0.12929954941775412, "grad_norm": 1.1796875, "learning_rate": 0.00019871245860799905, "loss": 3.6017, "step": 2762 }, { "epoch": 0.12934636315758674, "grad_norm": 1.46875, "learning_rate": 0.00019871152473973906, "loss": 4.0802, "step": 2763 }, { "epoch": 0.1293931768974194, "grad_norm": 1.4375, "learning_rate": 0.00019871059053512528, "loss": 3.8885, "step": 2764 }, { "epoch": 0.12943999063725203, "grad_norm": 1.1953125, "learning_rate": 0.00019870965599416083, "loss": 3.7006, "step": 2765 }, { "epoch": 0.12948680437708468, "grad_norm": 1.4375, "learning_rate": 0.00019870872111684893, "loss": 4.1696, "step": 2766 }, { "epoch": 0.12953361811691733, "grad_norm": 1.796875, "learning_rate": 0.00019870778590319282, "loss": 3.7468, "step": 2767 }, { "epoch": 0.12958043185674994, "grad_norm": 1.234375, "learning_rate": 0.0001987068503531956, "loss": 3.8024, "step": 2768 }, { "epoch": 0.1296272455965826, "grad_norm": 1.734375, "learning_rate": 0.00019870591446686047, "loss": 3.7748, "step": 2769 }, { "epoch": 0.12967405933641524, "grad_norm": 1.609375, "learning_rate": 0.00019870497824419066, "loss": 3.6958, "step": 2770 }, { "epoch": 0.12972087307624788, "grad_norm": 1.375, "learning_rate": 0.00019870404168518936, "loss": 3.7702, "step": 2771 }, { "epoch": 0.12976768681608053, "grad_norm": 1.625, "learning_rate": 0.00019870310478985973, "loss": 3.6131, "step": 2772 }, { "epoch": 0.12981450055591315, "grad_norm": 1.2109375, "learning_rate": 0.00019870216755820498, "loss": 3.419, "step": 2773 }, { "epoch": 0.1298613142957458, "grad_norm": 1.421875, "learning_rate": 0.0001987012299902283, "loss": 4.3795, "step": 2774 }, { "epoch": 0.12990812803557844, "grad_norm": 1.5390625, "learning_rate": 0.0001987002920859329, "loss": 3.8949, "step": 2775 }, { "epoch": 0.1299549417754111, "grad_norm": 1.203125, "learning_rate": 0.0001986993538453219, "loss": 5.8596, "step": 2776 }, { "epoch": 0.13000175551524373, "grad_norm": 1.7578125, "learning_rate": 0.0001986984152683986, "loss": 3.6267, "step": 2777 }, { "epoch": 0.13004856925507635, "grad_norm": 1.5078125, "learning_rate": 0.00019869747635516615, "loss": 4.1544, "step": 2778 }, { "epoch": 0.130095382994909, "grad_norm": 1.1953125, "learning_rate": 0.00019869653710562777, "loss": 3.7919, "step": 2779 }, { "epoch": 0.13014219673474164, "grad_norm": 1.3046875, "learning_rate": 0.00019869559751978664, "loss": 3.8029, "step": 2780 }, { "epoch": 0.1301890104745743, "grad_norm": 1.5625, "learning_rate": 0.00019869465759764595, "loss": 3.8171, "step": 2781 }, { "epoch": 0.13023582421440694, "grad_norm": 1.671875, "learning_rate": 0.00019869371733920895, "loss": 4.3021, "step": 2782 }, { "epoch": 0.13028263795423958, "grad_norm": 1.3359375, "learning_rate": 0.00019869277674447878, "loss": 3.659, "step": 2783 }, { "epoch": 0.1303294516940722, "grad_norm": 1.390625, "learning_rate": 0.00019869183581345867, "loss": 4.1824, "step": 2784 }, { "epoch": 0.13037626543390485, "grad_norm": 1.2421875, "learning_rate": 0.00019869089454615187, "loss": 3.5557, "step": 2785 }, { "epoch": 0.1304230791737375, "grad_norm": 1.2734375, "learning_rate": 0.00019868995294256153, "loss": 3.5611, "step": 2786 }, { "epoch": 0.13046989291357014, "grad_norm": 1.34375, "learning_rate": 0.00019868901100269089, "loss": 4.0577, "step": 2787 }, { "epoch": 0.1305167066534028, "grad_norm": 1.3671875, "learning_rate": 0.00019868806872654313, "loss": 4.037, "step": 2788 }, { "epoch": 0.1305635203932354, "grad_norm": 1.171875, "learning_rate": 0.00019868712611412147, "loss": 3.8716, "step": 2789 }, { "epoch": 0.13061033413306805, "grad_norm": 1.15625, "learning_rate": 0.00019868618316542917, "loss": 3.6805, "step": 2790 }, { "epoch": 0.1306571478729007, "grad_norm": 1.2578125, "learning_rate": 0.0001986852398804694, "loss": 3.7825, "step": 2791 }, { "epoch": 0.13070396161273334, "grad_norm": 1.234375, "learning_rate": 0.00019868429625924535, "loss": 3.7793, "step": 2792 }, { "epoch": 0.130750775352566, "grad_norm": 1.5234375, "learning_rate": 0.00019868335230176028, "loss": 3.8415, "step": 2793 }, { "epoch": 0.1307975890923986, "grad_norm": 1.953125, "learning_rate": 0.00019868240800801742, "loss": 4.0164, "step": 2794 }, { "epoch": 0.13084440283223125, "grad_norm": 1.09375, "learning_rate": 0.00019868146337801993, "loss": 3.837, "step": 2795 }, { "epoch": 0.1308912165720639, "grad_norm": 1.84375, "learning_rate": 0.00019868051841177105, "loss": 4.0389, "step": 2796 }, { "epoch": 0.13093803031189655, "grad_norm": 1.34375, "learning_rate": 0.00019867957310927404, "loss": 3.8527, "step": 2797 }, { "epoch": 0.1309848440517292, "grad_norm": 2.078125, "learning_rate": 0.00019867862747053206, "loss": 3.6836, "step": 2798 }, { "epoch": 0.1310316577915618, "grad_norm": 1.4921875, "learning_rate": 0.00019867768149554836, "loss": 3.5688, "step": 2799 }, { "epoch": 0.13107847153139446, "grad_norm": 1.796875, "learning_rate": 0.00019867673518432616, "loss": 3.4723, "step": 2800 }, { "epoch": 0.1311252852712271, "grad_norm": 1.2265625, "learning_rate": 0.0001986757885368687, "loss": 4.3053, "step": 2801 }, { "epoch": 0.13117209901105975, "grad_norm": 1.375, "learning_rate": 0.00019867484155317922, "loss": 3.6612, "step": 2802 }, { "epoch": 0.1312189127508924, "grad_norm": 1.453125, "learning_rate": 0.00019867389423326089, "loss": 3.4689, "step": 2803 }, { "epoch": 0.13126572649072502, "grad_norm": 1.4140625, "learning_rate": 0.00019867294657711698, "loss": 4.0209, "step": 2804 }, { "epoch": 0.13131254023055766, "grad_norm": 1.1796875, "learning_rate": 0.0001986719985847507, "loss": 3.8654, "step": 2805 }, { "epoch": 0.1313593539703903, "grad_norm": 1.1484375, "learning_rate": 0.0001986710502561653, "loss": 3.7218, "step": 2806 }, { "epoch": 0.13140616771022295, "grad_norm": 1.25, "learning_rate": 0.00019867010159136398, "loss": 3.8122, "step": 2807 }, { "epoch": 0.1314529814500556, "grad_norm": 1.3515625, "learning_rate": 0.00019866915259035003, "loss": 3.4605, "step": 2808 }, { "epoch": 0.13149979518988822, "grad_norm": 1.4140625, "learning_rate": 0.0001986682032531266, "loss": 3.6425, "step": 2809 }, { "epoch": 0.13154660892972087, "grad_norm": 1.828125, "learning_rate": 0.000198667253579697, "loss": 3.9495, "step": 2810 }, { "epoch": 0.1315934226695535, "grad_norm": 1.15625, "learning_rate": 0.00019866630357006444, "loss": 3.2256, "step": 2811 }, { "epoch": 0.13164023640938616, "grad_norm": 1.28125, "learning_rate": 0.0001986653532242321, "loss": 3.9776, "step": 2812 }, { "epoch": 0.1316870501492188, "grad_norm": 1.3671875, "learning_rate": 0.00019866440254220333, "loss": 3.7021, "step": 2813 }, { "epoch": 0.13173386388905145, "grad_norm": 1.40625, "learning_rate": 0.0001986634515239813, "loss": 3.6764, "step": 2814 }, { "epoch": 0.13178067762888407, "grad_norm": 1.6875, "learning_rate": 0.00019866250016956926, "loss": 3.8399, "step": 2815 }, { "epoch": 0.13182749136871671, "grad_norm": 1.328125, "learning_rate": 0.00019866154847897045, "loss": 3.9412, "step": 2816 }, { "epoch": 0.13187430510854936, "grad_norm": 1.234375, "learning_rate": 0.00019866059645218812, "loss": 3.7441, "step": 2817 }, { "epoch": 0.131921118848382, "grad_norm": 1.3203125, "learning_rate": 0.00019865964408922551, "loss": 3.6439, "step": 2818 }, { "epoch": 0.13196793258821465, "grad_norm": 1.8046875, "learning_rate": 0.00019865869139008586, "loss": 3.6239, "step": 2819 }, { "epoch": 0.13201474632804727, "grad_norm": 1.6015625, "learning_rate": 0.00019865773835477243, "loss": 3.9422, "step": 2820 }, { "epoch": 0.13206156006787992, "grad_norm": 1.2421875, "learning_rate": 0.00019865678498328846, "loss": 3.7678, "step": 2821 }, { "epoch": 0.13210837380771256, "grad_norm": 1.625, "learning_rate": 0.00019865583127563717, "loss": 3.7013, "step": 2822 }, { "epoch": 0.1321551875475452, "grad_norm": 1.3125, "learning_rate": 0.0001986548772318219, "loss": 3.831, "step": 2823 }, { "epoch": 0.13220200128737786, "grad_norm": 1.234375, "learning_rate": 0.00019865392285184577, "loss": 3.8535, "step": 2824 }, { "epoch": 0.13224881502721048, "grad_norm": 1.6640625, "learning_rate": 0.00019865296813571212, "loss": 3.9461, "step": 2825 }, { "epoch": 0.13229562876704312, "grad_norm": 1.3046875, "learning_rate": 0.0001986520130834242, "loss": 3.9104, "step": 2826 }, { "epoch": 0.13234244250687577, "grad_norm": 1.109375, "learning_rate": 0.00019865105769498526, "loss": 3.9148, "step": 2827 }, { "epoch": 0.13238925624670841, "grad_norm": 1.4296875, "learning_rate": 0.0001986501019703985, "loss": 3.9405, "step": 2828 }, { "epoch": 0.13243606998654106, "grad_norm": 1.2265625, "learning_rate": 0.00019864914590966723, "loss": 3.4719, "step": 2829 }, { "epoch": 0.13248288372637368, "grad_norm": 1.5390625, "learning_rate": 0.00019864818951279473, "loss": 3.542, "step": 2830 }, { "epoch": 0.13252969746620633, "grad_norm": 1.1953125, "learning_rate": 0.00019864723277978417, "loss": 3.7999, "step": 2831 }, { "epoch": 0.13257651120603897, "grad_norm": 1.2109375, "learning_rate": 0.0001986462757106389, "loss": 5.7164, "step": 2832 }, { "epoch": 0.13262332494587162, "grad_norm": 1.4140625, "learning_rate": 0.00019864531830536216, "loss": 3.7125, "step": 2833 }, { "epoch": 0.13267013868570426, "grad_norm": 1.4765625, "learning_rate": 0.0001986443605639572, "loss": 4.1156, "step": 2834 }, { "epoch": 0.13271695242553688, "grad_norm": 1.109375, "learning_rate": 0.00019864340248642727, "loss": 3.6049, "step": 2835 }, { "epoch": 0.13276376616536953, "grad_norm": 1.6015625, "learning_rate": 0.00019864244407277566, "loss": 3.8696, "step": 2836 }, { "epoch": 0.13281057990520218, "grad_norm": 1.84375, "learning_rate": 0.0001986414853230056, "loss": 3.5397, "step": 2837 }, { "epoch": 0.13285739364503482, "grad_norm": 1.203125, "learning_rate": 0.0001986405262371204, "loss": 3.9112, "step": 2838 }, { "epoch": 0.13290420738486747, "grad_norm": 1.4453125, "learning_rate": 0.00019863956681512331, "loss": 3.696, "step": 2839 }, { "epoch": 0.1329510211247001, "grad_norm": 1.4609375, "learning_rate": 0.0001986386070570176, "loss": 3.7361, "step": 2840 }, { "epoch": 0.13299783486453273, "grad_norm": 2.171875, "learning_rate": 0.00019863764696280653, "loss": 3.4043, "step": 2841 }, { "epoch": 0.13304464860436538, "grad_norm": 1.375, "learning_rate": 0.0001986366865324934, "loss": 3.7065, "step": 2842 }, { "epoch": 0.13309146234419802, "grad_norm": 1.1796875, "learning_rate": 0.0001986357257660814, "loss": 3.8411, "step": 2843 }, { "epoch": 0.13313827608403067, "grad_norm": 1.1875, "learning_rate": 0.0001986347646635739, "loss": 3.357, "step": 2844 }, { "epoch": 0.13318508982386332, "grad_norm": 1.21875, "learning_rate": 0.00019863380322497417, "loss": 3.7134, "step": 2845 }, { "epoch": 0.13323190356369594, "grad_norm": 2.046875, "learning_rate": 0.00019863284145028542, "loss": 4.2835, "step": 2846 }, { "epoch": 0.13327871730352858, "grad_norm": 1.2421875, "learning_rate": 0.00019863187933951099, "loss": 3.9097, "step": 2847 }, { "epoch": 0.13332553104336123, "grad_norm": 1.6171875, "learning_rate": 0.00019863091689265413, "loss": 4.5025, "step": 2848 }, { "epoch": 0.13337234478319387, "grad_norm": 1.3515625, "learning_rate": 0.00019862995410971812, "loss": 3.9899, "step": 2849 }, { "epoch": 0.13341915852302652, "grad_norm": 1.3828125, "learning_rate": 0.00019862899099070623, "loss": 4.079, "step": 2850 }, { "epoch": 0.13346597226285914, "grad_norm": 1.296875, "learning_rate": 0.00019862802753562174, "loss": 3.614, "step": 2851 }, { "epoch": 0.13351278600269179, "grad_norm": 1.5859375, "learning_rate": 0.00019862706374446798, "loss": 3.8231, "step": 2852 }, { "epoch": 0.13355959974252443, "grad_norm": 1.3046875, "learning_rate": 0.00019862609961724819, "loss": 3.8257, "step": 2853 }, { "epoch": 0.13360641348235708, "grad_norm": 2.21875, "learning_rate": 0.00019862513515396567, "loss": 4.2681, "step": 2854 }, { "epoch": 0.13365322722218972, "grad_norm": 1.2890625, "learning_rate": 0.00019862417035462372, "loss": 3.5441, "step": 2855 }, { "epoch": 0.13370004096202234, "grad_norm": 1.2109375, "learning_rate": 0.00019862320521922558, "loss": 3.9379, "step": 2856 }, { "epoch": 0.133746854701855, "grad_norm": 1.71875, "learning_rate": 0.00019862223974777455, "loss": 4.1581, "step": 2857 }, { "epoch": 0.13379366844168764, "grad_norm": 1.421875, "learning_rate": 0.00019862127394027396, "loss": 4.108, "step": 2858 }, { "epoch": 0.13384048218152028, "grad_norm": 1.53125, "learning_rate": 0.0001986203077967271, "loss": 3.9354, "step": 2859 }, { "epoch": 0.13388729592135293, "grad_norm": 1.5234375, "learning_rate": 0.0001986193413171372, "loss": 3.5823, "step": 2860 }, { "epoch": 0.13393410966118555, "grad_norm": 1.296875, "learning_rate": 0.00019861837450150765, "loss": 3.663, "step": 2861 }, { "epoch": 0.1339809234010182, "grad_norm": 1.25, "learning_rate": 0.00019861740734984166, "loss": 4.3771, "step": 2862 }, { "epoch": 0.13402773714085084, "grad_norm": 1.28125, "learning_rate": 0.00019861643986214254, "loss": 3.6746, "step": 2863 }, { "epoch": 0.13407455088068349, "grad_norm": 1.71875, "learning_rate": 0.00019861547203841361, "loss": 3.7979, "step": 2864 }, { "epoch": 0.13412136462051613, "grad_norm": 1.515625, "learning_rate": 0.00019861450387865818, "loss": 3.759, "step": 2865 }, { "epoch": 0.13416817836034875, "grad_norm": 1.703125, "learning_rate": 0.0001986135353828795, "loss": 3.7873, "step": 2866 }, { "epoch": 0.1342149921001814, "grad_norm": 1.4140625, "learning_rate": 0.00019861256655108093, "loss": 4.0527, "step": 2867 }, { "epoch": 0.13426180584001404, "grad_norm": 1.4375, "learning_rate": 0.00019861159738326572, "loss": 3.6904, "step": 2868 }, { "epoch": 0.1343086195798467, "grad_norm": 1.1875, "learning_rate": 0.00019861062787943722, "loss": 4.0883, "step": 2869 }, { "epoch": 0.13435543331967933, "grad_norm": 1.3359375, "learning_rate": 0.00019860965803959865, "loss": 4.2812, "step": 2870 }, { "epoch": 0.13440224705951195, "grad_norm": 1.3203125, "learning_rate": 0.00019860868786375342, "loss": 3.8668, "step": 2871 }, { "epoch": 0.1344490607993446, "grad_norm": 2.03125, "learning_rate": 0.00019860771735190474, "loss": 3.9837, "step": 2872 }, { "epoch": 0.13449587453917725, "grad_norm": 1.84375, "learning_rate": 0.000198606746504056, "loss": 3.6139, "step": 2873 }, { "epoch": 0.1345426882790099, "grad_norm": 2.1875, "learning_rate": 0.00019860577532021046, "loss": 3.8181, "step": 2874 }, { "epoch": 0.13458950201884254, "grad_norm": 1.6640625, "learning_rate": 0.00019860480380037143, "loss": 4.2187, "step": 2875 }, { "epoch": 0.13463631575867518, "grad_norm": 1.515625, "learning_rate": 0.00019860383194454225, "loss": 3.9173, "step": 2876 }, { "epoch": 0.1346831294985078, "grad_norm": 1.109375, "learning_rate": 0.0001986028597527262, "loss": 3.9237, "step": 2877 }, { "epoch": 0.13472994323834045, "grad_norm": 1.5, "learning_rate": 0.0001986018872249266, "loss": 3.497, "step": 2878 }, { "epoch": 0.1347767569781731, "grad_norm": 1.65625, "learning_rate": 0.00019860091436114678, "loss": 4.3092, "step": 2879 }, { "epoch": 0.13482357071800574, "grad_norm": 1.328125, "learning_rate": 0.00019859994116139005, "loss": 3.6627, "step": 2880 }, { "epoch": 0.1348703844578384, "grad_norm": 1.1875, "learning_rate": 0.00019859896762565972, "loss": 3.4748, "step": 2881 }, { "epoch": 0.134917198197671, "grad_norm": 1.3671875, "learning_rate": 0.0001985979937539591, "loss": 3.8229, "step": 2882 }, { "epoch": 0.13496401193750365, "grad_norm": 1.375, "learning_rate": 0.0001985970195462915, "loss": 3.7802, "step": 2883 }, { "epoch": 0.1350108256773363, "grad_norm": 1.046875, "learning_rate": 0.00019859604500266026, "loss": 3.6887, "step": 2884 }, { "epoch": 0.13505763941716895, "grad_norm": 1.46875, "learning_rate": 0.0001985950701230687, "loss": 3.7967, "step": 2885 }, { "epoch": 0.1351044531570016, "grad_norm": 1.390625, "learning_rate": 0.00019859409490752015, "loss": 3.7309, "step": 2886 }, { "epoch": 0.1351512668968342, "grad_norm": 1.4453125, "learning_rate": 0.0001985931193560179, "loss": 3.9326, "step": 2887 }, { "epoch": 0.13519808063666686, "grad_norm": 1.9140625, "learning_rate": 0.00019859214346856533, "loss": 3.9641, "step": 2888 }, { "epoch": 0.1352448943764995, "grad_norm": 1.5703125, "learning_rate": 0.00019859116724516568, "loss": 3.9511, "step": 2889 }, { "epoch": 0.13529170811633215, "grad_norm": 1.328125, "learning_rate": 0.00019859019068582236, "loss": 3.4833, "step": 2890 }, { "epoch": 0.1353385218561648, "grad_norm": 1.421875, "learning_rate": 0.00019858921379053865, "loss": 4.1156, "step": 2891 }, { "epoch": 0.1353853355959974, "grad_norm": 1.84375, "learning_rate": 0.0001985882365593179, "loss": 4.2827, "step": 2892 }, { "epoch": 0.13543214933583006, "grad_norm": 1.65625, "learning_rate": 0.00019858725899216345, "loss": 3.566, "step": 2893 }, { "epoch": 0.1354789630756627, "grad_norm": 1.4140625, "learning_rate": 0.0001985862810890786, "loss": 4.1095, "step": 2894 }, { "epoch": 0.13552577681549535, "grad_norm": 1.25, "learning_rate": 0.00019858530285006668, "loss": 4.0504, "step": 2895 }, { "epoch": 0.135572590555328, "grad_norm": 1.3671875, "learning_rate": 0.00019858432427513105, "loss": 3.9051, "step": 2896 }, { "epoch": 0.13561940429516062, "grad_norm": 1.1875, "learning_rate": 0.00019858334536427505, "loss": 4.158, "step": 2897 }, { "epoch": 0.13566621803499326, "grad_norm": 1.203125, "learning_rate": 0.00019858236611750197, "loss": 3.8896, "step": 2898 }, { "epoch": 0.1357130317748259, "grad_norm": 1.3046875, "learning_rate": 0.0001985813865348152, "loss": 4.212, "step": 2899 }, { "epoch": 0.13575984551465856, "grad_norm": 1.4140625, "learning_rate": 0.00019858040661621803, "loss": 3.9943, "step": 2900 }, { "epoch": 0.1358066592544912, "grad_norm": 1.234375, "learning_rate": 0.00019857942636171384, "loss": 4.2548, "step": 2901 }, { "epoch": 0.13585347299432382, "grad_norm": 1.578125, "learning_rate": 0.00019857844577130594, "loss": 4.3372, "step": 2902 }, { "epoch": 0.13590028673415647, "grad_norm": 1.2578125, "learning_rate": 0.00019857746484499772, "loss": 3.8586, "step": 2903 }, { "epoch": 0.1359471004739891, "grad_norm": 1.4375, "learning_rate": 0.00019857648358279244, "loss": 4.3085, "step": 2904 }, { "epoch": 0.13599391421382176, "grad_norm": 1.9765625, "learning_rate": 0.0001985755019846935, "loss": 4.0446, "step": 2905 }, { "epoch": 0.1360407279536544, "grad_norm": 1.5546875, "learning_rate": 0.00019857452005070424, "loss": 3.9333, "step": 2906 }, { "epoch": 0.13608754169348705, "grad_norm": 1.3828125, "learning_rate": 0.00019857353778082802, "loss": 3.8992, "step": 2907 }, { "epoch": 0.13613435543331967, "grad_norm": 1.2421875, "learning_rate": 0.00019857255517506816, "loss": 3.2253, "step": 2908 }, { "epoch": 0.13618116917315232, "grad_norm": 1.8984375, "learning_rate": 0.000198571572233428, "loss": 3.6385, "step": 2909 }, { "epoch": 0.13622798291298496, "grad_norm": 1.2109375, "learning_rate": 0.0001985705889559109, "loss": 3.7177, "step": 2910 }, { "epoch": 0.1362747966528176, "grad_norm": 2.734375, "learning_rate": 0.00019856960534252025, "loss": 3.7551, "step": 2911 }, { "epoch": 0.13632161039265026, "grad_norm": 1.9453125, "learning_rate": 0.00019856862139325934, "loss": 3.3957, "step": 2912 }, { "epoch": 0.13636842413248287, "grad_norm": 1.234375, "learning_rate": 0.00019856763710813155, "loss": 3.7805, "step": 2913 }, { "epoch": 0.13641523787231552, "grad_norm": 1.4375, "learning_rate": 0.00019856665248714026, "loss": 4.1942, "step": 2914 }, { "epoch": 0.13646205161214817, "grad_norm": 1.234375, "learning_rate": 0.00019856566753028877, "loss": 3.5263, "step": 2915 }, { "epoch": 0.1365088653519808, "grad_norm": 1.359375, "learning_rate": 0.00019856468223758048, "loss": 3.7899, "step": 2916 }, { "epoch": 0.13655567909181346, "grad_norm": 1.1484375, "learning_rate": 0.00019856369660901872, "loss": 3.7122, "step": 2917 }, { "epoch": 0.13660249283164608, "grad_norm": 1.28125, "learning_rate": 0.00019856271064460688, "loss": 3.9829, "step": 2918 }, { "epoch": 0.13664930657147872, "grad_norm": 1.5859375, "learning_rate": 0.00019856172434434827, "loss": 3.7407, "step": 2919 }, { "epoch": 0.13669612031131137, "grad_norm": 1.546875, "learning_rate": 0.0001985607377082463, "loss": 4.03, "step": 2920 }, { "epoch": 0.13674293405114402, "grad_norm": 1.5078125, "learning_rate": 0.00019855975073630432, "loss": 3.8279, "step": 2921 }, { "epoch": 0.13678974779097666, "grad_norm": 1.6953125, "learning_rate": 0.00019855876342852568, "loss": 3.6596, "step": 2922 }, { "epoch": 0.13683656153080928, "grad_norm": 1.4609375, "learning_rate": 0.00019855777578491374, "loss": 3.9192, "step": 2923 }, { "epoch": 0.13688337527064193, "grad_norm": 1.5859375, "learning_rate": 0.00019855678780547191, "loss": 3.9164, "step": 2924 }, { "epoch": 0.13693018901047457, "grad_norm": 1.3125, "learning_rate": 0.0001985557994902035, "loss": 3.0842, "step": 2925 }, { "epoch": 0.13697700275030722, "grad_norm": 1.859375, "learning_rate": 0.0001985548108391119, "loss": 3.8163, "step": 2926 }, { "epoch": 0.13702381649013987, "grad_norm": 1.2109375, "learning_rate": 0.00019855382185220047, "loss": 3.9873, "step": 2927 }, { "epoch": 0.13707063022997248, "grad_norm": 1.1640625, "learning_rate": 0.0001985528325294726, "loss": 3.8839, "step": 2928 }, { "epoch": 0.13711744396980513, "grad_norm": 1.3984375, "learning_rate": 0.00019855184287093165, "loss": 3.9003, "step": 2929 }, { "epoch": 0.13716425770963778, "grad_norm": 1.3671875, "learning_rate": 0.000198550852876581, "loss": 3.7415, "step": 2930 }, { "epoch": 0.13721107144947042, "grad_norm": 1.390625, "learning_rate": 0.00019854986254642397, "loss": 3.9922, "step": 2931 }, { "epoch": 0.13725788518930307, "grad_norm": 1.15625, "learning_rate": 0.00019854887188046404, "loss": 3.606, "step": 2932 }, { "epoch": 0.1373046989291357, "grad_norm": 1.1484375, "learning_rate": 0.00019854788087870451, "loss": 4.0171, "step": 2933 }, { "epoch": 0.13735151266896833, "grad_norm": 1.1015625, "learning_rate": 0.00019854688954114874, "loss": 3.952, "step": 2934 }, { "epoch": 0.13739832640880098, "grad_norm": 1.28125, "learning_rate": 0.0001985458978678002, "loss": 3.5601, "step": 2935 }, { "epoch": 0.13744514014863363, "grad_norm": 1.390625, "learning_rate": 0.00019854490585866216, "loss": 4.2582, "step": 2936 }, { "epoch": 0.13749195388846627, "grad_norm": 1.3828125, "learning_rate": 0.00019854391351373804, "loss": 3.7354, "step": 2937 }, { "epoch": 0.13753876762829892, "grad_norm": 1.2578125, "learning_rate": 0.00019854292083303126, "loss": 4.0859, "step": 2938 }, { "epoch": 0.13758558136813154, "grad_norm": 1.3125, "learning_rate": 0.00019854192781654516, "loss": 3.4935, "step": 2939 }, { "epoch": 0.13763239510796418, "grad_norm": 1.0234375, "learning_rate": 0.00019854093446428314, "loss": 3.7147, "step": 2940 }, { "epoch": 0.13767920884779683, "grad_norm": 1.1875, "learning_rate": 0.0001985399407762486, "loss": 3.782, "step": 2941 }, { "epoch": 0.13772602258762948, "grad_norm": 1.25, "learning_rate": 0.0001985389467524449, "loss": 4.0104, "step": 2942 }, { "epoch": 0.13777283632746212, "grad_norm": 1.453125, "learning_rate": 0.00019853795239287543, "loss": 3.968, "step": 2943 }, { "epoch": 0.13781965006729474, "grad_norm": 1.4453125, "learning_rate": 0.00019853695769754355, "loss": 3.9363, "step": 2944 }, { "epoch": 0.1378664638071274, "grad_norm": 1.4140625, "learning_rate": 0.00019853596266645275, "loss": 4.0028, "step": 2945 }, { "epoch": 0.13791327754696003, "grad_norm": 1.4453125, "learning_rate": 0.00019853496729960627, "loss": 3.7266, "step": 2946 }, { "epoch": 0.13796009128679268, "grad_norm": 1.625, "learning_rate": 0.00019853397159700763, "loss": 3.9121, "step": 2947 }, { "epoch": 0.13800690502662533, "grad_norm": 3.5, "learning_rate": 0.00019853297555866018, "loss": 4.2169, "step": 2948 }, { "epoch": 0.13805371876645794, "grad_norm": 1.265625, "learning_rate": 0.0001985319791845673, "loss": 3.6207, "step": 2949 }, { "epoch": 0.1381005325062906, "grad_norm": 1.609375, "learning_rate": 0.00019853098247473244, "loss": 3.5893, "step": 2950 }, { "epoch": 0.13814734624612324, "grad_norm": 1.4765625, "learning_rate": 0.0001985299854291589, "loss": 3.8527, "step": 2951 }, { "epoch": 0.13819415998595588, "grad_norm": 1.3515625, "learning_rate": 0.00019852898804785017, "loss": 4.0466, "step": 2952 }, { "epoch": 0.13824097372578853, "grad_norm": 1.1640625, "learning_rate": 0.00019852799033080956, "loss": 3.839, "step": 2953 }, { "epoch": 0.13828778746562115, "grad_norm": 1.3515625, "learning_rate": 0.00019852699227804055, "loss": 3.4738, "step": 2954 }, { "epoch": 0.1383346012054538, "grad_norm": 1.2578125, "learning_rate": 0.00019852599388954652, "loss": 3.9066, "step": 2955 }, { "epoch": 0.13838141494528644, "grad_norm": 1.0234375, "learning_rate": 0.00019852499516533084, "loss": 3.4401, "step": 2956 }, { "epoch": 0.1384282286851191, "grad_norm": 1.1875, "learning_rate": 0.00019852399610539693, "loss": 3.7441, "step": 2957 }, { "epoch": 0.13847504242495173, "grad_norm": 1.3515625, "learning_rate": 0.0001985229967097482, "loss": 4.3751, "step": 2958 }, { "epoch": 0.13852185616478435, "grad_norm": 1.328125, "learning_rate": 0.00019852199697838806, "loss": 3.9316, "step": 2959 }, { "epoch": 0.138568669904617, "grad_norm": 1.234375, "learning_rate": 0.0001985209969113199, "loss": 3.7296, "step": 2960 }, { "epoch": 0.13861548364444964, "grad_norm": 1.1796875, "learning_rate": 0.00019851999650854715, "loss": 3.7156, "step": 2961 }, { "epoch": 0.1386622973842823, "grad_norm": 1.28125, "learning_rate": 0.0001985189957700732, "loss": 4.3427, "step": 2962 }, { "epoch": 0.13870911112411494, "grad_norm": 1.859375, "learning_rate": 0.0001985179946959015, "loss": 4.0308, "step": 2963 }, { "epoch": 0.13875592486394756, "grad_norm": 2.234375, "learning_rate": 0.00019851699328603538, "loss": 3.8776, "step": 2964 }, { "epoch": 0.1388027386037802, "grad_norm": 1.8203125, "learning_rate": 0.0001985159915404783, "loss": 3.9457, "step": 2965 }, { "epoch": 0.13884955234361285, "grad_norm": 1.59375, "learning_rate": 0.0001985149894592337, "loss": 3.4767, "step": 2966 }, { "epoch": 0.1388963660834455, "grad_norm": 1.171875, "learning_rate": 0.00019851398704230495, "loss": 3.5774, "step": 2967 }, { "epoch": 0.13894317982327814, "grad_norm": 1.296875, "learning_rate": 0.0001985129842896955, "loss": 3.4624, "step": 2968 }, { "epoch": 0.1389899935631108, "grad_norm": 1.375, "learning_rate": 0.00019851198120140874, "loss": 3.885, "step": 2969 }, { "epoch": 0.1390368073029434, "grad_norm": 1.203125, "learning_rate": 0.0001985109777774481, "loss": 4.3242, "step": 2970 }, { "epoch": 0.13908362104277605, "grad_norm": 1.421875, "learning_rate": 0.00019850997401781698, "loss": 3.8149, "step": 2971 }, { "epoch": 0.1391304347826087, "grad_norm": 1.21875, "learning_rate": 0.00019850896992251886, "loss": 3.4665, "step": 2972 }, { "epoch": 0.13917724852244134, "grad_norm": 1.3671875, "learning_rate": 0.0001985079654915571, "loss": 3.8858, "step": 2973 }, { "epoch": 0.139224062262274, "grad_norm": 1.234375, "learning_rate": 0.00019850696072493514, "loss": 3.6603, "step": 2974 }, { "epoch": 0.1392708760021066, "grad_norm": 1.546875, "learning_rate": 0.0001985059556226564, "loss": 4.2908, "step": 2975 }, { "epoch": 0.13931768974193925, "grad_norm": 1.6484375, "learning_rate": 0.00019850495018472431, "loss": 3.7787, "step": 2976 }, { "epoch": 0.1393645034817719, "grad_norm": 1.1953125, "learning_rate": 0.00019850394441114227, "loss": 3.808, "step": 2977 }, { "epoch": 0.13941131722160455, "grad_norm": 1.515625, "learning_rate": 0.00019850293830191376, "loss": 3.9503, "step": 2978 }, { "epoch": 0.1394581309614372, "grad_norm": 1.390625, "learning_rate": 0.0001985019318570422, "loss": 3.8058, "step": 2979 }, { "epoch": 0.1395049447012698, "grad_norm": 1.5390625, "learning_rate": 0.000198500925076531, "loss": 3.5317, "step": 2980 }, { "epoch": 0.13955175844110246, "grad_norm": 1.1328125, "learning_rate": 0.00019849991796038355, "loss": 3.8023, "step": 2981 }, { "epoch": 0.1395985721809351, "grad_norm": 1.3515625, "learning_rate": 0.00019849891050860338, "loss": 3.7288, "step": 2982 }, { "epoch": 0.13964538592076775, "grad_norm": 1.171875, "learning_rate": 0.0001984979027211938, "loss": 3.8335, "step": 2983 }, { "epoch": 0.1396921996606004, "grad_norm": 1.4296875, "learning_rate": 0.00019849689459815837, "loss": 3.7496, "step": 2984 }, { "epoch": 0.13973901340043302, "grad_norm": 2.015625, "learning_rate": 0.00019849588613950046, "loss": 3.1657, "step": 2985 }, { "epoch": 0.13978582714026566, "grad_norm": 1.5, "learning_rate": 0.00019849487734522348, "loss": 3.6232, "step": 2986 }, { "epoch": 0.1398326408800983, "grad_norm": 1.28125, "learning_rate": 0.00019849386821533093, "loss": 3.9041, "step": 2987 }, { "epoch": 0.13987945461993095, "grad_norm": 1.0703125, "learning_rate": 0.00019849285874982617, "loss": 6.1182, "step": 2988 }, { "epoch": 0.1399262683597636, "grad_norm": 1.109375, "learning_rate": 0.0001984918489487127, "loss": 3.3405, "step": 2989 }, { "epoch": 0.13997308209959622, "grad_norm": 1.4609375, "learning_rate": 0.00019849083881199396, "loss": 3.9327, "step": 2990 }, { "epoch": 0.14001989583942887, "grad_norm": 1.2578125, "learning_rate": 0.0001984898283396734, "loss": 3.6497, "step": 2991 }, { "epoch": 0.1400667095792615, "grad_norm": 1.8828125, "learning_rate": 0.00019848881753175443, "loss": 4.0838, "step": 2992 }, { "epoch": 0.14011352331909416, "grad_norm": 1.0546875, "learning_rate": 0.00019848780638824048, "loss": 3.7218, "step": 2993 }, { "epoch": 0.1401603370589268, "grad_norm": 1.0234375, "learning_rate": 0.00019848679490913504, "loss": 3.8595, "step": 2994 }, { "epoch": 0.14020715079875942, "grad_norm": 1.2109375, "learning_rate": 0.00019848578309444154, "loss": 3.4479, "step": 2995 }, { "epoch": 0.14025396453859207, "grad_norm": 1.1328125, "learning_rate": 0.00019848477094416345, "loss": 3.7638, "step": 2996 }, { "epoch": 0.14030077827842471, "grad_norm": 4.46875, "learning_rate": 0.00019848375845830417, "loss": 3.8347, "step": 2997 }, { "epoch": 0.14034759201825736, "grad_norm": 1.3984375, "learning_rate": 0.0001984827456368672, "loss": 3.914, "step": 2998 }, { "epoch": 0.14039440575809, "grad_norm": 1.3515625, "learning_rate": 0.00019848173247985593, "loss": 4.2858, "step": 2999 }, { "epoch": 0.14044121949792265, "grad_norm": 1.1328125, "learning_rate": 0.00019848071898727386, "loss": 3.601, "step": 3000 }, { "epoch": 0.14048803323775527, "grad_norm": 1.578125, "learning_rate": 0.00019847970515912447, "loss": 4.3804, "step": 3001 }, { "epoch": 0.14053484697758792, "grad_norm": 1.1328125, "learning_rate": 0.00019847869099541115, "loss": 3.549, "step": 3002 }, { "epoch": 0.14058166071742056, "grad_norm": 1.6171875, "learning_rate": 0.00019847767649613736, "loss": 3.9247, "step": 3003 }, { "epoch": 0.1406284744572532, "grad_norm": 1.5390625, "learning_rate": 0.0001984766616613066, "loss": 4.1714, "step": 3004 }, { "epoch": 0.14067528819708586, "grad_norm": 1.3828125, "learning_rate": 0.0001984756464909223, "loss": 3.7495, "step": 3005 }, { "epoch": 0.14072210193691848, "grad_norm": 1.09375, "learning_rate": 0.00019847463098498794, "loss": 3.6431, "step": 3006 }, { "epoch": 0.14076891567675112, "grad_norm": 1.375, "learning_rate": 0.00019847361514350697, "loss": 3.7969, "step": 3007 }, { "epoch": 0.14081572941658377, "grad_norm": 1.4921875, "learning_rate": 0.00019847259896648282, "loss": 3.5375, "step": 3008 }, { "epoch": 0.14086254315641641, "grad_norm": 1.46875, "learning_rate": 0.000198471582453919, "loss": 3.9189, "step": 3009 }, { "epoch": 0.14090935689624906, "grad_norm": 1.4609375, "learning_rate": 0.00019847056560581895, "loss": 3.8922, "step": 3010 }, { "epoch": 0.14095617063608168, "grad_norm": 1.5625, "learning_rate": 0.00019846954842218616, "loss": 3.8909, "step": 3011 }, { "epoch": 0.14100298437591433, "grad_norm": 1.1953125, "learning_rate": 0.00019846853090302406, "loss": 4.0637, "step": 3012 }, { "epoch": 0.14104979811574697, "grad_norm": 1.328125, "learning_rate": 0.00019846751304833612, "loss": 3.9988, "step": 3013 }, { "epoch": 0.14109661185557962, "grad_norm": 1.1640625, "learning_rate": 0.00019846649485812584, "loss": 3.792, "step": 3014 }, { "epoch": 0.14114342559541226, "grad_norm": 1.2578125, "learning_rate": 0.00019846547633239667, "loss": 3.3438, "step": 3015 }, { "epoch": 0.14119023933524488, "grad_norm": 1.8828125, "learning_rate": 0.00019846445747115208, "loss": 4.0147, "step": 3016 }, { "epoch": 0.14123705307507753, "grad_norm": 1.515625, "learning_rate": 0.00019846343827439552, "loss": 3.5125, "step": 3017 }, { "epoch": 0.14128386681491018, "grad_norm": 1.421875, "learning_rate": 0.00019846241874213051, "loss": 4.0821, "step": 3018 }, { "epoch": 0.14133068055474282, "grad_norm": 1.5390625, "learning_rate": 0.00019846139887436047, "loss": 3.7715, "step": 3019 }, { "epoch": 0.14137749429457547, "grad_norm": 1.3515625, "learning_rate": 0.00019846037867108895, "loss": 3.9646, "step": 3020 }, { "epoch": 0.14142430803440809, "grad_norm": 1.375, "learning_rate": 0.00019845935813231935, "loss": 3.7793, "step": 3021 }, { "epoch": 0.14147112177424073, "grad_norm": 1.34375, "learning_rate": 0.0001984583372580552, "loss": 3.5906, "step": 3022 }, { "epoch": 0.14151793551407338, "grad_norm": 1.328125, "learning_rate": 0.00019845731604829994, "loss": 3.6117, "step": 3023 }, { "epoch": 0.14156474925390602, "grad_norm": 1.3515625, "learning_rate": 0.00019845629450305707, "loss": 4.0048, "step": 3024 }, { "epoch": 0.14161156299373867, "grad_norm": 1.328125, "learning_rate": 0.0001984552726223301, "loss": 3.9896, "step": 3025 }, { "epoch": 0.1416583767335713, "grad_norm": 1.3125, "learning_rate": 0.0001984542504061224, "loss": 4.0222, "step": 3026 }, { "epoch": 0.14170519047340394, "grad_norm": 1.375, "learning_rate": 0.0001984532278544376, "loss": 3.9336, "step": 3027 }, { "epoch": 0.14175200421323658, "grad_norm": 1.3515625, "learning_rate": 0.0001984522049672791, "loss": 2.7521, "step": 3028 }, { "epoch": 0.14179881795306923, "grad_norm": 1.515625, "learning_rate": 0.00019845118174465038, "loss": 3.9865, "step": 3029 }, { "epoch": 0.14184563169290187, "grad_norm": 2.25, "learning_rate": 0.000198450158186555, "loss": 4.0907, "step": 3030 }, { "epoch": 0.14189244543273452, "grad_norm": 1.109375, "learning_rate": 0.00019844913429299633, "loss": 3.7105, "step": 3031 }, { "epoch": 0.14193925917256714, "grad_norm": 1.5078125, "learning_rate": 0.000198448110063978, "loss": 4.0902, "step": 3032 }, { "epoch": 0.14198607291239979, "grad_norm": 0.99609375, "learning_rate": 0.00019844708549950337, "loss": 3.6729, "step": 3033 }, { "epoch": 0.14203288665223243, "grad_norm": 1.640625, "learning_rate": 0.000198446060599576, "loss": 3.7497, "step": 3034 }, { "epoch": 0.14207970039206508, "grad_norm": 1.25, "learning_rate": 0.0001984450353641994, "loss": 3.6504, "step": 3035 }, { "epoch": 0.14212651413189772, "grad_norm": 1.3984375, "learning_rate": 0.00019844400979337698, "loss": 3.8346, "step": 3036 }, { "epoch": 0.14217332787173034, "grad_norm": 1.1953125, "learning_rate": 0.0001984429838871123, "loss": 3.9445, "step": 3037 }, { "epoch": 0.142220141611563, "grad_norm": 1.234375, "learning_rate": 0.00019844195764540885, "loss": 3.941, "step": 3038 }, { "epoch": 0.14226695535139564, "grad_norm": 1.3515625, "learning_rate": 0.00019844093106827012, "loss": 3.8456, "step": 3039 }, { "epoch": 0.14231376909122828, "grad_norm": 2.1875, "learning_rate": 0.00019843990415569965, "loss": 4.3313, "step": 3040 }, { "epoch": 0.14236058283106093, "grad_norm": 1.2890625, "learning_rate": 0.00019843887690770083, "loss": 3.9659, "step": 3041 }, { "epoch": 0.14240739657089355, "grad_norm": 1.484375, "learning_rate": 0.00019843784932427726, "loss": 3.9637, "step": 3042 }, { "epoch": 0.1424542103107262, "grad_norm": 1.5, "learning_rate": 0.0001984368214054324, "loss": 3.6968, "step": 3043 }, { "epoch": 0.14250102405055884, "grad_norm": 1.234375, "learning_rate": 0.00019843579315116978, "loss": 4.0798, "step": 3044 }, { "epoch": 0.14254783779039149, "grad_norm": 1.5625, "learning_rate": 0.00019843476456149286, "loss": 3.9457, "step": 3045 }, { "epoch": 0.14259465153022413, "grad_norm": 1.296875, "learning_rate": 0.00019843373563640517, "loss": 3.6737, "step": 3046 }, { "epoch": 0.14264146527005675, "grad_norm": 1.5, "learning_rate": 0.00019843270637591022, "loss": 3.7728, "step": 3047 }, { "epoch": 0.1426882790098894, "grad_norm": 1.3984375, "learning_rate": 0.00019843167678001153, "loss": 3.8879, "step": 3048 }, { "epoch": 0.14273509274972204, "grad_norm": 1.21875, "learning_rate": 0.00019843064684871257, "loss": 3.6522, "step": 3049 }, { "epoch": 0.1427819064895547, "grad_norm": 1.2890625, "learning_rate": 0.00019842961658201688, "loss": 3.7018, "step": 3050 }, { "epoch": 0.14282872022938733, "grad_norm": 1.328125, "learning_rate": 0.00019842858597992797, "loss": 4.1173, "step": 3051 }, { "epoch": 0.14287553396921995, "grad_norm": 1.734375, "learning_rate": 0.00019842755504244933, "loss": 4.3496, "step": 3052 }, { "epoch": 0.1429223477090526, "grad_norm": 1.4609375, "learning_rate": 0.00019842652376958448, "loss": 3.7933, "step": 3053 }, { "epoch": 0.14296916144888525, "grad_norm": 1.3203125, "learning_rate": 0.00019842549216133695, "loss": 3.8384, "step": 3054 }, { "epoch": 0.1430159751887179, "grad_norm": 1.4921875, "learning_rate": 0.00019842446021771024, "loss": 3.9366, "step": 3055 }, { "epoch": 0.14306278892855054, "grad_norm": 1.5546875, "learning_rate": 0.00019842342793870786, "loss": 4.0619, "step": 3056 }, { "epoch": 0.14310960266838316, "grad_norm": 1.2265625, "learning_rate": 0.00019842239532433337, "loss": 4.128, "step": 3057 }, { "epoch": 0.1431564164082158, "grad_norm": 1.3046875, "learning_rate": 0.00019842136237459024, "loss": 3.4673, "step": 3058 }, { "epoch": 0.14320323014804845, "grad_norm": 1.328125, "learning_rate": 0.00019842032908948197, "loss": 3.9675, "step": 3059 }, { "epoch": 0.1432500438878811, "grad_norm": 2.59375, "learning_rate": 0.00019841929546901218, "loss": 3.984, "step": 3060 }, { "epoch": 0.14329685762771374, "grad_norm": 1.375, "learning_rate": 0.0001984182615131843, "loss": 3.9945, "step": 3061 }, { "epoch": 0.1433436713675464, "grad_norm": 1.1953125, "learning_rate": 0.0001984172272220019, "loss": 3.7003, "step": 3062 }, { "epoch": 0.143390485107379, "grad_norm": 0.99609375, "learning_rate": 0.00019841619259546844, "loss": 3.4075, "step": 3063 }, { "epoch": 0.14343729884721165, "grad_norm": 1.15625, "learning_rate": 0.00019841515763358754, "loss": 3.8392, "step": 3064 }, { "epoch": 0.1434841125870443, "grad_norm": 1.2890625, "learning_rate": 0.00019841412233636264, "loss": 3.8977, "step": 3065 }, { "epoch": 0.14353092632687695, "grad_norm": 1.28125, "learning_rate": 0.00019841308670379734, "loss": 3.9773, "step": 3066 }, { "epoch": 0.1435777400667096, "grad_norm": 1.2421875, "learning_rate": 0.00019841205073589514, "loss": 3.7858, "step": 3067 }, { "epoch": 0.1436245538065422, "grad_norm": 1.1953125, "learning_rate": 0.00019841101443265956, "loss": 3.4007, "step": 3068 }, { "epoch": 0.14367136754637486, "grad_norm": 1.6640625, "learning_rate": 0.00019840997779409412, "loss": 3.88, "step": 3069 }, { "epoch": 0.1437181812862075, "grad_norm": 1.6015625, "learning_rate": 0.00019840894082020239, "loss": 4.002, "step": 3070 }, { "epoch": 0.14376499502604015, "grad_norm": 1.296875, "learning_rate": 0.00019840790351098787, "loss": 4.3803, "step": 3071 }, { "epoch": 0.1438118087658728, "grad_norm": 1.109375, "learning_rate": 0.0001984068658664541, "loss": 3.7806, "step": 3072 }, { "epoch": 0.1438586225057054, "grad_norm": 1.453125, "learning_rate": 0.00019840582788660462, "loss": 3.8078, "step": 3073 }, { "epoch": 0.14390543624553806, "grad_norm": 1.5234375, "learning_rate": 0.00019840478957144298, "loss": 3.6368, "step": 3074 }, { "epoch": 0.1439522499853707, "grad_norm": 1.6015625, "learning_rate": 0.00019840375092097272, "loss": 3.4367, "step": 3075 }, { "epoch": 0.14399906372520335, "grad_norm": 1.5234375, "learning_rate": 0.00019840271193519735, "loss": 4.2496, "step": 3076 }, { "epoch": 0.144045877465036, "grad_norm": 1.953125, "learning_rate": 0.00019840167261412042, "loss": 3.7318, "step": 3077 }, { "epoch": 0.14409269120486862, "grad_norm": 1.265625, "learning_rate": 0.00019840063295774549, "loss": 3.4932, "step": 3078 }, { "epoch": 0.14413950494470126, "grad_norm": 1.375, "learning_rate": 0.0001983995929660761, "loss": 3.5948, "step": 3079 }, { "epoch": 0.1441863186845339, "grad_norm": 1.390625, "learning_rate": 0.00019839855263911575, "loss": 4.1721, "step": 3080 }, { "epoch": 0.14423313242436656, "grad_norm": 1.6328125, "learning_rate": 0.00019839751197686805, "loss": 3.9281, "step": 3081 }, { "epoch": 0.1442799461641992, "grad_norm": 1.7890625, "learning_rate": 0.00019839647097933655, "loss": 3.8406, "step": 3082 }, { "epoch": 0.14432675990403182, "grad_norm": 1.375, "learning_rate": 0.00019839542964652468, "loss": 3.7268, "step": 3083 }, { "epoch": 0.14437357364386447, "grad_norm": 1.5234375, "learning_rate": 0.00019839438797843612, "loss": 4.0995, "step": 3084 }, { "epoch": 0.1444203873836971, "grad_norm": 1.5078125, "learning_rate": 0.00019839334597507438, "loss": 3.7185, "step": 3085 }, { "epoch": 0.14446720112352976, "grad_norm": 1.1875, "learning_rate": 0.00019839230363644295, "loss": 3.7646, "step": 3086 }, { "epoch": 0.1445140148633624, "grad_norm": 1.25, "learning_rate": 0.00019839126096254548, "loss": 3.6648, "step": 3087 }, { "epoch": 0.14456082860319502, "grad_norm": 1.296875, "learning_rate": 0.00019839021795338546, "loss": 4.0669, "step": 3088 }, { "epoch": 0.14460764234302767, "grad_norm": 1.203125, "learning_rate": 0.00019838917460896646, "loss": 4.0725, "step": 3089 }, { "epoch": 0.14465445608286032, "grad_norm": 1.125, "learning_rate": 0.00019838813092929204, "loss": 3.4787, "step": 3090 }, { "epoch": 0.14470126982269296, "grad_norm": 1.2734375, "learning_rate": 0.00019838708691436577, "loss": 4.0003, "step": 3091 }, { "epoch": 0.1447480835625256, "grad_norm": 1.578125, "learning_rate": 0.00019838604256419115, "loss": 4.1958, "step": 3092 }, { "epoch": 0.14479489730235826, "grad_norm": 1.125, "learning_rate": 0.00019838499787877182, "loss": 3.6701, "step": 3093 }, { "epoch": 0.14484171104219087, "grad_norm": 1.2734375, "learning_rate": 0.00019838395285811124, "loss": 3.5841, "step": 3094 }, { "epoch": 0.14488852478202352, "grad_norm": 1.3046875, "learning_rate": 0.0001983829075022131, "loss": 3.6869, "step": 3095 }, { "epoch": 0.14493533852185617, "grad_norm": 1.28125, "learning_rate": 0.00019838186181108083, "loss": 3.6894, "step": 3096 }, { "epoch": 0.1449821522616888, "grad_norm": 0.98046875, "learning_rate": 0.00019838081578471805, "loss": 6.1344, "step": 3097 }, { "epoch": 0.14502896600152146, "grad_norm": 1.3828125, "learning_rate": 0.00019837976942312837, "loss": 4.1466, "step": 3098 }, { "epoch": 0.14507577974135408, "grad_norm": 1.546875, "learning_rate": 0.00019837872272631527, "loss": 4.0548, "step": 3099 }, { "epoch": 0.14512259348118672, "grad_norm": 1.921875, "learning_rate": 0.0001983776756942824, "loss": 4.1051, "step": 3100 }, { "epoch": 0.14516940722101937, "grad_norm": 1.390625, "learning_rate": 0.00019837662832703325, "loss": 4.127, "step": 3101 }, { "epoch": 0.14521622096085202, "grad_norm": 1.203125, "learning_rate": 0.0001983755806245714, "loss": 3.4208, "step": 3102 }, { "epoch": 0.14526303470068466, "grad_norm": 2.0625, "learning_rate": 0.00019837453258690052, "loss": 4.2164, "step": 3103 }, { "epoch": 0.14530984844051728, "grad_norm": 1.203125, "learning_rate": 0.00019837348421402408, "loss": 3.503, "step": 3104 }, { "epoch": 0.14535666218034993, "grad_norm": 1.3671875, "learning_rate": 0.00019837243550594567, "loss": 3.7362, "step": 3105 }, { "epoch": 0.14540347592018257, "grad_norm": 1.28125, "learning_rate": 0.0001983713864626689, "loss": 3.6169, "step": 3106 }, { "epoch": 0.14545028966001522, "grad_norm": 1.59375, "learning_rate": 0.00019837033708419727, "loss": 4.7803, "step": 3107 }, { "epoch": 0.14549710339984787, "grad_norm": 1.34375, "learning_rate": 0.00019836928737053443, "loss": 3.823, "step": 3108 }, { "epoch": 0.14554391713968048, "grad_norm": 1.3125, "learning_rate": 0.0001983682373216839, "loss": 3.8414, "step": 3109 }, { "epoch": 0.14559073087951313, "grad_norm": 1.421875, "learning_rate": 0.0001983671869376493, "loss": 3.6712, "step": 3110 }, { "epoch": 0.14563754461934578, "grad_norm": 1.2734375, "learning_rate": 0.00019836613621843424, "loss": 3.7982, "step": 3111 }, { "epoch": 0.14568435835917842, "grad_norm": 1.609375, "learning_rate": 0.0001983650851640422, "loss": 3.2058, "step": 3112 }, { "epoch": 0.14573117209901107, "grad_norm": 1.21875, "learning_rate": 0.00019836403377447684, "loss": 3.692, "step": 3113 }, { "epoch": 0.1457779858388437, "grad_norm": 1.6796875, "learning_rate": 0.00019836298204974173, "loss": 3.6505, "step": 3114 }, { "epoch": 0.14582479957867633, "grad_norm": 1.6640625, "learning_rate": 0.00019836192998984043, "loss": 3.7639, "step": 3115 }, { "epoch": 0.14587161331850898, "grad_norm": 1.265625, "learning_rate": 0.00019836087759477654, "loss": 3.7905, "step": 3116 }, { "epoch": 0.14591842705834163, "grad_norm": 1.375, "learning_rate": 0.00019835982486455364, "loss": 3.729, "step": 3117 }, { "epoch": 0.14596524079817427, "grad_norm": 1.46875, "learning_rate": 0.00019835877179917534, "loss": 3.61, "step": 3118 }, { "epoch": 0.1460120545380069, "grad_norm": 1.1015625, "learning_rate": 0.00019835771839864518, "loss": 4.0434, "step": 3119 }, { "epoch": 0.14605886827783954, "grad_norm": 1.453125, "learning_rate": 0.00019835666466296682, "loss": 3.4506, "step": 3120 }, { "epoch": 0.14610568201767218, "grad_norm": 1.2578125, "learning_rate": 0.00019835561059214382, "loss": 3.7166, "step": 3121 }, { "epoch": 0.14615249575750483, "grad_norm": 1.28125, "learning_rate": 0.00019835455618617972, "loss": 4.0308, "step": 3122 }, { "epoch": 0.14619930949733748, "grad_norm": 1.765625, "learning_rate": 0.00019835350144507816, "loss": 4.1215, "step": 3123 }, { "epoch": 0.14624612323717012, "grad_norm": 1.203125, "learning_rate": 0.00019835244636884274, "loss": 4.0194, "step": 3124 }, { "epoch": 0.14629293697700274, "grad_norm": 1.1875, "learning_rate": 0.00019835139095747704, "loss": 3.6908, "step": 3125 }, { "epoch": 0.1463397507168354, "grad_norm": 1.265625, "learning_rate": 0.00019835033521098464, "loss": 3.9316, "step": 3126 }, { "epoch": 0.14638656445666803, "grad_norm": 1.4921875, "learning_rate": 0.0001983492791293692, "loss": 4.1416, "step": 3127 }, { "epoch": 0.14643337819650068, "grad_norm": 2.0625, "learning_rate": 0.00019834822271263427, "loss": 4.1796, "step": 3128 }, { "epoch": 0.14648019193633333, "grad_norm": 1.1484375, "learning_rate": 0.00019834716596078345, "loss": 3.8046, "step": 3129 }, { "epoch": 0.14652700567616594, "grad_norm": 1.3359375, "learning_rate": 0.00019834610887382032, "loss": 3.9587, "step": 3130 }, { "epoch": 0.1465738194159986, "grad_norm": 1.515625, "learning_rate": 0.00019834505145174854, "loss": 4.0134, "step": 3131 }, { "epoch": 0.14662063315583124, "grad_norm": 1.4453125, "learning_rate": 0.00019834399369457166, "loss": 3.6168, "step": 3132 }, { "epoch": 0.14666744689566388, "grad_norm": 1.5546875, "learning_rate": 0.00019834293560229332, "loss": 3.8663, "step": 3133 }, { "epoch": 0.14671426063549653, "grad_norm": 1.3671875, "learning_rate": 0.0001983418771749171, "loss": 4.1558, "step": 3134 }, { "epoch": 0.14676107437532915, "grad_norm": 1.21875, "learning_rate": 0.00019834081841244662, "loss": 4.024, "step": 3135 }, { "epoch": 0.1468078881151618, "grad_norm": 1.6796875, "learning_rate": 0.0001983397593148855, "loss": 4.1299, "step": 3136 }, { "epoch": 0.14685470185499444, "grad_norm": 1.734375, "learning_rate": 0.0001983386998822373, "loss": 3.9603, "step": 3137 }, { "epoch": 0.1469015155948271, "grad_norm": 1.4296875, "learning_rate": 0.0001983376401145057, "loss": 3.721, "step": 3138 }, { "epoch": 0.14694832933465973, "grad_norm": 1.3203125, "learning_rate": 0.00019833658001169427, "loss": 4.1262, "step": 3139 }, { "epoch": 0.14699514307449235, "grad_norm": 1.296875, "learning_rate": 0.00019833551957380662, "loss": 3.9775, "step": 3140 }, { "epoch": 0.147041956814325, "grad_norm": 1.546875, "learning_rate": 0.00019833445880084637, "loss": 3.7785, "step": 3141 }, { "epoch": 0.14708877055415764, "grad_norm": 1.546875, "learning_rate": 0.00019833339769281715, "loss": 4.1918, "step": 3142 }, { "epoch": 0.1471355842939903, "grad_norm": 1.171875, "learning_rate": 0.00019833233624972255, "loss": 3.3656, "step": 3143 }, { "epoch": 0.14718239803382294, "grad_norm": 0.95703125, "learning_rate": 0.00019833127447156618, "loss": 3.4935, "step": 3144 }, { "epoch": 0.14722921177365556, "grad_norm": 1.1796875, "learning_rate": 0.00019833021235835172, "loss": 3.734, "step": 3145 }, { "epoch": 0.1472760255134882, "grad_norm": 2.546875, "learning_rate": 0.0001983291499100827, "loss": 3.6317, "step": 3146 }, { "epoch": 0.14732283925332085, "grad_norm": 1.3359375, "learning_rate": 0.0001983280871267628, "loss": 3.5325, "step": 3147 }, { "epoch": 0.1473696529931535, "grad_norm": 1.40625, "learning_rate": 0.00019832702400839565, "loss": 4.0956, "step": 3148 }, { "epoch": 0.14741646673298614, "grad_norm": 1.5078125, "learning_rate": 0.00019832596055498485, "loss": 4.051, "step": 3149 }, { "epoch": 0.14746328047281876, "grad_norm": 2.296875, "learning_rate": 0.000198324896766534, "loss": 3.4346, "step": 3150 }, { "epoch": 0.1475100942126514, "grad_norm": 1.8125, "learning_rate": 0.00019832383264304675, "loss": 3.7702, "step": 3151 }, { "epoch": 0.14755690795248405, "grad_norm": 1.2109375, "learning_rate": 0.00019832276818452672, "loss": 3.7653, "step": 3152 }, { "epoch": 0.1476037216923167, "grad_norm": 1.5390625, "learning_rate": 0.00019832170339097754, "loss": 4.708, "step": 3153 }, { "epoch": 0.14765053543214934, "grad_norm": 1.5, "learning_rate": 0.00019832063826240285, "loss": 4.0258, "step": 3154 }, { "epoch": 0.147697349171982, "grad_norm": 1.3046875, "learning_rate": 0.00019831957279880627, "loss": 3.4848, "step": 3155 }, { "epoch": 0.1477441629118146, "grad_norm": 1.5390625, "learning_rate": 0.00019831850700019143, "loss": 4.0378, "step": 3156 }, { "epoch": 0.14779097665164725, "grad_norm": 1.3046875, "learning_rate": 0.00019831744086656194, "loss": 3.0457, "step": 3157 }, { "epoch": 0.1478377903914799, "grad_norm": 1.2265625, "learning_rate": 0.00019831637439792147, "loss": 4.0411, "step": 3158 }, { "epoch": 0.14788460413131255, "grad_norm": 1.71875, "learning_rate": 0.00019831530759427363, "loss": 3.7671, "step": 3159 }, { "epoch": 0.1479314178711452, "grad_norm": 1.546875, "learning_rate": 0.00019831424045562205, "loss": 4.3867, "step": 3160 }, { "epoch": 0.1479782316109778, "grad_norm": 1.265625, "learning_rate": 0.00019831317298197042, "loss": 3.644, "step": 3161 }, { "epoch": 0.14802504535081046, "grad_norm": 1.4296875, "learning_rate": 0.0001983121051733223, "loss": 4.1659, "step": 3162 }, { "epoch": 0.1480718590906431, "grad_norm": 1.3203125, "learning_rate": 0.00019831103702968136, "loss": 3.339, "step": 3163 }, { "epoch": 0.14811867283047575, "grad_norm": 1.625, "learning_rate": 0.00019830996855105127, "loss": 3.7335, "step": 3164 }, { "epoch": 0.1481654865703084, "grad_norm": 2.09375, "learning_rate": 0.00019830889973743564, "loss": 4.0398, "step": 3165 }, { "epoch": 0.14821230031014102, "grad_norm": 1.140625, "learning_rate": 0.0001983078305888381, "loss": 3.8105, "step": 3166 }, { "epoch": 0.14825911404997366, "grad_norm": 1.078125, "learning_rate": 0.00019830676110526235, "loss": 3.2542, "step": 3167 }, { "epoch": 0.1483059277898063, "grad_norm": 1.1328125, "learning_rate": 0.00019830569128671192, "loss": 3.8671, "step": 3168 }, { "epoch": 0.14835274152963895, "grad_norm": 1.46875, "learning_rate": 0.0001983046211331906, "loss": 4.0058, "step": 3169 }, { "epoch": 0.1483995552694716, "grad_norm": 1.21875, "learning_rate": 0.00019830355064470192, "loss": 3.4137, "step": 3170 }, { "epoch": 0.14844636900930422, "grad_norm": 1.2890625, "learning_rate": 0.0001983024798212496, "loss": 4.117, "step": 3171 }, { "epoch": 0.14849318274913686, "grad_norm": 1.5, "learning_rate": 0.00019830140866283725, "loss": 3.6466, "step": 3172 }, { "epoch": 0.1485399964889695, "grad_norm": 1.2578125, "learning_rate": 0.00019830033716946853, "loss": 3.7786, "step": 3173 }, { "epoch": 0.14858681022880216, "grad_norm": 1.3984375, "learning_rate": 0.00019829926534114712, "loss": 3.781, "step": 3174 }, { "epoch": 0.1486336239686348, "grad_norm": 1.578125, "learning_rate": 0.00019829819317787663, "loss": 3.5013, "step": 3175 }, { "epoch": 0.14868043770846742, "grad_norm": 1.5390625, "learning_rate": 0.0001982971206796607, "loss": 4.157, "step": 3176 }, { "epoch": 0.14872725144830007, "grad_norm": 1.3671875, "learning_rate": 0.00019829604784650306, "loss": 4.1841, "step": 3177 }, { "epoch": 0.14877406518813271, "grad_norm": 1.2734375, "learning_rate": 0.0001982949746784073, "loss": 3.9475, "step": 3178 }, { "epoch": 0.14882087892796536, "grad_norm": 0.9609375, "learning_rate": 0.00019829390117537707, "loss": 3.5373, "step": 3179 }, { "epoch": 0.148867692667798, "grad_norm": 1.640625, "learning_rate": 0.00019829282733741608, "loss": 4.03, "step": 3180 }, { "epoch": 0.14891450640763063, "grad_norm": 1.765625, "learning_rate": 0.00019829175316452794, "loss": 3.5024, "step": 3181 }, { "epoch": 0.14896132014746327, "grad_norm": 1.953125, "learning_rate": 0.00019829067865671638, "loss": 3.8119, "step": 3182 }, { "epoch": 0.14900813388729592, "grad_norm": 1.8984375, "learning_rate": 0.000198289603813985, "loss": 3.8042, "step": 3183 }, { "epoch": 0.14905494762712856, "grad_norm": 1.3671875, "learning_rate": 0.00019828852863633742, "loss": 3.8849, "step": 3184 }, { "epoch": 0.1491017613669612, "grad_norm": 1.5, "learning_rate": 0.0001982874531237774, "loss": 3.5952, "step": 3185 }, { "epoch": 0.14914857510679386, "grad_norm": 1.671875, "learning_rate": 0.00019828637727630857, "loss": 3.6575, "step": 3186 }, { "epoch": 0.14919538884662648, "grad_norm": 1.171875, "learning_rate": 0.0001982853010939346, "loss": 3.5942, "step": 3187 }, { "epoch": 0.14924220258645912, "grad_norm": 1.7109375, "learning_rate": 0.00019828422457665912, "loss": 4.1045, "step": 3188 }, { "epoch": 0.14928901632629177, "grad_norm": 1.0, "learning_rate": 0.00019828314772448583, "loss": 6.0855, "step": 3189 }, { "epoch": 0.14933583006612441, "grad_norm": 2.40625, "learning_rate": 0.00019828207053741842, "loss": 3.8339, "step": 3190 }, { "epoch": 0.14938264380595706, "grad_norm": 1.0703125, "learning_rate": 0.0001982809930154605, "loss": 3.6272, "step": 3191 }, { "epoch": 0.14942945754578968, "grad_norm": 1.34375, "learning_rate": 0.0001982799151586158, "loss": 3.5853, "step": 3192 }, { "epoch": 0.14947627128562233, "grad_norm": 1.3125, "learning_rate": 0.00019827883696688797, "loss": 4.0504, "step": 3193 }, { "epoch": 0.14952308502545497, "grad_norm": 1.484375, "learning_rate": 0.00019827775844028067, "loss": 3.9011, "step": 3194 }, { "epoch": 0.14956989876528762, "grad_norm": 1.4375, "learning_rate": 0.00019827667957879764, "loss": 4.1531, "step": 3195 }, { "epoch": 0.14961671250512026, "grad_norm": 1.1796875, "learning_rate": 0.00019827560038244245, "loss": 3.6656, "step": 3196 }, { "epoch": 0.14966352624495288, "grad_norm": 1.25, "learning_rate": 0.00019827452085121884, "loss": 3.7077, "step": 3197 }, { "epoch": 0.14971033998478553, "grad_norm": 1.703125, "learning_rate": 0.00019827344098513047, "loss": 3.8526, "step": 3198 }, { "epoch": 0.14975715372461817, "grad_norm": 1.359375, "learning_rate": 0.00019827236078418106, "loss": 3.5023, "step": 3199 }, { "epoch": 0.14980396746445082, "grad_norm": 1.171875, "learning_rate": 0.00019827128024837426, "loss": 3.7992, "step": 3200 }, { "epoch": 0.14985078120428347, "grad_norm": 1.546875, "learning_rate": 0.0001982701993777137, "loss": 4.1269, "step": 3201 }, { "epoch": 0.14989759494411609, "grad_norm": 1.3125, "learning_rate": 0.00019826911817220316, "loss": 3.3043, "step": 3202 }, { "epoch": 0.14994440868394873, "grad_norm": 1.5, "learning_rate": 0.0001982680366318463, "loss": 3.4864, "step": 3203 }, { "epoch": 0.14999122242378138, "grad_norm": 1.0390625, "learning_rate": 0.0001982669547566468, "loss": 3.8296, "step": 3204 }, { "epoch": 0.15003803616361402, "grad_norm": 1.3203125, "learning_rate": 0.00019826587254660828, "loss": 3.8617, "step": 3205 }, { "epoch": 0.15008484990344667, "grad_norm": 1.46875, "learning_rate": 0.0001982647900017345, "loss": 4.1136, "step": 3206 }, { "epoch": 0.1501316636432793, "grad_norm": 1.4921875, "learning_rate": 0.0001982637071220291, "loss": 3.7256, "step": 3207 }, { "epoch": 0.15017847738311194, "grad_norm": 1.3671875, "learning_rate": 0.0001982626239074958, "loss": 4.1304, "step": 3208 }, { "epoch": 0.15022529112294458, "grad_norm": 1.296875, "learning_rate": 0.00019826154035813834, "loss": 3.5496, "step": 3209 }, { "epoch": 0.15027210486277723, "grad_norm": 1.4765625, "learning_rate": 0.00019826045647396032, "loss": 3.4877, "step": 3210 }, { "epoch": 0.15031891860260987, "grad_norm": 1.5390625, "learning_rate": 0.0001982593722549655, "loss": 3.9522, "step": 3211 }, { "epoch": 0.15036573234244252, "grad_norm": 1.5546875, "learning_rate": 0.0001982582877011575, "loss": 3.7045, "step": 3212 }, { "epoch": 0.15041254608227514, "grad_norm": 2.234375, "learning_rate": 0.0001982572028125401, "loss": 4.1634, "step": 3213 }, { "epoch": 0.15045935982210779, "grad_norm": 1.3828125, "learning_rate": 0.00019825611758911697, "loss": 2.8903, "step": 3214 }, { "epoch": 0.15050617356194043, "grad_norm": 1.34375, "learning_rate": 0.0001982550320308918, "loss": 4.034, "step": 3215 }, { "epoch": 0.15055298730177308, "grad_norm": 1.34375, "learning_rate": 0.00019825394613786826, "loss": 3.7194, "step": 3216 }, { "epoch": 0.15059980104160572, "grad_norm": 1.21875, "learning_rate": 0.0001982528599100501, "loss": 3.9352, "step": 3217 }, { "epoch": 0.15064661478143834, "grad_norm": 1.3203125, "learning_rate": 0.00019825177334744097, "loss": 3.8743, "step": 3218 }, { "epoch": 0.150693428521271, "grad_norm": 1.09375, "learning_rate": 0.00019825068645004464, "loss": 3.7489, "step": 3219 }, { "epoch": 0.15074024226110364, "grad_norm": 1.2109375, "learning_rate": 0.00019824959921786476, "loss": 4.0175, "step": 3220 }, { "epoch": 0.15078705600093628, "grad_norm": 1.0, "learning_rate": 0.00019824851165090506, "loss": 4.8735, "step": 3221 }, { "epoch": 0.15083386974076893, "grad_norm": 1.71875, "learning_rate": 0.0001982474237491692, "loss": 3.3205, "step": 3222 }, { "epoch": 0.15088068348060155, "grad_norm": 1.4609375, "learning_rate": 0.00019824633551266096, "loss": 3.1954, "step": 3223 }, { "epoch": 0.1509274972204342, "grad_norm": 1.25, "learning_rate": 0.00019824524694138398, "loss": 3.5543, "step": 3224 }, { "epoch": 0.15097431096026684, "grad_norm": 1.671875, "learning_rate": 0.000198244158035342, "loss": 3.5654, "step": 3225 }, { "epoch": 0.15102112470009948, "grad_norm": 1.2890625, "learning_rate": 0.00019824306879453877, "loss": 3.7024, "step": 3226 }, { "epoch": 0.15106793843993213, "grad_norm": 1.453125, "learning_rate": 0.00019824197921897795, "loss": 3.8642, "step": 3227 }, { "epoch": 0.15111475217976475, "grad_norm": 1.75, "learning_rate": 0.00019824088930866325, "loss": 4.0925, "step": 3228 }, { "epoch": 0.1511615659195974, "grad_norm": 1.6484375, "learning_rate": 0.0001982397990635984, "loss": 3.7161, "step": 3229 }, { "epoch": 0.15120837965943004, "grad_norm": 1.6171875, "learning_rate": 0.00019823870848378712, "loss": 3.5118, "step": 3230 }, { "epoch": 0.1512551933992627, "grad_norm": 0.92578125, "learning_rate": 0.0001982376175692331, "loss": 5.2743, "step": 3231 }, { "epoch": 0.15130200713909533, "grad_norm": 1.3203125, "learning_rate": 0.0001982365263199401, "loss": 3.2614, "step": 3232 }, { "epoch": 0.15134882087892795, "grad_norm": 0.94140625, "learning_rate": 0.0001982354347359118, "loss": 1.7864, "step": 3233 }, { "epoch": 0.1513956346187606, "grad_norm": 1.2890625, "learning_rate": 0.00019823434281715193, "loss": 4.0864, "step": 3234 }, { "epoch": 0.15144244835859325, "grad_norm": 1.5625, "learning_rate": 0.00019823325056366424, "loss": 3.9076, "step": 3235 }, { "epoch": 0.1514892620984259, "grad_norm": 2.15625, "learning_rate": 0.00019823215797545242, "loss": 3.3555, "step": 3236 }, { "epoch": 0.15153607583825854, "grad_norm": 1.4765625, "learning_rate": 0.0001982310650525202, "loss": 3.274, "step": 3237 }, { "epoch": 0.15158288957809116, "grad_norm": 1.7265625, "learning_rate": 0.0001982299717948713, "loss": 4.4047, "step": 3238 }, { "epoch": 0.1516297033179238, "grad_norm": 1.3515625, "learning_rate": 0.0001982288782025094, "loss": 3.7189, "step": 3239 }, { "epoch": 0.15167651705775645, "grad_norm": 1.109375, "learning_rate": 0.00019822778427543836, "loss": 3.4338, "step": 3240 }, { "epoch": 0.1517233307975891, "grad_norm": 1.6796875, "learning_rate": 0.00019822669001366176, "loss": 4.135, "step": 3241 }, { "epoch": 0.15177014453742174, "grad_norm": 1.3046875, "learning_rate": 0.00019822559541718342, "loss": 3.8363, "step": 3242 }, { "epoch": 0.1518169582772544, "grad_norm": 1.4921875, "learning_rate": 0.00019822450048600704, "loss": 3.5651, "step": 3243 }, { "epoch": 0.151863772017087, "grad_norm": 1.5546875, "learning_rate": 0.00019822340522013634, "loss": 3.8833, "step": 3244 }, { "epoch": 0.15191058575691965, "grad_norm": 1.53125, "learning_rate": 0.00019822230961957508, "loss": 4.0118, "step": 3245 }, { "epoch": 0.1519573994967523, "grad_norm": 1.2890625, "learning_rate": 0.00019822121368432699, "loss": 3.9304, "step": 3246 }, { "epoch": 0.15200421323658495, "grad_norm": 1.5, "learning_rate": 0.00019822011741439573, "loss": 3.2953, "step": 3247 }, { "epoch": 0.1520510269764176, "grad_norm": 1.1171875, "learning_rate": 0.00019821902080978515, "loss": 3.4259, "step": 3248 }, { "epoch": 0.1520978407162502, "grad_norm": 1.5, "learning_rate": 0.00019821792387049892, "loss": 3.5198, "step": 3249 }, { "epoch": 0.15214465445608286, "grad_norm": 1.2734375, "learning_rate": 0.00019821682659654078, "loss": 3.4074, "step": 3250 }, { "epoch": 0.1521914681959155, "grad_norm": 1.3671875, "learning_rate": 0.0001982157289879145, "loss": 4.0869, "step": 3251 }, { "epoch": 0.15223828193574815, "grad_norm": 1.1796875, "learning_rate": 0.0001982146310446238, "loss": 3.8798, "step": 3252 }, { "epoch": 0.1522850956755808, "grad_norm": 1.7265625, "learning_rate": 0.00019821353276667242, "loss": 4.5269, "step": 3253 }, { "epoch": 0.1523319094154134, "grad_norm": 1.203125, "learning_rate": 0.0001982124341540641, "loss": 3.5182, "step": 3254 }, { "epoch": 0.15237872315524606, "grad_norm": 1.671875, "learning_rate": 0.00019821133520680259, "loss": 3.9999, "step": 3255 }, { "epoch": 0.1524255368950787, "grad_norm": 1.1953125, "learning_rate": 0.00019821023592489162, "loss": 3.7186, "step": 3256 }, { "epoch": 0.15247235063491135, "grad_norm": 1.40625, "learning_rate": 0.00019820913630833498, "loss": 3.879, "step": 3257 }, { "epoch": 0.152519164374744, "grad_norm": 1.421875, "learning_rate": 0.00019820803635713636, "loss": 3.7636, "step": 3258 }, { "epoch": 0.15256597811457662, "grad_norm": 1.6171875, "learning_rate": 0.00019820693607129955, "loss": 4.148, "step": 3259 }, { "epoch": 0.15261279185440926, "grad_norm": 1.3359375, "learning_rate": 0.00019820583545082827, "loss": 3.7038, "step": 3260 }, { "epoch": 0.1526596055942419, "grad_norm": 1.890625, "learning_rate": 0.00019820473449572629, "loss": 3.6031, "step": 3261 }, { "epoch": 0.15270641933407456, "grad_norm": 1.25, "learning_rate": 0.00019820363320599733, "loss": 3.4724, "step": 3262 }, { "epoch": 0.1527532330739072, "grad_norm": 1.1640625, "learning_rate": 0.00019820253158164518, "loss": 3.6522, "step": 3263 }, { "epoch": 0.15280004681373982, "grad_norm": 1.8125, "learning_rate": 0.00019820142962267357, "loss": 3.6425, "step": 3264 }, { "epoch": 0.15284686055357247, "grad_norm": 1.3671875, "learning_rate": 0.00019820032732908627, "loss": 3.752, "step": 3265 }, { "epoch": 0.1528936742934051, "grad_norm": 1.296875, "learning_rate": 0.00019819922470088705, "loss": 3.9201, "step": 3266 }, { "epoch": 0.15294048803323776, "grad_norm": 1.28125, "learning_rate": 0.00019819812173807962, "loss": 3.8678, "step": 3267 }, { "epoch": 0.1529873017730704, "grad_norm": 1.640625, "learning_rate": 0.0001981970184406678, "loss": 3.9229, "step": 3268 }, { "epoch": 0.15303411551290302, "grad_norm": 1.4609375, "learning_rate": 0.00019819591480865528, "loss": 4.0098, "step": 3269 }, { "epoch": 0.15308092925273567, "grad_norm": 1.0546875, "learning_rate": 0.00019819481084204588, "loss": 4.2442, "step": 3270 }, { "epoch": 0.15312774299256832, "grad_norm": 1.3515625, "learning_rate": 0.00019819370654084332, "loss": 3.7206, "step": 3271 }, { "epoch": 0.15317455673240096, "grad_norm": 1.203125, "learning_rate": 0.0001981926019050514, "loss": 3.7229, "step": 3272 }, { "epoch": 0.1532213704722336, "grad_norm": 1.390625, "learning_rate": 0.00019819149693467387, "loss": 3.4878, "step": 3273 }, { "epoch": 0.15326818421206626, "grad_norm": 1.4921875, "learning_rate": 0.00019819039162971447, "loss": 3.6411, "step": 3274 }, { "epoch": 0.15331499795189887, "grad_norm": 1.4375, "learning_rate": 0.000198189285990177, "loss": 2.7764, "step": 3275 }, { "epoch": 0.15336181169173152, "grad_norm": 1.2734375, "learning_rate": 0.0001981881800160652, "loss": 2.9944, "step": 3276 }, { "epoch": 0.15340862543156417, "grad_norm": 1.7421875, "learning_rate": 0.00019818707370738284, "loss": 3.3579, "step": 3277 }, { "epoch": 0.1534554391713968, "grad_norm": 1.3046875, "learning_rate": 0.00019818596706413372, "loss": 3.8381, "step": 3278 }, { "epoch": 0.15350225291122946, "grad_norm": 1.515625, "learning_rate": 0.0001981848600863216, "loss": 3.7701, "step": 3279 }, { "epoch": 0.15354906665106208, "grad_norm": 1.7109375, "learning_rate": 0.00019818375277395024, "loss": 4.1623, "step": 3280 }, { "epoch": 0.15359588039089472, "grad_norm": 1.109375, "learning_rate": 0.00019818264512702342, "loss": 5.1531, "step": 3281 }, { "epoch": 0.15364269413072737, "grad_norm": 1.2890625, "learning_rate": 0.0001981815371455449, "loss": 3.6468, "step": 3282 }, { "epoch": 0.15368950787056002, "grad_norm": 1.3828125, "learning_rate": 0.0001981804288295185, "loss": 4.053, "step": 3283 }, { "epoch": 0.15373632161039266, "grad_norm": 1.3125, "learning_rate": 0.0001981793201789479, "loss": 3.666, "step": 3284 }, { "epoch": 0.15378313535022528, "grad_norm": 2.046875, "learning_rate": 0.00019817821119383702, "loss": 3.6824, "step": 3285 }, { "epoch": 0.15382994909005793, "grad_norm": 2.03125, "learning_rate": 0.0001981771018741895, "loss": 3.9513, "step": 3286 }, { "epoch": 0.15387676282989057, "grad_norm": 1.3359375, "learning_rate": 0.00019817599222000923, "loss": 4.0539, "step": 3287 }, { "epoch": 0.15392357656972322, "grad_norm": 1.5, "learning_rate": 0.00019817488223129989, "loss": 4.0053, "step": 3288 }, { "epoch": 0.15397039030955587, "grad_norm": 1.2109375, "learning_rate": 0.00019817377190806535, "loss": 3.5165, "step": 3289 }, { "epoch": 0.15401720404938848, "grad_norm": 1.3671875, "learning_rate": 0.00019817266125030932, "loss": 3.5568, "step": 3290 }, { "epoch": 0.15406401778922113, "grad_norm": 1.8359375, "learning_rate": 0.00019817155025803564, "loss": 3.6226, "step": 3291 }, { "epoch": 0.15411083152905378, "grad_norm": 1.375, "learning_rate": 0.0001981704389312481, "loss": 4.2739, "step": 3292 }, { "epoch": 0.15415764526888642, "grad_norm": 1.4609375, "learning_rate": 0.00019816932726995044, "loss": 3.5777, "step": 3293 }, { "epoch": 0.15420445900871907, "grad_norm": 1.2421875, "learning_rate": 0.00019816821527414647, "loss": 3.7949, "step": 3294 }, { "epoch": 0.1542512727485517, "grad_norm": 1.3828125, "learning_rate": 0.00019816710294384, "loss": 3.9422, "step": 3295 }, { "epoch": 0.15429808648838433, "grad_norm": 1.375, "learning_rate": 0.00019816599027903478, "loss": 3.2014, "step": 3296 }, { "epoch": 0.15434490022821698, "grad_norm": 1.46875, "learning_rate": 0.00019816487727973464, "loss": 3.2703, "step": 3297 }, { "epoch": 0.15439171396804963, "grad_norm": 1.5, "learning_rate": 0.00019816376394594335, "loss": 3.9529, "step": 3298 }, { "epoch": 0.15443852770788227, "grad_norm": 1.328125, "learning_rate": 0.0001981626502776647, "loss": 3.5726, "step": 3299 }, { "epoch": 0.1544853414477149, "grad_norm": 1.34375, "learning_rate": 0.00019816153627490247, "loss": 3.3926, "step": 3300 }, { "epoch": 0.15453215518754754, "grad_norm": 1.4140625, "learning_rate": 0.00019816042193766052, "loss": 3.966, "step": 3301 }, { "epoch": 0.15457896892738018, "grad_norm": 1.6328125, "learning_rate": 0.00019815930726594261, "loss": 3.8549, "step": 3302 }, { "epoch": 0.15462578266721283, "grad_norm": 1.65625, "learning_rate": 0.0001981581922597525, "loss": 4.1295, "step": 3303 }, { "epoch": 0.15467259640704548, "grad_norm": 1.84375, "learning_rate": 0.00019815707691909405, "loss": 3.9381, "step": 3304 }, { "epoch": 0.15471941014687812, "grad_norm": 1.1640625, "learning_rate": 0.00019815596124397102, "loss": 3.4882, "step": 3305 }, { "epoch": 0.15476622388671074, "grad_norm": 1.3984375, "learning_rate": 0.00019815484523438723, "loss": 3.9655, "step": 3306 }, { "epoch": 0.1548130376265434, "grad_norm": 1.3359375, "learning_rate": 0.0001981537288903465, "loss": 3.922, "step": 3307 }, { "epoch": 0.15485985136637603, "grad_norm": 1.1171875, "learning_rate": 0.00019815261221185258, "loss": 3.7054, "step": 3308 }, { "epoch": 0.15490666510620868, "grad_norm": 1.078125, "learning_rate": 0.0001981514951989093, "loss": 2.716, "step": 3309 }, { "epoch": 0.15495347884604133, "grad_norm": 1.5390625, "learning_rate": 0.00019815037785152052, "loss": 4.3684, "step": 3310 }, { "epoch": 0.15500029258587394, "grad_norm": 2.0625, "learning_rate": 0.00019814926016968997, "loss": 4.0704, "step": 3311 }, { "epoch": 0.1550471063257066, "grad_norm": 2.265625, "learning_rate": 0.00019814814215342148, "loss": 3.8368, "step": 3312 }, { "epoch": 0.15509392006553924, "grad_norm": 1.4609375, "learning_rate": 0.0001981470238027189, "loss": 3.647, "step": 3313 }, { "epoch": 0.15514073380537188, "grad_norm": 1.625, "learning_rate": 0.00019814590511758596, "loss": 4.2971, "step": 3314 }, { "epoch": 0.15518754754520453, "grad_norm": 1.65625, "learning_rate": 0.00019814478609802655, "loss": 3.5743, "step": 3315 }, { "epoch": 0.15523436128503715, "grad_norm": 1.3984375, "learning_rate": 0.0001981436667440445, "loss": 3.8317, "step": 3316 }, { "epoch": 0.1552811750248698, "grad_norm": 1.328125, "learning_rate": 0.0001981425470556435, "loss": 3.8547, "step": 3317 }, { "epoch": 0.15532798876470244, "grad_norm": 1.1171875, "learning_rate": 0.0001981414270328275, "loss": 3.4932, "step": 3318 }, { "epoch": 0.1553748025045351, "grad_norm": 1.703125, "learning_rate": 0.00019814030667560025, "loss": 4.0587, "step": 3319 }, { "epoch": 0.15542161624436773, "grad_norm": 2.453125, "learning_rate": 0.00019813918598396554, "loss": 3.9, "step": 3320 }, { "epoch": 0.15546842998420035, "grad_norm": 1.3671875, "learning_rate": 0.00019813806495792727, "loss": 3.9856, "step": 3321 }, { "epoch": 0.155515243724033, "grad_norm": 1.109375, "learning_rate": 0.0001981369435974892, "loss": 3.3355, "step": 3322 }, { "epoch": 0.15556205746386564, "grad_norm": 1.2109375, "learning_rate": 0.00019813582190265517, "loss": 3.8441, "step": 3323 }, { "epoch": 0.1556088712036983, "grad_norm": 1.1484375, "learning_rate": 0.000198134699873429, "loss": 3.7632, "step": 3324 }, { "epoch": 0.15565568494353094, "grad_norm": 1.3984375, "learning_rate": 0.0001981335775098145, "loss": 3.5896, "step": 3325 }, { "epoch": 0.15570249868336355, "grad_norm": 2.53125, "learning_rate": 0.00019813245481181552, "loss": 3.6636, "step": 3326 }, { "epoch": 0.1557493124231962, "grad_norm": 1.4921875, "learning_rate": 0.00019813133177943587, "loss": 3.7984, "step": 3327 }, { "epoch": 0.15579612616302885, "grad_norm": 1.25, "learning_rate": 0.00019813020841267937, "loss": 3.8176, "step": 3328 }, { "epoch": 0.1558429399028615, "grad_norm": 1.15625, "learning_rate": 0.00019812908471154988, "loss": 3.6461, "step": 3329 }, { "epoch": 0.15588975364269414, "grad_norm": 1.5234375, "learning_rate": 0.0001981279606760512, "loss": 3.2664, "step": 3330 }, { "epoch": 0.15593656738252676, "grad_norm": 1.0078125, "learning_rate": 0.00019812683630618714, "loss": 3.829, "step": 3331 }, { "epoch": 0.1559833811223594, "grad_norm": 1.84375, "learning_rate": 0.00019812571160196155, "loss": 4.3331, "step": 3332 }, { "epoch": 0.15603019486219205, "grad_norm": 1.671875, "learning_rate": 0.00019812458656337832, "loss": 4.0278, "step": 3333 }, { "epoch": 0.1560770086020247, "grad_norm": 1.6171875, "learning_rate": 0.0001981234611904412, "loss": 4.1921, "step": 3334 }, { "epoch": 0.15612382234185734, "grad_norm": 1.453125, "learning_rate": 0.00019812233548315407, "loss": 3.5001, "step": 3335 }, { "epoch": 0.15617063608169, "grad_norm": 1.2734375, "learning_rate": 0.00019812120944152076, "loss": 3.9554, "step": 3336 }, { "epoch": 0.1562174498215226, "grad_norm": 1.5078125, "learning_rate": 0.0001981200830655451, "loss": 3.5774, "step": 3337 }, { "epoch": 0.15626426356135525, "grad_norm": 1.125, "learning_rate": 0.0001981189563552309, "loss": 3.7562, "step": 3338 }, { "epoch": 0.1563110773011879, "grad_norm": 1.21875, "learning_rate": 0.00019811782931058205, "loss": 3.6046, "step": 3339 }, { "epoch": 0.15635789104102055, "grad_norm": 1.125, "learning_rate": 0.00019811670193160236, "loss": 3.8962, "step": 3340 }, { "epoch": 0.1564047047808532, "grad_norm": 1.0625, "learning_rate": 0.00019811557421829567, "loss": 3.7299, "step": 3341 }, { "epoch": 0.1564515185206858, "grad_norm": 1.046875, "learning_rate": 0.00019811444617066586, "loss": 6.089, "step": 3342 }, { "epoch": 0.15649833226051846, "grad_norm": 1.0546875, "learning_rate": 0.0001981133177887167, "loss": 2.8053, "step": 3343 }, { "epoch": 0.1565451460003511, "grad_norm": 1.34375, "learning_rate": 0.00019811218907245213, "loss": 3.994, "step": 3344 }, { "epoch": 0.15659195974018375, "grad_norm": 1.3671875, "learning_rate": 0.00019811106002187593, "loss": 3.9333, "step": 3345 }, { "epoch": 0.1566387734800164, "grad_norm": 1.2890625, "learning_rate": 0.00019810993063699197, "loss": 3.9986, "step": 3346 }, { "epoch": 0.15668558721984902, "grad_norm": 1.5546875, "learning_rate": 0.0001981088009178041, "loss": 3.8131, "step": 3347 }, { "epoch": 0.15673240095968166, "grad_norm": 2.21875, "learning_rate": 0.00019810767086431613, "loss": 4.0845, "step": 3348 }, { "epoch": 0.1567792146995143, "grad_norm": 1.2421875, "learning_rate": 0.00019810654047653197, "loss": 3.6994, "step": 3349 }, { "epoch": 0.15682602843934695, "grad_norm": 1.2265625, "learning_rate": 0.00019810540975445542, "loss": 3.9757, "step": 3350 }, { "epoch": 0.1568728421791796, "grad_norm": 1.4921875, "learning_rate": 0.00019810427869809038, "loss": 3.4727, "step": 3351 }, { "epoch": 0.15691965591901222, "grad_norm": 1.3671875, "learning_rate": 0.00019810314730744067, "loss": 4.0044, "step": 3352 }, { "epoch": 0.15696646965884486, "grad_norm": 1.40625, "learning_rate": 0.00019810201558251016, "loss": 3.5646, "step": 3353 }, { "epoch": 0.1570132833986775, "grad_norm": 1.171875, "learning_rate": 0.0001981008835233027, "loss": 4.225, "step": 3354 }, { "epoch": 0.15706009713851016, "grad_norm": 1.7734375, "learning_rate": 0.00019809975112982216, "loss": 3.6713, "step": 3355 }, { "epoch": 0.1571069108783428, "grad_norm": 1.109375, "learning_rate": 0.00019809861840207237, "loss": 3.7645, "step": 3356 }, { "epoch": 0.15715372461817542, "grad_norm": 1.0703125, "learning_rate": 0.00019809748534005723, "loss": 3.5805, "step": 3357 }, { "epoch": 0.15720053835800807, "grad_norm": 1.4140625, "learning_rate": 0.00019809635194378055, "loss": 3.8332, "step": 3358 }, { "epoch": 0.15724735209784071, "grad_norm": 1.40625, "learning_rate": 0.00019809521821324624, "loss": 3.7875, "step": 3359 }, { "epoch": 0.15729416583767336, "grad_norm": 1.2734375, "learning_rate": 0.00019809408414845817, "loss": 3.3942, "step": 3360 }, { "epoch": 0.157340979577506, "grad_norm": 1.2265625, "learning_rate": 0.00019809294974942011, "loss": 3.6757, "step": 3361 }, { "epoch": 0.15738779331733863, "grad_norm": 1.265625, "learning_rate": 0.00019809181501613605, "loss": 3.3934, "step": 3362 }, { "epoch": 0.15743460705717127, "grad_norm": 1.09375, "learning_rate": 0.00019809067994860978, "loss": 4.4703, "step": 3363 }, { "epoch": 0.15748142079700392, "grad_norm": 1.65625, "learning_rate": 0.00019808954454684518, "loss": 4.1078, "step": 3364 }, { "epoch": 0.15752823453683656, "grad_norm": 1.234375, "learning_rate": 0.00019808840881084617, "loss": 3.4954, "step": 3365 }, { "epoch": 0.1575750482766692, "grad_norm": 1.25, "learning_rate": 0.00019808727274061653, "loss": 3.5262, "step": 3366 }, { "epoch": 0.15762186201650186, "grad_norm": 1.125, "learning_rate": 0.00019808613633616018, "loss": 5.415, "step": 3367 }, { "epoch": 0.15766867575633448, "grad_norm": 1.703125, "learning_rate": 0.00019808499959748102, "loss": 3.6895, "step": 3368 }, { "epoch": 0.15771548949616712, "grad_norm": 1.4375, "learning_rate": 0.0001980838625245829, "loss": 3.6276, "step": 3369 }, { "epoch": 0.15776230323599977, "grad_norm": 1.265625, "learning_rate": 0.00019808272511746963, "loss": 3.6132, "step": 3370 }, { "epoch": 0.15780911697583241, "grad_norm": 1.03125, "learning_rate": 0.0001980815873761452, "loss": 3.7869, "step": 3371 }, { "epoch": 0.15785593071566506, "grad_norm": 1.484375, "learning_rate": 0.00019808044930061337, "loss": 3.5918, "step": 3372 }, { "epoch": 0.15790274445549768, "grad_norm": 1.2890625, "learning_rate": 0.00019807931089087816, "loss": 3.679, "step": 3373 }, { "epoch": 0.15794955819533033, "grad_norm": 1.0703125, "learning_rate": 0.00019807817214694332, "loss": 3.8293, "step": 3374 }, { "epoch": 0.15799637193516297, "grad_norm": 1.171875, "learning_rate": 0.00019807703306881278, "loss": 3.4635, "step": 3375 }, { "epoch": 0.15804318567499562, "grad_norm": 1.28125, "learning_rate": 0.00019807589365649042, "loss": 3.4371, "step": 3376 }, { "epoch": 0.15808999941482826, "grad_norm": 1.328125, "learning_rate": 0.00019807475390998011, "loss": 3.6385, "step": 3377 }, { "epoch": 0.15813681315466088, "grad_norm": 1.3046875, "learning_rate": 0.00019807361382928577, "loss": 3.5396, "step": 3378 }, { "epoch": 0.15818362689449353, "grad_norm": 1.5078125, "learning_rate": 0.00019807247341441125, "loss": 3.7973, "step": 3379 }, { "epoch": 0.15823044063432617, "grad_norm": 0.94921875, "learning_rate": 0.00019807133266536045, "loss": 4.6212, "step": 3380 }, { "epoch": 0.15827725437415882, "grad_norm": 1.2578125, "learning_rate": 0.00019807019158213723, "loss": 3.8049, "step": 3381 }, { "epoch": 0.15832406811399147, "grad_norm": 1.1171875, "learning_rate": 0.00019806905016474553, "loss": 3.8021, "step": 3382 }, { "epoch": 0.15837088185382409, "grad_norm": 1.3359375, "learning_rate": 0.0001980679084131892, "loss": 3.9538, "step": 3383 }, { "epoch": 0.15841769559365673, "grad_norm": 2.03125, "learning_rate": 0.00019806676632747213, "loss": 3.8122, "step": 3384 }, { "epoch": 0.15846450933348938, "grad_norm": 1.2734375, "learning_rate": 0.00019806562390759823, "loss": 3.6521, "step": 3385 }, { "epoch": 0.15851132307332202, "grad_norm": 1.734375, "learning_rate": 0.00019806448115357138, "loss": 3.442, "step": 3386 }, { "epoch": 0.15855813681315467, "grad_norm": 1.375, "learning_rate": 0.00019806333806539548, "loss": 3.5119, "step": 3387 }, { "epoch": 0.1586049505529873, "grad_norm": 1.359375, "learning_rate": 0.00019806219464307443, "loss": 3.9489, "step": 3388 }, { "epoch": 0.15865176429281994, "grad_norm": 1.578125, "learning_rate": 0.00019806105088661212, "loss": 3.191, "step": 3389 }, { "epoch": 0.15869857803265258, "grad_norm": 1.6640625, "learning_rate": 0.00019805990679601246, "loss": 3.9119, "step": 3390 }, { "epoch": 0.15874539177248523, "grad_norm": 1.1171875, "learning_rate": 0.00019805876237127929, "loss": 3.4893, "step": 3391 }, { "epoch": 0.15879220551231787, "grad_norm": 1.28125, "learning_rate": 0.0001980576176124166, "loss": 3.435, "step": 3392 }, { "epoch": 0.1588390192521505, "grad_norm": 1.390625, "learning_rate": 0.00019805647251942824, "loss": 3.2937, "step": 3393 }, { "epoch": 0.15888583299198314, "grad_norm": 1.3046875, "learning_rate": 0.0001980553270923181, "loss": 3.4949, "step": 3394 }, { "epoch": 0.15893264673181579, "grad_norm": 1.265625, "learning_rate": 0.00019805418133109012, "loss": 3.5838, "step": 3395 }, { "epoch": 0.15897946047164843, "grad_norm": 2.65625, "learning_rate": 0.00019805303523574817, "loss": 4.3372, "step": 3396 }, { "epoch": 0.15902627421148108, "grad_norm": 1.4140625, "learning_rate": 0.00019805188880629617, "loss": 3.2519, "step": 3397 }, { "epoch": 0.15907308795131372, "grad_norm": 1.2578125, "learning_rate": 0.00019805074204273804, "loss": 2.8968, "step": 3398 }, { "epoch": 0.15911990169114634, "grad_norm": 1.375, "learning_rate": 0.00019804959494507766, "loss": 4.0469, "step": 3399 }, { "epoch": 0.159166715430979, "grad_norm": 1.0859375, "learning_rate": 0.00019804844751331895, "loss": 3.4352, "step": 3400 }, { "epoch": 0.15921352917081164, "grad_norm": 1.6328125, "learning_rate": 0.00019804729974746583, "loss": 3.6132, "step": 3401 }, { "epoch": 0.15926034291064428, "grad_norm": 1.28125, "learning_rate": 0.00019804615164752224, "loss": 3.8497, "step": 3402 }, { "epoch": 0.15930715665047693, "grad_norm": 1.2265625, "learning_rate": 0.000198045003213492, "loss": 3.741, "step": 3403 }, { "epoch": 0.15935397039030955, "grad_norm": 1.484375, "learning_rate": 0.0001980438544453791, "loss": 3.5574, "step": 3404 }, { "epoch": 0.1594007841301422, "grad_norm": 1.4140625, "learning_rate": 0.00019804270534318742, "loss": 3.5039, "step": 3405 }, { "epoch": 0.15944759786997484, "grad_norm": 1.4453125, "learning_rate": 0.0001980415559069209, "loss": 4.0975, "step": 3406 }, { "epoch": 0.15949441160980748, "grad_norm": 1.515625, "learning_rate": 0.00019804040613658345, "loss": 3.7668, "step": 3407 }, { "epoch": 0.15954122534964013, "grad_norm": 1.3203125, "learning_rate": 0.000198039256032179, "loss": 4.0191, "step": 3408 }, { "epoch": 0.15958803908947275, "grad_norm": 1.1015625, "learning_rate": 0.00019803810559371142, "loss": 3.6452, "step": 3409 }, { "epoch": 0.1596348528293054, "grad_norm": 1.4140625, "learning_rate": 0.00019803695482118467, "loss": 3.2861, "step": 3410 }, { "epoch": 0.15968166656913804, "grad_norm": 1.5546875, "learning_rate": 0.00019803580371460265, "loss": 3.784, "step": 3411 }, { "epoch": 0.1597284803089707, "grad_norm": 1.1640625, "learning_rate": 0.00019803465227396933, "loss": 3.5387, "step": 3412 }, { "epoch": 0.15977529404880333, "grad_norm": 1.5, "learning_rate": 0.0001980335004992886, "loss": 3.9164, "step": 3413 }, { "epoch": 0.15982210778863595, "grad_norm": 1.4375, "learning_rate": 0.00019803234839056434, "loss": 3.4975, "step": 3414 }, { "epoch": 0.1598689215284686, "grad_norm": 2.0, "learning_rate": 0.00019803119594780056, "loss": 4.1753, "step": 3415 }, { "epoch": 0.15991573526830125, "grad_norm": 2.0, "learning_rate": 0.00019803004317100112, "loss": 3.7993, "step": 3416 }, { "epoch": 0.1599625490081339, "grad_norm": 1.3046875, "learning_rate": 0.00019802889006016998, "loss": 3.8833, "step": 3417 }, { "epoch": 0.16000936274796654, "grad_norm": 1.34375, "learning_rate": 0.00019802773661531107, "loss": 3.8278, "step": 3418 }, { "epoch": 0.16005617648779916, "grad_norm": 1.09375, "learning_rate": 0.0001980265828364283, "loss": 2.9535, "step": 3419 }, { "epoch": 0.1601029902276318, "grad_norm": 1.4375, "learning_rate": 0.00019802542872352565, "loss": 3.5564, "step": 3420 }, { "epoch": 0.16014980396746445, "grad_norm": 1.3359375, "learning_rate": 0.000198024274276607, "loss": 3.685, "step": 3421 }, { "epoch": 0.1601966177072971, "grad_norm": 1.2109375, "learning_rate": 0.0001980231194956763, "loss": 3.4893, "step": 3422 }, { "epoch": 0.16024343144712974, "grad_norm": 1.3984375, "learning_rate": 0.00019802196438073748, "loss": 3.4629, "step": 3423 }, { "epoch": 0.16029024518696236, "grad_norm": 1.0625, "learning_rate": 0.0001980208089317945, "loss": 3.8318, "step": 3424 }, { "epoch": 0.160337058926795, "grad_norm": 1.3828125, "learning_rate": 0.00019801965314885124, "loss": 3.8382, "step": 3425 }, { "epoch": 0.16038387266662765, "grad_norm": 1.5703125, "learning_rate": 0.00019801849703191172, "loss": 3.6618, "step": 3426 }, { "epoch": 0.1604306864064603, "grad_norm": 1.6640625, "learning_rate": 0.0001980173405809798, "loss": 4.2054, "step": 3427 }, { "epoch": 0.16047750014629295, "grad_norm": 1.46875, "learning_rate": 0.00019801618379605948, "loss": 3.7107, "step": 3428 }, { "epoch": 0.1605243138861256, "grad_norm": 1.3984375, "learning_rate": 0.0001980150266771547, "loss": 3.5298, "step": 3429 }, { "epoch": 0.1605711276259582, "grad_norm": 1.640625, "learning_rate": 0.00019801386922426933, "loss": 3.8592, "step": 3430 }, { "epoch": 0.16061794136579086, "grad_norm": 1.265625, "learning_rate": 0.00019801271143740743, "loss": 3.7402, "step": 3431 }, { "epoch": 0.1606647551056235, "grad_norm": 1.265625, "learning_rate": 0.00019801155331657282, "loss": 3.6417, "step": 3432 }, { "epoch": 0.16071156884545615, "grad_norm": 1.5, "learning_rate": 0.00019801039486176954, "loss": 3.5247, "step": 3433 }, { "epoch": 0.1607583825852888, "grad_norm": 1.3515625, "learning_rate": 0.0001980092360730015, "loss": 3.7952, "step": 3434 }, { "epoch": 0.1608051963251214, "grad_norm": 1.0390625, "learning_rate": 0.00019800807695027266, "loss": 3.6101, "step": 3435 }, { "epoch": 0.16085201006495406, "grad_norm": 1.28125, "learning_rate": 0.00019800691749358697, "loss": 5.6164, "step": 3436 }, { "epoch": 0.1608988238047867, "grad_norm": 1.2265625, "learning_rate": 0.00019800575770294833, "loss": 3.8697, "step": 3437 }, { "epoch": 0.16094563754461935, "grad_norm": 1.5078125, "learning_rate": 0.0001980045975783608, "loss": 3.7256, "step": 3438 }, { "epoch": 0.160992451284452, "grad_norm": 1.234375, "learning_rate": 0.00019800343711982822, "loss": 2.9618, "step": 3439 }, { "epoch": 0.16103926502428462, "grad_norm": 1.71875, "learning_rate": 0.00019800227632735457, "loss": 5.2375, "step": 3440 }, { "epoch": 0.16108607876411726, "grad_norm": 1.21875, "learning_rate": 0.0001980011152009439, "loss": 3.4084, "step": 3441 }, { "epoch": 0.1611328925039499, "grad_norm": 1.671875, "learning_rate": 0.00019799995374060006, "loss": 3.689, "step": 3442 }, { "epoch": 0.16117970624378256, "grad_norm": 1.46875, "learning_rate": 0.00019799879194632702, "loss": 3.715, "step": 3443 }, { "epoch": 0.1612265199836152, "grad_norm": 1.21875, "learning_rate": 0.0001979976298181288, "loss": 3.6515, "step": 3444 }, { "epoch": 0.16127333372344782, "grad_norm": 1.703125, "learning_rate": 0.0001979964673560093, "loss": 5.2128, "step": 3445 }, { "epoch": 0.16132014746328047, "grad_norm": 1.3203125, "learning_rate": 0.0001979953045599725, "loss": 3.274, "step": 3446 }, { "epoch": 0.1613669612031131, "grad_norm": 1.234375, "learning_rate": 0.00019799414143002237, "loss": 3.5952, "step": 3447 }, { "epoch": 0.16141377494294576, "grad_norm": 1.390625, "learning_rate": 0.00019799297796616286, "loss": 4.0041, "step": 3448 }, { "epoch": 0.1614605886827784, "grad_norm": 1.625, "learning_rate": 0.00019799181416839794, "loss": 3.2469, "step": 3449 }, { "epoch": 0.16150740242261102, "grad_norm": 1.2265625, "learning_rate": 0.00019799065003673158, "loss": 3.4625, "step": 3450 }, { "epoch": 0.16155421616244367, "grad_norm": 1.171875, "learning_rate": 0.00019798948557116778, "loss": 3.5977, "step": 3451 }, { "epoch": 0.16160102990227632, "grad_norm": 1.40625, "learning_rate": 0.00019798832077171042, "loss": 3.3871, "step": 3452 }, { "epoch": 0.16164784364210896, "grad_norm": 1.125, "learning_rate": 0.00019798715563836355, "loss": 3.9182, "step": 3453 }, { "epoch": 0.1616946573819416, "grad_norm": 1.65625, "learning_rate": 0.0001979859901711311, "loss": 3.7766, "step": 3454 }, { "epoch": 0.16174147112177423, "grad_norm": 2.125, "learning_rate": 0.00019798482437001705, "loss": 3.234, "step": 3455 }, { "epoch": 0.16178828486160687, "grad_norm": 1.53125, "learning_rate": 0.0001979836582350254, "loss": 3.8275, "step": 3456 }, { "epoch": 0.16183509860143952, "grad_norm": 1.1484375, "learning_rate": 0.00019798249176616008, "loss": 3.692, "step": 3457 }, { "epoch": 0.16188191234127217, "grad_norm": 1.4375, "learning_rate": 0.00019798132496342508, "loss": 4.0808, "step": 3458 }, { "epoch": 0.1619287260811048, "grad_norm": 1.15625, "learning_rate": 0.00019798015782682436, "loss": 3.2866, "step": 3459 }, { "epoch": 0.16197553982093746, "grad_norm": 1.9296875, "learning_rate": 0.00019797899035636194, "loss": 3.8865, "step": 3460 }, { "epoch": 0.16202235356077008, "grad_norm": 1.3828125, "learning_rate": 0.00019797782255204178, "loss": 3.875, "step": 3461 }, { "epoch": 0.16206916730060272, "grad_norm": 1.6796875, "learning_rate": 0.00019797665441386784, "loss": 4.0673, "step": 3462 }, { "epoch": 0.16211598104043537, "grad_norm": 1.6640625, "learning_rate": 0.0001979754859418441, "loss": 3.9018, "step": 3463 }, { "epoch": 0.16216279478026802, "grad_norm": 1.4609375, "learning_rate": 0.00019797431713597458, "loss": 3.3013, "step": 3464 }, { "epoch": 0.16220960852010066, "grad_norm": 1.1640625, "learning_rate": 0.00019797314799626324, "loss": 3.9179, "step": 3465 }, { "epoch": 0.16225642225993328, "grad_norm": 1.7109375, "learning_rate": 0.00019797197852271405, "loss": 3.5285, "step": 3466 }, { "epoch": 0.16230323599976593, "grad_norm": 1.21875, "learning_rate": 0.000197970808715331, "loss": 4.1572, "step": 3467 }, { "epoch": 0.16235004973959857, "grad_norm": 1.671875, "learning_rate": 0.0001979696385741181, "loss": 4.0715, "step": 3468 }, { "epoch": 0.16239686347943122, "grad_norm": 1.484375, "learning_rate": 0.0001979684680990793, "loss": 3.7443, "step": 3469 }, { "epoch": 0.16244367721926387, "grad_norm": 1.5, "learning_rate": 0.00019796729729021863, "loss": 3.4897, "step": 3470 }, { "epoch": 0.16249049095909648, "grad_norm": 1.1953125, "learning_rate": 0.00019796612614754003, "loss": 3.6386, "step": 3471 }, { "epoch": 0.16253730469892913, "grad_norm": 2.265625, "learning_rate": 0.00019796495467104755, "loss": 3.3776, "step": 3472 }, { "epoch": 0.16258411843876178, "grad_norm": 1.3125, "learning_rate": 0.00019796378286074512, "loss": 3.2644, "step": 3473 }, { "epoch": 0.16263093217859442, "grad_norm": 1.375, "learning_rate": 0.00019796261071663678, "loss": 3.8793, "step": 3474 }, { "epoch": 0.16267774591842707, "grad_norm": 1.515625, "learning_rate": 0.00019796143823872647, "loss": 3.4045, "step": 3475 }, { "epoch": 0.1627245596582597, "grad_norm": 1.0859375, "learning_rate": 0.0001979602654270183, "loss": 3.7774, "step": 3476 }, { "epoch": 0.16277137339809233, "grad_norm": 1.34375, "learning_rate": 0.00019795909228151613, "loss": 3.5897, "step": 3477 }, { "epoch": 0.16281818713792498, "grad_norm": 1.5546875, "learning_rate": 0.00019795791880222402, "loss": 4.1555, "step": 3478 }, { "epoch": 0.16286500087775763, "grad_norm": 1.3125, "learning_rate": 0.00019795674498914595, "loss": 4.0774, "step": 3479 }, { "epoch": 0.16291181461759027, "grad_norm": 1.5234375, "learning_rate": 0.00019795557084228594, "loss": 3.7089, "step": 3480 }, { "epoch": 0.1629586283574229, "grad_norm": 1.25, "learning_rate": 0.000197954396361648, "loss": 4.098, "step": 3481 }, { "epoch": 0.16300544209725554, "grad_norm": 1.875, "learning_rate": 0.00019795322154723613, "loss": 4.1696, "step": 3482 }, { "epoch": 0.16305225583708818, "grad_norm": 1.375, "learning_rate": 0.0001979520463990543, "loss": 4.2295, "step": 3483 }, { "epoch": 0.16309906957692083, "grad_norm": 1.234375, "learning_rate": 0.00019795087091710657, "loss": 4.1438, "step": 3484 }, { "epoch": 0.16314588331675348, "grad_norm": 1.375, "learning_rate": 0.00019794969510139685, "loss": 3.5327, "step": 3485 }, { "epoch": 0.1631926970565861, "grad_norm": 1.4765625, "learning_rate": 0.00019794851895192924, "loss": 3.6089, "step": 3486 }, { "epoch": 0.16323951079641874, "grad_norm": 1.2265625, "learning_rate": 0.0001979473424687077, "loss": 3.6074, "step": 3487 }, { "epoch": 0.1632863245362514, "grad_norm": 1.453125, "learning_rate": 0.00019794616565173626, "loss": 3.9904, "step": 3488 }, { "epoch": 0.16333313827608403, "grad_norm": 1.171875, "learning_rate": 0.00019794498850101891, "loss": 3.7518, "step": 3489 }, { "epoch": 0.16337995201591668, "grad_norm": 1.140625, "learning_rate": 0.00019794381101655967, "loss": 3.8204, "step": 3490 }, { "epoch": 0.16342676575574933, "grad_norm": 2.0625, "learning_rate": 0.00019794263319836258, "loss": 3.789, "step": 3491 }, { "epoch": 0.16347357949558194, "grad_norm": 1.34375, "learning_rate": 0.00019794145504643161, "loss": 3.5854, "step": 3492 }, { "epoch": 0.1635203932354146, "grad_norm": 1.3046875, "learning_rate": 0.0001979402765607708, "loss": 3.8883, "step": 3493 }, { "epoch": 0.16356720697524724, "grad_norm": 1.234375, "learning_rate": 0.00019793909774138416, "loss": 3.3835, "step": 3494 }, { "epoch": 0.16361402071507988, "grad_norm": 1.3828125, "learning_rate": 0.0001979379185882757, "loss": 3.762, "step": 3495 }, { "epoch": 0.16366083445491253, "grad_norm": 1.4296875, "learning_rate": 0.00019793673910144946, "loss": 3.8376, "step": 3496 }, { "epoch": 0.16370764819474515, "grad_norm": 1.3203125, "learning_rate": 0.00019793555928090942, "loss": 3.8332, "step": 3497 }, { "epoch": 0.1637544619345778, "grad_norm": 1.203125, "learning_rate": 0.00019793437912665965, "loss": 3.8375, "step": 3498 }, { "epoch": 0.16380127567441044, "grad_norm": 1.9765625, "learning_rate": 0.0001979331986387041, "loss": 4.1406, "step": 3499 }, { "epoch": 0.1638480894142431, "grad_norm": 1.640625, "learning_rate": 0.00019793201781704688, "loss": 3.6988, "step": 3500 }, { "epoch": 0.16389490315407573, "grad_norm": 1.453125, "learning_rate": 0.00019793083666169196, "loss": 3.6081, "step": 3501 }, { "epoch": 0.16394171689390835, "grad_norm": 1.9453125, "learning_rate": 0.00019792965517264336, "loss": 3.7922, "step": 3502 }, { "epoch": 0.163988530633741, "grad_norm": 1.1015625, "learning_rate": 0.00019792847334990514, "loss": 3.4, "step": 3503 }, { "epoch": 0.16403534437357364, "grad_norm": 1.1953125, "learning_rate": 0.0001979272911934813, "loss": 3.6535, "step": 3504 }, { "epoch": 0.1640821581134063, "grad_norm": 1.21875, "learning_rate": 0.00019792610870337588, "loss": 3.7006, "step": 3505 }, { "epoch": 0.16412897185323894, "grad_norm": 1.6953125, "learning_rate": 0.0001979249258795929, "loss": 3.3995, "step": 3506 }, { "epoch": 0.16417578559307155, "grad_norm": 1.640625, "learning_rate": 0.00019792374272213637, "loss": 3.8478, "step": 3507 }, { "epoch": 0.1642225993329042, "grad_norm": 1.3671875, "learning_rate": 0.00019792255923101038, "loss": 3.6326, "step": 3508 }, { "epoch": 0.16426941307273685, "grad_norm": 1.0703125, "learning_rate": 0.0001979213754062189, "loss": 2.957, "step": 3509 }, { "epoch": 0.1643162268125695, "grad_norm": 1.484375, "learning_rate": 0.00019792019124776605, "loss": 3.6148, "step": 3510 }, { "epoch": 0.16436304055240214, "grad_norm": 1.4375, "learning_rate": 0.00019791900675565573, "loss": 3.863, "step": 3511 }, { "epoch": 0.16440985429223476, "grad_norm": 1.078125, "learning_rate": 0.0001979178219298921, "loss": 3.4226, "step": 3512 }, { "epoch": 0.1644566680320674, "grad_norm": 1.40625, "learning_rate": 0.00019791663677047917, "loss": 3.6071, "step": 3513 }, { "epoch": 0.16450348177190005, "grad_norm": 1.1015625, "learning_rate": 0.00019791545127742093, "loss": 3.8069, "step": 3514 }, { "epoch": 0.1645502955117327, "grad_norm": 1.453125, "learning_rate": 0.00019791426545072148, "loss": 3.2825, "step": 3515 }, { "epoch": 0.16459710925156534, "grad_norm": 1.359375, "learning_rate": 0.00019791307929038478, "loss": 3.7882, "step": 3516 }, { "epoch": 0.16464392299139796, "grad_norm": 1.2734375, "learning_rate": 0.00019791189279641495, "loss": 3.5063, "step": 3517 }, { "epoch": 0.1646907367312306, "grad_norm": 1.21875, "learning_rate": 0.000197910705968816, "loss": 3.859, "step": 3518 }, { "epoch": 0.16473755047106325, "grad_norm": 1.3359375, "learning_rate": 0.00019790951880759199, "loss": 3.7045, "step": 3519 }, { "epoch": 0.1647843642108959, "grad_norm": 1.3125, "learning_rate": 0.00019790833131274696, "loss": 3.5166, "step": 3520 }, { "epoch": 0.16483117795072855, "grad_norm": 1.421875, "learning_rate": 0.0001979071434842849, "loss": 3.5638, "step": 3521 }, { "epoch": 0.1648779916905612, "grad_norm": 1.2578125, "learning_rate": 0.00019790595532220997, "loss": 3.5397, "step": 3522 }, { "epoch": 0.1649248054303938, "grad_norm": 1.4296875, "learning_rate": 0.00019790476682652615, "loss": 3.9296, "step": 3523 }, { "epoch": 0.16497161917022646, "grad_norm": 1.2109375, "learning_rate": 0.00019790357799723745, "loss": 3.2861, "step": 3524 }, { "epoch": 0.1650184329100591, "grad_norm": 1.3671875, "learning_rate": 0.00019790238883434798, "loss": 3.6804, "step": 3525 }, { "epoch": 0.16506524664989175, "grad_norm": 1.6171875, "learning_rate": 0.0001979011993378618, "loss": 3.8557, "step": 3526 }, { "epoch": 0.1651120603897244, "grad_norm": 1.4609375, "learning_rate": 0.00019790000950778295, "loss": 4.0416, "step": 3527 }, { "epoch": 0.16515887412955702, "grad_norm": 2.3125, "learning_rate": 0.00019789881934411546, "loss": 4.6316, "step": 3528 }, { "epoch": 0.16520568786938966, "grad_norm": 1.265625, "learning_rate": 0.00019789762884686344, "loss": 3.4974, "step": 3529 }, { "epoch": 0.1652525016092223, "grad_norm": 1.140625, "learning_rate": 0.00019789643801603084, "loss": 3.7349, "step": 3530 }, { "epoch": 0.16529931534905495, "grad_norm": 1.1953125, "learning_rate": 0.00019789524685162181, "loss": 3.851, "step": 3531 }, { "epoch": 0.1653461290888876, "grad_norm": 1.3515625, "learning_rate": 0.0001978940553536404, "loss": 3.9046, "step": 3532 }, { "epoch": 0.16539294282872022, "grad_norm": 1.6796875, "learning_rate": 0.00019789286352209063, "loss": 3.8723, "step": 3533 }, { "epoch": 0.16543975656855286, "grad_norm": 1.125, "learning_rate": 0.00019789167135697664, "loss": 3.2168, "step": 3534 }, { "epoch": 0.1654865703083855, "grad_norm": 1.28125, "learning_rate": 0.0001978904788583024, "loss": 3.8592, "step": 3535 }, { "epoch": 0.16553338404821816, "grad_norm": 1.265625, "learning_rate": 0.00019788928602607202, "loss": 3.6043, "step": 3536 }, { "epoch": 0.1655801977880508, "grad_norm": 1.2265625, "learning_rate": 0.00019788809286028954, "loss": 3.9217, "step": 3537 }, { "epoch": 0.16562701152788342, "grad_norm": 1.46875, "learning_rate": 0.00019788689936095906, "loss": 3.534, "step": 3538 }, { "epoch": 0.16567382526771607, "grad_norm": 1.2265625, "learning_rate": 0.00019788570552808462, "loss": 4.899, "step": 3539 }, { "epoch": 0.16572063900754871, "grad_norm": 1.5234375, "learning_rate": 0.0001978845113616703, "loss": 3.6906, "step": 3540 }, { "epoch": 0.16576745274738136, "grad_norm": 1.7421875, "learning_rate": 0.00019788331686172015, "loss": 4.1079, "step": 3541 }, { "epoch": 0.165814266487214, "grad_norm": 1.8046875, "learning_rate": 0.0001978821220282383, "loss": 3.9288, "step": 3542 }, { "epoch": 0.16586108022704663, "grad_norm": 1.3359375, "learning_rate": 0.00019788092686122871, "loss": 3.7995, "step": 3543 }, { "epoch": 0.16590789396687927, "grad_norm": 1.3671875, "learning_rate": 0.00019787973136069556, "loss": 3.6406, "step": 3544 }, { "epoch": 0.16595470770671192, "grad_norm": 1.421875, "learning_rate": 0.00019787853552664288, "loss": 3.3598, "step": 3545 }, { "epoch": 0.16600152144654456, "grad_norm": 1.5390625, "learning_rate": 0.00019787733935907475, "loss": 3.3176, "step": 3546 }, { "epoch": 0.1660483351863772, "grad_norm": 1.1328125, "learning_rate": 0.00019787614285799525, "loss": 3.4843, "step": 3547 }, { "epoch": 0.16609514892620983, "grad_norm": 1.390625, "learning_rate": 0.00019787494602340844, "loss": 3.5629, "step": 3548 }, { "epoch": 0.16614196266604248, "grad_norm": 2.03125, "learning_rate": 0.0001978737488553184, "loss": 3.837, "step": 3549 }, { "epoch": 0.16618877640587512, "grad_norm": 1.5078125, "learning_rate": 0.00019787255135372923, "loss": 3.1519, "step": 3550 }, { "epoch": 0.16623559014570777, "grad_norm": 1.375, "learning_rate": 0.00019787135351864498, "loss": 3.7211, "step": 3551 }, { "epoch": 0.16628240388554041, "grad_norm": 1.203125, "learning_rate": 0.00019787015535006978, "loss": 3.7548, "step": 3552 }, { "epoch": 0.16632921762537306, "grad_norm": 1.0390625, "learning_rate": 0.00019786895684800767, "loss": 3.2224, "step": 3553 }, { "epoch": 0.16637603136520568, "grad_norm": 1.53125, "learning_rate": 0.00019786775801246276, "loss": 3.4704, "step": 3554 }, { "epoch": 0.16642284510503833, "grad_norm": 1.4453125, "learning_rate": 0.0001978665588434391, "loss": 3.9243, "step": 3555 }, { "epoch": 0.16646965884487097, "grad_norm": 2.671875, "learning_rate": 0.0001978653593409408, "loss": 3.8613, "step": 3556 }, { "epoch": 0.16651647258470362, "grad_norm": 1.359375, "learning_rate": 0.00019786415950497194, "loss": 4.0002, "step": 3557 }, { "epoch": 0.16656328632453626, "grad_norm": 1.296875, "learning_rate": 0.00019786295933553664, "loss": 3.4295, "step": 3558 }, { "epoch": 0.16661010006436888, "grad_norm": 1.96875, "learning_rate": 0.00019786175883263895, "loss": 3.5811, "step": 3559 }, { "epoch": 0.16665691380420153, "grad_norm": 1.8046875, "learning_rate": 0.00019786055799628297, "loss": 3.2908, "step": 3560 }, { "epoch": 0.16670372754403417, "grad_norm": 1.6875, "learning_rate": 0.0001978593568264728, "loss": 3.7092, "step": 3561 }, { "epoch": 0.16675054128386682, "grad_norm": 1.03125, "learning_rate": 0.00019785815532321252, "loss": 4.0977, "step": 3562 }, { "epoch": 0.16679735502369947, "grad_norm": 1.3125, "learning_rate": 0.00019785695348650625, "loss": 3.7565, "step": 3563 }, { "epoch": 0.16684416876353209, "grad_norm": 1.15625, "learning_rate": 0.00019785575131635804, "loss": 3.6591, "step": 3564 }, { "epoch": 0.16689098250336473, "grad_norm": 1.4375, "learning_rate": 0.00019785454881277202, "loss": 3.8473, "step": 3565 }, { "epoch": 0.16693779624319738, "grad_norm": 1.3046875, "learning_rate": 0.00019785334597575228, "loss": 3.5466, "step": 3566 }, { "epoch": 0.16698460998303002, "grad_norm": 1.2578125, "learning_rate": 0.00019785214280530292, "loss": 3.9222, "step": 3567 }, { "epoch": 0.16703142372286267, "grad_norm": 1.1328125, "learning_rate": 0.00019785093930142807, "loss": 3.5551, "step": 3568 }, { "epoch": 0.1670782374626953, "grad_norm": 1.4921875, "learning_rate": 0.00019784973546413176, "loss": 3.9799, "step": 3569 }, { "epoch": 0.16712505120252794, "grad_norm": 1.171875, "learning_rate": 0.00019784853129341815, "loss": 3.5822, "step": 3570 }, { "epoch": 0.16717186494236058, "grad_norm": 1.2578125, "learning_rate": 0.00019784732678929134, "loss": 3.8784, "step": 3571 }, { "epoch": 0.16721867868219323, "grad_norm": 1.5390625, "learning_rate": 0.0001978461219517554, "loss": 3.8299, "step": 3572 }, { "epoch": 0.16726549242202587, "grad_norm": 2.0625, "learning_rate": 0.00019784491678081448, "loss": 3.8575, "step": 3573 }, { "epoch": 0.1673123061618585, "grad_norm": 1.1484375, "learning_rate": 0.00019784371127647263, "loss": 3.5653, "step": 3574 }, { "epoch": 0.16735911990169114, "grad_norm": 0.9921875, "learning_rate": 0.00019784250543873402, "loss": 3.8433, "step": 3575 }, { "epoch": 0.16740593364152379, "grad_norm": 1.421875, "learning_rate": 0.00019784129926760268, "loss": 3.5591, "step": 3576 }, { "epoch": 0.16745274738135643, "grad_norm": 2.09375, "learning_rate": 0.0001978400927630828, "loss": 4.1046, "step": 3577 }, { "epoch": 0.16749956112118908, "grad_norm": 1.1875, "learning_rate": 0.00019783888592517847, "loss": 3.7778, "step": 3578 }, { "epoch": 0.1675463748610217, "grad_norm": 1.1875, "learning_rate": 0.00019783767875389377, "loss": 3.9864, "step": 3579 }, { "epoch": 0.16759318860085434, "grad_norm": 1.15625, "learning_rate": 0.00019783647124923284, "loss": 3.8753, "step": 3580 }, { "epoch": 0.167640002340687, "grad_norm": 1.3125, "learning_rate": 0.0001978352634111998, "loss": 3.7861, "step": 3581 }, { "epoch": 0.16768681608051964, "grad_norm": 1.3359375, "learning_rate": 0.00019783405523979877, "loss": 3.731, "step": 3582 }, { "epoch": 0.16773362982035228, "grad_norm": 1.6640625, "learning_rate": 0.0001978328467350338, "loss": 3.4946, "step": 3583 }, { "epoch": 0.16778044356018493, "grad_norm": 1.8046875, "learning_rate": 0.0001978316378969091, "loss": 3.5619, "step": 3584 }, { "epoch": 0.16782725730001755, "grad_norm": 1.21875, "learning_rate": 0.00019783042872542874, "loss": 3.5656, "step": 3585 }, { "epoch": 0.1678740710398502, "grad_norm": 1.5546875, "learning_rate": 0.00019782921922059684, "loss": 3.8602, "step": 3586 }, { "epoch": 0.16792088477968284, "grad_norm": 1.4609375, "learning_rate": 0.00019782800938241755, "loss": 3.8493, "step": 3587 }, { "epoch": 0.16796769851951548, "grad_norm": 1.4765625, "learning_rate": 0.00019782679921089496, "loss": 3.4196, "step": 3588 }, { "epoch": 0.16801451225934813, "grad_norm": 1.4375, "learning_rate": 0.0001978255887060332, "loss": 3.9082, "step": 3589 }, { "epoch": 0.16806132599918075, "grad_norm": 1.640625, "learning_rate": 0.0001978243778678364, "loss": 3.5845, "step": 3590 }, { "epoch": 0.1681081397390134, "grad_norm": 1.3046875, "learning_rate": 0.0001978231666963087, "loss": 3.6866, "step": 3591 }, { "epoch": 0.16815495347884604, "grad_norm": 1.3203125, "learning_rate": 0.00019782195519145424, "loss": 3.6922, "step": 3592 }, { "epoch": 0.1682017672186787, "grad_norm": 1.421875, "learning_rate": 0.00019782074335327707, "loss": 3.6372, "step": 3593 }, { "epoch": 0.16824858095851133, "grad_norm": 1.96875, "learning_rate": 0.0001978195311817814, "loss": 3.9543, "step": 3594 }, { "epoch": 0.16829539469834395, "grad_norm": 1.390625, "learning_rate": 0.0001978183186769713, "loss": 3.9665, "step": 3595 }, { "epoch": 0.1683422084381766, "grad_norm": 1.4765625, "learning_rate": 0.00019781710583885098, "loss": 3.8463, "step": 3596 }, { "epoch": 0.16838902217800925, "grad_norm": 1.2109375, "learning_rate": 0.00019781589266742452, "loss": 3.3049, "step": 3597 }, { "epoch": 0.1684358359178419, "grad_norm": 1.203125, "learning_rate": 0.000197814679162696, "loss": 3.4135, "step": 3598 }, { "epoch": 0.16848264965767454, "grad_norm": 1.3046875, "learning_rate": 0.0001978134653246697, "loss": 3.5479, "step": 3599 }, { "epoch": 0.16852946339750716, "grad_norm": 1.703125, "learning_rate": 0.0001978122511533496, "loss": 5.2234, "step": 3600 }, { "epoch": 0.1685762771373398, "grad_norm": 2.140625, "learning_rate": 0.00019781103664873997, "loss": 3.5595, "step": 3601 }, { "epoch": 0.16862309087717245, "grad_norm": 1.125, "learning_rate": 0.00019780982181084484, "loss": 3.2868, "step": 3602 }, { "epoch": 0.1686699046170051, "grad_norm": 1.5625, "learning_rate": 0.0001978086066396684, "loss": 4.0832, "step": 3603 }, { "epoch": 0.16871671835683774, "grad_norm": 1.265625, "learning_rate": 0.0001978073911352148, "loss": 3.469, "step": 3604 }, { "epoch": 0.16876353209667036, "grad_norm": 1.1796875, "learning_rate": 0.00019780617529748816, "loss": 3.7417, "step": 3605 }, { "epoch": 0.168810345836503, "grad_norm": 1.34375, "learning_rate": 0.00019780495912649263, "loss": 3.7349, "step": 3606 }, { "epoch": 0.16885715957633565, "grad_norm": 1.90625, "learning_rate": 0.00019780374262223237, "loss": 3.7496, "step": 3607 }, { "epoch": 0.1689039733161683, "grad_norm": 1.5703125, "learning_rate": 0.0001978025257847115, "loss": 3.1449, "step": 3608 }, { "epoch": 0.16895078705600095, "grad_norm": 1.265625, "learning_rate": 0.00019780130861393418, "loss": 3.9599, "step": 3609 }, { "epoch": 0.16899760079583356, "grad_norm": 1.4140625, "learning_rate": 0.00019780009110990454, "loss": 3.7031, "step": 3610 }, { "epoch": 0.1690444145356662, "grad_norm": 1.4375, "learning_rate": 0.00019779887327262677, "loss": 3.841, "step": 3611 }, { "epoch": 0.16909122827549886, "grad_norm": 1.7734375, "learning_rate": 0.00019779765510210499, "loss": 4.2292, "step": 3612 }, { "epoch": 0.1691380420153315, "grad_norm": 1.1875, "learning_rate": 0.00019779643659834334, "loss": 5.8119, "step": 3613 }, { "epoch": 0.16918485575516415, "grad_norm": 1.2265625, "learning_rate": 0.00019779521776134597, "loss": 3.6195, "step": 3614 }, { "epoch": 0.1692316694949968, "grad_norm": 1.3125, "learning_rate": 0.00019779399859111706, "loss": 3.2032, "step": 3615 }, { "epoch": 0.1692784832348294, "grad_norm": 1.34375, "learning_rate": 0.00019779277908766075, "loss": 3.9019, "step": 3616 }, { "epoch": 0.16932529697466206, "grad_norm": 1.6171875, "learning_rate": 0.0001977915592509812, "loss": 3.6065, "step": 3617 }, { "epoch": 0.1693721107144947, "grad_norm": 1.15625, "learning_rate": 0.00019779033908108256, "loss": 3.5653, "step": 3618 }, { "epoch": 0.16941892445432735, "grad_norm": 1.5, "learning_rate": 0.00019778911857796903, "loss": 3.5453, "step": 3619 }, { "epoch": 0.16946573819416, "grad_norm": 1.328125, "learning_rate": 0.0001977878977416447, "loss": 3.7311, "step": 3620 }, { "epoch": 0.16951255193399262, "grad_norm": 1.2265625, "learning_rate": 0.00019778667657211374, "loss": 3.3682, "step": 3621 }, { "epoch": 0.16955936567382526, "grad_norm": 1.7890625, "learning_rate": 0.0001977854550693804, "loss": 3.5879, "step": 3622 }, { "epoch": 0.1696061794136579, "grad_norm": 1.171875, "learning_rate": 0.0001977842332334487, "loss": 3.6384, "step": 3623 }, { "epoch": 0.16965299315349056, "grad_norm": 1.515625, "learning_rate": 0.0001977830110643229, "loss": 3.5274, "step": 3624 }, { "epoch": 0.1696998068933232, "grad_norm": 1.4140625, "learning_rate": 0.00019778178856200715, "loss": 3.7769, "step": 3625 }, { "epoch": 0.16974662063315582, "grad_norm": 1.140625, "learning_rate": 0.00019778056572650562, "loss": 3.7344, "step": 3626 }, { "epoch": 0.16979343437298847, "grad_norm": 1.3515625, "learning_rate": 0.00019777934255782245, "loss": 3.8437, "step": 3627 }, { "epoch": 0.1698402481128211, "grad_norm": 1.765625, "learning_rate": 0.00019777811905596183, "loss": 3.1488, "step": 3628 }, { "epoch": 0.16988706185265376, "grad_norm": 2.78125, "learning_rate": 0.0001977768952209279, "loss": 3.9785, "step": 3629 }, { "epoch": 0.1699338755924864, "grad_norm": 1.265625, "learning_rate": 0.00019777567105272486, "loss": 3.545, "step": 3630 }, { "epoch": 0.16998068933231902, "grad_norm": 1.6484375, "learning_rate": 0.00019777444655135685, "loss": 3.7889, "step": 3631 }, { "epoch": 0.17002750307215167, "grad_norm": 1.3828125, "learning_rate": 0.00019777322171682813, "loss": 3.6385, "step": 3632 }, { "epoch": 0.17007431681198432, "grad_norm": 1.78125, "learning_rate": 0.00019777199654914273, "loss": 3.4602, "step": 3633 }, { "epoch": 0.17012113055181696, "grad_norm": 1.203125, "learning_rate": 0.00019777077104830498, "loss": 3.6327, "step": 3634 }, { "epoch": 0.1701679442916496, "grad_norm": 1.765625, "learning_rate": 0.00019776954521431893, "loss": 3.8878, "step": 3635 }, { "epoch": 0.17021475803148223, "grad_norm": 1.3515625, "learning_rate": 0.0001977683190471888, "loss": 4.254, "step": 3636 }, { "epoch": 0.17026157177131487, "grad_norm": 1.25, "learning_rate": 0.0001977670925469188, "loss": 3.8431, "step": 3637 }, { "epoch": 0.17030838551114752, "grad_norm": 1.5078125, "learning_rate": 0.00019776586571351308, "loss": 3.8029, "step": 3638 }, { "epoch": 0.17035519925098017, "grad_norm": 1.4921875, "learning_rate": 0.00019776463854697583, "loss": 3.2333, "step": 3639 }, { "epoch": 0.1704020129908128, "grad_norm": 0.875, "learning_rate": 0.00019776341104731123, "loss": 4.3481, "step": 3640 }, { "epoch": 0.17044882673064543, "grad_norm": 1.1875, "learning_rate": 0.00019776218321452342, "loss": 3.504, "step": 3641 }, { "epoch": 0.17049564047047808, "grad_norm": 2.109375, "learning_rate": 0.00019776095504861664, "loss": 3.5723, "step": 3642 }, { "epoch": 0.17054245421031072, "grad_norm": 1.3125, "learning_rate": 0.00019775972654959508, "loss": 4.0215, "step": 3643 }, { "epoch": 0.17058926795014337, "grad_norm": 1.5078125, "learning_rate": 0.00019775849771746287, "loss": 3.5524, "step": 3644 }, { "epoch": 0.17063608168997602, "grad_norm": 1.2265625, "learning_rate": 0.00019775726855222425, "loss": 3.6137, "step": 3645 }, { "epoch": 0.17068289542980866, "grad_norm": 1.359375, "learning_rate": 0.00019775603905388339, "loss": 3.5792, "step": 3646 }, { "epoch": 0.17072970916964128, "grad_norm": 2.0, "learning_rate": 0.00019775480922244447, "loss": 3.547, "step": 3647 }, { "epoch": 0.17077652290947393, "grad_norm": 1.2734375, "learning_rate": 0.00019775357905791168, "loss": 3.5589, "step": 3648 }, { "epoch": 0.17082333664930657, "grad_norm": 1.2578125, "learning_rate": 0.0001977523485602892, "loss": 3.5723, "step": 3649 }, { "epoch": 0.17087015038913922, "grad_norm": 1.2421875, "learning_rate": 0.0001977511177295813, "loss": 3.6822, "step": 3650 }, { "epoch": 0.17091696412897187, "grad_norm": 1.3828125, "learning_rate": 0.00019774988656579207, "loss": 3.6467, "step": 3651 }, { "epoch": 0.17096377786880448, "grad_norm": 1.7734375, "learning_rate": 0.00019774865506892576, "loss": 3.9288, "step": 3652 }, { "epoch": 0.17101059160863713, "grad_norm": 1.3125, "learning_rate": 0.00019774742323898657, "loss": 3.8848, "step": 3653 }, { "epoch": 0.17105740534846978, "grad_norm": 1.4375, "learning_rate": 0.0001977461910759787, "loss": 3.5929, "step": 3654 }, { "epoch": 0.17110421908830242, "grad_norm": 1.59375, "learning_rate": 0.0001977449585799063, "loss": 4.0022, "step": 3655 }, { "epoch": 0.17115103282813507, "grad_norm": 1.75, "learning_rate": 0.0001977437257507736, "loss": 3.7083, "step": 3656 }, { "epoch": 0.1711978465679677, "grad_norm": 1.1796875, "learning_rate": 0.00019774249258858485, "loss": 3.8551, "step": 3657 }, { "epoch": 0.17124466030780033, "grad_norm": 1.15625, "learning_rate": 0.00019774125909334417, "loss": 3.7861, "step": 3658 }, { "epoch": 0.17129147404763298, "grad_norm": 1.78125, "learning_rate": 0.00019774002526505582, "loss": 4.0199, "step": 3659 }, { "epoch": 0.17133828778746563, "grad_norm": 1.640625, "learning_rate": 0.00019773879110372396, "loss": 4.0948, "step": 3660 }, { "epoch": 0.17138510152729827, "grad_norm": 1.2578125, "learning_rate": 0.00019773755660935284, "loss": 3.907, "step": 3661 }, { "epoch": 0.1714319152671309, "grad_norm": 1.640625, "learning_rate": 0.00019773632178194663, "loss": 3.5795, "step": 3662 }, { "epoch": 0.17147872900696354, "grad_norm": 0.9765625, "learning_rate": 0.00019773508662150957, "loss": 3.2993, "step": 3663 }, { "epoch": 0.17152554274679618, "grad_norm": 1.3984375, "learning_rate": 0.00019773385112804582, "loss": 4.0627, "step": 3664 }, { "epoch": 0.17157235648662883, "grad_norm": 1.4453125, "learning_rate": 0.00019773261530155965, "loss": 3.779, "step": 3665 }, { "epoch": 0.17161917022646148, "grad_norm": 1.421875, "learning_rate": 0.00019773137914205524, "loss": 3.4706, "step": 3666 }, { "epoch": 0.1716659839662941, "grad_norm": 1.3125, "learning_rate": 0.0001977301426495368, "loss": 3.7643, "step": 3667 }, { "epoch": 0.17171279770612674, "grad_norm": 1.4921875, "learning_rate": 0.00019772890582400856, "loss": 3.7484, "step": 3668 }, { "epoch": 0.1717596114459594, "grad_norm": 1.46875, "learning_rate": 0.0001977276686654747, "loss": 3.6685, "step": 3669 }, { "epoch": 0.17180642518579203, "grad_norm": 1.2890625, "learning_rate": 0.00019772643117393947, "loss": 3.3764, "step": 3670 }, { "epoch": 0.17185323892562468, "grad_norm": 1.1015625, "learning_rate": 0.00019772519334940706, "loss": 3.632, "step": 3671 }, { "epoch": 0.1719000526654573, "grad_norm": 1.5625, "learning_rate": 0.00019772395519188174, "loss": 3.7994, "step": 3672 }, { "epoch": 0.17194686640528994, "grad_norm": 1.3125, "learning_rate": 0.00019772271670136763, "loss": 3.6146, "step": 3673 }, { "epoch": 0.1719936801451226, "grad_norm": 1.453125, "learning_rate": 0.00019772147787786904, "loss": 3.5592, "step": 3674 }, { "epoch": 0.17204049388495524, "grad_norm": 1.671875, "learning_rate": 0.0001977202387213902, "loss": 4.0899, "step": 3675 }, { "epoch": 0.17208730762478788, "grad_norm": 2.265625, "learning_rate": 0.00019771899923193525, "loss": 3.8462, "step": 3676 }, { "epoch": 0.17213412136462053, "grad_norm": 1.0625, "learning_rate": 0.0001977177594095085, "loss": 3.5701, "step": 3677 }, { "epoch": 0.17218093510445315, "grad_norm": 1.953125, "learning_rate": 0.0001977165192541141, "loss": 3.6928, "step": 3678 }, { "epoch": 0.1722277488442858, "grad_norm": 1.265625, "learning_rate": 0.0001977152787657563, "loss": 3.4299, "step": 3679 }, { "epoch": 0.17227456258411844, "grad_norm": 1.1328125, "learning_rate": 0.00019771403794443938, "loss": 3.4809, "step": 3680 }, { "epoch": 0.1723213763239511, "grad_norm": 1.1328125, "learning_rate": 0.00019771279679016746, "loss": 3.9244, "step": 3681 }, { "epoch": 0.17236819006378373, "grad_norm": 1.15625, "learning_rate": 0.0001977115553029449, "loss": 4.1007, "step": 3682 }, { "epoch": 0.17241500380361635, "grad_norm": 1.453125, "learning_rate": 0.00019771031348277582, "loss": 3.7919, "step": 3683 }, { "epoch": 0.172461817543449, "grad_norm": 1.3203125, "learning_rate": 0.0001977090713296645, "loss": 3.4143, "step": 3684 }, { "epoch": 0.17250863128328164, "grad_norm": 2.03125, "learning_rate": 0.00019770782884361517, "loss": 3.849, "step": 3685 }, { "epoch": 0.1725554450231143, "grad_norm": 2.265625, "learning_rate": 0.00019770658602463208, "loss": 2.4309, "step": 3686 }, { "epoch": 0.17260225876294694, "grad_norm": 1.0234375, "learning_rate": 0.00019770534287271945, "loss": 3.6414, "step": 3687 }, { "epoch": 0.17264907250277955, "grad_norm": 1.5546875, "learning_rate": 0.00019770409938788149, "loss": 3.9005, "step": 3688 }, { "epoch": 0.1726958862426122, "grad_norm": 1.609375, "learning_rate": 0.00019770285557012244, "loss": 3.5587, "step": 3689 }, { "epoch": 0.17274269998244485, "grad_norm": 1.1171875, "learning_rate": 0.0001977016114194466, "loss": 3.6575, "step": 3690 }, { "epoch": 0.1727895137222775, "grad_norm": 1.7109375, "learning_rate": 0.00019770036693585814, "loss": 3.6528, "step": 3691 }, { "epoch": 0.17283632746211014, "grad_norm": 1.171875, "learning_rate": 0.00019769912211936133, "loss": 3.7275, "step": 3692 }, { "epoch": 0.17288314120194276, "grad_norm": 1.390625, "learning_rate": 0.0001976978769699604, "loss": 3.995, "step": 3693 }, { "epoch": 0.1729299549417754, "grad_norm": 1.1796875, "learning_rate": 0.00019769663148765963, "loss": 3.9747, "step": 3694 }, { "epoch": 0.17297676868160805, "grad_norm": 1.515625, "learning_rate": 0.00019769538567246321, "loss": 3.3483, "step": 3695 }, { "epoch": 0.1730235824214407, "grad_norm": 1.5859375, "learning_rate": 0.00019769413952437542, "loss": 3.6155, "step": 3696 }, { "epoch": 0.17307039616127334, "grad_norm": 1.7734375, "learning_rate": 0.0001976928930434005, "loss": 3.8451, "step": 3697 }, { "epoch": 0.17311720990110596, "grad_norm": 1.53125, "learning_rate": 0.00019769164622954267, "loss": 4.0124, "step": 3698 }, { "epoch": 0.1731640236409386, "grad_norm": 1.65625, "learning_rate": 0.00019769039908280626, "loss": 3.381, "step": 3699 }, { "epoch": 0.17321083738077125, "grad_norm": 1.296875, "learning_rate": 0.00019768915160319538, "loss": 3.6759, "step": 3700 }, { "epoch": 0.1732576511206039, "grad_norm": 2.40625, "learning_rate": 0.00019768790379071443, "loss": 3.5022, "step": 3701 }, { "epoch": 0.17330446486043655, "grad_norm": 1.3359375, "learning_rate": 0.00019768665564536756, "loss": 3.7328, "step": 3702 }, { "epoch": 0.17335127860026917, "grad_norm": 1.4140625, "learning_rate": 0.00019768540716715908, "loss": 3.8789, "step": 3703 }, { "epoch": 0.1733980923401018, "grad_norm": 1.421875, "learning_rate": 0.00019768415835609319, "loss": 3.5588, "step": 3704 }, { "epoch": 0.17344490607993446, "grad_norm": 1.2890625, "learning_rate": 0.0001976829092121742, "loss": 3.9636, "step": 3705 }, { "epoch": 0.1734917198197671, "grad_norm": 1.1875, "learning_rate": 0.00019768165973540637, "loss": 3.1946, "step": 3706 }, { "epoch": 0.17353853355959975, "grad_norm": 0.96875, "learning_rate": 0.00019768040992579388, "loss": 5.1988, "step": 3707 }, { "epoch": 0.1735853472994324, "grad_norm": 1.1328125, "learning_rate": 0.00019767915978334107, "loss": 3.5965, "step": 3708 }, { "epoch": 0.17363216103926501, "grad_norm": 1.3125, "learning_rate": 0.00019767790930805214, "loss": 3.4876, "step": 3709 }, { "epoch": 0.17367897477909766, "grad_norm": 1.359375, "learning_rate": 0.0001976766584999314, "loss": 2.9419, "step": 3710 }, { "epoch": 0.1737257885189303, "grad_norm": 1.4375, "learning_rate": 0.0001976754073589831, "loss": 3.865, "step": 3711 }, { "epoch": 0.17377260225876295, "grad_norm": 1.75, "learning_rate": 0.00019767415588521148, "loss": 3.333, "step": 3712 }, { "epoch": 0.1738194159985956, "grad_norm": 1.4453125, "learning_rate": 0.00019767290407862083, "loss": 3.5887, "step": 3713 }, { "epoch": 0.17386622973842822, "grad_norm": 1.7421875, "learning_rate": 0.0001976716519392154, "loss": 3.8112, "step": 3714 }, { "epoch": 0.17391304347826086, "grad_norm": 1.75, "learning_rate": 0.00019767039946699946, "loss": 3.992, "step": 3715 }, { "epoch": 0.1739598572180935, "grad_norm": 1.4609375, "learning_rate": 0.0001976691466619773, "loss": 4.1193, "step": 3716 }, { "epoch": 0.17400667095792616, "grad_norm": 1.3046875, "learning_rate": 0.00019766789352415314, "loss": 3.6633, "step": 3717 }, { "epoch": 0.1740534846977588, "grad_norm": 1.078125, "learning_rate": 0.0001976666400535313, "loss": 3.6283, "step": 3718 }, { "epoch": 0.17410029843759142, "grad_norm": 1.765625, "learning_rate": 0.000197665386250116, "loss": 3.8948, "step": 3719 }, { "epoch": 0.17414711217742407, "grad_norm": 1.1015625, "learning_rate": 0.00019766413211391156, "loss": 3.4976, "step": 3720 }, { "epoch": 0.17419392591725671, "grad_norm": 1.234375, "learning_rate": 0.0001976628776449222, "loss": 3.6412, "step": 3721 }, { "epoch": 0.17424073965708936, "grad_norm": 1.1796875, "learning_rate": 0.00019766162284315232, "loss": 3.7991, "step": 3722 }, { "epoch": 0.174287553396922, "grad_norm": 1.421875, "learning_rate": 0.00019766036770860602, "loss": 3.6934, "step": 3723 }, { "epoch": 0.17433436713675463, "grad_norm": 1.21875, "learning_rate": 0.0001976591122412877, "loss": 3.7019, "step": 3724 }, { "epoch": 0.17438118087658727, "grad_norm": 1.3515625, "learning_rate": 0.00019765785644120158, "loss": 3.8443, "step": 3725 }, { "epoch": 0.17442799461641992, "grad_norm": 1.5859375, "learning_rate": 0.00019765660030835198, "loss": 3.7823, "step": 3726 }, { "epoch": 0.17447480835625256, "grad_norm": 1.3515625, "learning_rate": 0.00019765534384274315, "loss": 3.7573, "step": 3727 }, { "epoch": 0.1745216220960852, "grad_norm": 1.109375, "learning_rate": 0.0001976540870443794, "loss": 3.5472, "step": 3728 }, { "epoch": 0.17456843583591783, "grad_norm": 1.2578125, "learning_rate": 0.00019765282991326494, "loss": 3.6373, "step": 3729 }, { "epoch": 0.17461524957575048, "grad_norm": 1.453125, "learning_rate": 0.00019765157244940415, "loss": 3.8524, "step": 3730 }, { "epoch": 0.17466206331558312, "grad_norm": 1.203125, "learning_rate": 0.00019765031465280127, "loss": 3.8735, "step": 3731 }, { "epoch": 0.17470887705541577, "grad_norm": 1.0703125, "learning_rate": 0.00019764905652346057, "loss": 3.5775, "step": 3732 }, { "epoch": 0.17475569079524841, "grad_norm": 1.125, "learning_rate": 0.00019764779806138636, "loss": 3.3008, "step": 3733 }, { "epoch": 0.17480250453508103, "grad_norm": 1.3203125, "learning_rate": 0.00019764653926658294, "loss": 3.7746, "step": 3734 }, { "epoch": 0.17484931827491368, "grad_norm": 1.140625, "learning_rate": 0.00019764528013905457, "loss": 3.7129, "step": 3735 }, { "epoch": 0.17489613201474632, "grad_norm": 1.625, "learning_rate": 0.00019764402067880553, "loss": 3.3686, "step": 3736 }, { "epoch": 0.17494294575457897, "grad_norm": 1.109375, "learning_rate": 0.00019764276088584016, "loss": 4.0786, "step": 3737 }, { "epoch": 0.17498975949441162, "grad_norm": 1.578125, "learning_rate": 0.00019764150076016272, "loss": 3.1689, "step": 3738 }, { "epoch": 0.17503657323424426, "grad_norm": 1.734375, "learning_rate": 0.0001976402403017775, "loss": 3.9275, "step": 3739 }, { "epoch": 0.17508338697407688, "grad_norm": 1.4453125, "learning_rate": 0.00019763897951068885, "loss": 3.4516, "step": 3740 }, { "epoch": 0.17513020071390953, "grad_norm": 1.1328125, "learning_rate": 0.00019763771838690098, "loss": 3.6366, "step": 3741 }, { "epoch": 0.17517701445374217, "grad_norm": 1.2890625, "learning_rate": 0.00019763645693041823, "loss": 3.7809, "step": 3742 }, { "epoch": 0.17522382819357482, "grad_norm": 2.078125, "learning_rate": 0.0001976351951412449, "loss": 4.1445, "step": 3743 }, { "epoch": 0.17527064193340747, "grad_norm": 1.84375, "learning_rate": 0.00019763393301938529, "loss": 3.6623, "step": 3744 }, { "epoch": 0.17531745567324009, "grad_norm": 1.2265625, "learning_rate": 0.0001976326705648437, "loss": 3.7332, "step": 3745 }, { "epoch": 0.17536426941307273, "grad_norm": 1.4765625, "learning_rate": 0.00019763140777762442, "loss": 3.3937, "step": 3746 }, { "epoch": 0.17541108315290538, "grad_norm": 1.2734375, "learning_rate": 0.00019763014465773175, "loss": 3.7753, "step": 3747 }, { "epoch": 0.17545789689273802, "grad_norm": 1.140625, "learning_rate": 0.00019762888120517002, "loss": 4.8123, "step": 3748 }, { "epoch": 0.17550471063257067, "grad_norm": 1.1953125, "learning_rate": 0.00019762761741994354, "loss": 3.4007, "step": 3749 }, { "epoch": 0.1755515243724033, "grad_norm": 1.2890625, "learning_rate": 0.00019762635330205654, "loss": 3.4851, "step": 3750 }, { "epoch": 0.17559833811223594, "grad_norm": 1.640625, "learning_rate": 0.00019762508885151343, "loss": 3.5599, "step": 3751 }, { "epoch": 0.17564515185206858, "grad_norm": 1.2890625, "learning_rate": 0.00019762382406831848, "loss": 4.0215, "step": 3752 }, { "epoch": 0.17569196559190123, "grad_norm": 5.375, "learning_rate": 0.00019762255895247596, "loss": 4.1946, "step": 3753 }, { "epoch": 0.17573877933173387, "grad_norm": 1.5078125, "learning_rate": 0.00019762129350399025, "loss": 3.6027, "step": 3754 }, { "epoch": 0.1757855930715665, "grad_norm": 1.4375, "learning_rate": 0.00019762002772286558, "loss": 3.8477, "step": 3755 }, { "epoch": 0.17583240681139914, "grad_norm": 1.5390625, "learning_rate": 0.00019761876160910634, "loss": 4.1934, "step": 3756 }, { "epoch": 0.17587922055123179, "grad_norm": 1.4140625, "learning_rate": 0.00019761749516271682, "loss": 3.5352, "step": 3757 }, { "epoch": 0.17592603429106443, "grad_norm": 1.3046875, "learning_rate": 0.00019761622838370131, "loss": 3.0459, "step": 3758 }, { "epoch": 0.17597284803089708, "grad_norm": 1.1484375, "learning_rate": 0.00019761496127206413, "loss": 3.4028, "step": 3759 }, { "epoch": 0.1760196617707297, "grad_norm": 1.5859375, "learning_rate": 0.00019761369382780963, "loss": 3.6004, "step": 3760 }, { "epoch": 0.17606647551056234, "grad_norm": 1.390625, "learning_rate": 0.00019761242605094212, "loss": 3.8584, "step": 3761 }, { "epoch": 0.176113289250395, "grad_norm": 2.453125, "learning_rate": 0.00019761115794146588, "loss": 3.1245, "step": 3762 }, { "epoch": 0.17616010299022763, "grad_norm": 1.59375, "learning_rate": 0.0001976098894993853, "loss": 3.4664, "step": 3763 }, { "epoch": 0.17620691673006028, "grad_norm": 1.3984375, "learning_rate": 0.00019760862072470462, "loss": 3.6565, "step": 3764 }, { "epoch": 0.1762537304698929, "grad_norm": 1.2734375, "learning_rate": 0.00019760735161742824, "loss": 3.6904, "step": 3765 }, { "epoch": 0.17630054420972555, "grad_norm": 1.3359375, "learning_rate": 0.00019760608217756046, "loss": 3.4473, "step": 3766 }, { "epoch": 0.1763473579495582, "grad_norm": 1.2734375, "learning_rate": 0.00019760481240510558, "loss": 3.6734, "step": 3767 }, { "epoch": 0.17639417168939084, "grad_norm": 1.5390625, "learning_rate": 0.00019760354230006794, "loss": 3.9109, "step": 3768 }, { "epoch": 0.17644098542922348, "grad_norm": 1.1875, "learning_rate": 0.00019760227186245186, "loss": 3.739, "step": 3769 }, { "epoch": 0.17648779916905613, "grad_norm": 1.34375, "learning_rate": 0.00019760100109226172, "loss": 3.5348, "step": 3770 }, { "epoch": 0.17653461290888875, "grad_norm": 1.734375, "learning_rate": 0.00019759972998950178, "loss": 4.1743, "step": 3771 }, { "epoch": 0.1765814266487214, "grad_norm": 1.21875, "learning_rate": 0.0001975984585541764, "loss": 3.9192, "step": 3772 }, { "epoch": 0.17662824038855404, "grad_norm": 1.8671875, "learning_rate": 0.00019759718678628993, "loss": 4.1929, "step": 3773 }, { "epoch": 0.1766750541283867, "grad_norm": 1.296875, "learning_rate": 0.00019759591468584669, "loss": 3.6528, "step": 3774 }, { "epoch": 0.17672186786821933, "grad_norm": 1.1875, "learning_rate": 0.000197594642252851, "loss": 3.9645, "step": 3775 }, { "epoch": 0.17676868160805195, "grad_norm": 1.234375, "learning_rate": 0.0001975933694873072, "loss": 3.661, "step": 3776 }, { "epoch": 0.1768154953478846, "grad_norm": 0.98828125, "learning_rate": 0.00019759209638921962, "loss": 3.8217, "step": 3777 }, { "epoch": 0.17686230908771725, "grad_norm": 1.2578125, "learning_rate": 0.00019759082295859264, "loss": 3.7256, "step": 3778 }, { "epoch": 0.1769091228275499, "grad_norm": 1.4765625, "learning_rate": 0.00019758954919543058, "loss": 3.7112, "step": 3779 }, { "epoch": 0.17695593656738254, "grad_norm": 1.421875, "learning_rate": 0.00019758827509973776, "loss": 3.682, "step": 3780 }, { "epoch": 0.17700275030721516, "grad_norm": 1.2890625, "learning_rate": 0.00019758700067151855, "loss": 3.62, "step": 3781 }, { "epoch": 0.1770495640470478, "grad_norm": 1.375, "learning_rate": 0.00019758572591077725, "loss": 3.3706, "step": 3782 }, { "epoch": 0.17709637778688045, "grad_norm": 1.6015625, "learning_rate": 0.00019758445081751824, "loss": 3.6395, "step": 3783 }, { "epoch": 0.1771431915267131, "grad_norm": 1.234375, "learning_rate": 0.00019758317539174584, "loss": 3.4287, "step": 3784 }, { "epoch": 0.17719000526654574, "grad_norm": 1.1640625, "learning_rate": 0.00019758189963346444, "loss": 3.3277, "step": 3785 }, { "epoch": 0.17723681900637836, "grad_norm": 1.1484375, "learning_rate": 0.00019758062354267834, "loss": 3.439, "step": 3786 }, { "epoch": 0.177283632746211, "grad_norm": 2.109375, "learning_rate": 0.0001975793471193919, "loss": 2.9323, "step": 3787 }, { "epoch": 0.17733044648604365, "grad_norm": 1.1796875, "learning_rate": 0.00019757807036360948, "loss": 3.518, "step": 3788 }, { "epoch": 0.1773772602258763, "grad_norm": 1.3203125, "learning_rate": 0.00019757679327533543, "loss": 3.8776, "step": 3789 }, { "epoch": 0.17742407396570894, "grad_norm": 1.390625, "learning_rate": 0.0001975755158545741, "loss": 3.6108, "step": 3790 }, { "epoch": 0.17747088770554156, "grad_norm": 1.2578125, "learning_rate": 0.00019757423810132983, "loss": 3.4988, "step": 3791 }, { "epoch": 0.1775177014453742, "grad_norm": 1.1328125, "learning_rate": 0.00019757296001560697, "loss": 3.3584, "step": 3792 }, { "epoch": 0.17756451518520686, "grad_norm": 1.3203125, "learning_rate": 0.00019757168159740992, "loss": 3.4071, "step": 3793 }, { "epoch": 0.1776113289250395, "grad_norm": 1.3125, "learning_rate": 0.00019757040284674296, "loss": 3.6092, "step": 3794 }, { "epoch": 0.17765814266487215, "grad_norm": 1.453125, "learning_rate": 0.00019756912376361053, "loss": 3.6409, "step": 3795 }, { "epoch": 0.1777049564047048, "grad_norm": 1.171875, "learning_rate": 0.00019756784434801696, "loss": 3.2666, "step": 3796 }, { "epoch": 0.1777517701445374, "grad_norm": 1.546875, "learning_rate": 0.0001975665645999666, "loss": 3.7075, "step": 3797 }, { "epoch": 0.17779858388437006, "grad_norm": 1.390625, "learning_rate": 0.00019756528451946379, "loss": 4.0183, "step": 3798 }, { "epoch": 0.1778453976242027, "grad_norm": 2.28125, "learning_rate": 0.0001975640041065129, "loss": 3.429, "step": 3799 }, { "epoch": 0.17789221136403535, "grad_norm": 1.609375, "learning_rate": 0.00019756272336111835, "loss": 3.7413, "step": 3800 }, { "epoch": 0.177939025103868, "grad_norm": 1.0546875, "learning_rate": 0.00019756144228328442, "loss": 3.8905, "step": 3801 }, { "epoch": 0.17798583884370062, "grad_norm": 1.5234375, "learning_rate": 0.00019756016087301554, "loss": 3.699, "step": 3802 }, { "epoch": 0.17803265258353326, "grad_norm": 1.140625, "learning_rate": 0.000197558879130316, "loss": 3.7339, "step": 3803 }, { "epoch": 0.1780794663233659, "grad_norm": 1.5703125, "learning_rate": 0.0001975575970551903, "loss": 3.6103, "step": 3804 }, { "epoch": 0.17812628006319856, "grad_norm": 2.125, "learning_rate": 0.00019755631464764267, "loss": 3.7265, "step": 3805 }, { "epoch": 0.1781730938030312, "grad_norm": 1.2265625, "learning_rate": 0.00019755503190767754, "loss": 3.4633, "step": 3806 }, { "epoch": 0.17821990754286382, "grad_norm": 1.1328125, "learning_rate": 0.0001975537488352993, "loss": 3.375, "step": 3807 }, { "epoch": 0.17826672128269647, "grad_norm": 1.4921875, "learning_rate": 0.00019755246543051228, "loss": 3.4858, "step": 3808 }, { "epoch": 0.1783135350225291, "grad_norm": 1.796875, "learning_rate": 0.00019755118169332086, "loss": 3.4064, "step": 3809 }, { "epoch": 0.17836034876236176, "grad_norm": 1.15625, "learning_rate": 0.00019754989762372946, "loss": 3.4401, "step": 3810 }, { "epoch": 0.1784071625021944, "grad_norm": 1.2734375, "learning_rate": 0.00019754861322174243, "loss": 3.0408, "step": 3811 }, { "epoch": 0.17845397624202702, "grad_norm": 1.5234375, "learning_rate": 0.00019754732848736414, "loss": 3.6452, "step": 3812 }, { "epoch": 0.17850078998185967, "grad_norm": 1.15625, "learning_rate": 0.00019754604342059893, "loss": 3.369, "step": 3813 }, { "epoch": 0.17854760372169232, "grad_norm": 1.5703125, "learning_rate": 0.00019754475802145124, "loss": 3.7004, "step": 3814 }, { "epoch": 0.17859441746152496, "grad_norm": 1.1796875, "learning_rate": 0.0001975434722899254, "loss": 4.0606, "step": 3815 }, { "epoch": 0.1786412312013576, "grad_norm": 1.3515625, "learning_rate": 0.00019754218622602582, "loss": 3.7786, "step": 3816 }, { "epoch": 0.17868804494119023, "grad_norm": 1.1328125, "learning_rate": 0.00019754089982975689, "loss": 3.5933, "step": 3817 }, { "epoch": 0.17873485868102287, "grad_norm": 1.3515625, "learning_rate": 0.00019753961310112298, "loss": 3.8552, "step": 3818 }, { "epoch": 0.17878167242085552, "grad_norm": 1.3828125, "learning_rate": 0.0001975383260401285, "loss": 3.6702, "step": 3819 }, { "epoch": 0.17882848616068817, "grad_norm": 1.2109375, "learning_rate": 0.0001975370386467778, "loss": 3.7215, "step": 3820 }, { "epoch": 0.1788752999005208, "grad_norm": 1.2734375, "learning_rate": 0.00019753575092107525, "loss": 3.5338, "step": 3821 }, { "epoch": 0.17892211364035343, "grad_norm": 1.1484375, "learning_rate": 0.0001975344628630253, "loss": 3.5417, "step": 3822 }, { "epoch": 0.17896892738018608, "grad_norm": 1.453125, "learning_rate": 0.0001975331744726323, "loss": 3.9978, "step": 3823 }, { "epoch": 0.17901574112001872, "grad_norm": 1.3125, "learning_rate": 0.00019753188574990065, "loss": 3.9221, "step": 3824 }, { "epoch": 0.17906255485985137, "grad_norm": 1.0390625, "learning_rate": 0.00019753059669483472, "loss": 3.8566, "step": 3825 }, { "epoch": 0.17910936859968402, "grad_norm": 2.453125, "learning_rate": 0.00019752930730743893, "loss": 4.0043, "step": 3826 }, { "epoch": 0.17915618233951666, "grad_norm": 1.3203125, "learning_rate": 0.00019752801758771767, "loss": 3.8327, "step": 3827 }, { "epoch": 0.17920299607934928, "grad_norm": 1.53125, "learning_rate": 0.00019752672753567532, "loss": 4.2057, "step": 3828 }, { "epoch": 0.17924980981918193, "grad_norm": 1.7421875, "learning_rate": 0.00019752543715131625, "loss": 3.657, "step": 3829 }, { "epoch": 0.17929662355901457, "grad_norm": 1.3046875, "learning_rate": 0.00019752414643464493, "loss": 3.9039, "step": 3830 }, { "epoch": 0.17934343729884722, "grad_norm": 1.4375, "learning_rate": 0.0001975228553856657, "loss": 3.8468, "step": 3831 }, { "epoch": 0.17939025103867987, "grad_norm": 1.3984375, "learning_rate": 0.000197521564004383, "loss": 4.0026, "step": 3832 }, { "epoch": 0.17943706477851248, "grad_norm": 1.2109375, "learning_rate": 0.0001975202722908012, "loss": 3.5189, "step": 3833 }, { "epoch": 0.17948387851834513, "grad_norm": 1.1171875, "learning_rate": 0.0001975189802449247, "loss": 3.6306, "step": 3834 }, { "epoch": 0.17953069225817778, "grad_norm": 1.3671875, "learning_rate": 0.0001975176878667579, "loss": 3.4654, "step": 3835 }, { "epoch": 0.17957750599801042, "grad_norm": 1.3828125, "learning_rate": 0.00019751639515630523, "loss": 3.9189, "step": 3836 }, { "epoch": 0.17962431973784307, "grad_norm": 1.421875, "learning_rate": 0.0001975151021135711, "loss": 3.5048, "step": 3837 }, { "epoch": 0.1796711334776757, "grad_norm": 1.40625, "learning_rate": 0.00019751380873855986, "loss": 3.6422, "step": 3838 }, { "epoch": 0.17971794721750833, "grad_norm": 1.109375, "learning_rate": 0.00019751251503127596, "loss": 4.1748, "step": 3839 }, { "epoch": 0.17976476095734098, "grad_norm": 1.5078125, "learning_rate": 0.00019751122099172382, "loss": 3.7281, "step": 3840 }, { "epoch": 0.17981157469717363, "grad_norm": 1.2109375, "learning_rate": 0.00019750992661990782, "loss": 3.8634, "step": 3841 }, { "epoch": 0.17985838843700627, "grad_norm": 1.28125, "learning_rate": 0.00019750863191583237, "loss": 3.9726, "step": 3842 }, { "epoch": 0.1799052021768389, "grad_norm": 1.28125, "learning_rate": 0.00019750733687950192, "loss": 3.7111, "step": 3843 }, { "epoch": 0.17995201591667154, "grad_norm": 1.421875, "learning_rate": 0.00019750604151092086, "loss": 3.8872, "step": 3844 }, { "epoch": 0.17999882965650418, "grad_norm": 1.265625, "learning_rate": 0.00019750474581009355, "loss": 3.8657, "step": 3845 }, { "epoch": 0.18004564339633683, "grad_norm": 1.3046875, "learning_rate": 0.00019750344977702449, "loss": 3.7184, "step": 3846 }, { "epoch": 0.18009245713616948, "grad_norm": 1.921875, "learning_rate": 0.00019750215341171807, "loss": 3.6768, "step": 3847 }, { "epoch": 0.1801392708760021, "grad_norm": 1.5546875, "learning_rate": 0.0001975008567141787, "loss": 3.8905, "step": 3848 }, { "epoch": 0.18018608461583474, "grad_norm": 1.1796875, "learning_rate": 0.00019749955968441073, "loss": 3.705, "step": 3849 }, { "epoch": 0.1802328983556674, "grad_norm": 3.53125, "learning_rate": 0.00019749826232241872, "loss": 3.3042, "step": 3850 }, { "epoch": 0.18027971209550003, "grad_norm": 1.421875, "learning_rate": 0.000197496964628207, "loss": 3.4325, "step": 3851 }, { "epoch": 0.18032652583533268, "grad_norm": 1.3203125, "learning_rate": 0.00019749566660178, "loss": 3.7839, "step": 3852 }, { "epoch": 0.1803733395751653, "grad_norm": 1.2265625, "learning_rate": 0.00019749436824314213, "loss": 3.8154, "step": 3853 }, { "epoch": 0.18042015331499794, "grad_norm": 1.2734375, "learning_rate": 0.0001974930695522979, "loss": 3.7073, "step": 3854 }, { "epoch": 0.1804669670548306, "grad_norm": 1.5703125, "learning_rate": 0.00019749177052925162, "loss": 3.7999, "step": 3855 }, { "epoch": 0.18051378079466324, "grad_norm": 1.1953125, "learning_rate": 0.00019749047117400775, "loss": 3.4363, "step": 3856 }, { "epoch": 0.18056059453449588, "grad_norm": 1.3359375, "learning_rate": 0.00019748917148657079, "loss": 3.6432, "step": 3857 }, { "epoch": 0.18060740827432853, "grad_norm": 1.1796875, "learning_rate": 0.00019748787146694506, "loss": 3.5984, "step": 3858 }, { "epoch": 0.18065422201416115, "grad_norm": 1.1328125, "learning_rate": 0.00019748657111513508, "loss": 3.6364, "step": 3859 }, { "epoch": 0.1807010357539938, "grad_norm": 1.46875, "learning_rate": 0.00019748527043114523, "loss": 3.5464, "step": 3860 }, { "epoch": 0.18074784949382644, "grad_norm": 1.2734375, "learning_rate": 0.00019748396941497996, "loss": 3.3363, "step": 3861 }, { "epoch": 0.1807946632336591, "grad_norm": 1.234375, "learning_rate": 0.00019748266806664372, "loss": 3.7169, "step": 3862 }, { "epoch": 0.18084147697349173, "grad_norm": 2.734375, "learning_rate": 0.00019748136638614088, "loss": 3.7134, "step": 3863 }, { "epoch": 0.18088829071332435, "grad_norm": 1.4765625, "learning_rate": 0.00019748006437347594, "loss": 3.7842, "step": 3864 }, { "epoch": 0.180935104453157, "grad_norm": 1.015625, "learning_rate": 0.00019747876202865332, "loss": 3.894, "step": 3865 }, { "epoch": 0.18098191819298964, "grad_norm": 1.3515625, "learning_rate": 0.00019747745935167746, "loss": 3.902, "step": 3866 }, { "epoch": 0.1810287319328223, "grad_norm": 1.53125, "learning_rate": 0.00019747615634255276, "loss": 3.4875, "step": 3867 }, { "epoch": 0.18107554567265494, "grad_norm": 1.5546875, "learning_rate": 0.00019747485300128373, "loss": 3.8174, "step": 3868 }, { "epoch": 0.18112235941248755, "grad_norm": 1.3359375, "learning_rate": 0.00019747354932787476, "loss": 3.7336, "step": 3869 }, { "epoch": 0.1811691731523202, "grad_norm": 1.2578125, "learning_rate": 0.0001974722453223303, "loss": 3.9134, "step": 3870 }, { "epoch": 0.18121598689215285, "grad_norm": 1.265625, "learning_rate": 0.00019747094098465477, "loss": 3.4736, "step": 3871 }, { "epoch": 0.1812628006319855, "grad_norm": 1.3515625, "learning_rate": 0.00019746963631485267, "loss": 3.8603, "step": 3872 }, { "epoch": 0.18130961437181814, "grad_norm": 1.4140625, "learning_rate": 0.00019746833131292844, "loss": 3.7253, "step": 3873 }, { "epoch": 0.18135642811165076, "grad_norm": 1.0390625, "learning_rate": 0.0001974670259788865, "loss": 3.3491, "step": 3874 }, { "epoch": 0.1814032418514834, "grad_norm": 1.4765625, "learning_rate": 0.00019746572031273125, "loss": 3.687, "step": 3875 }, { "epoch": 0.18145005559131605, "grad_norm": 1.7109375, "learning_rate": 0.00019746441431446723, "loss": 3.7613, "step": 3876 }, { "epoch": 0.1814968693311487, "grad_norm": 1.609375, "learning_rate": 0.00019746310798409887, "loss": 4.2677, "step": 3877 }, { "epoch": 0.18154368307098134, "grad_norm": 1.0703125, "learning_rate": 0.00019746180132163056, "loss": 2.538, "step": 3878 }, { "epoch": 0.18159049681081396, "grad_norm": 1.125, "learning_rate": 0.00019746049432706683, "loss": 3.4339, "step": 3879 }, { "epoch": 0.1816373105506466, "grad_norm": 1.6015625, "learning_rate": 0.0001974591870004121, "loss": 4.0521, "step": 3880 }, { "epoch": 0.18168412429047925, "grad_norm": 1.3125, "learning_rate": 0.00019745787934167083, "loss": 4.0058, "step": 3881 }, { "epoch": 0.1817309380303119, "grad_norm": 1.6640625, "learning_rate": 0.00019745657135084743, "loss": 3.7313, "step": 3882 }, { "epoch": 0.18177775177014455, "grad_norm": 1.375, "learning_rate": 0.0001974552630279464, "loss": 3.5999, "step": 3883 }, { "epoch": 0.18182456550997717, "grad_norm": 1.4296875, "learning_rate": 0.00019745395437297222, "loss": 3.8913, "step": 3884 }, { "epoch": 0.1818713792498098, "grad_norm": 1.59375, "learning_rate": 0.0001974526453859293, "loss": 3.585, "step": 3885 }, { "epoch": 0.18191819298964246, "grad_norm": 0.91796875, "learning_rate": 0.00019745133606682215, "loss": 4.9224, "step": 3886 }, { "epoch": 0.1819650067294751, "grad_norm": 0.953125, "learning_rate": 0.0001974500264156552, "loss": 3.6432, "step": 3887 }, { "epoch": 0.18201182046930775, "grad_norm": 1.2421875, "learning_rate": 0.00019744871643243293, "loss": 3.617, "step": 3888 }, { "epoch": 0.1820586342091404, "grad_norm": 1.2890625, "learning_rate": 0.00019744740611715974, "loss": 3.5427, "step": 3889 }, { "epoch": 0.18210544794897301, "grad_norm": 1.171875, "learning_rate": 0.0001974460954698402, "loss": 3.6954, "step": 3890 }, { "epoch": 0.18215226168880566, "grad_norm": 1.1875, "learning_rate": 0.0001974447844904787, "loss": 3.6101, "step": 3891 }, { "epoch": 0.1821990754286383, "grad_norm": 1.5390625, "learning_rate": 0.00019744347317907974, "loss": 3.6464, "step": 3892 }, { "epoch": 0.18224588916847095, "grad_norm": 1.6484375, "learning_rate": 0.00019744216153564777, "loss": 3.7089, "step": 3893 }, { "epoch": 0.1822927029083036, "grad_norm": 1.5234375, "learning_rate": 0.00019744084956018723, "loss": 3.9427, "step": 3894 }, { "epoch": 0.18233951664813622, "grad_norm": 1.3984375, "learning_rate": 0.0001974395372527027, "loss": 3.6675, "step": 3895 }, { "epoch": 0.18238633038796886, "grad_norm": 1.21875, "learning_rate": 0.00019743822461319854, "loss": 3.929, "step": 3896 }, { "epoch": 0.1824331441278015, "grad_norm": 1.359375, "learning_rate": 0.00019743691164167925, "loss": 3.7328, "step": 3897 }, { "epoch": 0.18247995786763416, "grad_norm": 1.234375, "learning_rate": 0.00019743559833814933, "loss": 3.7332, "step": 3898 }, { "epoch": 0.1825267716074668, "grad_norm": 1.3984375, "learning_rate": 0.00019743428470261326, "loss": 3.8669, "step": 3899 }, { "epoch": 0.18257358534729942, "grad_norm": 1.6796875, "learning_rate": 0.00019743297073507548, "loss": 4.7209, "step": 3900 }, { "epoch": 0.18262039908713207, "grad_norm": 1.6640625, "learning_rate": 0.00019743165643554047, "loss": 3.9741, "step": 3901 }, { "epoch": 0.18266721282696471, "grad_norm": 1.4375, "learning_rate": 0.00019743034180401273, "loss": 3.9744, "step": 3902 }, { "epoch": 0.18271402656679736, "grad_norm": 1.25, "learning_rate": 0.00019742902684049675, "loss": 3.7104, "step": 3903 }, { "epoch": 0.18276084030663, "grad_norm": 1.234375, "learning_rate": 0.000197427711544997, "loss": 3.84, "step": 3904 }, { "epoch": 0.18280765404646263, "grad_norm": 1.078125, "learning_rate": 0.0001974263959175179, "loss": 5.462, "step": 3905 }, { "epoch": 0.18285446778629527, "grad_norm": 1.4140625, "learning_rate": 0.00019742507995806403, "loss": 4.0897, "step": 3906 }, { "epoch": 0.18290128152612792, "grad_norm": 1.1953125, "learning_rate": 0.00019742376366663984, "loss": 3.9031, "step": 3907 }, { "epoch": 0.18294809526596056, "grad_norm": 1.203125, "learning_rate": 0.0001974224470432498, "loss": 3.2622, "step": 3908 }, { "epoch": 0.1829949090057932, "grad_norm": 1.2265625, "learning_rate": 0.00019742113008789836, "loss": 3.1772, "step": 3909 }, { "epoch": 0.18304172274562583, "grad_norm": 1.328125, "learning_rate": 0.0001974198128005901, "loss": 3.9125, "step": 3910 }, { "epoch": 0.18308853648545848, "grad_norm": 1.3984375, "learning_rate": 0.0001974184951813294, "loss": 3.6876, "step": 3911 }, { "epoch": 0.18313535022529112, "grad_norm": 1.4453125, "learning_rate": 0.0001974171772301209, "loss": 3.9606, "step": 3912 }, { "epoch": 0.18318216396512377, "grad_norm": 1.125, "learning_rate": 0.00019741585894696894, "loss": 3.7965, "step": 3913 }, { "epoch": 0.18322897770495641, "grad_norm": 1.53125, "learning_rate": 0.00019741454033187807, "loss": 3.7259, "step": 3914 }, { "epoch": 0.18327579144478903, "grad_norm": 1.28125, "learning_rate": 0.00019741322138485282, "loss": 3.5288, "step": 3915 }, { "epoch": 0.18332260518462168, "grad_norm": 1.3046875, "learning_rate": 0.0001974119021058976, "loss": 3.4537, "step": 3916 }, { "epoch": 0.18336941892445432, "grad_norm": 1.421875, "learning_rate": 0.00019741058249501698, "loss": 3.8165, "step": 3917 }, { "epoch": 0.18341623266428697, "grad_norm": 1.484375, "learning_rate": 0.00019740926255221544, "loss": 3.3914, "step": 3918 }, { "epoch": 0.18346304640411962, "grad_norm": 1.4140625, "learning_rate": 0.00019740794227749746, "loss": 3.5201, "step": 3919 }, { "epoch": 0.18350986014395226, "grad_norm": 1.359375, "learning_rate": 0.00019740662167086755, "loss": 3.5289, "step": 3920 }, { "epoch": 0.18355667388378488, "grad_norm": 1.140625, "learning_rate": 0.0001974053007323302, "loss": 3.7926, "step": 3921 }, { "epoch": 0.18360348762361753, "grad_norm": 1.078125, "learning_rate": 0.0001974039794618899, "loss": 3.4431, "step": 3922 }, { "epoch": 0.18365030136345017, "grad_norm": 2.046875, "learning_rate": 0.0001974026578595512, "loss": 3.7026, "step": 3923 }, { "epoch": 0.18369711510328282, "grad_norm": 1.875, "learning_rate": 0.00019740133592531858, "loss": 4.0769, "step": 3924 }, { "epoch": 0.18374392884311547, "grad_norm": 1.109375, "learning_rate": 0.00019740001365919652, "loss": 3.2199, "step": 3925 }, { "epoch": 0.18379074258294809, "grad_norm": 1.296875, "learning_rate": 0.00019739869106118952, "loss": 3.3071, "step": 3926 }, { "epoch": 0.18383755632278073, "grad_norm": 1.375, "learning_rate": 0.00019739736813130217, "loss": 3.6084, "step": 3927 }, { "epoch": 0.18388437006261338, "grad_norm": 1.546875, "learning_rate": 0.00019739604486953888, "loss": 3.4954, "step": 3928 }, { "epoch": 0.18393118380244602, "grad_norm": 1.453125, "learning_rate": 0.0001973947212759042, "loss": 3.7258, "step": 3929 }, { "epoch": 0.18397799754227867, "grad_norm": 1.2421875, "learning_rate": 0.00019739339735040266, "loss": 3.7453, "step": 3930 }, { "epoch": 0.1840248112821113, "grad_norm": 1.3203125, "learning_rate": 0.0001973920730930387, "loss": 3.6509, "step": 3931 }, { "epoch": 0.18407162502194394, "grad_norm": 1.1875, "learning_rate": 0.0001973907485038169, "loss": 3.7321, "step": 3932 }, { "epoch": 0.18411843876177658, "grad_norm": 1.3671875, "learning_rate": 0.00019738942358274176, "loss": 3.498, "step": 3933 }, { "epoch": 0.18416525250160923, "grad_norm": 1.3984375, "learning_rate": 0.0001973880983298178, "loss": 3.1697, "step": 3934 }, { "epoch": 0.18421206624144187, "grad_norm": 1.8671875, "learning_rate": 0.0001973867727450495, "loss": 3.765, "step": 3935 }, { "epoch": 0.1842588799812745, "grad_norm": 1.3359375, "learning_rate": 0.00019738544682844142, "loss": 3.6446, "step": 3936 }, { "epoch": 0.18430569372110714, "grad_norm": 1.6015625, "learning_rate": 0.00019738412057999807, "loss": 3.5573, "step": 3937 }, { "epoch": 0.18435250746093979, "grad_norm": 1.125, "learning_rate": 0.00019738279399972392, "loss": 4.0222, "step": 3938 }, { "epoch": 0.18439932120077243, "grad_norm": 1.5546875, "learning_rate": 0.00019738146708762354, "loss": 3.9755, "step": 3939 }, { "epoch": 0.18444613494060508, "grad_norm": 1.2890625, "learning_rate": 0.00019738013984370146, "loss": 3.427, "step": 3940 }, { "epoch": 0.1844929486804377, "grad_norm": 1.5859375, "learning_rate": 0.0001973788122679622, "loss": 3.5654, "step": 3941 }, { "epoch": 0.18453976242027034, "grad_norm": 1.3828125, "learning_rate": 0.00019737748436041022, "loss": 3.7873, "step": 3942 }, { "epoch": 0.184586576160103, "grad_norm": 1.6796875, "learning_rate": 0.00019737615612105012, "loss": 3.2049, "step": 3943 }, { "epoch": 0.18463338989993563, "grad_norm": 1.3671875, "learning_rate": 0.0001973748275498864, "loss": 4.2186, "step": 3944 }, { "epoch": 0.18468020363976828, "grad_norm": 1.859375, "learning_rate": 0.00019737349864692357, "loss": 4.4402, "step": 3945 }, { "epoch": 0.1847270173796009, "grad_norm": 1.3203125, "learning_rate": 0.00019737216941216615, "loss": 4.2908, "step": 3946 }, { "epoch": 0.18477383111943355, "grad_norm": 1.328125, "learning_rate": 0.0001973708398456187, "loss": 3.5181, "step": 3947 }, { "epoch": 0.1848206448592662, "grad_norm": 1.3046875, "learning_rate": 0.0001973695099472858, "loss": 3.5957, "step": 3948 }, { "epoch": 0.18486745859909884, "grad_norm": 1.21875, "learning_rate": 0.00019736817971717185, "loss": 3.3883, "step": 3949 }, { "epoch": 0.18491427233893148, "grad_norm": 1.265625, "learning_rate": 0.00019736684915528147, "loss": 3.6508, "step": 3950 }, { "epoch": 0.18496108607876413, "grad_norm": 1.0703125, "learning_rate": 0.00019736551826161919, "loss": 3.2856, "step": 3951 }, { "epoch": 0.18500789981859675, "grad_norm": 1.1953125, "learning_rate": 0.00019736418703618952, "loss": 3.739, "step": 3952 }, { "epoch": 0.1850547135584294, "grad_norm": 1.1015625, "learning_rate": 0.00019736285547899707, "loss": 3.5144, "step": 3953 }, { "epoch": 0.18510152729826204, "grad_norm": 1.2421875, "learning_rate": 0.00019736152359004625, "loss": 3.6494, "step": 3954 }, { "epoch": 0.1851483410380947, "grad_norm": 1.3046875, "learning_rate": 0.0001973601913693417, "loss": 3.7203, "step": 3955 }, { "epoch": 0.18519515477792733, "grad_norm": 1.4453125, "learning_rate": 0.0001973588588168879, "loss": 3.929, "step": 3956 }, { "epoch": 0.18524196851775995, "grad_norm": 1.28125, "learning_rate": 0.00019735752593268946, "loss": 3.6248, "step": 3957 }, { "epoch": 0.1852887822575926, "grad_norm": 1.390625, "learning_rate": 0.00019735619271675083, "loss": 3.9198, "step": 3958 }, { "epoch": 0.18533559599742525, "grad_norm": 1.296875, "learning_rate": 0.00019735485916907663, "loss": 3.9565, "step": 3959 }, { "epoch": 0.1853824097372579, "grad_norm": 1.46875, "learning_rate": 0.00019735352528967137, "loss": 3.9268, "step": 3960 }, { "epoch": 0.18542922347709054, "grad_norm": 1.5703125, "learning_rate": 0.0001973521910785396, "loss": 3.7615, "step": 3961 }, { "epoch": 0.18547603721692316, "grad_norm": 1.3671875, "learning_rate": 0.00019735085653568586, "loss": 3.821, "step": 3962 }, { "epoch": 0.1855228509567558, "grad_norm": 1.5546875, "learning_rate": 0.0001973495216611147, "loss": 3.6701, "step": 3963 }, { "epoch": 0.18556966469658845, "grad_norm": 1.34375, "learning_rate": 0.00019734818645483068, "loss": 3.6143, "step": 3964 }, { "epoch": 0.1856164784364211, "grad_norm": 1.328125, "learning_rate": 0.00019734685091683836, "loss": 3.6942, "step": 3965 }, { "epoch": 0.18566329217625374, "grad_norm": 1.609375, "learning_rate": 0.00019734551504714225, "loss": 3.9662, "step": 3966 }, { "epoch": 0.18571010591608636, "grad_norm": 1.2265625, "learning_rate": 0.0001973441788457469, "loss": 3.6724, "step": 3967 }, { "epoch": 0.185756919655919, "grad_norm": 2.140625, "learning_rate": 0.00019734284231265692, "loss": 3.6018, "step": 3968 }, { "epoch": 0.18580373339575165, "grad_norm": 1.4453125, "learning_rate": 0.00019734150544787685, "loss": 3.4017, "step": 3969 }, { "epoch": 0.1858505471355843, "grad_norm": 1.2734375, "learning_rate": 0.0001973401682514112, "loss": 3.5063, "step": 3970 }, { "epoch": 0.18589736087541694, "grad_norm": 1.203125, "learning_rate": 0.00019733883072326454, "loss": 3.3664, "step": 3971 }, { "epoch": 0.18594417461524956, "grad_norm": 1.359375, "learning_rate": 0.00019733749286344145, "loss": 3.7855, "step": 3972 }, { "epoch": 0.1859909883550822, "grad_norm": 1.6953125, "learning_rate": 0.00019733615467194647, "loss": 3.5832, "step": 3973 }, { "epoch": 0.18603780209491486, "grad_norm": 1.1484375, "learning_rate": 0.0001973348161487842, "loss": 3.6611, "step": 3974 }, { "epoch": 0.1860846158347475, "grad_norm": 2.296875, "learning_rate": 0.00019733347729395913, "loss": 3.7678, "step": 3975 }, { "epoch": 0.18613142957458015, "grad_norm": 1.4921875, "learning_rate": 0.00019733213810747594, "loss": 3.4653, "step": 3976 }, { "epoch": 0.18617824331441277, "grad_norm": 1.5, "learning_rate": 0.00019733079858933904, "loss": 3.4851, "step": 3977 }, { "epoch": 0.1862250570542454, "grad_norm": 1.40625, "learning_rate": 0.0001973294587395531, "loss": 3.5687, "step": 3978 }, { "epoch": 0.18627187079407806, "grad_norm": 1.2265625, "learning_rate": 0.00019732811855812263, "loss": 3.3892, "step": 3979 }, { "epoch": 0.1863186845339107, "grad_norm": 1.2421875, "learning_rate": 0.00019732677804505224, "loss": 3.8185, "step": 3980 }, { "epoch": 0.18636549827374335, "grad_norm": 1.484375, "learning_rate": 0.0001973254372003465, "loss": 3.9048, "step": 3981 }, { "epoch": 0.186412312013576, "grad_norm": 1.3828125, "learning_rate": 0.00019732409602400992, "loss": 3.1907, "step": 3982 }, { "epoch": 0.18645912575340862, "grad_norm": 2.484375, "learning_rate": 0.00019732275451604713, "loss": 4.098, "step": 3983 }, { "epoch": 0.18650593949324126, "grad_norm": 1.2890625, "learning_rate": 0.00019732141267646267, "loss": 3.8154, "step": 3984 }, { "epoch": 0.1865527532330739, "grad_norm": 1.3984375, "learning_rate": 0.00019732007050526112, "loss": 3.4313, "step": 3985 }, { "epoch": 0.18659956697290656, "grad_norm": 1.8828125, "learning_rate": 0.00019731872800244707, "loss": 3.4206, "step": 3986 }, { "epoch": 0.1866463807127392, "grad_norm": 1.9375, "learning_rate": 0.0001973173851680251, "loss": 3.5309, "step": 3987 }, { "epoch": 0.18669319445257182, "grad_norm": 1.9609375, "learning_rate": 0.0001973160420019997, "loss": 4.1502, "step": 3988 }, { "epoch": 0.18674000819240447, "grad_norm": 1.296875, "learning_rate": 0.00019731469850437557, "loss": 3.4676, "step": 3989 }, { "epoch": 0.1867868219322371, "grad_norm": 1.2109375, "learning_rate": 0.0001973133546751572, "loss": 3.7056, "step": 3990 }, { "epoch": 0.18683363567206976, "grad_norm": 1.609375, "learning_rate": 0.0001973120105143492, "loss": 3.7363, "step": 3991 }, { "epoch": 0.1868804494119024, "grad_norm": 1.296875, "learning_rate": 0.0001973106660219562, "loss": 3.8205, "step": 3992 }, { "epoch": 0.18692726315173502, "grad_norm": 1.265625, "learning_rate": 0.00019730932119798266, "loss": 3.8712, "step": 3993 }, { "epoch": 0.18697407689156767, "grad_norm": 1.3203125, "learning_rate": 0.00019730797604243326, "loss": 3.6887, "step": 3994 }, { "epoch": 0.18702089063140032, "grad_norm": 1.3828125, "learning_rate": 0.00019730663055531257, "loss": 3.8197, "step": 3995 }, { "epoch": 0.18706770437123296, "grad_norm": 1.390625, "learning_rate": 0.00019730528473662512, "loss": 3.8986, "step": 3996 }, { "epoch": 0.1871145181110656, "grad_norm": 1.0859375, "learning_rate": 0.00019730393858637558, "loss": 3.8165, "step": 3997 }, { "epoch": 0.18716133185089823, "grad_norm": 1.671875, "learning_rate": 0.00019730259210456848, "loss": 3.5429, "step": 3998 }, { "epoch": 0.18720814559073087, "grad_norm": 1.21875, "learning_rate": 0.00019730124529120842, "loss": 3.7131, "step": 3999 }, { "epoch": 0.18725495933056352, "grad_norm": 1.2890625, "learning_rate": 0.0001972998981463, "loss": 3.7655, "step": 4000 }, { "epoch": 0.18730177307039617, "grad_norm": 1.25, "learning_rate": 0.00019729855066984777, "loss": 3.6685, "step": 4001 }, { "epoch": 0.1873485868102288, "grad_norm": 1.5234375, "learning_rate": 0.0001972972028618564, "loss": 3.2134, "step": 4002 }, { "epoch": 0.18739540055006143, "grad_norm": 1.3828125, "learning_rate": 0.00019729585472233038, "loss": 4.0118, "step": 4003 }, { "epoch": 0.18744221428989408, "grad_norm": 1.4609375, "learning_rate": 0.00019729450625127442, "loss": 3.803, "step": 4004 }, { "epoch": 0.18748902802972672, "grad_norm": 1.15625, "learning_rate": 0.00019729315744869302, "loss": 3.6343, "step": 4005 }, { "epoch": 0.18753584176955937, "grad_norm": 1.15625, "learning_rate": 0.0001972918083145908, "loss": 3.8223, "step": 4006 }, { "epoch": 0.18758265550939202, "grad_norm": 1.375, "learning_rate": 0.00019729045884897236, "loss": 3.7378, "step": 4007 }, { "epoch": 0.18762946924922463, "grad_norm": 1.3359375, "learning_rate": 0.00019728910905184233, "loss": 3.3083, "step": 4008 }, { "epoch": 0.18767628298905728, "grad_norm": 1.359375, "learning_rate": 0.0001972877589232053, "loss": 3.237, "step": 4009 }, { "epoch": 0.18772309672888993, "grad_norm": 1.8125, "learning_rate": 0.0001972864084630658, "loss": 4.0448, "step": 4010 }, { "epoch": 0.18776991046872257, "grad_norm": 1.578125, "learning_rate": 0.00019728505767142857, "loss": 3.8192, "step": 4011 }, { "epoch": 0.18781672420855522, "grad_norm": 1.1015625, "learning_rate": 0.00019728370654829806, "loss": 3.7051, "step": 4012 }, { "epoch": 0.18786353794838787, "grad_norm": 1.671875, "learning_rate": 0.00019728235509367895, "loss": 3.8511, "step": 4013 }, { "epoch": 0.18791035168822048, "grad_norm": 1.4375, "learning_rate": 0.00019728100330757586, "loss": 3.5369, "step": 4014 }, { "epoch": 0.18795716542805313, "grad_norm": 1.1484375, "learning_rate": 0.00019727965118999336, "loss": 3.4219, "step": 4015 }, { "epoch": 0.18800397916788578, "grad_norm": 1.53125, "learning_rate": 0.00019727829874093607, "loss": 3.478, "step": 4016 }, { "epoch": 0.18805079290771842, "grad_norm": 1.125, "learning_rate": 0.0001972769459604086, "loss": 3.3857, "step": 4017 }, { "epoch": 0.18809760664755107, "grad_norm": 1.359375, "learning_rate": 0.00019727559284841558, "loss": 3.5447, "step": 4018 }, { "epoch": 0.1881444203873837, "grad_norm": 1.25, "learning_rate": 0.0001972742394049616, "loss": 3.8337, "step": 4019 }, { "epoch": 0.18819123412721633, "grad_norm": 1.2734375, "learning_rate": 0.00019727288563005125, "loss": 3.4928, "step": 4020 }, { "epoch": 0.18823804786704898, "grad_norm": 1.1796875, "learning_rate": 0.0001972715315236892, "loss": 3.5623, "step": 4021 }, { "epoch": 0.18828486160688163, "grad_norm": 2.046875, "learning_rate": 0.00019727017708588, "loss": 3.7166, "step": 4022 }, { "epoch": 0.18833167534671427, "grad_norm": 1.09375, "learning_rate": 0.00019726882231662827, "loss": 3.4795, "step": 4023 }, { "epoch": 0.1883784890865469, "grad_norm": 1.2734375, "learning_rate": 0.0001972674672159387, "loss": 3.3473, "step": 4024 }, { "epoch": 0.18842530282637954, "grad_norm": 1.3125, "learning_rate": 0.00019726611178381582, "loss": 3.8836, "step": 4025 }, { "epoch": 0.18847211656621218, "grad_norm": 1.28125, "learning_rate": 0.0001972647560202643, "loss": 3.4573, "step": 4026 }, { "epoch": 0.18851893030604483, "grad_norm": 1.921875, "learning_rate": 0.00019726339992528875, "loss": 3.9237, "step": 4027 }, { "epoch": 0.18856574404587748, "grad_norm": 1.453125, "learning_rate": 0.0001972620434988938, "loss": 3.6639, "step": 4028 }, { "epoch": 0.1886125577857101, "grad_norm": 1.296875, "learning_rate": 0.00019726068674108402, "loss": 3.2355, "step": 4029 }, { "epoch": 0.18865937152554274, "grad_norm": 1.4296875, "learning_rate": 0.0001972593296518641, "loss": 3.3806, "step": 4030 }, { "epoch": 0.1887061852653754, "grad_norm": 1.2109375, "learning_rate": 0.00019725797223123863, "loss": 3.7537, "step": 4031 }, { "epoch": 0.18875299900520803, "grad_norm": 1.421875, "learning_rate": 0.00019725661447921223, "loss": 3.6044, "step": 4032 }, { "epoch": 0.18879981274504068, "grad_norm": 1.15625, "learning_rate": 0.00019725525639578958, "loss": 3.645, "step": 4033 }, { "epoch": 0.1888466264848733, "grad_norm": 1.140625, "learning_rate": 0.0001972538979809752, "loss": 3.6632, "step": 4034 }, { "epoch": 0.18889344022470594, "grad_norm": 1.40625, "learning_rate": 0.00019725253923477386, "loss": 3.7902, "step": 4035 }, { "epoch": 0.1889402539645386, "grad_norm": 1.1171875, "learning_rate": 0.00019725118015719004, "loss": 3.7055, "step": 4036 }, { "epoch": 0.18898706770437124, "grad_norm": 1.0859375, "learning_rate": 0.00019724982074822848, "loss": 3.5271, "step": 4037 }, { "epoch": 0.18903388144420388, "grad_norm": 1.125, "learning_rate": 0.00019724846100789377, "loss": 3.233, "step": 4038 }, { "epoch": 0.1890806951840365, "grad_norm": 1.3984375, "learning_rate": 0.00019724710093619056, "loss": 3.7617, "step": 4039 }, { "epoch": 0.18912750892386915, "grad_norm": 1.3984375, "learning_rate": 0.00019724574053312348, "loss": 3.5671, "step": 4040 }, { "epoch": 0.1891743226637018, "grad_norm": 1.28125, "learning_rate": 0.00019724437979869714, "loss": 3.6463, "step": 4041 }, { "epoch": 0.18922113640353444, "grad_norm": 1.2265625, "learning_rate": 0.00019724301873291623, "loss": 3.4182, "step": 4042 }, { "epoch": 0.1892679501433671, "grad_norm": 1.546875, "learning_rate": 0.00019724165733578532, "loss": 4.1111, "step": 4043 }, { "epoch": 0.18931476388319973, "grad_norm": 1.0390625, "learning_rate": 0.0001972402956073091, "loss": 3.0067, "step": 4044 }, { "epoch": 0.18936157762303235, "grad_norm": 1.46875, "learning_rate": 0.00019723893354749218, "loss": 3.2836, "step": 4045 }, { "epoch": 0.189408391362865, "grad_norm": 1.6015625, "learning_rate": 0.00019723757115633922, "loss": 3.5894, "step": 4046 }, { "epoch": 0.18945520510269764, "grad_norm": 1.171875, "learning_rate": 0.00019723620843385486, "loss": 3.7163, "step": 4047 }, { "epoch": 0.1895020188425303, "grad_norm": 1.1171875, "learning_rate": 0.00019723484538004376, "loss": 3.5801, "step": 4048 }, { "epoch": 0.18954883258236294, "grad_norm": 1.4140625, "learning_rate": 0.0001972334819949105, "loss": 3.8039, "step": 4049 }, { "epoch": 0.18959564632219555, "grad_norm": 1.2265625, "learning_rate": 0.00019723211827845982, "loss": 3.0734, "step": 4050 }, { "epoch": 0.1896424600620282, "grad_norm": 1.328125, "learning_rate": 0.00019723075423069627, "loss": 3.4323, "step": 4051 }, { "epoch": 0.18968927380186085, "grad_norm": 1.359375, "learning_rate": 0.0001972293898516246, "loss": 3.6583, "step": 4052 }, { "epoch": 0.1897360875416935, "grad_norm": 1.359375, "learning_rate": 0.00019722802514124936, "loss": 3.2631, "step": 4053 }, { "epoch": 0.18978290128152614, "grad_norm": 1.2734375, "learning_rate": 0.00019722666009957526, "loss": 3.7601, "step": 4054 }, { "epoch": 0.18982971502135876, "grad_norm": 1.3984375, "learning_rate": 0.00019722529472660693, "loss": 3.4034, "step": 4055 }, { "epoch": 0.1898765287611914, "grad_norm": 4.25, "learning_rate": 0.00019722392902234902, "loss": 3.0925, "step": 4056 }, { "epoch": 0.18992334250102405, "grad_norm": 0.9609375, "learning_rate": 0.00019722256298680623, "loss": 2.8708, "step": 4057 }, { "epoch": 0.1899701562408567, "grad_norm": 1.234375, "learning_rate": 0.00019722119661998315, "loss": 3.4219, "step": 4058 }, { "epoch": 0.19001696998068934, "grad_norm": 1.1953125, "learning_rate": 0.00019721982992188447, "loss": 3.5284, "step": 4059 }, { "epoch": 0.19006378372052196, "grad_norm": 1.1171875, "learning_rate": 0.0001972184628925148, "loss": 3.746, "step": 4060 }, { "epoch": 0.1901105974603546, "grad_norm": 1.3671875, "learning_rate": 0.00019721709553187887, "loss": 3.9221, "step": 4061 }, { "epoch": 0.19015741120018725, "grad_norm": 1.296875, "learning_rate": 0.00019721572783998133, "loss": 3.4678, "step": 4062 }, { "epoch": 0.1902042249400199, "grad_norm": 1.921875, "learning_rate": 0.00019721435981682678, "loss": 4.1755, "step": 4063 }, { "epoch": 0.19025103867985255, "grad_norm": 1.15625, "learning_rate": 0.00019721299146241994, "loss": 3.5611, "step": 4064 }, { "epoch": 0.19029785241968517, "grad_norm": 1.2578125, "learning_rate": 0.00019721162277676544, "loss": 3.759, "step": 4065 }, { "epoch": 0.1903446661595178, "grad_norm": 1.0390625, "learning_rate": 0.00019721025375986795, "loss": 3.4271, "step": 4066 }, { "epoch": 0.19039147989935046, "grad_norm": 1.2421875, "learning_rate": 0.00019720888441173212, "loss": 3.7004, "step": 4067 }, { "epoch": 0.1904382936391831, "grad_norm": 1.78125, "learning_rate": 0.0001972075147323627, "loss": 3.528, "step": 4068 }, { "epoch": 0.19048510737901575, "grad_norm": 1.7265625, "learning_rate": 0.00019720614472176423, "loss": 3.9646, "step": 4069 }, { "epoch": 0.19053192111884837, "grad_norm": 1.1328125, "learning_rate": 0.00019720477437994144, "loss": 3.5888, "step": 4070 }, { "epoch": 0.19057873485868101, "grad_norm": 1.46875, "learning_rate": 0.00019720340370689904, "loss": 3.49, "step": 4071 }, { "epoch": 0.19062554859851366, "grad_norm": 1.0234375, "learning_rate": 0.00019720203270264165, "loss": 3.7534, "step": 4072 }, { "epoch": 0.1906723623383463, "grad_norm": 1.0625, "learning_rate": 0.00019720066136717394, "loss": 5.0479, "step": 4073 }, { "epoch": 0.19071917607817895, "grad_norm": 1.234375, "learning_rate": 0.0001971992897005006, "loss": 3.2215, "step": 4074 }, { "epoch": 0.1907659898180116, "grad_norm": 1.3359375, "learning_rate": 0.0001971979177026263, "loss": 3.0835, "step": 4075 }, { "epoch": 0.19081280355784422, "grad_norm": 1.2109375, "learning_rate": 0.0001971965453735557, "loss": 3.5521, "step": 4076 }, { "epoch": 0.19085961729767686, "grad_norm": 1.2109375, "learning_rate": 0.0001971951727132935, "loss": 3.5333, "step": 4077 }, { "epoch": 0.1909064310375095, "grad_norm": 1.1484375, "learning_rate": 0.0001971937997218444, "loss": 3.5459, "step": 4078 }, { "epoch": 0.19095324477734216, "grad_norm": 1.1796875, "learning_rate": 0.000197192426399213, "loss": 3.8031, "step": 4079 }, { "epoch": 0.1910000585171748, "grad_norm": 1.75, "learning_rate": 0.00019719105274540404, "loss": 3.362, "step": 4080 }, { "epoch": 0.19104687225700742, "grad_norm": 1.6484375, "learning_rate": 0.00019718967876042218, "loss": 4.0198, "step": 4081 }, { "epoch": 0.19109368599684007, "grad_norm": 1.1953125, "learning_rate": 0.00019718830444427212, "loss": 3.6982, "step": 4082 }, { "epoch": 0.19114049973667271, "grad_norm": 1.0546875, "learning_rate": 0.00019718692979695852, "loss": 3.5707, "step": 4083 }, { "epoch": 0.19118731347650536, "grad_norm": 1.140625, "learning_rate": 0.00019718555481848608, "loss": 3.3477, "step": 4084 }, { "epoch": 0.191234127216338, "grad_norm": 1.2265625, "learning_rate": 0.00019718417950885945, "loss": 3.5339, "step": 4085 }, { "epoch": 0.19128094095617063, "grad_norm": 0.99609375, "learning_rate": 0.0001971828038680834, "loss": 4.0152, "step": 4086 }, { "epoch": 0.19132775469600327, "grad_norm": 1.5546875, "learning_rate": 0.0001971814278961625, "loss": 3.8697, "step": 4087 }, { "epoch": 0.19137456843583592, "grad_norm": 1.1796875, "learning_rate": 0.00019718005159310157, "loss": 3.686, "step": 4088 }, { "epoch": 0.19142138217566856, "grad_norm": 1.3515625, "learning_rate": 0.0001971786749589052, "loss": 3.6023, "step": 4089 }, { "epoch": 0.1914681959155012, "grad_norm": 1.4765625, "learning_rate": 0.00019717729799357812, "loss": 3.1126, "step": 4090 }, { "epoch": 0.19151500965533383, "grad_norm": 1.203125, "learning_rate": 0.000197175920697125, "loss": 3.4704, "step": 4091 }, { "epoch": 0.19156182339516648, "grad_norm": 1.640625, "learning_rate": 0.00019717454306955053, "loss": 3.3554, "step": 4092 }, { "epoch": 0.19160863713499912, "grad_norm": 1.5234375, "learning_rate": 0.00019717316511085948, "loss": 3.399, "step": 4093 }, { "epoch": 0.19165545087483177, "grad_norm": 1.25, "learning_rate": 0.00019717178682105644, "loss": 3.819, "step": 4094 }, { "epoch": 0.1917022646146644, "grad_norm": 2.296875, "learning_rate": 0.00019717040820014617, "loss": 3.6845, "step": 4095 }, { "epoch": 0.19174907835449703, "grad_norm": 1.28125, "learning_rate": 0.00019716902924813334, "loss": 3.2419, "step": 4096 }, { "epoch": 0.19179589209432968, "grad_norm": 1.3203125, "learning_rate": 0.00019716764996502266, "loss": 3.7621, "step": 4097 }, { "epoch": 0.19184270583416232, "grad_norm": 1.3984375, "learning_rate": 0.00019716627035081886, "loss": 3.5565, "step": 4098 }, { "epoch": 0.19188951957399497, "grad_norm": 1.4765625, "learning_rate": 0.00019716489040552654, "loss": 3.8366, "step": 4099 }, { "epoch": 0.19193633331382762, "grad_norm": 1.1875, "learning_rate": 0.00019716351012915055, "loss": 2.9817, "step": 4100 }, { "epoch": 0.19198314705366024, "grad_norm": 1.5859375, "learning_rate": 0.00019716212952169547, "loss": 3.827, "step": 4101 }, { "epoch": 0.19202996079349288, "grad_norm": 1.2890625, "learning_rate": 0.00019716074858316607, "loss": 3.5078, "step": 4102 }, { "epoch": 0.19207677453332553, "grad_norm": 1.6015625, "learning_rate": 0.00019715936731356704, "loss": 3.2616, "step": 4103 }, { "epoch": 0.19212358827315817, "grad_norm": 1.171875, "learning_rate": 0.00019715798571290306, "loss": 3.4926, "step": 4104 }, { "epoch": 0.19217040201299082, "grad_norm": 1.2109375, "learning_rate": 0.00019715660378117887, "loss": 3.1934, "step": 4105 }, { "epoch": 0.19221721575282347, "grad_norm": 1.890625, "learning_rate": 0.00019715522151839916, "loss": 3.8878, "step": 4106 }, { "epoch": 0.19226402949265609, "grad_norm": 1.6328125, "learning_rate": 0.00019715383892456866, "loss": 3.7787, "step": 4107 }, { "epoch": 0.19231084323248873, "grad_norm": 1.2109375, "learning_rate": 0.00019715245599969205, "loss": 3.3367, "step": 4108 }, { "epoch": 0.19235765697232138, "grad_norm": 1.0625, "learning_rate": 0.0001971510727437741, "loss": 3.5723, "step": 4109 }, { "epoch": 0.19240447071215402, "grad_norm": 1.3671875, "learning_rate": 0.00019714968915681945, "loss": 3.4838, "step": 4110 }, { "epoch": 0.19245128445198667, "grad_norm": 1.3203125, "learning_rate": 0.00019714830523883284, "loss": 3.8112, "step": 4111 }, { "epoch": 0.1924980981918193, "grad_norm": 2.328125, "learning_rate": 0.00019714692098981904, "loss": 3.5115, "step": 4112 }, { "epoch": 0.19254491193165194, "grad_norm": 1.5859375, "learning_rate": 0.0001971455364097827, "loss": 3.7965, "step": 4113 }, { "epoch": 0.19259172567148458, "grad_norm": 2.046875, "learning_rate": 0.00019714415149872856, "loss": 3.5638, "step": 4114 }, { "epoch": 0.19263853941131723, "grad_norm": 2.25, "learning_rate": 0.00019714276625666134, "loss": 3.7361, "step": 4115 }, { "epoch": 0.19268535315114987, "grad_norm": 1.0625, "learning_rate": 0.00019714138068358577, "loss": 3.631, "step": 4116 }, { "epoch": 0.1927321668909825, "grad_norm": 2.875, "learning_rate": 0.00019713999477950653, "loss": 3.2792, "step": 4117 }, { "epoch": 0.19277898063081514, "grad_norm": 1.4453125, "learning_rate": 0.0001971386085444284, "loss": 3.6955, "step": 4118 }, { "epoch": 0.19282579437064779, "grad_norm": 1.5859375, "learning_rate": 0.00019713722197835606, "loss": 3.8663, "step": 4119 }, { "epoch": 0.19287260811048043, "grad_norm": 1.3359375, "learning_rate": 0.00019713583508129426, "loss": 3.6186, "step": 4120 }, { "epoch": 0.19291942185031308, "grad_norm": 1.328125, "learning_rate": 0.00019713444785324772, "loss": 3.4718, "step": 4121 }, { "epoch": 0.1929662355901457, "grad_norm": 1.765625, "learning_rate": 0.00019713306029422117, "loss": 3.8208, "step": 4122 }, { "epoch": 0.19301304932997834, "grad_norm": 1.3515625, "learning_rate": 0.00019713167240421932, "loss": 3.3544, "step": 4123 }, { "epoch": 0.193059863069811, "grad_norm": 1.34375, "learning_rate": 0.00019713028418324687, "loss": 3.5306, "step": 4124 }, { "epoch": 0.19310667680964363, "grad_norm": 1.90625, "learning_rate": 0.00019712889563130865, "loss": 3.6076, "step": 4125 }, { "epoch": 0.19315349054947628, "grad_norm": 1.328125, "learning_rate": 0.00019712750674840933, "loss": 3.5554, "step": 4126 }, { "epoch": 0.1932003042893089, "grad_norm": 1.3828125, "learning_rate": 0.00019712611753455362, "loss": 3.5291, "step": 4127 }, { "epoch": 0.19324711802914155, "grad_norm": 1.1953125, "learning_rate": 0.0001971247279897463, "loss": 3.2045, "step": 4128 }, { "epoch": 0.1932939317689742, "grad_norm": 1.1875, "learning_rate": 0.00019712333811399208, "loss": 3.4066, "step": 4129 }, { "epoch": 0.19334074550880684, "grad_norm": 1.3203125, "learning_rate": 0.00019712194790729568, "loss": 3.4845, "step": 4130 }, { "epoch": 0.19338755924863948, "grad_norm": 1.3046875, "learning_rate": 0.00019712055736966185, "loss": 3.3114, "step": 4131 }, { "epoch": 0.1934343729884721, "grad_norm": 1.65625, "learning_rate": 0.00019711916650109538, "loss": 3.0636, "step": 4132 }, { "epoch": 0.19348118672830475, "grad_norm": 1.3828125, "learning_rate": 0.0001971177753016009, "loss": 3.7067, "step": 4133 }, { "epoch": 0.1935280004681374, "grad_norm": 1.1484375, "learning_rate": 0.00019711638377118326, "loss": 3.221, "step": 4134 }, { "epoch": 0.19357481420797004, "grad_norm": 1.2890625, "learning_rate": 0.00019711499190984717, "loss": 3.3574, "step": 4135 }, { "epoch": 0.1936216279478027, "grad_norm": 1.2890625, "learning_rate": 0.00019711359971759734, "loss": 3.9958, "step": 4136 }, { "epoch": 0.19366844168763533, "grad_norm": 1.3046875, "learning_rate": 0.0001971122071944385, "loss": 3.735, "step": 4137 }, { "epoch": 0.19371525542746795, "grad_norm": 1.4375, "learning_rate": 0.00019711081434037548, "loss": 3.6454, "step": 4138 }, { "epoch": 0.1937620691673006, "grad_norm": 1.234375, "learning_rate": 0.00019710942115541293, "loss": 3.585, "step": 4139 }, { "epoch": 0.19380888290713325, "grad_norm": 1.28125, "learning_rate": 0.00019710802763955564, "loss": 3.8684, "step": 4140 }, { "epoch": 0.1938556966469659, "grad_norm": 1.5859375, "learning_rate": 0.00019710663379280842, "loss": 3.7946, "step": 4141 }, { "epoch": 0.19390251038679854, "grad_norm": 1.328125, "learning_rate": 0.0001971052396151759, "loss": 3.5374, "step": 4142 }, { "epoch": 0.19394932412663116, "grad_norm": 1.5234375, "learning_rate": 0.0001971038451066629, "loss": 3.6832, "step": 4143 }, { "epoch": 0.1939961378664638, "grad_norm": 1.109375, "learning_rate": 0.00019710245026727417, "loss": 3.4195, "step": 4144 }, { "epoch": 0.19404295160629645, "grad_norm": 1.0625, "learning_rate": 0.00019710105509701444, "loss": 3.5116, "step": 4145 }, { "epoch": 0.1940897653461291, "grad_norm": 1.3203125, "learning_rate": 0.0001970996595958885, "loss": 3.1183, "step": 4146 }, { "epoch": 0.19413657908596174, "grad_norm": 1.359375, "learning_rate": 0.00019709826376390106, "loss": 2.7455, "step": 4147 }, { "epoch": 0.19418339282579436, "grad_norm": 1.3671875, "learning_rate": 0.0001970968676010569, "loss": 3.0913, "step": 4148 }, { "epoch": 0.194230206565627, "grad_norm": 1.234375, "learning_rate": 0.0001970954711073608, "loss": 4.0953, "step": 4149 }, { "epoch": 0.19427702030545965, "grad_norm": 1.4296875, "learning_rate": 0.00019709407428281748, "loss": 3.9237, "step": 4150 }, { "epoch": 0.1943238340452923, "grad_norm": 1.8828125, "learning_rate": 0.00019709267712743168, "loss": 3.6436, "step": 4151 }, { "epoch": 0.19437064778512494, "grad_norm": 1.5078125, "learning_rate": 0.00019709127964120823, "loss": 3.4479, "step": 4152 }, { "epoch": 0.19441746152495756, "grad_norm": 1.328125, "learning_rate": 0.00019708988182415188, "loss": 3.934, "step": 4153 }, { "epoch": 0.1944642752647902, "grad_norm": 1.3359375, "learning_rate": 0.00019708848367626733, "loss": 4.1488, "step": 4154 }, { "epoch": 0.19451108900462286, "grad_norm": 1.8125, "learning_rate": 0.0001970870851975594, "loss": 3.6783, "step": 4155 }, { "epoch": 0.1945579027444555, "grad_norm": 1.2421875, "learning_rate": 0.00019708568638803284, "loss": 3.2927, "step": 4156 }, { "epoch": 0.19460471648428815, "grad_norm": 1.5546875, "learning_rate": 0.00019708428724769244, "loss": 3.5698, "step": 4157 }, { "epoch": 0.19465153022412077, "grad_norm": 1.4296875, "learning_rate": 0.00019708288777654288, "loss": 3.8794, "step": 4158 }, { "epoch": 0.1946983439639534, "grad_norm": 1.6328125, "learning_rate": 0.00019708148797458903, "loss": 3.5446, "step": 4159 }, { "epoch": 0.19474515770378606, "grad_norm": 1.3359375, "learning_rate": 0.00019708008784183562, "loss": 3.7421, "step": 4160 }, { "epoch": 0.1947919714436187, "grad_norm": 1.4375, "learning_rate": 0.00019707868737828743, "loss": 3.7263, "step": 4161 }, { "epoch": 0.19483878518345135, "grad_norm": 1.28125, "learning_rate": 0.0001970772865839492, "loss": 3.5383, "step": 4162 }, { "epoch": 0.19488559892328397, "grad_norm": 1.4453125, "learning_rate": 0.00019707588545882576, "loss": 3.6647, "step": 4163 }, { "epoch": 0.19493241266311662, "grad_norm": 1.28125, "learning_rate": 0.00019707448400292182, "loss": 3.6876, "step": 4164 }, { "epoch": 0.19497922640294926, "grad_norm": 1.5234375, "learning_rate": 0.0001970730822162422, "loss": 3.6633, "step": 4165 }, { "epoch": 0.1950260401427819, "grad_norm": 1.171875, "learning_rate": 0.00019707168009879168, "loss": 3.6138, "step": 4166 }, { "epoch": 0.19507285388261456, "grad_norm": 1.8203125, "learning_rate": 0.00019707027765057498, "loss": 4.0471, "step": 4167 }, { "epoch": 0.1951196676224472, "grad_norm": 1.4609375, "learning_rate": 0.00019706887487159697, "loss": 3.9802, "step": 4168 }, { "epoch": 0.19516648136227982, "grad_norm": 1.8046875, "learning_rate": 0.00019706747176186237, "loss": 3.8625, "step": 4169 }, { "epoch": 0.19521329510211247, "grad_norm": 1.4453125, "learning_rate": 0.00019706606832137593, "loss": 3.4539, "step": 4170 }, { "epoch": 0.1952601088419451, "grad_norm": 2.171875, "learning_rate": 0.00019706466455014253, "loss": 3.3251, "step": 4171 }, { "epoch": 0.19530692258177776, "grad_norm": 1.125, "learning_rate": 0.00019706326044816687, "loss": 3.6301, "step": 4172 }, { "epoch": 0.1953537363216104, "grad_norm": 1.2265625, "learning_rate": 0.00019706185601545373, "loss": 3.8191, "step": 4173 }, { "epoch": 0.19540055006144302, "grad_norm": 2.078125, "learning_rate": 0.00019706045125200794, "loss": 3.3043, "step": 4174 }, { "epoch": 0.19544736380127567, "grad_norm": 1.4609375, "learning_rate": 0.00019705904615783428, "loss": 3.9624, "step": 4175 }, { "epoch": 0.19549417754110832, "grad_norm": 1.171875, "learning_rate": 0.00019705764073293757, "loss": 3.6916, "step": 4176 }, { "epoch": 0.19554099128094096, "grad_norm": 1.234375, "learning_rate": 0.0001970562349773225, "loss": 3.8454, "step": 4177 }, { "epoch": 0.1955878050207736, "grad_norm": 1.15625, "learning_rate": 0.00019705482889099393, "loss": 3.5402, "step": 4178 }, { "epoch": 0.19563461876060623, "grad_norm": 1.5546875, "learning_rate": 0.00019705342247395666, "loss": 3.1458, "step": 4179 }, { "epoch": 0.19568143250043887, "grad_norm": 1.234375, "learning_rate": 0.00019705201572621547, "loss": 4.3685, "step": 4180 }, { "epoch": 0.19572824624027152, "grad_norm": 1.34375, "learning_rate": 0.0001970506086477751, "loss": 3.3043, "step": 4181 }, { "epoch": 0.19577505998010417, "grad_norm": 1.34375, "learning_rate": 0.00019704920123864045, "loss": 3.1884, "step": 4182 }, { "epoch": 0.1958218737199368, "grad_norm": 1.2734375, "learning_rate": 0.00019704779349881624, "loss": 3.6127, "step": 4183 }, { "epoch": 0.19586868745976943, "grad_norm": 1.359375, "learning_rate": 0.00019704638542830725, "loss": 3.7896, "step": 4184 }, { "epoch": 0.19591550119960208, "grad_norm": 1.359375, "learning_rate": 0.00019704497702711831, "loss": 3.442, "step": 4185 }, { "epoch": 0.19596231493943472, "grad_norm": 1.65625, "learning_rate": 0.00019704356829525427, "loss": 2.9216, "step": 4186 }, { "epoch": 0.19600912867926737, "grad_norm": 1.421875, "learning_rate": 0.00019704215923271984, "loss": 3.5623, "step": 4187 }, { "epoch": 0.19605594241910002, "grad_norm": 1.1640625, "learning_rate": 0.0001970407498395199, "loss": 3.3179, "step": 4188 }, { "epoch": 0.19610275615893263, "grad_norm": 1.546875, "learning_rate": 0.0001970393401156592, "loss": 3.7376, "step": 4189 }, { "epoch": 0.19614956989876528, "grad_norm": 1.359375, "learning_rate": 0.00019703793006114255, "loss": 3.8565, "step": 4190 }, { "epoch": 0.19619638363859793, "grad_norm": 1.2109375, "learning_rate": 0.00019703651967597474, "loss": 4.0689, "step": 4191 }, { "epoch": 0.19624319737843057, "grad_norm": 1.3046875, "learning_rate": 0.0001970351089601606, "loss": 3.2273, "step": 4192 }, { "epoch": 0.19629001111826322, "grad_norm": 2.84375, "learning_rate": 0.00019703369791370499, "loss": 3.7452, "step": 4193 }, { "epoch": 0.19633682485809584, "grad_norm": 1.78125, "learning_rate": 0.00019703228653661264, "loss": 4.0911, "step": 4194 }, { "epoch": 0.19638363859792848, "grad_norm": 1.2890625, "learning_rate": 0.00019703087482888837, "loss": 3.5042, "step": 4195 }, { "epoch": 0.19643045233776113, "grad_norm": 1.3046875, "learning_rate": 0.000197029462790537, "loss": 3.5991, "step": 4196 }, { "epoch": 0.19647726607759378, "grad_norm": 1.6484375, "learning_rate": 0.00019702805042156333, "loss": 3.5225, "step": 4197 }, { "epoch": 0.19652407981742642, "grad_norm": 1.2578125, "learning_rate": 0.0001970266377219722, "loss": 3.6959, "step": 4198 }, { "epoch": 0.19657089355725907, "grad_norm": 1.1171875, "learning_rate": 0.0001970252246917684, "loss": 3.0879, "step": 4199 }, { "epoch": 0.1966177072970917, "grad_norm": 1.953125, "learning_rate": 0.00019702381133095676, "loss": 3.6353, "step": 4200 }, { "epoch": 0.19666452103692433, "grad_norm": 1.234375, "learning_rate": 0.00019702239763954213, "loss": 3.2581, "step": 4201 }, { "epoch": 0.19671133477675698, "grad_norm": 1.2109375, "learning_rate": 0.00019702098361752926, "loss": 4.3451, "step": 4202 }, { "epoch": 0.19675814851658963, "grad_norm": 1.1796875, "learning_rate": 0.000197019569264923, "loss": 3.575, "step": 4203 }, { "epoch": 0.19680496225642227, "grad_norm": 1.5390625, "learning_rate": 0.00019701815458172814, "loss": 3.3514, "step": 4204 }, { "epoch": 0.1968517759962549, "grad_norm": 1.015625, "learning_rate": 0.00019701673956794955, "loss": 3.3265, "step": 4205 }, { "epoch": 0.19689858973608754, "grad_norm": 1.1796875, "learning_rate": 0.00019701532422359202, "loss": 3.9048, "step": 4206 }, { "epoch": 0.19694540347592018, "grad_norm": 1.4921875, "learning_rate": 0.0001970139085486604, "loss": 3.2746, "step": 4207 }, { "epoch": 0.19699221721575283, "grad_norm": 2.046875, "learning_rate": 0.0001970124925431595, "loss": 3.602, "step": 4208 }, { "epoch": 0.19703903095558548, "grad_norm": 1.3046875, "learning_rate": 0.0001970110762070941, "loss": 3.4984, "step": 4209 }, { "epoch": 0.1970858446954181, "grad_norm": 1.546875, "learning_rate": 0.00019700965954046908, "loss": 3.3577, "step": 4210 }, { "epoch": 0.19713265843525074, "grad_norm": 1.2734375, "learning_rate": 0.00019700824254328928, "loss": 3.5239, "step": 4211 }, { "epoch": 0.1971794721750834, "grad_norm": 1.34375, "learning_rate": 0.00019700682521555946, "loss": 3.2199, "step": 4212 }, { "epoch": 0.19722628591491603, "grad_norm": 1.0234375, "learning_rate": 0.0001970054075572845, "loss": 4.6433, "step": 4213 }, { "epoch": 0.19727309965474868, "grad_norm": 1.4140625, "learning_rate": 0.00019700398956846922, "loss": 3.5424, "step": 4214 }, { "epoch": 0.1973199133945813, "grad_norm": 1.65625, "learning_rate": 0.00019700257124911848, "loss": 3.5928, "step": 4215 }, { "epoch": 0.19736672713441394, "grad_norm": 1.453125, "learning_rate": 0.00019700115259923707, "loss": 3.8389, "step": 4216 }, { "epoch": 0.1974135408742466, "grad_norm": 1.3671875, "learning_rate": 0.00019699973361882983, "loss": 3.6891, "step": 4217 }, { "epoch": 0.19746035461407924, "grad_norm": 1.3359375, "learning_rate": 0.0001969983143079016, "loss": 3.3908, "step": 4218 }, { "epoch": 0.19750716835391188, "grad_norm": 2.5, "learning_rate": 0.00019699689466645722, "loss": 4.1659, "step": 4219 }, { "epoch": 0.1975539820937445, "grad_norm": 1.3203125, "learning_rate": 0.00019699547469450158, "loss": 3.5356, "step": 4220 }, { "epoch": 0.19760079583357715, "grad_norm": 1.703125, "learning_rate": 0.0001969940543920394, "loss": 3.794, "step": 4221 }, { "epoch": 0.1976476095734098, "grad_norm": 1.375, "learning_rate": 0.00019699263375907563, "loss": 3.2369, "step": 4222 }, { "epoch": 0.19769442331324244, "grad_norm": 1.1640625, "learning_rate": 0.00019699121279561504, "loss": 3.7103, "step": 4223 }, { "epoch": 0.1977412370530751, "grad_norm": 1.2890625, "learning_rate": 0.0001969897915016625, "loss": 3.8493, "step": 4224 }, { "epoch": 0.1977880507929077, "grad_norm": 1.0234375, "learning_rate": 0.00019698836987722284, "loss": 3.4715, "step": 4225 }, { "epoch": 0.19783486453274035, "grad_norm": 1.8671875, "learning_rate": 0.00019698694792230094, "loss": 4.1916, "step": 4226 }, { "epoch": 0.197881678272573, "grad_norm": 1.53125, "learning_rate": 0.00019698552563690163, "loss": 3.7893, "step": 4227 }, { "epoch": 0.19792849201240564, "grad_norm": 1.3984375, "learning_rate": 0.00019698410302102973, "loss": 3.6373, "step": 4228 }, { "epoch": 0.1979753057522383, "grad_norm": 1.390625, "learning_rate": 0.00019698268007469012, "loss": 3.5539, "step": 4229 }, { "epoch": 0.19802211949207094, "grad_norm": 1.2421875, "learning_rate": 0.0001969812567978876, "loss": 3.3489, "step": 4230 }, { "epoch": 0.19806893323190355, "grad_norm": 1.546875, "learning_rate": 0.0001969798331906271, "loss": 4.1063, "step": 4231 }, { "epoch": 0.1981157469717362, "grad_norm": 1.328125, "learning_rate": 0.0001969784092529134, "loss": 3.6052, "step": 4232 }, { "epoch": 0.19816256071156885, "grad_norm": 1.6328125, "learning_rate": 0.00019697698498475136, "loss": 3.2913, "step": 4233 }, { "epoch": 0.1982093744514015, "grad_norm": 1.1796875, "learning_rate": 0.00019697556038614588, "loss": 3.4727, "step": 4234 }, { "epoch": 0.19825618819123414, "grad_norm": 1.2734375, "learning_rate": 0.00019697413545710174, "loss": 3.6059, "step": 4235 }, { "epoch": 0.19830300193106676, "grad_norm": 1.4375, "learning_rate": 0.0001969727101976239, "loss": 3.5145, "step": 4236 }, { "epoch": 0.1983498156708994, "grad_norm": 1.140625, "learning_rate": 0.0001969712846077171, "loss": 3.8147, "step": 4237 }, { "epoch": 0.19839662941073205, "grad_norm": 1.0625, "learning_rate": 0.0001969698586873863, "loss": 5.3198, "step": 4238 }, { "epoch": 0.1984434431505647, "grad_norm": 1.359375, "learning_rate": 0.00019696843243663628, "loss": 3.6955, "step": 4239 }, { "epoch": 0.19849025689039734, "grad_norm": 1.3515625, "learning_rate": 0.0001969670058554719, "loss": 3.6159, "step": 4240 }, { "epoch": 0.19853707063022996, "grad_norm": 1.4453125, "learning_rate": 0.00019696557894389812, "loss": 3.743, "step": 4241 }, { "epoch": 0.1985838843700626, "grad_norm": 1.4921875, "learning_rate": 0.0001969641517019197, "loss": 3.422, "step": 4242 }, { "epoch": 0.19863069810989525, "grad_norm": 1.125, "learning_rate": 0.00019696272412954156, "loss": 3.8322, "step": 4243 }, { "epoch": 0.1986775118497279, "grad_norm": 1.3046875, "learning_rate": 0.00019696129622676854, "loss": 3.3782, "step": 4244 }, { "epoch": 0.19872432558956055, "grad_norm": 1.6171875, "learning_rate": 0.00019695986799360545, "loss": 3.5807, "step": 4245 }, { "epoch": 0.19877113932939316, "grad_norm": 1.2734375, "learning_rate": 0.0001969584394300573, "loss": 3.7905, "step": 4246 }, { "epoch": 0.1988179530692258, "grad_norm": 1.4140625, "learning_rate": 0.00019695701053612882, "loss": 3.762, "step": 4247 }, { "epoch": 0.19886476680905846, "grad_norm": 1.40625, "learning_rate": 0.00019695558131182494, "loss": 3.9071, "step": 4248 }, { "epoch": 0.1989115805488911, "grad_norm": 1.421875, "learning_rate": 0.00019695415175715052, "loss": 3.743, "step": 4249 }, { "epoch": 0.19895839428872375, "grad_norm": 1.375, "learning_rate": 0.00019695272187211042, "loss": 2.982, "step": 4250 }, { "epoch": 0.19900520802855637, "grad_norm": 1.1015625, "learning_rate": 0.00019695129165670956, "loss": 3.6799, "step": 4251 }, { "epoch": 0.19905202176838901, "grad_norm": 1.40625, "learning_rate": 0.00019694986111095275, "loss": 3.7706, "step": 4252 }, { "epoch": 0.19909883550822166, "grad_norm": 1.8671875, "learning_rate": 0.00019694843023484491, "loss": 3.1417, "step": 4253 }, { "epoch": 0.1991456492480543, "grad_norm": 1.15625, "learning_rate": 0.00019694699902839086, "loss": 3.7588, "step": 4254 }, { "epoch": 0.19919246298788695, "grad_norm": 1.1328125, "learning_rate": 0.00019694556749159556, "loss": 5.2821, "step": 4255 }, { "epoch": 0.19923927672771957, "grad_norm": 1.4453125, "learning_rate": 0.0001969441356244638, "loss": 3.9533, "step": 4256 }, { "epoch": 0.19928609046755222, "grad_norm": 1.359375, "learning_rate": 0.00019694270342700054, "loss": 3.6507, "step": 4257 }, { "epoch": 0.19933290420738486, "grad_norm": 1.390625, "learning_rate": 0.00019694127089921063, "loss": 3.1015, "step": 4258 }, { "epoch": 0.1993797179472175, "grad_norm": 1.25, "learning_rate": 0.0001969398380410989, "loss": 3.3652, "step": 4259 }, { "epoch": 0.19942653168705016, "grad_norm": 1.1015625, "learning_rate": 0.0001969384048526703, "loss": 3.1972, "step": 4260 }, { "epoch": 0.1994733454268828, "grad_norm": 1.1796875, "learning_rate": 0.00019693697133392967, "loss": 3.7379, "step": 4261 }, { "epoch": 0.19952015916671542, "grad_norm": 1.5703125, "learning_rate": 0.00019693553748488193, "loss": 3.2774, "step": 4262 }, { "epoch": 0.19956697290654807, "grad_norm": 1.375, "learning_rate": 0.00019693410330553194, "loss": 3.5294, "step": 4263 }, { "epoch": 0.19961378664638071, "grad_norm": 1.140625, "learning_rate": 0.00019693266879588462, "loss": 4.0577, "step": 4264 }, { "epoch": 0.19966060038621336, "grad_norm": 1.3828125, "learning_rate": 0.00019693123395594483, "loss": 3.3626, "step": 4265 }, { "epoch": 0.199707414126046, "grad_norm": 2.421875, "learning_rate": 0.00019692979878571743, "loss": 3.6695, "step": 4266 }, { "epoch": 0.19975422786587863, "grad_norm": 1.390625, "learning_rate": 0.00019692836328520736, "loss": 3.4009, "step": 4267 }, { "epoch": 0.19980104160571127, "grad_norm": 1.5390625, "learning_rate": 0.0001969269274544195, "loss": 3.5832, "step": 4268 }, { "epoch": 0.19984785534554392, "grad_norm": 1.2890625, "learning_rate": 0.00019692549129335873, "loss": 3.2334, "step": 4269 }, { "epoch": 0.19989466908537656, "grad_norm": 1.5078125, "learning_rate": 0.00019692405480202995, "loss": 3.7785, "step": 4270 }, { "epoch": 0.1999414828252092, "grad_norm": 1.375, "learning_rate": 0.00019692261798043807, "loss": 3.4945, "step": 4271 }, { "epoch": 0.19998829656504183, "grad_norm": 2.40625, "learning_rate": 0.00019692118082858796, "loss": 3.3351, "step": 4272 }, { "epoch": 0.20003511030487447, "grad_norm": 1.3828125, "learning_rate": 0.00019691974334648453, "loss": 3.9089, "step": 4273 }, { "epoch": 0.20008192404470712, "grad_norm": 1.5078125, "learning_rate": 0.0001969183055341327, "loss": 3.8309, "step": 4274 }, { "epoch": 0.20012873778453977, "grad_norm": 1.34375, "learning_rate": 0.0001969168673915373, "loss": 3.529, "step": 4275 }, { "epoch": 0.2001755515243724, "grad_norm": 1.234375, "learning_rate": 0.00019691542891870332, "loss": 3.7508, "step": 4276 }, { "epoch": 0.20022236526420503, "grad_norm": 1.3515625, "learning_rate": 0.0001969139901156356, "loss": 2.9811, "step": 4277 }, { "epoch": 0.20026917900403768, "grad_norm": 1.34375, "learning_rate": 0.00019691255098233906, "loss": 3.5201, "step": 4278 }, { "epoch": 0.20031599274387032, "grad_norm": 1.234375, "learning_rate": 0.0001969111115188186, "loss": 3.283, "step": 4279 }, { "epoch": 0.20036280648370297, "grad_norm": 1.1328125, "learning_rate": 0.00019690967172507912, "loss": 3.6106, "step": 4280 }, { "epoch": 0.20040962022353562, "grad_norm": 1.6015625, "learning_rate": 0.00019690823160112554, "loss": 3.5097, "step": 4281 }, { "epoch": 0.20045643396336824, "grad_norm": 1.3671875, "learning_rate": 0.00019690679114696278, "loss": 3.4857, "step": 4282 }, { "epoch": 0.20050324770320088, "grad_norm": 1.5, "learning_rate": 0.00019690535036259572, "loss": 3.8312, "step": 4283 }, { "epoch": 0.20055006144303353, "grad_norm": 1.46875, "learning_rate": 0.00019690390924802928, "loss": 3.3845, "step": 4284 }, { "epoch": 0.20059687518286617, "grad_norm": 1.3671875, "learning_rate": 0.00019690246780326836, "loss": 3.593, "step": 4285 }, { "epoch": 0.20064368892269882, "grad_norm": 1.3125, "learning_rate": 0.0001969010260283179, "loss": 3.6769, "step": 4286 }, { "epoch": 0.20069050266253144, "grad_norm": 1.3125, "learning_rate": 0.0001968995839231828, "loss": 3.2378, "step": 4287 }, { "epoch": 0.20073731640236409, "grad_norm": 1.390625, "learning_rate": 0.00019689814148786796, "loss": 3.7296, "step": 4288 }, { "epoch": 0.20078413014219673, "grad_norm": 1.53125, "learning_rate": 0.0001968966987223783, "loss": 3.0043, "step": 4289 }, { "epoch": 0.20083094388202938, "grad_norm": 2.015625, "learning_rate": 0.00019689525562671873, "loss": 4.0974, "step": 4290 }, { "epoch": 0.20087775762186202, "grad_norm": 1.2734375, "learning_rate": 0.0001968938122008942, "loss": 3.3842, "step": 4291 }, { "epoch": 0.20092457136169467, "grad_norm": 1.4921875, "learning_rate": 0.0001968923684449096, "loss": 3.6772, "step": 4292 }, { "epoch": 0.2009713851015273, "grad_norm": 1.3359375, "learning_rate": 0.00019689092435876986, "loss": 3.3991, "step": 4293 }, { "epoch": 0.20101819884135994, "grad_norm": 1.4140625, "learning_rate": 0.00019688947994247986, "loss": 3.4929, "step": 4294 }, { "epoch": 0.20106501258119258, "grad_norm": 1.328125, "learning_rate": 0.0001968880351960446, "loss": 3.3693, "step": 4295 }, { "epoch": 0.20111182632102523, "grad_norm": 1.3359375, "learning_rate": 0.00019688659011946893, "loss": 3.2922, "step": 4296 }, { "epoch": 0.20115864006085787, "grad_norm": 1.3671875, "learning_rate": 0.00019688514471275783, "loss": 2.9804, "step": 4297 }, { "epoch": 0.2012054538006905, "grad_norm": 1.6171875, "learning_rate": 0.00019688369897591617, "loss": 3.6849, "step": 4298 }, { "epoch": 0.20125226754052314, "grad_norm": 1.4765625, "learning_rate": 0.00019688225290894894, "loss": 3.5508, "step": 4299 }, { "epoch": 0.20129908128035578, "grad_norm": 1.3671875, "learning_rate": 0.00019688080651186104, "loss": 3.8771, "step": 4300 }, { "epoch": 0.20134589502018843, "grad_norm": 1.1484375, "learning_rate": 0.00019687935978465734, "loss": 3.2935, "step": 4301 }, { "epoch": 0.20139270876002108, "grad_norm": 1.2734375, "learning_rate": 0.00019687791272734285, "loss": 3.6938, "step": 4302 }, { "epoch": 0.2014395224998537, "grad_norm": 1.0546875, "learning_rate": 0.00019687646533992248, "loss": 4.5498, "step": 4303 }, { "epoch": 0.20148633623968634, "grad_norm": 1.3125, "learning_rate": 0.00019687501762240115, "loss": 3.4106, "step": 4304 }, { "epoch": 0.201533149979519, "grad_norm": 1.8671875, "learning_rate": 0.0001968735695747838, "loss": 3.7548, "step": 4305 }, { "epoch": 0.20157996371935163, "grad_norm": 1.171875, "learning_rate": 0.00019687212119707533, "loss": 3.5088, "step": 4306 }, { "epoch": 0.20162677745918428, "grad_norm": 1.6796875, "learning_rate": 0.00019687067248928074, "loss": 3.9829, "step": 4307 }, { "epoch": 0.2016735911990169, "grad_norm": 1.375, "learning_rate": 0.00019686922345140494, "loss": 3.4865, "step": 4308 }, { "epoch": 0.20172040493884955, "grad_norm": 1.25, "learning_rate": 0.00019686777408345284, "loss": 3.3888, "step": 4309 }, { "epoch": 0.2017672186786822, "grad_norm": 1.671875, "learning_rate": 0.0001968663243854294, "loss": 4.1456, "step": 4310 }, { "epoch": 0.20181403241851484, "grad_norm": 1.390625, "learning_rate": 0.0001968648743573396, "loss": 3.6512, "step": 4311 }, { "epoch": 0.20186084615834748, "grad_norm": 1.5078125, "learning_rate": 0.00019686342399918827, "loss": 3.3584, "step": 4312 }, { "epoch": 0.2019076598981801, "grad_norm": 1.3984375, "learning_rate": 0.00019686197331098045, "loss": 3.9339, "step": 4313 }, { "epoch": 0.20195447363801275, "grad_norm": 1.2734375, "learning_rate": 0.00019686052229272107, "loss": 3.5817, "step": 4314 }, { "epoch": 0.2020012873778454, "grad_norm": 1.296875, "learning_rate": 0.00019685907094441505, "loss": 3.8238, "step": 4315 }, { "epoch": 0.20204810111767804, "grad_norm": 1.390625, "learning_rate": 0.00019685761926606734, "loss": 4.0156, "step": 4316 }, { "epoch": 0.2020949148575107, "grad_norm": 1.3046875, "learning_rate": 0.00019685616725768288, "loss": 3.6526, "step": 4317 }, { "epoch": 0.2021417285973433, "grad_norm": 1.375, "learning_rate": 0.00019685471491926663, "loss": 3.505, "step": 4318 }, { "epoch": 0.20218854233717595, "grad_norm": 2.515625, "learning_rate": 0.00019685326225082358, "loss": 4.5442, "step": 4319 }, { "epoch": 0.2022353560770086, "grad_norm": 1.2265625, "learning_rate": 0.0001968518092523586, "loss": 3.9144, "step": 4320 }, { "epoch": 0.20228216981684125, "grad_norm": 1.3203125, "learning_rate": 0.00019685035592387668, "loss": 3.3636, "step": 4321 }, { "epoch": 0.2023289835566739, "grad_norm": 1.3828125, "learning_rate": 0.00019684890226538278, "loss": 3.8342, "step": 4322 }, { "epoch": 0.20237579729650654, "grad_norm": 1.3515625, "learning_rate": 0.00019684744827688184, "loss": 3.3941, "step": 4323 }, { "epoch": 0.20242261103633916, "grad_norm": 1.5078125, "learning_rate": 0.0001968459939583788, "loss": 3.4563, "step": 4324 }, { "epoch": 0.2024694247761718, "grad_norm": 1.203125, "learning_rate": 0.00019684453930987866, "loss": 3.3343, "step": 4325 }, { "epoch": 0.20251623851600445, "grad_norm": 1.046875, "learning_rate": 0.00019684308433138634, "loss": 3.4888, "step": 4326 }, { "epoch": 0.2025630522558371, "grad_norm": 1.5234375, "learning_rate": 0.00019684162902290682, "loss": 4.3254, "step": 4327 }, { "epoch": 0.20260986599566974, "grad_norm": 1.1875, "learning_rate": 0.00019684017338444503, "loss": 3.1676, "step": 4328 }, { "epoch": 0.20265667973550236, "grad_norm": 1.21875, "learning_rate": 0.00019683871741600596, "loss": 3.2054, "step": 4329 }, { "epoch": 0.202703493475335, "grad_norm": 1.4296875, "learning_rate": 0.00019683726111759456, "loss": 3.3777, "step": 4330 }, { "epoch": 0.20275030721516765, "grad_norm": 1.296875, "learning_rate": 0.00019683580448921575, "loss": 3.6949, "step": 4331 }, { "epoch": 0.2027971209550003, "grad_norm": 1.296875, "learning_rate": 0.00019683434753087458, "loss": 3.2636, "step": 4332 }, { "epoch": 0.20284393469483294, "grad_norm": 1.703125, "learning_rate": 0.00019683289024257598, "loss": 3.5249, "step": 4333 }, { "epoch": 0.20289074843466556, "grad_norm": 1.1640625, "learning_rate": 0.00019683143262432483, "loss": 3.0983, "step": 4334 }, { "epoch": 0.2029375621744982, "grad_norm": 1.890625, "learning_rate": 0.00019682997467612622, "loss": 3.5134, "step": 4335 }, { "epoch": 0.20298437591433086, "grad_norm": 1.40625, "learning_rate": 0.00019682851639798508, "loss": 3.8262, "step": 4336 }, { "epoch": 0.2030311896541635, "grad_norm": 1.3125, "learning_rate": 0.00019682705778990633, "loss": 3.5669, "step": 4337 }, { "epoch": 0.20307800339399615, "grad_norm": 1.59375, "learning_rate": 0.00019682559885189502, "loss": 3.6838, "step": 4338 }, { "epoch": 0.20312481713382877, "grad_norm": 1.1171875, "learning_rate": 0.00019682413958395606, "loss": 3.9108, "step": 4339 }, { "epoch": 0.2031716308736614, "grad_norm": 1.484375, "learning_rate": 0.00019682267998609443, "loss": 3.7034, "step": 4340 }, { "epoch": 0.20321844461349406, "grad_norm": 1.2734375, "learning_rate": 0.00019682122005831515, "loss": 3.6354, "step": 4341 }, { "epoch": 0.2032652583533267, "grad_norm": 1.484375, "learning_rate": 0.00019681975980062312, "loss": 3.8089, "step": 4342 }, { "epoch": 0.20331207209315935, "grad_norm": 1.1875, "learning_rate": 0.00019681829921302336, "loss": 3.3338, "step": 4343 }, { "epoch": 0.20335888583299197, "grad_norm": 1.546875, "learning_rate": 0.00019681683829552086, "loss": 3.668, "step": 4344 }, { "epoch": 0.20340569957282462, "grad_norm": 1.3046875, "learning_rate": 0.00019681537704812057, "loss": 3.6335, "step": 4345 }, { "epoch": 0.20345251331265726, "grad_norm": 1.6796875, "learning_rate": 0.00019681391547082746, "loss": 4.4603, "step": 4346 }, { "epoch": 0.2034993270524899, "grad_norm": 1.1953125, "learning_rate": 0.00019681245356364656, "loss": 5.4013, "step": 4347 }, { "epoch": 0.20354614079232256, "grad_norm": 1.2421875, "learning_rate": 0.00019681099132658281, "loss": 3.5568, "step": 4348 }, { "epoch": 0.20359295453215517, "grad_norm": 1.234375, "learning_rate": 0.0001968095287596412, "loss": 3.3691, "step": 4349 }, { "epoch": 0.20363976827198782, "grad_norm": 1.1484375, "learning_rate": 0.00019680806586282672, "loss": 3.4167, "step": 4350 }, { "epoch": 0.20368658201182047, "grad_norm": 1.2421875, "learning_rate": 0.00019680660263614436, "loss": 3.9033, "step": 4351 }, { "epoch": 0.2037333957516531, "grad_norm": 1.515625, "learning_rate": 0.00019680513907959908, "loss": 4.6038, "step": 4352 }, { "epoch": 0.20378020949148576, "grad_norm": 1.140625, "learning_rate": 0.00019680367519319587, "loss": 3.6241, "step": 4353 }, { "epoch": 0.2038270232313184, "grad_norm": 1.2890625, "learning_rate": 0.00019680221097693977, "loss": 3.5014, "step": 4354 }, { "epoch": 0.20387383697115102, "grad_norm": 1.0703125, "learning_rate": 0.0001968007464308357, "loss": 4.0163, "step": 4355 }, { "epoch": 0.20392065071098367, "grad_norm": 1.21875, "learning_rate": 0.0001967992815548887, "loss": 3.4314, "step": 4356 }, { "epoch": 0.20396746445081632, "grad_norm": 1.546875, "learning_rate": 0.00019679781634910373, "loss": 4.1294, "step": 4357 }, { "epoch": 0.20401427819064896, "grad_norm": 1.703125, "learning_rate": 0.00019679635081348582, "loss": 4.1901, "step": 4358 }, { "epoch": 0.2040610919304816, "grad_norm": 1.3046875, "learning_rate": 0.00019679488494803992, "loss": 3.3421, "step": 4359 }, { "epoch": 0.20410790567031423, "grad_norm": 1.4140625, "learning_rate": 0.00019679341875277105, "loss": 3.3864, "step": 4360 }, { "epoch": 0.20415471941014687, "grad_norm": 1.75, "learning_rate": 0.00019679195222768422, "loss": 3.605, "step": 4361 }, { "epoch": 0.20420153314997952, "grad_norm": 1.2265625, "learning_rate": 0.00019679048537278438, "loss": 3.3726, "step": 4362 }, { "epoch": 0.20424834688981217, "grad_norm": 1.1328125, "learning_rate": 0.00019678901818807654, "loss": 3.3281, "step": 4363 }, { "epoch": 0.2042951606296448, "grad_norm": 1.7578125, "learning_rate": 0.00019678755067356575, "loss": 3.161, "step": 4364 }, { "epoch": 0.20434197436947743, "grad_norm": 1.234375, "learning_rate": 0.00019678608282925697, "loss": 3.383, "step": 4365 }, { "epoch": 0.20438878810931008, "grad_norm": 1.171875, "learning_rate": 0.0001967846146551552, "loss": 3.2959, "step": 4366 }, { "epoch": 0.20443560184914272, "grad_norm": 1.265625, "learning_rate": 0.00019678314615126545, "loss": 3.234, "step": 4367 }, { "epoch": 0.20448241558897537, "grad_norm": 1.3359375, "learning_rate": 0.00019678167731759273, "loss": 3.552, "step": 4368 }, { "epoch": 0.20452922932880802, "grad_norm": 1.09375, "learning_rate": 0.00019678020815414203, "loss": 2.8411, "step": 4369 }, { "epoch": 0.20457604306864063, "grad_norm": 1.484375, "learning_rate": 0.0001967787386609184, "loss": 3.8763, "step": 4370 }, { "epoch": 0.20462285680847328, "grad_norm": 1.2890625, "learning_rate": 0.00019677726883792675, "loss": 3.9184, "step": 4371 }, { "epoch": 0.20466967054830593, "grad_norm": 1.484375, "learning_rate": 0.0001967757986851722, "loss": 3.9713, "step": 4372 }, { "epoch": 0.20471648428813857, "grad_norm": 1.2578125, "learning_rate": 0.0001967743282026597, "loss": 3.4104, "step": 4373 }, { "epoch": 0.20476329802797122, "grad_norm": 1.234375, "learning_rate": 0.00019677285739039423, "loss": 3.2466, "step": 4374 }, { "epoch": 0.20481011176780384, "grad_norm": 1.640625, "learning_rate": 0.00019677138624838087, "loss": 4.0879, "step": 4375 }, { "epoch": 0.20485692550763648, "grad_norm": 1.1484375, "learning_rate": 0.0001967699147766246, "loss": 3.6047, "step": 4376 }, { "epoch": 0.20490373924746913, "grad_norm": 1.5234375, "learning_rate": 0.00019676844297513045, "loss": 3.3843, "step": 4377 }, { "epoch": 0.20495055298730178, "grad_norm": 1.484375, "learning_rate": 0.00019676697084390344, "loss": 3.3645, "step": 4378 }, { "epoch": 0.20499736672713442, "grad_norm": 1.3515625, "learning_rate": 0.00019676549838294855, "loss": 3.811, "step": 4379 }, { "epoch": 0.20504418046696707, "grad_norm": 1.46875, "learning_rate": 0.00019676402559227083, "loss": 3.4762, "step": 4380 }, { "epoch": 0.2050909942067997, "grad_norm": 1.375, "learning_rate": 0.00019676255247187528, "loss": 3.5904, "step": 4381 }, { "epoch": 0.20513780794663233, "grad_norm": 2.25, "learning_rate": 0.00019676107902176694, "loss": 3.8834, "step": 4382 }, { "epoch": 0.20518462168646498, "grad_norm": 1.3125, "learning_rate": 0.0001967596052419508, "loss": 3.3601, "step": 4383 }, { "epoch": 0.20523143542629763, "grad_norm": 1.890625, "learning_rate": 0.00019675813113243187, "loss": 3.8053, "step": 4384 }, { "epoch": 0.20527824916613027, "grad_norm": 1.5390625, "learning_rate": 0.0001967566566932152, "loss": 3.9708, "step": 4385 }, { "epoch": 0.2053250629059629, "grad_norm": 1.4453125, "learning_rate": 0.00019675518192430587, "loss": 3.7017, "step": 4386 }, { "epoch": 0.20537187664579554, "grad_norm": 1.25, "learning_rate": 0.0001967537068257088, "loss": 3.6318, "step": 4387 }, { "epoch": 0.20541869038562818, "grad_norm": 1.3671875, "learning_rate": 0.00019675223139742912, "loss": 3.5897, "step": 4388 }, { "epoch": 0.20546550412546083, "grad_norm": 1.6171875, "learning_rate": 0.00019675075563947177, "loss": 3.3561, "step": 4389 }, { "epoch": 0.20551231786529348, "grad_norm": 1.203125, "learning_rate": 0.0001967492795518418, "loss": 3.4731, "step": 4390 }, { "epoch": 0.2055591316051261, "grad_norm": 1.1328125, "learning_rate": 0.00019674780313454427, "loss": 3.098, "step": 4391 }, { "epoch": 0.20560594534495874, "grad_norm": 1.2890625, "learning_rate": 0.0001967463263875842, "loss": 3.492, "step": 4392 }, { "epoch": 0.2056527590847914, "grad_norm": 1.3515625, "learning_rate": 0.0001967448493109666, "loss": 3.823, "step": 4393 }, { "epoch": 0.20569957282462403, "grad_norm": 1.375, "learning_rate": 0.00019674337190469653, "loss": 3.0969, "step": 4394 }, { "epoch": 0.20574638656445668, "grad_norm": 1.4140625, "learning_rate": 0.00019674189416877898, "loss": 3.6392, "step": 4395 }, { "epoch": 0.2057932003042893, "grad_norm": 1.1796875, "learning_rate": 0.00019674041610321906, "loss": 3.4593, "step": 4396 }, { "epoch": 0.20584001404412194, "grad_norm": 1.3203125, "learning_rate": 0.00019673893770802176, "loss": 3.6246, "step": 4397 }, { "epoch": 0.2058868277839546, "grad_norm": 1.203125, "learning_rate": 0.0001967374589831921, "loss": 3.3651, "step": 4398 }, { "epoch": 0.20593364152378724, "grad_norm": 1.3515625, "learning_rate": 0.00019673597992873515, "loss": 3.6187, "step": 4399 }, { "epoch": 0.20598045526361988, "grad_norm": 1.2109375, "learning_rate": 0.00019673450054465597, "loss": 3.5176, "step": 4400 }, { "epoch": 0.2060272690034525, "grad_norm": 1.3515625, "learning_rate": 0.00019673302083095953, "loss": 3.7796, "step": 4401 }, { "epoch": 0.20607408274328515, "grad_norm": 1.5546875, "learning_rate": 0.00019673154078765092, "loss": 3.6746, "step": 4402 }, { "epoch": 0.2061208964831178, "grad_norm": 1.4609375, "learning_rate": 0.00019673006041473517, "loss": 3.6042, "step": 4403 }, { "epoch": 0.20616771022295044, "grad_norm": 1.9765625, "learning_rate": 0.00019672857971221735, "loss": 3.0701, "step": 4404 }, { "epoch": 0.2062145239627831, "grad_norm": 1.421875, "learning_rate": 0.00019672709868010245, "loss": 3.836, "step": 4405 }, { "epoch": 0.2062613377026157, "grad_norm": 1.40625, "learning_rate": 0.00019672561731839562, "loss": 3.4975, "step": 4406 }, { "epoch": 0.20630815144244835, "grad_norm": 1.578125, "learning_rate": 0.00019672413562710176, "loss": 3.9307, "step": 4407 }, { "epoch": 0.206354965182281, "grad_norm": 1.203125, "learning_rate": 0.00019672265360622606, "loss": 3.4973, "step": 4408 }, { "epoch": 0.20640177892211364, "grad_norm": 1.6953125, "learning_rate": 0.0001967211712557735, "loss": 3.799, "step": 4409 }, { "epoch": 0.2064485926619463, "grad_norm": 1.140625, "learning_rate": 0.00019671968857574908, "loss": 3.315, "step": 4410 }, { "epoch": 0.20649540640177894, "grad_norm": 1.546875, "learning_rate": 0.00019671820556615798, "loss": 3.8207, "step": 4411 }, { "epoch": 0.20654222014161155, "grad_norm": 1.828125, "learning_rate": 0.00019671672222700516, "loss": 3.5827, "step": 4412 }, { "epoch": 0.2065890338814442, "grad_norm": 1.1328125, "learning_rate": 0.00019671523855829573, "loss": 3.0369, "step": 4413 }, { "epoch": 0.20663584762127685, "grad_norm": 1.2265625, "learning_rate": 0.00019671375456003468, "loss": 3.3668, "step": 4414 }, { "epoch": 0.2066826613611095, "grad_norm": 1.5703125, "learning_rate": 0.0001967122702322271, "loss": 3.3881, "step": 4415 }, { "epoch": 0.20672947510094214, "grad_norm": 1.5625, "learning_rate": 0.00019671078557487805, "loss": 3.6986, "step": 4416 }, { "epoch": 0.20677628884077476, "grad_norm": 1.578125, "learning_rate": 0.0001967093005879926, "loss": 3.482, "step": 4417 }, { "epoch": 0.2068231025806074, "grad_norm": 1.1796875, "learning_rate": 0.0001967078152715758, "loss": 3.3025, "step": 4418 }, { "epoch": 0.20686991632044005, "grad_norm": 1.2734375, "learning_rate": 0.00019670632962563272, "loss": 3.2363, "step": 4419 }, { "epoch": 0.2069167300602727, "grad_norm": 1.5703125, "learning_rate": 0.00019670484365016838, "loss": 3.4484, "step": 4420 }, { "epoch": 0.20696354380010534, "grad_norm": 1.59375, "learning_rate": 0.00019670335734518787, "loss": 3.7223, "step": 4421 }, { "epoch": 0.20701035753993796, "grad_norm": 1.5, "learning_rate": 0.0001967018707106963, "loss": 3.3981, "step": 4422 }, { "epoch": 0.2070571712797706, "grad_norm": 1.5078125, "learning_rate": 0.00019670038374669868, "loss": 3.7429, "step": 4423 }, { "epoch": 0.20710398501960325, "grad_norm": 1.5625, "learning_rate": 0.0001966988964532001, "loss": 3.6052, "step": 4424 }, { "epoch": 0.2071507987594359, "grad_norm": 1.734375, "learning_rate": 0.0001966974088302056, "loss": 4.1338, "step": 4425 }, { "epoch": 0.20719761249926855, "grad_norm": 1.203125, "learning_rate": 0.00019669592087772028, "loss": 3.7281, "step": 4426 }, { "epoch": 0.20724442623910116, "grad_norm": 1.0234375, "learning_rate": 0.0001966944325957492, "loss": 3.8228, "step": 4427 }, { "epoch": 0.2072912399789338, "grad_norm": 1.6953125, "learning_rate": 0.0001966929439842974, "loss": 3.9486, "step": 4428 }, { "epoch": 0.20733805371876646, "grad_norm": 1.2890625, "learning_rate": 0.00019669145504337001, "loss": 3.3876, "step": 4429 }, { "epoch": 0.2073848674585991, "grad_norm": 1.1875, "learning_rate": 0.00019668996577297207, "loss": 3.0656, "step": 4430 }, { "epoch": 0.20743168119843175, "grad_norm": 1.5078125, "learning_rate": 0.00019668847617310869, "loss": 3.643, "step": 4431 }, { "epoch": 0.20747849493826437, "grad_norm": 1.296875, "learning_rate": 0.0001966869862437849, "loss": 3.3188, "step": 4432 }, { "epoch": 0.20752530867809701, "grad_norm": 1.421875, "learning_rate": 0.00019668549598500577, "loss": 3.6045, "step": 4433 }, { "epoch": 0.20757212241792966, "grad_norm": 1.28125, "learning_rate": 0.0001966840053967764, "loss": 3.6063, "step": 4434 }, { "epoch": 0.2076189361577623, "grad_norm": 1.421875, "learning_rate": 0.0001966825144791019, "loss": 3.3703, "step": 4435 }, { "epoch": 0.20766574989759495, "grad_norm": 0.89453125, "learning_rate": 0.0001966810232319873, "loss": 2.7251, "step": 4436 }, { "epoch": 0.20771256363742757, "grad_norm": 1.46875, "learning_rate": 0.0001966795316554377, "loss": 3.6492, "step": 4437 }, { "epoch": 0.20775937737726022, "grad_norm": 1.578125, "learning_rate": 0.0001966780397494582, "loss": 3.7189, "step": 4438 }, { "epoch": 0.20780619111709286, "grad_norm": 1.328125, "learning_rate": 0.00019667654751405386, "loss": 3.3973, "step": 4439 }, { "epoch": 0.2078530048569255, "grad_norm": 1.765625, "learning_rate": 0.0001966750549492298, "loss": 3.2757, "step": 4440 }, { "epoch": 0.20789981859675816, "grad_norm": 1.34375, "learning_rate": 0.000196673562054991, "loss": 3.8697, "step": 4441 }, { "epoch": 0.2079466323365908, "grad_norm": 1.671875, "learning_rate": 0.00019667206883134267, "loss": 3.5265, "step": 4442 }, { "epoch": 0.20799344607642342, "grad_norm": 1.28125, "learning_rate": 0.0001966705752782899, "loss": 3.4553, "step": 4443 }, { "epoch": 0.20804025981625607, "grad_norm": 1.8984375, "learning_rate": 0.00019666908139583767, "loss": 3.6827, "step": 4444 }, { "epoch": 0.20808707355608871, "grad_norm": 1.703125, "learning_rate": 0.00019666758718399116, "loss": 4.1075, "step": 4445 }, { "epoch": 0.20813388729592136, "grad_norm": 1.515625, "learning_rate": 0.00019666609264275544, "loss": 3.6556, "step": 4446 }, { "epoch": 0.208180701035754, "grad_norm": 1.46875, "learning_rate": 0.00019666459777213557, "loss": 3.9527, "step": 4447 }, { "epoch": 0.20822751477558663, "grad_norm": 1.4453125, "learning_rate": 0.00019666310257213668, "loss": 3.5643, "step": 4448 }, { "epoch": 0.20827432851541927, "grad_norm": 1.296875, "learning_rate": 0.00019666160704276387, "loss": 3.1745, "step": 4449 }, { "epoch": 0.20832114225525192, "grad_norm": 2.578125, "learning_rate": 0.00019666011118402223, "loss": 3.5394, "step": 4450 }, { "epoch": 0.20836795599508456, "grad_norm": 1.3125, "learning_rate": 0.00019665861499591682, "loss": 3.7408, "step": 4451 }, { "epoch": 0.2084147697349172, "grad_norm": 1.5, "learning_rate": 0.00019665711847845278, "loss": 3.4433, "step": 4452 }, { "epoch": 0.20846158347474983, "grad_norm": 1.828125, "learning_rate": 0.00019665562163163517, "loss": 3.6638, "step": 4453 }, { "epoch": 0.20850839721458247, "grad_norm": 1.7265625, "learning_rate": 0.00019665412445546917, "loss": 4.3675, "step": 4454 }, { "epoch": 0.20855521095441512, "grad_norm": 1.3359375, "learning_rate": 0.0001966526269499598, "loss": 3.3103, "step": 4455 }, { "epoch": 0.20860202469424777, "grad_norm": 1.3046875, "learning_rate": 0.00019665112911511215, "loss": 3.9647, "step": 4456 }, { "epoch": 0.2086488384340804, "grad_norm": 1.546875, "learning_rate": 0.00019664963095093144, "loss": 3.474, "step": 4457 }, { "epoch": 0.20869565217391303, "grad_norm": 1.9921875, "learning_rate": 0.00019664813245742267, "loss": 3.1915, "step": 4458 }, { "epoch": 0.20874246591374568, "grad_norm": 1.78125, "learning_rate": 0.00019664663363459095, "loss": 3.7278, "step": 4459 }, { "epoch": 0.20878927965357832, "grad_norm": 1.578125, "learning_rate": 0.00019664513448244142, "loss": 2.803, "step": 4460 }, { "epoch": 0.20883609339341097, "grad_norm": 1.5078125, "learning_rate": 0.0001966436350009792, "loss": 4.246, "step": 4461 }, { "epoch": 0.20888290713324362, "grad_norm": 1.3515625, "learning_rate": 0.0001966421351902094, "loss": 3.6697, "step": 4462 }, { "epoch": 0.20892972087307624, "grad_norm": 1.3828125, "learning_rate": 0.00019664063505013703, "loss": 3.869, "step": 4463 }, { "epoch": 0.20897653461290888, "grad_norm": 1.5234375, "learning_rate": 0.00019663913458076735, "loss": 3.8181, "step": 4464 }, { "epoch": 0.20902334835274153, "grad_norm": 1.140625, "learning_rate": 0.00019663763378210538, "loss": 4.8457, "step": 4465 }, { "epoch": 0.20907016209257417, "grad_norm": 2.125, "learning_rate": 0.00019663613265415627, "loss": 3.6879, "step": 4466 }, { "epoch": 0.20911697583240682, "grad_norm": 1.171875, "learning_rate": 0.0001966346311969251, "loss": 3.4659, "step": 4467 }, { "epoch": 0.20916378957223944, "grad_norm": 1.1640625, "learning_rate": 0.00019663312941041705, "loss": 3.1538, "step": 4468 }, { "epoch": 0.20921060331207209, "grad_norm": 1.3046875, "learning_rate": 0.00019663162729463717, "loss": 3.4416, "step": 4469 }, { "epoch": 0.20925741705190473, "grad_norm": 1.15625, "learning_rate": 0.00019663012484959062, "loss": 3.6015, "step": 4470 }, { "epoch": 0.20930423079173738, "grad_norm": 1.1484375, "learning_rate": 0.0001966286220752825, "loss": 5.2415, "step": 4471 }, { "epoch": 0.20935104453157002, "grad_norm": 1.2421875, "learning_rate": 0.00019662711897171792, "loss": 3.3203, "step": 4472 }, { "epoch": 0.20939785827140267, "grad_norm": 1.1640625, "learning_rate": 0.00019662561553890204, "loss": 3.2861, "step": 4473 }, { "epoch": 0.2094446720112353, "grad_norm": 1.390625, "learning_rate": 0.00019662411177683996, "loss": 3.4557, "step": 4474 }, { "epoch": 0.20949148575106794, "grad_norm": 2.296875, "learning_rate": 0.0001966226076855368, "loss": 3.7005, "step": 4475 }, { "epoch": 0.20953829949090058, "grad_norm": 1.3203125, "learning_rate": 0.00019662110326499767, "loss": 3.6211, "step": 4476 }, { "epoch": 0.20958511323073323, "grad_norm": 1.4375, "learning_rate": 0.00019661959851522776, "loss": 3.3577, "step": 4477 }, { "epoch": 0.20963192697056587, "grad_norm": 2.515625, "learning_rate": 0.0001966180934362321, "loss": 3.4624, "step": 4478 }, { "epoch": 0.2096787407103985, "grad_norm": 1.3125, "learning_rate": 0.0001966165880280159, "loss": 3.8238, "step": 4479 }, { "epoch": 0.20972555445023114, "grad_norm": 1.203125, "learning_rate": 0.00019661508229058426, "loss": 3.5302, "step": 4480 }, { "epoch": 0.20977236819006378, "grad_norm": 1.3984375, "learning_rate": 0.0001966135762239423, "loss": 4.0838, "step": 4481 }, { "epoch": 0.20981918192989643, "grad_norm": 1.4453125, "learning_rate": 0.00019661206982809516, "loss": 3.7825, "step": 4482 }, { "epoch": 0.20986599566972908, "grad_norm": 1.3671875, "learning_rate": 0.000196610563103048, "loss": 3.8098, "step": 4483 }, { "epoch": 0.2099128094095617, "grad_norm": 1.46875, "learning_rate": 0.0001966090560488059, "loss": 3.9294, "step": 4484 }, { "epoch": 0.20995962314939434, "grad_norm": 1.34375, "learning_rate": 0.00019660754866537405, "loss": 3.3171, "step": 4485 }, { "epoch": 0.210006436889227, "grad_norm": 1.140625, "learning_rate": 0.00019660604095275756, "loss": 3.3925, "step": 4486 }, { "epoch": 0.21005325062905963, "grad_norm": 1.078125, "learning_rate": 0.00019660453291096158, "loss": 3.5263, "step": 4487 }, { "epoch": 0.21010006436889228, "grad_norm": 1.3203125, "learning_rate": 0.00019660302453999122, "loss": 3.6159, "step": 4488 }, { "epoch": 0.2101468781087249, "grad_norm": 1.296875, "learning_rate": 0.00019660151583985162, "loss": 3.5249, "step": 4489 }, { "epoch": 0.21019369184855755, "grad_norm": 1.4609375, "learning_rate": 0.00019660000681054798, "loss": 4.0081, "step": 4490 }, { "epoch": 0.2102405055883902, "grad_norm": 1.28125, "learning_rate": 0.0001965984974520854, "loss": 3.4301, "step": 4491 }, { "epoch": 0.21028731932822284, "grad_norm": 1.421875, "learning_rate": 0.00019659698776446899, "loss": 3.5415, "step": 4492 }, { "epoch": 0.21033413306805548, "grad_norm": 0.984375, "learning_rate": 0.00019659547774770393, "loss": 5.2782, "step": 4493 }, { "epoch": 0.2103809468078881, "grad_norm": 1.21875, "learning_rate": 0.00019659396740179537, "loss": 3.6936, "step": 4494 }, { "epoch": 0.21042776054772075, "grad_norm": 1.1171875, "learning_rate": 0.00019659245672674845, "loss": 3.0928, "step": 4495 }, { "epoch": 0.2104745742875534, "grad_norm": 1.5390625, "learning_rate": 0.00019659094572256833, "loss": 3.408, "step": 4496 }, { "epoch": 0.21052138802738604, "grad_norm": 1.3203125, "learning_rate": 0.00019658943438926013, "loss": 3.7069, "step": 4497 }, { "epoch": 0.2105682017672187, "grad_norm": 1.15625, "learning_rate": 0.00019658792272682902, "loss": 3.3949, "step": 4498 }, { "epoch": 0.2106150155070513, "grad_norm": 1.1953125, "learning_rate": 0.00019658641073528016, "loss": 3.1628, "step": 4499 }, { "epoch": 0.21066182924688395, "grad_norm": 1.3046875, "learning_rate": 0.00019658489841461866, "loss": 3.5943, "step": 4500 }, { "epoch": 0.2107086429867166, "grad_norm": 1.140625, "learning_rate": 0.00019658338576484974, "loss": 3.3649, "step": 4501 }, { "epoch": 0.21075545672654925, "grad_norm": 1.1015625, "learning_rate": 0.0001965818727859785, "loss": 3.4615, "step": 4502 }, { "epoch": 0.2108022704663819, "grad_norm": 1.15625, "learning_rate": 0.0001965803594780101, "loss": 3.5429, "step": 4503 }, { "epoch": 0.21084908420621454, "grad_norm": 1.1328125, "learning_rate": 0.0001965788458409497, "loss": 3.2823, "step": 4504 }, { "epoch": 0.21089589794604716, "grad_norm": 1.4296875, "learning_rate": 0.00019657733187480249, "loss": 3.7219, "step": 4505 }, { "epoch": 0.2109427116858798, "grad_norm": 1.3125, "learning_rate": 0.0001965758175795736, "loss": 4.223, "step": 4506 }, { "epoch": 0.21098952542571245, "grad_norm": 1.2578125, "learning_rate": 0.0001965743029552682, "loss": 3.2424, "step": 4507 }, { "epoch": 0.2110363391655451, "grad_norm": 1.265625, "learning_rate": 0.00019657278800189141, "loss": 3.605, "step": 4508 }, { "epoch": 0.21108315290537774, "grad_norm": 1.75, "learning_rate": 0.00019657127271944846, "loss": 3.576, "step": 4509 }, { "epoch": 0.21112996664521036, "grad_norm": 1.2578125, "learning_rate": 0.00019656975710794446, "loss": 3.0402, "step": 4510 }, { "epoch": 0.211176780385043, "grad_norm": 1.5, "learning_rate": 0.00019656824116738458, "loss": 3.5101, "step": 4511 }, { "epoch": 0.21122359412487565, "grad_norm": 1.1640625, "learning_rate": 0.00019656672489777406, "loss": 3.7203, "step": 4512 }, { "epoch": 0.2112704078647083, "grad_norm": 1.3828125, "learning_rate": 0.00019656520829911797, "loss": 3.4124, "step": 4513 }, { "epoch": 0.21131722160454094, "grad_norm": 1.6640625, "learning_rate": 0.0001965636913714215, "loss": 3.7775, "step": 4514 }, { "epoch": 0.21136403534437356, "grad_norm": 1.609375, "learning_rate": 0.00019656217411468988, "loss": 3.4903, "step": 4515 }, { "epoch": 0.2114108490842062, "grad_norm": 2.046875, "learning_rate": 0.00019656065652892818, "loss": 3.1455, "step": 4516 }, { "epoch": 0.21145766282403886, "grad_norm": 1.1640625, "learning_rate": 0.00019655913861414166, "loss": 3.678, "step": 4517 }, { "epoch": 0.2115044765638715, "grad_norm": 2.578125, "learning_rate": 0.00019655762037033543, "loss": 3.5706, "step": 4518 }, { "epoch": 0.21155129030370415, "grad_norm": 1.765625, "learning_rate": 0.0001965561017975147, "loss": 3.813, "step": 4519 }, { "epoch": 0.21159810404353677, "grad_norm": 1.3671875, "learning_rate": 0.00019655458289568464, "loss": 4.4721, "step": 4520 }, { "epoch": 0.2116449177833694, "grad_norm": 1.109375, "learning_rate": 0.0001965530636648504, "loss": 3.5308, "step": 4521 }, { "epoch": 0.21169173152320206, "grad_norm": 1.8984375, "learning_rate": 0.0001965515441050172, "loss": 3.9287, "step": 4522 }, { "epoch": 0.2117385452630347, "grad_norm": 1.125, "learning_rate": 0.0001965500242161902, "loss": 3.4757, "step": 4523 }, { "epoch": 0.21178535900286735, "grad_norm": 1.234375, "learning_rate": 0.00019654850399837453, "loss": 3.6711, "step": 4524 }, { "epoch": 0.21183217274269997, "grad_norm": 1.4765625, "learning_rate": 0.00019654698345157546, "loss": 3.7268, "step": 4525 }, { "epoch": 0.21187898648253262, "grad_norm": 2.0, "learning_rate": 0.00019654546257579812, "loss": 3.6904, "step": 4526 }, { "epoch": 0.21192580022236526, "grad_norm": 2.015625, "learning_rate": 0.00019654394137104771, "loss": 3.5268, "step": 4527 }, { "epoch": 0.2119726139621979, "grad_norm": 1.25, "learning_rate": 0.00019654241983732936, "loss": 3.3581, "step": 4528 }, { "epoch": 0.21201942770203056, "grad_norm": 1.2421875, "learning_rate": 0.00019654089797464832, "loss": 2.8547, "step": 4529 }, { "epoch": 0.21206624144186317, "grad_norm": 1.4140625, "learning_rate": 0.00019653937578300971, "loss": 3.7467, "step": 4530 }, { "epoch": 0.21211305518169582, "grad_norm": 1.015625, "learning_rate": 0.0001965378532624188, "loss": 4.2506, "step": 4531 }, { "epoch": 0.21215986892152847, "grad_norm": 1.8828125, "learning_rate": 0.00019653633041288073, "loss": 4.1312, "step": 4532 }, { "epoch": 0.2122066826613611, "grad_norm": 1.234375, "learning_rate": 0.0001965348072344007, "loss": 3.4078, "step": 4533 }, { "epoch": 0.21225349640119376, "grad_norm": 1.9765625, "learning_rate": 0.00019653328372698387, "loss": 3.8192, "step": 4534 }, { "epoch": 0.2123003101410264, "grad_norm": 1.1171875, "learning_rate": 0.00019653175989063545, "loss": 4.7065, "step": 4535 }, { "epoch": 0.21234712388085902, "grad_norm": 1.3125, "learning_rate": 0.00019653023572536066, "loss": 3.7623, "step": 4536 }, { "epoch": 0.21239393762069167, "grad_norm": 1.328125, "learning_rate": 0.0001965287112311647, "loss": 3.4977, "step": 4537 }, { "epoch": 0.21244075136052432, "grad_norm": 1.2734375, "learning_rate": 0.00019652718640805267, "loss": 3.4657, "step": 4538 }, { "epoch": 0.21248756510035696, "grad_norm": 2.03125, "learning_rate": 0.00019652566125602988, "loss": 3.366, "step": 4539 }, { "epoch": 0.2125343788401896, "grad_norm": 1.53125, "learning_rate": 0.00019652413577510145, "loss": 3.6312, "step": 4540 }, { "epoch": 0.21258119258002223, "grad_norm": 1.6953125, "learning_rate": 0.00019652260996527263, "loss": 3.7173, "step": 4541 }, { "epoch": 0.21262800631985487, "grad_norm": 1.4453125, "learning_rate": 0.00019652108382654858, "loss": 3.7732, "step": 4542 }, { "epoch": 0.21267482005968752, "grad_norm": 1.296875, "learning_rate": 0.00019651955735893454, "loss": 3.772, "step": 4543 }, { "epoch": 0.21272163379952017, "grad_norm": 1.3203125, "learning_rate": 0.00019651803056243568, "loss": 3.5365, "step": 4544 }, { "epoch": 0.2127684475393528, "grad_norm": 2.046875, "learning_rate": 0.0001965165034370572, "loss": 3.6329, "step": 4545 }, { "epoch": 0.21281526127918543, "grad_norm": 1.1796875, "learning_rate": 0.0001965149759828043, "loss": 3.5302, "step": 4546 }, { "epoch": 0.21286207501901808, "grad_norm": 1.21875, "learning_rate": 0.0001965134481996822, "loss": 3.4658, "step": 4547 }, { "epoch": 0.21290888875885072, "grad_norm": 1.2265625, "learning_rate": 0.00019651192008769613, "loss": 3.5323, "step": 4548 }, { "epoch": 0.21295570249868337, "grad_norm": 1.125, "learning_rate": 0.00019651039164685126, "loss": 3.5672, "step": 4549 }, { "epoch": 0.21300251623851602, "grad_norm": 1.1875, "learning_rate": 0.0001965088628771528, "loss": 3.1114, "step": 4550 }, { "epoch": 0.21304932997834863, "grad_norm": 1.609375, "learning_rate": 0.00019650733377860597, "loss": 3.9113, "step": 4551 }, { "epoch": 0.21309614371818128, "grad_norm": 1.4921875, "learning_rate": 0.00019650580435121598, "loss": 3.6304, "step": 4552 }, { "epoch": 0.21314295745801393, "grad_norm": 1.5234375, "learning_rate": 0.00019650427459498807, "loss": 3.646, "step": 4553 }, { "epoch": 0.21318977119784657, "grad_norm": 1.4609375, "learning_rate": 0.00019650274450992735, "loss": 3.7159, "step": 4554 }, { "epoch": 0.21323658493767922, "grad_norm": 1.1015625, "learning_rate": 0.00019650121409603918, "loss": 3.1486, "step": 4555 }, { "epoch": 0.21328339867751184, "grad_norm": 1.4609375, "learning_rate": 0.00019649968335332867, "loss": 3.3802, "step": 4556 }, { "epoch": 0.21333021241734448, "grad_norm": 1.640625, "learning_rate": 0.00019649815228180108, "loss": 3.4881, "step": 4557 }, { "epoch": 0.21337702615717713, "grad_norm": 1.515625, "learning_rate": 0.00019649662088146158, "loss": 3.8771, "step": 4558 }, { "epoch": 0.21342383989700978, "grad_norm": 1.6796875, "learning_rate": 0.00019649508915231545, "loss": 3.6408, "step": 4559 }, { "epoch": 0.21347065363684242, "grad_norm": 1.359375, "learning_rate": 0.00019649355709436787, "loss": 3.3889, "step": 4560 }, { "epoch": 0.21351746737667504, "grad_norm": 1.421875, "learning_rate": 0.0001964920247076241, "loss": 3.2196, "step": 4561 }, { "epoch": 0.2135642811165077, "grad_norm": 1.3984375, "learning_rate": 0.0001964904919920893, "loss": 3.6765, "step": 4562 }, { "epoch": 0.21361109485634033, "grad_norm": 1.65625, "learning_rate": 0.00019648895894776875, "loss": 3.0995, "step": 4563 }, { "epoch": 0.21365790859617298, "grad_norm": 1.4921875, "learning_rate": 0.00019648742557466765, "loss": 3.8209, "step": 4564 }, { "epoch": 0.21370472233600563, "grad_norm": 1.546875, "learning_rate": 0.00019648589187279118, "loss": 3.7985, "step": 4565 }, { "epoch": 0.21375153607583827, "grad_norm": 1.21875, "learning_rate": 0.00019648435784214464, "loss": 3.0855, "step": 4566 }, { "epoch": 0.2137983498156709, "grad_norm": 1.2265625, "learning_rate": 0.00019648282348273322, "loss": 3.4019, "step": 4567 }, { "epoch": 0.21384516355550354, "grad_norm": 1.203125, "learning_rate": 0.0001964812887945622, "loss": 3.4052, "step": 4568 }, { "epoch": 0.21389197729533618, "grad_norm": 1.1953125, "learning_rate": 0.00019647975377763672, "loss": 3.5457, "step": 4569 }, { "epoch": 0.21393879103516883, "grad_norm": 1.28125, "learning_rate": 0.00019647821843196206, "loss": 3.287, "step": 4570 }, { "epoch": 0.21398560477500148, "grad_norm": 1.53125, "learning_rate": 0.00019647668275754346, "loss": 3.5226, "step": 4571 }, { "epoch": 0.2140324185148341, "grad_norm": 1.078125, "learning_rate": 0.00019647514675438611, "loss": 3.1922, "step": 4572 }, { "epoch": 0.21407923225466674, "grad_norm": 1.1953125, "learning_rate": 0.00019647361042249533, "loss": 3.5261, "step": 4573 }, { "epoch": 0.2141260459944994, "grad_norm": 1.2109375, "learning_rate": 0.00019647207376187627, "loss": 3.6466, "step": 4574 }, { "epoch": 0.21417285973433203, "grad_norm": 1.2109375, "learning_rate": 0.00019647053677253418, "loss": 3.7525, "step": 4575 }, { "epoch": 0.21421967347416468, "grad_norm": 1.4765625, "learning_rate": 0.00019646899945447436, "loss": 3.7419, "step": 4576 }, { "epoch": 0.2142664872139973, "grad_norm": 1.734375, "learning_rate": 0.00019646746180770194, "loss": 3.4603, "step": 4577 }, { "epoch": 0.21431330095382994, "grad_norm": 1.3671875, "learning_rate": 0.00019646592383222225, "loss": 3.4958, "step": 4578 }, { "epoch": 0.2143601146936626, "grad_norm": 1.0703125, "learning_rate": 0.0001964643855280405, "loss": 3.8514, "step": 4579 }, { "epoch": 0.21440692843349524, "grad_norm": 1.3125, "learning_rate": 0.0001964628468951619, "loss": 3.3953, "step": 4580 }, { "epoch": 0.21445374217332788, "grad_norm": 1.34375, "learning_rate": 0.00019646130793359175, "loss": 3.4411, "step": 4581 }, { "epoch": 0.2145005559131605, "grad_norm": 1.1015625, "learning_rate": 0.0001964597686433353, "loss": 5.0048, "step": 4582 }, { "epoch": 0.21454736965299315, "grad_norm": 1.3046875, "learning_rate": 0.00019645822902439772, "loss": 3.9434, "step": 4583 }, { "epoch": 0.2145941833928258, "grad_norm": 1.4453125, "learning_rate": 0.0001964566890767843, "loss": 3.0921, "step": 4584 }, { "epoch": 0.21464099713265844, "grad_norm": 1.28125, "learning_rate": 0.00019645514880050033, "loss": 3.7693, "step": 4585 }, { "epoch": 0.21468781087249109, "grad_norm": 1.5859375, "learning_rate": 0.000196453608195551, "loss": 3.4275, "step": 4586 }, { "epoch": 0.2147346246123237, "grad_norm": 1.296875, "learning_rate": 0.00019645206726194158, "loss": 3.9539, "step": 4587 }, { "epoch": 0.21478143835215635, "grad_norm": 1.6015625, "learning_rate": 0.00019645052599967725, "loss": 3.5965, "step": 4588 }, { "epoch": 0.214828252091989, "grad_norm": 1.234375, "learning_rate": 0.0001964489844087634, "loss": 2.6003, "step": 4589 }, { "epoch": 0.21487506583182164, "grad_norm": 1.40625, "learning_rate": 0.0001964474424892052, "loss": 3.8448, "step": 4590 }, { "epoch": 0.2149218795716543, "grad_norm": 1.390625, "learning_rate": 0.0001964459002410079, "loss": 3.8154, "step": 4591 }, { "epoch": 0.2149686933114869, "grad_norm": 1.3984375, "learning_rate": 0.00019644435766417678, "loss": 3.8931, "step": 4592 }, { "epoch": 0.21501550705131955, "grad_norm": 1.109375, "learning_rate": 0.00019644281475871705, "loss": 3.5344, "step": 4593 }, { "epoch": 0.2150623207911522, "grad_norm": 1.4765625, "learning_rate": 0.00019644127152463405, "loss": 3.5103, "step": 4594 }, { "epoch": 0.21510913453098485, "grad_norm": 1.25, "learning_rate": 0.00019643972796193297, "loss": 3.5428, "step": 4595 }, { "epoch": 0.2151559482708175, "grad_norm": 1.359375, "learning_rate": 0.0001964381840706191, "loss": 3.6136, "step": 4596 }, { "epoch": 0.21520276201065014, "grad_norm": 2.765625, "learning_rate": 0.00019643663985069768, "loss": 3.7489, "step": 4597 }, { "epoch": 0.21524957575048276, "grad_norm": 1.515625, "learning_rate": 0.000196435095302174, "loss": 3.3322, "step": 4598 }, { "epoch": 0.2152963894903154, "grad_norm": 1.328125, "learning_rate": 0.0001964335504250533, "loss": 3.4966, "step": 4599 }, { "epoch": 0.21534320323014805, "grad_norm": 1.6484375, "learning_rate": 0.00019643200521934088, "loss": 3.55, "step": 4600 }, { "epoch": 0.2153900169699807, "grad_norm": 1.375, "learning_rate": 0.00019643045968504192, "loss": 4.0968, "step": 4601 }, { "epoch": 0.21543683070981334, "grad_norm": 2.625, "learning_rate": 0.00019642891382216178, "loss": 3.5729, "step": 4602 }, { "epoch": 0.21548364444964596, "grad_norm": 1.328125, "learning_rate": 0.00019642736763070566, "loss": 3.2847, "step": 4603 }, { "epoch": 0.2155304581894786, "grad_norm": 1.421875, "learning_rate": 0.0001964258211106789, "loss": 3.5682, "step": 4604 }, { "epoch": 0.21557727192931125, "grad_norm": 1.1328125, "learning_rate": 0.0001964242742620867, "loss": 2.734, "step": 4605 }, { "epoch": 0.2156240856691439, "grad_norm": 1.328125, "learning_rate": 0.00019642272708493436, "loss": 3.6048, "step": 4606 }, { "epoch": 0.21567089940897655, "grad_norm": 1.6953125, "learning_rate": 0.00019642117957922715, "loss": 3.5765, "step": 4607 }, { "epoch": 0.21571771314880916, "grad_norm": 1.8203125, "learning_rate": 0.00019641963174497036, "loss": 3.0455, "step": 4608 }, { "epoch": 0.2157645268886418, "grad_norm": 1.734375, "learning_rate": 0.00019641808358216922, "loss": 3.261, "step": 4609 }, { "epoch": 0.21581134062847446, "grad_norm": 1.09375, "learning_rate": 0.00019641653509082908, "loss": 3.482, "step": 4610 }, { "epoch": 0.2158581543683071, "grad_norm": 1.21875, "learning_rate": 0.00019641498627095513, "loss": 3.1906, "step": 4611 }, { "epoch": 0.21590496810813975, "grad_norm": 1.375, "learning_rate": 0.0001964134371225527, "loss": 3.3827, "step": 4612 }, { "epoch": 0.21595178184797237, "grad_norm": 1.125, "learning_rate": 0.00019641188764562705, "loss": 3.2756, "step": 4613 }, { "epoch": 0.21599859558780501, "grad_norm": 1.21875, "learning_rate": 0.00019641033784018348, "loss": 3.6963, "step": 4614 }, { "epoch": 0.21604540932763766, "grad_norm": 1.375, "learning_rate": 0.00019640878770622722, "loss": 3.2945, "step": 4615 }, { "epoch": 0.2160922230674703, "grad_norm": 1.59375, "learning_rate": 0.00019640723724376362, "loss": 3.611, "step": 4616 }, { "epoch": 0.21613903680730295, "grad_norm": 1.2109375, "learning_rate": 0.00019640568645279793, "loss": 3.6445, "step": 4617 }, { "epoch": 0.21618585054713557, "grad_norm": 1.1328125, "learning_rate": 0.00019640413533333545, "loss": 3.5898, "step": 4618 }, { "epoch": 0.21623266428696822, "grad_norm": 1.78125, "learning_rate": 0.00019640258388538143, "loss": 3.8611, "step": 4619 }, { "epoch": 0.21627947802680086, "grad_norm": 1.390625, "learning_rate": 0.00019640103210894118, "loss": 3.4157, "step": 4620 }, { "epoch": 0.2163262917666335, "grad_norm": 1.65625, "learning_rate": 0.00019639948000402, "loss": 3.6661, "step": 4621 }, { "epoch": 0.21637310550646616, "grad_norm": 1.234375, "learning_rate": 0.00019639792757062313, "loss": 3.48, "step": 4622 }, { "epoch": 0.21641991924629878, "grad_norm": 1.5703125, "learning_rate": 0.0001963963748087559, "loss": 3.5948, "step": 4623 }, { "epoch": 0.21646673298613142, "grad_norm": 1.6796875, "learning_rate": 0.00019639482171842363, "loss": 3.4448, "step": 4624 }, { "epoch": 0.21651354672596407, "grad_norm": 1.328125, "learning_rate": 0.0001963932682996316, "loss": 3.5335, "step": 4625 }, { "epoch": 0.21656036046579671, "grad_norm": 1.2890625, "learning_rate": 0.000196391714552385, "loss": 3.7263, "step": 4626 }, { "epoch": 0.21660717420562936, "grad_norm": 1.3828125, "learning_rate": 0.00019639016047668926, "loss": 3.6087, "step": 4627 }, { "epoch": 0.216653987945462, "grad_norm": 1.234375, "learning_rate": 0.00019638860607254957, "loss": 3.6547, "step": 4628 }, { "epoch": 0.21670080168529463, "grad_norm": 1.6328125, "learning_rate": 0.00019638705133997133, "loss": 3.2853, "step": 4629 }, { "epoch": 0.21674761542512727, "grad_norm": 1.515625, "learning_rate": 0.00019638549627895975, "loss": 3.8222, "step": 4630 }, { "epoch": 0.21679442916495992, "grad_norm": 1.984375, "learning_rate": 0.0001963839408895202, "loss": 2.6999, "step": 4631 }, { "epoch": 0.21684124290479256, "grad_norm": 7.1875, "learning_rate": 0.0001963823851716579, "loss": 4.8684, "step": 4632 }, { "epoch": 0.2168880566446252, "grad_norm": 1.5, "learning_rate": 0.00019638082912537823, "loss": 3.3968, "step": 4633 }, { "epoch": 0.21693487038445783, "grad_norm": 2.046875, "learning_rate": 0.00019637927275068643, "loss": 3.4044, "step": 4634 }, { "epoch": 0.21698168412429047, "grad_norm": 1.6640625, "learning_rate": 0.00019637771604758783, "loss": 3.4974, "step": 4635 }, { "epoch": 0.21702849786412312, "grad_norm": 1.046875, "learning_rate": 0.00019637615901608774, "loss": 3.5026, "step": 4636 }, { "epoch": 0.21707531160395577, "grad_norm": 1.25, "learning_rate": 0.00019637460165619145, "loss": 3.4019, "step": 4637 }, { "epoch": 0.2171221253437884, "grad_norm": 1.1953125, "learning_rate": 0.0001963730439679043, "loss": 5.7509, "step": 4638 }, { "epoch": 0.21716893908362103, "grad_norm": 1.375, "learning_rate": 0.00019637148595123155, "loss": 3.2703, "step": 4639 }, { "epoch": 0.21721575282345368, "grad_norm": 1.15625, "learning_rate": 0.00019636992760617853, "loss": 3.5439, "step": 4640 }, { "epoch": 0.21726256656328632, "grad_norm": 1.1875, "learning_rate": 0.00019636836893275058, "loss": 3.5969, "step": 4641 }, { "epoch": 0.21730938030311897, "grad_norm": 1.3828125, "learning_rate": 0.00019636680993095296, "loss": 3.3734, "step": 4642 }, { "epoch": 0.21735619404295162, "grad_norm": 1.2421875, "learning_rate": 0.000196365250600791, "loss": 3.3899, "step": 4643 }, { "epoch": 0.21740300778278424, "grad_norm": 1.5859375, "learning_rate": 0.00019636369094227002, "loss": 3.5423, "step": 4644 }, { "epoch": 0.21744982152261688, "grad_norm": 1.796875, "learning_rate": 0.00019636213095539535, "loss": 3.2861, "step": 4645 }, { "epoch": 0.21749663526244953, "grad_norm": 1.34375, "learning_rate": 0.0001963605706401723, "loss": 3.3527, "step": 4646 }, { "epoch": 0.21754344900228217, "grad_norm": 1.234375, "learning_rate": 0.00019635900999660614, "loss": 3.4175, "step": 4647 }, { "epoch": 0.21759026274211482, "grad_norm": 1.5234375, "learning_rate": 0.00019635744902470224, "loss": 3.8316, "step": 4648 }, { "epoch": 0.21763707648194744, "grad_norm": 1.3828125, "learning_rate": 0.0001963558877244659, "loss": 3.5143, "step": 4649 }, { "epoch": 0.21768389022178009, "grad_norm": 1.390625, "learning_rate": 0.00019635432609590246, "loss": 3.9693, "step": 4650 }, { "epoch": 0.21773070396161273, "grad_norm": 1.8359375, "learning_rate": 0.0001963527641390172, "loss": 4.0074, "step": 4651 }, { "epoch": 0.21777751770144538, "grad_norm": 0.95703125, "learning_rate": 0.00019635120185381547, "loss": 5.4877, "step": 4652 }, { "epoch": 0.21782433144127802, "grad_norm": 1.140625, "learning_rate": 0.00019634963924030258, "loss": 3.0855, "step": 4653 }, { "epoch": 0.21787114518111064, "grad_norm": 1.28125, "learning_rate": 0.00019634807629848389, "loss": 3.7895, "step": 4654 }, { "epoch": 0.2179179589209433, "grad_norm": 1.234375, "learning_rate": 0.00019634651302836468, "loss": 3.4511, "step": 4655 }, { "epoch": 0.21796477266077594, "grad_norm": 1.78125, "learning_rate": 0.0001963449494299503, "loss": 3.4374, "step": 4656 }, { "epoch": 0.21801158640060858, "grad_norm": 1.453125, "learning_rate": 0.00019634338550324608, "loss": 4.3925, "step": 4657 }, { "epoch": 0.21805840014044123, "grad_norm": 1.4921875, "learning_rate": 0.00019634182124825733, "loss": 3.6904, "step": 4658 }, { "epoch": 0.21810521388027387, "grad_norm": 1.6484375, "learning_rate": 0.0001963402566649894, "loss": 3.4971, "step": 4659 }, { "epoch": 0.2181520276201065, "grad_norm": 1.375, "learning_rate": 0.00019633869175344763, "loss": 3.4286, "step": 4660 }, { "epoch": 0.21819884135993914, "grad_norm": 1.484375, "learning_rate": 0.0001963371265136373, "loss": 3.4899, "step": 4661 }, { "epoch": 0.21824565509977178, "grad_norm": 1.5078125, "learning_rate": 0.0001963355609455638, "loss": 3.4151, "step": 4662 }, { "epoch": 0.21829246883960443, "grad_norm": 1.3046875, "learning_rate": 0.00019633399504923244, "loss": 3.7029, "step": 4663 }, { "epoch": 0.21833928257943708, "grad_norm": 1.5703125, "learning_rate": 0.0001963324288246486, "loss": 3.5683, "step": 4664 }, { "epoch": 0.2183860963192697, "grad_norm": 1.3125, "learning_rate": 0.00019633086227181755, "loss": 3.6553, "step": 4665 }, { "epoch": 0.21843291005910234, "grad_norm": 2.0625, "learning_rate": 0.00019632929539074463, "loss": 3.6822, "step": 4666 }, { "epoch": 0.218479723798935, "grad_norm": 1.2421875, "learning_rate": 0.0001963277281814352, "loss": 3.6782, "step": 4667 }, { "epoch": 0.21852653753876763, "grad_norm": 1.265625, "learning_rate": 0.00019632616064389465, "loss": 3.7654, "step": 4668 }, { "epoch": 0.21857335127860028, "grad_norm": 1.4453125, "learning_rate": 0.00019632459277812826, "loss": 4.0037, "step": 4669 }, { "epoch": 0.2186201650184329, "grad_norm": 2.03125, "learning_rate": 0.00019632302458414137, "loss": 3.9437, "step": 4670 }, { "epoch": 0.21866697875826555, "grad_norm": 1.625, "learning_rate": 0.00019632145606193936, "loss": 3.5878, "step": 4671 }, { "epoch": 0.2187137924980982, "grad_norm": 1.3125, "learning_rate": 0.00019631988721152755, "loss": 3.8712, "step": 4672 }, { "epoch": 0.21876060623793084, "grad_norm": 1.4609375, "learning_rate": 0.00019631831803291127, "loss": 3.5585, "step": 4673 }, { "epoch": 0.21880741997776348, "grad_norm": 1.390625, "learning_rate": 0.0001963167485260959, "loss": 3.6624, "step": 4674 }, { "epoch": 0.2188542337175961, "grad_norm": 1.3515625, "learning_rate": 0.0001963151786910868, "loss": 3.7591, "step": 4675 }, { "epoch": 0.21890104745742875, "grad_norm": 1.3046875, "learning_rate": 0.00019631360852788928, "loss": 3.2557, "step": 4676 }, { "epoch": 0.2189478611972614, "grad_norm": 1.5234375, "learning_rate": 0.0001963120380365087, "loss": 3.5699, "step": 4677 }, { "epoch": 0.21899467493709404, "grad_norm": 1.1640625, "learning_rate": 0.00019631046721695043, "loss": 3.508, "step": 4678 }, { "epoch": 0.2190414886769267, "grad_norm": 1.1015625, "learning_rate": 0.0001963088960692198, "loss": 3.7423, "step": 4679 }, { "epoch": 0.2190883024167593, "grad_norm": 1.5078125, "learning_rate": 0.0001963073245933222, "loss": 3.6147, "step": 4680 }, { "epoch": 0.21913511615659195, "grad_norm": 1.375, "learning_rate": 0.00019630575278926294, "loss": 3.8053, "step": 4681 }, { "epoch": 0.2191819298964246, "grad_norm": 1.2578125, "learning_rate": 0.00019630418065704736, "loss": 3.451, "step": 4682 }, { "epoch": 0.21922874363625725, "grad_norm": 1.3984375, "learning_rate": 0.0001963026081966809, "loss": 3.5203, "step": 4683 }, { "epoch": 0.2192755573760899, "grad_norm": 1.703125, "learning_rate": 0.00019630103540816885, "loss": 3.5939, "step": 4684 }, { "epoch": 0.2193223711159225, "grad_norm": 1.8671875, "learning_rate": 0.00019629946229151658, "loss": 3.3252, "step": 4685 }, { "epoch": 0.21936918485575516, "grad_norm": 1.375, "learning_rate": 0.00019629788884672946, "loss": 3.7713, "step": 4686 }, { "epoch": 0.2194159985955878, "grad_norm": 1.3203125, "learning_rate": 0.00019629631507381284, "loss": 3.6959, "step": 4687 }, { "epoch": 0.21946281233542045, "grad_norm": 1.4609375, "learning_rate": 0.00019629474097277214, "loss": 3.2857, "step": 4688 }, { "epoch": 0.2195096260752531, "grad_norm": 1.0234375, "learning_rate": 0.00019629316654361264, "loss": 3.0973, "step": 4689 }, { "epoch": 0.21955643981508574, "grad_norm": 1.359375, "learning_rate": 0.00019629159178633976, "loss": 3.1876, "step": 4690 }, { "epoch": 0.21960325355491836, "grad_norm": 1.3359375, "learning_rate": 0.00019629001670095885, "loss": 3.4956, "step": 4691 }, { "epoch": 0.219650067294751, "grad_norm": 1.4921875, "learning_rate": 0.0001962884412874752, "loss": 3.2099, "step": 4692 }, { "epoch": 0.21969688103458365, "grad_norm": 1.25, "learning_rate": 0.00019628686554589434, "loss": 3.6117, "step": 4693 }, { "epoch": 0.2197436947744163, "grad_norm": 1.265625, "learning_rate": 0.00019628528947622154, "loss": 3.4086, "step": 4694 }, { "epoch": 0.21979050851424894, "grad_norm": 0.96875, "learning_rate": 0.00019628371307846214, "loss": 3.5827, "step": 4695 }, { "epoch": 0.21983732225408156, "grad_norm": 1.828125, "learning_rate": 0.0001962821363526216, "loss": 3.6422, "step": 4696 }, { "epoch": 0.2198841359939142, "grad_norm": 1.7890625, "learning_rate": 0.00019628055929870518, "loss": 3.6842, "step": 4697 }, { "epoch": 0.21993094973374686, "grad_norm": 1.65625, "learning_rate": 0.00019627898191671838, "loss": 3.8752, "step": 4698 }, { "epoch": 0.2199777634735795, "grad_norm": 1.109375, "learning_rate": 0.0001962774042066665, "loss": 3.5916, "step": 4699 }, { "epoch": 0.22002457721341215, "grad_norm": 1.6015625, "learning_rate": 0.00019627582616855494, "loss": 3.7544, "step": 4700 }, { "epoch": 0.22007139095324477, "grad_norm": 1.4375, "learning_rate": 0.00019627424780238904, "loss": 3.635, "step": 4701 }, { "epoch": 0.2201182046930774, "grad_norm": 1.1171875, "learning_rate": 0.00019627266910817422, "loss": 3.4805, "step": 4702 }, { "epoch": 0.22016501843291006, "grad_norm": 1.1484375, "learning_rate": 0.00019627109008591584, "loss": 3.2314, "step": 4703 }, { "epoch": 0.2202118321727427, "grad_norm": 1.328125, "learning_rate": 0.0001962695107356193, "loss": 3.429, "step": 4704 }, { "epoch": 0.22025864591257535, "grad_norm": 1.203125, "learning_rate": 0.00019626793105728996, "loss": 3.5633, "step": 4705 }, { "epoch": 0.22030545965240797, "grad_norm": 1.125, "learning_rate": 0.00019626635105093318, "loss": 3.2957, "step": 4706 }, { "epoch": 0.22035227339224062, "grad_norm": 1.3828125, "learning_rate": 0.0001962647707165544, "loss": 3.8769, "step": 4707 }, { "epoch": 0.22039908713207326, "grad_norm": 1.640625, "learning_rate": 0.00019626319005415898, "loss": 3.6757, "step": 4708 }, { "epoch": 0.2204459008719059, "grad_norm": 1.40625, "learning_rate": 0.0001962616090637523, "loss": 3.4173, "step": 4709 }, { "epoch": 0.22049271461173856, "grad_norm": 1.2734375, "learning_rate": 0.00019626002774533977, "loss": 3.3456, "step": 4710 }, { "epoch": 0.22053952835157117, "grad_norm": 1.203125, "learning_rate": 0.00019625844609892675, "loss": 5.0588, "step": 4711 }, { "epoch": 0.22058634209140382, "grad_norm": 1.15625, "learning_rate": 0.0001962568641245186, "loss": 3.5133, "step": 4712 }, { "epoch": 0.22063315583123647, "grad_norm": 1.2421875, "learning_rate": 0.00019625528182212079, "loss": 3.4318, "step": 4713 }, { "epoch": 0.2206799695710691, "grad_norm": 1.421875, "learning_rate": 0.00019625369919173865, "loss": 3.4425, "step": 4714 }, { "epoch": 0.22072678331090176, "grad_norm": 1.171875, "learning_rate": 0.0001962521162333776, "loss": 3.7436, "step": 4715 }, { "epoch": 0.22077359705073438, "grad_norm": 1.828125, "learning_rate": 0.00019625053294704302, "loss": 3.2228, "step": 4716 }, { "epoch": 0.22082041079056702, "grad_norm": 1.2421875, "learning_rate": 0.00019624894933274037, "loss": 3.4824, "step": 4717 }, { "epoch": 0.22086722453039967, "grad_norm": 1.5390625, "learning_rate": 0.00019624736539047494, "loss": 3.5251, "step": 4718 }, { "epoch": 0.22091403827023232, "grad_norm": 1.34375, "learning_rate": 0.00019624578112025217, "loss": 2.9263, "step": 4719 }, { "epoch": 0.22096085201006496, "grad_norm": 1.0859375, "learning_rate": 0.00019624419652207746, "loss": 3.4535, "step": 4720 }, { "epoch": 0.2210076657498976, "grad_norm": 1.34375, "learning_rate": 0.00019624261159595623, "loss": 4.0113, "step": 4721 }, { "epoch": 0.22105447948973023, "grad_norm": 1.1796875, "learning_rate": 0.00019624102634189386, "loss": 3.533, "step": 4722 }, { "epoch": 0.22110129322956287, "grad_norm": 1.4609375, "learning_rate": 0.00019623944075989575, "loss": 3.4822, "step": 4723 }, { "epoch": 0.22114810696939552, "grad_norm": 1.1875, "learning_rate": 0.00019623785484996733, "loss": 3.4331, "step": 4724 }, { "epoch": 0.22119492070922817, "grad_norm": 1.2890625, "learning_rate": 0.00019623626861211397, "loss": 3.687, "step": 4725 }, { "epoch": 0.2212417344490608, "grad_norm": 1.2734375, "learning_rate": 0.00019623468204634108, "loss": 3.368, "step": 4726 }, { "epoch": 0.22128854818889343, "grad_norm": 1.1875, "learning_rate": 0.0001962330951526541, "loss": 3.9451, "step": 4727 }, { "epoch": 0.22133536192872608, "grad_norm": 1.09375, "learning_rate": 0.00019623150793105838, "loss": 3.1156, "step": 4728 }, { "epoch": 0.22138217566855872, "grad_norm": 1.3515625, "learning_rate": 0.00019622992038155937, "loss": 3.6443, "step": 4729 }, { "epoch": 0.22142898940839137, "grad_norm": 1.0390625, "learning_rate": 0.00019622833250416245, "loss": 3.462, "step": 4730 }, { "epoch": 0.22147580314822402, "grad_norm": 1.359375, "learning_rate": 0.00019622674429887308, "loss": 3.5409, "step": 4731 }, { "epoch": 0.22152261688805663, "grad_norm": 1.2109375, "learning_rate": 0.00019622515576569663, "loss": 3.2816, "step": 4732 }, { "epoch": 0.22156943062788928, "grad_norm": 1.5078125, "learning_rate": 0.00019622356690463853, "loss": 3.5255, "step": 4733 }, { "epoch": 0.22161624436772193, "grad_norm": 1.2734375, "learning_rate": 0.00019622197771570416, "loss": 3.6145, "step": 4734 }, { "epoch": 0.22166305810755457, "grad_norm": 1.5703125, "learning_rate": 0.00019622038819889896, "loss": 3.4143, "step": 4735 }, { "epoch": 0.22170987184738722, "grad_norm": 1.1171875, "learning_rate": 0.00019621879835422839, "loss": 3.2812, "step": 4736 }, { "epoch": 0.22175668558721984, "grad_norm": 1.1640625, "learning_rate": 0.0001962172081816978, "loss": 3.4839, "step": 4737 }, { "epoch": 0.22180349932705248, "grad_norm": 1.203125, "learning_rate": 0.00019621561768131264, "loss": 2.5874, "step": 4738 }, { "epoch": 0.22185031306688513, "grad_norm": 1.3046875, "learning_rate": 0.00019621402685307833, "loss": 3.3464, "step": 4739 }, { "epoch": 0.22189712680671778, "grad_norm": 1.5546875, "learning_rate": 0.00019621243569700029, "loss": 3.6132, "step": 4740 }, { "epoch": 0.22194394054655042, "grad_norm": 1.390625, "learning_rate": 0.00019621084421308393, "loss": 3.2339, "step": 4741 }, { "epoch": 0.22199075428638304, "grad_norm": 1.2890625, "learning_rate": 0.00019620925240133467, "loss": 3.576, "step": 4742 }, { "epoch": 0.2220375680262157, "grad_norm": 1.328125, "learning_rate": 0.00019620766026175797, "loss": 3.5476, "step": 4743 }, { "epoch": 0.22208438176604833, "grad_norm": 1.0703125, "learning_rate": 0.0001962060677943592, "loss": 3.3162, "step": 4744 }, { "epoch": 0.22213119550588098, "grad_norm": 1.5, "learning_rate": 0.0001962044749991438, "loss": 3.423, "step": 4745 }, { "epoch": 0.22217800924571363, "grad_norm": 1.34375, "learning_rate": 0.00019620288187611727, "loss": 3.5816, "step": 4746 }, { "epoch": 0.22222482298554624, "grad_norm": 1.0625, "learning_rate": 0.00019620128842528494, "loss": 3.3419, "step": 4747 }, { "epoch": 0.2222716367253789, "grad_norm": 1.4453125, "learning_rate": 0.00019619969464665228, "loss": 3.5128, "step": 4748 }, { "epoch": 0.22231845046521154, "grad_norm": 1.515625, "learning_rate": 0.00019619810054022474, "loss": 2.6288, "step": 4749 }, { "epoch": 0.22236526420504418, "grad_norm": 1.8046875, "learning_rate": 0.00019619650610600773, "loss": 2.7923, "step": 4750 }, { "epoch": 0.22241207794487683, "grad_norm": 1.4453125, "learning_rate": 0.00019619491134400665, "loss": 3.1621, "step": 4751 }, { "epoch": 0.22245889168470948, "grad_norm": 2.390625, "learning_rate": 0.000196193316254227, "loss": 4.0217, "step": 4752 }, { "epoch": 0.2225057054245421, "grad_norm": 1.0, "learning_rate": 0.0001961917208366742, "loss": 3.4614, "step": 4753 }, { "epoch": 0.22255251916437474, "grad_norm": 1.1015625, "learning_rate": 0.00019619012509135365, "loss": 3.4885, "step": 4754 }, { "epoch": 0.2225993329042074, "grad_norm": 1.09375, "learning_rate": 0.00019618852901827084, "loss": 3.6604, "step": 4755 }, { "epoch": 0.22264614664404003, "grad_norm": 1.109375, "learning_rate": 0.00019618693261743114, "loss": 3.4831, "step": 4756 }, { "epoch": 0.22269296038387268, "grad_norm": 1.4765625, "learning_rate": 0.00019618533588884, "loss": 3.6135, "step": 4757 }, { "epoch": 0.2227397741237053, "grad_norm": 1.296875, "learning_rate": 0.00019618373883250297, "loss": 3.2329, "step": 4758 }, { "epoch": 0.22278658786353794, "grad_norm": 1.1484375, "learning_rate": 0.00019618214144842533, "loss": 3.5379, "step": 4759 }, { "epoch": 0.2228334016033706, "grad_norm": 1.75, "learning_rate": 0.00019618054373661266, "loss": 3.4897, "step": 4760 }, { "epoch": 0.22288021534320324, "grad_norm": 1.5390625, "learning_rate": 0.00019617894569707033, "loss": 3.6647, "step": 4761 }, { "epoch": 0.22292702908303588, "grad_norm": 3.1875, "learning_rate": 0.00019617734732980377, "loss": 4.106, "step": 4762 }, { "epoch": 0.2229738428228685, "grad_norm": 1.2265625, "learning_rate": 0.00019617574863481848, "loss": 3.6824, "step": 4763 }, { "epoch": 0.22302065656270115, "grad_norm": 1.8046875, "learning_rate": 0.00019617414961211992, "loss": 3.5228, "step": 4764 }, { "epoch": 0.2230674703025338, "grad_norm": 1.3984375, "learning_rate": 0.00019617255026171346, "loss": 3.2708, "step": 4765 }, { "epoch": 0.22311428404236644, "grad_norm": 1.140625, "learning_rate": 0.0001961709505836046, "loss": 3.4561, "step": 4766 }, { "epoch": 0.22316109778219909, "grad_norm": 2.234375, "learning_rate": 0.0001961693505777988, "loss": 3.3529, "step": 4767 }, { "epoch": 0.2232079115220317, "grad_norm": 1.1796875, "learning_rate": 0.0001961677502443015, "loss": 3.6948, "step": 4768 }, { "epoch": 0.22325472526186435, "grad_norm": 1.59375, "learning_rate": 0.00019616614958311814, "loss": 3.7512, "step": 4769 }, { "epoch": 0.223301539001697, "grad_norm": 1.2109375, "learning_rate": 0.00019616454859425417, "loss": 3.6476, "step": 4770 }, { "epoch": 0.22334835274152964, "grad_norm": 1.4921875, "learning_rate": 0.00019616294727771506, "loss": 3.539, "step": 4771 }, { "epoch": 0.2233951664813623, "grad_norm": 1.46875, "learning_rate": 0.0001961613456335063, "loss": 3.4559, "step": 4772 }, { "epoch": 0.2234419802211949, "grad_norm": 1.5625, "learning_rate": 0.00019615974366163328, "loss": 3.765, "step": 4773 }, { "epoch": 0.22348879396102755, "grad_norm": 1.3359375, "learning_rate": 0.0001961581413621015, "loss": 3.4191, "step": 4774 }, { "epoch": 0.2235356077008602, "grad_norm": 1.1953125, "learning_rate": 0.00019615653873491642, "loss": 3.3824, "step": 4775 }, { "epoch": 0.22358242144069285, "grad_norm": 1.046875, "learning_rate": 0.0001961549357800835, "loss": 3.3543, "step": 4776 }, { "epoch": 0.2236292351805255, "grad_norm": 1.0703125, "learning_rate": 0.00019615333249760819, "loss": 3.2302, "step": 4777 }, { "epoch": 0.2236760489203581, "grad_norm": 1.2890625, "learning_rate": 0.00019615172888749595, "loss": 3.1767, "step": 4778 }, { "epoch": 0.22372286266019076, "grad_norm": 1.0546875, "learning_rate": 0.00019615012494975226, "loss": 3.2434, "step": 4779 }, { "epoch": 0.2237696764000234, "grad_norm": 1.21875, "learning_rate": 0.00019614852068438254, "loss": 3.6004, "step": 4780 }, { "epoch": 0.22381649013985605, "grad_norm": 1.46875, "learning_rate": 0.00019614691609139232, "loss": 3.5394, "step": 4781 }, { "epoch": 0.2238633038796887, "grad_norm": 1.59375, "learning_rate": 0.00019614531117078705, "loss": 3.3673, "step": 4782 }, { "epoch": 0.22391011761952134, "grad_norm": 0.98046875, "learning_rate": 0.00019614370592257218, "loss": 2.9404, "step": 4783 }, { "epoch": 0.22395693135935396, "grad_norm": 1.2421875, "learning_rate": 0.0001961421003467532, "loss": 3.7976, "step": 4784 }, { "epoch": 0.2240037450991866, "grad_norm": 1.3359375, "learning_rate": 0.00019614049444333556, "loss": 3.5053, "step": 4785 }, { "epoch": 0.22405055883901925, "grad_norm": 1.3828125, "learning_rate": 0.00019613888821232473, "loss": 3.5546, "step": 4786 }, { "epoch": 0.2240973725788519, "grad_norm": 1.1484375, "learning_rate": 0.00019613728165372623, "loss": 5.5511, "step": 4787 }, { "epoch": 0.22414418631868455, "grad_norm": 4.34375, "learning_rate": 0.00019613567476754547, "loss": 3.8155, "step": 4788 }, { "epoch": 0.22419100005851716, "grad_norm": 1.7265625, "learning_rate": 0.000196134067553788, "loss": 3.2688, "step": 4789 }, { "epoch": 0.2242378137983498, "grad_norm": 1.5390625, "learning_rate": 0.00019613246001245918, "loss": 3.715, "step": 4790 }, { "epoch": 0.22428462753818246, "grad_norm": 1.53125, "learning_rate": 0.00019613085214356462, "loss": 3.5968, "step": 4791 }, { "epoch": 0.2243314412780151, "grad_norm": 1.2421875, "learning_rate": 0.00019612924394710968, "loss": 2.8154, "step": 4792 }, { "epoch": 0.22437825501784775, "grad_norm": 1.234375, "learning_rate": 0.00019612763542309995, "loss": 3.6943, "step": 4793 }, { "epoch": 0.22442506875768037, "grad_norm": 1.484375, "learning_rate": 0.0001961260265715408, "loss": 3.8213, "step": 4794 }, { "epoch": 0.22447188249751301, "grad_norm": 1.109375, "learning_rate": 0.00019612441739243783, "loss": 3.1706, "step": 4795 }, { "epoch": 0.22451869623734566, "grad_norm": 1.0859375, "learning_rate": 0.00019612280788579645, "loss": 3.5884, "step": 4796 }, { "epoch": 0.2245655099771783, "grad_norm": 1.34375, "learning_rate": 0.00019612119805162217, "loss": 3.0524, "step": 4797 }, { "epoch": 0.22461232371701095, "grad_norm": 1.234375, "learning_rate": 0.00019611958788992044, "loss": 3.4991, "step": 4798 }, { "epoch": 0.22465913745684357, "grad_norm": 1.359375, "learning_rate": 0.00019611797740069677, "loss": 3.9842, "step": 4799 }, { "epoch": 0.22470595119667622, "grad_norm": 1.4609375, "learning_rate": 0.00019611636658395666, "loss": 3.6829, "step": 4800 }, { "epoch": 0.22475276493650886, "grad_norm": 1.640625, "learning_rate": 0.00019611475543970558, "loss": 3.8928, "step": 4801 }, { "epoch": 0.2247995786763415, "grad_norm": 1.8671875, "learning_rate": 0.000196113143967949, "loss": 3.884, "step": 4802 }, { "epoch": 0.22484639241617416, "grad_norm": 1.21875, "learning_rate": 0.0001961115321686925, "loss": 3.402, "step": 4803 }, { "epoch": 0.22489320615600678, "grad_norm": 1.5, "learning_rate": 0.00019610992004194148, "loss": 3.8704, "step": 4804 }, { "epoch": 0.22494001989583942, "grad_norm": 1.5234375, "learning_rate": 0.00019610830758770144, "loss": 3.1178, "step": 4805 }, { "epoch": 0.22498683363567207, "grad_norm": 1.828125, "learning_rate": 0.00019610669480597795, "loss": 3.3008, "step": 4806 }, { "epoch": 0.22503364737550471, "grad_norm": 1.4375, "learning_rate": 0.0001961050816967764, "loss": 3.695, "step": 4807 }, { "epoch": 0.22508046111533736, "grad_norm": 1.1328125, "learning_rate": 0.00019610346826010234, "loss": 3.5373, "step": 4808 }, { "epoch": 0.22512727485516998, "grad_norm": 1.1328125, "learning_rate": 0.0001961018544959613, "loss": 3.7047, "step": 4809 }, { "epoch": 0.22517408859500262, "grad_norm": 1.3984375, "learning_rate": 0.00019610024040435872, "loss": 3.1713, "step": 4810 }, { "epoch": 0.22522090233483527, "grad_norm": 1.0859375, "learning_rate": 0.00019609862598530015, "loss": 2.5487, "step": 4811 }, { "epoch": 0.22526771607466792, "grad_norm": 1.234375, "learning_rate": 0.00019609701123879104, "loss": 3.3426, "step": 4812 }, { "epoch": 0.22531452981450056, "grad_norm": 2.703125, "learning_rate": 0.00019609539616483694, "loss": 3.8405, "step": 4813 }, { "epoch": 0.2253613435543332, "grad_norm": 1.5, "learning_rate": 0.0001960937807634433, "loss": 3.6575, "step": 4814 }, { "epoch": 0.22540815729416583, "grad_norm": 1.2890625, "learning_rate": 0.0001960921650346157, "loss": 3.5972, "step": 4815 }, { "epoch": 0.22545497103399847, "grad_norm": 1.4296875, "learning_rate": 0.0001960905489783596, "loss": 3.863, "step": 4816 }, { "epoch": 0.22550178477383112, "grad_norm": 1.390625, "learning_rate": 0.00019608893259468046, "loss": 3.6637, "step": 4817 }, { "epoch": 0.22554859851366377, "grad_norm": 1.1796875, "learning_rate": 0.00019608731588358387, "loss": 3.5139, "step": 4818 }, { "epoch": 0.2255954122534964, "grad_norm": 1.46875, "learning_rate": 0.00019608569884507532, "loss": 3.6105, "step": 4819 }, { "epoch": 0.22564222599332903, "grad_norm": 1.375, "learning_rate": 0.0001960840814791603, "loss": 3.6091, "step": 4820 }, { "epoch": 0.22568903973316168, "grad_norm": 1.3203125, "learning_rate": 0.0001960824637858443, "loss": 3.9037, "step": 4821 }, { "epoch": 0.22573585347299432, "grad_norm": 1.2109375, "learning_rate": 0.00019608084576513285, "loss": 3.4221, "step": 4822 }, { "epoch": 0.22578266721282697, "grad_norm": 1.15625, "learning_rate": 0.0001960792274170315, "loss": 3.1278, "step": 4823 }, { "epoch": 0.22582948095265962, "grad_norm": 1.3515625, "learning_rate": 0.00019607760874154574, "loss": 3.4783, "step": 4824 }, { "epoch": 0.22587629469249224, "grad_norm": 1.03125, "learning_rate": 0.00019607598973868108, "loss": 4.1734, "step": 4825 }, { "epoch": 0.22592310843232488, "grad_norm": 1.4453125, "learning_rate": 0.00019607437040844303, "loss": 3.3527, "step": 4826 }, { "epoch": 0.22596992217215753, "grad_norm": 1.53125, "learning_rate": 0.0001960727507508371, "loss": 3.4817, "step": 4827 }, { "epoch": 0.22601673591199017, "grad_norm": 1.7578125, "learning_rate": 0.00019607113076586884, "loss": 3.8592, "step": 4828 }, { "epoch": 0.22606354965182282, "grad_norm": 1.5078125, "learning_rate": 0.00019606951045354377, "loss": 3.4481, "step": 4829 }, { "epoch": 0.22611036339165544, "grad_norm": 1.3125, "learning_rate": 0.00019606788981386738, "loss": 3.4896, "step": 4830 }, { "epoch": 0.22615717713148809, "grad_norm": 1.3046875, "learning_rate": 0.00019606626884684523, "loss": 3.187, "step": 4831 }, { "epoch": 0.22620399087132073, "grad_norm": 1.234375, "learning_rate": 0.0001960646475524828, "loss": 3.6949, "step": 4832 }, { "epoch": 0.22625080461115338, "grad_norm": 1.265625, "learning_rate": 0.00019606302593078563, "loss": 3.5171, "step": 4833 }, { "epoch": 0.22629761835098602, "grad_norm": 1.34375, "learning_rate": 0.00019606140398175924, "loss": 3.698, "step": 4834 }, { "epoch": 0.22634443209081864, "grad_norm": 1.5859375, "learning_rate": 0.0001960597817054092, "loss": 3.8145, "step": 4835 }, { "epoch": 0.2263912458306513, "grad_norm": 1.140625, "learning_rate": 0.00019605815910174102, "loss": 3.2287, "step": 4836 }, { "epoch": 0.22643805957048393, "grad_norm": 1.1640625, "learning_rate": 0.00019605653617076017, "loss": 3.4342, "step": 4837 }, { "epoch": 0.22648487331031658, "grad_norm": 1.34375, "learning_rate": 0.00019605491291247224, "loss": 3.4183, "step": 4838 }, { "epoch": 0.22653168705014923, "grad_norm": 1.421875, "learning_rate": 0.00019605328932688275, "loss": 3.5282, "step": 4839 }, { "epoch": 0.22657850078998185, "grad_norm": 1.375, "learning_rate": 0.00019605166541399727, "loss": 3.245, "step": 4840 }, { "epoch": 0.2266253145298145, "grad_norm": 1.3828125, "learning_rate": 0.00019605004117382124, "loss": 3.6333, "step": 4841 }, { "epoch": 0.22667212826964714, "grad_norm": 1.3515625, "learning_rate": 0.00019604841660636027, "loss": 3.5133, "step": 4842 }, { "epoch": 0.22671894200947978, "grad_norm": 1.6953125, "learning_rate": 0.00019604679171161986, "loss": 3.0784, "step": 4843 }, { "epoch": 0.22676575574931243, "grad_norm": 1.140625, "learning_rate": 0.00019604516648960555, "loss": 3.5193, "step": 4844 }, { "epoch": 0.22681256948914508, "grad_norm": 1.265625, "learning_rate": 0.0001960435409403229, "loss": 3.0674, "step": 4845 }, { "epoch": 0.2268593832289777, "grad_norm": 1.09375, "learning_rate": 0.00019604191506377743, "loss": 3.3467, "step": 4846 }, { "epoch": 0.22690619696881034, "grad_norm": 1.6171875, "learning_rate": 0.0001960402888599747, "loss": 3.3406, "step": 4847 }, { "epoch": 0.226953010708643, "grad_norm": 1.21875, "learning_rate": 0.00019603866232892022, "loss": 3.4466, "step": 4848 }, { "epoch": 0.22699982444847563, "grad_norm": 1.4375, "learning_rate": 0.00019603703547061959, "loss": 3.5464, "step": 4849 }, { "epoch": 0.22704663818830828, "grad_norm": 1.6171875, "learning_rate": 0.00019603540828507825, "loss": 3.7236, "step": 4850 }, { "epoch": 0.2270934519281409, "grad_norm": 1.59375, "learning_rate": 0.00019603378077230184, "loss": 3.3692, "step": 4851 }, { "epoch": 0.22714026566797355, "grad_norm": 1.25, "learning_rate": 0.00019603215293229588, "loss": 3.4407, "step": 4852 }, { "epoch": 0.2271870794078062, "grad_norm": 1.5078125, "learning_rate": 0.00019603052476506588, "loss": 3.3551, "step": 4853 }, { "epoch": 0.22723389314763884, "grad_norm": 1.296875, "learning_rate": 0.00019602889627061746, "loss": 3.1591, "step": 4854 }, { "epoch": 0.22728070688747148, "grad_norm": 1.15625, "learning_rate": 0.0001960272674489561, "loss": 3.312, "step": 4855 }, { "epoch": 0.2273275206273041, "grad_norm": 1.1875, "learning_rate": 0.00019602563830008738, "loss": 3.617, "step": 4856 }, { "epoch": 0.22737433436713675, "grad_norm": 1.359375, "learning_rate": 0.00019602400882401684, "loss": 3.5156, "step": 4857 }, { "epoch": 0.2274211481069694, "grad_norm": 1.3828125, "learning_rate": 0.00019602237902075005, "loss": 3.2148, "step": 4858 }, { "epoch": 0.22746796184680204, "grad_norm": 1.703125, "learning_rate": 0.00019602074889029256, "loss": 3.5157, "step": 4859 }, { "epoch": 0.2275147755866347, "grad_norm": 1.3046875, "learning_rate": 0.0001960191184326499, "loss": 3.2574, "step": 4860 }, { "epoch": 0.2275615893264673, "grad_norm": 1.6875, "learning_rate": 0.00019601748764782765, "loss": 3.6702, "step": 4861 }, { "epoch": 0.22760840306629995, "grad_norm": 1.0859375, "learning_rate": 0.0001960158565358314, "loss": 5.1083, "step": 4862 }, { "epoch": 0.2276552168061326, "grad_norm": 1.3671875, "learning_rate": 0.00019601422509666662, "loss": 3.0479, "step": 4863 }, { "epoch": 0.22770203054596524, "grad_norm": 9.1875, "learning_rate": 0.00019601259333033895, "loss": 7.376, "step": 4864 }, { "epoch": 0.2277488442857979, "grad_norm": 1.6171875, "learning_rate": 0.0001960109612368539, "loss": 3.0229, "step": 4865 }, { "epoch": 0.2277956580256305, "grad_norm": 1.4609375, "learning_rate": 0.00019600932881621704, "loss": 3.6234, "step": 4866 }, { "epoch": 0.22784247176546316, "grad_norm": 1.4140625, "learning_rate": 0.00019600769606843395, "loss": 3.418, "step": 4867 }, { "epoch": 0.2278892855052958, "grad_norm": 1.46875, "learning_rate": 0.00019600606299351018, "loss": 3.6108, "step": 4868 }, { "epoch": 0.22793609924512845, "grad_norm": 1.90625, "learning_rate": 0.00019600442959145134, "loss": 3.8786, "step": 4869 }, { "epoch": 0.2279829129849611, "grad_norm": 1.8203125, "learning_rate": 0.00019600279586226292, "loss": 3.9713, "step": 4870 }, { "epoch": 0.2280297267247937, "grad_norm": 1.6484375, "learning_rate": 0.0001960011618059505, "loss": 3.6374, "step": 4871 }, { "epoch": 0.22807654046462636, "grad_norm": 1.3046875, "learning_rate": 0.0001959995274225197, "loss": 3.7341, "step": 4872 }, { "epoch": 0.228123354204459, "grad_norm": 1.640625, "learning_rate": 0.00019599789271197607, "loss": 3.7925, "step": 4873 }, { "epoch": 0.22817016794429165, "grad_norm": 1.328125, "learning_rate": 0.00019599625767432514, "loss": 3.0854, "step": 4874 }, { "epoch": 0.2282169816841243, "grad_norm": 1.3359375, "learning_rate": 0.00019599462230957256, "loss": 3.5504, "step": 4875 }, { "epoch": 0.22826379542395694, "grad_norm": 1.171875, "learning_rate": 0.00019599298661772377, "loss": 3.4201, "step": 4876 }, { "epoch": 0.22831060916378956, "grad_norm": 1.4140625, "learning_rate": 0.0001959913505987845, "loss": 3.7567, "step": 4877 }, { "epoch": 0.2283574229036222, "grad_norm": 1.2109375, "learning_rate": 0.00019598971425276022, "loss": 3.4854, "step": 4878 }, { "epoch": 0.22840423664345486, "grad_norm": 1.5703125, "learning_rate": 0.00019598807757965656, "loss": 2.9605, "step": 4879 }, { "epoch": 0.2284510503832875, "grad_norm": 1.390625, "learning_rate": 0.00019598644057947906, "loss": 3.2126, "step": 4880 }, { "epoch": 0.22849786412312015, "grad_norm": 1.5625, "learning_rate": 0.0001959848032522333, "loss": 3.9475, "step": 4881 }, { "epoch": 0.22854467786295277, "grad_norm": 1.2578125, "learning_rate": 0.00019598316559792488, "loss": 3.8355, "step": 4882 }, { "epoch": 0.2285914916027854, "grad_norm": 1.5546875, "learning_rate": 0.0001959815276165594, "loss": 3.9164, "step": 4883 }, { "epoch": 0.22863830534261806, "grad_norm": 1.34375, "learning_rate": 0.00019597988930814236, "loss": 3.6064, "step": 4884 }, { "epoch": 0.2286851190824507, "grad_norm": 1.171875, "learning_rate": 0.00019597825067267942, "loss": 3.3654, "step": 4885 }, { "epoch": 0.22873193282228335, "grad_norm": 1.5390625, "learning_rate": 0.00019597661171017613, "loss": 3.6814, "step": 4886 }, { "epoch": 0.22877874656211597, "grad_norm": 1.5703125, "learning_rate": 0.00019597497242063812, "loss": 3.4445, "step": 4887 }, { "epoch": 0.22882556030194862, "grad_norm": 1.5625, "learning_rate": 0.0001959733328040709, "loss": 3.6114, "step": 4888 }, { "epoch": 0.22887237404178126, "grad_norm": 1.140625, "learning_rate": 0.0001959716928604801, "loss": 3.343, "step": 4889 }, { "epoch": 0.2289191877816139, "grad_norm": 1.1953125, "learning_rate": 0.00019597005258987132, "loss": 3.1902, "step": 4890 }, { "epoch": 0.22896600152144655, "grad_norm": 1.328125, "learning_rate": 0.0001959684119922501, "loss": 3.2615, "step": 4891 }, { "epoch": 0.22901281526127917, "grad_norm": 1.40625, "learning_rate": 0.0001959667710676221, "loss": 3.5632, "step": 4892 }, { "epoch": 0.22905962900111182, "grad_norm": 1.515625, "learning_rate": 0.00019596512981599287, "loss": 3.5169, "step": 4893 }, { "epoch": 0.22910644274094447, "grad_norm": 0.9609375, "learning_rate": 0.00019596348823736802, "loss": 3.6818, "step": 4894 }, { "epoch": 0.2291532564807771, "grad_norm": 1.5234375, "learning_rate": 0.0001959618463317531, "loss": 3.8396, "step": 4895 }, { "epoch": 0.22920007022060976, "grad_norm": 1.1953125, "learning_rate": 0.00019596020409915374, "loss": 3.451, "step": 4896 }, { "epoch": 0.22924688396044238, "grad_norm": 1.2890625, "learning_rate": 0.00019595856153957554, "loss": 3.3718, "step": 4897 }, { "epoch": 0.22929369770027502, "grad_norm": 1.53125, "learning_rate": 0.00019595691865302407, "loss": 3.9417, "step": 4898 }, { "epoch": 0.22934051144010767, "grad_norm": 1.3125, "learning_rate": 0.000195955275439505, "loss": 3.4791, "step": 4899 }, { "epoch": 0.22938732517994032, "grad_norm": 1.3125, "learning_rate": 0.00019595363189902381, "loss": 3.1698, "step": 4900 }, { "epoch": 0.22943413891977296, "grad_norm": 1.4140625, "learning_rate": 0.0001959519880315862, "loss": 3.7392, "step": 4901 }, { "epoch": 0.22948095265960558, "grad_norm": 1.1015625, "learning_rate": 0.00019595034383719772, "loss": 3.1403, "step": 4902 }, { "epoch": 0.22952776639943823, "grad_norm": 1.6640625, "learning_rate": 0.00019594869931586402, "loss": 3.3477, "step": 4903 }, { "epoch": 0.22957458013927087, "grad_norm": 1.046875, "learning_rate": 0.00019594705446759062, "loss": 3.6738, "step": 4904 }, { "epoch": 0.22962139387910352, "grad_norm": 1.7109375, "learning_rate": 0.00019594540929238324, "loss": 3.5236, "step": 4905 }, { "epoch": 0.22966820761893617, "grad_norm": 1.171875, "learning_rate": 0.0001959437637902474, "loss": 3.4915, "step": 4906 }, { "epoch": 0.2297150213587688, "grad_norm": 1.046875, "learning_rate": 0.0001959421179611887, "loss": 3.5991, "step": 4907 }, { "epoch": 0.22976183509860143, "grad_norm": 1.3984375, "learning_rate": 0.00019594047180521282, "loss": 3.6298, "step": 4908 }, { "epoch": 0.22980864883843408, "grad_norm": 1.6796875, "learning_rate": 0.0001959388253223253, "loss": 3.4536, "step": 4909 }, { "epoch": 0.22985546257826672, "grad_norm": 1.2890625, "learning_rate": 0.00019593717851253183, "loss": 3.7074, "step": 4910 }, { "epoch": 0.22990227631809937, "grad_norm": 1.3984375, "learning_rate": 0.0001959355313758379, "loss": 3.8442, "step": 4911 }, { "epoch": 0.22994909005793202, "grad_norm": 1.203125, "learning_rate": 0.00019593388391224922, "loss": 3.1164, "step": 4912 }, { "epoch": 0.22999590379776463, "grad_norm": 1.1328125, "learning_rate": 0.0001959322361217714, "loss": 3.6743, "step": 4913 }, { "epoch": 0.23004271753759728, "grad_norm": 1.421875, "learning_rate": 0.00019593058800441, "loss": 3.4161, "step": 4914 }, { "epoch": 0.23008953127742993, "grad_norm": 1.5, "learning_rate": 0.00019592893956017066, "loss": 3.4582, "step": 4915 }, { "epoch": 0.23013634501726257, "grad_norm": 1.796875, "learning_rate": 0.00019592729078905902, "loss": 3.3804, "step": 4916 }, { "epoch": 0.23018315875709522, "grad_norm": 1.25, "learning_rate": 0.00019592564169108068, "loss": 3.5489, "step": 4917 }, { "epoch": 0.23022997249692784, "grad_norm": 0.91015625, "learning_rate": 0.00019592399226624126, "loss": 3.3022, "step": 4918 }, { "epoch": 0.23027678623676048, "grad_norm": 1.328125, "learning_rate": 0.0001959223425145464, "loss": 3.5113, "step": 4919 }, { "epoch": 0.23032359997659313, "grad_norm": 1.078125, "learning_rate": 0.00019592069243600167, "loss": 4.9279, "step": 4920 }, { "epoch": 0.23037041371642578, "grad_norm": 1.703125, "learning_rate": 0.00019591904203061273, "loss": 3.751, "step": 4921 }, { "epoch": 0.23041722745625842, "grad_norm": 1.6171875, "learning_rate": 0.00019591739129838523, "loss": 3.6251, "step": 4922 }, { "epoch": 0.23046404119609104, "grad_norm": 1.1875, "learning_rate": 0.00019591574023932474, "loss": 3.5153, "step": 4923 }, { "epoch": 0.2305108549359237, "grad_norm": 1.203125, "learning_rate": 0.0001959140888534369, "loss": 3.4346, "step": 4924 }, { "epoch": 0.23055766867575633, "grad_norm": 1.109375, "learning_rate": 0.00019591243714072736, "loss": 3.0772, "step": 4925 }, { "epoch": 0.23060448241558898, "grad_norm": 1.359375, "learning_rate": 0.00019591078510120173, "loss": 3.472, "step": 4926 }, { "epoch": 0.23065129615542163, "grad_norm": 1.203125, "learning_rate": 0.00019590913273486563, "loss": 3.6291, "step": 4927 }, { "epoch": 0.23069810989525424, "grad_norm": 1.2578125, "learning_rate": 0.00019590748004172473, "loss": 3.3533, "step": 4928 }, { "epoch": 0.2307449236350869, "grad_norm": 1.3046875, "learning_rate": 0.00019590582702178462, "loss": 3.5537, "step": 4929 }, { "epoch": 0.23079173737491954, "grad_norm": 1.296875, "learning_rate": 0.00019590417367505096, "loss": 3.2445, "step": 4930 }, { "epoch": 0.23083855111475218, "grad_norm": 1.2734375, "learning_rate": 0.00019590252000152938, "loss": 3.4784, "step": 4931 }, { "epoch": 0.23088536485458483, "grad_norm": 1.40625, "learning_rate": 0.00019590086600122547, "loss": 3.2709, "step": 4932 }, { "epoch": 0.23093217859441745, "grad_norm": 1.265625, "learning_rate": 0.00019589921167414493, "loss": 3.3623, "step": 4933 }, { "epoch": 0.2309789923342501, "grad_norm": 1.3359375, "learning_rate": 0.00019589755702029336, "loss": 3.7026, "step": 4934 }, { "epoch": 0.23102580607408274, "grad_norm": 0.99609375, "learning_rate": 0.0001958959020396764, "loss": 3.5184, "step": 4935 }, { "epoch": 0.2310726198139154, "grad_norm": 1.4296875, "learning_rate": 0.00019589424673229972, "loss": 3.4273, "step": 4936 }, { "epoch": 0.23111943355374803, "grad_norm": 1.2734375, "learning_rate": 0.00019589259109816892, "loss": 3.687, "step": 4937 }, { "epoch": 0.23116624729358068, "grad_norm": 1.453125, "learning_rate": 0.00019589093513728968, "loss": 3.4966, "step": 4938 }, { "epoch": 0.2312130610334133, "grad_norm": 1.0703125, "learning_rate": 0.0001958892788496676, "loss": 3.4703, "step": 4939 }, { "epoch": 0.23125987477324594, "grad_norm": 1.140625, "learning_rate": 0.00019588762223530834, "loss": 3.5337, "step": 4940 }, { "epoch": 0.2313066885130786, "grad_norm": 1.1953125, "learning_rate": 0.00019588596529421755, "loss": 3.4513, "step": 4941 }, { "epoch": 0.23135350225291124, "grad_norm": 1.296875, "learning_rate": 0.0001958843080264009, "loss": 3.4589, "step": 4942 }, { "epoch": 0.23140031599274388, "grad_norm": 1.171875, "learning_rate": 0.000195882650431864, "loss": 3.3295, "step": 4943 }, { "epoch": 0.2314471297325765, "grad_norm": 1.2109375, "learning_rate": 0.00019588099251061252, "loss": 4.7132, "step": 4944 }, { "epoch": 0.23149394347240915, "grad_norm": 1.6171875, "learning_rate": 0.00019587933426265205, "loss": 3.7376, "step": 4945 }, { "epoch": 0.2315407572122418, "grad_norm": 2.078125, "learning_rate": 0.00019587767568798837, "loss": 3.4966, "step": 4946 }, { "epoch": 0.23158757095207444, "grad_norm": 1.1171875, "learning_rate": 0.00019587601678662699, "loss": 3.2995, "step": 4947 }, { "epoch": 0.23163438469190709, "grad_norm": 1.3203125, "learning_rate": 0.00019587435755857367, "loss": 3.541, "step": 4948 }, { "epoch": 0.2316811984317397, "grad_norm": 1.3203125, "learning_rate": 0.000195872698003834, "loss": 3.5345, "step": 4949 }, { "epoch": 0.23172801217157235, "grad_norm": 1.3046875, "learning_rate": 0.00019587103812241364, "loss": 3.2013, "step": 4950 }, { "epoch": 0.231774825911405, "grad_norm": 1.21875, "learning_rate": 0.00019586937791431824, "loss": 3.5845, "step": 4951 }, { "epoch": 0.23182163965123764, "grad_norm": 1.2421875, "learning_rate": 0.0001958677173795535, "loss": 3.3615, "step": 4952 }, { "epoch": 0.2318684533910703, "grad_norm": 1.5078125, "learning_rate": 0.00019586605651812506, "loss": 4.0455, "step": 4953 }, { "epoch": 0.2319152671309029, "grad_norm": 1.2109375, "learning_rate": 0.00019586439533003858, "loss": 3.5546, "step": 4954 }, { "epoch": 0.23196208087073555, "grad_norm": 1.109375, "learning_rate": 0.00019586273381529972, "loss": 3.7452, "step": 4955 }, { "epoch": 0.2320088946105682, "grad_norm": 1.59375, "learning_rate": 0.0001958610719739141, "loss": 3.3989, "step": 4956 }, { "epoch": 0.23205570835040085, "grad_norm": 1.53125, "learning_rate": 0.00019585940980588746, "loss": 3.6787, "step": 4957 }, { "epoch": 0.2321025220902335, "grad_norm": 1.4296875, "learning_rate": 0.0001958577473112254, "loss": 2.9983, "step": 4958 }, { "epoch": 0.2321493358300661, "grad_norm": 1.265625, "learning_rate": 0.0001958560844899336, "loss": 3.4525, "step": 4959 }, { "epoch": 0.23219614956989876, "grad_norm": 1.2890625, "learning_rate": 0.00019585442134201775, "loss": 3.5423, "step": 4960 }, { "epoch": 0.2322429633097314, "grad_norm": 1.1015625, "learning_rate": 0.0001958527578674835, "loss": 2.6975, "step": 4961 }, { "epoch": 0.23228977704956405, "grad_norm": 1.5, "learning_rate": 0.0001958510940663365, "loss": 3.3386, "step": 4962 }, { "epoch": 0.2323365907893967, "grad_norm": 1.265625, "learning_rate": 0.00019584942993858243, "loss": 3.7237, "step": 4963 }, { "epoch": 0.23238340452922934, "grad_norm": 2.15625, "learning_rate": 0.00019584776548422697, "loss": 3.475, "step": 4964 }, { "epoch": 0.23243021826906196, "grad_norm": 1.484375, "learning_rate": 0.00019584610070327583, "loss": 3.3921, "step": 4965 }, { "epoch": 0.2324770320088946, "grad_norm": 1.484375, "learning_rate": 0.0001958444355957346, "loss": 3.4854, "step": 4966 }, { "epoch": 0.23252384574872725, "grad_norm": 1.3515625, "learning_rate": 0.00019584277016160902, "loss": 3.8376, "step": 4967 }, { "epoch": 0.2325706594885599, "grad_norm": 1.125, "learning_rate": 0.00019584110440090473, "loss": 3.8128, "step": 4968 }, { "epoch": 0.23261747322839255, "grad_norm": 1.265625, "learning_rate": 0.0001958394383136274, "loss": 3.7053, "step": 4969 }, { "epoch": 0.23266428696822516, "grad_norm": 1.0390625, "learning_rate": 0.00019583777189978276, "loss": 2.9906, "step": 4970 }, { "epoch": 0.2327111007080578, "grad_norm": 1.453125, "learning_rate": 0.00019583610515937645, "loss": 3.3421, "step": 4971 }, { "epoch": 0.23275791444789046, "grad_norm": 1.2265625, "learning_rate": 0.00019583443809241413, "loss": 3.1176, "step": 4972 }, { "epoch": 0.2328047281877231, "grad_norm": 1.4375, "learning_rate": 0.0001958327706989015, "loss": 3.2062, "step": 4973 }, { "epoch": 0.23285154192755575, "grad_norm": 1.2265625, "learning_rate": 0.00019583110297884427, "loss": 3.4593, "step": 4974 }, { "epoch": 0.23289835566738837, "grad_norm": 1.359375, "learning_rate": 0.00019582943493224808, "loss": 3.3866, "step": 4975 }, { "epoch": 0.23294516940722101, "grad_norm": 1.8125, "learning_rate": 0.0001958277665591186, "loss": 3.8448, "step": 4976 }, { "epoch": 0.23299198314705366, "grad_norm": 1.2109375, "learning_rate": 0.00019582609785946157, "loss": 3.2675, "step": 4977 }, { "epoch": 0.2330387968868863, "grad_norm": 1.625, "learning_rate": 0.00019582442883328267, "loss": 3.1737, "step": 4978 }, { "epoch": 0.23308561062671895, "grad_norm": 1.4140625, "learning_rate": 0.00019582275948058756, "loss": 3.6501, "step": 4979 }, { "epoch": 0.23313242436655157, "grad_norm": 1.640625, "learning_rate": 0.00019582108980138193, "loss": 3.7836, "step": 4980 }, { "epoch": 0.23317923810638422, "grad_norm": 1.484375, "learning_rate": 0.00019581941979567144, "loss": 3.2829, "step": 4981 }, { "epoch": 0.23322605184621686, "grad_norm": 1.453125, "learning_rate": 0.00019581774946346184, "loss": 5.1136, "step": 4982 }, { "epoch": 0.2332728655860495, "grad_norm": 1.1796875, "learning_rate": 0.0001958160788047588, "loss": 2.5518, "step": 4983 }, { "epoch": 0.23331967932588216, "grad_norm": 2.265625, "learning_rate": 0.00019581440781956804, "loss": 3.2582, "step": 4984 }, { "epoch": 0.23336649306571478, "grad_norm": 1.3359375, "learning_rate": 0.0001958127365078952, "loss": 3.7859, "step": 4985 }, { "epoch": 0.23341330680554742, "grad_norm": 1.46875, "learning_rate": 0.000195811064869746, "loss": 3.7373, "step": 4986 }, { "epoch": 0.23346012054538007, "grad_norm": 1.9453125, "learning_rate": 0.00019580939290512613, "loss": 3.7621, "step": 4987 }, { "epoch": 0.23350693428521271, "grad_norm": 1.171875, "learning_rate": 0.00019580772061404128, "loss": 3.681, "step": 4988 }, { "epoch": 0.23355374802504536, "grad_norm": 1.3984375, "learning_rate": 0.00019580604799649716, "loss": 3.5093, "step": 4989 }, { "epoch": 0.23360056176487798, "grad_norm": 1.2734375, "learning_rate": 0.0001958043750524995, "loss": 3.5347, "step": 4990 }, { "epoch": 0.23364737550471062, "grad_norm": 1.2890625, "learning_rate": 0.00019580270178205393, "loss": 3.0451, "step": 4991 }, { "epoch": 0.23369418924454327, "grad_norm": 1.5, "learning_rate": 0.00019580102818516623, "loss": 2.954, "step": 4992 }, { "epoch": 0.23374100298437592, "grad_norm": 1.421875, "learning_rate": 0.00019579935426184203, "loss": 3.7405, "step": 4993 }, { "epoch": 0.23378781672420856, "grad_norm": 1.2421875, "learning_rate": 0.00019579768001208708, "loss": 3.2802, "step": 4994 }, { "epoch": 0.2338346304640412, "grad_norm": 1.609375, "learning_rate": 0.0001957960054359071, "loss": 3.2322, "step": 4995 }, { "epoch": 0.23388144420387383, "grad_norm": 1.0078125, "learning_rate": 0.00019579433053330773, "loss": 3.4989, "step": 4996 }, { "epoch": 0.23392825794370647, "grad_norm": 1.1953125, "learning_rate": 0.00019579265530429474, "loss": 3.5169, "step": 4997 }, { "epoch": 0.23397507168353912, "grad_norm": 1.0703125, "learning_rate": 0.00019579097974887382, "loss": 3.9924, "step": 4998 }, { "epoch": 0.23402188542337177, "grad_norm": 1.5234375, "learning_rate": 0.00019578930386705064, "loss": 3.6712, "step": 4999 }, { "epoch": 0.2340686991632044, "grad_norm": 1.3515625, "learning_rate": 0.00019578762765883097, "loss": 3.5841, "step": 5000 }, { "epoch": 0.23411551290303703, "grad_norm": 1.5078125, "learning_rate": 0.0001957859511242205, "loss": 3.5303, "step": 5001 }, { "epoch": 0.23416232664286968, "grad_norm": 1.34375, "learning_rate": 0.0001957842742632249, "loss": 4.116, "step": 5002 }, { "epoch": 0.23420914038270232, "grad_norm": 1.1328125, "learning_rate": 0.00019578259707584995, "loss": 3.4394, "step": 5003 }, { "epoch": 0.23425595412253497, "grad_norm": 1.421875, "learning_rate": 0.00019578091956210135, "loss": 3.4041, "step": 5004 }, { "epoch": 0.23430276786236762, "grad_norm": 1.171875, "learning_rate": 0.00019577924172198478, "loss": 3.5726, "step": 5005 }, { "epoch": 0.23434958160220024, "grad_norm": 1.328125, "learning_rate": 0.000195777563555506, "loss": 3.0847, "step": 5006 }, { "epoch": 0.23439639534203288, "grad_norm": 1.265625, "learning_rate": 0.0001957758850626707, "loss": 3.9337, "step": 5007 }, { "epoch": 0.23444320908186553, "grad_norm": 1.1328125, "learning_rate": 0.0001957742062434846, "loss": 3.4334, "step": 5008 }, { "epoch": 0.23449002282169817, "grad_norm": 1.34375, "learning_rate": 0.00019577252709795343, "loss": 3.933, "step": 5009 }, { "epoch": 0.23453683656153082, "grad_norm": 1.3125, "learning_rate": 0.0001957708476260829, "loss": 3.7324, "step": 5010 }, { "epoch": 0.23458365030136344, "grad_norm": 1.2265625, "learning_rate": 0.00019576916782787875, "loss": 3.6606, "step": 5011 }, { "epoch": 0.23463046404119609, "grad_norm": 1.1328125, "learning_rate": 0.0001957674877033467, "loss": 3.9676, "step": 5012 }, { "epoch": 0.23467727778102873, "grad_norm": 1.046875, "learning_rate": 0.0001957658072524925, "loss": 3.2322, "step": 5013 }, { "epoch": 0.23472409152086138, "grad_norm": 1.515625, "learning_rate": 0.00019576412647532182, "loss": 3.5679, "step": 5014 }, { "epoch": 0.23477090526069402, "grad_norm": 1.25, "learning_rate": 0.00019576244537184041, "loss": 3.1226, "step": 5015 }, { "epoch": 0.23481771900052664, "grad_norm": 1.1796875, "learning_rate": 0.000195760763942054, "loss": 3.4675, "step": 5016 }, { "epoch": 0.2348645327403593, "grad_norm": 1.1328125, "learning_rate": 0.0001957590821859683, "loss": 3.595, "step": 5017 }, { "epoch": 0.23491134648019193, "grad_norm": 1.3984375, "learning_rate": 0.0001957574001035891, "loss": 3.4151, "step": 5018 }, { "epoch": 0.23495816022002458, "grad_norm": 1.9453125, "learning_rate": 0.0001957557176949221, "loss": 3.8576, "step": 5019 }, { "epoch": 0.23500497395985723, "grad_norm": 1.140625, "learning_rate": 0.00019575403495997304, "loss": 3.3482, "step": 5020 }, { "epoch": 0.23505178769968985, "grad_norm": 1.9921875, "learning_rate": 0.0001957523518987476, "loss": 3.2034, "step": 5021 }, { "epoch": 0.2350986014395225, "grad_norm": 1.6796875, "learning_rate": 0.00019575066851125158, "loss": 3.9638, "step": 5022 }, { "epoch": 0.23514541517935514, "grad_norm": 1.09375, "learning_rate": 0.00019574898479749066, "loss": 4.5517, "step": 5023 }, { "epoch": 0.23519222891918778, "grad_norm": 1.390625, "learning_rate": 0.00019574730075747065, "loss": 3.1151, "step": 5024 }, { "epoch": 0.23523904265902043, "grad_norm": 1.515625, "learning_rate": 0.00019574561639119722, "loss": 3.7001, "step": 5025 }, { "epoch": 0.23528585639885308, "grad_norm": 1.28125, "learning_rate": 0.00019574393169867615, "loss": 3.7516, "step": 5026 }, { "epoch": 0.2353326701386857, "grad_norm": 1.3203125, "learning_rate": 0.00019574224667991317, "loss": 3.8144, "step": 5027 }, { "epoch": 0.23537948387851834, "grad_norm": 1.3046875, "learning_rate": 0.000195740561334914, "loss": 3.396, "step": 5028 }, { "epoch": 0.235426297618351, "grad_norm": 1.171875, "learning_rate": 0.00019573887566368443, "loss": 3.1444, "step": 5029 }, { "epoch": 0.23547311135818363, "grad_norm": 1.3125, "learning_rate": 0.00019573718966623015, "loss": 3.3915, "step": 5030 }, { "epoch": 0.23551992509801628, "grad_norm": 1.359375, "learning_rate": 0.00019573550334255692, "loss": 3.1492, "step": 5031 }, { "epoch": 0.2355667388378489, "grad_norm": 1.359375, "learning_rate": 0.0001957338166926705, "loss": 3.5082, "step": 5032 }, { "epoch": 0.23561355257768155, "grad_norm": 1.5078125, "learning_rate": 0.00019573212971657668, "loss": 3.9296, "step": 5033 }, { "epoch": 0.2356603663175142, "grad_norm": 1.7265625, "learning_rate": 0.00019573044241428112, "loss": 3.7672, "step": 5034 }, { "epoch": 0.23570718005734684, "grad_norm": 1.2421875, "learning_rate": 0.0001957287547857896, "loss": 3.4903, "step": 5035 }, { "epoch": 0.23575399379717948, "grad_norm": 1.21875, "learning_rate": 0.0001957270668311079, "loss": 3.8567, "step": 5036 }, { "epoch": 0.2358008075370121, "grad_norm": 1.3046875, "learning_rate": 0.00019572537855024174, "loss": 3.3204, "step": 5037 }, { "epoch": 0.23584762127684475, "grad_norm": 1.2421875, "learning_rate": 0.00019572368994319692, "loss": 3.4986, "step": 5038 }, { "epoch": 0.2358944350166774, "grad_norm": 1.1484375, "learning_rate": 0.00019572200100997914, "loss": 4.0083, "step": 5039 }, { "epoch": 0.23594124875651004, "grad_norm": 1.171875, "learning_rate": 0.00019572031175059417, "loss": 3.2071, "step": 5040 }, { "epoch": 0.2359880624963427, "grad_norm": 1.2890625, "learning_rate": 0.00019571862216504778, "loss": 3.0993, "step": 5041 }, { "epoch": 0.2360348762361753, "grad_norm": 1.171875, "learning_rate": 0.0001957169322533457, "loss": 3.4759, "step": 5042 }, { "epoch": 0.23608168997600795, "grad_norm": 1.5703125, "learning_rate": 0.00019571524201549375, "loss": 3.7718, "step": 5043 }, { "epoch": 0.2361285037158406, "grad_norm": 1.2109375, "learning_rate": 0.00019571355145149759, "loss": 3.4513, "step": 5044 }, { "epoch": 0.23617531745567324, "grad_norm": 1.5, "learning_rate": 0.00019571186056136305, "loss": 3.3226, "step": 5045 }, { "epoch": 0.2362221311955059, "grad_norm": 1.8046875, "learning_rate": 0.0001957101693450959, "loss": 3.9977, "step": 5046 }, { "epoch": 0.2362689449353385, "grad_norm": 1.3515625, "learning_rate": 0.00019570847780270184, "loss": 3.4625, "step": 5047 }, { "epoch": 0.23631575867517116, "grad_norm": 1.109375, "learning_rate": 0.0001957067859341867, "loss": 3.2698, "step": 5048 }, { "epoch": 0.2363625724150038, "grad_norm": 1.375, "learning_rate": 0.00019570509373955622, "loss": 3.4068, "step": 5049 }, { "epoch": 0.23640938615483645, "grad_norm": 1.2890625, "learning_rate": 0.00019570340121881618, "loss": 3.8505, "step": 5050 }, { "epoch": 0.2364561998946691, "grad_norm": 1.4765625, "learning_rate": 0.00019570170837197233, "loss": 3.7873, "step": 5051 }, { "epoch": 0.2365030136345017, "grad_norm": 1.4921875, "learning_rate": 0.00019570001519903042, "loss": 3.7327, "step": 5052 }, { "epoch": 0.23654982737433436, "grad_norm": 1.546875, "learning_rate": 0.00019569832169999624, "loss": 3.81, "step": 5053 }, { "epoch": 0.236596641114167, "grad_norm": 1.40625, "learning_rate": 0.00019569662787487558, "loss": 3.7076, "step": 5054 }, { "epoch": 0.23664345485399965, "grad_norm": 1.34375, "learning_rate": 0.00019569493372367417, "loss": 3.3604, "step": 5055 }, { "epoch": 0.2366902685938323, "grad_norm": 1.5234375, "learning_rate": 0.00019569323924639782, "loss": 3.5263, "step": 5056 }, { "epoch": 0.23673708233366494, "grad_norm": 1.6875, "learning_rate": 0.00019569154444305225, "loss": 3.9562, "step": 5057 }, { "epoch": 0.23678389607349756, "grad_norm": 1.265625, "learning_rate": 0.00019568984931364334, "loss": 3.1071, "step": 5058 }, { "epoch": 0.2368307098133302, "grad_norm": 1.296875, "learning_rate": 0.00019568815385817678, "loss": 3.2951, "step": 5059 }, { "epoch": 0.23687752355316286, "grad_norm": 1.734375, "learning_rate": 0.00019568645807665832, "loss": 3.1557, "step": 5060 }, { "epoch": 0.2369243372929955, "grad_norm": 1.359375, "learning_rate": 0.00019568476196909384, "loss": 3.3836, "step": 5061 }, { "epoch": 0.23697115103282815, "grad_norm": 1.375, "learning_rate": 0.00019568306553548905, "loss": 3.3945, "step": 5062 }, { "epoch": 0.23701796477266077, "grad_norm": 1.2265625, "learning_rate": 0.00019568136877584974, "loss": 3.6233, "step": 5063 }, { "epoch": 0.2370647785124934, "grad_norm": 1.5, "learning_rate": 0.0001956796716901817, "loss": 3.1141, "step": 5064 }, { "epoch": 0.23711159225232606, "grad_norm": 1.28125, "learning_rate": 0.00019567797427849068, "loss": 3.3208, "step": 5065 }, { "epoch": 0.2371584059921587, "grad_norm": 1.015625, "learning_rate": 0.0001956762765407825, "loss": 3.5329, "step": 5066 }, { "epoch": 0.23720521973199135, "grad_norm": 1.296875, "learning_rate": 0.00019567457847706297, "loss": 3.4422, "step": 5067 }, { "epoch": 0.23725203347182397, "grad_norm": 1.1640625, "learning_rate": 0.00019567288008733784, "loss": 3.0631, "step": 5068 }, { "epoch": 0.23729884721165662, "grad_norm": 1.4453125, "learning_rate": 0.0001956711813716129, "loss": 3.5038, "step": 5069 }, { "epoch": 0.23734566095148926, "grad_norm": 1.09375, "learning_rate": 0.0001956694823298939, "loss": 3.1573, "step": 5070 }, { "epoch": 0.2373924746913219, "grad_norm": 1.2578125, "learning_rate": 0.0001956677829621867, "loss": 3.3919, "step": 5071 }, { "epoch": 0.23743928843115455, "grad_norm": 1.578125, "learning_rate": 0.00019566608326849707, "loss": 3.4688, "step": 5072 }, { "epoch": 0.23748610217098717, "grad_norm": 1.21875, "learning_rate": 0.00019566438324883077, "loss": 3.4912, "step": 5073 }, { "epoch": 0.23753291591081982, "grad_norm": 1.1796875, "learning_rate": 0.00019566268290319364, "loss": 3.7669, "step": 5074 }, { "epoch": 0.23757972965065247, "grad_norm": 1.7109375, "learning_rate": 0.00019566098223159143, "loss": 3.4618, "step": 5075 }, { "epoch": 0.2376265433904851, "grad_norm": 1.3125, "learning_rate": 0.00019565928123402995, "loss": 4.0292, "step": 5076 }, { "epoch": 0.23767335713031776, "grad_norm": 1.1640625, "learning_rate": 0.00019565757991051502, "loss": 3.3387, "step": 5077 }, { "epoch": 0.23772017087015038, "grad_norm": 1.4453125, "learning_rate": 0.00019565587826105238, "loss": 3.3898, "step": 5078 }, { "epoch": 0.23776698460998302, "grad_norm": 1.078125, "learning_rate": 0.00019565417628564786, "loss": 3.582, "step": 5079 }, { "epoch": 0.23781379834981567, "grad_norm": 1.328125, "learning_rate": 0.0001956524739843073, "loss": 3.2652, "step": 5080 }, { "epoch": 0.23786061208964832, "grad_norm": 1.265625, "learning_rate": 0.00019565077135703644, "loss": 3.3162, "step": 5081 }, { "epoch": 0.23790742582948096, "grad_norm": 1.4765625, "learning_rate": 0.00019564906840384113, "loss": 3.2648, "step": 5082 }, { "epoch": 0.23795423956931358, "grad_norm": 1.3828125, "learning_rate": 0.00019564736512472713, "loss": 3.4136, "step": 5083 }, { "epoch": 0.23800105330914623, "grad_norm": 1.2265625, "learning_rate": 0.00019564566151970025, "loss": 3.603, "step": 5084 }, { "epoch": 0.23804786704897887, "grad_norm": 1.3046875, "learning_rate": 0.00019564395758876632, "loss": 3.442, "step": 5085 }, { "epoch": 0.23809468078881152, "grad_norm": 1.5625, "learning_rate": 0.00019564225333193114, "loss": 3.1656, "step": 5086 }, { "epoch": 0.23814149452864417, "grad_norm": 1.3359375, "learning_rate": 0.0001956405487492005, "loss": 3.7735, "step": 5087 }, { "epoch": 0.2381883082684768, "grad_norm": 1.2109375, "learning_rate": 0.00019563884384058022, "loss": 3.5286, "step": 5088 }, { "epoch": 0.23823512200830943, "grad_norm": 1.3203125, "learning_rate": 0.0001956371386060761, "loss": 3.793, "step": 5089 }, { "epoch": 0.23828193574814208, "grad_norm": 1.5078125, "learning_rate": 0.000195635433045694, "loss": 3.8796, "step": 5090 }, { "epoch": 0.23832874948797472, "grad_norm": 1.1015625, "learning_rate": 0.00019563372715943964, "loss": 3.2144, "step": 5091 }, { "epoch": 0.23837556322780737, "grad_norm": 1.2890625, "learning_rate": 0.00019563202094731888, "loss": 3.345, "step": 5092 }, { "epoch": 0.23842237696764002, "grad_norm": 1.296875, "learning_rate": 0.00019563031440933756, "loss": 3.2127, "step": 5093 }, { "epoch": 0.23846919070747263, "grad_norm": 1.2421875, "learning_rate": 0.0001956286075455015, "loss": 2.4787, "step": 5094 }, { "epoch": 0.23851600444730528, "grad_norm": 1.1328125, "learning_rate": 0.00019562690035581645, "loss": 3.4294, "step": 5095 }, { "epoch": 0.23856281818713793, "grad_norm": 1.1171875, "learning_rate": 0.00019562519284028825, "loss": 3.4684, "step": 5096 }, { "epoch": 0.23860963192697057, "grad_norm": 1.2578125, "learning_rate": 0.00019562348499892277, "loss": 3.6356, "step": 5097 }, { "epoch": 0.23865644566680322, "grad_norm": 1.140625, "learning_rate": 0.00019562177683172578, "loss": 2.8542, "step": 5098 }, { "epoch": 0.23870325940663584, "grad_norm": 2.15625, "learning_rate": 0.0001956200683387031, "loss": 3.2659, "step": 5099 }, { "epoch": 0.23875007314646848, "grad_norm": 1.140625, "learning_rate": 0.00019561835951986057, "loss": 3.062, "step": 5100 }, { "epoch": 0.23879688688630113, "grad_norm": 1.9453125, "learning_rate": 0.00019561665037520401, "loss": 3.3927, "step": 5101 }, { "epoch": 0.23884370062613378, "grad_norm": 1.28125, "learning_rate": 0.00019561494090473925, "loss": 3.2681, "step": 5102 }, { "epoch": 0.23889051436596642, "grad_norm": 1.75, "learning_rate": 0.0001956132311084721, "loss": 3.7145, "step": 5103 }, { "epoch": 0.23893732810579904, "grad_norm": 1.328125, "learning_rate": 0.0001956115209864084, "loss": 3.3392, "step": 5104 }, { "epoch": 0.2389841418456317, "grad_norm": 1.4375, "learning_rate": 0.00019560981053855394, "loss": 3.8443, "step": 5105 }, { "epoch": 0.23903095558546433, "grad_norm": 1.1484375, "learning_rate": 0.00019560809976491462, "loss": 3.292, "step": 5106 }, { "epoch": 0.23907776932529698, "grad_norm": 1.109375, "learning_rate": 0.0001956063886654962, "loss": 3.3458, "step": 5107 }, { "epoch": 0.23912458306512963, "grad_norm": 1.1953125, "learning_rate": 0.00019560467724030452, "loss": 3.3752, "step": 5108 }, { "epoch": 0.23917139680496224, "grad_norm": 1.4375, "learning_rate": 0.00019560296548934542, "loss": 3.665, "step": 5109 }, { "epoch": 0.2392182105447949, "grad_norm": 1.453125, "learning_rate": 0.0001956012534126248, "loss": 2.9855, "step": 5110 }, { "epoch": 0.23926502428462754, "grad_norm": 1.1875, "learning_rate": 0.00019559954101014838, "loss": 3.4063, "step": 5111 }, { "epoch": 0.23931183802446018, "grad_norm": 1.1640625, "learning_rate": 0.00019559782828192207, "loss": 2.6083, "step": 5112 }, { "epoch": 0.23935865176429283, "grad_norm": 1.4921875, "learning_rate": 0.0001955961152279517, "loss": 3.5704, "step": 5113 }, { "epoch": 0.23940546550412545, "grad_norm": 1.515625, "learning_rate": 0.00019559440184824307, "loss": 3.7357, "step": 5114 }, { "epoch": 0.2394522792439581, "grad_norm": 1.3046875, "learning_rate": 0.00019559268814280207, "loss": 3.6509, "step": 5115 }, { "epoch": 0.23949909298379074, "grad_norm": 1.171875, "learning_rate": 0.0001955909741116345, "loss": 2.9951, "step": 5116 }, { "epoch": 0.2395459067236234, "grad_norm": 1.296875, "learning_rate": 0.00019558925975474617, "loss": 3.7215, "step": 5117 }, { "epoch": 0.23959272046345603, "grad_norm": 1.2421875, "learning_rate": 0.00019558754507214302, "loss": 2.9938, "step": 5118 }, { "epoch": 0.23963953420328868, "grad_norm": 1.3984375, "learning_rate": 0.0001955858300638308, "loss": 3.5075, "step": 5119 }, { "epoch": 0.2396863479431213, "grad_norm": 1.6015625, "learning_rate": 0.0001955841147298154, "loss": 3.4085, "step": 5120 }, { "epoch": 0.23973316168295394, "grad_norm": 1.234375, "learning_rate": 0.00019558239907010267, "loss": 3.9421, "step": 5121 }, { "epoch": 0.2397799754227866, "grad_norm": 1.0625, "learning_rate": 0.00019558068308469842, "loss": 3.0168, "step": 5122 }, { "epoch": 0.23982678916261924, "grad_norm": 1.0625, "learning_rate": 0.0001955789667736085, "loss": 3.4002, "step": 5123 }, { "epoch": 0.23987360290245188, "grad_norm": 1.328125, "learning_rate": 0.00019557725013683884, "loss": 3.2332, "step": 5124 }, { "epoch": 0.2399204166422845, "grad_norm": 1.140625, "learning_rate": 0.00019557553317439515, "loss": 3.3313, "step": 5125 }, { "epoch": 0.23996723038211715, "grad_norm": 1.6015625, "learning_rate": 0.00019557381588628339, "loss": 3.433, "step": 5126 }, { "epoch": 0.2400140441219498, "grad_norm": 1.5234375, "learning_rate": 0.00019557209827250938, "loss": 3.2796, "step": 5127 }, { "epoch": 0.24006085786178244, "grad_norm": 1.296875, "learning_rate": 0.00019557038033307897, "loss": 3.1088, "step": 5128 }, { "epoch": 0.24010767160161509, "grad_norm": 1.2265625, "learning_rate": 0.000195568662067998, "loss": 3.7571, "step": 5129 }, { "epoch": 0.2401544853414477, "grad_norm": 1.40625, "learning_rate": 0.00019556694347727235, "loss": 3.7444, "step": 5130 }, { "epoch": 0.24020129908128035, "grad_norm": 1.109375, "learning_rate": 0.00019556522456090788, "loss": 3.2794, "step": 5131 }, { "epoch": 0.240248112821113, "grad_norm": 1.328125, "learning_rate": 0.00019556350531891038, "loss": 3.4082, "step": 5132 }, { "epoch": 0.24029492656094564, "grad_norm": 1.4296875, "learning_rate": 0.00019556178575128578, "loss": 3.821, "step": 5133 }, { "epoch": 0.2403417403007783, "grad_norm": 1.359375, "learning_rate": 0.00019556006585803995, "loss": 3.3336, "step": 5134 }, { "epoch": 0.2403885540406109, "grad_norm": 0.94921875, "learning_rate": 0.0001955583456391787, "loss": 3.8533, "step": 5135 }, { "epoch": 0.24043536778044355, "grad_norm": 2.703125, "learning_rate": 0.0001955566250947079, "loss": 4.5317, "step": 5136 }, { "epoch": 0.2404821815202762, "grad_norm": 1.375, "learning_rate": 0.00019555490422463345, "loss": 3.3393, "step": 5137 }, { "epoch": 0.24052899526010885, "grad_norm": 1.21875, "learning_rate": 0.00019555318302896116, "loss": 3.7704, "step": 5138 }, { "epoch": 0.2405758089999415, "grad_norm": 1.578125, "learning_rate": 0.00019555146150769694, "loss": 3.4481, "step": 5139 }, { "epoch": 0.2406226227397741, "grad_norm": 1.2734375, "learning_rate": 0.00019554973966084662, "loss": 3.6644, "step": 5140 }, { "epoch": 0.24066943647960676, "grad_norm": 1.5078125, "learning_rate": 0.0001955480174884161, "loss": 3.4383, "step": 5141 }, { "epoch": 0.2407162502194394, "grad_norm": 1.0234375, "learning_rate": 0.00019554629499041122, "loss": 3.309, "step": 5142 }, { "epoch": 0.24076306395927205, "grad_norm": 1.3125, "learning_rate": 0.00019554457216683786, "loss": 3.5967, "step": 5143 }, { "epoch": 0.2408098776991047, "grad_norm": 1.2578125, "learning_rate": 0.00019554284901770192, "loss": 3.8378, "step": 5144 }, { "epoch": 0.24085669143893731, "grad_norm": 1.6484375, "learning_rate": 0.00019554112554300923, "loss": 4.2968, "step": 5145 }, { "epoch": 0.24090350517876996, "grad_norm": 1.75, "learning_rate": 0.00019553940174276568, "loss": 3.9422, "step": 5146 }, { "epoch": 0.2409503189186026, "grad_norm": 1.171875, "learning_rate": 0.00019553767761697715, "loss": 3.3458, "step": 5147 }, { "epoch": 0.24099713265843525, "grad_norm": 1.2578125, "learning_rate": 0.00019553595316564948, "loss": 3.3513, "step": 5148 }, { "epoch": 0.2410439463982679, "grad_norm": 1.1640625, "learning_rate": 0.00019553422838878857, "loss": 3.5493, "step": 5149 }, { "epoch": 0.24109076013810055, "grad_norm": 1.2265625, "learning_rate": 0.00019553250328640035, "loss": 3.2721, "step": 5150 }, { "epoch": 0.24113757387793316, "grad_norm": 1.015625, "learning_rate": 0.00019553077785849058, "loss": 3.6945, "step": 5151 }, { "epoch": 0.2411843876177658, "grad_norm": 1.1953125, "learning_rate": 0.00019552905210506522, "loss": 3.2757, "step": 5152 }, { "epoch": 0.24123120135759846, "grad_norm": 1.78125, "learning_rate": 0.00019552732602613017, "loss": 3.3244, "step": 5153 }, { "epoch": 0.2412780150974311, "grad_norm": 1.2265625, "learning_rate": 0.00019552559962169127, "loss": 3.3091, "step": 5154 }, { "epoch": 0.24132482883726375, "grad_norm": 1.875, "learning_rate": 0.0001955238728917544, "loss": 3.8046, "step": 5155 }, { "epoch": 0.24137164257709637, "grad_norm": 1.4609375, "learning_rate": 0.00019552214583632544, "loss": 3.8875, "step": 5156 }, { "epoch": 0.24141845631692901, "grad_norm": 1.2109375, "learning_rate": 0.00019552041845541035, "loss": 3.7061, "step": 5157 }, { "epoch": 0.24146527005676166, "grad_norm": 1.203125, "learning_rate": 0.00019551869074901487, "loss": 3.1835, "step": 5158 }, { "epoch": 0.2415120837965943, "grad_norm": 1.5703125, "learning_rate": 0.00019551696271714503, "loss": 3.6424, "step": 5159 }, { "epoch": 0.24155889753642695, "grad_norm": 1.3046875, "learning_rate": 0.00019551523435980665, "loss": 3.4789, "step": 5160 }, { "epoch": 0.24160571127625957, "grad_norm": 1.4765625, "learning_rate": 0.0001955135056770056, "loss": 3.3488, "step": 5161 }, { "epoch": 0.24165252501609222, "grad_norm": 1.703125, "learning_rate": 0.00019551177666874783, "loss": 3.8054, "step": 5162 }, { "epoch": 0.24169933875592486, "grad_norm": 1.6328125, "learning_rate": 0.0001955100473350392, "loss": 3.8032, "step": 5163 }, { "epoch": 0.2417461524957575, "grad_norm": 1.203125, "learning_rate": 0.00019550831767588562, "loss": 3.5617, "step": 5164 }, { "epoch": 0.24179296623559016, "grad_norm": 1.6171875, "learning_rate": 0.00019550658769129295, "loss": 3.4645, "step": 5165 }, { "epoch": 0.24183977997542278, "grad_norm": 1.46875, "learning_rate": 0.0001955048573812671, "loss": 3.4105, "step": 5166 }, { "epoch": 0.24188659371525542, "grad_norm": 1.359375, "learning_rate": 0.00019550312674581396, "loss": 3.6337, "step": 5167 }, { "epoch": 0.24193340745508807, "grad_norm": 1.109375, "learning_rate": 0.00019550139578493946, "loss": 3.3948, "step": 5168 }, { "epoch": 0.2419802211949207, "grad_norm": 1.171875, "learning_rate": 0.00019549966449864946, "loss": 2.9057, "step": 5169 }, { "epoch": 0.24202703493475336, "grad_norm": 1.1171875, "learning_rate": 0.0001954979328869499, "loss": 3.1992, "step": 5170 }, { "epoch": 0.24207384867458598, "grad_norm": 1.125, "learning_rate": 0.0001954962009498466, "loss": 3.5096, "step": 5171 }, { "epoch": 0.24212066241441862, "grad_norm": 1.109375, "learning_rate": 0.00019549446868734558, "loss": 3.1907, "step": 5172 }, { "epoch": 0.24216747615425127, "grad_norm": 1.2265625, "learning_rate": 0.00019549273609945262, "loss": 3.6538, "step": 5173 }, { "epoch": 0.24221428989408392, "grad_norm": 1.3671875, "learning_rate": 0.0001954910031861737, "loss": 3.5982, "step": 5174 }, { "epoch": 0.24226110363391656, "grad_norm": 1.078125, "learning_rate": 0.00019548926994751472, "loss": 3.2606, "step": 5175 }, { "epoch": 0.24230791737374918, "grad_norm": 1.890625, "learning_rate": 0.00019548753638348157, "loss": 5.1697, "step": 5176 }, { "epoch": 0.24235473111358183, "grad_norm": 1.2890625, "learning_rate": 0.00019548580249408016, "loss": 3.5879, "step": 5177 }, { "epoch": 0.24240154485341447, "grad_norm": 1.0703125, "learning_rate": 0.0001954840682793164, "loss": 3.596, "step": 5178 }, { "epoch": 0.24244835859324712, "grad_norm": 1.2421875, "learning_rate": 0.0001954823337391962, "loss": 3.5962, "step": 5179 }, { "epoch": 0.24249517233307977, "grad_norm": 1.4453125, "learning_rate": 0.00019548059887372542, "loss": 3.7754, "step": 5180 }, { "epoch": 0.2425419860729124, "grad_norm": 1.421875, "learning_rate": 0.00019547886368291004, "loss": 3.687, "step": 5181 }, { "epoch": 0.24258879981274503, "grad_norm": 1.1484375, "learning_rate": 0.00019547712816675596, "loss": 3.7452, "step": 5182 }, { "epoch": 0.24263561355257768, "grad_norm": 1.6328125, "learning_rate": 0.0001954753923252691, "loss": 3.4939, "step": 5183 }, { "epoch": 0.24268242729241032, "grad_norm": 1.203125, "learning_rate": 0.00019547365615845533, "loss": 3.2767, "step": 5184 }, { "epoch": 0.24272924103224297, "grad_norm": 0.87109375, "learning_rate": 0.00019547191966632062, "loss": 5.0516, "step": 5185 }, { "epoch": 0.24277605477207562, "grad_norm": 1.09375, "learning_rate": 0.00019547018284887083, "loss": 3.3912, "step": 5186 }, { "epoch": 0.24282286851190824, "grad_norm": 1.6484375, "learning_rate": 0.00019546844570611196, "loss": 3.9124, "step": 5187 }, { "epoch": 0.24286968225174088, "grad_norm": 1.25, "learning_rate": 0.00019546670823804987, "loss": 3.3617, "step": 5188 }, { "epoch": 0.24291649599157353, "grad_norm": 1.546875, "learning_rate": 0.00019546497044469047, "loss": 3.5924, "step": 5189 }, { "epoch": 0.24296330973140617, "grad_norm": 1.5390625, "learning_rate": 0.0001954632323260397, "loss": 3.5219, "step": 5190 }, { "epoch": 0.24301012347123882, "grad_norm": 1.2421875, "learning_rate": 0.0001954614938821035, "loss": 3.0449, "step": 5191 }, { "epoch": 0.24305693721107144, "grad_norm": 1.5703125, "learning_rate": 0.00019545975511288773, "loss": 3.5404, "step": 5192 }, { "epoch": 0.24310375095090409, "grad_norm": 2.640625, "learning_rate": 0.0001954580160183984, "loss": 4.2195, "step": 5193 }, { "epoch": 0.24315056469073673, "grad_norm": 1.515625, "learning_rate": 0.00019545627659864142, "loss": 3.6163, "step": 5194 }, { "epoch": 0.24319737843056938, "grad_norm": 1.0625, "learning_rate": 0.00019545453685362266, "loss": 3.251, "step": 5195 }, { "epoch": 0.24324419217040202, "grad_norm": 1.5390625, "learning_rate": 0.0001954527967833481, "loss": 3.6663, "step": 5196 }, { "epoch": 0.24329100591023464, "grad_norm": 1.5703125, "learning_rate": 0.00019545105638782366, "loss": 3.3706, "step": 5197 }, { "epoch": 0.2433378196500673, "grad_norm": 1.4140625, "learning_rate": 0.00019544931566705523, "loss": 3.5487, "step": 5198 }, { "epoch": 0.24338463338989993, "grad_norm": 1.296875, "learning_rate": 0.0001954475746210488, "loss": 3.3832, "step": 5199 }, { "epoch": 0.24343144712973258, "grad_norm": 2.390625, "learning_rate": 0.00019544583324981028, "loss": 3.7094, "step": 5200 }, { "epoch": 0.24347826086956523, "grad_norm": 1.5078125, "learning_rate": 0.00019544409155334557, "loss": 3.745, "step": 5201 }, { "epoch": 0.24352507460939785, "grad_norm": 2.265625, "learning_rate": 0.00019544234953166068, "loss": 3.5247, "step": 5202 }, { "epoch": 0.2435718883492305, "grad_norm": 1.703125, "learning_rate": 0.00019544060718476147, "loss": 3.923, "step": 5203 }, { "epoch": 0.24361870208906314, "grad_norm": 1.5859375, "learning_rate": 0.0001954388645126539, "loss": 3.0383, "step": 5204 }, { "epoch": 0.24366551582889578, "grad_norm": 1.3203125, "learning_rate": 0.00019543712151534395, "loss": 3.6711, "step": 5205 }, { "epoch": 0.24371232956872843, "grad_norm": 1.1328125, "learning_rate": 0.0001954353781928375, "loss": 3.4148, "step": 5206 }, { "epoch": 0.24375914330856105, "grad_norm": 1.15625, "learning_rate": 0.0001954336345451405, "loss": 3.3529, "step": 5207 }, { "epoch": 0.2438059570483937, "grad_norm": 1.5234375, "learning_rate": 0.00019543189057225896, "loss": 3.6543, "step": 5208 }, { "epoch": 0.24385277078822634, "grad_norm": 1.234375, "learning_rate": 0.00019543014627419872, "loss": 3.5774, "step": 5209 }, { "epoch": 0.243899584528059, "grad_norm": 1.265625, "learning_rate": 0.0001954284016509658, "loss": 3.5475, "step": 5210 }, { "epoch": 0.24394639826789163, "grad_norm": 1.3828125, "learning_rate": 0.00019542665670256611, "loss": 3.6508, "step": 5211 }, { "epoch": 0.24399321200772428, "grad_norm": 1.4453125, "learning_rate": 0.0001954249114290056, "loss": 3.5303, "step": 5212 }, { "epoch": 0.2440400257475569, "grad_norm": 1.28125, "learning_rate": 0.00019542316583029022, "loss": 3.3075, "step": 5213 }, { "epoch": 0.24408683948738955, "grad_norm": 1.453125, "learning_rate": 0.0001954214199064259, "loss": 3.4378, "step": 5214 }, { "epoch": 0.2441336532272222, "grad_norm": 1.1875, "learning_rate": 0.00019541967365741862, "loss": 3.2856, "step": 5215 }, { "epoch": 0.24418046696705484, "grad_norm": 1.078125, "learning_rate": 0.00019541792708327433, "loss": 2.9524, "step": 5216 }, { "epoch": 0.24422728070688748, "grad_norm": 1.4375, "learning_rate": 0.00019541618018399897, "loss": 3.5774, "step": 5217 }, { "epoch": 0.2442740944467201, "grad_norm": 2.046875, "learning_rate": 0.00019541443295959847, "loss": 3.6481, "step": 5218 }, { "epoch": 0.24432090818655275, "grad_norm": 1.203125, "learning_rate": 0.00019541268541007885, "loss": 3.7, "step": 5219 }, { "epoch": 0.2443677219263854, "grad_norm": 1.9375, "learning_rate": 0.00019541093753544598, "loss": 3.4614, "step": 5220 }, { "epoch": 0.24441453566621804, "grad_norm": 1.453125, "learning_rate": 0.00019540918933570587, "loss": 3.3697, "step": 5221 }, { "epoch": 0.2444613494060507, "grad_norm": 1.3046875, "learning_rate": 0.00019540744081086444, "loss": 3.2953, "step": 5222 }, { "epoch": 0.2445081631458833, "grad_norm": 1.4453125, "learning_rate": 0.0001954056919609277, "loss": 3.5245, "step": 5223 }, { "epoch": 0.24455497688571595, "grad_norm": 1.3828125, "learning_rate": 0.00019540394278590154, "loss": 3.4404, "step": 5224 }, { "epoch": 0.2446017906255486, "grad_norm": 1.6640625, "learning_rate": 0.00019540219328579198, "loss": 3.3756, "step": 5225 }, { "epoch": 0.24464860436538124, "grad_norm": 1.3046875, "learning_rate": 0.00019540044346060497, "loss": 3.716, "step": 5226 }, { "epoch": 0.2446954181052139, "grad_norm": 1.171875, "learning_rate": 0.00019539869331034647, "loss": 3.3945, "step": 5227 }, { "epoch": 0.2447422318450465, "grad_norm": 1.4921875, "learning_rate": 0.0001953969428350224, "loss": 3.1025, "step": 5228 }, { "epoch": 0.24478904558487916, "grad_norm": 1.5234375, "learning_rate": 0.0001953951920346388, "loss": 2.7491, "step": 5229 }, { "epoch": 0.2448358593247118, "grad_norm": 1.1171875, "learning_rate": 0.0001953934409092016, "loss": 3.0452, "step": 5230 }, { "epoch": 0.24488267306454445, "grad_norm": 1.34375, "learning_rate": 0.00019539168945871676, "loss": 4.0016, "step": 5231 }, { "epoch": 0.2449294868043771, "grad_norm": 1.6171875, "learning_rate": 0.00019538993768319023, "loss": 3.4318, "step": 5232 }, { "epoch": 0.2449763005442097, "grad_norm": 1.6484375, "learning_rate": 0.00019538818558262802, "loss": 3.4246, "step": 5233 }, { "epoch": 0.24502311428404236, "grad_norm": 1.4765625, "learning_rate": 0.00019538643315703604, "loss": 3.6126, "step": 5234 }, { "epoch": 0.245069928023875, "grad_norm": 2.0625, "learning_rate": 0.00019538468040642036, "loss": 2.7708, "step": 5235 }, { "epoch": 0.24511674176370765, "grad_norm": 1.109375, "learning_rate": 0.00019538292733078686, "loss": 3.5608, "step": 5236 }, { "epoch": 0.2451635555035403, "grad_norm": 1.2109375, "learning_rate": 0.00019538117393014156, "loss": 3.3529, "step": 5237 }, { "epoch": 0.24521036924337292, "grad_norm": 1.3984375, "learning_rate": 0.00019537942020449043, "loss": 3.6766, "step": 5238 }, { "epoch": 0.24525718298320556, "grad_norm": 1.2421875, "learning_rate": 0.00019537766615383944, "loss": 3.6313, "step": 5239 }, { "epoch": 0.2453039967230382, "grad_norm": 1.7421875, "learning_rate": 0.00019537591177819455, "loss": 3.3258, "step": 5240 }, { "epoch": 0.24535081046287086, "grad_norm": 1.15625, "learning_rate": 0.00019537415707756174, "loss": 3.082, "step": 5241 }, { "epoch": 0.2453976242027035, "grad_norm": 1.1953125, "learning_rate": 0.00019537240205194705, "loss": 3.1579, "step": 5242 }, { "epoch": 0.24544443794253615, "grad_norm": 1.8828125, "learning_rate": 0.0001953706467013564, "loss": 3.49, "step": 5243 }, { "epoch": 0.24549125168236877, "grad_norm": 1.1796875, "learning_rate": 0.00019536889102579575, "loss": 3.0496, "step": 5244 }, { "epoch": 0.2455380654222014, "grad_norm": 1.375, "learning_rate": 0.00019536713502527113, "loss": 3.5588, "step": 5245 }, { "epoch": 0.24558487916203406, "grad_norm": 1.2890625, "learning_rate": 0.00019536537869978854, "loss": 3.6545, "step": 5246 }, { "epoch": 0.2456316929018667, "grad_norm": 1.375, "learning_rate": 0.00019536362204935391, "loss": 3.789, "step": 5247 }, { "epoch": 0.24567850664169935, "grad_norm": 1.265625, "learning_rate": 0.00019536186507397322, "loss": 3.2146, "step": 5248 }, { "epoch": 0.24572532038153197, "grad_norm": 1.5625, "learning_rate": 0.00019536010777365252, "loss": 2.1802, "step": 5249 }, { "epoch": 0.24577213412136462, "grad_norm": 1.609375, "learning_rate": 0.00019535835014839777, "loss": 3.0115, "step": 5250 }, { "epoch": 0.24581894786119726, "grad_norm": 1.2265625, "learning_rate": 0.00019535659219821494, "loss": 3.4306, "step": 5251 }, { "epoch": 0.2458657616010299, "grad_norm": 2.1875, "learning_rate": 0.00019535483392311005, "loss": 3.0588, "step": 5252 }, { "epoch": 0.24591257534086255, "grad_norm": 1.2734375, "learning_rate": 0.00019535307532308907, "loss": 2.6518, "step": 5253 }, { "epoch": 0.24595938908069517, "grad_norm": 1.265625, "learning_rate": 0.000195351316398158, "loss": 3.1729, "step": 5254 }, { "epoch": 0.24600620282052782, "grad_norm": 1.484375, "learning_rate": 0.00019534955714832283, "loss": 3.508, "step": 5255 }, { "epoch": 0.24605301656036047, "grad_norm": 1.53125, "learning_rate": 0.00019534779757358951, "loss": 3.2157, "step": 5256 }, { "epoch": 0.2460998303001931, "grad_norm": 1.1484375, "learning_rate": 0.00019534603767396412, "loss": 3.2691, "step": 5257 }, { "epoch": 0.24614664404002576, "grad_norm": 1.3203125, "learning_rate": 0.00019534427744945262, "loss": 3.5935, "step": 5258 }, { "epoch": 0.24619345777985838, "grad_norm": 1.1484375, "learning_rate": 0.00019534251690006102, "loss": 3.7468, "step": 5259 }, { "epoch": 0.24624027151969102, "grad_norm": 1.609375, "learning_rate": 0.00019534075602579528, "loss": 3.6707, "step": 5260 }, { "epoch": 0.24628708525952367, "grad_norm": 1.4921875, "learning_rate": 0.00019533899482666143, "loss": 3.728, "step": 5261 }, { "epoch": 0.24633389899935632, "grad_norm": 1.5, "learning_rate": 0.00019533723330266548, "loss": 3.3123, "step": 5262 }, { "epoch": 0.24638071273918896, "grad_norm": 1.4765625, "learning_rate": 0.00019533547145381342, "loss": 3.9739, "step": 5263 }, { "epoch": 0.24642752647902158, "grad_norm": 1.4453125, "learning_rate": 0.00019533370928011124, "loss": 3.4304, "step": 5264 }, { "epoch": 0.24647434021885423, "grad_norm": 1.5390625, "learning_rate": 0.00019533194678156494, "loss": 3.5739, "step": 5265 }, { "epoch": 0.24652115395868687, "grad_norm": 1.375, "learning_rate": 0.00019533018395818057, "loss": 3.2632, "step": 5266 }, { "epoch": 0.24656796769851952, "grad_norm": 1.3046875, "learning_rate": 0.00019532842080996406, "loss": 3.3448, "step": 5267 }, { "epoch": 0.24661478143835217, "grad_norm": 1.3671875, "learning_rate": 0.00019532665733692153, "loss": 3.2535, "step": 5268 }, { "epoch": 0.24666159517818478, "grad_norm": 1.4609375, "learning_rate": 0.0001953248935390589, "loss": 3.5204, "step": 5269 }, { "epoch": 0.24670840891801743, "grad_norm": 1.2265625, "learning_rate": 0.0001953231294163822, "loss": 3.5275, "step": 5270 }, { "epoch": 0.24675522265785008, "grad_norm": 0.8984375, "learning_rate": 0.00019532136496889745, "loss": 3.4508, "step": 5271 }, { "epoch": 0.24680203639768272, "grad_norm": 1.234375, "learning_rate": 0.00019531960019661065, "loss": 2.7769, "step": 5272 }, { "epoch": 0.24684885013751537, "grad_norm": 1.9140625, "learning_rate": 0.00019531783509952783, "loss": 3.19, "step": 5273 }, { "epoch": 0.24689566387734802, "grad_norm": 1.3515625, "learning_rate": 0.000195316069677655, "loss": 3.6051, "step": 5274 }, { "epoch": 0.24694247761718063, "grad_norm": 1.328125, "learning_rate": 0.0001953143039309982, "loss": 3.5178, "step": 5275 }, { "epoch": 0.24698929135701328, "grad_norm": 1.4453125, "learning_rate": 0.00019531253785956337, "loss": 2.9347, "step": 5276 }, { "epoch": 0.24703610509684593, "grad_norm": 1.9765625, "learning_rate": 0.0001953107714633566, "loss": 2.7127, "step": 5277 }, { "epoch": 0.24708291883667857, "grad_norm": 1.3203125, "learning_rate": 0.00019530900474238388, "loss": 3.184, "step": 5278 }, { "epoch": 0.24712973257651122, "grad_norm": 1.515625, "learning_rate": 0.0001953072376966512, "loss": 3.8683, "step": 5279 }, { "epoch": 0.24717654631634384, "grad_norm": 2.203125, "learning_rate": 0.00019530547032616465, "loss": 3.5046, "step": 5280 }, { "epoch": 0.24722336005617648, "grad_norm": 1.7265625, "learning_rate": 0.00019530370263093024, "loss": 3.2423, "step": 5281 }, { "epoch": 0.24727017379600913, "grad_norm": 1.8125, "learning_rate": 0.00019530193461095397, "loss": 3.7232, "step": 5282 }, { "epoch": 0.24731698753584178, "grad_norm": 1.4140625, "learning_rate": 0.00019530016626624183, "loss": 3.7716, "step": 5283 }, { "epoch": 0.24736380127567442, "grad_norm": 1.4609375, "learning_rate": 0.0001952983975967999, "loss": 3.7008, "step": 5284 }, { "epoch": 0.24741061501550704, "grad_norm": 1.8359375, "learning_rate": 0.00019529662860263421, "loss": 4.029, "step": 5285 }, { "epoch": 0.2474574287553397, "grad_norm": 1.4140625, "learning_rate": 0.00019529485928375073, "loss": 3.335, "step": 5286 }, { "epoch": 0.24750424249517233, "grad_norm": 1.15625, "learning_rate": 0.00019529308964015557, "loss": 3.3976, "step": 5287 }, { "epoch": 0.24755105623500498, "grad_norm": 1.3671875, "learning_rate": 0.00019529131967185466, "loss": 3.9075, "step": 5288 }, { "epoch": 0.24759786997483763, "grad_norm": 1.375, "learning_rate": 0.00019528954937885412, "loss": 3.574, "step": 5289 }, { "epoch": 0.24764468371467024, "grad_norm": 1.4375, "learning_rate": 0.00019528777876115993, "loss": 3.8882, "step": 5290 }, { "epoch": 0.2476914974545029, "grad_norm": 1.2734375, "learning_rate": 0.00019528600781877815, "loss": 3.7762, "step": 5291 }, { "epoch": 0.24773831119433554, "grad_norm": 1.2109375, "learning_rate": 0.00019528423655171482, "loss": 3.7704, "step": 5292 }, { "epoch": 0.24778512493416818, "grad_norm": 1.265625, "learning_rate": 0.00019528246495997595, "loss": 3.7871, "step": 5293 }, { "epoch": 0.24783193867400083, "grad_norm": 1.671875, "learning_rate": 0.0001952806930435676, "loss": 3.5824, "step": 5294 }, { "epoch": 0.24787875241383345, "grad_norm": 1.3515625, "learning_rate": 0.00019527892080249579, "loss": 3.4147, "step": 5295 }, { "epoch": 0.2479255661536661, "grad_norm": 1.296875, "learning_rate": 0.00019527714823676655, "loss": 3.2853, "step": 5296 }, { "epoch": 0.24797237989349874, "grad_norm": 1.09375, "learning_rate": 0.00019527537534638593, "loss": 3.7024, "step": 5297 }, { "epoch": 0.2480191936333314, "grad_norm": 1.21875, "learning_rate": 0.00019527360213136, "loss": 3.2211, "step": 5298 }, { "epoch": 0.24806600737316403, "grad_norm": 1.3828125, "learning_rate": 0.00019527182859169478, "loss": 3.6197, "step": 5299 }, { "epoch": 0.24811282111299665, "grad_norm": 1.46875, "learning_rate": 0.00019527005472739627, "loss": 3.4197, "step": 5300 }, { "epoch": 0.2481596348528293, "grad_norm": 1.3984375, "learning_rate": 0.0001952682805384706, "loss": 3.3696, "step": 5301 }, { "epoch": 0.24820644859266194, "grad_norm": 1.3515625, "learning_rate": 0.00019526650602492377, "loss": 3.5526, "step": 5302 }, { "epoch": 0.2482532623324946, "grad_norm": 1.390625, "learning_rate": 0.0001952647311867618, "loss": 3.6745, "step": 5303 }, { "epoch": 0.24830007607232724, "grad_norm": 1.2265625, "learning_rate": 0.0001952629560239908, "loss": 3.5153, "step": 5304 }, { "epoch": 0.24834688981215988, "grad_norm": 1.2421875, "learning_rate": 0.00019526118053661673, "loss": 3.7469, "step": 5305 }, { "epoch": 0.2483937035519925, "grad_norm": 1.3515625, "learning_rate": 0.00019525940472464573, "loss": 3.595, "step": 5306 }, { "epoch": 0.24844051729182515, "grad_norm": 1.28125, "learning_rate": 0.00019525762858808382, "loss": 3.5384, "step": 5307 }, { "epoch": 0.2484873310316578, "grad_norm": 1.15625, "learning_rate": 0.00019525585212693702, "loss": 3.1369, "step": 5308 }, { "epoch": 0.24853414477149044, "grad_norm": 1.296875, "learning_rate": 0.00019525407534121144, "loss": 3.4693, "step": 5309 }, { "epoch": 0.24858095851132309, "grad_norm": 1.2265625, "learning_rate": 0.00019525229823091312, "loss": 3.5743, "step": 5310 }, { "epoch": 0.2486277722511557, "grad_norm": 1.3515625, "learning_rate": 0.00019525052079604805, "loss": 3.8422, "step": 5311 }, { "epoch": 0.24867458599098835, "grad_norm": 1.546875, "learning_rate": 0.00019524874303662235, "loss": 3.6368, "step": 5312 }, { "epoch": 0.248721399730821, "grad_norm": 1.2265625, "learning_rate": 0.00019524696495264206, "loss": 3.3558, "step": 5313 }, { "epoch": 0.24876821347065364, "grad_norm": 1.1875, "learning_rate": 0.00019524518654411325, "loss": 4.8076, "step": 5314 }, { "epoch": 0.2488150272104863, "grad_norm": 1.484375, "learning_rate": 0.000195243407811042, "loss": 3.4667, "step": 5315 }, { "epoch": 0.2488618409503189, "grad_norm": 1.3359375, "learning_rate": 0.0001952416287534343, "loss": 3.6699, "step": 5316 }, { "epoch": 0.24890865469015155, "grad_norm": 1.5390625, "learning_rate": 0.00019523984937129627, "loss": 3.3015, "step": 5317 }, { "epoch": 0.2489554684299842, "grad_norm": 1.4140625, "learning_rate": 0.00019523806966463398, "loss": 3.5141, "step": 5318 }, { "epoch": 0.24900228216981685, "grad_norm": 1.484375, "learning_rate": 0.00019523628963345345, "loss": 3.5284, "step": 5319 }, { "epoch": 0.2490490959096495, "grad_norm": 2.828125, "learning_rate": 0.00019523450927776077, "loss": 2.6611, "step": 5320 }, { "epoch": 0.2490959096494821, "grad_norm": 1.359375, "learning_rate": 0.00019523272859756202, "loss": 3.6118, "step": 5321 }, { "epoch": 0.24914272338931476, "grad_norm": 1.4140625, "learning_rate": 0.0001952309475928632, "loss": 3.1986, "step": 5322 }, { "epoch": 0.2491895371291474, "grad_norm": 1.453125, "learning_rate": 0.0001952291662636705, "loss": 3.5077, "step": 5323 }, { "epoch": 0.24923635086898005, "grad_norm": 1.3515625, "learning_rate": 0.0001952273846099899, "loss": 3.6167, "step": 5324 }, { "epoch": 0.2492831646088127, "grad_norm": 1.1484375, "learning_rate": 0.00019522560263182748, "loss": 3.2332, "step": 5325 }, { "epoch": 0.24932997834864531, "grad_norm": 1.4453125, "learning_rate": 0.0001952238203291893, "loss": 3.541, "step": 5326 }, { "epoch": 0.24937679208847796, "grad_norm": 1.359375, "learning_rate": 0.00019522203770208152, "loss": 3.8515, "step": 5327 }, { "epoch": 0.2494236058283106, "grad_norm": 1.6171875, "learning_rate": 0.0001952202547505101, "loss": 3.4654, "step": 5328 }, { "epoch": 0.24947041956814325, "grad_norm": 1.359375, "learning_rate": 0.0001952184714744812, "loss": 3.3615, "step": 5329 }, { "epoch": 0.2495172333079759, "grad_norm": 1.046875, "learning_rate": 0.0001952166878740008, "loss": 2.9373, "step": 5330 }, { "epoch": 0.24956404704780852, "grad_norm": 1.2578125, "learning_rate": 0.0001952149039490751, "loss": 3.5946, "step": 5331 }, { "epoch": 0.24961086078764116, "grad_norm": 1.375, "learning_rate": 0.0001952131196997101, "loss": 3.7038, "step": 5332 }, { "epoch": 0.2496576745274738, "grad_norm": 1.6015625, "learning_rate": 0.0001952113351259119, "loss": 3.5632, "step": 5333 }, { "epoch": 0.24970448826730646, "grad_norm": 1.4453125, "learning_rate": 0.00019520955022768655, "loss": 3.2997, "step": 5334 }, { "epoch": 0.2497513020071391, "grad_norm": 1.7578125, "learning_rate": 0.00019520776500504022, "loss": 3.3605, "step": 5335 }, { "epoch": 0.24979811574697175, "grad_norm": 1.2578125, "learning_rate": 0.0001952059794579789, "loss": 3.634, "step": 5336 }, { "epoch": 0.24984492948680437, "grad_norm": 1.109375, "learning_rate": 0.00019520419358650868, "loss": 3.5381, "step": 5337 }, { "epoch": 0.24989174322663701, "grad_norm": 1.234375, "learning_rate": 0.00019520240739063572, "loss": 3.1876, "step": 5338 }, { "epoch": 0.24993855696646966, "grad_norm": 1.8515625, "learning_rate": 0.00019520062087036603, "loss": 3.4953, "step": 5339 }, { "epoch": 0.2499853707063023, "grad_norm": 1.1796875, "learning_rate": 0.00019519883402570573, "loss": 3.8852, "step": 5340 }, { "epoch": 0.25003218444613495, "grad_norm": 1.2265625, "learning_rate": 0.00019519704685666095, "loss": 3.3809, "step": 5341 }, { "epoch": 0.25007899818596757, "grad_norm": 1.390625, "learning_rate": 0.00019519525936323768, "loss": 3.1815, "step": 5342 }, { "epoch": 0.25012581192580025, "grad_norm": 1.34375, "learning_rate": 0.00019519347154544207, "loss": 3.5834, "step": 5343 }, { "epoch": 0.25017262566563286, "grad_norm": 1.0625, "learning_rate": 0.00019519168340328025, "loss": 3.2075, "step": 5344 }, { "epoch": 0.2502194394054655, "grad_norm": 1.015625, "learning_rate": 0.00019518989493675824, "loss": 3.2421, "step": 5345 }, { "epoch": 0.25026625314529816, "grad_norm": 1.359375, "learning_rate": 0.00019518810614588218, "loss": 3.5544, "step": 5346 }, { "epoch": 0.2503130668851308, "grad_norm": 1.40625, "learning_rate": 0.00019518631703065813, "loss": 3.4621, "step": 5347 }, { "epoch": 0.25035988062496345, "grad_norm": 1.3046875, "learning_rate": 0.00019518452759109223, "loss": 3.4441, "step": 5348 }, { "epoch": 0.25040669436479607, "grad_norm": 1.0859375, "learning_rate": 0.00019518273782719056, "loss": 3.4878, "step": 5349 }, { "epoch": 0.2504535081046287, "grad_norm": 1.21875, "learning_rate": 0.00019518094773895918, "loss": 3.1107, "step": 5350 }, { "epoch": 0.25050032184446136, "grad_norm": 1.2734375, "learning_rate": 0.00019517915732640423, "loss": 3.618, "step": 5351 }, { "epoch": 0.250547135584294, "grad_norm": 1.3359375, "learning_rate": 0.00019517736658953183, "loss": 3.6282, "step": 5352 }, { "epoch": 0.25059394932412665, "grad_norm": 1.1171875, "learning_rate": 0.00019517557552834803, "loss": 3.1531, "step": 5353 }, { "epoch": 0.25064076306395927, "grad_norm": 1.9609375, "learning_rate": 0.00019517378414285897, "loss": 3.7908, "step": 5354 }, { "epoch": 0.2506875768037919, "grad_norm": 1.4609375, "learning_rate": 0.0001951719924330707, "loss": 3.4618, "step": 5355 }, { "epoch": 0.25073439054362456, "grad_norm": 1.2890625, "learning_rate": 0.00019517020039898944, "loss": 3.2378, "step": 5356 }, { "epoch": 0.2507812042834572, "grad_norm": 1.2109375, "learning_rate": 0.0001951684080406212, "loss": 2.7487, "step": 5357 }, { "epoch": 0.25082801802328986, "grad_norm": 1.3671875, "learning_rate": 0.00019516661535797205, "loss": 3.1889, "step": 5358 }, { "epoch": 0.2508748317631225, "grad_norm": 1.2421875, "learning_rate": 0.00019516482235104824, "loss": 3.3542, "step": 5359 }, { "epoch": 0.2509216455029551, "grad_norm": 1.21875, "learning_rate": 0.00019516302901985576, "loss": 5.3175, "step": 5360 }, { "epoch": 0.25096845924278777, "grad_norm": 1.3359375, "learning_rate": 0.00019516123536440075, "loss": 3.2958, "step": 5361 }, { "epoch": 0.2510152729826204, "grad_norm": 1.4453125, "learning_rate": 0.00019515944138468933, "loss": 3.3206, "step": 5362 }, { "epoch": 0.25106208672245306, "grad_norm": 1.6640625, "learning_rate": 0.00019515764708072765, "loss": 3.9181, "step": 5363 }, { "epoch": 0.2511089004622857, "grad_norm": 1.171875, "learning_rate": 0.00019515585245252174, "loss": 3.408, "step": 5364 }, { "epoch": 0.2511557142021183, "grad_norm": 1.3125, "learning_rate": 0.00019515405750007781, "loss": 3.4598, "step": 5365 }, { "epoch": 0.25120252794195097, "grad_norm": 1.3828125, "learning_rate": 0.0001951522622234019, "loss": 3.4208, "step": 5366 }, { "epoch": 0.2512493416817836, "grad_norm": 1.3046875, "learning_rate": 0.0001951504666225002, "loss": 3.968, "step": 5367 }, { "epoch": 0.25129615542161626, "grad_norm": 1.6796875, "learning_rate": 0.00019514867069737873, "loss": 3.609, "step": 5368 }, { "epoch": 0.2513429691614489, "grad_norm": 1.2890625, "learning_rate": 0.00019514687444804367, "loss": 3.4556, "step": 5369 }, { "epoch": 0.2513897829012815, "grad_norm": 1.4296875, "learning_rate": 0.00019514507787450115, "loss": 3.045, "step": 5370 }, { "epoch": 0.2514365966411142, "grad_norm": 1.1640625, "learning_rate": 0.00019514328097675727, "loss": 3.4887, "step": 5371 }, { "epoch": 0.2514834103809468, "grad_norm": 1.65625, "learning_rate": 0.00019514148375481818, "loss": 3.7891, "step": 5372 }, { "epoch": 0.25153022412077947, "grad_norm": 1.234375, "learning_rate": 0.00019513968620868996, "loss": 3.7092, "step": 5373 }, { "epoch": 0.2515770378606121, "grad_norm": 1.0390625, "learning_rate": 0.00019513788833837878, "loss": 3.2727, "step": 5374 }, { "epoch": 0.2516238516004447, "grad_norm": 1.703125, "learning_rate": 0.00019513609014389075, "loss": 3.5448, "step": 5375 }, { "epoch": 0.2516706653402774, "grad_norm": 1.4453125, "learning_rate": 0.00019513429162523196, "loss": 3.7596, "step": 5376 }, { "epoch": 0.25171747908011, "grad_norm": 1.90625, "learning_rate": 0.00019513249278240856, "loss": 4.2757, "step": 5377 }, { "epoch": 0.25176429281994267, "grad_norm": 1.2578125, "learning_rate": 0.00019513069361542674, "loss": 3.4663, "step": 5378 }, { "epoch": 0.2518111065597753, "grad_norm": 1.1640625, "learning_rate": 0.00019512889412429254, "loss": 3.3674, "step": 5379 }, { "epoch": 0.25185792029960796, "grad_norm": 1.3515625, "learning_rate": 0.00019512709430901217, "loss": 3.7413, "step": 5380 }, { "epoch": 0.2519047340394406, "grad_norm": 1.25, "learning_rate": 0.0001951252941695917, "loss": 3.7868, "step": 5381 }, { "epoch": 0.2519515477792732, "grad_norm": 1.109375, "learning_rate": 0.0001951234937060373, "loss": 3.174, "step": 5382 }, { "epoch": 0.2519983615191059, "grad_norm": 1.7578125, "learning_rate": 0.00019512169291835508, "loss": 3.5906, "step": 5383 }, { "epoch": 0.2520451752589385, "grad_norm": 1.453125, "learning_rate": 0.0001951198918065512, "loss": 3.4258, "step": 5384 }, { "epoch": 0.25209198899877117, "grad_norm": 2.03125, "learning_rate": 0.00019511809037063175, "loss": 3.4128, "step": 5385 }, { "epoch": 0.2521388027386038, "grad_norm": 1.25, "learning_rate": 0.00019511628861060295, "loss": 3.147, "step": 5386 }, { "epoch": 0.2521856164784364, "grad_norm": 1.15625, "learning_rate": 0.00019511448652647086, "loss": 3.5247, "step": 5387 }, { "epoch": 0.2522324302182691, "grad_norm": 1.4375, "learning_rate": 0.0001951126841182417, "loss": 3.3587, "step": 5388 }, { "epoch": 0.2522792439581017, "grad_norm": 1.5, "learning_rate": 0.00019511088138592152, "loss": 3.9336, "step": 5389 }, { "epoch": 0.25232605769793437, "grad_norm": 1.09375, "learning_rate": 0.00019510907832951654, "loss": 3.3889, "step": 5390 }, { "epoch": 0.252372871437767, "grad_norm": 1.296875, "learning_rate": 0.00019510727494903286, "loss": 3.385, "step": 5391 }, { "epoch": 0.2524196851775996, "grad_norm": 1.1640625, "learning_rate": 0.00019510547124447665, "loss": 3.3797, "step": 5392 }, { "epoch": 0.2524664989174323, "grad_norm": 1.46875, "learning_rate": 0.00019510366721585404, "loss": 3.4133, "step": 5393 }, { "epoch": 0.2525133126572649, "grad_norm": 1.640625, "learning_rate": 0.00019510186286317117, "loss": 3.6686, "step": 5394 }, { "epoch": 0.2525601263970976, "grad_norm": 1.125, "learning_rate": 0.0001951000581864342, "loss": 2.4479, "step": 5395 }, { "epoch": 0.2526069401369302, "grad_norm": 1.1171875, "learning_rate": 0.0001950982531856493, "loss": 3.3659, "step": 5396 }, { "epoch": 0.2526537538767628, "grad_norm": 1.40625, "learning_rate": 0.00019509644786082258, "loss": 3.2656, "step": 5397 }, { "epoch": 0.2527005676165955, "grad_norm": 1.1953125, "learning_rate": 0.00019509464221196022, "loss": 3.1126, "step": 5398 }, { "epoch": 0.2527473813564281, "grad_norm": 1.5390625, "learning_rate": 0.00019509283623906834, "loss": 3.5995, "step": 5399 }, { "epoch": 0.2527941950962608, "grad_norm": 2.28125, "learning_rate": 0.00019509102994215316, "loss": 2.6871, "step": 5400 }, { "epoch": 0.2528410088360934, "grad_norm": 1.3203125, "learning_rate": 0.00019508922332122077, "loss": 3.2858, "step": 5401 }, { "epoch": 0.252887822575926, "grad_norm": 1.15625, "learning_rate": 0.00019508741637627735, "loss": 3.2447, "step": 5402 }, { "epoch": 0.2529346363157587, "grad_norm": 1.234375, "learning_rate": 0.00019508560910732903, "loss": 3.4787, "step": 5403 }, { "epoch": 0.2529814500555913, "grad_norm": 1.40625, "learning_rate": 0.00019508380151438205, "loss": 3.6521, "step": 5404 }, { "epoch": 0.253028263795424, "grad_norm": 1.390625, "learning_rate": 0.0001950819935974425, "loss": 3.2797, "step": 5405 }, { "epoch": 0.2530750775352566, "grad_norm": 1.2109375, "learning_rate": 0.00019508018535651652, "loss": 3.0974, "step": 5406 }, { "epoch": 0.2531218912750892, "grad_norm": 1.375, "learning_rate": 0.00019507837679161026, "loss": 2.9613, "step": 5407 }, { "epoch": 0.2531687050149219, "grad_norm": 1.1171875, "learning_rate": 0.00019507656790273002, "loss": 3.6669, "step": 5408 }, { "epoch": 0.2532155187547545, "grad_norm": 1.3203125, "learning_rate": 0.00019507475868988183, "loss": 3.2441, "step": 5409 }, { "epoch": 0.2532623324945872, "grad_norm": 1.21875, "learning_rate": 0.00019507294915307189, "loss": 3.596, "step": 5410 }, { "epoch": 0.2533091462344198, "grad_norm": 1.2734375, "learning_rate": 0.00019507113929230637, "loss": 3.5482, "step": 5411 }, { "epoch": 0.2533559599742524, "grad_norm": 1.5703125, "learning_rate": 0.00019506932910759147, "loss": 3.8658, "step": 5412 }, { "epoch": 0.2534027737140851, "grad_norm": 1.7265625, "learning_rate": 0.0001950675185989333, "loss": 3.7223, "step": 5413 }, { "epoch": 0.2534495874539177, "grad_norm": 1.21875, "learning_rate": 0.00019506570776633804, "loss": 3.3416, "step": 5414 }, { "epoch": 0.2534964011937504, "grad_norm": 1.2421875, "learning_rate": 0.0001950638966098119, "loss": 3.1454, "step": 5415 }, { "epoch": 0.253543214933583, "grad_norm": 1.28125, "learning_rate": 0.00019506208512936102, "loss": 3.7897, "step": 5416 }, { "epoch": 0.2535900286734156, "grad_norm": 1.6640625, "learning_rate": 0.0001950602733249916, "loss": 2.9934, "step": 5417 }, { "epoch": 0.2536368424132483, "grad_norm": 1.3046875, "learning_rate": 0.00019505846119670977, "loss": 3.299, "step": 5418 }, { "epoch": 0.2536836561530809, "grad_norm": 1.4296875, "learning_rate": 0.0001950566487445217, "loss": 3.1489, "step": 5419 }, { "epoch": 0.2537304698929136, "grad_norm": 1.1171875, "learning_rate": 0.00019505483596843367, "loss": 3.5625, "step": 5420 }, { "epoch": 0.2537772836327462, "grad_norm": 2.84375, "learning_rate": 0.00019505302286845172, "loss": 2.877, "step": 5421 }, { "epoch": 0.2538240973725788, "grad_norm": 1.2109375, "learning_rate": 0.0001950512094445821, "loss": 3.2452, "step": 5422 }, { "epoch": 0.2538709111124115, "grad_norm": 1.3515625, "learning_rate": 0.000195049395696831, "loss": 3.4225, "step": 5423 }, { "epoch": 0.2539177248522441, "grad_norm": 1.078125, "learning_rate": 0.00019504758162520455, "loss": 3.169, "step": 5424 }, { "epoch": 0.2539645385920768, "grad_norm": 1.078125, "learning_rate": 0.00019504576722970896, "loss": 3.2664, "step": 5425 }, { "epoch": 0.2540113523319094, "grad_norm": 1.40625, "learning_rate": 0.00019504395251035043, "loss": 3.4438, "step": 5426 }, { "epoch": 0.25405816607174203, "grad_norm": 1.3046875, "learning_rate": 0.00019504213746713512, "loss": 3.0076, "step": 5427 }, { "epoch": 0.2541049798115747, "grad_norm": 2.125, "learning_rate": 0.00019504032210006922, "loss": 4.2464, "step": 5428 }, { "epoch": 0.2541517935514073, "grad_norm": 1.0625, "learning_rate": 0.00019503850640915894, "loss": 3.6222, "step": 5429 }, { "epoch": 0.25419860729124, "grad_norm": 1.2734375, "learning_rate": 0.00019503669039441042, "loss": 3.6393, "step": 5430 }, { "epoch": 0.2542454210310726, "grad_norm": 1.7578125, "learning_rate": 0.00019503487405582985, "loss": 3.0931, "step": 5431 }, { "epoch": 0.25429223477090523, "grad_norm": 3.484375, "learning_rate": 0.00019503305739342346, "loss": 3.5714, "step": 5432 }, { "epoch": 0.2543390485107379, "grad_norm": 1.921875, "learning_rate": 0.00019503124040719746, "loss": 3.6316, "step": 5433 }, { "epoch": 0.2543858622505705, "grad_norm": 1.359375, "learning_rate": 0.00019502942309715791, "loss": 3.5857, "step": 5434 }, { "epoch": 0.2544326759904032, "grad_norm": 1.1328125, "learning_rate": 0.00019502760546331115, "loss": 3.1536, "step": 5435 }, { "epoch": 0.2544794897302358, "grad_norm": 1.140625, "learning_rate": 0.0001950257875056633, "loss": 3.5479, "step": 5436 }, { "epoch": 0.25452630347006844, "grad_norm": 1.1640625, "learning_rate": 0.0001950239692242206, "loss": 3.5925, "step": 5437 }, { "epoch": 0.2545731172099011, "grad_norm": 1.2734375, "learning_rate": 0.00019502215061898922, "loss": 3.0886, "step": 5438 }, { "epoch": 0.25461993094973373, "grad_norm": 0.98828125, "learning_rate": 0.00019502033168997533, "loss": 5.107, "step": 5439 }, { "epoch": 0.2546667446895664, "grad_norm": 1.359375, "learning_rate": 0.00019501851243718517, "loss": 3.4564, "step": 5440 }, { "epoch": 0.254713558429399, "grad_norm": 1.2109375, "learning_rate": 0.00019501669286062492, "loss": 3.2151, "step": 5441 }, { "epoch": 0.2547603721692317, "grad_norm": 1.3359375, "learning_rate": 0.00019501487296030075, "loss": 3.3298, "step": 5442 }, { "epoch": 0.2548071859090643, "grad_norm": 1.7890625, "learning_rate": 0.0001950130527362189, "loss": 3.5335, "step": 5443 }, { "epoch": 0.25485399964889693, "grad_norm": 1.2109375, "learning_rate": 0.0001950112321883856, "loss": 3.7706, "step": 5444 }, { "epoch": 0.2549008133887296, "grad_norm": 1.4375, "learning_rate": 0.00019500941131680698, "loss": 3.628, "step": 5445 }, { "epoch": 0.2549476271285622, "grad_norm": 1.234375, "learning_rate": 0.0001950075901214893, "loss": 3.624, "step": 5446 }, { "epoch": 0.2549944408683949, "grad_norm": 1.15625, "learning_rate": 0.00019500576860243877, "loss": 3.1568, "step": 5447 }, { "epoch": 0.2550412546082275, "grad_norm": 1.25, "learning_rate": 0.00019500394675966157, "loss": 3.6149, "step": 5448 }, { "epoch": 0.25508806834806014, "grad_norm": 1.984375, "learning_rate": 0.00019500212459316386, "loss": 3.4764, "step": 5449 }, { "epoch": 0.2551348820878928, "grad_norm": 1.3046875, "learning_rate": 0.00019500030210295194, "loss": 3.6067, "step": 5450 }, { "epoch": 0.25518169582772543, "grad_norm": 1.34375, "learning_rate": 0.00019499847928903198, "loss": 3.5693, "step": 5451 }, { "epoch": 0.2552285095675581, "grad_norm": 1.4140625, "learning_rate": 0.00019499665615141018, "loss": 3.0145, "step": 5452 }, { "epoch": 0.2552753233073907, "grad_norm": 1.53125, "learning_rate": 0.0001949948326900928, "loss": 2.4525, "step": 5453 }, { "epoch": 0.25532213704722334, "grad_norm": 1.2890625, "learning_rate": 0.00019499300890508597, "loss": 3.5447, "step": 5454 }, { "epoch": 0.255368950787056, "grad_norm": 1.546875, "learning_rate": 0.00019499118479639603, "loss": 3.0784, "step": 5455 }, { "epoch": 0.25541576452688863, "grad_norm": 1.15625, "learning_rate": 0.00019498936036402905, "loss": 3.1773, "step": 5456 }, { "epoch": 0.2554625782667213, "grad_norm": 1.5625, "learning_rate": 0.00019498753560799132, "loss": 3.5483, "step": 5457 }, { "epoch": 0.2555093920065539, "grad_norm": 2.125, "learning_rate": 0.00019498571052828907, "loss": 3.7201, "step": 5458 }, { "epoch": 0.25555620574638654, "grad_norm": 1.3125, "learning_rate": 0.0001949838851249285, "loss": 2.9086, "step": 5459 }, { "epoch": 0.2556030194862192, "grad_norm": 1.203125, "learning_rate": 0.00019498205939791582, "loss": 3.2726, "step": 5460 }, { "epoch": 0.25564983322605184, "grad_norm": 1.171875, "learning_rate": 0.00019498023334725727, "loss": 3.1703, "step": 5461 }, { "epoch": 0.2556966469658845, "grad_norm": 1.3359375, "learning_rate": 0.00019497840697295908, "loss": 3.5807, "step": 5462 }, { "epoch": 0.25574346070571713, "grad_norm": 1.1640625, "learning_rate": 0.00019497658027502746, "loss": 5.2192, "step": 5463 }, { "epoch": 0.25579027444554975, "grad_norm": 1.734375, "learning_rate": 0.0001949747532534686, "loss": 3.5579, "step": 5464 }, { "epoch": 0.2558370881853824, "grad_norm": 1.515625, "learning_rate": 0.00019497292590828878, "loss": 3.3018, "step": 5465 }, { "epoch": 0.25588390192521504, "grad_norm": 1.0, "learning_rate": 0.0001949710982394942, "loss": 3.2826, "step": 5466 }, { "epoch": 0.2559307156650477, "grad_norm": 1.140625, "learning_rate": 0.0001949692702470911, "loss": 3.3067, "step": 5467 }, { "epoch": 0.25597752940488033, "grad_norm": 1.5546875, "learning_rate": 0.00019496744193108569, "loss": 4.2325, "step": 5468 }, { "epoch": 0.25602434314471295, "grad_norm": 1.7578125, "learning_rate": 0.0001949656132914842, "loss": 3.4112, "step": 5469 }, { "epoch": 0.2560711568845456, "grad_norm": 1.21875, "learning_rate": 0.0001949637843282929, "loss": 3.6814, "step": 5470 }, { "epoch": 0.25611797062437824, "grad_norm": 1.4609375, "learning_rate": 0.000194961955041518, "loss": 3.4773, "step": 5471 }, { "epoch": 0.2561647843642109, "grad_norm": 1.5625, "learning_rate": 0.00019496012543116572, "loss": 3.4786, "step": 5472 }, { "epoch": 0.25621159810404354, "grad_norm": 1.21875, "learning_rate": 0.00019495829549724225, "loss": 2.7867, "step": 5473 }, { "epoch": 0.25625841184387615, "grad_norm": 1.578125, "learning_rate": 0.00019495646523975393, "loss": 2.7305, "step": 5474 }, { "epoch": 0.25630522558370883, "grad_norm": 1.3828125, "learning_rate": 0.00019495463465870694, "loss": 3.7638, "step": 5475 }, { "epoch": 0.25635203932354145, "grad_norm": 1.5078125, "learning_rate": 0.0001949528037541075, "loss": 3.9626, "step": 5476 }, { "epoch": 0.2563988530633741, "grad_norm": 17.125, "learning_rate": 0.00019495097252596188, "loss": 7.4704, "step": 5477 }, { "epoch": 0.25644566680320674, "grad_norm": 1.3828125, "learning_rate": 0.0001949491409742763, "loss": 3.3641, "step": 5478 }, { "epoch": 0.25649248054303936, "grad_norm": 1.1640625, "learning_rate": 0.00019494730909905706, "loss": 3.0484, "step": 5479 }, { "epoch": 0.25653929428287203, "grad_norm": 1.6953125, "learning_rate": 0.00019494547690031028, "loss": 3.6311, "step": 5480 }, { "epoch": 0.25658610802270465, "grad_norm": 1.0859375, "learning_rate": 0.00019494364437804232, "loss": 3.6272, "step": 5481 }, { "epoch": 0.2566329217625373, "grad_norm": 1.09375, "learning_rate": 0.00019494181153225938, "loss": 5.3117, "step": 5482 }, { "epoch": 0.25667973550236994, "grad_norm": 1.28125, "learning_rate": 0.0001949399783629677, "loss": 3.28, "step": 5483 }, { "epoch": 0.25672654924220256, "grad_norm": 2.078125, "learning_rate": 0.00019493814487017352, "loss": 3.8896, "step": 5484 }, { "epoch": 0.25677336298203524, "grad_norm": 1.28125, "learning_rate": 0.0001949363110538831, "loss": 3.4814, "step": 5485 }, { "epoch": 0.25682017672186785, "grad_norm": 1.53125, "learning_rate": 0.0001949344769141027, "loss": 3.7079, "step": 5486 }, { "epoch": 0.25686699046170053, "grad_norm": 1.3515625, "learning_rate": 0.00019493264245083855, "loss": 3.0199, "step": 5487 }, { "epoch": 0.25691380420153315, "grad_norm": 1.34375, "learning_rate": 0.0001949308076640969, "loss": 3.6619, "step": 5488 }, { "epoch": 0.25696061794136577, "grad_norm": 1.734375, "learning_rate": 0.00019492897255388403, "loss": 3.6754, "step": 5489 }, { "epoch": 0.25700743168119844, "grad_norm": 1.3984375, "learning_rate": 0.00019492713712020615, "loss": 3.0047, "step": 5490 }, { "epoch": 0.25705424542103106, "grad_norm": 1.34375, "learning_rate": 0.00019492530136306954, "loss": 3.5734, "step": 5491 }, { "epoch": 0.25710105916086373, "grad_norm": 1.6015625, "learning_rate": 0.00019492346528248046, "loss": 3.374, "step": 5492 }, { "epoch": 0.25714787290069635, "grad_norm": 1.3515625, "learning_rate": 0.0001949216288784452, "loss": 3.2969, "step": 5493 }, { "epoch": 0.25719468664052897, "grad_norm": 1.3359375, "learning_rate": 0.00019491979215096992, "loss": 3.174, "step": 5494 }, { "epoch": 0.25724150038036164, "grad_norm": 1.5546875, "learning_rate": 0.00019491795510006093, "loss": 3.6324, "step": 5495 }, { "epoch": 0.25728831412019426, "grad_norm": 1.21875, "learning_rate": 0.00019491611772572454, "loss": 3.2532, "step": 5496 }, { "epoch": 0.25733512786002694, "grad_norm": 1.53125, "learning_rate": 0.00019491428002796696, "loss": 3.4822, "step": 5497 }, { "epoch": 0.25738194159985955, "grad_norm": 1.765625, "learning_rate": 0.0001949124420067944, "loss": 3.7787, "step": 5498 }, { "epoch": 0.2574287553396922, "grad_norm": 1.9453125, "learning_rate": 0.00019491060366221322, "loss": 2.8244, "step": 5499 }, { "epoch": 0.25747556907952485, "grad_norm": 1.421875, "learning_rate": 0.00019490876499422965, "loss": 3.4316, "step": 5500 }, { "epoch": 0.25752238281935746, "grad_norm": 1.609375, "learning_rate": 0.00019490692600284995, "loss": 4.1052, "step": 5501 }, { "epoch": 0.25756919655919014, "grad_norm": 1.421875, "learning_rate": 0.0001949050866880804, "loss": 3.7247, "step": 5502 }, { "epoch": 0.25761601029902276, "grad_norm": 1.5078125, "learning_rate": 0.0001949032470499272, "loss": 3.453, "step": 5503 }, { "epoch": 0.25766282403885543, "grad_norm": 1.5546875, "learning_rate": 0.0001949014070883967, "loss": 3.8843, "step": 5504 }, { "epoch": 0.25770963777868805, "grad_norm": 1.1875, "learning_rate": 0.00019489956680349515, "loss": 3.2327, "step": 5505 }, { "epoch": 0.25775645151852067, "grad_norm": 1.265625, "learning_rate": 0.00019489772619522882, "loss": 3.1369, "step": 5506 }, { "epoch": 0.25780326525835334, "grad_norm": 2.078125, "learning_rate": 0.00019489588526360393, "loss": 3.7499, "step": 5507 }, { "epoch": 0.25785007899818596, "grad_norm": 1.2109375, "learning_rate": 0.00019489404400862683, "loss": 3.3067, "step": 5508 }, { "epoch": 0.25789689273801863, "grad_norm": 1.5078125, "learning_rate": 0.00019489220243030375, "loss": 3.2389, "step": 5509 }, { "epoch": 0.25794370647785125, "grad_norm": 1.125, "learning_rate": 0.000194890360528641, "loss": 5.3996, "step": 5510 }, { "epoch": 0.25799052021768387, "grad_norm": 1.71875, "learning_rate": 0.00019488851830364482, "loss": 3.8102, "step": 5511 }, { "epoch": 0.25803733395751655, "grad_norm": 1.4765625, "learning_rate": 0.00019488667575532148, "loss": 3.3085, "step": 5512 }, { "epoch": 0.25808414769734916, "grad_norm": 1.3203125, "learning_rate": 0.00019488483288367727, "loss": 3.6645, "step": 5513 }, { "epoch": 0.25813096143718184, "grad_norm": 1.3515625, "learning_rate": 0.0001948829896887185, "loss": 3.5296, "step": 5514 }, { "epoch": 0.25817777517701446, "grad_norm": 1.4609375, "learning_rate": 0.00019488114617045141, "loss": 3.1014, "step": 5515 }, { "epoch": 0.2582245889168471, "grad_norm": 1.125, "learning_rate": 0.00019487930232888232, "loss": 3.106, "step": 5516 }, { "epoch": 0.25827140265667975, "grad_norm": 1.3828125, "learning_rate": 0.00019487745816401748, "loss": 2.7638, "step": 5517 }, { "epoch": 0.25831821639651237, "grad_norm": 1.265625, "learning_rate": 0.00019487561367586317, "loss": 3.368, "step": 5518 }, { "epoch": 0.25836503013634504, "grad_norm": 2.296875, "learning_rate": 0.00019487376886442572, "loss": 3.602, "step": 5519 }, { "epoch": 0.25841184387617766, "grad_norm": 1.7890625, "learning_rate": 0.00019487192372971137, "loss": 3.8983, "step": 5520 }, { "epoch": 0.2584586576160103, "grad_norm": 1.4140625, "learning_rate": 0.00019487007827172643, "loss": 3.747, "step": 5521 }, { "epoch": 0.25850547135584295, "grad_norm": 1.5, "learning_rate": 0.00019486823249047718, "loss": 3.0568, "step": 5522 }, { "epoch": 0.25855228509567557, "grad_norm": 1.3125, "learning_rate": 0.00019486638638596993, "loss": 3.0916, "step": 5523 }, { "epoch": 0.25859909883550825, "grad_norm": 1.375, "learning_rate": 0.00019486453995821093, "loss": 3.5835, "step": 5524 }, { "epoch": 0.25864591257534086, "grad_norm": 1.53125, "learning_rate": 0.0001948626932072065, "loss": 2.4549, "step": 5525 }, { "epoch": 0.2586927263151735, "grad_norm": 1.375, "learning_rate": 0.0001948608461329629, "loss": 3.3979, "step": 5526 }, { "epoch": 0.25873954005500616, "grad_norm": 1.1796875, "learning_rate": 0.00019485899873548647, "loss": 3.4295, "step": 5527 }, { "epoch": 0.2587863537948388, "grad_norm": 1.3125, "learning_rate": 0.0001948571510147835, "loss": 2.9401, "step": 5528 }, { "epoch": 0.25883316753467145, "grad_norm": 1.34375, "learning_rate": 0.00019485530297086022, "loss": 3.5385, "step": 5529 }, { "epoch": 0.25887998127450407, "grad_norm": 1.1953125, "learning_rate": 0.00019485345460372303, "loss": 3.3354, "step": 5530 }, { "epoch": 0.2589267950143367, "grad_norm": 0.9765625, "learning_rate": 0.00019485160591337815, "loss": 3.3575, "step": 5531 }, { "epoch": 0.25897360875416936, "grad_norm": 1.3046875, "learning_rate": 0.00019484975689983193, "loss": 3.3555, "step": 5532 }, { "epoch": 0.259020422494002, "grad_norm": 1.3828125, "learning_rate": 0.00019484790756309062, "loss": 3.7397, "step": 5533 }, { "epoch": 0.25906723623383465, "grad_norm": 1.0859375, "learning_rate": 0.00019484605790316058, "loss": 3.4957, "step": 5534 }, { "epoch": 0.25911404997366727, "grad_norm": 1.3125, "learning_rate": 0.00019484420792004807, "loss": 3.5603, "step": 5535 }, { "epoch": 0.2591608637134999, "grad_norm": 1.3203125, "learning_rate": 0.0001948423576137594, "loss": 3.2367, "step": 5536 }, { "epoch": 0.25920767745333256, "grad_norm": 1.1953125, "learning_rate": 0.00019484050698430084, "loss": 3.715, "step": 5537 }, { "epoch": 0.2592544911931652, "grad_norm": 1.3828125, "learning_rate": 0.0001948386560316788, "loss": 3.0196, "step": 5538 }, { "epoch": 0.25930130493299786, "grad_norm": 1.2421875, "learning_rate": 0.0001948368047558995, "loss": 3.0955, "step": 5539 }, { "epoch": 0.2593481186728305, "grad_norm": 1.5390625, "learning_rate": 0.00019483495315696924, "loss": 3.4118, "step": 5540 }, { "epoch": 0.2593949324126631, "grad_norm": 1.2265625, "learning_rate": 0.00019483310123489438, "loss": 3.7649, "step": 5541 }, { "epoch": 0.25944174615249577, "grad_norm": 1.1015625, "learning_rate": 0.00019483124898968121, "loss": 3.4626, "step": 5542 }, { "epoch": 0.2594885598923284, "grad_norm": 1.4296875, "learning_rate": 0.00019482939642133604, "loss": 3.6048, "step": 5543 }, { "epoch": 0.25953537363216106, "grad_norm": 1.21875, "learning_rate": 0.00019482754352986517, "loss": 3.4289, "step": 5544 }, { "epoch": 0.2595821873719937, "grad_norm": 1.1875, "learning_rate": 0.00019482569031527495, "loss": 3.5729, "step": 5545 }, { "epoch": 0.2596290011118263, "grad_norm": 1.1640625, "learning_rate": 0.00019482383677757166, "loss": 3.1439, "step": 5546 }, { "epoch": 0.25967581485165897, "grad_norm": 1.46875, "learning_rate": 0.00019482198291676162, "loss": 3.298, "step": 5547 }, { "epoch": 0.2597226285914916, "grad_norm": 1.203125, "learning_rate": 0.00019482012873285116, "loss": 3.165, "step": 5548 }, { "epoch": 0.25976944233132426, "grad_norm": 1.3046875, "learning_rate": 0.0001948182742258466, "loss": 3.4862, "step": 5549 }, { "epoch": 0.2598162560711569, "grad_norm": 1.2109375, "learning_rate": 0.00019481641939575424, "loss": 3.5323, "step": 5550 }, { "epoch": 0.2598630698109895, "grad_norm": 1.5859375, "learning_rate": 0.0001948145642425804, "loss": 3.1548, "step": 5551 }, { "epoch": 0.2599098835508222, "grad_norm": 1.359375, "learning_rate": 0.00019481270876633145, "loss": 3.5223, "step": 5552 }, { "epoch": 0.2599566972906548, "grad_norm": 1.421875, "learning_rate": 0.00019481085296701366, "loss": 3.2244, "step": 5553 }, { "epoch": 0.26000351103048747, "grad_norm": 0.97265625, "learning_rate": 0.00019480899684463335, "loss": 3.1179, "step": 5554 }, { "epoch": 0.2600503247703201, "grad_norm": 1.125, "learning_rate": 0.00019480714039919687, "loss": 3.0985, "step": 5555 }, { "epoch": 0.2600971385101527, "grad_norm": 1.1796875, "learning_rate": 0.00019480528363071057, "loss": 2.731, "step": 5556 }, { "epoch": 0.2601439522499854, "grad_norm": 1.09375, "learning_rate": 0.00019480342653918072, "loss": 2.8749, "step": 5557 }, { "epoch": 0.260190765989818, "grad_norm": 1.15625, "learning_rate": 0.00019480156912461368, "loss": 2.9519, "step": 5558 }, { "epoch": 0.26023757972965067, "grad_norm": 1.1953125, "learning_rate": 0.00019479971138701573, "loss": 3.4317, "step": 5559 }, { "epoch": 0.2602843934694833, "grad_norm": 1.3671875, "learning_rate": 0.0001947978533263933, "loss": 3.518, "step": 5560 }, { "epoch": 0.2603312072093159, "grad_norm": 1.0859375, "learning_rate": 0.00019479599494275264, "loss": 3.4017, "step": 5561 }, { "epoch": 0.2603780209491486, "grad_norm": 1.015625, "learning_rate": 0.0001947941362361001, "loss": 3.3213, "step": 5562 }, { "epoch": 0.2604248346889812, "grad_norm": 1.2265625, "learning_rate": 0.00019479227720644205, "loss": 3.6286, "step": 5563 }, { "epoch": 0.2604716484288139, "grad_norm": 1.140625, "learning_rate": 0.00019479041785378475, "loss": 3.5963, "step": 5564 }, { "epoch": 0.2605184621686465, "grad_norm": 1.390625, "learning_rate": 0.00019478855817813462, "loss": 3.6084, "step": 5565 }, { "epoch": 0.26056527590847917, "grad_norm": 1.234375, "learning_rate": 0.00019478669817949795, "loss": 3.5042, "step": 5566 }, { "epoch": 0.2606120896483118, "grad_norm": 1.2265625, "learning_rate": 0.0001947848378578811, "loss": 3.6949, "step": 5567 }, { "epoch": 0.2606589033881444, "grad_norm": 1.3984375, "learning_rate": 0.00019478297721329036, "loss": 3.5558, "step": 5568 }, { "epoch": 0.2607057171279771, "grad_norm": 2.453125, "learning_rate": 0.00019478111624573213, "loss": 2.8733, "step": 5569 }, { "epoch": 0.2607525308678097, "grad_norm": 1.3125, "learning_rate": 0.0001947792549552127, "loss": 3.246, "step": 5570 }, { "epoch": 0.26079934460764237, "grad_norm": 1.484375, "learning_rate": 0.00019477739334173845, "loss": 3.3932, "step": 5571 }, { "epoch": 0.260846158347475, "grad_norm": 1.1328125, "learning_rate": 0.0001947755314053157, "loss": 3.2848, "step": 5572 }, { "epoch": 0.2608929720873076, "grad_norm": 1.2890625, "learning_rate": 0.00019477366914595083, "loss": 3.3872, "step": 5573 }, { "epoch": 0.2609397858271403, "grad_norm": 1.1171875, "learning_rate": 0.00019477180656365012, "loss": 3.4218, "step": 5574 }, { "epoch": 0.2609865995669729, "grad_norm": 1.40625, "learning_rate": 0.00019476994365842, "loss": 3.4148, "step": 5575 }, { "epoch": 0.2610334133068056, "grad_norm": 1.2734375, "learning_rate": 0.00019476808043026675, "loss": 3.3823, "step": 5576 }, { "epoch": 0.2610802270466382, "grad_norm": 1.03125, "learning_rate": 0.00019476621687919676, "loss": 3.2975, "step": 5577 }, { "epoch": 0.2611270407864708, "grad_norm": 1.3046875, "learning_rate": 0.00019476435300521636, "loss": 3.5018, "step": 5578 }, { "epoch": 0.2611738545263035, "grad_norm": 0.94921875, "learning_rate": 0.0001947624888083319, "loss": 2.2261, "step": 5579 }, { "epoch": 0.2612206682661361, "grad_norm": 1.34375, "learning_rate": 0.00019476062428854974, "loss": 3.5216, "step": 5580 }, { "epoch": 0.2612674820059688, "grad_norm": 1.1015625, "learning_rate": 0.00019475875944587623, "loss": 3.0866, "step": 5581 }, { "epoch": 0.2613142957458014, "grad_norm": 1.3984375, "learning_rate": 0.00019475689428031772, "loss": 2.9984, "step": 5582 }, { "epoch": 0.261361109485634, "grad_norm": 1.3359375, "learning_rate": 0.00019475502879188057, "loss": 3.6744, "step": 5583 }, { "epoch": 0.2614079232254667, "grad_norm": 1.5390625, "learning_rate": 0.00019475316298057113, "loss": 3.4086, "step": 5584 }, { "epoch": 0.2614547369652993, "grad_norm": 1.875, "learning_rate": 0.00019475129684639578, "loss": 2.9194, "step": 5585 }, { "epoch": 0.261501550705132, "grad_norm": 1.2265625, "learning_rate": 0.00019474943038936084, "loss": 2.897, "step": 5586 }, { "epoch": 0.2615483644449646, "grad_norm": 1.6796875, "learning_rate": 0.0001947475636094727, "loss": 3.4995, "step": 5587 }, { "epoch": 0.2615951781847972, "grad_norm": 1.3359375, "learning_rate": 0.00019474569650673775, "loss": 3.372, "step": 5588 }, { "epoch": 0.2616419919246299, "grad_norm": 1.609375, "learning_rate": 0.0001947438290811623, "loss": 3.5388, "step": 5589 }, { "epoch": 0.2616888056644625, "grad_norm": 1.3828125, "learning_rate": 0.0001947419613327527, "loss": 3.593, "step": 5590 }, { "epoch": 0.2617356194042952, "grad_norm": 1.3984375, "learning_rate": 0.00019474009326151537, "loss": 3.2274, "step": 5591 }, { "epoch": 0.2617824331441278, "grad_norm": 1.5, "learning_rate": 0.0001947382248674566, "loss": 3.263, "step": 5592 }, { "epoch": 0.2618292468839604, "grad_norm": 1.1328125, "learning_rate": 0.00019473635615058286, "loss": 3.3753, "step": 5593 }, { "epoch": 0.2618760606237931, "grad_norm": 1.53125, "learning_rate": 0.00019473448711090043, "loss": 3.9062, "step": 5594 }, { "epoch": 0.2619228743636257, "grad_norm": 1.140625, "learning_rate": 0.00019473261774841577, "loss": 3.6808, "step": 5595 }, { "epoch": 0.2619696881034584, "grad_norm": 1.3359375, "learning_rate": 0.00019473074806313512, "loss": 2.9854, "step": 5596 }, { "epoch": 0.262016501843291, "grad_norm": 1.4140625, "learning_rate": 0.00019472887805506493, "loss": 3.2888, "step": 5597 }, { "epoch": 0.2620633155831236, "grad_norm": 1.96875, "learning_rate": 0.0001947270077242116, "loss": 3.5357, "step": 5598 }, { "epoch": 0.2621101293229563, "grad_norm": 1.1875, "learning_rate": 0.00019472513707058148, "loss": 3.3697, "step": 5599 }, { "epoch": 0.2621569430627889, "grad_norm": 1.0625, "learning_rate": 0.00019472326609418087, "loss": 3.2956, "step": 5600 }, { "epoch": 0.2622037568026216, "grad_norm": 1.640625, "learning_rate": 0.00019472139479501623, "loss": 3.2979, "step": 5601 }, { "epoch": 0.2622505705424542, "grad_norm": 1.6484375, "learning_rate": 0.00019471952317309392, "loss": 3.1061, "step": 5602 }, { "epoch": 0.2622973842822868, "grad_norm": 1.21875, "learning_rate": 0.0001947176512284203, "loss": 3.5429, "step": 5603 }, { "epoch": 0.2623441980221195, "grad_norm": 1.0625, "learning_rate": 0.00019471577896100178, "loss": 3.5621, "step": 5604 }, { "epoch": 0.2623910117619521, "grad_norm": 1.7734375, "learning_rate": 0.0001947139063708447, "loss": 2.9882, "step": 5605 }, { "epoch": 0.2624378255017848, "grad_norm": 1.140625, "learning_rate": 0.00019471203345795547, "loss": 3.1136, "step": 5606 }, { "epoch": 0.2624846392416174, "grad_norm": 1.78125, "learning_rate": 0.00019471016022234045, "loss": 3.4889, "step": 5607 }, { "epoch": 0.26253145298145003, "grad_norm": 1.4453125, "learning_rate": 0.00019470828666400602, "loss": 3.5852, "step": 5608 }, { "epoch": 0.2625782667212827, "grad_norm": 1.2109375, "learning_rate": 0.00019470641278295861, "loss": 3.1182, "step": 5609 }, { "epoch": 0.2626250804611153, "grad_norm": 1.5, "learning_rate": 0.00019470453857920456, "loss": 3.6724, "step": 5610 }, { "epoch": 0.262671894200948, "grad_norm": 1.1875, "learning_rate": 0.00019470266405275025, "loss": 3.4268, "step": 5611 }, { "epoch": 0.2627187079407806, "grad_norm": 1.5859375, "learning_rate": 0.00019470078920360212, "loss": 3.439, "step": 5612 }, { "epoch": 0.26276552168061323, "grad_norm": 1.1328125, "learning_rate": 0.0001946989140317665, "loss": 2.5726, "step": 5613 }, { "epoch": 0.2628123354204459, "grad_norm": 2.078125, "learning_rate": 0.00019469703853724983, "loss": 3.2252, "step": 5614 }, { "epoch": 0.2628591491602785, "grad_norm": 1.375, "learning_rate": 0.00019469516272005842, "loss": 3.4258, "step": 5615 }, { "epoch": 0.2629059629001112, "grad_norm": 1.359375, "learning_rate": 0.0001946932865801988, "loss": 3.7279, "step": 5616 }, { "epoch": 0.2629527766399438, "grad_norm": 1.6484375, "learning_rate": 0.0001946914101176772, "loss": 3.7638, "step": 5617 }, { "epoch": 0.26299959037977644, "grad_norm": 1.703125, "learning_rate": 0.00019468953333250016, "loss": 4.1341, "step": 5618 }, { "epoch": 0.2630464041196091, "grad_norm": 1.515625, "learning_rate": 0.00019468765622467394, "loss": 3.4845, "step": 5619 }, { "epoch": 0.26309321785944173, "grad_norm": 1.5546875, "learning_rate": 0.00019468577879420504, "loss": 3.4684, "step": 5620 }, { "epoch": 0.2631400315992744, "grad_norm": 1.3984375, "learning_rate": 0.00019468390104109982, "loss": 3.3239, "step": 5621 }, { "epoch": 0.263186845339107, "grad_norm": 1.015625, "learning_rate": 0.0001946820229653647, "loss": 3.334, "step": 5622 }, { "epoch": 0.26323365907893964, "grad_norm": 1.171875, "learning_rate": 0.00019468014456700602, "loss": 2.5807, "step": 5623 }, { "epoch": 0.2632804728187723, "grad_norm": 3.265625, "learning_rate": 0.00019467826584603027, "loss": 3.2007, "step": 5624 }, { "epoch": 0.26332728655860493, "grad_norm": 1.8125, "learning_rate": 0.00019467638680244374, "loss": 3.557, "step": 5625 }, { "epoch": 0.2633741002984376, "grad_norm": 1.5859375, "learning_rate": 0.00019467450743625294, "loss": 3.2416, "step": 5626 }, { "epoch": 0.2634209140382702, "grad_norm": 1.375, "learning_rate": 0.0001946726277474642, "loss": 3.5917, "step": 5627 }, { "epoch": 0.2634677277781029, "grad_norm": 1.46875, "learning_rate": 0.000194670747736084, "loss": 2.8703, "step": 5628 }, { "epoch": 0.2635145415179355, "grad_norm": 1.296875, "learning_rate": 0.00019466886740211864, "loss": 2.9618, "step": 5629 }, { "epoch": 0.26356135525776814, "grad_norm": 1.2265625, "learning_rate": 0.00019466698674557465, "loss": 3.3275, "step": 5630 }, { "epoch": 0.2636081689976008, "grad_norm": 1.328125, "learning_rate": 0.00019466510576645832, "loss": 3.524, "step": 5631 }, { "epoch": 0.26365498273743343, "grad_norm": 1.1328125, "learning_rate": 0.00019466322446477614, "loss": 3.1647, "step": 5632 }, { "epoch": 0.2637017964772661, "grad_norm": 1.8203125, "learning_rate": 0.00019466134284053447, "loss": 3.0801, "step": 5633 }, { "epoch": 0.2637486102170987, "grad_norm": 1.0078125, "learning_rate": 0.00019465946089373978, "loss": 3.2179, "step": 5634 }, { "epoch": 0.26379542395693134, "grad_norm": 1.7578125, "learning_rate": 0.00019465757862439844, "loss": 3.5009, "step": 5635 }, { "epoch": 0.263842237696764, "grad_norm": 1.5390625, "learning_rate": 0.00019465569603251685, "loss": 3.2462, "step": 5636 }, { "epoch": 0.26388905143659663, "grad_norm": 1.3046875, "learning_rate": 0.0001946538131181015, "loss": 3.3863, "step": 5637 }, { "epoch": 0.2639358651764293, "grad_norm": 1.1953125, "learning_rate": 0.0001946519298811587, "loss": 3.2675, "step": 5638 }, { "epoch": 0.2639826789162619, "grad_norm": 1.5078125, "learning_rate": 0.00019465004632169492, "loss": 3.5961, "step": 5639 }, { "epoch": 0.26402949265609454, "grad_norm": 1.453125, "learning_rate": 0.0001946481624397166, "loss": 3.3039, "step": 5640 }, { "epoch": 0.2640763063959272, "grad_norm": 1.765625, "learning_rate": 0.0001946462782352301, "loss": 3.4788, "step": 5641 }, { "epoch": 0.26412312013575984, "grad_norm": 1.71875, "learning_rate": 0.00019464439370824195, "loss": 3.43, "step": 5642 }, { "epoch": 0.2641699338755925, "grad_norm": 1.6015625, "learning_rate": 0.00019464250885875846, "loss": 3.6841, "step": 5643 }, { "epoch": 0.26421674761542513, "grad_norm": 1.2578125, "learning_rate": 0.00019464062368678605, "loss": 3.2423, "step": 5644 }, { "epoch": 0.26426356135525775, "grad_norm": 1.2109375, "learning_rate": 0.00019463873819233125, "loss": 3.2728, "step": 5645 }, { "epoch": 0.2643103750950904, "grad_norm": 1.6015625, "learning_rate": 0.0001946368523754004, "loss": 3.7685, "step": 5646 }, { "epoch": 0.26435718883492304, "grad_norm": 1.2421875, "learning_rate": 0.00019463496623599994, "loss": 3.7296, "step": 5647 }, { "epoch": 0.2644040025747557, "grad_norm": 1.34375, "learning_rate": 0.0001946330797741363, "loss": 2.9767, "step": 5648 }, { "epoch": 0.26445081631458833, "grad_norm": 1.34375, "learning_rate": 0.0001946311929898159, "loss": 3.5552, "step": 5649 }, { "epoch": 0.26449763005442095, "grad_norm": 1.4921875, "learning_rate": 0.0001946293058830452, "loss": 3.574, "step": 5650 }, { "epoch": 0.2645444437942536, "grad_norm": 1.25, "learning_rate": 0.00019462741845383062, "loss": 3.6042, "step": 5651 }, { "epoch": 0.26459125753408624, "grad_norm": 1.09375, "learning_rate": 0.00019462553070217857, "loss": 3.4196, "step": 5652 }, { "epoch": 0.2646380712739189, "grad_norm": 1.2578125, "learning_rate": 0.00019462364262809548, "loss": 3.4423, "step": 5653 }, { "epoch": 0.26468488501375154, "grad_norm": 1.2578125, "learning_rate": 0.0001946217542315878, "loss": 3.1648, "step": 5654 }, { "epoch": 0.26473169875358415, "grad_norm": 1.296875, "learning_rate": 0.00019461986551266196, "loss": 3.3866, "step": 5655 }, { "epoch": 0.26477851249341683, "grad_norm": 1.4921875, "learning_rate": 0.00019461797647132444, "loss": 3.4091, "step": 5656 }, { "epoch": 0.26482532623324945, "grad_norm": 1.5546875, "learning_rate": 0.00019461608710758159, "loss": 3.3513, "step": 5657 }, { "epoch": 0.2648721399730821, "grad_norm": 1.0703125, "learning_rate": 0.0001946141974214399, "loss": 3.2393, "step": 5658 }, { "epoch": 0.26491895371291474, "grad_norm": 1.609375, "learning_rate": 0.0001946123074129058, "loss": 3.1846, "step": 5659 }, { "epoch": 0.26496576745274736, "grad_norm": 0.98046875, "learning_rate": 0.00019461041708198574, "loss": 3.01, "step": 5660 }, { "epoch": 0.26501258119258003, "grad_norm": 1.390625, "learning_rate": 0.00019460852642868616, "loss": 3.6411, "step": 5661 }, { "epoch": 0.26505939493241265, "grad_norm": 1.546875, "learning_rate": 0.00019460663545301345, "loss": 3.318, "step": 5662 }, { "epoch": 0.2651062086722453, "grad_norm": 1.1796875, "learning_rate": 0.00019460474415497413, "loss": 3.6154, "step": 5663 }, { "epoch": 0.26515302241207794, "grad_norm": 1.328125, "learning_rate": 0.0001946028525345746, "loss": 3.0301, "step": 5664 }, { "epoch": 0.26519983615191056, "grad_norm": 1.6328125, "learning_rate": 0.00019460096059182135, "loss": 3.6325, "step": 5665 }, { "epoch": 0.26524664989174324, "grad_norm": 1.6171875, "learning_rate": 0.0001945990683267208, "loss": 3.7203, "step": 5666 }, { "epoch": 0.26529346363157585, "grad_norm": 1.578125, "learning_rate": 0.00019459717573927935, "loss": 3.2763, "step": 5667 }, { "epoch": 0.26534027737140853, "grad_norm": 1.1171875, "learning_rate": 0.0001945952828295035, "loss": 3.4206, "step": 5668 }, { "epoch": 0.26538709111124115, "grad_norm": 1.3046875, "learning_rate": 0.0001945933895973997, "loss": 3.6152, "step": 5669 }, { "epoch": 0.26543390485107377, "grad_norm": 2.09375, "learning_rate": 0.0001945914960429744, "loss": 3.0487, "step": 5670 }, { "epoch": 0.26548071859090644, "grad_norm": 1.7734375, "learning_rate": 0.00019458960216623406, "loss": 3.324, "step": 5671 }, { "epoch": 0.26552753233073906, "grad_norm": 1.328125, "learning_rate": 0.0001945877079671851, "loss": 3.4128, "step": 5672 }, { "epoch": 0.26557434607057173, "grad_norm": 1.25, "learning_rate": 0.000194585813445834, "loss": 3.3964, "step": 5673 }, { "epoch": 0.26562115981040435, "grad_norm": 2.03125, "learning_rate": 0.00019458391860218717, "loss": 3.3896, "step": 5674 }, { "epoch": 0.26566797355023697, "grad_norm": 4.1875, "learning_rate": 0.00019458202343625115, "loss": 3.3319, "step": 5675 }, { "epoch": 0.26571478729006964, "grad_norm": 1.4765625, "learning_rate": 0.00019458012794803232, "loss": 3.1693, "step": 5676 }, { "epoch": 0.26576160102990226, "grad_norm": 1.671875, "learning_rate": 0.00019457823213753719, "loss": 3.3054, "step": 5677 }, { "epoch": 0.26580841476973494, "grad_norm": 1.3046875, "learning_rate": 0.0001945763360047722, "loss": 2.9034, "step": 5678 }, { "epoch": 0.26585522850956755, "grad_norm": 1.6015625, "learning_rate": 0.00019457443954974378, "loss": 3.7901, "step": 5679 }, { "epoch": 0.2659020422494002, "grad_norm": 1.7421875, "learning_rate": 0.00019457254277245847, "loss": 3.4708, "step": 5680 }, { "epoch": 0.26594885598923285, "grad_norm": 1.4609375, "learning_rate": 0.00019457064567292268, "loss": 3.5172, "step": 5681 }, { "epoch": 0.26599566972906546, "grad_norm": 1.3671875, "learning_rate": 0.00019456874825114287, "loss": 3.711, "step": 5682 }, { "epoch": 0.26604248346889814, "grad_norm": 1.2421875, "learning_rate": 0.0001945668505071255, "loss": 3.2972, "step": 5683 }, { "epoch": 0.26608929720873076, "grad_norm": 1.46875, "learning_rate": 0.00019456495244087708, "loss": 3.4303, "step": 5684 }, { "epoch": 0.2661361109485634, "grad_norm": 0.94921875, "learning_rate": 0.00019456305405240407, "loss": 3.3079, "step": 5685 }, { "epoch": 0.26618292468839605, "grad_norm": 1.171875, "learning_rate": 0.00019456115534171285, "loss": 3.4758, "step": 5686 }, { "epoch": 0.26622973842822867, "grad_norm": 1.28125, "learning_rate": 0.00019455925630881002, "loss": 2.8799, "step": 5687 }, { "epoch": 0.26627655216806134, "grad_norm": 1.3515625, "learning_rate": 0.000194557356953702, "loss": 3.1599, "step": 5688 }, { "epoch": 0.26632336590789396, "grad_norm": 1.390625, "learning_rate": 0.0001945554572763952, "loss": 3.6982, "step": 5689 }, { "epoch": 0.26637017964772663, "grad_norm": 1.7421875, "learning_rate": 0.00019455355727689616, "loss": 3.2314, "step": 5690 }, { "epoch": 0.26641699338755925, "grad_norm": 1.453125, "learning_rate": 0.00019455165695521137, "loss": 3.5746, "step": 5691 }, { "epoch": 0.26646380712739187, "grad_norm": 0.9765625, "learning_rate": 0.00019454975631134723, "loss": 3.5683, "step": 5692 }, { "epoch": 0.26651062086722455, "grad_norm": 1.4375, "learning_rate": 0.00019454785534531028, "loss": 3.3404, "step": 5693 }, { "epoch": 0.26655743460705716, "grad_norm": 1.640625, "learning_rate": 0.00019454595405710699, "loss": 3.5649, "step": 5694 }, { "epoch": 0.26660424834688984, "grad_norm": 2.015625, "learning_rate": 0.0001945440524467438, "loss": 3.1982, "step": 5695 }, { "epoch": 0.26665106208672246, "grad_norm": 1.1328125, "learning_rate": 0.00019454215051422724, "loss": 3.4737, "step": 5696 }, { "epoch": 0.2666978758265551, "grad_norm": 1.3984375, "learning_rate": 0.00019454024825956375, "loss": 3.426, "step": 5697 }, { "epoch": 0.26674468956638775, "grad_norm": 1.2109375, "learning_rate": 0.00019453834568275986, "loss": 3.2296, "step": 5698 }, { "epoch": 0.26679150330622037, "grad_norm": 1.546875, "learning_rate": 0.00019453644278382196, "loss": 3.4715, "step": 5699 }, { "epoch": 0.26683831704605304, "grad_norm": 1.8125, "learning_rate": 0.00019453453956275665, "loss": 3.5825, "step": 5700 }, { "epoch": 0.26688513078588566, "grad_norm": 1.4375, "learning_rate": 0.00019453263601957037, "loss": 3.6376, "step": 5701 }, { "epoch": 0.2669319445257183, "grad_norm": 1.203125, "learning_rate": 0.00019453073215426955, "loss": 3.783, "step": 5702 }, { "epoch": 0.26697875826555095, "grad_norm": 1.4296875, "learning_rate": 0.00019452882796686075, "loss": 3.0596, "step": 5703 }, { "epoch": 0.26702557200538357, "grad_norm": 1.2890625, "learning_rate": 0.00019452692345735045, "loss": 3.5301, "step": 5704 }, { "epoch": 0.26707238574521625, "grad_norm": 1.140625, "learning_rate": 0.0001945250186257451, "loss": 3.1939, "step": 5705 }, { "epoch": 0.26711919948504886, "grad_norm": 1.109375, "learning_rate": 0.0001945231134720512, "loss": 3.3612, "step": 5706 }, { "epoch": 0.2671660132248815, "grad_norm": 1.8203125, "learning_rate": 0.00019452120799627528, "loss": 3.2206, "step": 5707 }, { "epoch": 0.26721282696471416, "grad_norm": 1.1796875, "learning_rate": 0.00019451930219842377, "loss": 3.2217, "step": 5708 }, { "epoch": 0.2672596407045468, "grad_norm": 1.640625, "learning_rate": 0.00019451739607850326, "loss": 3.8473, "step": 5709 }, { "epoch": 0.26730645444437945, "grad_norm": 1.5625, "learning_rate": 0.00019451548963652013, "loss": 3.5877, "step": 5710 }, { "epoch": 0.26735326818421207, "grad_norm": 2.59375, "learning_rate": 0.000194513582872481, "loss": 3.951, "step": 5711 }, { "epoch": 0.2674000819240447, "grad_norm": 1.484375, "learning_rate": 0.00019451167578639223, "loss": 3.3653, "step": 5712 }, { "epoch": 0.26744689566387736, "grad_norm": 2.203125, "learning_rate": 0.0001945097683782604, "loss": 3.3681, "step": 5713 }, { "epoch": 0.26749370940371, "grad_norm": 1.3125, "learning_rate": 0.00019450786064809203, "loss": 3.3555, "step": 5714 }, { "epoch": 0.26754052314354265, "grad_norm": 1.6484375, "learning_rate": 0.00019450595259589356, "loss": 3.4634, "step": 5715 }, { "epoch": 0.26758733688337527, "grad_norm": 1.609375, "learning_rate": 0.0001945040442216715, "loss": 3.6427, "step": 5716 }, { "epoch": 0.2676341506232079, "grad_norm": 1.4375, "learning_rate": 0.00019450213552543244, "loss": 3.6127, "step": 5717 }, { "epoch": 0.26768096436304056, "grad_norm": 1.2734375, "learning_rate": 0.00019450022650718276, "loss": 3.3397, "step": 5718 }, { "epoch": 0.2677277781028732, "grad_norm": 1.46875, "learning_rate": 0.00019449831716692904, "loss": 3.3262, "step": 5719 }, { "epoch": 0.26777459184270586, "grad_norm": 1.671875, "learning_rate": 0.00019449640750467774, "loss": 3.6905, "step": 5720 }, { "epoch": 0.2678214055825385, "grad_norm": 1.0, "learning_rate": 0.00019449449752043544, "loss": 3.4564, "step": 5721 }, { "epoch": 0.2678682193223711, "grad_norm": 1.75, "learning_rate": 0.00019449258721420857, "loss": 3.3407, "step": 5722 }, { "epoch": 0.26791503306220377, "grad_norm": 1.4453125, "learning_rate": 0.00019449067658600368, "loss": 3.7935, "step": 5723 }, { "epoch": 0.2679618468020364, "grad_norm": 1.4609375, "learning_rate": 0.00019448876563582727, "loss": 3.631, "step": 5724 }, { "epoch": 0.26800866054186906, "grad_norm": 1.375, "learning_rate": 0.00019448685436368584, "loss": 3.4085, "step": 5725 }, { "epoch": 0.2680554742817017, "grad_norm": 1.296875, "learning_rate": 0.00019448494276958593, "loss": 3.5113, "step": 5726 }, { "epoch": 0.2681022880215343, "grad_norm": 2.171875, "learning_rate": 0.00019448303085353404, "loss": 5.8549, "step": 5727 }, { "epoch": 0.26814910176136697, "grad_norm": 1.984375, "learning_rate": 0.00019448111861553667, "loss": 3.5283, "step": 5728 }, { "epoch": 0.2681959155011996, "grad_norm": 1.1484375, "learning_rate": 0.0001944792060556004, "loss": 3.4246, "step": 5729 }, { "epoch": 0.26824272924103226, "grad_norm": 1.5546875, "learning_rate": 0.00019447729317373166, "loss": 3.471, "step": 5730 }, { "epoch": 0.2682895429808649, "grad_norm": 1.3125, "learning_rate": 0.000194475379969937, "loss": 3.6037, "step": 5731 }, { "epoch": 0.2683363567206975, "grad_norm": 1.4765625, "learning_rate": 0.00019447346644422292, "loss": 3.376, "step": 5732 }, { "epoch": 0.2683831704605302, "grad_norm": 1.5703125, "learning_rate": 0.000194471552596596, "loss": 3.343, "step": 5733 }, { "epoch": 0.2684299842003628, "grad_norm": 1.796875, "learning_rate": 0.0001944696384270627, "loss": 3.4713, "step": 5734 }, { "epoch": 0.26847679794019547, "grad_norm": 1.375, "learning_rate": 0.0001944677239356296, "loss": 3.7336, "step": 5735 }, { "epoch": 0.2685236116800281, "grad_norm": 1.296875, "learning_rate": 0.00019446580912230318, "loss": 3.2383, "step": 5736 }, { "epoch": 0.2685704254198607, "grad_norm": 0.9609375, "learning_rate": 0.00019446389398708995, "loss": 3.2873, "step": 5737 }, { "epoch": 0.2686172391596934, "grad_norm": 1.0703125, "learning_rate": 0.00019446197852999647, "loss": 3.4193, "step": 5738 }, { "epoch": 0.268664052899526, "grad_norm": 1.375, "learning_rate": 0.00019446006275102929, "loss": 3.3311, "step": 5739 }, { "epoch": 0.26871086663935867, "grad_norm": 1.375, "learning_rate": 0.0001944581466501949, "loss": 2.5725, "step": 5740 }, { "epoch": 0.2687576803791913, "grad_norm": 1.3046875, "learning_rate": 0.0001944562302274998, "loss": 3.719, "step": 5741 }, { "epoch": 0.2688044941190239, "grad_norm": 1.09375, "learning_rate": 0.00019445431348295058, "loss": 3.2814, "step": 5742 }, { "epoch": 0.2688513078588566, "grad_norm": 1.234375, "learning_rate": 0.00019445239641655376, "loss": 3.6428, "step": 5743 }, { "epoch": 0.2688981215986892, "grad_norm": 1.1484375, "learning_rate": 0.00019445047902831583, "loss": 3.5205, "step": 5744 }, { "epoch": 0.2689449353385219, "grad_norm": 1.03125, "learning_rate": 0.00019444856131824336, "loss": 3.1248, "step": 5745 }, { "epoch": 0.2689917490783545, "grad_norm": 1.1328125, "learning_rate": 0.00019444664328634287, "loss": 2.9628, "step": 5746 }, { "epoch": 0.2690385628181871, "grad_norm": 1.5546875, "learning_rate": 0.0001944447249326209, "loss": 3.361, "step": 5747 }, { "epoch": 0.2690853765580198, "grad_norm": 1.40625, "learning_rate": 0.000194442806257084, "loss": 3.3297, "step": 5748 }, { "epoch": 0.2691321902978524, "grad_norm": 1.4453125, "learning_rate": 0.0001944408872597387, "loss": 3.2603, "step": 5749 }, { "epoch": 0.2691790040376851, "grad_norm": 1.8203125, "learning_rate": 0.00019443896794059157, "loss": 3.4425, "step": 5750 }, { "epoch": 0.2692258177775177, "grad_norm": 1.6171875, "learning_rate": 0.00019443704829964905, "loss": 3.8389, "step": 5751 }, { "epoch": 0.26927263151735037, "grad_norm": 1.0703125, "learning_rate": 0.00019443512833691777, "loss": 3.524, "step": 5752 }, { "epoch": 0.269319445257183, "grad_norm": 1.28125, "learning_rate": 0.00019443320805240428, "loss": 3.2406, "step": 5753 }, { "epoch": 0.2693662589970156, "grad_norm": 1.34375, "learning_rate": 0.00019443128744611504, "loss": 3.2923, "step": 5754 }, { "epoch": 0.2694130727368483, "grad_norm": 1.1640625, "learning_rate": 0.0001944293665180567, "loss": 3.1848, "step": 5755 }, { "epoch": 0.2694598864766809, "grad_norm": 1.40625, "learning_rate": 0.00019442744526823572, "loss": 3.4599, "step": 5756 }, { "epoch": 0.2695067002165136, "grad_norm": 1.2421875, "learning_rate": 0.00019442552369665868, "loss": 3.2129, "step": 5757 }, { "epoch": 0.2695535139563462, "grad_norm": 1.40625, "learning_rate": 0.00019442360180333212, "loss": 3.736, "step": 5758 }, { "epoch": 0.2696003276961788, "grad_norm": 2.046875, "learning_rate": 0.0001944216795882626, "loss": 4.1134, "step": 5759 }, { "epoch": 0.2696471414360115, "grad_norm": 1.2265625, "learning_rate": 0.00019441975705145666, "loss": 2.5478, "step": 5760 }, { "epoch": 0.2696939551758441, "grad_norm": 1.5703125, "learning_rate": 0.0001944178341929209, "loss": 3.3377, "step": 5761 }, { "epoch": 0.2697407689156768, "grad_norm": 1.359375, "learning_rate": 0.00019441591101266177, "loss": 3.2958, "step": 5762 }, { "epoch": 0.2697875826555094, "grad_norm": 1.3359375, "learning_rate": 0.0001944139875106859, "loss": 3.3738, "step": 5763 }, { "epoch": 0.269834396395342, "grad_norm": 1.1796875, "learning_rate": 0.00019441206368699986, "loss": 3.0273, "step": 5764 }, { "epoch": 0.2698812101351747, "grad_norm": 1.1796875, "learning_rate": 0.00019441013954161012, "loss": 3.2073, "step": 5765 }, { "epoch": 0.2699280238750073, "grad_norm": 1.6484375, "learning_rate": 0.00019440821507452332, "loss": 3.7727, "step": 5766 }, { "epoch": 0.26997483761484, "grad_norm": 1.1171875, "learning_rate": 0.00019440629028574596, "loss": 3.4647, "step": 5767 }, { "epoch": 0.2700216513546726, "grad_norm": 1.5546875, "learning_rate": 0.00019440436517528461, "loss": 3.387, "step": 5768 }, { "epoch": 0.2700684650945052, "grad_norm": 1.65625, "learning_rate": 0.00019440243974314588, "loss": 3.0975, "step": 5769 }, { "epoch": 0.2701152788343379, "grad_norm": 1.6015625, "learning_rate": 0.00019440051398933626, "loss": 5.4759, "step": 5770 }, { "epoch": 0.2701620925741705, "grad_norm": 1.140625, "learning_rate": 0.00019439858791386237, "loss": 3.0342, "step": 5771 }, { "epoch": 0.2702089063140032, "grad_norm": 1.515625, "learning_rate": 0.00019439666151673074, "loss": 3.4529, "step": 5772 }, { "epoch": 0.2702557200538358, "grad_norm": 1.3671875, "learning_rate": 0.00019439473479794796, "loss": 3.4869, "step": 5773 }, { "epoch": 0.2703025337936684, "grad_norm": 1.3671875, "learning_rate": 0.00019439280775752056, "loss": 3.1892, "step": 5774 }, { "epoch": 0.2703493475335011, "grad_norm": 1.125, "learning_rate": 0.0001943908803954551, "loss": 3.3451, "step": 5775 }, { "epoch": 0.2703961612733337, "grad_norm": 1.328125, "learning_rate": 0.0001943889527117582, "loss": 3.1195, "step": 5776 }, { "epoch": 0.2704429750131664, "grad_norm": 1.1875, "learning_rate": 0.0001943870247064364, "loss": 3.4323, "step": 5777 }, { "epoch": 0.270489788752999, "grad_norm": 1.546875, "learning_rate": 0.00019438509637949628, "loss": 3.0748, "step": 5778 }, { "epoch": 0.2705366024928316, "grad_norm": 1.34375, "learning_rate": 0.00019438316773094435, "loss": 2.8936, "step": 5779 }, { "epoch": 0.2705834162326643, "grad_norm": 1.203125, "learning_rate": 0.0001943812387607873, "loss": 3.8714, "step": 5780 }, { "epoch": 0.2706302299724969, "grad_norm": 1.109375, "learning_rate": 0.0001943793094690316, "loss": 3.0753, "step": 5781 }, { "epoch": 0.2706770437123296, "grad_norm": 1.578125, "learning_rate": 0.00019437737985568385, "loss": 3.2967, "step": 5782 }, { "epoch": 0.2707238574521622, "grad_norm": 1.359375, "learning_rate": 0.00019437544992075063, "loss": 3.7843, "step": 5783 }, { "epoch": 0.2707706711919948, "grad_norm": 1.3671875, "learning_rate": 0.00019437351966423852, "loss": 3.5549, "step": 5784 }, { "epoch": 0.2708174849318275, "grad_norm": 1.5234375, "learning_rate": 0.0001943715890861541, "loss": 3.5363, "step": 5785 }, { "epoch": 0.2708642986716601, "grad_norm": 1.4765625, "learning_rate": 0.00019436965818650398, "loss": 3.3412, "step": 5786 }, { "epoch": 0.2709111124114928, "grad_norm": 1.359375, "learning_rate": 0.0001943677269652947, "loss": 3.2013, "step": 5787 }, { "epoch": 0.2709579261513254, "grad_norm": 1.59375, "learning_rate": 0.00019436579542253283, "loss": 3.3221, "step": 5788 }, { "epoch": 0.27100473989115803, "grad_norm": 1.25, "learning_rate": 0.00019436386355822492, "loss": 3.2491, "step": 5789 }, { "epoch": 0.2710515536309907, "grad_norm": 1.6640625, "learning_rate": 0.00019436193137237765, "loss": 3.2976, "step": 5790 }, { "epoch": 0.2710983673708233, "grad_norm": 1.421875, "learning_rate": 0.00019435999886499756, "loss": 3.7143, "step": 5791 }, { "epoch": 0.271145181110656, "grad_norm": 1.1328125, "learning_rate": 0.0001943580660360912, "loss": 2.9515, "step": 5792 }, { "epoch": 0.2711919948504886, "grad_norm": 1.21875, "learning_rate": 0.0001943561328856652, "loss": 3.3891, "step": 5793 }, { "epoch": 0.27123880859032123, "grad_norm": 1.1875, "learning_rate": 0.00019435419941372614, "loss": 3.4813, "step": 5794 }, { "epoch": 0.2712856223301539, "grad_norm": 1.3515625, "learning_rate": 0.00019435226562028058, "loss": 3.4319, "step": 5795 }, { "epoch": 0.2713324360699865, "grad_norm": 1.4609375, "learning_rate": 0.00019435033150533513, "loss": 3.6205, "step": 5796 }, { "epoch": 0.2713792498098192, "grad_norm": 2.359375, "learning_rate": 0.00019434839706889635, "loss": 3.9879, "step": 5797 }, { "epoch": 0.2714260635496518, "grad_norm": 1.359375, "learning_rate": 0.00019434646231097092, "loss": 3.2059, "step": 5798 }, { "epoch": 0.27147287728948444, "grad_norm": 1.3671875, "learning_rate": 0.00019434452723156534, "loss": 3.8592, "step": 5799 }, { "epoch": 0.2715196910293171, "grad_norm": 1.2265625, "learning_rate": 0.00019434259183068623, "loss": 3.5444, "step": 5800 }, { "epoch": 0.27156650476914973, "grad_norm": 1.109375, "learning_rate": 0.0001943406561083402, "loss": 3.314, "step": 5801 }, { "epoch": 0.2716133185089824, "grad_norm": 1.5625, "learning_rate": 0.00019433872006453386, "loss": 3.4115, "step": 5802 }, { "epoch": 0.271660132248815, "grad_norm": 1.21875, "learning_rate": 0.00019433678369927377, "loss": 3.8937, "step": 5803 }, { "epoch": 0.27170694598864764, "grad_norm": 1.8359375, "learning_rate": 0.00019433484701256656, "loss": 3.4721, "step": 5804 }, { "epoch": 0.2717537597284803, "grad_norm": 1.3984375, "learning_rate": 0.00019433291000441877, "loss": 3.5707, "step": 5805 }, { "epoch": 0.27180057346831293, "grad_norm": 1.0859375, "learning_rate": 0.00019433097267483707, "loss": 3.1551, "step": 5806 }, { "epoch": 0.2718473872081456, "grad_norm": 1.171875, "learning_rate": 0.00019432903502382802, "loss": 3.4982, "step": 5807 }, { "epoch": 0.2718942009479782, "grad_norm": 5.625, "learning_rate": 0.00019432709705139825, "loss": 3.1144, "step": 5808 }, { "epoch": 0.27194101468781084, "grad_norm": 1.8359375, "learning_rate": 0.00019432515875755432, "loss": 3.5723, "step": 5809 }, { "epoch": 0.2719878284276435, "grad_norm": 1.84375, "learning_rate": 0.00019432322014230292, "loss": 2.9959, "step": 5810 }, { "epoch": 0.27203464216747614, "grad_norm": 1.0390625, "learning_rate": 0.00019432128120565055, "loss": 3.4407, "step": 5811 }, { "epoch": 0.2720814559073088, "grad_norm": 1.4453125, "learning_rate": 0.0001943193419476039, "loss": 3.5964, "step": 5812 }, { "epoch": 0.27212826964714143, "grad_norm": 1.28125, "learning_rate": 0.0001943174023681695, "loss": 3.3814, "step": 5813 }, { "epoch": 0.2721750833869741, "grad_norm": 1.4296875, "learning_rate": 0.00019431546246735404, "loss": 3.4331, "step": 5814 }, { "epoch": 0.2722218971268067, "grad_norm": 1.9296875, "learning_rate": 0.00019431352224516405, "loss": 3.3505, "step": 5815 }, { "epoch": 0.27226871086663934, "grad_norm": 1.390625, "learning_rate": 0.00019431158170160623, "loss": 3.392, "step": 5816 }, { "epoch": 0.272315524606472, "grad_norm": 1.34375, "learning_rate": 0.00019430964083668711, "loss": 3.3816, "step": 5817 }, { "epoch": 0.27236233834630463, "grad_norm": 1.625, "learning_rate": 0.0001943076996504134, "loss": 3.523, "step": 5818 }, { "epoch": 0.2724091520861373, "grad_norm": 1.515625, "learning_rate": 0.0001943057581427916, "loss": 3.4764, "step": 5819 }, { "epoch": 0.2724559658259699, "grad_norm": 1.1875, "learning_rate": 0.00019430381631382838, "loss": 3.583, "step": 5820 }, { "epoch": 0.27250277956580254, "grad_norm": 1.3671875, "learning_rate": 0.00019430187416353038, "loss": 3.7781, "step": 5821 }, { "epoch": 0.2725495933056352, "grad_norm": 1.015625, "learning_rate": 0.0001942999316919042, "loss": 4.7601, "step": 5822 }, { "epoch": 0.27259640704546784, "grad_norm": 1.140625, "learning_rate": 0.00019429798889895644, "loss": 2.7211, "step": 5823 }, { "epoch": 0.2726432207853005, "grad_norm": 1.1875, "learning_rate": 0.00019429604578469377, "loss": 3.2228, "step": 5824 }, { "epoch": 0.27269003452513313, "grad_norm": 1.2734375, "learning_rate": 0.00019429410234912271, "loss": 3.6127, "step": 5825 }, { "epoch": 0.27273684826496575, "grad_norm": 1.3671875, "learning_rate": 0.00019429215859224997, "loss": 3.588, "step": 5826 }, { "epoch": 0.2727836620047984, "grad_norm": 1.203125, "learning_rate": 0.00019429021451408218, "loss": 3.4701, "step": 5827 }, { "epoch": 0.27283047574463104, "grad_norm": 1.3984375, "learning_rate": 0.0001942882701146259, "loss": 3.2736, "step": 5828 }, { "epoch": 0.2728772894844637, "grad_norm": 1.5234375, "learning_rate": 0.0001942863253938878, "loss": 3.4742, "step": 5829 }, { "epoch": 0.27292410322429633, "grad_norm": 1.46875, "learning_rate": 0.0001942843803518745, "loss": 3.439, "step": 5830 }, { "epoch": 0.27297091696412895, "grad_norm": 1.2265625, "learning_rate": 0.00019428243498859264, "loss": 3.3155, "step": 5831 }, { "epoch": 0.2730177307039616, "grad_norm": 1.5, "learning_rate": 0.00019428048930404882, "loss": 3.8226, "step": 5832 }, { "epoch": 0.27306454444379424, "grad_norm": 1.453125, "learning_rate": 0.00019427854329824966, "loss": 3.4341, "step": 5833 }, { "epoch": 0.2731113581836269, "grad_norm": 1.3203125, "learning_rate": 0.00019427659697120184, "loss": 5.4385, "step": 5834 }, { "epoch": 0.27315817192345954, "grad_norm": 1.453125, "learning_rate": 0.00019427465032291195, "loss": 3.42, "step": 5835 }, { "epoch": 0.27320498566329215, "grad_norm": 1.4375, "learning_rate": 0.00019427270335338665, "loss": 3.1138, "step": 5836 }, { "epoch": 0.27325179940312483, "grad_norm": 1.4296875, "learning_rate": 0.00019427075606263255, "loss": 2.8818, "step": 5837 }, { "epoch": 0.27329861314295745, "grad_norm": 1.2109375, "learning_rate": 0.00019426880845065632, "loss": 3.6339, "step": 5838 }, { "epoch": 0.2733454268827901, "grad_norm": 1.5703125, "learning_rate": 0.00019426686051746454, "loss": 3.7389, "step": 5839 }, { "epoch": 0.27339224062262274, "grad_norm": 1.28125, "learning_rate": 0.0001942649122630639, "loss": 3.9225, "step": 5840 }, { "epoch": 0.27343905436245536, "grad_norm": 1.2578125, "learning_rate": 0.00019426296368746103, "loss": 3.3326, "step": 5841 }, { "epoch": 0.27348586810228803, "grad_norm": 1.15625, "learning_rate": 0.00019426101479066255, "loss": 3.1166, "step": 5842 }, { "epoch": 0.27353268184212065, "grad_norm": 1.5234375, "learning_rate": 0.00019425906557267507, "loss": 3.4026, "step": 5843 }, { "epoch": 0.2735794955819533, "grad_norm": 1.53125, "learning_rate": 0.00019425711603350535, "loss": 3.4052, "step": 5844 }, { "epoch": 0.27362630932178594, "grad_norm": 1.3046875, "learning_rate": 0.0001942551661731599, "loss": 3.1686, "step": 5845 }, { "epoch": 0.27367312306161856, "grad_norm": 1.640625, "learning_rate": 0.00019425321599164544, "loss": 3.3686, "step": 5846 }, { "epoch": 0.27371993680145124, "grad_norm": 1.484375, "learning_rate": 0.00019425126548896859, "loss": 3.1372, "step": 5847 }, { "epoch": 0.27376675054128385, "grad_norm": 1.1953125, "learning_rate": 0.00019424931466513603, "loss": 3.2899, "step": 5848 }, { "epoch": 0.27381356428111653, "grad_norm": 1.2421875, "learning_rate": 0.00019424736352015434, "loss": 3.4078, "step": 5849 }, { "epoch": 0.27386037802094915, "grad_norm": 1.34375, "learning_rate": 0.00019424541205403023, "loss": 3.8924, "step": 5850 }, { "epoch": 0.27390719176078177, "grad_norm": 1.2890625, "learning_rate": 0.00019424346026677028, "loss": 3.5693, "step": 5851 }, { "epoch": 0.27395400550061444, "grad_norm": 1.0859375, "learning_rate": 0.00019424150815838123, "loss": 3.0726, "step": 5852 }, { "epoch": 0.27400081924044706, "grad_norm": 1.6796875, "learning_rate": 0.00019423955572886968, "loss": 3.3668, "step": 5853 }, { "epoch": 0.27404763298027973, "grad_norm": 2.84375, "learning_rate": 0.0001942376029782423, "loss": 2.9644, "step": 5854 }, { "epoch": 0.27409444672011235, "grad_norm": 1.1484375, "learning_rate": 0.00019423564990650571, "loss": 3.318, "step": 5855 }, { "epoch": 0.27414126045994497, "grad_norm": 1.15625, "learning_rate": 0.00019423369651366663, "loss": 3.0504, "step": 5856 }, { "epoch": 0.27418807419977764, "grad_norm": 1.5390625, "learning_rate": 0.00019423174279973164, "loss": 3.3231, "step": 5857 }, { "epoch": 0.27423488793961026, "grad_norm": 1.546875, "learning_rate": 0.00019422978876470747, "loss": 3.539, "step": 5858 }, { "epoch": 0.27428170167944294, "grad_norm": 1.2578125, "learning_rate": 0.0001942278344086007, "loss": 3.0804, "step": 5859 }, { "epoch": 0.27432851541927555, "grad_norm": 1.40625, "learning_rate": 0.00019422587973141805, "loss": 3.5108, "step": 5860 }, { "epoch": 0.27437532915910817, "grad_norm": 1.2109375, "learning_rate": 0.00019422392473316614, "loss": 3.414, "step": 5861 }, { "epoch": 0.27442214289894085, "grad_norm": 2.203125, "learning_rate": 0.00019422196941385167, "loss": 3.4468, "step": 5862 }, { "epoch": 0.27446895663877346, "grad_norm": 1.15625, "learning_rate": 0.00019422001377348132, "loss": 3.4002, "step": 5863 }, { "epoch": 0.27451577037860614, "grad_norm": 1.1171875, "learning_rate": 0.0001942180578120617, "loss": 3.1109, "step": 5864 }, { "epoch": 0.27456258411843876, "grad_norm": 1.34375, "learning_rate": 0.00019421610152959944, "loss": 3.5926, "step": 5865 }, { "epoch": 0.2746093978582714, "grad_norm": 1.4140625, "learning_rate": 0.0001942141449261013, "loss": 3.5006, "step": 5866 }, { "epoch": 0.27465621159810405, "grad_norm": 1.5390625, "learning_rate": 0.00019421218800157392, "loss": 3.6247, "step": 5867 }, { "epoch": 0.27470302533793667, "grad_norm": 1.390625, "learning_rate": 0.00019421023075602393, "loss": 3.5243, "step": 5868 }, { "epoch": 0.27474983907776934, "grad_norm": 1.328125, "learning_rate": 0.00019420827318945806, "loss": 3.5876, "step": 5869 }, { "epoch": 0.27479665281760196, "grad_norm": 1.28125, "learning_rate": 0.0001942063153018829, "loss": 3.4384, "step": 5870 }, { "epoch": 0.27484346655743463, "grad_norm": 1.546875, "learning_rate": 0.0001942043570933052, "loss": 3.0203, "step": 5871 }, { "epoch": 0.27489028029726725, "grad_norm": 1.6015625, "learning_rate": 0.00019420239856373156, "loss": 3.2717, "step": 5872 }, { "epoch": 0.27493709403709987, "grad_norm": 1.34375, "learning_rate": 0.00019420043971316874, "loss": 3.3275, "step": 5873 }, { "epoch": 0.27498390777693255, "grad_norm": 2.453125, "learning_rate": 0.00019419848054162334, "loss": 2.3115, "step": 5874 }, { "epoch": 0.27503072151676516, "grad_norm": 1.6015625, "learning_rate": 0.00019419652104910205, "loss": 3.5402, "step": 5875 }, { "epoch": 0.27507753525659784, "grad_norm": 1.6640625, "learning_rate": 0.0001941945612356116, "loss": 3.6111, "step": 5876 }, { "epoch": 0.27512434899643046, "grad_norm": 1.0859375, "learning_rate": 0.0001941926011011586, "loss": 3.2702, "step": 5877 }, { "epoch": 0.2751711627362631, "grad_norm": 1.375, "learning_rate": 0.00019419064064574976, "loss": 3.2985, "step": 5878 }, { "epoch": 0.27521797647609575, "grad_norm": 1.1640625, "learning_rate": 0.00019418867986939175, "loss": 3.2528, "step": 5879 }, { "epoch": 0.27526479021592837, "grad_norm": 1.6015625, "learning_rate": 0.00019418671877209127, "loss": 3.827, "step": 5880 }, { "epoch": 0.27531160395576104, "grad_norm": 1.6015625, "learning_rate": 0.00019418475735385497, "loss": 3.3103, "step": 5881 }, { "epoch": 0.27535841769559366, "grad_norm": 1.4296875, "learning_rate": 0.0001941827956146896, "loss": 3.2416, "step": 5882 }, { "epoch": 0.2754052314354263, "grad_norm": 1.6015625, "learning_rate": 0.00019418083355460174, "loss": 3.5301, "step": 5883 }, { "epoch": 0.27545204517525895, "grad_norm": 1.2265625, "learning_rate": 0.00019417887117359817, "loss": 3.0912, "step": 5884 }, { "epoch": 0.27549885891509157, "grad_norm": 1.8125, "learning_rate": 0.00019417690847168553, "loss": 3.1456, "step": 5885 }, { "epoch": 0.27554567265492425, "grad_norm": 1.7109375, "learning_rate": 0.0001941749454488705, "loss": 3.4056, "step": 5886 }, { "epoch": 0.27559248639475686, "grad_norm": 1.8203125, "learning_rate": 0.0001941729821051598, "loss": 3.6811, "step": 5887 }, { "epoch": 0.2756393001345895, "grad_norm": 1.15625, "learning_rate": 0.00019417101844056015, "loss": 3.5361, "step": 5888 }, { "epoch": 0.27568611387442216, "grad_norm": 1.140625, "learning_rate": 0.00019416905445507815, "loss": 3.2614, "step": 5889 }, { "epoch": 0.2757329276142548, "grad_norm": 1.1953125, "learning_rate": 0.00019416709014872055, "loss": 3.2895, "step": 5890 }, { "epoch": 0.27577974135408745, "grad_norm": 1.203125, "learning_rate": 0.00019416512552149402, "loss": 3.2627, "step": 5891 }, { "epoch": 0.27582655509392007, "grad_norm": 1.625, "learning_rate": 0.00019416316057340528, "loss": 3.6269, "step": 5892 }, { "epoch": 0.2758733688337527, "grad_norm": 1.390625, "learning_rate": 0.000194161195304461, "loss": 3.3296, "step": 5893 }, { "epoch": 0.27592018257358536, "grad_norm": 1.46875, "learning_rate": 0.00019415922971466792, "loss": 3.4812, "step": 5894 }, { "epoch": 0.275966996313418, "grad_norm": 1.5078125, "learning_rate": 0.0001941572638040327, "loss": 3.604, "step": 5895 }, { "epoch": 0.27601381005325065, "grad_norm": 1.5, "learning_rate": 0.00019415529757256202, "loss": 3.6555, "step": 5896 }, { "epoch": 0.27606062379308327, "grad_norm": 1.34375, "learning_rate": 0.00019415333102026266, "loss": 3.3469, "step": 5897 }, { "epoch": 0.2761074375329159, "grad_norm": 1.0, "learning_rate": 0.00019415136414714122, "loss": 3.3583, "step": 5898 }, { "epoch": 0.27615425127274856, "grad_norm": 1.3125, "learning_rate": 0.00019414939695320445, "loss": 3.4018, "step": 5899 }, { "epoch": 0.2762010650125812, "grad_norm": 1.1953125, "learning_rate": 0.00019414742943845906, "loss": 3.2341, "step": 5900 }, { "epoch": 0.27624787875241386, "grad_norm": 1.3984375, "learning_rate": 0.00019414546160291174, "loss": 3.5472, "step": 5901 }, { "epoch": 0.2762946924922465, "grad_norm": 1.203125, "learning_rate": 0.00019414349344656922, "loss": 2.5759, "step": 5902 }, { "epoch": 0.2763415062320791, "grad_norm": 1.4609375, "learning_rate": 0.00019414152496943817, "loss": 3.9704, "step": 5903 }, { "epoch": 0.27638831997191177, "grad_norm": 1.046875, "learning_rate": 0.00019413955617152533, "loss": 3.5236, "step": 5904 }, { "epoch": 0.2764351337117444, "grad_norm": 1.78125, "learning_rate": 0.00019413758705283737, "loss": 3.413, "step": 5905 }, { "epoch": 0.27648194745157706, "grad_norm": 1.5078125, "learning_rate": 0.00019413561761338105, "loss": 3.2595, "step": 5906 }, { "epoch": 0.2765287611914097, "grad_norm": 1.1328125, "learning_rate": 0.00019413364785316305, "loss": 3.4694, "step": 5907 }, { "epoch": 0.2765755749312423, "grad_norm": 1.5078125, "learning_rate": 0.00019413167777219007, "loss": 3.0741, "step": 5908 }, { "epoch": 0.27662238867107497, "grad_norm": 1.4609375, "learning_rate": 0.00019412970737046884, "loss": 3.2263, "step": 5909 }, { "epoch": 0.2766692024109076, "grad_norm": 1.5546875, "learning_rate": 0.0001941277366480061, "loss": 3.0906, "step": 5910 }, { "epoch": 0.27671601615074026, "grad_norm": 1.3046875, "learning_rate": 0.00019412576560480852, "loss": 2.9052, "step": 5911 }, { "epoch": 0.2767628298905729, "grad_norm": 1.203125, "learning_rate": 0.00019412379424088282, "loss": 3.6287, "step": 5912 }, { "epoch": 0.2768096436304055, "grad_norm": 1.3125, "learning_rate": 0.00019412182255623575, "loss": 3.4445, "step": 5913 }, { "epoch": 0.2768564573702382, "grad_norm": 2.109375, "learning_rate": 0.00019411985055087404, "loss": 3.4828, "step": 5914 }, { "epoch": 0.2769032711100708, "grad_norm": 1.5625, "learning_rate": 0.00019411787822480432, "loss": 3.5378, "step": 5915 }, { "epoch": 0.27695008484990347, "grad_norm": 1.1875, "learning_rate": 0.0001941159055780334, "loss": 3.1558, "step": 5916 }, { "epoch": 0.2769968985897361, "grad_norm": 1.1875, "learning_rate": 0.00019411393261056797, "loss": 3.6398, "step": 5917 }, { "epoch": 0.2770437123295687, "grad_norm": 1.2890625, "learning_rate": 0.00019411195932241476, "loss": 3.4193, "step": 5918 }, { "epoch": 0.2770905260694014, "grad_norm": 1.578125, "learning_rate": 0.00019410998571358046, "loss": 3.5129, "step": 5919 }, { "epoch": 0.277137339809234, "grad_norm": 1.0546875, "learning_rate": 0.00019410801178407187, "loss": 3.5893, "step": 5920 }, { "epoch": 0.27718415354906667, "grad_norm": 1.1328125, "learning_rate": 0.00019410603753389563, "loss": 3.2279, "step": 5921 }, { "epoch": 0.2772309672888993, "grad_norm": 1.4140625, "learning_rate": 0.00019410406296305852, "loss": 3.1628, "step": 5922 }, { "epoch": 0.2772777810287319, "grad_norm": 1.3203125, "learning_rate": 0.00019410208807156724, "loss": 3.5551, "step": 5923 }, { "epoch": 0.2773245947685646, "grad_norm": 1.2421875, "learning_rate": 0.00019410011285942856, "loss": 3.1341, "step": 5924 }, { "epoch": 0.2773714085083972, "grad_norm": 1.609375, "learning_rate": 0.00019409813732664918, "loss": 3.5096, "step": 5925 }, { "epoch": 0.2774182222482299, "grad_norm": 1.3046875, "learning_rate": 0.00019409616147323584, "loss": 3.5805, "step": 5926 }, { "epoch": 0.2774650359880625, "grad_norm": 1.1953125, "learning_rate": 0.00019409418529919525, "loss": 3.6853, "step": 5927 }, { "epoch": 0.2775118497278951, "grad_norm": 1.2890625, "learning_rate": 0.0001940922088045342, "loss": 3.5157, "step": 5928 }, { "epoch": 0.2775586634677278, "grad_norm": 1.3359375, "learning_rate": 0.00019409023198925933, "loss": 3.5994, "step": 5929 }, { "epoch": 0.2776054772075604, "grad_norm": 1.0546875, "learning_rate": 0.00019408825485337747, "loss": 3.2, "step": 5930 }, { "epoch": 0.2776522909473931, "grad_norm": 1.3984375, "learning_rate": 0.0001940862773968953, "loss": 3.2759, "step": 5931 }, { "epoch": 0.2776991046872257, "grad_norm": 1.3359375, "learning_rate": 0.0001940842996198196, "loss": 3.4028, "step": 5932 }, { "epoch": 0.27774591842705837, "grad_norm": 1.234375, "learning_rate": 0.0001940823215221571, "loss": 3.2189, "step": 5933 }, { "epoch": 0.277792732166891, "grad_norm": 1.4140625, "learning_rate": 0.0001940803431039145, "loss": 3.3841, "step": 5934 }, { "epoch": 0.2778395459067236, "grad_norm": 1.6484375, "learning_rate": 0.00019407836436509857, "loss": 3.6142, "step": 5935 }, { "epoch": 0.2778863596465563, "grad_norm": 1.6640625, "learning_rate": 0.00019407638530571605, "loss": 3.5746, "step": 5936 }, { "epoch": 0.2779331733863889, "grad_norm": 1.4296875, "learning_rate": 0.0001940744059257737, "loss": 3.094, "step": 5937 }, { "epoch": 0.2779799871262216, "grad_norm": 1.5703125, "learning_rate": 0.00019407242622527824, "loss": 3.3435, "step": 5938 }, { "epoch": 0.2780268008660542, "grad_norm": 1.09375, "learning_rate": 0.00019407044620423642, "loss": 3.1823, "step": 5939 }, { "epoch": 0.2780736146058868, "grad_norm": 1.71875, "learning_rate": 0.000194068465862655, "loss": 3.3663, "step": 5940 }, { "epoch": 0.2781204283457195, "grad_norm": 1.453125, "learning_rate": 0.00019406648520054072, "loss": 3.5957, "step": 5941 }, { "epoch": 0.2781672420855521, "grad_norm": 1.4765625, "learning_rate": 0.00019406450421790032, "loss": 3.2556, "step": 5942 }, { "epoch": 0.2782140558253848, "grad_norm": 1.1171875, "learning_rate": 0.0001940625229147406, "loss": 3.254, "step": 5943 }, { "epoch": 0.2782608695652174, "grad_norm": 1.1640625, "learning_rate": 0.0001940605412910682, "loss": 3.0697, "step": 5944 }, { "epoch": 0.27830768330505, "grad_norm": 1.2421875, "learning_rate": 0.00019405855934688998, "loss": 3.1318, "step": 5945 }, { "epoch": 0.2783544970448827, "grad_norm": 1.21875, "learning_rate": 0.00019405657708221265, "loss": 3.427, "step": 5946 }, { "epoch": 0.2784013107847153, "grad_norm": 1.234375, "learning_rate": 0.000194054594497043, "loss": 3.2932, "step": 5947 }, { "epoch": 0.278448124524548, "grad_norm": 1.1953125, "learning_rate": 0.00019405261159138773, "loss": 3.2075, "step": 5948 }, { "epoch": 0.2784949382643806, "grad_norm": 1.2421875, "learning_rate": 0.00019405062836525363, "loss": 3.2463, "step": 5949 }, { "epoch": 0.2785417520042132, "grad_norm": 1.3203125, "learning_rate": 0.00019404864481864742, "loss": 3.2804, "step": 5950 }, { "epoch": 0.2785885657440459, "grad_norm": 1.5078125, "learning_rate": 0.0001940466609515759, "loss": 3.1643, "step": 5951 }, { "epoch": 0.2786353794838785, "grad_norm": 1.390625, "learning_rate": 0.00019404467676404586, "loss": 3.2584, "step": 5952 }, { "epoch": 0.2786821932237112, "grad_norm": 1.984375, "learning_rate": 0.000194042692256064, "loss": 3.4947, "step": 5953 }, { "epoch": 0.2787290069635438, "grad_norm": 1.4296875, "learning_rate": 0.0001940407074276371, "loss": 3.1739, "step": 5954 }, { "epoch": 0.2787758207033764, "grad_norm": 1.2578125, "learning_rate": 0.0001940387222787719, "loss": 2.9444, "step": 5955 }, { "epoch": 0.2788226344432091, "grad_norm": 1.34375, "learning_rate": 0.00019403673680947522, "loss": 3.7037, "step": 5956 }, { "epoch": 0.2788694481830417, "grad_norm": 1.421875, "learning_rate": 0.00019403475101975376, "loss": 3.5004, "step": 5957 }, { "epoch": 0.2789162619228744, "grad_norm": 1.4765625, "learning_rate": 0.00019403276490961434, "loss": 3.4183, "step": 5958 }, { "epoch": 0.278963075662707, "grad_norm": 1.2265625, "learning_rate": 0.00019403077847906372, "loss": 3.3232, "step": 5959 }, { "epoch": 0.2790098894025396, "grad_norm": 1.296875, "learning_rate": 0.00019402879172810867, "loss": 3.0763, "step": 5960 }, { "epoch": 0.2790567031423723, "grad_norm": 1.265625, "learning_rate": 0.00019402680465675594, "loss": 2.9318, "step": 5961 }, { "epoch": 0.2791035168822049, "grad_norm": 1.5, "learning_rate": 0.00019402481726501228, "loss": 3.4368, "step": 5962 }, { "epoch": 0.2791503306220376, "grad_norm": 1.8359375, "learning_rate": 0.00019402282955288452, "loss": 3.7339, "step": 5963 }, { "epoch": 0.2791971443618702, "grad_norm": 1.1875, "learning_rate": 0.00019402084152037938, "loss": 3.2089, "step": 5964 }, { "epoch": 0.2792439581017028, "grad_norm": 1.140625, "learning_rate": 0.00019401885316750364, "loss": 3.3449, "step": 5965 }, { "epoch": 0.2792907718415355, "grad_norm": 1.3671875, "learning_rate": 0.00019401686449426416, "loss": 3.5133, "step": 5966 }, { "epoch": 0.2793375855813681, "grad_norm": 1.3515625, "learning_rate": 0.00019401487550066763, "loss": 2.5194, "step": 5967 }, { "epoch": 0.2793843993212008, "grad_norm": 1.96875, "learning_rate": 0.0001940128861867208, "loss": 3.8388, "step": 5968 }, { "epoch": 0.2794312130610334, "grad_norm": 1.421875, "learning_rate": 0.00019401089655243052, "loss": 3.2574, "step": 5969 }, { "epoch": 0.27947802680086603, "grad_norm": 1.53125, "learning_rate": 0.00019400890659780355, "loss": 3.5086, "step": 5970 }, { "epoch": 0.2795248405406987, "grad_norm": 1.609375, "learning_rate": 0.00019400691632284663, "loss": 3.3097, "step": 5971 }, { "epoch": 0.2795716542805313, "grad_norm": 1.625, "learning_rate": 0.00019400492572756663, "loss": 3.4276, "step": 5972 }, { "epoch": 0.279618468020364, "grad_norm": 1.3515625, "learning_rate": 0.00019400293481197024, "loss": 3.4494, "step": 5973 }, { "epoch": 0.2796652817601966, "grad_norm": 1.671875, "learning_rate": 0.00019400094357606431, "loss": 3.1669, "step": 5974 }, { "epoch": 0.27971209550002923, "grad_norm": 2.03125, "learning_rate": 0.00019399895201985555, "loss": 3.7012, "step": 5975 }, { "epoch": 0.2797589092398619, "grad_norm": 1.359375, "learning_rate": 0.00019399696014335086, "loss": 3.1596, "step": 5976 }, { "epoch": 0.2798057229796945, "grad_norm": 1.0234375, "learning_rate": 0.0001939949679465569, "loss": 3.472, "step": 5977 }, { "epoch": 0.2798525367195272, "grad_norm": 1.453125, "learning_rate": 0.00019399297542948058, "loss": 3.2129, "step": 5978 }, { "epoch": 0.2798993504593598, "grad_norm": 1.265625, "learning_rate": 0.0001939909825921286, "loss": 3.3245, "step": 5979 }, { "epoch": 0.27994616419919244, "grad_norm": 1.640625, "learning_rate": 0.0001939889894345078, "loss": 3.3168, "step": 5980 }, { "epoch": 0.2799929779390251, "grad_norm": 1.109375, "learning_rate": 0.0001939869959566249, "loss": 2.988, "step": 5981 }, { "epoch": 0.28003979167885773, "grad_norm": 1.2578125, "learning_rate": 0.0001939850021584868, "loss": 3.0362, "step": 5982 }, { "epoch": 0.2800866054186904, "grad_norm": 1.296875, "learning_rate": 0.00019398300804010022, "loss": 3.047, "step": 5983 }, { "epoch": 0.280133419158523, "grad_norm": 1.1640625, "learning_rate": 0.00019398101360147197, "loss": 3.2694, "step": 5984 }, { "epoch": 0.28018023289835564, "grad_norm": 1.6015625, "learning_rate": 0.00019397901884260884, "loss": 3.5657, "step": 5985 }, { "epoch": 0.2802270466381883, "grad_norm": 1.296875, "learning_rate": 0.00019397702376351762, "loss": 3.3073, "step": 5986 }, { "epoch": 0.28027386037802093, "grad_norm": 1.8671875, "learning_rate": 0.00019397502836420515, "loss": 3.2018, "step": 5987 }, { "epoch": 0.2803206741178536, "grad_norm": 1.4765625, "learning_rate": 0.0001939730326446782, "loss": 3.6608, "step": 5988 }, { "epoch": 0.2803674878576862, "grad_norm": 1.5546875, "learning_rate": 0.00019397103660494355, "loss": 3.3015, "step": 5989 }, { "epoch": 0.28041430159751884, "grad_norm": 1.734375, "learning_rate": 0.00019396904024500806, "loss": 3.1706, "step": 5990 }, { "epoch": 0.2804611153373515, "grad_norm": 1.6171875, "learning_rate": 0.00019396704356487848, "loss": 3.2062, "step": 5991 }, { "epoch": 0.28050792907718414, "grad_norm": 1.09375, "learning_rate": 0.00019396504656456163, "loss": 3.5477, "step": 5992 }, { "epoch": 0.2805547428170168, "grad_norm": 1.484375, "learning_rate": 0.00019396304924406432, "loss": 3.0753, "step": 5993 }, { "epoch": 0.28060155655684943, "grad_norm": 1.3125, "learning_rate": 0.00019396105160339336, "loss": 3.5352, "step": 5994 }, { "epoch": 0.2806483702966821, "grad_norm": 1.515625, "learning_rate": 0.00019395905364255552, "loss": 3.3657, "step": 5995 }, { "epoch": 0.2806951840365147, "grad_norm": 1.171875, "learning_rate": 0.00019395705536155765, "loss": 3.3798, "step": 5996 }, { "epoch": 0.28074199777634734, "grad_norm": 1.140625, "learning_rate": 0.00019395505676040654, "loss": 3.2207, "step": 5997 }, { "epoch": 0.28078881151618, "grad_norm": 1.1015625, "learning_rate": 0.000193953057839109, "loss": 3.0241, "step": 5998 }, { "epoch": 0.28083562525601263, "grad_norm": 1.4921875, "learning_rate": 0.0001939510585976719, "loss": 3.833, "step": 5999 }, { "epoch": 0.2808824389958453, "grad_norm": 1.09375, "learning_rate": 0.00019394905903610194, "loss": 2.8725, "step": 6000 }, { "epoch": 0.2809292527356779, "grad_norm": 1.2578125, "learning_rate": 0.00019394705915440602, "loss": 3.2037, "step": 6001 }, { "epoch": 0.28097606647551054, "grad_norm": 1.421875, "learning_rate": 0.00019394505895259088, "loss": 3.3752, "step": 6002 }, { "epoch": 0.2810228802153432, "grad_norm": 1.265625, "learning_rate": 0.00019394305843066346, "loss": 3.4064, "step": 6003 }, { "epoch": 0.28106969395517584, "grad_norm": 1.40625, "learning_rate": 0.00019394105758863044, "loss": 3.673, "step": 6004 }, { "epoch": 0.2811165076950085, "grad_norm": 1.5625, "learning_rate": 0.0001939390564264987, "loss": 3.4375, "step": 6005 }, { "epoch": 0.28116332143484113, "grad_norm": 1.2578125, "learning_rate": 0.00019393705494427508, "loss": 3.4395, "step": 6006 }, { "epoch": 0.28121013517467375, "grad_norm": 1.3984375, "learning_rate": 0.00019393505314196636, "loss": 3.4278, "step": 6007 }, { "epoch": 0.2812569489145064, "grad_norm": 1.1484375, "learning_rate": 0.00019393305101957938, "loss": 3.3847, "step": 6008 }, { "epoch": 0.28130376265433904, "grad_norm": 1.28125, "learning_rate": 0.00019393104857712095, "loss": 3.4549, "step": 6009 }, { "epoch": 0.2813505763941717, "grad_norm": 1.828125, "learning_rate": 0.00019392904581459793, "loss": 3.5067, "step": 6010 }, { "epoch": 0.28139739013400433, "grad_norm": 1.515625, "learning_rate": 0.00019392704273201708, "loss": 3.8686, "step": 6011 }, { "epoch": 0.28144420387383695, "grad_norm": 1.375, "learning_rate": 0.00019392503932938527, "loss": 3.9768, "step": 6012 }, { "epoch": 0.2814910176136696, "grad_norm": 1.3046875, "learning_rate": 0.0001939230356067093, "loss": 2.9957, "step": 6013 }, { "epoch": 0.28153783135350224, "grad_norm": 1.34375, "learning_rate": 0.00019392103156399605, "loss": 3.5284, "step": 6014 }, { "epoch": 0.2815846450933349, "grad_norm": 1.4140625, "learning_rate": 0.0001939190272012523, "loss": 3.1656, "step": 6015 }, { "epoch": 0.28163145883316754, "grad_norm": 1.28125, "learning_rate": 0.0001939170225184849, "loss": 3.3774, "step": 6016 }, { "epoch": 0.28167827257300015, "grad_norm": 1.078125, "learning_rate": 0.00019391501751570066, "loss": 3.1937, "step": 6017 }, { "epoch": 0.28172508631283283, "grad_norm": 1.1328125, "learning_rate": 0.00019391301219290642, "loss": 2.9819, "step": 6018 }, { "epoch": 0.28177190005266545, "grad_norm": 1.328125, "learning_rate": 0.00019391100655010904, "loss": 3.4265, "step": 6019 }, { "epoch": 0.2818187137924981, "grad_norm": 1.421875, "learning_rate": 0.00019390900058731533, "loss": 3.4383, "step": 6020 }, { "epoch": 0.28186552753233074, "grad_norm": 1.59375, "learning_rate": 0.00019390699430453214, "loss": 3.9514, "step": 6021 }, { "epoch": 0.28191234127216336, "grad_norm": 1.2578125, "learning_rate": 0.00019390498770176627, "loss": 3.1859, "step": 6022 }, { "epoch": 0.28195915501199603, "grad_norm": 1.296875, "learning_rate": 0.0001939029807790246, "loss": 2.8692, "step": 6023 }, { "epoch": 0.28200596875182865, "grad_norm": 1.3828125, "learning_rate": 0.00019390097353631395, "loss": 3.5408, "step": 6024 }, { "epoch": 0.2820527824916613, "grad_norm": 1.5703125, "learning_rate": 0.00019389896597364116, "loss": 3.7414, "step": 6025 }, { "epoch": 0.28209959623149394, "grad_norm": 1.546875, "learning_rate": 0.00019389695809101304, "loss": 2.9086, "step": 6026 }, { "epoch": 0.28214640997132656, "grad_norm": 1.3359375, "learning_rate": 0.0001938949498884365, "loss": 3.6861, "step": 6027 }, { "epoch": 0.28219322371115924, "grad_norm": 1.265625, "learning_rate": 0.00019389294136591833, "loss": 3.4337, "step": 6028 }, { "epoch": 0.28224003745099185, "grad_norm": 1.0625, "learning_rate": 0.00019389093252346543, "loss": 3.0878, "step": 6029 }, { "epoch": 0.28228685119082453, "grad_norm": 1.4453125, "learning_rate": 0.00019388892336108456, "loss": 3.518, "step": 6030 }, { "epoch": 0.28233366493065715, "grad_norm": 1.296875, "learning_rate": 0.00019388691387878262, "loss": 3.7713, "step": 6031 }, { "epoch": 0.28238047867048977, "grad_norm": 1.1796875, "learning_rate": 0.00019388490407656645, "loss": 3.0719, "step": 6032 }, { "epoch": 0.28242729241032244, "grad_norm": 1.2578125, "learning_rate": 0.0001938828939544429, "loss": 3.4503, "step": 6033 }, { "epoch": 0.28247410615015506, "grad_norm": 1.8125, "learning_rate": 0.0001938808835124188, "loss": 3.3701, "step": 6034 }, { "epoch": 0.28252091988998773, "grad_norm": 1.203125, "learning_rate": 0.00019387887275050102, "loss": 3.3423, "step": 6035 }, { "epoch": 0.28256773362982035, "grad_norm": 1.515625, "learning_rate": 0.0001938768616686964, "loss": 3.6605, "step": 6036 }, { "epoch": 0.28261454736965297, "grad_norm": 1.3125, "learning_rate": 0.00019387485026701182, "loss": 3.2297, "step": 6037 }, { "epoch": 0.28266136110948564, "grad_norm": 1.25, "learning_rate": 0.0001938728385454541, "loss": 3.3324, "step": 6038 }, { "epoch": 0.28270817484931826, "grad_norm": 1.2578125, "learning_rate": 0.00019387082650403013, "loss": 3.3746, "step": 6039 }, { "epoch": 0.28275498858915094, "grad_norm": 1.359375, "learning_rate": 0.00019386881414274674, "loss": 3.2323, "step": 6040 }, { "epoch": 0.28280180232898355, "grad_norm": 1.71875, "learning_rate": 0.00019386680146161079, "loss": 3.3221, "step": 6041 }, { "epoch": 0.28284861606881617, "grad_norm": 1.46875, "learning_rate": 0.0001938647884606291, "loss": 3.2331, "step": 6042 }, { "epoch": 0.28289542980864885, "grad_norm": 1.5, "learning_rate": 0.00019386277513980862, "loss": 3.6636, "step": 6043 }, { "epoch": 0.28294224354848146, "grad_norm": 1.2890625, "learning_rate": 0.00019386076149915612, "loss": 3.2775, "step": 6044 }, { "epoch": 0.28298905728831414, "grad_norm": 1.6640625, "learning_rate": 0.00019385874753867854, "loss": 3.4852, "step": 6045 }, { "epoch": 0.28303587102814676, "grad_norm": 1.1484375, "learning_rate": 0.00019385673325838268, "loss": 3.7626, "step": 6046 }, { "epoch": 0.2830826847679794, "grad_norm": 1.2265625, "learning_rate": 0.00019385471865827544, "loss": 3.1486, "step": 6047 }, { "epoch": 0.28312949850781205, "grad_norm": 1.46875, "learning_rate": 0.00019385270373836364, "loss": 3.174, "step": 6048 }, { "epoch": 0.28317631224764467, "grad_norm": 1.2265625, "learning_rate": 0.00019385068849865422, "loss": 3.3014, "step": 6049 }, { "epoch": 0.28322312598747734, "grad_norm": 1.2890625, "learning_rate": 0.000193848672939154, "loss": 3.0587, "step": 6050 }, { "epoch": 0.28326993972730996, "grad_norm": 1.2734375, "learning_rate": 0.00019384665705986982, "loss": 3.3204, "step": 6051 }, { "epoch": 0.2833167534671426, "grad_norm": 1.265625, "learning_rate": 0.00019384464086080858, "loss": 2.7909, "step": 6052 }, { "epoch": 0.28336356720697525, "grad_norm": 1.3671875, "learning_rate": 0.00019384262434197718, "loss": 3.5071, "step": 6053 }, { "epoch": 0.28341038094680787, "grad_norm": 1.296875, "learning_rate": 0.00019384060750338243, "loss": 3.1967, "step": 6054 }, { "epoch": 0.28345719468664055, "grad_norm": 1.3828125, "learning_rate": 0.00019383859034503127, "loss": 3.3478, "step": 6055 }, { "epoch": 0.28350400842647316, "grad_norm": 1.28125, "learning_rate": 0.0001938365728669305, "loss": 3.3209, "step": 6056 }, { "epoch": 0.28355082216630584, "grad_norm": 1.5, "learning_rate": 0.00019383455506908706, "loss": 3.1399, "step": 6057 }, { "epoch": 0.28359763590613846, "grad_norm": 1.796875, "learning_rate": 0.00019383253695150782, "loss": 4.0202, "step": 6058 }, { "epoch": 0.2836444496459711, "grad_norm": 1.40625, "learning_rate": 0.00019383051851419957, "loss": 3.2796, "step": 6059 }, { "epoch": 0.28369126338580375, "grad_norm": 1.5703125, "learning_rate": 0.00019382849975716927, "loss": 3.6988, "step": 6060 }, { "epoch": 0.28373807712563637, "grad_norm": 1.5859375, "learning_rate": 0.0001938264806804238, "loss": 3.3433, "step": 6061 }, { "epoch": 0.28378489086546904, "grad_norm": 1.4296875, "learning_rate": 0.00019382446128397003, "loss": 3.4797, "step": 6062 }, { "epoch": 0.28383170460530166, "grad_norm": 1.1640625, "learning_rate": 0.00019382244156781483, "loss": 3.626, "step": 6063 }, { "epoch": 0.2838785183451343, "grad_norm": 1.390625, "learning_rate": 0.00019382042153196505, "loss": 3.2308, "step": 6064 }, { "epoch": 0.28392533208496695, "grad_norm": 1.140625, "learning_rate": 0.00019381840117642764, "loss": 3.1702, "step": 6065 }, { "epoch": 0.28397214582479957, "grad_norm": 1.46875, "learning_rate": 0.00019381638050120943, "loss": 3.2066, "step": 6066 }, { "epoch": 0.28401895956463225, "grad_norm": 1.6484375, "learning_rate": 0.00019381435950631732, "loss": 3.1166, "step": 6067 }, { "epoch": 0.28406577330446486, "grad_norm": 1.5078125, "learning_rate": 0.00019381233819175823, "loss": 3.3416, "step": 6068 }, { "epoch": 0.2841125870442975, "grad_norm": 1.109375, "learning_rate": 0.00019381031655753901, "loss": 3.1974, "step": 6069 }, { "epoch": 0.28415940078413016, "grad_norm": 1.4765625, "learning_rate": 0.00019380829460366656, "loss": 3.2946, "step": 6070 }, { "epoch": 0.2842062145239628, "grad_norm": 1.6796875, "learning_rate": 0.00019380627233014775, "loss": 3.4776, "step": 6071 }, { "epoch": 0.28425302826379545, "grad_norm": 1.3203125, "learning_rate": 0.00019380424973698953, "loss": 3.066, "step": 6072 }, { "epoch": 0.28429984200362807, "grad_norm": 1.1875, "learning_rate": 0.00019380222682419872, "loss": 3.2879, "step": 6073 }, { "epoch": 0.2843466557434607, "grad_norm": 1.1953125, "learning_rate": 0.00019380020359178223, "loss": 2.9953, "step": 6074 }, { "epoch": 0.28439346948329336, "grad_norm": 1.65625, "learning_rate": 0.000193798180039747, "loss": 3.3981, "step": 6075 }, { "epoch": 0.284440283223126, "grad_norm": 1.421875, "learning_rate": 0.00019379615616809987, "loss": 3.8849, "step": 6076 }, { "epoch": 0.28448709696295865, "grad_norm": 1.2578125, "learning_rate": 0.00019379413197684778, "loss": 3.2086, "step": 6077 }, { "epoch": 0.28453391070279127, "grad_norm": 1.8125, "learning_rate": 0.00019379210746599758, "loss": 3.6743, "step": 6078 }, { "epoch": 0.2845807244426239, "grad_norm": 1.34375, "learning_rate": 0.00019379008263555622, "loss": 3.3025, "step": 6079 }, { "epoch": 0.28462753818245656, "grad_norm": 1.5234375, "learning_rate": 0.0001937880574855306, "loss": 3.2641, "step": 6080 }, { "epoch": 0.2846743519222892, "grad_norm": 1.375, "learning_rate": 0.00019378603201592752, "loss": 3.2021, "step": 6081 }, { "epoch": 0.28472116566212186, "grad_norm": 1.5703125, "learning_rate": 0.00019378400622675402, "loss": 3.2312, "step": 6082 }, { "epoch": 0.2847679794019545, "grad_norm": 1.40625, "learning_rate": 0.0001937819801180169, "loss": 3.4286, "step": 6083 }, { "epoch": 0.2848147931417871, "grad_norm": 1.2109375, "learning_rate": 0.00019377995368972316, "loss": 3.4651, "step": 6084 }, { "epoch": 0.28486160688161977, "grad_norm": 1.25, "learning_rate": 0.0001937779269418796, "loss": 3.0222, "step": 6085 }, { "epoch": 0.2849084206214524, "grad_norm": 1.2265625, "learning_rate": 0.00019377589987449318, "loss": 3.4483, "step": 6086 }, { "epoch": 0.28495523436128506, "grad_norm": 1.4453125, "learning_rate": 0.0001937738724875708, "loss": 3.7606, "step": 6087 }, { "epoch": 0.2850020481011177, "grad_norm": 1.234375, "learning_rate": 0.00019377184478111938, "loss": 2.9304, "step": 6088 }, { "epoch": 0.2850488618409503, "grad_norm": 1.3203125, "learning_rate": 0.00019376981675514582, "loss": 3.3355, "step": 6089 }, { "epoch": 0.28509567558078297, "grad_norm": 1.34375, "learning_rate": 0.00019376778840965705, "loss": 3.38, "step": 6090 }, { "epoch": 0.2851424893206156, "grad_norm": 1.453125, "learning_rate": 0.0001937657597446599, "loss": 2.9018, "step": 6091 }, { "epoch": 0.28518930306044826, "grad_norm": 1.2734375, "learning_rate": 0.0001937637307601614, "loss": 3.2024, "step": 6092 }, { "epoch": 0.2852361168002809, "grad_norm": 1.109375, "learning_rate": 0.00019376170145616838, "loss": 3.2897, "step": 6093 }, { "epoch": 0.2852829305401135, "grad_norm": 1.2109375, "learning_rate": 0.0001937596718326878, "loss": 3.027, "step": 6094 }, { "epoch": 0.2853297442799462, "grad_norm": 1.8984375, "learning_rate": 0.00019375764188972653, "loss": 3.3915, "step": 6095 }, { "epoch": 0.2853765580197788, "grad_norm": 1.0859375, "learning_rate": 0.00019375561162729156, "loss": 3.495, "step": 6096 }, { "epoch": 0.28542337175961147, "grad_norm": 1.71875, "learning_rate": 0.00019375358104538974, "loss": 3.7506, "step": 6097 }, { "epoch": 0.2854701854994441, "grad_norm": 1.25, "learning_rate": 0.000193751550144028, "loss": 2.9262, "step": 6098 }, { "epoch": 0.2855169992392767, "grad_norm": 1.4296875, "learning_rate": 0.00019374951892321327, "loss": 2.8381, "step": 6099 }, { "epoch": 0.2855638129791094, "grad_norm": 1.4296875, "learning_rate": 0.00019374748738295246, "loss": 3.572, "step": 6100 }, { "epoch": 0.285610626718942, "grad_norm": 1.40625, "learning_rate": 0.00019374545552325255, "loss": 3.8932, "step": 6101 }, { "epoch": 0.28565744045877467, "grad_norm": 1.3203125, "learning_rate": 0.00019374342334412038, "loss": 3.5333, "step": 6102 }, { "epoch": 0.2857042541986073, "grad_norm": 1.390625, "learning_rate": 0.00019374139084556296, "loss": 3.849, "step": 6103 }, { "epoch": 0.2857510679384399, "grad_norm": 1.46875, "learning_rate": 0.00019373935802758713, "loss": 3.6706, "step": 6104 }, { "epoch": 0.2857978816782726, "grad_norm": 2.171875, "learning_rate": 0.00019373732489019987, "loss": 3.3932, "step": 6105 }, { "epoch": 0.2858446954181052, "grad_norm": 1.375, "learning_rate": 0.00019373529143340806, "loss": 3.9038, "step": 6106 }, { "epoch": 0.2858915091579379, "grad_norm": 1.28125, "learning_rate": 0.00019373325765721872, "loss": 3.7048, "step": 6107 }, { "epoch": 0.2859383228977705, "grad_norm": 1.265625, "learning_rate": 0.00019373122356163868, "loss": 3.1661, "step": 6108 }, { "epoch": 0.2859851366376031, "grad_norm": 1.578125, "learning_rate": 0.00019372918914667495, "loss": 3.1821, "step": 6109 }, { "epoch": 0.2860319503774358, "grad_norm": 1.28125, "learning_rate": 0.0001937271544123344, "loss": 3.3311, "step": 6110 }, { "epoch": 0.2860787641172684, "grad_norm": 1.265625, "learning_rate": 0.00019372511935862398, "loss": 3.3278, "step": 6111 }, { "epoch": 0.2861255778571011, "grad_norm": 1.3359375, "learning_rate": 0.00019372308398555067, "loss": 3.5351, "step": 6112 }, { "epoch": 0.2861723915969337, "grad_norm": 1.375, "learning_rate": 0.00019372104829312132, "loss": 3.3599, "step": 6113 }, { "epoch": 0.2862192053367663, "grad_norm": 1.3359375, "learning_rate": 0.00019371901228134295, "loss": 3.2811, "step": 6114 }, { "epoch": 0.286266019076599, "grad_norm": 1.2890625, "learning_rate": 0.00019371697595022247, "loss": 2.9139, "step": 6115 }, { "epoch": 0.2863128328164316, "grad_norm": 1.59375, "learning_rate": 0.0001937149392997668, "loss": 3.5966, "step": 6116 }, { "epoch": 0.2863596465562643, "grad_norm": 1.078125, "learning_rate": 0.00019371290232998286, "loss": 3.0611, "step": 6117 }, { "epoch": 0.2864064602960969, "grad_norm": 1.0078125, "learning_rate": 0.00019371086504087765, "loss": 3.1492, "step": 6118 }, { "epoch": 0.2864532740359296, "grad_norm": 1.140625, "learning_rate": 0.00019370882743245812, "loss": 2.8752, "step": 6119 }, { "epoch": 0.2865000877757622, "grad_norm": 1.3359375, "learning_rate": 0.0001937067895047311, "loss": 3.4657, "step": 6120 }, { "epoch": 0.2865469015155948, "grad_norm": 1.6015625, "learning_rate": 0.00019370475125770367, "loss": 3.5697, "step": 6121 }, { "epoch": 0.2865937152554275, "grad_norm": 1.1796875, "learning_rate": 0.00019370271269138269, "loss": 3.3467, "step": 6122 }, { "epoch": 0.2866405289952601, "grad_norm": 1.4453125, "learning_rate": 0.00019370067380577514, "loss": 3.0874, "step": 6123 }, { "epoch": 0.2866873427350928, "grad_norm": 1.15625, "learning_rate": 0.00019369863460088794, "loss": 2.7788, "step": 6124 }, { "epoch": 0.2867341564749254, "grad_norm": 2.125, "learning_rate": 0.0001936965950767281, "loss": 3.3723, "step": 6125 }, { "epoch": 0.286780970214758, "grad_norm": 1.296875, "learning_rate": 0.0001936945552333025, "loss": 3.4824, "step": 6126 }, { "epoch": 0.2868277839545907, "grad_norm": 1.25, "learning_rate": 0.00019369251507061813, "loss": 3.2669, "step": 6127 }, { "epoch": 0.2868745976944233, "grad_norm": 1.40625, "learning_rate": 0.00019369047458868194, "loss": 3.8192, "step": 6128 }, { "epoch": 0.286921411434256, "grad_norm": 1.6015625, "learning_rate": 0.00019368843378750087, "loss": 3.7309, "step": 6129 }, { "epoch": 0.2869682251740886, "grad_norm": 1.7890625, "learning_rate": 0.00019368639266708186, "loss": 3.2501, "step": 6130 }, { "epoch": 0.2870150389139212, "grad_norm": 1.3125, "learning_rate": 0.00019368435122743186, "loss": 3.6215, "step": 6131 }, { "epoch": 0.2870618526537539, "grad_norm": 1.2265625, "learning_rate": 0.0001936823094685579, "loss": 3.1053, "step": 6132 }, { "epoch": 0.2871086663935865, "grad_norm": 1.78125, "learning_rate": 0.00019368026739046687, "loss": 3.4418, "step": 6133 }, { "epoch": 0.2871554801334192, "grad_norm": 1.390625, "learning_rate": 0.00019367822499316572, "loss": 3.7125, "step": 6134 }, { "epoch": 0.2872022938732518, "grad_norm": 1.8125, "learning_rate": 0.00019367618227666148, "loss": 4.1847, "step": 6135 }, { "epoch": 0.2872491076130844, "grad_norm": 1.234375, "learning_rate": 0.00019367413924096104, "loss": 3.3467, "step": 6136 }, { "epoch": 0.2872959213529171, "grad_norm": 1.4375, "learning_rate": 0.00019367209588607137, "loss": 3.3645, "step": 6137 }, { "epoch": 0.2873427350927497, "grad_norm": 1.1796875, "learning_rate": 0.00019367005221199946, "loss": 2.9456, "step": 6138 }, { "epoch": 0.2873895488325824, "grad_norm": 1.40625, "learning_rate": 0.00019366800821875223, "loss": 2.9483, "step": 6139 }, { "epoch": 0.287436362572415, "grad_norm": 1.765625, "learning_rate": 0.00019366596390633673, "loss": 3.8401, "step": 6140 }, { "epoch": 0.2874831763122476, "grad_norm": 2.1875, "learning_rate": 0.00019366391927475984, "loss": 3.7197, "step": 6141 }, { "epoch": 0.2875299900520803, "grad_norm": 1.5625, "learning_rate": 0.00019366187432402857, "loss": 2.9254, "step": 6142 }, { "epoch": 0.2875768037919129, "grad_norm": 1.171875, "learning_rate": 0.00019365982905414985, "loss": 3.6601, "step": 6143 }, { "epoch": 0.2876236175317456, "grad_norm": 1.34375, "learning_rate": 0.0001936577834651307, "loss": 3.6078, "step": 6144 }, { "epoch": 0.2876704312715782, "grad_norm": 1.84375, "learning_rate": 0.00019365573755697807, "loss": 3.2999, "step": 6145 }, { "epoch": 0.2877172450114108, "grad_norm": 1.40625, "learning_rate": 0.00019365369132969896, "loss": 3.6428, "step": 6146 }, { "epoch": 0.2877640587512435, "grad_norm": 1.3125, "learning_rate": 0.00019365164478330024, "loss": 4.0466, "step": 6147 }, { "epoch": 0.2878108724910761, "grad_norm": 1.3515625, "learning_rate": 0.000193649597917789, "loss": 3.1018, "step": 6148 }, { "epoch": 0.2878576862309088, "grad_norm": 1.3984375, "learning_rate": 0.00019364755073317214, "loss": 3.6682, "step": 6149 }, { "epoch": 0.2879044999707414, "grad_norm": 1.1328125, "learning_rate": 0.00019364550322945672, "loss": 3.3733, "step": 6150 }, { "epoch": 0.28795131371057403, "grad_norm": 1.5390625, "learning_rate": 0.0001936434554066496, "loss": 3.3076, "step": 6151 }, { "epoch": 0.2879981274504067, "grad_norm": 1.6171875, "learning_rate": 0.00019364140726475785, "loss": 3.1753, "step": 6152 }, { "epoch": 0.2880449411902393, "grad_norm": 1.6328125, "learning_rate": 0.00019363935880378842, "loss": 3.3737, "step": 6153 }, { "epoch": 0.288091754930072, "grad_norm": 1.21875, "learning_rate": 0.00019363731002374824, "loss": 2.9913, "step": 6154 }, { "epoch": 0.2881385686699046, "grad_norm": 1.1328125, "learning_rate": 0.00019363526092464438, "loss": 2.5678, "step": 6155 }, { "epoch": 0.28818538240973723, "grad_norm": 1.203125, "learning_rate": 0.0001936332115064838, "loss": 3.5126, "step": 6156 }, { "epoch": 0.2882321961495699, "grad_norm": 1.8046875, "learning_rate": 0.00019363116176927342, "loss": 3.7696, "step": 6157 }, { "epoch": 0.2882790098894025, "grad_norm": 1.3828125, "learning_rate": 0.00019362911171302028, "loss": 3.2962, "step": 6158 }, { "epoch": 0.2883258236292352, "grad_norm": 1.0859375, "learning_rate": 0.00019362706133773137, "loss": 3.2068, "step": 6159 }, { "epoch": 0.2883726373690678, "grad_norm": 1.3359375, "learning_rate": 0.00019362501064341364, "loss": 3.522, "step": 6160 }, { "epoch": 0.28841945110890044, "grad_norm": 1.1640625, "learning_rate": 0.00019362295963007412, "loss": 3.6156, "step": 6161 }, { "epoch": 0.2884662648487331, "grad_norm": 1.1328125, "learning_rate": 0.00019362090829771976, "loss": 3.5744, "step": 6162 }, { "epoch": 0.28851307858856573, "grad_norm": 1.3515625, "learning_rate": 0.00019361885664635758, "loss": 3.4523, "step": 6163 }, { "epoch": 0.2885598923283984, "grad_norm": 1.6640625, "learning_rate": 0.00019361680467599454, "loss": 3.4307, "step": 6164 }, { "epoch": 0.288606706068231, "grad_norm": 1.3359375, "learning_rate": 0.00019361475238663766, "loss": 3.7566, "step": 6165 }, { "epoch": 0.28865351980806364, "grad_norm": 1.2421875, "learning_rate": 0.00019361269977829392, "loss": 3.0695, "step": 6166 }, { "epoch": 0.2887003335478963, "grad_norm": 1.3671875, "learning_rate": 0.00019361064685097031, "loss": 2.9541, "step": 6167 }, { "epoch": 0.28874714728772893, "grad_norm": 1.3203125, "learning_rate": 0.00019360859360467386, "loss": 3.5594, "step": 6168 }, { "epoch": 0.2887939610275616, "grad_norm": 1.3984375, "learning_rate": 0.0001936065400394115, "loss": 3.6187, "step": 6169 }, { "epoch": 0.2888407747673942, "grad_norm": 1.1484375, "learning_rate": 0.0001936044861551903, "loss": 3.5127, "step": 6170 }, { "epoch": 0.28888758850722684, "grad_norm": 1.2421875, "learning_rate": 0.0001936024319520172, "loss": 3.053, "step": 6171 }, { "epoch": 0.2889344022470595, "grad_norm": 1.2578125, "learning_rate": 0.00019360037742989923, "loss": 3.4368, "step": 6172 }, { "epoch": 0.28898121598689214, "grad_norm": 1.5625, "learning_rate": 0.00019359832258884337, "loss": 3.7391, "step": 6173 }, { "epoch": 0.2890280297267248, "grad_norm": 1.4921875, "learning_rate": 0.00019359626742885668, "loss": 3.0477, "step": 6174 }, { "epoch": 0.28907484346655743, "grad_norm": 0.99609375, "learning_rate": 0.0001935942119499461, "loss": 4.3222, "step": 6175 }, { "epoch": 0.28912165720639005, "grad_norm": 1.3828125, "learning_rate": 0.0001935921561521186, "loss": 3.6493, "step": 6176 }, { "epoch": 0.2891684709462227, "grad_norm": 1.625, "learning_rate": 0.00019359010003538129, "loss": 3.3442, "step": 6177 }, { "epoch": 0.28921528468605534, "grad_norm": 1.453125, "learning_rate": 0.00019358804359974113, "loss": 3.3979, "step": 6178 }, { "epoch": 0.289262098425888, "grad_norm": 1.265625, "learning_rate": 0.0001935859868452051, "loss": 3.4889, "step": 6179 }, { "epoch": 0.28930891216572063, "grad_norm": 1.296875, "learning_rate": 0.00019358392977178023, "loss": 3.453, "step": 6180 }, { "epoch": 0.2893557259055533, "grad_norm": 1.375, "learning_rate": 0.00019358187237947353, "loss": 3.5216, "step": 6181 }, { "epoch": 0.2894025396453859, "grad_norm": 1.4765625, "learning_rate": 0.00019357981466829204, "loss": 3.6063, "step": 6182 }, { "epoch": 0.28944935338521854, "grad_norm": 1.1875, "learning_rate": 0.0001935777566382427, "loss": 3.3262, "step": 6183 }, { "epoch": 0.2894961671250512, "grad_norm": 1.296875, "learning_rate": 0.00019357569828933258, "loss": 3.3961, "step": 6184 }, { "epoch": 0.28954298086488384, "grad_norm": 1.5078125, "learning_rate": 0.00019357363962156868, "loss": 3.2493, "step": 6185 }, { "epoch": 0.2895897946047165, "grad_norm": 1.3125, "learning_rate": 0.000193571580634958, "loss": 3.1536, "step": 6186 }, { "epoch": 0.28963660834454913, "grad_norm": 1.4140625, "learning_rate": 0.00019356952132950757, "loss": 3.6341, "step": 6187 }, { "epoch": 0.28968342208438175, "grad_norm": 1.265625, "learning_rate": 0.0001935674617052244, "loss": 2.8107, "step": 6188 }, { "epoch": 0.2897302358242144, "grad_norm": 1.1875, "learning_rate": 0.00019356540176211553, "loss": 3.1149, "step": 6189 }, { "epoch": 0.28977704956404704, "grad_norm": 1.2265625, "learning_rate": 0.00019356334150018794, "loss": 3.3682, "step": 6190 }, { "epoch": 0.2898238633038797, "grad_norm": 1.3359375, "learning_rate": 0.0001935612809194487, "loss": 3.2652, "step": 6191 }, { "epoch": 0.28987067704371233, "grad_norm": 1.4921875, "learning_rate": 0.0001935592200199048, "loss": 3.6285, "step": 6192 }, { "epoch": 0.28991749078354495, "grad_norm": 1.421875, "learning_rate": 0.00019355715880156325, "loss": 3.2359, "step": 6193 }, { "epoch": 0.2899643045233776, "grad_norm": 1.3515625, "learning_rate": 0.0001935550972644311, "loss": 3.259, "step": 6194 }, { "epoch": 0.29001111826321024, "grad_norm": 1.328125, "learning_rate": 0.00019355303540851532, "loss": 3.4967, "step": 6195 }, { "epoch": 0.2900579320030429, "grad_norm": 1.0078125, "learning_rate": 0.00019355097323382303, "loss": 2.6864, "step": 6196 }, { "epoch": 0.29010474574287554, "grad_norm": 1.4453125, "learning_rate": 0.00019354891074036118, "loss": 3.2772, "step": 6197 }, { "epoch": 0.29015155948270815, "grad_norm": 1.4375, "learning_rate": 0.00019354684792813686, "loss": 3.2928, "step": 6198 }, { "epoch": 0.29019837322254083, "grad_norm": 1.609375, "learning_rate": 0.00019354478479715703, "loss": 3.3744, "step": 6199 }, { "epoch": 0.29024518696237345, "grad_norm": 1.3515625, "learning_rate": 0.00019354272134742878, "loss": 3.5286, "step": 6200 }, { "epoch": 0.2902920007022061, "grad_norm": 1.4140625, "learning_rate": 0.00019354065757895908, "loss": 3.6658, "step": 6201 }, { "epoch": 0.29033881444203874, "grad_norm": 1.59375, "learning_rate": 0.00019353859349175502, "loss": 3.9111, "step": 6202 }, { "epoch": 0.29038562818187136, "grad_norm": 1.1953125, "learning_rate": 0.00019353652908582359, "loss": 3.1889, "step": 6203 }, { "epoch": 0.29043244192170403, "grad_norm": 1.265625, "learning_rate": 0.00019353446436117186, "loss": 3.2456, "step": 6204 }, { "epoch": 0.29047925566153665, "grad_norm": 1.4765625, "learning_rate": 0.00019353239931780685, "loss": 3.2344, "step": 6205 }, { "epoch": 0.2905260694013693, "grad_norm": 1.4765625, "learning_rate": 0.0001935303339557356, "loss": 3.2027, "step": 6206 }, { "epoch": 0.29057288314120194, "grad_norm": 1.6640625, "learning_rate": 0.00019352826827496512, "loss": 3.5704, "step": 6207 }, { "epoch": 0.29061969688103456, "grad_norm": 1.1171875, "learning_rate": 0.0001935262022755025, "loss": 2.9202, "step": 6208 }, { "epoch": 0.29066651062086724, "grad_norm": 1.5078125, "learning_rate": 0.00019352413595735473, "loss": 3.3406, "step": 6209 }, { "epoch": 0.29071332436069985, "grad_norm": 1.2421875, "learning_rate": 0.00019352206932052887, "loss": 3.5601, "step": 6210 }, { "epoch": 0.29076013810053253, "grad_norm": 1.1328125, "learning_rate": 0.00019352000236503198, "loss": 3.0125, "step": 6211 }, { "epoch": 0.29080695184036515, "grad_norm": 1.53125, "learning_rate": 0.0001935179350908711, "loss": 2.9061, "step": 6212 }, { "epoch": 0.29085376558019777, "grad_norm": 1.84375, "learning_rate": 0.00019351586749805324, "loss": 4.0052, "step": 6213 }, { "epoch": 0.29090057932003044, "grad_norm": 1.453125, "learning_rate": 0.00019351379958658548, "loss": 3.5101, "step": 6214 }, { "epoch": 0.29094739305986306, "grad_norm": 1.0390625, "learning_rate": 0.00019351173135647484, "loss": 2.9789, "step": 6215 }, { "epoch": 0.29099420679969573, "grad_norm": 1.21875, "learning_rate": 0.00019350966280772837, "loss": 3.0535, "step": 6216 }, { "epoch": 0.29104102053952835, "grad_norm": 1.4765625, "learning_rate": 0.00019350759394035313, "loss": 3.6575, "step": 6217 }, { "epoch": 0.29108783427936097, "grad_norm": 1.3828125, "learning_rate": 0.0001935055247543562, "loss": 3.2993, "step": 6218 }, { "epoch": 0.29113464801919364, "grad_norm": 1.140625, "learning_rate": 0.0001935034552497446, "loss": 3.0446, "step": 6219 }, { "epoch": 0.29118146175902626, "grad_norm": 1.859375, "learning_rate": 0.00019350138542652533, "loss": 3.3227, "step": 6220 }, { "epoch": 0.29122827549885894, "grad_norm": 1.046875, "learning_rate": 0.00019349931528470554, "loss": 2.9463, "step": 6221 }, { "epoch": 0.29127508923869155, "grad_norm": 1.5390625, "learning_rate": 0.00019349724482429224, "loss": 3.2718, "step": 6222 }, { "epoch": 0.29132190297852417, "grad_norm": 1.8984375, "learning_rate": 0.00019349517404529246, "loss": 3.5076, "step": 6223 }, { "epoch": 0.29136871671835685, "grad_norm": 1.1484375, "learning_rate": 0.0001934931029477133, "loss": 3.1199, "step": 6224 }, { "epoch": 0.29141553045818946, "grad_norm": 1.390625, "learning_rate": 0.00019349103153156177, "loss": 3.4107, "step": 6225 }, { "epoch": 0.29146234419802214, "grad_norm": 1.390625, "learning_rate": 0.00019348895979684496, "loss": 3.5333, "step": 6226 }, { "epoch": 0.29150915793785476, "grad_norm": 1.2421875, "learning_rate": 0.00019348688774356996, "loss": 3.0873, "step": 6227 }, { "epoch": 0.2915559716776874, "grad_norm": 1.15625, "learning_rate": 0.00019348481537174374, "loss": 2.8482, "step": 6228 }, { "epoch": 0.29160278541752005, "grad_norm": 1.078125, "learning_rate": 0.00019348274268137346, "loss": 3.235, "step": 6229 }, { "epoch": 0.29164959915735267, "grad_norm": 1.296875, "learning_rate": 0.0001934806696724661, "loss": 3.0863, "step": 6230 }, { "epoch": 0.29169641289718534, "grad_norm": 1.109375, "learning_rate": 0.00019347859634502877, "loss": 3.1711, "step": 6231 }, { "epoch": 0.29174322663701796, "grad_norm": 1.1484375, "learning_rate": 0.00019347652269906856, "loss": 3.3021, "step": 6232 }, { "epoch": 0.2917900403768506, "grad_norm": 1.3046875, "learning_rate": 0.00019347444873459246, "loss": 2.9935, "step": 6233 }, { "epoch": 0.29183685411668325, "grad_norm": 1.3671875, "learning_rate": 0.00019347237445160759, "loss": 3.4228, "step": 6234 }, { "epoch": 0.29188366785651587, "grad_norm": 1.140625, "learning_rate": 0.00019347029985012103, "loss": 3.0197, "step": 6235 }, { "epoch": 0.29193048159634855, "grad_norm": 1.078125, "learning_rate": 0.00019346822493013978, "loss": 2.6831, "step": 6236 }, { "epoch": 0.29197729533618116, "grad_norm": 1.6328125, "learning_rate": 0.000193466149691671, "loss": 3.7406, "step": 6237 }, { "epoch": 0.2920241090760138, "grad_norm": 1.328125, "learning_rate": 0.0001934640741347217, "loss": 3.3221, "step": 6238 }, { "epoch": 0.29207092281584646, "grad_norm": 1.3359375, "learning_rate": 0.00019346199825929898, "loss": 3.6329, "step": 6239 }, { "epoch": 0.2921177365556791, "grad_norm": 1.421875, "learning_rate": 0.0001934599220654099, "loss": 3.5121, "step": 6240 }, { "epoch": 0.29216455029551175, "grad_norm": 1.453125, "learning_rate": 0.00019345784555306153, "loss": 3.6809, "step": 6241 }, { "epoch": 0.29221136403534437, "grad_norm": 1.171875, "learning_rate": 0.00019345576872226093, "loss": 3.4999, "step": 6242 }, { "epoch": 0.29225817777517704, "grad_norm": 1.203125, "learning_rate": 0.00019345369157301528, "loss": 3.2643, "step": 6243 }, { "epoch": 0.29230499151500966, "grad_norm": 1.53125, "learning_rate": 0.0001934516141053315, "loss": 2.7447, "step": 6244 }, { "epoch": 0.2923518052548423, "grad_norm": 1.3984375, "learning_rate": 0.00019344953631921676, "loss": 3.2613, "step": 6245 }, { "epoch": 0.29239861899467495, "grad_norm": 1.5390625, "learning_rate": 0.00019344745821467816, "loss": 3.5753, "step": 6246 }, { "epoch": 0.29244543273450757, "grad_norm": 1.453125, "learning_rate": 0.0001934453797917227, "loss": 3.4688, "step": 6247 }, { "epoch": 0.29249224647434025, "grad_norm": 1.5390625, "learning_rate": 0.00019344330105035756, "loss": 3.5127, "step": 6248 }, { "epoch": 0.29253906021417286, "grad_norm": 1.484375, "learning_rate": 0.00019344122199058974, "loss": 3.3618, "step": 6249 }, { "epoch": 0.2925858739540055, "grad_norm": 1.453125, "learning_rate": 0.00019343914261242637, "loss": 3.4514, "step": 6250 }, { "epoch": 0.29263268769383816, "grad_norm": 1.5234375, "learning_rate": 0.00019343706291587452, "loss": 3.6763, "step": 6251 }, { "epoch": 0.2926795014336708, "grad_norm": 1.46875, "learning_rate": 0.00019343498290094128, "loss": 3.4339, "step": 6252 }, { "epoch": 0.29272631517350345, "grad_norm": 1.8046875, "learning_rate": 0.00019343290256763372, "loss": 3.5193, "step": 6253 }, { "epoch": 0.29277312891333607, "grad_norm": 1.359375, "learning_rate": 0.00019343082191595898, "loss": 3.2214, "step": 6254 }, { "epoch": 0.2928199426531687, "grad_norm": 1.53125, "learning_rate": 0.00019342874094592407, "loss": 3.2462, "step": 6255 }, { "epoch": 0.29286675639300136, "grad_norm": 1.7109375, "learning_rate": 0.00019342665965753616, "loss": 4.0806, "step": 6256 }, { "epoch": 0.292913570132834, "grad_norm": 1.671875, "learning_rate": 0.0001934245780508023, "loss": 4.523, "step": 6257 }, { "epoch": 0.29296038387266665, "grad_norm": 1.28125, "learning_rate": 0.0001934224961257296, "loss": 3.4873, "step": 6258 }, { "epoch": 0.29300719761249927, "grad_norm": 1.546875, "learning_rate": 0.00019342041388232511, "loss": 3.499, "step": 6259 }, { "epoch": 0.2930540113523319, "grad_norm": 1.265625, "learning_rate": 0.00019341833132059598, "loss": 3.6339, "step": 6260 }, { "epoch": 0.29310082509216456, "grad_norm": 1.171875, "learning_rate": 0.00019341624844054932, "loss": 2.6171, "step": 6261 }, { "epoch": 0.2931476388319972, "grad_norm": 1.171875, "learning_rate": 0.00019341416524219212, "loss": 3.5341, "step": 6262 }, { "epoch": 0.29319445257182986, "grad_norm": 1.3984375, "learning_rate": 0.0001934120817255316, "loss": 3.3514, "step": 6263 }, { "epoch": 0.2932412663116625, "grad_norm": 1.2421875, "learning_rate": 0.00019340999789057482, "loss": 3.4189, "step": 6264 }, { "epoch": 0.2932880800514951, "grad_norm": 1.1171875, "learning_rate": 0.00019340791373732884, "loss": 3.1442, "step": 6265 }, { "epoch": 0.29333489379132777, "grad_norm": 1.3671875, "learning_rate": 0.00019340582926580084, "loss": 3.3338, "step": 6266 }, { "epoch": 0.2933817075311604, "grad_norm": 1.2109375, "learning_rate": 0.00019340374447599784, "loss": 3.3454, "step": 6267 }, { "epoch": 0.29342852127099306, "grad_norm": 1.3984375, "learning_rate": 0.00019340165936792699, "loss": 3.219, "step": 6268 }, { "epoch": 0.2934753350108257, "grad_norm": 1.2578125, "learning_rate": 0.0001933995739415954, "loss": 3.1412, "step": 6269 }, { "epoch": 0.2935221487506583, "grad_norm": 1.1640625, "learning_rate": 0.00019339748819701013, "loss": 3.2116, "step": 6270 }, { "epoch": 0.29356896249049097, "grad_norm": 1.484375, "learning_rate": 0.0001933954021341783, "loss": 3.3081, "step": 6271 }, { "epoch": 0.2936157762303236, "grad_norm": 2.046875, "learning_rate": 0.00019339331575310707, "loss": 3.2292, "step": 6272 }, { "epoch": 0.29366258997015626, "grad_norm": 1.3984375, "learning_rate": 0.0001933912290538035, "loss": 3.3195, "step": 6273 }, { "epoch": 0.2937094037099889, "grad_norm": 1.65625, "learning_rate": 0.00019338914203627475, "loss": 3.3007, "step": 6274 }, { "epoch": 0.2937562174498215, "grad_norm": 1.2578125, "learning_rate": 0.00019338705470052785, "loss": 3.5554, "step": 6275 }, { "epoch": 0.2938030311896542, "grad_norm": 1.8046875, "learning_rate": 0.00019338496704657, "loss": 2.8467, "step": 6276 }, { "epoch": 0.2938498449294868, "grad_norm": 1.3671875, "learning_rate": 0.0001933828790744082, "loss": 3.4858, "step": 6277 }, { "epoch": 0.29389665866931947, "grad_norm": 1.5078125, "learning_rate": 0.0001933807907840497, "loss": 3.2085, "step": 6278 }, { "epoch": 0.2939434724091521, "grad_norm": 1.6953125, "learning_rate": 0.00019337870217550154, "loss": 3.0311, "step": 6279 }, { "epoch": 0.2939902861489847, "grad_norm": 1.6640625, "learning_rate": 0.00019337661324877087, "loss": 3.02, "step": 6280 }, { "epoch": 0.2940370998888174, "grad_norm": 1.34375, "learning_rate": 0.00019337452400386478, "loss": 3.2538, "step": 6281 }, { "epoch": 0.29408391362865, "grad_norm": 1.375, "learning_rate": 0.0001933724344407904, "loss": 3.3314, "step": 6282 }, { "epoch": 0.29413072736848267, "grad_norm": 1.4296875, "learning_rate": 0.0001933703445595548, "loss": 3.3835, "step": 6283 }, { "epoch": 0.2941775411083153, "grad_norm": 1.3359375, "learning_rate": 0.0001933682543601652, "loss": 3.7125, "step": 6284 }, { "epoch": 0.2942243548481479, "grad_norm": 1.4140625, "learning_rate": 0.00019336616384262864, "loss": 3.6905, "step": 6285 }, { "epoch": 0.2942711685879806, "grad_norm": 1.484375, "learning_rate": 0.00019336407300695228, "loss": 3.1764, "step": 6286 }, { "epoch": 0.2943179823278132, "grad_norm": 1.2890625, "learning_rate": 0.00019336198185314324, "loss": 2.9237, "step": 6287 }, { "epoch": 0.2943647960676459, "grad_norm": 1.71875, "learning_rate": 0.00019335989038120864, "loss": 2.9487, "step": 6288 }, { "epoch": 0.2944116098074785, "grad_norm": 1.203125, "learning_rate": 0.00019335779859115562, "loss": 3.3941, "step": 6289 }, { "epoch": 0.2944584235473111, "grad_norm": 1.25, "learning_rate": 0.0001933557064829913, "loss": 2.9477, "step": 6290 }, { "epoch": 0.2945052372871438, "grad_norm": 1.203125, "learning_rate": 0.0001933536140567228, "loss": 2.7667, "step": 6291 }, { "epoch": 0.2945520510269764, "grad_norm": 1.5859375, "learning_rate": 0.00019335152131235724, "loss": 3.3335, "step": 6292 }, { "epoch": 0.2945988647668091, "grad_norm": 1.3671875, "learning_rate": 0.00019334942824990182, "loss": 3.4596, "step": 6293 }, { "epoch": 0.2946456785066417, "grad_norm": 1.3203125, "learning_rate": 0.00019334733486936357, "loss": 3.6489, "step": 6294 }, { "epoch": 0.2946924922464743, "grad_norm": 1.515625, "learning_rate": 0.00019334524117074967, "loss": 3.51, "step": 6295 }, { "epoch": 0.294739305986307, "grad_norm": 1.1328125, "learning_rate": 0.0001933431471540673, "loss": 3.0065, "step": 6296 }, { "epoch": 0.2947861197261396, "grad_norm": 1.5, "learning_rate": 0.0001933410528193235, "loss": 3.0296, "step": 6297 }, { "epoch": 0.2948329334659723, "grad_norm": 1.3046875, "learning_rate": 0.00019333895816652546, "loss": 3.0118, "step": 6298 }, { "epoch": 0.2948797472058049, "grad_norm": 1.5390625, "learning_rate": 0.00019333686319568036, "loss": 3.542, "step": 6299 }, { "epoch": 0.2949265609456375, "grad_norm": 1.140625, "learning_rate": 0.00019333476790679526, "loss": 3.3562, "step": 6300 }, { "epoch": 0.2949733746854702, "grad_norm": 2.609375, "learning_rate": 0.00019333267229987736, "loss": 3.4582, "step": 6301 }, { "epoch": 0.2950201884253028, "grad_norm": 1.6953125, "learning_rate": 0.00019333057637493375, "loss": 3.3524, "step": 6302 }, { "epoch": 0.2950670021651355, "grad_norm": 1.421875, "learning_rate": 0.0001933284801319716, "loss": 3.3898, "step": 6303 }, { "epoch": 0.2951138159049681, "grad_norm": 1.390625, "learning_rate": 0.00019332638357099806, "loss": 3.3029, "step": 6304 }, { "epoch": 0.2951606296448008, "grad_norm": 2.375, "learning_rate": 0.00019332428669202026, "loss": 3.6689, "step": 6305 }, { "epoch": 0.2952074433846334, "grad_norm": 1.09375, "learning_rate": 0.00019332218949504534, "loss": 4.4935, "step": 6306 }, { "epoch": 0.295254257124466, "grad_norm": 1.1015625, "learning_rate": 0.00019332009198008043, "loss": 3.1735, "step": 6307 }, { "epoch": 0.2953010708642987, "grad_norm": 1.4375, "learning_rate": 0.00019331799414713273, "loss": 3.4678, "step": 6308 }, { "epoch": 0.2953478846041313, "grad_norm": 1.15625, "learning_rate": 0.00019331589599620936, "loss": 3.2907, "step": 6309 }, { "epoch": 0.295394698343964, "grad_norm": 1.0078125, "learning_rate": 0.00019331379752731746, "loss": 4.1952, "step": 6310 }, { "epoch": 0.2954415120837966, "grad_norm": 1.2890625, "learning_rate": 0.0001933116987404642, "loss": 3.3216, "step": 6311 }, { "epoch": 0.2954883258236292, "grad_norm": 1.3828125, "learning_rate": 0.0001933095996356567, "loss": 3.225, "step": 6312 }, { "epoch": 0.2955351395634619, "grad_norm": 1.1328125, "learning_rate": 0.00019330750021290217, "loss": 3.4952, "step": 6313 }, { "epoch": 0.2955819533032945, "grad_norm": 1.328125, "learning_rate": 0.00019330540047220768, "loss": 3.1859, "step": 6314 }, { "epoch": 0.2956287670431272, "grad_norm": 1.4921875, "learning_rate": 0.00019330330041358046, "loss": 3.555, "step": 6315 }, { "epoch": 0.2956755807829598, "grad_norm": 1.75, "learning_rate": 0.00019330120003702763, "loss": 3.1581, "step": 6316 }, { "epoch": 0.2957223945227924, "grad_norm": 1.3359375, "learning_rate": 0.00019329909934255634, "loss": 3.5206, "step": 6317 }, { "epoch": 0.2957692082626251, "grad_norm": 1.5546875, "learning_rate": 0.0001932969983301738, "loss": 3.2952, "step": 6318 }, { "epoch": 0.2958160220024577, "grad_norm": 1.375, "learning_rate": 0.00019329489699988707, "loss": 3.4706, "step": 6319 }, { "epoch": 0.2958628357422904, "grad_norm": 1.2421875, "learning_rate": 0.0001932927953517034, "loss": 3.4091, "step": 6320 }, { "epoch": 0.295909649482123, "grad_norm": 1.375, "learning_rate": 0.00019329069338562996, "loss": 3.3445, "step": 6321 }, { "epoch": 0.2959564632219556, "grad_norm": 1.5546875, "learning_rate": 0.00019328859110167383, "loss": 3.1613, "step": 6322 }, { "epoch": 0.2960032769617883, "grad_norm": 1.3125, "learning_rate": 0.00019328648849984225, "loss": 3.3311, "step": 6323 }, { "epoch": 0.2960500907016209, "grad_norm": 1.21875, "learning_rate": 0.00019328438558014233, "loss": 3.339, "step": 6324 }, { "epoch": 0.2960969044414536, "grad_norm": 1.546875, "learning_rate": 0.00019328228234258128, "loss": 4.1085, "step": 6325 }, { "epoch": 0.2961437181812862, "grad_norm": 2.703125, "learning_rate": 0.00019328017878716622, "loss": 4.1256, "step": 6326 }, { "epoch": 0.2961905319211188, "grad_norm": 1.28125, "learning_rate": 0.00019327807491390436, "loss": 3.2748, "step": 6327 }, { "epoch": 0.2962373456609515, "grad_norm": 1.546875, "learning_rate": 0.0001932759707228028, "loss": 3.7135, "step": 6328 }, { "epoch": 0.2962841594007841, "grad_norm": 1.5234375, "learning_rate": 0.00019327386621386883, "loss": 3.5237, "step": 6329 }, { "epoch": 0.2963309731406168, "grad_norm": 1.1328125, "learning_rate": 0.00019327176138710952, "loss": 3.0298, "step": 6330 }, { "epoch": 0.2963777868804494, "grad_norm": 1.1640625, "learning_rate": 0.00019326965624253209, "loss": 2.8281, "step": 6331 }, { "epoch": 0.29642460062028203, "grad_norm": 1.421875, "learning_rate": 0.00019326755078014367, "loss": 2.881, "step": 6332 }, { "epoch": 0.2964714143601147, "grad_norm": 1.71875, "learning_rate": 0.0001932654449999515, "loss": 2.9934, "step": 6333 }, { "epoch": 0.2965182280999473, "grad_norm": 1.421875, "learning_rate": 0.0001932633389019627, "loss": 3.3239, "step": 6334 }, { "epoch": 0.29656504183978, "grad_norm": 1.390625, "learning_rate": 0.00019326123248618443, "loss": 3.1086, "step": 6335 }, { "epoch": 0.2966118555796126, "grad_norm": 1.1171875, "learning_rate": 0.00019325912575262396, "loss": 3.4312, "step": 6336 }, { "epoch": 0.29665866931944523, "grad_norm": 1.21875, "learning_rate": 0.00019325701870128836, "loss": 3.0602, "step": 6337 }, { "epoch": 0.2967054830592779, "grad_norm": 1.3828125, "learning_rate": 0.0001932549113321849, "loss": 3.3324, "step": 6338 }, { "epoch": 0.2967522967991105, "grad_norm": 1.5, "learning_rate": 0.0001932528036453207, "loss": 3.3886, "step": 6339 }, { "epoch": 0.2967991105389432, "grad_norm": 1.125, "learning_rate": 0.00019325069564070298, "loss": 3.0463, "step": 6340 }, { "epoch": 0.2968459242787758, "grad_norm": 1.1171875, "learning_rate": 0.00019324858731833885, "loss": 3.4243, "step": 6341 }, { "epoch": 0.29689273801860844, "grad_norm": 1.609375, "learning_rate": 0.0001932464786782356, "loss": 3.5232, "step": 6342 }, { "epoch": 0.2969395517584411, "grad_norm": 1.4296875, "learning_rate": 0.00019324436972040037, "loss": 3.4509, "step": 6343 }, { "epoch": 0.29698636549827373, "grad_norm": 1.171875, "learning_rate": 0.0001932422604448403, "loss": 3.3124, "step": 6344 }, { "epoch": 0.2970331792381064, "grad_norm": 1.265625, "learning_rate": 0.00019324015085156265, "loss": 3.539, "step": 6345 }, { "epoch": 0.297079992977939, "grad_norm": 1.2109375, "learning_rate": 0.00019323804094057456, "loss": 3.2514, "step": 6346 }, { "epoch": 0.29712680671777164, "grad_norm": 1.1796875, "learning_rate": 0.00019323593071188323, "loss": 3.1053, "step": 6347 }, { "epoch": 0.2971736204576043, "grad_norm": 1.4375, "learning_rate": 0.00019323382016549586, "loss": 3.4536, "step": 6348 }, { "epoch": 0.29722043419743693, "grad_norm": 1.671875, "learning_rate": 0.00019323170930141966, "loss": 4.1208, "step": 6349 }, { "epoch": 0.2972672479372696, "grad_norm": 1.1953125, "learning_rate": 0.00019322959811966177, "loss": 3.5737, "step": 6350 }, { "epoch": 0.2973140616771022, "grad_norm": 1.3984375, "learning_rate": 0.00019322748662022943, "loss": 3.6582, "step": 6351 }, { "epoch": 0.29736087541693484, "grad_norm": 1.359375, "learning_rate": 0.00019322537480312982, "loss": 3.3436, "step": 6352 }, { "epoch": 0.2974076891567675, "grad_norm": 1.1796875, "learning_rate": 0.0001932232626683701, "loss": 3.4054, "step": 6353 }, { "epoch": 0.29745450289660014, "grad_norm": 1.1640625, "learning_rate": 0.00019322115021595755, "loss": 3.2489, "step": 6354 }, { "epoch": 0.2975013166364328, "grad_norm": 1.3671875, "learning_rate": 0.00019321903744589925, "loss": 3.3352, "step": 6355 }, { "epoch": 0.29754813037626543, "grad_norm": 1.3984375, "learning_rate": 0.00019321692435820254, "loss": 2.9678, "step": 6356 }, { "epoch": 0.29759494411609805, "grad_norm": 1.734375, "learning_rate": 0.00019321481095287452, "loss": 3.4356, "step": 6357 }, { "epoch": 0.2976417578559307, "grad_norm": 1.765625, "learning_rate": 0.0001932126972299224, "loss": 3.355, "step": 6358 }, { "epoch": 0.29768857159576334, "grad_norm": 1.0859375, "learning_rate": 0.00019321058318935345, "loss": 3.0566, "step": 6359 }, { "epoch": 0.297735385335596, "grad_norm": 1.09375, "learning_rate": 0.0001932084688311748, "loss": 3.1095, "step": 6360 }, { "epoch": 0.29778219907542863, "grad_norm": 1.359375, "learning_rate": 0.00019320635415539367, "loss": 3.5545, "step": 6361 }, { "epoch": 0.29782901281526125, "grad_norm": 1.3359375, "learning_rate": 0.00019320423916201727, "loss": 3.3934, "step": 6362 }, { "epoch": 0.2978758265550939, "grad_norm": 1.3828125, "learning_rate": 0.00019320212385105285, "loss": 3.6532, "step": 6363 }, { "epoch": 0.29792264029492654, "grad_norm": 1.140625, "learning_rate": 0.00019320000822250756, "loss": 3.1688, "step": 6364 }, { "epoch": 0.2979694540347592, "grad_norm": 1.3046875, "learning_rate": 0.0001931978922763886, "loss": 4.7951, "step": 6365 }, { "epoch": 0.29801626777459184, "grad_norm": 1.1796875, "learning_rate": 0.00019319577601270327, "loss": 3.2935, "step": 6366 }, { "epoch": 0.2980630815144245, "grad_norm": 1.421875, "learning_rate": 0.0001931936594314587, "loss": 3.4627, "step": 6367 }, { "epoch": 0.29810989525425713, "grad_norm": 1.3203125, "learning_rate": 0.00019319154253266208, "loss": 3.586, "step": 6368 }, { "epoch": 0.29815670899408975, "grad_norm": 1.2109375, "learning_rate": 0.0001931894253163207, "loss": 3.0655, "step": 6369 }, { "epoch": 0.2982035227339224, "grad_norm": 2.78125, "learning_rate": 0.00019318730778244175, "loss": 3.1291, "step": 6370 }, { "epoch": 0.29825033647375504, "grad_norm": 1.1015625, "learning_rate": 0.0001931851899310324, "loss": 2.9818, "step": 6371 }, { "epoch": 0.2982971502135877, "grad_norm": 2.234375, "learning_rate": 0.00019318307176209993, "loss": 3.3552, "step": 6372 }, { "epoch": 0.29834396395342033, "grad_norm": 1.703125, "learning_rate": 0.00019318095327565152, "loss": 3.2525, "step": 6373 }, { "epoch": 0.29839077769325295, "grad_norm": 1.125, "learning_rate": 0.0001931788344716944, "loss": 3.0474, "step": 6374 }, { "epoch": 0.2984375914330856, "grad_norm": 1.0390625, "learning_rate": 0.0001931767153502358, "loss": 4.1111, "step": 6375 }, { "epoch": 0.29848440517291824, "grad_norm": 1.34375, "learning_rate": 0.00019317459591128292, "loss": 3.428, "step": 6376 }, { "epoch": 0.2985312189127509, "grad_norm": 1.1640625, "learning_rate": 0.00019317247615484298, "loss": 3.4412, "step": 6377 }, { "epoch": 0.29857803265258354, "grad_norm": 1.4609375, "learning_rate": 0.0001931703560809232, "loss": 3.6518, "step": 6378 }, { "epoch": 0.29862484639241615, "grad_norm": 1.3984375, "learning_rate": 0.00019316823568953084, "loss": 3.9333, "step": 6379 }, { "epoch": 0.29867166013224883, "grad_norm": 1.265625, "learning_rate": 0.0001931661149806731, "loss": 2.5395, "step": 6380 }, { "epoch": 0.29871847387208145, "grad_norm": 1.640625, "learning_rate": 0.0001931639939543572, "loss": 3.156, "step": 6381 }, { "epoch": 0.2987652876119141, "grad_norm": 1.3125, "learning_rate": 0.00019316187261059038, "loss": 3.0805, "step": 6382 }, { "epoch": 0.29881210135174674, "grad_norm": 1.7421875, "learning_rate": 0.00019315975094937987, "loss": 3.7981, "step": 6383 }, { "epoch": 0.29885891509157936, "grad_norm": 1.203125, "learning_rate": 0.00019315762897073287, "loss": 3.0326, "step": 6384 }, { "epoch": 0.29890572883141203, "grad_norm": 1.2578125, "learning_rate": 0.00019315550667465666, "loss": 3.2445, "step": 6385 }, { "epoch": 0.29895254257124465, "grad_norm": 1.375, "learning_rate": 0.00019315338406115844, "loss": 2.9907, "step": 6386 }, { "epoch": 0.2989993563110773, "grad_norm": 1.515625, "learning_rate": 0.00019315126113024546, "loss": 3.2226, "step": 6387 }, { "epoch": 0.29904617005090994, "grad_norm": 1.6171875, "learning_rate": 0.00019314913788192492, "loss": 3.6595, "step": 6388 }, { "epoch": 0.29909298379074256, "grad_norm": 1.4453125, "learning_rate": 0.0001931470143162041, "loss": 3.5927, "step": 6389 }, { "epoch": 0.29913979753057524, "grad_norm": 1.34375, "learning_rate": 0.00019314489043309018, "loss": 3.7269, "step": 6390 }, { "epoch": 0.29918661127040785, "grad_norm": 1.5703125, "learning_rate": 0.00019314276623259047, "loss": 4.5317, "step": 6391 }, { "epoch": 0.29923342501024053, "grad_norm": 1.2734375, "learning_rate": 0.00019314064171471214, "loss": 3.3722, "step": 6392 }, { "epoch": 0.29928023875007315, "grad_norm": 1.046875, "learning_rate": 0.00019313851687946247, "loss": 3.1077, "step": 6393 }, { "epoch": 0.29932705248990576, "grad_norm": 1.484375, "learning_rate": 0.0001931363917268487, "loss": 3.4111, "step": 6394 }, { "epoch": 0.29937386622973844, "grad_norm": 1.3046875, "learning_rate": 0.000193134266256878, "loss": 3.0909, "step": 6395 }, { "epoch": 0.29942067996957106, "grad_norm": 1.28125, "learning_rate": 0.00019313214046955772, "loss": 3.6355, "step": 6396 }, { "epoch": 0.29946749370940373, "grad_norm": 1.75, "learning_rate": 0.00019313001436489503, "loss": 3.6354, "step": 6397 }, { "epoch": 0.29951430744923635, "grad_norm": 1.375, "learning_rate": 0.0001931278879428972, "loss": 3.376, "step": 6398 }, { "epoch": 0.29956112118906897, "grad_norm": 1.1875, "learning_rate": 0.00019312576120357152, "loss": 3.2565, "step": 6399 }, { "epoch": 0.29960793492890164, "grad_norm": 1.328125, "learning_rate": 0.00019312363414692517, "loss": 3.3161, "step": 6400 }, { "epoch": 0.29965474866873426, "grad_norm": 1.28125, "learning_rate": 0.0001931215067729654, "loss": 3.1578, "step": 6401 }, { "epoch": 0.29970156240856693, "grad_norm": 1.4140625, "learning_rate": 0.0001931193790816995, "loss": 3.2, "step": 6402 }, { "epoch": 0.29974837614839955, "grad_norm": 1.3984375, "learning_rate": 0.00019311725107313467, "loss": 2.6225, "step": 6403 }, { "epoch": 0.29979518988823217, "grad_norm": 1.4921875, "learning_rate": 0.0001931151227472782, "loss": 3.3119, "step": 6404 }, { "epoch": 0.29984200362806485, "grad_norm": 1.3984375, "learning_rate": 0.00019311299410413736, "loss": 3.1851, "step": 6405 }, { "epoch": 0.29988881736789746, "grad_norm": 1.21875, "learning_rate": 0.00019311086514371933, "loss": 3.091, "step": 6406 }, { "epoch": 0.29993563110773014, "grad_norm": 1.5546875, "learning_rate": 0.00019310873586603146, "loss": 3.2037, "step": 6407 }, { "epoch": 0.29998244484756276, "grad_norm": 1.2109375, "learning_rate": 0.0001931066062710809, "loss": 3.2393, "step": 6408 }, { "epoch": 0.3000292585873954, "grad_norm": 2.375, "learning_rate": 0.00019310447635887502, "loss": 3.694, "step": 6409 }, { "epoch": 0.30007607232722805, "grad_norm": 1.3046875, "learning_rate": 0.00019310234612942098, "loss": 2.9927, "step": 6410 }, { "epoch": 0.30012288606706067, "grad_norm": 1.3515625, "learning_rate": 0.00019310021558272607, "loss": 3.1169, "step": 6411 }, { "epoch": 0.30016969980689334, "grad_norm": 1.59375, "learning_rate": 0.00019309808471879757, "loss": 3.5768, "step": 6412 }, { "epoch": 0.30021651354672596, "grad_norm": 1.0859375, "learning_rate": 0.00019309595353764273, "loss": 3.1046, "step": 6413 }, { "epoch": 0.3002633272865586, "grad_norm": 1.5234375, "learning_rate": 0.0001930938220392688, "loss": 3.4473, "step": 6414 }, { "epoch": 0.30031014102639125, "grad_norm": 1.40625, "learning_rate": 0.00019309169022368308, "loss": 3.2411, "step": 6415 }, { "epoch": 0.30035695476622387, "grad_norm": 1.6875, "learning_rate": 0.00019308955809089277, "loss": 3.1322, "step": 6416 }, { "epoch": 0.30040376850605655, "grad_norm": 1.3984375, "learning_rate": 0.00019308742564090522, "loss": 3.2467, "step": 6417 }, { "epoch": 0.30045058224588916, "grad_norm": 1.15625, "learning_rate": 0.0001930852928737276, "loss": 3.6457, "step": 6418 }, { "epoch": 0.3004973959857218, "grad_norm": 1.203125, "learning_rate": 0.00019308315978936724, "loss": 3.2985, "step": 6419 }, { "epoch": 0.30054420972555446, "grad_norm": 2.46875, "learning_rate": 0.00019308102638783138, "loss": 3.2955, "step": 6420 }, { "epoch": 0.3005910234653871, "grad_norm": 1.234375, "learning_rate": 0.00019307889266912732, "loss": 3.4942, "step": 6421 }, { "epoch": 0.30063783720521975, "grad_norm": 1.1171875, "learning_rate": 0.0001930767586332623, "loss": 3.2783, "step": 6422 }, { "epoch": 0.30068465094505237, "grad_norm": 1.125, "learning_rate": 0.00019307462428024363, "loss": 3.363, "step": 6423 }, { "epoch": 0.30073146468488504, "grad_norm": 1.1484375, "learning_rate": 0.00019307248961007852, "loss": 2.7638, "step": 6424 }, { "epoch": 0.30077827842471766, "grad_norm": 1.6328125, "learning_rate": 0.00019307035462277434, "loss": 3.2596, "step": 6425 }, { "epoch": 0.3008250921645503, "grad_norm": 0.9921875, "learning_rate": 0.00019306821931833827, "loss": 3.396, "step": 6426 }, { "epoch": 0.30087190590438295, "grad_norm": 1.5078125, "learning_rate": 0.0001930660836967776, "loss": 3.7304, "step": 6427 }, { "epoch": 0.30091871964421557, "grad_norm": 1.484375, "learning_rate": 0.0001930639477580997, "loss": 2.555, "step": 6428 }, { "epoch": 0.30096553338404824, "grad_norm": 2.640625, "learning_rate": 0.00019306181150231168, "loss": 3.5383, "step": 6429 }, { "epoch": 0.30101234712388086, "grad_norm": 1.296875, "learning_rate": 0.00019305967492942098, "loss": 2.9457, "step": 6430 }, { "epoch": 0.3010591608637135, "grad_norm": 1.3671875, "learning_rate": 0.00019305753803943481, "loss": 3.4019, "step": 6431 }, { "epoch": 0.30110597460354616, "grad_norm": 1.1015625, "learning_rate": 0.00019305540083236044, "loss": 3.353, "step": 6432 }, { "epoch": 0.3011527883433788, "grad_norm": 1.9140625, "learning_rate": 0.00019305326330820518, "loss": 3.539, "step": 6433 }, { "epoch": 0.30119960208321145, "grad_norm": 1.2109375, "learning_rate": 0.00019305112546697632, "loss": 3.2932, "step": 6434 }, { "epoch": 0.30124641582304407, "grad_norm": 1.1328125, "learning_rate": 0.0001930489873086811, "loss": 2.945, "step": 6435 }, { "epoch": 0.3012932295628767, "grad_norm": 1.7890625, "learning_rate": 0.00019304684883332686, "loss": 3.2362, "step": 6436 }, { "epoch": 0.30134004330270936, "grad_norm": 1.46875, "learning_rate": 0.00019304471004092085, "loss": 3.3796, "step": 6437 }, { "epoch": 0.301386857042542, "grad_norm": 1.4140625, "learning_rate": 0.00019304257093147035, "loss": 3.4895, "step": 6438 }, { "epoch": 0.30143367078237465, "grad_norm": 1.0625, "learning_rate": 0.0001930404315049827, "loss": 3.2853, "step": 6439 }, { "epoch": 0.30148048452220727, "grad_norm": 1.515625, "learning_rate": 0.00019303829176146514, "loss": 3.3302, "step": 6440 }, { "epoch": 0.3015272982620399, "grad_norm": 1.4453125, "learning_rate": 0.000193036151700925, "loss": 2.7214, "step": 6441 }, { "epoch": 0.30157411200187256, "grad_norm": 1.625, "learning_rate": 0.00019303401132336954, "loss": 3.4604, "step": 6442 }, { "epoch": 0.3016209257417052, "grad_norm": 1.1015625, "learning_rate": 0.00019303187062880606, "loss": 2.8585, "step": 6443 }, { "epoch": 0.30166773948153786, "grad_norm": 1.3359375, "learning_rate": 0.00019302972961724187, "loss": 3.674, "step": 6444 }, { "epoch": 0.3017145532213705, "grad_norm": 1.21875, "learning_rate": 0.00019302758828868423, "loss": 3.4261, "step": 6445 }, { "epoch": 0.3017613669612031, "grad_norm": 1.828125, "learning_rate": 0.00019302544664314046, "loss": 3.3418, "step": 6446 }, { "epoch": 0.30180818070103577, "grad_norm": 1.4453125, "learning_rate": 0.00019302330468061788, "loss": 3.0683, "step": 6447 }, { "epoch": 0.3018549944408684, "grad_norm": 1.1953125, "learning_rate": 0.00019302116240112372, "loss": 3.1591, "step": 6448 }, { "epoch": 0.30190180818070106, "grad_norm": 1.609375, "learning_rate": 0.0001930190198046654, "loss": 3.2203, "step": 6449 }, { "epoch": 0.3019486219205337, "grad_norm": 1.3359375, "learning_rate": 0.0001930168768912501, "loss": 3.1683, "step": 6450 }, { "epoch": 0.3019954356603663, "grad_norm": 1.390625, "learning_rate": 0.00019301473366088517, "loss": 3.737, "step": 6451 }, { "epoch": 0.30204224940019897, "grad_norm": 1.125, "learning_rate": 0.0001930125901135779, "loss": 2.9813, "step": 6452 }, { "epoch": 0.3020890631400316, "grad_norm": 1.53125, "learning_rate": 0.00019301044624933563, "loss": 3.2786, "step": 6453 }, { "epoch": 0.30213587687986426, "grad_norm": 1.4453125, "learning_rate": 0.0001930083020681656, "loss": 4.8052, "step": 6454 }, { "epoch": 0.3021826906196969, "grad_norm": 1.5078125, "learning_rate": 0.00019300615757007522, "loss": 3.6749, "step": 6455 }, { "epoch": 0.3022295043595295, "grad_norm": 1.609375, "learning_rate": 0.00019300401275507172, "loss": 3.2742, "step": 6456 }, { "epoch": 0.3022763180993622, "grad_norm": 1.171875, "learning_rate": 0.00019300186762316238, "loss": 3.3822, "step": 6457 }, { "epoch": 0.3023231318391948, "grad_norm": 1.3671875, "learning_rate": 0.0001929997221743546, "loss": 3.1234, "step": 6458 }, { "epoch": 0.30236994557902747, "grad_norm": 1.2734375, "learning_rate": 0.00019299757640865562, "loss": 3.4511, "step": 6459 }, { "epoch": 0.3024167593188601, "grad_norm": 1.953125, "learning_rate": 0.00019299543032607277, "loss": 3.6262, "step": 6460 }, { "epoch": 0.3024635730586927, "grad_norm": 1.1796875, "learning_rate": 0.00019299328392661337, "loss": 3.603, "step": 6461 }, { "epoch": 0.3025103867985254, "grad_norm": 1.5703125, "learning_rate": 0.00019299113721028474, "loss": 3.1843, "step": 6462 }, { "epoch": 0.302557200538358, "grad_norm": 1.390625, "learning_rate": 0.00019298899017709418, "loss": 3.1712, "step": 6463 }, { "epoch": 0.30260401427819067, "grad_norm": 1.2734375, "learning_rate": 0.000192986842827049, "loss": 3.0044, "step": 6464 }, { "epoch": 0.3026508280180233, "grad_norm": 1.2734375, "learning_rate": 0.00019298469516015654, "loss": 3.1139, "step": 6465 }, { "epoch": 0.3026976417578559, "grad_norm": 1.1796875, "learning_rate": 0.0001929825471764241, "loss": 3.2926, "step": 6466 }, { "epoch": 0.3027444554976886, "grad_norm": 1.40625, "learning_rate": 0.000192980398875859, "loss": 3.2983, "step": 6467 }, { "epoch": 0.3027912692375212, "grad_norm": 1.1015625, "learning_rate": 0.00019297825025846858, "loss": 3.3086, "step": 6468 }, { "epoch": 0.3028380829773539, "grad_norm": 1.375, "learning_rate": 0.00019297610132426013, "loss": 3.0958, "step": 6469 }, { "epoch": 0.3028848967171865, "grad_norm": 1.0625, "learning_rate": 0.000192973952073241, "loss": 2.9865, "step": 6470 }, { "epoch": 0.3029317104570191, "grad_norm": 1.0390625, "learning_rate": 0.0001929718025054185, "loss": 2.7768, "step": 6471 }, { "epoch": 0.3029785241968518, "grad_norm": 1.203125, "learning_rate": 0.00019296965262079996, "loss": 3.2932, "step": 6472 }, { "epoch": 0.3030253379366844, "grad_norm": 1.734375, "learning_rate": 0.0001929675024193927, "loss": 3.5071, "step": 6473 }, { "epoch": 0.3030721516765171, "grad_norm": 1.890625, "learning_rate": 0.00019296535190120405, "loss": 3.4033, "step": 6474 }, { "epoch": 0.3031189654163497, "grad_norm": 1.6875, "learning_rate": 0.00019296320106624134, "loss": 3.5898, "step": 6475 }, { "epoch": 0.3031657791561823, "grad_norm": 1.7578125, "learning_rate": 0.00019296104991451188, "loss": 3.2825, "step": 6476 }, { "epoch": 0.303212592896015, "grad_norm": 1.5078125, "learning_rate": 0.00019295889844602303, "loss": 3.4178, "step": 6477 }, { "epoch": 0.3032594066358476, "grad_norm": 1.5546875, "learning_rate": 0.0001929567466607821, "loss": 3.3793, "step": 6478 }, { "epoch": 0.3033062203756803, "grad_norm": 1.3046875, "learning_rate": 0.00019295459455879647, "loss": 3.3943, "step": 6479 }, { "epoch": 0.3033530341155129, "grad_norm": 1.265625, "learning_rate": 0.00019295244214007337, "loss": 3.3624, "step": 6480 }, { "epoch": 0.3033998478553455, "grad_norm": 1.765625, "learning_rate": 0.00019295028940462022, "loss": 3.1985, "step": 6481 }, { "epoch": 0.3034466615951782, "grad_norm": 1.7421875, "learning_rate": 0.00019294813635244433, "loss": 3.7386, "step": 6482 }, { "epoch": 0.3034934753350108, "grad_norm": 1.125, "learning_rate": 0.00019294598298355304, "loss": 3.5132, "step": 6483 }, { "epoch": 0.3035402890748435, "grad_norm": 1.1640625, "learning_rate": 0.00019294382929795369, "loss": 2.9614, "step": 6484 }, { "epoch": 0.3035871028146761, "grad_norm": 1.6015625, "learning_rate": 0.00019294167529565362, "loss": 3.4398, "step": 6485 }, { "epoch": 0.3036339165545088, "grad_norm": 1.0, "learning_rate": 0.00019293952097666016, "loss": 2.0932, "step": 6486 }, { "epoch": 0.3036807302943414, "grad_norm": 1.1484375, "learning_rate": 0.00019293736634098062, "loss": 3.439, "step": 6487 }, { "epoch": 0.303727544034174, "grad_norm": 1.359375, "learning_rate": 0.00019293521138862244, "loss": 3.383, "step": 6488 }, { "epoch": 0.3037743577740067, "grad_norm": 1.3671875, "learning_rate": 0.00019293305611959284, "loss": 3.2222, "step": 6489 }, { "epoch": 0.3038211715138393, "grad_norm": 1.390625, "learning_rate": 0.00019293090053389925, "loss": 3.2548, "step": 6490 }, { "epoch": 0.303867985253672, "grad_norm": 1.3984375, "learning_rate": 0.000192928744631549, "loss": 3.444, "step": 6491 }, { "epoch": 0.3039147989935046, "grad_norm": 1.4765625, "learning_rate": 0.0001929265884125494, "loss": 3.1719, "step": 6492 }, { "epoch": 0.3039616127333372, "grad_norm": 1.5546875, "learning_rate": 0.00019292443187690782, "loss": 3.7057, "step": 6493 }, { "epoch": 0.3040084264731699, "grad_norm": 2.125, "learning_rate": 0.00019292227502463164, "loss": 3.4227, "step": 6494 }, { "epoch": 0.3040552402130025, "grad_norm": 1.7890625, "learning_rate": 0.00019292011785572815, "loss": 3.3991, "step": 6495 }, { "epoch": 0.3041020539528352, "grad_norm": 1.421875, "learning_rate": 0.00019291796037020474, "loss": 3.6695, "step": 6496 }, { "epoch": 0.3041488676926678, "grad_norm": 1.078125, "learning_rate": 0.00019291580256806877, "loss": 3.9143, "step": 6497 }, { "epoch": 0.3041956814325004, "grad_norm": 1.0390625, "learning_rate": 0.00019291364444932752, "loss": 3.1264, "step": 6498 }, { "epoch": 0.3042424951723331, "grad_norm": 1.859375, "learning_rate": 0.00019291148601398845, "loss": 3.5902, "step": 6499 }, { "epoch": 0.3042893089121657, "grad_norm": 2.125, "learning_rate": 0.00019290932726205885, "loss": 3.0863, "step": 6500 }, { "epoch": 0.3043361226519984, "grad_norm": 1.0859375, "learning_rate": 0.00019290716819354606, "loss": 2.9191, "step": 6501 }, { "epoch": 0.304382936391831, "grad_norm": 1.3515625, "learning_rate": 0.0001929050088084575, "loss": 3.2617, "step": 6502 }, { "epoch": 0.3044297501316636, "grad_norm": 1.4453125, "learning_rate": 0.00019290284910680047, "loss": 3.2852, "step": 6503 }, { "epoch": 0.3044765638714963, "grad_norm": 1.296875, "learning_rate": 0.00019290068908858236, "loss": 3.1317, "step": 6504 }, { "epoch": 0.3045233776113289, "grad_norm": 1.4296875, "learning_rate": 0.00019289852875381053, "loss": 2.9719, "step": 6505 }, { "epoch": 0.3045701913511616, "grad_norm": 1.3203125, "learning_rate": 0.00019289636810249233, "loss": 3.6154, "step": 6506 }, { "epoch": 0.3046170050909942, "grad_norm": 1.2109375, "learning_rate": 0.00019289420713463514, "loss": 3.0958, "step": 6507 }, { "epoch": 0.3046638188308268, "grad_norm": 1.5234375, "learning_rate": 0.00019289204585024624, "loss": 2.9713, "step": 6508 }, { "epoch": 0.3047106325706595, "grad_norm": 1.265625, "learning_rate": 0.00019288988424933314, "loss": 3.3722, "step": 6509 }, { "epoch": 0.3047574463104921, "grad_norm": 1.7109375, "learning_rate": 0.0001928877223319031, "loss": 3.307, "step": 6510 }, { "epoch": 0.3048042600503248, "grad_norm": 1.1171875, "learning_rate": 0.0001928855600979635, "loss": 3.3735, "step": 6511 }, { "epoch": 0.3048510737901574, "grad_norm": 1.7734375, "learning_rate": 0.00019288339754752176, "loss": 3.4965, "step": 6512 }, { "epoch": 0.30489788752999003, "grad_norm": 1.1796875, "learning_rate": 0.00019288123468058518, "loss": 3.4529, "step": 6513 }, { "epoch": 0.3049447012698227, "grad_norm": 2.1875, "learning_rate": 0.00019287907149716119, "loss": 3.4907, "step": 6514 }, { "epoch": 0.3049915150096553, "grad_norm": 1.703125, "learning_rate": 0.0001928769079972571, "loss": 3.5666, "step": 6515 }, { "epoch": 0.305038328749488, "grad_norm": 1.3359375, "learning_rate": 0.00019287474418088033, "loss": 3.0502, "step": 6516 }, { "epoch": 0.3050851424893206, "grad_norm": 1.4140625, "learning_rate": 0.00019287258004803827, "loss": 3.6699, "step": 6517 }, { "epoch": 0.30513195622915323, "grad_norm": 1.2578125, "learning_rate": 0.0001928704155987382, "loss": 3.469, "step": 6518 }, { "epoch": 0.3051787699689859, "grad_norm": 1.3125, "learning_rate": 0.00019286825083298758, "loss": 3.5056, "step": 6519 }, { "epoch": 0.3052255837088185, "grad_norm": 2.015625, "learning_rate": 0.0001928660857507938, "loss": 2.8483, "step": 6520 }, { "epoch": 0.3052723974486512, "grad_norm": 1.2578125, "learning_rate": 0.00019286392035216415, "loss": 3.0494, "step": 6521 }, { "epoch": 0.3053192111884838, "grad_norm": 1.1328125, "learning_rate": 0.00019286175463710607, "loss": 3.0544, "step": 6522 }, { "epoch": 0.30536602492831644, "grad_norm": 1.46875, "learning_rate": 0.00019285958860562694, "loss": 3.2877, "step": 6523 }, { "epoch": 0.3054128386681491, "grad_norm": 1.3515625, "learning_rate": 0.00019285742225773412, "loss": 3.2351, "step": 6524 }, { "epoch": 0.30545965240798173, "grad_norm": 1.4375, "learning_rate": 0.00019285525559343503, "loss": 3.7301, "step": 6525 }, { "epoch": 0.3055064661478144, "grad_norm": 1.4453125, "learning_rate": 0.000192853088612737, "loss": 3.5689, "step": 6526 }, { "epoch": 0.305553279887647, "grad_norm": 1.3515625, "learning_rate": 0.00019285092131564744, "loss": 3.3716, "step": 6527 }, { "epoch": 0.30560009362747964, "grad_norm": 1.3359375, "learning_rate": 0.00019284875370217372, "loss": 3.4625, "step": 6528 }, { "epoch": 0.3056469073673123, "grad_norm": 1.203125, "learning_rate": 0.00019284658577232327, "loss": 3.1123, "step": 6529 }, { "epoch": 0.30569372110714493, "grad_norm": 1.203125, "learning_rate": 0.0001928444175261034, "loss": 3.5703, "step": 6530 }, { "epoch": 0.3057405348469776, "grad_norm": 1.2421875, "learning_rate": 0.0001928422489635216, "loss": 3.0318, "step": 6531 }, { "epoch": 0.3057873485868102, "grad_norm": 1.0234375, "learning_rate": 0.00019284008008458515, "loss": 4.0861, "step": 6532 }, { "epoch": 0.30583416232664284, "grad_norm": 1.5859375, "learning_rate": 0.00019283791088930152, "loss": 3.0087, "step": 6533 }, { "epoch": 0.3058809760664755, "grad_norm": 1.171875, "learning_rate": 0.00019283574137767809, "loss": 3.5451, "step": 6534 }, { "epoch": 0.30592778980630814, "grad_norm": 1.390625, "learning_rate": 0.0001928335715497222, "loss": 3.5422, "step": 6535 }, { "epoch": 0.3059746035461408, "grad_norm": 1.2734375, "learning_rate": 0.00019283140140544133, "loss": 3.2133, "step": 6536 }, { "epoch": 0.30602141728597343, "grad_norm": 1.2265625, "learning_rate": 0.00019282923094484278, "loss": 3.2029, "step": 6537 }, { "epoch": 0.30606823102580605, "grad_norm": 1.34375, "learning_rate": 0.00019282706016793402, "loss": 3.3682, "step": 6538 }, { "epoch": 0.3061150447656387, "grad_norm": 1.546875, "learning_rate": 0.0001928248890747224, "loss": 3.4206, "step": 6539 }, { "epoch": 0.30616185850547134, "grad_norm": 1.8046875, "learning_rate": 0.00019282271766521533, "loss": 3.2192, "step": 6540 }, { "epoch": 0.306208672245304, "grad_norm": 1.5078125, "learning_rate": 0.00019282054593942026, "loss": 3.1862, "step": 6541 }, { "epoch": 0.30625548598513663, "grad_norm": 1.15625, "learning_rate": 0.0001928183738973445, "loss": 3.2334, "step": 6542 }, { "epoch": 0.30630229972496925, "grad_norm": 1.9140625, "learning_rate": 0.0001928162015389955, "loss": 3.3717, "step": 6543 }, { "epoch": 0.3063491134648019, "grad_norm": 1.2578125, "learning_rate": 0.00019281402886438067, "loss": 3.8842, "step": 6544 }, { "epoch": 0.30639592720463454, "grad_norm": 1.2734375, "learning_rate": 0.00019281185587350738, "loss": 2.9352, "step": 6545 }, { "epoch": 0.3064427409444672, "grad_norm": 0.99609375, "learning_rate": 0.00019280968256638308, "loss": 2.5564, "step": 6546 }, { "epoch": 0.30648955468429984, "grad_norm": 1.09375, "learning_rate": 0.00019280750894301514, "loss": 3.0208, "step": 6547 }, { "epoch": 0.3065363684241325, "grad_norm": 1.84375, "learning_rate": 0.000192805335003411, "loss": 3.1642, "step": 6548 }, { "epoch": 0.30658318216396513, "grad_norm": 1.3828125, "learning_rate": 0.00019280316074757803, "loss": 3.4092, "step": 6549 }, { "epoch": 0.30662999590379775, "grad_norm": 1.3984375, "learning_rate": 0.00019280098617552365, "loss": 2.8352, "step": 6550 }, { "epoch": 0.3066768096436304, "grad_norm": 1.1015625, "learning_rate": 0.00019279881128725528, "loss": 3.243, "step": 6551 }, { "epoch": 0.30672362338346304, "grad_norm": 1.4375, "learning_rate": 0.00019279663608278032, "loss": 3.3923, "step": 6552 }, { "epoch": 0.3067704371232957, "grad_norm": 2.9375, "learning_rate": 0.0001927944605621062, "loss": 3.0823, "step": 6553 }, { "epoch": 0.30681725086312833, "grad_norm": 1.578125, "learning_rate": 0.0001927922847252403, "loss": 3.2413, "step": 6554 }, { "epoch": 0.30686406460296095, "grad_norm": 1.5, "learning_rate": 0.00019279010857219006, "loss": 2.9049, "step": 6555 }, { "epoch": 0.3069108783427936, "grad_norm": 1.3203125, "learning_rate": 0.00019278793210296286, "loss": 3.4414, "step": 6556 }, { "epoch": 0.30695769208262624, "grad_norm": 1.5078125, "learning_rate": 0.00019278575531756617, "loss": 3.0084, "step": 6557 }, { "epoch": 0.3070045058224589, "grad_norm": 1.796875, "learning_rate": 0.0001927835782160074, "loss": 3.9458, "step": 6558 }, { "epoch": 0.30705131956229154, "grad_norm": 1.3828125, "learning_rate": 0.00019278140079829394, "loss": 3.6635, "step": 6559 }, { "epoch": 0.30709813330212415, "grad_norm": 1.03125, "learning_rate": 0.00019277922306443324, "loss": 2.9637, "step": 6560 }, { "epoch": 0.30714494704195683, "grad_norm": 1.6015625, "learning_rate": 0.00019277704501443265, "loss": 2.9316, "step": 6561 }, { "epoch": 0.30719176078178945, "grad_norm": 1.6875, "learning_rate": 0.00019277486664829967, "loss": 3.2612, "step": 6562 }, { "epoch": 0.3072385745216221, "grad_norm": 1.3125, "learning_rate": 0.00019277268796604168, "loss": 3.2008, "step": 6563 }, { "epoch": 0.30728538826145474, "grad_norm": 1.125, "learning_rate": 0.00019277050896766612, "loss": 2.7209, "step": 6564 }, { "epoch": 0.30733220200128736, "grad_norm": 1.2265625, "learning_rate": 0.00019276832965318047, "loss": 2.9398, "step": 6565 }, { "epoch": 0.30737901574112003, "grad_norm": 1.046875, "learning_rate": 0.00019276615002259204, "loss": 3.3036, "step": 6566 }, { "epoch": 0.30742582948095265, "grad_norm": 1.4453125, "learning_rate": 0.00019276397007590834, "loss": 3.1352, "step": 6567 }, { "epoch": 0.3074726432207853, "grad_norm": 1.1796875, "learning_rate": 0.00019276178981313675, "loss": 2.9494, "step": 6568 }, { "epoch": 0.30751945696061794, "grad_norm": 1.1015625, "learning_rate": 0.00019275960923428474, "loss": 2.1352, "step": 6569 }, { "epoch": 0.30756627070045056, "grad_norm": 1.109375, "learning_rate": 0.0001927574283393597, "loss": 2.925, "step": 6570 }, { "epoch": 0.30761308444028324, "grad_norm": 3.03125, "learning_rate": 0.0001927552471283691, "loss": 2.5328, "step": 6571 }, { "epoch": 0.30765989818011585, "grad_norm": 1.234375, "learning_rate": 0.00019275306560132037, "loss": 3.413, "step": 6572 }, { "epoch": 0.30770671191994853, "grad_norm": 1.2109375, "learning_rate": 0.00019275088375822092, "loss": 3.1747, "step": 6573 }, { "epoch": 0.30775352565978115, "grad_norm": 1.4453125, "learning_rate": 0.0001927487015990782, "loss": 3.6027, "step": 6574 }, { "epoch": 0.30780033939961376, "grad_norm": 1.375, "learning_rate": 0.00019274651912389964, "loss": 3.6253, "step": 6575 }, { "epoch": 0.30784715313944644, "grad_norm": 1.1796875, "learning_rate": 0.00019274433633269266, "loss": 3.1355, "step": 6576 }, { "epoch": 0.30789396687927906, "grad_norm": 1.2578125, "learning_rate": 0.00019274215322546474, "loss": 3.2476, "step": 6577 }, { "epoch": 0.30794078061911173, "grad_norm": 1.421875, "learning_rate": 0.0001927399698022233, "loss": 3.1509, "step": 6578 }, { "epoch": 0.30798759435894435, "grad_norm": 1.1015625, "learning_rate": 0.00019273778606297573, "loss": 3.3207, "step": 6579 }, { "epoch": 0.30803440809877697, "grad_norm": 1.1640625, "learning_rate": 0.00019273560200772956, "loss": 3.2082, "step": 6580 }, { "epoch": 0.30808122183860964, "grad_norm": 1.3203125, "learning_rate": 0.00019273341763649215, "loss": 3.3498, "step": 6581 }, { "epoch": 0.30812803557844226, "grad_norm": 1.375, "learning_rate": 0.000192731232949271, "loss": 3.4063, "step": 6582 }, { "epoch": 0.30817484931827493, "grad_norm": 1.0859375, "learning_rate": 0.00019272904794607353, "loss": 3.1651, "step": 6583 }, { "epoch": 0.30822166305810755, "grad_norm": 1.0703125, "learning_rate": 0.0001927268626269072, "loss": 2.3367, "step": 6584 }, { "epoch": 0.30826847679794017, "grad_norm": 1.203125, "learning_rate": 0.00019272467699177944, "loss": 3.3045, "step": 6585 }, { "epoch": 0.30831529053777285, "grad_norm": 1.1796875, "learning_rate": 0.00019272249104069768, "loss": 3.2846, "step": 6586 }, { "epoch": 0.30836210427760546, "grad_norm": 1.4921875, "learning_rate": 0.00019272030477366942, "loss": 3.0725, "step": 6587 }, { "epoch": 0.30840891801743814, "grad_norm": 1.1953125, "learning_rate": 0.00019271811819070208, "loss": 3.21, "step": 6588 }, { "epoch": 0.30845573175727076, "grad_norm": 1.4921875, "learning_rate": 0.00019271593129180307, "loss": 3.3802, "step": 6589 }, { "epoch": 0.3085025454971034, "grad_norm": 1.078125, "learning_rate": 0.00019271374407697992, "loss": 2.9913, "step": 6590 }, { "epoch": 0.30854935923693605, "grad_norm": 1.3984375, "learning_rate": 0.00019271155654624003, "loss": 3.2976, "step": 6591 }, { "epoch": 0.30859617297676867, "grad_norm": 1.6796875, "learning_rate": 0.00019270936869959088, "loss": 3.3538, "step": 6592 }, { "epoch": 0.30864298671660134, "grad_norm": 1.4296875, "learning_rate": 0.00019270718053703993, "loss": 4.6486, "step": 6593 }, { "epoch": 0.30868980045643396, "grad_norm": 1.0390625, "learning_rate": 0.0001927049920585946, "loss": 3.0413, "step": 6594 }, { "epoch": 0.3087366141962666, "grad_norm": 1.1953125, "learning_rate": 0.00019270280326426237, "loss": 3.068, "step": 6595 }, { "epoch": 0.30878342793609925, "grad_norm": 1.1875, "learning_rate": 0.00019270061415405072, "loss": 2.9882, "step": 6596 }, { "epoch": 0.30883024167593187, "grad_norm": 1.234375, "learning_rate": 0.00019269842472796706, "loss": 3.5253, "step": 6597 }, { "epoch": 0.30887705541576455, "grad_norm": 1.5234375, "learning_rate": 0.0001926962349860189, "loss": 3.4068, "step": 6598 }, { "epoch": 0.30892386915559716, "grad_norm": 1.0859375, "learning_rate": 0.00019269404492821366, "loss": 3.0567, "step": 6599 }, { "epoch": 0.3089706828954298, "grad_norm": 1.1953125, "learning_rate": 0.0001926918545545588, "loss": 3.2982, "step": 6600 }, { "epoch": 0.30901749663526246, "grad_norm": 1.0625, "learning_rate": 0.00019268966386506184, "loss": 3.5119, "step": 6601 }, { "epoch": 0.3090643103750951, "grad_norm": 2.296875, "learning_rate": 0.0001926874728597302, "loss": 3.0759, "step": 6602 }, { "epoch": 0.30911112411492775, "grad_norm": 2.765625, "learning_rate": 0.00019268528153857135, "loss": 3.1936, "step": 6603 }, { "epoch": 0.30915793785476037, "grad_norm": 1.1953125, "learning_rate": 0.00019268308990159273, "loss": 3.2117, "step": 6604 }, { "epoch": 0.309204751594593, "grad_norm": 1.3515625, "learning_rate": 0.00019268089794880185, "loss": 3.1044, "step": 6605 }, { "epoch": 0.30925156533442566, "grad_norm": 1.125, "learning_rate": 0.00019267870568020622, "loss": 3.1564, "step": 6606 }, { "epoch": 0.3092983790742583, "grad_norm": 1.2578125, "learning_rate": 0.0001926765130958132, "loss": 3.4282, "step": 6607 }, { "epoch": 0.30934519281409095, "grad_norm": 1.3828125, "learning_rate": 0.00019267432019563034, "loss": 3.3184, "step": 6608 }, { "epoch": 0.30939200655392357, "grad_norm": 1.15625, "learning_rate": 0.0001926721269796651, "loss": 3.5213, "step": 6609 }, { "epoch": 0.30943882029375624, "grad_norm": 1.234375, "learning_rate": 0.00019266993344792493, "loss": 3.3038, "step": 6610 }, { "epoch": 0.30948563403358886, "grad_norm": 1.59375, "learning_rate": 0.00019266773960041733, "loss": 3.6126, "step": 6611 }, { "epoch": 0.3095324477734215, "grad_norm": 1.328125, "learning_rate": 0.00019266554543714975, "loss": 2.9245, "step": 6612 }, { "epoch": 0.30957926151325416, "grad_norm": 1.5703125, "learning_rate": 0.00019266335095812967, "loss": 3.5187, "step": 6613 }, { "epoch": 0.3096260752530868, "grad_norm": 1.1875, "learning_rate": 0.00019266115616336463, "loss": 2.9578, "step": 6614 }, { "epoch": 0.30967288899291945, "grad_norm": 1.3984375, "learning_rate": 0.000192658961052862, "loss": 3.6709, "step": 6615 }, { "epoch": 0.30971970273275207, "grad_norm": 1.234375, "learning_rate": 0.00019265676562662932, "loss": 2.7688, "step": 6616 }, { "epoch": 0.3097665164725847, "grad_norm": 1.3515625, "learning_rate": 0.0001926545698846741, "loss": 3.1011, "step": 6617 }, { "epoch": 0.30981333021241736, "grad_norm": 1.203125, "learning_rate": 0.00019265237382700377, "loss": 3.5268, "step": 6618 }, { "epoch": 0.30986014395225, "grad_norm": 1.59375, "learning_rate": 0.00019265017745362584, "loss": 3.505, "step": 6619 }, { "epoch": 0.30990695769208265, "grad_norm": 1.5546875, "learning_rate": 0.00019264798076454778, "loss": 3.5438, "step": 6620 }, { "epoch": 0.30995377143191527, "grad_norm": 1.234375, "learning_rate": 0.00019264578375977707, "loss": 3.1707, "step": 6621 }, { "epoch": 0.3100005851717479, "grad_norm": 1.2109375, "learning_rate": 0.00019264358643932122, "loss": 3.1717, "step": 6622 }, { "epoch": 0.31004739891158056, "grad_norm": 1.2109375, "learning_rate": 0.00019264138880318768, "loss": 3.2866, "step": 6623 }, { "epoch": 0.3100942126514132, "grad_norm": 1.2421875, "learning_rate": 0.000192639190851384, "loss": 4.8798, "step": 6624 }, { "epoch": 0.31014102639124586, "grad_norm": 1.109375, "learning_rate": 0.00019263699258391764, "loss": 3.0731, "step": 6625 }, { "epoch": 0.3101878401310785, "grad_norm": 2.328125, "learning_rate": 0.00019263479400079603, "loss": 3.4633, "step": 6626 }, { "epoch": 0.3102346538709111, "grad_norm": 1.5, "learning_rate": 0.00019263259510202675, "loss": 3.5132, "step": 6627 }, { "epoch": 0.31028146761074377, "grad_norm": 1.15625, "learning_rate": 0.00019263039588761723, "loss": 3.0836, "step": 6628 }, { "epoch": 0.3103282813505764, "grad_norm": 1.203125, "learning_rate": 0.00019262819635757503, "loss": 3.4278, "step": 6629 }, { "epoch": 0.31037509509040906, "grad_norm": 2.421875, "learning_rate": 0.00019262599651190758, "loss": 3.1177, "step": 6630 }, { "epoch": 0.3104219088302417, "grad_norm": 2.015625, "learning_rate": 0.0001926237963506224, "loss": 3.3953, "step": 6631 }, { "epoch": 0.3104687225700743, "grad_norm": 1.3125, "learning_rate": 0.000192621595873727, "loss": 3.3342, "step": 6632 }, { "epoch": 0.31051553630990697, "grad_norm": 1.203125, "learning_rate": 0.00019261939508122885, "loss": 2.4701, "step": 6633 }, { "epoch": 0.3105623500497396, "grad_norm": 5.03125, "learning_rate": 0.00019261719397313548, "loss": 2.944, "step": 6634 }, { "epoch": 0.31060916378957226, "grad_norm": 1.6328125, "learning_rate": 0.00019261499254945438, "loss": 3.0576, "step": 6635 }, { "epoch": 0.3106559775294049, "grad_norm": 3.484375, "learning_rate": 0.00019261279081019304, "loss": 3.5989, "step": 6636 }, { "epoch": 0.3107027912692375, "grad_norm": 1.6171875, "learning_rate": 0.00019261058875535896, "loss": 3.267, "step": 6637 }, { "epoch": 0.3107496050090702, "grad_norm": 1.2734375, "learning_rate": 0.00019260838638495965, "loss": 3.1217, "step": 6638 }, { "epoch": 0.3107964187489028, "grad_norm": 1.703125, "learning_rate": 0.0001926061836990026, "loss": 3.6806, "step": 6639 }, { "epoch": 0.31084323248873547, "grad_norm": 1.2578125, "learning_rate": 0.0001926039806974954, "loss": 3.0023, "step": 6640 }, { "epoch": 0.3108900462285681, "grad_norm": 1.1171875, "learning_rate": 0.00019260177738044542, "loss": 3.3827, "step": 6641 }, { "epoch": 0.3109368599684007, "grad_norm": 1.5703125, "learning_rate": 0.00019259957374786026, "loss": 3.4497, "step": 6642 }, { "epoch": 0.3109836737082334, "grad_norm": 1.1796875, "learning_rate": 0.0001925973697997474, "loss": 3.6669, "step": 6643 }, { "epoch": 0.311030487448066, "grad_norm": 1.2734375, "learning_rate": 0.00019259516553611435, "loss": 3.7054, "step": 6644 }, { "epoch": 0.31107730118789867, "grad_norm": 6.65625, "learning_rate": 0.00019259296095696865, "loss": 6.7607, "step": 6645 }, { "epoch": 0.3111241149277313, "grad_norm": 1.328125, "learning_rate": 0.00019259075606231778, "loss": 3.2517, "step": 6646 }, { "epoch": 0.3111709286675639, "grad_norm": 1.34375, "learning_rate": 0.00019258855085216924, "loss": 3.4495, "step": 6647 }, { "epoch": 0.3112177424073966, "grad_norm": 1.359375, "learning_rate": 0.0001925863453265306, "loss": 3.0757, "step": 6648 }, { "epoch": 0.3112645561472292, "grad_norm": 1.34375, "learning_rate": 0.0001925841394854093, "loss": 3.2409, "step": 6649 }, { "epoch": 0.3113113698870619, "grad_norm": 1.6953125, "learning_rate": 0.00019258193332881288, "loss": 3.1878, "step": 6650 }, { "epoch": 0.3113581836268945, "grad_norm": 1.359375, "learning_rate": 0.00019257972685674893, "loss": 3.2016, "step": 6651 }, { "epoch": 0.3114049973667271, "grad_norm": 1.40625, "learning_rate": 0.0001925775200692249, "loss": 3.228, "step": 6652 }, { "epoch": 0.3114518111065598, "grad_norm": 1.375, "learning_rate": 0.0001925753129662483, "loss": 3.3026, "step": 6653 }, { "epoch": 0.3114986248463924, "grad_norm": 1.203125, "learning_rate": 0.0001925731055478267, "loss": 3.3917, "step": 6654 }, { "epoch": 0.3115454385862251, "grad_norm": 1.21875, "learning_rate": 0.00019257089781396757, "loss": 3.038, "step": 6655 }, { "epoch": 0.3115922523260577, "grad_norm": 1.25, "learning_rate": 0.00019256868976467848, "loss": 2.9271, "step": 6656 }, { "epoch": 0.3116390660658903, "grad_norm": 1.3203125, "learning_rate": 0.0001925664813999669, "loss": 3.1988, "step": 6657 }, { "epoch": 0.311685879805723, "grad_norm": 1.3984375, "learning_rate": 0.0001925642727198404, "loss": 3.2586, "step": 6658 }, { "epoch": 0.3117326935455556, "grad_norm": 1.234375, "learning_rate": 0.0001925620637243065, "loss": 3.3503, "step": 6659 }, { "epoch": 0.3117795072853883, "grad_norm": 1.3125, "learning_rate": 0.0001925598544133727, "loss": 3.2077, "step": 6660 }, { "epoch": 0.3118263210252209, "grad_norm": 1.4921875, "learning_rate": 0.00019255764478704656, "loss": 3.3345, "step": 6661 }, { "epoch": 0.3118731347650535, "grad_norm": 1.828125, "learning_rate": 0.0001925554348453356, "loss": 3.4139, "step": 6662 }, { "epoch": 0.3119199485048862, "grad_norm": 2.609375, "learning_rate": 0.00019255322458824732, "loss": 2.8876, "step": 6663 }, { "epoch": 0.3119667622447188, "grad_norm": 1.34375, "learning_rate": 0.0001925510140157893, "loss": 2.9674, "step": 6664 }, { "epoch": 0.3120135759845515, "grad_norm": 1.3046875, "learning_rate": 0.00019254880312796904, "loss": 3.2642, "step": 6665 }, { "epoch": 0.3120603897243841, "grad_norm": 1.390625, "learning_rate": 0.0001925465919247941, "loss": 2.8131, "step": 6666 }, { "epoch": 0.3121072034642167, "grad_norm": 1.2890625, "learning_rate": 0.00019254438040627197, "loss": 3.0812, "step": 6667 }, { "epoch": 0.3121540172040494, "grad_norm": 1.2578125, "learning_rate": 0.0001925421685724102, "loss": 3.4582, "step": 6668 }, { "epoch": 0.312200830943882, "grad_norm": 1.6015625, "learning_rate": 0.00019253995642321638, "loss": 3.2259, "step": 6669 }, { "epoch": 0.3122476446837147, "grad_norm": 1.1015625, "learning_rate": 0.00019253774395869798, "loss": 4.1328, "step": 6670 }, { "epoch": 0.3122944584235473, "grad_norm": 1.8828125, "learning_rate": 0.00019253553117886259, "loss": 3.617, "step": 6671 }, { "epoch": 0.31234127216338, "grad_norm": 1.546875, "learning_rate": 0.00019253331808371773, "loss": 3.4561, "step": 6672 }, { "epoch": 0.3123880859032126, "grad_norm": 1.5703125, "learning_rate": 0.0001925311046732709, "loss": 3.6814, "step": 6673 }, { "epoch": 0.3124348996430452, "grad_norm": 1.3125, "learning_rate": 0.00019252889094752973, "loss": 3.4113, "step": 6674 }, { "epoch": 0.3124817133828779, "grad_norm": 0.97265625, "learning_rate": 0.00019252667690650168, "loss": 3.9863, "step": 6675 }, { "epoch": 0.3125285271227105, "grad_norm": 1.2578125, "learning_rate": 0.00019252446255019434, "loss": 3.0601, "step": 6676 }, { "epoch": 0.3125753408625432, "grad_norm": 1.1484375, "learning_rate": 0.00019252224787861524, "loss": 3.1075, "step": 6677 }, { "epoch": 0.3126221546023758, "grad_norm": 1.2421875, "learning_rate": 0.00019252003289177191, "loss": 3.1746, "step": 6678 }, { "epoch": 0.3126689683422084, "grad_norm": 1.4609375, "learning_rate": 0.00019251781758967192, "loss": 3.7162, "step": 6679 }, { "epoch": 0.3127157820820411, "grad_norm": 1.484375, "learning_rate": 0.00019251560197232283, "loss": 3.449, "step": 6680 }, { "epoch": 0.3127625958218737, "grad_norm": 1.2734375, "learning_rate": 0.00019251338603973217, "loss": 3.314, "step": 6681 }, { "epoch": 0.3128094095617064, "grad_norm": 1.3359375, "learning_rate": 0.00019251116979190748, "loss": 3.3571, "step": 6682 }, { "epoch": 0.312856223301539, "grad_norm": 1.4921875, "learning_rate": 0.00019250895322885634, "loss": 3.3244, "step": 6683 }, { "epoch": 0.3129030370413716, "grad_norm": 1.3515625, "learning_rate": 0.0001925067363505863, "loss": 3.3074, "step": 6684 }, { "epoch": 0.3129498507812043, "grad_norm": 1.5703125, "learning_rate": 0.0001925045191571049, "loss": 2.9835, "step": 6685 }, { "epoch": 0.3129966645210369, "grad_norm": 1.2734375, "learning_rate": 0.00019250230164841967, "loss": 2.7295, "step": 6686 }, { "epoch": 0.3130434782608696, "grad_norm": 1.3203125, "learning_rate": 0.0001925000838245382, "loss": 3.3999, "step": 6687 }, { "epoch": 0.3130902920007022, "grad_norm": 1.4609375, "learning_rate": 0.00019249786568546808, "loss": 3.272, "step": 6688 }, { "epoch": 0.3131371057405348, "grad_norm": 1.4765625, "learning_rate": 0.0001924956472312168, "loss": 3.5828, "step": 6689 }, { "epoch": 0.3131839194803675, "grad_norm": 1.28125, "learning_rate": 0.00019249342846179194, "loss": 3.5787, "step": 6690 }, { "epoch": 0.3132307332202001, "grad_norm": 1.3359375, "learning_rate": 0.00019249120937720108, "loss": 3.5286, "step": 6691 }, { "epoch": 0.3132775469600328, "grad_norm": 1.5625, "learning_rate": 0.00019248898997745179, "loss": 3.3923, "step": 6692 }, { "epoch": 0.3133243606998654, "grad_norm": 1.5625, "learning_rate": 0.00019248677026255156, "loss": 3.6316, "step": 6693 }, { "epoch": 0.31337117443969803, "grad_norm": 1.390625, "learning_rate": 0.00019248455023250806, "loss": 3.5452, "step": 6694 }, { "epoch": 0.3134179881795307, "grad_norm": 1.1015625, "learning_rate": 0.00019248232988732878, "loss": 3.2087, "step": 6695 }, { "epoch": 0.3134648019193633, "grad_norm": 1.5703125, "learning_rate": 0.00019248010922702128, "loss": 2.9112, "step": 6696 }, { "epoch": 0.313511615659196, "grad_norm": 1.828125, "learning_rate": 0.0001924778882515932, "loss": 3.3316, "step": 6697 }, { "epoch": 0.3135584293990286, "grad_norm": 1.453125, "learning_rate": 0.00019247566696105204, "loss": 3.0802, "step": 6698 }, { "epoch": 0.31360524313886123, "grad_norm": 1.6640625, "learning_rate": 0.00019247344535540538, "loss": 3.5836, "step": 6699 }, { "epoch": 0.3136520568786939, "grad_norm": 1.09375, "learning_rate": 0.00019247122343466085, "loss": 2.8805, "step": 6700 }, { "epoch": 0.3136988706185265, "grad_norm": 2.03125, "learning_rate": 0.00019246900119882589, "loss": 3.2612, "step": 6701 }, { "epoch": 0.3137456843583592, "grad_norm": 1.515625, "learning_rate": 0.00019246677864790822, "loss": 3.7831, "step": 6702 }, { "epoch": 0.3137924980981918, "grad_norm": 1.3203125, "learning_rate": 0.0001924645557819153, "loss": 3.4509, "step": 6703 }, { "epoch": 0.31383931183802444, "grad_norm": 1.3671875, "learning_rate": 0.0001924623326008548, "loss": 3.299, "step": 6704 }, { "epoch": 0.3138861255778571, "grad_norm": 1.0625, "learning_rate": 0.0001924601091047342, "loss": 2.4314, "step": 6705 }, { "epoch": 0.31393293931768973, "grad_norm": 1.1015625, "learning_rate": 0.00019245788529356113, "loss": 2.7568, "step": 6706 }, { "epoch": 0.3139797530575224, "grad_norm": 1.2734375, "learning_rate": 0.0001924556611673432, "loss": 3.8485, "step": 6707 }, { "epoch": 0.314026566797355, "grad_norm": 1.640625, "learning_rate": 0.00019245343672608792, "loss": 2.3823, "step": 6708 }, { "epoch": 0.31407338053718764, "grad_norm": 1.34375, "learning_rate": 0.00019245121196980288, "loss": 3.0181, "step": 6709 }, { "epoch": 0.3141201942770203, "grad_norm": 1.203125, "learning_rate": 0.00019244898689849574, "loss": 2.9116, "step": 6710 }, { "epoch": 0.31416700801685293, "grad_norm": 1.28125, "learning_rate": 0.000192446761512174, "loss": 3.1613, "step": 6711 }, { "epoch": 0.3142138217566856, "grad_norm": 1.6015625, "learning_rate": 0.0001924445358108452, "loss": 5.3429, "step": 6712 }, { "epoch": 0.3142606354965182, "grad_norm": 1.6875, "learning_rate": 0.00019244230979451702, "loss": 2.9969, "step": 6713 }, { "epoch": 0.31430744923635084, "grad_norm": 1.2890625, "learning_rate": 0.00019244008346319704, "loss": 3.5874, "step": 6714 }, { "epoch": 0.3143542629761835, "grad_norm": 1.3046875, "learning_rate": 0.0001924378568168928, "loss": 3.2091, "step": 6715 }, { "epoch": 0.31440107671601614, "grad_norm": 1.796875, "learning_rate": 0.00019243562985561192, "loss": 3.3645, "step": 6716 }, { "epoch": 0.3144478904558488, "grad_norm": 1.65625, "learning_rate": 0.00019243340257936198, "loss": 3.134, "step": 6717 }, { "epoch": 0.31449470419568143, "grad_norm": 1.4765625, "learning_rate": 0.00019243117498815053, "loss": 3.5233, "step": 6718 }, { "epoch": 0.31454151793551405, "grad_norm": 1.171875, "learning_rate": 0.0001924289470819852, "loss": 3.2332, "step": 6719 }, { "epoch": 0.3145883316753467, "grad_norm": 1.6640625, "learning_rate": 0.0001924267188608736, "loss": 3.0977, "step": 6720 }, { "epoch": 0.31463514541517934, "grad_norm": 1.2421875, "learning_rate": 0.0001924244903248233, "loss": 3.3795, "step": 6721 }, { "epoch": 0.314681959155012, "grad_norm": 1.125, "learning_rate": 0.00019242226147384185, "loss": 3.1257, "step": 6722 }, { "epoch": 0.31472877289484463, "grad_norm": 1.2734375, "learning_rate": 0.00019242003230793693, "loss": 2.8989, "step": 6723 }, { "epoch": 0.31477558663467725, "grad_norm": 1.5625, "learning_rate": 0.00019241780282711605, "loss": 3.2018, "step": 6724 }, { "epoch": 0.3148224003745099, "grad_norm": 1.171875, "learning_rate": 0.0001924155730313869, "loss": 3.1992, "step": 6725 }, { "epoch": 0.31486921411434254, "grad_norm": 1.2265625, "learning_rate": 0.00019241334292075698, "loss": 3.3025, "step": 6726 }, { "epoch": 0.3149160278541752, "grad_norm": 2.046875, "learning_rate": 0.00019241111249523396, "loss": 3.0201, "step": 6727 }, { "epoch": 0.31496284159400784, "grad_norm": 1.484375, "learning_rate": 0.00019240888175482544, "loss": 3.7326, "step": 6728 }, { "epoch": 0.31500965533384045, "grad_norm": 1.5703125, "learning_rate": 0.00019240665069953896, "loss": 3.2958, "step": 6729 }, { "epoch": 0.31505646907367313, "grad_norm": 1.1796875, "learning_rate": 0.00019240441932938217, "loss": 3.3736, "step": 6730 }, { "epoch": 0.31510328281350575, "grad_norm": 1.3828125, "learning_rate": 0.00019240218764436263, "loss": 3.337, "step": 6731 }, { "epoch": 0.3151500965533384, "grad_norm": 1.4140625, "learning_rate": 0.00019239995564448802, "loss": 3.4553, "step": 6732 }, { "epoch": 0.31519691029317104, "grad_norm": 1.171875, "learning_rate": 0.0001923977233297659, "loss": 3.2467, "step": 6733 }, { "epoch": 0.3152437240330037, "grad_norm": 1.390625, "learning_rate": 0.00019239549070020385, "loss": 2.7994, "step": 6734 }, { "epoch": 0.31529053777283633, "grad_norm": 1.171875, "learning_rate": 0.00019239325775580955, "loss": 3.4497, "step": 6735 }, { "epoch": 0.31533735151266895, "grad_norm": 1.40625, "learning_rate": 0.00019239102449659053, "loss": 3.8283, "step": 6736 }, { "epoch": 0.3153841652525016, "grad_norm": 1.2890625, "learning_rate": 0.00019238879092255443, "loss": 3.3932, "step": 6737 }, { "epoch": 0.31543097899233424, "grad_norm": 1.5625, "learning_rate": 0.00019238655703370888, "loss": 3.1903, "step": 6738 }, { "epoch": 0.3154777927321669, "grad_norm": 1.3046875, "learning_rate": 0.00019238432283006146, "loss": 3.2385, "step": 6739 }, { "epoch": 0.31552460647199954, "grad_norm": 1.4453125, "learning_rate": 0.00019238208831161985, "loss": 3.6945, "step": 6740 }, { "epoch": 0.31557142021183215, "grad_norm": 1.2421875, "learning_rate": 0.00019237985347839157, "loss": 3.0705, "step": 6741 }, { "epoch": 0.31561823395166483, "grad_norm": 1.203125, "learning_rate": 0.0001923776183303843, "loss": 3.1623, "step": 6742 }, { "epoch": 0.31566504769149745, "grad_norm": 1.46875, "learning_rate": 0.00019237538286760562, "loss": 3.4176, "step": 6743 }, { "epoch": 0.3157118614313301, "grad_norm": 1.2734375, "learning_rate": 0.00019237314709006316, "loss": 3.4388, "step": 6744 }, { "epoch": 0.31575867517116274, "grad_norm": 1.1953125, "learning_rate": 0.00019237091099776453, "loss": 3.5256, "step": 6745 }, { "epoch": 0.31580548891099536, "grad_norm": 1.375, "learning_rate": 0.00019236867459071738, "loss": 3.105, "step": 6746 }, { "epoch": 0.31585230265082803, "grad_norm": 1.4453125, "learning_rate": 0.00019236643786892934, "loss": 3.2737, "step": 6747 }, { "epoch": 0.31589911639066065, "grad_norm": 1.6171875, "learning_rate": 0.00019236420083240796, "loss": 3.23, "step": 6748 }, { "epoch": 0.3159459301304933, "grad_norm": 1.2578125, "learning_rate": 0.0001923619634811609, "loss": 3.6173, "step": 6749 }, { "epoch": 0.31599274387032594, "grad_norm": 1.703125, "learning_rate": 0.0001923597258151958, "loss": 3.6031, "step": 6750 }, { "epoch": 0.31603955761015856, "grad_norm": 1.171875, "learning_rate": 0.00019235748783452027, "loss": 2.9797, "step": 6751 }, { "epoch": 0.31608637134999124, "grad_norm": 1.46875, "learning_rate": 0.00019235524953914196, "loss": 3.1926, "step": 6752 }, { "epoch": 0.31613318508982385, "grad_norm": 1.125, "learning_rate": 0.00019235301092906845, "loss": 3.313, "step": 6753 }, { "epoch": 0.31617999882965653, "grad_norm": 1.6796875, "learning_rate": 0.0001923507720043074, "loss": 2.9336, "step": 6754 }, { "epoch": 0.31622681256948915, "grad_norm": 1.390625, "learning_rate": 0.00019234853276486647, "loss": 2.9021, "step": 6755 }, { "epoch": 0.31627362630932176, "grad_norm": 1.375, "learning_rate": 0.00019234629321075318, "loss": 3.2426, "step": 6756 }, { "epoch": 0.31632044004915444, "grad_norm": 1.5703125, "learning_rate": 0.0001923440533419753, "loss": 3.4482, "step": 6757 }, { "epoch": 0.31636725378898706, "grad_norm": 1.375, "learning_rate": 0.00019234181315854037, "loss": 3.256, "step": 6758 }, { "epoch": 0.31641406752881973, "grad_norm": 1.4765625, "learning_rate": 0.00019233957266045603, "loss": 2.8296, "step": 6759 }, { "epoch": 0.31646088126865235, "grad_norm": 1.203125, "learning_rate": 0.00019233733184772996, "loss": 3.0022, "step": 6760 }, { "epoch": 0.31650769500848497, "grad_norm": 1.3984375, "learning_rate": 0.00019233509072036977, "loss": 3.4074, "step": 6761 }, { "epoch": 0.31655450874831764, "grad_norm": 1.453125, "learning_rate": 0.00019233284927838308, "loss": 3.0224, "step": 6762 }, { "epoch": 0.31660132248815026, "grad_norm": 0.9765625, "learning_rate": 0.00019233060752177754, "loss": 2.3118, "step": 6763 }, { "epoch": 0.31664813622798293, "grad_norm": 1.203125, "learning_rate": 0.0001923283654505608, "loss": 3.2822, "step": 6764 }, { "epoch": 0.31669494996781555, "grad_norm": 1.53125, "learning_rate": 0.0001923261230647405, "loss": 6.2496, "step": 6765 }, { "epoch": 0.31674176370764817, "grad_norm": 2.34375, "learning_rate": 0.00019232388036432427, "loss": 3.8574, "step": 6766 }, { "epoch": 0.31678857744748085, "grad_norm": 1.1328125, "learning_rate": 0.00019232163734931974, "loss": 2.7646, "step": 6767 }, { "epoch": 0.31683539118731346, "grad_norm": 1.2421875, "learning_rate": 0.00019231939401973458, "loss": 3.2865, "step": 6768 }, { "epoch": 0.31688220492714614, "grad_norm": 1.1328125, "learning_rate": 0.00019231715037557642, "loss": 3.1463, "step": 6769 }, { "epoch": 0.31692901866697876, "grad_norm": 1.5390625, "learning_rate": 0.0001923149064168529, "loss": 3.3443, "step": 6770 }, { "epoch": 0.3169758324068114, "grad_norm": 1.203125, "learning_rate": 0.00019231266214357167, "loss": 3.2909, "step": 6771 }, { "epoch": 0.31702264614664405, "grad_norm": 1.7734375, "learning_rate": 0.0001923104175557404, "loss": 3.0472, "step": 6772 }, { "epoch": 0.31706945988647667, "grad_norm": 1.2421875, "learning_rate": 0.0001923081726533667, "loss": 3.1613, "step": 6773 }, { "epoch": 0.31711627362630934, "grad_norm": 1.1953125, "learning_rate": 0.00019230592743645823, "loss": 2.994, "step": 6774 }, { "epoch": 0.31716308736614196, "grad_norm": 1.515625, "learning_rate": 0.00019230368190502266, "loss": 3.234, "step": 6775 }, { "epoch": 0.3172099011059746, "grad_norm": 1.3359375, "learning_rate": 0.00019230143605906763, "loss": 3.1451, "step": 6776 }, { "epoch": 0.31725671484580725, "grad_norm": 1.328125, "learning_rate": 0.0001922991898986008, "loss": 2.9386, "step": 6777 }, { "epoch": 0.31730352858563987, "grad_norm": 1.2578125, "learning_rate": 0.0001922969434236298, "loss": 3.366, "step": 6778 }, { "epoch": 0.31735034232547255, "grad_norm": 1.28125, "learning_rate": 0.0001922946966341623, "loss": 3.0958, "step": 6779 }, { "epoch": 0.31739715606530516, "grad_norm": 1.1640625, "learning_rate": 0.00019229244953020594, "loss": 3.3606, "step": 6780 }, { "epoch": 0.3174439698051378, "grad_norm": 1.71875, "learning_rate": 0.00019229020211176845, "loss": 3.0926, "step": 6781 }, { "epoch": 0.31749078354497046, "grad_norm": 1.984375, "learning_rate": 0.00019228795437885738, "loss": 3.4171, "step": 6782 }, { "epoch": 0.3175375972848031, "grad_norm": 1.515625, "learning_rate": 0.00019228570633148047, "loss": 3.215, "step": 6783 }, { "epoch": 0.31758441102463575, "grad_norm": 1.4921875, "learning_rate": 0.00019228345796964532, "loss": 3.3325, "step": 6784 }, { "epoch": 0.31763122476446837, "grad_norm": 1.7109375, "learning_rate": 0.00019228120929335965, "loss": 3.5577, "step": 6785 }, { "epoch": 0.317678038504301, "grad_norm": 1.1796875, "learning_rate": 0.00019227896030263108, "loss": 3.0694, "step": 6786 }, { "epoch": 0.31772485224413366, "grad_norm": 1.234375, "learning_rate": 0.0001922767109974673, "loss": 2.9798, "step": 6787 }, { "epoch": 0.3177716659839663, "grad_norm": 1.3359375, "learning_rate": 0.00019227446137787594, "loss": 3.0266, "step": 6788 }, { "epoch": 0.31781847972379895, "grad_norm": 1.3828125, "learning_rate": 0.0001922722114438647, "loss": 3.1215, "step": 6789 }, { "epoch": 0.31786529346363157, "grad_norm": 1.3828125, "learning_rate": 0.00019226996119544123, "loss": 3.2013, "step": 6790 }, { "epoch": 0.3179121072034642, "grad_norm": 1.2578125, "learning_rate": 0.00019226771063261317, "loss": 3.4148, "step": 6791 }, { "epoch": 0.31795892094329686, "grad_norm": 1.3671875, "learning_rate": 0.00019226545975538825, "loss": 3.1402, "step": 6792 }, { "epoch": 0.3180057346831295, "grad_norm": 1.578125, "learning_rate": 0.0001922632085637741, "loss": 3.6696, "step": 6793 }, { "epoch": 0.31805254842296216, "grad_norm": 1.203125, "learning_rate": 0.0001922609570577784, "loss": 2.7274, "step": 6794 }, { "epoch": 0.3180993621627948, "grad_norm": 2.078125, "learning_rate": 0.00019225870523740886, "loss": 3.2528, "step": 6795 }, { "epoch": 0.31814617590262745, "grad_norm": 1.1484375, "learning_rate": 0.00019225645310267308, "loss": 3.4017, "step": 6796 }, { "epoch": 0.31819298964246007, "grad_norm": 1.40625, "learning_rate": 0.0001922542006535788, "loss": 3.3698, "step": 6797 }, { "epoch": 0.3182398033822927, "grad_norm": 1.359375, "learning_rate": 0.0001922519478901336, "loss": 3.1061, "step": 6798 }, { "epoch": 0.31828661712212536, "grad_norm": 1.8984375, "learning_rate": 0.00019224969481234527, "loss": 2.4515, "step": 6799 }, { "epoch": 0.318333430861958, "grad_norm": 1.453125, "learning_rate": 0.0001922474414202214, "loss": 3.2867, "step": 6800 }, { "epoch": 0.31838024460179065, "grad_norm": 1.25, "learning_rate": 0.00019224518771376975, "loss": 3.0072, "step": 6801 }, { "epoch": 0.31842705834162327, "grad_norm": 1.0078125, "learning_rate": 0.00019224293369299794, "loss": 3.0166, "step": 6802 }, { "epoch": 0.3184738720814559, "grad_norm": 1.3359375, "learning_rate": 0.00019224067935791366, "loss": 3.2514, "step": 6803 }, { "epoch": 0.31852068582128856, "grad_norm": 1.1953125, "learning_rate": 0.00019223842470852457, "loss": 3.1904, "step": 6804 }, { "epoch": 0.3185674995611212, "grad_norm": 1.0703125, "learning_rate": 0.00019223616974483845, "loss": 5.0179, "step": 6805 }, { "epoch": 0.31861431330095386, "grad_norm": 1.7109375, "learning_rate": 0.00019223391446686284, "loss": 3.7597, "step": 6806 }, { "epoch": 0.3186611270407865, "grad_norm": 1.359375, "learning_rate": 0.00019223165887460555, "loss": 3.4603, "step": 6807 }, { "epoch": 0.3187079407806191, "grad_norm": 1.5234375, "learning_rate": 0.00019222940296807417, "loss": 3.3203, "step": 6808 }, { "epoch": 0.31875475452045177, "grad_norm": 1.3984375, "learning_rate": 0.00019222714674727644, "loss": 3.2645, "step": 6809 }, { "epoch": 0.3188015682602844, "grad_norm": 1.1171875, "learning_rate": 0.00019222489021222008, "loss": 4.4005, "step": 6810 }, { "epoch": 0.31884838200011706, "grad_norm": 1.734375, "learning_rate": 0.00019222263336291268, "loss": 2.8036, "step": 6811 }, { "epoch": 0.3188951957399497, "grad_norm": 1.421875, "learning_rate": 0.00019222037619936203, "loss": 3.0154, "step": 6812 }, { "epoch": 0.3189420094797823, "grad_norm": 1.171875, "learning_rate": 0.00019221811872157575, "loss": 3.2165, "step": 6813 }, { "epoch": 0.31898882321961497, "grad_norm": 1.84375, "learning_rate": 0.00019221586092956158, "loss": 3.5063, "step": 6814 }, { "epoch": 0.3190356369594476, "grad_norm": 1.3671875, "learning_rate": 0.0001922136028233272, "loss": 3.2955, "step": 6815 }, { "epoch": 0.31908245069928026, "grad_norm": 1.390625, "learning_rate": 0.00019221134440288026, "loss": 3.31, "step": 6816 }, { "epoch": 0.3191292644391129, "grad_norm": 1.1328125, "learning_rate": 0.00019220908566822855, "loss": 3.0325, "step": 6817 }, { "epoch": 0.3191760781789455, "grad_norm": 1.3671875, "learning_rate": 0.00019220682661937969, "loss": 2.963, "step": 6818 }, { "epoch": 0.3192228919187782, "grad_norm": 1.484375, "learning_rate": 0.00019220456725634136, "loss": 3.0962, "step": 6819 }, { "epoch": 0.3192697056586108, "grad_norm": 1.40625, "learning_rate": 0.0001922023075791213, "loss": 2.6475, "step": 6820 }, { "epoch": 0.31931651939844347, "grad_norm": 1.453125, "learning_rate": 0.00019220004758772725, "loss": 3.0992, "step": 6821 }, { "epoch": 0.3193633331382761, "grad_norm": 1.1015625, "learning_rate": 0.00019219778728216686, "loss": 3.2826, "step": 6822 }, { "epoch": 0.3194101468781087, "grad_norm": 1.7265625, "learning_rate": 0.00019219552666244782, "loss": 3.2578, "step": 6823 }, { "epoch": 0.3194569606179414, "grad_norm": 1.4453125, "learning_rate": 0.00019219326572857788, "loss": 3.3166, "step": 6824 }, { "epoch": 0.319503774357774, "grad_norm": 1.734375, "learning_rate": 0.00019219100448056469, "loss": 3.6804, "step": 6825 }, { "epoch": 0.31955058809760667, "grad_norm": 1.34375, "learning_rate": 0.00019218874291841598, "loss": 3.2617, "step": 6826 }, { "epoch": 0.3195974018374393, "grad_norm": 1.5625, "learning_rate": 0.00019218648104213948, "loss": 2.7983, "step": 6827 }, { "epoch": 0.3196442155772719, "grad_norm": 1.6640625, "learning_rate": 0.00019218421885174286, "loss": 3.5591, "step": 6828 }, { "epoch": 0.3196910293171046, "grad_norm": 1.5234375, "learning_rate": 0.00019218195634723384, "loss": 3.38, "step": 6829 }, { "epoch": 0.3197378430569372, "grad_norm": 1.40625, "learning_rate": 0.00019217969352862014, "loss": 3.3845, "step": 6830 }, { "epoch": 0.3197846567967699, "grad_norm": 1.4375, "learning_rate": 0.00019217743039590945, "loss": 3.1571, "step": 6831 }, { "epoch": 0.3198314705366025, "grad_norm": 1.390625, "learning_rate": 0.0001921751669491095, "loss": 3.4801, "step": 6832 }, { "epoch": 0.3198782842764351, "grad_norm": 1.3125, "learning_rate": 0.000192172903188228, "loss": 3.5198, "step": 6833 }, { "epoch": 0.3199250980162678, "grad_norm": 1.1953125, "learning_rate": 0.00019217063911327267, "loss": 3.3304, "step": 6834 }, { "epoch": 0.3199719117561004, "grad_norm": 1.6953125, "learning_rate": 0.0001921683747242512, "loss": 3.3042, "step": 6835 }, { "epoch": 0.3200187254959331, "grad_norm": 1.28125, "learning_rate": 0.0001921661100211713, "loss": 3.2238, "step": 6836 }, { "epoch": 0.3200655392357657, "grad_norm": 1.40625, "learning_rate": 0.00019216384500404075, "loss": 4.2931, "step": 6837 }, { "epoch": 0.3201123529755983, "grad_norm": 1.0234375, "learning_rate": 0.0001921615796728672, "loss": 2.8194, "step": 6838 }, { "epoch": 0.320159166715431, "grad_norm": 2.171875, "learning_rate": 0.00019215931402765838, "loss": 3.1977, "step": 6839 }, { "epoch": 0.3202059804552636, "grad_norm": 1.2421875, "learning_rate": 0.00019215704806842206, "loss": 3.4247, "step": 6840 }, { "epoch": 0.3202527941950963, "grad_norm": 1.46875, "learning_rate": 0.00019215478179516592, "loss": 2.64, "step": 6841 }, { "epoch": 0.3202996079349289, "grad_norm": 1.2109375, "learning_rate": 0.00019215251520789768, "loss": 2.977, "step": 6842 }, { "epoch": 0.3203464216747615, "grad_norm": 1.2734375, "learning_rate": 0.00019215024830662508, "loss": 3.0029, "step": 6843 }, { "epoch": 0.3203932354145942, "grad_norm": 1.171875, "learning_rate": 0.00019214798109135582, "loss": 3.25, "step": 6844 }, { "epoch": 0.3204400491544268, "grad_norm": 1.75, "learning_rate": 0.00019214571356209766, "loss": 3.1727, "step": 6845 }, { "epoch": 0.3204868628942595, "grad_norm": 1.125, "learning_rate": 0.00019214344571885829, "loss": 3.2371, "step": 6846 }, { "epoch": 0.3205336766340921, "grad_norm": 1.9921875, "learning_rate": 0.00019214117756164543, "loss": 3.5451, "step": 6847 }, { "epoch": 0.3205804903739247, "grad_norm": 1.7421875, "learning_rate": 0.00019213890909046687, "loss": 3.4009, "step": 6848 }, { "epoch": 0.3206273041137574, "grad_norm": 1.3828125, "learning_rate": 0.0001921366403053303, "loss": 3.4333, "step": 6849 }, { "epoch": 0.32067411785359, "grad_norm": 1.5234375, "learning_rate": 0.00019213437120624345, "loss": 3.3425, "step": 6850 }, { "epoch": 0.3207209315934227, "grad_norm": 1.1328125, "learning_rate": 0.00019213210179321407, "loss": 3.0532, "step": 6851 }, { "epoch": 0.3207677453332553, "grad_norm": 1.296875, "learning_rate": 0.00019212983206624986, "loss": 3.2728, "step": 6852 }, { "epoch": 0.3208145590730879, "grad_norm": 1.3203125, "learning_rate": 0.00019212756202535854, "loss": 2.961, "step": 6853 }, { "epoch": 0.3208613728129206, "grad_norm": 1.3125, "learning_rate": 0.00019212529167054792, "loss": 2.907, "step": 6854 }, { "epoch": 0.3209081865527532, "grad_norm": 1.1171875, "learning_rate": 0.0001921230210018257, "loss": 2.6485, "step": 6855 }, { "epoch": 0.3209550002925859, "grad_norm": 1.1953125, "learning_rate": 0.0001921207500191996, "loss": 3.5211, "step": 6856 }, { "epoch": 0.3210018140324185, "grad_norm": 1.34375, "learning_rate": 0.00019211847872267738, "loss": 3.0574, "step": 6857 }, { "epoch": 0.3210486277722512, "grad_norm": 1.6171875, "learning_rate": 0.00019211620711226675, "loss": 3.4126, "step": 6858 }, { "epoch": 0.3210954415120838, "grad_norm": 1.5, "learning_rate": 0.00019211393518797548, "loss": 3.1012, "step": 6859 }, { "epoch": 0.3211422552519164, "grad_norm": 1.3359375, "learning_rate": 0.00019211166294981128, "loss": 3.3329, "step": 6860 }, { "epoch": 0.3211890689917491, "grad_norm": 1.140625, "learning_rate": 0.00019210939039778193, "loss": 3.3959, "step": 6861 }, { "epoch": 0.3212358827315817, "grad_norm": 1.421875, "learning_rate": 0.00019210711753189514, "loss": 3.496, "step": 6862 }, { "epoch": 0.3212826964714144, "grad_norm": 1.40625, "learning_rate": 0.0001921048443521587, "loss": 3.2935, "step": 6863 }, { "epoch": 0.321329510211247, "grad_norm": 1.21875, "learning_rate": 0.00019210257085858033, "loss": 3.0516, "step": 6864 }, { "epoch": 0.3213763239510796, "grad_norm": 1.46875, "learning_rate": 0.00019210029705116774, "loss": 3.2005, "step": 6865 }, { "epoch": 0.3214231376909123, "grad_norm": 1.8828125, "learning_rate": 0.00019209802292992874, "loss": 3.4689, "step": 6866 }, { "epoch": 0.3214699514307449, "grad_norm": 1.3046875, "learning_rate": 0.00019209574849487102, "loss": 3.2574, "step": 6867 }, { "epoch": 0.3215167651705776, "grad_norm": 1.7421875, "learning_rate": 0.00019209347374600237, "loss": 3.3147, "step": 6868 }, { "epoch": 0.3215635789104102, "grad_norm": 1.5390625, "learning_rate": 0.00019209119868333056, "loss": 3.3325, "step": 6869 }, { "epoch": 0.3216103926502428, "grad_norm": 1.53125, "learning_rate": 0.00019208892330686327, "loss": 3.3023, "step": 6870 }, { "epoch": 0.3216572063900755, "grad_norm": 1.34375, "learning_rate": 0.00019208664761660832, "loss": 3.3312, "step": 6871 }, { "epoch": 0.3217040201299081, "grad_norm": 1.9375, "learning_rate": 0.00019208437161257345, "loss": 3.1916, "step": 6872 }, { "epoch": 0.3217508338697408, "grad_norm": 1.1875, "learning_rate": 0.0001920820952947664, "loss": 2.8357, "step": 6873 }, { "epoch": 0.3217976476095734, "grad_norm": 1.359375, "learning_rate": 0.00019207981866319494, "loss": 3.2303, "step": 6874 }, { "epoch": 0.32184446134940603, "grad_norm": 1.484375, "learning_rate": 0.0001920775417178668, "loss": 3.137, "step": 6875 }, { "epoch": 0.3218912750892387, "grad_norm": 1.1953125, "learning_rate": 0.00019207526445878977, "loss": 3.0056, "step": 6876 }, { "epoch": 0.3219380888290713, "grad_norm": 3.53125, "learning_rate": 0.0001920729868859716, "loss": 3.6636, "step": 6877 }, { "epoch": 0.321984902568904, "grad_norm": 1.3359375, "learning_rate": 0.00019207070899942007, "loss": 3.1004, "step": 6878 }, { "epoch": 0.3220317163087366, "grad_norm": 1.2890625, "learning_rate": 0.00019206843079914288, "loss": 3.226, "step": 6879 }, { "epoch": 0.32207853004856923, "grad_norm": 1.28125, "learning_rate": 0.00019206615228514786, "loss": 3.4237, "step": 6880 }, { "epoch": 0.3221253437884019, "grad_norm": 2.59375, "learning_rate": 0.00019206387345744273, "loss": 3.3809, "step": 6881 }, { "epoch": 0.3221721575282345, "grad_norm": 1.3359375, "learning_rate": 0.00019206159431603529, "loss": 3.0413, "step": 6882 }, { "epoch": 0.3222189712680672, "grad_norm": 1.609375, "learning_rate": 0.00019205931486093326, "loss": 3.371, "step": 6883 }, { "epoch": 0.3222657850078998, "grad_norm": 1.4375, "learning_rate": 0.00019205703509214448, "loss": 3.0723, "step": 6884 }, { "epoch": 0.32231259874773244, "grad_norm": 1.3203125, "learning_rate": 0.00019205475500967665, "loss": 3.0853, "step": 6885 }, { "epoch": 0.3223594124875651, "grad_norm": 1.3828125, "learning_rate": 0.00019205247461353758, "loss": 3.4691, "step": 6886 }, { "epoch": 0.32240622622739773, "grad_norm": 1.2265625, "learning_rate": 0.000192050193903735, "loss": 3.3142, "step": 6887 }, { "epoch": 0.3224530399672304, "grad_norm": 1.515625, "learning_rate": 0.0001920479128802767, "loss": 3.1283, "step": 6888 }, { "epoch": 0.322499853707063, "grad_norm": 1.34375, "learning_rate": 0.00019204563154317047, "loss": 3.5947, "step": 6889 }, { "epoch": 0.32254666744689564, "grad_norm": 1.140625, "learning_rate": 0.00019204334989242407, "loss": 3.5684, "step": 6890 }, { "epoch": 0.3225934811867283, "grad_norm": 1.8515625, "learning_rate": 0.00019204106792804527, "loss": 3.39, "step": 6891 }, { "epoch": 0.32264029492656093, "grad_norm": 1.5, "learning_rate": 0.00019203878565004188, "loss": 3.0199, "step": 6892 }, { "epoch": 0.3226871086663936, "grad_norm": 2.5625, "learning_rate": 0.00019203650305842164, "loss": 3.4676, "step": 6893 }, { "epoch": 0.3227339224062262, "grad_norm": 1.953125, "learning_rate": 0.0001920342201531923, "loss": 3.2375, "step": 6894 }, { "epoch": 0.32278073614605884, "grad_norm": 1.1953125, "learning_rate": 0.0001920319369343617, "loss": 2.9642, "step": 6895 }, { "epoch": 0.3228275498858915, "grad_norm": 1.203125, "learning_rate": 0.0001920296534019376, "loss": 3.0184, "step": 6896 }, { "epoch": 0.32287436362572414, "grad_norm": 1.484375, "learning_rate": 0.00019202736955592777, "loss": 3.3291, "step": 6897 }, { "epoch": 0.3229211773655568, "grad_norm": 1.25, "learning_rate": 0.00019202508539633998, "loss": 2.796, "step": 6898 }, { "epoch": 0.32296799110538943, "grad_norm": 1.578125, "learning_rate": 0.00019202280092318205, "loss": 3.241, "step": 6899 }, { "epoch": 0.32301480484522205, "grad_norm": 1.375, "learning_rate": 0.00019202051613646174, "loss": 2.8954, "step": 6900 }, { "epoch": 0.3230616185850547, "grad_norm": 1.1171875, "learning_rate": 0.00019201823103618684, "loss": 3.6347, "step": 6901 }, { "epoch": 0.32310843232488734, "grad_norm": 1.28125, "learning_rate": 0.00019201594562236513, "loss": 3.0498, "step": 6902 }, { "epoch": 0.32315524606472, "grad_norm": 1.40625, "learning_rate": 0.0001920136598950044, "loss": 2.9149, "step": 6903 }, { "epoch": 0.32320205980455263, "grad_norm": 1.4296875, "learning_rate": 0.00019201137385411248, "loss": 3.2223, "step": 6904 }, { "epoch": 0.32324887354438525, "grad_norm": 1.453125, "learning_rate": 0.00019200908749969711, "loss": 3.2445, "step": 6905 }, { "epoch": 0.3232956872842179, "grad_norm": 1.1015625, "learning_rate": 0.00019200680083176605, "loss": 3.5374, "step": 6906 }, { "epoch": 0.32334250102405054, "grad_norm": 1.3046875, "learning_rate": 0.00019200451385032718, "loss": 3.2384, "step": 6907 }, { "epoch": 0.3233893147638832, "grad_norm": 1.5546875, "learning_rate": 0.00019200222655538826, "loss": 3.6026, "step": 6908 }, { "epoch": 0.32343612850371584, "grad_norm": 1.3046875, "learning_rate": 0.000191999938946957, "loss": 3.168, "step": 6909 }, { "epoch": 0.32348294224354845, "grad_norm": 1.59375, "learning_rate": 0.00019199765102504132, "loss": 3.18, "step": 6910 }, { "epoch": 0.32352975598338113, "grad_norm": 1.3359375, "learning_rate": 0.00019199536278964897, "loss": 3.5401, "step": 6911 }, { "epoch": 0.32357656972321375, "grad_norm": 1.3125, "learning_rate": 0.0001919930742407877, "loss": 3.217, "step": 6912 }, { "epoch": 0.3236233834630464, "grad_norm": 1.25, "learning_rate": 0.00019199078537846534, "loss": 3.1451, "step": 6913 }, { "epoch": 0.32367019720287904, "grad_norm": 1.0546875, "learning_rate": 0.00019198849620268976, "loss": 4.1354, "step": 6914 }, { "epoch": 0.32371701094271166, "grad_norm": 1.25, "learning_rate": 0.00019198620671346866, "loss": 3.53, "step": 6915 }, { "epoch": 0.32376382468254433, "grad_norm": 1.203125, "learning_rate": 0.00019198391691080985, "loss": 3.3659, "step": 6916 }, { "epoch": 0.32381063842237695, "grad_norm": 1.2421875, "learning_rate": 0.0001919816267947212, "loss": 3.6307, "step": 6917 }, { "epoch": 0.3238574521622096, "grad_norm": 2.1875, "learning_rate": 0.00019197933636521045, "loss": 3.1177, "step": 6918 }, { "epoch": 0.32390426590204224, "grad_norm": 0.97265625, "learning_rate": 0.00019197704562228546, "loss": 2.8737, "step": 6919 }, { "epoch": 0.3239510796418749, "grad_norm": 1.203125, "learning_rate": 0.00019197475456595394, "loss": 3.1603, "step": 6920 }, { "epoch": 0.32399789338170754, "grad_norm": 1.2734375, "learning_rate": 0.0001919724631962238, "loss": 4.0412, "step": 6921 }, { "epoch": 0.32404470712154015, "grad_norm": 1.2890625, "learning_rate": 0.00019197017151310284, "loss": 3.5196, "step": 6922 }, { "epoch": 0.32409152086137283, "grad_norm": 2.3125, "learning_rate": 0.0001919678795165988, "loss": 2.8505, "step": 6923 }, { "epoch": 0.32413833460120545, "grad_norm": 1.59375, "learning_rate": 0.00019196558720671952, "loss": 3.1907, "step": 6924 }, { "epoch": 0.3241851483410381, "grad_norm": 1.375, "learning_rate": 0.00019196329458347284, "loss": 3.1572, "step": 6925 }, { "epoch": 0.32423196208087074, "grad_norm": 1.4375, "learning_rate": 0.00019196100164686655, "loss": 3.6528, "step": 6926 }, { "epoch": 0.32427877582070336, "grad_norm": 1.484375, "learning_rate": 0.00019195870839690846, "loss": 3.2133, "step": 6927 }, { "epoch": 0.32432558956053603, "grad_norm": 1.46875, "learning_rate": 0.00019195641483360637, "loss": 3.2221, "step": 6928 }, { "epoch": 0.32437240330036865, "grad_norm": 1.1796875, "learning_rate": 0.0001919541209569681, "loss": 3.083, "step": 6929 }, { "epoch": 0.3244192170402013, "grad_norm": 1.734375, "learning_rate": 0.00019195182676700153, "loss": 3.2269, "step": 6930 }, { "epoch": 0.32446603078003394, "grad_norm": 1.1640625, "learning_rate": 0.00019194953226371437, "loss": 3.7828, "step": 6931 }, { "epoch": 0.32451284451986656, "grad_norm": 1.5859375, "learning_rate": 0.00019194723744711453, "loss": 3.5607, "step": 6932 }, { "epoch": 0.32455965825969924, "grad_norm": 1.734375, "learning_rate": 0.00019194494231720977, "loss": 3.6849, "step": 6933 }, { "epoch": 0.32460647199953185, "grad_norm": 1.09375, "learning_rate": 0.00019194264687400793, "loss": 4.002, "step": 6934 }, { "epoch": 0.3246532857393645, "grad_norm": 1.3828125, "learning_rate": 0.00019194035111751688, "loss": 3.1336, "step": 6935 }, { "epoch": 0.32470009947919715, "grad_norm": 1.390625, "learning_rate": 0.00019193805504774436, "loss": 3.2165, "step": 6936 }, { "epoch": 0.32474691321902976, "grad_norm": 1.1640625, "learning_rate": 0.00019193575866469824, "loss": 3.2559, "step": 6937 }, { "epoch": 0.32479372695886244, "grad_norm": 1.2109375, "learning_rate": 0.00019193346196838632, "loss": 3.0426, "step": 6938 }, { "epoch": 0.32484054069869506, "grad_norm": 1.125, "learning_rate": 0.00019193116495881647, "loss": 3.2062, "step": 6939 }, { "epoch": 0.32488735443852773, "grad_norm": 1.203125, "learning_rate": 0.00019192886763599648, "loss": 3.2138, "step": 6940 }, { "epoch": 0.32493416817836035, "grad_norm": 1.140625, "learning_rate": 0.0001919265699999342, "loss": 3.1609, "step": 6941 }, { "epoch": 0.32498098191819297, "grad_norm": 1.1640625, "learning_rate": 0.00019192427205063742, "loss": 3.3582, "step": 6942 }, { "epoch": 0.32502779565802564, "grad_norm": 1.390625, "learning_rate": 0.00019192197378811398, "loss": 3.3313, "step": 6943 }, { "epoch": 0.32507460939785826, "grad_norm": 2.390625, "learning_rate": 0.00019191967521237178, "loss": 4.1063, "step": 6944 }, { "epoch": 0.32512142313769093, "grad_norm": 2.765625, "learning_rate": 0.00019191737632341858, "loss": 3.4615, "step": 6945 }, { "epoch": 0.32516823687752355, "grad_norm": 1.4375, "learning_rate": 0.00019191507712126222, "loss": 3.1699, "step": 6946 }, { "epoch": 0.32521505061735617, "grad_norm": 1.1796875, "learning_rate": 0.00019191277760591055, "loss": 3.3856, "step": 6947 }, { "epoch": 0.32526186435718885, "grad_norm": 1.078125, "learning_rate": 0.0001919104777773714, "loss": 3.1279, "step": 6948 }, { "epoch": 0.32530867809702146, "grad_norm": 1.3828125, "learning_rate": 0.00019190817763565263, "loss": 2.8991, "step": 6949 }, { "epoch": 0.32535549183685414, "grad_norm": 1.2265625, "learning_rate": 0.00019190587718076205, "loss": 3.2913, "step": 6950 }, { "epoch": 0.32540230557668676, "grad_norm": 1.640625, "learning_rate": 0.00019190357641270752, "loss": 2.9141, "step": 6951 }, { "epoch": 0.3254491193165194, "grad_norm": 1.3046875, "learning_rate": 0.00019190127533149682, "loss": 3.2406, "step": 6952 }, { "epoch": 0.32549593305635205, "grad_norm": 1.546875, "learning_rate": 0.00019189897393713786, "loss": 4.2811, "step": 6953 }, { "epoch": 0.32554274679618467, "grad_norm": 1.1015625, "learning_rate": 0.00019189667222963848, "loss": 2.8908, "step": 6954 }, { "epoch": 0.32558956053601734, "grad_norm": 1.484375, "learning_rate": 0.00019189437020900648, "loss": 3.0709, "step": 6955 }, { "epoch": 0.32563637427584996, "grad_norm": 1.78125, "learning_rate": 0.00019189206787524973, "loss": 3.3462, "step": 6956 }, { "epoch": 0.3256831880156826, "grad_norm": 1.2265625, "learning_rate": 0.00019188976522837607, "loss": 2.7781, "step": 6957 }, { "epoch": 0.32573000175551525, "grad_norm": 1.328125, "learning_rate": 0.00019188746226839334, "loss": 3.2647, "step": 6958 }, { "epoch": 0.32577681549534787, "grad_norm": 1.2421875, "learning_rate": 0.00019188515899530941, "loss": 3.4024, "step": 6959 }, { "epoch": 0.32582362923518055, "grad_norm": 1.6953125, "learning_rate": 0.00019188285540913208, "loss": 3.5478, "step": 6960 }, { "epoch": 0.32587044297501316, "grad_norm": 1.484375, "learning_rate": 0.00019188055150986925, "loss": 3.3608, "step": 6961 }, { "epoch": 0.3259172567148458, "grad_norm": 2.03125, "learning_rate": 0.00019187824729752876, "loss": 2.8253, "step": 6962 }, { "epoch": 0.32596407045467846, "grad_norm": 1.0234375, "learning_rate": 0.00019187594277211844, "loss": 3.6219, "step": 6963 }, { "epoch": 0.3260108841945111, "grad_norm": 1.3203125, "learning_rate": 0.00019187363793364619, "loss": 3.1443, "step": 6964 }, { "epoch": 0.32605769793434375, "grad_norm": 1.53125, "learning_rate": 0.0001918713327821198, "loss": 3.2375, "step": 6965 }, { "epoch": 0.32610451167417637, "grad_norm": 1.2109375, "learning_rate": 0.00019186902731754715, "loss": 3.2332, "step": 6966 }, { "epoch": 0.326151325414009, "grad_norm": 1.5546875, "learning_rate": 0.0001918667215399361, "loss": 2.846, "step": 6967 }, { "epoch": 0.32619813915384166, "grad_norm": 1.6171875, "learning_rate": 0.00019186441544929452, "loss": 3.681, "step": 6968 }, { "epoch": 0.3262449528936743, "grad_norm": 1.09375, "learning_rate": 0.00019186210904563025, "loss": 3.1351, "step": 6969 }, { "epoch": 0.32629176663350695, "grad_norm": 1.484375, "learning_rate": 0.00019185980232895114, "loss": 3.135, "step": 6970 }, { "epoch": 0.32633858037333957, "grad_norm": 1.6015625, "learning_rate": 0.00019185749529926507, "loss": 3.192, "step": 6971 }, { "epoch": 0.3263853941131722, "grad_norm": 1.453125, "learning_rate": 0.0001918551879565799, "loss": 3.0559, "step": 6972 }, { "epoch": 0.32643220785300486, "grad_norm": 1.5234375, "learning_rate": 0.0001918528803009035, "loss": 3.069, "step": 6973 }, { "epoch": 0.3264790215928375, "grad_norm": 1.984375, "learning_rate": 0.0001918505723322437, "loss": 2.675, "step": 6974 }, { "epoch": 0.32652583533267016, "grad_norm": 1.59375, "learning_rate": 0.0001918482640506084, "loss": 2.8914, "step": 6975 }, { "epoch": 0.3265726490725028, "grad_norm": 1.7109375, "learning_rate": 0.00019184595545600544, "loss": 3.343, "step": 6976 }, { "epoch": 0.32661946281233545, "grad_norm": 1.1796875, "learning_rate": 0.00019184364654844266, "loss": 3.1192, "step": 6977 }, { "epoch": 0.32666627655216807, "grad_norm": 1.640625, "learning_rate": 0.000191841337327928, "loss": 3.3762, "step": 6978 }, { "epoch": 0.3267130902920007, "grad_norm": 1.265625, "learning_rate": 0.0001918390277944693, "loss": 3.1922, "step": 6979 }, { "epoch": 0.32675990403183336, "grad_norm": 1.25, "learning_rate": 0.00019183671794807442, "loss": 4.4215, "step": 6980 }, { "epoch": 0.326806717771666, "grad_norm": 1.5390625, "learning_rate": 0.0001918344077887512, "loss": 4.4725, "step": 6981 }, { "epoch": 0.32685353151149865, "grad_norm": 1.4609375, "learning_rate": 0.00019183209731650757, "loss": 3.4613, "step": 6982 }, { "epoch": 0.32690034525133127, "grad_norm": 1.7890625, "learning_rate": 0.00019182978653135138, "loss": 2.8396, "step": 6983 }, { "epoch": 0.3269471589911639, "grad_norm": 1.234375, "learning_rate": 0.0001918274754332905, "loss": 5.0338, "step": 6984 }, { "epoch": 0.32699397273099656, "grad_norm": 1.8359375, "learning_rate": 0.00019182516402233279, "loss": 3.1958, "step": 6985 }, { "epoch": 0.3270407864708292, "grad_norm": 1.4921875, "learning_rate": 0.00019182285229848612, "loss": 3.2905, "step": 6986 }, { "epoch": 0.32708760021066186, "grad_norm": 1.515625, "learning_rate": 0.00019182054026175844, "loss": 3.5217, "step": 6987 }, { "epoch": 0.3271344139504945, "grad_norm": 1.5703125, "learning_rate": 0.00019181822791215756, "loss": 3.243, "step": 6988 }, { "epoch": 0.3271812276903271, "grad_norm": 1.34375, "learning_rate": 0.00019181591524969135, "loss": 3.2146, "step": 6989 }, { "epoch": 0.32722804143015977, "grad_norm": 1.2421875, "learning_rate": 0.00019181360227436774, "loss": 3.3507, "step": 6990 }, { "epoch": 0.3272748551699924, "grad_norm": 1.0546875, "learning_rate": 0.00019181128898619458, "loss": 3.0464, "step": 6991 }, { "epoch": 0.32732166890982506, "grad_norm": 1.6328125, "learning_rate": 0.00019180897538517976, "loss": 3.2923, "step": 6992 }, { "epoch": 0.3273684826496577, "grad_norm": 1.3125, "learning_rate": 0.0001918066614713312, "loss": 3.321, "step": 6993 }, { "epoch": 0.3274152963894903, "grad_norm": 1.2890625, "learning_rate": 0.00019180434724465668, "loss": 3.3257, "step": 6994 }, { "epoch": 0.32746211012932297, "grad_norm": 1.2890625, "learning_rate": 0.0001918020327051642, "loss": 3.5084, "step": 6995 }, { "epoch": 0.3275089238691556, "grad_norm": 1.1953125, "learning_rate": 0.00019179971785286158, "loss": 3.0157, "step": 6996 }, { "epoch": 0.32755573760898826, "grad_norm": 1.3828125, "learning_rate": 0.00019179740268775674, "loss": 3.3842, "step": 6997 }, { "epoch": 0.3276025513488209, "grad_norm": 1.4375, "learning_rate": 0.00019179508720985756, "loss": 3.0111, "step": 6998 }, { "epoch": 0.3276493650886535, "grad_norm": 1.34375, "learning_rate": 0.0001917927714191719, "loss": 3.3318, "step": 6999 }, { "epoch": 0.3276961788284862, "grad_norm": 1.265625, "learning_rate": 0.00019179045531570772, "loss": 3.2815, "step": 7000 }, { "epoch": 0.3277429925683188, "grad_norm": 1.1953125, "learning_rate": 0.00019178813889947284, "loss": 3.0248, "step": 7001 }, { "epoch": 0.32778980630815147, "grad_norm": 1.0, "learning_rate": 0.00019178582217047518, "loss": 3.1632, "step": 7002 }, { "epoch": 0.3278366200479841, "grad_norm": 1.4296875, "learning_rate": 0.00019178350512872263, "loss": 3.3828, "step": 7003 }, { "epoch": 0.3278834337878167, "grad_norm": 1.171875, "learning_rate": 0.00019178118777422312, "loss": 3.5364, "step": 7004 }, { "epoch": 0.3279302475276494, "grad_norm": 1.21875, "learning_rate": 0.0001917788701069845, "loss": 3.4102, "step": 7005 }, { "epoch": 0.327977061267482, "grad_norm": 1.15625, "learning_rate": 0.00019177655212701468, "loss": 3.4038, "step": 7006 }, { "epoch": 0.32802387500731467, "grad_norm": 1.3125, "learning_rate": 0.00019177423383432159, "loss": 3.0496, "step": 7007 }, { "epoch": 0.3280706887471473, "grad_norm": 1.125, "learning_rate": 0.00019177191522891306, "loss": 3.3937, "step": 7008 }, { "epoch": 0.3281175024869799, "grad_norm": 1.2421875, "learning_rate": 0.00019176959631079705, "loss": 3.4307, "step": 7009 }, { "epoch": 0.3281643162268126, "grad_norm": 1.8671875, "learning_rate": 0.00019176727707998149, "loss": 3.2019, "step": 7010 }, { "epoch": 0.3282111299666452, "grad_norm": 1.109375, "learning_rate": 0.00019176495753647416, "loss": 4.5408, "step": 7011 }, { "epoch": 0.3282579437064779, "grad_norm": 1.1640625, "learning_rate": 0.00019176263768028311, "loss": 3.2335, "step": 7012 }, { "epoch": 0.3283047574463105, "grad_norm": 1.3203125, "learning_rate": 0.00019176031751141613, "loss": 3.0248, "step": 7013 }, { "epoch": 0.3283515711861431, "grad_norm": 1.2890625, "learning_rate": 0.0001917579970298812, "loss": 3.4397, "step": 7014 }, { "epoch": 0.3283983849259758, "grad_norm": 1.3125, "learning_rate": 0.0001917556762356862, "loss": 3.4482, "step": 7015 }, { "epoch": 0.3284451986658084, "grad_norm": 1.203125, "learning_rate": 0.000191753355128839, "loss": 2.8208, "step": 7016 }, { "epoch": 0.3284920124056411, "grad_norm": 1.3671875, "learning_rate": 0.00019175103370934758, "loss": 3.146, "step": 7017 }, { "epoch": 0.3285388261454737, "grad_norm": 1.2578125, "learning_rate": 0.0001917487119772198, "loss": 3.2403, "step": 7018 }, { "epoch": 0.3285856398853063, "grad_norm": 1.21875, "learning_rate": 0.00019174638993246361, "loss": 3.0118, "step": 7019 }, { "epoch": 0.328632453625139, "grad_norm": 1.609375, "learning_rate": 0.00019174406757508687, "loss": 3.4767, "step": 7020 }, { "epoch": 0.3286792673649716, "grad_norm": 1.3203125, "learning_rate": 0.00019174174490509756, "loss": 3.6983, "step": 7021 }, { "epoch": 0.3287260811048043, "grad_norm": 1.6796875, "learning_rate": 0.00019173942192250354, "loss": 4.0359, "step": 7022 }, { "epoch": 0.3287728948446369, "grad_norm": 1.203125, "learning_rate": 0.00019173709862731274, "loss": 3.3605, "step": 7023 }, { "epoch": 0.3288197085844695, "grad_norm": 1.53125, "learning_rate": 0.00019173477501953305, "loss": 3.0104, "step": 7024 }, { "epoch": 0.3288665223243022, "grad_norm": 1.4140625, "learning_rate": 0.00019173245109917243, "loss": 3.1683, "step": 7025 }, { "epoch": 0.3289133360641348, "grad_norm": 1.09375, "learning_rate": 0.00019173012686623882, "loss": 3.2709, "step": 7026 }, { "epoch": 0.3289601498039675, "grad_norm": 2.1875, "learning_rate": 0.00019172780232074008, "loss": 3.3969, "step": 7027 }, { "epoch": 0.3290069635438001, "grad_norm": 1.15625, "learning_rate": 0.00019172547746268417, "loss": 3.2335, "step": 7028 }, { "epoch": 0.3290537772836327, "grad_norm": 1.3046875, "learning_rate": 0.00019172315229207897, "loss": 3.3921, "step": 7029 }, { "epoch": 0.3291005910234654, "grad_norm": 1.3671875, "learning_rate": 0.00019172082680893245, "loss": 2.7707, "step": 7030 }, { "epoch": 0.329147404763298, "grad_norm": 1.3515625, "learning_rate": 0.00019171850101325252, "loss": 3.2375, "step": 7031 }, { "epoch": 0.3291942185031307, "grad_norm": 1.796875, "learning_rate": 0.00019171617490504708, "loss": 3.941, "step": 7032 }, { "epoch": 0.3292410322429633, "grad_norm": 1.5078125, "learning_rate": 0.00019171384848432407, "loss": 3.5679, "step": 7033 }, { "epoch": 0.3292878459827959, "grad_norm": 1.7109375, "learning_rate": 0.00019171152175109145, "loss": 2.8466, "step": 7034 }, { "epoch": 0.3293346597226286, "grad_norm": 1.34375, "learning_rate": 0.00019170919470535712, "loss": 3.1562, "step": 7035 }, { "epoch": 0.3293814734624612, "grad_norm": 1.5234375, "learning_rate": 0.000191706867347129, "loss": 2.721, "step": 7036 }, { "epoch": 0.3294282872022939, "grad_norm": 1.3203125, "learning_rate": 0.00019170453967641503, "loss": 3.7046, "step": 7037 }, { "epoch": 0.3294751009421265, "grad_norm": 1.21875, "learning_rate": 0.00019170221169322312, "loss": 3.5816, "step": 7038 }, { "epoch": 0.3295219146819592, "grad_norm": 1.3359375, "learning_rate": 0.00019169988339756127, "loss": 3.5612, "step": 7039 }, { "epoch": 0.3295687284217918, "grad_norm": 1.6015625, "learning_rate": 0.00019169755478943735, "loss": 3.4965, "step": 7040 }, { "epoch": 0.3296155421616244, "grad_norm": 1.4375, "learning_rate": 0.00019169522586885931, "loss": 3.4407, "step": 7041 }, { "epoch": 0.3296623559014571, "grad_norm": 1.8359375, "learning_rate": 0.00019169289663583509, "loss": 2.9109, "step": 7042 }, { "epoch": 0.3297091696412897, "grad_norm": 1.3046875, "learning_rate": 0.0001916905670903726, "loss": 3.2547, "step": 7043 }, { "epoch": 0.3297559833811224, "grad_norm": 1.5, "learning_rate": 0.00019168823723247983, "loss": 3.1416, "step": 7044 }, { "epoch": 0.329802797120955, "grad_norm": 1.3359375, "learning_rate": 0.00019168590706216467, "loss": 3.1988, "step": 7045 }, { "epoch": 0.3298496108607876, "grad_norm": 0.9140625, "learning_rate": 0.0001916835765794351, "loss": 2.3753, "step": 7046 }, { "epoch": 0.3298964246006203, "grad_norm": 4.375, "learning_rate": 0.00019168124578429905, "loss": 2.8351, "step": 7047 }, { "epoch": 0.3299432383404529, "grad_norm": 1.90625, "learning_rate": 0.00019167891467676445, "loss": 2.7597, "step": 7048 }, { "epoch": 0.3299900520802856, "grad_norm": 1.109375, "learning_rate": 0.00019167658325683923, "loss": 3.4397, "step": 7049 }, { "epoch": 0.3300368658201182, "grad_norm": 1.328125, "learning_rate": 0.00019167425152453134, "loss": 3.1957, "step": 7050 }, { "epoch": 0.3300836795599508, "grad_norm": 1.4375, "learning_rate": 0.00019167191947984876, "loss": 3.447, "step": 7051 }, { "epoch": 0.3301304932997835, "grad_norm": 1.7421875, "learning_rate": 0.0001916695871227994, "loss": 3.0669, "step": 7052 }, { "epoch": 0.3301773070396161, "grad_norm": 1.2421875, "learning_rate": 0.00019166725445339124, "loss": 3.221, "step": 7053 }, { "epoch": 0.3302241207794488, "grad_norm": 1.203125, "learning_rate": 0.0001916649214716322, "loss": 3.4796, "step": 7054 }, { "epoch": 0.3302709345192814, "grad_norm": 1.5625, "learning_rate": 0.00019166258817753025, "loss": 3.2372, "step": 7055 }, { "epoch": 0.33031774825911403, "grad_norm": 1.4609375, "learning_rate": 0.0001916602545710933, "loss": 3.0331, "step": 7056 }, { "epoch": 0.3303645619989467, "grad_norm": 1.3984375, "learning_rate": 0.00019165792065232938, "loss": 3.6192, "step": 7057 }, { "epoch": 0.3304113757387793, "grad_norm": 2.0, "learning_rate": 0.00019165558642124638, "loss": 3.0547, "step": 7058 }, { "epoch": 0.330458189478612, "grad_norm": 1.5078125, "learning_rate": 0.00019165325187785222, "loss": 4.4631, "step": 7059 }, { "epoch": 0.3305050032184446, "grad_norm": 1.4296875, "learning_rate": 0.00019165091702215493, "loss": 3.2713, "step": 7060 }, { "epoch": 0.33055181695827723, "grad_norm": 1.171875, "learning_rate": 0.00019164858185416245, "loss": 2.9809, "step": 7061 }, { "epoch": 0.3305986306981099, "grad_norm": 1.5625, "learning_rate": 0.0001916462463738827, "loss": 3.3497, "step": 7062 }, { "epoch": 0.3306454444379425, "grad_norm": 1.5546875, "learning_rate": 0.0001916439105813237, "loss": 3.396, "step": 7063 }, { "epoch": 0.3306922581777752, "grad_norm": 2.25, "learning_rate": 0.00019164157447649333, "loss": 3.3129, "step": 7064 }, { "epoch": 0.3307390719176078, "grad_norm": 1.2421875, "learning_rate": 0.0001916392380593996, "loss": 3.0683, "step": 7065 }, { "epoch": 0.33078588565744044, "grad_norm": 1.8671875, "learning_rate": 0.00019163690133005046, "loss": 3.2629, "step": 7066 }, { "epoch": 0.3308326993972731, "grad_norm": 1.3515625, "learning_rate": 0.00019163456428845392, "loss": 3.4754, "step": 7067 }, { "epoch": 0.33087951313710573, "grad_norm": 1.734375, "learning_rate": 0.0001916322269346179, "loss": 3.4828, "step": 7068 }, { "epoch": 0.3309263268769384, "grad_norm": 1.1875, "learning_rate": 0.00019162988926855034, "loss": 2.9722, "step": 7069 }, { "epoch": 0.330973140616771, "grad_norm": 1.484375, "learning_rate": 0.00019162755129025917, "loss": 3.3235, "step": 7070 }, { "epoch": 0.33101995435660364, "grad_norm": 1.09375, "learning_rate": 0.0001916252129997525, "loss": 3.1984, "step": 7071 }, { "epoch": 0.3310667680964363, "grad_norm": 1.34375, "learning_rate": 0.00019162287439703816, "loss": 3.2347, "step": 7072 }, { "epoch": 0.33111358183626893, "grad_norm": 1.46875, "learning_rate": 0.0001916205354821242, "loss": 3.5426, "step": 7073 }, { "epoch": 0.3311603955761016, "grad_norm": 1.65625, "learning_rate": 0.00019161819625501854, "loss": 3.2168, "step": 7074 }, { "epoch": 0.3312072093159342, "grad_norm": 1.59375, "learning_rate": 0.0001916158567157292, "loss": 3.4696, "step": 7075 }, { "epoch": 0.33125402305576684, "grad_norm": 1.4453125, "learning_rate": 0.0001916135168642641, "loss": 3.1855, "step": 7076 }, { "epoch": 0.3313008367955995, "grad_norm": 1.4765625, "learning_rate": 0.00019161117670063124, "loss": 3.4649, "step": 7077 }, { "epoch": 0.33134765053543214, "grad_norm": 1.4453125, "learning_rate": 0.0001916088362248386, "loss": 3.5299, "step": 7078 }, { "epoch": 0.3313944642752648, "grad_norm": 1.0234375, "learning_rate": 0.00019160649543689413, "loss": 4.1462, "step": 7079 }, { "epoch": 0.33144127801509743, "grad_norm": 1.2734375, "learning_rate": 0.00019160415433680584, "loss": 3.1258, "step": 7080 }, { "epoch": 0.33148809175493005, "grad_norm": 1.265625, "learning_rate": 0.0001916018129245817, "loss": 3.3287, "step": 7081 }, { "epoch": 0.3315349054947627, "grad_norm": 1.5390625, "learning_rate": 0.00019159947120022964, "loss": 2.9197, "step": 7082 }, { "epoch": 0.33158171923459534, "grad_norm": 1.828125, "learning_rate": 0.0001915971291637577, "loss": 3.0648, "step": 7083 }, { "epoch": 0.331628532974428, "grad_norm": 1.3125, "learning_rate": 0.00019159478681517384, "loss": 3.2803, "step": 7084 }, { "epoch": 0.33167534671426063, "grad_norm": 1.1953125, "learning_rate": 0.00019159244415448604, "loss": 3.1235, "step": 7085 }, { "epoch": 0.33172216045409325, "grad_norm": 1.4140625, "learning_rate": 0.00019159010118170226, "loss": 3.587, "step": 7086 }, { "epoch": 0.3317689741939259, "grad_norm": 1.0859375, "learning_rate": 0.00019158775789683053, "loss": 3.073, "step": 7087 }, { "epoch": 0.33181578793375854, "grad_norm": 1.4375, "learning_rate": 0.0001915854142998788, "loss": 3.5039, "step": 7088 }, { "epoch": 0.3318626016735912, "grad_norm": 1.4921875, "learning_rate": 0.00019158307039085504, "loss": 3.0541, "step": 7089 }, { "epoch": 0.33190941541342384, "grad_norm": 1.5234375, "learning_rate": 0.0001915807261697673, "loss": 3.5757, "step": 7090 }, { "epoch": 0.33195622915325645, "grad_norm": 1.65625, "learning_rate": 0.00019157838163662348, "loss": 4.2117, "step": 7091 }, { "epoch": 0.33200304289308913, "grad_norm": 1.015625, "learning_rate": 0.0001915760367914317, "loss": 2.9543, "step": 7092 }, { "epoch": 0.33204985663292175, "grad_norm": 1.125, "learning_rate": 0.00019157369163419979, "loss": 3.5248, "step": 7093 }, { "epoch": 0.3320966703727544, "grad_norm": 1.046875, "learning_rate": 0.00019157134616493585, "loss": 4.7287, "step": 7094 }, { "epoch": 0.33214348411258704, "grad_norm": 1.7109375, "learning_rate": 0.00019156900038364783, "loss": 3.8629, "step": 7095 }, { "epoch": 0.33219029785241966, "grad_norm": 1.484375, "learning_rate": 0.00019156665429034375, "loss": 3.4232, "step": 7096 }, { "epoch": 0.33223711159225233, "grad_norm": 1.2109375, "learning_rate": 0.0001915643078850316, "loss": 3.4115, "step": 7097 }, { "epoch": 0.33228392533208495, "grad_norm": 1.765625, "learning_rate": 0.00019156196116771934, "loss": 2.912, "step": 7098 }, { "epoch": 0.3323307390719176, "grad_norm": 1.765625, "learning_rate": 0.00019155961413841495, "loss": 3.3851, "step": 7099 }, { "epoch": 0.33237755281175024, "grad_norm": 1.6328125, "learning_rate": 0.00019155726679712657, "loss": 3.3642, "step": 7100 }, { "epoch": 0.3324243665515829, "grad_norm": 1.1953125, "learning_rate": 0.00019155491914386202, "loss": 4.1491, "step": 7101 }, { "epoch": 0.33247118029141554, "grad_norm": 1.5390625, "learning_rate": 0.00019155257117862938, "loss": 3.4192, "step": 7102 }, { "epoch": 0.33251799403124815, "grad_norm": 1.28125, "learning_rate": 0.00019155022290143664, "loss": 2.9518, "step": 7103 }, { "epoch": 0.33256480777108083, "grad_norm": 1.5078125, "learning_rate": 0.00019154787431229184, "loss": 3.5855, "step": 7104 }, { "epoch": 0.33261162151091345, "grad_norm": 1.4609375, "learning_rate": 0.00019154552541120293, "loss": 3.7103, "step": 7105 }, { "epoch": 0.3326584352507461, "grad_norm": 1.59375, "learning_rate": 0.00019154317619817795, "loss": 3.5663, "step": 7106 }, { "epoch": 0.33270524899057874, "grad_norm": 1.5390625, "learning_rate": 0.00019154082667322485, "loss": 3.2389, "step": 7107 }, { "epoch": 0.33275206273041136, "grad_norm": 1.234375, "learning_rate": 0.00019153847683635173, "loss": 3.2435, "step": 7108 }, { "epoch": 0.33279887647024403, "grad_norm": 1.4765625, "learning_rate": 0.00019153612668756651, "loss": 3.3899, "step": 7109 }, { "epoch": 0.33284569021007665, "grad_norm": 1.46875, "learning_rate": 0.00019153377622687723, "loss": 3.4878, "step": 7110 }, { "epoch": 0.3328925039499093, "grad_norm": 1.7734375, "learning_rate": 0.00019153142545429188, "loss": 2.8649, "step": 7111 }, { "epoch": 0.33293931768974194, "grad_norm": 1.2109375, "learning_rate": 0.0001915290743698185, "loss": 2.5173, "step": 7112 }, { "epoch": 0.33298613142957456, "grad_norm": 1.28125, "learning_rate": 0.00019152672297346513, "loss": 3.0968, "step": 7113 }, { "epoch": 0.33303294516940724, "grad_norm": 1.375, "learning_rate": 0.0001915243712652397, "loss": 3.2179, "step": 7114 }, { "epoch": 0.33307975890923985, "grad_norm": 1.375, "learning_rate": 0.00019152201924515027, "loss": 3.4923, "step": 7115 }, { "epoch": 0.3331265726490725, "grad_norm": 2.3125, "learning_rate": 0.00019151966691320486, "loss": 3.4362, "step": 7116 }, { "epoch": 0.33317338638890515, "grad_norm": 1.2265625, "learning_rate": 0.00019151731426941145, "loss": 3.425, "step": 7117 }, { "epoch": 0.33322020012873776, "grad_norm": 1.6484375, "learning_rate": 0.00019151496131377812, "loss": 3.0371, "step": 7118 }, { "epoch": 0.33326701386857044, "grad_norm": 1.125, "learning_rate": 0.00019151260804631284, "loss": 3.436, "step": 7119 }, { "epoch": 0.33331382760840306, "grad_norm": 1.15625, "learning_rate": 0.0001915102544670236, "loss": 3.4123, "step": 7120 }, { "epoch": 0.33336064134823573, "grad_norm": 1.171875, "learning_rate": 0.00019150790057591852, "loss": 2.861, "step": 7121 }, { "epoch": 0.33340745508806835, "grad_norm": 1.2265625, "learning_rate": 0.0001915055463730055, "loss": 2.9334, "step": 7122 }, { "epoch": 0.33345426882790097, "grad_norm": 1.1953125, "learning_rate": 0.00019150319185829267, "loss": 3.008, "step": 7123 }, { "epoch": 0.33350108256773364, "grad_norm": 1.4296875, "learning_rate": 0.00019150083703178798, "loss": 2.6287, "step": 7124 }, { "epoch": 0.33354789630756626, "grad_norm": 1.3984375, "learning_rate": 0.00019149848189349946, "loss": 2.9892, "step": 7125 }, { "epoch": 0.33359471004739893, "grad_norm": 1.34375, "learning_rate": 0.0001914961264434352, "loss": 3.263, "step": 7126 }, { "epoch": 0.33364152378723155, "grad_norm": 1.4375, "learning_rate": 0.00019149377068160313, "loss": 2.7729, "step": 7127 }, { "epoch": 0.33368833752706417, "grad_norm": 1.5546875, "learning_rate": 0.00019149141460801132, "loss": 3.4142, "step": 7128 }, { "epoch": 0.33373515126689685, "grad_norm": 1.3046875, "learning_rate": 0.00019148905822266783, "loss": 3.1805, "step": 7129 }, { "epoch": 0.33378196500672946, "grad_norm": 1.1640625, "learning_rate": 0.00019148670152558067, "loss": 2.9119, "step": 7130 }, { "epoch": 0.33382877874656214, "grad_norm": 1.921875, "learning_rate": 0.00019148434451675783, "loss": 3.5414, "step": 7131 }, { "epoch": 0.33387559248639476, "grad_norm": 1.8046875, "learning_rate": 0.00019148198719620737, "loss": 3.2559, "step": 7132 }, { "epoch": 0.3339224062262274, "grad_norm": 1.515625, "learning_rate": 0.00019147962956393737, "loss": 3.2306, "step": 7133 }, { "epoch": 0.33396921996606005, "grad_norm": 1.8046875, "learning_rate": 0.0001914772716199558, "loss": 3.7195, "step": 7134 }, { "epoch": 0.33401603370589267, "grad_norm": 1.4140625, "learning_rate": 0.0001914749133642707, "loss": 3.3082, "step": 7135 }, { "epoch": 0.33406284744572534, "grad_norm": 1.09375, "learning_rate": 0.00019147255479689014, "loss": 3.1213, "step": 7136 }, { "epoch": 0.33410966118555796, "grad_norm": 1.3125, "learning_rate": 0.00019147019591782215, "loss": 3.184, "step": 7137 }, { "epoch": 0.3341564749253906, "grad_norm": 1.125, "learning_rate": 0.0001914678367270747, "loss": 4.4875, "step": 7138 }, { "epoch": 0.33420328866522325, "grad_norm": 1.3515625, "learning_rate": 0.00019146547722465594, "loss": 3.0773, "step": 7139 }, { "epoch": 0.33425010240505587, "grad_norm": 1.3046875, "learning_rate": 0.00019146311741057384, "loss": 3.3767, "step": 7140 }, { "epoch": 0.33429691614488855, "grad_norm": 1.3125, "learning_rate": 0.00019146075728483644, "loss": 3.0774, "step": 7141 }, { "epoch": 0.33434372988472116, "grad_norm": 1.1796875, "learning_rate": 0.0001914583968474518, "loss": 3.1561, "step": 7142 }, { "epoch": 0.3343905436245538, "grad_norm": 1.3046875, "learning_rate": 0.00019145603609842796, "loss": 3.0239, "step": 7143 }, { "epoch": 0.33443735736438646, "grad_norm": 1.6484375, "learning_rate": 0.00019145367503777295, "loss": 3.1754, "step": 7144 }, { "epoch": 0.3344841711042191, "grad_norm": 1.34375, "learning_rate": 0.00019145131366549484, "loss": 3.2069, "step": 7145 }, { "epoch": 0.33453098484405175, "grad_norm": 1.1953125, "learning_rate": 0.00019144895198160166, "loss": 3.1123, "step": 7146 }, { "epoch": 0.33457779858388437, "grad_norm": 1.453125, "learning_rate": 0.0001914465899861015, "loss": 3.3941, "step": 7147 }, { "epoch": 0.334624612323717, "grad_norm": 1.15625, "learning_rate": 0.0001914442276790023, "loss": 3.2508, "step": 7148 }, { "epoch": 0.33467142606354966, "grad_norm": 1.203125, "learning_rate": 0.00019144186506031224, "loss": 3.1371, "step": 7149 }, { "epoch": 0.3347182398033823, "grad_norm": 1.3515625, "learning_rate": 0.0001914395021300393, "loss": 3.1707, "step": 7150 }, { "epoch": 0.33476505354321495, "grad_norm": 1.4140625, "learning_rate": 0.00019143713888819153, "loss": 3.0264, "step": 7151 }, { "epoch": 0.33481186728304757, "grad_norm": 1.34375, "learning_rate": 0.000191434775334777, "loss": 3.1807, "step": 7152 }, { "epoch": 0.3348586810228802, "grad_norm": 1.234375, "learning_rate": 0.00019143241146980377, "loss": 3.5004, "step": 7153 }, { "epoch": 0.33490549476271286, "grad_norm": 1.453125, "learning_rate": 0.00019143004729327986, "loss": 2.842, "step": 7154 }, { "epoch": 0.3349523085025455, "grad_norm": 2.140625, "learning_rate": 0.00019142768280521334, "loss": 3.4321, "step": 7155 }, { "epoch": 0.33499912224237816, "grad_norm": 1.78125, "learning_rate": 0.0001914253180056123, "loss": 3.3055, "step": 7156 }, { "epoch": 0.3350459359822108, "grad_norm": 1.4453125, "learning_rate": 0.00019142295289448477, "loss": 3.1647, "step": 7157 }, { "epoch": 0.3350927497220434, "grad_norm": 1.2109375, "learning_rate": 0.0001914205874718388, "loss": 2.7127, "step": 7158 }, { "epoch": 0.33513956346187607, "grad_norm": 1.3671875, "learning_rate": 0.0001914182217376825, "loss": 3.0736, "step": 7159 }, { "epoch": 0.3351863772017087, "grad_norm": 1.1328125, "learning_rate": 0.00019141585569202384, "loss": 3.0571, "step": 7160 }, { "epoch": 0.33523319094154136, "grad_norm": 1.4375, "learning_rate": 0.00019141348933487094, "loss": 3.2682, "step": 7161 }, { "epoch": 0.335280004681374, "grad_norm": 1.234375, "learning_rate": 0.0001914111226662319, "loss": 3.1279, "step": 7162 }, { "epoch": 0.33532681842120665, "grad_norm": 1.3671875, "learning_rate": 0.00019140875568611474, "loss": 3.0097, "step": 7163 }, { "epoch": 0.33537363216103927, "grad_norm": 1.1953125, "learning_rate": 0.00019140638839452748, "loss": 2.9462, "step": 7164 }, { "epoch": 0.3354204459008719, "grad_norm": 1.0703125, "learning_rate": 0.00019140402079147827, "loss": 3.0896, "step": 7165 }, { "epoch": 0.33546725964070456, "grad_norm": 2.578125, "learning_rate": 0.00019140165287697514, "loss": 2.9352, "step": 7166 }, { "epoch": 0.3355140733805372, "grad_norm": 1.2265625, "learning_rate": 0.0001913992846510262, "loss": 3.6079, "step": 7167 }, { "epoch": 0.33556088712036986, "grad_norm": 1.2421875, "learning_rate": 0.00019139691611363944, "loss": 3.0155, "step": 7168 }, { "epoch": 0.3356077008602025, "grad_norm": 1.1171875, "learning_rate": 0.00019139454726482295, "loss": 2.9778, "step": 7169 }, { "epoch": 0.3356545146000351, "grad_norm": 1.8203125, "learning_rate": 0.00019139217810458485, "loss": 3.2715, "step": 7170 }, { "epoch": 0.33570132833986777, "grad_norm": 0.98828125, "learning_rate": 0.0001913898086329332, "loss": 3.2781, "step": 7171 }, { "epoch": 0.3357481420797004, "grad_norm": 1.640625, "learning_rate": 0.000191387438849876, "loss": 3.9821, "step": 7172 }, { "epoch": 0.33579495581953306, "grad_norm": 1.125, "learning_rate": 0.00019138506875542147, "loss": 2.9862, "step": 7173 }, { "epoch": 0.3358417695593657, "grad_norm": 1.4296875, "learning_rate": 0.00019138269834957753, "loss": 3.5042, "step": 7174 }, { "epoch": 0.3358885832991983, "grad_norm": 1.5390625, "learning_rate": 0.00019138032763235235, "loss": 2.8613, "step": 7175 }, { "epoch": 0.33593539703903097, "grad_norm": 1.6796875, "learning_rate": 0.00019137795660375398, "loss": 3.6168, "step": 7176 }, { "epoch": 0.3359822107788636, "grad_norm": 1.2890625, "learning_rate": 0.00019137558526379053, "loss": 3.0267, "step": 7177 }, { "epoch": 0.33602902451869626, "grad_norm": 1.390625, "learning_rate": 0.00019137321361247005, "loss": 3.8512, "step": 7178 }, { "epoch": 0.3360758382585289, "grad_norm": 1.46875, "learning_rate": 0.00019137084164980061, "loss": 3.2248, "step": 7179 }, { "epoch": 0.3361226519983615, "grad_norm": 1.34375, "learning_rate": 0.00019136846937579033, "loss": 3.0678, "step": 7180 }, { "epoch": 0.3361694657381942, "grad_norm": 2.296875, "learning_rate": 0.00019136609679044724, "loss": 3.6863, "step": 7181 }, { "epoch": 0.3362162794780268, "grad_norm": 1.71875, "learning_rate": 0.00019136372389377945, "loss": 3.114, "step": 7182 }, { "epoch": 0.33626309321785947, "grad_norm": 1.5234375, "learning_rate": 0.0001913613506857951, "loss": 3.4703, "step": 7183 }, { "epoch": 0.3363099069576921, "grad_norm": 1.265625, "learning_rate": 0.0001913589771665022, "loss": 3.4516, "step": 7184 }, { "epoch": 0.3363567206975247, "grad_norm": 1.28125, "learning_rate": 0.00019135660333590885, "loss": 3.2728, "step": 7185 }, { "epoch": 0.3364035344373574, "grad_norm": 1.21875, "learning_rate": 0.0001913542291940232, "loss": 2.8725, "step": 7186 }, { "epoch": 0.33645034817719, "grad_norm": 1.171875, "learning_rate": 0.00019135185474085327, "loss": 2.91, "step": 7187 }, { "epoch": 0.33649716191702267, "grad_norm": 1.75, "learning_rate": 0.00019134947997640714, "loss": 3.4966, "step": 7188 }, { "epoch": 0.3365439756568553, "grad_norm": 1.328125, "learning_rate": 0.00019134710490069297, "loss": 3.2316, "step": 7189 }, { "epoch": 0.3365907893966879, "grad_norm": 1.53125, "learning_rate": 0.00019134472951371883, "loss": 3.1791, "step": 7190 }, { "epoch": 0.3366376031365206, "grad_norm": 1.3828125, "learning_rate": 0.00019134235381549283, "loss": 3.1771, "step": 7191 }, { "epoch": 0.3366844168763532, "grad_norm": 1.5390625, "learning_rate": 0.00019133997780602298, "loss": 2.9053, "step": 7192 }, { "epoch": 0.3367312306161859, "grad_norm": 1.265625, "learning_rate": 0.00019133760148531747, "loss": 2.9958, "step": 7193 }, { "epoch": 0.3367780443560185, "grad_norm": 1.5859375, "learning_rate": 0.00019133522485338436, "loss": 3.1534, "step": 7194 }, { "epoch": 0.3368248580958511, "grad_norm": 1.1953125, "learning_rate": 0.00019133284791023175, "loss": 3.0745, "step": 7195 }, { "epoch": 0.3368716718356838, "grad_norm": 1.828125, "learning_rate": 0.0001913304706558677, "loss": 3.7623, "step": 7196 }, { "epoch": 0.3369184855755164, "grad_norm": 1.4453125, "learning_rate": 0.0001913280930903004, "loss": 3.3542, "step": 7197 }, { "epoch": 0.3369652993153491, "grad_norm": 1.6640625, "learning_rate": 0.0001913257152135379, "loss": 3.1424, "step": 7198 }, { "epoch": 0.3370121130551817, "grad_norm": 1.28125, "learning_rate": 0.00019132333702558826, "loss": 3.2787, "step": 7199 }, { "epoch": 0.3370589267950143, "grad_norm": 1.859375, "learning_rate": 0.00019132095852645965, "loss": 3.4654, "step": 7200 }, { "epoch": 0.337105740534847, "grad_norm": 1.4609375, "learning_rate": 0.00019131857971616017, "loss": 3.2157, "step": 7201 }, { "epoch": 0.3371525542746796, "grad_norm": 1.09375, "learning_rate": 0.0001913162005946979, "loss": 3.0344, "step": 7202 }, { "epoch": 0.3371993680145123, "grad_norm": 1.7421875, "learning_rate": 0.00019131382116208094, "loss": 3.2665, "step": 7203 }, { "epoch": 0.3372461817543449, "grad_norm": 1.6015625, "learning_rate": 0.00019131144141831742, "loss": 3.6456, "step": 7204 }, { "epoch": 0.3372929954941775, "grad_norm": 1.4296875, "learning_rate": 0.00019130906136341546, "loss": 3.0569, "step": 7205 }, { "epoch": 0.3373398092340102, "grad_norm": 1.4296875, "learning_rate": 0.00019130668099738313, "loss": 2.8868, "step": 7206 }, { "epoch": 0.3373866229738428, "grad_norm": 1.3515625, "learning_rate": 0.00019130430032022856, "loss": 3.1833, "step": 7207 }, { "epoch": 0.3374334367136755, "grad_norm": 1.203125, "learning_rate": 0.0001913019193319599, "loss": 4.6997, "step": 7208 }, { "epoch": 0.3374802504535081, "grad_norm": 1.4609375, "learning_rate": 0.0001912995380325852, "loss": 3.3102, "step": 7209 }, { "epoch": 0.3375270641933407, "grad_norm": 1.3203125, "learning_rate": 0.00019129715642211262, "loss": 3.0353, "step": 7210 }, { "epoch": 0.3375738779331734, "grad_norm": 1.484375, "learning_rate": 0.00019129477450055023, "loss": 3.1854, "step": 7211 }, { "epoch": 0.337620691673006, "grad_norm": 1.1953125, "learning_rate": 0.0001912923922679062, "loss": 3.741, "step": 7212 }, { "epoch": 0.3376675054128387, "grad_norm": 2.078125, "learning_rate": 0.00019129000972418857, "loss": 3.1124, "step": 7213 }, { "epoch": 0.3377143191526713, "grad_norm": 1.203125, "learning_rate": 0.00019128762686940556, "loss": 3.1859, "step": 7214 }, { "epoch": 0.3377611328925039, "grad_norm": 1.6015625, "learning_rate": 0.00019128524370356522, "loss": 3.1808, "step": 7215 }, { "epoch": 0.3378079466323366, "grad_norm": 1.28125, "learning_rate": 0.00019128286022667567, "loss": 3.586, "step": 7216 }, { "epoch": 0.3378547603721692, "grad_norm": 1.375, "learning_rate": 0.00019128047643874506, "loss": 3.6228, "step": 7217 }, { "epoch": 0.3379015741120019, "grad_norm": 1.34375, "learning_rate": 0.00019127809233978154, "loss": 3.6496, "step": 7218 }, { "epoch": 0.3379483878518345, "grad_norm": 1.3984375, "learning_rate": 0.00019127570792979316, "loss": 3.0501, "step": 7219 }, { "epoch": 0.3379952015916671, "grad_norm": 1.3125, "learning_rate": 0.00019127332320878807, "loss": 3.2198, "step": 7220 }, { "epoch": 0.3380420153314998, "grad_norm": 1.1640625, "learning_rate": 0.00019127093817677447, "loss": 2.8847, "step": 7221 }, { "epoch": 0.3380888290713324, "grad_norm": 1.625, "learning_rate": 0.00019126855283376038, "loss": 3.7874, "step": 7222 }, { "epoch": 0.3381356428111651, "grad_norm": 1.40625, "learning_rate": 0.00019126616717975395, "loss": 3.5598, "step": 7223 }, { "epoch": 0.3381824565509977, "grad_norm": 2.15625, "learning_rate": 0.00019126378121476335, "loss": 3.1757, "step": 7224 }, { "epoch": 0.3382292702908304, "grad_norm": 1.265625, "learning_rate": 0.0001912613949387967, "loss": 3.1775, "step": 7225 }, { "epoch": 0.338276084030663, "grad_norm": 2.03125, "learning_rate": 0.00019125900835186214, "loss": 3.4996, "step": 7226 }, { "epoch": 0.3383228977704956, "grad_norm": 1.1484375, "learning_rate": 0.0001912566214539677, "loss": 3.0552, "step": 7227 }, { "epoch": 0.3383697115103283, "grad_norm": 1.6796875, "learning_rate": 0.0001912542342451217, "loss": 2.9319, "step": 7228 }, { "epoch": 0.3384165252501609, "grad_norm": 1.21875, "learning_rate": 0.00019125184672533213, "loss": 3.3825, "step": 7229 }, { "epoch": 0.3384633389899936, "grad_norm": 1.296875, "learning_rate": 0.00019124945889460715, "loss": 2.9868, "step": 7230 }, { "epoch": 0.3385101527298262, "grad_norm": 1.3125, "learning_rate": 0.00019124707075295492, "loss": 3.3316, "step": 7231 }, { "epoch": 0.3385569664696588, "grad_norm": 1.2578125, "learning_rate": 0.0001912446823003836, "loss": 2.895, "step": 7232 }, { "epoch": 0.3386037802094915, "grad_norm": 1.2734375, "learning_rate": 0.00019124229353690127, "loss": 3.2493, "step": 7233 }, { "epoch": 0.3386505939493241, "grad_norm": 1.3359375, "learning_rate": 0.00019123990446251612, "loss": 3.2291, "step": 7234 }, { "epoch": 0.3386974076891568, "grad_norm": 1.375, "learning_rate": 0.00019123751507723623, "loss": 3.3696, "step": 7235 }, { "epoch": 0.3387442214289894, "grad_norm": 1.484375, "learning_rate": 0.00019123512538106983, "loss": 3.1764, "step": 7236 }, { "epoch": 0.33879103516882203, "grad_norm": 1.2890625, "learning_rate": 0.000191232735374025, "loss": 3.3529, "step": 7237 }, { "epoch": 0.3388378489086547, "grad_norm": 1.2265625, "learning_rate": 0.00019123034505610988, "loss": 3.0575, "step": 7238 }, { "epoch": 0.3388846626484873, "grad_norm": 1.390625, "learning_rate": 0.00019122795442733264, "loss": 2.9669, "step": 7239 }, { "epoch": 0.33893147638832, "grad_norm": 1.3359375, "learning_rate": 0.00019122556348770143, "loss": 3.0353, "step": 7240 }, { "epoch": 0.3389782901281526, "grad_norm": 1.53125, "learning_rate": 0.00019122317223722437, "loss": 3.3102, "step": 7241 }, { "epoch": 0.33902510386798523, "grad_norm": 1.5546875, "learning_rate": 0.0001912207806759096, "loss": 2.9842, "step": 7242 }, { "epoch": 0.3390719176078179, "grad_norm": 1.2890625, "learning_rate": 0.00019121838880376536, "loss": 2.7812, "step": 7243 }, { "epoch": 0.3391187313476505, "grad_norm": 1.3203125, "learning_rate": 0.00019121599662079967, "loss": 3.4592, "step": 7244 }, { "epoch": 0.3391655450874832, "grad_norm": 2.15625, "learning_rate": 0.0001912136041270208, "loss": 3.0305, "step": 7245 }, { "epoch": 0.3392123588273158, "grad_norm": 1.375, "learning_rate": 0.0001912112113224368, "loss": 2.9229, "step": 7246 }, { "epoch": 0.33925917256714844, "grad_norm": 1.828125, "learning_rate": 0.0001912088182070559, "loss": 3.2836, "step": 7247 }, { "epoch": 0.3393059863069811, "grad_norm": 1.78125, "learning_rate": 0.0001912064247808862, "loss": 3.6111, "step": 7248 }, { "epoch": 0.33935280004681373, "grad_norm": 1.5390625, "learning_rate": 0.0001912040310439359, "loss": 3.2611, "step": 7249 }, { "epoch": 0.3393996137866464, "grad_norm": 1.328125, "learning_rate": 0.00019120163699621313, "loss": 3.0258, "step": 7250 }, { "epoch": 0.339446427526479, "grad_norm": 1.4140625, "learning_rate": 0.00019119924263772604, "loss": 3.2762, "step": 7251 }, { "epoch": 0.33949324126631164, "grad_norm": 1.84375, "learning_rate": 0.0001911968479684828, "loss": 3.372, "step": 7252 }, { "epoch": 0.3395400550061443, "grad_norm": 1.359375, "learning_rate": 0.00019119445298849158, "loss": 3.2053, "step": 7253 }, { "epoch": 0.33958686874597693, "grad_norm": 1.359375, "learning_rate": 0.00019119205769776055, "loss": 3.2787, "step": 7254 }, { "epoch": 0.3396336824858096, "grad_norm": 1.5546875, "learning_rate": 0.00019118966209629782, "loss": 3.3164, "step": 7255 }, { "epoch": 0.3396804962256422, "grad_norm": 1.3125, "learning_rate": 0.00019118726618411162, "loss": 3.3465, "step": 7256 }, { "epoch": 0.33972730996547484, "grad_norm": 1.515625, "learning_rate": 0.0001911848699612101, "loss": 3.3771, "step": 7257 }, { "epoch": 0.3397741237053075, "grad_norm": 1.28125, "learning_rate": 0.00019118247342760135, "loss": 3.1302, "step": 7258 }, { "epoch": 0.33982093744514014, "grad_norm": 1.2109375, "learning_rate": 0.00019118007658329363, "loss": 3.2224, "step": 7259 }, { "epoch": 0.3398677511849728, "grad_norm": 1.1875, "learning_rate": 0.00019117767942829504, "loss": 3.0712, "step": 7260 }, { "epoch": 0.33991456492480543, "grad_norm": 1.078125, "learning_rate": 0.0001911752819626138, "loss": 2.9404, "step": 7261 }, { "epoch": 0.33996137866463805, "grad_norm": 1.15625, "learning_rate": 0.00019117288418625802, "loss": 3.019, "step": 7262 }, { "epoch": 0.3400081924044707, "grad_norm": 0.9453125, "learning_rate": 0.00019117048609923595, "loss": 2.2352, "step": 7263 }, { "epoch": 0.34005500614430334, "grad_norm": 1.3125, "learning_rate": 0.00019116808770155573, "loss": 3.3485, "step": 7264 }, { "epoch": 0.340101819884136, "grad_norm": 1.765625, "learning_rate": 0.00019116568899322549, "loss": 3.2429, "step": 7265 }, { "epoch": 0.34014863362396863, "grad_norm": 1.1875, "learning_rate": 0.00019116328997425343, "loss": 3.7043, "step": 7266 }, { "epoch": 0.34019544736380125, "grad_norm": 1.34375, "learning_rate": 0.00019116089064464775, "loss": 3.2552, "step": 7267 }, { "epoch": 0.3402422611036339, "grad_norm": 1.3515625, "learning_rate": 0.0001911584910044166, "loss": 3.0062, "step": 7268 }, { "epoch": 0.34028907484346654, "grad_norm": 1.25, "learning_rate": 0.00019115609105356817, "loss": 3.3265, "step": 7269 }, { "epoch": 0.3403358885832992, "grad_norm": 1.1328125, "learning_rate": 0.00019115369079211058, "loss": 3.2448, "step": 7270 }, { "epoch": 0.34038270232313184, "grad_norm": 1.0703125, "learning_rate": 0.00019115129022005212, "loss": 3.1143, "step": 7271 }, { "epoch": 0.34042951606296445, "grad_norm": 1.3125, "learning_rate": 0.0001911488893374009, "loss": 2.9904, "step": 7272 }, { "epoch": 0.34047632980279713, "grad_norm": 1.3203125, "learning_rate": 0.00019114648814416507, "loss": 2.8531, "step": 7273 }, { "epoch": 0.34052314354262975, "grad_norm": 1.3359375, "learning_rate": 0.00019114408664035285, "loss": 3.4747, "step": 7274 }, { "epoch": 0.3405699572824624, "grad_norm": 1.328125, "learning_rate": 0.00019114168482597247, "loss": 2.7866, "step": 7275 }, { "epoch": 0.34061677102229504, "grad_norm": 1.453125, "learning_rate": 0.00019113928270103202, "loss": 3.4497, "step": 7276 }, { "epoch": 0.34066358476212766, "grad_norm": 1.28125, "learning_rate": 0.00019113688026553978, "loss": 4.1476, "step": 7277 }, { "epoch": 0.34071039850196033, "grad_norm": 1.6640625, "learning_rate": 0.00019113447751950384, "loss": 3.769, "step": 7278 }, { "epoch": 0.34075721224179295, "grad_norm": 1.1875, "learning_rate": 0.00019113207446293246, "loss": 3.1112, "step": 7279 }, { "epoch": 0.3408040259816256, "grad_norm": 1.515625, "learning_rate": 0.0001911296710958338, "loss": 3.1149, "step": 7280 }, { "epoch": 0.34085083972145824, "grad_norm": 1.734375, "learning_rate": 0.00019112726741821604, "loss": 3.6405, "step": 7281 }, { "epoch": 0.34089765346129086, "grad_norm": 1.2265625, "learning_rate": 0.00019112486343008742, "loss": 3.2337, "step": 7282 }, { "epoch": 0.34094446720112354, "grad_norm": 1.46875, "learning_rate": 0.00019112245913145606, "loss": 3.5103, "step": 7283 }, { "epoch": 0.34099128094095615, "grad_norm": 1.3984375, "learning_rate": 0.00019112005452233023, "loss": 2.815, "step": 7284 }, { "epoch": 0.34103809468078883, "grad_norm": 1.234375, "learning_rate": 0.000191117649602718, "loss": 3.0299, "step": 7285 }, { "epoch": 0.34108490842062145, "grad_norm": 1.3515625, "learning_rate": 0.0001911152443726277, "loss": 3.564, "step": 7286 }, { "epoch": 0.3411317221604541, "grad_norm": 1.078125, "learning_rate": 0.0001911128388320675, "loss": 2.9469, "step": 7287 }, { "epoch": 0.34117853590028674, "grad_norm": 1.3125, "learning_rate": 0.00019111043298104553, "loss": 3.1541, "step": 7288 }, { "epoch": 0.34122534964011936, "grad_norm": 1.453125, "learning_rate": 0.00019110802681957003, "loss": 3.4233, "step": 7289 }, { "epoch": 0.34127216337995203, "grad_norm": 1.3125, "learning_rate": 0.0001911056203476492, "loss": 3.2833, "step": 7290 }, { "epoch": 0.34131897711978465, "grad_norm": 1.21875, "learning_rate": 0.00019110321356529121, "loss": 3.0878, "step": 7291 }, { "epoch": 0.3413657908596173, "grad_norm": 1.5859375, "learning_rate": 0.00019110080647250432, "loss": 3.2992, "step": 7292 }, { "epoch": 0.34141260459944994, "grad_norm": 2.34375, "learning_rate": 0.00019109839906929668, "loss": 3.3569, "step": 7293 }, { "epoch": 0.34145941833928256, "grad_norm": 1.3515625, "learning_rate": 0.00019109599135567652, "loss": 3.5327, "step": 7294 }, { "epoch": 0.34150623207911524, "grad_norm": 1.2421875, "learning_rate": 0.00019109358333165203, "loss": 3.5574, "step": 7295 }, { "epoch": 0.34155304581894785, "grad_norm": 1.4609375, "learning_rate": 0.00019109117499723144, "loss": 3.115, "step": 7296 }, { "epoch": 0.3415998595587805, "grad_norm": 1.25, "learning_rate": 0.0001910887663524229, "loss": 3.2256, "step": 7297 }, { "epoch": 0.34164667329861315, "grad_norm": 1.3671875, "learning_rate": 0.00019108635739723467, "loss": 3.0767, "step": 7298 }, { "epoch": 0.34169348703844576, "grad_norm": 1.84375, "learning_rate": 0.00019108394813167496, "loss": 2.7011, "step": 7299 }, { "epoch": 0.34174030077827844, "grad_norm": 1.4140625, "learning_rate": 0.00019108153855575195, "loss": 3.4231, "step": 7300 }, { "epoch": 0.34178711451811106, "grad_norm": 1.6328125, "learning_rate": 0.00019107912866947387, "loss": 3.6385, "step": 7301 }, { "epoch": 0.34183392825794373, "grad_norm": 1.125, "learning_rate": 0.00019107671847284888, "loss": 3.0282, "step": 7302 }, { "epoch": 0.34188074199777635, "grad_norm": 1.4921875, "learning_rate": 0.0001910743079658853, "loss": 3.1311, "step": 7303 }, { "epoch": 0.34192755573760897, "grad_norm": 1.125, "learning_rate": 0.00019107189714859124, "loss": 2.9421, "step": 7304 }, { "epoch": 0.34197436947744164, "grad_norm": 1.421875, "learning_rate": 0.00019106948602097497, "loss": 3.0758, "step": 7305 }, { "epoch": 0.34202118321727426, "grad_norm": 1.40625, "learning_rate": 0.0001910670745830447, "loss": 3.0368, "step": 7306 }, { "epoch": 0.34206799695710693, "grad_norm": 1.484375, "learning_rate": 0.0001910646628348086, "loss": 3.5718, "step": 7307 }, { "epoch": 0.34211481069693955, "grad_norm": 1.484375, "learning_rate": 0.00019106225077627494, "loss": 3.0337, "step": 7308 }, { "epoch": 0.34216162443677217, "grad_norm": 1.28125, "learning_rate": 0.00019105983840745193, "loss": 3.2051, "step": 7309 }, { "epoch": 0.34220843817660485, "grad_norm": 1.609375, "learning_rate": 0.0001910574257283478, "loss": 2.9383, "step": 7310 }, { "epoch": 0.34225525191643746, "grad_norm": 1.3984375, "learning_rate": 0.00019105501273897072, "loss": 3.3491, "step": 7311 }, { "epoch": 0.34230206565627014, "grad_norm": 1.6171875, "learning_rate": 0.00019105259943932897, "loss": 3.2776, "step": 7312 }, { "epoch": 0.34234887939610276, "grad_norm": 1.609375, "learning_rate": 0.00019105018582943074, "loss": 3.6716, "step": 7313 }, { "epoch": 0.3423956931359354, "grad_norm": 5.5, "learning_rate": 0.00019104777190928428, "loss": 3.8394, "step": 7314 }, { "epoch": 0.34244250687576805, "grad_norm": 1.1953125, "learning_rate": 0.0001910453576788978, "loss": 3.2655, "step": 7315 }, { "epoch": 0.34248932061560067, "grad_norm": 1.5625, "learning_rate": 0.00019104294313827952, "loss": 3.4294, "step": 7316 }, { "epoch": 0.34253613435543334, "grad_norm": 1.2734375, "learning_rate": 0.00019104052828743768, "loss": 3.0375, "step": 7317 }, { "epoch": 0.34258294809526596, "grad_norm": 1.59375, "learning_rate": 0.00019103811312638048, "loss": 3.5951, "step": 7318 }, { "epoch": 0.3426297618350986, "grad_norm": 1.4609375, "learning_rate": 0.0001910356976551162, "loss": 3.6281, "step": 7319 }, { "epoch": 0.34267657557493125, "grad_norm": 1.2734375, "learning_rate": 0.000191033281873653, "loss": 3.279, "step": 7320 }, { "epoch": 0.34272338931476387, "grad_norm": 1.5078125, "learning_rate": 0.0001910308657819992, "loss": 3.1643, "step": 7321 }, { "epoch": 0.34277020305459655, "grad_norm": 1.21875, "learning_rate": 0.00019102844938016298, "loss": 4.7299, "step": 7322 }, { "epoch": 0.34281701679442916, "grad_norm": 1.5546875, "learning_rate": 0.00019102603266815256, "loss": 2.8409, "step": 7323 }, { "epoch": 0.3428638305342618, "grad_norm": 4.03125, "learning_rate": 0.0001910236156459762, "loss": 3.2265, "step": 7324 }, { "epoch": 0.34291064427409446, "grad_norm": 1.3671875, "learning_rate": 0.00019102119831364216, "loss": 3.3384, "step": 7325 }, { "epoch": 0.3429574580139271, "grad_norm": 0.97265625, "learning_rate": 0.00019101878067115862, "loss": 2.9134, "step": 7326 }, { "epoch": 0.34300427175375975, "grad_norm": 1.2109375, "learning_rate": 0.00019101636271853383, "loss": 2.8892, "step": 7327 }, { "epoch": 0.34305108549359237, "grad_norm": 1.6875, "learning_rate": 0.00019101394445577606, "loss": 3.1861, "step": 7328 }, { "epoch": 0.343097899233425, "grad_norm": 1.46875, "learning_rate": 0.00019101152588289354, "loss": 3.5454, "step": 7329 }, { "epoch": 0.34314471297325766, "grad_norm": 1.3046875, "learning_rate": 0.00019100910699989453, "loss": 2.9356, "step": 7330 }, { "epoch": 0.3431915267130903, "grad_norm": 1.390625, "learning_rate": 0.00019100668780678724, "loss": 3.4107, "step": 7331 }, { "epoch": 0.34323834045292295, "grad_norm": 1.421875, "learning_rate": 0.0001910042683035799, "loss": 3.447, "step": 7332 }, { "epoch": 0.34328515419275557, "grad_norm": 1.3125, "learning_rate": 0.00019100184849028077, "loss": 3.4586, "step": 7333 }, { "epoch": 0.3433319679325882, "grad_norm": 1.3984375, "learning_rate": 0.0001909994283668981, "loss": 2.9455, "step": 7334 }, { "epoch": 0.34337878167242086, "grad_norm": 1.8359375, "learning_rate": 0.0001909970079334402, "loss": 2.232, "step": 7335 }, { "epoch": 0.3434255954122535, "grad_norm": 1.3125, "learning_rate": 0.00019099458718991518, "loss": 3.4241, "step": 7336 }, { "epoch": 0.34347240915208616, "grad_norm": 1.1875, "learning_rate": 0.00019099216613633138, "loss": 3.0746, "step": 7337 }, { "epoch": 0.3435192228919188, "grad_norm": 1.6953125, "learning_rate": 0.00019098974477269704, "loss": 3.3347, "step": 7338 }, { "epoch": 0.3435660366317514, "grad_norm": 1.3125, "learning_rate": 0.0001909873230990204, "loss": 3.2433, "step": 7339 }, { "epoch": 0.34361285037158407, "grad_norm": 1.1953125, "learning_rate": 0.00019098490111530976, "loss": 3.1341, "step": 7340 }, { "epoch": 0.3436596641114167, "grad_norm": 1.265625, "learning_rate": 0.00019098247882157326, "loss": 3.5144, "step": 7341 }, { "epoch": 0.34370647785124936, "grad_norm": 1.46875, "learning_rate": 0.00019098005621781926, "loss": 3.2418, "step": 7342 }, { "epoch": 0.343753291591082, "grad_norm": 1.2109375, "learning_rate": 0.000190977633304056, "loss": 3.4594, "step": 7343 }, { "epoch": 0.3438001053309146, "grad_norm": 1.6953125, "learning_rate": 0.0001909752100802917, "loss": 3.4402, "step": 7344 }, { "epoch": 0.34384691907074727, "grad_norm": 1.546875, "learning_rate": 0.00019097278654653458, "loss": 3.2237, "step": 7345 }, { "epoch": 0.3438937328105799, "grad_norm": 1.6015625, "learning_rate": 0.000190970362702793, "loss": 3.1704, "step": 7346 }, { "epoch": 0.34394054655041256, "grad_norm": 1.59375, "learning_rate": 0.00019096793854907516, "loss": 3.4016, "step": 7347 }, { "epoch": 0.3439873602902452, "grad_norm": 1.3203125, "learning_rate": 0.00019096551408538933, "loss": 2.7183, "step": 7348 }, { "epoch": 0.34403417403007786, "grad_norm": 1.5625, "learning_rate": 0.00019096308931174373, "loss": 3.0385, "step": 7349 }, { "epoch": 0.3440809877699105, "grad_norm": 1.34375, "learning_rate": 0.0001909606642281467, "loss": 3.3809, "step": 7350 }, { "epoch": 0.3441278015097431, "grad_norm": 1.2578125, "learning_rate": 0.00019095823883460647, "loss": 3.2249, "step": 7351 }, { "epoch": 0.34417461524957577, "grad_norm": 1.59375, "learning_rate": 0.00019095581313113128, "loss": 3.3031, "step": 7352 }, { "epoch": 0.3442214289894084, "grad_norm": 1.3125, "learning_rate": 0.00019095338711772942, "loss": 3.2323, "step": 7353 }, { "epoch": 0.34426824272924106, "grad_norm": 1.4921875, "learning_rate": 0.00019095096079440915, "loss": 3.2367, "step": 7354 }, { "epoch": 0.3443150564690737, "grad_norm": 1.484375, "learning_rate": 0.00019094853416117872, "loss": 2.9211, "step": 7355 }, { "epoch": 0.3443618702089063, "grad_norm": 1.390625, "learning_rate": 0.00019094610721804642, "loss": 2.8274, "step": 7356 }, { "epoch": 0.34440868394873897, "grad_norm": 1.5078125, "learning_rate": 0.00019094367996502057, "loss": 3.3483, "step": 7357 }, { "epoch": 0.3444554976885716, "grad_norm": 2.125, "learning_rate": 0.00019094125240210933, "loss": 3.1762, "step": 7358 }, { "epoch": 0.34450231142840426, "grad_norm": 1.4921875, "learning_rate": 0.00019093882452932105, "loss": 3.2506, "step": 7359 }, { "epoch": 0.3445491251682369, "grad_norm": 1.1796875, "learning_rate": 0.000190936396346664, "loss": 3.3252, "step": 7360 }, { "epoch": 0.3445959389080695, "grad_norm": 1.2734375, "learning_rate": 0.0001909339678541464, "loss": 3.4002, "step": 7361 }, { "epoch": 0.3446427526479022, "grad_norm": 1.4921875, "learning_rate": 0.00019093153905177656, "loss": 3.5473, "step": 7362 }, { "epoch": 0.3446895663877348, "grad_norm": 1.28125, "learning_rate": 0.0001909291099395628, "loss": 3.1507, "step": 7363 }, { "epoch": 0.34473638012756747, "grad_norm": 1.453125, "learning_rate": 0.0001909266805175133, "loss": 3.0528, "step": 7364 }, { "epoch": 0.3447831938674001, "grad_norm": 1.515625, "learning_rate": 0.0001909242507856364, "loss": 3.2147, "step": 7365 }, { "epoch": 0.3448300076072327, "grad_norm": 1.25, "learning_rate": 0.0001909218207439404, "loss": 2.9774, "step": 7366 }, { "epoch": 0.3448768213470654, "grad_norm": 1.171875, "learning_rate": 0.00019091939039243356, "loss": 3.1913, "step": 7367 }, { "epoch": 0.344923635086898, "grad_norm": 1.5625, "learning_rate": 0.00019091695973112412, "loss": 3.0922, "step": 7368 }, { "epoch": 0.34497044882673067, "grad_norm": 1.40625, "learning_rate": 0.0001909145287600204, "loss": 3.2398, "step": 7369 }, { "epoch": 0.3450172625665633, "grad_norm": 1.671875, "learning_rate": 0.0001909120974791307, "loss": 3.3357, "step": 7370 }, { "epoch": 0.3450640763063959, "grad_norm": 1.484375, "learning_rate": 0.00019090966588846325, "loss": 3.0663, "step": 7371 }, { "epoch": 0.3451108900462286, "grad_norm": 1.171875, "learning_rate": 0.00019090723398802642, "loss": 3.4528, "step": 7372 }, { "epoch": 0.3451577037860612, "grad_norm": 1.7890625, "learning_rate": 0.0001909048017778284, "loss": 3.6095, "step": 7373 }, { "epoch": 0.3452045175258939, "grad_norm": 1.5234375, "learning_rate": 0.0001909023692578775, "loss": 3.0622, "step": 7374 }, { "epoch": 0.3452513312657265, "grad_norm": 1.390625, "learning_rate": 0.00019089993642818208, "loss": 3.5801, "step": 7375 }, { "epoch": 0.3452981450055591, "grad_norm": 1.40625, "learning_rate": 0.00019089750328875037, "loss": 3.2061, "step": 7376 }, { "epoch": 0.3453449587453918, "grad_norm": 1.3046875, "learning_rate": 0.0001908950698395907, "loss": 3.4643, "step": 7377 }, { "epoch": 0.3453917724852244, "grad_norm": 1.7109375, "learning_rate": 0.00019089263608071128, "loss": 3.0577, "step": 7378 }, { "epoch": 0.3454385862250571, "grad_norm": 1.3359375, "learning_rate": 0.00019089020201212048, "loss": 3.2263, "step": 7379 }, { "epoch": 0.3454853999648897, "grad_norm": 1.4296875, "learning_rate": 0.00019088776763382658, "loss": 3.4487, "step": 7380 }, { "epoch": 0.3455322137047223, "grad_norm": 1.2734375, "learning_rate": 0.00019088533294583782, "loss": 3.0887, "step": 7381 }, { "epoch": 0.345579027444555, "grad_norm": 1.375, "learning_rate": 0.0001908828979481626, "loss": 3.5153, "step": 7382 }, { "epoch": 0.3456258411843876, "grad_norm": 1.046875, "learning_rate": 0.00019088046264080911, "loss": 3.3452, "step": 7383 }, { "epoch": 0.3456726549242203, "grad_norm": 2.03125, "learning_rate": 0.00019087802702378575, "loss": 4.3566, "step": 7384 }, { "epoch": 0.3457194686640529, "grad_norm": 1.34375, "learning_rate": 0.00019087559109710073, "loss": 3.0145, "step": 7385 }, { "epoch": 0.3457662824038855, "grad_norm": 1.21875, "learning_rate": 0.00019087315486076238, "loss": 3.2138, "step": 7386 }, { "epoch": 0.3458130961437182, "grad_norm": 1.6640625, "learning_rate": 0.00019087071831477904, "loss": 2.9387, "step": 7387 }, { "epoch": 0.3458599098835508, "grad_norm": 1.6328125, "learning_rate": 0.00019086828145915895, "loss": 3.2112, "step": 7388 }, { "epoch": 0.3459067236233835, "grad_norm": 1.9375, "learning_rate": 0.00019086584429391044, "loss": 3.7595, "step": 7389 }, { "epoch": 0.3459535373632161, "grad_norm": 1.203125, "learning_rate": 0.00019086340681904185, "loss": 2.8633, "step": 7390 }, { "epoch": 0.3460003511030487, "grad_norm": 1.3828125, "learning_rate": 0.00019086096903456144, "loss": 2.849, "step": 7391 }, { "epoch": 0.3460471648428814, "grad_norm": 1.4296875, "learning_rate": 0.00019085853094047753, "loss": 3.2255, "step": 7392 }, { "epoch": 0.346093978582714, "grad_norm": 1.359375, "learning_rate": 0.00019085609253679842, "loss": 3.3779, "step": 7393 }, { "epoch": 0.3461407923225467, "grad_norm": 1.421875, "learning_rate": 0.00019085365382353242, "loss": 3.079, "step": 7394 }, { "epoch": 0.3461876060623793, "grad_norm": 1.15625, "learning_rate": 0.00019085121480068786, "loss": 3.1365, "step": 7395 }, { "epoch": 0.3462344198022119, "grad_norm": 1.7890625, "learning_rate": 0.000190848775468273, "loss": 3.4261, "step": 7396 }, { "epoch": 0.3462812335420446, "grad_norm": 1.4921875, "learning_rate": 0.00019084633582629626, "loss": 2.955, "step": 7397 }, { "epoch": 0.3463280472818772, "grad_norm": 1.6328125, "learning_rate": 0.00019084389587476584, "loss": 3.0351, "step": 7398 }, { "epoch": 0.3463748610217099, "grad_norm": 1.765625, "learning_rate": 0.00019084145561369008, "loss": 3.5329, "step": 7399 }, { "epoch": 0.3464216747615425, "grad_norm": 1.140625, "learning_rate": 0.00019083901504307735, "loss": 2.9394, "step": 7400 }, { "epoch": 0.3464684885013751, "grad_norm": 1.65625, "learning_rate": 0.0001908365741629359, "loss": 3.3633, "step": 7401 }, { "epoch": 0.3465153022412078, "grad_norm": 1.1328125, "learning_rate": 0.00019083413297327407, "loss": 3.1727, "step": 7402 }, { "epoch": 0.3465621159810404, "grad_norm": 1.671875, "learning_rate": 0.0001908316914741002, "loss": 3.4444, "step": 7403 }, { "epoch": 0.3466089297208731, "grad_norm": 1.8828125, "learning_rate": 0.00019082924966542258, "loss": 3.5208, "step": 7404 }, { "epoch": 0.3466557434607057, "grad_norm": 1.53125, "learning_rate": 0.00019082680754724956, "loss": 3.2594, "step": 7405 }, { "epoch": 0.34670255720053833, "grad_norm": 1.25, "learning_rate": 0.00019082436511958943, "loss": 3.3576, "step": 7406 }, { "epoch": 0.346749370940371, "grad_norm": 2.15625, "learning_rate": 0.0001908219223824505, "loss": 3.6197, "step": 7407 }, { "epoch": 0.3467961846802036, "grad_norm": 1.2421875, "learning_rate": 0.00019081947933584118, "loss": 3.2993, "step": 7408 }, { "epoch": 0.3468429984200363, "grad_norm": 1.703125, "learning_rate": 0.00019081703597976967, "loss": 3.2281, "step": 7409 }, { "epoch": 0.3468898121598689, "grad_norm": 1.1875, "learning_rate": 0.00019081459231424442, "loss": 3.2819, "step": 7410 }, { "epoch": 0.3469366258997016, "grad_norm": 1.40625, "learning_rate": 0.00019081214833927365, "loss": 2.9379, "step": 7411 }, { "epoch": 0.3469834396395342, "grad_norm": 1.3671875, "learning_rate": 0.00019080970405486577, "loss": 3.1013, "step": 7412 }, { "epoch": 0.3470302533793668, "grad_norm": 1.203125, "learning_rate": 0.00019080725946102903, "loss": 3.468, "step": 7413 }, { "epoch": 0.3470770671191995, "grad_norm": 1.8359375, "learning_rate": 0.0001908048145577718, "loss": 3.5965, "step": 7414 }, { "epoch": 0.3471238808590321, "grad_norm": 1.2109375, "learning_rate": 0.00019080236934510247, "loss": 3.1563, "step": 7415 }, { "epoch": 0.3471706945988648, "grad_norm": 1.546875, "learning_rate": 0.00019079992382302927, "loss": 3.7924, "step": 7416 }, { "epoch": 0.3472175083386974, "grad_norm": 1.296875, "learning_rate": 0.00019079747799156057, "loss": 3.7308, "step": 7417 }, { "epoch": 0.34726432207853003, "grad_norm": 1.5234375, "learning_rate": 0.00019079503185070475, "loss": 3.5212, "step": 7418 }, { "epoch": 0.3473111358183627, "grad_norm": 1.25, "learning_rate": 0.00019079258540047008, "loss": 3.4324, "step": 7419 }, { "epoch": 0.3473579495581953, "grad_norm": 1.40625, "learning_rate": 0.00019079013864086494, "loss": 3.1092, "step": 7420 }, { "epoch": 0.347404763298028, "grad_norm": 2.046875, "learning_rate": 0.00019078769157189764, "loss": 3.7254, "step": 7421 }, { "epoch": 0.3474515770378606, "grad_norm": 1.7109375, "learning_rate": 0.00019078524419357653, "loss": 3.4323, "step": 7422 }, { "epoch": 0.34749839077769323, "grad_norm": 1.53125, "learning_rate": 0.00019078279650590997, "loss": 3.6133, "step": 7423 }, { "epoch": 0.3475452045175259, "grad_norm": 1.46875, "learning_rate": 0.00019078034850890624, "loss": 2.8305, "step": 7424 }, { "epoch": 0.3475920182573585, "grad_norm": 1.21875, "learning_rate": 0.00019077790020257375, "loss": 3.0091, "step": 7425 }, { "epoch": 0.3476388319971912, "grad_norm": 1.1796875, "learning_rate": 0.0001907754515869208, "loss": 3.1157, "step": 7426 }, { "epoch": 0.3476856457370238, "grad_norm": 1.28125, "learning_rate": 0.00019077300266195574, "loss": 3.0214, "step": 7427 }, { "epoch": 0.34773245947685644, "grad_norm": 1.328125, "learning_rate": 0.00019077055342768692, "loss": 3.2198, "step": 7428 }, { "epoch": 0.3477792732166891, "grad_norm": 1.40625, "learning_rate": 0.0001907681038841227, "loss": 3.6922, "step": 7429 }, { "epoch": 0.34782608695652173, "grad_norm": 1.7109375, "learning_rate": 0.0001907656540312714, "loss": 3.4326, "step": 7430 }, { "epoch": 0.3478729006963544, "grad_norm": 1.5, "learning_rate": 0.00019076320386914137, "loss": 3.5007, "step": 7431 }, { "epoch": 0.347919714436187, "grad_norm": 1.21875, "learning_rate": 0.000190760753397741, "loss": 2.8544, "step": 7432 }, { "epoch": 0.34796652817601964, "grad_norm": 1.3125, "learning_rate": 0.00019075830261707858, "loss": 3.186, "step": 7433 }, { "epoch": 0.3480133419158523, "grad_norm": 1.4296875, "learning_rate": 0.0001907558515271625, "loss": 3.0943, "step": 7434 }, { "epoch": 0.34806015565568493, "grad_norm": 1.328125, "learning_rate": 0.00019075340012800112, "loss": 3.1111, "step": 7435 }, { "epoch": 0.3481069693955176, "grad_norm": 1.1171875, "learning_rate": 0.00019075094841960276, "loss": 2.7715, "step": 7436 }, { "epoch": 0.3481537831353502, "grad_norm": 1.078125, "learning_rate": 0.00019074849640197578, "loss": 3.5324, "step": 7437 }, { "epoch": 0.34820059687518284, "grad_norm": 1.3515625, "learning_rate": 0.00019074604407512855, "loss": 3.2385, "step": 7438 }, { "epoch": 0.3482474106150155, "grad_norm": 1.3515625, "learning_rate": 0.00019074359143906943, "loss": 3.4291, "step": 7439 }, { "epoch": 0.34829422435484814, "grad_norm": 1.2109375, "learning_rate": 0.00019074113849380675, "loss": 2.6544, "step": 7440 }, { "epoch": 0.3483410380946808, "grad_norm": 1.3046875, "learning_rate": 0.0001907386852393489, "loss": 3.2574, "step": 7441 }, { "epoch": 0.34838785183451343, "grad_norm": 1.2265625, "learning_rate": 0.00019073623167570423, "loss": 2.823, "step": 7442 }, { "epoch": 0.34843466557434605, "grad_norm": 1.140625, "learning_rate": 0.00019073377780288108, "loss": 2.9402, "step": 7443 }, { "epoch": 0.3484814793141787, "grad_norm": 1.328125, "learning_rate": 0.00019073132362088785, "loss": 3.4137, "step": 7444 }, { "epoch": 0.34852829305401134, "grad_norm": 1.34375, "learning_rate": 0.00019072886912973284, "loss": 3.209, "step": 7445 }, { "epoch": 0.348575106793844, "grad_norm": 1.1875, "learning_rate": 0.00019072641432942448, "loss": 3.801, "step": 7446 }, { "epoch": 0.34862192053367663, "grad_norm": 1.5234375, "learning_rate": 0.0001907239592199711, "loss": 3.013, "step": 7447 }, { "epoch": 0.34866873427350925, "grad_norm": 1.3828125, "learning_rate": 0.00019072150380138107, "loss": 3.1607, "step": 7448 }, { "epoch": 0.3487155480133419, "grad_norm": 1.7578125, "learning_rate": 0.00019071904807366278, "loss": 3.5306, "step": 7449 }, { "epoch": 0.34876236175317454, "grad_norm": 1.546875, "learning_rate": 0.00019071659203682458, "loss": 3.6455, "step": 7450 }, { "epoch": 0.3488091754930072, "grad_norm": 1.4296875, "learning_rate": 0.0001907141356908748, "loss": 3.086, "step": 7451 }, { "epoch": 0.34885598923283984, "grad_norm": 1.4296875, "learning_rate": 0.00019071167903582186, "loss": 2.6271, "step": 7452 }, { "epoch": 0.34890280297267245, "grad_norm": 1.25, "learning_rate": 0.00019070922207167413, "loss": 3.2293, "step": 7453 }, { "epoch": 0.34894961671250513, "grad_norm": 1.953125, "learning_rate": 0.00019070676479843995, "loss": 2.1173, "step": 7454 }, { "epoch": 0.34899643045233775, "grad_norm": 1.6328125, "learning_rate": 0.00019070430721612774, "loss": 3.3636, "step": 7455 }, { "epoch": 0.3490432441921704, "grad_norm": 1.2734375, "learning_rate": 0.00019070184932474583, "loss": 3.1924, "step": 7456 }, { "epoch": 0.34909005793200304, "grad_norm": 1.34375, "learning_rate": 0.0001906993911243026, "loss": 3.2538, "step": 7457 }, { "epoch": 0.34913687167183566, "grad_norm": 1.1953125, "learning_rate": 0.00019069693261480645, "loss": 3.3349, "step": 7458 }, { "epoch": 0.34918368541166833, "grad_norm": 1.1953125, "learning_rate": 0.00019069447379626574, "loss": 3.1489, "step": 7459 }, { "epoch": 0.34923049915150095, "grad_norm": 1.0390625, "learning_rate": 0.00019069201466868885, "loss": 2.9016, "step": 7460 }, { "epoch": 0.3492773128913336, "grad_norm": 1.5703125, "learning_rate": 0.00019068955523208415, "loss": 3.4496, "step": 7461 }, { "epoch": 0.34932412663116624, "grad_norm": 1.4296875, "learning_rate": 0.0001906870954864601, "loss": 3.4825, "step": 7462 }, { "epoch": 0.34937094037099886, "grad_norm": 1.078125, "learning_rate": 0.00019068463543182493, "loss": 3.4803, "step": 7463 }, { "epoch": 0.34941775411083154, "grad_norm": 2.28125, "learning_rate": 0.00019068217506818715, "loss": 3.0451, "step": 7464 }, { "epoch": 0.34946456785066415, "grad_norm": 1.328125, "learning_rate": 0.00019067971439555508, "loss": 3.037, "step": 7465 }, { "epoch": 0.34951138159049683, "grad_norm": 1.078125, "learning_rate": 0.00019067725341393715, "loss": 2.372, "step": 7466 }, { "epoch": 0.34955819533032945, "grad_norm": 1.5078125, "learning_rate": 0.0001906747921233417, "loss": 3.5208, "step": 7467 }, { "epoch": 0.34960500907016206, "grad_norm": 1.296875, "learning_rate": 0.00019067233052377715, "loss": 3.224, "step": 7468 }, { "epoch": 0.34965182280999474, "grad_norm": 1.21875, "learning_rate": 0.00019066986861525186, "loss": 2.9418, "step": 7469 }, { "epoch": 0.34969863654982736, "grad_norm": 2.296875, "learning_rate": 0.00019066740639777426, "loss": 3.158, "step": 7470 }, { "epoch": 0.34974545028966003, "grad_norm": 2.03125, "learning_rate": 0.00019066494387135266, "loss": 3.208, "step": 7471 }, { "epoch": 0.34979226402949265, "grad_norm": 1.5390625, "learning_rate": 0.00019066248103599557, "loss": 3.3573, "step": 7472 }, { "epoch": 0.3498390777693253, "grad_norm": 1.4375, "learning_rate": 0.00019066001789171127, "loss": 3.2343, "step": 7473 }, { "epoch": 0.34988589150915794, "grad_norm": 1.8125, "learning_rate": 0.0001906575544385082, "loss": 3.3873, "step": 7474 }, { "epoch": 0.34993270524899056, "grad_norm": 1.1796875, "learning_rate": 0.0001906550906763948, "loss": 2.5469, "step": 7475 }, { "epoch": 0.34997951898882323, "grad_norm": 1.2109375, "learning_rate": 0.00019065262660537938, "loss": 3.2808, "step": 7476 }, { "epoch": 0.35002633272865585, "grad_norm": 1.203125, "learning_rate": 0.0001906501622254704, "loss": 2.9911, "step": 7477 }, { "epoch": 0.3500731464684885, "grad_norm": 1.203125, "learning_rate": 0.0001906476975366762, "loss": 3.4299, "step": 7478 }, { "epoch": 0.35011996020832115, "grad_norm": 1.296875, "learning_rate": 0.00019064523253900522, "loss": 2.9821, "step": 7479 }, { "epoch": 0.35016677394815376, "grad_norm": 1.4296875, "learning_rate": 0.00019064276723246585, "loss": 3.6771, "step": 7480 }, { "epoch": 0.35021358768798644, "grad_norm": 2.296875, "learning_rate": 0.0001906403016170665, "loss": 2.9349, "step": 7481 }, { "epoch": 0.35026040142781906, "grad_norm": 1.171875, "learning_rate": 0.00019063783569281558, "loss": 3.3114, "step": 7482 }, { "epoch": 0.35030721516765173, "grad_norm": 1.640625, "learning_rate": 0.00019063536945972142, "loss": 3.1889, "step": 7483 }, { "epoch": 0.35035402890748435, "grad_norm": 1.84375, "learning_rate": 0.00019063290291779252, "loss": 3.5312, "step": 7484 }, { "epoch": 0.35040084264731697, "grad_norm": 1.4296875, "learning_rate": 0.00019063043606703723, "loss": 3.3793, "step": 7485 }, { "epoch": 0.35044765638714964, "grad_norm": 1.2890625, "learning_rate": 0.00019062796890746396, "loss": 3.8002, "step": 7486 }, { "epoch": 0.35049447012698226, "grad_norm": 1.7734375, "learning_rate": 0.00019062550143908116, "loss": 3.1624, "step": 7487 }, { "epoch": 0.35054128386681493, "grad_norm": 1.1875, "learning_rate": 0.00019062303366189717, "loss": 2.2421, "step": 7488 }, { "epoch": 0.35058809760664755, "grad_norm": 1.1953125, "learning_rate": 0.00019062056557592042, "loss": 3.6496, "step": 7489 }, { "epoch": 0.35063491134648017, "grad_norm": 1.265625, "learning_rate": 0.00019061809718115934, "loss": 3.08, "step": 7490 }, { "epoch": 0.35068172508631285, "grad_norm": 1.6171875, "learning_rate": 0.00019061562847762236, "loss": 3.3013, "step": 7491 }, { "epoch": 0.35072853882614546, "grad_norm": 1.4140625, "learning_rate": 0.00019061315946531784, "loss": 3.1529, "step": 7492 }, { "epoch": 0.35077535256597814, "grad_norm": 1.2578125, "learning_rate": 0.0001906106901442542, "loss": 3.2188, "step": 7493 }, { "epoch": 0.35082216630581076, "grad_norm": 1.2421875, "learning_rate": 0.0001906082205144399, "loss": 3.6302, "step": 7494 }, { "epoch": 0.3508689800456434, "grad_norm": 1.53125, "learning_rate": 0.00019060575057588332, "loss": 3.0574, "step": 7495 }, { "epoch": 0.35091579378547605, "grad_norm": 1.171875, "learning_rate": 0.00019060328032859288, "loss": 3.1079, "step": 7496 }, { "epoch": 0.35096260752530867, "grad_norm": 1.109375, "learning_rate": 0.00019060080977257698, "loss": 2.3996, "step": 7497 }, { "epoch": 0.35100942126514134, "grad_norm": 1.390625, "learning_rate": 0.00019059833890784408, "loss": 3.1846, "step": 7498 }, { "epoch": 0.35105623500497396, "grad_norm": 2.65625, "learning_rate": 0.0001905958677344026, "loss": 2.5825, "step": 7499 }, { "epoch": 0.3511030487448066, "grad_norm": 1.2890625, "learning_rate": 0.0001905933962522609, "loss": 3.1172, "step": 7500 }, { "epoch": 0.35114986248463925, "grad_norm": 1.984375, "learning_rate": 0.00019059092446142744, "loss": 3.1953, "step": 7501 }, { "epoch": 0.35119667622447187, "grad_norm": 1.15625, "learning_rate": 0.00019058845236191065, "loss": 2.925, "step": 7502 }, { "epoch": 0.35124348996430454, "grad_norm": 1.7890625, "learning_rate": 0.00019058597995371895, "loss": 3.3323, "step": 7503 }, { "epoch": 0.35129030370413716, "grad_norm": 1.0390625, "learning_rate": 0.00019058350723686072, "loss": 3.0963, "step": 7504 }, { "epoch": 0.3513371174439698, "grad_norm": 2.125, "learning_rate": 0.0001905810342113445, "loss": 3.3327, "step": 7505 }, { "epoch": 0.35138393118380246, "grad_norm": 1.453125, "learning_rate": 0.00019057856087717858, "loss": 3.1518, "step": 7506 }, { "epoch": 0.3514307449236351, "grad_norm": 1.1953125, "learning_rate": 0.00019057608723437147, "loss": 3.0777, "step": 7507 }, { "epoch": 0.35147755866346775, "grad_norm": 3.75, "learning_rate": 0.00019057361328293158, "loss": 3.5743, "step": 7508 }, { "epoch": 0.35152437240330037, "grad_norm": 2.671875, "learning_rate": 0.00019057113902286733, "loss": 2.7612, "step": 7509 }, { "epoch": 0.351571186143133, "grad_norm": 1.484375, "learning_rate": 0.00019056866445418718, "loss": 3.4375, "step": 7510 }, { "epoch": 0.35161799988296566, "grad_norm": 1.3828125, "learning_rate": 0.00019056618957689954, "loss": 3.0167, "step": 7511 }, { "epoch": 0.3516648136227983, "grad_norm": 1.0703125, "learning_rate": 0.0001905637143910128, "loss": 2.4435, "step": 7512 }, { "epoch": 0.35171162736263095, "grad_norm": 1.5078125, "learning_rate": 0.00019056123889653548, "loss": 3.1842, "step": 7513 }, { "epoch": 0.35175844110246357, "grad_norm": 1.46875, "learning_rate": 0.00019055876309347598, "loss": 2.9346, "step": 7514 }, { "epoch": 0.3518052548422962, "grad_norm": 1.5625, "learning_rate": 0.00019055628698184268, "loss": 3.3314, "step": 7515 }, { "epoch": 0.35185206858212886, "grad_norm": 1.359375, "learning_rate": 0.0001905538105616441, "loss": 4.0267, "step": 7516 }, { "epoch": 0.3518988823219615, "grad_norm": 1.1875, "learning_rate": 0.00019055133383288867, "loss": 3.0195, "step": 7517 }, { "epoch": 0.35194569606179416, "grad_norm": 1.28125, "learning_rate": 0.00019054885679558478, "loss": 3.514, "step": 7518 }, { "epoch": 0.3519925098016268, "grad_norm": 2.5, "learning_rate": 0.0001905463794497409, "loss": 4.0663, "step": 7519 }, { "epoch": 0.3520393235414594, "grad_norm": 1.2890625, "learning_rate": 0.00019054390179536546, "loss": 3.2081, "step": 7520 }, { "epoch": 0.35208613728129207, "grad_norm": 1.546875, "learning_rate": 0.0001905414238324669, "loss": 3.0619, "step": 7521 }, { "epoch": 0.3521329510211247, "grad_norm": 1.4453125, "learning_rate": 0.00019053894556105366, "loss": 2.9869, "step": 7522 }, { "epoch": 0.35217976476095736, "grad_norm": 2.0, "learning_rate": 0.0001905364669811342, "loss": 3.4291, "step": 7523 }, { "epoch": 0.35222657850079, "grad_norm": 1.1328125, "learning_rate": 0.000190533988092717, "loss": 3.6066, "step": 7524 }, { "epoch": 0.3522733922406226, "grad_norm": 1.2421875, "learning_rate": 0.00019053150889581044, "loss": 3.2074, "step": 7525 }, { "epoch": 0.35232020598045527, "grad_norm": 1.5078125, "learning_rate": 0.00019052902939042298, "loss": 3.8036, "step": 7526 }, { "epoch": 0.3523670197202879, "grad_norm": 1.609375, "learning_rate": 0.00019052654957656313, "loss": 2.8747, "step": 7527 }, { "epoch": 0.35241383346012056, "grad_norm": 1.8671875, "learning_rate": 0.00019052406945423926, "loss": 3.6719, "step": 7528 }, { "epoch": 0.3524606471999532, "grad_norm": 0.97265625, "learning_rate": 0.00019052158902345986, "loss": 1.6576, "step": 7529 }, { "epoch": 0.3525074609397858, "grad_norm": 1.3828125, "learning_rate": 0.00019051910828423338, "loss": 2.9681, "step": 7530 }, { "epoch": 0.3525542746796185, "grad_norm": 1.1171875, "learning_rate": 0.0001905166272365683, "loss": 2.6872, "step": 7531 }, { "epoch": 0.3526010884194511, "grad_norm": 1.5, "learning_rate": 0.00019051414588047304, "loss": 2.8409, "step": 7532 }, { "epoch": 0.35264790215928377, "grad_norm": 1.390625, "learning_rate": 0.000190511664215956, "loss": 3.3637, "step": 7533 }, { "epoch": 0.3526947158991164, "grad_norm": 1.2109375, "learning_rate": 0.00019050918224302574, "loss": 3.0687, "step": 7534 }, { "epoch": 0.35274152963894906, "grad_norm": 1.703125, "learning_rate": 0.0001905066999616907, "loss": 2.812, "step": 7535 }, { "epoch": 0.3527883433787817, "grad_norm": 1.3828125, "learning_rate": 0.00019050421737195925, "loss": 2.9755, "step": 7536 }, { "epoch": 0.3528351571186143, "grad_norm": 1.4765625, "learning_rate": 0.00019050173447383994, "loss": 3.4446, "step": 7537 }, { "epoch": 0.35288197085844697, "grad_norm": 1.609375, "learning_rate": 0.0001904992512673412, "loss": 3.8587, "step": 7538 }, { "epoch": 0.3529287845982796, "grad_norm": 1.6640625, "learning_rate": 0.00019049676775247153, "loss": 3.6532, "step": 7539 }, { "epoch": 0.35297559833811226, "grad_norm": 1.3359375, "learning_rate": 0.0001904942839292393, "loss": 3.3321, "step": 7540 }, { "epoch": 0.3530224120779449, "grad_norm": 1.46875, "learning_rate": 0.00019049179979765304, "loss": 2.6937, "step": 7541 }, { "epoch": 0.3530692258177775, "grad_norm": 1.3515625, "learning_rate": 0.0001904893153577212, "loss": 3.0023, "step": 7542 }, { "epoch": 0.3531160395576102, "grad_norm": 1.3046875, "learning_rate": 0.00019048683060945228, "loss": 3.7012, "step": 7543 }, { "epoch": 0.3531628532974428, "grad_norm": 1.4375, "learning_rate": 0.0001904843455528547, "loss": 3.2669, "step": 7544 }, { "epoch": 0.35320966703727547, "grad_norm": 1.171875, "learning_rate": 0.00019048186018793693, "loss": 3.1263, "step": 7545 }, { "epoch": 0.3532564807771081, "grad_norm": 1.4140625, "learning_rate": 0.00019047937451470746, "loss": 3.3819, "step": 7546 }, { "epoch": 0.3533032945169407, "grad_norm": 1.390625, "learning_rate": 0.00019047688853317474, "loss": 3.1008, "step": 7547 }, { "epoch": 0.3533501082567734, "grad_norm": 1.9453125, "learning_rate": 0.00019047440224334724, "loss": 3.1816, "step": 7548 }, { "epoch": 0.353396921996606, "grad_norm": 1.453125, "learning_rate": 0.0001904719156452335, "loss": 3.5364, "step": 7549 }, { "epoch": 0.35344373573643867, "grad_norm": 1.4921875, "learning_rate": 0.0001904694287388419, "loss": 2.8837, "step": 7550 }, { "epoch": 0.3534905494762713, "grad_norm": 1.65625, "learning_rate": 0.00019046694152418097, "loss": 3.1001, "step": 7551 }, { "epoch": 0.3535373632161039, "grad_norm": 1.4140625, "learning_rate": 0.00019046445400125914, "loss": 3.1664, "step": 7552 }, { "epoch": 0.3535841769559366, "grad_norm": 1.3515625, "learning_rate": 0.00019046196617008491, "loss": 3.3678, "step": 7553 }, { "epoch": 0.3536309906957692, "grad_norm": 1.171875, "learning_rate": 0.00019045947803066678, "loss": 3.4842, "step": 7554 }, { "epoch": 0.3536778044356019, "grad_norm": 1.421875, "learning_rate": 0.0001904569895830132, "loss": 2.9526, "step": 7555 }, { "epoch": 0.3537246181754345, "grad_norm": 1.734375, "learning_rate": 0.00019045450082713264, "loss": 3.4631, "step": 7556 }, { "epoch": 0.3537714319152671, "grad_norm": 1.9140625, "learning_rate": 0.00019045201176303363, "loss": 3.1636, "step": 7557 }, { "epoch": 0.3538182456550998, "grad_norm": 1.46875, "learning_rate": 0.0001904495223907246, "loss": 3.2011, "step": 7558 }, { "epoch": 0.3538650593949324, "grad_norm": 1.2109375, "learning_rate": 0.00019044703271021404, "loss": 3.2386, "step": 7559 }, { "epoch": 0.3539118731347651, "grad_norm": 1.3515625, "learning_rate": 0.00019044454272151045, "loss": 3.245, "step": 7560 }, { "epoch": 0.3539586868745977, "grad_norm": 1.140625, "learning_rate": 0.00019044205242462232, "loss": 2.7146, "step": 7561 }, { "epoch": 0.3540055006144303, "grad_norm": 1.3671875, "learning_rate": 0.00019043956181955812, "loss": 3.0323, "step": 7562 }, { "epoch": 0.354052314354263, "grad_norm": 1.2265625, "learning_rate": 0.00019043707090632634, "loss": 3.282, "step": 7563 }, { "epoch": 0.3540991280940956, "grad_norm": 1.515625, "learning_rate": 0.00019043457968493548, "loss": 3.7636, "step": 7564 }, { "epoch": 0.3541459418339283, "grad_norm": 1.6171875, "learning_rate": 0.000190432088155394, "loss": 3.2435, "step": 7565 }, { "epoch": 0.3541927555737609, "grad_norm": 1.703125, "learning_rate": 0.0001904295963177104, "loss": 3.0537, "step": 7566 }, { "epoch": 0.3542395693135935, "grad_norm": 1.5859375, "learning_rate": 0.0001904271041718932, "loss": 3.1108, "step": 7567 }, { "epoch": 0.3542863830534262, "grad_norm": 1.1171875, "learning_rate": 0.00019042461171795086, "loss": 3.1353, "step": 7568 }, { "epoch": 0.3543331967932588, "grad_norm": 1.0078125, "learning_rate": 0.00019042211895589186, "loss": 4.0838, "step": 7569 }, { "epoch": 0.3543800105330915, "grad_norm": 1.625, "learning_rate": 0.00019041962588572475, "loss": 3.0177, "step": 7570 }, { "epoch": 0.3544268242729241, "grad_norm": 1.25, "learning_rate": 0.00019041713250745796, "loss": 2.9651, "step": 7571 }, { "epoch": 0.3544736380127567, "grad_norm": 2.0625, "learning_rate": 0.00019041463882110007, "loss": 3.3642, "step": 7572 }, { "epoch": 0.3545204517525894, "grad_norm": 1.4375, "learning_rate": 0.00019041214482665947, "loss": 3.0084, "step": 7573 }, { "epoch": 0.354567265492422, "grad_norm": 1.4140625, "learning_rate": 0.00019040965052414475, "loss": 3.201, "step": 7574 }, { "epoch": 0.3546140792322547, "grad_norm": 1.21875, "learning_rate": 0.00019040715591356433, "loss": 2.9973, "step": 7575 }, { "epoch": 0.3546608929720873, "grad_norm": 1.2890625, "learning_rate": 0.00019040466099492676, "loss": 3.2994, "step": 7576 }, { "epoch": 0.3547077067119199, "grad_norm": 1.296875, "learning_rate": 0.00019040216576824058, "loss": 3.1703, "step": 7577 }, { "epoch": 0.3547545204517526, "grad_norm": 2.53125, "learning_rate": 0.00019039967023351423, "loss": 3.0806, "step": 7578 }, { "epoch": 0.3548013341915852, "grad_norm": 1.65625, "learning_rate": 0.0001903971743907562, "loss": 3.2275, "step": 7579 }, { "epoch": 0.3548481479314179, "grad_norm": 1.265625, "learning_rate": 0.00019039467823997504, "loss": 3.1284, "step": 7580 }, { "epoch": 0.3548949616712505, "grad_norm": 1.171875, "learning_rate": 0.00019039218178117922, "loss": 3.3818, "step": 7581 }, { "epoch": 0.3549417754110831, "grad_norm": 1.78125, "learning_rate": 0.0001903896850143773, "loss": 2.929, "step": 7582 }, { "epoch": 0.3549885891509158, "grad_norm": 1.5234375, "learning_rate": 0.00019038718793957772, "loss": 3.4271, "step": 7583 }, { "epoch": 0.3550354028907484, "grad_norm": 1.296875, "learning_rate": 0.00019038469055678904, "loss": 3.2588, "step": 7584 }, { "epoch": 0.3550822166305811, "grad_norm": 1.625, "learning_rate": 0.00019038219286601974, "loss": 3.22, "step": 7585 }, { "epoch": 0.3551290303704137, "grad_norm": 1.1875, "learning_rate": 0.00019037969486727837, "loss": 2.5727, "step": 7586 }, { "epoch": 0.35517584411024633, "grad_norm": 1.1328125, "learning_rate": 0.0001903771965605734, "loss": 3.0406, "step": 7587 }, { "epoch": 0.355222657850079, "grad_norm": 1.375, "learning_rate": 0.00019037469794591335, "loss": 2.416, "step": 7588 }, { "epoch": 0.3552694715899116, "grad_norm": 1.2578125, "learning_rate": 0.00019037219902330675, "loss": 3.2679, "step": 7589 }, { "epoch": 0.3553162853297443, "grad_norm": 1.8671875, "learning_rate": 0.00019036969979276206, "loss": 3.4221, "step": 7590 }, { "epoch": 0.3553630990695769, "grad_norm": 1.2578125, "learning_rate": 0.0001903672002542879, "loss": 2.9628, "step": 7591 }, { "epoch": 0.3554099128094096, "grad_norm": 1.25, "learning_rate": 0.0001903647004078927, "loss": 2.6856, "step": 7592 }, { "epoch": 0.3554567265492422, "grad_norm": 1.1796875, "learning_rate": 0.00019036220025358503, "loss": 3.3533, "step": 7593 }, { "epoch": 0.3555035402890748, "grad_norm": 1.25, "learning_rate": 0.0001903596997913734, "loss": 3.558, "step": 7594 }, { "epoch": 0.3555503540289075, "grad_norm": 1.5234375, "learning_rate": 0.00019035719902126626, "loss": 3.3185, "step": 7595 }, { "epoch": 0.3555971677687401, "grad_norm": 1.7890625, "learning_rate": 0.00019035469794327221, "loss": 2.8382, "step": 7596 }, { "epoch": 0.3556439815085728, "grad_norm": 1.3125, "learning_rate": 0.00019035219655739975, "loss": 3.5936, "step": 7597 }, { "epoch": 0.3556907952484054, "grad_norm": 1.328125, "learning_rate": 0.0001903496948636574, "loss": 3.5455, "step": 7598 }, { "epoch": 0.35573760898823803, "grad_norm": 1.71875, "learning_rate": 0.00019034719286205372, "loss": 3.0819, "step": 7599 }, { "epoch": 0.3557844227280707, "grad_norm": 1.5, "learning_rate": 0.00019034469055259715, "loss": 3.4563, "step": 7600 }, { "epoch": 0.3558312364679033, "grad_norm": 1.1953125, "learning_rate": 0.00019034218793529631, "loss": 2.7313, "step": 7601 }, { "epoch": 0.355878050207736, "grad_norm": 2.890625, "learning_rate": 0.00019033968501015966, "loss": 3.2381, "step": 7602 }, { "epoch": 0.3559248639475686, "grad_norm": 1.5390625, "learning_rate": 0.00019033718177719578, "loss": 3.2305, "step": 7603 }, { "epoch": 0.35597167768740123, "grad_norm": 1.46875, "learning_rate": 0.00019033467823641314, "loss": 3.2425, "step": 7604 }, { "epoch": 0.3560184914272339, "grad_norm": 1.5859375, "learning_rate": 0.00019033217438782033, "loss": 3.4161, "step": 7605 }, { "epoch": 0.3560653051670665, "grad_norm": 1.859375, "learning_rate": 0.00019032967023142585, "loss": 3.4962, "step": 7606 }, { "epoch": 0.3561121189068992, "grad_norm": 1.15625, "learning_rate": 0.00019032716576723827, "loss": 5.293, "step": 7607 }, { "epoch": 0.3561589326467318, "grad_norm": 1.484375, "learning_rate": 0.00019032466099526608, "loss": 3.1369, "step": 7608 }, { "epoch": 0.35620574638656444, "grad_norm": 1.6328125, "learning_rate": 0.0001903221559155178, "loss": 3.018, "step": 7609 }, { "epoch": 0.3562525601263971, "grad_norm": 1.4609375, "learning_rate": 0.000190319650528002, "loss": 3.4988, "step": 7610 }, { "epoch": 0.35629937386622973, "grad_norm": 1.4140625, "learning_rate": 0.00019031714483272723, "loss": 3.4148, "step": 7611 }, { "epoch": 0.3563461876060624, "grad_norm": 1.265625, "learning_rate": 0.000190314638829702, "loss": 3.0736, "step": 7612 }, { "epoch": 0.356393001345895, "grad_norm": 1.703125, "learning_rate": 0.00019031213251893486, "loss": 3.4592, "step": 7613 }, { "epoch": 0.35643981508572764, "grad_norm": 1.2265625, "learning_rate": 0.00019030962590043434, "loss": 3.1904, "step": 7614 }, { "epoch": 0.3564866288255603, "grad_norm": 1.3125, "learning_rate": 0.000190307118974209, "loss": 2.9545, "step": 7615 }, { "epoch": 0.35653344256539293, "grad_norm": 1.1875, "learning_rate": 0.00019030461174026736, "loss": 3.3312, "step": 7616 }, { "epoch": 0.3565802563052256, "grad_norm": 1.53125, "learning_rate": 0.00019030210419861802, "loss": 3.0653, "step": 7617 }, { "epoch": 0.3566270700450582, "grad_norm": 1.140625, "learning_rate": 0.00019029959634926945, "loss": 2.8126, "step": 7618 }, { "epoch": 0.35667388378489084, "grad_norm": 1.1796875, "learning_rate": 0.0001902970881922302, "loss": 2.7891, "step": 7619 }, { "epoch": 0.3567206975247235, "grad_norm": 1.3984375, "learning_rate": 0.00019029457972750887, "loss": 3.5002, "step": 7620 }, { "epoch": 0.35676751126455614, "grad_norm": 1.75, "learning_rate": 0.00019029207095511396, "loss": 3.6754, "step": 7621 }, { "epoch": 0.3568143250043888, "grad_norm": 1.6875, "learning_rate": 0.00019028956187505406, "loss": 3.1414, "step": 7622 }, { "epoch": 0.35686113874422143, "grad_norm": 1.1953125, "learning_rate": 0.0001902870524873377, "loss": 3.1133, "step": 7623 }, { "epoch": 0.35690795248405405, "grad_norm": 1.2421875, "learning_rate": 0.0001902845427919734, "loss": 2.7931, "step": 7624 }, { "epoch": 0.3569547662238867, "grad_norm": 1.703125, "learning_rate": 0.00019028203278896976, "loss": 2.7537, "step": 7625 }, { "epoch": 0.35700157996371934, "grad_norm": 1.6484375, "learning_rate": 0.00019027952247833532, "loss": 3.2102, "step": 7626 }, { "epoch": 0.357048393703552, "grad_norm": 1.3671875, "learning_rate": 0.0001902770118600786, "loss": 3.4377, "step": 7627 }, { "epoch": 0.35709520744338463, "grad_norm": 1.15625, "learning_rate": 0.00019027450093420816, "loss": 3.3992, "step": 7628 }, { "epoch": 0.35714202118321725, "grad_norm": 1.421875, "learning_rate": 0.00019027198970073263, "loss": 3.0631, "step": 7629 }, { "epoch": 0.3571888349230499, "grad_norm": 1.5, "learning_rate": 0.00019026947815966048, "loss": 2.7629, "step": 7630 }, { "epoch": 0.35723564866288254, "grad_norm": 1.5, "learning_rate": 0.00019026696631100034, "loss": 3.2005, "step": 7631 }, { "epoch": 0.3572824624027152, "grad_norm": 1.5390625, "learning_rate": 0.0001902644541547607, "loss": 3.4946, "step": 7632 }, { "epoch": 0.35732927614254784, "grad_norm": 1.171875, "learning_rate": 0.00019026194169095013, "loss": 3.1986, "step": 7633 }, { "epoch": 0.35737608988238045, "grad_norm": 1.203125, "learning_rate": 0.00019025942891957723, "loss": 2.8428, "step": 7634 }, { "epoch": 0.35742290362221313, "grad_norm": 1.09375, "learning_rate": 0.00019025691584065055, "loss": 3.1277, "step": 7635 }, { "epoch": 0.35746971736204575, "grad_norm": 2.578125, "learning_rate": 0.00019025440245417865, "loss": 3.5257, "step": 7636 }, { "epoch": 0.3575165311018784, "grad_norm": 1.390625, "learning_rate": 0.0001902518887601701, "loss": 3.0921, "step": 7637 }, { "epoch": 0.35756334484171104, "grad_norm": 1.296875, "learning_rate": 0.00019024937475863341, "loss": 3.1358, "step": 7638 }, { "epoch": 0.35761015858154366, "grad_norm": 1.46875, "learning_rate": 0.0001902468604495772, "loss": 3.2401, "step": 7639 }, { "epoch": 0.35765697232137633, "grad_norm": 1.2890625, "learning_rate": 0.00019024434583301007, "loss": 2.9745, "step": 7640 }, { "epoch": 0.35770378606120895, "grad_norm": 1.4375, "learning_rate": 0.0001902418309089405, "loss": 3.0883, "step": 7641 }, { "epoch": 0.3577505998010416, "grad_norm": 1.8046875, "learning_rate": 0.00019023931567737718, "loss": 3.3384, "step": 7642 }, { "epoch": 0.35779741354087424, "grad_norm": 1.4609375, "learning_rate": 0.00019023680013832857, "loss": 3.2993, "step": 7643 }, { "epoch": 0.35784422728070686, "grad_norm": 1.6875, "learning_rate": 0.00019023428429180324, "loss": 3.6769, "step": 7644 }, { "epoch": 0.35789104102053954, "grad_norm": 1.359375, "learning_rate": 0.00019023176813780986, "loss": 3.5702, "step": 7645 }, { "epoch": 0.35793785476037215, "grad_norm": 1.46875, "learning_rate": 0.0001902292516763569, "loss": 3.0845, "step": 7646 }, { "epoch": 0.35798466850020483, "grad_norm": 1.4453125, "learning_rate": 0.000190226734907453, "loss": 2.9484, "step": 7647 }, { "epoch": 0.35803148224003745, "grad_norm": 1.4765625, "learning_rate": 0.0001902242178311067, "loss": 3.4669, "step": 7648 }, { "epoch": 0.35807829597987006, "grad_norm": 1.3671875, "learning_rate": 0.00019022170044732665, "loss": 3.1939, "step": 7649 }, { "epoch": 0.35812510971970274, "grad_norm": 1.40625, "learning_rate": 0.00019021918275612131, "loss": 3.6157, "step": 7650 }, { "epoch": 0.35817192345953536, "grad_norm": 1.3046875, "learning_rate": 0.00019021666475749934, "loss": 3.102, "step": 7651 }, { "epoch": 0.35821873719936803, "grad_norm": 1.7890625, "learning_rate": 0.0001902141464514693, "loss": 3.4354, "step": 7652 }, { "epoch": 0.35826555093920065, "grad_norm": 1.6171875, "learning_rate": 0.00019021162783803978, "loss": 3.3689, "step": 7653 }, { "epoch": 0.3583123646790333, "grad_norm": 1.2265625, "learning_rate": 0.00019020910891721934, "loss": 3.0208, "step": 7654 }, { "epoch": 0.35835917841886594, "grad_norm": 1.1484375, "learning_rate": 0.00019020658968901656, "loss": 3.0333, "step": 7655 }, { "epoch": 0.35840599215869856, "grad_norm": 1.40625, "learning_rate": 0.00019020407015344007, "loss": 3.2563, "step": 7656 }, { "epoch": 0.35845280589853123, "grad_norm": 1.984375, "learning_rate": 0.00019020155031049843, "loss": 3.114, "step": 7657 }, { "epoch": 0.35849961963836385, "grad_norm": 1.5234375, "learning_rate": 0.0001901990301602002, "loss": 3.2664, "step": 7658 }, { "epoch": 0.3585464333781965, "grad_norm": 1.2890625, "learning_rate": 0.000190196509702554, "loss": 3.1146, "step": 7659 }, { "epoch": 0.35859324711802915, "grad_norm": 1.0078125, "learning_rate": 0.00019019398893756838, "loss": 2.9796, "step": 7660 }, { "epoch": 0.35864006085786176, "grad_norm": 1.46875, "learning_rate": 0.000190191467865252, "loss": 3.1587, "step": 7661 }, { "epoch": 0.35868687459769444, "grad_norm": 1.203125, "learning_rate": 0.00019018894648561337, "loss": 3.1908, "step": 7662 }, { "epoch": 0.35873368833752706, "grad_norm": 1.328125, "learning_rate": 0.00019018642479866113, "loss": 3.458, "step": 7663 }, { "epoch": 0.35878050207735973, "grad_norm": 1.2890625, "learning_rate": 0.00019018390280440384, "loss": 3.1207, "step": 7664 }, { "epoch": 0.35882731581719235, "grad_norm": 1.2421875, "learning_rate": 0.00019018138050285014, "loss": 3.0885, "step": 7665 }, { "epoch": 0.35887412955702497, "grad_norm": 1.21875, "learning_rate": 0.0001901788578940086, "loss": 3.0141, "step": 7666 }, { "epoch": 0.35892094329685764, "grad_norm": 1.9375, "learning_rate": 0.00019017633497788784, "loss": 3.0725, "step": 7667 }, { "epoch": 0.35896775703669026, "grad_norm": 1.4296875, "learning_rate": 0.0001901738117544964, "loss": 3.3474, "step": 7668 }, { "epoch": 0.35901457077652293, "grad_norm": 3.4375, "learning_rate": 0.0001901712882238429, "loss": 3.4671, "step": 7669 }, { "epoch": 0.35906138451635555, "grad_norm": 1.1484375, "learning_rate": 0.00019016876438593595, "loss": 3.3642, "step": 7670 }, { "epoch": 0.35910819825618817, "grad_norm": 1.2890625, "learning_rate": 0.00019016624024078416, "loss": 3.6651, "step": 7671 }, { "epoch": 0.35915501199602085, "grad_norm": 1.1015625, "learning_rate": 0.0001901637157883961, "loss": 3.2887, "step": 7672 }, { "epoch": 0.35920182573585346, "grad_norm": 1.2265625, "learning_rate": 0.0001901611910287804, "loss": 3.0635, "step": 7673 }, { "epoch": 0.35924863947568614, "grad_norm": 1.3046875, "learning_rate": 0.00019015866596194564, "loss": 3.6164, "step": 7674 }, { "epoch": 0.35929545321551876, "grad_norm": 1.4609375, "learning_rate": 0.00019015614058790048, "loss": 3.5198, "step": 7675 }, { "epoch": 0.3593422669553514, "grad_norm": 1.453125, "learning_rate": 0.00019015361490665343, "loss": 2.9463, "step": 7676 }, { "epoch": 0.35938908069518405, "grad_norm": 1.2578125, "learning_rate": 0.0001901510889182132, "loss": 3.0692, "step": 7677 }, { "epoch": 0.35943589443501667, "grad_norm": 1.4375, "learning_rate": 0.0001901485626225883, "loss": 3.2361, "step": 7678 }, { "epoch": 0.35948270817484934, "grad_norm": 1.1171875, "learning_rate": 0.0001901460360197874, "loss": 3.2959, "step": 7679 }, { "epoch": 0.35952952191468196, "grad_norm": 1.0703125, "learning_rate": 0.0001901435091098191, "loss": 2.6588, "step": 7680 }, { "epoch": 0.3595763356545146, "grad_norm": 2.3125, "learning_rate": 0.00019014098189269198, "loss": 3.3704, "step": 7681 }, { "epoch": 0.35962314939434725, "grad_norm": 1.25, "learning_rate": 0.0001901384543684147, "loss": 3.099, "step": 7682 }, { "epoch": 0.35966996313417987, "grad_norm": 1.265625, "learning_rate": 0.00019013592653699586, "loss": 3.073, "step": 7683 }, { "epoch": 0.35971677687401254, "grad_norm": 1.71875, "learning_rate": 0.00019013339839844405, "loss": 3.2642, "step": 7684 }, { "epoch": 0.35976359061384516, "grad_norm": 1.3125, "learning_rate": 0.0001901308699527679, "loss": 3.3478, "step": 7685 }, { "epoch": 0.3598104043536778, "grad_norm": 1.671875, "learning_rate": 0.00019012834119997595, "loss": 3.3419, "step": 7686 }, { "epoch": 0.35985721809351046, "grad_norm": 1.484375, "learning_rate": 0.00019012581214007698, "loss": 3.0613, "step": 7687 }, { "epoch": 0.3599040318333431, "grad_norm": 1.2578125, "learning_rate": 0.00019012328277307944, "loss": 2.5755, "step": 7688 }, { "epoch": 0.35995084557317575, "grad_norm": 1.625, "learning_rate": 0.00019012075309899208, "loss": 3.3586, "step": 7689 }, { "epoch": 0.35999765931300837, "grad_norm": 1.5625, "learning_rate": 0.00019011822311782342, "loss": 3.2011, "step": 7690 }, { "epoch": 0.360044473052841, "grad_norm": 1.46875, "learning_rate": 0.00019011569282958218, "loss": 2.7712, "step": 7691 }, { "epoch": 0.36009128679267366, "grad_norm": 1.46875, "learning_rate": 0.00019011316223427688, "loss": 3.1337, "step": 7692 }, { "epoch": 0.3601381005325063, "grad_norm": 1.4609375, "learning_rate": 0.00019011063133191622, "loss": 2.9181, "step": 7693 }, { "epoch": 0.36018491427233895, "grad_norm": 1.2421875, "learning_rate": 0.00019010810012250876, "loss": 3.3187, "step": 7694 }, { "epoch": 0.36023172801217157, "grad_norm": 1.5390625, "learning_rate": 0.00019010556860606318, "loss": 3.7247, "step": 7695 }, { "epoch": 0.3602785417520042, "grad_norm": 1.296875, "learning_rate": 0.00019010303678258805, "loss": 3.1946, "step": 7696 }, { "epoch": 0.36032535549183686, "grad_norm": 1.03125, "learning_rate": 0.00019010050465209205, "loss": 3.056, "step": 7697 }, { "epoch": 0.3603721692316695, "grad_norm": 1.3046875, "learning_rate": 0.00019009797221458382, "loss": 3.2573, "step": 7698 }, { "epoch": 0.36041898297150216, "grad_norm": 1.1796875, "learning_rate": 0.00019009543947007192, "loss": 2.9525, "step": 7699 }, { "epoch": 0.3604657967113348, "grad_norm": 1.296875, "learning_rate": 0.00019009290641856504, "loss": 3.2057, "step": 7700 }, { "epoch": 0.3605126104511674, "grad_norm": 1.3203125, "learning_rate": 0.00019009037306007178, "loss": 2.7021, "step": 7701 }, { "epoch": 0.36055942419100007, "grad_norm": 1.2265625, "learning_rate": 0.00019008783939460075, "loss": 2.8957, "step": 7702 }, { "epoch": 0.3606062379308327, "grad_norm": 1.6171875, "learning_rate": 0.00019008530542216065, "loss": 3.4145, "step": 7703 }, { "epoch": 0.36065305167066536, "grad_norm": 1.2734375, "learning_rate": 0.0001900827711427601, "loss": 3.4872, "step": 7704 }, { "epoch": 0.360699865410498, "grad_norm": 1.8046875, "learning_rate": 0.00019008023655640765, "loss": 3.7199, "step": 7705 }, { "epoch": 0.3607466791503306, "grad_norm": 1.21875, "learning_rate": 0.00019007770166311206, "loss": 3.6287, "step": 7706 }, { "epoch": 0.36079349289016327, "grad_norm": 1.4453125, "learning_rate": 0.0001900751664628819, "loss": 3.201, "step": 7707 }, { "epoch": 0.3608403066299959, "grad_norm": 1.25, "learning_rate": 0.0001900726309557258, "loss": 3.1725, "step": 7708 }, { "epoch": 0.36088712036982856, "grad_norm": 1.2421875, "learning_rate": 0.0001900700951416524, "loss": 3.0466, "step": 7709 }, { "epoch": 0.3609339341096612, "grad_norm": 1.390625, "learning_rate": 0.00019006755902067037, "loss": 3.3306, "step": 7710 }, { "epoch": 0.3609807478494938, "grad_norm": 1.3203125, "learning_rate": 0.00019006502259278835, "loss": 3.208, "step": 7711 }, { "epoch": 0.3610275615893265, "grad_norm": 1.234375, "learning_rate": 0.000190062485858015, "loss": 2.8756, "step": 7712 }, { "epoch": 0.3610743753291591, "grad_norm": 1.8828125, "learning_rate": 0.00019005994881635886, "loss": 3.3743, "step": 7713 }, { "epoch": 0.36112118906899177, "grad_norm": 1.234375, "learning_rate": 0.0001900574114678287, "loss": 3.4532, "step": 7714 }, { "epoch": 0.3611680028088244, "grad_norm": 1.234375, "learning_rate": 0.00019005487381243313, "loss": 3.1788, "step": 7715 }, { "epoch": 0.36121481654865706, "grad_norm": 1.8515625, "learning_rate": 0.00019005233585018074, "loss": 3.1883, "step": 7716 }, { "epoch": 0.3612616302884897, "grad_norm": 1.765625, "learning_rate": 0.00019004979758108025, "loss": 3.5939, "step": 7717 }, { "epoch": 0.3613084440283223, "grad_norm": 1.625, "learning_rate": 0.00019004725900514027, "loss": 3.3136, "step": 7718 }, { "epoch": 0.36135525776815497, "grad_norm": 1.1171875, "learning_rate": 0.00019004472012236945, "loss": 3.1621, "step": 7719 }, { "epoch": 0.3614020715079876, "grad_norm": 1.3828125, "learning_rate": 0.00019004218093277646, "loss": 2.554, "step": 7720 }, { "epoch": 0.36144888524782026, "grad_norm": 1.2265625, "learning_rate": 0.00019003964143636998, "loss": 3.2727, "step": 7721 }, { "epoch": 0.3614956989876529, "grad_norm": 1.3671875, "learning_rate": 0.00019003710163315856, "loss": 3.4544, "step": 7722 }, { "epoch": 0.3615425127274855, "grad_norm": 1.2578125, "learning_rate": 0.00019003456152315095, "loss": 3.332, "step": 7723 }, { "epoch": 0.3615893264673182, "grad_norm": 1.921875, "learning_rate": 0.00019003202110635578, "loss": 3.6275, "step": 7724 }, { "epoch": 0.3616361402071508, "grad_norm": 1.171875, "learning_rate": 0.0001900294803827817, "loss": 2.1478, "step": 7725 }, { "epoch": 0.36168295394698347, "grad_norm": 1.109375, "learning_rate": 0.00019002693935243737, "loss": 2.9064, "step": 7726 }, { "epoch": 0.3617297676868161, "grad_norm": 1.28125, "learning_rate": 0.00019002439801533148, "loss": 3.3685, "step": 7727 }, { "epoch": 0.3617765814266487, "grad_norm": 1.4453125, "learning_rate": 0.0001900218563714726, "loss": 3.3472, "step": 7728 }, { "epoch": 0.3618233951664814, "grad_norm": 1.4609375, "learning_rate": 0.00019001931442086945, "loss": 3.0728, "step": 7729 }, { "epoch": 0.361870208906314, "grad_norm": 2.328125, "learning_rate": 0.00019001677216353073, "loss": 3.6626, "step": 7730 }, { "epoch": 0.36191702264614667, "grad_norm": 1.3046875, "learning_rate": 0.00019001422959946505, "loss": 3.3397, "step": 7731 }, { "epoch": 0.3619638363859793, "grad_norm": 1.3125, "learning_rate": 0.00019001168672868108, "loss": 3.032, "step": 7732 }, { "epoch": 0.3620106501258119, "grad_norm": 1.390625, "learning_rate": 0.00019000914355118747, "loss": 3.5045, "step": 7733 }, { "epoch": 0.3620574638656446, "grad_norm": 1.859375, "learning_rate": 0.00019000660006699294, "loss": 3.5449, "step": 7734 }, { "epoch": 0.3621042776054772, "grad_norm": 1.59375, "learning_rate": 0.0001900040562761061, "loss": 3.4055, "step": 7735 }, { "epoch": 0.3621510913453099, "grad_norm": 1.78125, "learning_rate": 0.00019000151217853567, "loss": 3.6592, "step": 7736 }, { "epoch": 0.3621979050851425, "grad_norm": 1.375, "learning_rate": 0.00018999896777429026, "loss": 3.0134, "step": 7737 }, { "epoch": 0.3622447188249751, "grad_norm": 2.546875, "learning_rate": 0.0001899964230633786, "loss": 3.5153, "step": 7738 }, { "epoch": 0.3622915325648078, "grad_norm": 2.015625, "learning_rate": 0.0001899938780458093, "loss": 3.1529, "step": 7739 }, { "epoch": 0.3623383463046404, "grad_norm": 1.296875, "learning_rate": 0.00018999133272159106, "loss": 3.4916, "step": 7740 }, { "epoch": 0.3623851600444731, "grad_norm": 1.7109375, "learning_rate": 0.0001899887870907326, "loss": 2.9494, "step": 7741 }, { "epoch": 0.3624319737843057, "grad_norm": 1.390625, "learning_rate": 0.0001899862411532425, "loss": 3.4579, "step": 7742 }, { "epoch": 0.3624787875241383, "grad_norm": 1.4296875, "learning_rate": 0.0001899836949091295, "loss": 3.1079, "step": 7743 }, { "epoch": 0.362525601263971, "grad_norm": 1.71875, "learning_rate": 0.0001899811483584023, "loss": 2.8675, "step": 7744 }, { "epoch": 0.3625724150038036, "grad_norm": 1.3203125, "learning_rate": 0.00018997860150106948, "loss": 3.0097, "step": 7745 }, { "epoch": 0.3626192287436363, "grad_norm": 1.15625, "learning_rate": 0.00018997605433713978, "loss": 3.2833, "step": 7746 }, { "epoch": 0.3626660424834689, "grad_norm": 1.109375, "learning_rate": 0.0001899735068666219, "loss": 3.0445, "step": 7747 }, { "epoch": 0.3627128562233015, "grad_norm": 1.203125, "learning_rate": 0.0001899709590895245, "loss": 3.2823, "step": 7748 }, { "epoch": 0.3627596699631342, "grad_norm": 1.3515625, "learning_rate": 0.00018996841100585624, "loss": 2.7523, "step": 7749 }, { "epoch": 0.3628064837029668, "grad_norm": 1.2890625, "learning_rate": 0.00018996586261562582, "loss": 3.2865, "step": 7750 }, { "epoch": 0.3628532974427995, "grad_norm": 1.5625, "learning_rate": 0.00018996331391884193, "loss": 3.5341, "step": 7751 }, { "epoch": 0.3629001111826321, "grad_norm": 1.1875, "learning_rate": 0.00018996076491551326, "loss": 3.0755, "step": 7752 }, { "epoch": 0.3629469249224647, "grad_norm": 1.265625, "learning_rate": 0.0001899582156056485, "loss": 3.2943, "step": 7753 }, { "epoch": 0.3629937386622974, "grad_norm": 1.578125, "learning_rate": 0.00018995566598925624, "loss": 3.1059, "step": 7754 }, { "epoch": 0.36304055240213, "grad_norm": 1.2578125, "learning_rate": 0.00018995311606634534, "loss": 2.7874, "step": 7755 }, { "epoch": 0.3630873661419627, "grad_norm": 1.859375, "learning_rate": 0.00018995056583692434, "loss": 2.9032, "step": 7756 }, { "epoch": 0.3631341798817953, "grad_norm": 1.671875, "learning_rate": 0.00018994801530100198, "loss": 3.1438, "step": 7757 }, { "epoch": 0.3631809936216279, "grad_norm": 1.390625, "learning_rate": 0.000189945464458587, "loss": 2.8709, "step": 7758 }, { "epoch": 0.3632278073614606, "grad_norm": 1.890625, "learning_rate": 0.00018994291330968804, "loss": 3.2278, "step": 7759 }, { "epoch": 0.3632746211012932, "grad_norm": 1.2890625, "learning_rate": 0.00018994036185431378, "loss": 3.4839, "step": 7760 }, { "epoch": 0.3633214348411259, "grad_norm": 1.2578125, "learning_rate": 0.00018993781009247297, "loss": 3.2508, "step": 7761 }, { "epoch": 0.3633682485809585, "grad_norm": 1.703125, "learning_rate": 0.00018993525802417423, "loss": 3.3276, "step": 7762 }, { "epoch": 0.3634150623207911, "grad_norm": 2.75, "learning_rate": 0.0001899327056494263, "loss": 3.3754, "step": 7763 }, { "epoch": 0.3634618760606238, "grad_norm": 1.1484375, "learning_rate": 0.0001899301529682379, "loss": 3.5382, "step": 7764 }, { "epoch": 0.3635086898004564, "grad_norm": 1.5390625, "learning_rate": 0.00018992759998061768, "loss": 3.1474, "step": 7765 }, { "epoch": 0.3635555035402891, "grad_norm": 1.328125, "learning_rate": 0.0001899250466865744, "loss": 3.0781, "step": 7766 }, { "epoch": 0.3636023172801217, "grad_norm": 1.390625, "learning_rate": 0.00018992249308611667, "loss": 3.5675, "step": 7767 }, { "epoch": 0.36364913101995433, "grad_norm": 1.3125, "learning_rate": 0.00018991993917925326, "loss": 3.0303, "step": 7768 }, { "epoch": 0.363695944759787, "grad_norm": 1.3046875, "learning_rate": 0.00018991738496599287, "loss": 3.5692, "step": 7769 }, { "epoch": 0.3637427584996196, "grad_norm": 1.3828125, "learning_rate": 0.00018991483044634416, "loss": 3.1018, "step": 7770 }, { "epoch": 0.3637895722394523, "grad_norm": 1.7578125, "learning_rate": 0.0001899122756203159, "loss": 3.2949, "step": 7771 }, { "epoch": 0.3638363859792849, "grad_norm": 1.3984375, "learning_rate": 0.00018990972048791674, "loss": 3.4993, "step": 7772 }, { "epoch": 0.36388319971911753, "grad_norm": 1.453125, "learning_rate": 0.0001899071650491554, "loss": 3.4188, "step": 7773 }, { "epoch": 0.3639300134589502, "grad_norm": 1.4765625, "learning_rate": 0.0001899046093040406, "loss": 3.1719, "step": 7774 }, { "epoch": 0.3639768271987828, "grad_norm": 1.2109375, "learning_rate": 0.00018990205325258104, "loss": 2.8853, "step": 7775 }, { "epoch": 0.3640236409386155, "grad_norm": 1.2578125, "learning_rate": 0.0001898994968947854, "loss": 2.8214, "step": 7776 }, { "epoch": 0.3640704546784481, "grad_norm": 1.5078125, "learning_rate": 0.00018989694023066248, "loss": 3.32, "step": 7777 }, { "epoch": 0.3641172684182808, "grad_norm": 1.515625, "learning_rate": 0.0001898943832602209, "loss": 3.3557, "step": 7778 }, { "epoch": 0.3641640821581134, "grad_norm": 1.265625, "learning_rate": 0.0001898918259834694, "loss": 3.035, "step": 7779 }, { "epoch": 0.36421089589794603, "grad_norm": 1.390625, "learning_rate": 0.00018988926840041666, "loss": 3.3647, "step": 7780 }, { "epoch": 0.3642577096377787, "grad_norm": 1.578125, "learning_rate": 0.0001898867105110715, "loss": 2.9071, "step": 7781 }, { "epoch": 0.3643045233776113, "grad_norm": 7.96875, "learning_rate": 0.00018988415231544257, "loss": 5.664, "step": 7782 }, { "epoch": 0.364351337117444, "grad_norm": 1.4765625, "learning_rate": 0.00018988159381353855, "loss": 3.0957, "step": 7783 }, { "epoch": 0.3643981508572766, "grad_norm": 1.4453125, "learning_rate": 0.0001898790350053682, "loss": 3.1383, "step": 7784 }, { "epoch": 0.36444496459710923, "grad_norm": 1.0546875, "learning_rate": 0.00018987647589094024, "loss": 2.4505, "step": 7785 }, { "epoch": 0.3644917783369419, "grad_norm": 1.6484375, "learning_rate": 0.00018987391647026338, "loss": 3.6191, "step": 7786 }, { "epoch": 0.3645385920767745, "grad_norm": 1.40625, "learning_rate": 0.00018987135674334638, "loss": 3.1232, "step": 7787 }, { "epoch": 0.3645854058166072, "grad_norm": 1.53125, "learning_rate": 0.0001898687967101979, "loss": 3.0018, "step": 7788 }, { "epoch": 0.3646322195564398, "grad_norm": 1.34375, "learning_rate": 0.00018986623637082665, "loss": 3.2474, "step": 7789 }, { "epoch": 0.36467903329627244, "grad_norm": 1.5078125, "learning_rate": 0.00018986367572524144, "loss": 3.7259, "step": 7790 }, { "epoch": 0.3647258470361051, "grad_norm": 1.75, "learning_rate": 0.00018986111477345093, "loss": 3.245, "step": 7791 }, { "epoch": 0.36477266077593773, "grad_norm": 1.5078125, "learning_rate": 0.00018985855351546388, "loss": 3.5687, "step": 7792 }, { "epoch": 0.3648194745157704, "grad_norm": 1.296875, "learning_rate": 0.000189855991951289, "loss": 3.255, "step": 7793 }, { "epoch": 0.364866288255603, "grad_norm": 1.0, "learning_rate": 0.000189853430080935, "loss": 2.9795, "step": 7794 }, { "epoch": 0.36491310199543564, "grad_norm": 1.078125, "learning_rate": 0.00018985086790441067, "loss": 3.2313, "step": 7795 }, { "epoch": 0.3649599157352683, "grad_norm": 1.2265625, "learning_rate": 0.0001898483054217247, "loss": 3.5018, "step": 7796 }, { "epoch": 0.36500672947510093, "grad_norm": 2.0625, "learning_rate": 0.00018984574263288578, "loss": 2.8945, "step": 7797 }, { "epoch": 0.3650535432149336, "grad_norm": 1.203125, "learning_rate": 0.00018984317953790272, "loss": 3.5452, "step": 7798 }, { "epoch": 0.3651003569547662, "grad_norm": 1.25, "learning_rate": 0.00018984061613678418, "loss": 3.3027, "step": 7799 }, { "epoch": 0.36514717069459884, "grad_norm": 1.4296875, "learning_rate": 0.000189838052429539, "loss": 3.5963, "step": 7800 }, { "epoch": 0.3651939844344315, "grad_norm": 1.40625, "learning_rate": 0.0001898354884161758, "loss": 3.1589, "step": 7801 }, { "epoch": 0.36524079817426414, "grad_norm": 1.59375, "learning_rate": 0.00018983292409670337, "loss": 2.9644, "step": 7802 }, { "epoch": 0.3652876119140968, "grad_norm": 1.4140625, "learning_rate": 0.00018983035947113045, "loss": 3.4723, "step": 7803 }, { "epoch": 0.36533442565392943, "grad_norm": 1.796875, "learning_rate": 0.00018982779453946577, "loss": 2.8159, "step": 7804 }, { "epoch": 0.36538123939376205, "grad_norm": 1.6484375, "learning_rate": 0.00018982522930171807, "loss": 3.0858, "step": 7805 }, { "epoch": 0.3654280531335947, "grad_norm": 1.390625, "learning_rate": 0.0001898226637578961, "loss": 3.4005, "step": 7806 }, { "epoch": 0.36547486687342734, "grad_norm": 4.71875, "learning_rate": 0.0001898200979080086, "loss": 2.7954, "step": 7807 }, { "epoch": 0.36552168061326, "grad_norm": 1.4375, "learning_rate": 0.00018981753175206429, "loss": 3.2549, "step": 7808 }, { "epoch": 0.36556849435309263, "grad_norm": 1.1953125, "learning_rate": 0.00018981496529007195, "loss": 3.086, "step": 7809 }, { "epoch": 0.36561530809292525, "grad_norm": 2.0, "learning_rate": 0.00018981239852204032, "loss": 3.0793, "step": 7810 }, { "epoch": 0.3656621218327579, "grad_norm": 1.3125, "learning_rate": 0.0001898098314479781, "loss": 3.6832, "step": 7811 }, { "epoch": 0.36570893557259054, "grad_norm": 1.3359375, "learning_rate": 0.0001898072640678941, "loss": 3.2458, "step": 7812 }, { "epoch": 0.3657557493124232, "grad_norm": 1.34375, "learning_rate": 0.00018980469638179702, "loss": 2.9691, "step": 7813 }, { "epoch": 0.36580256305225584, "grad_norm": 1.2265625, "learning_rate": 0.0001898021283896956, "loss": 3.2797, "step": 7814 }, { "epoch": 0.36584937679208845, "grad_norm": 0.9453125, "learning_rate": 0.00018979956009159866, "loss": 3.6016, "step": 7815 }, { "epoch": 0.36589619053192113, "grad_norm": 1.7265625, "learning_rate": 0.00018979699148751494, "loss": 3.4761, "step": 7816 }, { "epoch": 0.36594300427175375, "grad_norm": 1.4453125, "learning_rate": 0.0001897944225774531, "loss": 3.1485, "step": 7817 }, { "epoch": 0.3659898180115864, "grad_norm": 1.375, "learning_rate": 0.00018979185336142197, "loss": 2.9667, "step": 7818 }, { "epoch": 0.36603663175141904, "grad_norm": 1.0625, "learning_rate": 0.0001897892838394303, "loss": 3.1583, "step": 7819 }, { "epoch": 0.36608344549125166, "grad_norm": 1.328125, "learning_rate": 0.00018978671401148685, "loss": 3.3193, "step": 7820 }, { "epoch": 0.36613025923108433, "grad_norm": 1.6796875, "learning_rate": 0.00018978414387760034, "loss": 3.6495, "step": 7821 }, { "epoch": 0.36617707297091695, "grad_norm": 1.171875, "learning_rate": 0.00018978157343777955, "loss": 2.9147, "step": 7822 }, { "epoch": 0.3662238867107496, "grad_norm": 1.8359375, "learning_rate": 0.00018977900269203323, "loss": 3.4435, "step": 7823 }, { "epoch": 0.36627070045058224, "grad_norm": 1.2734375, "learning_rate": 0.00018977643164037018, "loss": 3.416, "step": 7824 }, { "epoch": 0.36631751419041486, "grad_norm": 1.515625, "learning_rate": 0.0001897738602827991, "loss": 3.5308, "step": 7825 }, { "epoch": 0.36636432793024754, "grad_norm": 1.3515625, "learning_rate": 0.0001897712886193288, "loss": 3.1894, "step": 7826 }, { "epoch": 0.36641114167008015, "grad_norm": 1.7421875, "learning_rate": 0.00018976871664996804, "loss": 3.3565, "step": 7827 }, { "epoch": 0.36645795540991283, "grad_norm": 1.3046875, "learning_rate": 0.0001897661443747255, "loss": 2.9946, "step": 7828 }, { "epoch": 0.36650476914974545, "grad_norm": 1.6796875, "learning_rate": 0.00018976357179361007, "loss": 3.1015, "step": 7829 }, { "epoch": 0.36655158288957806, "grad_norm": 1.296875, "learning_rate": 0.00018976099890663043, "loss": 3.0132, "step": 7830 }, { "epoch": 0.36659839662941074, "grad_norm": 1.6875, "learning_rate": 0.00018975842571379537, "loss": 3.0597, "step": 7831 }, { "epoch": 0.36664521036924336, "grad_norm": 1.3671875, "learning_rate": 0.0001897558522151137, "loss": 3.1121, "step": 7832 }, { "epoch": 0.36669202410907603, "grad_norm": 1.203125, "learning_rate": 0.0001897532784105941, "loss": 3.3562, "step": 7833 }, { "epoch": 0.36673883784890865, "grad_norm": 1.3828125, "learning_rate": 0.00018975070430024543, "loss": 3.153, "step": 7834 }, { "epoch": 0.36678565158874127, "grad_norm": 1.2578125, "learning_rate": 0.00018974812988407642, "loss": 3.2897, "step": 7835 }, { "epoch": 0.36683246532857394, "grad_norm": 1.6640625, "learning_rate": 0.00018974555516209585, "loss": 3.1429, "step": 7836 }, { "epoch": 0.36687927906840656, "grad_norm": 1.1328125, "learning_rate": 0.00018974298013431246, "loss": 3.0621, "step": 7837 }, { "epoch": 0.36692609280823923, "grad_norm": 1.671875, "learning_rate": 0.00018974040480073507, "loss": 3.1605, "step": 7838 }, { "epoch": 0.36697290654807185, "grad_norm": 1.2265625, "learning_rate": 0.00018973782916137247, "loss": 3.3124, "step": 7839 }, { "epoch": 0.3670197202879045, "grad_norm": 0.9921875, "learning_rate": 0.00018973525321623337, "loss": 3.0, "step": 7840 }, { "epoch": 0.36706653402773715, "grad_norm": 1.5, "learning_rate": 0.00018973267696532656, "loss": 3.0355, "step": 7841 }, { "epoch": 0.36711334776756976, "grad_norm": 1.4453125, "learning_rate": 0.00018973010040866087, "loss": 3.1218, "step": 7842 }, { "epoch": 0.36716016150740244, "grad_norm": 1.65625, "learning_rate": 0.00018972752354624503, "loss": 3.4002, "step": 7843 }, { "epoch": 0.36720697524723506, "grad_norm": 1.25, "learning_rate": 0.00018972494637808788, "loss": 3.0289, "step": 7844 }, { "epoch": 0.36725378898706773, "grad_norm": 2.03125, "learning_rate": 0.00018972236890419812, "loss": 3.309, "step": 7845 }, { "epoch": 0.36730060272690035, "grad_norm": 1.8515625, "learning_rate": 0.00018971979112458458, "loss": 3.4923, "step": 7846 }, { "epoch": 0.36734741646673297, "grad_norm": 2.203125, "learning_rate": 0.00018971721303925605, "loss": 3.1477, "step": 7847 }, { "epoch": 0.36739423020656564, "grad_norm": 1.5, "learning_rate": 0.0001897146346482213, "loss": 3.1227, "step": 7848 }, { "epoch": 0.36744104394639826, "grad_norm": 1.9296875, "learning_rate": 0.0001897120559514891, "loss": 3.2038, "step": 7849 }, { "epoch": 0.36748785768623093, "grad_norm": 1.390625, "learning_rate": 0.00018970947694906828, "loss": 3.3735, "step": 7850 }, { "epoch": 0.36753467142606355, "grad_norm": 1.0859375, "learning_rate": 0.0001897068976409676, "loss": 4.1993, "step": 7851 }, { "epoch": 0.36758148516589617, "grad_norm": 1.234375, "learning_rate": 0.00018970431802719584, "loss": 2.9746, "step": 7852 }, { "epoch": 0.36762829890572885, "grad_norm": 1.28125, "learning_rate": 0.0001897017381077618, "loss": 3.0589, "step": 7853 }, { "epoch": 0.36767511264556146, "grad_norm": 1.2890625, "learning_rate": 0.0001896991578826743, "loss": 3.0764, "step": 7854 }, { "epoch": 0.36772192638539414, "grad_norm": 1.828125, "learning_rate": 0.00018969657735194207, "loss": 3.3279, "step": 7855 }, { "epoch": 0.36776874012522676, "grad_norm": 1.375, "learning_rate": 0.00018969399651557394, "loss": 2.9422, "step": 7856 }, { "epoch": 0.3678155538650594, "grad_norm": 1.203125, "learning_rate": 0.0001896914153735787, "loss": 3.2807, "step": 7857 }, { "epoch": 0.36786236760489205, "grad_norm": 1.3671875, "learning_rate": 0.00018968883392596518, "loss": 3.2783, "step": 7858 }, { "epoch": 0.36790918134472467, "grad_norm": 1.375, "learning_rate": 0.0001896862521727421, "loss": 3.4434, "step": 7859 }, { "epoch": 0.36795599508455734, "grad_norm": 1.3359375, "learning_rate": 0.00018968367011391835, "loss": 3.3543, "step": 7860 }, { "epoch": 0.36800280882438996, "grad_norm": 1.15625, "learning_rate": 0.00018968108774950263, "loss": 2.9979, "step": 7861 }, { "epoch": 0.3680496225642226, "grad_norm": 1.578125, "learning_rate": 0.0001896785050795038, "loss": 3.0117, "step": 7862 }, { "epoch": 0.36809643630405525, "grad_norm": 1.34375, "learning_rate": 0.0001896759221039307, "loss": 4.2286, "step": 7863 }, { "epoch": 0.36814325004388787, "grad_norm": 1.5, "learning_rate": 0.00018967333882279202, "loss": 3.3057, "step": 7864 }, { "epoch": 0.36819006378372054, "grad_norm": 1.046875, "learning_rate": 0.0001896707552360966, "loss": 4.0052, "step": 7865 }, { "epoch": 0.36823687752355316, "grad_norm": 1.390625, "learning_rate": 0.00018966817134385332, "loss": 3.3969, "step": 7866 }, { "epoch": 0.3682836912633858, "grad_norm": 1.3203125, "learning_rate": 0.0001896655871460709, "loss": 3.0136, "step": 7867 }, { "epoch": 0.36833050500321846, "grad_norm": 1.0625, "learning_rate": 0.0001896630026427582, "loss": 3.1366, "step": 7868 }, { "epoch": 0.3683773187430511, "grad_norm": 1.625, "learning_rate": 0.00018966041783392398, "loss": 2.9182, "step": 7869 }, { "epoch": 0.36842413248288375, "grad_norm": 1.15625, "learning_rate": 0.00018965783271957708, "loss": 3.0801, "step": 7870 }, { "epoch": 0.36847094622271637, "grad_norm": 1.4921875, "learning_rate": 0.0001896552472997263, "loss": 2.9501, "step": 7871 }, { "epoch": 0.368517759962549, "grad_norm": 1.375, "learning_rate": 0.0001896526615743804, "loss": 3.1842, "step": 7872 }, { "epoch": 0.36856457370238166, "grad_norm": 1.734375, "learning_rate": 0.0001896500755435483, "loss": 3.4617, "step": 7873 }, { "epoch": 0.3686113874422143, "grad_norm": 1.5390625, "learning_rate": 0.00018964748920723872, "loss": 2.6654, "step": 7874 }, { "epoch": 0.36865820118204695, "grad_norm": 1.2734375, "learning_rate": 0.00018964490256546054, "loss": 3.1028, "step": 7875 }, { "epoch": 0.36870501492187957, "grad_norm": 1.4296875, "learning_rate": 0.0001896423156182225, "loss": 3.1131, "step": 7876 }, { "epoch": 0.3687518286617122, "grad_norm": 1.5546875, "learning_rate": 0.00018963972836553343, "loss": 2.8661, "step": 7877 }, { "epoch": 0.36879864240154486, "grad_norm": 1.1875, "learning_rate": 0.0001896371408074022, "loss": 2.9565, "step": 7878 }, { "epoch": 0.3688454561413775, "grad_norm": 1.3046875, "learning_rate": 0.00018963455294383758, "loss": 3.2768, "step": 7879 }, { "epoch": 0.36889226988121016, "grad_norm": 1.0390625, "learning_rate": 0.00018963196477484842, "loss": 2.4631, "step": 7880 }, { "epoch": 0.3689390836210428, "grad_norm": 1.34375, "learning_rate": 0.00018962937630044352, "loss": 3.0757, "step": 7881 }, { "epoch": 0.3689858973608754, "grad_norm": 1.421875, "learning_rate": 0.00018962678752063166, "loss": 3.2874, "step": 7882 }, { "epoch": 0.36903271110070807, "grad_norm": 1.4453125, "learning_rate": 0.00018962419843542177, "loss": 3.3798, "step": 7883 }, { "epoch": 0.3690795248405407, "grad_norm": 1.9921875, "learning_rate": 0.00018962160904482253, "loss": 3.3342, "step": 7884 }, { "epoch": 0.36912633858037336, "grad_norm": 1.171875, "learning_rate": 0.00018961901934884287, "loss": 2.802, "step": 7885 }, { "epoch": 0.369173152320206, "grad_norm": 1.1953125, "learning_rate": 0.00018961642934749162, "loss": 2.9557, "step": 7886 }, { "epoch": 0.3692199660600386, "grad_norm": 1.1953125, "learning_rate": 0.00018961383904077752, "loss": 3.1204, "step": 7887 }, { "epoch": 0.36926677979987127, "grad_norm": 1.6953125, "learning_rate": 0.00018961124842870945, "loss": 4.906, "step": 7888 }, { "epoch": 0.3693135935397039, "grad_norm": 1.40625, "learning_rate": 0.00018960865751129627, "loss": 3.1429, "step": 7889 }, { "epoch": 0.36936040727953656, "grad_norm": 1.4921875, "learning_rate": 0.00018960606628854671, "loss": 3.2419, "step": 7890 }, { "epoch": 0.3694072210193692, "grad_norm": 1.5859375, "learning_rate": 0.0001896034747604697, "loss": 3.527, "step": 7891 }, { "epoch": 0.3694540347592018, "grad_norm": 1.21875, "learning_rate": 0.000189600882927074, "loss": 2.734, "step": 7892 }, { "epoch": 0.3695008484990345, "grad_norm": 1.3828125, "learning_rate": 0.00018959829078836848, "loss": 3.2847, "step": 7893 }, { "epoch": 0.3695476622388671, "grad_norm": 1.765625, "learning_rate": 0.00018959569834436197, "loss": 3.2851, "step": 7894 }, { "epoch": 0.36959447597869977, "grad_norm": 1.640625, "learning_rate": 0.00018959310559506332, "loss": 2.8164, "step": 7895 }, { "epoch": 0.3696412897185324, "grad_norm": 1.2734375, "learning_rate": 0.00018959051254048128, "loss": 3.0642, "step": 7896 }, { "epoch": 0.369688103458365, "grad_norm": 1.109375, "learning_rate": 0.00018958791918062483, "loss": 3.1116, "step": 7897 }, { "epoch": 0.3697349171981977, "grad_norm": 1.4140625, "learning_rate": 0.00018958532551550264, "loss": 3.0886, "step": 7898 }, { "epoch": 0.3697817309380303, "grad_norm": 1.5078125, "learning_rate": 0.0001895827315451237, "loss": 2.7488, "step": 7899 }, { "epoch": 0.36982854467786297, "grad_norm": 1.1640625, "learning_rate": 0.00018958013726949672, "loss": 2.7507, "step": 7900 }, { "epoch": 0.3698753584176956, "grad_norm": 1.515625, "learning_rate": 0.00018957754268863065, "loss": 3.0125, "step": 7901 }, { "epoch": 0.36992217215752826, "grad_norm": 1.3515625, "learning_rate": 0.00018957494780253424, "loss": 3.7398, "step": 7902 }, { "epoch": 0.3699689858973609, "grad_norm": 1.15625, "learning_rate": 0.0001895723526112164, "loss": 2.635, "step": 7903 }, { "epoch": 0.3700157996371935, "grad_norm": 1.4296875, "learning_rate": 0.00018956975711468594, "loss": 3.049, "step": 7904 }, { "epoch": 0.3700626133770262, "grad_norm": 1.265625, "learning_rate": 0.0001895671613129517, "loss": 3.2298, "step": 7905 }, { "epoch": 0.3701094271168588, "grad_norm": 1.4140625, "learning_rate": 0.00018956456520602256, "loss": 3.0016, "step": 7906 }, { "epoch": 0.37015624085669147, "grad_norm": 1.2265625, "learning_rate": 0.00018956196879390733, "loss": 3.1685, "step": 7907 }, { "epoch": 0.3702030545965241, "grad_norm": 2.359375, "learning_rate": 0.00018955937207661486, "loss": 3.3292, "step": 7908 }, { "epoch": 0.3702498683363567, "grad_norm": 1.140625, "learning_rate": 0.000189556775054154, "loss": 3.1123, "step": 7909 }, { "epoch": 0.3702966820761894, "grad_norm": 1.34375, "learning_rate": 0.00018955417772653364, "loss": 3.0243, "step": 7910 }, { "epoch": 0.370343495816022, "grad_norm": 1.9375, "learning_rate": 0.00018955158009376256, "loss": 3.6597, "step": 7911 }, { "epoch": 0.37039030955585467, "grad_norm": 1.5078125, "learning_rate": 0.00018954898215584965, "loss": 3.1214, "step": 7912 }, { "epoch": 0.3704371232956873, "grad_norm": 1.4140625, "learning_rate": 0.00018954638391280377, "loss": 3.1402, "step": 7913 }, { "epoch": 0.3704839370355199, "grad_norm": 1.3203125, "learning_rate": 0.00018954378536463376, "loss": 3.0955, "step": 7914 }, { "epoch": 0.3705307507753526, "grad_norm": 1.8984375, "learning_rate": 0.0001895411865113485, "loss": 3.2067, "step": 7915 }, { "epoch": 0.3705775645151852, "grad_norm": 1.125, "learning_rate": 0.0001895385873529568, "loss": 2.4003, "step": 7916 }, { "epoch": 0.3706243782550179, "grad_norm": 1.171875, "learning_rate": 0.00018953598788946753, "loss": 3.554, "step": 7917 }, { "epoch": 0.3706711919948505, "grad_norm": 1.2578125, "learning_rate": 0.00018953338812088957, "loss": 2.8532, "step": 7918 }, { "epoch": 0.3707180057346831, "grad_norm": 1.109375, "learning_rate": 0.00018953078804723176, "loss": 3.1858, "step": 7919 }, { "epoch": 0.3707648194745158, "grad_norm": 1.765625, "learning_rate": 0.00018952818766850298, "loss": 2.9899, "step": 7920 }, { "epoch": 0.3708116332143484, "grad_norm": 1.203125, "learning_rate": 0.00018952558698471204, "loss": 4.8848, "step": 7921 }, { "epoch": 0.3708584469541811, "grad_norm": 1.7421875, "learning_rate": 0.00018952298599586787, "loss": 3.035, "step": 7922 }, { "epoch": 0.3709052606940137, "grad_norm": 1.2578125, "learning_rate": 0.0001895203847019793, "loss": 3.3213, "step": 7923 }, { "epoch": 0.3709520744338463, "grad_norm": 1.328125, "learning_rate": 0.00018951778310305516, "loss": 3.0806, "step": 7924 }, { "epoch": 0.370998888173679, "grad_norm": 1.453125, "learning_rate": 0.00018951518119910438, "loss": 3.3549, "step": 7925 }, { "epoch": 0.3710457019135116, "grad_norm": 1.25, "learning_rate": 0.00018951257899013578, "loss": 3.2946, "step": 7926 }, { "epoch": 0.3710925156533443, "grad_norm": 1.25, "learning_rate": 0.00018950997647615826, "loss": 2.8236, "step": 7927 }, { "epoch": 0.3711393293931769, "grad_norm": 1.4453125, "learning_rate": 0.00018950737365718066, "loss": 3.0211, "step": 7928 }, { "epoch": 0.3711861431330095, "grad_norm": 1.5390625, "learning_rate": 0.00018950477053321184, "loss": 3.4262, "step": 7929 }, { "epoch": 0.3712329568728422, "grad_norm": 1.1484375, "learning_rate": 0.00018950216710426072, "loss": 3.1108, "step": 7930 }, { "epoch": 0.3712797706126748, "grad_norm": 1.4453125, "learning_rate": 0.00018949956337033612, "loss": 3.2274, "step": 7931 }, { "epoch": 0.3713265843525075, "grad_norm": 1.53125, "learning_rate": 0.00018949695933144693, "loss": 3.0472, "step": 7932 }, { "epoch": 0.3713733980923401, "grad_norm": 1.375, "learning_rate": 0.00018949435498760204, "loss": 3.2736, "step": 7933 }, { "epoch": 0.3714202118321727, "grad_norm": 1.4453125, "learning_rate": 0.00018949175033881028, "loss": 3.2612, "step": 7934 }, { "epoch": 0.3714670255720054, "grad_norm": 1.34375, "learning_rate": 0.0001894891453850806, "loss": 3.1843, "step": 7935 }, { "epoch": 0.371513839311838, "grad_norm": 1.8515625, "learning_rate": 0.00018948654012642177, "loss": 3.6272, "step": 7936 }, { "epoch": 0.3715606530516707, "grad_norm": 1.140625, "learning_rate": 0.00018948393456284275, "loss": 3.2064, "step": 7937 }, { "epoch": 0.3716074667915033, "grad_norm": 1.203125, "learning_rate": 0.00018948132869435241, "loss": 3.0971, "step": 7938 }, { "epoch": 0.3716542805313359, "grad_norm": 1.8671875, "learning_rate": 0.0001894787225209596, "loss": 3.2495, "step": 7939 }, { "epoch": 0.3717010942711686, "grad_norm": 1.1484375, "learning_rate": 0.00018947611604267325, "loss": 3.1789, "step": 7940 }, { "epoch": 0.3717479080110012, "grad_norm": 1.4140625, "learning_rate": 0.00018947350925950217, "loss": 3.4379, "step": 7941 }, { "epoch": 0.3717947217508339, "grad_norm": 1.65625, "learning_rate": 0.00018947090217145526, "loss": 3.298, "step": 7942 }, { "epoch": 0.3718415354906665, "grad_norm": 1.3046875, "learning_rate": 0.00018946829477854146, "loss": 3.0474, "step": 7943 }, { "epoch": 0.3718883492304991, "grad_norm": 1.046875, "learning_rate": 0.00018946568708076963, "loss": 3.2576, "step": 7944 }, { "epoch": 0.3719351629703318, "grad_norm": 1.3828125, "learning_rate": 0.0001894630790781486, "loss": 2.979, "step": 7945 }, { "epoch": 0.3719819767101644, "grad_norm": 1.3828125, "learning_rate": 0.00018946047077068732, "loss": 3.0969, "step": 7946 }, { "epoch": 0.3720287904499971, "grad_norm": 1.1796875, "learning_rate": 0.00018945786215839464, "loss": 3.2545, "step": 7947 }, { "epoch": 0.3720756041898297, "grad_norm": 1.4375, "learning_rate": 0.00018945525324127948, "loss": 3.0183, "step": 7948 }, { "epoch": 0.37212241792966233, "grad_norm": 1.40625, "learning_rate": 0.00018945264401935074, "loss": 3.0744, "step": 7949 }, { "epoch": 0.372169231669495, "grad_norm": 1.234375, "learning_rate": 0.00018945003449261726, "loss": 3.3053, "step": 7950 }, { "epoch": 0.3722160454093276, "grad_norm": 1.0625, "learning_rate": 0.00018944742466108796, "loss": 3.3414, "step": 7951 }, { "epoch": 0.3722628591491603, "grad_norm": 1.359375, "learning_rate": 0.00018944481452477172, "loss": 3.2627, "step": 7952 }, { "epoch": 0.3723096728889929, "grad_norm": 1.421875, "learning_rate": 0.00018944220408367745, "loss": 3.1797, "step": 7953 }, { "epoch": 0.37235648662882553, "grad_norm": 1.375, "learning_rate": 0.00018943959333781403, "loss": 3.1451, "step": 7954 }, { "epoch": 0.3724033003686582, "grad_norm": 1.59375, "learning_rate": 0.0001894369822871904, "loss": 3.107, "step": 7955 }, { "epoch": 0.3724501141084908, "grad_norm": 1.1953125, "learning_rate": 0.00018943437093181542, "loss": 3.0406, "step": 7956 }, { "epoch": 0.3724969278483235, "grad_norm": 1.390625, "learning_rate": 0.00018943175927169797, "loss": 2.7803, "step": 7957 }, { "epoch": 0.3725437415881561, "grad_norm": 1.3515625, "learning_rate": 0.00018942914730684697, "loss": 3.0684, "step": 7958 }, { "epoch": 0.37259055532798874, "grad_norm": 2.25, "learning_rate": 0.00018942653503727132, "loss": 3.4723, "step": 7959 }, { "epoch": 0.3726373690678214, "grad_norm": 1.3828125, "learning_rate": 0.00018942392246297992, "loss": 3.1705, "step": 7960 }, { "epoch": 0.37268418280765403, "grad_norm": 1.3046875, "learning_rate": 0.0001894213095839817, "loss": 3.5438, "step": 7961 }, { "epoch": 0.3727309965474867, "grad_norm": 1.5234375, "learning_rate": 0.00018941869640028548, "loss": 3.1602, "step": 7962 }, { "epoch": 0.3727778102873193, "grad_norm": 1.25, "learning_rate": 0.00018941608291190028, "loss": 3.4596, "step": 7963 }, { "epoch": 0.372824624027152, "grad_norm": 2.3125, "learning_rate": 0.0001894134691188349, "loss": 2.3148, "step": 7964 }, { "epoch": 0.3728714377669846, "grad_norm": 1.1953125, "learning_rate": 0.00018941085502109832, "loss": 3.244, "step": 7965 }, { "epoch": 0.37291825150681723, "grad_norm": 1.2578125, "learning_rate": 0.00018940824061869943, "loss": 3.3958, "step": 7966 }, { "epoch": 0.3729650652466499, "grad_norm": 1.203125, "learning_rate": 0.0001894056259116471, "loss": 3.1517, "step": 7967 }, { "epoch": 0.3730118789864825, "grad_norm": 1.3671875, "learning_rate": 0.00018940301089995028, "loss": 3.1705, "step": 7968 }, { "epoch": 0.3730586927263152, "grad_norm": 1.109375, "learning_rate": 0.0001894003955836179, "loss": 4.5435, "step": 7969 }, { "epoch": 0.3731055064661478, "grad_norm": 1.65625, "learning_rate": 0.00018939777996265878, "loss": 3.6652, "step": 7970 }, { "epoch": 0.37315232020598044, "grad_norm": 1.5234375, "learning_rate": 0.0001893951640370819, "loss": 3.0278, "step": 7971 }, { "epoch": 0.3731991339458131, "grad_norm": 1.7890625, "learning_rate": 0.0001893925478068962, "loss": 3.145, "step": 7972 }, { "epoch": 0.37324594768564573, "grad_norm": 1.8125, "learning_rate": 0.00018938993127211052, "loss": 3.3447, "step": 7973 }, { "epoch": 0.3732927614254784, "grad_norm": 2.28125, "learning_rate": 0.00018938731443273387, "loss": 2.7266, "step": 7974 }, { "epoch": 0.373339575165311, "grad_norm": 1.53125, "learning_rate": 0.00018938469728877507, "loss": 3.2832, "step": 7975 }, { "epoch": 0.37338638890514364, "grad_norm": 1.59375, "learning_rate": 0.0001893820798402431, "loss": 3.512, "step": 7976 }, { "epoch": 0.3734332026449763, "grad_norm": 1.203125, "learning_rate": 0.00018937946208714686, "loss": 3.1224, "step": 7977 }, { "epoch": 0.37348001638480893, "grad_norm": 1.203125, "learning_rate": 0.00018937684402949527, "loss": 3.0908, "step": 7978 }, { "epoch": 0.3735268301246416, "grad_norm": 1.1640625, "learning_rate": 0.00018937422566729723, "loss": 3.0899, "step": 7979 }, { "epoch": 0.3735736438644742, "grad_norm": 1.2890625, "learning_rate": 0.00018937160700056168, "loss": 3.5286, "step": 7980 }, { "epoch": 0.37362045760430684, "grad_norm": 1.3984375, "learning_rate": 0.00018936898802929757, "loss": 3.1937, "step": 7981 }, { "epoch": 0.3736672713441395, "grad_norm": 1.2265625, "learning_rate": 0.00018936636875351378, "loss": 2.9256, "step": 7982 }, { "epoch": 0.37371408508397214, "grad_norm": 1.15625, "learning_rate": 0.0001893637491732193, "loss": 2.8189, "step": 7983 }, { "epoch": 0.3737608988238048, "grad_norm": 1.59375, "learning_rate": 0.00018936112928842296, "loss": 3.2998, "step": 7984 }, { "epoch": 0.37380771256363743, "grad_norm": 1.046875, "learning_rate": 0.00018935850909913374, "loss": 3.2243, "step": 7985 }, { "epoch": 0.37385452630347005, "grad_norm": 2.515625, "learning_rate": 0.00018935588860536056, "loss": 3.4087, "step": 7986 }, { "epoch": 0.3739013400433027, "grad_norm": 1.5078125, "learning_rate": 0.0001893532678071124, "loss": 2.9755, "step": 7987 }, { "epoch": 0.37394815378313534, "grad_norm": 1.359375, "learning_rate": 0.00018935064670439812, "loss": 3.3905, "step": 7988 }, { "epoch": 0.373994967522968, "grad_norm": 1.34375, "learning_rate": 0.00018934802529722665, "loss": 3.1125, "step": 7989 }, { "epoch": 0.37404178126280063, "grad_norm": 1.8515625, "learning_rate": 0.000189345403585607, "loss": 3.1381, "step": 7990 }, { "epoch": 0.37408859500263325, "grad_norm": 1.7734375, "learning_rate": 0.00018934278156954802, "loss": 3.3716, "step": 7991 }, { "epoch": 0.3741354087424659, "grad_norm": 1.40625, "learning_rate": 0.00018934015924905868, "loss": 2.9721, "step": 7992 }, { "epoch": 0.37418222248229854, "grad_norm": 1.359375, "learning_rate": 0.0001893375366241479, "loss": 3.483, "step": 7993 }, { "epoch": 0.3742290362221312, "grad_norm": 1.40625, "learning_rate": 0.00018933491369482462, "loss": 3.1252, "step": 7994 }, { "epoch": 0.37427584996196384, "grad_norm": 1.875, "learning_rate": 0.0001893322904610978, "loss": 3.44, "step": 7995 }, { "epoch": 0.37432266370179645, "grad_norm": 1.203125, "learning_rate": 0.0001893296669229764, "loss": 3.1131, "step": 7996 }, { "epoch": 0.37436947744162913, "grad_norm": 1.1875, "learning_rate": 0.00018932704308046927, "loss": 2.9492, "step": 7997 }, { "epoch": 0.37441629118146175, "grad_norm": 2.375, "learning_rate": 0.00018932441893358543, "loss": 3.3014, "step": 7998 }, { "epoch": 0.3744631049212944, "grad_norm": 1.5859375, "learning_rate": 0.0001893217944823338, "loss": 2.9254, "step": 7999 }, { "epoch": 0.37450991866112704, "grad_norm": 1.375, "learning_rate": 0.0001893191697267233, "loss": 3.0161, "step": 8000 }, { "epoch": 0.37455673240095966, "grad_norm": 1.4140625, "learning_rate": 0.0001893165446667629, "loss": 3.2099, "step": 8001 }, { "epoch": 0.37460354614079233, "grad_norm": 1.1796875, "learning_rate": 0.00018931391930246153, "loss": 3.2214, "step": 8002 }, { "epoch": 0.37465035988062495, "grad_norm": 1.2265625, "learning_rate": 0.00018931129363382817, "loss": 3.0242, "step": 8003 }, { "epoch": 0.3746971736204576, "grad_norm": 1.328125, "learning_rate": 0.0001893086676608717, "loss": 3.1393, "step": 8004 }, { "epoch": 0.37474398736029024, "grad_norm": 1.5546875, "learning_rate": 0.00018930604138360114, "loss": 3.3163, "step": 8005 }, { "epoch": 0.37479080110012286, "grad_norm": 1.390625, "learning_rate": 0.00018930341480202537, "loss": 3.2748, "step": 8006 }, { "epoch": 0.37483761483995554, "grad_norm": 1.40625, "learning_rate": 0.00018930078791615343, "loss": 2.9351, "step": 8007 }, { "epoch": 0.37488442857978815, "grad_norm": 1.1796875, "learning_rate": 0.0001892981607259942, "loss": 3.2327, "step": 8008 }, { "epoch": 0.3749312423196208, "grad_norm": 1.59375, "learning_rate": 0.0001892955332315566, "loss": 3.2299, "step": 8009 }, { "epoch": 0.37497805605945345, "grad_norm": 1.109375, "learning_rate": 0.00018929290543284968, "loss": 2.3021, "step": 8010 }, { "epoch": 0.37502486979928606, "grad_norm": 3.6875, "learning_rate": 0.0001892902773298823, "loss": 2.5279, "step": 8011 }, { "epoch": 0.37507168353911874, "grad_norm": 1.296875, "learning_rate": 0.0001892876489226635, "loss": 3.2061, "step": 8012 }, { "epoch": 0.37511849727895136, "grad_norm": 1.7421875, "learning_rate": 0.0001892850202112022, "loss": 3.5822, "step": 8013 }, { "epoch": 0.37516531101878403, "grad_norm": 2.25, "learning_rate": 0.00018928239119550737, "loss": 3.0744, "step": 8014 }, { "epoch": 0.37521212475861665, "grad_norm": 1.3125, "learning_rate": 0.00018927976187558792, "loss": 3.1291, "step": 8015 }, { "epoch": 0.37525893849844927, "grad_norm": 2.0, "learning_rate": 0.00018927713225145286, "loss": 3.4112, "step": 8016 }, { "epoch": 0.37530575223828194, "grad_norm": 1.5390625, "learning_rate": 0.0001892745023231111, "loss": 3.2531, "step": 8017 }, { "epoch": 0.37535256597811456, "grad_norm": 1.203125, "learning_rate": 0.00018927187209057166, "loss": 3.1844, "step": 8018 }, { "epoch": 0.37539937971794723, "grad_norm": 1.28125, "learning_rate": 0.00018926924155384346, "loss": 2.9037, "step": 8019 }, { "epoch": 0.37544619345777985, "grad_norm": 1.0078125, "learning_rate": 0.0001892666107129355, "loss": 2.6203, "step": 8020 }, { "epoch": 0.37549300719761247, "grad_norm": 1.46875, "learning_rate": 0.00018926397956785674, "loss": 2.8182, "step": 8021 }, { "epoch": 0.37553982093744515, "grad_norm": 1.6328125, "learning_rate": 0.0001892613481186161, "loss": 2.822, "step": 8022 }, { "epoch": 0.37558663467727776, "grad_norm": 1.1953125, "learning_rate": 0.00018925871636522258, "loss": 3.5373, "step": 8023 }, { "epoch": 0.37563344841711044, "grad_norm": 1.09375, "learning_rate": 0.00018925608430768516, "loss": 3.0637, "step": 8024 }, { "epoch": 0.37568026215694306, "grad_norm": 1.4296875, "learning_rate": 0.00018925345194601277, "loss": 3.3057, "step": 8025 }, { "epoch": 0.37572707589677573, "grad_norm": 1.453125, "learning_rate": 0.0001892508192802144, "loss": 3.1312, "step": 8026 }, { "epoch": 0.37577388963660835, "grad_norm": 2.609375, "learning_rate": 0.00018924818631029902, "loss": 3.6261, "step": 8027 }, { "epoch": 0.37582070337644097, "grad_norm": 2.0, "learning_rate": 0.00018924555303627562, "loss": 3.0357, "step": 8028 }, { "epoch": 0.37586751711627364, "grad_norm": 1.4296875, "learning_rate": 0.00018924291945815314, "loss": 3.132, "step": 8029 }, { "epoch": 0.37591433085610626, "grad_norm": 1.625, "learning_rate": 0.0001892402855759406, "loss": 3.5631, "step": 8030 }, { "epoch": 0.37596114459593893, "grad_norm": 1.2890625, "learning_rate": 0.0001892376513896469, "loss": 3.2332, "step": 8031 }, { "epoch": 0.37600795833577155, "grad_norm": 1.3125, "learning_rate": 0.0001892350168992811, "loss": 3.1535, "step": 8032 }, { "epoch": 0.37605477207560417, "grad_norm": 1.4765625, "learning_rate": 0.00018923238210485213, "loss": 3.3237, "step": 8033 }, { "epoch": 0.37610158581543685, "grad_norm": 1.109375, "learning_rate": 0.00018922974700636897, "loss": 3.06, "step": 8034 }, { "epoch": 0.37614839955526946, "grad_norm": 1.3203125, "learning_rate": 0.0001892271116038406, "loss": 2.7688, "step": 8035 }, { "epoch": 0.37619521329510214, "grad_norm": 1.2578125, "learning_rate": 0.00018922447589727603, "loss": 3.196, "step": 8036 }, { "epoch": 0.37624202703493476, "grad_norm": 1.296875, "learning_rate": 0.0001892218398866842, "loss": 2.1179, "step": 8037 }, { "epoch": 0.3762888407747674, "grad_norm": 1.3046875, "learning_rate": 0.0001892192035720741, "loss": 3.2694, "step": 8038 }, { "epoch": 0.37633565451460005, "grad_norm": 1.3515625, "learning_rate": 0.00018921656695345472, "loss": 3.2449, "step": 8039 }, { "epoch": 0.37638246825443267, "grad_norm": 1.3125, "learning_rate": 0.00018921393003083504, "loss": 3.1681, "step": 8040 }, { "epoch": 0.37642928199426534, "grad_norm": 1.1640625, "learning_rate": 0.0001892112928042241, "loss": 3.0813, "step": 8041 }, { "epoch": 0.37647609573409796, "grad_norm": 1.1640625, "learning_rate": 0.00018920865527363078, "loss": 2.957, "step": 8042 }, { "epoch": 0.3765229094739306, "grad_norm": 1.65625, "learning_rate": 0.00018920601743906414, "loss": 3.5859, "step": 8043 }, { "epoch": 0.37656972321376325, "grad_norm": 1.5859375, "learning_rate": 0.00018920337930053313, "loss": 3.4291, "step": 8044 }, { "epoch": 0.37661653695359587, "grad_norm": 1.4765625, "learning_rate": 0.00018920074085804683, "loss": 2.1293, "step": 8045 }, { "epoch": 0.37666335069342854, "grad_norm": 1.3828125, "learning_rate": 0.00018919810211161407, "loss": 3.0403, "step": 8046 }, { "epoch": 0.37671016443326116, "grad_norm": 1.3125, "learning_rate": 0.000189195463061244, "loss": 3.0536, "step": 8047 }, { "epoch": 0.3767569781730938, "grad_norm": 1.234375, "learning_rate": 0.00018919282370694553, "loss": 3.3194, "step": 8048 }, { "epoch": 0.37680379191292646, "grad_norm": 1.703125, "learning_rate": 0.00018919018404872767, "loss": 3.2959, "step": 8049 }, { "epoch": 0.3768506056527591, "grad_norm": 1.0390625, "learning_rate": 0.0001891875440865994, "loss": 3.015, "step": 8050 }, { "epoch": 0.37689741939259175, "grad_norm": 3.265625, "learning_rate": 0.00018918490382056972, "loss": 3.7946, "step": 8051 }, { "epoch": 0.37694423313242437, "grad_norm": 1.6953125, "learning_rate": 0.00018918226325064766, "loss": 2.7595, "step": 8052 }, { "epoch": 0.376991046872257, "grad_norm": 1.2265625, "learning_rate": 0.0001891796223768422, "loss": 3.328, "step": 8053 }, { "epoch": 0.37703786061208966, "grad_norm": 1.15625, "learning_rate": 0.0001891769811991623, "loss": 2.8496, "step": 8054 }, { "epoch": 0.3770846743519223, "grad_norm": 1.515625, "learning_rate": 0.000189174339717617, "loss": 3.4918, "step": 8055 }, { "epoch": 0.37713148809175495, "grad_norm": 1.59375, "learning_rate": 0.0001891716979322153, "loss": 3.2486, "step": 8056 }, { "epoch": 0.37717830183158757, "grad_norm": 1.6171875, "learning_rate": 0.00018916905584296617, "loss": 3.0584, "step": 8057 }, { "epoch": 0.3772251155714202, "grad_norm": 1.0234375, "learning_rate": 0.0001891664134498787, "loss": 3.0152, "step": 8058 }, { "epoch": 0.37727192931125286, "grad_norm": 1.046875, "learning_rate": 0.00018916377075296178, "loss": 1.8618, "step": 8059 }, { "epoch": 0.3773187430510855, "grad_norm": 1.8125, "learning_rate": 0.00018916112775222446, "loss": 3.5108, "step": 8060 }, { "epoch": 0.37736555679091816, "grad_norm": 1.3359375, "learning_rate": 0.00018915848444767578, "loss": 2.9944, "step": 8061 }, { "epoch": 0.3774123705307508, "grad_norm": 1.5703125, "learning_rate": 0.0001891558408393247, "loss": 3.9069, "step": 8062 }, { "epoch": 0.3774591842705834, "grad_norm": 1.3046875, "learning_rate": 0.00018915319692718025, "loss": 3.4304, "step": 8063 }, { "epoch": 0.37750599801041607, "grad_norm": 1.625, "learning_rate": 0.0001891505527112514, "loss": 3.3666, "step": 8064 }, { "epoch": 0.3775528117502487, "grad_norm": 1.234375, "learning_rate": 0.00018914790819154727, "loss": 3.4391, "step": 8065 }, { "epoch": 0.37759962549008136, "grad_norm": 1.984375, "learning_rate": 0.00018914526336807672, "loss": 4.0298, "step": 8066 }, { "epoch": 0.377646439229914, "grad_norm": 1.4375, "learning_rate": 0.0001891426182408489, "loss": 4.2923, "step": 8067 }, { "epoch": 0.3776932529697466, "grad_norm": 1.1171875, "learning_rate": 0.00018913997280987273, "loss": 3.4077, "step": 8068 }, { "epoch": 0.37774006670957927, "grad_norm": 2.109375, "learning_rate": 0.00018913732707515724, "loss": 3.4732, "step": 8069 }, { "epoch": 0.3777868804494119, "grad_norm": 0.99609375, "learning_rate": 0.0001891346810367115, "loss": 2.1936, "step": 8070 }, { "epoch": 0.37783369418924456, "grad_norm": 1.40625, "learning_rate": 0.00018913203469454444, "loss": 3.3862, "step": 8071 }, { "epoch": 0.3778805079290772, "grad_norm": 1.6796875, "learning_rate": 0.00018912938804866515, "loss": 3.4301, "step": 8072 }, { "epoch": 0.3779273216689098, "grad_norm": 1.3828125, "learning_rate": 0.00018912674109908265, "loss": 3.1375, "step": 8073 }, { "epoch": 0.3779741354087425, "grad_norm": 1.7578125, "learning_rate": 0.00018912409384580587, "loss": 3.3336, "step": 8074 }, { "epoch": 0.3780209491485751, "grad_norm": 1.671875, "learning_rate": 0.00018912144628884393, "loss": 3.1119, "step": 8075 }, { "epoch": 0.37806776288840777, "grad_norm": 1.2109375, "learning_rate": 0.00018911879842820582, "loss": 3.0153, "step": 8076 }, { "epoch": 0.3781145766282404, "grad_norm": 1.578125, "learning_rate": 0.00018911615026390055, "loss": 3.5954, "step": 8077 }, { "epoch": 0.378161390368073, "grad_norm": 1.3359375, "learning_rate": 0.0001891135017959371, "loss": 3.2172, "step": 8078 }, { "epoch": 0.3782082041079057, "grad_norm": 1.578125, "learning_rate": 0.00018911085302432464, "loss": 2.7261, "step": 8079 }, { "epoch": 0.3782550178477383, "grad_norm": 1.3671875, "learning_rate": 0.00018910820394907202, "loss": 2.5764, "step": 8080 }, { "epoch": 0.37830183158757097, "grad_norm": 1.0703125, "learning_rate": 0.00018910555457018837, "loss": 2.9912, "step": 8081 }, { "epoch": 0.3783486453274036, "grad_norm": 1.6640625, "learning_rate": 0.0001891029048876827, "loss": 2.9573, "step": 8082 }, { "epoch": 0.3783954590672362, "grad_norm": 1.2578125, "learning_rate": 0.000189100254901564, "loss": 3.2807, "step": 8083 }, { "epoch": 0.3784422728070689, "grad_norm": 1.5625, "learning_rate": 0.00018909760461184138, "loss": 3.3101, "step": 8084 }, { "epoch": 0.3784890865469015, "grad_norm": 1.2421875, "learning_rate": 0.0001890949540185238, "loss": 2.786, "step": 8085 }, { "epoch": 0.3785359002867342, "grad_norm": 1.15625, "learning_rate": 0.0001890923031216203, "loss": 3.0331, "step": 8086 }, { "epoch": 0.3785827140265668, "grad_norm": 1.171875, "learning_rate": 0.00018908965192113993, "loss": 3.0903, "step": 8087 }, { "epoch": 0.37862952776639947, "grad_norm": 1.3515625, "learning_rate": 0.00018908700041709176, "loss": 3.065, "step": 8088 }, { "epoch": 0.3786763415062321, "grad_norm": 1.640625, "learning_rate": 0.00018908434860948474, "loss": 3.1492, "step": 8089 }, { "epoch": 0.3787231552460647, "grad_norm": 1.1953125, "learning_rate": 0.000189081696498328, "loss": 3.1168, "step": 8090 }, { "epoch": 0.3787699689858974, "grad_norm": 1.515625, "learning_rate": 0.00018907904408363047, "loss": 2.9754, "step": 8091 }, { "epoch": 0.37881678272573, "grad_norm": 1.3828125, "learning_rate": 0.00018907639136540129, "loss": 3.2924, "step": 8092 }, { "epoch": 0.37886359646556267, "grad_norm": 1.203125, "learning_rate": 0.00018907373834364943, "loss": 3.0435, "step": 8093 }, { "epoch": 0.3789104102053953, "grad_norm": 1.515625, "learning_rate": 0.00018907108501838398, "loss": 3.6203, "step": 8094 }, { "epoch": 0.3789572239452279, "grad_norm": 1.296875, "learning_rate": 0.00018906843138961392, "loss": 3.3849, "step": 8095 }, { "epoch": 0.3790040376850606, "grad_norm": 1.640625, "learning_rate": 0.00018906577745734837, "loss": 3.0322, "step": 8096 }, { "epoch": 0.3790508514248932, "grad_norm": 1.2578125, "learning_rate": 0.00018906312322159629, "loss": 2.3836, "step": 8097 }, { "epoch": 0.37909766516472587, "grad_norm": 1.71875, "learning_rate": 0.0001890604686823668, "loss": 3.4277, "step": 8098 }, { "epoch": 0.3791444789045585, "grad_norm": 1.40625, "learning_rate": 0.0001890578138396689, "loss": 3.0114, "step": 8099 }, { "epoch": 0.3791912926443911, "grad_norm": 1.125, "learning_rate": 0.00018905515869351166, "loss": 3.1408, "step": 8100 }, { "epoch": 0.3792381063842238, "grad_norm": 1.2734375, "learning_rate": 0.00018905250324390406, "loss": 3.2708, "step": 8101 }, { "epoch": 0.3792849201240564, "grad_norm": 1.4609375, "learning_rate": 0.00018904984749085527, "loss": 2.8694, "step": 8102 }, { "epoch": 0.3793317338638891, "grad_norm": 1.265625, "learning_rate": 0.00018904719143437423, "loss": 3.5909, "step": 8103 }, { "epoch": 0.3793785476037217, "grad_norm": 1.7578125, "learning_rate": 0.00018904453507447003, "loss": 3.1618, "step": 8104 }, { "epoch": 0.3794253613435543, "grad_norm": 1.6328125, "learning_rate": 0.00018904187841115174, "loss": 3.0235, "step": 8105 }, { "epoch": 0.379472175083387, "grad_norm": 1.46875, "learning_rate": 0.00018903922144442842, "loss": 2.7977, "step": 8106 }, { "epoch": 0.3795189888232196, "grad_norm": 1.734375, "learning_rate": 0.00018903656417430906, "loss": 3.1998, "step": 8107 }, { "epoch": 0.3795658025630523, "grad_norm": 1.1640625, "learning_rate": 0.00018903390660080275, "loss": 3.3458, "step": 8108 }, { "epoch": 0.3796126163028849, "grad_norm": 1.2109375, "learning_rate": 0.00018903124872391856, "loss": 2.4913, "step": 8109 }, { "epoch": 0.3796594300427175, "grad_norm": 1.25, "learning_rate": 0.00018902859054366558, "loss": 2.6529, "step": 8110 }, { "epoch": 0.3797062437825502, "grad_norm": 1.125, "learning_rate": 0.00018902593206005278, "loss": 3.2755, "step": 8111 }, { "epoch": 0.3797530575223828, "grad_norm": 1.8984375, "learning_rate": 0.00018902327327308927, "loss": 3.404, "step": 8112 }, { "epoch": 0.3797998712622155, "grad_norm": 1.1875, "learning_rate": 0.00018902061418278412, "loss": 3.3214, "step": 8113 }, { "epoch": 0.3798466850020481, "grad_norm": 2.265625, "learning_rate": 0.00018901795478914636, "loss": 3.2214, "step": 8114 }, { "epoch": 0.3798934987418807, "grad_norm": 1.2890625, "learning_rate": 0.00018901529509218504, "loss": 2.8334, "step": 8115 }, { "epoch": 0.3799403124817134, "grad_norm": 1.25, "learning_rate": 0.00018901263509190928, "loss": 3.0828, "step": 8116 }, { "epoch": 0.379987126221546, "grad_norm": 1.40625, "learning_rate": 0.0001890099747883281, "loss": 3.1446, "step": 8117 }, { "epoch": 0.3800339399613787, "grad_norm": 1.265625, "learning_rate": 0.00018900731418145056, "loss": 3.1001, "step": 8118 }, { "epoch": 0.3800807537012113, "grad_norm": 2.46875, "learning_rate": 0.00018900465327128578, "loss": 3.0245, "step": 8119 }, { "epoch": 0.3801275674410439, "grad_norm": 1.25, "learning_rate": 0.00018900199205784275, "loss": 2.6232, "step": 8120 }, { "epoch": 0.3801743811808766, "grad_norm": 1.5859375, "learning_rate": 0.0001889993305411306, "loss": 3.4292, "step": 8121 }, { "epoch": 0.3802211949207092, "grad_norm": 1.8515625, "learning_rate": 0.00018899666872115837, "loss": 3.6067, "step": 8122 }, { "epoch": 0.3802680086605419, "grad_norm": 1.390625, "learning_rate": 0.00018899400659793512, "loss": 3.3444, "step": 8123 }, { "epoch": 0.3803148224003745, "grad_norm": 1.015625, "learning_rate": 0.00018899134417146995, "loss": 4.4435, "step": 8124 }, { "epoch": 0.3803616361402071, "grad_norm": 1.5625, "learning_rate": 0.00018898868144177192, "loss": 3.3867, "step": 8125 }, { "epoch": 0.3804084498800398, "grad_norm": 2.28125, "learning_rate": 0.0001889860184088501, "loss": 2.7971, "step": 8126 }, { "epoch": 0.3804552636198724, "grad_norm": 1.78125, "learning_rate": 0.0001889833550727136, "loss": 3.7169, "step": 8127 }, { "epoch": 0.3805020773597051, "grad_norm": 1.3984375, "learning_rate": 0.0001889806914333714, "loss": 3.6094, "step": 8128 }, { "epoch": 0.3805488910995377, "grad_norm": 1.296875, "learning_rate": 0.00018897802749083265, "loss": 3.0829, "step": 8129 }, { "epoch": 0.38059570483937033, "grad_norm": 1.1484375, "learning_rate": 0.00018897536324510644, "loss": 4.3796, "step": 8130 }, { "epoch": 0.380642518579203, "grad_norm": 1.5859375, "learning_rate": 0.0001889726986962018, "loss": 3.1532, "step": 8131 }, { "epoch": 0.3806893323190356, "grad_norm": 1.234375, "learning_rate": 0.00018897003384412786, "loss": 3.1271, "step": 8132 }, { "epoch": 0.3807361460588683, "grad_norm": 1.546875, "learning_rate": 0.00018896736868889366, "loss": 4.8958, "step": 8133 }, { "epoch": 0.3807829597987009, "grad_norm": 1.2109375, "learning_rate": 0.00018896470323050826, "loss": 3.1344, "step": 8134 }, { "epoch": 0.38082977353853353, "grad_norm": 1.421875, "learning_rate": 0.00018896203746898084, "loss": 2.951, "step": 8135 }, { "epoch": 0.3808765872783662, "grad_norm": 1.5625, "learning_rate": 0.00018895937140432038, "loss": 2.9403, "step": 8136 }, { "epoch": 0.3809234010181988, "grad_norm": 1.1875, "learning_rate": 0.000188956705036536, "loss": 3.0267, "step": 8137 }, { "epoch": 0.3809702147580315, "grad_norm": 0.984375, "learning_rate": 0.0001889540383656368, "loss": 3.2656, "step": 8138 }, { "epoch": 0.3810170284978641, "grad_norm": 1.5078125, "learning_rate": 0.00018895137139163186, "loss": 3.054, "step": 8139 }, { "epoch": 0.38106384223769674, "grad_norm": 1.734375, "learning_rate": 0.00018894870411453028, "loss": 3.2555, "step": 8140 }, { "epoch": 0.3811106559775294, "grad_norm": 1.8671875, "learning_rate": 0.0001889460365343411, "loss": 3.0728, "step": 8141 }, { "epoch": 0.38115746971736203, "grad_norm": 1.1953125, "learning_rate": 0.00018894336865107344, "loss": 3.176, "step": 8142 }, { "epoch": 0.3812042834571947, "grad_norm": 1.46875, "learning_rate": 0.00018894070046473642, "loss": 3.3724, "step": 8143 }, { "epoch": 0.3812510971970273, "grad_norm": 1.296875, "learning_rate": 0.0001889380319753391, "loss": 2.8406, "step": 8144 }, { "epoch": 0.38129791093686, "grad_norm": 1.6640625, "learning_rate": 0.00018893536318289055, "loss": 3.4735, "step": 8145 }, { "epoch": 0.3813447246766926, "grad_norm": 1.5859375, "learning_rate": 0.00018893269408739991, "loss": 3.5663, "step": 8146 }, { "epoch": 0.38139153841652523, "grad_norm": 1.8515625, "learning_rate": 0.00018893002468887624, "loss": 3.2058, "step": 8147 }, { "epoch": 0.3814383521563579, "grad_norm": 1.1484375, "learning_rate": 0.00018892735498732868, "loss": 3.1049, "step": 8148 }, { "epoch": 0.3814851658961905, "grad_norm": 1.8828125, "learning_rate": 0.00018892468498276626, "loss": 2.94, "step": 8149 }, { "epoch": 0.3815319796360232, "grad_norm": 1.3125, "learning_rate": 0.00018892201467519815, "loss": 3.4214, "step": 8150 }, { "epoch": 0.3815787933758558, "grad_norm": 1.7109375, "learning_rate": 0.00018891934406463343, "loss": 3.1672, "step": 8151 }, { "epoch": 0.38162560711568844, "grad_norm": 1.1875, "learning_rate": 0.00018891667315108115, "loss": 2.3742, "step": 8152 }, { "epoch": 0.3816724208555211, "grad_norm": 1.8125, "learning_rate": 0.00018891400193455043, "loss": 3.7283, "step": 8153 }, { "epoch": 0.38171923459535373, "grad_norm": 1.3203125, "learning_rate": 0.00018891133041505043, "loss": 3.3209, "step": 8154 }, { "epoch": 0.3817660483351864, "grad_norm": 1.421875, "learning_rate": 0.0001889086585925902, "loss": 3.0113, "step": 8155 }, { "epoch": 0.381812862075019, "grad_norm": 1.1484375, "learning_rate": 0.00018890598646717887, "loss": 3.1682, "step": 8156 }, { "epoch": 0.38185967581485164, "grad_norm": 1.3828125, "learning_rate": 0.0001889033140388255, "loss": 2.9457, "step": 8157 }, { "epoch": 0.3819064895546843, "grad_norm": 1.421875, "learning_rate": 0.0001889006413075392, "loss": 3.0568, "step": 8158 }, { "epoch": 0.38195330329451693, "grad_norm": 1.6328125, "learning_rate": 0.00018889796827332916, "loss": 3.5615, "step": 8159 }, { "epoch": 0.3820001170343496, "grad_norm": 1.0859375, "learning_rate": 0.00018889529493620438, "loss": 2.7927, "step": 8160 }, { "epoch": 0.3820469307741822, "grad_norm": 1.484375, "learning_rate": 0.00018889262129617406, "loss": 3.2413, "step": 8161 }, { "epoch": 0.38209374451401484, "grad_norm": 1.6328125, "learning_rate": 0.00018888994735324728, "loss": 3.0472, "step": 8162 }, { "epoch": 0.3821405582538475, "grad_norm": 1.40625, "learning_rate": 0.0001888872731074331, "loss": 3.0062, "step": 8163 }, { "epoch": 0.38218737199368014, "grad_norm": 1.2421875, "learning_rate": 0.00018888459855874072, "loss": 2.9121, "step": 8164 }, { "epoch": 0.3822341857335128, "grad_norm": 1.3828125, "learning_rate": 0.0001888819237071792, "loss": 3.4109, "step": 8165 }, { "epoch": 0.38228099947334543, "grad_norm": 1.4609375, "learning_rate": 0.00018887924855275762, "loss": 3.2749, "step": 8166 }, { "epoch": 0.38232781321317805, "grad_norm": 1.2265625, "learning_rate": 0.00018887657309548517, "loss": 2.8683, "step": 8167 }, { "epoch": 0.3823746269530107, "grad_norm": 1.109375, "learning_rate": 0.00018887389733537093, "loss": 2.9618, "step": 8168 }, { "epoch": 0.38242144069284334, "grad_norm": 1.0546875, "learning_rate": 0.00018887122127242404, "loss": 3.2365, "step": 8169 }, { "epoch": 0.382468254432676, "grad_norm": 1.5546875, "learning_rate": 0.00018886854490665357, "loss": 3.4844, "step": 8170 }, { "epoch": 0.38251506817250863, "grad_norm": 1.5703125, "learning_rate": 0.00018886586823806868, "loss": 3.0806, "step": 8171 }, { "epoch": 0.38256188191234125, "grad_norm": 1.25, "learning_rate": 0.00018886319126667852, "loss": 2.9479, "step": 8172 }, { "epoch": 0.3826086956521739, "grad_norm": 1.59375, "learning_rate": 0.00018886051399249213, "loss": 3.5208, "step": 8173 }, { "epoch": 0.38265550939200654, "grad_norm": 1.2578125, "learning_rate": 0.0001888578364155187, "loss": 3.1123, "step": 8174 }, { "epoch": 0.3827023231318392, "grad_norm": 1.2890625, "learning_rate": 0.00018885515853576728, "loss": 3.1427, "step": 8175 }, { "epoch": 0.38274913687167184, "grad_norm": 1.359375, "learning_rate": 0.00018885248035324707, "loss": 2.8774, "step": 8176 }, { "epoch": 0.38279595061150445, "grad_norm": 1.3984375, "learning_rate": 0.0001888498018679672, "loss": 3.2954, "step": 8177 }, { "epoch": 0.38284276435133713, "grad_norm": 1.3046875, "learning_rate": 0.00018884712307993672, "loss": 2.8325, "step": 8178 }, { "epoch": 0.38288957809116975, "grad_norm": 1.4765625, "learning_rate": 0.00018884444398916482, "loss": 3.2317, "step": 8179 }, { "epoch": 0.3829363918310024, "grad_norm": 1.59375, "learning_rate": 0.0001888417645956606, "loss": 2.7303, "step": 8180 }, { "epoch": 0.38298320557083504, "grad_norm": 1.4921875, "learning_rate": 0.00018883908489943324, "loss": 3.372, "step": 8181 }, { "epoch": 0.38303001931066766, "grad_norm": 1.46875, "learning_rate": 0.0001888364049004918, "loss": 3.4353, "step": 8182 }, { "epoch": 0.38307683305050033, "grad_norm": 1.546875, "learning_rate": 0.00018883372459884546, "loss": 2.7468, "step": 8183 }, { "epoch": 0.38312364679033295, "grad_norm": 1.4140625, "learning_rate": 0.00018883104399450336, "loss": 2.7504, "step": 8184 }, { "epoch": 0.3831704605301656, "grad_norm": 1.0546875, "learning_rate": 0.00018882836308747457, "loss": 3.0106, "step": 8185 }, { "epoch": 0.38321727426999824, "grad_norm": 1.4140625, "learning_rate": 0.00018882568187776827, "loss": 3.238, "step": 8186 }, { "epoch": 0.38326408800983086, "grad_norm": 1.5546875, "learning_rate": 0.00018882300036539363, "loss": 3.2418, "step": 8187 }, { "epoch": 0.38331090174966354, "grad_norm": 1.546875, "learning_rate": 0.00018882031855035973, "loss": 3.7591, "step": 8188 }, { "epoch": 0.38335771548949615, "grad_norm": 2.15625, "learning_rate": 0.00018881763643267572, "loss": 3.1293, "step": 8189 }, { "epoch": 0.3834045292293288, "grad_norm": 1.4921875, "learning_rate": 0.00018881495401235077, "loss": 3.1056, "step": 8190 }, { "epoch": 0.38345134296916145, "grad_norm": 1.0859375, "learning_rate": 0.00018881227128939395, "loss": 2.8516, "step": 8191 }, { "epoch": 0.38349815670899406, "grad_norm": 1.671875, "learning_rate": 0.0001888095882638145, "loss": 3.3056, "step": 8192 }, { "epoch": 0.38354497044882674, "grad_norm": 1.28125, "learning_rate": 0.0001888069049356215, "loss": 3.1296, "step": 8193 }, { "epoch": 0.38359178418865936, "grad_norm": 1.296875, "learning_rate": 0.0001888042213048241, "loss": 3.011, "step": 8194 }, { "epoch": 0.38363859792849203, "grad_norm": 1.2890625, "learning_rate": 0.00018880153737143145, "loss": 3.1095, "step": 8195 }, { "epoch": 0.38368541166832465, "grad_norm": 1.5859375, "learning_rate": 0.00018879885313545267, "loss": 3.1835, "step": 8196 }, { "epoch": 0.38373222540815727, "grad_norm": 1.25, "learning_rate": 0.00018879616859689694, "loss": 3.4166, "step": 8197 }, { "epoch": 0.38377903914798994, "grad_norm": 1.234375, "learning_rate": 0.00018879348375577343, "loss": 4.3411, "step": 8198 }, { "epoch": 0.38382585288782256, "grad_norm": 1.328125, "learning_rate": 0.00018879079861209124, "loss": 2.7542, "step": 8199 }, { "epoch": 0.38387266662765523, "grad_norm": 1.375, "learning_rate": 0.00018878811316585953, "loss": 2.7063, "step": 8200 }, { "epoch": 0.38391948036748785, "grad_norm": 1.140625, "learning_rate": 0.00018878542741708744, "loss": 3.3405, "step": 8201 }, { "epoch": 0.38396629410732047, "grad_norm": 1.3984375, "learning_rate": 0.0001887827413657842, "loss": 2.6941, "step": 8202 }, { "epoch": 0.38401310784715315, "grad_norm": 1.2578125, "learning_rate": 0.00018878005501195885, "loss": 3.2688, "step": 8203 }, { "epoch": 0.38405992158698576, "grad_norm": 1.953125, "learning_rate": 0.00018877736835562058, "loss": 3.067, "step": 8204 }, { "epoch": 0.38410673532681844, "grad_norm": 1.1953125, "learning_rate": 0.0001887746813967786, "loss": 2.7889, "step": 8205 }, { "epoch": 0.38415354906665106, "grad_norm": 1.3046875, "learning_rate": 0.000188771994135442, "loss": 4.006, "step": 8206 }, { "epoch": 0.38420036280648373, "grad_norm": 2.0625, "learning_rate": 0.00018876930657161999, "loss": 3.4108, "step": 8207 }, { "epoch": 0.38424717654631635, "grad_norm": 1.2421875, "learning_rate": 0.00018876661870532164, "loss": 3.5557, "step": 8208 }, { "epoch": 0.38429399028614897, "grad_norm": 1.203125, "learning_rate": 0.00018876393053655622, "loss": 3.1323, "step": 8209 }, { "epoch": 0.38434080402598164, "grad_norm": 1.3984375, "learning_rate": 0.0001887612420653328, "loss": 3.2279, "step": 8210 }, { "epoch": 0.38438761776581426, "grad_norm": 2.1875, "learning_rate": 0.00018875855329166061, "loss": 2.6039, "step": 8211 }, { "epoch": 0.38443443150564693, "grad_norm": 1.3203125, "learning_rate": 0.0001887558642155488, "loss": 2.875, "step": 8212 }, { "epoch": 0.38448124524547955, "grad_norm": 1.3125, "learning_rate": 0.00018875317483700647, "loss": 3.0747, "step": 8213 }, { "epoch": 0.38452805898531217, "grad_norm": 1.40625, "learning_rate": 0.00018875048515604284, "loss": 3.3342, "step": 8214 }, { "epoch": 0.38457487272514485, "grad_norm": 1.1796875, "learning_rate": 0.00018874779517266707, "loss": 3.1635, "step": 8215 }, { "epoch": 0.38462168646497746, "grad_norm": 1.7734375, "learning_rate": 0.0001887451048868883, "loss": 3.1747, "step": 8216 }, { "epoch": 0.38466850020481014, "grad_norm": 1.328125, "learning_rate": 0.0001887424142987157, "loss": 3.0205, "step": 8217 }, { "epoch": 0.38471531394464276, "grad_norm": 1.4375, "learning_rate": 0.0001887397234081585, "loss": 3.563, "step": 8218 }, { "epoch": 0.3847621276844754, "grad_norm": 1.2421875, "learning_rate": 0.0001887370322152258, "loss": 3.1709, "step": 8219 }, { "epoch": 0.38480894142430805, "grad_norm": 1.6171875, "learning_rate": 0.0001887343407199268, "loss": 3.3961, "step": 8220 }, { "epoch": 0.38485575516414067, "grad_norm": 1.515625, "learning_rate": 0.00018873164892227063, "loss": 3.1855, "step": 8221 }, { "epoch": 0.38490256890397334, "grad_norm": 1.296875, "learning_rate": 0.00018872895682226653, "loss": 2.9626, "step": 8222 }, { "epoch": 0.38494938264380596, "grad_norm": 1.46875, "learning_rate": 0.00018872626441992366, "loss": 3.2417, "step": 8223 }, { "epoch": 0.3849961963836386, "grad_norm": 1.609375, "learning_rate": 0.0001887235717152511, "loss": 3.5815, "step": 8224 }, { "epoch": 0.38504301012347125, "grad_norm": 0.9140625, "learning_rate": 0.00018872087870825817, "loss": 2.2371, "step": 8225 }, { "epoch": 0.38508982386330387, "grad_norm": 1.765625, "learning_rate": 0.0001887181853989539, "loss": 3.0402, "step": 8226 }, { "epoch": 0.38513663760313654, "grad_norm": 1.359375, "learning_rate": 0.00018871549178734763, "loss": 3.203, "step": 8227 }, { "epoch": 0.38518345134296916, "grad_norm": 1.2421875, "learning_rate": 0.0001887127978734484, "loss": 2.9874, "step": 8228 }, { "epoch": 0.3852302650828018, "grad_norm": 1.3125, "learning_rate": 0.00018871010365726542, "loss": 3.2017, "step": 8229 }, { "epoch": 0.38527707882263446, "grad_norm": 1.2421875, "learning_rate": 0.0001887074091388079, "loss": 3.3775, "step": 8230 }, { "epoch": 0.3853238925624671, "grad_norm": 1.3515625, "learning_rate": 0.00018870471431808506, "loss": 2.9839, "step": 8231 }, { "epoch": 0.38537070630229975, "grad_norm": 1.3515625, "learning_rate": 0.00018870201919510596, "loss": 3.4089, "step": 8232 }, { "epoch": 0.38541752004213237, "grad_norm": 1.4609375, "learning_rate": 0.00018869932376987993, "loss": 3.2037, "step": 8233 }, { "epoch": 0.385464333781965, "grad_norm": 1.203125, "learning_rate": 0.000188696628042416, "loss": 2.6471, "step": 8234 }, { "epoch": 0.38551114752179766, "grad_norm": 1.2734375, "learning_rate": 0.0001886939320127235, "loss": 3.2818, "step": 8235 }, { "epoch": 0.3855579612616303, "grad_norm": 1.375, "learning_rate": 0.00018869123568081156, "loss": 3.5189, "step": 8236 }, { "epoch": 0.38560477500146295, "grad_norm": 1.375, "learning_rate": 0.00018868853904668932, "loss": 3.0874, "step": 8237 }, { "epoch": 0.38565158874129557, "grad_norm": 1.3203125, "learning_rate": 0.00018868584211036602, "loss": 2.6905, "step": 8238 }, { "epoch": 0.3856984024811282, "grad_norm": 1.1953125, "learning_rate": 0.00018868314487185085, "loss": 3.2076, "step": 8239 }, { "epoch": 0.38574521622096086, "grad_norm": 1.234375, "learning_rate": 0.00018868044733115298, "loss": 2.9229, "step": 8240 }, { "epoch": 0.3857920299607935, "grad_norm": 8.8125, "learning_rate": 0.00018867774948828163, "loss": 4.184, "step": 8241 }, { "epoch": 0.38583884370062616, "grad_norm": 1.421875, "learning_rate": 0.00018867505134324594, "loss": 2.9921, "step": 8242 }, { "epoch": 0.3858856574404588, "grad_norm": 1.3515625, "learning_rate": 0.00018867235289605518, "loss": 3.171, "step": 8243 }, { "epoch": 0.3859324711802914, "grad_norm": 1.953125, "learning_rate": 0.00018866965414671846, "loss": 3.1832, "step": 8244 }, { "epoch": 0.38597928492012407, "grad_norm": 1.171875, "learning_rate": 0.0001886669550952451, "loss": 2.8763, "step": 8245 }, { "epoch": 0.3860260986599567, "grad_norm": 1.0625, "learning_rate": 0.00018866425574164412, "loss": 3.183, "step": 8246 }, { "epoch": 0.38607291239978936, "grad_norm": 1.109375, "learning_rate": 0.00018866155608592483, "loss": 2.8443, "step": 8247 }, { "epoch": 0.386119726139622, "grad_norm": 1.2265625, "learning_rate": 0.00018865885612809647, "loss": 2.9787, "step": 8248 }, { "epoch": 0.3861665398794546, "grad_norm": 1.75, "learning_rate": 0.00018865615586816814, "loss": 3.1836, "step": 8249 }, { "epoch": 0.38621335361928727, "grad_norm": 1.3203125, "learning_rate": 0.00018865345530614906, "loss": 3.1131, "step": 8250 }, { "epoch": 0.3862601673591199, "grad_norm": 1.59375, "learning_rate": 0.00018865075444204853, "loss": 2.9349, "step": 8251 }, { "epoch": 0.38630698109895256, "grad_norm": 1.1171875, "learning_rate": 0.0001886480532758756, "loss": 1.407, "step": 8252 }, { "epoch": 0.3863537948387852, "grad_norm": 1.7109375, "learning_rate": 0.00018864535180763961, "loss": 3.2193, "step": 8253 }, { "epoch": 0.3864006085786178, "grad_norm": 1.4609375, "learning_rate": 0.00018864265003734967, "loss": 3.3822, "step": 8254 }, { "epoch": 0.3864474223184505, "grad_norm": 1.484375, "learning_rate": 0.00018863994796501502, "loss": 3.0124, "step": 8255 }, { "epoch": 0.3864942360582831, "grad_norm": 0.9765625, "learning_rate": 0.0001886372455906449, "loss": 4.1605, "step": 8256 }, { "epoch": 0.38654104979811577, "grad_norm": 1.3125, "learning_rate": 0.00018863454291424847, "loss": 3.3524, "step": 8257 }, { "epoch": 0.3865878635379484, "grad_norm": 1.1796875, "learning_rate": 0.00018863183993583497, "loss": 5.2307, "step": 8258 }, { "epoch": 0.386634677277781, "grad_norm": 2.484375, "learning_rate": 0.0001886291366554136, "loss": 3.5304, "step": 8259 }, { "epoch": 0.3866814910176137, "grad_norm": 1.7421875, "learning_rate": 0.00018862643307299357, "loss": 3.2386, "step": 8260 }, { "epoch": 0.3867283047574463, "grad_norm": 1.4296875, "learning_rate": 0.0001886237291885841, "loss": 3.2628, "step": 8261 }, { "epoch": 0.38677511849727897, "grad_norm": 1.1171875, "learning_rate": 0.00018862102500219433, "loss": 2.7712, "step": 8262 }, { "epoch": 0.3868219322371116, "grad_norm": 1.4609375, "learning_rate": 0.00018861832051383362, "loss": 2.945, "step": 8263 }, { "epoch": 0.3868687459769442, "grad_norm": 1.578125, "learning_rate": 0.00018861561572351107, "loss": 3.2976, "step": 8264 }, { "epoch": 0.3869155597167769, "grad_norm": 1.6171875, "learning_rate": 0.00018861291063123595, "loss": 3.3504, "step": 8265 }, { "epoch": 0.3869623734566095, "grad_norm": 1.4296875, "learning_rate": 0.00018861020523701743, "loss": 3.3662, "step": 8266 }, { "epoch": 0.3870091871964422, "grad_norm": 1.1875, "learning_rate": 0.0001886074995408648, "loss": 3.1155, "step": 8267 }, { "epoch": 0.3870560009362748, "grad_norm": 1.4296875, "learning_rate": 0.00018860479354278718, "loss": 3.3301, "step": 8268 }, { "epoch": 0.38710281467610747, "grad_norm": 1.203125, "learning_rate": 0.00018860208724279392, "loss": 3.0865, "step": 8269 }, { "epoch": 0.3871496284159401, "grad_norm": 1.375, "learning_rate": 0.00018859938064089412, "loss": 3.1082, "step": 8270 }, { "epoch": 0.3871964421557727, "grad_norm": 1.5625, "learning_rate": 0.00018859667373709703, "loss": 3.2339, "step": 8271 }, { "epoch": 0.3872432558956054, "grad_norm": 1.3203125, "learning_rate": 0.00018859396653141195, "loss": 2.7321, "step": 8272 }, { "epoch": 0.387290069635438, "grad_norm": 1.1171875, "learning_rate": 0.00018859125902384802, "loss": 3.2015, "step": 8273 }, { "epoch": 0.38733688337527067, "grad_norm": 1.140625, "learning_rate": 0.0001885885512144145, "loss": 3.0485, "step": 8274 }, { "epoch": 0.3873836971151033, "grad_norm": 1.328125, "learning_rate": 0.00018858584310312061, "loss": 2.7351, "step": 8275 }, { "epoch": 0.3874305108549359, "grad_norm": 1.359375, "learning_rate": 0.00018858313468997557, "loss": 3.136, "step": 8276 }, { "epoch": 0.3874773245947686, "grad_norm": 2.3125, "learning_rate": 0.00018858042597498864, "loss": 3.7492, "step": 8277 }, { "epoch": 0.3875241383346012, "grad_norm": 1.7578125, "learning_rate": 0.00018857771695816904, "loss": 3.372, "step": 8278 }, { "epoch": 0.38757095207443387, "grad_norm": 1.7109375, "learning_rate": 0.000188575007639526, "loss": 3.1913, "step": 8279 }, { "epoch": 0.3876177658142665, "grad_norm": 1.15625, "learning_rate": 0.00018857229801906867, "loss": 3.0661, "step": 8280 }, { "epoch": 0.3876645795540991, "grad_norm": 1.5546875, "learning_rate": 0.0001885695880968064, "loss": 3.1274, "step": 8281 }, { "epoch": 0.3877113932939318, "grad_norm": 1.3203125, "learning_rate": 0.00018856687787274838, "loss": 3.502, "step": 8282 }, { "epoch": 0.3877582070337644, "grad_norm": 1.171875, "learning_rate": 0.00018856416734690386, "loss": 2.887, "step": 8283 }, { "epoch": 0.3878050207735971, "grad_norm": 1.15625, "learning_rate": 0.000188561456519282, "loss": 2.9744, "step": 8284 }, { "epoch": 0.3878518345134297, "grad_norm": 1.4609375, "learning_rate": 0.00018855874538989214, "loss": 3.2126, "step": 8285 }, { "epoch": 0.3878986482532623, "grad_norm": 1.2421875, "learning_rate": 0.00018855603395874347, "loss": 3.0446, "step": 8286 }, { "epoch": 0.387945461993095, "grad_norm": 1.53125, "learning_rate": 0.00018855332222584523, "loss": 3.262, "step": 8287 }, { "epoch": 0.3879922757329276, "grad_norm": 1.5546875, "learning_rate": 0.00018855061019120667, "loss": 3.3539, "step": 8288 }, { "epoch": 0.3880390894727603, "grad_norm": 1.203125, "learning_rate": 0.00018854789785483698, "loss": 4.1123, "step": 8289 }, { "epoch": 0.3880859032125929, "grad_norm": 1.21875, "learning_rate": 0.0001885451852167455, "loss": 3.3245, "step": 8290 }, { "epoch": 0.3881327169524255, "grad_norm": 1.234375, "learning_rate": 0.00018854247227694142, "loss": 2.9903, "step": 8291 }, { "epoch": 0.3881795306922582, "grad_norm": 1.5703125, "learning_rate": 0.00018853975903543394, "loss": 3.0199, "step": 8292 }, { "epoch": 0.3882263444320908, "grad_norm": 1.2421875, "learning_rate": 0.0001885370454922324, "loss": 3.0823, "step": 8293 }, { "epoch": 0.3882731581719235, "grad_norm": 1.3828125, "learning_rate": 0.00018853433164734597, "loss": 2.7408, "step": 8294 }, { "epoch": 0.3883199719117561, "grad_norm": 1.3984375, "learning_rate": 0.00018853161750078393, "loss": 3.4875, "step": 8295 }, { "epoch": 0.3883667856515887, "grad_norm": 1.5234375, "learning_rate": 0.00018852890305255554, "loss": 3.25, "step": 8296 }, { "epoch": 0.3884135993914214, "grad_norm": 1.546875, "learning_rate": 0.00018852618830266998, "loss": 2.9093, "step": 8297 }, { "epoch": 0.388460413131254, "grad_norm": 1.0234375, "learning_rate": 0.00018852347325113659, "loss": 2.3355, "step": 8298 }, { "epoch": 0.3885072268710867, "grad_norm": 1.3359375, "learning_rate": 0.00018852075789796454, "loss": 3.2472, "step": 8299 }, { "epoch": 0.3885540406109193, "grad_norm": 1.46875, "learning_rate": 0.00018851804224316316, "loss": 2.8694, "step": 8300 }, { "epoch": 0.3886008543507519, "grad_norm": 1.46875, "learning_rate": 0.00018851532628674168, "loss": 2.8565, "step": 8301 }, { "epoch": 0.3886476680905846, "grad_norm": 1.4375, "learning_rate": 0.0001885126100287093, "loss": 3.4214, "step": 8302 }, { "epoch": 0.3886944818304172, "grad_norm": 1.234375, "learning_rate": 0.00018850989346907534, "loss": 3.1018, "step": 8303 }, { "epoch": 0.3887412955702499, "grad_norm": 1.3984375, "learning_rate": 0.00018850717660784903, "loss": 3.146, "step": 8304 }, { "epoch": 0.3887881093100825, "grad_norm": 1.375, "learning_rate": 0.00018850445944503964, "loss": 3.3919, "step": 8305 }, { "epoch": 0.3888349230499151, "grad_norm": 1.3671875, "learning_rate": 0.00018850174198065643, "loss": 3.1435, "step": 8306 }, { "epoch": 0.3888817367897478, "grad_norm": 1.2265625, "learning_rate": 0.00018849902421470862, "loss": 3.152, "step": 8307 }, { "epoch": 0.3889285505295804, "grad_norm": 2.75, "learning_rate": 0.0001884963061472055, "loss": 2.1902, "step": 8308 }, { "epoch": 0.3889753642694131, "grad_norm": 1.75, "learning_rate": 0.00018849358777815636, "loss": 3.3679, "step": 8309 }, { "epoch": 0.3890221780092457, "grad_norm": 1.2265625, "learning_rate": 0.00018849086910757042, "loss": 3.2837, "step": 8310 }, { "epoch": 0.38906899174907833, "grad_norm": 1.25, "learning_rate": 0.00018848815013545697, "loss": 3.1612, "step": 8311 }, { "epoch": 0.389115805488911, "grad_norm": 1.9765625, "learning_rate": 0.00018848543086182523, "loss": 2.8516, "step": 8312 }, { "epoch": 0.3891626192287436, "grad_norm": 1.7890625, "learning_rate": 0.00018848271128668454, "loss": 2.8266, "step": 8313 }, { "epoch": 0.3892094329685763, "grad_norm": 1.6953125, "learning_rate": 0.0001884799914100441, "loss": 3.338, "step": 8314 }, { "epoch": 0.3892562467084089, "grad_norm": 1.3984375, "learning_rate": 0.00018847727123191322, "loss": 3.135, "step": 8315 }, { "epoch": 0.38930306044824153, "grad_norm": 1.1640625, "learning_rate": 0.00018847455075230113, "loss": 3.0287, "step": 8316 }, { "epoch": 0.3893498741880742, "grad_norm": 2.171875, "learning_rate": 0.00018847182997121714, "loss": 2.7265, "step": 8317 }, { "epoch": 0.3893966879279068, "grad_norm": 1.4453125, "learning_rate": 0.0001884691088886705, "loss": 3.5471, "step": 8318 }, { "epoch": 0.3894435016677395, "grad_norm": 1.46875, "learning_rate": 0.00018846638750467048, "loss": 4.3796, "step": 8319 }, { "epoch": 0.3894903154075721, "grad_norm": 2.546875, "learning_rate": 0.00018846366581922637, "loss": 3.134, "step": 8320 }, { "epoch": 0.38953712914740474, "grad_norm": 1.3359375, "learning_rate": 0.00018846094383234742, "loss": 3.0155, "step": 8321 }, { "epoch": 0.3895839428872374, "grad_norm": 1.7109375, "learning_rate": 0.00018845822154404292, "loss": 3.1419, "step": 8322 }, { "epoch": 0.38963075662707003, "grad_norm": 1.40625, "learning_rate": 0.00018845549895432217, "loss": 2.9783, "step": 8323 }, { "epoch": 0.3896775703669027, "grad_norm": 1.546875, "learning_rate": 0.00018845277606319437, "loss": 3.1347, "step": 8324 }, { "epoch": 0.3897243841067353, "grad_norm": 1.703125, "learning_rate": 0.0001884500528706689, "loss": 3.4434, "step": 8325 }, { "epoch": 0.38977119784656794, "grad_norm": 1.484375, "learning_rate": 0.00018844732937675494, "loss": 3.7577, "step": 8326 }, { "epoch": 0.3898180115864006, "grad_norm": 1.5703125, "learning_rate": 0.00018844460558146184, "loss": 2.9978, "step": 8327 }, { "epoch": 0.38986482532623323, "grad_norm": 1.5, "learning_rate": 0.00018844188148479886, "loss": 3.3098, "step": 8328 }, { "epoch": 0.3899116390660659, "grad_norm": 1.53125, "learning_rate": 0.0001884391570867753, "loss": 3.3607, "step": 8329 }, { "epoch": 0.3899584528058985, "grad_norm": 1.21875, "learning_rate": 0.00018843643238740038, "loss": 3.1083, "step": 8330 }, { "epoch": 0.3900052665457312, "grad_norm": 1.421875, "learning_rate": 0.00018843370738668347, "loss": 3.0058, "step": 8331 }, { "epoch": 0.3900520802855638, "grad_norm": 1.265625, "learning_rate": 0.0001884309820846338, "loss": 2.6244, "step": 8332 }, { "epoch": 0.39009889402539644, "grad_norm": 1.34375, "learning_rate": 0.00018842825648126066, "loss": 2.9097, "step": 8333 }, { "epoch": 0.3901457077652291, "grad_norm": 1.3046875, "learning_rate": 0.00018842553057657336, "loss": 3.0155, "step": 8334 }, { "epoch": 0.39019252150506173, "grad_norm": 1.0390625, "learning_rate": 0.00018842280437058115, "loss": 2.8599, "step": 8335 }, { "epoch": 0.3902393352448944, "grad_norm": 1.5625, "learning_rate": 0.00018842007786329336, "loss": 3.1591, "step": 8336 }, { "epoch": 0.390286148984727, "grad_norm": 2.171875, "learning_rate": 0.00018841735105471928, "loss": 3.323, "step": 8337 }, { "epoch": 0.39033296272455964, "grad_norm": 1.1953125, "learning_rate": 0.0001884146239448682, "loss": 4.0272, "step": 8338 }, { "epoch": 0.3903797764643923, "grad_norm": 1.109375, "learning_rate": 0.00018841189653374935, "loss": 3.0299, "step": 8339 }, { "epoch": 0.39042659020422493, "grad_norm": 1.8828125, "learning_rate": 0.0001884091688213721, "loss": 3.6346, "step": 8340 }, { "epoch": 0.3904734039440576, "grad_norm": 1.296875, "learning_rate": 0.00018840644080774573, "loss": 3.3143, "step": 8341 }, { "epoch": 0.3905202176838902, "grad_norm": 1.4296875, "learning_rate": 0.00018840371249287952, "loss": 3.0611, "step": 8342 }, { "epoch": 0.39056703142372284, "grad_norm": 1.4453125, "learning_rate": 0.00018840098387678275, "loss": 3.0426, "step": 8343 }, { "epoch": 0.3906138451635555, "grad_norm": 1.328125, "learning_rate": 0.00018839825495946472, "loss": 3.2005, "step": 8344 }, { "epoch": 0.39066065890338814, "grad_norm": 1.515625, "learning_rate": 0.0001883955257409348, "loss": 3.4725, "step": 8345 }, { "epoch": 0.3907074726432208, "grad_norm": 1.421875, "learning_rate": 0.0001883927962212022, "loss": 3.3712, "step": 8346 }, { "epoch": 0.39075428638305343, "grad_norm": 1.84375, "learning_rate": 0.00018839006640027629, "loss": 2.9339, "step": 8347 }, { "epoch": 0.39080110012288605, "grad_norm": 3.28125, "learning_rate": 0.0001883873362781663, "loss": 2.3661, "step": 8348 }, { "epoch": 0.3908479138627187, "grad_norm": 1.546875, "learning_rate": 0.00018838460585488158, "loss": 2.971, "step": 8349 }, { "epoch": 0.39089472760255134, "grad_norm": 1.7734375, "learning_rate": 0.00018838187513043142, "loss": 3.1358, "step": 8350 }, { "epoch": 0.390941541342384, "grad_norm": 1.3046875, "learning_rate": 0.00018837914410482513, "loss": 3.0691, "step": 8351 }, { "epoch": 0.39098835508221663, "grad_norm": 1.4375, "learning_rate": 0.00018837641277807203, "loss": 3.3224, "step": 8352 }, { "epoch": 0.39103516882204925, "grad_norm": 1.1484375, "learning_rate": 0.0001883736811501814, "loss": 3.1693, "step": 8353 }, { "epoch": 0.3910819825618819, "grad_norm": 1.453125, "learning_rate": 0.0001883709492211626, "loss": 2.9497, "step": 8354 }, { "epoch": 0.39112879630171454, "grad_norm": 0.9140625, "learning_rate": 0.00018836821699102485, "loss": 4.5931, "step": 8355 }, { "epoch": 0.3911756100415472, "grad_norm": 1.8984375, "learning_rate": 0.00018836548445977751, "loss": 3.3939, "step": 8356 }, { "epoch": 0.39122242378137984, "grad_norm": 1.28125, "learning_rate": 0.00018836275162742992, "loss": 2.9985, "step": 8357 }, { "epoch": 0.39126923752121245, "grad_norm": 1.1953125, "learning_rate": 0.00018836001849399132, "loss": 3.5087, "step": 8358 }, { "epoch": 0.39131605126104513, "grad_norm": 1.84375, "learning_rate": 0.0001883572850594711, "loss": 2.9966, "step": 8359 }, { "epoch": 0.39136286500087775, "grad_norm": 1.390625, "learning_rate": 0.0001883545513238785, "loss": 2.9367, "step": 8360 }, { "epoch": 0.3914096787407104, "grad_norm": 1.21875, "learning_rate": 0.0001883518172872229, "loss": 3.0255, "step": 8361 }, { "epoch": 0.39145649248054304, "grad_norm": 1.3125, "learning_rate": 0.00018834908294951359, "loss": 2.5386, "step": 8362 }, { "epoch": 0.39150330622037566, "grad_norm": 1.5, "learning_rate": 0.0001883463483107599, "loss": 3.4396, "step": 8363 }, { "epoch": 0.39155011996020833, "grad_norm": 1.203125, "learning_rate": 0.00018834361337097112, "loss": 2.791, "step": 8364 }, { "epoch": 0.39159693370004095, "grad_norm": 1.3046875, "learning_rate": 0.00018834087813015656, "loss": 2.9626, "step": 8365 }, { "epoch": 0.3916437474398736, "grad_norm": 1.1796875, "learning_rate": 0.0001883381425883256, "loss": 2.9619, "step": 8366 }, { "epoch": 0.39169056117970624, "grad_norm": 1.71875, "learning_rate": 0.00018833540674548753, "loss": 3.113, "step": 8367 }, { "epoch": 0.39173737491953886, "grad_norm": 1.296875, "learning_rate": 0.00018833267060165164, "loss": 3.1843, "step": 8368 }, { "epoch": 0.39178418865937154, "grad_norm": 1.5546875, "learning_rate": 0.0001883299341568273, "loss": 3.2482, "step": 8369 }, { "epoch": 0.39183100239920415, "grad_norm": 1.875, "learning_rate": 0.0001883271974110238, "loss": 2.9307, "step": 8370 }, { "epoch": 0.3918778161390368, "grad_norm": 1.28125, "learning_rate": 0.00018832446036425049, "loss": 3.0892, "step": 8371 }, { "epoch": 0.39192462987886945, "grad_norm": 1.4453125, "learning_rate": 0.00018832172301651667, "loss": 3.0881, "step": 8372 }, { "epoch": 0.39197144361870206, "grad_norm": 1.8203125, "learning_rate": 0.0001883189853678317, "loss": 3.4931, "step": 8373 }, { "epoch": 0.39201825735853474, "grad_norm": 1.0859375, "learning_rate": 0.00018831624741820488, "loss": 3.0083, "step": 8374 }, { "epoch": 0.39206507109836736, "grad_norm": 1.3125, "learning_rate": 0.00018831350916764556, "loss": 2.9326, "step": 8375 }, { "epoch": 0.39211188483820003, "grad_norm": 1.1328125, "learning_rate": 0.0001883107706161631, "loss": 3.5835, "step": 8376 }, { "epoch": 0.39215869857803265, "grad_norm": 1.3359375, "learning_rate": 0.00018830803176376673, "loss": 3.0256, "step": 8377 }, { "epoch": 0.39220551231786527, "grad_norm": 1.078125, "learning_rate": 0.00018830529261046588, "loss": 3.0918, "step": 8378 }, { "epoch": 0.39225232605769794, "grad_norm": 1.828125, "learning_rate": 0.00018830255315626984, "loss": 3.2342, "step": 8379 }, { "epoch": 0.39229913979753056, "grad_norm": 1.59375, "learning_rate": 0.00018829981340118796, "loss": 2.847, "step": 8380 }, { "epoch": 0.39234595353736323, "grad_norm": 1.5078125, "learning_rate": 0.00018829707334522958, "loss": 3.1071, "step": 8381 }, { "epoch": 0.39239276727719585, "grad_norm": 1.21875, "learning_rate": 0.000188294332988404, "loss": 3.1444, "step": 8382 }, { "epoch": 0.39243958101702847, "grad_norm": 1.125, "learning_rate": 0.0001882915923307206, "loss": 2.4924, "step": 8383 }, { "epoch": 0.39248639475686115, "grad_norm": 1.265625, "learning_rate": 0.0001882888513721887, "loss": 3.1338, "step": 8384 }, { "epoch": 0.39253320849669376, "grad_norm": 1.125, "learning_rate": 0.00018828611011281765, "loss": 3.2333, "step": 8385 }, { "epoch": 0.39258002223652644, "grad_norm": 1.671875, "learning_rate": 0.00018828336855261676, "loss": 3.2651, "step": 8386 }, { "epoch": 0.39262683597635906, "grad_norm": 1.09375, "learning_rate": 0.00018828062669159543, "loss": 2.927, "step": 8387 }, { "epoch": 0.3926736497161917, "grad_norm": 1.9453125, "learning_rate": 0.00018827788452976292, "loss": 3.9165, "step": 8388 }, { "epoch": 0.39272046345602435, "grad_norm": 1.4296875, "learning_rate": 0.00018827514206712866, "loss": 2.8865, "step": 8389 }, { "epoch": 0.39276727719585697, "grad_norm": 1.1875, "learning_rate": 0.00018827239930370196, "loss": 3.053, "step": 8390 }, { "epoch": 0.39281409093568964, "grad_norm": 1.640625, "learning_rate": 0.00018826965623949213, "loss": 3.0963, "step": 8391 }, { "epoch": 0.39286090467552226, "grad_norm": 1.3671875, "learning_rate": 0.00018826691287450857, "loss": 3.1456, "step": 8392 }, { "epoch": 0.39290771841535493, "grad_norm": 1.3125, "learning_rate": 0.0001882641692087606, "loss": 3.1231, "step": 8393 }, { "epoch": 0.39295453215518755, "grad_norm": 1.265625, "learning_rate": 0.0001882614252422576, "loss": 3.4538, "step": 8394 }, { "epoch": 0.39300134589502017, "grad_norm": 1.2265625, "learning_rate": 0.00018825868097500884, "loss": 3.4413, "step": 8395 }, { "epoch": 0.39304815963485285, "grad_norm": 1.4453125, "learning_rate": 0.00018825593640702377, "loss": 3.4488, "step": 8396 }, { "epoch": 0.39309497337468546, "grad_norm": 2.671875, "learning_rate": 0.0001882531915383117, "loss": 3.1716, "step": 8397 }, { "epoch": 0.39314178711451814, "grad_norm": 1.515625, "learning_rate": 0.00018825044636888196, "loss": 3.2723, "step": 8398 }, { "epoch": 0.39318860085435076, "grad_norm": 1.78125, "learning_rate": 0.00018824770089874394, "loss": 3.1033, "step": 8399 }, { "epoch": 0.3932354145941834, "grad_norm": 1.3125, "learning_rate": 0.00018824495512790695, "loss": 3.3591, "step": 8400 }, { "epoch": 0.39328222833401605, "grad_norm": 2.015625, "learning_rate": 0.00018824220905638042, "loss": 2.9399, "step": 8401 }, { "epoch": 0.39332904207384867, "grad_norm": 1.1484375, "learning_rate": 0.00018823946268417365, "loss": 3.0613, "step": 8402 }, { "epoch": 0.39337585581368134, "grad_norm": 2.53125, "learning_rate": 0.00018823671601129598, "loss": 2.7737, "step": 8403 }, { "epoch": 0.39342266955351396, "grad_norm": 1.953125, "learning_rate": 0.00018823396903775686, "loss": 3.7295, "step": 8404 }, { "epoch": 0.3934694832933466, "grad_norm": 1.7578125, "learning_rate": 0.00018823122176356554, "loss": 2.9603, "step": 8405 }, { "epoch": 0.39351629703317925, "grad_norm": 1.296875, "learning_rate": 0.00018822847418873144, "loss": 2.7159, "step": 8406 }, { "epoch": 0.39356311077301187, "grad_norm": 1.1171875, "learning_rate": 0.0001882257263132639, "loss": 2.7799, "step": 8407 }, { "epoch": 0.39360992451284454, "grad_norm": 1.796875, "learning_rate": 0.00018822297813717235, "loss": 3.1453, "step": 8408 }, { "epoch": 0.39365673825267716, "grad_norm": 1.0859375, "learning_rate": 0.00018822022966046606, "loss": 2.9717, "step": 8409 }, { "epoch": 0.3937035519925098, "grad_norm": 1.5390625, "learning_rate": 0.00018821748088315444, "loss": 3.5498, "step": 8410 }, { "epoch": 0.39375036573234246, "grad_norm": 1.34375, "learning_rate": 0.00018821473180524688, "loss": 3.3296, "step": 8411 }, { "epoch": 0.3937971794721751, "grad_norm": 1.3046875, "learning_rate": 0.0001882119824267527, "loss": 2.8574, "step": 8412 }, { "epoch": 0.39384399321200775, "grad_norm": 1.1796875, "learning_rate": 0.00018820923274768126, "loss": 2.8152, "step": 8413 }, { "epoch": 0.39389080695184037, "grad_norm": 1.5, "learning_rate": 0.000188206482768042, "loss": 3.225, "step": 8414 }, { "epoch": 0.393937620691673, "grad_norm": 1.7109375, "learning_rate": 0.00018820373248784424, "loss": 3.4392, "step": 8415 }, { "epoch": 0.39398443443150566, "grad_norm": 1.2265625, "learning_rate": 0.00018820098190709735, "loss": 2.1589, "step": 8416 }, { "epoch": 0.3940312481713383, "grad_norm": 1.3125, "learning_rate": 0.00018819823102581072, "loss": 3.2369, "step": 8417 }, { "epoch": 0.39407806191117095, "grad_norm": 1.15625, "learning_rate": 0.00018819547984399372, "loss": 2.9266, "step": 8418 }, { "epoch": 0.39412487565100357, "grad_norm": 1.3984375, "learning_rate": 0.00018819272836165571, "loss": 2.9774, "step": 8419 }, { "epoch": 0.3941716893908362, "grad_norm": 1.265625, "learning_rate": 0.0001881899765788061, "loss": 3.0149, "step": 8420 }, { "epoch": 0.39421850313066886, "grad_norm": 2.0, "learning_rate": 0.00018818722449545421, "loss": 3.3529, "step": 8421 }, { "epoch": 0.3942653168705015, "grad_norm": 1.9765625, "learning_rate": 0.00018818447211160945, "loss": 3.1954, "step": 8422 }, { "epoch": 0.39431213061033416, "grad_norm": 1.0703125, "learning_rate": 0.00018818171942728122, "loss": 2.8734, "step": 8423 }, { "epoch": 0.3943589443501668, "grad_norm": 1.2421875, "learning_rate": 0.00018817896644247886, "loss": 3.1397, "step": 8424 }, { "epoch": 0.3944057580899994, "grad_norm": 1.1953125, "learning_rate": 0.0001881762131572118, "loss": 2.7822, "step": 8425 }, { "epoch": 0.39445257182983207, "grad_norm": 1.546875, "learning_rate": 0.00018817345957148935, "loss": 3.3681, "step": 8426 }, { "epoch": 0.3944993855696647, "grad_norm": 2.359375, "learning_rate": 0.00018817070568532095, "loss": 2.9705, "step": 8427 }, { "epoch": 0.39454619930949736, "grad_norm": 1.390625, "learning_rate": 0.000188167951498716, "loss": 2.7301, "step": 8428 }, { "epoch": 0.39459301304933, "grad_norm": 1.796875, "learning_rate": 0.00018816519701168378, "loss": 2.9788, "step": 8429 }, { "epoch": 0.3946398267891626, "grad_norm": 1.7578125, "learning_rate": 0.0001881624422242338, "loss": 2.6738, "step": 8430 }, { "epoch": 0.39468664052899527, "grad_norm": 2.28125, "learning_rate": 0.00018815968713637539, "loss": 3.3515, "step": 8431 }, { "epoch": 0.3947334542688279, "grad_norm": 2.09375, "learning_rate": 0.00018815693174811796, "loss": 3.2512, "step": 8432 }, { "epoch": 0.39478026800866056, "grad_norm": 1.3359375, "learning_rate": 0.00018815417605947082, "loss": 3.4595, "step": 8433 }, { "epoch": 0.3948270817484932, "grad_norm": 1.5, "learning_rate": 0.00018815142007044347, "loss": 2.862, "step": 8434 }, { "epoch": 0.3948738954883258, "grad_norm": 1.5546875, "learning_rate": 0.00018814866378104523, "loss": 3.2052, "step": 8435 }, { "epoch": 0.3949207092281585, "grad_norm": 1.375, "learning_rate": 0.00018814590719128552, "loss": 2.9084, "step": 8436 }, { "epoch": 0.3949675229679911, "grad_norm": 1.171875, "learning_rate": 0.00018814315030117373, "loss": 3.1662, "step": 8437 }, { "epoch": 0.39501433670782377, "grad_norm": 1.3359375, "learning_rate": 0.00018814039311071924, "loss": 3.1419, "step": 8438 }, { "epoch": 0.3950611504476564, "grad_norm": 1.1875, "learning_rate": 0.00018813763561993145, "loss": 3.2228, "step": 8439 }, { "epoch": 0.395107964187489, "grad_norm": 1.1796875, "learning_rate": 0.0001881348778288198, "loss": 2.9065, "step": 8440 }, { "epoch": 0.3951547779273217, "grad_norm": 1.25, "learning_rate": 0.00018813211973739362, "loss": 2.8148, "step": 8441 }, { "epoch": 0.3952015916671543, "grad_norm": 1.265625, "learning_rate": 0.00018812936134566234, "loss": 3.1819, "step": 8442 }, { "epoch": 0.39524840540698697, "grad_norm": 1.1796875, "learning_rate": 0.00018812660265363532, "loss": 2.5261, "step": 8443 }, { "epoch": 0.3952952191468196, "grad_norm": 1.59375, "learning_rate": 0.00018812384366132207, "loss": 3.1904, "step": 8444 }, { "epoch": 0.3953420328866522, "grad_norm": 1.46875, "learning_rate": 0.00018812108436873186, "loss": 2.5354, "step": 8445 }, { "epoch": 0.3953888466264849, "grad_norm": 2.1875, "learning_rate": 0.00018811832477587417, "loss": 2.9747, "step": 8446 }, { "epoch": 0.3954356603663175, "grad_norm": 1.40625, "learning_rate": 0.00018811556488275836, "loss": 2.8377, "step": 8447 }, { "epoch": 0.3954824741061502, "grad_norm": 1.34375, "learning_rate": 0.00018811280468939389, "loss": 3.1086, "step": 8448 }, { "epoch": 0.3955292878459828, "grad_norm": 1.3515625, "learning_rate": 0.0001881100441957901, "loss": 3.2554, "step": 8449 }, { "epoch": 0.3955761015858154, "grad_norm": 1.5625, "learning_rate": 0.00018810728340195644, "loss": 3.0378, "step": 8450 }, { "epoch": 0.3956229153256481, "grad_norm": 1.53125, "learning_rate": 0.00018810452230790233, "loss": 3.0457, "step": 8451 }, { "epoch": 0.3956697290654807, "grad_norm": 1.28125, "learning_rate": 0.00018810176091363712, "loss": 2.9877, "step": 8452 }, { "epoch": 0.3957165428053134, "grad_norm": 1.2421875, "learning_rate": 0.00018809899921917026, "loss": 3.0473, "step": 8453 }, { "epoch": 0.395763356545146, "grad_norm": 1.28125, "learning_rate": 0.00018809623722451112, "loss": 3.2405, "step": 8454 }, { "epoch": 0.39581017028497867, "grad_norm": 1.3828125, "learning_rate": 0.00018809347492966918, "loss": 3.2573, "step": 8455 }, { "epoch": 0.3958569840248113, "grad_norm": 1.390625, "learning_rate": 0.00018809071233465382, "loss": 2.8358, "step": 8456 }, { "epoch": 0.3959037977646439, "grad_norm": 1.5703125, "learning_rate": 0.00018808794943947445, "loss": 2.9599, "step": 8457 }, { "epoch": 0.3959506115044766, "grad_norm": 1.34375, "learning_rate": 0.0001880851862441405, "loss": 3.1923, "step": 8458 }, { "epoch": 0.3959974252443092, "grad_norm": 1.421875, "learning_rate": 0.00018808242274866133, "loss": 2.9644, "step": 8459 }, { "epoch": 0.39604423898414187, "grad_norm": 1.375, "learning_rate": 0.0001880796589530464, "loss": 3.4255, "step": 8460 }, { "epoch": 0.3960910527239745, "grad_norm": 1.359375, "learning_rate": 0.00018807689485730517, "loss": 3.0875, "step": 8461 }, { "epoch": 0.3961378664638071, "grad_norm": 1.09375, "learning_rate": 0.00018807413046144692, "loss": 3.1492, "step": 8462 }, { "epoch": 0.3961846802036398, "grad_norm": 2.453125, "learning_rate": 0.00018807136576548127, "loss": 3.0548, "step": 8463 }, { "epoch": 0.3962314939434724, "grad_norm": 1.6015625, "learning_rate": 0.00018806860076941745, "loss": 2.5798, "step": 8464 }, { "epoch": 0.3962783076833051, "grad_norm": 1.453125, "learning_rate": 0.000188065835473265, "loss": 2.951, "step": 8465 }, { "epoch": 0.3963251214231377, "grad_norm": 1.328125, "learning_rate": 0.00018806306987703328, "loss": 3.1214, "step": 8466 }, { "epoch": 0.3963719351629703, "grad_norm": 1.34375, "learning_rate": 0.00018806030398073177, "loss": 3.3569, "step": 8467 }, { "epoch": 0.396418748902803, "grad_norm": 1.171875, "learning_rate": 0.00018805753778436984, "loss": 2.917, "step": 8468 }, { "epoch": 0.3964655626426356, "grad_norm": 1.4921875, "learning_rate": 0.00018805477128795698, "loss": 2.8095, "step": 8469 }, { "epoch": 0.3965123763824683, "grad_norm": 1.046875, "learning_rate": 0.00018805200449150254, "loss": 6.7954, "step": 8470 }, { "epoch": 0.3965591901223009, "grad_norm": 1.1328125, "learning_rate": 0.000188049237395016, "loss": 3.1766, "step": 8471 }, { "epoch": 0.3966060038621335, "grad_norm": 1.1484375, "learning_rate": 0.00018804646999850676, "loss": 2.9684, "step": 8472 }, { "epoch": 0.3966528176019662, "grad_norm": 1.1953125, "learning_rate": 0.00018804370230198427, "loss": 2.7612, "step": 8473 }, { "epoch": 0.3966996313417988, "grad_norm": 1.5234375, "learning_rate": 0.00018804093430545797, "loss": 2.9855, "step": 8474 }, { "epoch": 0.3967464450816315, "grad_norm": 1.390625, "learning_rate": 0.00018803816600893725, "loss": 3.2061, "step": 8475 }, { "epoch": 0.3967932588214641, "grad_norm": 1.5859375, "learning_rate": 0.00018803539741243157, "loss": 3.2852, "step": 8476 }, { "epoch": 0.3968400725612967, "grad_norm": 1.8359375, "learning_rate": 0.0001880326285159504, "loss": 2.7972, "step": 8477 }, { "epoch": 0.3968868863011294, "grad_norm": 1.1640625, "learning_rate": 0.00018802985931950307, "loss": 2.7508, "step": 8478 }, { "epoch": 0.396933700040962, "grad_norm": 1.6484375, "learning_rate": 0.00018802708982309913, "loss": 3.3265, "step": 8479 }, { "epoch": 0.3969805137807947, "grad_norm": 0.984375, "learning_rate": 0.00018802432002674798, "loss": 2.4705, "step": 8480 }, { "epoch": 0.3970273275206273, "grad_norm": 1.21875, "learning_rate": 0.000188021549930459, "loss": 3.1873, "step": 8481 }, { "epoch": 0.3970741412604599, "grad_norm": 1.2890625, "learning_rate": 0.0001880187795342417, "loss": 3.2249, "step": 8482 }, { "epoch": 0.3971209550002926, "grad_norm": 1.4609375, "learning_rate": 0.0001880160088381055, "loss": 4.1977, "step": 8483 }, { "epoch": 0.3971677687401252, "grad_norm": 1.265625, "learning_rate": 0.00018801323784205985, "loss": 2.8333, "step": 8484 }, { "epoch": 0.3972145824799579, "grad_norm": 1.1875, "learning_rate": 0.00018801046654611417, "loss": 3.2365, "step": 8485 }, { "epoch": 0.3972613962197905, "grad_norm": 1.3828125, "learning_rate": 0.0001880076949502779, "loss": 3.126, "step": 8486 }, { "epoch": 0.3973082099596231, "grad_norm": 1.1640625, "learning_rate": 0.0001880049230545605, "loss": 3.1344, "step": 8487 }, { "epoch": 0.3973550236994558, "grad_norm": 2.046875, "learning_rate": 0.0001880021508589714, "loss": 2.5207, "step": 8488 }, { "epoch": 0.3974018374392884, "grad_norm": 1.265625, "learning_rate": 0.0001879993783635201, "loss": 3.0936, "step": 8489 }, { "epoch": 0.3974486511791211, "grad_norm": 1.328125, "learning_rate": 0.000187996605568216, "loss": 3.0328, "step": 8490 }, { "epoch": 0.3974954649189537, "grad_norm": 1.4140625, "learning_rate": 0.00018799383247306852, "loss": 3.2442, "step": 8491 }, { "epoch": 0.39754227865878633, "grad_norm": 1.3828125, "learning_rate": 0.00018799105907808713, "loss": 2.8874, "step": 8492 }, { "epoch": 0.397589092398619, "grad_norm": 1.7890625, "learning_rate": 0.00018798828538328131, "loss": 2.8926, "step": 8493 }, { "epoch": 0.3976359061384516, "grad_norm": 1.21875, "learning_rate": 0.00018798551138866053, "loss": 3.3303, "step": 8494 }, { "epoch": 0.3976827198782843, "grad_norm": 1.609375, "learning_rate": 0.00018798273709423418, "loss": 2.464, "step": 8495 }, { "epoch": 0.3977295336181169, "grad_norm": 1.359375, "learning_rate": 0.00018797996250001172, "loss": 3.2991, "step": 8496 }, { "epoch": 0.39777634735794953, "grad_norm": 1.65625, "learning_rate": 0.00018797718760600263, "loss": 3.1787, "step": 8497 }, { "epoch": 0.3978231610977822, "grad_norm": 1.5, "learning_rate": 0.00018797441241221637, "loss": 3.3659, "step": 8498 }, { "epoch": 0.3978699748376148, "grad_norm": 1.390625, "learning_rate": 0.00018797163691866236, "loss": 3.5676, "step": 8499 }, { "epoch": 0.3979167885774475, "grad_norm": 1.03125, "learning_rate": 0.00018796886112535014, "loss": 3.1309, "step": 8500 }, { "epoch": 0.3979636023172801, "grad_norm": 1.234375, "learning_rate": 0.00018796608503228907, "loss": 3.1506, "step": 8501 }, { "epoch": 0.39801041605711274, "grad_norm": 1.5234375, "learning_rate": 0.00018796330863948864, "loss": 3.5719, "step": 8502 }, { "epoch": 0.3980572297969454, "grad_norm": 1.453125, "learning_rate": 0.00018796053194695836, "loss": 3.5986, "step": 8503 }, { "epoch": 0.39810404353677803, "grad_norm": 1.4765625, "learning_rate": 0.00018795775495470763, "loss": 3.0228, "step": 8504 }, { "epoch": 0.3981508572766107, "grad_norm": 2.375, "learning_rate": 0.00018795497766274592, "loss": 3.6341, "step": 8505 }, { "epoch": 0.3981976710164433, "grad_norm": 1.4609375, "learning_rate": 0.0001879522000710827, "loss": 3.1019, "step": 8506 }, { "epoch": 0.39824448475627594, "grad_norm": 1.28125, "learning_rate": 0.00018794942217972746, "loss": 4.5236, "step": 8507 }, { "epoch": 0.3982912984961086, "grad_norm": 1.4140625, "learning_rate": 0.00018794664398868966, "loss": 3.2904, "step": 8508 }, { "epoch": 0.39833811223594123, "grad_norm": 1.4296875, "learning_rate": 0.00018794386549797875, "loss": 3.2028, "step": 8509 }, { "epoch": 0.3983849259757739, "grad_norm": 1.5, "learning_rate": 0.00018794108670760419, "loss": 3.1102, "step": 8510 }, { "epoch": 0.3984317397156065, "grad_norm": 0.89453125, "learning_rate": 0.00018793830761757544, "loss": 3.4432, "step": 8511 }, { "epoch": 0.39847855345543914, "grad_norm": 1.25, "learning_rate": 0.00018793552822790206, "loss": 3.1353, "step": 8512 }, { "epoch": 0.3985253671952718, "grad_norm": 1.3984375, "learning_rate": 0.00018793274853859336, "loss": 2.7449, "step": 8513 }, { "epoch": 0.39857218093510444, "grad_norm": 1.3359375, "learning_rate": 0.000187929968549659, "loss": 3.0325, "step": 8514 }, { "epoch": 0.3986189946749371, "grad_norm": 1.6015625, "learning_rate": 0.00018792718826110826, "loss": 3.2825, "step": 8515 }, { "epoch": 0.39866580841476973, "grad_norm": 1.2890625, "learning_rate": 0.00018792440767295078, "loss": 3.326, "step": 8516 }, { "epoch": 0.3987126221546024, "grad_norm": 1.484375, "learning_rate": 0.00018792162678519592, "loss": 2.8122, "step": 8517 }, { "epoch": 0.398759435894435, "grad_norm": 1.359375, "learning_rate": 0.0001879188455978532, "loss": 3.1923, "step": 8518 }, { "epoch": 0.39880624963426764, "grad_norm": 1.796875, "learning_rate": 0.0001879160641109321, "loss": 3.3317, "step": 8519 }, { "epoch": 0.3988530633741003, "grad_norm": 1.609375, "learning_rate": 0.00018791328232444212, "loss": 3.3456, "step": 8520 }, { "epoch": 0.39889987711393293, "grad_norm": 1.3046875, "learning_rate": 0.00018791050023839268, "loss": 3.1117, "step": 8521 }, { "epoch": 0.3989466908537656, "grad_norm": 1.2265625, "learning_rate": 0.0001879077178527933, "loss": 2.8145, "step": 8522 }, { "epoch": 0.3989935045935982, "grad_norm": 1.8046875, "learning_rate": 0.0001879049351676535, "loss": 3.0753, "step": 8523 }, { "epoch": 0.39904031833343084, "grad_norm": 1.9296875, "learning_rate": 0.00018790215218298265, "loss": 3.488, "step": 8524 }, { "epoch": 0.3990871320732635, "grad_norm": 1.6640625, "learning_rate": 0.0001878993688987903, "loss": 3.2859, "step": 8525 }, { "epoch": 0.39913394581309614, "grad_norm": 3.0, "learning_rate": 0.00018789658531508598, "loss": 2.9669, "step": 8526 }, { "epoch": 0.3991807595529288, "grad_norm": 1.28125, "learning_rate": 0.0001878938014318791, "loss": 2.5517, "step": 8527 }, { "epoch": 0.39922757329276143, "grad_norm": 1.1953125, "learning_rate": 0.00018789101724917914, "loss": 2.9885, "step": 8528 }, { "epoch": 0.39927438703259405, "grad_norm": 1.3359375, "learning_rate": 0.00018788823276699566, "loss": 3.4018, "step": 8529 }, { "epoch": 0.3993212007724267, "grad_norm": 1.4765625, "learning_rate": 0.00018788544798533808, "loss": 2.7243, "step": 8530 }, { "epoch": 0.39936801451225934, "grad_norm": 1.546875, "learning_rate": 0.00018788266290421595, "loss": 3.303, "step": 8531 }, { "epoch": 0.399414828252092, "grad_norm": 1.3515625, "learning_rate": 0.0001878798775236387, "loss": 2.9219, "step": 8532 }, { "epoch": 0.39946164199192463, "grad_norm": 1.2578125, "learning_rate": 0.00018787709184361586, "loss": 3.4432, "step": 8533 }, { "epoch": 0.39950845573175725, "grad_norm": 1.609375, "learning_rate": 0.0001878743058641569, "loss": 3.3252, "step": 8534 }, { "epoch": 0.3995552694715899, "grad_norm": 1.4765625, "learning_rate": 0.00018787151958527134, "loss": 3.3609, "step": 8535 }, { "epoch": 0.39960208321142254, "grad_norm": 1.390625, "learning_rate": 0.00018786873300696862, "loss": 3.3526, "step": 8536 }, { "epoch": 0.3996488969512552, "grad_norm": 2.15625, "learning_rate": 0.00018786594612925833, "loss": 3.2463, "step": 8537 }, { "epoch": 0.39969571069108784, "grad_norm": 1.3984375, "learning_rate": 0.00018786315895214984, "loss": 2.9603, "step": 8538 }, { "epoch": 0.39974252443092045, "grad_norm": 1.421875, "learning_rate": 0.00018786037147565274, "loss": 2.7601, "step": 8539 }, { "epoch": 0.39978933817075313, "grad_norm": 1.1953125, "learning_rate": 0.00018785758369977652, "loss": 3.1495, "step": 8540 }, { "epoch": 0.39983615191058575, "grad_norm": 1.375, "learning_rate": 0.00018785479562453066, "loss": 2.8255, "step": 8541 }, { "epoch": 0.3998829656504184, "grad_norm": 1.3984375, "learning_rate": 0.00018785200724992464, "loss": 3.0372, "step": 8542 }, { "epoch": 0.39992977939025104, "grad_norm": 2.921875, "learning_rate": 0.000187849218575968, "loss": 2.6245, "step": 8543 }, { "epoch": 0.39997659313008366, "grad_norm": 1.5078125, "learning_rate": 0.0001878464296026702, "loss": 3.1955, "step": 8544 }, { "epoch": 0.40002340686991633, "grad_norm": 1.3125, "learning_rate": 0.0001878436403300408, "loss": 2.9183, "step": 8545 }, { "epoch": 0.40007022060974895, "grad_norm": 1.2421875, "learning_rate": 0.0001878408507580893, "loss": 2.9239, "step": 8546 }, { "epoch": 0.4001170343495816, "grad_norm": 1.2578125, "learning_rate": 0.0001878380608868251, "loss": 3.1514, "step": 8547 }, { "epoch": 0.40016384808941424, "grad_norm": 1.2421875, "learning_rate": 0.00018783527071625783, "loss": 2.8899, "step": 8548 }, { "epoch": 0.40021066182924686, "grad_norm": 1.5859375, "learning_rate": 0.00018783248024639696, "loss": 3.3568, "step": 8549 }, { "epoch": 0.40025747556907953, "grad_norm": 1.2109375, "learning_rate": 0.000187829689477252, "loss": 3.1641, "step": 8550 }, { "epoch": 0.40030428930891215, "grad_norm": 1.171875, "learning_rate": 0.0001878268984088324, "loss": 2.825, "step": 8551 }, { "epoch": 0.4003511030487448, "grad_norm": 1.28125, "learning_rate": 0.00018782410704114777, "loss": 3.2145, "step": 8552 }, { "epoch": 0.40039791678857745, "grad_norm": 1.1796875, "learning_rate": 0.00018782131537420753, "loss": 3.0603, "step": 8553 }, { "epoch": 0.40044473052841006, "grad_norm": 1.2890625, "learning_rate": 0.00018781852340802127, "loss": 3.0044, "step": 8554 }, { "epoch": 0.40049154426824274, "grad_norm": 1.28125, "learning_rate": 0.00018781573114259845, "loss": 3.4786, "step": 8555 }, { "epoch": 0.40053835800807536, "grad_norm": 1.2734375, "learning_rate": 0.0001878129385779486, "loss": 3.0326, "step": 8556 }, { "epoch": 0.40058517174790803, "grad_norm": 1.359375, "learning_rate": 0.00018781014571408124, "loss": 3.2268, "step": 8557 }, { "epoch": 0.40063198548774065, "grad_norm": 1.578125, "learning_rate": 0.00018780735255100587, "loss": 4.0975, "step": 8558 }, { "epoch": 0.40067879922757327, "grad_norm": 1.234375, "learning_rate": 0.00018780455908873206, "loss": 3.1637, "step": 8559 }, { "epoch": 0.40072561296740594, "grad_norm": 1.4296875, "learning_rate": 0.00018780176532726928, "loss": 2.9414, "step": 8560 }, { "epoch": 0.40077242670723856, "grad_norm": 1.265625, "learning_rate": 0.00018779897126662706, "loss": 3.133, "step": 8561 }, { "epoch": 0.40081924044707123, "grad_norm": 1.8046875, "learning_rate": 0.00018779617690681488, "loss": 3.4834, "step": 8562 }, { "epoch": 0.40086605418690385, "grad_norm": 1.296875, "learning_rate": 0.00018779338224784233, "loss": 3.3855, "step": 8563 }, { "epoch": 0.40091286792673647, "grad_norm": 1.28125, "learning_rate": 0.0001877905872897189, "loss": 2.8185, "step": 8564 }, { "epoch": 0.40095968166656915, "grad_norm": 1.3203125, "learning_rate": 0.00018778779203245413, "loss": 3.1942, "step": 8565 }, { "epoch": 0.40100649540640176, "grad_norm": 1.0234375, "learning_rate": 0.00018778499647605754, "loss": 2.814, "step": 8566 }, { "epoch": 0.40105330914623444, "grad_norm": 1.2734375, "learning_rate": 0.00018778220062053863, "loss": 3.0672, "step": 8567 }, { "epoch": 0.40110012288606706, "grad_norm": 1.921875, "learning_rate": 0.00018777940446590695, "loss": 3.0361, "step": 8568 }, { "epoch": 0.4011469366258997, "grad_norm": 1.734375, "learning_rate": 0.00018777660801217204, "loss": 2.9364, "step": 8569 }, { "epoch": 0.40119375036573235, "grad_norm": 1.6328125, "learning_rate": 0.0001877738112593434, "loss": 3.4433, "step": 8570 }, { "epoch": 0.40124056410556497, "grad_norm": 1.234375, "learning_rate": 0.00018777101420743055, "loss": 3.1504, "step": 8571 }, { "epoch": 0.40128737784539764, "grad_norm": 2.03125, "learning_rate": 0.00018776821685644306, "loss": 3.4043, "step": 8572 }, { "epoch": 0.40133419158523026, "grad_norm": 1.1171875, "learning_rate": 0.00018776541920639047, "loss": 2.7367, "step": 8573 }, { "epoch": 0.4013810053250629, "grad_norm": 1.25, "learning_rate": 0.00018776262125728223, "loss": 3.1865, "step": 8574 }, { "epoch": 0.40142781906489555, "grad_norm": 0.99609375, "learning_rate": 0.000187759823009128, "loss": 2.1623, "step": 8575 }, { "epoch": 0.40147463280472817, "grad_norm": 1.1484375, "learning_rate": 0.00018775702446193718, "loss": 3.121, "step": 8576 }, { "epoch": 0.40152144654456084, "grad_norm": 1.2578125, "learning_rate": 0.0001877542256157194, "loss": 3.1789, "step": 8577 }, { "epoch": 0.40156826028439346, "grad_norm": 1.34375, "learning_rate": 0.00018775142647048417, "loss": 3.3494, "step": 8578 }, { "epoch": 0.40161507402422614, "grad_norm": 1.3046875, "learning_rate": 0.00018774862702624102, "loss": 3.1324, "step": 8579 }, { "epoch": 0.40166188776405876, "grad_norm": 1.359375, "learning_rate": 0.00018774582728299952, "loss": 3.0491, "step": 8580 }, { "epoch": 0.4017087015038914, "grad_norm": 1.328125, "learning_rate": 0.00018774302724076916, "loss": 3.5246, "step": 8581 }, { "epoch": 0.40175551524372405, "grad_norm": 1.328125, "learning_rate": 0.0001877402268995595, "loss": 2.681, "step": 8582 }, { "epoch": 0.40180232898355667, "grad_norm": 1.3671875, "learning_rate": 0.0001877374262593801, "loss": 3.0934, "step": 8583 }, { "epoch": 0.40184914272338934, "grad_norm": 1.4140625, "learning_rate": 0.0001877346253202405, "loss": 2.8954, "step": 8584 }, { "epoch": 0.40189595646322196, "grad_norm": 1.3515625, "learning_rate": 0.00018773182408215024, "loss": 2.972, "step": 8585 }, { "epoch": 0.4019427702030546, "grad_norm": 2.4375, "learning_rate": 0.00018772902254511883, "loss": 3.5782, "step": 8586 }, { "epoch": 0.40198958394288725, "grad_norm": 1.8671875, "learning_rate": 0.00018772622070915588, "loss": 3.014, "step": 8587 }, { "epoch": 0.40203639768271987, "grad_norm": 1.59375, "learning_rate": 0.00018772341857427088, "loss": 3.3003, "step": 8588 }, { "epoch": 0.40208321142255254, "grad_norm": 1.1015625, "learning_rate": 0.00018772061614047344, "loss": 4.1533, "step": 8589 }, { "epoch": 0.40213002516238516, "grad_norm": 1.734375, "learning_rate": 0.00018771781340777303, "loss": 3.0159, "step": 8590 }, { "epoch": 0.4021768389022178, "grad_norm": 1.4453125, "learning_rate": 0.00018771501037617925, "loss": 3.2371, "step": 8591 }, { "epoch": 0.40222365264205046, "grad_norm": 1.5390625, "learning_rate": 0.00018771220704570164, "loss": 3.4119, "step": 8592 }, { "epoch": 0.4022704663818831, "grad_norm": 1.5234375, "learning_rate": 0.00018770940341634977, "loss": 2.9149, "step": 8593 }, { "epoch": 0.40231728012171575, "grad_norm": 1.40625, "learning_rate": 0.00018770659948813315, "loss": 3.3941, "step": 8594 }, { "epoch": 0.40236409386154837, "grad_norm": 1.3125, "learning_rate": 0.0001877037952610614, "loss": 3.3934, "step": 8595 }, { "epoch": 0.402410907601381, "grad_norm": 1.65625, "learning_rate": 0.00018770099073514403, "loss": 3.1173, "step": 8596 }, { "epoch": 0.40245772134121366, "grad_norm": 2.34375, "learning_rate": 0.00018769818591039058, "loss": 2.723, "step": 8597 }, { "epoch": 0.4025045350810463, "grad_norm": 1.6640625, "learning_rate": 0.00018769538078681065, "loss": 3.0665, "step": 8598 }, { "epoch": 0.40255134882087895, "grad_norm": 1.2890625, "learning_rate": 0.0001876925753644138, "loss": 2.8478, "step": 8599 }, { "epoch": 0.40259816256071157, "grad_norm": 1.375, "learning_rate": 0.00018768976964320951, "loss": 3.0842, "step": 8600 }, { "epoch": 0.4026449763005442, "grad_norm": 1.34375, "learning_rate": 0.0001876869636232074, "loss": 3.3213, "step": 8601 }, { "epoch": 0.40269179004037686, "grad_norm": 1.2734375, "learning_rate": 0.00018768415730441705, "loss": 3.0788, "step": 8602 }, { "epoch": 0.4027386037802095, "grad_norm": 1.3359375, "learning_rate": 0.00018768135068684806, "loss": 2.9735, "step": 8603 }, { "epoch": 0.40278541752004215, "grad_norm": 1.1953125, "learning_rate": 0.00018767854377050986, "loss": 2.5726, "step": 8604 }, { "epoch": 0.4028322312598748, "grad_norm": 1.28125, "learning_rate": 0.00018767573655541214, "loss": 2.9394, "step": 8605 }, { "epoch": 0.4028790449997074, "grad_norm": 1.578125, "learning_rate": 0.00018767292904156438, "loss": 3.3747, "step": 8606 }, { "epoch": 0.40292585873954007, "grad_norm": 1.109375, "learning_rate": 0.00018767012122897618, "loss": 2.7974, "step": 8607 }, { "epoch": 0.4029726724793727, "grad_norm": 1.53125, "learning_rate": 0.00018766731311765713, "loss": 2.9223, "step": 8608 }, { "epoch": 0.40301948621920536, "grad_norm": 2.125, "learning_rate": 0.00018766450470761675, "loss": 3.2438, "step": 8609 }, { "epoch": 0.403066299959038, "grad_norm": 1.34375, "learning_rate": 0.00018766169599886468, "loss": 3.4079, "step": 8610 }, { "epoch": 0.4031131136988706, "grad_norm": 1.34375, "learning_rate": 0.00018765888699141044, "loss": 2.9971, "step": 8611 }, { "epoch": 0.40315992743870327, "grad_norm": 1.71875, "learning_rate": 0.00018765607768526357, "loss": 3.4359, "step": 8612 }, { "epoch": 0.4032067411785359, "grad_norm": 1.5, "learning_rate": 0.00018765326808043372, "loss": 2.9771, "step": 8613 }, { "epoch": 0.40325355491836856, "grad_norm": 1.3671875, "learning_rate": 0.0001876504581769304, "loss": 3.1702, "step": 8614 }, { "epoch": 0.4033003686582012, "grad_norm": 1.328125, "learning_rate": 0.00018764764797476322, "loss": 3.0282, "step": 8615 }, { "epoch": 0.4033471823980338, "grad_norm": 2.140625, "learning_rate": 0.00018764483747394178, "loss": 3.1112, "step": 8616 }, { "epoch": 0.4033939961378665, "grad_norm": 1.375, "learning_rate": 0.00018764202667447558, "loss": 3.2396, "step": 8617 }, { "epoch": 0.4034408098776991, "grad_norm": 1.609375, "learning_rate": 0.00018763921557637424, "loss": 3.1212, "step": 8618 }, { "epoch": 0.40348762361753177, "grad_norm": 1.6015625, "learning_rate": 0.00018763640417964736, "loss": 3.3173, "step": 8619 }, { "epoch": 0.4035344373573644, "grad_norm": 1.3125, "learning_rate": 0.0001876335924843045, "loss": 3.2809, "step": 8620 }, { "epoch": 0.403581251097197, "grad_norm": 1.578125, "learning_rate": 0.0001876307804903552, "loss": 3.427, "step": 8621 }, { "epoch": 0.4036280648370297, "grad_norm": 1.1484375, "learning_rate": 0.00018762796819780911, "loss": 3.1464, "step": 8622 }, { "epoch": 0.4036748785768623, "grad_norm": 1.0546875, "learning_rate": 0.00018762515560667579, "loss": 3.593, "step": 8623 }, { "epoch": 0.40372169231669497, "grad_norm": 2.328125, "learning_rate": 0.0001876223427169648, "loss": 3.0247, "step": 8624 }, { "epoch": 0.4037685060565276, "grad_norm": 1.5546875, "learning_rate": 0.00018761952952868577, "loss": 3.4581, "step": 8625 }, { "epoch": 0.4038153197963602, "grad_norm": 1.34375, "learning_rate": 0.00018761671604184822, "loss": 3.2307, "step": 8626 }, { "epoch": 0.4038621335361929, "grad_norm": 1.578125, "learning_rate": 0.0001876139022564618, "loss": 3.1149, "step": 8627 }, { "epoch": 0.4039089472760255, "grad_norm": 1.4453125, "learning_rate": 0.00018761108817253608, "loss": 3.1603, "step": 8628 }, { "epoch": 0.4039557610158582, "grad_norm": 1.1796875, "learning_rate": 0.0001876082737900806, "loss": 3.0604, "step": 8629 }, { "epoch": 0.4040025747556908, "grad_norm": 1.375, "learning_rate": 0.00018760545910910504, "loss": 3.1238, "step": 8630 }, { "epoch": 0.4040493884955234, "grad_norm": 1.3984375, "learning_rate": 0.00018760264412961893, "loss": 2.8389, "step": 8631 }, { "epoch": 0.4040962022353561, "grad_norm": 1.3203125, "learning_rate": 0.00018759982885163185, "loss": 2.9041, "step": 8632 }, { "epoch": 0.4041430159751887, "grad_norm": 1.8984375, "learning_rate": 0.00018759701327515345, "loss": 3.3347, "step": 8633 }, { "epoch": 0.4041898297150214, "grad_norm": 1.3046875, "learning_rate": 0.00018759419740019327, "loss": 3.1445, "step": 8634 }, { "epoch": 0.404236643454854, "grad_norm": 1.1015625, "learning_rate": 0.0001875913812267609, "loss": 2.8836, "step": 8635 }, { "epoch": 0.4042834571946866, "grad_norm": 1.2890625, "learning_rate": 0.000187588564754866, "loss": 3.383, "step": 8636 }, { "epoch": 0.4043302709345193, "grad_norm": 1.6640625, "learning_rate": 0.00018758574798451814, "loss": 3.1786, "step": 8637 }, { "epoch": 0.4043770846743519, "grad_norm": 1.0390625, "learning_rate": 0.00018758293091572688, "loss": 2.8466, "step": 8638 }, { "epoch": 0.4044238984141846, "grad_norm": 1.4609375, "learning_rate": 0.00018758011354850184, "loss": 2.8347, "step": 8639 }, { "epoch": 0.4044707121540172, "grad_norm": 1.546875, "learning_rate": 0.00018757729588285264, "loss": 3.4303, "step": 8640 }, { "epoch": 0.40451752589384987, "grad_norm": 2.21875, "learning_rate": 0.00018757447791878886, "loss": 3.5225, "step": 8641 }, { "epoch": 0.4045643396336825, "grad_norm": 1.359375, "learning_rate": 0.0001875716596563201, "loss": 2.985, "step": 8642 }, { "epoch": 0.4046111533735151, "grad_norm": 1.8203125, "learning_rate": 0.000187568841095456, "loss": 3.2582, "step": 8643 }, { "epoch": 0.4046579671133478, "grad_norm": 1.421875, "learning_rate": 0.0001875660222362061, "loss": 2.9856, "step": 8644 }, { "epoch": 0.4047047808531804, "grad_norm": 1.5234375, "learning_rate": 0.00018756320307858007, "loss": 3.6231, "step": 8645 }, { "epoch": 0.4047515945930131, "grad_norm": 1.4765625, "learning_rate": 0.00018756038362258747, "loss": 3.3459, "step": 8646 }, { "epoch": 0.4047984083328457, "grad_norm": 1.2734375, "learning_rate": 0.00018755756386823792, "loss": 2.8251, "step": 8647 }, { "epoch": 0.4048452220726783, "grad_norm": 1.2578125, "learning_rate": 0.00018755474381554105, "loss": 3.1302, "step": 8648 }, { "epoch": 0.404892035812511, "grad_norm": 1.734375, "learning_rate": 0.00018755192346450646, "loss": 3.0577, "step": 8649 }, { "epoch": 0.4049388495523436, "grad_norm": 1.2265625, "learning_rate": 0.0001875491028151437, "loss": 3.4263, "step": 8650 }, { "epoch": 0.4049856632921763, "grad_norm": 1.0546875, "learning_rate": 0.00018754628186746247, "loss": 3.2251, "step": 8651 }, { "epoch": 0.4050324770320089, "grad_norm": 1.2734375, "learning_rate": 0.00018754346062147236, "loss": 3.1619, "step": 8652 }, { "epoch": 0.4050792907718415, "grad_norm": 1.359375, "learning_rate": 0.00018754063907718295, "loss": 2.8136, "step": 8653 }, { "epoch": 0.4051261045116742, "grad_norm": 1.2421875, "learning_rate": 0.00018753781723460387, "loss": 2.9644, "step": 8654 }, { "epoch": 0.4051729182515068, "grad_norm": 1.46875, "learning_rate": 0.00018753499509374476, "loss": 2.99, "step": 8655 }, { "epoch": 0.4052197319913395, "grad_norm": 1.6953125, "learning_rate": 0.00018753217265461518, "loss": 3.1262, "step": 8656 }, { "epoch": 0.4052665457311721, "grad_norm": 1.71875, "learning_rate": 0.00018752934991722478, "loss": 3.1979, "step": 8657 }, { "epoch": 0.4053133594710047, "grad_norm": 1.15625, "learning_rate": 0.0001875265268815832, "loss": 3.3873, "step": 8658 }, { "epoch": 0.4053601732108374, "grad_norm": 1.8359375, "learning_rate": 0.00018752370354770004, "loss": 3.7373, "step": 8659 }, { "epoch": 0.40540698695067, "grad_norm": 1.1015625, "learning_rate": 0.00018752087991558491, "loss": 3.4559, "step": 8660 }, { "epoch": 0.4054538006905027, "grad_norm": 1.5859375, "learning_rate": 0.00018751805598524746, "loss": 3.1345, "step": 8661 }, { "epoch": 0.4055006144303353, "grad_norm": 1.4921875, "learning_rate": 0.00018751523175669724, "loss": 3.0645, "step": 8662 }, { "epoch": 0.4055474281701679, "grad_norm": 1.9921875, "learning_rate": 0.000187512407229944, "loss": 3.2795, "step": 8663 }, { "epoch": 0.4055942419100006, "grad_norm": 1.5, "learning_rate": 0.00018750958240499723, "loss": 3.3374, "step": 8664 }, { "epoch": 0.4056410556498332, "grad_norm": 1.7421875, "learning_rate": 0.00018750675728186665, "loss": 2.5503, "step": 8665 }, { "epoch": 0.4056878693896659, "grad_norm": 1.2578125, "learning_rate": 0.00018750393186056186, "loss": 2.9286, "step": 8666 }, { "epoch": 0.4057346831294985, "grad_norm": 1.203125, "learning_rate": 0.00018750110614109244, "loss": 3.262, "step": 8667 }, { "epoch": 0.4057814968693311, "grad_norm": 1.1953125, "learning_rate": 0.0001874982801234681, "loss": 3.0439, "step": 8668 }, { "epoch": 0.4058283106091638, "grad_norm": 1.578125, "learning_rate": 0.0001874954538076984, "loss": 3.3831, "step": 8669 }, { "epoch": 0.4058751243489964, "grad_norm": 1.40625, "learning_rate": 0.00018749262719379298, "loss": 3.3963, "step": 8670 }, { "epoch": 0.4059219380888291, "grad_norm": 1.4375, "learning_rate": 0.00018748980028176156, "loss": 2.9435, "step": 8671 }, { "epoch": 0.4059687518286617, "grad_norm": 1.3125, "learning_rate": 0.00018748697307161366, "loss": 3.0368, "step": 8672 }, { "epoch": 0.40601556556849433, "grad_norm": 1.171875, "learning_rate": 0.00018748414556335894, "loss": 2.9641, "step": 8673 }, { "epoch": 0.406062379308327, "grad_norm": 1.953125, "learning_rate": 0.00018748131775700707, "loss": 3.1879, "step": 8674 }, { "epoch": 0.4061091930481596, "grad_norm": 1.390625, "learning_rate": 0.00018747848965256766, "loss": 2.9411, "step": 8675 }, { "epoch": 0.4061560067879923, "grad_norm": 1.3359375, "learning_rate": 0.00018747566125005037, "loss": 3.3049, "step": 8676 }, { "epoch": 0.4062028205278249, "grad_norm": 1.5078125, "learning_rate": 0.0001874728325494648, "loss": 3.0037, "step": 8677 }, { "epoch": 0.40624963426765753, "grad_norm": 1.8984375, "learning_rate": 0.0001874700035508206, "loss": 3.171, "step": 8678 }, { "epoch": 0.4062964480074902, "grad_norm": 1.375, "learning_rate": 0.00018746717425412746, "loss": 3.8225, "step": 8679 }, { "epoch": 0.4063432617473228, "grad_norm": 1.5234375, "learning_rate": 0.00018746434465939494, "loss": 3.3847, "step": 8680 }, { "epoch": 0.4063900754871555, "grad_norm": 1.265625, "learning_rate": 0.00018746151476663273, "loss": 2.9033, "step": 8681 }, { "epoch": 0.4064368892269881, "grad_norm": 1.28125, "learning_rate": 0.0001874586845758505, "loss": 2.7915, "step": 8682 }, { "epoch": 0.40648370296682074, "grad_norm": 1.171875, "learning_rate": 0.0001874558540870578, "loss": 2.9307, "step": 8683 }, { "epoch": 0.4065305167066534, "grad_norm": 1.234375, "learning_rate": 0.0001874530233002644, "loss": 3.3286, "step": 8684 }, { "epoch": 0.40657733044648603, "grad_norm": 1.421875, "learning_rate": 0.0001874501922154798, "loss": 3.1244, "step": 8685 }, { "epoch": 0.4066241441863187, "grad_norm": 1.921875, "learning_rate": 0.00018744736083271376, "loss": 2.7132, "step": 8686 }, { "epoch": 0.4066709579261513, "grad_norm": 1.859375, "learning_rate": 0.00018744452915197592, "loss": 3.3671, "step": 8687 }, { "epoch": 0.40671777166598394, "grad_norm": 1.2421875, "learning_rate": 0.0001874416971732759, "loss": 2.7243, "step": 8688 }, { "epoch": 0.4067645854058166, "grad_norm": 1.2109375, "learning_rate": 0.0001874388648966233, "loss": 3.1727, "step": 8689 }, { "epoch": 0.40681139914564923, "grad_norm": 1.6328125, "learning_rate": 0.00018743603232202784, "loss": 3.1324, "step": 8690 }, { "epoch": 0.4068582128854819, "grad_norm": 1.6640625, "learning_rate": 0.00018743319944949919, "loss": 3.1426, "step": 8691 }, { "epoch": 0.4069050266253145, "grad_norm": 1.7421875, "learning_rate": 0.00018743036627904693, "loss": 3.7511, "step": 8692 }, { "epoch": 0.40695184036514714, "grad_norm": 1.40625, "learning_rate": 0.00018742753281068077, "loss": 2.884, "step": 8693 }, { "epoch": 0.4069986541049798, "grad_norm": 1.421875, "learning_rate": 0.00018742469904441033, "loss": 2.8293, "step": 8694 }, { "epoch": 0.40704546784481244, "grad_norm": 1.4921875, "learning_rate": 0.0001874218649802453, "loss": 2.8296, "step": 8695 }, { "epoch": 0.4070922815846451, "grad_norm": 1.328125, "learning_rate": 0.0001874190306181953, "loss": 2.9755, "step": 8696 }, { "epoch": 0.40713909532447773, "grad_norm": 1.390625, "learning_rate": 0.00018741619595827, "loss": 2.9694, "step": 8697 }, { "epoch": 0.40718590906431035, "grad_norm": 1.15625, "learning_rate": 0.0001874133610004791, "loss": 3.2253, "step": 8698 }, { "epoch": 0.407232722804143, "grad_norm": 1.1171875, "learning_rate": 0.0001874105257448322, "loss": 2.0265, "step": 8699 }, { "epoch": 0.40727953654397564, "grad_norm": 1.546875, "learning_rate": 0.00018740769019133898, "loss": 3.2319, "step": 8700 }, { "epoch": 0.4073263502838083, "grad_norm": 1.6484375, "learning_rate": 0.00018740485434000915, "loss": 3.231, "step": 8701 }, { "epoch": 0.40737316402364093, "grad_norm": 1.109375, "learning_rate": 0.00018740201819085229, "loss": 2.986, "step": 8702 }, { "epoch": 0.4074199777634736, "grad_norm": 1.3359375, "learning_rate": 0.0001873991817438781, "loss": 3.0065, "step": 8703 }, { "epoch": 0.4074667915033062, "grad_norm": 1.421875, "learning_rate": 0.00018739634499909626, "loss": 2.4616, "step": 8704 }, { "epoch": 0.40751360524313884, "grad_norm": 1.3125, "learning_rate": 0.00018739350795651645, "loss": 3.0649, "step": 8705 }, { "epoch": 0.4075604189829715, "grad_norm": 1.3828125, "learning_rate": 0.0001873906706161483, "loss": 3.0447, "step": 8706 }, { "epoch": 0.40760723272280414, "grad_norm": 1.140625, "learning_rate": 0.00018738783297800148, "loss": 3.2547, "step": 8707 }, { "epoch": 0.4076540464626368, "grad_norm": 1.640625, "learning_rate": 0.00018738499504208567, "loss": 3.1361, "step": 8708 }, { "epoch": 0.40770086020246943, "grad_norm": 1.203125, "learning_rate": 0.00018738215680841057, "loss": 3.0448, "step": 8709 }, { "epoch": 0.40774767394230205, "grad_norm": 1.375, "learning_rate": 0.0001873793182769858, "loss": 3.121, "step": 8710 }, { "epoch": 0.4077944876821347, "grad_norm": 1.28125, "learning_rate": 0.00018737647944782105, "loss": 3.4287, "step": 8711 }, { "epoch": 0.40784130142196734, "grad_norm": 1.390625, "learning_rate": 0.00018737364032092598, "loss": 3.2833, "step": 8712 }, { "epoch": 0.4078881151618, "grad_norm": 1.265625, "learning_rate": 0.0001873708008963103, "loss": 2.6072, "step": 8713 }, { "epoch": 0.40793492890163263, "grad_norm": 1.2734375, "learning_rate": 0.00018736796117398366, "loss": 2.9418, "step": 8714 }, { "epoch": 0.40798174264146525, "grad_norm": 1.3984375, "learning_rate": 0.00018736512115395576, "loss": 3.3697, "step": 8715 }, { "epoch": 0.4080285563812979, "grad_norm": 1.078125, "learning_rate": 0.00018736228083623624, "loss": 2.7371, "step": 8716 }, { "epoch": 0.40807537012113054, "grad_norm": 1.078125, "learning_rate": 0.00018735944022083484, "loss": 3.0564, "step": 8717 }, { "epoch": 0.4081221838609632, "grad_norm": 1.5234375, "learning_rate": 0.00018735659930776115, "loss": 3.158, "step": 8718 }, { "epoch": 0.40816899760079584, "grad_norm": 1.171875, "learning_rate": 0.0001873537580970249, "loss": 4.2411, "step": 8719 }, { "epoch": 0.40821581134062845, "grad_norm": 2.359375, "learning_rate": 0.00018735091658863577, "loss": 3.1656, "step": 8720 }, { "epoch": 0.40826262508046113, "grad_norm": 1.40625, "learning_rate": 0.00018734807478260345, "loss": 2.8357, "step": 8721 }, { "epoch": 0.40830943882029375, "grad_norm": 1.4140625, "learning_rate": 0.00018734523267893762, "loss": 3.1692, "step": 8722 }, { "epoch": 0.4083562525601264, "grad_norm": 1.46875, "learning_rate": 0.00018734239027764794, "loss": 2.8454, "step": 8723 }, { "epoch": 0.40840306629995904, "grad_norm": 1.3046875, "learning_rate": 0.00018733954757874414, "loss": 3.2381, "step": 8724 }, { "epoch": 0.40844988003979166, "grad_norm": 1.265625, "learning_rate": 0.00018733670458223584, "loss": 3.185, "step": 8725 }, { "epoch": 0.40849669377962433, "grad_norm": 1.2265625, "learning_rate": 0.0001873338612881328, "loss": 3.1525, "step": 8726 }, { "epoch": 0.40854350751945695, "grad_norm": 1.6015625, "learning_rate": 0.00018733101769644468, "loss": 3.1539, "step": 8727 }, { "epoch": 0.4085903212592896, "grad_norm": 1.6015625, "learning_rate": 0.00018732817380718114, "loss": 3.279, "step": 8728 }, { "epoch": 0.40863713499912224, "grad_norm": 1.171875, "learning_rate": 0.00018732532962035191, "loss": 3.0061, "step": 8729 }, { "epoch": 0.40868394873895486, "grad_norm": 1.5, "learning_rate": 0.00018732248513596668, "loss": 3.2157, "step": 8730 }, { "epoch": 0.40873076247878753, "grad_norm": 1.5, "learning_rate": 0.0001873196403540351, "loss": 2.9726, "step": 8731 }, { "epoch": 0.40877757621862015, "grad_norm": 1.3203125, "learning_rate": 0.00018731679527456694, "loss": 2.9236, "step": 8732 }, { "epoch": 0.4088243899584528, "grad_norm": 1.1640625, "learning_rate": 0.0001873139498975718, "loss": 3.553, "step": 8733 }, { "epoch": 0.40887120369828545, "grad_norm": 1.9140625, "learning_rate": 0.00018731110422305945, "loss": 4.9316, "step": 8734 }, { "epoch": 0.40891801743811806, "grad_norm": 2.203125, "learning_rate": 0.00018730825825103955, "loss": 3.1702, "step": 8735 }, { "epoch": 0.40896483117795074, "grad_norm": 1.2265625, "learning_rate": 0.0001873054119815218, "loss": 3.1025, "step": 8736 }, { "epoch": 0.40901164491778336, "grad_norm": 1.109375, "learning_rate": 0.0001873025654145159, "loss": 2.7802, "step": 8737 }, { "epoch": 0.40905845865761603, "grad_norm": 1.75, "learning_rate": 0.00018729971855003158, "loss": 2.8574, "step": 8738 }, { "epoch": 0.40910527239744865, "grad_norm": 1.8203125, "learning_rate": 0.0001872968713880785, "loss": 3.4343, "step": 8739 }, { "epoch": 0.40915208613728127, "grad_norm": 1.8984375, "learning_rate": 0.00018729402392866638, "loss": 3.0707, "step": 8740 }, { "epoch": 0.40919889987711394, "grad_norm": 1.078125, "learning_rate": 0.00018729117617180492, "loss": 2.8581, "step": 8741 }, { "epoch": 0.40924571361694656, "grad_norm": 1.0703125, "learning_rate": 0.00018728832811750382, "loss": 2.9158, "step": 8742 }, { "epoch": 0.40929252735677923, "grad_norm": 1.2734375, "learning_rate": 0.00018728547976577277, "loss": 3.0548, "step": 8743 }, { "epoch": 0.40933934109661185, "grad_norm": 1.8984375, "learning_rate": 0.00018728263111662152, "loss": 2.5561, "step": 8744 }, { "epoch": 0.40938615483644447, "grad_norm": 1.265625, "learning_rate": 0.00018727978217005973, "loss": 3.0567, "step": 8745 }, { "epoch": 0.40943296857627715, "grad_norm": 1.9609375, "learning_rate": 0.00018727693292609713, "loss": 3.1073, "step": 8746 }, { "epoch": 0.40947978231610976, "grad_norm": 2.25, "learning_rate": 0.00018727408338474347, "loss": 3.1472, "step": 8747 }, { "epoch": 0.40952659605594244, "grad_norm": 1.1484375, "learning_rate": 0.00018727123354600835, "loss": 2.7649, "step": 8748 }, { "epoch": 0.40957340979577506, "grad_norm": 1.3671875, "learning_rate": 0.00018726838340990158, "loss": 2.9272, "step": 8749 }, { "epoch": 0.4096202235356077, "grad_norm": 1.3203125, "learning_rate": 0.00018726553297643283, "loss": 3.2683, "step": 8750 }, { "epoch": 0.40966703727544035, "grad_norm": 1.296875, "learning_rate": 0.00018726268224561184, "loss": 3.0437, "step": 8751 }, { "epoch": 0.40971385101527297, "grad_norm": 1.5546875, "learning_rate": 0.00018725983121744828, "loss": 2.9781, "step": 8752 }, { "epoch": 0.40976066475510564, "grad_norm": 1.7890625, "learning_rate": 0.0001872569798919519, "loss": 2.5567, "step": 8753 }, { "epoch": 0.40980747849493826, "grad_norm": 1.5625, "learning_rate": 0.00018725412826913238, "loss": 2.9034, "step": 8754 }, { "epoch": 0.4098542922347709, "grad_norm": 1.140625, "learning_rate": 0.00018725127634899947, "loss": 3.0555, "step": 8755 }, { "epoch": 0.40990110597460355, "grad_norm": 1.8671875, "learning_rate": 0.00018724842413156293, "loss": 3.0533, "step": 8756 }, { "epoch": 0.40994791971443617, "grad_norm": 1.171875, "learning_rate": 0.00018724557161683237, "loss": 3.1385, "step": 8757 }, { "epoch": 0.40999473345426884, "grad_norm": 1.640625, "learning_rate": 0.0001872427188048176, "loss": 2.8857, "step": 8758 }, { "epoch": 0.41004154719410146, "grad_norm": 1.609375, "learning_rate": 0.00018723986569552827, "loss": 2.889, "step": 8759 }, { "epoch": 0.41008836093393414, "grad_norm": 1.4765625, "learning_rate": 0.00018723701228897416, "loss": 2.7539, "step": 8760 }, { "epoch": 0.41013517467376676, "grad_norm": 1.453125, "learning_rate": 0.000187234158585165, "loss": 3.8717, "step": 8761 }, { "epoch": 0.4101819884135994, "grad_norm": 1.5390625, "learning_rate": 0.00018723130458411048, "loss": 3.3388, "step": 8762 }, { "epoch": 0.41022880215343205, "grad_norm": 1.2109375, "learning_rate": 0.0001872284502858203, "loss": 2.9459, "step": 8763 }, { "epoch": 0.41027561589326467, "grad_norm": 1.2109375, "learning_rate": 0.00018722559569030422, "loss": 3.4611, "step": 8764 }, { "epoch": 0.41032242963309734, "grad_norm": 1.8125, "learning_rate": 0.000187222740797572, "loss": 3.2949, "step": 8765 }, { "epoch": 0.41036924337292996, "grad_norm": 1.1484375, "learning_rate": 0.0001872198856076333, "loss": 3.1965, "step": 8766 }, { "epoch": 0.4104160571127626, "grad_norm": 1.4296875, "learning_rate": 0.00018721703012049789, "loss": 3.067, "step": 8767 }, { "epoch": 0.41046287085259525, "grad_norm": 1.125, "learning_rate": 0.0001872141743361755, "loss": 4.4951, "step": 8768 }, { "epoch": 0.41050968459242787, "grad_norm": 1.3359375, "learning_rate": 0.00018721131825467585, "loss": 3.3847, "step": 8769 }, { "epoch": 0.41055649833226054, "grad_norm": 1.671875, "learning_rate": 0.0001872084618760087, "loss": 3.2759, "step": 8770 }, { "epoch": 0.41060331207209316, "grad_norm": 1.4453125, "learning_rate": 0.00018720560520018372, "loss": 3.0626, "step": 8771 }, { "epoch": 0.4106501258119258, "grad_norm": 1.3203125, "learning_rate": 0.00018720274822721069, "loss": 3.0973, "step": 8772 }, { "epoch": 0.41069693955175846, "grad_norm": 1.421875, "learning_rate": 0.00018719989095709934, "loss": 2.9693, "step": 8773 }, { "epoch": 0.4107437532915911, "grad_norm": 1.3828125, "learning_rate": 0.00018719703338985938, "loss": 2.8607, "step": 8774 }, { "epoch": 0.41079056703142375, "grad_norm": 1.3671875, "learning_rate": 0.00018719417552550058, "loss": 3.3651, "step": 8775 }, { "epoch": 0.41083738077125637, "grad_norm": 1.6015625, "learning_rate": 0.00018719131736403267, "loss": 3.161, "step": 8776 }, { "epoch": 0.410884194511089, "grad_norm": 1.0703125, "learning_rate": 0.0001871884589054654, "loss": 2.916, "step": 8777 }, { "epoch": 0.41093100825092166, "grad_norm": 1.1640625, "learning_rate": 0.0001871856001498085, "loss": 3.0917, "step": 8778 }, { "epoch": 0.4109778219907543, "grad_norm": 1.296875, "learning_rate": 0.0001871827410970717, "loss": 2.664, "step": 8779 }, { "epoch": 0.41102463573058695, "grad_norm": 3.0625, "learning_rate": 0.0001871798817472647, "loss": 2.694, "step": 8780 }, { "epoch": 0.41107144947041957, "grad_norm": 1.6328125, "learning_rate": 0.00018717702210039737, "loss": 3.0677, "step": 8781 }, { "epoch": 0.4111182632102522, "grad_norm": 1.3203125, "learning_rate": 0.00018717416215647932, "loss": 2.2057, "step": 8782 }, { "epoch": 0.41116507695008486, "grad_norm": 1.96875, "learning_rate": 0.00018717130191552036, "loss": 3.1619, "step": 8783 }, { "epoch": 0.4112118906899175, "grad_norm": 1.2421875, "learning_rate": 0.00018716844137753026, "loss": 3.2051, "step": 8784 }, { "epoch": 0.41125870442975015, "grad_norm": 1.4453125, "learning_rate": 0.00018716558054251872, "loss": 2.979, "step": 8785 }, { "epoch": 0.4113055181695828, "grad_norm": 1.5, "learning_rate": 0.0001871627194104955, "loss": 2.7646, "step": 8786 }, { "epoch": 0.4113523319094154, "grad_norm": 1.3671875, "learning_rate": 0.00018715985798147032, "loss": 2.7459, "step": 8787 }, { "epoch": 0.41139914564924807, "grad_norm": 1.84375, "learning_rate": 0.000187156996255453, "loss": 3.339, "step": 8788 }, { "epoch": 0.4114459593890807, "grad_norm": 1.328125, "learning_rate": 0.00018715413423245325, "loss": 2.7093, "step": 8789 }, { "epoch": 0.41149277312891336, "grad_norm": 1.90625, "learning_rate": 0.00018715127191248082, "loss": 3.1041, "step": 8790 }, { "epoch": 0.411539586868746, "grad_norm": 1.109375, "learning_rate": 0.00018714840929554547, "loss": 2.7604, "step": 8791 }, { "epoch": 0.4115864006085786, "grad_norm": 1.1796875, "learning_rate": 0.00018714554638165693, "loss": 3.0814, "step": 8792 }, { "epoch": 0.41163321434841127, "grad_norm": 1.3984375, "learning_rate": 0.00018714268317082498, "loss": 2.7978, "step": 8793 }, { "epoch": 0.4116800280882439, "grad_norm": 1.1875, "learning_rate": 0.00018713981966305942, "loss": 3.3141, "step": 8794 }, { "epoch": 0.41172684182807656, "grad_norm": 1.1953125, "learning_rate": 0.00018713695585836993, "loss": 3.346, "step": 8795 }, { "epoch": 0.4117736555679092, "grad_norm": 1.375, "learning_rate": 0.00018713409175676632, "loss": 3.1393, "step": 8796 }, { "epoch": 0.4118204693077418, "grad_norm": 1.6328125, "learning_rate": 0.0001871312273582583, "loss": 3.2558, "step": 8797 }, { "epoch": 0.4118672830475745, "grad_norm": 1.625, "learning_rate": 0.00018712836266285567, "loss": 3.3826, "step": 8798 }, { "epoch": 0.4119140967874071, "grad_norm": 1.5546875, "learning_rate": 0.00018712549767056818, "loss": 3.13, "step": 8799 }, { "epoch": 0.41196091052723977, "grad_norm": 1.421875, "learning_rate": 0.0001871226323814056, "loss": 3.025, "step": 8800 }, { "epoch": 0.4120077242670724, "grad_norm": 1.9609375, "learning_rate": 0.00018711976679537768, "loss": 3.3789, "step": 8801 }, { "epoch": 0.412054538006905, "grad_norm": 1.5390625, "learning_rate": 0.00018711690091249419, "loss": 3.1135, "step": 8802 }, { "epoch": 0.4121013517467377, "grad_norm": 1.296875, "learning_rate": 0.0001871140347327649, "loss": 3.2849, "step": 8803 }, { "epoch": 0.4121481654865703, "grad_norm": 1.9453125, "learning_rate": 0.00018711116825619956, "loss": 3.5768, "step": 8804 }, { "epoch": 0.41219497922640297, "grad_norm": 1.4375, "learning_rate": 0.00018710830148280797, "loss": 3.3899, "step": 8805 }, { "epoch": 0.4122417929662356, "grad_norm": 1.2578125, "learning_rate": 0.00018710543441259985, "loss": 3.1793, "step": 8806 }, { "epoch": 0.4122886067060682, "grad_norm": 1.2578125, "learning_rate": 0.00018710256704558502, "loss": 3.2285, "step": 8807 }, { "epoch": 0.4123354204459009, "grad_norm": 1.2578125, "learning_rate": 0.00018709969938177323, "loss": 2.9541, "step": 8808 }, { "epoch": 0.4123822341857335, "grad_norm": 1.28125, "learning_rate": 0.00018709683142117424, "loss": 3.0247, "step": 8809 }, { "epoch": 0.4124290479255662, "grad_norm": 1.34375, "learning_rate": 0.00018709396316379784, "loss": 3.272, "step": 8810 }, { "epoch": 0.4124758616653988, "grad_norm": 1.3515625, "learning_rate": 0.00018709109460965379, "loss": 3.4384, "step": 8811 }, { "epoch": 0.4125226754052314, "grad_norm": 1.8515625, "learning_rate": 0.00018708822575875186, "loss": 3.0512, "step": 8812 }, { "epoch": 0.4125694891450641, "grad_norm": 1.46875, "learning_rate": 0.00018708535661110182, "loss": 3.0327, "step": 8813 }, { "epoch": 0.4126163028848967, "grad_norm": 1.8828125, "learning_rate": 0.00018708248716671347, "loss": 3.3663, "step": 8814 }, { "epoch": 0.4126631166247294, "grad_norm": 1.359375, "learning_rate": 0.00018707961742559661, "loss": 3.2744, "step": 8815 }, { "epoch": 0.412709930364562, "grad_norm": 1.1171875, "learning_rate": 0.00018707674738776094, "loss": 3.1329, "step": 8816 }, { "epoch": 0.4127567441043946, "grad_norm": 1.34375, "learning_rate": 0.00018707387705321632, "loss": 3.5056, "step": 8817 }, { "epoch": 0.4128035578442273, "grad_norm": 1.546875, "learning_rate": 0.00018707100642197252, "loss": 2.9772, "step": 8818 }, { "epoch": 0.4128503715840599, "grad_norm": 2.078125, "learning_rate": 0.00018706813549403925, "loss": 3.7623, "step": 8819 }, { "epoch": 0.4128971853238926, "grad_norm": 1.5234375, "learning_rate": 0.00018706526426942637, "loss": 3.1459, "step": 8820 }, { "epoch": 0.4129439990637252, "grad_norm": 1.3125, "learning_rate": 0.00018706239274814362, "loss": 2.9361, "step": 8821 }, { "epoch": 0.41299081280355787, "grad_norm": 1.671875, "learning_rate": 0.0001870595209302008, "loss": 2.8708, "step": 8822 }, { "epoch": 0.4130376265433905, "grad_norm": 1.3984375, "learning_rate": 0.00018705664881560772, "loss": 3.0689, "step": 8823 }, { "epoch": 0.4130844402832231, "grad_norm": 1.3984375, "learning_rate": 0.0001870537764043741, "loss": 3.2345, "step": 8824 }, { "epoch": 0.4131312540230558, "grad_norm": 1.046875, "learning_rate": 0.0001870509036965098, "loss": 2.8904, "step": 8825 }, { "epoch": 0.4131780677628884, "grad_norm": 1.1328125, "learning_rate": 0.00018704803069202454, "loss": 3.1779, "step": 8826 }, { "epoch": 0.4132248815027211, "grad_norm": 1.6875, "learning_rate": 0.0001870451573909282, "loss": 3.2778, "step": 8827 }, { "epoch": 0.4132716952425537, "grad_norm": 1.3671875, "learning_rate": 0.0001870422837932305, "loss": 3.1492, "step": 8828 }, { "epoch": 0.4133185089823863, "grad_norm": 1.2265625, "learning_rate": 0.00018703940989894121, "loss": 2.7585, "step": 8829 }, { "epoch": 0.413365322722219, "grad_norm": 1.296875, "learning_rate": 0.0001870365357080702, "loss": 3.1798, "step": 8830 }, { "epoch": 0.4134121364620516, "grad_norm": 1.3828125, "learning_rate": 0.0001870336612206272, "loss": 3.1129, "step": 8831 }, { "epoch": 0.4134589502018843, "grad_norm": 1.2421875, "learning_rate": 0.00018703078643662207, "loss": 2.9216, "step": 8832 }, { "epoch": 0.4135057639417169, "grad_norm": 1.375, "learning_rate": 0.00018702791135606455, "loss": 2.9021, "step": 8833 }, { "epoch": 0.4135525776815495, "grad_norm": 1.8125, "learning_rate": 0.00018702503597896444, "loss": 3.4918, "step": 8834 }, { "epoch": 0.4135993914213822, "grad_norm": 1.484375, "learning_rate": 0.00018702216030533155, "loss": 3.4528, "step": 8835 }, { "epoch": 0.4136462051612148, "grad_norm": 1.2734375, "learning_rate": 0.00018701928433517568, "loss": 3.1002, "step": 8836 }, { "epoch": 0.4136930189010475, "grad_norm": 1.4375, "learning_rate": 0.00018701640806850664, "loss": 3.2162, "step": 8837 }, { "epoch": 0.4137398326408801, "grad_norm": 1.28125, "learning_rate": 0.00018701353150533422, "loss": 3.339, "step": 8838 }, { "epoch": 0.4137866463807127, "grad_norm": 1.3046875, "learning_rate": 0.0001870106546456682, "loss": 3.2367, "step": 8839 }, { "epoch": 0.4138334601205454, "grad_norm": 1.015625, "learning_rate": 0.00018700777748951843, "loss": 3.2022, "step": 8840 }, { "epoch": 0.413880273860378, "grad_norm": 1.1953125, "learning_rate": 0.00018700490003689465, "loss": 3.1015, "step": 8841 }, { "epoch": 0.4139270876002107, "grad_norm": 1.25, "learning_rate": 0.00018700202228780674, "loss": 3.1997, "step": 8842 }, { "epoch": 0.4139739013400433, "grad_norm": 1.2109375, "learning_rate": 0.00018699914424226446, "loss": 2.8174, "step": 8843 }, { "epoch": 0.4140207150798759, "grad_norm": 1.5, "learning_rate": 0.0001869962659002776, "loss": 3.442, "step": 8844 }, { "epoch": 0.4140675288197086, "grad_norm": 1.5546875, "learning_rate": 0.00018699338726185604, "loss": 3.2685, "step": 8845 }, { "epoch": 0.4141143425595412, "grad_norm": 1.2421875, "learning_rate": 0.0001869905083270095, "loss": 3.1999, "step": 8846 }, { "epoch": 0.4141611562993739, "grad_norm": 1.1171875, "learning_rate": 0.00018698762909574786, "loss": 3.0819, "step": 8847 }, { "epoch": 0.4142079700392065, "grad_norm": 1.6171875, "learning_rate": 0.00018698474956808087, "loss": 2.949, "step": 8848 }, { "epoch": 0.4142547837790391, "grad_norm": 1.734375, "learning_rate": 0.0001869818697440184, "loss": 3.3481, "step": 8849 }, { "epoch": 0.4143015975188718, "grad_norm": 1.46875, "learning_rate": 0.0001869789896235702, "loss": 2.5385, "step": 8850 }, { "epoch": 0.4143484112587044, "grad_norm": 1.5390625, "learning_rate": 0.00018697610920674618, "loss": 2.382, "step": 8851 }, { "epoch": 0.4143952249985371, "grad_norm": 1.3125, "learning_rate": 0.00018697322849355605, "loss": 3.1048, "step": 8852 }, { "epoch": 0.4144420387383697, "grad_norm": 1.21875, "learning_rate": 0.0001869703474840097, "loss": 3.2771, "step": 8853 }, { "epoch": 0.41448885247820233, "grad_norm": 1.71875, "learning_rate": 0.0001869674661781169, "loss": 3.2139, "step": 8854 }, { "epoch": 0.414535666218035, "grad_norm": 2.046875, "learning_rate": 0.00018696458457588747, "loss": 2.6856, "step": 8855 }, { "epoch": 0.4145824799578676, "grad_norm": 1.5703125, "learning_rate": 0.00018696170267733128, "loss": 2.9427, "step": 8856 }, { "epoch": 0.4146292936977003, "grad_norm": 1.171875, "learning_rate": 0.0001869588204824581, "loss": 3.2111, "step": 8857 }, { "epoch": 0.4146761074375329, "grad_norm": 1.3515625, "learning_rate": 0.00018695593799127776, "loss": 3.5262, "step": 8858 }, { "epoch": 0.41472292117736553, "grad_norm": 1.3984375, "learning_rate": 0.0001869530552038001, "loss": 2.881, "step": 8859 }, { "epoch": 0.4147697349171982, "grad_norm": 1.3046875, "learning_rate": 0.0001869501721200349, "loss": 2.7809, "step": 8860 }, { "epoch": 0.4148165486570308, "grad_norm": 1.46875, "learning_rate": 0.00018694728873999206, "loss": 2.7082, "step": 8861 }, { "epoch": 0.4148633623968635, "grad_norm": 1.21875, "learning_rate": 0.0001869444050636813, "loss": 3.176, "step": 8862 }, { "epoch": 0.4149101761366961, "grad_norm": 1.3984375, "learning_rate": 0.00018694152109111257, "loss": 3.4082, "step": 8863 }, { "epoch": 0.41495698987652874, "grad_norm": 1.8359375, "learning_rate": 0.0001869386368222956, "loss": 3.3461, "step": 8864 }, { "epoch": 0.4150038036163614, "grad_norm": 2.21875, "learning_rate": 0.00018693575225724028, "loss": 3.0872, "step": 8865 }, { "epoch": 0.41505061735619403, "grad_norm": 1.7734375, "learning_rate": 0.00018693286739595636, "loss": 3.0804, "step": 8866 }, { "epoch": 0.4150974310960267, "grad_norm": 1.109375, "learning_rate": 0.00018692998223845373, "loss": 2.9371, "step": 8867 }, { "epoch": 0.4151442448358593, "grad_norm": 1.2890625, "learning_rate": 0.00018692709678474226, "loss": 2.9352, "step": 8868 }, { "epoch": 0.41519105857569194, "grad_norm": 1.6171875, "learning_rate": 0.00018692421103483166, "loss": 2.9377, "step": 8869 }, { "epoch": 0.4152378723155246, "grad_norm": 1.5, "learning_rate": 0.00018692132498873188, "loss": 2.6098, "step": 8870 }, { "epoch": 0.41528468605535723, "grad_norm": 1.0859375, "learning_rate": 0.0001869184386464527, "loss": 2.6043, "step": 8871 }, { "epoch": 0.4153314997951899, "grad_norm": 1.140625, "learning_rate": 0.00018691555200800396, "loss": 3.0554, "step": 8872 }, { "epoch": 0.4153783135350225, "grad_norm": 1.1640625, "learning_rate": 0.00018691266507339551, "loss": 3.5458, "step": 8873 }, { "epoch": 0.41542512727485514, "grad_norm": 1.8046875, "learning_rate": 0.00018690977784263717, "loss": 3.854, "step": 8874 }, { "epoch": 0.4154719410146878, "grad_norm": 1.2109375, "learning_rate": 0.0001869068903157388, "loss": 3.6939, "step": 8875 }, { "epoch": 0.41551875475452044, "grad_norm": 1.203125, "learning_rate": 0.00018690400249271022, "loss": 3.2645, "step": 8876 }, { "epoch": 0.4155655684943531, "grad_norm": 1.453125, "learning_rate": 0.00018690111437356125, "loss": 2.6712, "step": 8877 }, { "epoch": 0.41561238223418573, "grad_norm": 1.3671875, "learning_rate": 0.00018689822595830178, "loss": 3.0536, "step": 8878 }, { "epoch": 0.41565919597401835, "grad_norm": 1.359375, "learning_rate": 0.00018689533724694163, "loss": 2.8053, "step": 8879 }, { "epoch": 0.415706009713851, "grad_norm": 1.375, "learning_rate": 0.0001868924482394906, "loss": 3.2199, "step": 8880 }, { "epoch": 0.41575282345368364, "grad_norm": 1.296875, "learning_rate": 0.00018688955893595864, "loss": 3.3529, "step": 8881 }, { "epoch": 0.4157996371935163, "grad_norm": 2.671875, "learning_rate": 0.00018688666933635548, "loss": 3.0553, "step": 8882 }, { "epoch": 0.41584645093334893, "grad_norm": 1.6875, "learning_rate": 0.00018688377944069102, "loss": 3.0196, "step": 8883 }, { "epoch": 0.4158932646731816, "grad_norm": 1.4765625, "learning_rate": 0.00018688088924897516, "loss": 3.1494, "step": 8884 }, { "epoch": 0.4159400784130142, "grad_norm": 1.421875, "learning_rate": 0.00018687799876121764, "loss": 2.9672, "step": 8885 }, { "epoch": 0.41598689215284684, "grad_norm": 1.5, "learning_rate": 0.00018687510797742838, "loss": 3.2709, "step": 8886 }, { "epoch": 0.4160337058926795, "grad_norm": 1.296875, "learning_rate": 0.00018687221689761717, "loss": 3.2884, "step": 8887 }, { "epoch": 0.41608051963251214, "grad_norm": 1.296875, "learning_rate": 0.00018686932552179394, "loss": 2.784, "step": 8888 }, { "epoch": 0.4161273333723448, "grad_norm": 2.21875, "learning_rate": 0.0001868664338499685, "loss": 3.2661, "step": 8889 }, { "epoch": 0.41617414711217743, "grad_norm": 1.328125, "learning_rate": 0.00018686354188215073, "loss": 3.3027, "step": 8890 }, { "epoch": 0.41622096085201005, "grad_norm": 1.203125, "learning_rate": 0.00018686064961835044, "loss": 3.2924, "step": 8891 }, { "epoch": 0.4162677745918427, "grad_norm": 1.546875, "learning_rate": 0.0001868577570585775, "loss": 3.3063, "step": 8892 }, { "epoch": 0.41631458833167534, "grad_norm": 1.4296875, "learning_rate": 0.00018685486420284177, "loss": 3.161, "step": 8893 }, { "epoch": 0.416361402071508, "grad_norm": 6.09375, "learning_rate": 0.00018685197105115312, "loss": 3.6954, "step": 8894 }, { "epoch": 0.41640821581134063, "grad_norm": 1.5078125, "learning_rate": 0.0001868490776035214, "loss": 3.1956, "step": 8895 }, { "epoch": 0.41645502955117325, "grad_norm": 1.0, "learning_rate": 0.00018684618385995648, "loss": 3.0244, "step": 8896 }, { "epoch": 0.4165018432910059, "grad_norm": 1.578125, "learning_rate": 0.00018684328982046818, "loss": 2.229, "step": 8897 }, { "epoch": 0.41654865703083854, "grad_norm": 1.296875, "learning_rate": 0.00018684039548506643, "loss": 2.7105, "step": 8898 }, { "epoch": 0.4165954707706712, "grad_norm": 1.296875, "learning_rate": 0.00018683750085376103, "loss": 3.3043, "step": 8899 }, { "epoch": 0.41664228451050384, "grad_norm": 1.5234375, "learning_rate": 0.00018683460592656184, "loss": 3.3264, "step": 8900 }, { "epoch": 0.41668909825033645, "grad_norm": 1.4609375, "learning_rate": 0.00018683171070347875, "loss": 2.8028, "step": 8901 }, { "epoch": 0.41673591199016913, "grad_norm": 1.375, "learning_rate": 0.00018682881518452166, "loss": 2.9393, "step": 8902 }, { "epoch": 0.41678272573000175, "grad_norm": 1.765625, "learning_rate": 0.0001868259193697004, "loss": 3.3565, "step": 8903 }, { "epoch": 0.4168295394698344, "grad_norm": 1.3203125, "learning_rate": 0.00018682302325902483, "loss": 2.93, "step": 8904 }, { "epoch": 0.41687635320966704, "grad_norm": 1.4375, "learning_rate": 0.0001868201268525048, "loss": 3.2662, "step": 8905 }, { "epoch": 0.41692316694949966, "grad_norm": 1.15625, "learning_rate": 0.00018681723015015025, "loss": 2.9152, "step": 8906 }, { "epoch": 0.41696998068933233, "grad_norm": 1.4140625, "learning_rate": 0.00018681433315197098, "loss": 3.0775, "step": 8907 }, { "epoch": 0.41701679442916495, "grad_norm": 1.2421875, "learning_rate": 0.00018681143585797688, "loss": 2.7687, "step": 8908 }, { "epoch": 0.4170636081689976, "grad_norm": 1.4296875, "learning_rate": 0.00018680853826817786, "loss": 3.2233, "step": 8909 }, { "epoch": 0.41711042190883024, "grad_norm": 1.5078125, "learning_rate": 0.00018680564038258375, "loss": 3.414, "step": 8910 }, { "epoch": 0.41715723564866286, "grad_norm": 1.890625, "learning_rate": 0.00018680274220120443, "loss": 3.2914, "step": 8911 }, { "epoch": 0.41720404938849553, "grad_norm": 1.3046875, "learning_rate": 0.00018679984372404983, "loss": 3.0513, "step": 8912 }, { "epoch": 0.41725086312832815, "grad_norm": 1.25, "learning_rate": 0.00018679694495112972, "loss": 2.8406, "step": 8913 }, { "epoch": 0.4172976768681608, "grad_norm": 1.4765625, "learning_rate": 0.00018679404588245406, "loss": 3.5922, "step": 8914 }, { "epoch": 0.41734449060799345, "grad_norm": 1.4921875, "learning_rate": 0.00018679114651803269, "loss": 3.051, "step": 8915 }, { "epoch": 0.41739130434782606, "grad_norm": 1.265625, "learning_rate": 0.00018678824685787552, "loss": 2.9867, "step": 8916 }, { "epoch": 0.41743811808765874, "grad_norm": 1.1796875, "learning_rate": 0.0001867853469019924, "loss": 3.005, "step": 8917 }, { "epoch": 0.41748493182749136, "grad_norm": 1.1953125, "learning_rate": 0.00018678244665039327, "loss": 2.9064, "step": 8918 }, { "epoch": 0.41753174556732403, "grad_norm": 1.2109375, "learning_rate": 0.00018677954610308792, "loss": 2.9679, "step": 8919 }, { "epoch": 0.41757855930715665, "grad_norm": 2.28125, "learning_rate": 0.00018677664526008632, "loss": 2.9021, "step": 8920 }, { "epoch": 0.41762537304698927, "grad_norm": 1.484375, "learning_rate": 0.00018677374412139828, "loss": 2.9733, "step": 8921 }, { "epoch": 0.41767218678682194, "grad_norm": 1.578125, "learning_rate": 0.00018677084268703374, "loss": 3.2012, "step": 8922 }, { "epoch": 0.41771900052665456, "grad_norm": 1.25, "learning_rate": 0.0001867679409570026, "loss": 2.6765, "step": 8923 }, { "epoch": 0.41776581426648723, "grad_norm": 1.28125, "learning_rate": 0.00018676503893131464, "loss": 3.0019, "step": 8924 }, { "epoch": 0.41781262800631985, "grad_norm": 1.2578125, "learning_rate": 0.00018676213660997988, "loss": 2.6133, "step": 8925 }, { "epoch": 0.41785944174615247, "grad_norm": 1.6171875, "learning_rate": 0.00018675923399300818, "loss": 3.0201, "step": 8926 }, { "epoch": 0.41790625548598515, "grad_norm": 0.99609375, "learning_rate": 0.00018675633108040934, "loss": 2.8354, "step": 8927 }, { "epoch": 0.41795306922581776, "grad_norm": 1.59375, "learning_rate": 0.00018675342787219336, "loss": 3.2197, "step": 8928 }, { "epoch": 0.41799988296565044, "grad_norm": 1.5625, "learning_rate": 0.00018675052436837008, "loss": 2.9993, "step": 8929 }, { "epoch": 0.41804669670548306, "grad_norm": 1.1953125, "learning_rate": 0.00018674762056894938, "loss": 2.9781, "step": 8930 }, { "epoch": 0.4180935104453157, "grad_norm": 1.34375, "learning_rate": 0.00018674471647394124, "loss": 2.922, "step": 8931 }, { "epoch": 0.41814032418514835, "grad_norm": 1.125, "learning_rate": 0.00018674181208335543, "loss": 3.0098, "step": 8932 }, { "epoch": 0.41818713792498097, "grad_norm": 1.25, "learning_rate": 0.00018673890739720193, "loss": 2.9386, "step": 8933 }, { "epoch": 0.41823395166481364, "grad_norm": 1.796875, "learning_rate": 0.00018673600241549064, "loss": 2.758, "step": 8934 }, { "epoch": 0.41828076540464626, "grad_norm": 1.265625, "learning_rate": 0.00018673309713823137, "loss": 2.8309, "step": 8935 }, { "epoch": 0.4183275791444789, "grad_norm": 1.609375, "learning_rate": 0.00018673019156543414, "loss": 3.1763, "step": 8936 }, { "epoch": 0.41837439288431155, "grad_norm": 1.3515625, "learning_rate": 0.00018672728569710877, "loss": 3.6687, "step": 8937 }, { "epoch": 0.41842120662414417, "grad_norm": 1.375, "learning_rate": 0.0001867243795332652, "loss": 3.3609, "step": 8938 }, { "epoch": 0.41846802036397684, "grad_norm": 1.4609375, "learning_rate": 0.00018672147307391334, "loss": 3.3341, "step": 8939 }, { "epoch": 0.41851483410380946, "grad_norm": 1.8125, "learning_rate": 0.00018671856631906307, "loss": 3.0579, "step": 8940 }, { "epoch": 0.4185616478436421, "grad_norm": 1.8671875, "learning_rate": 0.00018671565926872425, "loss": 2.8208, "step": 8941 }, { "epoch": 0.41860846158347476, "grad_norm": 1.75, "learning_rate": 0.00018671275192290687, "loss": 2.6641, "step": 8942 }, { "epoch": 0.4186552753233074, "grad_norm": 1.65625, "learning_rate": 0.0001867098442816208, "loss": 4.6384, "step": 8943 }, { "epoch": 0.41870208906314005, "grad_norm": 1.5703125, "learning_rate": 0.00018670693634487595, "loss": 3.3792, "step": 8944 }, { "epoch": 0.41874890280297267, "grad_norm": 1.609375, "learning_rate": 0.0001867040281126822, "loss": 3.3596, "step": 8945 }, { "epoch": 0.41879571654280534, "grad_norm": 1.0078125, "learning_rate": 0.00018670111958504952, "loss": 2.6809, "step": 8946 }, { "epoch": 0.41884253028263796, "grad_norm": 1.375, "learning_rate": 0.00018669821076198776, "loss": 2.7911, "step": 8947 }, { "epoch": 0.4188893440224706, "grad_norm": 1.1796875, "learning_rate": 0.00018669530164350687, "loss": 2.7018, "step": 8948 }, { "epoch": 0.41893615776230325, "grad_norm": 1.3125, "learning_rate": 0.00018669239222961675, "loss": 2.5929, "step": 8949 }, { "epoch": 0.41898297150213587, "grad_norm": 1.75, "learning_rate": 0.00018668948252032732, "loss": 3.6143, "step": 8950 }, { "epoch": 0.41902978524196854, "grad_norm": 1.3203125, "learning_rate": 0.00018668657251564848, "loss": 2.863, "step": 8951 }, { "epoch": 0.41907659898180116, "grad_norm": 2.03125, "learning_rate": 0.00018668366221559016, "loss": 3.0108, "step": 8952 }, { "epoch": 0.4191234127216338, "grad_norm": 1.5546875, "learning_rate": 0.00018668075162016226, "loss": 3.2848, "step": 8953 }, { "epoch": 0.41917022646146646, "grad_norm": 1.2109375, "learning_rate": 0.00018667784072937474, "loss": 2.8049, "step": 8954 }, { "epoch": 0.4192170402012991, "grad_norm": 1.4609375, "learning_rate": 0.00018667492954323745, "loss": 3.3432, "step": 8955 }, { "epoch": 0.41926385394113175, "grad_norm": 1.4765625, "learning_rate": 0.00018667201806176034, "loss": 3.0889, "step": 8956 }, { "epoch": 0.41931066768096437, "grad_norm": 1.4140625, "learning_rate": 0.00018666910628495337, "loss": 3.1732, "step": 8957 }, { "epoch": 0.419357481420797, "grad_norm": 1.875, "learning_rate": 0.00018666619421282642, "loss": 3.258, "step": 8958 }, { "epoch": 0.41940429516062966, "grad_norm": 2.0625, "learning_rate": 0.00018666328184538945, "loss": 3.3195, "step": 8959 }, { "epoch": 0.4194511089004623, "grad_norm": 1.34375, "learning_rate": 0.00018666036918265234, "loss": 3.1035, "step": 8960 }, { "epoch": 0.41949792264029495, "grad_norm": 2.265625, "learning_rate": 0.000186657456224625, "loss": 2.9503, "step": 8961 }, { "epoch": 0.41954473638012757, "grad_norm": 1.6953125, "learning_rate": 0.00018665454297131742, "loss": 3.0514, "step": 8962 }, { "epoch": 0.4195915501199602, "grad_norm": 1.328125, "learning_rate": 0.0001866516294227395, "loss": 2.9475, "step": 8963 }, { "epoch": 0.41963836385979286, "grad_norm": 1.2578125, "learning_rate": 0.00018664871557890113, "loss": 3.2393, "step": 8964 }, { "epoch": 0.4196851775996255, "grad_norm": 1.4375, "learning_rate": 0.0001866458014398123, "loss": 3.1608, "step": 8965 }, { "epoch": 0.41973199133945815, "grad_norm": 1.59375, "learning_rate": 0.00018664288700548286, "loss": 2.6298, "step": 8966 }, { "epoch": 0.4197788050792908, "grad_norm": 2.109375, "learning_rate": 0.00018663997227592286, "loss": 3.4734, "step": 8967 }, { "epoch": 0.4198256188191234, "grad_norm": 1.859375, "learning_rate": 0.0001866370572511421, "loss": 3.2713, "step": 8968 }, { "epoch": 0.41987243255895607, "grad_norm": 1.140625, "learning_rate": 0.00018663414193115062, "loss": 3.0446, "step": 8969 }, { "epoch": 0.4199192462987887, "grad_norm": 1.1328125, "learning_rate": 0.00018663122631595828, "loss": 2.2872, "step": 8970 }, { "epoch": 0.41996606003862136, "grad_norm": 1.515625, "learning_rate": 0.00018662831040557507, "loss": 3.5306, "step": 8971 }, { "epoch": 0.420012873778454, "grad_norm": 1.3984375, "learning_rate": 0.00018662539420001087, "loss": 3.3697, "step": 8972 }, { "epoch": 0.4200596875182866, "grad_norm": 1.328125, "learning_rate": 0.00018662247769927567, "loss": 3.3536, "step": 8973 }, { "epoch": 0.42010650125811927, "grad_norm": 1.59375, "learning_rate": 0.00018661956090337936, "loss": 2.717, "step": 8974 }, { "epoch": 0.4201533149979519, "grad_norm": 1.546875, "learning_rate": 0.00018661664381233191, "loss": 2.9801, "step": 8975 }, { "epoch": 0.42020012873778456, "grad_norm": 1.5625, "learning_rate": 0.0001866137264261433, "loss": 3.1825, "step": 8976 }, { "epoch": 0.4202469424776172, "grad_norm": 1.328125, "learning_rate": 0.00018661080874482337, "loss": 3.2504, "step": 8977 }, { "epoch": 0.4202937562174498, "grad_norm": 1.8046875, "learning_rate": 0.00018660789076838214, "loss": 3.2288, "step": 8978 }, { "epoch": 0.4203405699572825, "grad_norm": 2.09375, "learning_rate": 0.0001866049724968295, "loss": 2.6853, "step": 8979 }, { "epoch": 0.4203873836971151, "grad_norm": 1.4375, "learning_rate": 0.00018660205393017547, "loss": 2.632, "step": 8980 }, { "epoch": 0.42043419743694777, "grad_norm": 1.28125, "learning_rate": 0.00018659913506842993, "loss": 3.2166, "step": 8981 }, { "epoch": 0.4204810111767804, "grad_norm": 1.4140625, "learning_rate": 0.0001865962159116028, "loss": 2.7048, "step": 8982 }, { "epoch": 0.420527824916613, "grad_norm": 1.296875, "learning_rate": 0.00018659329645970412, "loss": 3.1249, "step": 8983 }, { "epoch": 0.4205746386564457, "grad_norm": 1.328125, "learning_rate": 0.00018659037671274378, "loss": 2.9142, "step": 8984 }, { "epoch": 0.4206214523962783, "grad_norm": 1.46875, "learning_rate": 0.00018658745667073171, "loss": 3.1567, "step": 8985 }, { "epoch": 0.42066826613611097, "grad_norm": 1.359375, "learning_rate": 0.0001865845363336779, "loss": 3.0174, "step": 8986 }, { "epoch": 0.4207150798759436, "grad_norm": 1.1171875, "learning_rate": 0.0001865816157015923, "loss": 2.9535, "step": 8987 }, { "epoch": 0.4207618936157762, "grad_norm": 1.484375, "learning_rate": 0.00018657869477448484, "loss": 2.835, "step": 8988 }, { "epoch": 0.4208087073556089, "grad_norm": 1.3203125, "learning_rate": 0.0001865757735523655, "loss": 2.6348, "step": 8989 }, { "epoch": 0.4208555210954415, "grad_norm": 2.171875, "learning_rate": 0.0001865728520352442, "loss": 2.6964, "step": 8990 }, { "epoch": 0.42090233483527417, "grad_norm": 1.0859375, "learning_rate": 0.0001865699302231309, "loss": 2.8819, "step": 8991 }, { "epoch": 0.4209491485751068, "grad_norm": 1.3203125, "learning_rate": 0.00018656700811603554, "loss": 4.3326, "step": 8992 }, { "epoch": 0.4209959623149394, "grad_norm": 1.828125, "learning_rate": 0.00018656408571396816, "loss": 3.6526, "step": 8993 }, { "epoch": 0.4210427760547721, "grad_norm": 1.4921875, "learning_rate": 0.00018656116301693864, "loss": 2.8978, "step": 8994 }, { "epoch": 0.4210895897946047, "grad_norm": 1.3515625, "learning_rate": 0.00018655824002495697, "loss": 3.3588, "step": 8995 }, { "epoch": 0.4211364035344374, "grad_norm": 1.203125, "learning_rate": 0.00018655531673803308, "loss": 2.9813, "step": 8996 }, { "epoch": 0.42118321727427, "grad_norm": 1.140625, "learning_rate": 0.00018655239315617695, "loss": 2.3616, "step": 8997 }, { "epoch": 0.4212300310141026, "grad_norm": 1.578125, "learning_rate": 0.0001865494692793986, "loss": 2.7406, "step": 8998 }, { "epoch": 0.4212768447539353, "grad_norm": 1.2265625, "learning_rate": 0.00018654654510770789, "loss": 2.6499, "step": 8999 }, { "epoch": 0.4213236584937679, "grad_norm": 1.25, "learning_rate": 0.00018654362064111481, "loss": 2.7523, "step": 9000 }, { "epoch": 0.4213704722336006, "grad_norm": 1.53125, "learning_rate": 0.00018654069587962938, "loss": 3.1981, "step": 9001 }, { "epoch": 0.4214172859734332, "grad_norm": 1.4296875, "learning_rate": 0.0001865377708232615, "loss": 2.752, "step": 9002 }, { "epoch": 0.4214640997132658, "grad_norm": 2.296875, "learning_rate": 0.00018653484547202117, "loss": 3.0316, "step": 9003 }, { "epoch": 0.4215109134530985, "grad_norm": 1.0078125, "learning_rate": 0.0001865319198259184, "loss": 2.8541, "step": 9004 }, { "epoch": 0.4215577271929311, "grad_norm": 1.3828125, "learning_rate": 0.00018652899388496309, "loss": 3.1753, "step": 9005 }, { "epoch": 0.4216045409327638, "grad_norm": 1.4921875, "learning_rate": 0.00018652606764916522, "loss": 2.9856, "step": 9006 }, { "epoch": 0.4216513546725964, "grad_norm": 1.5, "learning_rate": 0.0001865231411185348, "loss": 3.3422, "step": 9007 }, { "epoch": 0.4216981684124291, "grad_norm": 2.046875, "learning_rate": 0.00018652021429308176, "loss": 3.4811, "step": 9008 }, { "epoch": 0.4217449821522617, "grad_norm": 1.8671875, "learning_rate": 0.0001865172871728161, "loss": 3.0892, "step": 9009 }, { "epoch": 0.4217917958920943, "grad_norm": 1.2421875, "learning_rate": 0.00018651435975774777, "loss": 3.1951, "step": 9010 }, { "epoch": 0.421838609631927, "grad_norm": 1.359375, "learning_rate": 0.00018651143204788677, "loss": 2.6562, "step": 9011 }, { "epoch": 0.4218854233717596, "grad_norm": 1.484375, "learning_rate": 0.0001865085040432431, "loss": 3.3394, "step": 9012 }, { "epoch": 0.4219322371115923, "grad_norm": 1.1796875, "learning_rate": 0.00018650557574382664, "loss": 3.5303, "step": 9013 }, { "epoch": 0.4219790508514249, "grad_norm": 2.15625, "learning_rate": 0.00018650264714964747, "loss": 3.1296, "step": 9014 }, { "epoch": 0.4220258645912575, "grad_norm": 1.0859375, "learning_rate": 0.00018649971826071552, "loss": 2.7899, "step": 9015 }, { "epoch": 0.4220726783310902, "grad_norm": 1.2265625, "learning_rate": 0.00018649678907704077, "loss": 2.8408, "step": 9016 }, { "epoch": 0.4221194920709228, "grad_norm": 1.5078125, "learning_rate": 0.00018649385959863325, "loss": 3.4548, "step": 9017 }, { "epoch": 0.4221663058107555, "grad_norm": 1.3046875, "learning_rate": 0.00018649092982550287, "loss": 3.0319, "step": 9018 }, { "epoch": 0.4222131195505881, "grad_norm": 1.515625, "learning_rate": 0.00018648799975765965, "loss": 3.1649, "step": 9019 }, { "epoch": 0.4222599332904207, "grad_norm": 1.3046875, "learning_rate": 0.00018648506939511356, "loss": 2.908, "step": 9020 }, { "epoch": 0.4223067470302534, "grad_norm": 1.09375, "learning_rate": 0.00018648213873787462, "loss": 2.7001, "step": 9021 }, { "epoch": 0.422353560770086, "grad_norm": 1.3828125, "learning_rate": 0.00018647920778595278, "loss": 3.337, "step": 9022 }, { "epoch": 0.4224003745099187, "grad_norm": 1.40625, "learning_rate": 0.00018647627653935804, "loss": 2.8463, "step": 9023 }, { "epoch": 0.4224471882497513, "grad_norm": 0.953125, "learning_rate": 0.0001864733449981004, "loss": 2.18, "step": 9024 }, { "epoch": 0.4224940019895839, "grad_norm": 1.4765625, "learning_rate": 0.0001864704131621898, "loss": 2.6418, "step": 9025 }, { "epoch": 0.4225408157294166, "grad_norm": 1.3515625, "learning_rate": 0.00018646748103163627, "loss": 3.1133, "step": 9026 }, { "epoch": 0.4225876294692492, "grad_norm": 1.7578125, "learning_rate": 0.00018646454860644983, "loss": 3.2404, "step": 9027 }, { "epoch": 0.4226344432090819, "grad_norm": 1.4296875, "learning_rate": 0.0001864616158866404, "loss": 3.2501, "step": 9028 }, { "epoch": 0.4226812569489145, "grad_norm": 1.3046875, "learning_rate": 0.00018645868287221805, "loss": 3.3162, "step": 9029 }, { "epoch": 0.4227280706887471, "grad_norm": 1.375, "learning_rate": 0.00018645574956319269, "loss": 3.2968, "step": 9030 }, { "epoch": 0.4227748844285798, "grad_norm": 2.34375, "learning_rate": 0.00018645281595957439, "loss": 3.0848, "step": 9031 }, { "epoch": 0.4228216981684124, "grad_norm": 1.1328125, "learning_rate": 0.0001864498820613731, "loss": 2.9477, "step": 9032 }, { "epoch": 0.4228685119082451, "grad_norm": 1.078125, "learning_rate": 0.00018644694786859887, "loss": 3.0254, "step": 9033 }, { "epoch": 0.4229153256480777, "grad_norm": 1.6640625, "learning_rate": 0.00018644401338126164, "loss": 2.5759, "step": 9034 }, { "epoch": 0.42296213938791033, "grad_norm": 1.4296875, "learning_rate": 0.00018644107859937142, "loss": 3.2759, "step": 9035 }, { "epoch": 0.423008953127743, "grad_norm": 1.265625, "learning_rate": 0.00018643814352293822, "loss": 2.9649, "step": 9036 }, { "epoch": 0.4230557668675756, "grad_norm": 2.0625, "learning_rate": 0.00018643520815197207, "loss": 3.2357, "step": 9037 }, { "epoch": 0.4231025806074083, "grad_norm": 1.6484375, "learning_rate": 0.00018643227248648291, "loss": 3.6469, "step": 9038 }, { "epoch": 0.4231493943472409, "grad_norm": 1.296875, "learning_rate": 0.00018642933652648084, "loss": 2.8926, "step": 9039 }, { "epoch": 0.42319620808707353, "grad_norm": 1.203125, "learning_rate": 0.00018642640027197573, "loss": 2.8928, "step": 9040 }, { "epoch": 0.4232430218269062, "grad_norm": 1.5625, "learning_rate": 0.00018642346372297767, "loss": 2.849, "step": 9041 }, { "epoch": 0.4232898355667388, "grad_norm": 1.3984375, "learning_rate": 0.0001864205268794967, "loss": 3.1666, "step": 9042 }, { "epoch": 0.4233366493065715, "grad_norm": 1.359375, "learning_rate": 0.00018641758974154274, "loss": 2.8857, "step": 9043 }, { "epoch": 0.4233834630464041, "grad_norm": 1.2265625, "learning_rate": 0.00018641465230912583, "loss": 2.9257, "step": 9044 }, { "epoch": 0.42343027678623674, "grad_norm": 1.53125, "learning_rate": 0.000186411714582256, "loss": 2.9845, "step": 9045 }, { "epoch": 0.4234770905260694, "grad_norm": 2.03125, "learning_rate": 0.00018640877656094323, "loss": 3.0834, "step": 9046 }, { "epoch": 0.42352390426590203, "grad_norm": 1.2890625, "learning_rate": 0.0001864058382451976, "loss": 2.7451, "step": 9047 }, { "epoch": 0.4235707180057347, "grad_norm": 1.125, "learning_rate": 0.00018640289963502902, "loss": 2.9252, "step": 9048 }, { "epoch": 0.4236175317455673, "grad_norm": 1.328125, "learning_rate": 0.00018639996073044758, "loss": 3.2996, "step": 9049 }, { "epoch": 0.42366434548539994, "grad_norm": 1.234375, "learning_rate": 0.00018639702153146326, "loss": 2.4988, "step": 9050 }, { "epoch": 0.4237111592252326, "grad_norm": 1.8046875, "learning_rate": 0.0001863940820380861, "loss": 2.8345, "step": 9051 }, { "epoch": 0.42375797296506523, "grad_norm": 1.15625, "learning_rate": 0.00018639114225032607, "loss": 2.8798, "step": 9052 }, { "epoch": 0.4238047867048979, "grad_norm": 1.2890625, "learning_rate": 0.00018638820216819322, "loss": 3.0165, "step": 9053 }, { "epoch": 0.4238516004447305, "grad_norm": 1.1796875, "learning_rate": 0.0001863852617916976, "loss": 2.5962, "step": 9054 }, { "epoch": 0.42389841418456314, "grad_norm": 1.3984375, "learning_rate": 0.00018638232112084914, "loss": 2.9982, "step": 9055 }, { "epoch": 0.4239452279243958, "grad_norm": 1.3203125, "learning_rate": 0.00018637938015565796, "loss": 3.055, "step": 9056 }, { "epoch": 0.42399204166422844, "grad_norm": 1.34375, "learning_rate": 0.00018637643889613401, "loss": 2.8112, "step": 9057 }, { "epoch": 0.4240388554040611, "grad_norm": 1.0390625, "learning_rate": 0.00018637349734228735, "loss": 3.4329, "step": 9058 }, { "epoch": 0.42408566914389373, "grad_norm": 1.8359375, "learning_rate": 0.00018637055549412796, "loss": 3.4196, "step": 9059 }, { "epoch": 0.42413248288372635, "grad_norm": 1.7109375, "learning_rate": 0.00018636761335166595, "loss": 3.18, "step": 9060 }, { "epoch": 0.424179296623559, "grad_norm": 1.1484375, "learning_rate": 0.00018636467091491125, "loss": 3.1767, "step": 9061 }, { "epoch": 0.42422611036339164, "grad_norm": 1.171875, "learning_rate": 0.00018636172818387392, "loss": 2.9597, "step": 9062 }, { "epoch": 0.4242729241032243, "grad_norm": 1.6171875, "learning_rate": 0.00018635878515856403, "loss": 2.4419, "step": 9063 }, { "epoch": 0.42431973784305693, "grad_norm": 1.7109375, "learning_rate": 0.00018635584183899156, "loss": 3.2463, "step": 9064 }, { "epoch": 0.42436655158288955, "grad_norm": 1.671875, "learning_rate": 0.0001863528982251665, "loss": 3.6301, "step": 9065 }, { "epoch": 0.4244133653227222, "grad_norm": 1.1875, "learning_rate": 0.00018634995431709902, "loss": 2.8679, "step": 9066 }, { "epoch": 0.42446017906255484, "grad_norm": 1.0859375, "learning_rate": 0.00018634701011479898, "loss": 2.6719, "step": 9067 }, { "epoch": 0.4245069928023875, "grad_norm": 1.5625, "learning_rate": 0.00018634406561827654, "loss": 3.2193, "step": 9068 }, { "epoch": 0.42455380654222014, "grad_norm": 1.1640625, "learning_rate": 0.00018634112082754168, "loss": 2.7643, "step": 9069 }, { "epoch": 0.4246006202820528, "grad_norm": 1.15625, "learning_rate": 0.00018633817574260442, "loss": 2.9591, "step": 9070 }, { "epoch": 0.42464743402188543, "grad_norm": 1.1875, "learning_rate": 0.00018633523036347483, "loss": 3.1097, "step": 9071 }, { "epoch": 0.42469424776171805, "grad_norm": 1.515625, "learning_rate": 0.00018633228469016295, "loss": 2.8068, "step": 9072 }, { "epoch": 0.4247410615015507, "grad_norm": 1.75, "learning_rate": 0.00018632933872267878, "loss": 2.6898, "step": 9073 }, { "epoch": 0.42478787524138334, "grad_norm": 1.734375, "learning_rate": 0.00018632639246103237, "loss": 3.2982, "step": 9074 }, { "epoch": 0.424834688981216, "grad_norm": 1.2890625, "learning_rate": 0.00018632344590523378, "loss": 3.2271, "step": 9075 }, { "epoch": 0.42488150272104863, "grad_norm": 1.359375, "learning_rate": 0.00018632049905529304, "loss": 3.3669, "step": 9076 }, { "epoch": 0.42492831646088125, "grad_norm": 1.5, "learning_rate": 0.00018631755191122017, "loss": 3.2013, "step": 9077 }, { "epoch": 0.4249751302007139, "grad_norm": 1.53125, "learning_rate": 0.00018631460447302527, "loss": 3.1229, "step": 9078 }, { "epoch": 0.42502194394054654, "grad_norm": 1.734375, "learning_rate": 0.00018631165674071833, "loss": 3.2958, "step": 9079 }, { "epoch": 0.4250687576803792, "grad_norm": 1.2109375, "learning_rate": 0.0001863087087143094, "loss": 3.1725, "step": 9080 }, { "epoch": 0.42511557142021184, "grad_norm": 1.3828125, "learning_rate": 0.00018630576039380855, "loss": 2.8243, "step": 9081 }, { "epoch": 0.42516238516004445, "grad_norm": 1.8203125, "learning_rate": 0.00018630281177922577, "loss": 3.4979, "step": 9082 }, { "epoch": 0.4252091988998771, "grad_norm": 1.1640625, "learning_rate": 0.00018629986287057118, "loss": 2.6477, "step": 9083 }, { "epoch": 0.42525601263970975, "grad_norm": 2.171875, "learning_rate": 0.0001862969136678548, "loss": 2.8961, "step": 9084 }, { "epoch": 0.4253028263795424, "grad_norm": 1.140625, "learning_rate": 0.00018629396417108667, "loss": 3.059, "step": 9085 }, { "epoch": 0.42534964011937504, "grad_norm": 1.3828125, "learning_rate": 0.00018629101438027683, "loss": 2.928, "step": 9086 }, { "epoch": 0.42539645385920766, "grad_norm": 1.6953125, "learning_rate": 0.00018628806429543535, "loss": 3.1074, "step": 9087 }, { "epoch": 0.42544326759904033, "grad_norm": 1.3984375, "learning_rate": 0.00018628511391657228, "loss": 2.6748, "step": 9088 }, { "epoch": 0.42549008133887295, "grad_norm": 1.125, "learning_rate": 0.00018628216324369768, "loss": 2.64, "step": 9089 }, { "epoch": 0.4255368950787056, "grad_norm": 1.3671875, "learning_rate": 0.00018627921227682158, "loss": 3.0384, "step": 9090 }, { "epoch": 0.42558370881853824, "grad_norm": 1.5546875, "learning_rate": 0.00018627626101595408, "loss": 3.4335, "step": 9091 }, { "epoch": 0.42563052255837086, "grad_norm": 1.2421875, "learning_rate": 0.0001862733094611052, "loss": 3.2498, "step": 9092 }, { "epoch": 0.42567733629820353, "grad_norm": 1.2265625, "learning_rate": 0.000186270357612285, "loss": 2.8914, "step": 9093 }, { "epoch": 0.42572415003803615, "grad_norm": 1.2421875, "learning_rate": 0.00018626740546950354, "loss": 2.9246, "step": 9094 }, { "epoch": 0.4257709637778688, "grad_norm": 1.5078125, "learning_rate": 0.00018626445303277086, "loss": 3.4315, "step": 9095 }, { "epoch": 0.42581777751770145, "grad_norm": 1.4140625, "learning_rate": 0.00018626150030209707, "loss": 3.3463, "step": 9096 }, { "epoch": 0.42586459125753406, "grad_norm": 1.25, "learning_rate": 0.00018625854727749222, "loss": 2.9028, "step": 9097 }, { "epoch": 0.42591140499736674, "grad_norm": 1.3671875, "learning_rate": 0.00018625559395896632, "loss": 3.0288, "step": 9098 }, { "epoch": 0.42595821873719936, "grad_norm": 1.40625, "learning_rate": 0.0001862526403465295, "loss": 3.273, "step": 9099 }, { "epoch": 0.42600503247703203, "grad_norm": 1.328125, "learning_rate": 0.00018624968644019176, "loss": 3.1412, "step": 9100 }, { "epoch": 0.42605184621686465, "grad_norm": 1.359375, "learning_rate": 0.0001862467322399632, "loss": 3.3275, "step": 9101 }, { "epoch": 0.42609865995669727, "grad_norm": 1.265625, "learning_rate": 0.00018624377774585394, "loss": 3.0391, "step": 9102 }, { "epoch": 0.42614547369652994, "grad_norm": 2.375, "learning_rate": 0.00018624082295787392, "loss": 3.6415, "step": 9103 }, { "epoch": 0.42619228743636256, "grad_norm": 1.328125, "learning_rate": 0.00018623786787603334, "loss": 3.3307, "step": 9104 }, { "epoch": 0.42623910117619523, "grad_norm": 2.015625, "learning_rate": 0.00018623491250034218, "loss": 2.9613, "step": 9105 }, { "epoch": 0.42628591491602785, "grad_norm": 1.3359375, "learning_rate": 0.00018623195683081055, "loss": 2.8567, "step": 9106 }, { "epoch": 0.42633272865586047, "grad_norm": 1.3828125, "learning_rate": 0.0001862290008674485, "loss": 3.021, "step": 9107 }, { "epoch": 0.42637954239569315, "grad_norm": 1.4609375, "learning_rate": 0.0001862260446102661, "loss": 2.1677, "step": 9108 }, { "epoch": 0.42642635613552576, "grad_norm": 1.1796875, "learning_rate": 0.00018622308805927348, "loss": 2.8936, "step": 9109 }, { "epoch": 0.42647316987535844, "grad_norm": 1.4140625, "learning_rate": 0.00018622013121448062, "loss": 3.2385, "step": 9110 }, { "epoch": 0.42651998361519106, "grad_norm": 1.3828125, "learning_rate": 0.00018621717407589768, "loss": 2.953, "step": 9111 }, { "epoch": 0.4265667973550237, "grad_norm": 1.125, "learning_rate": 0.0001862142166435347, "loss": 3.0021, "step": 9112 }, { "epoch": 0.42661361109485635, "grad_norm": 1.296875, "learning_rate": 0.0001862112589174018, "loss": 3.5006, "step": 9113 }, { "epoch": 0.42666042483468897, "grad_norm": 1.3515625, "learning_rate": 0.00018620830089750897, "loss": 3.3378, "step": 9114 }, { "epoch": 0.42670723857452164, "grad_norm": 1.3671875, "learning_rate": 0.00018620534258386633, "loss": 3.0898, "step": 9115 }, { "epoch": 0.42675405231435426, "grad_norm": 1.25, "learning_rate": 0.00018620238397648397, "loss": 2.8509, "step": 9116 }, { "epoch": 0.4268008660541869, "grad_norm": 1.2109375, "learning_rate": 0.000186199425075372, "loss": 3.028, "step": 9117 }, { "epoch": 0.42684767979401955, "grad_norm": 1.359375, "learning_rate": 0.00018619646588054046, "loss": 3.3607, "step": 9118 }, { "epoch": 0.42689449353385217, "grad_norm": 1.4296875, "learning_rate": 0.00018619350639199943, "loss": 3.1075, "step": 9119 }, { "epoch": 0.42694130727368484, "grad_norm": 1.6015625, "learning_rate": 0.000186190546609759, "loss": 3.1876, "step": 9120 }, { "epoch": 0.42698812101351746, "grad_norm": 1.2734375, "learning_rate": 0.0001861875865338293, "loss": 3.1482, "step": 9121 }, { "epoch": 0.4270349347533501, "grad_norm": 1.359375, "learning_rate": 0.00018618462616422037, "loss": 2.0984, "step": 9122 }, { "epoch": 0.42708174849318276, "grad_norm": 1.2890625, "learning_rate": 0.0001861816655009423, "loss": 2.6328, "step": 9123 }, { "epoch": 0.4271285622330154, "grad_norm": 1.625, "learning_rate": 0.0001861787045440052, "loss": 2.8551, "step": 9124 }, { "epoch": 0.42717537597284805, "grad_norm": 1.3515625, "learning_rate": 0.00018617574329341912, "loss": 3.0499, "step": 9125 }, { "epoch": 0.42722218971268067, "grad_norm": 1.875, "learning_rate": 0.00018617278174919418, "loss": 3.3232, "step": 9126 }, { "epoch": 0.4272690034525133, "grad_norm": 1.1640625, "learning_rate": 0.00018616981991134048, "loss": 3.0229, "step": 9127 }, { "epoch": 0.42731581719234596, "grad_norm": 0.890625, "learning_rate": 0.00018616685777986808, "loss": 3.7951, "step": 9128 }, { "epoch": 0.4273626309321786, "grad_norm": 1.3671875, "learning_rate": 0.00018616389535478712, "loss": 2.9505, "step": 9129 }, { "epoch": 0.42740944467201125, "grad_norm": 1.4453125, "learning_rate": 0.00018616093263610766, "loss": 2.9302, "step": 9130 }, { "epoch": 0.42745625841184387, "grad_norm": 1.609375, "learning_rate": 0.0001861579696238398, "loss": 2.9719, "step": 9131 }, { "epoch": 0.42750307215167654, "grad_norm": 1.6328125, "learning_rate": 0.00018615500631799362, "loss": 2.9219, "step": 9132 }, { "epoch": 0.42754988589150916, "grad_norm": 1.1953125, "learning_rate": 0.00018615204271857927, "loss": 3.0586, "step": 9133 }, { "epoch": 0.4275966996313418, "grad_norm": 1.1484375, "learning_rate": 0.0001861490788256068, "loss": 2.9167, "step": 9134 }, { "epoch": 0.42764351337117446, "grad_norm": 1.5625, "learning_rate": 0.00018614611463908633, "loss": 2.6541, "step": 9135 }, { "epoch": 0.4276903271110071, "grad_norm": 1.2890625, "learning_rate": 0.00018614315015902792, "loss": 3.2893, "step": 9136 }, { "epoch": 0.42773714085083975, "grad_norm": 0.984375, "learning_rate": 0.00018614018538544173, "loss": 2.8761, "step": 9137 }, { "epoch": 0.42778395459067237, "grad_norm": 1.34375, "learning_rate": 0.00018613722031833786, "loss": 2.8841, "step": 9138 }, { "epoch": 0.427830768330505, "grad_norm": 1.171875, "learning_rate": 0.00018613425495772636, "loss": 3.0622, "step": 9139 }, { "epoch": 0.42787758207033766, "grad_norm": 1.5625, "learning_rate": 0.00018613128930361738, "loss": 3.0007, "step": 9140 }, { "epoch": 0.4279243958101703, "grad_norm": 1.203125, "learning_rate": 0.000186128323356021, "loss": 2.8095, "step": 9141 }, { "epoch": 0.42797120955000295, "grad_norm": 1.1171875, "learning_rate": 0.00018612535711494735, "loss": 2.745, "step": 9142 }, { "epoch": 0.42801802328983557, "grad_norm": 1.46875, "learning_rate": 0.0001861223905804065, "loss": 3.2111, "step": 9143 }, { "epoch": 0.4280648370296682, "grad_norm": 1.3671875, "learning_rate": 0.0001861194237524086, "loss": 3.1039, "step": 9144 }, { "epoch": 0.42811165076950086, "grad_norm": 1.1640625, "learning_rate": 0.00018611645663096373, "loss": 3.0394, "step": 9145 }, { "epoch": 0.4281584645093335, "grad_norm": 1.5546875, "learning_rate": 0.00018611348921608202, "loss": 3.3294, "step": 9146 }, { "epoch": 0.42820527824916615, "grad_norm": 1.3515625, "learning_rate": 0.00018611052150777356, "loss": 3.0766, "step": 9147 }, { "epoch": 0.4282520919889988, "grad_norm": 1.53125, "learning_rate": 0.0001861075535060485, "loss": 2.9631, "step": 9148 }, { "epoch": 0.4282989057288314, "grad_norm": 2.046875, "learning_rate": 0.00018610458521091692, "loss": 2.8604, "step": 9149 }, { "epoch": 0.42834571946866407, "grad_norm": 1.5625, "learning_rate": 0.00018610161662238893, "loss": 3.1281, "step": 9150 }, { "epoch": 0.4283925332084967, "grad_norm": 1.375, "learning_rate": 0.0001860986477404747, "loss": 3.0738, "step": 9151 }, { "epoch": 0.42843934694832936, "grad_norm": 1.578125, "learning_rate": 0.00018609567856518427, "loss": 3.3243, "step": 9152 }, { "epoch": 0.428486160688162, "grad_norm": 1.3359375, "learning_rate": 0.00018609270909652779, "loss": 2.8307, "step": 9153 }, { "epoch": 0.4285329744279946, "grad_norm": 1.40625, "learning_rate": 0.00018608973933451536, "loss": 2.8853, "step": 9154 }, { "epoch": 0.42857978816782727, "grad_norm": 1.1953125, "learning_rate": 0.00018608676927915716, "loss": 2.6292, "step": 9155 }, { "epoch": 0.4286266019076599, "grad_norm": 1.484375, "learning_rate": 0.00018608379893046326, "loss": 3.0187, "step": 9156 }, { "epoch": 0.42867341564749256, "grad_norm": 1.46875, "learning_rate": 0.00018608082828844378, "loss": 2.7197, "step": 9157 }, { "epoch": 0.4287202293873252, "grad_norm": 1.140625, "learning_rate": 0.00018607785735310885, "loss": 2.6047, "step": 9158 }, { "epoch": 0.4287670431271578, "grad_norm": 1.484375, "learning_rate": 0.00018607488612446858, "loss": 2.9121, "step": 9159 }, { "epoch": 0.4288138568669905, "grad_norm": 1.3359375, "learning_rate": 0.00018607191460253314, "loss": 2.903, "step": 9160 }, { "epoch": 0.4288606706068231, "grad_norm": 1.2734375, "learning_rate": 0.00018606894278731264, "loss": 3.0926, "step": 9161 }, { "epoch": 0.42890748434665577, "grad_norm": 1.421875, "learning_rate": 0.00018606597067881718, "loss": 2.8732, "step": 9162 }, { "epoch": 0.4289542980864884, "grad_norm": 1.7421875, "learning_rate": 0.00018606299827705688, "loss": 3.2968, "step": 9163 }, { "epoch": 0.429001111826321, "grad_norm": 1.6796875, "learning_rate": 0.0001860600255820419, "loss": 3.1099, "step": 9164 }, { "epoch": 0.4290479255661537, "grad_norm": 1.4609375, "learning_rate": 0.00018605705259378235, "loss": 3.0826, "step": 9165 }, { "epoch": 0.4290947393059863, "grad_norm": 1.140625, "learning_rate": 0.0001860540793122884, "loss": 3.0374, "step": 9166 }, { "epoch": 0.42914155304581897, "grad_norm": 1.8515625, "learning_rate": 0.00018605110573757013, "loss": 3.0076, "step": 9167 }, { "epoch": 0.4291883667856516, "grad_norm": 1.359375, "learning_rate": 0.00018604813186963768, "loss": 3.3946, "step": 9168 }, { "epoch": 0.4292351805254842, "grad_norm": 1.578125, "learning_rate": 0.0001860451577085012, "loss": 2.9021, "step": 9169 }, { "epoch": 0.4292819942653169, "grad_norm": 1.796875, "learning_rate": 0.00018604218325417082, "loss": 3.0204, "step": 9170 }, { "epoch": 0.4293288080051495, "grad_norm": 1.2890625, "learning_rate": 0.00018603920850665667, "loss": 2.9275, "step": 9171 }, { "epoch": 0.42937562174498217, "grad_norm": 1.421875, "learning_rate": 0.0001860362334659689, "loss": 3.1833, "step": 9172 }, { "epoch": 0.4294224354848148, "grad_norm": 1.65625, "learning_rate": 0.00018603325813211762, "loss": 3.0922, "step": 9173 }, { "epoch": 0.4294692492246474, "grad_norm": 1.9921875, "learning_rate": 0.000186030282505113, "loss": 4.7283, "step": 9174 }, { "epoch": 0.4295160629644801, "grad_norm": 1.328125, "learning_rate": 0.0001860273065849652, "loss": 3.0914, "step": 9175 }, { "epoch": 0.4295628767043127, "grad_norm": 1.3515625, "learning_rate": 0.00018602433037168424, "loss": 2.8874, "step": 9176 }, { "epoch": 0.4296096904441454, "grad_norm": 1.1640625, "learning_rate": 0.00018602135386528042, "loss": 2.7178, "step": 9177 }, { "epoch": 0.429656504183978, "grad_norm": 1.28125, "learning_rate": 0.0001860183770657638, "loss": 3.2717, "step": 9178 }, { "epoch": 0.4297033179238106, "grad_norm": 1.359375, "learning_rate": 0.0001860153999731445, "loss": 2.9641, "step": 9179 }, { "epoch": 0.4297501316636433, "grad_norm": 1.484375, "learning_rate": 0.00018601242258743273, "loss": 3.0178, "step": 9180 }, { "epoch": 0.4297969454034759, "grad_norm": 1.203125, "learning_rate": 0.00018600944490863858, "loss": 2.956, "step": 9181 }, { "epoch": 0.4298437591433086, "grad_norm": 1.2734375, "learning_rate": 0.00018600646693677221, "loss": 2.8262, "step": 9182 }, { "epoch": 0.4298905728831412, "grad_norm": 1.328125, "learning_rate": 0.00018600348867184378, "loss": 3.0986, "step": 9183 }, { "epoch": 0.4299373866229738, "grad_norm": 1.203125, "learning_rate": 0.00018600051011386347, "loss": 3.0014, "step": 9184 }, { "epoch": 0.4299842003628065, "grad_norm": 1.546875, "learning_rate": 0.00018599753126284134, "loss": 2.8475, "step": 9185 }, { "epoch": 0.4300310141026391, "grad_norm": 1.1875, "learning_rate": 0.00018599455211878763, "loss": 2.8411, "step": 9186 }, { "epoch": 0.4300778278424718, "grad_norm": 1.2734375, "learning_rate": 0.00018599157268171242, "loss": 3.3147, "step": 9187 }, { "epoch": 0.4301246415823044, "grad_norm": 3.03125, "learning_rate": 0.00018598859295162595, "loss": 2.604, "step": 9188 }, { "epoch": 0.430171455322137, "grad_norm": 1.1171875, "learning_rate": 0.00018598561292853828, "loss": 3.2357, "step": 9189 }, { "epoch": 0.4302182690619697, "grad_norm": 1.703125, "learning_rate": 0.0001859826326124596, "loss": 2.8421, "step": 9190 }, { "epoch": 0.4302650828018023, "grad_norm": 1.15625, "learning_rate": 0.00018597965200340007, "loss": 2.7013, "step": 9191 }, { "epoch": 0.430311896541635, "grad_norm": 0.9453125, "learning_rate": 0.00018597667110136987, "loss": 3.4988, "step": 9192 }, { "epoch": 0.4303587102814676, "grad_norm": 1.2890625, "learning_rate": 0.00018597368990637913, "loss": 2.8991, "step": 9193 }, { "epoch": 0.4304055240213003, "grad_norm": 1.3984375, "learning_rate": 0.000185970708418438, "loss": 2.5336, "step": 9194 }, { "epoch": 0.4304523377611329, "grad_norm": 1.3125, "learning_rate": 0.00018596772663755667, "loss": 3.2025, "step": 9195 }, { "epoch": 0.4304991515009655, "grad_norm": 1.3984375, "learning_rate": 0.00018596474456374524, "loss": 3.1506, "step": 9196 }, { "epoch": 0.4305459652407982, "grad_norm": 1.25, "learning_rate": 0.00018596176219701392, "loss": 3.689, "step": 9197 }, { "epoch": 0.4305927789806308, "grad_norm": 1.5546875, "learning_rate": 0.0001859587795373729, "loss": 2.679, "step": 9198 }, { "epoch": 0.4306395927204635, "grad_norm": 1.53125, "learning_rate": 0.0001859557965848323, "loss": 3.3034, "step": 9199 }, { "epoch": 0.4306864064602961, "grad_norm": 1.5234375, "learning_rate": 0.00018595281333940225, "loss": 3.0625, "step": 9200 }, { "epoch": 0.4307332202001287, "grad_norm": 1.2265625, "learning_rate": 0.00018594982980109302, "loss": 3.3179, "step": 9201 }, { "epoch": 0.4307800339399614, "grad_norm": 1.4453125, "learning_rate": 0.00018594684596991466, "loss": 2.9798, "step": 9202 }, { "epoch": 0.430826847679794, "grad_norm": 1.25, "learning_rate": 0.0001859438618458774, "loss": 2.8294, "step": 9203 }, { "epoch": 0.4308736614196267, "grad_norm": 1.5390625, "learning_rate": 0.00018594087742899141, "loss": 3.1834, "step": 9204 }, { "epoch": 0.4309204751594593, "grad_norm": 1.21875, "learning_rate": 0.00018593789271926684, "loss": 3.1936, "step": 9205 }, { "epoch": 0.4309672888992919, "grad_norm": 1.7265625, "learning_rate": 0.00018593490771671387, "loss": 3.5287, "step": 9206 }, { "epoch": 0.4310141026391246, "grad_norm": 1.5625, "learning_rate": 0.00018593192242134267, "loss": 3.3907, "step": 9207 }, { "epoch": 0.4310609163789572, "grad_norm": 1.4609375, "learning_rate": 0.00018592893683316343, "loss": 3.1281, "step": 9208 }, { "epoch": 0.4311077301187899, "grad_norm": 1.375, "learning_rate": 0.00018592595095218628, "loss": 3.2747, "step": 9209 }, { "epoch": 0.4311545438586225, "grad_norm": 1.2578125, "learning_rate": 0.00018592296477842145, "loss": 3.0811, "step": 9210 }, { "epoch": 0.4312013575984551, "grad_norm": 1.4453125, "learning_rate": 0.00018591997831187904, "loss": 2.8167, "step": 9211 }, { "epoch": 0.4312481713382878, "grad_norm": 1.5390625, "learning_rate": 0.00018591699155256927, "loss": 2.9962, "step": 9212 }, { "epoch": 0.4312949850781204, "grad_norm": 1.703125, "learning_rate": 0.00018591400450050237, "loss": 3.1794, "step": 9213 }, { "epoch": 0.4313417988179531, "grad_norm": 1.3125, "learning_rate": 0.0001859110171556884, "loss": 3.1162, "step": 9214 }, { "epoch": 0.4313886125577857, "grad_norm": 1.875, "learning_rate": 0.00018590802951813767, "loss": 3.1897, "step": 9215 }, { "epoch": 0.43143542629761833, "grad_norm": 1.5234375, "learning_rate": 0.00018590504158786026, "loss": 3.2332, "step": 9216 }, { "epoch": 0.431482240037451, "grad_norm": 2.109375, "learning_rate": 0.00018590205336486637, "loss": 2.8119, "step": 9217 }, { "epoch": 0.4315290537772836, "grad_norm": 1.21875, "learning_rate": 0.00018589906484916624, "loss": 2.5448, "step": 9218 }, { "epoch": 0.4315758675171163, "grad_norm": 1.3046875, "learning_rate": 0.00018589607604077, "loss": 2.6772, "step": 9219 }, { "epoch": 0.4316226812569489, "grad_norm": 1.8671875, "learning_rate": 0.0001858930869396878, "loss": 3.1089, "step": 9220 }, { "epoch": 0.43166949499678153, "grad_norm": 1.125, "learning_rate": 0.00018589009754592994, "loss": 4.1312, "step": 9221 }, { "epoch": 0.4317163087366142, "grad_norm": 1.8046875, "learning_rate": 0.0001858871078595065, "loss": 2.9951, "step": 9222 }, { "epoch": 0.4317631224764468, "grad_norm": 1.109375, "learning_rate": 0.00018588411788042773, "loss": 2.7151, "step": 9223 }, { "epoch": 0.4318099362162795, "grad_norm": 1.4296875, "learning_rate": 0.00018588112760870376, "loss": 3.2261, "step": 9224 }, { "epoch": 0.4318567499561121, "grad_norm": 1.3828125, "learning_rate": 0.0001858781370443448, "loss": 2.9338, "step": 9225 }, { "epoch": 0.43190356369594474, "grad_norm": 1.609375, "learning_rate": 0.00018587514618736108, "loss": 2.7965, "step": 9226 }, { "epoch": 0.4319503774357774, "grad_norm": 1.765625, "learning_rate": 0.00018587215503776275, "loss": 3.1558, "step": 9227 }, { "epoch": 0.43199719117561003, "grad_norm": 1.25, "learning_rate": 0.00018586916359556005, "loss": 2.8878, "step": 9228 }, { "epoch": 0.4320440049154427, "grad_norm": 1.53125, "learning_rate": 0.0001858661718607631, "loss": 3.0682, "step": 9229 }, { "epoch": 0.4320908186552753, "grad_norm": 1.078125, "learning_rate": 0.00018586317983338216, "loss": 2.3018, "step": 9230 }, { "epoch": 0.43213763239510794, "grad_norm": 1.4765625, "learning_rate": 0.00018586018751342738, "loss": 3.0393, "step": 9231 }, { "epoch": 0.4321844461349406, "grad_norm": 1.703125, "learning_rate": 0.000185857194900909, "loss": 2.7973, "step": 9232 }, { "epoch": 0.43223125987477323, "grad_norm": 1.609375, "learning_rate": 0.00018585420199583713, "loss": 2.9768, "step": 9233 }, { "epoch": 0.4322780736146059, "grad_norm": 1.265625, "learning_rate": 0.0001858512087982221, "loss": 4.7419, "step": 9234 }, { "epoch": 0.4323248873544385, "grad_norm": 1.734375, "learning_rate": 0.00018584821530807398, "loss": 2.8904, "step": 9235 }, { "epoch": 0.43237170109427114, "grad_norm": 1.265625, "learning_rate": 0.00018584522152540308, "loss": 2.6795, "step": 9236 }, { "epoch": 0.4324185148341038, "grad_norm": 1.078125, "learning_rate": 0.00018584222745021952, "loss": 2.7385, "step": 9237 }, { "epoch": 0.43246532857393644, "grad_norm": 1.4765625, "learning_rate": 0.00018583923308253356, "loss": 3.3575, "step": 9238 }, { "epoch": 0.4325121423137691, "grad_norm": 1.421875, "learning_rate": 0.0001858362384223553, "loss": 3.1615, "step": 9239 }, { "epoch": 0.43255895605360173, "grad_norm": 1.515625, "learning_rate": 0.00018583324346969508, "loss": 2.8709, "step": 9240 }, { "epoch": 0.43260576979343435, "grad_norm": 1.6875, "learning_rate": 0.00018583024822456306, "loss": 3.0979, "step": 9241 }, { "epoch": 0.432652583533267, "grad_norm": 1.09375, "learning_rate": 0.00018582725268696942, "loss": 2.8426, "step": 9242 }, { "epoch": 0.43269939727309964, "grad_norm": 1.46875, "learning_rate": 0.00018582425685692436, "loss": 3.3593, "step": 9243 }, { "epoch": 0.4327462110129323, "grad_norm": 1.34375, "learning_rate": 0.0001858212607344381, "loss": 3.2897, "step": 9244 }, { "epoch": 0.43279302475276493, "grad_norm": 1.3359375, "learning_rate": 0.00018581826431952086, "loss": 2.956, "step": 9245 }, { "epoch": 0.43283983849259755, "grad_norm": 3.796875, "learning_rate": 0.00018581526761218283, "loss": 2.6387, "step": 9246 }, { "epoch": 0.4328866522324302, "grad_norm": 1.625, "learning_rate": 0.00018581227061243428, "loss": 3.2855, "step": 9247 }, { "epoch": 0.43293346597226284, "grad_norm": 1.359375, "learning_rate": 0.00018580927332028532, "loss": 3.333, "step": 9248 }, { "epoch": 0.4329802797120955, "grad_norm": 1.328125, "learning_rate": 0.00018580627573574628, "loss": 2.7556, "step": 9249 }, { "epoch": 0.43302709345192814, "grad_norm": 1.34375, "learning_rate": 0.00018580327785882727, "loss": 3.0217, "step": 9250 }, { "epoch": 0.43307390719176075, "grad_norm": 1.4765625, "learning_rate": 0.00018580027968953858, "loss": 3.3916, "step": 9251 }, { "epoch": 0.43312072093159343, "grad_norm": 1.4609375, "learning_rate": 0.0001857972812278904, "loss": 3.1679, "step": 9252 }, { "epoch": 0.43316753467142605, "grad_norm": 1.4765625, "learning_rate": 0.0001857942824738929, "loss": 3.196, "step": 9253 }, { "epoch": 0.4332143484112587, "grad_norm": 1.1796875, "learning_rate": 0.00018579128342755642, "loss": 2.7897, "step": 9254 }, { "epoch": 0.43326116215109134, "grad_norm": 1.390625, "learning_rate": 0.00018578828408889102, "loss": 2.9633, "step": 9255 }, { "epoch": 0.433307975890924, "grad_norm": 1.2109375, "learning_rate": 0.00018578528445790704, "loss": 2.8834, "step": 9256 }, { "epoch": 0.43335478963075663, "grad_norm": 1.203125, "learning_rate": 0.00018578228453461465, "loss": 2.7078, "step": 9257 }, { "epoch": 0.43340160337058925, "grad_norm": 1.3671875, "learning_rate": 0.0001857792843190241, "loss": 2.9766, "step": 9258 }, { "epoch": 0.4334484171104219, "grad_norm": 1.625, "learning_rate": 0.00018577628381114558, "loss": 3.2746, "step": 9259 }, { "epoch": 0.43349523085025454, "grad_norm": 1.34375, "learning_rate": 0.00018577328301098932, "loss": 3.0825, "step": 9260 }, { "epoch": 0.4335420445900872, "grad_norm": 1.3828125, "learning_rate": 0.0001857702819185656, "loss": 3.0579, "step": 9261 }, { "epoch": 0.43358885832991984, "grad_norm": 1.2578125, "learning_rate": 0.00018576728053388457, "loss": 3.0027, "step": 9262 }, { "epoch": 0.43363567206975245, "grad_norm": 1.390625, "learning_rate": 0.00018576427885695648, "loss": 3.0201, "step": 9263 }, { "epoch": 0.4336824858095851, "grad_norm": 2.125, "learning_rate": 0.00018576127688779156, "loss": 2.6926, "step": 9264 }, { "epoch": 0.43372929954941775, "grad_norm": 1.3203125, "learning_rate": 0.00018575827462640008, "loss": 3.0667, "step": 9265 }, { "epoch": 0.4337761132892504, "grad_norm": 1.3984375, "learning_rate": 0.00018575527207279223, "loss": 2.6843, "step": 9266 }, { "epoch": 0.43382292702908304, "grad_norm": 1.3046875, "learning_rate": 0.00018575226922697824, "loss": 3.0937, "step": 9267 }, { "epoch": 0.43386974076891566, "grad_norm": 1.34375, "learning_rate": 0.00018574926608896838, "loss": 2.8158, "step": 9268 }, { "epoch": 0.43391655450874833, "grad_norm": 1.8203125, "learning_rate": 0.0001857462626587728, "loss": 2.8444, "step": 9269 }, { "epoch": 0.43396336824858095, "grad_norm": 1.4375, "learning_rate": 0.00018574325893640182, "loss": 2.8989, "step": 9270 }, { "epoch": 0.4340101819884136, "grad_norm": 1.3984375, "learning_rate": 0.00018574025492186562, "loss": 3.1068, "step": 9271 }, { "epoch": 0.43405699572824624, "grad_norm": 1.640625, "learning_rate": 0.00018573725061517447, "loss": 2.9634, "step": 9272 }, { "epoch": 0.43410380946807886, "grad_norm": 1.6796875, "learning_rate": 0.00018573424601633857, "loss": 3.6502, "step": 9273 }, { "epoch": 0.43415062320791153, "grad_norm": 1.3125, "learning_rate": 0.00018573124112536822, "loss": 2.6987, "step": 9274 }, { "epoch": 0.43419743694774415, "grad_norm": 1.4765625, "learning_rate": 0.00018572823594227363, "loss": 3.1361, "step": 9275 }, { "epoch": 0.4342442506875768, "grad_norm": 2.265625, "learning_rate": 0.000185725230467065, "loss": 3.1496, "step": 9276 }, { "epoch": 0.43429106442740945, "grad_norm": 1.296875, "learning_rate": 0.0001857222246997526, "loss": 2.7806, "step": 9277 }, { "epoch": 0.43433787816724206, "grad_norm": 1.8203125, "learning_rate": 0.0001857192186403467, "loss": 3.0582, "step": 9278 }, { "epoch": 0.43438469190707474, "grad_norm": 1.5234375, "learning_rate": 0.00018571621228885747, "loss": 3.1907, "step": 9279 }, { "epoch": 0.43443150564690736, "grad_norm": 1.359375, "learning_rate": 0.00018571320564529523, "loss": 3.117, "step": 9280 }, { "epoch": 0.43447831938674003, "grad_norm": 1.34375, "learning_rate": 0.0001857101987096702, "loss": 2.8105, "step": 9281 }, { "epoch": 0.43452513312657265, "grad_norm": 1.53125, "learning_rate": 0.00018570719148199266, "loss": 3.0239, "step": 9282 }, { "epoch": 0.43457194686640527, "grad_norm": 1.5, "learning_rate": 0.00018570418396227277, "loss": 2.6372, "step": 9283 }, { "epoch": 0.43461876060623794, "grad_norm": 1.875, "learning_rate": 0.00018570117615052082, "loss": 3.1235, "step": 9284 }, { "epoch": 0.43466557434607056, "grad_norm": 1.4765625, "learning_rate": 0.0001856981680467471, "loss": 3.2392, "step": 9285 }, { "epoch": 0.43471238808590323, "grad_norm": 1.2421875, "learning_rate": 0.0001856951596509618, "loss": 3.1486, "step": 9286 }, { "epoch": 0.43475920182573585, "grad_norm": 1.5078125, "learning_rate": 0.0001856921509631752, "loss": 3.6732, "step": 9287 }, { "epoch": 0.43480601556556847, "grad_norm": 1.3203125, "learning_rate": 0.00018568914198339756, "loss": 3.121, "step": 9288 }, { "epoch": 0.43485282930540115, "grad_norm": 1.2890625, "learning_rate": 0.00018568613271163913, "loss": 3.2504, "step": 9289 }, { "epoch": 0.43489964304523376, "grad_norm": 1.2890625, "learning_rate": 0.00018568312314791013, "loss": 2.9737, "step": 9290 }, { "epoch": 0.43494645678506644, "grad_norm": 1.15625, "learning_rate": 0.00018568011329222084, "loss": 2.6867, "step": 9291 }, { "epoch": 0.43499327052489906, "grad_norm": 1.4609375, "learning_rate": 0.00018567710314458153, "loss": 2.9522, "step": 9292 }, { "epoch": 0.4350400842647317, "grad_norm": 1.1171875, "learning_rate": 0.00018567409270500243, "loss": 2.8012, "step": 9293 }, { "epoch": 0.43508689800456435, "grad_norm": 1.15625, "learning_rate": 0.00018567108197349382, "loss": 3.213, "step": 9294 }, { "epoch": 0.43513371174439697, "grad_norm": 1.390625, "learning_rate": 0.000185668070950066, "loss": 3.0827, "step": 9295 }, { "epoch": 0.43518052548422964, "grad_norm": 1.4453125, "learning_rate": 0.0001856650596347291, "loss": 3.3957, "step": 9296 }, { "epoch": 0.43522733922406226, "grad_norm": 1.7578125, "learning_rate": 0.0001856620480274935, "loss": 3.0192, "step": 9297 }, { "epoch": 0.4352741529638949, "grad_norm": 1.6640625, "learning_rate": 0.00018565903612836944, "loss": 2.7789, "step": 9298 }, { "epoch": 0.43532096670372755, "grad_norm": 1.171875, "learning_rate": 0.00018565602393736712, "loss": 2.8739, "step": 9299 }, { "epoch": 0.43536778044356017, "grad_norm": 1.7578125, "learning_rate": 0.0001856530114544969, "loss": 2.9893, "step": 9300 }, { "epoch": 0.43541459418339284, "grad_norm": 1.5625, "learning_rate": 0.00018564999867976895, "loss": 2.6583, "step": 9301 }, { "epoch": 0.43546140792322546, "grad_norm": 1.796875, "learning_rate": 0.0001856469856131936, "loss": 3.0831, "step": 9302 }, { "epoch": 0.4355082216630581, "grad_norm": 1.5234375, "learning_rate": 0.0001856439722547811, "loss": 3.0244, "step": 9303 }, { "epoch": 0.43555503540289076, "grad_norm": 1.546875, "learning_rate": 0.0001856409586045417, "loss": 3.045, "step": 9304 }, { "epoch": 0.4356018491427234, "grad_norm": 1.390625, "learning_rate": 0.0001856379446624857, "loss": 2.9869, "step": 9305 }, { "epoch": 0.43564866288255605, "grad_norm": 2.375, "learning_rate": 0.00018563493042862334, "loss": 2.7684, "step": 9306 }, { "epoch": 0.43569547662238867, "grad_norm": 1.3671875, "learning_rate": 0.00018563191590296492, "loss": 2.8565, "step": 9307 }, { "epoch": 0.4357422903622213, "grad_norm": 1.28125, "learning_rate": 0.0001856289010855207, "loss": 2.613, "step": 9308 }, { "epoch": 0.43578910410205396, "grad_norm": 1.640625, "learning_rate": 0.00018562588597630092, "loss": 3.3879, "step": 9309 }, { "epoch": 0.4358359178418866, "grad_norm": 1.9453125, "learning_rate": 0.00018562287057531592, "loss": 2.9863, "step": 9310 }, { "epoch": 0.43588273158171925, "grad_norm": 1.5078125, "learning_rate": 0.00018561985488257593, "loss": 3.2093, "step": 9311 }, { "epoch": 0.43592954532155187, "grad_norm": 1.4609375, "learning_rate": 0.00018561683889809118, "loss": 3.2153, "step": 9312 }, { "epoch": 0.43597635906138454, "grad_norm": 1.3515625, "learning_rate": 0.00018561382262187207, "loss": 3.029, "step": 9313 }, { "epoch": 0.43602317280121716, "grad_norm": 2.390625, "learning_rate": 0.00018561080605392877, "loss": 2.8367, "step": 9314 }, { "epoch": 0.4360699865410498, "grad_norm": 1.46875, "learning_rate": 0.00018560778919427163, "loss": 3.0779, "step": 9315 }, { "epoch": 0.43611680028088246, "grad_norm": 1.078125, "learning_rate": 0.00018560477204291085, "loss": 3.1375, "step": 9316 }, { "epoch": 0.4361636140207151, "grad_norm": 1.375, "learning_rate": 0.00018560175459985678, "loss": 3.285, "step": 9317 }, { "epoch": 0.43621042776054775, "grad_norm": 1.3125, "learning_rate": 0.0001855987368651197, "loss": 3.0262, "step": 9318 }, { "epoch": 0.43625724150038037, "grad_norm": 1.1015625, "learning_rate": 0.00018559571883870984, "loss": 3.4536, "step": 9319 }, { "epoch": 0.436304055240213, "grad_norm": 1.4375, "learning_rate": 0.00018559270052063753, "loss": 3.0131, "step": 9320 }, { "epoch": 0.43635086898004566, "grad_norm": 1.0546875, "learning_rate": 0.00018558968191091305, "loss": 2.3286, "step": 9321 }, { "epoch": 0.4363976827198783, "grad_norm": 1.5234375, "learning_rate": 0.00018558666300954666, "loss": 3.3899, "step": 9322 }, { "epoch": 0.43644449645971095, "grad_norm": 1.6875, "learning_rate": 0.00018558364381654867, "loss": 1.9857, "step": 9323 }, { "epoch": 0.43649131019954357, "grad_norm": 1.5078125, "learning_rate": 0.00018558062433192936, "loss": 2.9376, "step": 9324 }, { "epoch": 0.4365381239393762, "grad_norm": 1.453125, "learning_rate": 0.000185577604555699, "loss": 3.1954, "step": 9325 }, { "epoch": 0.43658493767920886, "grad_norm": 1.5703125, "learning_rate": 0.00018557458448786792, "loss": 2.9349, "step": 9326 }, { "epoch": 0.4366317514190415, "grad_norm": 2.34375, "learning_rate": 0.0001855715641284464, "loss": 2.76, "step": 9327 }, { "epoch": 0.43667856515887415, "grad_norm": 1.3203125, "learning_rate": 0.0001855685434774447, "loss": 2.6987, "step": 9328 }, { "epoch": 0.4367253788987068, "grad_norm": 2.3125, "learning_rate": 0.00018556552253487315, "loss": 3.0903, "step": 9329 }, { "epoch": 0.4367721926385394, "grad_norm": 1.3203125, "learning_rate": 0.00018556250130074203, "loss": 3.5358, "step": 9330 }, { "epoch": 0.43681900637837207, "grad_norm": 1.8046875, "learning_rate": 0.0001855594797750616, "loss": 3.084, "step": 9331 }, { "epoch": 0.4368658201182047, "grad_norm": 1.8359375, "learning_rate": 0.00018555645795784222, "loss": 2.8849, "step": 9332 }, { "epoch": 0.43691263385803736, "grad_norm": 1.53125, "learning_rate": 0.00018555343584909415, "loss": 3.098, "step": 9333 }, { "epoch": 0.43695944759787, "grad_norm": 1.4921875, "learning_rate": 0.00018555041344882768, "loss": 3.093, "step": 9334 }, { "epoch": 0.4370062613377026, "grad_norm": 1.359375, "learning_rate": 0.00018554739075705312, "loss": 3.1203, "step": 9335 }, { "epoch": 0.43705307507753527, "grad_norm": 1.2265625, "learning_rate": 0.0001855443677737808, "loss": 2.9968, "step": 9336 }, { "epoch": 0.4370998888173679, "grad_norm": 1.5703125, "learning_rate": 0.00018554134449902098, "loss": 2.9458, "step": 9337 }, { "epoch": 0.43714670255720056, "grad_norm": 1.6953125, "learning_rate": 0.00018553832093278397, "loss": 2.9174, "step": 9338 }, { "epoch": 0.4371935162970332, "grad_norm": 1.625, "learning_rate": 0.00018553529707508005, "loss": 2.9516, "step": 9339 }, { "epoch": 0.4372403300368658, "grad_norm": 1.21875, "learning_rate": 0.00018553227292591957, "loss": 3.323, "step": 9340 }, { "epoch": 0.4372871437766985, "grad_norm": 1.3671875, "learning_rate": 0.00018552924848531284, "loss": 3.41, "step": 9341 }, { "epoch": 0.4373339575165311, "grad_norm": 1.5390625, "learning_rate": 0.0001855262237532701, "loss": 4.8443, "step": 9342 }, { "epoch": 0.43738077125636377, "grad_norm": 1.2578125, "learning_rate": 0.0001855231987298017, "loss": 3.3029, "step": 9343 }, { "epoch": 0.4374275849961964, "grad_norm": 1.2265625, "learning_rate": 0.00018552017341491798, "loss": 2.8433, "step": 9344 }, { "epoch": 0.437474398736029, "grad_norm": 1.7109375, "learning_rate": 0.00018551714780862917, "loss": 2.8351, "step": 9345 }, { "epoch": 0.4375212124758617, "grad_norm": 1.3125, "learning_rate": 0.00018551412191094563, "loss": 2.8678, "step": 9346 }, { "epoch": 0.4375680262156943, "grad_norm": 1.453125, "learning_rate": 0.00018551109572187768, "loss": 2.994, "step": 9347 }, { "epoch": 0.43761483995552697, "grad_norm": 1.25, "learning_rate": 0.0001855080692414356, "loss": 2.7267, "step": 9348 }, { "epoch": 0.4376616536953596, "grad_norm": 1.6953125, "learning_rate": 0.0001855050424696297, "loss": 2.9098, "step": 9349 }, { "epoch": 0.4377084674351922, "grad_norm": 1.3828125, "learning_rate": 0.00018550201540647033, "loss": 3.0121, "step": 9350 }, { "epoch": 0.4377552811750249, "grad_norm": 1.34375, "learning_rate": 0.0001854989880519678, "loss": 2.8478, "step": 9351 }, { "epoch": 0.4378020949148575, "grad_norm": 1.734375, "learning_rate": 0.00018549596040613243, "loss": 2.9575, "step": 9352 }, { "epoch": 0.43784890865469017, "grad_norm": 1.1015625, "learning_rate": 0.00018549293246897448, "loss": 3.0866, "step": 9353 }, { "epoch": 0.4378957223945228, "grad_norm": 1.2265625, "learning_rate": 0.0001854899042405043, "loss": 2.829, "step": 9354 }, { "epoch": 0.4379425361343554, "grad_norm": 1.46875, "learning_rate": 0.0001854868757207322, "loss": 2.9919, "step": 9355 }, { "epoch": 0.4379893498741881, "grad_norm": 1.109375, "learning_rate": 0.00018548384690966855, "loss": 2.9761, "step": 9356 }, { "epoch": 0.4380361636140207, "grad_norm": 1.9375, "learning_rate": 0.00018548081780732362, "loss": 3.1384, "step": 9357 }, { "epoch": 0.4380829773538534, "grad_norm": 1.203125, "learning_rate": 0.00018547778841370772, "loss": 2.4821, "step": 9358 }, { "epoch": 0.438129791093686, "grad_norm": 1.828125, "learning_rate": 0.00018547475872883123, "loss": 3.1443, "step": 9359 }, { "epoch": 0.4381766048335186, "grad_norm": 1.8046875, "learning_rate": 0.00018547172875270444, "loss": 3.2784, "step": 9360 }, { "epoch": 0.4382234185733513, "grad_norm": 1.421875, "learning_rate": 0.00018546869848533768, "loss": 2.8121, "step": 9361 }, { "epoch": 0.4382702323131839, "grad_norm": 1.3671875, "learning_rate": 0.00018546566792674126, "loss": 2.6765, "step": 9362 }, { "epoch": 0.4383170460530166, "grad_norm": 1.1015625, "learning_rate": 0.00018546263707692548, "loss": 2.9333, "step": 9363 }, { "epoch": 0.4383638597928492, "grad_norm": 1.640625, "learning_rate": 0.00018545960593590075, "loss": 2.7965, "step": 9364 }, { "epoch": 0.4384106735326818, "grad_norm": 1.1875, "learning_rate": 0.00018545657450367732, "loss": 3.0914, "step": 9365 }, { "epoch": 0.4384574872725145, "grad_norm": 1.3203125, "learning_rate": 0.00018545354278026559, "loss": 2.7381, "step": 9366 }, { "epoch": 0.4385043010123471, "grad_norm": 1.8671875, "learning_rate": 0.0001854505107656758, "loss": 2.895, "step": 9367 }, { "epoch": 0.4385511147521798, "grad_norm": 1.453125, "learning_rate": 0.00018544747845991837, "loss": 2.8583, "step": 9368 }, { "epoch": 0.4385979284920124, "grad_norm": 1.5234375, "learning_rate": 0.0001854444458630036, "loss": 2.797, "step": 9369 }, { "epoch": 0.438644742231845, "grad_norm": 1.296875, "learning_rate": 0.00018544141297494182, "loss": 3.1075, "step": 9370 }, { "epoch": 0.4386915559716777, "grad_norm": 1.2265625, "learning_rate": 0.00018543837979574334, "loss": 3.2195, "step": 9371 }, { "epoch": 0.4387383697115103, "grad_norm": 1.7421875, "learning_rate": 0.00018543534632541855, "loss": 3.0293, "step": 9372 }, { "epoch": 0.438785183451343, "grad_norm": 1.4609375, "learning_rate": 0.00018543231256397773, "loss": 3.1235, "step": 9373 }, { "epoch": 0.4388319971911756, "grad_norm": 1.4609375, "learning_rate": 0.00018542927851143123, "loss": 3.1534, "step": 9374 }, { "epoch": 0.4388788109310083, "grad_norm": 1.25, "learning_rate": 0.00018542624416778946, "loss": 3.1435, "step": 9375 }, { "epoch": 0.4389256246708409, "grad_norm": 1.59375, "learning_rate": 0.00018542320953306266, "loss": 3.4303, "step": 9376 }, { "epoch": 0.4389724384106735, "grad_norm": 1.21875, "learning_rate": 0.0001854201746072612, "loss": 2.6726, "step": 9377 }, { "epoch": 0.4390192521505062, "grad_norm": 1.703125, "learning_rate": 0.00018541713939039544, "loss": 3.0711, "step": 9378 }, { "epoch": 0.4390660658903388, "grad_norm": 1.4765625, "learning_rate": 0.00018541410388247577, "loss": 3.2026, "step": 9379 }, { "epoch": 0.4391128796301715, "grad_norm": 1.3671875, "learning_rate": 0.0001854110680835124, "loss": 3.0387, "step": 9380 }, { "epoch": 0.4391596933700041, "grad_norm": 1.21875, "learning_rate": 0.0001854080319935158, "loss": 3.0624, "step": 9381 }, { "epoch": 0.4392065071098367, "grad_norm": 1.5, "learning_rate": 0.00018540499561249624, "loss": 3.2629, "step": 9382 }, { "epoch": 0.4392533208496694, "grad_norm": 1.40625, "learning_rate": 0.0001854019589404641, "loss": 3.2165, "step": 9383 }, { "epoch": 0.439300134589502, "grad_norm": 1.0390625, "learning_rate": 0.00018539892197742973, "loss": 2.7853, "step": 9384 }, { "epoch": 0.4393469483293347, "grad_norm": 1.140625, "learning_rate": 0.0001853958847234035, "loss": 2.9209, "step": 9385 }, { "epoch": 0.4393937620691673, "grad_norm": 2.46875, "learning_rate": 0.00018539284717839568, "loss": 3.0054, "step": 9386 }, { "epoch": 0.4394405758089999, "grad_norm": 1.1484375, "learning_rate": 0.00018538980934241668, "loss": 3.0742, "step": 9387 }, { "epoch": 0.4394873895488326, "grad_norm": 1.328125, "learning_rate": 0.00018538677121547685, "loss": 2.7966, "step": 9388 }, { "epoch": 0.4395342032886652, "grad_norm": 1.6484375, "learning_rate": 0.0001853837327975865, "loss": 3.1564, "step": 9389 }, { "epoch": 0.4395810170284979, "grad_norm": 1.5078125, "learning_rate": 0.00018538069408875608, "loss": 3.1297, "step": 9390 }, { "epoch": 0.4396278307683305, "grad_norm": 1.1171875, "learning_rate": 0.00018537765508899585, "loss": 2.5071, "step": 9391 }, { "epoch": 0.4396746445081631, "grad_norm": 1.28125, "learning_rate": 0.00018537461579831618, "loss": 3.0156, "step": 9392 }, { "epoch": 0.4397214582479958, "grad_norm": 1.453125, "learning_rate": 0.00018537157621672745, "loss": 2.9267, "step": 9393 }, { "epoch": 0.4397682719878284, "grad_norm": 1.5546875, "learning_rate": 0.00018536853634424, "loss": 3.0962, "step": 9394 }, { "epoch": 0.4398150857276611, "grad_norm": 1.1171875, "learning_rate": 0.00018536549618086422, "loss": 3.0359, "step": 9395 }, { "epoch": 0.4398618994674937, "grad_norm": 1.171875, "learning_rate": 0.00018536245572661045, "loss": 2.6392, "step": 9396 }, { "epoch": 0.43990871320732633, "grad_norm": 3.265625, "learning_rate": 0.00018535941498148902, "loss": 3.0922, "step": 9397 }, { "epoch": 0.439955526947159, "grad_norm": 1.5234375, "learning_rate": 0.00018535637394551032, "loss": 3.316, "step": 9398 }, { "epoch": 0.4400023406869916, "grad_norm": 1.4375, "learning_rate": 0.0001853533326186847, "loss": 3.1309, "step": 9399 }, { "epoch": 0.4400491544268243, "grad_norm": 1.609375, "learning_rate": 0.00018535029100102257, "loss": 3.5308, "step": 9400 }, { "epoch": 0.4400959681666569, "grad_norm": 1.359375, "learning_rate": 0.00018534724909253424, "loss": 3.0753, "step": 9401 }, { "epoch": 0.44014278190648953, "grad_norm": 1.78125, "learning_rate": 0.00018534420689323005, "loss": 3.3307, "step": 9402 }, { "epoch": 0.4401895956463222, "grad_norm": 1.515625, "learning_rate": 0.00018534116440312048, "loss": 2.6165, "step": 9403 }, { "epoch": 0.4402364093861548, "grad_norm": 1.375, "learning_rate": 0.0001853381216222158, "loss": 2.9139, "step": 9404 }, { "epoch": 0.4402832231259875, "grad_norm": 1.453125, "learning_rate": 0.0001853350785505264, "loss": 3.1172, "step": 9405 }, { "epoch": 0.4403300368658201, "grad_norm": 1.4375, "learning_rate": 0.00018533203518806264, "loss": 3.3733, "step": 9406 }, { "epoch": 0.44037685060565274, "grad_norm": 1.15625, "learning_rate": 0.0001853289915348349, "loss": 3.0007, "step": 9407 }, { "epoch": 0.4404236643454854, "grad_norm": 1.4296875, "learning_rate": 0.00018532594759085357, "loss": 2.8313, "step": 9408 }, { "epoch": 0.44047047808531803, "grad_norm": 1.390625, "learning_rate": 0.000185322903356129, "loss": 3.0619, "step": 9409 }, { "epoch": 0.4405172918251507, "grad_norm": 1.8203125, "learning_rate": 0.00018531985883067157, "loss": 3.3937, "step": 9410 }, { "epoch": 0.4405641055649833, "grad_norm": 1.7109375, "learning_rate": 0.00018531681401449165, "loss": 3.1793, "step": 9411 }, { "epoch": 0.44061091930481594, "grad_norm": 1.6171875, "learning_rate": 0.0001853137689075996, "loss": 3.2514, "step": 9412 }, { "epoch": 0.4406577330446486, "grad_norm": 1.21875, "learning_rate": 0.00018531072351000584, "loss": 3.1259, "step": 9413 }, { "epoch": 0.44070454678448123, "grad_norm": 1.6796875, "learning_rate": 0.00018530767782172074, "loss": 3.1161, "step": 9414 }, { "epoch": 0.4407513605243139, "grad_norm": 1.5390625, "learning_rate": 0.00018530463184275462, "loss": 3.2693, "step": 9415 }, { "epoch": 0.4407981742641465, "grad_norm": 1.25, "learning_rate": 0.0001853015855731179, "loss": 2.6503, "step": 9416 }, { "epoch": 0.44084498800397914, "grad_norm": 1.1640625, "learning_rate": 0.00018529853901282098, "loss": 2.9455, "step": 9417 }, { "epoch": 0.4408918017438118, "grad_norm": 1.375, "learning_rate": 0.0001852954921618742, "loss": 2.8847, "step": 9418 }, { "epoch": 0.44093861548364444, "grad_norm": 1.609375, "learning_rate": 0.00018529244502028797, "loss": 2.8289, "step": 9419 }, { "epoch": 0.4409854292234771, "grad_norm": 2.75, "learning_rate": 0.00018528939758807267, "loss": 3.2652, "step": 9420 }, { "epoch": 0.44103224296330973, "grad_norm": 1.265625, "learning_rate": 0.00018528634986523867, "loss": 3.2448, "step": 9421 }, { "epoch": 0.44107905670314235, "grad_norm": 1.3359375, "learning_rate": 0.00018528330185179636, "loss": 3.055, "step": 9422 }, { "epoch": 0.441125870442975, "grad_norm": 1.1328125, "learning_rate": 0.00018528025354775613, "loss": 3.0471, "step": 9423 }, { "epoch": 0.44117268418280764, "grad_norm": 1.9296875, "learning_rate": 0.00018527720495312836, "loss": 2.9866, "step": 9424 }, { "epoch": 0.4412194979226403, "grad_norm": 1.4921875, "learning_rate": 0.00018527415606792345, "loss": 3.0003, "step": 9425 }, { "epoch": 0.44126631166247293, "grad_norm": 1.453125, "learning_rate": 0.00018527110689215177, "loss": 3.2438, "step": 9426 }, { "epoch": 0.44131312540230555, "grad_norm": 1.3125, "learning_rate": 0.00018526805742582374, "loss": 2.9481, "step": 9427 }, { "epoch": 0.4413599391421382, "grad_norm": 1.2421875, "learning_rate": 0.00018526500766894973, "loss": 3.0915, "step": 9428 }, { "epoch": 0.44140675288197084, "grad_norm": 1.296875, "learning_rate": 0.00018526195762154016, "loss": 3.2701, "step": 9429 }, { "epoch": 0.4414535666218035, "grad_norm": 1.7265625, "learning_rate": 0.00018525890728360533, "loss": 2.9407, "step": 9430 }, { "epoch": 0.44150038036163614, "grad_norm": 1.546875, "learning_rate": 0.00018525585665515575, "loss": 3.1886, "step": 9431 }, { "epoch": 0.44154719410146875, "grad_norm": 1.40625, "learning_rate": 0.00018525280573620175, "loss": 2.6478, "step": 9432 }, { "epoch": 0.44159400784130143, "grad_norm": 1.296875, "learning_rate": 0.00018524975452675375, "loss": 3.0517, "step": 9433 }, { "epoch": 0.44164082158113405, "grad_norm": 3.03125, "learning_rate": 0.00018524670302682216, "loss": 3.0908, "step": 9434 }, { "epoch": 0.4416876353209667, "grad_norm": 1.2578125, "learning_rate": 0.00018524365123641733, "loss": 3.0536, "step": 9435 }, { "epoch": 0.44173444906079934, "grad_norm": 2.5625, "learning_rate": 0.0001852405991555497, "loss": 2.5585, "step": 9436 }, { "epoch": 0.441781262800632, "grad_norm": 1.0546875, "learning_rate": 0.00018523754678422964, "loss": 1.9347, "step": 9437 }, { "epoch": 0.44182807654046463, "grad_norm": 1.1171875, "learning_rate": 0.00018523449412246757, "loss": 2.89, "step": 9438 }, { "epoch": 0.44187489028029725, "grad_norm": 1.8671875, "learning_rate": 0.0001852314411702739, "loss": 3.5328, "step": 9439 }, { "epoch": 0.4419217040201299, "grad_norm": 1.1796875, "learning_rate": 0.00018522838792765903, "loss": 2.8323, "step": 9440 }, { "epoch": 0.44196851775996254, "grad_norm": 1.21875, "learning_rate": 0.0001852253343946333, "loss": 2.7575, "step": 9441 }, { "epoch": 0.4420153314997952, "grad_norm": 1.1875, "learning_rate": 0.00018522228057120721, "loss": 2.9684, "step": 9442 }, { "epoch": 0.44206214523962784, "grad_norm": 2.296875, "learning_rate": 0.00018521922645739115, "loss": 3.4115, "step": 9443 }, { "epoch": 0.44210895897946045, "grad_norm": 1.53125, "learning_rate": 0.00018521617205319543, "loss": 3.336, "step": 9444 }, { "epoch": 0.4421557727192931, "grad_norm": 1.8671875, "learning_rate": 0.0001852131173586306, "loss": 3.1469, "step": 9445 }, { "epoch": 0.44220258645912575, "grad_norm": 1.6015625, "learning_rate": 0.00018521006237370697, "loss": 3.0576, "step": 9446 }, { "epoch": 0.4422494001989584, "grad_norm": 1.2109375, "learning_rate": 0.000185207007098435, "loss": 2.9306, "step": 9447 }, { "epoch": 0.44229621393879104, "grad_norm": 1.359375, "learning_rate": 0.00018520395153282505, "loss": 2.9494, "step": 9448 }, { "epoch": 0.44234302767862366, "grad_norm": 1.171875, "learning_rate": 0.00018520089567688753, "loss": 2.8086, "step": 9449 }, { "epoch": 0.44238984141845633, "grad_norm": 1.421875, "learning_rate": 0.00018519783953063296, "loss": 3.2991, "step": 9450 }, { "epoch": 0.44243665515828895, "grad_norm": 1.1171875, "learning_rate": 0.00018519478309407162, "loss": 3.2004, "step": 9451 }, { "epoch": 0.4424834688981216, "grad_norm": 1.03125, "learning_rate": 0.000185191726367214, "loss": 2.4658, "step": 9452 }, { "epoch": 0.44253028263795424, "grad_norm": 1.1328125, "learning_rate": 0.0001851886693500705, "loss": 3.2416, "step": 9453 }, { "epoch": 0.44257709637778686, "grad_norm": 1.2421875, "learning_rate": 0.00018518561204265151, "loss": 2.9047, "step": 9454 }, { "epoch": 0.44262391011761953, "grad_norm": 2.125, "learning_rate": 0.0001851825544449675, "loss": 3.9793, "step": 9455 }, { "epoch": 0.44267072385745215, "grad_norm": 1.53125, "learning_rate": 0.00018517949655702884, "loss": 2.7903, "step": 9456 }, { "epoch": 0.4427175375972848, "grad_norm": 1.515625, "learning_rate": 0.000185176438378846, "loss": 2.913, "step": 9457 }, { "epoch": 0.44276435133711745, "grad_norm": 1.515625, "learning_rate": 0.00018517337991042933, "loss": 3.0057, "step": 9458 }, { "epoch": 0.44281116507695006, "grad_norm": 1.40625, "learning_rate": 0.00018517032115178932, "loss": 2.7967, "step": 9459 }, { "epoch": 0.44285797881678274, "grad_norm": 1.3359375, "learning_rate": 0.00018516726210293633, "loss": 3.126, "step": 9460 }, { "epoch": 0.44290479255661536, "grad_norm": 1.2734375, "learning_rate": 0.00018516420276388086, "loss": 2.8211, "step": 9461 }, { "epoch": 0.44295160629644803, "grad_norm": 1.1171875, "learning_rate": 0.00018516114313463325, "loss": 2.9835, "step": 9462 }, { "epoch": 0.44299842003628065, "grad_norm": 1.59375, "learning_rate": 0.000185158083215204, "loss": 2.9657, "step": 9463 }, { "epoch": 0.44304523377611327, "grad_norm": 1.359375, "learning_rate": 0.00018515502300560347, "loss": 3.6781, "step": 9464 }, { "epoch": 0.44309204751594594, "grad_norm": 1.6953125, "learning_rate": 0.00018515196250584216, "loss": 3.3462, "step": 9465 }, { "epoch": 0.44313886125577856, "grad_norm": 1.390625, "learning_rate": 0.00018514890171593044, "loss": 3.3222, "step": 9466 }, { "epoch": 0.44318567499561123, "grad_norm": 1.375, "learning_rate": 0.00018514584063587875, "loss": 3.1283, "step": 9467 }, { "epoch": 0.44323248873544385, "grad_norm": 1.421875, "learning_rate": 0.00018514277926569755, "loss": 2.7344, "step": 9468 }, { "epoch": 0.44327930247527647, "grad_norm": 1.390625, "learning_rate": 0.00018513971760539724, "loss": 3.1519, "step": 9469 }, { "epoch": 0.44332611621510915, "grad_norm": 1.265625, "learning_rate": 0.00018513665565498828, "loss": 2.3326, "step": 9470 }, { "epoch": 0.44337292995494176, "grad_norm": 1.3671875, "learning_rate": 0.00018513359341448108, "loss": 2.7936, "step": 9471 }, { "epoch": 0.44341974369477444, "grad_norm": 1.359375, "learning_rate": 0.00018513053088388607, "loss": 2.3685, "step": 9472 }, { "epoch": 0.44346655743460706, "grad_norm": 1.265625, "learning_rate": 0.0001851274680632137, "loss": 2.9911, "step": 9473 }, { "epoch": 0.4435133711744397, "grad_norm": 1.1171875, "learning_rate": 0.0001851244049524744, "loss": 3.1167, "step": 9474 }, { "epoch": 0.44356018491427235, "grad_norm": 1.453125, "learning_rate": 0.00018512134155167863, "loss": 3.4123, "step": 9475 }, { "epoch": 0.44360699865410497, "grad_norm": 1.328125, "learning_rate": 0.00018511827786083678, "loss": 2.9884, "step": 9476 }, { "epoch": 0.44365381239393764, "grad_norm": 1.4921875, "learning_rate": 0.00018511521387995934, "loss": 2.9936, "step": 9477 }, { "epoch": 0.44370062613377026, "grad_norm": 3.0625, "learning_rate": 0.00018511214960905672, "loss": 3.0122, "step": 9478 }, { "epoch": 0.4437474398736029, "grad_norm": 1.4921875, "learning_rate": 0.00018510908504813935, "loss": 3.1352, "step": 9479 }, { "epoch": 0.44379425361343555, "grad_norm": 1.3046875, "learning_rate": 0.00018510602019721772, "loss": 3.1355, "step": 9480 }, { "epoch": 0.44384106735326817, "grad_norm": 1.625, "learning_rate": 0.00018510295505630225, "loss": 3.12, "step": 9481 }, { "epoch": 0.44388788109310084, "grad_norm": 1.2890625, "learning_rate": 0.00018509988962540338, "loss": 3.1462, "step": 9482 }, { "epoch": 0.44393469483293346, "grad_norm": 1.6328125, "learning_rate": 0.00018509682390453153, "loss": 2.5998, "step": 9483 }, { "epoch": 0.4439815085727661, "grad_norm": 1.1328125, "learning_rate": 0.00018509375789369718, "loss": 2.9803, "step": 9484 }, { "epoch": 0.44402832231259876, "grad_norm": 1.25, "learning_rate": 0.0001850906915929108, "loss": 2.9918, "step": 9485 }, { "epoch": 0.4440751360524314, "grad_norm": 1.21875, "learning_rate": 0.00018508762500218278, "loss": 2.7807, "step": 9486 }, { "epoch": 0.44412194979226405, "grad_norm": 1.0859375, "learning_rate": 0.0001850845581215236, "loss": 2.8528, "step": 9487 }, { "epoch": 0.44416876353209667, "grad_norm": 1.3125, "learning_rate": 0.0001850814909509437, "loss": 2.9525, "step": 9488 }, { "epoch": 0.4442155772719293, "grad_norm": 1.9375, "learning_rate": 0.0001850784234904535, "loss": 3.1196, "step": 9489 }, { "epoch": 0.44426239101176196, "grad_norm": 1.234375, "learning_rate": 0.00018507535574006356, "loss": 2.7324, "step": 9490 }, { "epoch": 0.4443092047515946, "grad_norm": 1.5859375, "learning_rate": 0.00018507228769978424, "loss": 3.1913, "step": 9491 }, { "epoch": 0.44435601849142725, "grad_norm": 1.3671875, "learning_rate": 0.000185069219369626, "loss": 3.2485, "step": 9492 }, { "epoch": 0.44440283223125987, "grad_norm": 1.328125, "learning_rate": 0.00018506615074959933, "loss": 3.1053, "step": 9493 }, { "epoch": 0.4444496459710925, "grad_norm": 1.0859375, "learning_rate": 0.00018506308183971465, "loss": 2.5402, "step": 9494 }, { "epoch": 0.44449645971092516, "grad_norm": 1.3125, "learning_rate": 0.00018506001263998245, "loss": 3.3725, "step": 9495 }, { "epoch": 0.4445432734507578, "grad_norm": 1.421875, "learning_rate": 0.00018505694315041317, "loss": 2.8411, "step": 9496 }, { "epoch": 0.44459008719059046, "grad_norm": 1.421875, "learning_rate": 0.0001850538733710173, "loss": 2.8373, "step": 9497 }, { "epoch": 0.4446369009304231, "grad_norm": 1.453125, "learning_rate": 0.00018505080330180523, "loss": 2.7144, "step": 9498 }, { "epoch": 0.44468371467025575, "grad_norm": 1.3671875, "learning_rate": 0.00018504773294278747, "loss": 3.1627, "step": 9499 }, { "epoch": 0.44473052841008837, "grad_norm": 1.34375, "learning_rate": 0.0001850446622939745, "loss": 4.4499, "step": 9500 }, { "epoch": 0.444777342149921, "grad_norm": 1.4453125, "learning_rate": 0.00018504159135537674, "loss": 2.6009, "step": 9501 }, { "epoch": 0.44482415588975366, "grad_norm": 1.3515625, "learning_rate": 0.00018503852012700467, "loss": 3.0114, "step": 9502 }, { "epoch": 0.4448709696295863, "grad_norm": 1.25, "learning_rate": 0.00018503544860886875, "loss": 3.0659, "step": 9503 }, { "epoch": 0.44491778336941895, "grad_norm": 1.5546875, "learning_rate": 0.00018503237680097948, "loss": 2.9457, "step": 9504 }, { "epoch": 0.44496459710925157, "grad_norm": 1.5390625, "learning_rate": 0.0001850293047033473, "loss": 2.865, "step": 9505 }, { "epoch": 0.4450114108490842, "grad_norm": 1.8828125, "learning_rate": 0.0001850262323159827, "loss": 2.9495, "step": 9506 }, { "epoch": 0.44505822458891686, "grad_norm": 1.4921875, "learning_rate": 0.00018502315963889608, "loss": 3.0669, "step": 9507 }, { "epoch": 0.4451050383287495, "grad_norm": 1.65625, "learning_rate": 0.00018502008667209795, "loss": 3.1884, "step": 9508 }, { "epoch": 0.44515185206858215, "grad_norm": 1.265625, "learning_rate": 0.00018501701341559884, "loss": 2.8768, "step": 9509 }, { "epoch": 0.4451986658084148, "grad_norm": 1.53125, "learning_rate": 0.00018501393986940915, "loss": 2.8493, "step": 9510 }, { "epoch": 0.4452454795482474, "grad_norm": 1.4609375, "learning_rate": 0.00018501086603353936, "loss": 3.3228, "step": 9511 }, { "epoch": 0.44529229328808007, "grad_norm": 1.5078125, "learning_rate": 0.00018500779190799998, "loss": 3.3729, "step": 9512 }, { "epoch": 0.4453391070279127, "grad_norm": 1.6796875, "learning_rate": 0.00018500471749280142, "loss": 3.2346, "step": 9513 }, { "epoch": 0.44538592076774536, "grad_norm": 1.2109375, "learning_rate": 0.00018500164278795428, "loss": 2.6021, "step": 9514 }, { "epoch": 0.445432734507578, "grad_norm": 1.5703125, "learning_rate": 0.00018499856779346886, "loss": 3.0133, "step": 9515 }, { "epoch": 0.4454795482474106, "grad_norm": 1.3359375, "learning_rate": 0.00018499549250935578, "loss": 3.0322, "step": 9516 }, { "epoch": 0.44552636198724327, "grad_norm": 1.6015625, "learning_rate": 0.0001849924169356255, "loss": 3.285, "step": 9517 }, { "epoch": 0.4455731757270759, "grad_norm": 1.15625, "learning_rate": 0.00018498934107228845, "loss": 3.0601, "step": 9518 }, { "epoch": 0.44561998946690856, "grad_norm": 1.5546875, "learning_rate": 0.0001849862649193551, "loss": 2.8529, "step": 9519 }, { "epoch": 0.4456668032067412, "grad_norm": 1.4609375, "learning_rate": 0.000184983188476836, "loss": 2.9099, "step": 9520 }, { "epoch": 0.4457136169465738, "grad_norm": 1.609375, "learning_rate": 0.0001849801117447416, "loss": 3.0258, "step": 9521 }, { "epoch": 0.4457604306864065, "grad_norm": 1.5234375, "learning_rate": 0.00018497703472308236, "loss": 3.1662, "step": 9522 }, { "epoch": 0.4458072444262391, "grad_norm": 2.015625, "learning_rate": 0.00018497395741186879, "loss": 3.1722, "step": 9523 }, { "epoch": 0.44585405816607176, "grad_norm": 1.3828125, "learning_rate": 0.00018497087981111137, "loss": 3.3231, "step": 9524 }, { "epoch": 0.4459008719059044, "grad_norm": 1.078125, "learning_rate": 0.00018496780192082062, "loss": 2.7823, "step": 9525 }, { "epoch": 0.445947685645737, "grad_norm": 1.2421875, "learning_rate": 0.00018496472374100696, "loss": 2.8998, "step": 9526 }, { "epoch": 0.4459944993855697, "grad_norm": 1.6171875, "learning_rate": 0.00018496164527168093, "loss": 3.1134, "step": 9527 }, { "epoch": 0.4460413131254023, "grad_norm": 1.1328125, "learning_rate": 0.000184958566512853, "loss": 2.9385, "step": 9528 }, { "epoch": 0.44608812686523497, "grad_norm": 1.4765625, "learning_rate": 0.00018495548746453366, "loss": 2.9531, "step": 9529 }, { "epoch": 0.4461349406050676, "grad_norm": 1.9375, "learning_rate": 0.00018495240812673342, "loss": 2.8233, "step": 9530 }, { "epoch": 0.4461817543449002, "grad_norm": 1.1875, "learning_rate": 0.00018494932849946275, "loss": 2.7496, "step": 9531 }, { "epoch": 0.4462285680847329, "grad_norm": 1.1171875, "learning_rate": 0.00018494624858273216, "loss": 2.8829, "step": 9532 }, { "epoch": 0.4462753818245655, "grad_norm": 1.421875, "learning_rate": 0.00018494316837655214, "loss": 2.7964, "step": 9533 }, { "epoch": 0.44632219556439817, "grad_norm": 2.546875, "learning_rate": 0.0001849400878809332, "loss": 3.0272, "step": 9534 }, { "epoch": 0.4463690093042308, "grad_norm": 1.6328125, "learning_rate": 0.0001849370070958858, "loss": 3.4726, "step": 9535 }, { "epoch": 0.4464158230440634, "grad_norm": 1.1484375, "learning_rate": 0.00018493392602142046, "loss": 3.1573, "step": 9536 }, { "epoch": 0.4464626367838961, "grad_norm": 1.3203125, "learning_rate": 0.00018493084465754763, "loss": 3.0568, "step": 9537 }, { "epoch": 0.4465094505237287, "grad_norm": 1.4375, "learning_rate": 0.0001849277630042779, "loss": 2.9915, "step": 9538 }, { "epoch": 0.4465562642635614, "grad_norm": 1.15625, "learning_rate": 0.00018492468106162178, "loss": 3.0676, "step": 9539 }, { "epoch": 0.446603078003394, "grad_norm": 1.1875, "learning_rate": 0.00018492159882958965, "loss": 3.1136, "step": 9540 }, { "epoch": 0.4466498917432266, "grad_norm": 1.28125, "learning_rate": 0.0001849185163081921, "loss": 3.3517, "step": 9541 }, { "epoch": 0.4466967054830593, "grad_norm": 1.3984375, "learning_rate": 0.0001849154334974396, "loss": 2.7953, "step": 9542 }, { "epoch": 0.4467435192228919, "grad_norm": 1.2265625, "learning_rate": 0.00018491235039734267, "loss": 3.1369, "step": 9543 }, { "epoch": 0.4467903329627246, "grad_norm": 1.3046875, "learning_rate": 0.00018490926700791182, "loss": 2.7525, "step": 9544 }, { "epoch": 0.4468371467025572, "grad_norm": 1.1484375, "learning_rate": 0.00018490618332915755, "loss": 3.2544, "step": 9545 }, { "epoch": 0.4468839604423898, "grad_norm": 1.2734375, "learning_rate": 0.0001849030993610904, "loss": 3.1715, "step": 9546 }, { "epoch": 0.4469307741822225, "grad_norm": 1.0625, "learning_rate": 0.0001849000151037208, "loss": 2.774, "step": 9547 }, { "epoch": 0.4469775879220551, "grad_norm": 1.484375, "learning_rate": 0.00018489693055705932, "loss": 2.9623, "step": 9548 }, { "epoch": 0.4470244016618878, "grad_norm": 1.3828125, "learning_rate": 0.00018489384572111648, "loss": 3.506, "step": 9549 }, { "epoch": 0.4470712154017204, "grad_norm": 1.28125, "learning_rate": 0.00018489076059590275, "loss": 2.9961, "step": 9550 }, { "epoch": 0.447118029141553, "grad_norm": 1.5703125, "learning_rate": 0.00018488767518142868, "loss": 2.9337, "step": 9551 }, { "epoch": 0.4471648428813857, "grad_norm": 1.859375, "learning_rate": 0.0001848845894777047, "loss": 2.9362, "step": 9552 }, { "epoch": 0.4472116566212183, "grad_norm": 1.484375, "learning_rate": 0.00018488150348474144, "loss": 2.7995, "step": 9553 }, { "epoch": 0.447258470361051, "grad_norm": 1.5, "learning_rate": 0.00018487841720254936, "loss": 3.852, "step": 9554 }, { "epoch": 0.4473052841008836, "grad_norm": 1.6171875, "learning_rate": 0.00018487533063113898, "loss": 3.3685, "step": 9555 }, { "epoch": 0.4473520978407162, "grad_norm": 1.46875, "learning_rate": 0.00018487224377052082, "loss": 3.0217, "step": 9556 }, { "epoch": 0.4473989115805489, "grad_norm": 1.203125, "learning_rate": 0.00018486915662070539, "loss": 2.8128, "step": 9557 }, { "epoch": 0.4474457253203815, "grad_norm": 1.1796875, "learning_rate": 0.0001848660691817032, "loss": 3.0418, "step": 9558 }, { "epoch": 0.4474925390602142, "grad_norm": 1.015625, "learning_rate": 0.00018486298145352478, "loss": 2.8267, "step": 9559 }, { "epoch": 0.4475393528000468, "grad_norm": 1.0859375, "learning_rate": 0.00018485989343618067, "loss": 2.7432, "step": 9560 }, { "epoch": 0.4475861665398795, "grad_norm": 1.4375, "learning_rate": 0.00018485680512968138, "loss": 2.9948, "step": 9561 }, { "epoch": 0.4476329802797121, "grad_norm": 1.09375, "learning_rate": 0.0001848537165340374, "loss": 2.9082, "step": 9562 }, { "epoch": 0.4476797940195447, "grad_norm": 1.3515625, "learning_rate": 0.00018485062764925936, "loss": 3.1304, "step": 9563 }, { "epoch": 0.4477266077593774, "grad_norm": 2.8125, "learning_rate": 0.00018484753847535767, "loss": 3.0311, "step": 9564 }, { "epoch": 0.44777342149921, "grad_norm": 1.2734375, "learning_rate": 0.00018484444901234289, "loss": 3.661, "step": 9565 }, { "epoch": 0.4478202352390427, "grad_norm": 1.6015625, "learning_rate": 0.00018484135926022556, "loss": 3.153, "step": 9566 }, { "epoch": 0.4478670489788753, "grad_norm": 1.296875, "learning_rate": 0.00018483826921901622, "loss": 3.0779, "step": 9567 }, { "epoch": 0.4479138627187079, "grad_norm": 1.484375, "learning_rate": 0.00018483517888872534, "loss": 3.5173, "step": 9568 }, { "epoch": 0.4479606764585406, "grad_norm": 1.421875, "learning_rate": 0.0001848320882693635, "loss": 3.0084, "step": 9569 }, { "epoch": 0.4480074901983732, "grad_norm": 1.671875, "learning_rate": 0.00018482899736094126, "loss": 3.2808, "step": 9570 }, { "epoch": 0.4480543039382059, "grad_norm": 1.515625, "learning_rate": 0.00018482590616346908, "loss": 3.0504, "step": 9571 }, { "epoch": 0.4481011176780385, "grad_norm": 1.390625, "learning_rate": 0.00018482281467695755, "loss": 3.0389, "step": 9572 }, { "epoch": 0.4481479314178711, "grad_norm": 1.1953125, "learning_rate": 0.00018481972290141716, "loss": 2.8066, "step": 9573 }, { "epoch": 0.4481947451577038, "grad_norm": 1.2109375, "learning_rate": 0.0001848166308368585, "loss": 2.8376, "step": 9574 }, { "epoch": 0.4482415588975364, "grad_norm": 1.03125, "learning_rate": 0.00018481353848329203, "loss": 2.5929, "step": 9575 }, { "epoch": 0.4482883726373691, "grad_norm": 1.4140625, "learning_rate": 0.00018481044584072836, "loss": 3.2121, "step": 9576 }, { "epoch": 0.4483351863772017, "grad_norm": 1.484375, "learning_rate": 0.00018480735290917798, "loss": 3.2602, "step": 9577 }, { "epoch": 0.44838200011703433, "grad_norm": 2.0, "learning_rate": 0.00018480425968865144, "loss": 2.8928, "step": 9578 }, { "epoch": 0.448428813856867, "grad_norm": 1.5859375, "learning_rate": 0.00018480116617915934, "loss": 2.9142, "step": 9579 }, { "epoch": 0.4484756275966996, "grad_norm": 1.5, "learning_rate": 0.00018479807238071212, "loss": 2.849, "step": 9580 }, { "epoch": 0.4485224413365323, "grad_norm": 1.640625, "learning_rate": 0.00018479497829332038, "loss": 3.2025, "step": 9581 }, { "epoch": 0.4485692550763649, "grad_norm": 1.5078125, "learning_rate": 0.00018479188391699465, "loss": 3.2715, "step": 9582 }, { "epoch": 0.44861606881619753, "grad_norm": 1.453125, "learning_rate": 0.00018478878925174547, "loss": 2.5638, "step": 9583 }, { "epoch": 0.4486628825560302, "grad_norm": 1.546875, "learning_rate": 0.0001847856942975834, "loss": 2.8157, "step": 9584 }, { "epoch": 0.4487096962958628, "grad_norm": 1.3203125, "learning_rate": 0.000184782599054519, "loss": 3.3159, "step": 9585 }, { "epoch": 0.4487565100356955, "grad_norm": 1.5078125, "learning_rate": 0.00018477950352256277, "loss": 3.0255, "step": 9586 }, { "epoch": 0.4488033237755281, "grad_norm": 1.046875, "learning_rate": 0.0001847764077017253, "loss": 2.8658, "step": 9587 }, { "epoch": 0.44885013751536074, "grad_norm": 2.046875, "learning_rate": 0.0001847733115920171, "loss": 3.1306, "step": 9588 }, { "epoch": 0.4488969512551934, "grad_norm": 1.4453125, "learning_rate": 0.00018477021519344873, "loss": 3.2471, "step": 9589 }, { "epoch": 0.44894376499502603, "grad_norm": 1.6953125, "learning_rate": 0.0001847671185060308, "loss": 2.9288, "step": 9590 }, { "epoch": 0.4489905787348587, "grad_norm": 1.421875, "learning_rate": 0.00018476402152977375, "loss": 2.7016, "step": 9591 }, { "epoch": 0.4490373924746913, "grad_norm": 1.2578125, "learning_rate": 0.00018476092426468826, "loss": 3.2953, "step": 9592 }, { "epoch": 0.44908420621452394, "grad_norm": 1.359375, "learning_rate": 0.00018475782671078478, "loss": 2.7731, "step": 9593 }, { "epoch": 0.4491310199543566, "grad_norm": 1.5859375, "learning_rate": 0.00018475472886807391, "loss": 3.3356, "step": 9594 }, { "epoch": 0.44917783369418923, "grad_norm": 1.8984375, "learning_rate": 0.0001847516307365662, "loss": 3.3144, "step": 9595 }, { "epoch": 0.4492246474340219, "grad_norm": 2.21875, "learning_rate": 0.00018474853231627224, "loss": 3.5199, "step": 9596 }, { "epoch": 0.4492714611738545, "grad_norm": 1.390625, "learning_rate": 0.00018474543360720252, "loss": 2.8459, "step": 9597 }, { "epoch": 0.44931827491368714, "grad_norm": 1.34375, "learning_rate": 0.00018474233460936764, "loss": 3.0381, "step": 9598 }, { "epoch": 0.4493650886535198, "grad_norm": 1.5, "learning_rate": 0.00018473923532277814, "loss": 3.2353, "step": 9599 }, { "epoch": 0.44941190239335244, "grad_norm": 1.6328125, "learning_rate": 0.00018473613574744463, "loss": 3.0633, "step": 9600 }, { "epoch": 0.4494587161331851, "grad_norm": 1.21875, "learning_rate": 0.0001847330358833776, "loss": 2.7564, "step": 9601 }, { "epoch": 0.44950552987301773, "grad_norm": 1.046875, "learning_rate": 0.00018472993573058765, "loss": 2.6735, "step": 9602 }, { "epoch": 0.44955234361285035, "grad_norm": 1.1328125, "learning_rate": 0.00018472683528908537, "loss": 3.9438, "step": 9603 }, { "epoch": 0.449599157352683, "grad_norm": 1.75, "learning_rate": 0.00018472373455888128, "loss": 2.7137, "step": 9604 }, { "epoch": 0.44964597109251564, "grad_norm": 1.2734375, "learning_rate": 0.00018472063353998597, "loss": 2.9116, "step": 9605 }, { "epoch": 0.4496927848323483, "grad_norm": 1.5625, "learning_rate": 0.00018471753223241, "loss": 2.885, "step": 9606 }, { "epoch": 0.44973959857218093, "grad_norm": 1.4296875, "learning_rate": 0.0001847144306361639, "loss": 2.9304, "step": 9607 }, { "epoch": 0.44978641231201355, "grad_norm": 1.0546875, "learning_rate": 0.00018471132875125829, "loss": 2.7661, "step": 9608 }, { "epoch": 0.4498332260518462, "grad_norm": 2.375, "learning_rate": 0.00018470822657770374, "loss": 2.9654, "step": 9609 }, { "epoch": 0.44988003979167884, "grad_norm": 1.4375, "learning_rate": 0.0001847051241155108, "loss": 3.3772, "step": 9610 }, { "epoch": 0.4499268535315115, "grad_norm": 1.4765625, "learning_rate": 0.00018470202136469004, "loss": 2.9516, "step": 9611 }, { "epoch": 0.44997366727134414, "grad_norm": 1.203125, "learning_rate": 0.00018469891832525206, "loss": 2.9341, "step": 9612 }, { "epoch": 0.45002048101117675, "grad_norm": 1.4296875, "learning_rate": 0.00018469581499720738, "loss": 3.3826, "step": 9613 }, { "epoch": 0.45006729475100943, "grad_norm": 1.140625, "learning_rate": 0.00018469271138056662, "loss": 3.0301, "step": 9614 }, { "epoch": 0.45011410849084205, "grad_norm": 1.125, "learning_rate": 0.00018468960747534036, "loss": 3.0548, "step": 9615 }, { "epoch": 0.4501609222306747, "grad_norm": 1.578125, "learning_rate": 0.00018468650328153912, "loss": 3.483, "step": 9616 }, { "epoch": 0.45020773597050734, "grad_norm": 1.1171875, "learning_rate": 0.00018468339879917353, "loss": 3.1448, "step": 9617 }, { "epoch": 0.45025454971033996, "grad_norm": 1.1875, "learning_rate": 0.00018468029402825416, "loss": 3.282, "step": 9618 }, { "epoch": 0.45030136345017263, "grad_norm": 1.3125, "learning_rate": 0.00018467718896879158, "loss": 3.1775, "step": 9619 }, { "epoch": 0.45034817719000525, "grad_norm": 1.328125, "learning_rate": 0.0001846740836207964, "loss": 3.1602, "step": 9620 }, { "epoch": 0.4503949909298379, "grad_norm": 3.28125, "learning_rate": 0.00018467097798427912, "loss": 3.2581, "step": 9621 }, { "epoch": 0.45044180466967054, "grad_norm": 2.4375, "learning_rate": 0.00018466787205925043, "loss": 3.3431, "step": 9622 }, { "epoch": 0.4504886184095032, "grad_norm": 1.578125, "learning_rate": 0.00018466476584572084, "loss": 3.1315, "step": 9623 }, { "epoch": 0.45053543214933583, "grad_norm": 1.265625, "learning_rate": 0.00018466165934370095, "loss": 3.1304, "step": 9624 }, { "epoch": 0.45058224588916845, "grad_norm": 1.3359375, "learning_rate": 0.00018465855255320133, "loss": 3.4328, "step": 9625 }, { "epoch": 0.4506290596290011, "grad_norm": 1.5390625, "learning_rate": 0.0001846554454742326, "loss": 3.0546, "step": 9626 }, { "epoch": 0.45067587336883375, "grad_norm": 1.3125, "learning_rate": 0.00018465233810680532, "loss": 3.0084, "step": 9627 }, { "epoch": 0.4507226871086664, "grad_norm": 1.1875, "learning_rate": 0.00018464923045093012, "loss": 2.9419, "step": 9628 }, { "epoch": 0.45076950084849904, "grad_norm": 2.078125, "learning_rate": 0.00018464612250661754, "loss": 2.7813, "step": 9629 }, { "epoch": 0.45081631458833166, "grad_norm": 1.2265625, "learning_rate": 0.0001846430142738782, "loss": 2.8686, "step": 9630 }, { "epoch": 0.45086312832816433, "grad_norm": 1.7890625, "learning_rate": 0.0001846399057527227, "loss": 2.853, "step": 9631 }, { "epoch": 0.45090994206799695, "grad_norm": 1.2578125, "learning_rate": 0.0001846367969431616, "loss": 2.7355, "step": 9632 }, { "epoch": 0.4509567558078296, "grad_norm": 1.546875, "learning_rate": 0.00018463368784520548, "loss": 2.918, "step": 9633 }, { "epoch": 0.45100356954766224, "grad_norm": 1.6484375, "learning_rate": 0.00018463057845886498, "loss": 3.4237, "step": 9634 }, { "epoch": 0.45105038328749486, "grad_norm": 1.0078125, "learning_rate": 0.00018462746878415066, "loss": 2.7426, "step": 9635 }, { "epoch": 0.45109719702732753, "grad_norm": 1.515625, "learning_rate": 0.00018462435882107315, "loss": 3.0902, "step": 9636 }, { "epoch": 0.45114401076716015, "grad_norm": 1.9296875, "learning_rate": 0.00018462124856964305, "loss": 3.1934, "step": 9637 }, { "epoch": 0.4511908245069928, "grad_norm": 1.578125, "learning_rate": 0.00018461813802987093, "loss": 2.9329, "step": 9638 }, { "epoch": 0.45123763824682545, "grad_norm": 1.5859375, "learning_rate": 0.00018461502720176735, "loss": 3.1012, "step": 9639 }, { "epoch": 0.45128445198665806, "grad_norm": 2.21875, "learning_rate": 0.000184611916085343, "loss": 2.78, "step": 9640 }, { "epoch": 0.45133126572649074, "grad_norm": 1.3125, "learning_rate": 0.00018460880468060842, "loss": 2.8411, "step": 9641 }, { "epoch": 0.45137807946632336, "grad_norm": 1.75, "learning_rate": 0.0001846056929875742, "loss": 2.8527, "step": 9642 }, { "epoch": 0.45142489320615603, "grad_norm": 1.859375, "learning_rate": 0.00018460258100625105, "loss": 3.0974, "step": 9643 }, { "epoch": 0.45147170694598865, "grad_norm": 1.671875, "learning_rate": 0.00018459946873664943, "loss": 3.0028, "step": 9644 }, { "epoch": 0.45151852068582127, "grad_norm": 1.453125, "learning_rate": 0.00018459635617878003, "loss": 2.9964, "step": 9645 }, { "epoch": 0.45156533442565394, "grad_norm": 1.40625, "learning_rate": 0.0001845932433326534, "loss": 2.9395, "step": 9646 }, { "epoch": 0.45161214816548656, "grad_norm": 2.0625, "learning_rate": 0.00018459013019828024, "loss": 2.9599, "step": 9647 }, { "epoch": 0.45165896190531923, "grad_norm": 1.796875, "learning_rate": 0.0001845870167756711, "loss": 2.8817, "step": 9648 }, { "epoch": 0.45170577564515185, "grad_norm": 5.375, "learning_rate": 0.00018458390306483655, "loss": 2.9268, "step": 9649 }, { "epoch": 0.45175258938498447, "grad_norm": 1.5625, "learning_rate": 0.00018458078906578726, "loss": 2.5308, "step": 9650 }, { "epoch": 0.45179940312481714, "grad_norm": 1.234375, "learning_rate": 0.00018457767477853384, "loss": 2.9987, "step": 9651 }, { "epoch": 0.45184621686464976, "grad_norm": 1.421875, "learning_rate": 0.00018457456020308687, "loss": 2.7766, "step": 9652 }, { "epoch": 0.45189303060448244, "grad_norm": 1.6796875, "learning_rate": 0.00018457144533945695, "loss": 2.977, "step": 9653 }, { "epoch": 0.45193984434431506, "grad_norm": 1.2890625, "learning_rate": 0.00018456833018765472, "loss": 2.6307, "step": 9654 }, { "epoch": 0.4519866580841477, "grad_norm": 1.1171875, "learning_rate": 0.0001845652147476908, "loss": 3.073, "step": 9655 }, { "epoch": 0.45203347182398035, "grad_norm": 1.203125, "learning_rate": 0.0001845620990195758, "loss": 4.0814, "step": 9656 }, { "epoch": 0.45208028556381297, "grad_norm": 1.5, "learning_rate": 0.00018455898300332035, "loss": 2.9564, "step": 9657 }, { "epoch": 0.45212709930364564, "grad_norm": 1.125, "learning_rate": 0.00018455586669893504, "loss": 3.5228, "step": 9658 }, { "epoch": 0.45217391304347826, "grad_norm": 1.2109375, "learning_rate": 0.00018455275010643052, "loss": 3.2478, "step": 9659 }, { "epoch": 0.4522207267833109, "grad_norm": 1.3359375, "learning_rate": 0.00018454963322581737, "loss": 3.0317, "step": 9660 }, { "epoch": 0.45226754052314355, "grad_norm": 2.0, "learning_rate": 0.00018454651605710623, "loss": 3.6376, "step": 9661 }, { "epoch": 0.45231435426297617, "grad_norm": 1.71875, "learning_rate": 0.00018454339860030775, "loss": 3.1462, "step": 9662 }, { "epoch": 0.45236116800280884, "grad_norm": 1.375, "learning_rate": 0.0001845402808554325, "loss": 3.1204, "step": 9663 }, { "epoch": 0.45240798174264146, "grad_norm": 1.1875, "learning_rate": 0.00018453716282249114, "loss": 2.9694, "step": 9664 }, { "epoch": 0.4524547954824741, "grad_norm": 1.6171875, "learning_rate": 0.0001845340445014943, "loss": 3.1779, "step": 9665 }, { "epoch": 0.45250160922230676, "grad_norm": 1.2734375, "learning_rate": 0.00018453092589245258, "loss": 2.9854, "step": 9666 }, { "epoch": 0.4525484229621394, "grad_norm": 1.265625, "learning_rate": 0.0001845278069953766, "loss": 2.8534, "step": 9667 }, { "epoch": 0.45259523670197205, "grad_norm": 1.296875, "learning_rate": 0.000184524687810277, "loss": 2.9864, "step": 9668 }, { "epoch": 0.45264205044180467, "grad_norm": 1.671875, "learning_rate": 0.00018452156833716443, "loss": 3.1818, "step": 9669 }, { "epoch": 0.4526888641816373, "grad_norm": 2.265625, "learning_rate": 0.00018451844857604949, "loss": 2.753, "step": 9670 }, { "epoch": 0.45273567792146996, "grad_norm": 1.203125, "learning_rate": 0.00018451532852694285, "loss": 2.7455, "step": 9671 }, { "epoch": 0.4527824916613026, "grad_norm": 1.40625, "learning_rate": 0.00018451220818985507, "loss": 2.8825, "step": 9672 }, { "epoch": 0.45282930540113525, "grad_norm": 1.4375, "learning_rate": 0.00018450908756479682, "loss": 2.805, "step": 9673 }, { "epoch": 0.45287611914096787, "grad_norm": 1.484375, "learning_rate": 0.0001845059666517788, "loss": 2.7691, "step": 9674 }, { "epoch": 0.4529229328808005, "grad_norm": 1.4375, "learning_rate": 0.00018450284545081154, "loss": 2.9776, "step": 9675 }, { "epoch": 0.45296974662063316, "grad_norm": 1.1875, "learning_rate": 0.00018449972396190575, "loss": 2.9643, "step": 9676 }, { "epoch": 0.4530165603604658, "grad_norm": 1.46875, "learning_rate": 0.000184496602185072, "loss": 3.336, "step": 9677 }, { "epoch": 0.45306337410029845, "grad_norm": 1.84375, "learning_rate": 0.00018449348012032098, "loss": 2.8603, "step": 9678 }, { "epoch": 0.4531101878401311, "grad_norm": 2.046875, "learning_rate": 0.00018449035776766328, "loss": 2.4956, "step": 9679 }, { "epoch": 0.4531570015799637, "grad_norm": 1.2734375, "learning_rate": 0.0001844872351271096, "loss": 3.06, "step": 9680 }, { "epoch": 0.45320381531979637, "grad_norm": 1.2421875, "learning_rate": 0.00018448411219867055, "loss": 2.9831, "step": 9681 }, { "epoch": 0.453250629059629, "grad_norm": 2.234375, "learning_rate": 0.00018448098898235675, "loss": 3.1431, "step": 9682 }, { "epoch": 0.45329744279946166, "grad_norm": 1.3828125, "learning_rate": 0.0001844778654781789, "loss": 2.8878, "step": 9683 }, { "epoch": 0.4533442565392943, "grad_norm": 5.4375, "learning_rate": 0.00018447474168614757, "loss": 3.1595, "step": 9684 }, { "epoch": 0.45339107027912695, "grad_norm": 1.2578125, "learning_rate": 0.00018447161760627349, "loss": 2.9015, "step": 9685 }, { "epoch": 0.45343788401895957, "grad_norm": 1.6171875, "learning_rate": 0.00018446849323856717, "loss": 2.9402, "step": 9686 }, { "epoch": 0.4534846977587922, "grad_norm": 1.25, "learning_rate": 0.00018446536858303942, "loss": 2.8788, "step": 9687 }, { "epoch": 0.45353151149862486, "grad_norm": 1.71875, "learning_rate": 0.00018446224363970076, "loss": 3.3673, "step": 9688 }, { "epoch": 0.4535783252384575, "grad_norm": 1.2265625, "learning_rate": 0.00018445911840856192, "loss": 2.9503, "step": 9689 }, { "epoch": 0.45362513897829015, "grad_norm": 1.453125, "learning_rate": 0.00018445599288963353, "loss": 2.9799, "step": 9690 }, { "epoch": 0.4536719527181228, "grad_norm": 1.5, "learning_rate": 0.0001844528670829262, "loss": 2.9273, "step": 9691 }, { "epoch": 0.4537187664579554, "grad_norm": 1.375, "learning_rate": 0.0001844497409884506, "loss": 3.1508, "step": 9692 }, { "epoch": 0.45376558019778807, "grad_norm": 1.15625, "learning_rate": 0.0001844466146062174, "loss": 3.0734, "step": 9693 }, { "epoch": 0.4538123939376207, "grad_norm": 1.3515625, "learning_rate": 0.00018444348793623724, "loss": 2.8014, "step": 9694 }, { "epoch": 0.45385920767745336, "grad_norm": 1.59375, "learning_rate": 0.00018444036097852077, "loss": 3.0786, "step": 9695 }, { "epoch": 0.453906021417286, "grad_norm": 1.390625, "learning_rate": 0.00018443723373307865, "loss": 2.8921, "step": 9696 }, { "epoch": 0.4539528351571186, "grad_norm": 1.1875, "learning_rate": 0.00018443410619992155, "loss": 2.9312, "step": 9697 }, { "epoch": 0.45399964889695127, "grad_norm": 1.984375, "learning_rate": 0.00018443097837906008, "loss": 3.2713, "step": 9698 }, { "epoch": 0.4540464626367839, "grad_norm": 1.7578125, "learning_rate": 0.00018442785027050497, "loss": 3.329, "step": 9699 }, { "epoch": 0.45409327637661656, "grad_norm": 1.4140625, "learning_rate": 0.00018442472187426686, "loss": 3.4404, "step": 9700 }, { "epoch": 0.4541400901164492, "grad_norm": 1.453125, "learning_rate": 0.00018442159319035633, "loss": 2.9434, "step": 9701 }, { "epoch": 0.4541869038562818, "grad_norm": 1.4140625, "learning_rate": 0.00018441846421878415, "loss": 2.8415, "step": 9702 }, { "epoch": 0.4542337175961145, "grad_norm": 1.9296875, "learning_rate": 0.0001844153349595609, "loss": 2.9952, "step": 9703 }, { "epoch": 0.4542805313359471, "grad_norm": 1.265625, "learning_rate": 0.00018441220541269728, "loss": 3.0718, "step": 9704 }, { "epoch": 0.45432734507577976, "grad_norm": 1.1953125, "learning_rate": 0.00018440907557820398, "loss": 3.0333, "step": 9705 }, { "epoch": 0.4543741588156124, "grad_norm": 1.1328125, "learning_rate": 0.0001844059454560916, "loss": 2.691, "step": 9706 }, { "epoch": 0.454420972555445, "grad_norm": 1.4140625, "learning_rate": 0.00018440281504637085, "loss": 2.8513, "step": 9707 }, { "epoch": 0.4544677862952777, "grad_norm": 1.6796875, "learning_rate": 0.00018439968434905238, "loss": 3.1328, "step": 9708 }, { "epoch": 0.4545146000351103, "grad_norm": 1.859375, "learning_rate": 0.00018439655336414688, "loss": 3.4913, "step": 9709 }, { "epoch": 0.45456141377494297, "grad_norm": 1.1171875, "learning_rate": 0.00018439342209166499, "loss": 2.8882, "step": 9710 }, { "epoch": 0.4546082275147756, "grad_norm": 1.34375, "learning_rate": 0.00018439029053161742, "loss": 2.9038, "step": 9711 }, { "epoch": 0.4546550412546082, "grad_norm": 1.53125, "learning_rate": 0.00018438715868401476, "loss": 2.8248, "step": 9712 }, { "epoch": 0.4547018549944409, "grad_norm": 1.40625, "learning_rate": 0.00018438402654886776, "loss": 4.0912, "step": 9713 }, { "epoch": 0.4547486687342735, "grad_norm": 1.7890625, "learning_rate": 0.0001843808941261871, "loss": 3.3173, "step": 9714 }, { "epoch": 0.45479548247410617, "grad_norm": 1.78125, "learning_rate": 0.0001843777614159834, "loss": 3.3595, "step": 9715 }, { "epoch": 0.4548422962139388, "grad_norm": 1.6640625, "learning_rate": 0.00018437462841826733, "loss": 2.9637, "step": 9716 }, { "epoch": 0.4548891099537714, "grad_norm": 1.59375, "learning_rate": 0.00018437149513304962, "loss": 2.5244, "step": 9717 }, { "epoch": 0.4549359236936041, "grad_norm": 1.1171875, "learning_rate": 0.00018436836156034088, "loss": 2.7193, "step": 9718 }, { "epoch": 0.4549827374334367, "grad_norm": 1.171875, "learning_rate": 0.00018436522770015187, "loss": 2.643, "step": 9719 }, { "epoch": 0.4550295511732694, "grad_norm": 1.6328125, "learning_rate": 0.00018436209355249318, "loss": 3.0434, "step": 9720 }, { "epoch": 0.455076364913102, "grad_norm": 1.4453125, "learning_rate": 0.00018435895911737558, "loss": 3.5261, "step": 9721 }, { "epoch": 0.4551231786529346, "grad_norm": 1.25, "learning_rate": 0.00018435582439480967, "loss": 3.0471, "step": 9722 }, { "epoch": 0.4551699923927673, "grad_norm": 1.7890625, "learning_rate": 0.00018435268938480617, "loss": 2.3369, "step": 9723 }, { "epoch": 0.4552168061325999, "grad_norm": 3.328125, "learning_rate": 0.00018434955408737577, "loss": 3.2999, "step": 9724 }, { "epoch": 0.4552636198724326, "grad_norm": 1.2734375, "learning_rate": 0.00018434641850252913, "loss": 3.5565, "step": 9725 }, { "epoch": 0.4553104336122652, "grad_norm": 1.21875, "learning_rate": 0.00018434328263027694, "loss": 2.7829, "step": 9726 }, { "epoch": 0.4553572473520978, "grad_norm": 1.5859375, "learning_rate": 0.00018434014647062988, "loss": 3.0327, "step": 9727 }, { "epoch": 0.4554040610919305, "grad_norm": 1.1640625, "learning_rate": 0.0001843370100235987, "loss": 2.6524, "step": 9728 }, { "epoch": 0.4554508748317631, "grad_norm": 1.140625, "learning_rate": 0.00018433387328919398, "loss": 2.5769, "step": 9729 }, { "epoch": 0.4554976885715958, "grad_norm": 1.5625, "learning_rate": 0.00018433073626742647, "loss": 3.1101, "step": 9730 }, { "epoch": 0.4555445023114284, "grad_norm": 1.859375, "learning_rate": 0.00018432759895830683, "loss": 2.6401, "step": 9731 }, { "epoch": 0.455591316051261, "grad_norm": 1.1171875, "learning_rate": 0.00018432446136184581, "loss": 4.2155, "step": 9732 }, { "epoch": 0.4556381297910937, "grad_norm": 1.3984375, "learning_rate": 0.00018432132347805403, "loss": 2.8644, "step": 9733 }, { "epoch": 0.4556849435309263, "grad_norm": 1.5078125, "learning_rate": 0.00018431818530694228, "loss": 3.273, "step": 9734 }, { "epoch": 0.455731757270759, "grad_norm": 1.34375, "learning_rate": 0.00018431504684852113, "loss": 2.5424, "step": 9735 }, { "epoch": 0.4557785710105916, "grad_norm": 1.3828125, "learning_rate": 0.00018431190810280133, "loss": 2.9772, "step": 9736 }, { "epoch": 0.4558253847504242, "grad_norm": 1.2421875, "learning_rate": 0.0001843087690697936, "loss": 2.6129, "step": 9737 }, { "epoch": 0.4558721984902569, "grad_norm": 1.3984375, "learning_rate": 0.00018430562974950857, "loss": 3.0252, "step": 9738 }, { "epoch": 0.4559190122300895, "grad_norm": 1.9765625, "learning_rate": 0.00018430249014195704, "loss": 2.8543, "step": 9739 }, { "epoch": 0.4559658259699222, "grad_norm": 1.640625, "learning_rate": 0.0001842993502471496, "loss": 3.2904, "step": 9740 }, { "epoch": 0.4560126397097548, "grad_norm": 1.1328125, "learning_rate": 0.00018429621006509704, "loss": 2.2977, "step": 9741 }, { "epoch": 0.4560594534495874, "grad_norm": 1.3984375, "learning_rate": 0.00018429306959580997, "loss": 3.2557, "step": 9742 }, { "epoch": 0.4561062671894201, "grad_norm": 1.140625, "learning_rate": 0.00018428992883929917, "loss": 3.0212, "step": 9743 }, { "epoch": 0.4561530809292527, "grad_norm": 1.1796875, "learning_rate": 0.0001842867877955753, "loss": 2.7123, "step": 9744 }, { "epoch": 0.4561998946690854, "grad_norm": 1.4765625, "learning_rate": 0.00018428364646464908, "loss": 2.8469, "step": 9745 }, { "epoch": 0.456246708408918, "grad_norm": 1.4453125, "learning_rate": 0.0001842805048465312, "loss": 2.9653, "step": 9746 }, { "epoch": 0.4562935221487507, "grad_norm": 1.1953125, "learning_rate": 0.00018427736294123235, "loss": 2.8635, "step": 9747 }, { "epoch": 0.4563403358885833, "grad_norm": 1.3046875, "learning_rate": 0.0001842742207487633, "loss": 3.0395, "step": 9748 }, { "epoch": 0.4563871496284159, "grad_norm": 1.1875, "learning_rate": 0.00018427107826913467, "loss": 3.759, "step": 9749 }, { "epoch": 0.4564339633682486, "grad_norm": 1.109375, "learning_rate": 0.00018426793550235722, "loss": 3.2126, "step": 9750 }, { "epoch": 0.4564807771080812, "grad_norm": 1.2578125, "learning_rate": 0.00018426479244844167, "loss": 2.7086, "step": 9751 }, { "epoch": 0.4565275908479139, "grad_norm": 1.4609375, "learning_rate": 0.0001842616491073987, "loss": 2.9486, "step": 9752 }, { "epoch": 0.4565744045877465, "grad_norm": 1.3125, "learning_rate": 0.00018425850547923903, "loss": 2.8349, "step": 9753 }, { "epoch": 0.4566212183275791, "grad_norm": 1.5703125, "learning_rate": 0.0001842553615639734, "loss": 2.9617, "step": 9754 }, { "epoch": 0.4566680320674118, "grad_norm": 1.1875, "learning_rate": 0.0001842522173616125, "loss": 2.9083, "step": 9755 }, { "epoch": 0.4567148458072444, "grad_norm": 1.9609375, "learning_rate": 0.000184249072872167, "loss": 3.3258, "step": 9756 }, { "epoch": 0.4567616595470771, "grad_norm": 1.0859375, "learning_rate": 0.00018424592809564768, "loss": 2.435, "step": 9757 }, { "epoch": 0.4568084732869097, "grad_norm": 1.3828125, "learning_rate": 0.0001842427830320652, "loss": 3.0522, "step": 9758 }, { "epoch": 0.45685528702674233, "grad_norm": 1.921875, "learning_rate": 0.00018423963768143033, "loss": 2.9258, "step": 9759 }, { "epoch": 0.456902100766575, "grad_norm": 1.359375, "learning_rate": 0.00018423649204375373, "loss": 3.1768, "step": 9760 }, { "epoch": 0.4569489145064076, "grad_norm": 1.625, "learning_rate": 0.0001842333461190462, "loss": 2.9492, "step": 9761 }, { "epoch": 0.4569957282462403, "grad_norm": 1.265625, "learning_rate": 0.0001842301999073184, "loss": 3.2408, "step": 9762 }, { "epoch": 0.4570425419860729, "grad_norm": 1.703125, "learning_rate": 0.00018422705340858105, "loss": 2.9071, "step": 9763 }, { "epoch": 0.45708935572590553, "grad_norm": 1.0546875, "learning_rate": 0.00018422390662284487, "loss": 2.0585, "step": 9764 }, { "epoch": 0.4571361694657382, "grad_norm": 1.7265625, "learning_rate": 0.00018422075955012063, "loss": 2.9975, "step": 9765 }, { "epoch": 0.4571829832055708, "grad_norm": 1.4375, "learning_rate": 0.000184217612190419, "loss": 3.2656, "step": 9766 }, { "epoch": 0.4572297969454035, "grad_norm": 1.1328125, "learning_rate": 0.00018421446454375072, "loss": 2.8325, "step": 9767 }, { "epoch": 0.4572766106852361, "grad_norm": 1.4765625, "learning_rate": 0.00018421131661012653, "loss": 3.0022, "step": 9768 }, { "epoch": 0.45732342442506874, "grad_norm": 1.3515625, "learning_rate": 0.00018420816838955718, "loss": 3.1309, "step": 9769 }, { "epoch": 0.4573702381649014, "grad_norm": 1.453125, "learning_rate": 0.0001842050198820533, "loss": 3.2795, "step": 9770 }, { "epoch": 0.45741705190473403, "grad_norm": 1.4375, "learning_rate": 0.00018420187108762572, "loss": 2.4607, "step": 9771 }, { "epoch": 0.4574638656445667, "grad_norm": 1.453125, "learning_rate": 0.0001841987220062851, "loss": 3.2659, "step": 9772 }, { "epoch": 0.4575106793843993, "grad_norm": 1.546875, "learning_rate": 0.00018419557263804227, "loss": 2.8462, "step": 9773 }, { "epoch": 0.45755749312423194, "grad_norm": 1.5859375, "learning_rate": 0.00018419242298290783, "loss": 3.1479, "step": 9774 }, { "epoch": 0.4576043068640646, "grad_norm": 1.3671875, "learning_rate": 0.0001841892730408926, "loss": 3.0407, "step": 9775 }, { "epoch": 0.45765112060389723, "grad_norm": 1.515625, "learning_rate": 0.00018418612281200726, "loss": 3.4488, "step": 9776 }, { "epoch": 0.4576979343437299, "grad_norm": 1.7109375, "learning_rate": 0.0001841829722962626, "loss": 3.2078, "step": 9777 }, { "epoch": 0.4577447480835625, "grad_norm": 1.2734375, "learning_rate": 0.0001841798214936693, "loss": 3.0258, "step": 9778 }, { "epoch": 0.45779156182339514, "grad_norm": 1.140625, "learning_rate": 0.00018417667040423815, "loss": 2.7125, "step": 9779 }, { "epoch": 0.4578383755632278, "grad_norm": 1.375, "learning_rate": 0.00018417351902797987, "loss": 2.7706, "step": 9780 }, { "epoch": 0.45788518930306044, "grad_norm": 1.28125, "learning_rate": 0.00018417036736490517, "loss": 2.7433, "step": 9781 }, { "epoch": 0.4579320030428931, "grad_norm": 1.21875, "learning_rate": 0.00018416721541502482, "loss": 3.2029, "step": 9782 }, { "epoch": 0.45797881678272573, "grad_norm": 0.9296875, "learning_rate": 0.00018416406317834952, "loss": 3.2543, "step": 9783 }, { "epoch": 0.45802563052255835, "grad_norm": 1.734375, "learning_rate": 0.0001841609106548901, "loss": 3.5103, "step": 9784 }, { "epoch": 0.458072444262391, "grad_norm": 1.765625, "learning_rate": 0.0001841577578446572, "loss": 3.4046, "step": 9785 }, { "epoch": 0.45811925800222364, "grad_norm": 1.234375, "learning_rate": 0.00018415460474766163, "loss": 3.0726, "step": 9786 }, { "epoch": 0.4581660717420563, "grad_norm": 1.390625, "learning_rate": 0.00018415145136391406, "loss": 3.0436, "step": 9787 }, { "epoch": 0.45821288548188893, "grad_norm": 1.15625, "learning_rate": 0.00018414829769342533, "loss": 2.856, "step": 9788 }, { "epoch": 0.45825969922172155, "grad_norm": 1.7265625, "learning_rate": 0.0001841451437362061, "loss": 3.1634, "step": 9789 }, { "epoch": 0.4583065129615542, "grad_norm": 1.359375, "learning_rate": 0.00018414198949226717, "loss": 3.0764, "step": 9790 }, { "epoch": 0.45835332670138684, "grad_norm": 1.015625, "learning_rate": 0.0001841388349616193, "loss": 4.22, "step": 9791 }, { "epoch": 0.4584001404412195, "grad_norm": 1.7265625, "learning_rate": 0.00018413568014427322, "loss": 2.7996, "step": 9792 }, { "epoch": 0.45844695418105214, "grad_norm": 1.34375, "learning_rate": 0.0001841325250402396, "loss": 3.0125, "step": 9793 }, { "epoch": 0.45849376792088475, "grad_norm": 1.7109375, "learning_rate": 0.00018412936964952933, "loss": 2.9853, "step": 9794 }, { "epoch": 0.45854058166071743, "grad_norm": 1.3046875, "learning_rate": 0.00018412621397215307, "loss": 2.8704, "step": 9795 }, { "epoch": 0.45858739540055005, "grad_norm": 1.546875, "learning_rate": 0.00018412305800812165, "loss": 3.4482, "step": 9796 }, { "epoch": 0.4586342091403827, "grad_norm": 1.234375, "learning_rate": 0.0001841199017574457, "loss": 2.8849, "step": 9797 }, { "epoch": 0.45868102288021534, "grad_norm": 1.34375, "learning_rate": 0.0001841167452201361, "loss": 5.2873, "step": 9798 }, { "epoch": 0.45872783662004796, "grad_norm": 2.046875, "learning_rate": 0.00018411358839620352, "loss": 3.0099, "step": 9799 }, { "epoch": 0.45877465035988063, "grad_norm": 1.53125, "learning_rate": 0.00018411043128565874, "loss": 3.4587, "step": 9800 }, { "epoch": 0.45882146409971325, "grad_norm": 1.4609375, "learning_rate": 0.00018410727388851257, "loss": 2.9584, "step": 9801 }, { "epoch": 0.4588682778395459, "grad_norm": 2.640625, "learning_rate": 0.00018410411620477569, "loss": 3.2387, "step": 9802 }, { "epoch": 0.45891509157937854, "grad_norm": 1.7265625, "learning_rate": 0.00018410095823445893, "loss": 2.7881, "step": 9803 }, { "epoch": 0.45896190531921116, "grad_norm": 1.6484375, "learning_rate": 0.00018409779997757296, "loss": 3.2584, "step": 9804 }, { "epoch": 0.45900871905904383, "grad_norm": 1.5546875, "learning_rate": 0.00018409464143412865, "loss": 3.693, "step": 9805 }, { "epoch": 0.45905553279887645, "grad_norm": 1.2265625, "learning_rate": 0.0001840914826041367, "loss": 2.6568, "step": 9806 }, { "epoch": 0.4591023465387091, "grad_norm": 1.234375, "learning_rate": 0.00018408832348760785, "loss": 3.2108, "step": 9807 }, { "epoch": 0.45914916027854175, "grad_norm": 2.265625, "learning_rate": 0.00018408516408455295, "loss": 2.7281, "step": 9808 }, { "epoch": 0.4591959740183744, "grad_norm": 1.2421875, "learning_rate": 0.00018408200439498269, "loss": 2.9426, "step": 9809 }, { "epoch": 0.45924278775820704, "grad_norm": 1.4375, "learning_rate": 0.00018407884441890783, "loss": 2.8023, "step": 9810 }, { "epoch": 0.45928960149803966, "grad_norm": 1.4453125, "learning_rate": 0.00018407568415633924, "loss": 2.77, "step": 9811 }, { "epoch": 0.45933641523787233, "grad_norm": 1.5078125, "learning_rate": 0.00018407252360728754, "loss": 2.5456, "step": 9812 }, { "epoch": 0.45938322897770495, "grad_norm": 1.9609375, "learning_rate": 0.0001840693627717636, "loss": 2.3748, "step": 9813 }, { "epoch": 0.4594300427175376, "grad_norm": 1.1640625, "learning_rate": 0.00018406620164977818, "loss": 2.6702, "step": 9814 }, { "epoch": 0.45947685645737024, "grad_norm": 1.2421875, "learning_rate": 0.00018406304024134204, "loss": 3.0345, "step": 9815 }, { "epoch": 0.45952367019720286, "grad_norm": 1.7109375, "learning_rate": 0.00018405987854646592, "loss": 3.2201, "step": 9816 }, { "epoch": 0.45957048393703553, "grad_norm": 1.25, "learning_rate": 0.00018405671656516063, "loss": 2.9943, "step": 9817 }, { "epoch": 0.45961729767686815, "grad_norm": 1.203125, "learning_rate": 0.00018405355429743696, "loss": 3.0142, "step": 9818 }, { "epoch": 0.4596641114167008, "grad_norm": 1.34375, "learning_rate": 0.00018405039174330568, "loss": 3.0793, "step": 9819 }, { "epoch": 0.45971092515653345, "grad_norm": 1.671875, "learning_rate": 0.0001840472289027775, "loss": 3.1322, "step": 9820 }, { "epoch": 0.45975773889636606, "grad_norm": 1.2890625, "learning_rate": 0.00018404406577586326, "loss": 2.893, "step": 9821 }, { "epoch": 0.45980455263619874, "grad_norm": 1.4140625, "learning_rate": 0.00018404090236257375, "loss": 2.8841, "step": 9822 }, { "epoch": 0.45985136637603136, "grad_norm": 1.609375, "learning_rate": 0.0001840377386629197, "loss": 2.8373, "step": 9823 }, { "epoch": 0.45989818011586403, "grad_norm": 2.578125, "learning_rate": 0.00018403457467691193, "loss": 3.0215, "step": 9824 }, { "epoch": 0.45994499385569665, "grad_norm": 1.1484375, "learning_rate": 0.00018403141040456117, "loss": 3.0116, "step": 9825 }, { "epoch": 0.45999180759552927, "grad_norm": 1.2265625, "learning_rate": 0.00018402824584587825, "loss": 3.2813, "step": 9826 }, { "epoch": 0.46003862133536194, "grad_norm": 1.390625, "learning_rate": 0.00018402508100087394, "loss": 2.9404, "step": 9827 }, { "epoch": 0.46008543507519456, "grad_norm": 1.5, "learning_rate": 0.00018402191586955904, "loss": 3.1865, "step": 9828 }, { "epoch": 0.46013224881502723, "grad_norm": 1.625, "learning_rate": 0.0001840187504519443, "loss": 2.6176, "step": 9829 }, { "epoch": 0.46017906255485985, "grad_norm": 1.53125, "learning_rate": 0.00018401558474804054, "loss": 4.342, "step": 9830 }, { "epoch": 0.46022587629469247, "grad_norm": 1.3125, "learning_rate": 0.0001840124187578585, "loss": 2.9052, "step": 9831 }, { "epoch": 0.46027269003452514, "grad_norm": 1.3515625, "learning_rate": 0.00018400925248140902, "loss": 2.9479, "step": 9832 }, { "epoch": 0.46031950377435776, "grad_norm": 1.4609375, "learning_rate": 0.00018400608591870288, "loss": 2.7841, "step": 9833 }, { "epoch": 0.46036631751419044, "grad_norm": 1.3828125, "learning_rate": 0.00018400291906975084, "loss": 3.0261, "step": 9834 }, { "epoch": 0.46041313125402306, "grad_norm": 1.8515625, "learning_rate": 0.00018399975193456373, "loss": 3.105, "step": 9835 }, { "epoch": 0.4604599449938557, "grad_norm": 1.5078125, "learning_rate": 0.0001839965845131523, "loss": 2.9719, "step": 9836 }, { "epoch": 0.46050675873368835, "grad_norm": 1.3359375, "learning_rate": 0.00018399341680552737, "loss": 2.9309, "step": 9837 }, { "epoch": 0.46055357247352097, "grad_norm": 1.4296875, "learning_rate": 0.00018399024881169974, "loss": 3.1422, "step": 9838 }, { "epoch": 0.46060038621335364, "grad_norm": 1.46875, "learning_rate": 0.0001839870805316802, "loss": 3.3642, "step": 9839 }, { "epoch": 0.46064719995318626, "grad_norm": 1.0625, "learning_rate": 0.0001839839119654795, "loss": 2.7055, "step": 9840 }, { "epoch": 0.4606940136930189, "grad_norm": 1.3515625, "learning_rate": 0.0001839807431131085, "loss": 3.0683, "step": 9841 }, { "epoch": 0.46074082743285155, "grad_norm": 1.359375, "learning_rate": 0.00018397757397457795, "loss": 2.922, "step": 9842 }, { "epoch": 0.46078764117268417, "grad_norm": 1.1796875, "learning_rate": 0.0001839744045498987, "loss": 3.1611, "step": 9843 }, { "epoch": 0.46083445491251684, "grad_norm": 7.90625, "learning_rate": 0.0001839712348390815, "loss": 3.9559, "step": 9844 }, { "epoch": 0.46088126865234946, "grad_norm": 1.3359375, "learning_rate": 0.00018396806484213718, "loss": 2.8635, "step": 9845 }, { "epoch": 0.4609280823921821, "grad_norm": 1.1953125, "learning_rate": 0.00018396489455907653, "loss": 2.9816, "step": 9846 }, { "epoch": 0.46097489613201476, "grad_norm": 1.359375, "learning_rate": 0.00018396172398991035, "loss": 2.8138, "step": 9847 }, { "epoch": 0.4610217098718474, "grad_norm": 1.3984375, "learning_rate": 0.00018395855313464948, "loss": 2.8295, "step": 9848 }, { "epoch": 0.46106852361168005, "grad_norm": 1.1640625, "learning_rate": 0.00018395538199330463, "loss": 3.3324, "step": 9849 }, { "epoch": 0.46111533735151267, "grad_norm": 6.25, "learning_rate": 0.00018395221056588672, "loss": 4.0557, "step": 9850 }, { "epoch": 0.4611621510913453, "grad_norm": 1.1953125, "learning_rate": 0.0001839490388524065, "loss": 2.7839, "step": 9851 }, { "epoch": 0.46120896483117796, "grad_norm": 1.1640625, "learning_rate": 0.00018394586685287475, "loss": 2.9845, "step": 9852 }, { "epoch": 0.4612557785710106, "grad_norm": 1.2734375, "learning_rate": 0.00018394269456730236, "loss": 2.897, "step": 9853 }, { "epoch": 0.46130259231084325, "grad_norm": 1.7265625, "learning_rate": 0.00018393952199570007, "loss": 3.1851, "step": 9854 }, { "epoch": 0.46134940605067587, "grad_norm": 1.65625, "learning_rate": 0.00018393634913807869, "loss": 4.1255, "step": 9855 }, { "epoch": 0.4613962197905085, "grad_norm": 1.3671875, "learning_rate": 0.00018393317599444905, "loss": 2.8787, "step": 9856 }, { "epoch": 0.46144303353034116, "grad_norm": 1.671875, "learning_rate": 0.000183930002564822, "loss": 3.1868, "step": 9857 }, { "epoch": 0.4614898472701738, "grad_norm": 2.015625, "learning_rate": 0.0001839268288492083, "loss": 2.7369, "step": 9858 }, { "epoch": 0.46153666101000645, "grad_norm": 1.2734375, "learning_rate": 0.00018392365484761877, "loss": 3.1846, "step": 9859 }, { "epoch": 0.4615834747498391, "grad_norm": 1.4453125, "learning_rate": 0.00018392048056006425, "loss": 2.8702, "step": 9860 }, { "epoch": 0.4616302884896717, "grad_norm": 1.6484375, "learning_rate": 0.00018391730598655553, "loss": 2.9958, "step": 9861 }, { "epoch": 0.46167710222950437, "grad_norm": 2.140625, "learning_rate": 0.0001839141311271035, "loss": 3.2922, "step": 9862 }, { "epoch": 0.461723915969337, "grad_norm": 1.53125, "learning_rate": 0.00018391095598171885, "loss": 2.8677, "step": 9863 }, { "epoch": 0.46177072970916966, "grad_norm": 1.265625, "learning_rate": 0.0001839077805504125, "loss": 2.8355, "step": 9864 }, { "epoch": 0.4618175434490023, "grad_norm": 1.09375, "learning_rate": 0.0001839046048331952, "loss": 3.2474, "step": 9865 }, { "epoch": 0.4618643571888349, "grad_norm": 1.3125, "learning_rate": 0.00018390142883007785, "loss": 3.1068, "step": 9866 }, { "epoch": 0.46191117092866757, "grad_norm": 1.6796875, "learning_rate": 0.0001838982525410712, "loss": 3.172, "step": 9867 }, { "epoch": 0.4619579846685002, "grad_norm": 1.546875, "learning_rate": 0.00018389507596618614, "loss": 3.4847, "step": 9868 }, { "epoch": 0.46200479840833286, "grad_norm": 1.46875, "learning_rate": 0.00018389189910543343, "loss": 3.0026, "step": 9869 }, { "epoch": 0.4620516121481655, "grad_norm": 1.2890625, "learning_rate": 0.00018388872195882394, "loss": 2.8524, "step": 9870 }, { "epoch": 0.46209842588799815, "grad_norm": 1.3359375, "learning_rate": 0.00018388554452636847, "loss": 2.9624, "step": 9871 }, { "epoch": 0.4621452396278308, "grad_norm": 1.25, "learning_rate": 0.00018388236680807788, "loss": 2.6106, "step": 9872 }, { "epoch": 0.4621920533676634, "grad_norm": 1.6484375, "learning_rate": 0.00018387918880396293, "loss": 3.1874, "step": 9873 }, { "epoch": 0.46223886710749607, "grad_norm": 1.46875, "learning_rate": 0.00018387601051403452, "loss": 3.037, "step": 9874 }, { "epoch": 0.4622856808473287, "grad_norm": 1.0859375, "learning_rate": 0.00018387283193830345, "loss": 2.8538, "step": 9875 }, { "epoch": 0.46233249458716136, "grad_norm": 1.2265625, "learning_rate": 0.00018386965307678056, "loss": 2.9935, "step": 9876 }, { "epoch": 0.462379308326994, "grad_norm": 1.5703125, "learning_rate": 0.00018386647392947664, "loss": 2.6758, "step": 9877 }, { "epoch": 0.4624261220668266, "grad_norm": 1.921875, "learning_rate": 0.0001838632944964026, "loss": 2.7973, "step": 9878 }, { "epoch": 0.46247293580665927, "grad_norm": 1.3984375, "learning_rate": 0.0001838601147775692, "loss": 3.1075, "step": 9879 }, { "epoch": 0.4625197495464919, "grad_norm": 1.15625, "learning_rate": 0.00018385693477298732, "loss": 2.4427, "step": 9880 }, { "epoch": 0.46256656328632456, "grad_norm": 1.7578125, "learning_rate": 0.00018385375448266776, "loss": 3.218, "step": 9881 }, { "epoch": 0.4626133770261572, "grad_norm": 1.515625, "learning_rate": 0.00018385057390662141, "loss": 3.3881, "step": 9882 }, { "epoch": 0.4626601907659898, "grad_norm": 1.078125, "learning_rate": 0.00018384739304485906, "loss": 2.7946, "step": 9883 }, { "epoch": 0.4627070045058225, "grad_norm": 1.1796875, "learning_rate": 0.00018384421189739157, "loss": 3.029, "step": 9884 }, { "epoch": 0.4627538182456551, "grad_norm": 1.1953125, "learning_rate": 0.0001838410304642298, "loss": 2.6826, "step": 9885 }, { "epoch": 0.46280063198548776, "grad_norm": 1.6015625, "learning_rate": 0.00018383784874538456, "loss": 2.9955, "step": 9886 }, { "epoch": 0.4628474457253204, "grad_norm": 1.1015625, "learning_rate": 0.00018383466674086663, "loss": 2.903, "step": 9887 }, { "epoch": 0.462894259465153, "grad_norm": 1.359375, "learning_rate": 0.00018383148445068697, "loss": 2.8646, "step": 9888 }, { "epoch": 0.4629410732049857, "grad_norm": 1.984375, "learning_rate": 0.00018382830187485639, "loss": 2.3255, "step": 9889 }, { "epoch": 0.4629878869448183, "grad_norm": 1.2734375, "learning_rate": 0.0001838251190133857, "loss": 2.9416, "step": 9890 }, { "epoch": 0.46303470068465097, "grad_norm": 1.171875, "learning_rate": 0.00018382193586628573, "loss": 2.7583, "step": 9891 }, { "epoch": 0.4630815144244836, "grad_norm": 1.46875, "learning_rate": 0.00018381875243356742, "loss": 2.7928, "step": 9892 }, { "epoch": 0.4631283281643162, "grad_norm": 1.28125, "learning_rate": 0.0001838155687152415, "loss": 3.2067, "step": 9893 }, { "epoch": 0.4631751419041489, "grad_norm": 1.3203125, "learning_rate": 0.0001838123847113189, "loss": 3.2712, "step": 9894 }, { "epoch": 0.4632219556439815, "grad_norm": 1.6640625, "learning_rate": 0.00018380920042181042, "loss": 3.077, "step": 9895 }, { "epoch": 0.46326876938381417, "grad_norm": 1.5703125, "learning_rate": 0.00018380601584672696, "loss": 2.9398, "step": 9896 }, { "epoch": 0.4633155831236468, "grad_norm": 1.1796875, "learning_rate": 0.00018380283098607935, "loss": 2.8786, "step": 9897 }, { "epoch": 0.4633623968634794, "grad_norm": 1.5546875, "learning_rate": 0.00018379964583987844, "loss": 3.3199, "step": 9898 }, { "epoch": 0.4634092106033121, "grad_norm": 1.5625, "learning_rate": 0.00018379646040813505, "loss": 2.8546, "step": 9899 }, { "epoch": 0.4634560243431447, "grad_norm": 1.203125, "learning_rate": 0.00018379327469086009, "loss": 2.7587, "step": 9900 }, { "epoch": 0.4635028380829774, "grad_norm": 1.421875, "learning_rate": 0.00018379008868806438, "loss": 2.9028, "step": 9901 }, { "epoch": 0.46354965182281, "grad_norm": 1.5390625, "learning_rate": 0.0001837869023997588, "loss": 3.0157, "step": 9902 }, { "epoch": 0.4635964655626426, "grad_norm": 1.6171875, "learning_rate": 0.00018378371582595415, "loss": 3.1507, "step": 9903 }, { "epoch": 0.4636432793024753, "grad_norm": 1.703125, "learning_rate": 0.00018378052896666139, "loss": 3.0356, "step": 9904 }, { "epoch": 0.4636900930423079, "grad_norm": 1.6484375, "learning_rate": 0.00018377734182189128, "loss": 2.9043, "step": 9905 }, { "epoch": 0.4637369067821406, "grad_norm": 1.234375, "learning_rate": 0.00018377415439165473, "loss": 3.0005, "step": 9906 }, { "epoch": 0.4637837205219732, "grad_norm": 1.6796875, "learning_rate": 0.0001837709666759626, "loss": 2.9982, "step": 9907 }, { "epoch": 0.4638305342618058, "grad_norm": 1.109375, "learning_rate": 0.00018376777867482575, "loss": 2.8528, "step": 9908 }, { "epoch": 0.4638773480016385, "grad_norm": 1.5859375, "learning_rate": 0.000183764590388255, "loss": 3.3822, "step": 9909 }, { "epoch": 0.4639241617414711, "grad_norm": 1.125, "learning_rate": 0.00018376140181626126, "loss": 2.6732, "step": 9910 }, { "epoch": 0.4639709754813038, "grad_norm": 1.1640625, "learning_rate": 0.00018375821295885542, "loss": 2.9342, "step": 9911 }, { "epoch": 0.4640177892211364, "grad_norm": 1.203125, "learning_rate": 0.00018375502381604827, "loss": 2.689, "step": 9912 }, { "epoch": 0.464064602960969, "grad_norm": 1.078125, "learning_rate": 0.0001837518343878508, "loss": 2.8277, "step": 9913 }, { "epoch": 0.4641114167008017, "grad_norm": 1.0859375, "learning_rate": 0.0001837486446742737, "loss": 4.0219, "step": 9914 }, { "epoch": 0.4641582304406343, "grad_norm": 1.1484375, "learning_rate": 0.00018374545467532798, "loss": 2.9074, "step": 9915 }, { "epoch": 0.464205044180467, "grad_norm": 1.2265625, "learning_rate": 0.00018374226439102443, "loss": 2.9155, "step": 9916 }, { "epoch": 0.4642518579202996, "grad_norm": 2.21875, "learning_rate": 0.00018373907382137396, "loss": 2.9245, "step": 9917 }, { "epoch": 0.4642986716601322, "grad_norm": 1.4296875, "learning_rate": 0.00018373588296638745, "loss": 3.0446, "step": 9918 }, { "epoch": 0.4643454853999649, "grad_norm": 1.6875, "learning_rate": 0.00018373269182607577, "loss": 2.8512, "step": 9919 }, { "epoch": 0.4643922991397975, "grad_norm": 1.0625, "learning_rate": 0.00018372950040044977, "loss": 2.9087, "step": 9920 }, { "epoch": 0.4644391128796302, "grad_norm": 1.3125, "learning_rate": 0.00018372630868952037, "loss": 2.438, "step": 9921 }, { "epoch": 0.4644859266194628, "grad_norm": 1.125, "learning_rate": 0.00018372311669329836, "loss": 2.8995, "step": 9922 }, { "epoch": 0.4645327403592954, "grad_norm": 2.359375, "learning_rate": 0.0001837199244117947, "loss": 2.9538, "step": 9923 }, { "epoch": 0.4645795540991281, "grad_norm": 1.21875, "learning_rate": 0.00018371673184502024, "loss": 2.981, "step": 9924 }, { "epoch": 0.4646263678389607, "grad_norm": 1.3984375, "learning_rate": 0.00018371353899298583, "loss": 3.1359, "step": 9925 }, { "epoch": 0.4646731815787934, "grad_norm": 1.9375, "learning_rate": 0.00018371034585570238, "loss": 3.0596, "step": 9926 }, { "epoch": 0.464719995318626, "grad_norm": 1.9296875, "learning_rate": 0.00018370715243318076, "loss": 2.8134, "step": 9927 }, { "epoch": 0.4647668090584587, "grad_norm": 1.3203125, "learning_rate": 0.0001837039587254319, "loss": 3.0659, "step": 9928 }, { "epoch": 0.4648136227982913, "grad_norm": 1.703125, "learning_rate": 0.00018370076473246663, "loss": 3.229, "step": 9929 }, { "epoch": 0.4648604365381239, "grad_norm": 1.203125, "learning_rate": 0.0001836975704542958, "loss": 3.002, "step": 9930 }, { "epoch": 0.4649072502779566, "grad_norm": 1.2734375, "learning_rate": 0.00018369437589093037, "loss": 3.1358, "step": 9931 }, { "epoch": 0.4649540640177892, "grad_norm": 1.2265625, "learning_rate": 0.00018369118104238116, "loss": 2.8455, "step": 9932 }, { "epoch": 0.4650008777576219, "grad_norm": 1.265625, "learning_rate": 0.00018368798590865913, "loss": 3.0395, "step": 9933 }, { "epoch": 0.4650476914974545, "grad_norm": 1.390625, "learning_rate": 0.0001836847904897751, "loss": 3.0769, "step": 9934 }, { "epoch": 0.4650945052372871, "grad_norm": 1.71875, "learning_rate": 0.00018368159478574, "loss": 3.0658, "step": 9935 }, { "epoch": 0.4651413189771198, "grad_norm": 1.3515625, "learning_rate": 0.00018367839879656467, "loss": 2.8167, "step": 9936 }, { "epoch": 0.4651881327169524, "grad_norm": 3.359375, "learning_rate": 0.00018367520252226004, "loss": 2.9399, "step": 9937 }, { "epoch": 0.4652349464567851, "grad_norm": 1.734375, "learning_rate": 0.00018367200596283703, "loss": 3.3312, "step": 9938 }, { "epoch": 0.4652817601966177, "grad_norm": 1.46875, "learning_rate": 0.00018366880911830648, "loss": 2.6247, "step": 9939 }, { "epoch": 0.46532857393645033, "grad_norm": 2.0, "learning_rate": 0.00018366561198867928, "loss": 2.667, "step": 9940 }, { "epoch": 0.465375387676283, "grad_norm": 1.328125, "learning_rate": 0.00018366241457396635, "loss": 2.57, "step": 9941 }, { "epoch": 0.4654222014161156, "grad_norm": 1.15625, "learning_rate": 0.0001836592168741786, "loss": 2.3177, "step": 9942 }, { "epoch": 0.4654690151559483, "grad_norm": 1.3515625, "learning_rate": 0.00018365601888932688, "loss": 2.7727, "step": 9943 }, { "epoch": 0.4655158288957809, "grad_norm": 1.6875, "learning_rate": 0.00018365282061942213, "loss": 3.2709, "step": 9944 }, { "epoch": 0.46556264263561353, "grad_norm": 1.546875, "learning_rate": 0.00018364962206447524, "loss": 3.2449, "step": 9945 }, { "epoch": 0.4656094563754462, "grad_norm": 1.203125, "learning_rate": 0.00018364642322449704, "loss": 3.0615, "step": 9946 }, { "epoch": 0.4656562701152788, "grad_norm": 1.40625, "learning_rate": 0.00018364322409949852, "loss": 3.094, "step": 9947 }, { "epoch": 0.4657030838551115, "grad_norm": 1.4296875, "learning_rate": 0.00018364002468949057, "loss": 2.7562, "step": 9948 }, { "epoch": 0.4657498975949441, "grad_norm": 1.6484375, "learning_rate": 0.00018363682499448407, "loss": 3.3871, "step": 9949 }, { "epoch": 0.46579671133477674, "grad_norm": 1.4140625, "learning_rate": 0.0001836336250144899, "loss": 2.9631, "step": 9950 }, { "epoch": 0.4658435250746094, "grad_norm": 1.7890625, "learning_rate": 0.00018363042474951898, "loss": 3.0349, "step": 9951 }, { "epoch": 0.46589033881444203, "grad_norm": 1.4140625, "learning_rate": 0.00018362722419958222, "loss": 3.1519, "step": 9952 }, { "epoch": 0.4659371525542747, "grad_norm": 1.3515625, "learning_rate": 0.00018362402336469057, "loss": 2.9236, "step": 9953 }, { "epoch": 0.4659839662941073, "grad_norm": 1.2734375, "learning_rate": 0.00018362082224485487, "loss": 3.0015, "step": 9954 }, { "epoch": 0.46603078003393994, "grad_norm": 1.2109375, "learning_rate": 0.00018361762084008603, "loss": 2.6198, "step": 9955 }, { "epoch": 0.4660775937737726, "grad_norm": 2.125, "learning_rate": 0.00018361441915039502, "loss": 2.7132, "step": 9956 }, { "epoch": 0.46612440751360523, "grad_norm": 1.4140625, "learning_rate": 0.00018361121717579268, "loss": 2.3998, "step": 9957 }, { "epoch": 0.4661712212534379, "grad_norm": 1.1328125, "learning_rate": 0.00018360801491628997, "loss": 2.8981, "step": 9958 }, { "epoch": 0.4662180349932705, "grad_norm": 1.3359375, "learning_rate": 0.00018360481237189778, "loss": 3.0631, "step": 9959 }, { "epoch": 0.46626484873310314, "grad_norm": 1.6484375, "learning_rate": 0.000183601609542627, "loss": 3.0352, "step": 9960 }, { "epoch": 0.4663116624729358, "grad_norm": 2.375, "learning_rate": 0.0001835984064284886, "loss": 2.5686, "step": 9961 }, { "epoch": 0.46635847621276844, "grad_norm": 2.484375, "learning_rate": 0.00018359520302949344, "loss": 3.1213, "step": 9962 }, { "epoch": 0.4664052899526011, "grad_norm": 1.5390625, "learning_rate": 0.00018359199934565247, "loss": 3.2384, "step": 9963 }, { "epoch": 0.46645210369243373, "grad_norm": 1.8671875, "learning_rate": 0.00018358879537697657, "loss": 3.1655, "step": 9964 }, { "epoch": 0.46649891743226635, "grad_norm": 1.1015625, "learning_rate": 0.00018358559112347672, "loss": 2.6637, "step": 9965 }, { "epoch": 0.466545731172099, "grad_norm": 1.046875, "learning_rate": 0.00018358238658516376, "loss": 2.5218, "step": 9966 }, { "epoch": 0.46659254491193164, "grad_norm": 1.3515625, "learning_rate": 0.0001835791817620487, "loss": 3.1161, "step": 9967 }, { "epoch": 0.4666393586517643, "grad_norm": 1.8203125, "learning_rate": 0.00018357597665414236, "loss": 3.0042, "step": 9968 }, { "epoch": 0.46668617239159693, "grad_norm": 1.4609375, "learning_rate": 0.0001835727712614557, "loss": 2.8445, "step": 9969 }, { "epoch": 0.46673298613142955, "grad_norm": 1.34375, "learning_rate": 0.00018356956558399968, "loss": 3.0485, "step": 9970 }, { "epoch": 0.4667797998712622, "grad_norm": 1.25, "learning_rate": 0.00018356635962178523, "loss": 2.8238, "step": 9971 }, { "epoch": 0.46682661361109484, "grad_norm": 1.3515625, "learning_rate": 0.0001835631533748232, "loss": 2.7412, "step": 9972 }, { "epoch": 0.4668734273509275, "grad_norm": 1.109375, "learning_rate": 0.00018355994684312457, "loss": 2.3681, "step": 9973 }, { "epoch": 0.46692024109076014, "grad_norm": 1.5234375, "learning_rate": 0.00018355674002670024, "loss": 2.8986, "step": 9974 }, { "epoch": 0.46696705483059275, "grad_norm": 1.0703125, "learning_rate": 0.00018355353292556114, "loss": 2.2562, "step": 9975 }, { "epoch": 0.46701386857042543, "grad_norm": 1.1953125, "learning_rate": 0.00018355032553971822, "loss": 3.1341, "step": 9976 }, { "epoch": 0.46706068231025805, "grad_norm": 1.265625, "learning_rate": 0.0001835471178691824, "loss": 3.0098, "step": 9977 }, { "epoch": 0.4671074960500907, "grad_norm": 1.2578125, "learning_rate": 0.00018354390991396458, "loss": 3.0533, "step": 9978 }, { "epoch": 0.46715430978992334, "grad_norm": 1.515625, "learning_rate": 0.00018354070167407574, "loss": 2.5504, "step": 9979 }, { "epoch": 0.46720112352975596, "grad_norm": 1.6796875, "learning_rate": 0.0001835374931495268, "loss": 2.7548, "step": 9980 }, { "epoch": 0.46724793726958863, "grad_norm": 3.453125, "learning_rate": 0.00018353428434032866, "loss": 3.1497, "step": 9981 }, { "epoch": 0.46729475100942125, "grad_norm": 1.0703125, "learning_rate": 0.00018353107524649227, "loss": 2.6415, "step": 9982 }, { "epoch": 0.4673415647492539, "grad_norm": 1.421875, "learning_rate": 0.00018352786586802857, "loss": 2.702, "step": 9983 }, { "epoch": 0.46738837848908654, "grad_norm": 1.1953125, "learning_rate": 0.0001835246562049485, "loss": 2.8574, "step": 9984 }, { "epoch": 0.46743519222891916, "grad_norm": 1.328125, "learning_rate": 0.000183521446257263, "loss": 2.1253, "step": 9985 }, { "epoch": 0.46748200596875183, "grad_norm": 1.484375, "learning_rate": 0.000183518236024983, "loss": 3.0257, "step": 9986 }, { "epoch": 0.46752881970858445, "grad_norm": 1.5234375, "learning_rate": 0.00018351502550811944, "loss": 3.4571, "step": 9987 }, { "epoch": 0.4675756334484171, "grad_norm": 1.25, "learning_rate": 0.00018351181470668325, "loss": 2.7351, "step": 9988 }, { "epoch": 0.46762244718824975, "grad_norm": 1.5703125, "learning_rate": 0.00018350860362068537, "loss": 3.1423, "step": 9989 }, { "epoch": 0.4676692609280824, "grad_norm": 1.3984375, "learning_rate": 0.00018350539225013676, "loss": 2.9892, "step": 9990 }, { "epoch": 0.46771607466791504, "grad_norm": 1.546875, "learning_rate": 0.00018350218059504835, "loss": 2.9209, "step": 9991 }, { "epoch": 0.46776288840774766, "grad_norm": 1.71875, "learning_rate": 0.00018349896865543108, "loss": 3.0798, "step": 9992 }, { "epoch": 0.46780970214758033, "grad_norm": 1.53125, "learning_rate": 0.00018349575643129593, "loss": 3.182, "step": 9993 }, { "epoch": 0.46785651588741295, "grad_norm": 1.1953125, "learning_rate": 0.00018349254392265377, "loss": 2.8255, "step": 9994 }, { "epoch": 0.4679033296272456, "grad_norm": 1.265625, "learning_rate": 0.0001834893311295156, "loss": 3.0831, "step": 9995 }, { "epoch": 0.46795014336707824, "grad_norm": 1.2265625, "learning_rate": 0.0001834861180518924, "loss": 2.9512, "step": 9996 }, { "epoch": 0.46799695710691086, "grad_norm": 1.296875, "learning_rate": 0.00018348290468979506, "loss": 2.9524, "step": 9997 }, { "epoch": 0.46804377084674353, "grad_norm": 1.4921875, "learning_rate": 0.00018347969104323455, "loss": 3.3939, "step": 9998 }, { "epoch": 0.46809058458657615, "grad_norm": 1.2265625, "learning_rate": 0.0001834764771122218, "loss": 3.3942, "step": 9999 }, { "epoch": 0.4681373983264088, "grad_norm": 1.4765625, "learning_rate": 0.00018347326289676777, "loss": 3.0696, "step": 10000 }, { "epoch": 0.46818421206624145, "grad_norm": 1.4765625, "learning_rate": 0.00018347004839688346, "loss": 2.8993, "step": 10001 }, { "epoch": 0.46823102580607406, "grad_norm": 1.515625, "learning_rate": 0.00018346683361257976, "loss": 2.8072, "step": 10002 }, { "epoch": 0.46827783954590674, "grad_norm": 2.859375, "learning_rate": 0.00018346361854386766, "loss": 3.9444, "step": 10003 }, { "epoch": 0.46832465328573936, "grad_norm": 1.5703125, "learning_rate": 0.0001834604031907581, "loss": 3.1935, "step": 10004 }, { "epoch": 0.46837146702557203, "grad_norm": 1.296875, "learning_rate": 0.00018345718755326205, "loss": 2.8228, "step": 10005 }, { "epoch": 0.46841828076540465, "grad_norm": 1.203125, "learning_rate": 0.00018345397163139048, "loss": 4.2474, "step": 10006 }, { "epoch": 0.46846509450523727, "grad_norm": 1.3203125, "learning_rate": 0.00018345075542515428, "loss": 2.5248, "step": 10007 }, { "epoch": 0.46851190824506994, "grad_norm": 1.8671875, "learning_rate": 0.00018344753893456445, "loss": 3.4174, "step": 10008 }, { "epoch": 0.46855872198490256, "grad_norm": 2.546875, "learning_rate": 0.00018344432215963198, "loss": 3.0821, "step": 10009 }, { "epoch": 0.46860553572473523, "grad_norm": 1.265625, "learning_rate": 0.00018344110510036782, "loss": 2.8656, "step": 10010 }, { "epoch": 0.46865234946456785, "grad_norm": 1.5234375, "learning_rate": 0.00018343788775678288, "loss": 2.9347, "step": 10011 }, { "epoch": 0.46869916320440047, "grad_norm": 1.3359375, "learning_rate": 0.00018343467012888818, "loss": 3.082, "step": 10012 }, { "epoch": 0.46874597694423314, "grad_norm": 1.765625, "learning_rate": 0.00018343145221669463, "loss": 2.8833, "step": 10013 }, { "epoch": 0.46879279068406576, "grad_norm": 1.6953125, "learning_rate": 0.00018342823402021328, "loss": 2.8835, "step": 10014 }, { "epoch": 0.46883960442389844, "grad_norm": 1.7265625, "learning_rate": 0.000183425015539455, "loss": 3.3004, "step": 10015 }, { "epoch": 0.46888641816373106, "grad_norm": 1.484375, "learning_rate": 0.00018342179677443085, "loss": 2.9821, "step": 10016 }, { "epoch": 0.4689332319035637, "grad_norm": 1.4921875, "learning_rate": 0.0001834185777251517, "loss": 3.0038, "step": 10017 }, { "epoch": 0.46898004564339635, "grad_norm": 1.2265625, "learning_rate": 0.00018341535839162864, "loss": 2.6151, "step": 10018 }, { "epoch": 0.46902685938322897, "grad_norm": 1.2890625, "learning_rate": 0.0001834121387738725, "loss": 2.769, "step": 10019 }, { "epoch": 0.46907367312306164, "grad_norm": 1.546875, "learning_rate": 0.00018340891887189435, "loss": 2.8817, "step": 10020 }, { "epoch": 0.46912048686289426, "grad_norm": 1.390625, "learning_rate": 0.0001834056986857051, "loss": 3.3501, "step": 10021 }, { "epoch": 0.4691673006027269, "grad_norm": 1.5390625, "learning_rate": 0.0001834024782153158, "loss": 3.0329, "step": 10022 }, { "epoch": 0.46921411434255955, "grad_norm": 1.78125, "learning_rate": 0.00018339925746073735, "loss": 3.3781, "step": 10023 }, { "epoch": 0.46926092808239217, "grad_norm": 1.46875, "learning_rate": 0.00018339603642198075, "loss": 2.8568, "step": 10024 }, { "epoch": 0.46930774182222484, "grad_norm": 1.6484375, "learning_rate": 0.00018339281509905696, "loss": 3.4564, "step": 10025 }, { "epoch": 0.46935455556205746, "grad_norm": 1.2109375, "learning_rate": 0.00018338959349197703, "loss": 3.231, "step": 10026 }, { "epoch": 0.4694013693018901, "grad_norm": 1.0703125, "learning_rate": 0.00018338637160075183, "loss": 2.3486, "step": 10027 }, { "epoch": 0.46944818304172276, "grad_norm": 1.390625, "learning_rate": 0.00018338314942539238, "loss": 2.7892, "step": 10028 }, { "epoch": 0.4694949967815554, "grad_norm": 1.5703125, "learning_rate": 0.0001833799269659097, "loss": 3.6758, "step": 10029 }, { "epoch": 0.46954181052138805, "grad_norm": 1.5703125, "learning_rate": 0.00018337670422231473, "loss": 2.8883, "step": 10030 }, { "epoch": 0.46958862426122067, "grad_norm": 1.2734375, "learning_rate": 0.00018337348119461843, "loss": 3.0034, "step": 10031 }, { "epoch": 0.4696354380010533, "grad_norm": 1.5703125, "learning_rate": 0.00018337025788283184, "loss": 2.7322, "step": 10032 }, { "epoch": 0.46968225174088596, "grad_norm": 1.125, "learning_rate": 0.00018336703428696593, "loss": 4.4745, "step": 10033 }, { "epoch": 0.4697290654807186, "grad_norm": 1.3515625, "learning_rate": 0.00018336381040703166, "loss": 3.2505, "step": 10034 }, { "epoch": 0.46977587922055125, "grad_norm": 1.2890625, "learning_rate": 0.00018336058624304, "loss": 3.081, "step": 10035 }, { "epoch": 0.46982269296038387, "grad_norm": 1.375, "learning_rate": 0.000183357361795002, "loss": 3.2252, "step": 10036 }, { "epoch": 0.4698695067002165, "grad_norm": 1.234375, "learning_rate": 0.0001833541370629286, "loss": 2.98, "step": 10037 }, { "epoch": 0.46991632044004916, "grad_norm": 1.9765625, "learning_rate": 0.00018335091204683076, "loss": 3.7699, "step": 10038 }, { "epoch": 0.4699631341798818, "grad_norm": 1.5390625, "learning_rate": 0.00018334768674671951, "loss": 2.9181, "step": 10039 }, { "epoch": 0.47000994791971445, "grad_norm": 1.046875, "learning_rate": 0.00018334446116260588, "loss": 2.3617, "step": 10040 }, { "epoch": 0.4700567616595471, "grad_norm": 1.7578125, "learning_rate": 0.00018334123529450077, "loss": 2.9855, "step": 10041 }, { "epoch": 0.4701035753993797, "grad_norm": 1.3828125, "learning_rate": 0.00018333800914241524, "loss": 2.6958, "step": 10042 }, { "epoch": 0.47015038913921237, "grad_norm": 2.5, "learning_rate": 0.00018333478270636029, "loss": 3.6435, "step": 10043 }, { "epoch": 0.470197202879045, "grad_norm": 1.2578125, "learning_rate": 0.00018333155598634684, "loss": 2.7993, "step": 10044 }, { "epoch": 0.47024401661887766, "grad_norm": 2.25, "learning_rate": 0.00018332832898238595, "loss": 3.0177, "step": 10045 }, { "epoch": 0.4702908303587103, "grad_norm": 1.4140625, "learning_rate": 0.00018332510169448858, "loss": 3.1869, "step": 10046 }, { "epoch": 0.4703376440985429, "grad_norm": 1.375, "learning_rate": 0.00018332187412266576, "loss": 3.1905, "step": 10047 }, { "epoch": 0.47038445783837557, "grad_norm": 1.515625, "learning_rate": 0.00018331864626692845, "loss": 2.4827, "step": 10048 }, { "epoch": 0.4704312715782082, "grad_norm": 1.3671875, "learning_rate": 0.00018331541812728768, "loss": 3.1297, "step": 10049 }, { "epoch": 0.47047808531804086, "grad_norm": 1.2890625, "learning_rate": 0.00018331218970375442, "loss": 2.9189, "step": 10050 }, { "epoch": 0.4705248990578735, "grad_norm": 1.234375, "learning_rate": 0.00018330896099633972, "loss": 2.784, "step": 10051 }, { "epoch": 0.47057171279770615, "grad_norm": 2.828125, "learning_rate": 0.00018330573200505453, "loss": 3.116, "step": 10052 }, { "epoch": 0.4706185265375388, "grad_norm": 1.078125, "learning_rate": 0.00018330250272990988, "loss": 3.013, "step": 10053 }, { "epoch": 0.4706653402773714, "grad_norm": 1.1953125, "learning_rate": 0.00018329927317091676, "loss": 3.2904, "step": 10054 }, { "epoch": 0.47071215401720407, "grad_norm": 2.0, "learning_rate": 0.0001832960433280862, "loss": 2.6266, "step": 10055 }, { "epoch": 0.4707589677570367, "grad_norm": 1.5234375, "learning_rate": 0.0001832928132014292, "loss": 2.6536, "step": 10056 }, { "epoch": 0.47080578149686936, "grad_norm": 1.4609375, "learning_rate": 0.00018328958279095674, "loss": 3.0805, "step": 10057 }, { "epoch": 0.470852595236702, "grad_norm": 1.3359375, "learning_rate": 0.0001832863520966798, "loss": 2.9106, "step": 10058 }, { "epoch": 0.4708994089765346, "grad_norm": 1.5546875, "learning_rate": 0.00018328312111860949, "loss": 2.9283, "step": 10059 }, { "epoch": 0.47094622271636727, "grad_norm": 1.5078125, "learning_rate": 0.0001832798898567567, "loss": 2.9856, "step": 10060 }, { "epoch": 0.4709930364561999, "grad_norm": 0.93359375, "learning_rate": 0.00018327665831113253, "loss": 3.3736, "step": 10061 }, { "epoch": 0.47103985019603256, "grad_norm": 1.3125, "learning_rate": 0.000183273426481748, "loss": 2.6467, "step": 10062 }, { "epoch": 0.4710866639358652, "grad_norm": 1.9140625, "learning_rate": 0.000183270194368614, "loss": 3.3188, "step": 10063 }, { "epoch": 0.4711334776756978, "grad_norm": 1.359375, "learning_rate": 0.0001832669619717417, "loss": 2.806, "step": 10064 }, { "epoch": 0.47118029141553047, "grad_norm": 1.2734375, "learning_rate": 0.00018326372929114199, "loss": 2.9314, "step": 10065 }, { "epoch": 0.4712271051553631, "grad_norm": 1.7421875, "learning_rate": 0.00018326049632682597, "loss": 3.1381, "step": 10066 }, { "epoch": 0.47127391889519576, "grad_norm": 1.2109375, "learning_rate": 0.0001832572630788046, "loss": 2.7985, "step": 10067 }, { "epoch": 0.4713207326350284, "grad_norm": 1.3671875, "learning_rate": 0.00018325402954708891, "loss": 3.1925, "step": 10068 }, { "epoch": 0.471367546374861, "grad_norm": 1.3515625, "learning_rate": 0.00018325079573168994, "loss": 3.1244, "step": 10069 }, { "epoch": 0.4714143601146937, "grad_norm": 1.3984375, "learning_rate": 0.0001832475616326187, "loss": 3.0313, "step": 10070 }, { "epoch": 0.4714611738545263, "grad_norm": 1.1875, "learning_rate": 0.0001832443272498862, "loss": 3.8747, "step": 10071 }, { "epoch": 0.47150798759435897, "grad_norm": 1.90625, "learning_rate": 0.00018324109258350348, "loss": 2.6429, "step": 10072 }, { "epoch": 0.4715548013341916, "grad_norm": 1.8046875, "learning_rate": 0.00018323785763348153, "loss": 3.0154, "step": 10073 }, { "epoch": 0.4716016150740242, "grad_norm": 1.4453125, "learning_rate": 0.0001832346223998314, "loss": 3.0707, "step": 10074 }, { "epoch": 0.4716484288138569, "grad_norm": 1.6875, "learning_rate": 0.0001832313868825641, "loss": 3.152, "step": 10075 }, { "epoch": 0.4716952425536895, "grad_norm": 1.6875, "learning_rate": 0.00018322815108169066, "loss": 3.0612, "step": 10076 }, { "epoch": 0.47174205629352217, "grad_norm": 1.1953125, "learning_rate": 0.00018322491499722215, "loss": 2.8237, "step": 10077 }, { "epoch": 0.4717888700333548, "grad_norm": 1.71875, "learning_rate": 0.00018322167862916947, "loss": 2.6762, "step": 10078 }, { "epoch": 0.4718356837731874, "grad_norm": 1.28125, "learning_rate": 0.00018321844197754378, "loss": 3.2283, "step": 10079 }, { "epoch": 0.4718824975130201, "grad_norm": 1.3515625, "learning_rate": 0.0001832152050423561, "loss": 2.9179, "step": 10080 }, { "epoch": 0.4719293112528527, "grad_norm": 1.3359375, "learning_rate": 0.00018321196782361735, "loss": 3.1137, "step": 10081 }, { "epoch": 0.4719761249926854, "grad_norm": 1.515625, "learning_rate": 0.00018320873032133864, "loss": 2.7369, "step": 10082 }, { "epoch": 0.472022938732518, "grad_norm": 1.3359375, "learning_rate": 0.000183205492535531, "loss": 2.7816, "step": 10083 }, { "epoch": 0.4720697524723506, "grad_norm": 1.421875, "learning_rate": 0.00018320225446620544, "loss": 3.0514, "step": 10084 }, { "epoch": 0.4721165662121833, "grad_norm": 1.2421875, "learning_rate": 0.00018319901611337305, "loss": 4.7512, "step": 10085 }, { "epoch": 0.4721633799520159, "grad_norm": 1.7421875, "learning_rate": 0.0001831957774770448, "loss": 2.9085, "step": 10086 }, { "epoch": 0.4722101936918486, "grad_norm": 1.375, "learning_rate": 0.00018319253855723173, "loss": 2.9886, "step": 10087 }, { "epoch": 0.4722570074316812, "grad_norm": 1.3203125, "learning_rate": 0.00018318929935394492, "loss": 2.9086, "step": 10088 }, { "epoch": 0.4723038211715138, "grad_norm": 1.46875, "learning_rate": 0.00018318605986719538, "loss": 3.0877, "step": 10089 }, { "epoch": 0.4723506349113465, "grad_norm": 1.0859375, "learning_rate": 0.0001831828200969941, "loss": 2.74, "step": 10090 }, { "epoch": 0.4723974486511791, "grad_norm": 1.1015625, "learning_rate": 0.00018317958004335222, "loss": 2.1822, "step": 10091 }, { "epoch": 0.4724442623910118, "grad_norm": 1.5625, "learning_rate": 0.00018317633970628072, "loss": 3.0309, "step": 10092 }, { "epoch": 0.4724910761308444, "grad_norm": 1.3203125, "learning_rate": 0.00018317309908579064, "loss": 2.9646, "step": 10093 }, { "epoch": 0.472537889870677, "grad_norm": 2.03125, "learning_rate": 0.00018316985818189302, "loss": 3.02, "step": 10094 }, { "epoch": 0.4725847036105097, "grad_norm": 2.828125, "learning_rate": 0.00018316661699459895, "loss": 2.8504, "step": 10095 }, { "epoch": 0.4726315173503423, "grad_norm": 1.546875, "learning_rate": 0.0001831633755239194, "loss": 3.2896, "step": 10096 }, { "epoch": 0.472678331090175, "grad_norm": 1.21875, "learning_rate": 0.0001831601337698655, "loss": 3.0307, "step": 10097 }, { "epoch": 0.4727251448300076, "grad_norm": 1.6328125, "learning_rate": 0.0001831568917324482, "loss": 3.129, "step": 10098 }, { "epoch": 0.4727719585698402, "grad_norm": 1.2734375, "learning_rate": 0.00018315364941167865, "loss": 2.6908, "step": 10099 }, { "epoch": 0.4728187723096729, "grad_norm": 1.4375, "learning_rate": 0.00018315040680756784, "loss": 3.1713, "step": 10100 }, { "epoch": 0.4728655860495055, "grad_norm": 1.1953125, "learning_rate": 0.0001831471639201268, "loss": 2.7303, "step": 10101 }, { "epoch": 0.4729123997893382, "grad_norm": 1.2734375, "learning_rate": 0.0001831439207493666, "loss": 2.6181, "step": 10102 }, { "epoch": 0.4729592135291708, "grad_norm": 7.90625, "learning_rate": 0.0001831406772952983, "loss": 3.5545, "step": 10103 }, { "epoch": 0.4730060272690034, "grad_norm": 1.234375, "learning_rate": 0.00018313743355793295, "loss": 2.9456, "step": 10104 }, { "epoch": 0.4730528410088361, "grad_norm": 1.546875, "learning_rate": 0.00018313418953728162, "loss": 2.8316, "step": 10105 }, { "epoch": 0.4730996547486687, "grad_norm": 1.046875, "learning_rate": 0.00018313094523335534, "loss": 2.8792, "step": 10106 }, { "epoch": 0.4731464684885014, "grad_norm": 1.4453125, "learning_rate": 0.0001831277006461652, "loss": 3.1287, "step": 10107 }, { "epoch": 0.473193282228334, "grad_norm": 1.359375, "learning_rate": 0.00018312445577572215, "loss": 3.2976, "step": 10108 }, { "epoch": 0.47324009596816663, "grad_norm": 1.421875, "learning_rate": 0.00018312121062203735, "loss": 3.0428, "step": 10109 }, { "epoch": 0.4732869097079993, "grad_norm": 1.484375, "learning_rate": 0.00018311796518512185, "loss": 2.6292, "step": 10110 }, { "epoch": 0.4733337234478319, "grad_norm": 1.5859375, "learning_rate": 0.00018311471946498667, "loss": 2.9464, "step": 10111 }, { "epoch": 0.4733805371876646, "grad_norm": 1.5546875, "learning_rate": 0.0001831114734616429, "loss": 2.9644, "step": 10112 }, { "epoch": 0.4734273509274972, "grad_norm": 1.40625, "learning_rate": 0.0001831082271751016, "loss": 2.6742, "step": 10113 }, { "epoch": 0.4734741646673299, "grad_norm": 1.2109375, "learning_rate": 0.00018310498060537377, "loss": 3.1193, "step": 10114 }, { "epoch": 0.4735209784071625, "grad_norm": 1.75, "learning_rate": 0.00018310173375247056, "loss": 3.0719, "step": 10115 }, { "epoch": 0.4735677921469951, "grad_norm": 1.296875, "learning_rate": 0.000183098486616403, "loss": 2.6519, "step": 10116 }, { "epoch": 0.4736146058868278, "grad_norm": 1.1640625, "learning_rate": 0.00018309523919718212, "loss": 3.432, "step": 10117 }, { "epoch": 0.4736614196266604, "grad_norm": 1.4375, "learning_rate": 0.00018309199149481903, "loss": 2.8111, "step": 10118 }, { "epoch": 0.4737082333664931, "grad_norm": 1.78125, "learning_rate": 0.0001830887435093248, "loss": 3.0194, "step": 10119 }, { "epoch": 0.4737550471063257, "grad_norm": 1.7578125, "learning_rate": 0.0001830854952407105, "loss": 3.2286, "step": 10120 }, { "epoch": 0.47380186084615833, "grad_norm": 1.5703125, "learning_rate": 0.00018308224668898713, "loss": 2.885, "step": 10121 }, { "epoch": 0.473848674585991, "grad_norm": 1.1484375, "learning_rate": 0.0001830789978541658, "loss": 3.0813, "step": 10122 }, { "epoch": 0.4738954883258236, "grad_norm": 1.4375, "learning_rate": 0.0001830757487362576, "loss": 2.9879, "step": 10123 }, { "epoch": 0.4739423020656563, "grad_norm": 1.8515625, "learning_rate": 0.00018307249933527362, "loss": 3.3889, "step": 10124 }, { "epoch": 0.4739891158054889, "grad_norm": 1.3046875, "learning_rate": 0.00018306924965122488, "loss": 3.2435, "step": 10125 }, { "epoch": 0.47403592954532153, "grad_norm": 1.515625, "learning_rate": 0.00018306599968412245, "loss": 2.8866, "step": 10126 }, { "epoch": 0.4740827432851542, "grad_norm": 1.3125, "learning_rate": 0.00018306274943397744, "loss": 3.3961, "step": 10127 }, { "epoch": 0.4741295570249868, "grad_norm": 1.2421875, "learning_rate": 0.00018305949890080094, "loss": 2.9205, "step": 10128 }, { "epoch": 0.4741763707648195, "grad_norm": 1.4453125, "learning_rate": 0.00018305624808460398, "loss": 2.8595, "step": 10129 }, { "epoch": 0.4742231845046521, "grad_norm": 1.28125, "learning_rate": 0.00018305299698539765, "loss": 2.8548, "step": 10130 }, { "epoch": 0.47426999824448474, "grad_norm": 1.1953125, "learning_rate": 0.00018304974560319303, "loss": 2.9683, "step": 10131 }, { "epoch": 0.4743168119843174, "grad_norm": 1.2265625, "learning_rate": 0.00018304649393800123, "loss": 2.9543, "step": 10132 }, { "epoch": 0.47436362572415003, "grad_norm": 1.25, "learning_rate": 0.00018304324198983326, "loss": 2.9579, "step": 10133 }, { "epoch": 0.4744104394639827, "grad_norm": 1.0546875, "learning_rate": 0.00018303998975870028, "loss": 2.8105, "step": 10134 }, { "epoch": 0.4744572532038153, "grad_norm": 1.234375, "learning_rate": 0.0001830367372446133, "loss": 3.1641, "step": 10135 }, { "epoch": 0.47450406694364794, "grad_norm": 1.4375, "learning_rate": 0.00018303348444758348, "loss": 2.8598, "step": 10136 }, { "epoch": 0.4745508806834806, "grad_norm": 1.328125, "learning_rate": 0.00018303023136762183, "loss": 2.6845, "step": 10137 }, { "epoch": 0.47459769442331323, "grad_norm": 1.765625, "learning_rate": 0.00018302697800473948, "loss": 3.2779, "step": 10138 }, { "epoch": 0.4746445081631459, "grad_norm": 1.2578125, "learning_rate": 0.00018302372435894748, "loss": 2.9262, "step": 10139 }, { "epoch": 0.4746913219029785, "grad_norm": 1.453125, "learning_rate": 0.00018302047043025697, "loss": 2.7228, "step": 10140 }, { "epoch": 0.47473813564281114, "grad_norm": 1.3203125, "learning_rate": 0.000183017216218679, "loss": 2.9208, "step": 10141 }, { "epoch": 0.4747849493826438, "grad_norm": 1.3359375, "learning_rate": 0.00018301396172422464, "loss": 3.0545, "step": 10142 }, { "epoch": 0.47483176312247644, "grad_norm": 1.59375, "learning_rate": 0.00018301070694690504, "loss": 2.4062, "step": 10143 }, { "epoch": 0.4748785768623091, "grad_norm": 1.2265625, "learning_rate": 0.00018300745188673122, "loss": 3.1218, "step": 10144 }, { "epoch": 0.47492539060214173, "grad_norm": 1.4453125, "learning_rate": 0.0001830041965437143, "loss": 2.7852, "step": 10145 }, { "epoch": 0.47497220434197435, "grad_norm": 1.15625, "learning_rate": 0.0001830009409178654, "loss": 3.1013, "step": 10146 }, { "epoch": 0.475019018081807, "grad_norm": 1.2890625, "learning_rate": 0.0001829976850091956, "loss": 3.125, "step": 10147 }, { "epoch": 0.47506583182163964, "grad_norm": 1.0703125, "learning_rate": 0.00018299442881771597, "loss": 2.9098, "step": 10148 }, { "epoch": 0.4751126455614723, "grad_norm": 1.5859375, "learning_rate": 0.00018299117234343764, "loss": 3.0195, "step": 10149 }, { "epoch": 0.47515945930130493, "grad_norm": 1.3671875, "learning_rate": 0.00018298791558637168, "loss": 2.634, "step": 10150 }, { "epoch": 0.47520627304113755, "grad_norm": 1.5859375, "learning_rate": 0.00018298465854652918, "loss": 3.2058, "step": 10151 }, { "epoch": 0.4752530867809702, "grad_norm": 1.203125, "learning_rate": 0.00018298140122392125, "loss": 2.7552, "step": 10152 }, { "epoch": 0.47529990052080284, "grad_norm": 1.265625, "learning_rate": 0.000182978143618559, "loss": 3.2007, "step": 10153 }, { "epoch": 0.4753467142606355, "grad_norm": 1.421875, "learning_rate": 0.00018297488573045353, "loss": 3.1634, "step": 10154 }, { "epoch": 0.47539352800046814, "grad_norm": 1.4609375, "learning_rate": 0.00018297162755961593, "loss": 3.0558, "step": 10155 }, { "epoch": 0.47544034174030075, "grad_norm": 1.7734375, "learning_rate": 0.0001829683691060573, "loss": 2.5988, "step": 10156 }, { "epoch": 0.4754871554801334, "grad_norm": 1.734375, "learning_rate": 0.00018296511036978874, "loss": 3.1906, "step": 10157 }, { "epoch": 0.47553396921996605, "grad_norm": 1.53125, "learning_rate": 0.00018296185135082137, "loss": 2.3044, "step": 10158 }, { "epoch": 0.4755807829597987, "grad_norm": 1.5234375, "learning_rate": 0.0001829585920491663, "loss": 2.673, "step": 10159 }, { "epoch": 0.47562759669963134, "grad_norm": 1.5546875, "learning_rate": 0.0001829553324648346, "loss": 2.8987, "step": 10160 }, { "epoch": 0.47567441043946396, "grad_norm": 1.2734375, "learning_rate": 0.00018295207259783742, "loss": 3.1077, "step": 10161 }, { "epoch": 0.47572122417929663, "grad_norm": 1.6953125, "learning_rate": 0.00018294881244818585, "loss": 3.0173, "step": 10162 }, { "epoch": 0.47576803791912925, "grad_norm": 1.4140625, "learning_rate": 0.00018294555201589096, "loss": 3.3543, "step": 10163 }, { "epoch": 0.4758148516589619, "grad_norm": 1.4140625, "learning_rate": 0.00018294229130096394, "loss": 3.0668, "step": 10164 }, { "epoch": 0.47586166539879454, "grad_norm": 1.4921875, "learning_rate": 0.00018293903030341583, "loss": 2.8083, "step": 10165 }, { "epoch": 0.47590847913862716, "grad_norm": 1.0859375, "learning_rate": 0.00018293576902325776, "loss": 3.2203, "step": 10166 }, { "epoch": 0.47595529287845983, "grad_norm": 1.390625, "learning_rate": 0.00018293250746050088, "loss": 2.9833, "step": 10167 }, { "epoch": 0.47600210661829245, "grad_norm": 1.4921875, "learning_rate": 0.00018292924561515628, "loss": 2.8073, "step": 10168 }, { "epoch": 0.4760489203581251, "grad_norm": 1.375, "learning_rate": 0.00018292598348723505, "loss": 2.8229, "step": 10169 }, { "epoch": 0.47609573409795775, "grad_norm": 1.0703125, "learning_rate": 0.0001829227210767483, "loss": 2.75, "step": 10170 }, { "epoch": 0.47614254783779036, "grad_norm": 1.1328125, "learning_rate": 0.00018291945838370724, "loss": 2.8509, "step": 10171 }, { "epoch": 0.47618936157762304, "grad_norm": 1.9296875, "learning_rate": 0.00018291619540812282, "loss": 3.2542, "step": 10172 }, { "epoch": 0.47623617531745566, "grad_norm": 1.6953125, "learning_rate": 0.00018291293215000635, "loss": 3.5007, "step": 10173 }, { "epoch": 0.47628298905728833, "grad_norm": 1.6328125, "learning_rate": 0.0001829096686093688, "loss": 2.8882, "step": 10174 }, { "epoch": 0.47632980279712095, "grad_norm": 1.515625, "learning_rate": 0.00018290640478622137, "loss": 2.7247, "step": 10175 }, { "epoch": 0.4763766165369536, "grad_norm": 1.5859375, "learning_rate": 0.00018290314068057515, "loss": 2.8488, "step": 10176 }, { "epoch": 0.47642343027678624, "grad_norm": 1.3359375, "learning_rate": 0.0001828998762924413, "loss": 3.0392, "step": 10177 }, { "epoch": 0.47647024401661886, "grad_norm": 1.0625, "learning_rate": 0.00018289661162183086, "loss": 2.3819, "step": 10178 }, { "epoch": 0.47651705775645153, "grad_norm": 1.25, "learning_rate": 0.00018289334666875506, "loss": 3.1277, "step": 10179 }, { "epoch": 0.47656387149628415, "grad_norm": 1.25, "learning_rate": 0.00018289008143322492, "loss": 3.002, "step": 10180 }, { "epoch": 0.4766106852361168, "grad_norm": 1.453125, "learning_rate": 0.00018288681591525165, "loss": 2.8882, "step": 10181 }, { "epoch": 0.47665749897594945, "grad_norm": 1.1796875, "learning_rate": 0.00018288355011484634, "loss": 2.8838, "step": 10182 }, { "epoch": 0.47670431271578206, "grad_norm": 1.546875, "learning_rate": 0.00018288028403202014, "loss": 2.915, "step": 10183 }, { "epoch": 0.47675112645561474, "grad_norm": 1.171875, "learning_rate": 0.00018287701766678412, "loss": 2.6102, "step": 10184 }, { "epoch": 0.47679794019544736, "grad_norm": 1.46875, "learning_rate": 0.00018287375101914952, "loss": 3.318, "step": 10185 }, { "epoch": 0.47684475393528003, "grad_norm": 1.40625, "learning_rate": 0.00018287048408912736, "loss": 2.7122, "step": 10186 }, { "epoch": 0.47689156767511265, "grad_norm": 1.359375, "learning_rate": 0.00018286721687672884, "loss": 2.68, "step": 10187 }, { "epoch": 0.47693838141494527, "grad_norm": 1.234375, "learning_rate": 0.00018286394938196504, "loss": 2.9056, "step": 10188 }, { "epoch": 0.47698519515477794, "grad_norm": 1.6640625, "learning_rate": 0.0001828606816048471, "loss": 3.143, "step": 10189 }, { "epoch": 0.47703200889461056, "grad_norm": 1.2734375, "learning_rate": 0.00018285741354538622, "loss": 3.2663, "step": 10190 }, { "epoch": 0.47707882263444323, "grad_norm": 1.640625, "learning_rate": 0.0001828541452035935, "loss": 2.9936, "step": 10191 }, { "epoch": 0.47712563637427585, "grad_norm": 1.4921875, "learning_rate": 0.00018285087657948004, "loss": 2.8309, "step": 10192 }, { "epoch": 0.47717245011410847, "grad_norm": 1.453125, "learning_rate": 0.000182847607673057, "loss": 2.8273, "step": 10193 }, { "epoch": 0.47721926385394114, "grad_norm": 1.265625, "learning_rate": 0.00018284433848433554, "loss": 2.7928, "step": 10194 }, { "epoch": 0.47726607759377376, "grad_norm": 1.09375, "learning_rate": 0.00018284106901332678, "loss": 2.2371, "step": 10195 }, { "epoch": 0.47731289133360644, "grad_norm": 1.359375, "learning_rate": 0.0001828377992600419, "loss": 2.4608, "step": 10196 }, { "epoch": 0.47735970507343906, "grad_norm": 1.515625, "learning_rate": 0.00018283452922449197, "loss": 2.8105, "step": 10197 }, { "epoch": 0.4774065188132717, "grad_norm": 1.6640625, "learning_rate": 0.0001828312589066882, "loss": 2.8274, "step": 10198 }, { "epoch": 0.47745333255310435, "grad_norm": 1.5078125, "learning_rate": 0.00018282798830664167, "loss": 2.954, "step": 10199 }, { "epoch": 0.47750014629293697, "grad_norm": 1.1171875, "learning_rate": 0.00018282471742436357, "loss": 2.9128, "step": 10200 }, { "epoch": 0.47754696003276964, "grad_norm": 1.2578125, "learning_rate": 0.00018282144625986503, "loss": 3.1357, "step": 10201 }, { "epoch": 0.47759377377260226, "grad_norm": 1.546875, "learning_rate": 0.0001828181748131572, "loss": 3.3455, "step": 10202 }, { "epoch": 0.4776405875124349, "grad_norm": 1.3359375, "learning_rate": 0.00018281490308425124, "loss": 2.9865, "step": 10203 }, { "epoch": 0.47768740125226755, "grad_norm": 2.359375, "learning_rate": 0.00018281163107315828, "loss": 3.0058, "step": 10204 }, { "epoch": 0.47773421499210017, "grad_norm": 1.3984375, "learning_rate": 0.00018280835877988947, "loss": 3.1258, "step": 10205 }, { "epoch": 0.47778102873193284, "grad_norm": 1.9375, "learning_rate": 0.00018280508620445595, "loss": 3.0212, "step": 10206 }, { "epoch": 0.47782784247176546, "grad_norm": 1.5703125, "learning_rate": 0.0001828018133468689, "loss": 3.0812, "step": 10207 }, { "epoch": 0.4778746562115981, "grad_norm": 1.375, "learning_rate": 0.00018279854020713946, "loss": 3.2017, "step": 10208 }, { "epoch": 0.47792146995143076, "grad_norm": 1.6953125, "learning_rate": 0.00018279526678527877, "loss": 3.2731, "step": 10209 }, { "epoch": 0.4779682836912634, "grad_norm": 1.1875, "learning_rate": 0.00018279199308129801, "loss": 3.2817, "step": 10210 }, { "epoch": 0.47801509743109605, "grad_norm": 1.4375, "learning_rate": 0.00018278871909520832, "loss": 3.0694, "step": 10211 }, { "epoch": 0.47806191117092867, "grad_norm": 1.546875, "learning_rate": 0.00018278544482702085, "loss": 3.0746, "step": 10212 }, { "epoch": 0.4781087249107613, "grad_norm": 1.46875, "learning_rate": 0.00018278217027674674, "loss": 2.797, "step": 10213 }, { "epoch": 0.47815553865059396, "grad_norm": 1.640625, "learning_rate": 0.00018277889544439718, "loss": 3.2785, "step": 10214 }, { "epoch": 0.4782023523904266, "grad_norm": 2.1875, "learning_rate": 0.00018277562032998333, "loss": 3.385, "step": 10215 }, { "epoch": 0.47824916613025925, "grad_norm": 1.7109375, "learning_rate": 0.0001827723449335163, "loss": 3.0875, "step": 10216 }, { "epoch": 0.47829597987009187, "grad_norm": 1.2109375, "learning_rate": 0.00018276906925500735, "loss": 2.9409, "step": 10217 }, { "epoch": 0.4783427936099245, "grad_norm": 1.1953125, "learning_rate": 0.00018276579329446754, "loss": 2.9578, "step": 10218 }, { "epoch": 0.47838960734975716, "grad_norm": 1.2578125, "learning_rate": 0.0001827625170519081, "loss": 3.031, "step": 10219 }, { "epoch": 0.4784364210895898, "grad_norm": 1.71875, "learning_rate": 0.00018275924052734018, "loss": 2.9058, "step": 10220 }, { "epoch": 0.47848323482942245, "grad_norm": 1.3359375, "learning_rate": 0.0001827559637207749, "loss": 2.9633, "step": 10221 }, { "epoch": 0.4785300485692551, "grad_norm": 1.515625, "learning_rate": 0.0001827526866322234, "loss": 2.721, "step": 10222 }, { "epoch": 0.4785768623090877, "grad_norm": 1.3984375, "learning_rate": 0.000182749409261697, "loss": 2.9795, "step": 10223 }, { "epoch": 0.47862367604892037, "grad_norm": 1.5625, "learning_rate": 0.00018274613160920673, "loss": 2.5929, "step": 10224 }, { "epoch": 0.478670489788753, "grad_norm": 1.0859375, "learning_rate": 0.00018274285367476383, "loss": 2.6698, "step": 10225 }, { "epoch": 0.47871730352858566, "grad_norm": 1.3984375, "learning_rate": 0.0001827395754583794, "loss": 2.9349, "step": 10226 }, { "epoch": 0.4787641172684183, "grad_norm": 1.1953125, "learning_rate": 0.00018273629696006466, "loss": 2.6278, "step": 10227 }, { "epoch": 0.4788109310082509, "grad_norm": 2.703125, "learning_rate": 0.00018273301817983077, "loss": 2.1416, "step": 10228 }, { "epoch": 0.47885774474808357, "grad_norm": 1.3671875, "learning_rate": 0.00018272973911768895, "loss": 2.9605, "step": 10229 }, { "epoch": 0.4789045584879162, "grad_norm": 1.5859375, "learning_rate": 0.00018272645977365025, "loss": 2.752, "step": 10230 }, { "epoch": 0.47895137222774886, "grad_norm": 2.5625, "learning_rate": 0.00018272318014772595, "loss": 3.4511, "step": 10231 }, { "epoch": 0.4789981859675815, "grad_norm": 2.609375, "learning_rate": 0.0001827199002399272, "loss": 3.3573, "step": 10232 }, { "epoch": 0.4790449997074141, "grad_norm": 1.515625, "learning_rate": 0.00018271662005026515, "loss": 3.1334, "step": 10233 }, { "epoch": 0.4790918134472468, "grad_norm": 3.171875, "learning_rate": 0.00018271333957875103, "loss": 2.6701, "step": 10234 }, { "epoch": 0.4791386271870794, "grad_norm": 1.984375, "learning_rate": 0.00018271005882539602, "loss": 3.3612, "step": 10235 }, { "epoch": 0.47918544092691207, "grad_norm": 1.203125, "learning_rate": 0.00018270677779021118, "loss": 3.1204, "step": 10236 }, { "epoch": 0.4792322546667447, "grad_norm": 1.2734375, "learning_rate": 0.0001827034964732078, "loss": 2.8333, "step": 10237 }, { "epoch": 0.47927906840657736, "grad_norm": 1.28125, "learning_rate": 0.00018270021487439708, "loss": 3.079, "step": 10238 }, { "epoch": 0.47932588214641, "grad_norm": 1.5546875, "learning_rate": 0.00018269693299379012, "loss": 3.2709, "step": 10239 }, { "epoch": 0.4793726958862426, "grad_norm": 1.546875, "learning_rate": 0.00018269365083139815, "loss": 2.664, "step": 10240 }, { "epoch": 0.47941950962607527, "grad_norm": 1.046875, "learning_rate": 0.00018269036838723234, "loss": 2.7628, "step": 10241 }, { "epoch": 0.4794663233659079, "grad_norm": 1.2890625, "learning_rate": 0.00018268708566130388, "loss": 2.9859, "step": 10242 }, { "epoch": 0.47951313710574056, "grad_norm": 1.21875, "learning_rate": 0.00018268380265362398, "loss": 2.6679, "step": 10243 }, { "epoch": 0.4795599508455732, "grad_norm": 1.3359375, "learning_rate": 0.00018268051936420378, "loss": 3.3811, "step": 10244 }, { "epoch": 0.4796067645854058, "grad_norm": 1.484375, "learning_rate": 0.0001826772357930545, "loss": 2.4381, "step": 10245 }, { "epoch": 0.47965357832523847, "grad_norm": 2.078125, "learning_rate": 0.0001826739519401873, "loss": 3.4855, "step": 10246 }, { "epoch": 0.4797003920650711, "grad_norm": 1.3828125, "learning_rate": 0.0001826706678056134, "loss": 3.1053, "step": 10247 }, { "epoch": 0.47974720580490376, "grad_norm": 1.3046875, "learning_rate": 0.00018266738338934397, "loss": 2.7375, "step": 10248 }, { "epoch": 0.4797940195447364, "grad_norm": 1.1171875, "learning_rate": 0.0001826640986913902, "loss": 2.4405, "step": 10249 }, { "epoch": 0.479840833284569, "grad_norm": 1.453125, "learning_rate": 0.00018266081371176332, "loss": 2.9136, "step": 10250 }, { "epoch": 0.4798876470244017, "grad_norm": 1.2421875, "learning_rate": 0.00018265752845047448, "loss": 2.9967, "step": 10251 }, { "epoch": 0.4799344607642343, "grad_norm": 1.3359375, "learning_rate": 0.0001826542429075349, "loss": 3.138, "step": 10252 }, { "epoch": 0.47998127450406697, "grad_norm": 2.015625, "learning_rate": 0.00018265095708295577, "loss": 2.9779, "step": 10253 }, { "epoch": 0.4800280882438996, "grad_norm": 1.421875, "learning_rate": 0.00018264767097674826, "loss": 2.9633, "step": 10254 }, { "epoch": 0.4800749019837322, "grad_norm": 1.3671875, "learning_rate": 0.0001826443845889236, "loss": 2.9556, "step": 10255 }, { "epoch": 0.4801217157235649, "grad_norm": 1.6640625, "learning_rate": 0.00018264109791949298, "loss": 2.6736, "step": 10256 }, { "epoch": 0.4801685294633975, "grad_norm": 1.5390625, "learning_rate": 0.00018263781096846757, "loss": 2.9607, "step": 10257 }, { "epoch": 0.48021534320323017, "grad_norm": 1.4609375, "learning_rate": 0.0001826345237358586, "loss": 2.7772, "step": 10258 }, { "epoch": 0.4802621569430628, "grad_norm": 1.7109375, "learning_rate": 0.00018263123622167728, "loss": 2.9415, "step": 10259 }, { "epoch": 0.4803089706828954, "grad_norm": 1.59375, "learning_rate": 0.00018262794842593483, "loss": 2.8301, "step": 10260 }, { "epoch": 0.4803557844227281, "grad_norm": 1.0, "learning_rate": 0.00018262466034864238, "loss": 3.7099, "step": 10261 }, { "epoch": 0.4804025981625607, "grad_norm": 1.3125, "learning_rate": 0.00018262137198981119, "loss": 3.1403, "step": 10262 }, { "epoch": 0.4804494119023934, "grad_norm": 1.3203125, "learning_rate": 0.00018261808334945245, "loss": 2.9703, "step": 10263 }, { "epoch": 0.480496225642226, "grad_norm": 1.625, "learning_rate": 0.00018261479442757738, "loss": 3.3433, "step": 10264 }, { "epoch": 0.4805430393820586, "grad_norm": 1.1953125, "learning_rate": 0.00018261150522419714, "loss": 3.1189, "step": 10265 }, { "epoch": 0.4805898531218913, "grad_norm": 2.1875, "learning_rate": 0.00018260821573932298, "loss": 2.435, "step": 10266 }, { "epoch": 0.4806366668617239, "grad_norm": 1.6171875, "learning_rate": 0.0001826049259729661, "loss": 2.4034, "step": 10267 }, { "epoch": 0.4806834806015566, "grad_norm": 1.78125, "learning_rate": 0.00018260163592513773, "loss": 3.3918, "step": 10268 }, { "epoch": 0.4807302943413892, "grad_norm": 1.453125, "learning_rate": 0.00018259834559584905, "loss": 2.8756, "step": 10269 }, { "epoch": 0.4807771080812218, "grad_norm": 1.7421875, "learning_rate": 0.0001825950549851113, "loss": 2.6831, "step": 10270 }, { "epoch": 0.4808239218210545, "grad_norm": 1.59375, "learning_rate": 0.00018259176409293564, "loss": 2.9418, "step": 10271 }, { "epoch": 0.4808707355608871, "grad_norm": 1.2265625, "learning_rate": 0.00018258847291933333, "loss": 2.7545, "step": 10272 }, { "epoch": 0.4809175493007198, "grad_norm": 1.4140625, "learning_rate": 0.00018258518146431558, "loss": 2.7941, "step": 10273 }, { "epoch": 0.4809643630405524, "grad_norm": 1.6015625, "learning_rate": 0.0001825818897278936, "loss": 3.0988, "step": 10274 }, { "epoch": 0.481011176780385, "grad_norm": 1.21875, "learning_rate": 0.0001825785977100786, "loss": 2.7951, "step": 10275 }, { "epoch": 0.4810579905202177, "grad_norm": 1.7578125, "learning_rate": 0.00018257530541088176, "loss": 3.3229, "step": 10276 }, { "epoch": 0.4811048042600503, "grad_norm": 1.21875, "learning_rate": 0.00018257201283031443, "loss": 2.9634, "step": 10277 }, { "epoch": 0.481151617999883, "grad_norm": 2.328125, "learning_rate": 0.00018256871996838767, "loss": 3.1437, "step": 10278 }, { "epoch": 0.4811984317397156, "grad_norm": 1.515625, "learning_rate": 0.00018256542682511278, "loss": 3.218, "step": 10279 }, { "epoch": 0.4812452454795482, "grad_norm": 1.15625, "learning_rate": 0.00018256213340050098, "loss": 2.8825, "step": 10280 }, { "epoch": 0.4812920592193809, "grad_norm": 1.265625, "learning_rate": 0.00018255883969456348, "loss": 2.6508, "step": 10281 }, { "epoch": 0.4813388729592135, "grad_norm": 1.0703125, "learning_rate": 0.0001825555457073115, "loss": 2.5058, "step": 10282 }, { "epoch": 0.4813856866990462, "grad_norm": 1.1328125, "learning_rate": 0.00018255225143875626, "loss": 2.7185, "step": 10283 }, { "epoch": 0.4814325004388788, "grad_norm": 1.5, "learning_rate": 0.00018254895688890903, "loss": 2.6256, "step": 10284 }, { "epoch": 0.4814793141787114, "grad_norm": 1.234375, "learning_rate": 0.000182545662057781, "loss": 2.6076, "step": 10285 }, { "epoch": 0.4815261279185441, "grad_norm": 1.703125, "learning_rate": 0.00018254236694538337, "loss": 3.2189, "step": 10286 }, { "epoch": 0.4815729416583767, "grad_norm": 1.3046875, "learning_rate": 0.00018253907155172742, "loss": 2.8635, "step": 10287 }, { "epoch": 0.4816197553982094, "grad_norm": 1.4453125, "learning_rate": 0.00018253577587682432, "loss": 2.6416, "step": 10288 }, { "epoch": 0.481666569138042, "grad_norm": 1.4296875, "learning_rate": 0.00018253247992068537, "loss": 3.1711, "step": 10289 }, { "epoch": 0.48171338287787463, "grad_norm": 1.375, "learning_rate": 0.00018252918368332174, "loss": 2.7628, "step": 10290 }, { "epoch": 0.4817601966177073, "grad_norm": 1.6171875, "learning_rate": 0.0001825258871647447, "loss": 2.9974, "step": 10291 }, { "epoch": 0.4818070103575399, "grad_norm": 1.1015625, "learning_rate": 0.00018252259036496545, "loss": 2.6585, "step": 10292 }, { "epoch": 0.4818538240973726, "grad_norm": 1.1640625, "learning_rate": 0.00018251929328399528, "loss": 2.5887, "step": 10293 }, { "epoch": 0.4819006378372052, "grad_norm": 1.140625, "learning_rate": 0.00018251599592184536, "loss": 3.7748, "step": 10294 }, { "epoch": 0.48194745157703783, "grad_norm": 1.03125, "learning_rate": 0.00018251269827852698, "loss": 3.0073, "step": 10295 }, { "epoch": 0.4819942653168705, "grad_norm": 1.0546875, "learning_rate": 0.00018250940035405134, "loss": 2.0378, "step": 10296 }, { "epoch": 0.4820410790567031, "grad_norm": 1.84375, "learning_rate": 0.00018250610214842964, "loss": 2.614, "step": 10297 }, { "epoch": 0.4820878927965358, "grad_norm": 2.46875, "learning_rate": 0.00018250280366167323, "loss": 3.3329, "step": 10298 }, { "epoch": 0.4821347065363684, "grad_norm": 1.3984375, "learning_rate": 0.00018249950489379325, "loss": 2.9075, "step": 10299 }, { "epoch": 0.4821815202762011, "grad_norm": 1.3515625, "learning_rate": 0.00018249620584480098, "loss": 2.8251, "step": 10300 }, { "epoch": 0.4822283340160337, "grad_norm": 1.171875, "learning_rate": 0.00018249290651470767, "loss": 2.847, "step": 10301 }, { "epoch": 0.48227514775586633, "grad_norm": 1.3828125, "learning_rate": 0.00018248960690352454, "loss": 3.3078, "step": 10302 }, { "epoch": 0.482321961495699, "grad_norm": 1.8671875, "learning_rate": 0.00018248630701126286, "loss": 3.0775, "step": 10303 }, { "epoch": 0.4823687752355316, "grad_norm": 1.3359375, "learning_rate": 0.00018248300683793383, "loss": 2.7555, "step": 10304 }, { "epoch": 0.4824155889753643, "grad_norm": 1.171875, "learning_rate": 0.00018247970638354875, "loss": 2.9977, "step": 10305 }, { "epoch": 0.4824624027151969, "grad_norm": 1.171875, "learning_rate": 0.00018247640564811882, "loss": 2.9705, "step": 10306 }, { "epoch": 0.48250921645502953, "grad_norm": 1.3984375, "learning_rate": 0.00018247310463165534, "loss": 3.0182, "step": 10307 }, { "epoch": 0.4825560301948622, "grad_norm": 1.3984375, "learning_rate": 0.00018246980333416944, "loss": 3.1022, "step": 10308 }, { "epoch": 0.4826028439346948, "grad_norm": 1.3515625, "learning_rate": 0.0001824665017556725, "loss": 2.793, "step": 10309 }, { "epoch": 0.4826496576745275, "grad_norm": 1.6171875, "learning_rate": 0.00018246319989617576, "loss": 3.4483, "step": 10310 }, { "epoch": 0.4826964714143601, "grad_norm": 1.390625, "learning_rate": 0.0001824598977556904, "loss": 3.013, "step": 10311 }, { "epoch": 0.48274328515419274, "grad_norm": 1.2890625, "learning_rate": 0.0001824565953342277, "loss": 3.1576, "step": 10312 }, { "epoch": 0.4827900988940254, "grad_norm": 1.703125, "learning_rate": 0.0001824532926317989, "loss": 3.1306, "step": 10313 }, { "epoch": 0.48283691263385803, "grad_norm": 1.296875, "learning_rate": 0.0001824499896484153, "loss": 2.8226, "step": 10314 }, { "epoch": 0.4828837263736907, "grad_norm": 1.2109375, "learning_rate": 0.00018244668638408814, "loss": 2.7627, "step": 10315 }, { "epoch": 0.4829305401135233, "grad_norm": 1.4453125, "learning_rate": 0.00018244338283882863, "loss": 2.6159, "step": 10316 }, { "epoch": 0.48297735385335594, "grad_norm": 1.28125, "learning_rate": 0.00018244007901264806, "loss": 2.9168, "step": 10317 }, { "epoch": 0.4830241675931886, "grad_norm": 1.8125, "learning_rate": 0.00018243677490555768, "loss": 3.0731, "step": 10318 }, { "epoch": 0.48307098133302123, "grad_norm": 1.3359375, "learning_rate": 0.00018243347051756877, "loss": 3.1221, "step": 10319 }, { "epoch": 0.4831177950728539, "grad_norm": 1.2578125, "learning_rate": 0.00018243016584869258, "loss": 3.3181, "step": 10320 }, { "epoch": 0.4831646088126865, "grad_norm": 1.1796875, "learning_rate": 0.00018242686089894037, "loss": 2.6711, "step": 10321 }, { "epoch": 0.48321142255251914, "grad_norm": 1.5625, "learning_rate": 0.00018242355566832336, "loss": 2.7939, "step": 10322 }, { "epoch": 0.4832582362923518, "grad_norm": 1.40625, "learning_rate": 0.0001824202501568529, "loss": 2.9478, "step": 10323 }, { "epoch": 0.48330505003218444, "grad_norm": 1.3046875, "learning_rate": 0.00018241694436454018, "loss": 3.2233, "step": 10324 }, { "epoch": 0.4833518637720171, "grad_norm": 1.9453125, "learning_rate": 0.00018241363829139647, "loss": 3.055, "step": 10325 }, { "epoch": 0.48339867751184973, "grad_norm": 1.53125, "learning_rate": 0.00018241033193743306, "loss": 2.8586, "step": 10326 }, { "epoch": 0.48344549125168235, "grad_norm": 2.796875, "learning_rate": 0.00018240702530266122, "loss": 3.3132, "step": 10327 }, { "epoch": 0.483492304991515, "grad_norm": 1.484375, "learning_rate": 0.00018240371838709217, "loss": 2.925, "step": 10328 }, { "epoch": 0.48353911873134764, "grad_norm": 1.2578125, "learning_rate": 0.00018240041119073726, "loss": 3.1111, "step": 10329 }, { "epoch": 0.4835859324711803, "grad_norm": 2.6875, "learning_rate": 0.00018239710371360768, "loss": 2.6032, "step": 10330 }, { "epoch": 0.48363274621101293, "grad_norm": 1.78125, "learning_rate": 0.00018239379595571474, "loss": 3.3952, "step": 10331 }, { "epoch": 0.48367955995084555, "grad_norm": 1.2421875, "learning_rate": 0.0001823904879170697, "loss": 3.5791, "step": 10332 }, { "epoch": 0.4837263736906782, "grad_norm": 1.1171875, "learning_rate": 0.00018238717959768384, "loss": 2.4403, "step": 10333 }, { "epoch": 0.48377318743051084, "grad_norm": 1.2734375, "learning_rate": 0.0001823838709975684, "loss": 2.9605, "step": 10334 }, { "epoch": 0.4838200011703435, "grad_norm": 1.4296875, "learning_rate": 0.00018238056211673472, "loss": 3.0193, "step": 10335 }, { "epoch": 0.48386681491017614, "grad_norm": 1.78125, "learning_rate": 0.00018237725295519401, "loss": 3.2099, "step": 10336 }, { "epoch": 0.48391362865000875, "grad_norm": 1.359375, "learning_rate": 0.0001823739435129576, "loss": 3.1017, "step": 10337 }, { "epoch": 0.4839604423898414, "grad_norm": 1.6796875, "learning_rate": 0.00018237063379003673, "loss": 2.3802, "step": 10338 }, { "epoch": 0.48400725612967405, "grad_norm": 1.40625, "learning_rate": 0.00018236732378644268, "loss": 2.7754, "step": 10339 }, { "epoch": 0.4840540698695067, "grad_norm": 1.359375, "learning_rate": 0.00018236401350218672, "loss": 2.8365, "step": 10340 }, { "epoch": 0.48410088360933934, "grad_norm": 1.46875, "learning_rate": 0.00018236070293728017, "loss": 3.1613, "step": 10341 }, { "epoch": 0.48414769734917196, "grad_norm": 1.65625, "learning_rate": 0.00018235739209173429, "loss": 2.9616, "step": 10342 }, { "epoch": 0.48419451108900463, "grad_norm": 1.265625, "learning_rate": 0.00018235408096556032, "loss": 3.2003, "step": 10343 }, { "epoch": 0.48424132482883725, "grad_norm": 1.609375, "learning_rate": 0.00018235076955876961, "loss": 3.2625, "step": 10344 }, { "epoch": 0.4842881385686699, "grad_norm": 1.5390625, "learning_rate": 0.0001823474578713734, "loss": 2.9799, "step": 10345 }, { "epoch": 0.48433495230850254, "grad_norm": 1.078125, "learning_rate": 0.000182344145903383, "loss": 2.6859, "step": 10346 }, { "epoch": 0.48438176604833516, "grad_norm": 1.1953125, "learning_rate": 0.00018234083365480967, "loss": 2.9509, "step": 10347 }, { "epoch": 0.48442857978816783, "grad_norm": 1.4296875, "learning_rate": 0.00018233752112566473, "loss": 4.205, "step": 10348 }, { "epoch": 0.48447539352800045, "grad_norm": 6.59375, "learning_rate": 0.00018233420831595943, "loss": 2.4641, "step": 10349 }, { "epoch": 0.4845222072678331, "grad_norm": 1.734375, "learning_rate": 0.00018233089522570506, "loss": 3.3356, "step": 10350 }, { "epoch": 0.48456902100766575, "grad_norm": 1.3671875, "learning_rate": 0.00018232758185491294, "loss": 3.1367, "step": 10351 }, { "epoch": 0.48461583474749836, "grad_norm": 1.4296875, "learning_rate": 0.00018232426820359433, "loss": 2.7837, "step": 10352 }, { "epoch": 0.48466264848733104, "grad_norm": 1.6796875, "learning_rate": 0.00018232095427176057, "loss": 3.015, "step": 10353 }, { "epoch": 0.48470946222716366, "grad_norm": 1.515625, "learning_rate": 0.0001823176400594229, "loss": 3.1547, "step": 10354 }, { "epoch": 0.48475627596699633, "grad_norm": 1.9140625, "learning_rate": 0.00018231432556659264, "loss": 3.2232, "step": 10355 }, { "epoch": 0.48480308970682895, "grad_norm": 1.2734375, "learning_rate": 0.00018231101079328103, "loss": 2.7913, "step": 10356 }, { "epoch": 0.48484990344666157, "grad_norm": 1.2734375, "learning_rate": 0.00018230769573949945, "loss": 2.7, "step": 10357 }, { "epoch": 0.48489671718649424, "grad_norm": 1.3828125, "learning_rate": 0.00018230438040525913, "loss": 3.0784, "step": 10358 }, { "epoch": 0.48494353092632686, "grad_norm": 2.03125, "learning_rate": 0.0001823010647905714, "loss": 3.0797, "step": 10359 }, { "epoch": 0.48499034466615953, "grad_norm": 1.65625, "learning_rate": 0.00018229774889544755, "loss": 3.1932, "step": 10360 }, { "epoch": 0.48503715840599215, "grad_norm": 1.28125, "learning_rate": 0.00018229443271989887, "loss": 2.6623, "step": 10361 }, { "epoch": 0.4850839721458248, "grad_norm": 1.015625, "learning_rate": 0.00018229111626393666, "loss": 4.5629, "step": 10362 }, { "epoch": 0.48513078588565745, "grad_norm": 1.578125, "learning_rate": 0.00018228779952757224, "loss": 2.708, "step": 10363 }, { "epoch": 0.48517759962549006, "grad_norm": 1.8125, "learning_rate": 0.00018228448251081692, "loss": 3.5966, "step": 10364 }, { "epoch": 0.48522441336532274, "grad_norm": 1.6171875, "learning_rate": 0.00018228116521368197, "loss": 2.8777, "step": 10365 }, { "epoch": 0.48527122710515536, "grad_norm": 1.3359375, "learning_rate": 0.0001822778476361787, "loss": 2.5257, "step": 10366 }, { "epoch": 0.48531804084498803, "grad_norm": 1.6171875, "learning_rate": 0.00018227452977831842, "loss": 3.3157, "step": 10367 }, { "epoch": 0.48536485458482065, "grad_norm": 1.21875, "learning_rate": 0.00018227121164011243, "loss": 2.546, "step": 10368 }, { "epoch": 0.48541166832465327, "grad_norm": 1.25, "learning_rate": 0.00018226789322157205, "loss": 2.7749, "step": 10369 }, { "epoch": 0.48545848206448594, "grad_norm": 1.265625, "learning_rate": 0.00018226457452270858, "loss": 3.006, "step": 10370 }, { "epoch": 0.48550529580431856, "grad_norm": 1.3828125, "learning_rate": 0.00018226125554353332, "loss": 3.1902, "step": 10371 }, { "epoch": 0.48555210954415123, "grad_norm": 1.5390625, "learning_rate": 0.0001822579362840576, "loss": 3.1683, "step": 10372 }, { "epoch": 0.48559892328398385, "grad_norm": 1.7578125, "learning_rate": 0.0001822546167442927, "loss": 3.076, "step": 10373 }, { "epoch": 0.48564573702381647, "grad_norm": 1.5546875, "learning_rate": 0.00018225129692424994, "loss": 2.9178, "step": 10374 }, { "epoch": 0.48569255076364914, "grad_norm": 1.2421875, "learning_rate": 0.00018224797682394068, "loss": 2.5053, "step": 10375 }, { "epoch": 0.48573936450348176, "grad_norm": 1.828125, "learning_rate": 0.00018224465644337614, "loss": 3.0189, "step": 10376 }, { "epoch": 0.48578617824331444, "grad_norm": 1.3828125, "learning_rate": 0.00018224133578256772, "loss": 2.6921, "step": 10377 }, { "epoch": 0.48583299198314706, "grad_norm": 1.328125, "learning_rate": 0.00018223801484152673, "loss": 2.7284, "step": 10378 }, { "epoch": 0.4858798057229797, "grad_norm": 1.96875, "learning_rate": 0.0001822346936202644, "loss": 4.0258, "step": 10379 }, { "epoch": 0.48592661946281235, "grad_norm": 1.5390625, "learning_rate": 0.00018223137211879214, "loss": 2.8927, "step": 10380 }, { "epoch": 0.48597343320264497, "grad_norm": 1.3203125, "learning_rate": 0.0001822280503371212, "loss": 3.0287, "step": 10381 }, { "epoch": 0.48602024694247764, "grad_norm": 3.125, "learning_rate": 0.00018222472827526297, "loss": 3.2006, "step": 10382 }, { "epoch": 0.48606706068231026, "grad_norm": 2.109375, "learning_rate": 0.00018222140593322872, "loss": 2.8104, "step": 10383 }, { "epoch": 0.4861138744221429, "grad_norm": 1.1875, "learning_rate": 0.00018221808331102976, "loss": 2.8296, "step": 10384 }, { "epoch": 0.48616068816197555, "grad_norm": 1.609375, "learning_rate": 0.00018221476040867747, "loss": 2.8953, "step": 10385 }, { "epoch": 0.48620750190180817, "grad_norm": 1.296875, "learning_rate": 0.00018221143722618312, "loss": 2.8676, "step": 10386 }, { "epoch": 0.48625431564164084, "grad_norm": 2.203125, "learning_rate": 0.00018220811376355804, "loss": 2.8458, "step": 10387 }, { "epoch": 0.48630112938147346, "grad_norm": 1.21875, "learning_rate": 0.00018220479002081357, "loss": 2.8532, "step": 10388 }, { "epoch": 0.4863479431213061, "grad_norm": 1.4296875, "learning_rate": 0.00018220146599796103, "loss": 2.6737, "step": 10389 }, { "epoch": 0.48639475686113876, "grad_norm": 2.078125, "learning_rate": 0.00018219814169501174, "loss": 2.8005, "step": 10390 }, { "epoch": 0.4864415706009714, "grad_norm": 1.1875, "learning_rate": 0.00018219481711197706, "loss": 2.9076, "step": 10391 }, { "epoch": 0.48648838434080405, "grad_norm": 1.1875, "learning_rate": 0.00018219149224886827, "loss": 2.991, "step": 10392 }, { "epoch": 0.48653519808063667, "grad_norm": 1.265625, "learning_rate": 0.00018218816710569672, "loss": 2.7104, "step": 10393 }, { "epoch": 0.4865820118204693, "grad_norm": 1.671875, "learning_rate": 0.00018218484168247375, "loss": 3.0533, "step": 10394 }, { "epoch": 0.48662882556030196, "grad_norm": 1.6015625, "learning_rate": 0.00018218151597921068, "loss": 3.0339, "step": 10395 }, { "epoch": 0.4866756393001346, "grad_norm": 1.078125, "learning_rate": 0.00018217818999591884, "loss": 2.7256, "step": 10396 }, { "epoch": 0.48672245303996725, "grad_norm": 1.2578125, "learning_rate": 0.00018217486373260956, "loss": 2.7589, "step": 10397 }, { "epoch": 0.48676926677979987, "grad_norm": 1.3671875, "learning_rate": 0.0001821715371892942, "loss": 3.1949, "step": 10398 }, { "epoch": 0.4868160805196325, "grad_norm": 1.171875, "learning_rate": 0.00018216821036598407, "loss": 2.4402, "step": 10399 }, { "epoch": 0.48686289425946516, "grad_norm": 1.203125, "learning_rate": 0.0001821648832626905, "loss": 4.1173, "step": 10400 }, { "epoch": 0.4869097079992978, "grad_norm": 1.515625, "learning_rate": 0.00018216155587942484, "loss": 2.6872, "step": 10401 }, { "epoch": 0.48695652173913045, "grad_norm": 1.4609375, "learning_rate": 0.00018215822821619843, "loss": 3.0229, "step": 10402 }, { "epoch": 0.4870033354789631, "grad_norm": 1.1015625, "learning_rate": 0.00018215490027302265, "loss": 3.1981, "step": 10403 }, { "epoch": 0.4870501492187957, "grad_norm": 1.5859375, "learning_rate": 0.00018215157204990874, "loss": 3.2064, "step": 10404 }, { "epoch": 0.48709696295862837, "grad_norm": 1.3046875, "learning_rate": 0.00018214824354686812, "loss": 2.6418, "step": 10405 }, { "epoch": 0.487143776698461, "grad_norm": 1.2109375, "learning_rate": 0.0001821449147639121, "loss": 2.8561, "step": 10406 }, { "epoch": 0.48719059043829366, "grad_norm": 1.375, "learning_rate": 0.00018214158570105203, "loss": 2.9862, "step": 10407 }, { "epoch": 0.4872374041781263, "grad_norm": 1.421875, "learning_rate": 0.00018213825635829926, "loss": 3.0487, "step": 10408 }, { "epoch": 0.4872842179179589, "grad_norm": 1.5703125, "learning_rate": 0.00018213492673566515, "loss": 2.974, "step": 10409 }, { "epoch": 0.48733103165779157, "grad_norm": 1.796875, "learning_rate": 0.000182131596833161, "loss": 3.1407, "step": 10410 }, { "epoch": 0.4873778453976242, "grad_norm": 1.4296875, "learning_rate": 0.0001821282666507982, "loss": 3.1291, "step": 10411 }, { "epoch": 0.48742465913745686, "grad_norm": 1.421875, "learning_rate": 0.00018212493618858806, "loss": 2.8186, "step": 10412 }, { "epoch": 0.4874714728772895, "grad_norm": 1.546875, "learning_rate": 0.00018212160544654195, "loss": 3.2356, "step": 10413 }, { "epoch": 0.4875182866171221, "grad_norm": 1.125, "learning_rate": 0.0001821182744246712, "loss": 2.6152, "step": 10414 }, { "epoch": 0.4875651003569548, "grad_norm": 1.3125, "learning_rate": 0.0001821149431229872, "loss": 2.8377, "step": 10415 }, { "epoch": 0.4876119140967874, "grad_norm": 1.171875, "learning_rate": 0.0001821116115415013, "loss": 2.8281, "step": 10416 }, { "epoch": 0.48765872783662007, "grad_norm": 1.3515625, "learning_rate": 0.00018210827968022478, "loss": 2.8071, "step": 10417 }, { "epoch": 0.4877055415764527, "grad_norm": 1.2734375, "learning_rate": 0.00018210494753916908, "loss": 2.6197, "step": 10418 }, { "epoch": 0.4877523553162853, "grad_norm": 1.5859375, "learning_rate": 0.0001821016151183455, "loss": 2.8214, "step": 10419 }, { "epoch": 0.487799169056118, "grad_norm": 2.0625, "learning_rate": 0.00018209828241776542, "loss": 3.3779, "step": 10420 }, { "epoch": 0.4878459827959506, "grad_norm": 1.78125, "learning_rate": 0.00018209494943744018, "loss": 2.2984, "step": 10421 }, { "epoch": 0.48789279653578327, "grad_norm": 0.98046875, "learning_rate": 0.00018209161617738112, "loss": 3.7734, "step": 10422 }, { "epoch": 0.4879396102756159, "grad_norm": 1.3046875, "learning_rate": 0.00018208828263759967, "loss": 2.4224, "step": 10423 }, { "epoch": 0.48798642401544856, "grad_norm": 1.1015625, "learning_rate": 0.00018208494881810712, "loss": 3.0756, "step": 10424 }, { "epoch": 0.4880332377552812, "grad_norm": 1.296875, "learning_rate": 0.00018208161471891488, "loss": 2.9313, "step": 10425 }, { "epoch": 0.4880800514951138, "grad_norm": 1.6171875, "learning_rate": 0.00018207828034003424, "loss": 2.9798, "step": 10426 }, { "epoch": 0.48812686523494647, "grad_norm": 1.2421875, "learning_rate": 0.00018207494568147663, "loss": 2.6805, "step": 10427 }, { "epoch": 0.4881736789747791, "grad_norm": 1.4921875, "learning_rate": 0.00018207161074325337, "loss": 3.2059, "step": 10428 }, { "epoch": 0.48822049271461176, "grad_norm": 1.296875, "learning_rate": 0.00018206827552537584, "loss": 2.9049, "step": 10429 }, { "epoch": 0.4882673064544444, "grad_norm": 1.4453125, "learning_rate": 0.00018206494002785543, "loss": 3.0813, "step": 10430 }, { "epoch": 0.488314120194277, "grad_norm": 1.7109375, "learning_rate": 0.00018206160425070347, "loss": 2.9111, "step": 10431 }, { "epoch": 0.4883609339341097, "grad_norm": 1.6484375, "learning_rate": 0.00018205826819393133, "loss": 3.0469, "step": 10432 }, { "epoch": 0.4884077476739423, "grad_norm": 1.4375, "learning_rate": 0.0001820549318575504, "loss": 2.7115, "step": 10433 }, { "epoch": 0.48845456141377497, "grad_norm": 1.28125, "learning_rate": 0.000182051595241572, "loss": 2.5626, "step": 10434 }, { "epoch": 0.4885013751536076, "grad_norm": 1.0625, "learning_rate": 0.00018204825834600754, "loss": 3.0161, "step": 10435 }, { "epoch": 0.4885481888934402, "grad_norm": 1.234375, "learning_rate": 0.0001820449211708684, "loss": 2.4524, "step": 10436 }, { "epoch": 0.4885950026332729, "grad_norm": 1.15625, "learning_rate": 0.00018204158371616593, "loss": 3.093, "step": 10437 }, { "epoch": 0.4886418163731055, "grad_norm": 1.34375, "learning_rate": 0.00018203824598191147, "loss": 3.054, "step": 10438 }, { "epoch": 0.48868863011293817, "grad_norm": 1.625, "learning_rate": 0.0001820349079681165, "loss": 3.2787, "step": 10439 }, { "epoch": 0.4887354438527708, "grad_norm": 1.671875, "learning_rate": 0.00018203156967479226, "loss": 2.9161, "step": 10440 }, { "epoch": 0.4887822575926034, "grad_norm": 4.75, "learning_rate": 0.00018202823110195023, "loss": 3.3376, "step": 10441 }, { "epoch": 0.4888290713324361, "grad_norm": 1.1171875, "learning_rate": 0.00018202489224960174, "loss": 2.5394, "step": 10442 }, { "epoch": 0.4888758850722687, "grad_norm": 1.9375, "learning_rate": 0.00018202155311775812, "loss": 3.1374, "step": 10443 }, { "epoch": 0.4889226988121014, "grad_norm": 1.4453125, "learning_rate": 0.00018201821370643085, "loss": 3.0466, "step": 10444 }, { "epoch": 0.488969512551934, "grad_norm": 1.3203125, "learning_rate": 0.00018201487401563125, "loss": 3.4844, "step": 10445 }, { "epoch": 0.4890163262917666, "grad_norm": 1.3515625, "learning_rate": 0.0001820115340453707, "loss": 2.6671, "step": 10446 }, { "epoch": 0.4890631400315993, "grad_norm": 1.25, "learning_rate": 0.00018200819379566059, "loss": 2.7379, "step": 10447 }, { "epoch": 0.4891099537714319, "grad_norm": 1.515625, "learning_rate": 0.00018200485326651228, "loss": 2.9033, "step": 10448 }, { "epoch": 0.4891567675112646, "grad_norm": 1.5625, "learning_rate": 0.0001820015124579372, "loss": 2.8064, "step": 10449 }, { "epoch": 0.4892035812510972, "grad_norm": 1.59375, "learning_rate": 0.0001819981713699467, "loss": 3.257, "step": 10450 }, { "epoch": 0.4892503949909298, "grad_norm": 1.34375, "learning_rate": 0.00018199483000255214, "loss": 3.0393, "step": 10451 }, { "epoch": 0.4892972087307625, "grad_norm": 1.8828125, "learning_rate": 0.000181991488355765, "loss": 3.3558, "step": 10452 }, { "epoch": 0.4893440224705951, "grad_norm": 1.5859375, "learning_rate": 0.00018198814642959656, "loss": 2.7727, "step": 10453 }, { "epoch": 0.4893908362104278, "grad_norm": 1.21875, "learning_rate": 0.00018198480422405825, "loss": 2.3928, "step": 10454 }, { "epoch": 0.4894376499502604, "grad_norm": 1.53125, "learning_rate": 0.00018198146173916148, "loss": 3.1709, "step": 10455 }, { "epoch": 0.489484463690093, "grad_norm": 2.328125, "learning_rate": 0.00018197811897491758, "loss": 2.724, "step": 10456 }, { "epoch": 0.4895312774299257, "grad_norm": 1.3046875, "learning_rate": 0.00018197477593133804, "loss": 3.097, "step": 10457 }, { "epoch": 0.4895780911697583, "grad_norm": 1.625, "learning_rate": 0.0001819714326084341, "loss": 2.9901, "step": 10458 }, { "epoch": 0.489624904909591, "grad_norm": 1.5, "learning_rate": 0.00018196808900621734, "loss": 2.928, "step": 10459 }, { "epoch": 0.4896717186494236, "grad_norm": 1.34375, "learning_rate": 0.00018196474512469898, "loss": 2.581, "step": 10460 }, { "epoch": 0.4897185323892562, "grad_norm": 1.34375, "learning_rate": 0.00018196140096389052, "loss": 2.906, "step": 10461 }, { "epoch": 0.4897653461290889, "grad_norm": 1.171875, "learning_rate": 0.00018195805652380332, "loss": 2.8149, "step": 10462 }, { "epoch": 0.4898121598689215, "grad_norm": 1.3359375, "learning_rate": 0.00018195471180444878, "loss": 2.8625, "step": 10463 }, { "epoch": 0.4898589736087542, "grad_norm": 1.2265625, "learning_rate": 0.0001819513668058383, "loss": 2.7082, "step": 10464 }, { "epoch": 0.4899057873485868, "grad_norm": 1.5703125, "learning_rate": 0.00018194802152798328, "loss": 3.1772, "step": 10465 }, { "epoch": 0.4899526010884194, "grad_norm": 1.1171875, "learning_rate": 0.0001819446759708951, "loss": 2.8575, "step": 10466 }, { "epoch": 0.4899994148282521, "grad_norm": 1.4609375, "learning_rate": 0.0001819413301345852, "loss": 2.6353, "step": 10467 }, { "epoch": 0.4900462285680847, "grad_norm": 1.1796875, "learning_rate": 0.00018193798401906492, "loss": 2.6162, "step": 10468 }, { "epoch": 0.4900930423079174, "grad_norm": 1.421875, "learning_rate": 0.00018193463762434568, "loss": 3.1029, "step": 10469 }, { "epoch": 0.49013985604775, "grad_norm": 2.4375, "learning_rate": 0.00018193129095043893, "loss": 3.1669, "step": 10470 }, { "epoch": 0.49018666978758263, "grad_norm": 1.3671875, "learning_rate": 0.00018192794399735605, "loss": 2.95, "step": 10471 }, { "epoch": 0.4902334835274153, "grad_norm": 1.453125, "learning_rate": 0.00018192459676510841, "loss": 2.7259, "step": 10472 }, { "epoch": 0.4902802972672479, "grad_norm": 1.9921875, "learning_rate": 0.00018192124925370747, "loss": 3.0379, "step": 10473 }, { "epoch": 0.4903271110070806, "grad_norm": 1.671875, "learning_rate": 0.00018191790146316459, "loss": 2.9594, "step": 10474 }, { "epoch": 0.4903739247469132, "grad_norm": 1.375, "learning_rate": 0.00018191455339349122, "loss": 2.5937, "step": 10475 }, { "epoch": 0.49042073848674583, "grad_norm": 1.4921875, "learning_rate": 0.0001819112050446987, "loss": 2.9374, "step": 10476 }, { "epoch": 0.4904675522265785, "grad_norm": 1.5234375, "learning_rate": 0.0001819078564167985, "loss": 3.2068, "step": 10477 }, { "epoch": 0.4905143659664111, "grad_norm": 1.546875, "learning_rate": 0.00018190450750980204, "loss": 2.7712, "step": 10478 }, { "epoch": 0.4905611797062438, "grad_norm": 1.359375, "learning_rate": 0.0001819011583237207, "loss": 3.0936, "step": 10479 }, { "epoch": 0.4906079934460764, "grad_norm": 1.3203125, "learning_rate": 0.00018189780885856588, "loss": 2.4347, "step": 10480 }, { "epoch": 0.4906548071859091, "grad_norm": 1.5078125, "learning_rate": 0.00018189445911434899, "loss": 3.099, "step": 10481 }, { "epoch": 0.4907016209257417, "grad_norm": 1.3046875, "learning_rate": 0.00018189110909108152, "loss": 2.9588, "step": 10482 }, { "epoch": 0.49074843466557433, "grad_norm": 1.1796875, "learning_rate": 0.0001818877587887748, "loss": 2.9991, "step": 10483 }, { "epoch": 0.490795248405407, "grad_norm": 1.4140625, "learning_rate": 0.00018188440820744026, "loss": 3.1391, "step": 10484 }, { "epoch": 0.4908420621452396, "grad_norm": 1.703125, "learning_rate": 0.00018188105734708935, "loss": 2.6831, "step": 10485 }, { "epoch": 0.4908888758850723, "grad_norm": 1.5703125, "learning_rate": 0.00018187770620773348, "loss": 3.134, "step": 10486 }, { "epoch": 0.4909356896249049, "grad_norm": 1.125, "learning_rate": 0.00018187435478938402, "loss": 2.7813, "step": 10487 }, { "epoch": 0.49098250336473753, "grad_norm": 1.4140625, "learning_rate": 0.00018187100309205247, "loss": 2.3688, "step": 10488 }, { "epoch": 0.4910293171045702, "grad_norm": 1.3125, "learning_rate": 0.00018186765111575018, "loss": 2.8241, "step": 10489 }, { "epoch": 0.4910761308444028, "grad_norm": 1.2578125, "learning_rate": 0.0001818642988604886, "loss": 2.6276, "step": 10490 }, { "epoch": 0.4911229445842355, "grad_norm": 0.984375, "learning_rate": 0.0001818609463262792, "loss": 2.5955, "step": 10491 }, { "epoch": 0.4911697583240681, "grad_norm": 1.3359375, "learning_rate": 0.00018185759351313328, "loss": 3.263, "step": 10492 }, { "epoch": 0.49121657206390074, "grad_norm": 1.359375, "learning_rate": 0.0001818542404210624, "loss": 3.275, "step": 10493 }, { "epoch": 0.4912633858037334, "grad_norm": 1.4609375, "learning_rate": 0.00018185088705007792, "loss": 2.5944, "step": 10494 }, { "epoch": 0.49131019954356603, "grad_norm": 1.265625, "learning_rate": 0.00018184753340019127, "loss": 2.7583, "step": 10495 }, { "epoch": 0.4913570132833987, "grad_norm": 1.21875, "learning_rate": 0.00018184417947141387, "loss": 2.829, "step": 10496 }, { "epoch": 0.4914038270232313, "grad_norm": 1.34375, "learning_rate": 0.00018184082526375716, "loss": 3.1031, "step": 10497 }, { "epoch": 0.49145064076306394, "grad_norm": 1.515625, "learning_rate": 0.0001818374707772326, "loss": 3.183, "step": 10498 }, { "epoch": 0.4914974545028966, "grad_norm": 1.4609375, "learning_rate": 0.00018183411601185157, "loss": 2.9176, "step": 10499 }, { "epoch": 0.49154426824272923, "grad_norm": 1.1796875, "learning_rate": 0.00018183076096762551, "loss": 2.7466, "step": 10500 }, { "epoch": 0.4915910819825619, "grad_norm": 1.4375, "learning_rate": 0.00018182740564456586, "loss": 3.1403, "step": 10501 }, { "epoch": 0.4916378957223945, "grad_norm": 1.828125, "learning_rate": 0.00018182405004268408, "loss": 2.8959, "step": 10502 }, { "epoch": 0.49168470946222714, "grad_norm": 1.265625, "learning_rate": 0.00018182069416199155, "loss": 2.6877, "step": 10503 }, { "epoch": 0.4917315232020598, "grad_norm": 1.421875, "learning_rate": 0.0001818173380024997, "loss": 3.0854, "step": 10504 }, { "epoch": 0.49177833694189244, "grad_norm": 1.5, "learning_rate": 0.00018181398156422008, "loss": 3.1039, "step": 10505 }, { "epoch": 0.4918251506817251, "grad_norm": 1.1796875, "learning_rate": 0.000181810624847164, "loss": 2.927, "step": 10506 }, { "epoch": 0.49187196442155773, "grad_norm": 1.40625, "learning_rate": 0.00018180726785134297, "loss": 2.9018, "step": 10507 }, { "epoch": 0.49191877816139035, "grad_norm": 1.3828125, "learning_rate": 0.00018180391057676839, "loss": 2.8127, "step": 10508 }, { "epoch": 0.491965591901223, "grad_norm": 1.3046875, "learning_rate": 0.0001818005530234517, "loss": 3.5422, "step": 10509 }, { "epoch": 0.49201240564105564, "grad_norm": 1.0859375, "learning_rate": 0.00018179719519140436, "loss": 2.4725, "step": 10510 }, { "epoch": 0.4920592193808883, "grad_norm": 1.421875, "learning_rate": 0.00018179383708063785, "loss": 2.7983, "step": 10511 }, { "epoch": 0.49210603312072093, "grad_norm": 1.421875, "learning_rate": 0.0001817904786911635, "loss": 2.9299, "step": 10512 }, { "epoch": 0.49215284686055355, "grad_norm": 2.0, "learning_rate": 0.00018178712002299286, "loss": 3.1072, "step": 10513 }, { "epoch": 0.4921996606003862, "grad_norm": 1.6015625, "learning_rate": 0.00018178376107613734, "loss": 2.8378, "step": 10514 }, { "epoch": 0.49224647434021884, "grad_norm": 1.5859375, "learning_rate": 0.00018178040185060836, "loss": 3.1862, "step": 10515 }, { "epoch": 0.4922932880800515, "grad_norm": 1.1953125, "learning_rate": 0.00018177704234641738, "loss": 3.0369, "step": 10516 }, { "epoch": 0.49234010181988414, "grad_norm": 1.3671875, "learning_rate": 0.0001817736825635759, "loss": 2.6177, "step": 10517 }, { "epoch": 0.49238691555971675, "grad_norm": 1.421875, "learning_rate": 0.0001817703225020953, "loss": 2.8554, "step": 10518 }, { "epoch": 0.4924337292995494, "grad_norm": 1.203125, "learning_rate": 0.00018176696216198703, "loss": 2.7479, "step": 10519 }, { "epoch": 0.49248054303938205, "grad_norm": 1.546875, "learning_rate": 0.00018176360154326256, "loss": 3.2504, "step": 10520 }, { "epoch": 0.4925273567792147, "grad_norm": 1.390625, "learning_rate": 0.00018176024064593338, "loss": 2.9304, "step": 10521 }, { "epoch": 0.49257417051904734, "grad_norm": 1.9140625, "learning_rate": 0.0001817568794700109, "loss": 3.3107, "step": 10522 }, { "epoch": 0.49262098425887996, "grad_norm": 1.5, "learning_rate": 0.00018175351801550654, "loss": 3.1885, "step": 10523 }, { "epoch": 0.49266779799871263, "grad_norm": 1.171875, "learning_rate": 0.00018175015628243185, "loss": 2.6131, "step": 10524 }, { "epoch": 0.49271461173854525, "grad_norm": 1.4609375, "learning_rate": 0.0001817467942707982, "loss": 2.8621, "step": 10525 }, { "epoch": 0.4927614254783779, "grad_norm": 1.046875, "learning_rate": 0.0001817434319806171, "loss": 3.0718, "step": 10526 }, { "epoch": 0.49280823921821054, "grad_norm": 1.421875, "learning_rate": 0.00018174006941189993, "loss": 2.7964, "step": 10527 }, { "epoch": 0.49285505295804316, "grad_norm": 1.515625, "learning_rate": 0.00018173670656465823, "loss": 2.8926, "step": 10528 }, { "epoch": 0.49290186669787583, "grad_norm": 1.09375, "learning_rate": 0.00018173334343890343, "loss": 3.1163, "step": 10529 }, { "epoch": 0.49294868043770845, "grad_norm": 1.2578125, "learning_rate": 0.00018172998003464696, "loss": 2.8285, "step": 10530 }, { "epoch": 0.4929954941775411, "grad_norm": 1.2890625, "learning_rate": 0.00018172661635190035, "loss": 2.7313, "step": 10531 }, { "epoch": 0.49304230791737375, "grad_norm": 1.3828125, "learning_rate": 0.000181723252390675, "loss": 2.8627, "step": 10532 }, { "epoch": 0.49308912165720636, "grad_norm": 1.3515625, "learning_rate": 0.00018171988815098236, "loss": 2.7265, "step": 10533 }, { "epoch": 0.49313593539703904, "grad_norm": 1.484375, "learning_rate": 0.00018171652363283393, "loss": 2.7611, "step": 10534 }, { "epoch": 0.49318274913687166, "grad_norm": 1.3125, "learning_rate": 0.0001817131588362412, "loss": 2.8155, "step": 10535 }, { "epoch": 0.49322956287670433, "grad_norm": 1.296875, "learning_rate": 0.0001817097937612156, "loss": 3.0248, "step": 10536 }, { "epoch": 0.49327637661653695, "grad_norm": 1.4921875, "learning_rate": 0.00018170642840776858, "loss": 2.8038, "step": 10537 }, { "epoch": 0.49332319035636957, "grad_norm": 1.3515625, "learning_rate": 0.00018170306277591166, "loss": 2.4053, "step": 10538 }, { "epoch": 0.49337000409620224, "grad_norm": 1.3359375, "learning_rate": 0.00018169969686565625, "loss": 2.5588, "step": 10539 }, { "epoch": 0.49341681783603486, "grad_norm": 1.2265625, "learning_rate": 0.00018169633067701386, "loss": 3.2261, "step": 10540 }, { "epoch": 0.49346363157586753, "grad_norm": 1.328125, "learning_rate": 0.00018169296420999593, "loss": 2.9052, "step": 10541 }, { "epoch": 0.49351044531570015, "grad_norm": 1.5, "learning_rate": 0.00018168959746461396, "loss": 2.4406, "step": 10542 }, { "epoch": 0.4935572590555328, "grad_norm": 1.7890625, "learning_rate": 0.0001816862304408794, "loss": 3.1338, "step": 10543 }, { "epoch": 0.49360407279536545, "grad_norm": 2.828125, "learning_rate": 0.00018168286313880374, "loss": 2.8737, "step": 10544 }, { "epoch": 0.49365088653519806, "grad_norm": 1.4296875, "learning_rate": 0.00018167949555839843, "loss": 2.7966, "step": 10545 }, { "epoch": 0.49369770027503074, "grad_norm": 1.3828125, "learning_rate": 0.00018167612769967497, "loss": 2.7851, "step": 10546 }, { "epoch": 0.49374451401486336, "grad_norm": 1.203125, "learning_rate": 0.00018167275956264484, "loss": 2.8142, "step": 10547 }, { "epoch": 0.49379132775469603, "grad_norm": 1.3828125, "learning_rate": 0.00018166939114731947, "loss": 2.8124, "step": 10548 }, { "epoch": 0.49383814149452865, "grad_norm": 1.3125, "learning_rate": 0.0001816660224537104, "loss": 2.7956, "step": 10549 }, { "epoch": 0.49388495523436127, "grad_norm": 1.9453125, "learning_rate": 0.00018166265348182906, "loss": 3.0506, "step": 10550 }, { "epoch": 0.49393176897419394, "grad_norm": 2.84375, "learning_rate": 0.00018165928423168696, "loss": 2.7275, "step": 10551 }, { "epoch": 0.49397858271402656, "grad_norm": 1.25, "learning_rate": 0.0001816559147032956, "loss": 3.099, "step": 10552 }, { "epoch": 0.49402539645385923, "grad_norm": 1.296875, "learning_rate": 0.0001816525448966664, "loss": 2.4196, "step": 10553 }, { "epoch": 0.49407221019369185, "grad_norm": 1.265625, "learning_rate": 0.00018164917481181084, "loss": 2.8248, "step": 10554 }, { "epoch": 0.49411902393352447, "grad_norm": 1.5859375, "learning_rate": 0.00018164580444874048, "loss": 3.6399, "step": 10555 }, { "epoch": 0.49416583767335714, "grad_norm": 1.6796875, "learning_rate": 0.00018164243380746676, "loss": 2.5225, "step": 10556 }, { "epoch": 0.49421265141318976, "grad_norm": 1.3203125, "learning_rate": 0.00018163906288800114, "loss": 2.9547, "step": 10557 }, { "epoch": 0.49425946515302244, "grad_norm": 1.7109375, "learning_rate": 0.00018163569169035512, "loss": 2.8241, "step": 10558 }, { "epoch": 0.49430627889285506, "grad_norm": 1.9765625, "learning_rate": 0.00018163232021454025, "loss": 2.8517, "step": 10559 }, { "epoch": 0.4943530926326877, "grad_norm": 1.375, "learning_rate": 0.00018162894846056795, "loss": 2.5076, "step": 10560 }, { "epoch": 0.49439990637252035, "grad_norm": 1.4453125, "learning_rate": 0.00018162557642844971, "loss": 2.8812, "step": 10561 }, { "epoch": 0.49444672011235297, "grad_norm": 1.25, "learning_rate": 0.00018162220411819703, "loss": 2.8908, "step": 10562 }, { "epoch": 0.49449353385218564, "grad_norm": 1.3203125, "learning_rate": 0.0001816188315298214, "loss": 2.8954, "step": 10563 }, { "epoch": 0.49454034759201826, "grad_norm": 1.53125, "learning_rate": 0.00018161545866333438, "loss": 2.3754, "step": 10564 }, { "epoch": 0.4945871613318509, "grad_norm": 1.21875, "learning_rate": 0.00018161208551874735, "loss": 3.0403, "step": 10565 }, { "epoch": 0.49463397507168355, "grad_norm": 8.5, "learning_rate": 0.00018160871209607187, "loss": 3.8115, "step": 10566 }, { "epoch": 0.49468078881151617, "grad_norm": 1.640625, "learning_rate": 0.00018160533839531944, "loss": 3.0076, "step": 10567 }, { "epoch": 0.49472760255134884, "grad_norm": 1.53125, "learning_rate": 0.0001816019644165015, "loss": 2.7412, "step": 10568 }, { "epoch": 0.49477441629118146, "grad_norm": 1.9296875, "learning_rate": 0.0001815985901596296, "loss": 2.9899, "step": 10569 }, { "epoch": 0.4948212300310141, "grad_norm": 2.09375, "learning_rate": 0.00018159521562471524, "loss": 2.7247, "step": 10570 }, { "epoch": 0.49486804377084675, "grad_norm": 1.1953125, "learning_rate": 0.00018159184081176986, "loss": 3.0654, "step": 10571 }, { "epoch": 0.4949148575106794, "grad_norm": 1.1640625, "learning_rate": 0.00018158846572080504, "loss": 3.0153, "step": 10572 }, { "epoch": 0.49496167125051205, "grad_norm": 1.5078125, "learning_rate": 0.00018158509035183222, "loss": 3.1108, "step": 10573 }, { "epoch": 0.49500848499034467, "grad_norm": 1.2578125, "learning_rate": 0.0001815817147048629, "loss": 2.8478, "step": 10574 }, { "epoch": 0.4950552987301773, "grad_norm": 1.296875, "learning_rate": 0.00018157833877990864, "loss": 3.357, "step": 10575 }, { "epoch": 0.49510211247000996, "grad_norm": 1.0546875, "learning_rate": 0.00018157496257698087, "loss": 2.9252, "step": 10576 }, { "epoch": 0.4951489262098426, "grad_norm": 1.5, "learning_rate": 0.00018157158609609115, "loss": 2.7435, "step": 10577 }, { "epoch": 0.49519573994967525, "grad_norm": 1.1328125, "learning_rate": 0.00018156820933725096, "loss": 2.7287, "step": 10578 }, { "epoch": 0.49524255368950787, "grad_norm": 1.84375, "learning_rate": 0.0001815648323004718, "loss": 3.1719, "step": 10579 }, { "epoch": 0.4952893674293405, "grad_norm": 0.96484375, "learning_rate": 0.00018156145498576521, "loss": 2.5366, "step": 10580 }, { "epoch": 0.49533618116917316, "grad_norm": 1.59375, "learning_rate": 0.0001815580773931427, "loss": 2.7754, "step": 10581 }, { "epoch": 0.4953829949090058, "grad_norm": 1.890625, "learning_rate": 0.0001815546995226157, "loss": 3.1258, "step": 10582 }, { "epoch": 0.49542980864883845, "grad_norm": 1.421875, "learning_rate": 0.00018155132137419582, "loss": 2.7033, "step": 10583 }, { "epoch": 0.4954766223886711, "grad_norm": 1.359375, "learning_rate": 0.0001815479429478945, "loss": 2.6326, "step": 10584 }, { "epoch": 0.4955234361285037, "grad_norm": 1.3515625, "learning_rate": 0.00018154456424372329, "loss": 3.1988, "step": 10585 }, { "epoch": 0.49557024986833637, "grad_norm": 1.171875, "learning_rate": 0.00018154118526169367, "loss": 2.8071, "step": 10586 }, { "epoch": 0.495617063608169, "grad_norm": 1.2265625, "learning_rate": 0.0001815378060018172, "loss": 2.7723, "step": 10587 }, { "epoch": 0.49566387734800166, "grad_norm": 1.2265625, "learning_rate": 0.00018153442646410536, "loss": 2.4735, "step": 10588 }, { "epoch": 0.4957106910878343, "grad_norm": 1.84375, "learning_rate": 0.00018153104664856966, "loss": 2.9891, "step": 10589 }, { "epoch": 0.4957575048276669, "grad_norm": 1.7890625, "learning_rate": 0.00018152766655522165, "loss": 2.973, "step": 10590 }, { "epoch": 0.49580431856749957, "grad_norm": 1.3515625, "learning_rate": 0.00018152428618407282, "loss": 3.0537, "step": 10591 }, { "epoch": 0.4958511323073322, "grad_norm": 1.390625, "learning_rate": 0.00018152090553513473, "loss": 2.7354, "step": 10592 }, { "epoch": 0.49589794604716486, "grad_norm": 1.109375, "learning_rate": 0.00018151752460841882, "loss": 2.8216, "step": 10593 }, { "epoch": 0.4959447597869975, "grad_norm": 1.1171875, "learning_rate": 0.0001815141434039367, "loss": 2.932, "step": 10594 }, { "epoch": 0.4959915735268301, "grad_norm": 1.3671875, "learning_rate": 0.0001815107619216998, "loss": 3.0547, "step": 10595 }, { "epoch": 0.4960383872666628, "grad_norm": 1.8046875, "learning_rate": 0.0001815073801617197, "loss": 3.0357, "step": 10596 }, { "epoch": 0.4960852010064954, "grad_norm": 1.25, "learning_rate": 0.00018150399812400796, "loss": 3.0169, "step": 10597 }, { "epoch": 0.49613201474632806, "grad_norm": 1.3984375, "learning_rate": 0.000181500615808576, "loss": 2.9844, "step": 10598 }, { "epoch": 0.4961788284861607, "grad_norm": 1.671875, "learning_rate": 0.00018149723321543544, "loss": 3.0142, "step": 10599 }, { "epoch": 0.4962256422259933, "grad_norm": 1.3515625, "learning_rate": 0.00018149385034459776, "loss": 3.0733, "step": 10600 }, { "epoch": 0.496272455965826, "grad_norm": 1.59375, "learning_rate": 0.00018149046719607447, "loss": 2.9533, "step": 10601 }, { "epoch": 0.4963192697056586, "grad_norm": 1.375, "learning_rate": 0.00018148708376987716, "loss": 2.6384, "step": 10602 }, { "epoch": 0.49636608344549127, "grad_norm": 1.6796875, "learning_rate": 0.0001814837000660173, "loss": 3.0796, "step": 10603 }, { "epoch": 0.4964128971853239, "grad_norm": 1.6328125, "learning_rate": 0.00018148031608450645, "loss": 3.0899, "step": 10604 }, { "epoch": 0.49645971092515656, "grad_norm": 1.2265625, "learning_rate": 0.00018147693182535616, "loss": 3.0261, "step": 10605 }, { "epoch": 0.4965065246649892, "grad_norm": 1.2734375, "learning_rate": 0.00018147354728857788, "loss": 2.847, "step": 10606 }, { "epoch": 0.4965533384048218, "grad_norm": 1.3828125, "learning_rate": 0.00018147016247418323, "loss": 2.9975, "step": 10607 }, { "epoch": 0.49660015214465447, "grad_norm": 1.1875, "learning_rate": 0.0001814667773821837, "loss": 2.9634, "step": 10608 }, { "epoch": 0.4966469658844871, "grad_norm": 1.4453125, "learning_rate": 0.00018146339201259083, "loss": 2.9999, "step": 10609 }, { "epoch": 0.49669377962431976, "grad_norm": 1.21875, "learning_rate": 0.00018146000636541616, "loss": 2.8545, "step": 10610 }, { "epoch": 0.4967405933641524, "grad_norm": 1.2109375, "learning_rate": 0.00018145662044067126, "loss": 3.5489, "step": 10611 }, { "epoch": 0.496787407103985, "grad_norm": 1.1640625, "learning_rate": 0.0001814532342383676, "loss": 2.6672, "step": 10612 }, { "epoch": 0.4968342208438177, "grad_norm": 1.5625, "learning_rate": 0.00018144984775851676, "loss": 2.3367, "step": 10613 }, { "epoch": 0.4968810345836503, "grad_norm": 1.2109375, "learning_rate": 0.00018144646100113027, "loss": 2.8106, "step": 10614 }, { "epoch": 0.49692784832348297, "grad_norm": 1.4375, "learning_rate": 0.0001814430739662197, "loss": 2.9207, "step": 10615 }, { "epoch": 0.4969746620633156, "grad_norm": 1.6796875, "learning_rate": 0.00018143968665379655, "loss": 3.1257, "step": 10616 }, { "epoch": 0.4970214758031482, "grad_norm": 1.5234375, "learning_rate": 0.00018143629906387234, "loss": 2.2866, "step": 10617 }, { "epoch": 0.4970682895429809, "grad_norm": 0.91796875, "learning_rate": 0.00018143291119645865, "loss": 2.6883, "step": 10618 }, { "epoch": 0.4971151032828135, "grad_norm": 1.421875, "learning_rate": 0.00018142952305156705, "loss": 2.812, "step": 10619 }, { "epoch": 0.49716191702264617, "grad_norm": 1.2421875, "learning_rate": 0.0001814261346292091, "loss": 3.1743, "step": 10620 }, { "epoch": 0.4972087307624788, "grad_norm": 1.546875, "learning_rate": 0.00018142274592939623, "loss": 2.9208, "step": 10621 }, { "epoch": 0.4972555445023114, "grad_norm": 1.3203125, "learning_rate": 0.00018141935695214007, "loss": 2.9434, "step": 10622 }, { "epoch": 0.4973023582421441, "grad_norm": 1.6328125, "learning_rate": 0.00018141596769745218, "loss": 2.6186, "step": 10623 }, { "epoch": 0.4973491719819767, "grad_norm": 1.8515625, "learning_rate": 0.00018141257816534407, "loss": 3.2484, "step": 10624 }, { "epoch": 0.4973959857218094, "grad_norm": 1.2578125, "learning_rate": 0.00018140918835582732, "loss": 3.0789, "step": 10625 }, { "epoch": 0.497442799461642, "grad_norm": 1.515625, "learning_rate": 0.00018140579826891347, "loss": 2.9675, "step": 10626 }, { "epoch": 0.4974896132014746, "grad_norm": 1.25, "learning_rate": 0.00018140240790461408, "loss": 2.6404, "step": 10627 }, { "epoch": 0.4975364269413073, "grad_norm": 1.4453125, "learning_rate": 0.00018139901726294065, "loss": 2.9796, "step": 10628 }, { "epoch": 0.4975832406811399, "grad_norm": 1.375, "learning_rate": 0.0001813956263439048, "loss": 2.9289, "step": 10629 }, { "epoch": 0.4976300544209726, "grad_norm": 1.5703125, "learning_rate": 0.00018139223514751805, "loss": 3.0756, "step": 10630 }, { "epoch": 0.4976768681608052, "grad_norm": 1.5625, "learning_rate": 0.00018138884367379195, "loss": 2.7944, "step": 10631 }, { "epoch": 0.4977236819006378, "grad_norm": 1.515625, "learning_rate": 0.00018138545192273808, "loss": 2.8534, "step": 10632 }, { "epoch": 0.4977704956404705, "grad_norm": 1.4609375, "learning_rate": 0.00018138205989436798, "loss": 3.1461, "step": 10633 }, { "epoch": 0.4978173093803031, "grad_norm": 1.4609375, "learning_rate": 0.00018137866758869325, "loss": 2.4888, "step": 10634 }, { "epoch": 0.4978641231201358, "grad_norm": 1.4375, "learning_rate": 0.00018137527500572536, "loss": 3.1824, "step": 10635 }, { "epoch": 0.4979109368599684, "grad_norm": 1.4921875, "learning_rate": 0.000181371882145476, "loss": 3.0381, "step": 10636 }, { "epoch": 0.497957750599801, "grad_norm": 1.6953125, "learning_rate": 0.0001813684890079566, "loss": 2.8145, "step": 10637 }, { "epoch": 0.4980045643396337, "grad_norm": 1.796875, "learning_rate": 0.00018136509559317875, "loss": 2.8054, "step": 10638 }, { "epoch": 0.4980513780794663, "grad_norm": 0.9609375, "learning_rate": 0.0001813617019011541, "loss": 2.2351, "step": 10639 }, { "epoch": 0.498098191819299, "grad_norm": 1.5703125, "learning_rate": 0.00018135830793189413, "loss": 3.153, "step": 10640 }, { "epoch": 0.4981450055591316, "grad_norm": 1.2890625, "learning_rate": 0.00018135491368541042, "loss": 3.1277, "step": 10641 }, { "epoch": 0.4981918192989642, "grad_norm": 1.3515625, "learning_rate": 0.00018135151916171455, "loss": 3.2773, "step": 10642 }, { "epoch": 0.4982386330387969, "grad_norm": 1.21875, "learning_rate": 0.00018134812436081807, "loss": 3.0045, "step": 10643 }, { "epoch": 0.4982854467786295, "grad_norm": 1.671875, "learning_rate": 0.0001813447292827326, "loss": 3.031, "step": 10644 }, { "epoch": 0.4983322605184622, "grad_norm": 1.40625, "learning_rate": 0.00018134133392746964, "loss": 2.935, "step": 10645 }, { "epoch": 0.4983790742582948, "grad_norm": 1.4140625, "learning_rate": 0.0001813379382950408, "loss": 2.6989, "step": 10646 }, { "epoch": 0.4984258879981274, "grad_norm": 1.796875, "learning_rate": 0.0001813345423854576, "loss": 3.8301, "step": 10647 }, { "epoch": 0.4984727017379601, "grad_norm": 1.21875, "learning_rate": 0.00018133114619873166, "loss": 2.7616, "step": 10648 }, { "epoch": 0.4985195154777927, "grad_norm": 1.15625, "learning_rate": 0.00018132774973487456, "loss": 2.7848, "step": 10649 }, { "epoch": 0.4985663292176254, "grad_norm": 1.1484375, "learning_rate": 0.00018132435299389785, "loss": 2.9274, "step": 10650 }, { "epoch": 0.498613142957458, "grad_norm": 1.234375, "learning_rate": 0.00018132095597581308, "loss": 2.9378, "step": 10651 }, { "epoch": 0.49865995669729063, "grad_norm": 1.203125, "learning_rate": 0.0001813175586806319, "loss": 2.8737, "step": 10652 }, { "epoch": 0.4987067704371233, "grad_norm": 1.5, "learning_rate": 0.0001813141611083658, "loss": 2.9693, "step": 10653 }, { "epoch": 0.4987535841769559, "grad_norm": 1.09375, "learning_rate": 0.00018131076325902644, "loss": 2.8764, "step": 10654 }, { "epoch": 0.4988003979167886, "grad_norm": 1.4453125, "learning_rate": 0.0001813073651326253, "loss": 2.9971, "step": 10655 }, { "epoch": 0.4988472116566212, "grad_norm": 1.640625, "learning_rate": 0.00018130396672917404, "loss": 3.1297, "step": 10656 }, { "epoch": 0.49889402539645383, "grad_norm": 1.6953125, "learning_rate": 0.0001813005680486842, "loss": 2.59, "step": 10657 }, { "epoch": 0.4989408391362865, "grad_norm": 1.3125, "learning_rate": 0.0001812971690911674, "loss": 2.8815, "step": 10658 }, { "epoch": 0.4989876528761191, "grad_norm": 1.4609375, "learning_rate": 0.0001812937698566352, "loss": 3.1225, "step": 10659 }, { "epoch": 0.4990344666159518, "grad_norm": 1.359375, "learning_rate": 0.00018129037034509912, "loss": 2.9803, "step": 10660 }, { "epoch": 0.4990812803557844, "grad_norm": 1.34375, "learning_rate": 0.00018128697055657083, "loss": 3.0691, "step": 10661 }, { "epoch": 0.49912809409561704, "grad_norm": 1.140625, "learning_rate": 0.0001812835704910619, "loss": 4.1247, "step": 10662 }, { "epoch": 0.4991749078354497, "grad_norm": 1.2578125, "learning_rate": 0.00018128017014858391, "loss": 2.5383, "step": 10663 }, { "epoch": 0.49922172157528233, "grad_norm": 2.171875, "learning_rate": 0.00018127676952914843, "loss": 3.0701, "step": 10664 }, { "epoch": 0.499268535315115, "grad_norm": 1.546875, "learning_rate": 0.00018127336863276705, "loss": 2.0218, "step": 10665 }, { "epoch": 0.4993153490549476, "grad_norm": 1.15625, "learning_rate": 0.00018126996745945135, "loss": 3.0703, "step": 10666 }, { "epoch": 0.4993621627947803, "grad_norm": 1.3125, "learning_rate": 0.00018126656600921296, "loss": 2.7241, "step": 10667 }, { "epoch": 0.4994089765346129, "grad_norm": 1.4375, "learning_rate": 0.00018126316428206346, "loss": 2.6086, "step": 10668 }, { "epoch": 0.49945579027444553, "grad_norm": 2.09375, "learning_rate": 0.00018125976227801439, "loss": 2.4437, "step": 10669 }, { "epoch": 0.4995026040142782, "grad_norm": 3.40625, "learning_rate": 0.0001812563599970774, "loss": 2.3272, "step": 10670 }, { "epoch": 0.4995494177541108, "grad_norm": 1.3515625, "learning_rate": 0.00018125295743926404, "loss": 3.0979, "step": 10671 }, { "epoch": 0.4995962314939435, "grad_norm": 1.2421875, "learning_rate": 0.00018124955460458591, "loss": 3.0463, "step": 10672 }, { "epoch": 0.4996430452337761, "grad_norm": 1.4375, "learning_rate": 0.00018124615149305465, "loss": 3.4484, "step": 10673 }, { "epoch": 0.49968985897360874, "grad_norm": 1.1796875, "learning_rate": 0.0001812427481046818, "loss": 3.1195, "step": 10674 }, { "epoch": 0.4997366727134414, "grad_norm": 2.015625, "learning_rate": 0.000181239344439479, "loss": 2.9887, "step": 10675 }, { "epoch": 0.49978348645327403, "grad_norm": 1.5546875, "learning_rate": 0.00018123594049745782, "loss": 2.9473, "step": 10676 }, { "epoch": 0.4998303001931067, "grad_norm": 1.46875, "learning_rate": 0.00018123253627862988, "loss": 2.9395, "step": 10677 }, { "epoch": 0.4998771139329393, "grad_norm": 1.6015625, "learning_rate": 0.00018122913178300677, "loss": 2.9563, "step": 10678 }, { "epoch": 0.49992392767277194, "grad_norm": 2.296875, "learning_rate": 0.0001812257270106001, "loss": 3.1384, "step": 10679 }, { "epoch": 0.4999707414126046, "grad_norm": 1.4765625, "learning_rate": 0.00018122232196142143, "loss": 3.2357, "step": 10680 }, { "epoch": 0.5000175551524373, "grad_norm": 1.25, "learning_rate": 0.00018121891663548241, "loss": 2.7576, "step": 10681 }, { "epoch": 0.5000643688922699, "grad_norm": 1.1875, "learning_rate": 0.00018121551103279462, "loss": 2.702, "step": 10682 }, { "epoch": 0.5001111826321025, "grad_norm": 1.546875, "learning_rate": 0.00018121210515336967, "loss": 2.5943, "step": 10683 }, { "epoch": 0.5001579963719351, "grad_norm": 1.0703125, "learning_rate": 0.00018120869899721916, "loss": 2.8096, "step": 10684 }, { "epoch": 0.5002048101117678, "grad_norm": 1.203125, "learning_rate": 0.00018120529256435472, "loss": 2.7864, "step": 10685 }, { "epoch": 0.5002516238516005, "grad_norm": 1.4140625, "learning_rate": 0.0001812018858547879, "loss": 3.2158, "step": 10686 }, { "epoch": 0.5002984375914331, "grad_norm": 1.4921875, "learning_rate": 0.0001811984788685304, "loss": 2.6194, "step": 10687 }, { "epoch": 0.5003452513312657, "grad_norm": 1.234375, "learning_rate": 0.00018119507160559376, "loss": 2.3364, "step": 10688 }, { "epoch": 0.5003920650710983, "grad_norm": 1.3203125, "learning_rate": 0.0001811916640659896, "loss": 2.8234, "step": 10689 }, { "epoch": 0.500438878810931, "grad_norm": 1.1171875, "learning_rate": 0.00018118825624972954, "loss": 2.7923, "step": 10690 }, { "epoch": 0.5004856925507637, "grad_norm": 1.15625, "learning_rate": 0.00018118484815682516, "loss": 2.5851, "step": 10691 }, { "epoch": 0.5005325062905963, "grad_norm": 1.7890625, "learning_rate": 0.00018118143978728814, "loss": 2.8374, "step": 10692 }, { "epoch": 0.5005793200304289, "grad_norm": 1.3046875, "learning_rate": 0.00018117803114113006, "loss": 3.3329, "step": 10693 }, { "epoch": 0.5006261337702615, "grad_norm": 1.5625, "learning_rate": 0.0001811746222183625, "loss": 2.8762, "step": 10694 }, { "epoch": 0.5006729475100942, "grad_norm": 1.328125, "learning_rate": 0.00018117121301899713, "loss": 3.2125, "step": 10695 }, { "epoch": 0.5007197612499269, "grad_norm": 1.2109375, "learning_rate": 0.00018116780354304553, "loss": 2.7434, "step": 10696 }, { "epoch": 0.5007665749897595, "grad_norm": 1.6171875, "learning_rate": 0.00018116439379051936, "loss": 3.1224, "step": 10697 }, { "epoch": 0.5008133887295921, "grad_norm": 1.8984375, "learning_rate": 0.00018116098376143018, "loss": 2.7858, "step": 10698 }, { "epoch": 0.5008602024694248, "grad_norm": 1.265625, "learning_rate": 0.00018115757345578965, "loss": 2.6082, "step": 10699 }, { "epoch": 0.5009070162092574, "grad_norm": 1.3203125, "learning_rate": 0.00018115416287360937, "loss": 3.0356, "step": 10700 }, { "epoch": 0.5009538299490901, "grad_norm": 1.2109375, "learning_rate": 0.000181150752014901, "loss": 4.8578, "step": 10701 }, { "epoch": 0.5010006436889227, "grad_norm": 1.1171875, "learning_rate": 0.0001811473408796761, "loss": 2.6834, "step": 10702 }, { "epoch": 0.5010474574287553, "grad_norm": 1.3046875, "learning_rate": 0.00018114392946794636, "loss": 2.6113, "step": 10703 }, { "epoch": 0.501094271168588, "grad_norm": 1.4140625, "learning_rate": 0.00018114051777972333, "loss": 2.826, "step": 10704 }, { "epoch": 0.5011410849084206, "grad_norm": 1.0546875, "learning_rate": 0.0001811371058150187, "loss": 2.7023, "step": 10705 }, { "epoch": 0.5011878986482533, "grad_norm": 1.3984375, "learning_rate": 0.00018113369357384407, "loss": 2.9598, "step": 10706 }, { "epoch": 0.5012347123880859, "grad_norm": 1.234375, "learning_rate": 0.00018113028105621107, "loss": 2.9495, "step": 10707 }, { "epoch": 0.5012815261279185, "grad_norm": 1.6640625, "learning_rate": 0.0001811268682621313, "loss": 2.7173, "step": 10708 }, { "epoch": 0.5013283398677512, "grad_norm": 1.375, "learning_rate": 0.00018112345519161646, "loss": 2.9292, "step": 10709 }, { "epoch": 0.5013751536075838, "grad_norm": 1.1796875, "learning_rate": 0.00018112004184467811, "loss": 2.8025, "step": 10710 }, { "epoch": 0.5014219673474165, "grad_norm": 1.3203125, "learning_rate": 0.00018111662822132792, "loss": 2.9175, "step": 10711 }, { "epoch": 0.5014687810872491, "grad_norm": 1.15625, "learning_rate": 0.00018111321432157752, "loss": 4.1475, "step": 10712 }, { "epoch": 0.5015155948270817, "grad_norm": 1.2734375, "learning_rate": 0.00018110980014543848, "loss": 2.5734, "step": 10713 }, { "epoch": 0.5015624085669144, "grad_norm": 1.328125, "learning_rate": 0.0001811063856929225, "loss": 3.2413, "step": 10714 }, { "epoch": 0.501609222306747, "grad_norm": 2.015625, "learning_rate": 0.00018110297096404123, "loss": 3.0207, "step": 10715 }, { "epoch": 0.5016560360465797, "grad_norm": 1.3515625, "learning_rate": 0.00018109955595880625, "loss": 3.3791, "step": 10716 }, { "epoch": 0.5017028497864123, "grad_norm": 1.5703125, "learning_rate": 0.00018109614067722923, "loss": 3.0621, "step": 10717 }, { "epoch": 0.501749663526245, "grad_norm": 0.98828125, "learning_rate": 0.00018109272511932179, "loss": 2.505, "step": 10718 }, { "epoch": 0.5017964772660776, "grad_norm": 1.296875, "learning_rate": 0.0001810893092850956, "loss": 2.6361, "step": 10719 }, { "epoch": 0.5018432910059102, "grad_norm": 1.4453125, "learning_rate": 0.00018108589317456227, "loss": 3.1109, "step": 10720 }, { "epoch": 0.5018901047457429, "grad_norm": 1.3046875, "learning_rate": 0.00018108247678773342, "loss": 2.8982, "step": 10721 }, { "epoch": 0.5019369184855755, "grad_norm": 1.1640625, "learning_rate": 0.00018107906012462073, "loss": 2.964, "step": 10722 }, { "epoch": 0.5019837322254082, "grad_norm": 1.4921875, "learning_rate": 0.00018107564318523584, "loss": 2.8976, "step": 10723 }, { "epoch": 0.5020305459652408, "grad_norm": 1.40625, "learning_rate": 0.00018107222596959037, "loss": 2.8239, "step": 10724 }, { "epoch": 0.5020773597050734, "grad_norm": 1.2890625, "learning_rate": 0.00018106880847769597, "loss": 3.1807, "step": 10725 }, { "epoch": 0.5021241734449061, "grad_norm": 1.8125, "learning_rate": 0.00018106539070956429, "loss": 3.5719, "step": 10726 }, { "epoch": 0.5021709871847387, "grad_norm": 1.421875, "learning_rate": 0.00018106197266520702, "loss": 2.7545, "step": 10727 }, { "epoch": 0.5022178009245714, "grad_norm": 2.28125, "learning_rate": 0.0001810585543446357, "loss": 3.1503, "step": 10728 }, { "epoch": 0.502264614664404, "grad_norm": 1.2265625, "learning_rate": 0.00018105513574786207, "loss": 2.9082, "step": 10729 }, { "epoch": 0.5023114284042366, "grad_norm": 1.328125, "learning_rate": 0.00018105171687489777, "loss": 2.7597, "step": 10730 }, { "epoch": 0.5023582421440693, "grad_norm": 1.078125, "learning_rate": 0.00018104829772575443, "loss": 2.6827, "step": 10731 }, { "epoch": 0.5024050558839019, "grad_norm": 1.8203125, "learning_rate": 0.00018104487830044366, "loss": 2.1973, "step": 10732 }, { "epoch": 0.5024518696237346, "grad_norm": 1.390625, "learning_rate": 0.00018104145859897716, "loss": 2.7615, "step": 10733 }, { "epoch": 0.5024986833635672, "grad_norm": 1.2734375, "learning_rate": 0.00018103803862136659, "loss": 2.908, "step": 10734 }, { "epoch": 0.5025454971033998, "grad_norm": 1.46875, "learning_rate": 0.0001810346183676236, "loss": 2.5884, "step": 10735 }, { "epoch": 0.5025923108432325, "grad_norm": 1.6328125, "learning_rate": 0.0001810311978377598, "loss": 3.1708, "step": 10736 }, { "epoch": 0.5026391245830651, "grad_norm": 1.828125, "learning_rate": 0.00018102777703178692, "loss": 2.6283, "step": 10737 }, { "epoch": 0.5026859383228978, "grad_norm": 1.28125, "learning_rate": 0.00018102435594971654, "loss": 2.5402, "step": 10738 }, { "epoch": 0.5027327520627304, "grad_norm": 1.078125, "learning_rate": 0.00018102093459156036, "loss": 2.6546, "step": 10739 }, { "epoch": 0.502779565802563, "grad_norm": 1.53125, "learning_rate": 0.00018101751295733002, "loss": 3.0576, "step": 10740 }, { "epoch": 0.5028263795423957, "grad_norm": 1.34375, "learning_rate": 0.00018101409104703722, "loss": 2.7366, "step": 10741 }, { "epoch": 0.5028731932822283, "grad_norm": 1.5078125, "learning_rate": 0.00018101066886069356, "loss": 3.0185, "step": 10742 }, { "epoch": 0.502920007022061, "grad_norm": 1.34375, "learning_rate": 0.00018100724639831073, "loss": 2.967, "step": 10743 }, { "epoch": 0.5029668207618936, "grad_norm": 1.6796875, "learning_rate": 0.0001810038236599004, "loss": 2.9445, "step": 10744 }, { "epoch": 0.5030136345017262, "grad_norm": 1.8125, "learning_rate": 0.00018100040064547422, "loss": 2.6932, "step": 10745 }, { "epoch": 0.5030604482415589, "grad_norm": 1.4296875, "learning_rate": 0.00018099697735504385, "loss": 3.2455, "step": 10746 }, { "epoch": 0.5031072619813916, "grad_norm": 1.1796875, "learning_rate": 0.00018099355378862095, "loss": 2.7566, "step": 10747 }, { "epoch": 0.5031540757212242, "grad_norm": 1.2578125, "learning_rate": 0.0001809901299462172, "loss": 3.0569, "step": 10748 }, { "epoch": 0.5032008894610568, "grad_norm": 1.4609375, "learning_rate": 0.00018098670582784428, "loss": 3.1541, "step": 10749 }, { "epoch": 0.5032477032008894, "grad_norm": 1.328125, "learning_rate": 0.00018098328143351385, "loss": 2.8816, "step": 10750 }, { "epoch": 0.5032945169407221, "grad_norm": 1.2109375, "learning_rate": 0.00018097985676323756, "loss": 2.5615, "step": 10751 }, { "epoch": 0.5033413306805548, "grad_norm": 1.828125, "learning_rate": 0.00018097643181702705, "loss": 2.6785, "step": 10752 }, { "epoch": 0.5033881444203874, "grad_norm": 1.515625, "learning_rate": 0.00018097300659489408, "loss": 2.7175, "step": 10753 }, { "epoch": 0.50343495816022, "grad_norm": 2.09375, "learning_rate": 0.00018096958109685028, "loss": 3.1539, "step": 10754 }, { "epoch": 0.5034817719000526, "grad_norm": 1.4453125, "learning_rate": 0.00018096615532290725, "loss": 2.8056, "step": 10755 }, { "epoch": 0.5035285856398853, "grad_norm": 1.203125, "learning_rate": 0.00018096272927307678, "loss": 3.0672, "step": 10756 }, { "epoch": 0.503575399379718, "grad_norm": 2.515625, "learning_rate": 0.0001809593029473705, "loss": 3.2433, "step": 10757 }, { "epoch": 0.5036222131195506, "grad_norm": 1.2734375, "learning_rate": 0.00018095587634580002, "loss": 2.6762, "step": 10758 }, { "epoch": 0.5036690268593832, "grad_norm": 1.9453125, "learning_rate": 0.0001809524494683771, "loss": 2.9717, "step": 10759 }, { "epoch": 0.5037158405992159, "grad_norm": 1.3125, "learning_rate": 0.0001809490223151134, "loss": 3.2417, "step": 10760 }, { "epoch": 0.5037626543390485, "grad_norm": 1.25, "learning_rate": 0.00018094559488602055, "loss": 3.1402, "step": 10761 }, { "epoch": 0.5038094680788812, "grad_norm": 1.1640625, "learning_rate": 0.00018094216718111028, "loss": 2.9298, "step": 10762 }, { "epoch": 0.5038562818187138, "grad_norm": 1.328125, "learning_rate": 0.0001809387392003943, "loss": 2.8803, "step": 10763 }, { "epoch": 0.5039030955585464, "grad_norm": 1.5546875, "learning_rate": 0.00018093531094388418, "loss": 2.62, "step": 10764 }, { "epoch": 0.5039499092983791, "grad_norm": 1.9296875, "learning_rate": 0.0001809318824115917, "loss": 3.3764, "step": 10765 }, { "epoch": 0.5039967230382117, "grad_norm": 1.3515625, "learning_rate": 0.0001809284536035285, "loss": 2.7309, "step": 10766 }, { "epoch": 0.5040435367780444, "grad_norm": 1.1171875, "learning_rate": 0.00018092502451970625, "loss": 2.5194, "step": 10767 }, { "epoch": 0.504090350517877, "grad_norm": 1.4375, "learning_rate": 0.00018092159516013668, "loss": 3.1647, "step": 10768 }, { "epoch": 0.5041371642577096, "grad_norm": 1.296875, "learning_rate": 0.00018091816552483146, "loss": 2.9343, "step": 10769 }, { "epoch": 0.5041839779975423, "grad_norm": 1.1796875, "learning_rate": 0.00018091473561380225, "loss": 3.0099, "step": 10770 }, { "epoch": 0.504230791737375, "grad_norm": 1.1328125, "learning_rate": 0.00018091130542706078, "loss": 2.6004, "step": 10771 }, { "epoch": 0.5042776054772076, "grad_norm": 2.375, "learning_rate": 0.0001809078749646187, "loss": 2.8111, "step": 10772 }, { "epoch": 0.5043244192170402, "grad_norm": 1.3359375, "learning_rate": 0.0001809044442264877, "loss": 2.8137, "step": 10773 }, { "epoch": 0.5043712329568728, "grad_norm": 1.0546875, "learning_rate": 0.00018090101321267948, "loss": 3.6845, "step": 10774 }, { "epoch": 0.5044180466967055, "grad_norm": 1.1953125, "learning_rate": 0.00018089758192320576, "loss": 3.0263, "step": 10775 }, { "epoch": 0.5044648604365382, "grad_norm": 1.3515625, "learning_rate": 0.0001808941503580782, "loss": 2.8687, "step": 10776 }, { "epoch": 0.5045116741763708, "grad_norm": 2.390625, "learning_rate": 0.0001808907185173085, "loss": 3.691, "step": 10777 }, { "epoch": 0.5045584879162034, "grad_norm": 1.34375, "learning_rate": 0.00018088728640090832, "loss": 2.991, "step": 10778 }, { "epoch": 0.504605301656036, "grad_norm": 1.1171875, "learning_rate": 0.00018088385400888944, "loss": 2.96, "step": 10779 }, { "epoch": 0.5046521153958687, "grad_norm": 1.453125, "learning_rate": 0.00018088042134126346, "loss": 3.1837, "step": 10780 }, { "epoch": 0.5046989291357014, "grad_norm": 1.015625, "learning_rate": 0.00018087698839804213, "loss": 3.4285, "step": 10781 }, { "epoch": 0.504745742875534, "grad_norm": 1.4453125, "learning_rate": 0.00018087355517923715, "loss": 2.9609, "step": 10782 }, { "epoch": 0.5047925566153666, "grad_norm": 1.5546875, "learning_rate": 0.00018087012168486018, "loss": 2.9627, "step": 10783 }, { "epoch": 0.5048393703551992, "grad_norm": 2.0, "learning_rate": 0.00018086668791492296, "loss": 3.4706, "step": 10784 }, { "epoch": 0.5048861840950319, "grad_norm": 1.0703125, "learning_rate": 0.00018086325386943716, "loss": 2.1703, "step": 10785 }, { "epoch": 0.5049329978348646, "grad_norm": 1.328125, "learning_rate": 0.00018085981954841452, "loss": 2.6108, "step": 10786 }, { "epoch": 0.5049798115746972, "grad_norm": 1.25, "learning_rate": 0.0001808563849518667, "loss": 2.6973, "step": 10787 }, { "epoch": 0.5050266253145298, "grad_norm": 1.15625, "learning_rate": 0.00018085295007980547, "loss": 2.3721, "step": 10788 }, { "epoch": 0.5050734390543624, "grad_norm": 1.5390625, "learning_rate": 0.00018084951493224245, "loss": 2.7011, "step": 10789 }, { "epoch": 0.5051202527941951, "grad_norm": 1.1171875, "learning_rate": 0.00018084607950918938, "loss": 2.7907, "step": 10790 }, { "epoch": 0.5051670665340278, "grad_norm": 1.4296875, "learning_rate": 0.00018084264381065793, "loss": 2.6131, "step": 10791 }, { "epoch": 0.5052138802738604, "grad_norm": 1.5, "learning_rate": 0.0001808392078366599, "loss": 2.5844, "step": 10792 }, { "epoch": 0.505260694013693, "grad_norm": 2.15625, "learning_rate": 0.00018083577158720693, "loss": 3.1979, "step": 10793 }, { "epoch": 0.5053075077535256, "grad_norm": 1.328125, "learning_rate": 0.0001808323350623107, "loss": 2.9302, "step": 10794 }, { "epoch": 0.5053543214933583, "grad_norm": 1.140625, "learning_rate": 0.000180828898261983, "loss": 2.7493, "step": 10795 }, { "epoch": 0.505401135233191, "grad_norm": 1.515625, "learning_rate": 0.00018082546118623554, "loss": 2.6501, "step": 10796 }, { "epoch": 0.5054479489730236, "grad_norm": 1.5078125, "learning_rate": 0.00018082202383507993, "loss": 3.2861, "step": 10797 }, { "epoch": 0.5054947627128562, "grad_norm": 1.484375, "learning_rate": 0.00018081858620852797, "loss": 3.2333, "step": 10798 }, { "epoch": 0.5055415764526888, "grad_norm": 1.453125, "learning_rate": 0.00018081514830659134, "loss": 2.6834, "step": 10799 }, { "epoch": 0.5055883901925216, "grad_norm": 2.140625, "learning_rate": 0.00018081171012928175, "loss": 2.6047, "step": 10800 }, { "epoch": 0.5056352039323542, "grad_norm": 1.6484375, "learning_rate": 0.00018080827167661097, "loss": 3.3383, "step": 10801 }, { "epoch": 0.5056820176721868, "grad_norm": 1.7421875, "learning_rate": 0.00018080483294859065, "loss": 3.0358, "step": 10802 }, { "epoch": 0.5057288314120194, "grad_norm": 1.59375, "learning_rate": 0.00018080139394523252, "loss": 3.2427, "step": 10803 }, { "epoch": 0.505775645151852, "grad_norm": 2.328125, "learning_rate": 0.00018079795466654833, "loss": 2.708, "step": 10804 }, { "epoch": 0.5058224588916848, "grad_norm": 1.8515625, "learning_rate": 0.00018079451511254977, "loss": 2.6852, "step": 10805 }, { "epoch": 0.5058692726315174, "grad_norm": 1.359375, "learning_rate": 0.0001807910752832486, "loss": 2.4426, "step": 10806 }, { "epoch": 0.50591608637135, "grad_norm": 1.34375, "learning_rate": 0.00018078763517865646, "loss": 2.8934, "step": 10807 }, { "epoch": 0.5059629001111826, "grad_norm": 1.5625, "learning_rate": 0.00018078419479878517, "loss": 3.03, "step": 10808 }, { "epoch": 0.5060097138510152, "grad_norm": 1.8828125, "learning_rate": 0.00018078075414364636, "loss": 2.1012, "step": 10809 }, { "epoch": 0.506056527590848, "grad_norm": 1.390625, "learning_rate": 0.00018077731321325185, "loss": 2.6299, "step": 10810 }, { "epoch": 0.5061033413306806, "grad_norm": 1.9453125, "learning_rate": 0.00018077387200761326, "loss": 2.9516, "step": 10811 }, { "epoch": 0.5061501550705132, "grad_norm": 1.78125, "learning_rate": 0.0001807704305267424, "loss": 2.7142, "step": 10812 }, { "epoch": 0.5061969688103458, "grad_norm": 1.3203125, "learning_rate": 0.00018076698877065098, "loss": 2.9194, "step": 10813 }, { "epoch": 0.5062437825501784, "grad_norm": 1.3359375, "learning_rate": 0.0001807635467393507, "loss": 2.7969, "step": 10814 }, { "epoch": 0.5062905962900112, "grad_norm": 1.578125, "learning_rate": 0.00018076010443285329, "loss": 2.9261, "step": 10815 }, { "epoch": 0.5063374100298438, "grad_norm": 1.640625, "learning_rate": 0.0001807566618511705, "loss": 2.986, "step": 10816 }, { "epoch": 0.5063842237696764, "grad_norm": 1.953125, "learning_rate": 0.00018075321899431405, "loss": 3.0022, "step": 10817 }, { "epoch": 0.506431037509509, "grad_norm": 1.2578125, "learning_rate": 0.0001807497758622957, "loss": 2.862, "step": 10818 }, { "epoch": 0.5064778512493416, "grad_norm": 2.140625, "learning_rate": 0.00018074633245512714, "loss": 3.2431, "step": 10819 }, { "epoch": 0.5065246649891744, "grad_norm": 1.5, "learning_rate": 0.0001807428887728201, "loss": 3.1911, "step": 10820 }, { "epoch": 0.506571478729007, "grad_norm": 1.5, "learning_rate": 0.00018073944481538637, "loss": 3.2588, "step": 10821 }, { "epoch": 0.5066182924688396, "grad_norm": 1.2578125, "learning_rate": 0.00018073600058283762, "loss": 3.0708, "step": 10822 }, { "epoch": 0.5066651062086722, "grad_norm": 1.3515625, "learning_rate": 0.00018073255607518562, "loss": 2.7705, "step": 10823 }, { "epoch": 0.5067119199485048, "grad_norm": 1.09375, "learning_rate": 0.0001807291112924421, "loss": 3.0398, "step": 10824 }, { "epoch": 0.5067587336883376, "grad_norm": 1.2734375, "learning_rate": 0.0001807256662346188, "loss": 2.8981, "step": 10825 }, { "epoch": 0.5068055474281702, "grad_norm": 1.3125, "learning_rate": 0.0001807222209017275, "loss": 3.1213, "step": 10826 }, { "epoch": 0.5068523611680028, "grad_norm": 1.359375, "learning_rate": 0.00018071877529377982, "loss": 2.8561, "step": 10827 }, { "epoch": 0.5068991749078354, "grad_norm": 1.3671875, "learning_rate": 0.00018071532941078765, "loss": 3.0348, "step": 10828 }, { "epoch": 0.506945988647668, "grad_norm": 1.0859375, "learning_rate": 0.0001807118832527626, "loss": 3.0175, "step": 10829 }, { "epoch": 0.5069928023875008, "grad_norm": 2.078125, "learning_rate": 0.00018070843681971654, "loss": 2.853, "step": 10830 }, { "epoch": 0.5070396161273334, "grad_norm": 2.953125, "learning_rate": 0.0001807049901116611, "loss": 3.3455, "step": 10831 }, { "epoch": 0.507086429867166, "grad_norm": 1.2421875, "learning_rate": 0.0001807015431286081, "loss": 2.9616, "step": 10832 }, { "epoch": 0.5071332436069986, "grad_norm": 1.1875, "learning_rate": 0.0001806980958705692, "loss": 2.1434, "step": 10833 }, { "epoch": 0.5071800573468312, "grad_norm": 1.5859375, "learning_rate": 0.00018069464833755626, "loss": 2.7168, "step": 10834 }, { "epoch": 0.507226871086664, "grad_norm": 1.4921875, "learning_rate": 0.00018069120052958095, "loss": 2.8282, "step": 10835 }, { "epoch": 0.5072736848264966, "grad_norm": 1.59375, "learning_rate": 0.00018068775244665505, "loss": 2.9995, "step": 10836 }, { "epoch": 0.5073204985663292, "grad_norm": 1.3359375, "learning_rate": 0.00018068430408879027, "loss": 2.3835, "step": 10837 }, { "epoch": 0.5073673123061618, "grad_norm": 1.6015625, "learning_rate": 0.00018068085545599842, "loss": 2.9958, "step": 10838 }, { "epoch": 0.5074141260459945, "grad_norm": 1.4296875, "learning_rate": 0.0001806774065482912, "loss": 3.1139, "step": 10839 }, { "epoch": 0.5074609397858272, "grad_norm": 1.5859375, "learning_rate": 0.00018067395736568037, "loss": 2.7791, "step": 10840 }, { "epoch": 0.5075077535256598, "grad_norm": 1.4921875, "learning_rate": 0.00018067050790817772, "loss": 2.9913, "step": 10841 }, { "epoch": 0.5075545672654924, "grad_norm": 1.328125, "learning_rate": 0.00018066705817579496, "loss": 3.1834, "step": 10842 }, { "epoch": 0.507601381005325, "grad_norm": 1.171875, "learning_rate": 0.0001806636081685439, "loss": 3.0991, "step": 10843 }, { "epoch": 0.5076481947451577, "grad_norm": 2.8125, "learning_rate": 0.0001806601578864362, "loss": 2.8335, "step": 10844 }, { "epoch": 0.5076950084849904, "grad_norm": 1.4375, "learning_rate": 0.0001806567073294837, "loss": 2.9215, "step": 10845 }, { "epoch": 0.507741822224823, "grad_norm": 1.265625, "learning_rate": 0.00018065325649769813, "loss": 3.0155, "step": 10846 }, { "epoch": 0.5077886359646556, "grad_norm": 1.3984375, "learning_rate": 0.00018064980539109126, "loss": 2.8682, "step": 10847 }, { "epoch": 0.5078354497044882, "grad_norm": 1.359375, "learning_rate": 0.0001806463540096748, "loss": 3.0058, "step": 10848 }, { "epoch": 0.5078822634443209, "grad_norm": 1.359375, "learning_rate": 0.00018064290235346063, "loss": 2.8014, "step": 10849 }, { "epoch": 0.5079290771841536, "grad_norm": 1.1953125, "learning_rate": 0.00018063945042246036, "loss": 2.7575, "step": 10850 }, { "epoch": 0.5079758909239862, "grad_norm": 3.09375, "learning_rate": 0.00018063599821668586, "loss": 2.9269, "step": 10851 }, { "epoch": 0.5080227046638188, "grad_norm": 1.8828125, "learning_rate": 0.00018063254573614886, "loss": 2.8082, "step": 10852 }, { "epoch": 0.5080695184036514, "grad_norm": 1.3671875, "learning_rate": 0.00018062909298086107, "loss": 3.0328, "step": 10853 }, { "epoch": 0.5081163321434841, "grad_norm": 1.3203125, "learning_rate": 0.00018062563995083436, "loss": 2.8737, "step": 10854 }, { "epoch": 0.5081631458833168, "grad_norm": 1.1796875, "learning_rate": 0.00018062218664608045, "loss": 2.6698, "step": 10855 }, { "epoch": 0.5082099596231494, "grad_norm": 1.125, "learning_rate": 0.00018061873306661106, "loss": 2.6872, "step": 10856 }, { "epoch": 0.508256773362982, "grad_norm": 1.4453125, "learning_rate": 0.000180615279212438, "loss": 2.8355, "step": 10857 }, { "epoch": 0.5083035871028146, "grad_norm": 1.2109375, "learning_rate": 0.00018061182508357303, "loss": 2.4725, "step": 10858 }, { "epoch": 0.5083504008426473, "grad_norm": 1.71875, "learning_rate": 0.00018060837068002795, "loss": 3.207, "step": 10859 }, { "epoch": 0.50839721458248, "grad_norm": 1.7421875, "learning_rate": 0.0001806049160018145, "loss": 3.1407, "step": 10860 }, { "epoch": 0.5084440283223126, "grad_norm": 1.2734375, "learning_rate": 0.00018060146104894445, "loss": 2.9754, "step": 10861 }, { "epoch": 0.5084908420621452, "grad_norm": 1.8515625, "learning_rate": 0.0001805980058214296, "loss": 2.7689, "step": 10862 }, { "epoch": 0.5085376558019779, "grad_norm": 1.3359375, "learning_rate": 0.00018059455031928167, "loss": 2.8598, "step": 10863 }, { "epoch": 0.5085844695418105, "grad_norm": 1.2265625, "learning_rate": 0.0001805910945425125, "loss": 2.7525, "step": 10864 }, { "epoch": 0.5086312832816432, "grad_norm": 1.234375, "learning_rate": 0.00018058763849113384, "loss": 2.9647, "step": 10865 }, { "epoch": 0.5086780970214758, "grad_norm": 1.359375, "learning_rate": 0.00018058418216515743, "loss": 3.4548, "step": 10866 }, { "epoch": 0.5087249107613084, "grad_norm": 1.3125, "learning_rate": 0.0001805807255645951, "loss": 2.8263, "step": 10867 }, { "epoch": 0.508771724501141, "grad_norm": 1.578125, "learning_rate": 0.00018057726868945858, "loss": 2.7419, "step": 10868 }, { "epoch": 0.5088185382409737, "grad_norm": 1.4765625, "learning_rate": 0.0001805738115397597, "loss": 2.986, "step": 10869 }, { "epoch": 0.5088653519808064, "grad_norm": 1.484375, "learning_rate": 0.0001805703541155102, "loss": 4.6337, "step": 10870 }, { "epoch": 0.508912165720639, "grad_norm": 1.8203125, "learning_rate": 0.0001805668964167219, "loss": 3.3171, "step": 10871 }, { "epoch": 0.5089589794604716, "grad_norm": 1.234375, "learning_rate": 0.0001805634384434065, "loss": 3.0344, "step": 10872 }, { "epoch": 0.5090057932003043, "grad_norm": 1.4765625, "learning_rate": 0.00018055998019557591, "loss": 3.0281, "step": 10873 }, { "epoch": 0.5090526069401369, "grad_norm": 1.1953125, "learning_rate": 0.0001805565216732418, "loss": 2.7783, "step": 10874 }, { "epoch": 0.5090994206799696, "grad_norm": 1.2265625, "learning_rate": 0.000180553062876416, "loss": 2.9573, "step": 10875 }, { "epoch": 0.5091462344198022, "grad_norm": 4.0, "learning_rate": 0.00018054960380511033, "loss": 3.7301, "step": 10876 }, { "epoch": 0.5091930481596348, "grad_norm": 1.1640625, "learning_rate": 0.00018054614445933652, "loss": 2.514, "step": 10877 }, { "epoch": 0.5092398618994675, "grad_norm": 1.1640625, "learning_rate": 0.00018054268483910636, "loss": 2.9779, "step": 10878 }, { "epoch": 0.5092866756393001, "grad_norm": 1.5625, "learning_rate": 0.00018053922494443167, "loss": 2.8718, "step": 10879 }, { "epoch": 0.5093334893791328, "grad_norm": 1.3671875, "learning_rate": 0.00018053576477532424, "loss": 3.2827, "step": 10880 }, { "epoch": 0.5093803031189654, "grad_norm": 1.1953125, "learning_rate": 0.00018053230433179583, "loss": 3.1376, "step": 10881 }, { "epoch": 0.509427116858798, "grad_norm": 1.390625, "learning_rate": 0.00018052884361385824, "loss": 3.1868, "step": 10882 }, { "epoch": 0.5094739305986307, "grad_norm": 1.3203125, "learning_rate": 0.0001805253826215233, "loss": 3.0048, "step": 10883 }, { "epoch": 0.5095207443384634, "grad_norm": 1.53125, "learning_rate": 0.00018052192135480276, "loss": 3.0081, "step": 10884 }, { "epoch": 0.509567558078296, "grad_norm": 1.703125, "learning_rate": 0.0001805184598137084, "loss": 3.0979, "step": 10885 }, { "epoch": 0.5096143718181286, "grad_norm": 1.78125, "learning_rate": 0.00018051499799825207, "loss": 2.7342, "step": 10886 }, { "epoch": 0.5096611855579612, "grad_norm": 1.140625, "learning_rate": 0.00018051153590844553, "loss": 3.1142, "step": 10887 }, { "epoch": 0.5097079992977939, "grad_norm": 1.0859375, "learning_rate": 0.0001805080735443006, "loss": 2.2173, "step": 10888 }, { "epoch": 0.5097548130376266, "grad_norm": 1.4765625, "learning_rate": 0.00018050461090582908, "loss": 2.9116, "step": 10889 }, { "epoch": 0.5098016267774592, "grad_norm": 1.7890625, "learning_rate": 0.0001805011479930427, "loss": 2.7936, "step": 10890 }, { "epoch": 0.5098484405172918, "grad_norm": 1.4921875, "learning_rate": 0.00018049768480595332, "loss": 2.7307, "step": 10891 }, { "epoch": 0.5098952542571245, "grad_norm": 1.15625, "learning_rate": 0.00018049422134457275, "loss": 2.8556, "step": 10892 }, { "epoch": 0.5099420679969571, "grad_norm": 1.359375, "learning_rate": 0.0001804907576089128, "loss": 2.7504, "step": 10893 }, { "epoch": 0.5099888817367898, "grad_norm": 1.234375, "learning_rate": 0.0001804872935989852, "loss": 2.7803, "step": 10894 }, { "epoch": 0.5100356954766224, "grad_norm": 2.53125, "learning_rate": 0.0001804838293148018, "loss": 3.0307, "step": 10895 }, { "epoch": 0.510082509216455, "grad_norm": 1.2578125, "learning_rate": 0.00018048036475637444, "loss": 2.9269, "step": 10896 }, { "epoch": 0.5101293229562877, "grad_norm": 1.21875, "learning_rate": 0.0001804768999237149, "loss": 2.8409, "step": 10897 }, { "epoch": 0.5101761366961203, "grad_norm": 1.390625, "learning_rate": 0.00018047343481683491, "loss": 2.9071, "step": 10898 }, { "epoch": 0.510222950435953, "grad_norm": 1.25, "learning_rate": 0.00018046996943574638, "loss": 2.4435, "step": 10899 }, { "epoch": 0.5102697641757856, "grad_norm": 1.2578125, "learning_rate": 0.0001804665037804611, "loss": 2.7749, "step": 10900 }, { "epoch": 0.5103165779156182, "grad_norm": 1.6484375, "learning_rate": 0.00018046303785099083, "loss": 2.8497, "step": 10901 }, { "epoch": 0.5103633916554509, "grad_norm": 1.1640625, "learning_rate": 0.00018045957164734738, "loss": 2.8283, "step": 10902 }, { "epoch": 0.5104102053952835, "grad_norm": 1.484375, "learning_rate": 0.00018045610516954265, "loss": 2.9727, "step": 10903 }, { "epoch": 0.5104570191351162, "grad_norm": 1.390625, "learning_rate": 0.00018045263841758838, "loss": 2.5562, "step": 10904 }, { "epoch": 0.5105038328749488, "grad_norm": 1.59375, "learning_rate": 0.00018044917139149638, "loss": 2.7189, "step": 10905 }, { "epoch": 0.5105506466147814, "grad_norm": 1.625, "learning_rate": 0.0001804457040912785, "loss": 2.8116, "step": 10906 }, { "epoch": 0.5105974603546141, "grad_norm": 1.234375, "learning_rate": 0.00018044223651694652, "loss": 2.7687, "step": 10907 }, { "epoch": 0.5106442740944467, "grad_norm": 1.78125, "learning_rate": 0.00018043876866851225, "loss": 3.0753, "step": 10908 }, { "epoch": 0.5106910878342794, "grad_norm": 1.3984375, "learning_rate": 0.00018043530054598754, "loss": 2.8602, "step": 10909 }, { "epoch": 0.510737901574112, "grad_norm": 2.359375, "learning_rate": 0.0001804318321493842, "loss": 2.6982, "step": 10910 }, { "epoch": 0.5107847153139446, "grad_norm": 2.4375, "learning_rate": 0.000180428363478714, "loss": 3.4688, "step": 10911 }, { "epoch": 0.5108315290537773, "grad_norm": 1.9609375, "learning_rate": 0.00018042489453398884, "loss": 2.9789, "step": 10912 }, { "epoch": 0.5108783427936099, "grad_norm": 1.4296875, "learning_rate": 0.00018042142531522051, "loss": 2.9217, "step": 10913 }, { "epoch": 0.5109251565334426, "grad_norm": 1.7734375, "learning_rate": 0.00018041795582242082, "loss": 2.9425, "step": 10914 }, { "epoch": 0.5109719702732752, "grad_norm": 1.2421875, "learning_rate": 0.00018041448605560158, "loss": 2.8175, "step": 10915 }, { "epoch": 0.5110187840131079, "grad_norm": 1.421875, "learning_rate": 0.00018041101601477463, "loss": 3.1249, "step": 10916 }, { "epoch": 0.5110655977529405, "grad_norm": 2.125, "learning_rate": 0.0001804075456999518, "loss": 3.1789, "step": 10917 }, { "epoch": 0.5111124114927731, "grad_norm": 1.53125, "learning_rate": 0.00018040407511114487, "loss": 2.8069, "step": 10918 }, { "epoch": 0.5111592252326058, "grad_norm": 1.421875, "learning_rate": 0.00018040060424836575, "loss": 2.8262, "step": 10919 }, { "epoch": 0.5112060389724384, "grad_norm": 1.90625, "learning_rate": 0.0001803971331116262, "loss": 3.1411, "step": 10920 }, { "epoch": 0.511252852712271, "grad_norm": 1.3515625, "learning_rate": 0.00018039366170093804, "loss": 3.1232, "step": 10921 }, { "epoch": 0.5112996664521037, "grad_norm": 1.40625, "learning_rate": 0.00018039019001631315, "loss": 3.0257, "step": 10922 }, { "epoch": 0.5113464801919363, "grad_norm": 1.4140625, "learning_rate": 0.00018038671805776333, "loss": 2.266, "step": 10923 }, { "epoch": 0.511393293931769, "grad_norm": 1.15625, "learning_rate": 0.0001803832458253004, "loss": 2.8473, "step": 10924 }, { "epoch": 0.5114401076716016, "grad_norm": 1.9375, "learning_rate": 0.00018037977331893625, "loss": 3.147, "step": 10925 }, { "epoch": 0.5114869214114343, "grad_norm": 1.2421875, "learning_rate": 0.0001803763005386826, "loss": 2.8358, "step": 10926 }, { "epoch": 0.5115337351512669, "grad_norm": 1.2734375, "learning_rate": 0.00018037282748455143, "loss": 3.0323, "step": 10927 }, { "epoch": 0.5115805488910995, "grad_norm": 1.6875, "learning_rate": 0.00018036935415655445, "loss": 3.1872, "step": 10928 }, { "epoch": 0.5116273626309322, "grad_norm": 1.578125, "learning_rate": 0.00018036588055470355, "loss": 3.2206, "step": 10929 }, { "epoch": 0.5116741763707648, "grad_norm": 1.296875, "learning_rate": 0.00018036240667901055, "loss": 2.915, "step": 10930 }, { "epoch": 0.5117209901105975, "grad_norm": 1.265625, "learning_rate": 0.0001803589325294873, "loss": 2.3085, "step": 10931 }, { "epoch": 0.5117678038504301, "grad_norm": 1.2265625, "learning_rate": 0.00018035545810614565, "loss": 3.1729, "step": 10932 }, { "epoch": 0.5118146175902627, "grad_norm": 1.375, "learning_rate": 0.0001803519834089974, "loss": 3.1916, "step": 10933 }, { "epoch": 0.5118614313300954, "grad_norm": 1.421875, "learning_rate": 0.00018034850843805442, "loss": 3.0839, "step": 10934 }, { "epoch": 0.511908245069928, "grad_norm": 1.3203125, "learning_rate": 0.00018034503319332853, "loss": 2.9405, "step": 10935 }, { "epoch": 0.5119550588097607, "grad_norm": 1.734375, "learning_rate": 0.0001803415576748316, "loss": 2.9647, "step": 10936 }, { "epoch": 0.5120018725495933, "grad_norm": 1.3046875, "learning_rate": 0.00018033808188257545, "loss": 2.7564, "step": 10937 }, { "epoch": 0.5120486862894259, "grad_norm": 1.6640625, "learning_rate": 0.00018033460581657191, "loss": 2.6996, "step": 10938 }, { "epoch": 0.5120955000292586, "grad_norm": 1.3203125, "learning_rate": 0.00018033112947683287, "loss": 2.7309, "step": 10939 }, { "epoch": 0.5121423137690913, "grad_norm": 1.34375, "learning_rate": 0.00018032765286337015, "loss": 3.0179, "step": 10940 }, { "epoch": 0.5121891275089239, "grad_norm": 1.0078125, "learning_rate": 0.00018032417597619557, "loss": 2.3111, "step": 10941 }, { "epoch": 0.5122359412487565, "grad_norm": 1.8046875, "learning_rate": 0.00018032069881532102, "loss": 2.0414, "step": 10942 }, { "epoch": 0.5122827549885891, "grad_norm": 1.6015625, "learning_rate": 0.00018031722138075833, "loss": 2.3304, "step": 10943 }, { "epoch": 0.5123295687284218, "grad_norm": 1.3359375, "learning_rate": 0.00018031374367251935, "loss": 3.5735, "step": 10944 }, { "epoch": 0.5123763824682545, "grad_norm": 1.25, "learning_rate": 0.00018031026569061592, "loss": 2.9127, "step": 10945 }, { "epoch": 0.5124231962080871, "grad_norm": 1.34375, "learning_rate": 0.00018030678743505995, "loss": 2.8832, "step": 10946 }, { "epoch": 0.5124700099479197, "grad_norm": 2.28125, "learning_rate": 0.0001803033089058632, "loss": 3.0527, "step": 10947 }, { "epoch": 0.5125168236877523, "grad_norm": 1.1640625, "learning_rate": 0.00018029983010303758, "loss": 2.2871, "step": 10948 }, { "epoch": 0.512563637427585, "grad_norm": 2.375, "learning_rate": 0.00018029635102659493, "loss": 2.3776, "step": 10949 }, { "epoch": 0.5126104511674177, "grad_norm": 1.3671875, "learning_rate": 0.0001802928716765471, "loss": 3.0374, "step": 10950 }, { "epoch": 0.5126572649072503, "grad_norm": 1.265625, "learning_rate": 0.00018028939205290594, "loss": 2.9235, "step": 10951 }, { "epoch": 0.5127040786470829, "grad_norm": 1.3828125, "learning_rate": 0.00018028591215568334, "loss": 2.7465, "step": 10952 }, { "epoch": 0.5127508923869155, "grad_norm": 1.515625, "learning_rate": 0.00018028243198489112, "loss": 2.8281, "step": 10953 }, { "epoch": 0.5127977061267482, "grad_norm": 1.65625, "learning_rate": 0.00018027895154054116, "loss": 2.8867, "step": 10954 }, { "epoch": 0.5128445198665809, "grad_norm": 1.4765625, "learning_rate": 0.0001802754708226453, "loss": 3.1599, "step": 10955 }, { "epoch": 0.5128913336064135, "grad_norm": 1.4921875, "learning_rate": 0.00018027198983121541, "loss": 3.2709, "step": 10956 }, { "epoch": 0.5129381473462461, "grad_norm": 1.734375, "learning_rate": 0.00018026850856626338, "loss": 3.0039, "step": 10957 }, { "epoch": 0.5129849610860787, "grad_norm": 1.3125, "learning_rate": 0.00018026502702780107, "loss": 3.0448, "step": 10958 }, { "epoch": 0.5130317748259114, "grad_norm": 1.3515625, "learning_rate": 0.00018026154521584028, "loss": 2.9044, "step": 10959 }, { "epoch": 0.5130785885657441, "grad_norm": 1.4609375, "learning_rate": 0.0001802580631303929, "loss": 3.0092, "step": 10960 }, { "epoch": 0.5131254023055767, "grad_norm": 1.8515625, "learning_rate": 0.00018025458077147084, "loss": 3.1563, "step": 10961 }, { "epoch": 0.5131722160454093, "grad_norm": 1.671875, "learning_rate": 0.00018025109813908592, "loss": 3.5424, "step": 10962 }, { "epoch": 0.5132190297852419, "grad_norm": 1.640625, "learning_rate": 0.00018024761523325003, "loss": 3.4576, "step": 10963 }, { "epoch": 0.5132658435250746, "grad_norm": 1.1875, "learning_rate": 0.00018024413205397505, "loss": 2.7461, "step": 10964 }, { "epoch": 0.5133126572649073, "grad_norm": 0.90234375, "learning_rate": 0.00018024064860127278, "loss": 2.7486, "step": 10965 }, { "epoch": 0.5133594710047399, "grad_norm": 1.25, "learning_rate": 0.00018023716487515517, "loss": 2.4457, "step": 10966 }, { "epoch": 0.5134062847445725, "grad_norm": 1.3828125, "learning_rate": 0.00018023368087563402, "loss": 2.5888, "step": 10967 }, { "epoch": 0.5134530984844051, "grad_norm": 1.546875, "learning_rate": 0.0001802301966027213, "loss": 2.5043, "step": 10968 }, { "epoch": 0.5134999122242379, "grad_norm": 1.2890625, "learning_rate": 0.0001802267120564288, "loss": 2.8338, "step": 10969 }, { "epoch": 0.5135467259640705, "grad_norm": 1.1015625, "learning_rate": 0.00018022322723676838, "loss": 2.8311, "step": 10970 }, { "epoch": 0.5135935397039031, "grad_norm": 1.4296875, "learning_rate": 0.000180219742143752, "loss": 2.9225, "step": 10971 }, { "epoch": 0.5136403534437357, "grad_norm": 1.3671875, "learning_rate": 0.00018021625677739147, "loss": 2.8502, "step": 10972 }, { "epoch": 0.5136871671835683, "grad_norm": 2.046875, "learning_rate": 0.0001802127711376987, "loss": 3.1232, "step": 10973 }, { "epoch": 0.5137339809234011, "grad_norm": 1.2109375, "learning_rate": 0.0001802092852246855, "loss": 2.8997, "step": 10974 }, { "epoch": 0.5137807946632337, "grad_norm": 1.453125, "learning_rate": 0.00018020579903836384, "loss": 3.0407, "step": 10975 }, { "epoch": 0.5138276084030663, "grad_norm": 1.34375, "learning_rate": 0.00018020231257874551, "loss": 2.849, "step": 10976 }, { "epoch": 0.5138744221428989, "grad_norm": 1.2109375, "learning_rate": 0.00018019882584584246, "loss": 3.0668, "step": 10977 }, { "epoch": 0.5139212358827315, "grad_norm": 1.546875, "learning_rate": 0.00018019533883966658, "loss": 3.021, "step": 10978 }, { "epoch": 0.5139680496225643, "grad_norm": 1.6796875, "learning_rate": 0.00018019185156022968, "loss": 3.1849, "step": 10979 }, { "epoch": 0.5140148633623969, "grad_norm": 1.3828125, "learning_rate": 0.0001801883640075437, "loss": 3.0498, "step": 10980 }, { "epoch": 0.5140616771022295, "grad_norm": 1.2578125, "learning_rate": 0.00018018487618162048, "loss": 2.5642, "step": 10981 }, { "epoch": 0.5141084908420621, "grad_norm": 1.78125, "learning_rate": 0.00018018138808247194, "loss": 2.6445, "step": 10982 }, { "epoch": 0.5141553045818947, "grad_norm": 1.4140625, "learning_rate": 0.00018017789971010997, "loss": 2.889, "step": 10983 }, { "epoch": 0.5142021183217275, "grad_norm": 1.1171875, "learning_rate": 0.00018017441106454643, "loss": 3.3606, "step": 10984 }, { "epoch": 0.5142489320615601, "grad_norm": 1.1640625, "learning_rate": 0.00018017092214579322, "loss": 2.8605, "step": 10985 }, { "epoch": 0.5142957458013927, "grad_norm": 1.3046875, "learning_rate": 0.0001801674329538622, "loss": 3.154, "step": 10986 }, { "epoch": 0.5143425595412253, "grad_norm": 1.359375, "learning_rate": 0.00018016394348876533, "loss": 2.9209, "step": 10987 }, { "epoch": 0.5143893732810579, "grad_norm": 1.53125, "learning_rate": 0.0001801604537505144, "loss": 2.6015, "step": 10988 }, { "epoch": 0.5144361870208907, "grad_norm": 1.234375, "learning_rate": 0.00018015696373912144, "loss": 2.2481, "step": 10989 }, { "epoch": 0.5144830007607233, "grad_norm": 1.453125, "learning_rate": 0.0001801534734545982, "loss": 3.1683, "step": 10990 }, { "epoch": 0.5145298145005559, "grad_norm": 1.28125, "learning_rate": 0.00018014998289695667, "loss": 2.6875, "step": 10991 }, { "epoch": 0.5145766282403885, "grad_norm": 1.5859375, "learning_rate": 0.0001801464920662087, "loss": 2.5721, "step": 10992 }, { "epoch": 0.5146234419802211, "grad_norm": 1.2734375, "learning_rate": 0.00018014300096236613, "loss": 2.5203, "step": 10993 }, { "epoch": 0.5146702557200539, "grad_norm": 2.609375, "learning_rate": 0.00018013950958544096, "loss": 2.6441, "step": 10994 }, { "epoch": 0.5147170694598865, "grad_norm": 1.328125, "learning_rate": 0.00018013601793544508, "loss": 2.4745, "step": 10995 }, { "epoch": 0.5147638831997191, "grad_norm": 2.078125, "learning_rate": 0.0001801325260123903, "loss": 2.8285, "step": 10996 }, { "epoch": 0.5148106969395517, "grad_norm": 1.265625, "learning_rate": 0.0001801290338162886, "loss": 2.7934, "step": 10997 }, { "epoch": 0.5148575106793843, "grad_norm": 1.5, "learning_rate": 0.00018012554134715184, "loss": 2.6933, "step": 10998 }, { "epoch": 0.5149043244192171, "grad_norm": 1.515625, "learning_rate": 0.0001801220486049919, "loss": 2.9292, "step": 10999 }, { "epoch": 0.5149511381590497, "grad_norm": 1.375, "learning_rate": 0.00018011855558982073, "loss": 2.6887, "step": 11000 }, { "epoch": 0.5149979518988823, "grad_norm": 1.21875, "learning_rate": 0.00018011506230165023, "loss": 2.886, "step": 11001 }, { "epoch": 0.5150447656387149, "grad_norm": 1.09375, "learning_rate": 0.00018011156874049225, "loss": 2.5345, "step": 11002 }, { "epoch": 0.5150915793785475, "grad_norm": 1.15625, "learning_rate": 0.00018010807490635876, "loss": 2.4868, "step": 11003 }, { "epoch": 0.5151383931183803, "grad_norm": 1.3359375, "learning_rate": 0.00018010458079926165, "loss": 2.8666, "step": 11004 }, { "epoch": 0.5151852068582129, "grad_norm": 1.2890625, "learning_rate": 0.00018010108641921277, "loss": 2.7863, "step": 11005 }, { "epoch": 0.5152320205980455, "grad_norm": 1.3828125, "learning_rate": 0.0001800975917662241, "loss": 3.2893, "step": 11006 }, { "epoch": 0.5152788343378781, "grad_norm": 1.2265625, "learning_rate": 0.0001800940968403075, "loss": 2.7585, "step": 11007 }, { "epoch": 0.5153256480777109, "grad_norm": 1.0546875, "learning_rate": 0.0001800906016414749, "loss": 2.8606, "step": 11008 }, { "epoch": 0.5153724618175435, "grad_norm": 1.171875, "learning_rate": 0.0001800871061697382, "loss": 2.7731, "step": 11009 }, { "epoch": 0.5154192755573761, "grad_norm": 1.5, "learning_rate": 0.0001800836104251093, "loss": 2.7838, "step": 11010 }, { "epoch": 0.5154660892972087, "grad_norm": 1.734375, "learning_rate": 0.00018008011440760017, "loss": 3.1078, "step": 11011 }, { "epoch": 0.5155129030370413, "grad_norm": 1.4375, "learning_rate": 0.00018007661811722265, "loss": 2.7948, "step": 11012 }, { "epoch": 0.5155597167768741, "grad_norm": 1.4453125, "learning_rate": 0.00018007312155398869, "loss": 2.7697, "step": 11013 }, { "epoch": 0.5156065305167067, "grad_norm": 1.8203125, "learning_rate": 0.00018006962471791017, "loss": 3.3057, "step": 11014 }, { "epoch": 0.5156533442565393, "grad_norm": 1.6875, "learning_rate": 0.00018006612760899907, "loss": 2.7892, "step": 11015 }, { "epoch": 0.5157001579963719, "grad_norm": 1.65625, "learning_rate": 0.00018006263022726725, "loss": 3.0738, "step": 11016 }, { "epoch": 0.5157469717362045, "grad_norm": 1.0234375, "learning_rate": 0.00018005913257272663, "loss": 2.5292, "step": 11017 }, { "epoch": 0.5157937854760373, "grad_norm": 1.71875, "learning_rate": 0.00018005563464538918, "loss": 2.9004, "step": 11018 }, { "epoch": 0.5158405992158699, "grad_norm": 1.53125, "learning_rate": 0.00018005213644526678, "loss": 3.0882, "step": 11019 }, { "epoch": 0.5158874129557025, "grad_norm": 1.328125, "learning_rate": 0.00018004863797237134, "loss": 2.6711, "step": 11020 }, { "epoch": 0.5159342266955351, "grad_norm": 1.640625, "learning_rate": 0.00018004513922671478, "loss": 3.1561, "step": 11021 }, { "epoch": 0.5159810404353677, "grad_norm": 1.53125, "learning_rate": 0.00018004164020830903, "loss": 2.983, "step": 11022 }, { "epoch": 0.5160278541752005, "grad_norm": 1.71875, "learning_rate": 0.00018003814091716605, "loss": 3.1287, "step": 11023 }, { "epoch": 0.5160746679150331, "grad_norm": 1.1953125, "learning_rate": 0.0001800346413532977, "loss": 2.8913, "step": 11024 }, { "epoch": 0.5161214816548657, "grad_norm": 1.9296875, "learning_rate": 0.00018003114151671597, "loss": 3.1782, "step": 11025 }, { "epoch": 0.5161682953946983, "grad_norm": 0.99609375, "learning_rate": 0.0001800276414074327, "loss": 5.1797, "step": 11026 }, { "epoch": 0.516215109134531, "grad_norm": 1.1328125, "learning_rate": 0.0001800241410254599, "loss": 2.5939, "step": 11027 }, { "epoch": 0.5162619228743637, "grad_norm": 1.2421875, "learning_rate": 0.00018002064037080947, "loss": 3.0213, "step": 11028 }, { "epoch": 0.5163087366141963, "grad_norm": 1.6875, "learning_rate": 0.00018001713944349335, "loss": 3.1146, "step": 11029 }, { "epoch": 0.5163555503540289, "grad_norm": 1.1171875, "learning_rate": 0.0001800136382435234, "loss": 2.2484, "step": 11030 }, { "epoch": 0.5164023640938615, "grad_norm": 1.328125, "learning_rate": 0.00018001013677091163, "loss": 2.9089, "step": 11031 }, { "epoch": 0.5164491778336942, "grad_norm": 1.671875, "learning_rate": 0.00018000663502566996, "loss": 2.617, "step": 11032 }, { "epoch": 0.5164959915735269, "grad_norm": 1.3984375, "learning_rate": 0.00018000313300781028, "loss": 3.1469, "step": 11033 }, { "epoch": 0.5165428053133595, "grad_norm": 1.375, "learning_rate": 0.00017999963071734456, "loss": 2.5854, "step": 11034 }, { "epoch": 0.5165896190531921, "grad_norm": 1.7578125, "learning_rate": 0.00017999612815428474, "loss": 3.1163, "step": 11035 }, { "epoch": 0.5166364327930247, "grad_norm": 1.234375, "learning_rate": 0.00017999262531864273, "loss": 2.9347, "step": 11036 }, { "epoch": 0.5166832465328574, "grad_norm": 1.1640625, "learning_rate": 0.00017998912221043044, "loss": 2.7956, "step": 11037 }, { "epoch": 0.5167300602726901, "grad_norm": 1.3359375, "learning_rate": 0.00017998561882965984, "loss": 2.6974, "step": 11038 }, { "epoch": 0.5167768740125227, "grad_norm": 1.5390625, "learning_rate": 0.0001799821151763429, "loss": 3.1852, "step": 11039 }, { "epoch": 0.5168236877523553, "grad_norm": 1.2890625, "learning_rate": 0.00017997861125049154, "loss": 2.871, "step": 11040 }, { "epoch": 0.5168705014921879, "grad_norm": 1.6484375, "learning_rate": 0.00017997510705211766, "loss": 3.0243, "step": 11041 }, { "epoch": 0.5169173152320206, "grad_norm": 1.421875, "learning_rate": 0.0001799716025812332, "loss": 2.8076, "step": 11042 }, { "epoch": 0.5169641289718533, "grad_norm": 1.203125, "learning_rate": 0.00017996809783785018, "loss": 2.7889, "step": 11043 }, { "epoch": 0.5170109427116859, "grad_norm": 1.53125, "learning_rate": 0.00017996459282198046, "loss": 2.4587, "step": 11044 }, { "epoch": 0.5170577564515185, "grad_norm": 1.3125, "learning_rate": 0.000179961087533636, "loss": 2.9308, "step": 11045 }, { "epoch": 0.5171045701913511, "grad_norm": 1.28125, "learning_rate": 0.0001799575819728288, "loss": 2.9248, "step": 11046 }, { "epoch": 0.5171513839311838, "grad_norm": 1.40625, "learning_rate": 0.0001799540761395707, "loss": 2.7725, "step": 11047 }, { "epoch": 0.5171981976710165, "grad_norm": 1.390625, "learning_rate": 0.00017995057003387374, "loss": 2.6133, "step": 11048 }, { "epoch": 0.5172450114108491, "grad_norm": 1.546875, "learning_rate": 0.00017994706365574982, "loss": 2.8903, "step": 11049 }, { "epoch": 0.5172918251506817, "grad_norm": 1.6640625, "learning_rate": 0.00017994355700521095, "loss": 2.8542, "step": 11050 }, { "epoch": 0.5173386388905143, "grad_norm": 1.296875, "learning_rate": 0.00017994005008226897, "loss": 2.9159, "step": 11051 }, { "epoch": 0.517385452630347, "grad_norm": 1.171875, "learning_rate": 0.0001799365428869359, "loss": 2.9625, "step": 11052 }, { "epoch": 0.5174322663701797, "grad_norm": 1.109375, "learning_rate": 0.00017993303541922372, "loss": 3.019, "step": 11053 }, { "epoch": 0.5174790801100123, "grad_norm": 1.4453125, "learning_rate": 0.0001799295276791443, "loss": 3.1627, "step": 11054 }, { "epoch": 0.5175258938498449, "grad_norm": 1.40625, "learning_rate": 0.00017992601966670968, "loss": 2.5496, "step": 11055 }, { "epoch": 0.5175727075896775, "grad_norm": 1.328125, "learning_rate": 0.00017992251138193173, "loss": 2.5321, "step": 11056 }, { "epoch": 0.5176195213295102, "grad_norm": 1.5625, "learning_rate": 0.00017991900282482247, "loss": 2.7893, "step": 11057 }, { "epoch": 0.5176663350693429, "grad_norm": 2.109375, "learning_rate": 0.0001799154939953938, "loss": 3.188, "step": 11058 }, { "epoch": 0.5177131488091755, "grad_norm": 1.5390625, "learning_rate": 0.0001799119848936577, "loss": 3.1461, "step": 11059 }, { "epoch": 0.5177599625490081, "grad_norm": 1.5625, "learning_rate": 0.00017990847551962614, "loss": 3.0154, "step": 11060 }, { "epoch": 0.5178067762888408, "grad_norm": 1.2109375, "learning_rate": 0.0001799049658733111, "loss": 2.8846, "step": 11061 }, { "epoch": 0.5178535900286734, "grad_norm": 1.5390625, "learning_rate": 0.00017990145595472447, "loss": 2.6076, "step": 11062 }, { "epoch": 0.5179004037685061, "grad_norm": 1.21875, "learning_rate": 0.00017989794576387825, "loss": 2.956, "step": 11063 }, { "epoch": 0.5179472175083387, "grad_norm": 1.421875, "learning_rate": 0.0001798944353007844, "loss": 2.8192, "step": 11064 }, { "epoch": 0.5179940312481713, "grad_norm": 1.234375, "learning_rate": 0.0001798909245654549, "loss": 2.8051, "step": 11065 }, { "epoch": 0.518040844988004, "grad_norm": 1.703125, "learning_rate": 0.00017988741355790165, "loss": 3.383, "step": 11066 }, { "epoch": 0.5180876587278366, "grad_norm": 1.359375, "learning_rate": 0.00017988390227813668, "loss": 3.0508, "step": 11067 }, { "epoch": 0.5181344724676693, "grad_norm": 1.5859375, "learning_rate": 0.00017988039072617195, "loss": 3.9327, "step": 11068 }, { "epoch": 0.5181812862075019, "grad_norm": 1.296875, "learning_rate": 0.0001798768789020194, "loss": 2.8687, "step": 11069 }, { "epoch": 0.5182280999473345, "grad_norm": 1.4921875, "learning_rate": 0.00017987336680569098, "loss": 2.7783, "step": 11070 }, { "epoch": 0.5182749136871672, "grad_norm": 1.4609375, "learning_rate": 0.0001798698544371987, "loss": 2.8401, "step": 11071 }, { "epoch": 0.5183217274269998, "grad_norm": 1.3359375, "learning_rate": 0.00017986634179655448, "loss": 3.0676, "step": 11072 }, { "epoch": 0.5183685411668325, "grad_norm": 1.109375, "learning_rate": 0.00017986282888377036, "loss": 2.5231, "step": 11073 }, { "epoch": 0.5184153549066651, "grad_norm": 1.7265625, "learning_rate": 0.00017985931569885824, "loss": 3.312, "step": 11074 }, { "epoch": 0.5184621686464977, "grad_norm": 1.3203125, "learning_rate": 0.00017985580224183013, "loss": 2.6622, "step": 11075 }, { "epoch": 0.5185089823863304, "grad_norm": 1.3671875, "learning_rate": 0.00017985228851269796, "loss": 2.8208, "step": 11076 }, { "epoch": 0.518555796126163, "grad_norm": 1.3515625, "learning_rate": 0.00017984877451147378, "loss": 2.5025, "step": 11077 }, { "epoch": 0.5186026098659957, "grad_norm": 1.2734375, "learning_rate": 0.00017984526023816948, "loss": 2.9761, "step": 11078 }, { "epoch": 0.5186494236058283, "grad_norm": 1.4609375, "learning_rate": 0.0001798417456927971, "loss": 3.0411, "step": 11079 }, { "epoch": 0.518696237345661, "grad_norm": 1.34375, "learning_rate": 0.00017983823087536855, "loss": 3.1458, "step": 11080 }, { "epoch": 0.5187430510854936, "grad_norm": 1.421875, "learning_rate": 0.00017983471578589586, "loss": 3.0263, "step": 11081 }, { "epoch": 0.5187898648253262, "grad_norm": 1.0859375, "learning_rate": 0.00017983120042439098, "loss": 2.4951, "step": 11082 }, { "epoch": 0.5188366785651589, "grad_norm": 1.734375, "learning_rate": 0.0001798276847908659, "loss": 2.8014, "step": 11083 }, { "epoch": 0.5188834923049915, "grad_norm": 1.296875, "learning_rate": 0.00017982416888533262, "loss": 2.957, "step": 11084 }, { "epoch": 0.5189303060448242, "grad_norm": 1.375, "learning_rate": 0.0001798206527078031, "loss": 2.8185, "step": 11085 }, { "epoch": 0.5189771197846568, "grad_norm": 1.390625, "learning_rate": 0.00017981713625828932, "loss": 2.5496, "step": 11086 }, { "epoch": 0.5190239335244894, "grad_norm": 2.265625, "learning_rate": 0.00017981361953680323, "loss": 2.6408, "step": 11087 }, { "epoch": 0.5190707472643221, "grad_norm": 1.484375, "learning_rate": 0.00017981010254335686, "loss": 3.0163, "step": 11088 }, { "epoch": 0.5191175610041547, "grad_norm": 1.4140625, "learning_rate": 0.00017980658527796218, "loss": 3.014, "step": 11089 }, { "epoch": 0.5191643747439874, "grad_norm": 3.1875, "learning_rate": 0.0001798030677406312, "loss": 2.6461, "step": 11090 }, { "epoch": 0.51921118848382, "grad_norm": 1.375, "learning_rate": 0.00017979954993137582, "loss": 2.6492, "step": 11091 }, { "epoch": 0.5192580022236526, "grad_norm": 1.9453125, "learning_rate": 0.00017979603185020815, "loss": 2.5537, "step": 11092 }, { "epoch": 0.5193048159634853, "grad_norm": 2.609375, "learning_rate": 0.00017979251349714006, "loss": 2.5634, "step": 11093 }, { "epoch": 0.5193516297033179, "grad_norm": 1.234375, "learning_rate": 0.00017978899487218364, "loss": 2.638, "step": 11094 }, { "epoch": 0.5193984434431506, "grad_norm": 1.78125, "learning_rate": 0.0001797854759753508, "loss": 2.759, "step": 11095 }, { "epoch": 0.5194452571829832, "grad_norm": 1.4609375, "learning_rate": 0.00017978195680665358, "loss": 2.8855, "step": 11096 }, { "epoch": 0.5194920709228158, "grad_norm": 1.6171875, "learning_rate": 0.00017977843736610393, "loss": 2.9868, "step": 11097 }, { "epoch": 0.5195388846626485, "grad_norm": 1.375, "learning_rate": 0.00017977491765371387, "loss": 3.0817, "step": 11098 }, { "epoch": 0.5195856984024811, "grad_norm": 1.34375, "learning_rate": 0.00017977139766949543, "loss": 2.8524, "step": 11099 }, { "epoch": 0.5196325121423138, "grad_norm": 1.6171875, "learning_rate": 0.00017976787741346052, "loss": 2.9293, "step": 11100 }, { "epoch": 0.5196793258821464, "grad_norm": 1.5546875, "learning_rate": 0.0001797643568856212, "loss": 3.1447, "step": 11101 }, { "epoch": 0.519726139621979, "grad_norm": 1.96875, "learning_rate": 0.00017976083608598945, "loss": 2.5847, "step": 11102 }, { "epoch": 0.5197729533618117, "grad_norm": 1.6796875, "learning_rate": 0.00017975731501457726, "loss": 3.4486, "step": 11103 }, { "epoch": 0.5198197671016443, "grad_norm": 1.6875, "learning_rate": 0.00017975379367139661, "loss": 2.7739, "step": 11104 }, { "epoch": 0.519866580841477, "grad_norm": 1.6953125, "learning_rate": 0.00017975027205645953, "loss": 3.006, "step": 11105 }, { "epoch": 0.5199133945813096, "grad_norm": 1.203125, "learning_rate": 0.000179746750169778, "loss": 2.9017, "step": 11106 }, { "epoch": 0.5199602083211422, "grad_norm": 1.6953125, "learning_rate": 0.00017974322801136402, "loss": 2.8714, "step": 11107 }, { "epoch": 0.5200070220609749, "grad_norm": 1.1484375, "learning_rate": 0.00017973970558122964, "loss": 2.5487, "step": 11108 }, { "epoch": 0.5200538358008076, "grad_norm": 1.109375, "learning_rate": 0.0001797361828793868, "loss": 2.669, "step": 11109 }, { "epoch": 0.5201006495406402, "grad_norm": 1.9140625, "learning_rate": 0.0001797326599058475, "loss": 3.3745, "step": 11110 }, { "epoch": 0.5201474632804728, "grad_norm": 1.25, "learning_rate": 0.0001797291366606238, "loss": 3.1547, "step": 11111 }, { "epoch": 0.5201942770203054, "grad_norm": 1.375, "learning_rate": 0.00017972561314372768, "loss": 2.6962, "step": 11112 }, { "epoch": 0.5202410907601381, "grad_norm": 1.484375, "learning_rate": 0.00017972208935517113, "loss": 2.8259, "step": 11113 }, { "epoch": 0.5202879044999708, "grad_norm": 2.046875, "learning_rate": 0.00017971856529496613, "loss": 2.747, "step": 11114 }, { "epoch": 0.5203347182398034, "grad_norm": 1.796875, "learning_rate": 0.00017971504096312476, "loss": 2.9023, "step": 11115 }, { "epoch": 0.520381531979636, "grad_norm": 2.6875, "learning_rate": 0.000179711516359659, "loss": 2.4444, "step": 11116 }, { "epoch": 0.5204283457194686, "grad_norm": 1.4609375, "learning_rate": 0.00017970799148458086, "loss": 2.9121, "step": 11117 }, { "epoch": 0.5204751594593013, "grad_norm": 1.515625, "learning_rate": 0.00017970446633790233, "loss": 2.8851, "step": 11118 }, { "epoch": 0.520521973199134, "grad_norm": 1.8359375, "learning_rate": 0.00017970094091963548, "loss": 2.8797, "step": 11119 }, { "epoch": 0.5205687869389666, "grad_norm": 1.3515625, "learning_rate": 0.00017969741522979224, "loss": 3.0689, "step": 11120 }, { "epoch": 0.5206156006787992, "grad_norm": 2.046875, "learning_rate": 0.00017969388926838468, "loss": 2.7093, "step": 11121 }, { "epoch": 0.5206624144186318, "grad_norm": 1.5078125, "learning_rate": 0.00017969036303542478, "loss": 3.0919, "step": 11122 }, { "epoch": 0.5207092281584645, "grad_norm": 1.21875, "learning_rate": 0.0001796868365309246, "loss": 2.6943, "step": 11123 }, { "epoch": 0.5207560418982972, "grad_norm": 1.703125, "learning_rate": 0.0001796833097548961, "loss": 3.0381, "step": 11124 }, { "epoch": 0.5208028556381298, "grad_norm": 1.4453125, "learning_rate": 0.00017967978270735135, "loss": 2.581, "step": 11125 }, { "epoch": 0.5208496693779624, "grad_norm": 1.890625, "learning_rate": 0.00017967625538830236, "loss": 2.7959, "step": 11126 }, { "epoch": 0.5208964831177951, "grad_norm": 1.203125, "learning_rate": 0.00017967272779776113, "loss": 2.7601, "step": 11127 }, { "epoch": 0.5209432968576277, "grad_norm": 2.0, "learning_rate": 0.00017966919993573965, "loss": 2.4821, "step": 11128 }, { "epoch": 0.5209901105974604, "grad_norm": 1.203125, "learning_rate": 0.00017966567180225003, "loss": 2.5864, "step": 11129 }, { "epoch": 0.521036924337293, "grad_norm": 1.3359375, "learning_rate": 0.00017966214339730418, "loss": 2.952, "step": 11130 }, { "epoch": 0.5210837380771256, "grad_norm": 1.4609375, "learning_rate": 0.00017965861472091422, "loss": 2.8999, "step": 11131 }, { "epoch": 0.5211305518169583, "grad_norm": 1.4140625, "learning_rate": 0.0001796550857730921, "loss": 2.6798, "step": 11132 }, { "epoch": 0.521177365556791, "grad_norm": 1.5859375, "learning_rate": 0.0001796515565538499, "loss": 2.6826, "step": 11133 }, { "epoch": 0.5212241792966236, "grad_norm": 1.375, "learning_rate": 0.00017964802706319963, "loss": 3.9438, "step": 11134 }, { "epoch": 0.5212709930364562, "grad_norm": 1.390625, "learning_rate": 0.00017964449730115334, "loss": 3.0479, "step": 11135 }, { "epoch": 0.5213178067762888, "grad_norm": 1.265625, "learning_rate": 0.00017964096726772298, "loss": 2.9657, "step": 11136 }, { "epoch": 0.5213646205161215, "grad_norm": 1.5546875, "learning_rate": 0.00017963743696292067, "loss": 2.7312, "step": 11137 }, { "epoch": 0.5214114342559542, "grad_norm": 1.125, "learning_rate": 0.00017963390638675835, "loss": 2.008, "step": 11138 }, { "epoch": 0.5214582479957868, "grad_norm": 1.4375, "learning_rate": 0.00017963037553924812, "loss": 3.287, "step": 11139 }, { "epoch": 0.5215050617356194, "grad_norm": 1.359375, "learning_rate": 0.000179626844420402, "loss": 2.9242, "step": 11140 }, { "epoch": 0.521551875475452, "grad_norm": 1.4296875, "learning_rate": 0.000179623313030232, "loss": 2.6488, "step": 11141 }, { "epoch": 0.5215986892152847, "grad_norm": 1.328125, "learning_rate": 0.00017961978136875017, "loss": 2.7178, "step": 11142 }, { "epoch": 0.5216455029551174, "grad_norm": 1.1328125, "learning_rate": 0.0001796162494359685, "loss": 2.7057, "step": 11143 }, { "epoch": 0.52169231669495, "grad_norm": 1.484375, "learning_rate": 0.0001796127172318991, "loss": 3.2281, "step": 11144 }, { "epoch": 0.5217391304347826, "grad_norm": 1.125, "learning_rate": 0.00017960918475655398, "loss": 2.9071, "step": 11145 }, { "epoch": 0.5217859441746152, "grad_norm": 2.15625, "learning_rate": 0.0001796056520099451, "loss": 3.4816, "step": 11146 }, { "epoch": 0.5218327579144479, "grad_norm": 1.296875, "learning_rate": 0.00017960211899208464, "loss": 3.3028, "step": 11147 }, { "epoch": 0.5218795716542806, "grad_norm": 1.5859375, "learning_rate": 0.00017959858570298454, "loss": 3.1814, "step": 11148 }, { "epoch": 0.5219263853941132, "grad_norm": 1.2578125, "learning_rate": 0.00017959505214265684, "loss": 2.9077, "step": 11149 }, { "epoch": 0.5219731991339458, "grad_norm": 1.3515625, "learning_rate": 0.00017959151831111362, "loss": 2.7746, "step": 11150 }, { "epoch": 0.5220200128737784, "grad_norm": 1.375, "learning_rate": 0.00017958798420836686, "loss": 2.6741, "step": 11151 }, { "epoch": 0.5220668266136111, "grad_norm": 1.3359375, "learning_rate": 0.00017958444983442868, "loss": 3.03, "step": 11152 }, { "epoch": 0.5221136403534438, "grad_norm": 1.2421875, "learning_rate": 0.0001795809151893111, "loss": 2.392, "step": 11153 }, { "epoch": 0.5221604540932764, "grad_norm": 1.4765625, "learning_rate": 0.0001795773802730261, "loss": 3.2313, "step": 11154 }, { "epoch": 0.522207267833109, "grad_norm": 1.3671875, "learning_rate": 0.00017957384508558583, "loss": 2.9851, "step": 11155 }, { "epoch": 0.5222540815729416, "grad_norm": 1.1640625, "learning_rate": 0.00017957030962700228, "loss": 2.6487, "step": 11156 }, { "epoch": 0.5223008953127743, "grad_norm": 1.265625, "learning_rate": 0.00017956677389728746, "loss": 3.3212, "step": 11157 }, { "epoch": 0.522347709052607, "grad_norm": 1.734375, "learning_rate": 0.00017956323789645347, "loss": 2.7379, "step": 11158 }, { "epoch": 0.5223945227924396, "grad_norm": 1.0078125, "learning_rate": 0.00017955970162451237, "loss": 2.9278, "step": 11159 }, { "epoch": 0.5224413365322722, "grad_norm": 1.3125, "learning_rate": 0.00017955616508147617, "loss": 2.5597, "step": 11160 }, { "epoch": 0.5224881502721048, "grad_norm": 1.8359375, "learning_rate": 0.00017955262826735693, "loss": 3.035, "step": 11161 }, { "epoch": 0.5225349640119376, "grad_norm": 1.28125, "learning_rate": 0.00017954909118216673, "loss": 2.8983, "step": 11162 }, { "epoch": 0.5225817777517702, "grad_norm": 1.578125, "learning_rate": 0.00017954555382591756, "loss": 3.1094, "step": 11163 }, { "epoch": 0.5226285914916028, "grad_norm": 1.484375, "learning_rate": 0.00017954201619862156, "loss": 3.295, "step": 11164 }, { "epoch": 0.5226754052314354, "grad_norm": 1.6484375, "learning_rate": 0.00017953847830029068, "loss": 3.2004, "step": 11165 }, { "epoch": 0.522722218971268, "grad_norm": 2.484375, "learning_rate": 0.00017953494013093707, "loss": 2.5299, "step": 11166 }, { "epoch": 0.5227690327111008, "grad_norm": 1.5, "learning_rate": 0.00017953140169057273, "loss": 2.1524, "step": 11167 }, { "epoch": 0.5228158464509334, "grad_norm": 2.015625, "learning_rate": 0.00017952786297920978, "loss": 2.9121, "step": 11168 }, { "epoch": 0.522862660190766, "grad_norm": 1.09375, "learning_rate": 0.0001795243239968602, "loss": 2.5715, "step": 11169 }, { "epoch": 0.5229094739305986, "grad_norm": 2.0, "learning_rate": 0.00017952078474353612, "loss": 3.1094, "step": 11170 }, { "epoch": 0.5229562876704312, "grad_norm": 1.1875, "learning_rate": 0.0001795172452192495, "loss": 2.8741, "step": 11171 }, { "epoch": 0.523003101410264, "grad_norm": 1.25, "learning_rate": 0.00017951370542401249, "loss": 2.8986, "step": 11172 }, { "epoch": 0.5230499151500966, "grad_norm": 1.6484375, "learning_rate": 0.00017951016535783714, "loss": 3.3853, "step": 11173 }, { "epoch": 0.5230967288899292, "grad_norm": 1.171875, "learning_rate": 0.00017950662502073548, "loss": 2.6094, "step": 11174 }, { "epoch": 0.5231435426297618, "grad_norm": 1.421875, "learning_rate": 0.0001795030844127196, "loss": 3.0659, "step": 11175 }, { "epoch": 0.5231903563695944, "grad_norm": 1.140625, "learning_rate": 0.00017949954353380155, "loss": 2.4177, "step": 11176 }, { "epoch": 0.5232371701094272, "grad_norm": 1.7265625, "learning_rate": 0.0001794960023839934, "loss": 3.1801, "step": 11177 }, { "epoch": 0.5232839838492598, "grad_norm": 1.4765625, "learning_rate": 0.00017949246096330722, "loss": 2.9396, "step": 11178 }, { "epoch": 0.5233307975890924, "grad_norm": 1.2578125, "learning_rate": 0.00017948891927175507, "loss": 2.8198, "step": 11179 }, { "epoch": 0.523377611328925, "grad_norm": 1.3125, "learning_rate": 0.00017948537730934905, "loss": 3.043, "step": 11180 }, { "epoch": 0.5234244250687576, "grad_norm": 1.6328125, "learning_rate": 0.00017948183507610118, "loss": 2.8726, "step": 11181 }, { "epoch": 0.5234712388085904, "grad_norm": 2.4375, "learning_rate": 0.00017947829257202353, "loss": 2.6364, "step": 11182 }, { "epoch": 0.523518052548423, "grad_norm": 1.3125, "learning_rate": 0.00017947474979712822, "loss": 2.7327, "step": 11183 }, { "epoch": 0.5235648662882556, "grad_norm": 1.6875, "learning_rate": 0.0001794712067514273, "loss": 2.9266, "step": 11184 }, { "epoch": 0.5236116800280882, "grad_norm": 1.6484375, "learning_rate": 0.0001794676634349328, "loss": 2.9442, "step": 11185 }, { "epoch": 0.5236584937679208, "grad_norm": 1.578125, "learning_rate": 0.00017946411984765687, "loss": 3.105, "step": 11186 }, { "epoch": 0.5237053075077536, "grad_norm": 1.5859375, "learning_rate": 0.00017946057598961152, "loss": 2.8609, "step": 11187 }, { "epoch": 0.5237521212475862, "grad_norm": 1.4375, "learning_rate": 0.00017945703186080886, "loss": 3.2088, "step": 11188 }, { "epoch": 0.5237989349874188, "grad_norm": 1.5625, "learning_rate": 0.00017945348746126096, "loss": 2.9055, "step": 11189 }, { "epoch": 0.5238457487272514, "grad_norm": 2.0, "learning_rate": 0.0001794499427909799, "loss": 2.7658, "step": 11190 }, { "epoch": 0.523892562467084, "grad_norm": 1.09375, "learning_rate": 0.00017944639784997772, "loss": 2.9227, "step": 11191 }, { "epoch": 0.5239393762069168, "grad_norm": 1.1875, "learning_rate": 0.00017944285263826656, "loss": 3.3322, "step": 11192 }, { "epoch": 0.5239861899467494, "grad_norm": 1.1328125, "learning_rate": 0.00017943930715585843, "loss": 2.8441, "step": 11193 }, { "epoch": 0.524033003686582, "grad_norm": 1.546875, "learning_rate": 0.0001794357614027655, "loss": 2.9308, "step": 11194 }, { "epoch": 0.5240798174264146, "grad_norm": 1.2578125, "learning_rate": 0.0001794322153789998, "loss": 2.9277, "step": 11195 }, { "epoch": 0.5241266311662472, "grad_norm": 1.203125, "learning_rate": 0.00017942866908457342, "loss": 2.8445, "step": 11196 }, { "epoch": 0.52417344490608, "grad_norm": 1.703125, "learning_rate": 0.0001794251225194984, "loss": 2.7783, "step": 11197 }, { "epoch": 0.5242202586459126, "grad_norm": 1.3984375, "learning_rate": 0.0001794215756837869, "loss": 2.6589, "step": 11198 }, { "epoch": 0.5242670723857452, "grad_norm": 1.9296875, "learning_rate": 0.000179418028577451, "loss": 2.9262, "step": 11199 }, { "epoch": 0.5243138861255778, "grad_norm": 1.2265625, "learning_rate": 0.0001794144812005027, "loss": 2.7331, "step": 11200 }, { "epoch": 0.5243606998654105, "grad_norm": 1.1640625, "learning_rate": 0.00017941093355295417, "loss": 2.6358, "step": 11201 }, { "epoch": 0.5244075136052432, "grad_norm": 1.4375, "learning_rate": 0.00017940738563481748, "loss": 2.6547, "step": 11202 }, { "epoch": 0.5244543273450758, "grad_norm": 1.53125, "learning_rate": 0.00017940383744610472, "loss": 3.0185, "step": 11203 }, { "epoch": 0.5245011410849084, "grad_norm": 1.1796875, "learning_rate": 0.00017940028898682795, "loss": 2.718, "step": 11204 }, { "epoch": 0.524547954824741, "grad_norm": 1.1484375, "learning_rate": 0.0001793967402569993, "loss": 4.2307, "step": 11205 }, { "epoch": 0.5245947685645737, "grad_norm": 1.15625, "learning_rate": 0.00017939319125663086, "loss": 2.4422, "step": 11206 }, { "epoch": 0.5246415823044064, "grad_norm": 1.8359375, "learning_rate": 0.0001793896419857347, "loss": 2.8068, "step": 11207 }, { "epoch": 0.524688396044239, "grad_norm": 1.296875, "learning_rate": 0.00017938609244432286, "loss": 2.5278, "step": 11208 }, { "epoch": 0.5247352097840716, "grad_norm": 1.3125, "learning_rate": 0.00017938254263240758, "loss": 2.7468, "step": 11209 }, { "epoch": 0.5247820235239042, "grad_norm": 1.4453125, "learning_rate": 0.00017937899255000085, "loss": 2.8334, "step": 11210 }, { "epoch": 0.5248288372637369, "grad_norm": 2.125, "learning_rate": 0.0001793754421971148, "loss": 2.6398, "step": 11211 }, { "epoch": 0.5248756510035696, "grad_norm": 1.1015625, "learning_rate": 0.00017937189157376154, "loss": 2.9433, "step": 11212 }, { "epoch": 0.5249224647434022, "grad_norm": 1.515625, "learning_rate": 0.0001793683406799531, "loss": 2.494, "step": 11213 }, { "epoch": 0.5249692784832348, "grad_norm": 1.078125, "learning_rate": 0.00017936478951570165, "loss": 2.2733, "step": 11214 }, { "epoch": 0.5250160922230674, "grad_norm": 1.2265625, "learning_rate": 0.00017936123808101926, "loss": 2.8752, "step": 11215 }, { "epoch": 0.5250629059629001, "grad_norm": 1.546875, "learning_rate": 0.00017935768637591804, "loss": 2.9679, "step": 11216 }, { "epoch": 0.5251097197027328, "grad_norm": 1.15625, "learning_rate": 0.00017935413440041008, "loss": 2.6784, "step": 11217 }, { "epoch": 0.5251565334425654, "grad_norm": 1.9375, "learning_rate": 0.0001793505821545075, "loss": 2.7125, "step": 11218 }, { "epoch": 0.525203347182398, "grad_norm": 1.3046875, "learning_rate": 0.0001793470296382224, "loss": 2.5489, "step": 11219 }, { "epoch": 0.5252501609222306, "grad_norm": 1.4765625, "learning_rate": 0.0001793434768515669, "loss": 3.1882, "step": 11220 }, { "epoch": 0.5252969746620633, "grad_norm": 1.7265625, "learning_rate": 0.0001793399237945531, "loss": 2.9895, "step": 11221 }, { "epoch": 0.525343788401896, "grad_norm": 1.21875, "learning_rate": 0.00017933637046719305, "loss": 2.8755, "step": 11222 }, { "epoch": 0.5253906021417286, "grad_norm": 2.203125, "learning_rate": 0.00017933281686949892, "loss": 2.9735, "step": 11223 }, { "epoch": 0.5254374158815612, "grad_norm": 1.671875, "learning_rate": 0.0001793292630014828, "loss": 3.2443, "step": 11224 }, { "epoch": 0.5254842296213939, "grad_norm": 1.2734375, "learning_rate": 0.0001793257088631568, "loss": 2.6582, "step": 11225 }, { "epoch": 0.5255310433612265, "grad_norm": 1.0546875, "learning_rate": 0.00017932215445453305, "loss": 2.3251, "step": 11226 }, { "epoch": 0.5255778571010592, "grad_norm": 1.578125, "learning_rate": 0.00017931859977562365, "loss": 2.865, "step": 11227 }, { "epoch": 0.5256246708408918, "grad_norm": 1.5078125, "learning_rate": 0.00017931504482644067, "loss": 2.9132, "step": 11228 }, { "epoch": 0.5256714845807244, "grad_norm": 1.8125, "learning_rate": 0.00017931148960699628, "loss": 3.0185, "step": 11229 }, { "epoch": 0.525718298320557, "grad_norm": 1.6015625, "learning_rate": 0.0001793079341173026, "loss": 2.7573, "step": 11230 }, { "epoch": 0.5257651120603897, "grad_norm": 1.3046875, "learning_rate": 0.0001793043783573717, "loss": 3.1341, "step": 11231 }, { "epoch": 0.5258119258002224, "grad_norm": 1.4140625, "learning_rate": 0.0001793008223272157, "loss": 2.4883, "step": 11232 }, { "epoch": 0.525858739540055, "grad_norm": 1.71875, "learning_rate": 0.00017929726602684674, "loss": 2.9651, "step": 11233 }, { "epoch": 0.5259055532798876, "grad_norm": 1.3984375, "learning_rate": 0.0001792937094562769, "loss": 2.8189, "step": 11234 }, { "epoch": 0.5259523670197203, "grad_norm": 2.0625, "learning_rate": 0.00017929015261551837, "loss": 2.6122, "step": 11235 }, { "epoch": 0.5259991807595529, "grad_norm": 1.53125, "learning_rate": 0.00017928659550458322, "loss": 2.7809, "step": 11236 }, { "epoch": 0.5260459944993856, "grad_norm": 1.3828125, "learning_rate": 0.00017928303812348356, "loss": 2.9334, "step": 11237 }, { "epoch": 0.5260928082392182, "grad_norm": 1.4375, "learning_rate": 0.00017927948047223157, "loss": 2.801, "step": 11238 }, { "epoch": 0.5261396219790508, "grad_norm": 1.5859375, "learning_rate": 0.0001792759225508393, "loss": 2.4063, "step": 11239 }, { "epoch": 0.5261864357188835, "grad_norm": 1.09375, "learning_rate": 0.0001792723643593189, "loss": 2.7153, "step": 11240 }, { "epoch": 0.5262332494587161, "grad_norm": 1.296875, "learning_rate": 0.00017926880589768252, "loss": 2.9143, "step": 11241 }, { "epoch": 0.5262800631985488, "grad_norm": 2.453125, "learning_rate": 0.00017926524716594227, "loss": 2.9824, "step": 11242 }, { "epoch": 0.5263268769383814, "grad_norm": 1.296875, "learning_rate": 0.00017926168816411025, "loss": 3.249, "step": 11243 }, { "epoch": 0.526373690678214, "grad_norm": 1.3828125, "learning_rate": 0.00017925812889219863, "loss": 2.9948, "step": 11244 }, { "epoch": 0.5264205044180467, "grad_norm": 1.171875, "learning_rate": 0.0001792545693502195, "loss": 2.8924, "step": 11245 }, { "epoch": 0.5264673181578793, "grad_norm": 1.2734375, "learning_rate": 0.000179251009538185, "loss": 2.8849, "step": 11246 }, { "epoch": 0.526514131897712, "grad_norm": 1.9140625, "learning_rate": 0.0001792474494561073, "loss": 3.1099, "step": 11247 }, { "epoch": 0.5265609456375446, "grad_norm": 1.2890625, "learning_rate": 0.00017924388910399848, "loss": 2.7951, "step": 11248 }, { "epoch": 0.5266077593773772, "grad_norm": 1.5, "learning_rate": 0.0001792403284818707, "loss": 2.681, "step": 11249 }, { "epoch": 0.5266545731172099, "grad_norm": 1.09375, "learning_rate": 0.00017923676758973604, "loss": 2.738, "step": 11250 }, { "epoch": 0.5267013868570426, "grad_norm": 1.53125, "learning_rate": 0.00017923320642760672, "loss": 3.0027, "step": 11251 }, { "epoch": 0.5267482005968752, "grad_norm": 1.671875, "learning_rate": 0.0001792296449954948, "loss": 2.9088, "step": 11252 }, { "epoch": 0.5267950143367078, "grad_norm": 1.5234375, "learning_rate": 0.00017922608329341247, "loss": 3.1947, "step": 11253 }, { "epoch": 0.5268418280765405, "grad_norm": 1.453125, "learning_rate": 0.0001792225213213718, "loss": 2.6458, "step": 11254 }, { "epoch": 0.5268886418163731, "grad_norm": 1.28125, "learning_rate": 0.000179218959079385, "loss": 2.4729, "step": 11255 }, { "epoch": 0.5269354555562058, "grad_norm": 1.4921875, "learning_rate": 0.00017921539656746418, "loss": 3.1877, "step": 11256 }, { "epoch": 0.5269822692960384, "grad_norm": 1.4296875, "learning_rate": 0.00017921183378562144, "loss": 2.558, "step": 11257 }, { "epoch": 0.527029083035871, "grad_norm": 1.2421875, "learning_rate": 0.000179208270733869, "loss": 2.8778, "step": 11258 }, { "epoch": 0.5270758967757037, "grad_norm": 2.03125, "learning_rate": 0.00017920470741221895, "loss": 3.1304, "step": 11259 }, { "epoch": 0.5271227105155363, "grad_norm": 1.453125, "learning_rate": 0.0001792011438206834, "loss": 3.2703, "step": 11260 }, { "epoch": 0.527169524255369, "grad_norm": 1.6015625, "learning_rate": 0.0001791975799592746, "loss": 3.0165, "step": 11261 }, { "epoch": 0.5272163379952016, "grad_norm": 2.140625, "learning_rate": 0.00017919401582800455, "loss": 2.611, "step": 11262 }, { "epoch": 0.5272631517350342, "grad_norm": 1.390625, "learning_rate": 0.00017919045142688548, "loss": 3.02, "step": 11263 }, { "epoch": 0.5273099654748669, "grad_norm": 1.4140625, "learning_rate": 0.00017918688675592955, "loss": 2.8765, "step": 11264 }, { "epoch": 0.5273567792146995, "grad_norm": 1.3828125, "learning_rate": 0.00017918332181514888, "loss": 2.6552, "step": 11265 }, { "epoch": 0.5274035929545322, "grad_norm": 1.3203125, "learning_rate": 0.0001791797566045556, "loss": 2.8007, "step": 11266 }, { "epoch": 0.5274504066943648, "grad_norm": 1.484375, "learning_rate": 0.00017917619112416187, "loss": 2.5562, "step": 11267 }, { "epoch": 0.5274972204341974, "grad_norm": 1.296875, "learning_rate": 0.00017917262537397988, "loss": 2.8439, "step": 11268 }, { "epoch": 0.5275440341740301, "grad_norm": 1.4921875, "learning_rate": 0.00017916905935402172, "loss": 3.0448, "step": 11269 }, { "epoch": 0.5275908479138627, "grad_norm": 1.2890625, "learning_rate": 0.00017916549306429958, "loss": 2.4355, "step": 11270 }, { "epoch": 0.5276376616536954, "grad_norm": 2.15625, "learning_rate": 0.00017916192650482558, "loss": 2.8662, "step": 11271 }, { "epoch": 0.527684475393528, "grad_norm": 2.390625, "learning_rate": 0.0001791583596756119, "loss": 2.7422, "step": 11272 }, { "epoch": 0.5277312891333606, "grad_norm": 1.1328125, "learning_rate": 0.0001791547925766707, "loss": 3.0212, "step": 11273 }, { "epoch": 0.5277781028731933, "grad_norm": 1.2890625, "learning_rate": 0.0001791512252080141, "loss": 2.347, "step": 11274 }, { "epoch": 0.5278249166130259, "grad_norm": 1.578125, "learning_rate": 0.00017914765756965427, "loss": 2.7076, "step": 11275 }, { "epoch": 0.5278717303528586, "grad_norm": 1.8984375, "learning_rate": 0.0001791440896616034, "loss": 3.3774, "step": 11276 }, { "epoch": 0.5279185440926912, "grad_norm": 1.25, "learning_rate": 0.0001791405214838736, "loss": 2.6656, "step": 11277 }, { "epoch": 0.5279653578325239, "grad_norm": 1.8125, "learning_rate": 0.00017913695303647703, "loss": 2.5639, "step": 11278 }, { "epoch": 0.5280121715723565, "grad_norm": 1.046875, "learning_rate": 0.00017913338431942591, "loss": 2.4289, "step": 11279 }, { "epoch": 0.5280589853121891, "grad_norm": 1.3515625, "learning_rate": 0.00017912981533273233, "loss": 3.0941, "step": 11280 }, { "epoch": 0.5281057990520218, "grad_norm": 4.21875, "learning_rate": 0.00017912624607640848, "loss": 3.2493, "step": 11281 }, { "epoch": 0.5281526127918544, "grad_norm": 1.7890625, "learning_rate": 0.00017912267655046654, "loss": 3.09, "step": 11282 }, { "epoch": 0.528199426531687, "grad_norm": 1.4609375, "learning_rate": 0.00017911910675491864, "loss": 2.9139, "step": 11283 }, { "epoch": 0.5282462402715197, "grad_norm": 1.2734375, "learning_rate": 0.00017911553668977694, "loss": 3.0743, "step": 11284 }, { "epoch": 0.5282930540113523, "grad_norm": 1.5234375, "learning_rate": 0.00017911196635505362, "loss": 3.1463, "step": 11285 }, { "epoch": 0.528339867751185, "grad_norm": 1.765625, "learning_rate": 0.0001791083957507609, "loss": 2.8799, "step": 11286 }, { "epoch": 0.5283866814910176, "grad_norm": 1.0703125, "learning_rate": 0.00017910482487691083, "loss": 2.6007, "step": 11287 }, { "epoch": 0.5284334952308503, "grad_norm": 1.6015625, "learning_rate": 0.00017910125373351568, "loss": 2.8213, "step": 11288 }, { "epoch": 0.5284803089706829, "grad_norm": 1.40625, "learning_rate": 0.00017909768232058756, "loss": 2.9309, "step": 11289 }, { "epoch": 0.5285271227105155, "grad_norm": 1.453125, "learning_rate": 0.00017909411063813866, "loss": 2.924, "step": 11290 }, { "epoch": 0.5285739364503482, "grad_norm": 1.7890625, "learning_rate": 0.00017909053868618116, "loss": 3.1397, "step": 11291 }, { "epoch": 0.5286207501901808, "grad_norm": 1.5625, "learning_rate": 0.00017908696646472722, "loss": 2.7421, "step": 11292 }, { "epoch": 0.5286675639300135, "grad_norm": 1.265625, "learning_rate": 0.00017908339397378902, "loss": 2.6169, "step": 11293 }, { "epoch": 0.5287143776698461, "grad_norm": 1.3046875, "learning_rate": 0.00017907982121337872, "loss": 3.1547, "step": 11294 }, { "epoch": 0.5287611914096787, "grad_norm": 1.1484375, "learning_rate": 0.00017907624818350848, "loss": 2.3125, "step": 11295 }, { "epoch": 0.5288080051495114, "grad_norm": 1.328125, "learning_rate": 0.00017907267488419053, "loss": 2.719, "step": 11296 }, { "epoch": 0.528854818889344, "grad_norm": 1.3046875, "learning_rate": 0.00017906910131543699, "loss": 2.996, "step": 11297 }, { "epoch": 0.5289016326291767, "grad_norm": 1.0859375, "learning_rate": 0.00017906552747726007, "loss": 2.6491, "step": 11298 }, { "epoch": 0.5289484463690093, "grad_norm": 1.0234375, "learning_rate": 0.00017906195336967194, "loss": 2.6011, "step": 11299 }, { "epoch": 0.5289952601088419, "grad_norm": 1.6328125, "learning_rate": 0.00017905837899268473, "loss": 2.7536, "step": 11300 }, { "epoch": 0.5290420738486746, "grad_norm": 1.3203125, "learning_rate": 0.0001790548043463107, "loss": 2.8341, "step": 11301 }, { "epoch": 0.5290888875885073, "grad_norm": 1.328125, "learning_rate": 0.000179051229430562, "loss": 2.7465, "step": 11302 }, { "epoch": 0.5291357013283399, "grad_norm": 1.2578125, "learning_rate": 0.0001790476542454508, "loss": 2.9307, "step": 11303 }, { "epoch": 0.5291825150681725, "grad_norm": 1.3671875, "learning_rate": 0.00017904407879098928, "loss": 3.032, "step": 11304 }, { "epoch": 0.5292293288080051, "grad_norm": 1.765625, "learning_rate": 0.0001790405030671896, "loss": 2.8283, "step": 11305 }, { "epoch": 0.5292761425478378, "grad_norm": 1.234375, "learning_rate": 0.00017903692707406404, "loss": 2.5681, "step": 11306 }, { "epoch": 0.5293229562876705, "grad_norm": 1.421875, "learning_rate": 0.0001790333508116247, "loss": 2.6963, "step": 11307 }, { "epoch": 0.5293697700275031, "grad_norm": 1.1640625, "learning_rate": 0.00017902977427988374, "loss": 2.5151, "step": 11308 }, { "epoch": 0.5294165837673357, "grad_norm": 1.2734375, "learning_rate": 0.00017902619747885344, "loss": 2.5943, "step": 11309 }, { "epoch": 0.5294633975071683, "grad_norm": 1.25, "learning_rate": 0.00017902262040854592, "loss": 3.2983, "step": 11310 }, { "epoch": 0.529510211247001, "grad_norm": 1.5078125, "learning_rate": 0.00017901904306897337, "loss": 3.0032, "step": 11311 }, { "epoch": 0.5295570249868337, "grad_norm": 1.546875, "learning_rate": 0.00017901546546014805, "loss": 3.3668, "step": 11312 }, { "epoch": 0.5296038387266663, "grad_norm": 2.171875, "learning_rate": 0.00017901188758208205, "loss": 2.5918, "step": 11313 }, { "epoch": 0.5296506524664989, "grad_norm": 1.6171875, "learning_rate": 0.00017900830943478764, "loss": 3.0239, "step": 11314 }, { "epoch": 0.5296974662063315, "grad_norm": 1.6015625, "learning_rate": 0.00017900473101827697, "loss": 2.8759, "step": 11315 }, { "epoch": 0.5297442799461642, "grad_norm": 1.2421875, "learning_rate": 0.00017900115233256223, "loss": 2.6993, "step": 11316 }, { "epoch": 0.5297910936859969, "grad_norm": 1.53125, "learning_rate": 0.00017899757337765565, "loss": 2.7455, "step": 11317 }, { "epoch": 0.5298379074258295, "grad_norm": 2.9375, "learning_rate": 0.0001789939941535694, "loss": 2.7602, "step": 11318 }, { "epoch": 0.5298847211656621, "grad_norm": 1.640625, "learning_rate": 0.00017899041466031568, "loss": 2.7137, "step": 11319 }, { "epoch": 0.5299315349054947, "grad_norm": 1.5859375, "learning_rate": 0.00017898683489790668, "loss": 2.324, "step": 11320 }, { "epoch": 0.5299783486453274, "grad_norm": 1.609375, "learning_rate": 0.00017898325486635463, "loss": 3.0149, "step": 11321 }, { "epoch": 0.5300251623851601, "grad_norm": 1.4453125, "learning_rate": 0.0001789796745656717, "loss": 2.7188, "step": 11322 }, { "epoch": 0.5300719761249927, "grad_norm": 1.5234375, "learning_rate": 0.0001789760939958701, "loss": 2.4514, "step": 11323 }, { "epoch": 0.5301187898648253, "grad_norm": 1.234375, "learning_rate": 0.000178972513156962, "loss": 2.8488, "step": 11324 }, { "epoch": 0.5301656036046579, "grad_norm": 1.296875, "learning_rate": 0.00017896893204895963, "loss": 2.7818, "step": 11325 }, { "epoch": 0.5302124173444906, "grad_norm": 1.3828125, "learning_rate": 0.0001789653506718752, "loss": 2.4333, "step": 11326 }, { "epoch": 0.5302592310843233, "grad_norm": 1.65625, "learning_rate": 0.0001789617690257209, "loss": 2.9046, "step": 11327 }, { "epoch": 0.5303060448241559, "grad_norm": 1.265625, "learning_rate": 0.00017895818711050895, "loss": 2.3691, "step": 11328 }, { "epoch": 0.5303528585639885, "grad_norm": 1.5, "learning_rate": 0.0001789546049262515, "loss": 2.7971, "step": 11329 }, { "epoch": 0.5303996723038211, "grad_norm": 1.59375, "learning_rate": 0.00017895102247296083, "loss": 3.0614, "step": 11330 }, { "epoch": 0.5304464860436539, "grad_norm": 1.3046875, "learning_rate": 0.0001789474397506491, "loss": 2.8514, "step": 11331 }, { "epoch": 0.5304932997834865, "grad_norm": 1.4140625, "learning_rate": 0.00017894385675932853, "loss": 2.5467, "step": 11332 }, { "epoch": 0.5305401135233191, "grad_norm": 1.328125, "learning_rate": 0.00017894027349901138, "loss": 2.7968, "step": 11333 }, { "epoch": 0.5305869272631517, "grad_norm": 1.6640625, "learning_rate": 0.00017893668996970977, "loss": 2.4267, "step": 11334 }, { "epoch": 0.5306337410029843, "grad_norm": 2.078125, "learning_rate": 0.00017893310617143592, "loss": 2.764, "step": 11335 }, { "epoch": 0.5306805547428171, "grad_norm": 1.3046875, "learning_rate": 0.00017892952210420214, "loss": 2.7595, "step": 11336 }, { "epoch": 0.5307273684826497, "grad_norm": 1.46875, "learning_rate": 0.00017892593776802054, "loss": 2.8224, "step": 11337 }, { "epoch": 0.5307741822224823, "grad_norm": 1.3515625, "learning_rate": 0.00017892235316290336, "loss": 2.7955, "step": 11338 }, { "epoch": 0.5308209959623149, "grad_norm": 1.1484375, "learning_rate": 0.00017891876828886284, "loss": 2.8019, "step": 11339 }, { "epoch": 0.5308678097021475, "grad_norm": 1.4609375, "learning_rate": 0.00017891518314591117, "loss": 3.2255, "step": 11340 }, { "epoch": 0.5309146234419803, "grad_norm": 1.6015625, "learning_rate": 0.00017891159773406057, "loss": 3.2081, "step": 11341 }, { "epoch": 0.5309614371818129, "grad_norm": 1.359375, "learning_rate": 0.0001789080120533233, "loss": 2.9484, "step": 11342 }, { "epoch": 0.5310082509216455, "grad_norm": 2.125, "learning_rate": 0.00017890442610371147, "loss": 2.8924, "step": 11343 }, { "epoch": 0.5310550646614781, "grad_norm": 1.5, "learning_rate": 0.00017890083988523743, "loss": 3.187, "step": 11344 }, { "epoch": 0.5311018784013107, "grad_norm": 1.1796875, "learning_rate": 0.00017889725339791332, "loss": 2.6285, "step": 11345 }, { "epoch": 0.5311486921411435, "grad_norm": 1.9375, "learning_rate": 0.0001788936666417514, "loss": 2.729, "step": 11346 }, { "epoch": 0.5311955058809761, "grad_norm": 1.46875, "learning_rate": 0.00017889007961676383, "loss": 2.8863, "step": 11347 }, { "epoch": 0.5312423196208087, "grad_norm": 1.21875, "learning_rate": 0.0001788864923229629, "loss": 2.9192, "step": 11348 }, { "epoch": 0.5312891333606413, "grad_norm": 2.21875, "learning_rate": 0.0001788829047603608, "loss": 3.1111, "step": 11349 }, { "epoch": 0.5313359471004739, "grad_norm": 1.4453125, "learning_rate": 0.00017887931692896976, "loss": 2.7318, "step": 11350 }, { "epoch": 0.5313827608403067, "grad_norm": 1.4140625, "learning_rate": 0.00017887572882880202, "loss": 2.3481, "step": 11351 }, { "epoch": 0.5314295745801393, "grad_norm": 1.5625, "learning_rate": 0.00017887214045986976, "loss": 2.8, "step": 11352 }, { "epoch": 0.5314763883199719, "grad_norm": 1.21875, "learning_rate": 0.00017886855182218527, "loss": 2.7311, "step": 11353 }, { "epoch": 0.5315232020598045, "grad_norm": 1.5, "learning_rate": 0.00017886496291576073, "loss": 3.0958, "step": 11354 }, { "epoch": 0.5315700157996371, "grad_norm": 1.2421875, "learning_rate": 0.00017886137374060842, "loss": 2.6586, "step": 11355 }, { "epoch": 0.5316168295394699, "grad_norm": 1.234375, "learning_rate": 0.0001788577842967405, "loss": 2.8354, "step": 11356 }, { "epoch": 0.5316636432793025, "grad_norm": 1.34375, "learning_rate": 0.00017885419458416924, "loss": 2.6558, "step": 11357 }, { "epoch": 0.5317104570191351, "grad_norm": 1.7265625, "learning_rate": 0.0001788506046029069, "loss": 2.6215, "step": 11358 }, { "epoch": 0.5317572707589677, "grad_norm": 1.421875, "learning_rate": 0.00017884701435296567, "loss": 2.7482, "step": 11359 }, { "epoch": 0.5318040844988003, "grad_norm": 1.375, "learning_rate": 0.00017884342383435778, "loss": 4.0274, "step": 11360 }, { "epoch": 0.5318508982386331, "grad_norm": 1.9765625, "learning_rate": 0.0001788398330470955, "loss": 2.8522, "step": 11361 }, { "epoch": 0.5318977119784657, "grad_norm": 1.4609375, "learning_rate": 0.00017883624199119104, "loss": 2.9442, "step": 11362 }, { "epoch": 0.5319445257182983, "grad_norm": 1.1875, "learning_rate": 0.00017883265066665665, "loss": 3.1763, "step": 11363 }, { "epoch": 0.5319913394581309, "grad_norm": 1.0625, "learning_rate": 0.00017882905907350456, "loss": 2.4055, "step": 11364 }, { "epoch": 0.5320381531979635, "grad_norm": 1.734375, "learning_rate": 0.000178825467211747, "loss": 2.8702, "step": 11365 }, { "epoch": 0.5320849669377963, "grad_norm": 1.546875, "learning_rate": 0.0001788218750813962, "loss": 2.7438, "step": 11366 }, { "epoch": 0.5321317806776289, "grad_norm": 2.15625, "learning_rate": 0.00017881828268246443, "loss": 2.6771, "step": 11367 }, { "epoch": 0.5321785944174615, "grad_norm": 1.953125, "learning_rate": 0.0001788146900149639, "loss": 3.0562, "step": 11368 }, { "epoch": 0.5322254081572941, "grad_norm": 1.5234375, "learning_rate": 0.00017881109707890692, "loss": 2.9095, "step": 11369 }, { "epoch": 0.5322722218971268, "grad_norm": 1.3203125, "learning_rate": 0.00017880750387430565, "loss": 2.9152, "step": 11370 }, { "epoch": 0.5323190356369595, "grad_norm": 1.6640625, "learning_rate": 0.00017880391040117237, "loss": 2.573, "step": 11371 }, { "epoch": 0.5323658493767921, "grad_norm": 2.171875, "learning_rate": 0.0001788003166595193, "loss": 2.6879, "step": 11372 }, { "epoch": 0.5324126631166247, "grad_norm": 1.4140625, "learning_rate": 0.00017879672264935873, "loss": 2.5321, "step": 11373 }, { "epoch": 0.5324594768564573, "grad_norm": 1.90625, "learning_rate": 0.0001787931283707029, "loss": 2.757, "step": 11374 }, { "epoch": 0.5325062905962901, "grad_norm": 1.8984375, "learning_rate": 0.00017878953382356402, "loss": 3.2613, "step": 11375 }, { "epoch": 0.5325531043361227, "grad_norm": 1.375, "learning_rate": 0.00017878593900795436, "loss": 2.6071, "step": 11376 }, { "epoch": 0.5325999180759553, "grad_norm": 1.515625, "learning_rate": 0.00017878234392388614, "loss": 3.0485, "step": 11377 }, { "epoch": 0.5326467318157879, "grad_norm": 1.1953125, "learning_rate": 0.00017877874857137168, "loss": 2.8091, "step": 11378 }, { "epoch": 0.5326935455556205, "grad_norm": 1.7421875, "learning_rate": 0.00017877515295042313, "loss": 4.1244, "step": 11379 }, { "epoch": 0.5327403592954533, "grad_norm": 2.609375, "learning_rate": 0.00017877155706105287, "loss": 2.9947, "step": 11380 }, { "epoch": 0.5327871730352859, "grad_norm": 2.3125, "learning_rate": 0.00017876796090327303, "loss": 2.4866, "step": 11381 }, { "epoch": 0.5328339867751185, "grad_norm": 1.234375, "learning_rate": 0.00017876436447709595, "loss": 2.768, "step": 11382 }, { "epoch": 0.5328808005149511, "grad_norm": 1.3359375, "learning_rate": 0.00017876076778253382, "loss": 2.8536, "step": 11383 }, { "epoch": 0.5329276142547837, "grad_norm": 1.3984375, "learning_rate": 0.00017875717081959893, "loss": 2.5707, "step": 11384 }, { "epoch": 0.5329744279946165, "grad_norm": 1.6796875, "learning_rate": 0.00017875357358830356, "loss": 3.1038, "step": 11385 }, { "epoch": 0.5330212417344491, "grad_norm": 1.203125, "learning_rate": 0.0001787499760886599, "loss": 2.163, "step": 11386 }, { "epoch": 0.5330680554742817, "grad_norm": 1.28125, "learning_rate": 0.00017874637832068028, "loss": 2.8032, "step": 11387 }, { "epoch": 0.5331148692141143, "grad_norm": 1.15625, "learning_rate": 0.00017874278028437695, "loss": 2.7729, "step": 11388 }, { "epoch": 0.533161682953947, "grad_norm": 1.078125, "learning_rate": 0.0001787391819797621, "loss": 2.9151, "step": 11389 }, { "epoch": 0.5332084966937797, "grad_norm": 2.28125, "learning_rate": 0.00017873558340684808, "loss": 1.882, "step": 11390 }, { "epoch": 0.5332553104336123, "grad_norm": 1.1640625, "learning_rate": 0.0001787319845656471, "loss": 2.9791, "step": 11391 }, { "epoch": 0.5333021241734449, "grad_norm": 1.5625, "learning_rate": 0.0001787283854561714, "loss": 3.053, "step": 11392 }, { "epoch": 0.5333489379132775, "grad_norm": 1.2109375, "learning_rate": 0.0001787247860784333, "loss": 2.9463, "step": 11393 }, { "epoch": 0.5333957516531102, "grad_norm": 1.5, "learning_rate": 0.00017872118643244504, "loss": 2.9547, "step": 11394 }, { "epoch": 0.5334425653929429, "grad_norm": 1.3203125, "learning_rate": 0.00017871758651821887, "loss": 2.6987, "step": 11395 }, { "epoch": 0.5334893791327755, "grad_norm": 1.359375, "learning_rate": 0.0001787139863357671, "loss": 2.7882, "step": 11396 }, { "epoch": 0.5335361928726081, "grad_norm": 2.25, "learning_rate": 0.00017871038588510195, "loss": 3.1909, "step": 11397 }, { "epoch": 0.5335830066124407, "grad_norm": 1.75, "learning_rate": 0.00017870678516623573, "loss": 3.0384, "step": 11398 }, { "epoch": 0.5336298203522734, "grad_norm": 2.25, "learning_rate": 0.0001787031841791807, "loss": 2.729, "step": 11399 }, { "epoch": 0.5336766340921061, "grad_norm": 1.2109375, "learning_rate": 0.0001786995829239491, "loss": 2.6494, "step": 11400 }, { "epoch": 0.5337234478319387, "grad_norm": 1.15625, "learning_rate": 0.00017869598140055323, "loss": 3.3013, "step": 11401 }, { "epoch": 0.5337702615717713, "grad_norm": 1.5859375, "learning_rate": 0.00017869237960900532, "loss": 2.6888, "step": 11402 }, { "epoch": 0.5338170753116039, "grad_norm": 1.28125, "learning_rate": 0.0001786887775493177, "loss": 3.0071, "step": 11403 }, { "epoch": 0.5338638890514366, "grad_norm": 1.3125, "learning_rate": 0.00017868517522150263, "loss": 2.7951, "step": 11404 }, { "epoch": 0.5339107027912693, "grad_norm": 1.25, "learning_rate": 0.00017868157262557234, "loss": 2.7032, "step": 11405 }, { "epoch": 0.5339575165311019, "grad_norm": 1.6640625, "learning_rate": 0.0001786779697615392, "loss": 2.7206, "step": 11406 }, { "epoch": 0.5340043302709345, "grad_norm": 1.375, "learning_rate": 0.00017867436662941536, "loss": 3.2048, "step": 11407 }, { "epoch": 0.5340511440107671, "grad_norm": 1.4453125, "learning_rate": 0.0001786707632292132, "loss": 3.0173, "step": 11408 }, { "epoch": 0.5340979577505998, "grad_norm": 1.453125, "learning_rate": 0.00017866715956094494, "loss": 2.5465, "step": 11409 }, { "epoch": 0.5341447714904325, "grad_norm": 7.09375, "learning_rate": 0.0001786635556246229, "loss": 2.8856, "step": 11410 }, { "epoch": 0.5341915852302651, "grad_norm": 1.4296875, "learning_rate": 0.00017865995142025932, "loss": 2.8163, "step": 11411 }, { "epoch": 0.5342383989700977, "grad_norm": 1.1796875, "learning_rate": 0.00017865634694786653, "loss": 2.7596, "step": 11412 }, { "epoch": 0.5342852127099303, "grad_norm": 1.203125, "learning_rate": 0.00017865274220745677, "loss": 3.144, "step": 11413 }, { "epoch": 0.534332026449763, "grad_norm": 1.4296875, "learning_rate": 0.00017864913719904232, "loss": 3.3667, "step": 11414 }, { "epoch": 0.5343788401895957, "grad_norm": 1.3671875, "learning_rate": 0.0001786455319226355, "loss": 3.232, "step": 11415 }, { "epoch": 0.5344256539294283, "grad_norm": 1.265625, "learning_rate": 0.00017864192637824858, "loss": 2.7176, "step": 11416 }, { "epoch": 0.5344724676692609, "grad_norm": 1.46875, "learning_rate": 0.00017863832056589385, "loss": 2.65, "step": 11417 }, { "epoch": 0.5345192814090935, "grad_norm": 1.15625, "learning_rate": 0.00017863471448558357, "loss": 2.7073, "step": 11418 }, { "epoch": 0.5345660951489262, "grad_norm": 2.609375, "learning_rate": 0.00017863110813733007, "loss": 3.5685, "step": 11419 }, { "epoch": 0.5346129088887589, "grad_norm": 1.1796875, "learning_rate": 0.00017862750152114557, "loss": 2.7007, "step": 11420 }, { "epoch": 0.5346597226285915, "grad_norm": 1.3671875, "learning_rate": 0.00017862389463704244, "loss": 2.641, "step": 11421 }, { "epoch": 0.5347065363684241, "grad_norm": 1.203125, "learning_rate": 0.0001786202874850329, "loss": 2.4961, "step": 11422 }, { "epoch": 0.5347533501082568, "grad_norm": 2.109375, "learning_rate": 0.0001786166800651293, "loss": 2.6731, "step": 11423 }, { "epoch": 0.5348001638480894, "grad_norm": 1.28125, "learning_rate": 0.0001786130723773439, "loss": 2.9976, "step": 11424 }, { "epoch": 0.5348469775879221, "grad_norm": 1.7265625, "learning_rate": 0.00017860946442168902, "loss": 2.8542, "step": 11425 }, { "epoch": 0.5348937913277547, "grad_norm": 1.53125, "learning_rate": 0.00017860585619817695, "loss": 2.674, "step": 11426 }, { "epoch": 0.5349406050675873, "grad_norm": 1.296875, "learning_rate": 0.0001786022477068199, "loss": 3.1188, "step": 11427 }, { "epoch": 0.53498741880742, "grad_norm": 1.2734375, "learning_rate": 0.00017859863894763026, "loss": 3.0505, "step": 11428 }, { "epoch": 0.5350342325472526, "grad_norm": 2.046875, "learning_rate": 0.00017859502992062032, "loss": 2.8258, "step": 11429 }, { "epoch": 0.5350810462870853, "grad_norm": 1.2265625, "learning_rate": 0.00017859142062580236, "loss": 2.822, "step": 11430 }, { "epoch": 0.5351278600269179, "grad_norm": 1.5078125, "learning_rate": 0.00017858781106318866, "loss": 3.1348, "step": 11431 }, { "epoch": 0.5351746737667505, "grad_norm": 1.4453125, "learning_rate": 0.00017858420123279155, "loss": 2.6493, "step": 11432 }, { "epoch": 0.5352214875065832, "grad_norm": 1.203125, "learning_rate": 0.0001785805911346233, "loss": 2.837, "step": 11433 }, { "epoch": 0.5352683012464158, "grad_norm": 1.5234375, "learning_rate": 0.00017857698076869624, "loss": 3.257, "step": 11434 }, { "epoch": 0.5353151149862485, "grad_norm": 1.421875, "learning_rate": 0.00017857337013502263, "loss": 3.0192, "step": 11435 }, { "epoch": 0.5353619287260811, "grad_norm": 1.2265625, "learning_rate": 0.00017856975923361487, "loss": 3.0171, "step": 11436 }, { "epoch": 0.5354087424659137, "grad_norm": 1.6796875, "learning_rate": 0.00017856614806448514, "loss": 4.7737, "step": 11437 }, { "epoch": 0.5354555562057464, "grad_norm": 1.515625, "learning_rate": 0.0001785625366276458, "loss": 2.698, "step": 11438 }, { "epoch": 0.535502369945579, "grad_norm": 1.640625, "learning_rate": 0.0001785589249231092, "loss": 2.8911, "step": 11439 }, { "epoch": 0.5355491836854117, "grad_norm": 1.21875, "learning_rate": 0.0001785553129508876, "loss": 2.7082, "step": 11440 }, { "epoch": 0.5355959974252443, "grad_norm": 3.375, "learning_rate": 0.00017855170071099326, "loss": 2.639, "step": 11441 }, { "epoch": 0.535642811165077, "grad_norm": 1.375, "learning_rate": 0.00017854808820343858, "loss": 3.067, "step": 11442 }, { "epoch": 0.5356896249049096, "grad_norm": 2.21875, "learning_rate": 0.00017854447542823586, "loss": 3.1062, "step": 11443 }, { "epoch": 0.5357364386447422, "grad_norm": 1.78125, "learning_rate": 0.00017854086238539731, "loss": 2.789, "step": 11444 }, { "epoch": 0.5357832523845749, "grad_norm": 1.6484375, "learning_rate": 0.00017853724907493533, "loss": 2.6596, "step": 11445 }, { "epoch": 0.5358300661244075, "grad_norm": 1.4296875, "learning_rate": 0.00017853363549686225, "loss": 2.6225, "step": 11446 }, { "epoch": 0.5358768798642402, "grad_norm": 1.40625, "learning_rate": 0.00017853002165119032, "loss": 3.0673, "step": 11447 }, { "epoch": 0.5359236936040728, "grad_norm": 1.7578125, "learning_rate": 0.00017852640753793187, "loss": 2.5572, "step": 11448 }, { "epoch": 0.5359705073439054, "grad_norm": 1.1328125, "learning_rate": 0.00017852279315709927, "loss": 2.7999, "step": 11449 }, { "epoch": 0.5360173210837381, "grad_norm": 1.25, "learning_rate": 0.00017851917850870476, "loss": 3.056, "step": 11450 }, { "epoch": 0.5360641348235707, "grad_norm": 1.953125, "learning_rate": 0.00017851556359276067, "loss": 3.1835, "step": 11451 }, { "epoch": 0.5361109485634034, "grad_norm": 1.8515625, "learning_rate": 0.00017851194840927937, "loss": 2.581, "step": 11452 }, { "epoch": 0.536157762303236, "grad_norm": 1.40625, "learning_rate": 0.00017850833295827311, "loss": 2.8803, "step": 11453 }, { "epoch": 0.5362045760430686, "grad_norm": 1.421875, "learning_rate": 0.00017850471723975428, "loss": 2.2711, "step": 11454 }, { "epoch": 0.5362513897829013, "grad_norm": 1.421875, "learning_rate": 0.00017850110125373515, "loss": 2.8518, "step": 11455 }, { "epoch": 0.5362982035227339, "grad_norm": 1.3515625, "learning_rate": 0.00017849748500022806, "loss": 2.7566, "step": 11456 }, { "epoch": 0.5363450172625666, "grad_norm": 1.4453125, "learning_rate": 0.0001784938684792453, "loss": 2.8761, "step": 11457 }, { "epoch": 0.5363918310023992, "grad_norm": 1.578125, "learning_rate": 0.00017849025169079923, "loss": 3.63, "step": 11458 }, { "epoch": 0.5364386447422318, "grad_norm": 1.6328125, "learning_rate": 0.0001784866346349022, "loss": 3.1005, "step": 11459 }, { "epoch": 0.5364854584820645, "grad_norm": 1.078125, "learning_rate": 0.00017848301731156644, "loss": 2.6683, "step": 11460 }, { "epoch": 0.5365322722218971, "grad_norm": 1.3125, "learning_rate": 0.00017847939972080439, "loss": 2.9683, "step": 11461 }, { "epoch": 0.5365790859617298, "grad_norm": 1.6640625, "learning_rate": 0.0001784757818626283, "loss": 2.8366, "step": 11462 }, { "epoch": 0.5366258997015624, "grad_norm": 1.875, "learning_rate": 0.00017847216373705052, "loss": 3.3032, "step": 11463 }, { "epoch": 0.536672713441395, "grad_norm": 1.2109375, "learning_rate": 0.00017846854534408338, "loss": 2.976, "step": 11464 }, { "epoch": 0.5367195271812277, "grad_norm": 1.546875, "learning_rate": 0.0001784649266837392, "loss": 3.0532, "step": 11465 }, { "epoch": 0.5367663409210603, "grad_norm": 1.4296875, "learning_rate": 0.0001784613077560303, "loss": 2.8519, "step": 11466 }, { "epoch": 0.536813154660893, "grad_norm": 1.5390625, "learning_rate": 0.00017845768856096904, "loss": 2.7934, "step": 11467 }, { "epoch": 0.5368599684007256, "grad_norm": 1.1875, "learning_rate": 0.00017845406909856778, "loss": 2.3031, "step": 11468 }, { "epoch": 0.5369067821405582, "grad_norm": 1.328125, "learning_rate": 0.0001784504493688388, "loss": 2.5968, "step": 11469 }, { "epoch": 0.5369535958803909, "grad_norm": 1.2109375, "learning_rate": 0.00017844682937179443, "loss": 2.6442, "step": 11470 }, { "epoch": 0.5370004096202236, "grad_norm": 1.4140625, "learning_rate": 0.00017844320910744701, "loss": 2.8729, "step": 11471 }, { "epoch": 0.5370472233600562, "grad_norm": 1.1796875, "learning_rate": 0.0001784395885758089, "loss": 2.829, "step": 11472 }, { "epoch": 0.5370940370998888, "grad_norm": 1.2109375, "learning_rate": 0.00017843596777689243, "loss": 2.8673, "step": 11473 }, { "epoch": 0.5371408508397214, "grad_norm": 1.3203125, "learning_rate": 0.00017843234671070994, "loss": 2.8268, "step": 11474 }, { "epoch": 0.5371876645795541, "grad_norm": 1.625, "learning_rate": 0.00017842872537727377, "loss": 2.81, "step": 11475 }, { "epoch": 0.5372344783193868, "grad_norm": 6.03125, "learning_rate": 0.00017842510377659624, "loss": 2.4201, "step": 11476 }, { "epoch": 0.5372812920592194, "grad_norm": 1.609375, "learning_rate": 0.00017842148190868972, "loss": 2.5626, "step": 11477 }, { "epoch": 0.537328105799052, "grad_norm": 1.421875, "learning_rate": 0.00017841785977356652, "loss": 2.8891, "step": 11478 }, { "epoch": 0.5373749195388846, "grad_norm": 1.3984375, "learning_rate": 0.00017841423737123896, "loss": 2.8127, "step": 11479 }, { "epoch": 0.5374217332787173, "grad_norm": 1.34375, "learning_rate": 0.00017841061470171948, "loss": 2.9052, "step": 11480 }, { "epoch": 0.53746854701855, "grad_norm": 1.6171875, "learning_rate": 0.00017840699176502033, "loss": 2.9821, "step": 11481 }, { "epoch": 0.5375153607583826, "grad_norm": 1.34375, "learning_rate": 0.00017840336856115387, "loss": 2.8944, "step": 11482 }, { "epoch": 0.5375621744982152, "grad_norm": 1.0546875, "learning_rate": 0.0001783997450901325, "loss": 2.3947, "step": 11483 }, { "epoch": 0.5376089882380478, "grad_norm": 1.6640625, "learning_rate": 0.00017839612135196853, "loss": 3.0023, "step": 11484 }, { "epoch": 0.5376558019778805, "grad_norm": 2.15625, "learning_rate": 0.00017839249734667427, "loss": 3.0433, "step": 11485 }, { "epoch": 0.5377026157177132, "grad_norm": 1.2578125, "learning_rate": 0.00017838887307426214, "loss": 2.5479, "step": 11486 }, { "epoch": 0.5377494294575458, "grad_norm": 1.4296875, "learning_rate": 0.00017838524853474444, "loss": 2.5543, "step": 11487 }, { "epoch": 0.5377962431973784, "grad_norm": 1.59375, "learning_rate": 0.00017838162372813354, "loss": 2.7957, "step": 11488 }, { "epoch": 0.537843056937211, "grad_norm": 1.2578125, "learning_rate": 0.0001783779986544418, "loss": 2.7817, "step": 11489 }, { "epoch": 0.5378898706770437, "grad_norm": 1.859375, "learning_rate": 0.00017837437331368153, "loss": 2.6826, "step": 11490 }, { "epoch": 0.5379366844168764, "grad_norm": 1.8125, "learning_rate": 0.00017837074770586515, "loss": 3.2251, "step": 11491 }, { "epoch": 0.537983498156709, "grad_norm": 1.203125, "learning_rate": 0.00017836712183100497, "loss": 2.6104, "step": 11492 }, { "epoch": 0.5380303118965416, "grad_norm": 1.59375, "learning_rate": 0.00017836349568911333, "loss": 2.8993, "step": 11493 }, { "epoch": 0.5380771256363742, "grad_norm": 1.25, "learning_rate": 0.00017835986928020262, "loss": 2.4356, "step": 11494 }, { "epoch": 0.538123939376207, "grad_norm": 1.421875, "learning_rate": 0.0001783562426042852, "loss": 2.8094, "step": 11495 }, { "epoch": 0.5381707531160396, "grad_norm": 2.140625, "learning_rate": 0.00017835261566137337, "loss": 2.8079, "step": 11496 }, { "epoch": 0.5382175668558722, "grad_norm": 1.4296875, "learning_rate": 0.00017834898845147957, "loss": 2.9627, "step": 11497 }, { "epoch": 0.5382643805957048, "grad_norm": 1.328125, "learning_rate": 0.0001783453609746161, "loss": 2.3541, "step": 11498 }, { "epoch": 0.5383111943355375, "grad_norm": 1.0390625, "learning_rate": 0.00017834173323079538, "loss": 3.5104, "step": 11499 }, { "epoch": 0.5383580080753702, "grad_norm": 1.5390625, "learning_rate": 0.00017833810522002968, "loss": 3.0099, "step": 11500 }, { "epoch": 0.5384048218152028, "grad_norm": 1.359375, "learning_rate": 0.00017833447694233146, "loss": 3.0358, "step": 11501 }, { "epoch": 0.5384516355550354, "grad_norm": 1.6171875, "learning_rate": 0.00017833084839771299, "loss": 2.6661, "step": 11502 }, { "epoch": 0.538498449294868, "grad_norm": 1.4765625, "learning_rate": 0.00017832721958618673, "loss": 2.6218, "step": 11503 }, { "epoch": 0.5385452630347007, "grad_norm": 2.0, "learning_rate": 0.00017832359050776498, "loss": 2.9478, "step": 11504 }, { "epoch": 0.5385920767745334, "grad_norm": 1.296875, "learning_rate": 0.0001783199611624601, "loss": 2.6473, "step": 11505 }, { "epoch": 0.538638890514366, "grad_norm": 1.15625, "learning_rate": 0.0001783163315502845, "loss": 2.7347, "step": 11506 }, { "epoch": 0.5386857042541986, "grad_norm": 1.5546875, "learning_rate": 0.0001783127016712505, "loss": 3.0791, "step": 11507 }, { "epoch": 0.5387325179940312, "grad_norm": 1.2734375, "learning_rate": 0.00017830907152537054, "loss": 2.9507, "step": 11508 }, { "epoch": 0.5387793317338639, "grad_norm": 1.4140625, "learning_rate": 0.00017830544111265692, "loss": 2.8784, "step": 11509 }, { "epoch": 0.5388261454736966, "grad_norm": 1.3125, "learning_rate": 0.00017830181043312205, "loss": 2.783, "step": 11510 }, { "epoch": 0.5388729592135292, "grad_norm": 1.34375, "learning_rate": 0.00017829817948677827, "loss": 2.4371, "step": 11511 }, { "epoch": 0.5389197729533618, "grad_norm": 1.0625, "learning_rate": 0.00017829454827363798, "loss": 3.0301, "step": 11512 }, { "epoch": 0.5389665866931944, "grad_norm": 1.7734375, "learning_rate": 0.00017829091679371354, "loss": 2.4988, "step": 11513 }, { "epoch": 0.5390134004330271, "grad_norm": 1.3046875, "learning_rate": 0.00017828728504701733, "loss": 2.9705, "step": 11514 }, { "epoch": 0.5390602141728598, "grad_norm": 1.1484375, "learning_rate": 0.0001782836530335617, "loss": 2.6198, "step": 11515 }, { "epoch": 0.5391070279126924, "grad_norm": 1.5546875, "learning_rate": 0.00017828002075335908, "loss": 2.9625, "step": 11516 }, { "epoch": 0.539153841652525, "grad_norm": 1.234375, "learning_rate": 0.0001782763882064218, "loss": 2.7621, "step": 11517 }, { "epoch": 0.5392006553923576, "grad_norm": 1.6875, "learning_rate": 0.00017827275539276225, "loss": 2.831, "step": 11518 }, { "epoch": 0.5392474691321903, "grad_norm": 1.3984375, "learning_rate": 0.00017826912231239282, "loss": 2.2439, "step": 11519 }, { "epoch": 0.539294282872023, "grad_norm": 1.4375, "learning_rate": 0.00017826548896532587, "loss": 2.8398, "step": 11520 }, { "epoch": 0.5393410966118556, "grad_norm": 3.21875, "learning_rate": 0.00017826185535157376, "loss": 2.3276, "step": 11521 }, { "epoch": 0.5393879103516882, "grad_norm": 1.3125, "learning_rate": 0.00017825822147114894, "loss": 2.9912, "step": 11522 }, { "epoch": 0.5394347240915208, "grad_norm": 1.9140625, "learning_rate": 0.00017825458732406374, "loss": 2.6724, "step": 11523 }, { "epoch": 0.5394815378313536, "grad_norm": 1.28125, "learning_rate": 0.00017825095291033058, "loss": 3.2727, "step": 11524 }, { "epoch": 0.5395283515711862, "grad_norm": 1.453125, "learning_rate": 0.0001782473182299618, "loss": 2.8915, "step": 11525 }, { "epoch": 0.5395751653110188, "grad_norm": 1.46875, "learning_rate": 0.00017824368328296983, "loss": 2.9482, "step": 11526 }, { "epoch": 0.5396219790508514, "grad_norm": 1.1640625, "learning_rate": 0.00017824004806936698, "loss": 2.8615, "step": 11527 }, { "epoch": 0.539668792790684, "grad_norm": 1.6171875, "learning_rate": 0.00017823641258916576, "loss": 2.8425, "step": 11528 }, { "epoch": 0.5397156065305168, "grad_norm": 1.3828125, "learning_rate": 0.0001782327768423784, "loss": 3.0457, "step": 11529 }, { "epoch": 0.5397624202703494, "grad_norm": 1.3984375, "learning_rate": 0.00017822914082901744, "loss": 2.8607, "step": 11530 }, { "epoch": 0.539809234010182, "grad_norm": 1.34375, "learning_rate": 0.0001782255045490952, "loss": 2.953, "step": 11531 }, { "epoch": 0.5398560477500146, "grad_norm": 8.4375, "learning_rate": 0.00017822186800262407, "loss": 2.6521, "step": 11532 }, { "epoch": 0.5399028614898472, "grad_norm": 2.09375, "learning_rate": 0.00017821823118961642, "loss": 2.7399, "step": 11533 }, { "epoch": 0.53994967522968, "grad_norm": 1.3359375, "learning_rate": 0.0001782145941100847, "loss": 2.4643, "step": 11534 }, { "epoch": 0.5399964889695126, "grad_norm": 1.8203125, "learning_rate": 0.00017821095676404125, "loss": 3.0045, "step": 11535 }, { "epoch": 0.5400433027093452, "grad_norm": 1.265625, "learning_rate": 0.0001782073191514985, "loss": 2.6177, "step": 11536 }, { "epoch": 0.5400901164491778, "grad_norm": 1.3984375, "learning_rate": 0.00017820368127246882, "loss": 2.9011, "step": 11537 }, { "epoch": 0.5401369301890104, "grad_norm": 1.53125, "learning_rate": 0.0001782000431269646, "loss": 2.9189, "step": 11538 }, { "epoch": 0.5401837439288432, "grad_norm": 1.8984375, "learning_rate": 0.0001781964047149983, "loss": 2.3357, "step": 11539 }, { "epoch": 0.5402305576686758, "grad_norm": 1.4140625, "learning_rate": 0.0001781927660365822, "loss": 2.9636, "step": 11540 }, { "epoch": 0.5402773714085084, "grad_norm": 1.40625, "learning_rate": 0.00017818912709172886, "loss": 3.1412, "step": 11541 }, { "epoch": 0.540324185148341, "grad_norm": 1.78125, "learning_rate": 0.00017818548788045053, "loss": 3.3349, "step": 11542 }, { "epoch": 0.5403709988881736, "grad_norm": 1.0546875, "learning_rate": 0.00017818184840275966, "loss": 1.9771, "step": 11543 }, { "epoch": 0.5404178126280064, "grad_norm": 1.390625, "learning_rate": 0.00017817820865866869, "loss": 2.8869, "step": 11544 }, { "epoch": 0.540464626367839, "grad_norm": 1.421875, "learning_rate": 0.00017817456864819, "loss": 2.6084, "step": 11545 }, { "epoch": 0.5405114401076716, "grad_norm": 1.0625, "learning_rate": 0.00017817092837133598, "loss": 2.4023, "step": 11546 }, { "epoch": 0.5405582538475042, "grad_norm": 1.3671875, "learning_rate": 0.00017816728782811905, "loss": 2.9344, "step": 11547 }, { "epoch": 0.5406050675873368, "grad_norm": 1.4453125, "learning_rate": 0.00017816364701855156, "loss": 2.7807, "step": 11548 }, { "epoch": 0.5406518813271696, "grad_norm": 1.4296875, "learning_rate": 0.000178160005942646, "loss": 2.7202, "step": 11549 }, { "epoch": 0.5406986950670022, "grad_norm": 1.21875, "learning_rate": 0.00017815636460041473, "loss": 2.659, "step": 11550 }, { "epoch": 0.5407455088068348, "grad_norm": 1.296875, "learning_rate": 0.00017815272299187015, "loss": 2.7681, "step": 11551 }, { "epoch": 0.5407923225466674, "grad_norm": 1.796875, "learning_rate": 0.0001781490811170247, "loss": 3.2955, "step": 11552 }, { "epoch": 0.5408391362865, "grad_norm": 1.21875, "learning_rate": 0.00017814543897589076, "loss": 2.9018, "step": 11553 }, { "epoch": 0.5408859500263328, "grad_norm": 1.71875, "learning_rate": 0.00017814179656848076, "loss": 2.797, "step": 11554 }, { "epoch": 0.5409327637661654, "grad_norm": 1.4609375, "learning_rate": 0.00017813815389480713, "loss": 3.0396, "step": 11555 }, { "epoch": 0.540979577505998, "grad_norm": 1.5, "learning_rate": 0.0001781345109548822, "loss": 2.7327, "step": 11556 }, { "epoch": 0.5410263912458306, "grad_norm": 1.4296875, "learning_rate": 0.00017813086774871848, "loss": 3.1263, "step": 11557 }, { "epoch": 0.5410732049856632, "grad_norm": 1.2734375, "learning_rate": 0.00017812722427632834, "loss": 2.6944, "step": 11558 }, { "epoch": 0.541120018725496, "grad_norm": 1.4453125, "learning_rate": 0.0001781235805377242, "loss": 2.8782, "step": 11559 }, { "epoch": 0.5411668324653286, "grad_norm": 1.484375, "learning_rate": 0.00017811993653291848, "loss": 3.0713, "step": 11560 }, { "epoch": 0.5412136462051612, "grad_norm": 1.3984375, "learning_rate": 0.0001781162922619236, "loss": 2.9672, "step": 11561 }, { "epoch": 0.5412604599449938, "grad_norm": 1.2734375, "learning_rate": 0.00017811264772475195, "loss": 2.873, "step": 11562 }, { "epoch": 0.5413072736848265, "grad_norm": 2.359375, "learning_rate": 0.00017810900292141596, "loss": 2.6955, "step": 11563 }, { "epoch": 0.5413540874246592, "grad_norm": 1.265625, "learning_rate": 0.00017810535785192807, "loss": 2.8102, "step": 11564 }, { "epoch": 0.5414009011644918, "grad_norm": 1.21875, "learning_rate": 0.0001781017125163007, "loss": 2.5161, "step": 11565 }, { "epoch": 0.5414477149043244, "grad_norm": 1.9609375, "learning_rate": 0.0001780980669145462, "loss": 2.8271, "step": 11566 }, { "epoch": 0.541494528644157, "grad_norm": 1.4609375, "learning_rate": 0.00017809442104667713, "loss": 2.3687, "step": 11567 }, { "epoch": 0.5415413423839897, "grad_norm": 1.3046875, "learning_rate": 0.0001780907749127058, "loss": 2.6197, "step": 11568 }, { "epoch": 0.5415881561238224, "grad_norm": 1.28125, "learning_rate": 0.00017808712851264468, "loss": 2.8615, "step": 11569 }, { "epoch": 0.541634969863655, "grad_norm": 1.0625, "learning_rate": 0.00017808348184650616, "loss": 2.6772, "step": 11570 }, { "epoch": 0.5416817836034876, "grad_norm": 1.125, "learning_rate": 0.00017807983491430272, "loss": 2.5579, "step": 11571 }, { "epoch": 0.5417285973433202, "grad_norm": 1.4765625, "learning_rate": 0.00017807618771604675, "loss": 2.7807, "step": 11572 }, { "epoch": 0.5417754110831529, "grad_norm": 2.0, "learning_rate": 0.00017807254025175063, "loss": 2.8429, "step": 11573 }, { "epoch": 0.5418222248229856, "grad_norm": 1.625, "learning_rate": 0.0001780688925214269, "loss": 2.8329, "step": 11574 }, { "epoch": 0.5418690385628182, "grad_norm": 1.4765625, "learning_rate": 0.00017806524452508793, "loss": 2.7583, "step": 11575 }, { "epoch": 0.5419158523026508, "grad_norm": 1.6796875, "learning_rate": 0.00017806159626274616, "loss": 2.9237, "step": 11576 }, { "epoch": 0.5419626660424834, "grad_norm": 1.2890625, "learning_rate": 0.00017805794773441397, "loss": 2.9217, "step": 11577 }, { "epoch": 0.5420094797823161, "grad_norm": 1.5078125, "learning_rate": 0.00017805429894010387, "loss": 2.6829, "step": 11578 }, { "epoch": 0.5420562935221488, "grad_norm": 1.890625, "learning_rate": 0.00017805064987982822, "loss": 2.9839, "step": 11579 }, { "epoch": 0.5421031072619814, "grad_norm": 1.828125, "learning_rate": 0.0001780470005535995, "loss": 3.1659, "step": 11580 }, { "epoch": 0.542149921001814, "grad_norm": 1.734375, "learning_rate": 0.00017804335096143015, "loss": 2.7455, "step": 11581 }, { "epoch": 0.5421967347416466, "grad_norm": 1.578125, "learning_rate": 0.00017803970110333262, "loss": 2.664, "step": 11582 }, { "epoch": 0.5422435484814793, "grad_norm": 1.1015625, "learning_rate": 0.00017803605097931928, "loss": 4.2724, "step": 11583 }, { "epoch": 0.542290362221312, "grad_norm": 1.4296875, "learning_rate": 0.00017803240058940264, "loss": 2.9545, "step": 11584 }, { "epoch": 0.5423371759611446, "grad_norm": 1.4375, "learning_rate": 0.0001780287499335951, "loss": 3.0993, "step": 11585 }, { "epoch": 0.5423839897009772, "grad_norm": 1.2734375, "learning_rate": 0.00017802509901190908, "loss": 2.3171, "step": 11586 }, { "epoch": 0.5424308034408099, "grad_norm": 1.6953125, "learning_rate": 0.00017802144782435705, "loss": 3.597, "step": 11587 }, { "epoch": 0.5424776171806425, "grad_norm": 1.5234375, "learning_rate": 0.00017801779637095146, "loss": 2.7719, "step": 11588 }, { "epoch": 0.5425244309204752, "grad_norm": 1.09375, "learning_rate": 0.00017801414465170475, "loss": 2.71, "step": 11589 }, { "epoch": 0.5425712446603078, "grad_norm": 1.578125, "learning_rate": 0.0001780104926666293, "loss": 3.0575, "step": 11590 }, { "epoch": 0.5426180584001404, "grad_norm": 1.234375, "learning_rate": 0.00017800684041573766, "loss": 2.9216, "step": 11591 }, { "epoch": 0.542664872139973, "grad_norm": 1.2578125, "learning_rate": 0.00017800318789904217, "loss": 2.5166, "step": 11592 }, { "epoch": 0.5427116858798057, "grad_norm": 1.1953125, "learning_rate": 0.00017799953511655537, "loss": 2.6924, "step": 11593 }, { "epoch": 0.5427584996196384, "grad_norm": 1.109375, "learning_rate": 0.00017799588206828966, "loss": 2.7145, "step": 11594 }, { "epoch": 0.542805313359471, "grad_norm": 1.828125, "learning_rate": 0.00017799222875425748, "loss": 2.8649, "step": 11595 }, { "epoch": 0.5428521270993036, "grad_norm": 1.6640625, "learning_rate": 0.00017798857517447127, "loss": 2.6072, "step": 11596 }, { "epoch": 0.5428989408391363, "grad_norm": 1.328125, "learning_rate": 0.0001779849213289435, "loss": 3.006, "step": 11597 }, { "epoch": 0.5429457545789689, "grad_norm": 1.7265625, "learning_rate": 0.00017798126721768663, "loss": 2.9353, "step": 11598 }, { "epoch": 0.5429925683188016, "grad_norm": 1.7734375, "learning_rate": 0.00017797761284071313, "loss": 2.5966, "step": 11599 }, { "epoch": 0.5430393820586342, "grad_norm": 1.4765625, "learning_rate": 0.0001779739581980354, "loss": 2.9391, "step": 11600 }, { "epoch": 0.5430861957984668, "grad_norm": 1.546875, "learning_rate": 0.00017797030328966588, "loss": 2.7081, "step": 11601 }, { "epoch": 0.5431330095382995, "grad_norm": 1.125, "learning_rate": 0.0001779666481156171, "loss": 4.2973, "step": 11602 }, { "epoch": 0.5431798232781321, "grad_norm": 1.2421875, "learning_rate": 0.00017796299267590145, "loss": 2.7304, "step": 11603 }, { "epoch": 0.5432266370179648, "grad_norm": 1.3359375, "learning_rate": 0.00017795933697053142, "loss": 2.4296, "step": 11604 }, { "epoch": 0.5432734507577974, "grad_norm": 1.3203125, "learning_rate": 0.00017795568099951946, "loss": 2.6715, "step": 11605 }, { "epoch": 0.54332026449763, "grad_norm": 1.484375, "learning_rate": 0.00017795202476287802, "loss": 2.8222, "step": 11606 }, { "epoch": 0.5433670782374627, "grad_norm": 2.390625, "learning_rate": 0.00017794836826061955, "loss": 2.9959, "step": 11607 }, { "epoch": 0.5434138919772953, "grad_norm": 1.6171875, "learning_rate": 0.00017794471149275653, "loss": 2.5704, "step": 11608 }, { "epoch": 0.543460705717128, "grad_norm": 1.5703125, "learning_rate": 0.00017794105445930142, "loss": 2.9951, "step": 11609 }, { "epoch": 0.5435075194569606, "grad_norm": 1.6953125, "learning_rate": 0.00017793739716026664, "loss": 2.874, "step": 11610 }, { "epoch": 0.5435543331967932, "grad_norm": 1.2578125, "learning_rate": 0.00017793373959566473, "loss": 2.6129, "step": 11611 }, { "epoch": 0.5436011469366259, "grad_norm": 1.21875, "learning_rate": 0.0001779300817655081, "loss": 3.1898, "step": 11612 }, { "epoch": 0.5436479606764585, "grad_norm": 1.6796875, "learning_rate": 0.00017792642366980919, "loss": 2.7454, "step": 11613 }, { "epoch": 0.5436947744162912, "grad_norm": 1.296875, "learning_rate": 0.00017792276530858051, "loss": 2.6716, "step": 11614 }, { "epoch": 0.5437415881561238, "grad_norm": 1.2890625, "learning_rate": 0.00017791910668183452, "loss": 2.99, "step": 11615 }, { "epoch": 0.5437884018959565, "grad_norm": 1.1484375, "learning_rate": 0.00017791544778958366, "loss": 3.0165, "step": 11616 }, { "epoch": 0.5438352156357891, "grad_norm": 1.3203125, "learning_rate": 0.00017791178863184044, "loss": 2.6775, "step": 11617 }, { "epoch": 0.5438820293756217, "grad_norm": 1.5390625, "learning_rate": 0.00017790812920861728, "loss": 2.8873, "step": 11618 }, { "epoch": 0.5439288431154544, "grad_norm": 1.3203125, "learning_rate": 0.0001779044695199267, "loss": 3.1631, "step": 11619 }, { "epoch": 0.543975656855287, "grad_norm": 1.1640625, "learning_rate": 0.00017790080956578113, "loss": 2.7645, "step": 11620 }, { "epoch": 0.5440224705951197, "grad_norm": 2.171875, "learning_rate": 0.00017789714934619304, "loss": 3.0698, "step": 11621 }, { "epoch": 0.5440692843349523, "grad_norm": 1.5234375, "learning_rate": 0.00017789348886117492, "loss": 3.016, "step": 11622 }, { "epoch": 0.544116098074785, "grad_norm": 1.7890625, "learning_rate": 0.00017788982811073925, "loss": 3.9342, "step": 11623 }, { "epoch": 0.5441629118146176, "grad_norm": 1.28125, "learning_rate": 0.0001778861670948985, "loss": 2.7638, "step": 11624 }, { "epoch": 0.5442097255544502, "grad_norm": 1.28125, "learning_rate": 0.0001778825058136651, "loss": 2.7553, "step": 11625 }, { "epoch": 0.5442565392942829, "grad_norm": 1.921875, "learning_rate": 0.00017787884426705158, "loss": 3.2067, "step": 11626 }, { "epoch": 0.5443033530341155, "grad_norm": 2.203125, "learning_rate": 0.00017787518245507042, "loss": 3.5446, "step": 11627 }, { "epoch": 0.5443501667739482, "grad_norm": 1.6875, "learning_rate": 0.00017787152037773406, "loss": 3.051, "step": 11628 }, { "epoch": 0.5443969805137808, "grad_norm": 1.6015625, "learning_rate": 0.00017786785803505502, "loss": 2.9767, "step": 11629 }, { "epoch": 0.5444437942536134, "grad_norm": 2.078125, "learning_rate": 0.00017786419542704574, "loss": 2.6102, "step": 11630 }, { "epoch": 0.5444906079934461, "grad_norm": 1.5546875, "learning_rate": 0.00017786053255371872, "loss": 2.759, "step": 11631 }, { "epoch": 0.5445374217332787, "grad_norm": 1.21875, "learning_rate": 0.0001778568694150864, "loss": 2.6716, "step": 11632 }, { "epoch": 0.5445842354731114, "grad_norm": 1.96875, "learning_rate": 0.00017785320601116134, "loss": 2.6723, "step": 11633 }, { "epoch": 0.544631049212944, "grad_norm": 1.3515625, "learning_rate": 0.00017784954234195596, "loss": 2.5991, "step": 11634 }, { "epoch": 0.5446778629527766, "grad_norm": 2.328125, "learning_rate": 0.00017784587840748278, "loss": 3.004, "step": 11635 }, { "epoch": 0.5447246766926093, "grad_norm": 1.2578125, "learning_rate": 0.00017784221420775426, "loss": 2.6789, "step": 11636 }, { "epoch": 0.5447714904324419, "grad_norm": 1.5234375, "learning_rate": 0.00017783854974278289, "loss": 3.3057, "step": 11637 }, { "epoch": 0.5448183041722746, "grad_norm": 1.421875, "learning_rate": 0.00017783488501258116, "loss": 2.5116, "step": 11638 }, { "epoch": 0.5448651179121072, "grad_norm": 1.5859375, "learning_rate": 0.00017783122001716154, "loss": 2.9788, "step": 11639 }, { "epoch": 0.5449119316519399, "grad_norm": 1.8359375, "learning_rate": 0.00017782755475653658, "loss": 3.0181, "step": 11640 }, { "epoch": 0.5449587453917725, "grad_norm": 1.203125, "learning_rate": 0.0001778238892307187, "loss": 2.9005, "step": 11641 }, { "epoch": 0.5450055591316051, "grad_norm": 2.25, "learning_rate": 0.00017782022343972042, "loss": 2.5448, "step": 11642 }, { "epoch": 0.5450523728714378, "grad_norm": 1.2578125, "learning_rate": 0.00017781655738355424, "loss": 2.9425, "step": 11643 }, { "epoch": 0.5450991866112704, "grad_norm": 1.4609375, "learning_rate": 0.0001778128910622326, "loss": 3.023, "step": 11644 }, { "epoch": 0.545146000351103, "grad_norm": 1.5546875, "learning_rate": 0.0001778092244757681, "loss": 3.0181, "step": 11645 }, { "epoch": 0.5451928140909357, "grad_norm": 1.3515625, "learning_rate": 0.0001778055576241731, "loss": 2.979, "step": 11646 }, { "epoch": 0.5452396278307683, "grad_norm": 1.359375, "learning_rate": 0.0001778018905074602, "loss": 2.7558, "step": 11647 }, { "epoch": 0.545286441570601, "grad_norm": 1.2734375, "learning_rate": 0.00017779822312564183, "loss": 2.3264, "step": 11648 }, { "epoch": 0.5453332553104336, "grad_norm": 1.28125, "learning_rate": 0.00017779455547873053, "loss": 2.4021, "step": 11649 }, { "epoch": 0.5453800690502663, "grad_norm": 1.4140625, "learning_rate": 0.00017779088756673876, "loss": 2.8102, "step": 11650 }, { "epoch": 0.5454268827900989, "grad_norm": 1.1171875, "learning_rate": 0.00017778721938967906, "loss": 3.9862, "step": 11651 }, { "epoch": 0.5454736965299315, "grad_norm": 1.1484375, "learning_rate": 0.00017778355094756387, "loss": 2.8198, "step": 11652 }, { "epoch": 0.5455205102697642, "grad_norm": 1.59375, "learning_rate": 0.00017777988224040576, "loss": 2.8874, "step": 11653 }, { "epoch": 0.5455673240095968, "grad_norm": 1.4609375, "learning_rate": 0.00017777621326821717, "loss": 2.4867, "step": 11654 }, { "epoch": 0.5456141377494295, "grad_norm": 1.328125, "learning_rate": 0.00017777254403101065, "loss": 3.1427, "step": 11655 }, { "epoch": 0.5456609514892621, "grad_norm": 1.1875, "learning_rate": 0.00017776887452879868, "loss": 2.7882, "step": 11656 }, { "epoch": 0.5457077652290947, "grad_norm": 2.03125, "learning_rate": 0.00017776520476159378, "loss": 2.9925, "step": 11657 }, { "epoch": 0.5457545789689274, "grad_norm": 1.3046875, "learning_rate": 0.00017776153472940842, "loss": 2.7782, "step": 11658 }, { "epoch": 0.54580139270876, "grad_norm": 1.1796875, "learning_rate": 0.00017775786443225512, "loss": 2.7629, "step": 11659 }, { "epoch": 0.5458482064485927, "grad_norm": 1.34375, "learning_rate": 0.0001777541938701464, "loss": 2.8367, "step": 11660 }, { "epoch": 0.5458950201884253, "grad_norm": 1.2265625, "learning_rate": 0.00017775052304309474, "loss": 2.6426, "step": 11661 }, { "epoch": 0.5459418339282579, "grad_norm": 1.390625, "learning_rate": 0.00017774685195111269, "loss": 2.7718, "step": 11662 }, { "epoch": 0.5459886476680906, "grad_norm": 1.4921875, "learning_rate": 0.00017774318059421274, "loss": 2.8968, "step": 11663 }, { "epoch": 0.5460354614079233, "grad_norm": 1.3515625, "learning_rate": 0.00017773950897240738, "loss": 3.0826, "step": 11664 }, { "epoch": 0.5460822751477559, "grad_norm": 1.4453125, "learning_rate": 0.00017773583708570914, "loss": 2.7601, "step": 11665 }, { "epoch": 0.5461290888875885, "grad_norm": 1.4140625, "learning_rate": 0.00017773216493413053, "loss": 2.6224, "step": 11666 }, { "epoch": 0.5461759026274211, "grad_norm": 1.1953125, "learning_rate": 0.00017772849251768407, "loss": 3.0903, "step": 11667 }, { "epoch": 0.5462227163672538, "grad_norm": 1.3203125, "learning_rate": 0.00017772481983638225, "loss": 2.7049, "step": 11668 }, { "epoch": 0.5462695301070865, "grad_norm": 1.2890625, "learning_rate": 0.0001777211468902376, "loss": 2.8612, "step": 11669 }, { "epoch": 0.5463163438469191, "grad_norm": 1.6875, "learning_rate": 0.00017771747367926265, "loss": 2.5041, "step": 11670 }, { "epoch": 0.5463631575867517, "grad_norm": 1.3203125, "learning_rate": 0.00017771380020346992, "loss": 2.856, "step": 11671 }, { "epoch": 0.5464099713265843, "grad_norm": 1.5703125, "learning_rate": 0.00017771012646287186, "loss": 2.3967, "step": 11672 }, { "epoch": 0.546456785066417, "grad_norm": 3.15625, "learning_rate": 0.00017770645245748106, "loss": 2.3844, "step": 11673 }, { "epoch": 0.5465035988062497, "grad_norm": 1.0859375, "learning_rate": 0.00017770277818731002, "loss": 2.9277, "step": 11674 }, { "epoch": 0.5465504125460823, "grad_norm": 2.6875, "learning_rate": 0.00017769910365237124, "loss": 4.0229, "step": 11675 }, { "epoch": 0.5465972262859149, "grad_norm": 2.015625, "learning_rate": 0.00017769542885267726, "loss": 3.0505, "step": 11676 }, { "epoch": 0.5466440400257475, "grad_norm": 1.3828125, "learning_rate": 0.0001776917537882406, "loss": 2.8976, "step": 11677 }, { "epoch": 0.5466908537655802, "grad_norm": 1.6328125, "learning_rate": 0.00017768807845907375, "loss": 2.9764, "step": 11678 }, { "epoch": 0.5467376675054129, "grad_norm": 1.2578125, "learning_rate": 0.00017768440286518932, "loss": 2.3638, "step": 11679 }, { "epoch": 0.5467844812452455, "grad_norm": 2.046875, "learning_rate": 0.00017768072700659974, "loss": 3.4139, "step": 11680 }, { "epoch": 0.5468312949850781, "grad_norm": 1.3984375, "learning_rate": 0.00017767705088331755, "loss": 2.9361, "step": 11681 }, { "epoch": 0.5468781087249107, "grad_norm": 1.46875, "learning_rate": 0.00017767337449535537, "loss": 2.6067, "step": 11682 }, { "epoch": 0.5469249224647434, "grad_norm": 1.046875, "learning_rate": 0.0001776696978427256, "loss": 2.5005, "step": 11683 }, { "epoch": 0.5469717362045761, "grad_norm": 1.2890625, "learning_rate": 0.00017766602092544084, "loss": 2.9301, "step": 11684 }, { "epoch": 0.5470185499444087, "grad_norm": 1.03125, "learning_rate": 0.00017766234374351361, "loss": 2.3779, "step": 11685 }, { "epoch": 0.5470653636842413, "grad_norm": 2.015625, "learning_rate": 0.0001776586662969564, "loss": 3.232, "step": 11686 }, { "epoch": 0.5471121774240739, "grad_norm": 2.015625, "learning_rate": 0.00017765498858578183, "loss": 2.8778, "step": 11687 }, { "epoch": 0.5471589911639066, "grad_norm": 1.5, "learning_rate": 0.00017765131061000234, "loss": 2.4729, "step": 11688 }, { "epoch": 0.5472058049037393, "grad_norm": 1.4453125, "learning_rate": 0.00017764763236963053, "loss": 2.7852, "step": 11689 }, { "epoch": 0.5472526186435719, "grad_norm": 1.3984375, "learning_rate": 0.00017764395386467885, "loss": 2.8934, "step": 11690 }, { "epoch": 0.5472994323834045, "grad_norm": 1.515625, "learning_rate": 0.0001776402750951599, "loss": 3.0419, "step": 11691 }, { "epoch": 0.5473462461232371, "grad_norm": 1.71875, "learning_rate": 0.00017763659606108624, "loss": 2.7545, "step": 11692 }, { "epoch": 0.5473930598630699, "grad_norm": 1.6953125, "learning_rate": 0.00017763291676247032, "loss": 3.1267, "step": 11693 }, { "epoch": 0.5474398736029025, "grad_norm": 1.5625, "learning_rate": 0.00017762923719932472, "loss": 3.0868, "step": 11694 }, { "epoch": 0.5474866873427351, "grad_norm": 2.109375, "learning_rate": 0.000177625557371662, "loss": 3.0313, "step": 11695 }, { "epoch": 0.5475335010825677, "grad_norm": 1.515625, "learning_rate": 0.0001776218772794947, "loss": 2.9543, "step": 11696 }, { "epoch": 0.5475803148224003, "grad_norm": 1.453125, "learning_rate": 0.0001776181969228353, "loss": 2.7317, "step": 11697 }, { "epoch": 0.5476271285622331, "grad_norm": 1.453125, "learning_rate": 0.00017761451630169642, "loss": 2.7019, "step": 11698 }, { "epoch": 0.5476739423020657, "grad_norm": 1.328125, "learning_rate": 0.00017761083541609053, "loss": 2.2986, "step": 11699 }, { "epoch": 0.5477207560418983, "grad_norm": 1.03125, "learning_rate": 0.00017760715426603022, "loss": 4.4213, "step": 11700 }, { "epoch": 0.5477675697817309, "grad_norm": 1.4609375, "learning_rate": 0.00017760347285152803, "loss": 2.4616, "step": 11701 }, { "epoch": 0.5478143835215635, "grad_norm": 1.171875, "learning_rate": 0.00017759979117259648, "loss": 2.6078, "step": 11702 }, { "epoch": 0.5478611972613963, "grad_norm": 1.2734375, "learning_rate": 0.00017759610922924812, "loss": 2.6509, "step": 11703 }, { "epoch": 0.5479080110012289, "grad_norm": 1.3671875, "learning_rate": 0.0001775924270214955, "loss": 2.9213, "step": 11704 }, { "epoch": 0.5479548247410615, "grad_norm": 1.296875, "learning_rate": 0.0001775887445493512, "loss": 2.6351, "step": 11705 }, { "epoch": 0.5480016384808941, "grad_norm": 1.1875, "learning_rate": 0.00017758506181282768, "loss": 2.7226, "step": 11706 }, { "epoch": 0.5480484522207267, "grad_norm": 1.6171875, "learning_rate": 0.00017758137881193758, "loss": 2.9329, "step": 11707 }, { "epoch": 0.5480952659605595, "grad_norm": 1.2109375, "learning_rate": 0.00017757769554669345, "loss": 2.7455, "step": 11708 }, { "epoch": 0.5481420797003921, "grad_norm": 2.046875, "learning_rate": 0.00017757401201710775, "loss": 2.8907, "step": 11709 }, { "epoch": 0.5481888934402247, "grad_norm": 1.34375, "learning_rate": 0.00017757032822319314, "loss": 2.7335, "step": 11710 }, { "epoch": 0.5482357071800573, "grad_norm": 1.78125, "learning_rate": 0.00017756664416496208, "loss": 1.9446, "step": 11711 }, { "epoch": 0.5482825209198899, "grad_norm": 1.3046875, "learning_rate": 0.0001775629598424272, "loss": 2.7263, "step": 11712 }, { "epoch": 0.5483293346597227, "grad_norm": 2.53125, "learning_rate": 0.00017755927525560096, "loss": 2.3938, "step": 11713 }, { "epoch": 0.5483761483995553, "grad_norm": 2.578125, "learning_rate": 0.00017755559040449602, "loss": 2.9582, "step": 11714 }, { "epoch": 0.5484229621393879, "grad_norm": 1.6171875, "learning_rate": 0.00017755190528912488, "loss": 3.0296, "step": 11715 }, { "epoch": 0.5484697758792205, "grad_norm": 1.6953125, "learning_rate": 0.0001775482199095001, "loss": 2.867, "step": 11716 }, { "epoch": 0.5485165896190531, "grad_norm": 1.296875, "learning_rate": 0.00017754453426563425, "loss": 3.0556, "step": 11717 }, { "epoch": 0.5485634033588859, "grad_norm": 2.203125, "learning_rate": 0.00017754084835753985, "loss": 2.5488, "step": 11718 }, { "epoch": 0.5486102170987185, "grad_norm": 1.734375, "learning_rate": 0.00017753716218522954, "loss": 2.6553, "step": 11719 }, { "epoch": 0.5486570308385511, "grad_norm": 1.1171875, "learning_rate": 0.0001775334757487158, "loss": 2.6645, "step": 11720 }, { "epoch": 0.5487038445783837, "grad_norm": 2.328125, "learning_rate": 0.00017752978904801123, "loss": 2.898, "step": 11721 }, { "epoch": 0.5487506583182163, "grad_norm": 1.4921875, "learning_rate": 0.0001775261020831284, "loss": 2.5885, "step": 11722 }, { "epoch": 0.5487974720580491, "grad_norm": 1.328125, "learning_rate": 0.00017752241485407983, "loss": 2.7787, "step": 11723 }, { "epoch": 0.5488442857978817, "grad_norm": 1.375, "learning_rate": 0.00017751872736087814, "loss": 3.004, "step": 11724 }, { "epoch": 0.5488910995377143, "grad_norm": 1.8984375, "learning_rate": 0.00017751503960353583, "loss": 3.0088, "step": 11725 }, { "epoch": 0.5489379132775469, "grad_norm": 1.9296875, "learning_rate": 0.00017751135158206553, "loss": 2.9176, "step": 11726 }, { "epoch": 0.5489847270173795, "grad_norm": 1.6796875, "learning_rate": 0.0001775076632964798, "loss": 2.9031, "step": 11727 }, { "epoch": 0.5490315407572123, "grad_norm": 1.6015625, "learning_rate": 0.00017750397474679117, "loss": 3.0662, "step": 11728 }, { "epoch": 0.5490783544970449, "grad_norm": 1.140625, "learning_rate": 0.0001775002859330122, "loss": 3.2448, "step": 11729 }, { "epoch": 0.5491251682368775, "grad_norm": 4.53125, "learning_rate": 0.00017749659685515552, "loss": 2.4437, "step": 11730 }, { "epoch": 0.5491719819767101, "grad_norm": 1.453125, "learning_rate": 0.00017749290751323366, "loss": 3.0609, "step": 11731 }, { "epoch": 0.5492187957165428, "grad_norm": 1.1484375, "learning_rate": 0.00017748921790725918, "loss": 2.574, "step": 11732 }, { "epoch": 0.5492656094563755, "grad_norm": 1.515625, "learning_rate": 0.00017748552803724467, "loss": 2.7044, "step": 11733 }, { "epoch": 0.5493124231962081, "grad_norm": 1.921875, "learning_rate": 0.00017748183790320274, "loss": 2.7379, "step": 11734 }, { "epoch": 0.5493592369360407, "grad_norm": 1.21875, "learning_rate": 0.00017747814750514586, "loss": 3.6488, "step": 11735 }, { "epoch": 0.5494060506758733, "grad_norm": 1.1875, "learning_rate": 0.00017747445684308675, "loss": 2.3737, "step": 11736 }, { "epoch": 0.549452864415706, "grad_norm": 1.4765625, "learning_rate": 0.00017747076591703788, "loss": 2.9419, "step": 11737 }, { "epoch": 0.5494996781555387, "grad_norm": 1.375, "learning_rate": 0.0001774670747270118, "loss": 3.063, "step": 11738 }, { "epoch": 0.5495464918953713, "grad_norm": 1.2578125, "learning_rate": 0.00017746338327302117, "loss": 2.5989, "step": 11739 }, { "epoch": 0.5495933056352039, "grad_norm": 1.5625, "learning_rate": 0.00017745969155507857, "loss": 3.0055, "step": 11740 }, { "epoch": 0.5496401193750365, "grad_norm": 1.3515625, "learning_rate": 0.0001774559995731965, "loss": 2.7448, "step": 11741 }, { "epoch": 0.5496869331148693, "grad_norm": 1.3125, "learning_rate": 0.0001774523073273876, "loss": 2.6699, "step": 11742 }, { "epoch": 0.5497337468547019, "grad_norm": 2.078125, "learning_rate": 0.00017744861481766447, "loss": 3.2179, "step": 11743 }, { "epoch": 0.5497805605945345, "grad_norm": 1.6953125, "learning_rate": 0.00017744492204403963, "loss": 2.3669, "step": 11744 }, { "epoch": 0.5498273743343671, "grad_norm": 1.6875, "learning_rate": 0.00017744122900652574, "loss": 2.9457, "step": 11745 }, { "epoch": 0.5498741880741997, "grad_norm": 1.0625, "learning_rate": 0.0001774375357051353, "loss": 2.7658, "step": 11746 }, { "epoch": 0.5499210018140325, "grad_norm": 1.2890625, "learning_rate": 0.00017743384213988095, "loss": 2.9207, "step": 11747 }, { "epoch": 0.5499678155538651, "grad_norm": 1.1484375, "learning_rate": 0.00017743014831077524, "loss": 3.171, "step": 11748 }, { "epoch": 0.5500146292936977, "grad_norm": 1.6953125, "learning_rate": 0.0001774264542178308, "loss": 3.1705, "step": 11749 }, { "epoch": 0.5500614430335303, "grad_norm": 1.6171875, "learning_rate": 0.0001774227598610602, "loss": 2.6693, "step": 11750 }, { "epoch": 0.550108256773363, "grad_norm": 1.765625, "learning_rate": 0.00017741906524047598, "loss": 2.8053, "step": 11751 }, { "epoch": 0.5501550705131957, "grad_norm": 1.734375, "learning_rate": 0.0001774153703560908, "loss": 3.2682, "step": 11752 }, { "epoch": 0.5502018842530283, "grad_norm": 1.421875, "learning_rate": 0.0001774116752079172, "loss": 3.0116, "step": 11753 }, { "epoch": 0.5502486979928609, "grad_norm": 1.6796875, "learning_rate": 0.0001774079797959678, "loss": 2.8706, "step": 11754 }, { "epoch": 0.5502955117326935, "grad_norm": 1.515625, "learning_rate": 0.0001774042841202552, "loss": 2.6399, "step": 11755 }, { "epoch": 0.5503423254725262, "grad_norm": 1.9765625, "learning_rate": 0.00017740058818079194, "loss": 2.9435, "step": 11756 }, { "epoch": 0.5503891392123589, "grad_norm": 1.9765625, "learning_rate": 0.0001773968919775907, "loss": 2.4636, "step": 11757 }, { "epoch": 0.5504359529521915, "grad_norm": 1.3046875, "learning_rate": 0.00017739319551066397, "loss": 2.8268, "step": 11758 }, { "epoch": 0.5504827666920241, "grad_norm": 1.21875, "learning_rate": 0.0001773894987800244, "loss": 3.0647, "step": 11759 }, { "epoch": 0.5505295804318567, "grad_norm": 1.171875, "learning_rate": 0.00017738580178568465, "loss": 2.928, "step": 11760 }, { "epoch": 0.5505763941716894, "grad_norm": 1.2734375, "learning_rate": 0.00017738210452765722, "loss": 2.8949, "step": 11761 }, { "epoch": 0.5506232079115221, "grad_norm": 1.2890625, "learning_rate": 0.00017737840700595472, "loss": 2.8637, "step": 11762 }, { "epoch": 0.5506700216513547, "grad_norm": 1.3203125, "learning_rate": 0.00017737470922058978, "loss": 2.8396, "step": 11763 }, { "epoch": 0.5507168353911873, "grad_norm": 1.6875, "learning_rate": 0.00017737101117157498, "loss": 2.6483, "step": 11764 }, { "epoch": 0.5507636491310199, "grad_norm": 1.53125, "learning_rate": 0.00017736731285892298, "loss": 2.8314, "step": 11765 }, { "epoch": 0.5508104628708526, "grad_norm": 1.4765625, "learning_rate": 0.0001773636142826463, "loss": 3.0427, "step": 11766 }, { "epoch": 0.5508572766106853, "grad_norm": 1.578125, "learning_rate": 0.00017735991544275758, "loss": 2.9397, "step": 11767 }, { "epoch": 0.5509040903505179, "grad_norm": 1.265625, "learning_rate": 0.00017735621633926943, "loss": 2.8873, "step": 11768 }, { "epoch": 0.5509509040903505, "grad_norm": 1.171875, "learning_rate": 0.00017735251697219444, "loss": 2.977, "step": 11769 }, { "epoch": 0.5509977178301831, "grad_norm": 1.46875, "learning_rate": 0.0001773488173415452, "loss": 2.8193, "step": 11770 }, { "epoch": 0.5510445315700158, "grad_norm": 1.8515625, "learning_rate": 0.00017734511744733436, "loss": 2.6778, "step": 11771 }, { "epoch": 0.5510913453098485, "grad_norm": 1.1640625, "learning_rate": 0.00017734141728957447, "loss": 2.6207, "step": 11772 }, { "epoch": 0.5511381590496811, "grad_norm": 1.3125, "learning_rate": 0.0001773377168682782, "loss": 2.7749, "step": 11773 }, { "epoch": 0.5511849727895137, "grad_norm": 1.84375, "learning_rate": 0.00017733401618345816, "loss": 2.9382, "step": 11774 }, { "epoch": 0.5512317865293463, "grad_norm": 1.3203125, "learning_rate": 0.00017733031523512686, "loss": 2.8684, "step": 11775 }, { "epoch": 0.551278600269179, "grad_norm": 1.2734375, "learning_rate": 0.00017732661402329703, "loss": 2.36, "step": 11776 }, { "epoch": 0.5513254140090117, "grad_norm": 1.3984375, "learning_rate": 0.0001773229125479812, "loss": 2.7612, "step": 11777 }, { "epoch": 0.5513722277488443, "grad_norm": 1.375, "learning_rate": 0.00017731921080919202, "loss": 2.6622, "step": 11778 }, { "epoch": 0.5514190414886769, "grad_norm": 1.6171875, "learning_rate": 0.00017731550880694213, "loss": 3.0872, "step": 11779 }, { "epoch": 0.5514658552285095, "grad_norm": 2.203125, "learning_rate": 0.00017731180654124412, "loss": 3.1833, "step": 11780 }, { "epoch": 0.5515126689683422, "grad_norm": 1.3359375, "learning_rate": 0.00017730810401211055, "loss": 2.5578, "step": 11781 }, { "epoch": 0.5515594827081749, "grad_norm": 1.5546875, "learning_rate": 0.0001773044012195541, "loss": 3.0413, "step": 11782 }, { "epoch": 0.5516062964480075, "grad_norm": 1.203125, "learning_rate": 0.00017730069816358735, "loss": 3.0145, "step": 11783 }, { "epoch": 0.5516531101878401, "grad_norm": 1.28125, "learning_rate": 0.000177296994844223, "loss": 2.7189, "step": 11784 }, { "epoch": 0.5516999239276728, "grad_norm": 1.2734375, "learning_rate": 0.00017729329126147357, "loss": 2.3586, "step": 11785 }, { "epoch": 0.5517467376675054, "grad_norm": 1.4765625, "learning_rate": 0.0001772895874153517, "loss": 2.7874, "step": 11786 }, { "epoch": 0.5517935514073381, "grad_norm": 1.2421875, "learning_rate": 0.00017728588330587005, "loss": 2.88, "step": 11787 }, { "epoch": 0.5518403651471707, "grad_norm": 1.25, "learning_rate": 0.00017728217893304122, "loss": 2.9775, "step": 11788 }, { "epoch": 0.5518871788870033, "grad_norm": 1.46875, "learning_rate": 0.00017727847429687785, "loss": 2.491, "step": 11789 }, { "epoch": 0.551933992626836, "grad_norm": 1.2421875, "learning_rate": 0.00017727476939739253, "loss": 2.6422, "step": 11790 }, { "epoch": 0.5519808063666686, "grad_norm": 1.4375, "learning_rate": 0.0001772710642345979, "loss": 3.1263, "step": 11791 }, { "epoch": 0.5520276201065013, "grad_norm": 1.1796875, "learning_rate": 0.00017726735880850658, "loss": 4.9408, "step": 11792 }, { "epoch": 0.5520744338463339, "grad_norm": 1.140625, "learning_rate": 0.0001772636531191312, "loss": 2.7106, "step": 11793 }, { "epoch": 0.5521212475861665, "grad_norm": 1.421875, "learning_rate": 0.0001772599471664844, "loss": 2.4478, "step": 11794 }, { "epoch": 0.5521680613259992, "grad_norm": 2.0625, "learning_rate": 0.00017725624095057878, "loss": 3.1878, "step": 11795 }, { "epoch": 0.5522148750658318, "grad_norm": 1.390625, "learning_rate": 0.000177252534471427, "loss": 2.6874, "step": 11796 }, { "epoch": 0.5522616888056645, "grad_norm": 1.53125, "learning_rate": 0.00017724882772904166, "loss": 3.0044, "step": 11797 }, { "epoch": 0.5523085025454971, "grad_norm": 1.265625, "learning_rate": 0.00017724512072343543, "loss": 2.0752, "step": 11798 }, { "epoch": 0.5523553162853297, "grad_norm": 1.1875, "learning_rate": 0.00017724141345462087, "loss": 2.6919, "step": 11799 }, { "epoch": 0.5524021300251624, "grad_norm": 1.59375, "learning_rate": 0.0001772377059226107, "loss": 2.6894, "step": 11800 }, { "epoch": 0.552448943764995, "grad_norm": 1.4609375, "learning_rate": 0.0001772339981274175, "loss": 2.8658, "step": 11801 }, { "epoch": 0.5524957575048277, "grad_norm": 1.515625, "learning_rate": 0.00017723029006905388, "loss": 2.9258, "step": 11802 }, { "epoch": 0.5525425712446603, "grad_norm": 1.4609375, "learning_rate": 0.00017722658174753258, "loss": 2.8582, "step": 11803 }, { "epoch": 0.552589384984493, "grad_norm": 2.65625, "learning_rate": 0.0001772228731628661, "loss": 2.8149, "step": 11804 }, { "epoch": 0.5526361987243256, "grad_norm": 1.4140625, "learning_rate": 0.0001772191643150672, "loss": 2.7794, "step": 11805 }, { "epoch": 0.5526830124641582, "grad_norm": 2.234375, "learning_rate": 0.00017721545520414843, "loss": 2.8038, "step": 11806 }, { "epoch": 0.5527298262039909, "grad_norm": 1.234375, "learning_rate": 0.00017721174583012247, "loss": 2.7117, "step": 11807 }, { "epoch": 0.5527766399438235, "grad_norm": 1.46875, "learning_rate": 0.00017720803619300195, "loss": 2.9206, "step": 11808 }, { "epoch": 0.5528234536836562, "grad_norm": 1.28125, "learning_rate": 0.0001772043262927995, "loss": 4.05, "step": 11809 }, { "epoch": 0.5528702674234888, "grad_norm": 1.125, "learning_rate": 0.0001772006161295278, "loss": 3.6424, "step": 11810 }, { "epoch": 0.5529170811633214, "grad_norm": 1.4609375, "learning_rate": 0.0001771969057031994, "loss": 2.7049, "step": 11811 }, { "epoch": 0.5529638949031541, "grad_norm": 1.2734375, "learning_rate": 0.00017719319501382702, "loss": 2.7802, "step": 11812 }, { "epoch": 0.5530107086429867, "grad_norm": 1.5234375, "learning_rate": 0.00017718948406142334, "loss": 2.7786, "step": 11813 }, { "epoch": 0.5530575223828194, "grad_norm": 1.265625, "learning_rate": 0.00017718577284600093, "loss": 2.8012, "step": 11814 }, { "epoch": 0.553104336122652, "grad_norm": 1.7421875, "learning_rate": 0.00017718206136757244, "loss": 2.7123, "step": 11815 }, { "epoch": 0.5531511498624846, "grad_norm": 1.515625, "learning_rate": 0.0001771783496261506, "loss": 3.0874, "step": 11816 }, { "epoch": 0.5531979636023173, "grad_norm": 2.234375, "learning_rate": 0.00017717463762174793, "loss": 3.3589, "step": 11817 }, { "epoch": 0.5532447773421499, "grad_norm": 1.6953125, "learning_rate": 0.00017717092535437715, "loss": 2.7501, "step": 11818 }, { "epoch": 0.5532915910819826, "grad_norm": 1.1640625, "learning_rate": 0.00017716721282405094, "loss": 2.7834, "step": 11819 }, { "epoch": 0.5533384048218152, "grad_norm": 1.09375, "learning_rate": 0.00017716350003078185, "loss": 2.723, "step": 11820 }, { "epoch": 0.5533852185616478, "grad_norm": 1.1328125, "learning_rate": 0.00017715978697458264, "loss": 2.7323, "step": 11821 }, { "epoch": 0.5534320323014805, "grad_norm": 1.78125, "learning_rate": 0.0001771560736554659, "loss": 2.9879, "step": 11822 }, { "epoch": 0.5534788460413131, "grad_norm": 1.8046875, "learning_rate": 0.00017715236007344434, "loss": 2.785, "step": 11823 }, { "epoch": 0.5535256597811458, "grad_norm": 1.2421875, "learning_rate": 0.00017714864622853053, "loss": 2.6532, "step": 11824 }, { "epoch": 0.5535724735209784, "grad_norm": 1.5234375, "learning_rate": 0.0001771449321207372, "loss": 2.8723, "step": 11825 }, { "epoch": 0.553619287260811, "grad_norm": 1.3828125, "learning_rate": 0.00017714121775007694, "loss": 2.9651, "step": 11826 }, { "epoch": 0.5536661010006437, "grad_norm": 1.6796875, "learning_rate": 0.00017713750311656244, "loss": 3.174, "step": 11827 }, { "epoch": 0.5537129147404763, "grad_norm": 1.3359375, "learning_rate": 0.0001771337882202064, "loss": 2.7047, "step": 11828 }, { "epoch": 0.553759728480309, "grad_norm": 1.4296875, "learning_rate": 0.00017713007306102138, "loss": 3.2511, "step": 11829 }, { "epoch": 0.5538065422201416, "grad_norm": 1.6796875, "learning_rate": 0.00017712635763902012, "loss": 2.8204, "step": 11830 }, { "epoch": 0.5538533559599742, "grad_norm": 2.421875, "learning_rate": 0.00017712264195421525, "loss": 2.7191, "step": 11831 }, { "epoch": 0.5539001696998069, "grad_norm": 1.453125, "learning_rate": 0.00017711892600661943, "loss": 2.8228, "step": 11832 }, { "epoch": 0.5539469834396396, "grad_norm": 1.671875, "learning_rate": 0.00017711520979624533, "loss": 2.8073, "step": 11833 }, { "epoch": 0.5539937971794722, "grad_norm": 1.7421875, "learning_rate": 0.00017711149332310562, "loss": 3.0807, "step": 11834 }, { "epoch": 0.5540406109193048, "grad_norm": 1.3671875, "learning_rate": 0.00017710777658721294, "loss": 2.7649, "step": 11835 }, { "epoch": 0.5540874246591374, "grad_norm": 1.140625, "learning_rate": 0.00017710405958857998, "loss": 2.6892, "step": 11836 }, { "epoch": 0.5541342383989701, "grad_norm": 1.1484375, "learning_rate": 0.00017710034232721938, "loss": 2.8281, "step": 11837 }, { "epoch": 0.5541810521388028, "grad_norm": 1.5703125, "learning_rate": 0.00017709662480314383, "loss": 3.0896, "step": 11838 }, { "epoch": 0.5542278658786354, "grad_norm": 1.28125, "learning_rate": 0.00017709290701636602, "loss": 2.6438, "step": 11839 }, { "epoch": 0.554274679618468, "grad_norm": 1.3671875, "learning_rate": 0.00017708918896689854, "loss": 2.6795, "step": 11840 }, { "epoch": 0.5543214933583006, "grad_norm": 1.84375, "learning_rate": 0.0001770854706547541, "loss": 2.9273, "step": 11841 }, { "epoch": 0.5543683070981333, "grad_norm": 1.34375, "learning_rate": 0.0001770817520799454, "loss": 3.0282, "step": 11842 }, { "epoch": 0.554415120837966, "grad_norm": 1.5390625, "learning_rate": 0.00017707803324248507, "loss": 2.7442, "step": 11843 }, { "epoch": 0.5544619345777986, "grad_norm": 1.140625, "learning_rate": 0.00017707431414238582, "loss": 2.9228, "step": 11844 }, { "epoch": 0.5545087483176312, "grad_norm": 1.453125, "learning_rate": 0.0001770705947796603, "loss": 4.5855, "step": 11845 }, { "epoch": 0.5545555620574638, "grad_norm": 1.46875, "learning_rate": 0.00017706687515432114, "loss": 3.0573, "step": 11846 }, { "epoch": 0.5546023757972965, "grad_norm": 1.9765625, "learning_rate": 0.00017706315526638111, "loss": 2.6104, "step": 11847 }, { "epoch": 0.5546491895371292, "grad_norm": 1.7421875, "learning_rate": 0.0001770594351158528, "loss": 2.5354, "step": 11848 }, { "epoch": 0.5546960032769618, "grad_norm": 1.5390625, "learning_rate": 0.0001770557147027489, "loss": 2.4579, "step": 11849 }, { "epoch": 0.5547428170167944, "grad_norm": 1.2421875, "learning_rate": 0.00017705199402708215, "loss": 2.4497, "step": 11850 }, { "epoch": 0.554789630756627, "grad_norm": 1.328125, "learning_rate": 0.00017704827308886518, "loss": 2.6857, "step": 11851 }, { "epoch": 0.5548364444964597, "grad_norm": 1.3125, "learning_rate": 0.00017704455188811066, "loss": 2.5548, "step": 11852 }, { "epoch": 0.5548832582362924, "grad_norm": 2.8125, "learning_rate": 0.00017704083042483125, "loss": 2.7264, "step": 11853 }, { "epoch": 0.554930071976125, "grad_norm": 1.3125, "learning_rate": 0.0001770371086990397, "loss": 2.7036, "step": 11854 }, { "epoch": 0.5549768857159576, "grad_norm": 1.671875, "learning_rate": 0.00017703338671074868, "loss": 2.7688, "step": 11855 }, { "epoch": 0.5550236994557902, "grad_norm": 2.484375, "learning_rate": 0.0001770296644599708, "loss": 3.0397, "step": 11856 }, { "epoch": 0.555070513195623, "grad_norm": 1.609375, "learning_rate": 0.00017702594194671882, "loss": 2.7314, "step": 11857 }, { "epoch": 0.5551173269354556, "grad_norm": 2.125, "learning_rate": 0.0001770222191710054, "loss": 2.6009, "step": 11858 }, { "epoch": 0.5551641406752882, "grad_norm": 1.46875, "learning_rate": 0.0001770184961328432, "loss": 2.5962, "step": 11859 }, { "epoch": 0.5552109544151208, "grad_norm": 1.25, "learning_rate": 0.00017701477283224492, "loss": 2.7629, "step": 11860 }, { "epoch": 0.5552577681549534, "grad_norm": 1.375, "learning_rate": 0.00017701104926922328, "loss": 2.7268, "step": 11861 }, { "epoch": 0.5553045818947862, "grad_norm": 1.65625, "learning_rate": 0.00017700732544379092, "loss": 3.982, "step": 11862 }, { "epoch": 0.5553513956346188, "grad_norm": 1.6953125, "learning_rate": 0.00017700360135596054, "loss": 3.0326, "step": 11863 }, { "epoch": 0.5553982093744514, "grad_norm": 1.6953125, "learning_rate": 0.00017699987700574489, "loss": 2.4549, "step": 11864 }, { "epoch": 0.555445023114284, "grad_norm": 1.3203125, "learning_rate": 0.00017699615239315656, "loss": 2.9881, "step": 11865 }, { "epoch": 0.5554918368541167, "grad_norm": 1.640625, "learning_rate": 0.0001769924275182083, "loss": 2.8695, "step": 11866 }, { "epoch": 0.5555386505939494, "grad_norm": 2.25, "learning_rate": 0.00017698870238091278, "loss": 2.5062, "step": 11867 }, { "epoch": 0.555585464333782, "grad_norm": 1.9296875, "learning_rate": 0.00017698497698128275, "loss": 2.8249, "step": 11868 }, { "epoch": 0.5556322780736146, "grad_norm": 2.390625, "learning_rate": 0.00017698125131933084, "loss": 3.201, "step": 11869 }, { "epoch": 0.5556790918134472, "grad_norm": 1.375, "learning_rate": 0.00017697752539506975, "loss": 2.2858, "step": 11870 }, { "epoch": 0.5557259055532799, "grad_norm": 1.1015625, "learning_rate": 0.00017697379920851222, "loss": 2.4401, "step": 11871 }, { "epoch": 0.5557727192931126, "grad_norm": 1.7578125, "learning_rate": 0.0001769700727596709, "loss": 2.8946, "step": 11872 }, { "epoch": 0.5558195330329452, "grad_norm": 1.78125, "learning_rate": 0.00017696634604855851, "loss": 2.9919, "step": 11873 }, { "epoch": 0.5558663467727778, "grad_norm": 1.0625, "learning_rate": 0.00017696261907518775, "loss": 2.9693, "step": 11874 }, { "epoch": 0.5559131605126104, "grad_norm": 1.390625, "learning_rate": 0.0001769588918395713, "loss": 2.882, "step": 11875 }, { "epoch": 0.5559599742524431, "grad_norm": 0.98046875, "learning_rate": 0.00017695516434172191, "loss": 4.1713, "step": 11876 }, { "epoch": 0.5560067879922758, "grad_norm": 1.578125, "learning_rate": 0.0001769514365816522, "loss": 3.0619, "step": 11877 }, { "epoch": 0.5560536017321084, "grad_norm": 1.359375, "learning_rate": 0.00017694770855937494, "loss": 2.9905, "step": 11878 }, { "epoch": 0.556100415471941, "grad_norm": 1.328125, "learning_rate": 0.0001769439802749028, "loss": 2.6221, "step": 11879 }, { "epoch": 0.5561472292117736, "grad_norm": 1.734375, "learning_rate": 0.00017694025172824849, "loss": 3.0001, "step": 11880 }, { "epoch": 0.5561940429516063, "grad_norm": 1.484375, "learning_rate": 0.00017693652291942474, "loss": 2.993, "step": 11881 }, { "epoch": 0.556240856691439, "grad_norm": 1.5546875, "learning_rate": 0.00017693279384844423, "loss": 2.9415, "step": 11882 }, { "epoch": 0.5562876704312716, "grad_norm": 1.4765625, "learning_rate": 0.00017692906451531964, "loss": 2.7138, "step": 11883 }, { "epoch": 0.5563344841711042, "grad_norm": 1.6953125, "learning_rate": 0.00017692533492006377, "loss": 2.2774, "step": 11884 }, { "epoch": 0.5563812979109368, "grad_norm": 1.9453125, "learning_rate": 0.00017692160506268924, "loss": 2.8896, "step": 11885 }, { "epoch": 0.5564281116507696, "grad_norm": 1.2578125, "learning_rate": 0.00017691787494320877, "loss": 2.7754, "step": 11886 }, { "epoch": 0.5564749253906022, "grad_norm": 1.0234375, "learning_rate": 0.00017691414456163512, "loss": 2.8265, "step": 11887 }, { "epoch": 0.5565217391304348, "grad_norm": 1.4453125, "learning_rate": 0.00017691041391798093, "loss": 2.9133, "step": 11888 }, { "epoch": 0.5565685528702674, "grad_norm": 3.0625, "learning_rate": 0.00017690668301225898, "loss": 3.293, "step": 11889 }, { "epoch": 0.5566153666101, "grad_norm": 1.3671875, "learning_rate": 0.00017690295184448194, "loss": 2.8363, "step": 11890 }, { "epoch": 0.5566621803499328, "grad_norm": 2.140625, "learning_rate": 0.00017689922041466254, "loss": 2.9243, "step": 11891 }, { "epoch": 0.5567089940897654, "grad_norm": 1.5703125, "learning_rate": 0.00017689548872281349, "loss": 2.9553, "step": 11892 }, { "epoch": 0.556755807829598, "grad_norm": 1.4609375, "learning_rate": 0.0001768917567689475, "loss": 3.043, "step": 11893 }, { "epoch": 0.5568026215694306, "grad_norm": 1.7109375, "learning_rate": 0.0001768880245530773, "loss": 3.0475, "step": 11894 }, { "epoch": 0.5568494353092632, "grad_norm": 1.5, "learning_rate": 0.0001768842920752156, "loss": 2.4166, "step": 11895 }, { "epoch": 0.556896249049096, "grad_norm": 2.8125, "learning_rate": 0.00017688055933537514, "loss": 2.5084, "step": 11896 }, { "epoch": 0.5569430627889286, "grad_norm": 1.3671875, "learning_rate": 0.0001768768263335686, "loss": 3.0825, "step": 11897 }, { "epoch": 0.5569898765287612, "grad_norm": 1.3203125, "learning_rate": 0.00017687309306980872, "loss": 2.4148, "step": 11898 }, { "epoch": 0.5570366902685938, "grad_norm": 1.3515625, "learning_rate": 0.0001768693595441082, "loss": 2.854, "step": 11899 }, { "epoch": 0.5570835040084264, "grad_norm": 1.1953125, "learning_rate": 0.0001768656257564798, "loss": 4.7562, "step": 11900 }, { "epoch": 0.5571303177482592, "grad_norm": 1.3203125, "learning_rate": 0.00017686189170693624, "loss": 2.5368, "step": 11901 }, { "epoch": 0.5571771314880918, "grad_norm": 1.2578125, "learning_rate": 0.00017685815739549018, "loss": 2.8907, "step": 11902 }, { "epoch": 0.5572239452279244, "grad_norm": 1.015625, "learning_rate": 0.00017685442282215443, "loss": 2.3565, "step": 11903 }, { "epoch": 0.557270758967757, "grad_norm": 1.1796875, "learning_rate": 0.00017685068798694166, "loss": 2.7139, "step": 11904 }, { "epoch": 0.5573175727075896, "grad_norm": 1.65625, "learning_rate": 0.00017684695288986462, "loss": 2.9288, "step": 11905 }, { "epoch": 0.5573643864474224, "grad_norm": 1.3828125, "learning_rate": 0.00017684321753093601, "loss": 2.896, "step": 11906 }, { "epoch": 0.557411200187255, "grad_norm": 1.1796875, "learning_rate": 0.0001768394819101686, "loss": 2.6817, "step": 11907 }, { "epoch": 0.5574580139270876, "grad_norm": 1.421875, "learning_rate": 0.00017683574602757508, "loss": 2.3677, "step": 11908 }, { "epoch": 0.5575048276669202, "grad_norm": 1.359375, "learning_rate": 0.0001768320098831682, "loss": 2.4068, "step": 11909 }, { "epoch": 0.5575516414067528, "grad_norm": 1.21875, "learning_rate": 0.00017682827347696072, "loss": 2.7589, "step": 11910 }, { "epoch": 0.5575984551465856, "grad_norm": 1.5234375, "learning_rate": 0.00017682453680896532, "loss": 3.0271, "step": 11911 }, { "epoch": 0.5576452688864182, "grad_norm": 1.328125, "learning_rate": 0.00017682079987919473, "loss": 2.7874, "step": 11912 }, { "epoch": 0.5576920826262508, "grad_norm": 1.46875, "learning_rate": 0.00017681706268766174, "loss": 2.8834, "step": 11913 }, { "epoch": 0.5577388963660834, "grad_norm": 1.5703125, "learning_rate": 0.000176813325234379, "loss": 2.9252, "step": 11914 }, { "epoch": 0.557785710105916, "grad_norm": 1.15625, "learning_rate": 0.00017680958751935936, "loss": 4.092, "step": 11915 }, { "epoch": 0.5578325238457488, "grad_norm": 1.3515625, "learning_rate": 0.00017680584954261544, "loss": 2.7886, "step": 11916 }, { "epoch": 0.5578793375855814, "grad_norm": 1.53125, "learning_rate": 0.00017680211130416005, "loss": 2.8121, "step": 11917 }, { "epoch": 0.557926151325414, "grad_norm": 1.6953125, "learning_rate": 0.0001767983728040059, "loss": 2.7483, "step": 11918 }, { "epoch": 0.5579729650652466, "grad_norm": 1.7578125, "learning_rate": 0.00017679463404216575, "loss": 2.7884, "step": 11919 }, { "epoch": 0.5580197788050792, "grad_norm": 1.890625, "learning_rate": 0.00017679089501865226, "loss": 3.1997, "step": 11920 }, { "epoch": 0.558066592544912, "grad_norm": 2.28125, "learning_rate": 0.00017678715573347828, "loss": 2.9822, "step": 11921 }, { "epoch": 0.5581134062847446, "grad_norm": 1.7109375, "learning_rate": 0.00017678341618665653, "loss": 2.9816, "step": 11922 }, { "epoch": 0.5581602200245772, "grad_norm": 2.015625, "learning_rate": 0.0001767796763781997, "loss": 2.5733, "step": 11923 }, { "epoch": 0.5582070337644098, "grad_norm": 1.375, "learning_rate": 0.00017677593630812055, "loss": 2.4838, "step": 11924 }, { "epoch": 0.5582538475042425, "grad_norm": 1.5078125, "learning_rate": 0.00017677219597643187, "loss": 2.8695, "step": 11925 }, { "epoch": 0.5583006612440752, "grad_norm": 3.234375, "learning_rate": 0.00017676845538314634, "loss": 2.9176, "step": 11926 }, { "epoch": 0.5583474749839078, "grad_norm": 1.75, "learning_rate": 0.00017676471452827673, "loss": 2.5287, "step": 11927 }, { "epoch": 0.5583942887237404, "grad_norm": 2.203125, "learning_rate": 0.0001767609734118358, "loss": 3.1095, "step": 11928 }, { "epoch": 0.558441102463573, "grad_norm": 1.4765625, "learning_rate": 0.0001767572320338363, "loss": 2.6235, "step": 11929 }, { "epoch": 0.5584879162034057, "grad_norm": 1.046875, "learning_rate": 0.00017675349039429096, "loss": 3.128, "step": 11930 }, { "epoch": 0.5585347299432384, "grad_norm": 1.3984375, "learning_rate": 0.00017674974849321253, "loss": 2.6099, "step": 11931 }, { "epoch": 0.558581543683071, "grad_norm": 1.6953125, "learning_rate": 0.0001767460063306138, "loss": 2.8442, "step": 11932 }, { "epoch": 0.5586283574229036, "grad_norm": 1.1796875, "learning_rate": 0.00017674226390650746, "loss": 2.8193, "step": 11933 }, { "epoch": 0.5586751711627362, "grad_norm": 1.40625, "learning_rate": 0.0001767385212209063, "loss": 2.9566, "step": 11934 }, { "epoch": 0.5587219849025689, "grad_norm": 1.4765625, "learning_rate": 0.00017673477827382308, "loss": 2.8899, "step": 11935 }, { "epoch": 0.5587687986424016, "grad_norm": 1.3359375, "learning_rate": 0.00017673103506527052, "loss": 2.7915, "step": 11936 }, { "epoch": 0.5588156123822342, "grad_norm": 1.3828125, "learning_rate": 0.0001767272915952614, "loss": 2.8428, "step": 11937 }, { "epoch": 0.5588624261220668, "grad_norm": 1.5859375, "learning_rate": 0.00017672354786380846, "loss": 2.7019, "step": 11938 }, { "epoch": 0.5589092398618994, "grad_norm": 1.359375, "learning_rate": 0.00017671980387092448, "loss": 2.7895, "step": 11939 }, { "epoch": 0.5589560536017321, "grad_norm": 1.1484375, "learning_rate": 0.00017671605961662218, "loss": 2.7577, "step": 11940 }, { "epoch": 0.5590028673415648, "grad_norm": 1.21875, "learning_rate": 0.00017671231510091437, "loss": 4.641, "step": 11941 }, { "epoch": 0.5590496810813974, "grad_norm": 1.3828125, "learning_rate": 0.00017670857032381379, "loss": 2.8297, "step": 11942 }, { "epoch": 0.55909649482123, "grad_norm": 1.1953125, "learning_rate": 0.00017670482528533318, "loss": 2.9262, "step": 11943 }, { "epoch": 0.5591433085610626, "grad_norm": 1.7109375, "learning_rate": 0.00017670107998548525, "loss": 3.0656, "step": 11944 }, { "epoch": 0.5591901223008953, "grad_norm": 1.3046875, "learning_rate": 0.00017669733442428288, "loss": 3.1739, "step": 11945 }, { "epoch": 0.559236936040728, "grad_norm": 1.1796875, "learning_rate": 0.00017669358860173876, "loss": 2.5102, "step": 11946 }, { "epoch": 0.5592837497805606, "grad_norm": 1.2890625, "learning_rate": 0.00017668984251786572, "loss": 2.7515, "step": 11947 }, { "epoch": 0.5593305635203932, "grad_norm": 3.515625, "learning_rate": 0.00017668609617267639, "loss": 2.774, "step": 11948 }, { "epoch": 0.5593773772602258, "grad_norm": 1.8671875, "learning_rate": 0.00017668234956618367, "loss": 3.0105, "step": 11949 }, { "epoch": 0.5594241910000585, "grad_norm": 1.5234375, "learning_rate": 0.0001766786026984003, "loss": 2.5649, "step": 11950 }, { "epoch": 0.5594710047398912, "grad_norm": 1.890625, "learning_rate": 0.000176674855569339, "loss": 2.9366, "step": 11951 }, { "epoch": 0.5595178184797238, "grad_norm": 1.1953125, "learning_rate": 0.00017667110817901253, "loss": 2.8697, "step": 11952 }, { "epoch": 0.5595646322195564, "grad_norm": 1.7890625, "learning_rate": 0.00017666736052743373, "loss": 3.1261, "step": 11953 }, { "epoch": 0.559611445959389, "grad_norm": 1.3984375, "learning_rate": 0.0001766636126146153, "loss": 2.4398, "step": 11954 }, { "epoch": 0.5596582596992217, "grad_norm": 1.078125, "learning_rate": 0.00017665986444057006, "loss": 2.5164, "step": 11955 }, { "epoch": 0.5597050734390544, "grad_norm": 1.5390625, "learning_rate": 0.00017665611600531077, "loss": 2.4282, "step": 11956 }, { "epoch": 0.559751887178887, "grad_norm": 1.2109375, "learning_rate": 0.00017665236730885018, "loss": 2.706, "step": 11957 }, { "epoch": 0.5597987009187196, "grad_norm": 1.1953125, "learning_rate": 0.0001766486183512011, "loss": 2.6899, "step": 11958 }, { "epoch": 0.5598455146585523, "grad_norm": 1.5546875, "learning_rate": 0.00017664486913237626, "loss": 2.8804, "step": 11959 }, { "epoch": 0.5598923283983849, "grad_norm": 1.5390625, "learning_rate": 0.00017664111965238845, "loss": 2.9054, "step": 11960 }, { "epoch": 0.5599391421382176, "grad_norm": 2.546875, "learning_rate": 0.00017663736991125047, "loss": 3.2539, "step": 11961 }, { "epoch": 0.5599859558780502, "grad_norm": 1.453125, "learning_rate": 0.00017663361990897508, "loss": 2.6647, "step": 11962 }, { "epoch": 0.5600327696178828, "grad_norm": 1.671875, "learning_rate": 0.0001766298696455751, "loss": 2.6644, "step": 11963 }, { "epoch": 0.5600795833577155, "grad_norm": 1.4296875, "learning_rate": 0.0001766261191210632, "loss": 2.8356, "step": 11964 }, { "epoch": 0.5601263970975481, "grad_norm": 1.296875, "learning_rate": 0.00017662236833545228, "loss": 3.0483, "step": 11965 }, { "epoch": 0.5601732108373808, "grad_norm": 1.46875, "learning_rate": 0.00017661861728875505, "loss": 2.5319, "step": 11966 }, { "epoch": 0.5602200245772134, "grad_norm": 2.09375, "learning_rate": 0.0001766148659809843, "loss": 2.7742, "step": 11967 }, { "epoch": 0.560266838317046, "grad_norm": 1.25, "learning_rate": 0.00017661111441215284, "loss": 2.8262, "step": 11968 }, { "epoch": 0.5603136520568787, "grad_norm": 1.3203125, "learning_rate": 0.0001766073625822734, "loss": 2.4349, "step": 11969 }, { "epoch": 0.5603604657967113, "grad_norm": 1.296875, "learning_rate": 0.00017660361049135888, "loss": 2.9709, "step": 11970 }, { "epoch": 0.560407279536544, "grad_norm": 1.4140625, "learning_rate": 0.00017659985813942193, "loss": 2.8185, "step": 11971 }, { "epoch": 0.5604540932763766, "grad_norm": 1.5625, "learning_rate": 0.0001765961055264754, "loss": 3.0489, "step": 11972 }, { "epoch": 0.5605009070162092, "grad_norm": 1.296875, "learning_rate": 0.00017659235265253203, "loss": 2.8673, "step": 11973 }, { "epoch": 0.5605477207560419, "grad_norm": 1.765625, "learning_rate": 0.00017658859951760468, "loss": 3.1882, "step": 11974 }, { "epoch": 0.5605945344958745, "grad_norm": 1.2578125, "learning_rate": 0.00017658484612170612, "loss": 2.9383, "step": 11975 }, { "epoch": 0.5606413482357072, "grad_norm": 1.484375, "learning_rate": 0.00017658109246484907, "loss": 2.4752, "step": 11976 }, { "epoch": 0.5606881619755398, "grad_norm": 1.2421875, "learning_rate": 0.00017657733854704644, "loss": 2.8508, "step": 11977 }, { "epoch": 0.5607349757153725, "grad_norm": 1.2109375, "learning_rate": 0.00017657358436831092, "loss": 2.5977, "step": 11978 }, { "epoch": 0.5607817894552051, "grad_norm": 1.171875, "learning_rate": 0.00017656982992865535, "loss": 2.7613, "step": 11979 }, { "epoch": 0.5608286031950377, "grad_norm": 1.3984375, "learning_rate": 0.00017656607522809252, "loss": 2.8536, "step": 11980 }, { "epoch": 0.5608754169348704, "grad_norm": 1.515625, "learning_rate": 0.0001765623202666352, "loss": 2.698, "step": 11981 }, { "epoch": 0.560922230674703, "grad_norm": 1.25, "learning_rate": 0.00017655856504429618, "loss": 2.5514, "step": 11982 }, { "epoch": 0.5609690444145357, "grad_norm": 1.3984375, "learning_rate": 0.00017655480956108827, "loss": 2.4738, "step": 11983 }, { "epoch": 0.5610158581543683, "grad_norm": 1.734375, "learning_rate": 0.0001765510538170243, "loss": 2.7496, "step": 11984 }, { "epoch": 0.5610626718942009, "grad_norm": 1.4453125, "learning_rate": 0.00017654729781211705, "loss": 2.9906, "step": 11985 }, { "epoch": 0.5611094856340336, "grad_norm": 1.1796875, "learning_rate": 0.00017654354154637928, "loss": 2.481, "step": 11986 }, { "epoch": 0.5611562993738662, "grad_norm": 2.140625, "learning_rate": 0.00017653978501982383, "loss": 2.6079, "step": 11987 }, { "epoch": 0.5612031131136989, "grad_norm": 1.3046875, "learning_rate": 0.0001765360282324635, "loss": 2.4968, "step": 11988 }, { "epoch": 0.5612499268535315, "grad_norm": 2.40625, "learning_rate": 0.00017653227118431106, "loss": 3.2932, "step": 11989 }, { "epoch": 0.5612967405933642, "grad_norm": 1.4140625, "learning_rate": 0.00017652851387537933, "loss": 3.0739, "step": 11990 }, { "epoch": 0.5613435543331968, "grad_norm": 1.6171875, "learning_rate": 0.0001765247563056811, "loss": 2.7256, "step": 11991 }, { "epoch": 0.5613903680730294, "grad_norm": 1.328125, "learning_rate": 0.0001765209984752292, "loss": 2.409, "step": 11992 }, { "epoch": 0.5614371818128621, "grad_norm": 1.1953125, "learning_rate": 0.0001765172403840364, "loss": 3.0137, "step": 11993 }, { "epoch": 0.5614839955526947, "grad_norm": 1.25, "learning_rate": 0.00017651348203211558, "loss": 3.0633, "step": 11994 }, { "epoch": 0.5615308092925274, "grad_norm": 1.5078125, "learning_rate": 0.00017650972341947947, "loss": 2.9326, "step": 11995 }, { "epoch": 0.56157762303236, "grad_norm": 1.3359375, "learning_rate": 0.00017650596454614088, "loss": 2.638, "step": 11996 }, { "epoch": 0.5616244367721926, "grad_norm": 1.359375, "learning_rate": 0.00017650220541211265, "loss": 2.7514, "step": 11997 }, { "epoch": 0.5616712505120253, "grad_norm": 1.5546875, "learning_rate": 0.00017649844601740758, "loss": 2.4637, "step": 11998 }, { "epoch": 0.5617180642518579, "grad_norm": 1.3671875, "learning_rate": 0.00017649468636203846, "loss": 3.0096, "step": 11999 }, { "epoch": 0.5617648779916906, "grad_norm": 1.8125, "learning_rate": 0.0001764909264460181, "loss": 2.7104, "step": 12000 }, { "epoch": 0.5618116917315232, "grad_norm": 1.5390625, "learning_rate": 0.00017648716626935938, "loss": 2.8946, "step": 12001 }, { "epoch": 0.5618585054713559, "grad_norm": 1.4921875, "learning_rate": 0.00017648340583207504, "loss": 2.939, "step": 12002 }, { "epoch": 0.5619053192111885, "grad_norm": 1.46875, "learning_rate": 0.0001764796451341779, "loss": 2.7418, "step": 12003 }, { "epoch": 0.5619521329510211, "grad_norm": 1.171875, "learning_rate": 0.00017647588417568085, "loss": 2.4681, "step": 12004 }, { "epoch": 0.5619989466908538, "grad_norm": 1.2109375, "learning_rate": 0.0001764721229565966, "loss": 2.7842, "step": 12005 }, { "epoch": 0.5620457604306864, "grad_norm": 1.9609375, "learning_rate": 0.000176468361476938, "loss": 2.8888, "step": 12006 }, { "epoch": 0.562092574170519, "grad_norm": 1.515625, "learning_rate": 0.0001764645997367179, "loss": 2.6813, "step": 12007 }, { "epoch": 0.5621393879103517, "grad_norm": 1.7734375, "learning_rate": 0.00017646083773594908, "loss": 3.0961, "step": 12008 }, { "epoch": 0.5621862016501843, "grad_norm": 1.6875, "learning_rate": 0.00017645707547464436, "loss": 2.6456, "step": 12009 }, { "epoch": 0.562233015390017, "grad_norm": 1.453125, "learning_rate": 0.00017645331295281663, "loss": 2.9402, "step": 12010 }, { "epoch": 0.5622798291298496, "grad_norm": 1.515625, "learning_rate": 0.00017644955017047858, "loss": 2.9191, "step": 12011 }, { "epoch": 0.5623266428696823, "grad_norm": 1.3515625, "learning_rate": 0.00017644578712764316, "loss": 3.1037, "step": 12012 }, { "epoch": 0.5623734566095149, "grad_norm": 1.53125, "learning_rate": 0.00017644202382432315, "loss": 2.7332, "step": 12013 }, { "epoch": 0.5624202703493475, "grad_norm": 1.9609375, "learning_rate": 0.00017643826026053134, "loss": 3.0683, "step": 12014 }, { "epoch": 0.5624670840891802, "grad_norm": 1.2890625, "learning_rate": 0.00017643449643628056, "loss": 2.7116, "step": 12015 }, { "epoch": 0.5625138978290128, "grad_norm": 1.2734375, "learning_rate": 0.0001764307323515837, "loss": 2.6357, "step": 12016 }, { "epoch": 0.5625607115688455, "grad_norm": 1.5078125, "learning_rate": 0.00017642696800645349, "loss": 2.8307, "step": 12017 }, { "epoch": 0.5626075253086781, "grad_norm": 1.2265625, "learning_rate": 0.00017642320340090283, "loss": 2.847, "step": 12018 }, { "epoch": 0.5626543390485107, "grad_norm": 1.3203125, "learning_rate": 0.0001764194385349445, "loss": 3.6484, "step": 12019 }, { "epoch": 0.5627011527883434, "grad_norm": 1.3125, "learning_rate": 0.00017641567340859137, "loss": 2.7442, "step": 12020 }, { "epoch": 0.562747966528176, "grad_norm": 1.4765625, "learning_rate": 0.00017641190802185626, "loss": 3.2067, "step": 12021 }, { "epoch": 0.5627947802680087, "grad_norm": 2.234375, "learning_rate": 0.00017640814237475196, "loss": 2.5898, "step": 12022 }, { "epoch": 0.5628415940078413, "grad_norm": 1.6796875, "learning_rate": 0.00017640437646729136, "loss": 2.7288, "step": 12023 }, { "epoch": 0.5628884077476739, "grad_norm": 1.6640625, "learning_rate": 0.00017640061029948722, "loss": 3.0206, "step": 12024 }, { "epoch": 0.5629352214875066, "grad_norm": 1.25, "learning_rate": 0.00017639684387135246, "loss": 2.8182, "step": 12025 }, { "epoch": 0.5629820352273393, "grad_norm": 1.171875, "learning_rate": 0.00017639307718289983, "loss": 2.706, "step": 12026 }, { "epoch": 0.5630288489671719, "grad_norm": 1.328125, "learning_rate": 0.00017638931023414224, "loss": 2.5808, "step": 12027 }, { "epoch": 0.5630756627070045, "grad_norm": 1.3828125, "learning_rate": 0.0001763855430250925, "loss": 2.5019, "step": 12028 }, { "epoch": 0.5631224764468371, "grad_norm": 1.3203125, "learning_rate": 0.00017638177555576344, "loss": 2.9954, "step": 12029 }, { "epoch": 0.5631692901866698, "grad_norm": 1.46875, "learning_rate": 0.00017637800782616784, "loss": 2.5262, "step": 12030 }, { "epoch": 0.5632161039265025, "grad_norm": 1.1796875, "learning_rate": 0.0001763742398363186, "loss": 2.6642, "step": 12031 }, { "epoch": 0.5632629176663351, "grad_norm": 1.296875, "learning_rate": 0.0001763704715862286, "loss": 2.647, "step": 12032 }, { "epoch": 0.5633097314061677, "grad_norm": 1.5234375, "learning_rate": 0.00017636670307591063, "loss": 3.1899, "step": 12033 }, { "epoch": 0.5633565451460003, "grad_norm": 1.859375, "learning_rate": 0.0001763629343053775, "loss": 4.4221, "step": 12034 }, { "epoch": 0.563403358885833, "grad_norm": 1.078125, "learning_rate": 0.0001763591652746421, "loss": 4.2462, "step": 12035 }, { "epoch": 0.5634501726256657, "grad_norm": 1.4921875, "learning_rate": 0.00017635539598371725, "loss": 2.7333, "step": 12036 }, { "epoch": 0.5634969863654983, "grad_norm": 1.15625, "learning_rate": 0.00017635162643261584, "loss": 2.4498, "step": 12037 }, { "epoch": 0.5635438001053309, "grad_norm": 1.2109375, "learning_rate": 0.00017634785662135066, "loss": 2.8211, "step": 12038 }, { "epoch": 0.5635906138451635, "grad_norm": 1.8359375, "learning_rate": 0.00017634408654993453, "loss": 3.2427, "step": 12039 }, { "epoch": 0.5636374275849962, "grad_norm": 2.28125, "learning_rate": 0.00017634031621838037, "loss": 2.6771, "step": 12040 }, { "epoch": 0.5636842413248289, "grad_norm": 1.0625, "learning_rate": 0.000176336545626701, "loss": 2.7927, "step": 12041 }, { "epoch": 0.5637310550646615, "grad_norm": 1.3203125, "learning_rate": 0.00017633277477490923, "loss": 2.7563, "step": 12042 }, { "epoch": 0.5637778688044941, "grad_norm": 1.875, "learning_rate": 0.000176329003663018, "loss": 2.7578, "step": 12043 }, { "epoch": 0.5638246825443267, "grad_norm": 1.203125, "learning_rate": 0.00017632523229104004, "loss": 2.8217, "step": 12044 }, { "epoch": 0.5638714962841594, "grad_norm": 1.7265625, "learning_rate": 0.00017632146065898829, "loss": 2.6056, "step": 12045 }, { "epoch": 0.5639183100239921, "grad_norm": 1.1640625, "learning_rate": 0.0001763176887668756, "loss": 2.8309, "step": 12046 }, { "epoch": 0.5639651237638247, "grad_norm": 1.3203125, "learning_rate": 0.00017631391661471476, "loss": 1.9721, "step": 12047 }, { "epoch": 0.5640119375036573, "grad_norm": 1.1484375, "learning_rate": 0.00017631014420251868, "loss": 3.0243, "step": 12048 }, { "epoch": 0.5640587512434899, "grad_norm": 1.6328125, "learning_rate": 0.0001763063715303002, "loss": 2.7912, "step": 12049 }, { "epoch": 0.5641055649833226, "grad_norm": 1.4296875, "learning_rate": 0.00017630259859807213, "loss": 3.0524, "step": 12050 }, { "epoch": 0.5641523787231553, "grad_norm": 1.3828125, "learning_rate": 0.0001762988254058474, "loss": 2.9635, "step": 12051 }, { "epoch": 0.5641991924629879, "grad_norm": 1.21875, "learning_rate": 0.00017629505195363882, "loss": 2.6001, "step": 12052 }, { "epoch": 0.5642460062028205, "grad_norm": 1.7578125, "learning_rate": 0.00017629127824145927, "loss": 3.1109, "step": 12053 }, { "epoch": 0.5642928199426531, "grad_norm": 1.3984375, "learning_rate": 0.0001762875042693216, "loss": 2.7183, "step": 12054 }, { "epoch": 0.5643396336824859, "grad_norm": 1.4375, "learning_rate": 0.00017628373003723867, "loss": 2.9235, "step": 12055 }, { "epoch": 0.5643864474223185, "grad_norm": 1.375, "learning_rate": 0.00017627995554522335, "loss": 2.5842, "step": 12056 }, { "epoch": 0.5644332611621511, "grad_norm": 1.6875, "learning_rate": 0.00017627618079328844, "loss": 2.7381, "step": 12057 }, { "epoch": 0.5644800749019837, "grad_norm": 1.453125, "learning_rate": 0.00017627240578144693, "loss": 2.7999, "step": 12058 }, { "epoch": 0.5645268886418163, "grad_norm": 1.4765625, "learning_rate": 0.00017626863050971154, "loss": 2.5145, "step": 12059 }, { "epoch": 0.5645737023816491, "grad_norm": 1.2109375, "learning_rate": 0.00017626485497809524, "loss": 2.6856, "step": 12060 }, { "epoch": 0.5646205161214817, "grad_norm": 1.4375, "learning_rate": 0.00017626107918661084, "loss": 2.9394, "step": 12061 }, { "epoch": 0.5646673298613143, "grad_norm": 1.5234375, "learning_rate": 0.00017625730313527123, "loss": 3.0859, "step": 12062 }, { "epoch": 0.5647141436011469, "grad_norm": 1.3984375, "learning_rate": 0.00017625352682408923, "loss": 2.7533, "step": 12063 }, { "epoch": 0.5647609573409795, "grad_norm": 1.703125, "learning_rate": 0.00017624975025307778, "loss": 4.6445, "step": 12064 }, { "epoch": 0.5648077710808123, "grad_norm": 1.3359375, "learning_rate": 0.00017624597342224968, "loss": 2.4327, "step": 12065 }, { "epoch": 0.5648545848206449, "grad_norm": 1.5390625, "learning_rate": 0.00017624219633161786, "loss": 2.8442, "step": 12066 }, { "epoch": 0.5649013985604775, "grad_norm": 1.796875, "learning_rate": 0.0001762384189811952, "loss": 2.7134, "step": 12067 }, { "epoch": 0.5649482123003101, "grad_norm": 1.3671875, "learning_rate": 0.00017623464137099447, "loss": 2.6349, "step": 12068 }, { "epoch": 0.5649950260401427, "grad_norm": 1.4140625, "learning_rate": 0.00017623086350102864, "loss": 2.8288, "step": 12069 }, { "epoch": 0.5650418397799755, "grad_norm": 2.015625, "learning_rate": 0.00017622708537131054, "loss": 2.9642, "step": 12070 }, { "epoch": 0.5650886535198081, "grad_norm": 1.671875, "learning_rate": 0.00017622330698185306, "loss": 2.7455, "step": 12071 }, { "epoch": 0.5651354672596407, "grad_norm": 1.2421875, "learning_rate": 0.00017621952833266907, "loss": 2.9512, "step": 12072 }, { "epoch": 0.5651822809994733, "grad_norm": 1.2578125, "learning_rate": 0.0001762157494237714, "loss": 2.6633, "step": 12073 }, { "epoch": 0.5652290947393059, "grad_norm": 1.3125, "learning_rate": 0.000176211970255173, "loss": 2.9884, "step": 12074 }, { "epoch": 0.5652759084791387, "grad_norm": 1.2421875, "learning_rate": 0.00017620819082688675, "loss": 2.7099, "step": 12075 }, { "epoch": 0.5653227222189713, "grad_norm": 1.4140625, "learning_rate": 0.00017620441113892546, "loss": 2.8281, "step": 12076 }, { "epoch": 0.5653695359588039, "grad_norm": 1.03125, "learning_rate": 0.00017620063119130202, "loss": 2.513, "step": 12077 }, { "epoch": 0.5654163496986365, "grad_norm": 1.46875, "learning_rate": 0.00017619685098402938, "loss": 2.8777, "step": 12078 }, { "epoch": 0.5654631634384691, "grad_norm": 1.171875, "learning_rate": 0.00017619307051712038, "loss": 2.9366, "step": 12079 }, { "epoch": 0.5655099771783019, "grad_norm": 1.21875, "learning_rate": 0.00017618928979058786, "loss": 2.0263, "step": 12080 }, { "epoch": 0.5655567909181345, "grad_norm": 1.2421875, "learning_rate": 0.00017618550880444477, "loss": 2.7125, "step": 12081 }, { "epoch": 0.5656036046579671, "grad_norm": 1.6484375, "learning_rate": 0.00017618172755870393, "loss": 2.6918, "step": 12082 }, { "epoch": 0.5656504183977997, "grad_norm": 1.3671875, "learning_rate": 0.00017617794605337826, "loss": 2.7444, "step": 12083 }, { "epoch": 0.5656972321376323, "grad_norm": 1.3984375, "learning_rate": 0.00017617416428848066, "loss": 2.5979, "step": 12084 }, { "epoch": 0.5657440458774651, "grad_norm": 1.3671875, "learning_rate": 0.00017617038226402397, "loss": 2.6334, "step": 12085 }, { "epoch": 0.5657908596172977, "grad_norm": 1.40625, "learning_rate": 0.00017616659998002113, "loss": 2.7641, "step": 12086 }, { "epoch": 0.5658376733571303, "grad_norm": 1.21875, "learning_rate": 0.000176162817436485, "loss": 2.9358, "step": 12087 }, { "epoch": 0.5658844870969629, "grad_norm": 1.3203125, "learning_rate": 0.00017615903463342848, "loss": 2.6031, "step": 12088 }, { "epoch": 0.5659313008367955, "grad_norm": 1.3125, "learning_rate": 0.00017615525157086445, "loss": 2.5587, "step": 12089 }, { "epoch": 0.5659781145766283, "grad_norm": 1.3125, "learning_rate": 0.00017615146824880582, "loss": 2.7121, "step": 12090 }, { "epoch": 0.5660249283164609, "grad_norm": 1.3828125, "learning_rate": 0.00017614768466726543, "loss": 3.1482, "step": 12091 }, { "epoch": 0.5660717420562935, "grad_norm": 1.75, "learning_rate": 0.0001761439008262562, "loss": 3.0602, "step": 12092 }, { "epoch": 0.5661185557961261, "grad_norm": 1.75, "learning_rate": 0.00017614011672579104, "loss": 2.764, "step": 12093 }, { "epoch": 0.5661653695359588, "grad_norm": 1.875, "learning_rate": 0.00017613633236588286, "loss": 2.9111, "step": 12094 }, { "epoch": 0.5662121832757915, "grad_norm": 1.6796875, "learning_rate": 0.00017613254774654448, "loss": 2.8581, "step": 12095 }, { "epoch": 0.5662589970156241, "grad_norm": 1.4921875, "learning_rate": 0.00017612876286778888, "loss": 2.9304, "step": 12096 }, { "epoch": 0.5663058107554567, "grad_norm": 1.6171875, "learning_rate": 0.00017612497772962892, "loss": 2.9236, "step": 12097 }, { "epoch": 0.5663526244952893, "grad_norm": 1.6484375, "learning_rate": 0.0001761211923320775, "loss": 2.7833, "step": 12098 }, { "epoch": 0.566399438235122, "grad_norm": 1.3515625, "learning_rate": 0.00017611740667514752, "loss": 2.3606, "step": 12099 }, { "epoch": 0.5664462519749547, "grad_norm": 1.2265625, "learning_rate": 0.00017611362075885186, "loss": 2.6635, "step": 12100 }, { "epoch": 0.5664930657147873, "grad_norm": 1.3515625, "learning_rate": 0.0001761098345832034, "loss": 2.7872, "step": 12101 }, { "epoch": 0.5665398794546199, "grad_norm": 1.3671875, "learning_rate": 0.00017610604814821514, "loss": 2.997, "step": 12102 }, { "epoch": 0.5665866931944525, "grad_norm": 1.2890625, "learning_rate": 0.0001761022614538999, "loss": 2.2256, "step": 12103 }, { "epoch": 0.5666335069342852, "grad_norm": 1.6640625, "learning_rate": 0.0001760984745002706, "loss": 2.6191, "step": 12104 }, { "epoch": 0.5666803206741179, "grad_norm": 2.203125, "learning_rate": 0.00017609468728734014, "loss": 2.9159, "step": 12105 }, { "epoch": 0.5667271344139505, "grad_norm": 4.75, "learning_rate": 0.00017609089981512145, "loss": 2.3419, "step": 12106 }, { "epoch": 0.5667739481537831, "grad_norm": 2.515625, "learning_rate": 0.00017608711208362744, "loss": 2.8365, "step": 12107 }, { "epoch": 0.5668207618936157, "grad_norm": 1.7734375, "learning_rate": 0.00017608332409287095, "loss": 2.4218, "step": 12108 }, { "epoch": 0.5668675756334484, "grad_norm": 1.4765625, "learning_rate": 0.00017607953584286494, "loss": 2.6867, "step": 12109 }, { "epoch": 0.5669143893732811, "grad_norm": 1.5703125, "learning_rate": 0.0001760757473336223, "loss": 3.5169, "step": 12110 }, { "epoch": 0.5669612031131137, "grad_norm": 1.4765625, "learning_rate": 0.00017607195856515596, "loss": 2.6455, "step": 12111 }, { "epoch": 0.5670080168529463, "grad_norm": 1.75, "learning_rate": 0.00017606816953747882, "loss": 2.3999, "step": 12112 }, { "epoch": 0.567054830592779, "grad_norm": 1.390625, "learning_rate": 0.0001760643802506038, "loss": 2.4585, "step": 12113 }, { "epoch": 0.5671016443326117, "grad_norm": 1.25, "learning_rate": 0.00017606059070454378, "loss": 2.7927, "step": 12114 }, { "epoch": 0.5671484580724443, "grad_norm": 1.1328125, "learning_rate": 0.00017605680089931172, "loss": 2.6014, "step": 12115 }, { "epoch": 0.5671952718122769, "grad_norm": 1.4765625, "learning_rate": 0.00017605301083492046, "loss": 3.1417, "step": 12116 }, { "epoch": 0.5672420855521095, "grad_norm": 2.15625, "learning_rate": 0.000176049220511383, "loss": 3.1915, "step": 12117 }, { "epoch": 0.5672888992919422, "grad_norm": 1.5078125, "learning_rate": 0.0001760454299287122, "loss": 2.5651, "step": 12118 }, { "epoch": 0.5673357130317749, "grad_norm": 1.4921875, "learning_rate": 0.00017604163908692102, "loss": 2.9345, "step": 12119 }, { "epoch": 0.5673825267716075, "grad_norm": 1.34375, "learning_rate": 0.0001760378479860223, "loss": 2.6659, "step": 12120 }, { "epoch": 0.5674293405114401, "grad_norm": 1.234375, "learning_rate": 0.00017603405662602905, "loss": 2.958, "step": 12121 }, { "epoch": 0.5674761542512727, "grad_norm": 1.515625, "learning_rate": 0.00017603026500695416, "loss": 3.1701, "step": 12122 }, { "epoch": 0.5675229679911054, "grad_norm": 1.1484375, "learning_rate": 0.00017602647312881051, "loss": 2.634, "step": 12123 }, { "epoch": 0.5675697817309381, "grad_norm": 1.3125, "learning_rate": 0.00017602268099161105, "loss": 2.6146, "step": 12124 }, { "epoch": 0.5676165954707707, "grad_norm": 1.7890625, "learning_rate": 0.0001760188885953687, "loss": 2.3993, "step": 12125 }, { "epoch": 0.5676634092106033, "grad_norm": 1.453125, "learning_rate": 0.00017601509594009638, "loss": 2.9688, "step": 12126 }, { "epoch": 0.5677102229504359, "grad_norm": 1.2578125, "learning_rate": 0.000176011303025807, "loss": 3.0521, "step": 12127 }, { "epoch": 0.5677570366902686, "grad_norm": 1.4296875, "learning_rate": 0.00017600750985251353, "loss": 2.6795, "step": 12128 }, { "epoch": 0.5678038504301013, "grad_norm": 1.3515625, "learning_rate": 0.00017600371642022885, "loss": 2.7698, "step": 12129 }, { "epoch": 0.5678506641699339, "grad_norm": 1.5078125, "learning_rate": 0.0001759999227289659, "loss": 2.831, "step": 12130 }, { "epoch": 0.5678974779097665, "grad_norm": 1.140625, "learning_rate": 0.00017599612877873758, "loss": 2.4072, "step": 12131 }, { "epoch": 0.5679442916495991, "grad_norm": 1.4609375, "learning_rate": 0.00017599233456955688, "loss": 3.0771, "step": 12132 }, { "epoch": 0.5679911053894318, "grad_norm": 1.4609375, "learning_rate": 0.0001759885401014367, "loss": 3.1418, "step": 12133 }, { "epoch": 0.5680379191292645, "grad_norm": 1.171875, "learning_rate": 0.0001759847453743899, "loss": 3.1628, "step": 12134 }, { "epoch": 0.5680847328690971, "grad_norm": 2.09375, "learning_rate": 0.00017598095038842955, "loss": 2.8059, "step": 12135 }, { "epoch": 0.5681315466089297, "grad_norm": 1.6796875, "learning_rate": 0.00017597715514356844, "loss": 2.9042, "step": 12136 }, { "epoch": 0.5681783603487623, "grad_norm": 1.34375, "learning_rate": 0.0001759733596398196, "loss": 2.6951, "step": 12137 }, { "epoch": 0.568225174088595, "grad_norm": 1.2109375, "learning_rate": 0.0001759695638771959, "loss": 3.9754, "step": 12138 }, { "epoch": 0.5682719878284277, "grad_norm": 1.46875, "learning_rate": 0.00017596576785571037, "loss": 3.281, "step": 12139 }, { "epoch": 0.5683188015682603, "grad_norm": 1.1875, "learning_rate": 0.00017596197157537582, "loss": 2.1218, "step": 12140 }, { "epoch": 0.5683656153080929, "grad_norm": 1.3125, "learning_rate": 0.00017595817503620524, "loss": 2.757, "step": 12141 }, { "epoch": 0.5684124290479255, "grad_norm": 2.0625, "learning_rate": 0.0001759543782382116, "loss": 2.8479, "step": 12142 }, { "epoch": 0.5684592427877582, "grad_norm": 1.5078125, "learning_rate": 0.00017595058118140776, "loss": 2.9326, "step": 12143 }, { "epoch": 0.5685060565275909, "grad_norm": 1.5390625, "learning_rate": 0.00017594678386580675, "loss": 2.657, "step": 12144 }, { "epoch": 0.5685528702674235, "grad_norm": 1.6015625, "learning_rate": 0.00017594298629142145, "loss": 2.8704, "step": 12145 }, { "epoch": 0.5685996840072561, "grad_norm": 1.4921875, "learning_rate": 0.00017593918845826482, "loss": 2.4604, "step": 12146 }, { "epoch": 0.5686464977470888, "grad_norm": 1.3046875, "learning_rate": 0.0001759353903663498, "loss": 2.6714, "step": 12147 }, { "epoch": 0.5686933114869214, "grad_norm": 1.328125, "learning_rate": 0.00017593159201568929, "loss": 2.6455, "step": 12148 }, { "epoch": 0.5687401252267541, "grad_norm": 1.8046875, "learning_rate": 0.0001759277934062963, "loss": 2.7433, "step": 12149 }, { "epoch": 0.5687869389665867, "grad_norm": 1.5546875, "learning_rate": 0.00017592399453818374, "loss": 2.5578, "step": 12150 }, { "epoch": 0.5688337527064193, "grad_norm": 1.2890625, "learning_rate": 0.00017592019541136456, "loss": 2.4496, "step": 12151 }, { "epoch": 0.568880566446252, "grad_norm": 1.3203125, "learning_rate": 0.00017591639602585168, "loss": 2.6095, "step": 12152 }, { "epoch": 0.5689273801860846, "grad_norm": 1.328125, "learning_rate": 0.00017591259638165808, "loss": 2.9131, "step": 12153 }, { "epoch": 0.5689741939259173, "grad_norm": 1.5, "learning_rate": 0.0001759087964787967, "loss": 2.8513, "step": 12154 }, { "epoch": 0.5690210076657499, "grad_norm": 1.5078125, "learning_rate": 0.00017590499631728047, "loss": 3.1052, "step": 12155 }, { "epoch": 0.5690678214055825, "grad_norm": 1.625, "learning_rate": 0.0001759011958971224, "loss": 2.8672, "step": 12156 }, { "epoch": 0.5691146351454152, "grad_norm": 1.6328125, "learning_rate": 0.00017589739521833538, "loss": 2.9727, "step": 12157 }, { "epoch": 0.5691614488852478, "grad_norm": 1.421875, "learning_rate": 0.00017589359428093232, "loss": 3.059, "step": 12158 }, { "epoch": 0.5692082626250805, "grad_norm": 1.6953125, "learning_rate": 0.00017588979308492625, "loss": 2.7382, "step": 12159 }, { "epoch": 0.5692550763649131, "grad_norm": 1.3125, "learning_rate": 0.0001758859916303301, "loss": 2.6795, "step": 12160 }, { "epoch": 0.5693018901047457, "grad_norm": 2.34375, "learning_rate": 0.00017588218991715682, "loss": 3.2004, "step": 12161 }, { "epoch": 0.5693487038445784, "grad_norm": 1.6015625, "learning_rate": 0.00017587838794541939, "loss": 2.7927, "step": 12162 }, { "epoch": 0.569395517584411, "grad_norm": 1.7421875, "learning_rate": 0.0001758745857151307, "loss": 2.4831, "step": 12163 }, { "epoch": 0.5694423313242437, "grad_norm": 1.5546875, "learning_rate": 0.00017587078322630378, "loss": 2.8377, "step": 12164 }, { "epoch": 0.5694891450640763, "grad_norm": 1.3046875, "learning_rate": 0.00017586698047895153, "loss": 2.8356, "step": 12165 }, { "epoch": 0.569535958803909, "grad_norm": 1.2734375, "learning_rate": 0.0001758631774730869, "loss": 2.7689, "step": 12166 }, { "epoch": 0.5695827725437416, "grad_norm": 1.671875, "learning_rate": 0.00017585937420872293, "loss": 2.7521, "step": 12167 }, { "epoch": 0.5696295862835742, "grad_norm": 1.09375, "learning_rate": 0.00017585557068587252, "loss": 2.688, "step": 12168 }, { "epoch": 0.5696764000234069, "grad_norm": 2.15625, "learning_rate": 0.0001758517669045486, "loss": 2.504, "step": 12169 }, { "epoch": 0.5697232137632395, "grad_norm": 1.4921875, "learning_rate": 0.00017584796286476418, "loss": 2.434, "step": 12170 }, { "epoch": 0.5697700275030722, "grad_norm": 1.3359375, "learning_rate": 0.00017584415856653225, "loss": 2.9062, "step": 12171 }, { "epoch": 0.5698168412429048, "grad_norm": 1.7265625, "learning_rate": 0.0001758403540098657, "loss": 2.9749, "step": 12172 }, { "epoch": 0.5698636549827374, "grad_norm": 2.796875, "learning_rate": 0.00017583654919477753, "loss": 2.7867, "step": 12173 }, { "epoch": 0.5699104687225701, "grad_norm": 1.6328125, "learning_rate": 0.0001758327441212807, "loss": 2.8162, "step": 12174 }, { "epoch": 0.5699572824624027, "grad_norm": 1.359375, "learning_rate": 0.00017582893878938818, "loss": 2.799, "step": 12175 }, { "epoch": 0.5700040962022354, "grad_norm": 1.6640625, "learning_rate": 0.00017582513319911296, "loss": 3.0674, "step": 12176 }, { "epoch": 0.570050909942068, "grad_norm": 1.7890625, "learning_rate": 0.00017582132735046794, "loss": 2.7655, "step": 12177 }, { "epoch": 0.5700977236819006, "grad_norm": 1.2890625, "learning_rate": 0.00017581752124346616, "loss": 3.0581, "step": 12178 }, { "epoch": 0.5701445374217333, "grad_norm": 2.0, "learning_rate": 0.00017581371487812057, "loss": 2.7506, "step": 12179 }, { "epoch": 0.5701913511615659, "grad_norm": 1.515625, "learning_rate": 0.0001758099082544441, "loss": 2.3236, "step": 12180 }, { "epoch": 0.5702381649013986, "grad_norm": 1.296875, "learning_rate": 0.00017580610137244977, "loss": 2.5901, "step": 12181 }, { "epoch": 0.5702849786412312, "grad_norm": 1.8125, "learning_rate": 0.00017580229423215052, "loss": 2.1427, "step": 12182 }, { "epoch": 0.5703317923810638, "grad_norm": 1.25, "learning_rate": 0.00017579848683355932, "loss": 2.6553, "step": 12183 }, { "epoch": 0.5703786061208965, "grad_norm": 1.75, "learning_rate": 0.00017579467917668918, "loss": 3.0426, "step": 12184 }, { "epoch": 0.5704254198607291, "grad_norm": 1.75, "learning_rate": 0.00017579087126155303, "loss": 2.6689, "step": 12185 }, { "epoch": 0.5704722336005618, "grad_norm": 1.2890625, "learning_rate": 0.00017578706308816387, "loss": 2.6146, "step": 12186 }, { "epoch": 0.5705190473403944, "grad_norm": 1.171875, "learning_rate": 0.00017578325465653468, "loss": 2.6889, "step": 12187 }, { "epoch": 0.570565861080227, "grad_norm": 1.71875, "learning_rate": 0.00017577944596667845, "loss": 3.6224, "step": 12188 }, { "epoch": 0.5706126748200597, "grad_norm": 1.3203125, "learning_rate": 0.00017577563701860811, "loss": 2.5713, "step": 12189 }, { "epoch": 0.5706594885598923, "grad_norm": 1.6484375, "learning_rate": 0.00017577182781233669, "loss": 3.2048, "step": 12190 }, { "epoch": 0.570706302299725, "grad_norm": 1.2265625, "learning_rate": 0.00017576801834787712, "loss": 2.6848, "step": 12191 }, { "epoch": 0.5707531160395576, "grad_norm": 1.203125, "learning_rate": 0.00017576420862524243, "loss": 2.4735, "step": 12192 }, { "epoch": 0.5707999297793902, "grad_norm": 1.265625, "learning_rate": 0.00017576039864444556, "loss": 2.6053, "step": 12193 }, { "epoch": 0.5708467435192229, "grad_norm": 1.5546875, "learning_rate": 0.00017575658840549954, "loss": 2.8361, "step": 12194 }, { "epoch": 0.5708935572590556, "grad_norm": 1.2265625, "learning_rate": 0.00017575277790841728, "loss": 2.6454, "step": 12195 }, { "epoch": 0.5709403709988882, "grad_norm": 1.546875, "learning_rate": 0.00017574896715321186, "loss": 2.8474, "step": 12196 }, { "epoch": 0.5709871847387208, "grad_norm": 2.328125, "learning_rate": 0.00017574515613989617, "loss": 2.8137, "step": 12197 }, { "epoch": 0.5710339984785534, "grad_norm": 1.4609375, "learning_rate": 0.00017574134486848325, "loss": 2.7305, "step": 12198 }, { "epoch": 0.5710808122183861, "grad_norm": 1.4921875, "learning_rate": 0.0001757375333389861, "loss": 2.714, "step": 12199 }, { "epoch": 0.5711276259582188, "grad_norm": 0.96875, "learning_rate": 0.00017573372155141765, "loss": 2.061, "step": 12200 }, { "epoch": 0.5711744396980514, "grad_norm": 1.265625, "learning_rate": 0.00017572990950579092, "loss": 2.8918, "step": 12201 }, { "epoch": 0.571221253437884, "grad_norm": 1.3984375, "learning_rate": 0.00017572609720211893, "loss": 2.8974, "step": 12202 }, { "epoch": 0.5712680671777166, "grad_norm": 1.21875, "learning_rate": 0.00017572228464041463, "loss": 3.4529, "step": 12203 }, { "epoch": 0.5713148809175493, "grad_norm": 1.6484375, "learning_rate": 0.000175718471820691, "loss": 3.041, "step": 12204 }, { "epoch": 0.571361694657382, "grad_norm": 1.3671875, "learning_rate": 0.0001757146587429611, "loss": 2.8131, "step": 12205 }, { "epoch": 0.5714085083972146, "grad_norm": 0.97265625, "learning_rate": 0.00017571084540723782, "loss": 2.1954, "step": 12206 }, { "epoch": 0.5714553221370472, "grad_norm": 1.4921875, "learning_rate": 0.00017570703181353425, "loss": 2.674, "step": 12207 }, { "epoch": 0.5715021358768798, "grad_norm": 1.140625, "learning_rate": 0.00017570321796186336, "loss": 2.4582, "step": 12208 }, { "epoch": 0.5715489496167125, "grad_norm": 1.6640625, "learning_rate": 0.00017569940385223808, "loss": 2.8279, "step": 12209 }, { "epoch": 0.5715957633565452, "grad_norm": 1.40625, "learning_rate": 0.0001756955894846715, "loss": 2.927, "step": 12210 }, { "epoch": 0.5716425770963778, "grad_norm": 1.6796875, "learning_rate": 0.00017569177485917653, "loss": 2.8065, "step": 12211 }, { "epoch": 0.5716893908362104, "grad_norm": 1.0, "learning_rate": 0.00017568795997576622, "loss": 2.7625, "step": 12212 }, { "epoch": 0.571736204576043, "grad_norm": 1.3828125, "learning_rate": 0.0001756841448344536, "loss": 2.742, "step": 12213 }, { "epoch": 0.5717830183158757, "grad_norm": 1.28125, "learning_rate": 0.00017568032943525165, "loss": 2.7857, "step": 12214 }, { "epoch": 0.5718298320557084, "grad_norm": 2.09375, "learning_rate": 0.0001756765137781733, "loss": 2.9446, "step": 12215 }, { "epoch": 0.571876645795541, "grad_norm": 1.4140625, "learning_rate": 0.00017567269786323162, "loss": 3.0877, "step": 12216 }, { "epoch": 0.5719234595353736, "grad_norm": 1.171875, "learning_rate": 0.00017566888169043957, "loss": 3.0482, "step": 12217 }, { "epoch": 0.5719702732752062, "grad_norm": 1.609375, "learning_rate": 0.0001756650652598102, "loss": 2.838, "step": 12218 }, { "epoch": 0.572017087015039, "grad_norm": 1.8125, "learning_rate": 0.0001756612485713565, "loss": 2.8387, "step": 12219 }, { "epoch": 0.5720639007548716, "grad_norm": 1.28125, "learning_rate": 0.00017565743162509145, "loss": 2.8558, "step": 12220 }, { "epoch": 0.5721107144947042, "grad_norm": 2.015625, "learning_rate": 0.00017565361442102811, "loss": 2.9988, "step": 12221 }, { "epoch": 0.5721575282345368, "grad_norm": 1.2421875, "learning_rate": 0.00017564979695917942, "loss": 2.7904, "step": 12222 }, { "epoch": 0.5722043419743694, "grad_norm": 1.0859375, "learning_rate": 0.00017564597923955844, "loss": 2.8464, "step": 12223 }, { "epoch": 0.5722511557142022, "grad_norm": 1.1171875, "learning_rate": 0.00017564216126217814, "loss": 3.1731, "step": 12224 }, { "epoch": 0.5722979694540348, "grad_norm": 1.484375, "learning_rate": 0.00017563834302705158, "loss": 3.2039, "step": 12225 }, { "epoch": 0.5723447831938674, "grad_norm": 1.265625, "learning_rate": 0.00017563452453419172, "loss": 2.4855, "step": 12226 }, { "epoch": 0.5723915969337, "grad_norm": 1.546875, "learning_rate": 0.00017563070578361156, "loss": 2.5868, "step": 12227 }, { "epoch": 0.5724384106735326, "grad_norm": 1.0234375, "learning_rate": 0.00017562688677532417, "loss": 2.8312, "step": 12228 }, { "epoch": 0.5724852244133654, "grad_norm": 1.3359375, "learning_rate": 0.00017562306750934254, "loss": 2.6356, "step": 12229 }, { "epoch": 0.572532038153198, "grad_norm": 1.828125, "learning_rate": 0.00017561924798567967, "loss": 2.6026, "step": 12230 }, { "epoch": 0.5725788518930306, "grad_norm": 1.6796875, "learning_rate": 0.00017561542820434858, "loss": 2.7855, "step": 12231 }, { "epoch": 0.5726256656328632, "grad_norm": 1.3984375, "learning_rate": 0.0001756116081653623, "loss": 2.8457, "step": 12232 }, { "epoch": 0.5726724793726959, "grad_norm": 1.4765625, "learning_rate": 0.00017560778786873384, "loss": 2.727, "step": 12233 }, { "epoch": 0.5727192931125286, "grad_norm": 1.59375, "learning_rate": 0.0001756039673144762, "loss": 2.8641, "step": 12234 }, { "epoch": 0.5727661068523612, "grad_norm": 1.65625, "learning_rate": 0.00017560014650260238, "loss": 2.8053, "step": 12235 }, { "epoch": 0.5728129205921938, "grad_norm": 1.328125, "learning_rate": 0.0001755963254331255, "loss": 2.6303, "step": 12236 }, { "epoch": 0.5728597343320264, "grad_norm": 1.1875, "learning_rate": 0.0001755925041060585, "loss": 2.8814, "step": 12237 }, { "epoch": 0.5729065480718591, "grad_norm": 1.59375, "learning_rate": 0.00017558868252141437, "loss": 3.2371, "step": 12238 }, { "epoch": 0.5729533618116918, "grad_norm": 1.3515625, "learning_rate": 0.0001755848606792062, "loss": 2.607, "step": 12239 }, { "epoch": 0.5730001755515244, "grad_norm": 2.25, "learning_rate": 0.00017558103857944694, "loss": 2.5466, "step": 12240 }, { "epoch": 0.573046989291357, "grad_norm": 1.2578125, "learning_rate": 0.0001755772162221497, "loss": 2.3842, "step": 12241 }, { "epoch": 0.5730938030311896, "grad_norm": 1.2734375, "learning_rate": 0.00017557339360732747, "loss": 2.5514, "step": 12242 }, { "epoch": 0.5731406167710223, "grad_norm": 1.390625, "learning_rate": 0.00017556957073499327, "loss": 2.9523, "step": 12243 }, { "epoch": 0.573187430510855, "grad_norm": 1.6171875, "learning_rate": 0.0001755657476051601, "loss": 2.7657, "step": 12244 }, { "epoch": 0.5732342442506876, "grad_norm": 1.1796875, "learning_rate": 0.00017556192421784103, "loss": 2.7153, "step": 12245 }, { "epoch": 0.5732810579905202, "grad_norm": 1.3125, "learning_rate": 0.00017555810057304908, "loss": 2.6456, "step": 12246 }, { "epoch": 0.5733278717303528, "grad_norm": 1.4296875, "learning_rate": 0.00017555427667079724, "loss": 3.0482, "step": 12247 }, { "epoch": 0.5733746854701856, "grad_norm": 1.3046875, "learning_rate": 0.00017555045251109862, "loss": 2.7118, "step": 12248 }, { "epoch": 0.5734214992100182, "grad_norm": 1.4140625, "learning_rate": 0.00017554662809396616, "loss": 2.3469, "step": 12249 }, { "epoch": 0.5734683129498508, "grad_norm": 1.3515625, "learning_rate": 0.00017554280341941294, "loss": 2.8293, "step": 12250 }, { "epoch": 0.5735151266896834, "grad_norm": 1.515625, "learning_rate": 0.00017553897848745196, "loss": 2.6352, "step": 12251 }, { "epoch": 0.573561940429516, "grad_norm": 1.6875, "learning_rate": 0.00017553515329809632, "loss": 2.7555, "step": 12252 }, { "epoch": 0.5736087541693488, "grad_norm": 1.3984375, "learning_rate": 0.000175531327851359, "loss": 2.935, "step": 12253 }, { "epoch": 0.5736555679091814, "grad_norm": 0.9453125, "learning_rate": 0.000175527502147253, "loss": 3.3554, "step": 12254 }, { "epoch": 0.573702381649014, "grad_norm": 1.25, "learning_rate": 0.00017552367618579146, "loss": 2.6556, "step": 12255 }, { "epoch": 0.5737491953888466, "grad_norm": 1.3984375, "learning_rate": 0.00017551984996698735, "loss": 2.7618, "step": 12256 }, { "epoch": 0.5737960091286792, "grad_norm": 1.546875, "learning_rate": 0.00017551602349085373, "loss": 2.8118, "step": 12257 }, { "epoch": 0.573842822868512, "grad_norm": 1.203125, "learning_rate": 0.0001755121967574036, "loss": 2.3048, "step": 12258 }, { "epoch": 0.5738896366083446, "grad_norm": 1.3984375, "learning_rate": 0.00017550836976665003, "loss": 2.9998, "step": 12259 }, { "epoch": 0.5739364503481772, "grad_norm": 1.1328125, "learning_rate": 0.00017550454251860604, "loss": 2.2415, "step": 12260 }, { "epoch": 0.5739832640880098, "grad_norm": 1.5625, "learning_rate": 0.0001755007150132847, "loss": 2.8788, "step": 12261 }, { "epoch": 0.5740300778278424, "grad_norm": 2.0, "learning_rate": 0.00017549688725069902, "loss": 2.7304, "step": 12262 }, { "epoch": 0.5740768915676752, "grad_norm": 1.9453125, "learning_rate": 0.00017549305923086209, "loss": 2.5918, "step": 12263 }, { "epoch": 0.5741237053075078, "grad_norm": 1.140625, "learning_rate": 0.0001754892309537869, "loss": 2.2546, "step": 12264 }, { "epoch": 0.5741705190473404, "grad_norm": 1.5625, "learning_rate": 0.00017548540241948653, "loss": 3.3265, "step": 12265 }, { "epoch": 0.574217332787173, "grad_norm": 1.53125, "learning_rate": 0.00017548157362797401, "loss": 2.4875, "step": 12266 }, { "epoch": 0.5742641465270056, "grad_norm": 1.25, "learning_rate": 0.0001754777445792624, "loss": 2.9184, "step": 12267 }, { "epoch": 0.5743109602668384, "grad_norm": 1.578125, "learning_rate": 0.00017547391527336474, "loss": 2.6679, "step": 12268 }, { "epoch": 0.574357774006671, "grad_norm": 1.484375, "learning_rate": 0.00017547008571029406, "loss": 2.8843, "step": 12269 }, { "epoch": 0.5744045877465036, "grad_norm": 1.140625, "learning_rate": 0.00017546625589006344, "loss": 2.7981, "step": 12270 }, { "epoch": 0.5744514014863362, "grad_norm": 1.640625, "learning_rate": 0.0001754624258126859, "loss": 2.7295, "step": 12271 }, { "epoch": 0.5744982152261688, "grad_norm": 1.109375, "learning_rate": 0.00017545859547817453, "loss": 2.7492, "step": 12272 }, { "epoch": 0.5745450289660016, "grad_norm": 1.65625, "learning_rate": 0.00017545476488654233, "loss": 2.4175, "step": 12273 }, { "epoch": 0.5745918427058342, "grad_norm": 1.25, "learning_rate": 0.0001754509340378024, "loss": 4.0012, "step": 12274 }, { "epoch": 0.5746386564456668, "grad_norm": 1.4296875, "learning_rate": 0.00017544710293196776, "loss": 2.7113, "step": 12275 }, { "epoch": 0.5746854701854994, "grad_norm": 1.6640625, "learning_rate": 0.0001754432715690515, "loss": 2.7182, "step": 12276 }, { "epoch": 0.574732283925332, "grad_norm": 1.296875, "learning_rate": 0.00017543943994906665, "loss": 2.8796, "step": 12277 }, { "epoch": 0.5747790976651648, "grad_norm": 1.7421875, "learning_rate": 0.00017543560807202623, "loss": 2.9432, "step": 12278 }, { "epoch": 0.5748259114049974, "grad_norm": 1.7109375, "learning_rate": 0.00017543177593794336, "loss": 2.5154, "step": 12279 }, { "epoch": 0.57487272514483, "grad_norm": 2.578125, "learning_rate": 0.00017542794354683108, "loss": 2.6222, "step": 12280 }, { "epoch": 0.5749195388846626, "grad_norm": 1.5625, "learning_rate": 0.00017542411089870242, "loss": 2.9871, "step": 12281 }, { "epoch": 0.5749663526244952, "grad_norm": 1.40625, "learning_rate": 0.0001754202779935705, "loss": 2.9374, "step": 12282 }, { "epoch": 0.575013166364328, "grad_norm": 1.203125, "learning_rate": 0.00017541644483144833, "loss": 2.592, "step": 12283 }, { "epoch": 0.5750599801041606, "grad_norm": 1.40625, "learning_rate": 0.00017541261141234898, "loss": 2.897, "step": 12284 }, { "epoch": 0.5751067938439932, "grad_norm": 1.3828125, "learning_rate": 0.0001754087777362855, "loss": 2.8792, "step": 12285 }, { "epoch": 0.5751536075838258, "grad_norm": 1.6875, "learning_rate": 0.00017540494380327099, "loss": 2.5301, "step": 12286 }, { "epoch": 0.5752004213236585, "grad_norm": 1.0546875, "learning_rate": 0.00017540110961331847, "loss": 2.6749, "step": 12287 }, { "epoch": 0.5752472350634912, "grad_norm": 1.28125, "learning_rate": 0.00017539727516644105, "loss": 2.8506, "step": 12288 }, { "epoch": 0.5752940488033238, "grad_norm": 1.3671875, "learning_rate": 0.00017539344046265174, "loss": 2.6076, "step": 12289 }, { "epoch": 0.5753408625431564, "grad_norm": 1.75, "learning_rate": 0.00017538960550196367, "loss": 2.4101, "step": 12290 }, { "epoch": 0.575387676282989, "grad_norm": 1.8671875, "learning_rate": 0.00017538577028438986, "loss": 3.0969, "step": 12291 }, { "epoch": 0.5754344900228217, "grad_norm": 1.59375, "learning_rate": 0.00017538193480994344, "loss": 3.0644, "step": 12292 }, { "epoch": 0.5754813037626544, "grad_norm": 1.3984375, "learning_rate": 0.00017537809907863737, "loss": 2.968, "step": 12293 }, { "epoch": 0.575528117502487, "grad_norm": 2.4375, "learning_rate": 0.00017537426309048484, "loss": 3.6627, "step": 12294 }, { "epoch": 0.5755749312423196, "grad_norm": 1.1171875, "learning_rate": 0.0001753704268454988, "loss": 1.8212, "step": 12295 }, { "epoch": 0.5756217449821522, "grad_norm": 1.375, "learning_rate": 0.00017536659034369243, "loss": 2.6373, "step": 12296 }, { "epoch": 0.5756685587219849, "grad_norm": 1.359375, "learning_rate": 0.00017536275358507876, "loss": 2.4584, "step": 12297 }, { "epoch": 0.5757153724618176, "grad_norm": 1.515625, "learning_rate": 0.00017535891656967088, "loss": 3.0625, "step": 12298 }, { "epoch": 0.5757621862016502, "grad_norm": 2.046875, "learning_rate": 0.0001753550792974818, "loss": 2.8291, "step": 12299 }, { "epoch": 0.5758089999414828, "grad_norm": 1.5234375, "learning_rate": 0.00017535124176852468, "loss": 2.6883, "step": 12300 }, { "epoch": 0.5758558136813154, "grad_norm": 1.515625, "learning_rate": 0.00017534740398281255, "loss": 2.771, "step": 12301 }, { "epoch": 0.5759026274211481, "grad_norm": 1.7421875, "learning_rate": 0.0001753435659403585, "loss": 2.8917, "step": 12302 }, { "epoch": 0.5759494411609808, "grad_norm": 1.4921875, "learning_rate": 0.00017533972764117558, "loss": 2.8346, "step": 12303 }, { "epoch": 0.5759962549008134, "grad_norm": 1.6640625, "learning_rate": 0.00017533588908527693, "loss": 2.7962, "step": 12304 }, { "epoch": 0.576043068640646, "grad_norm": 2.375, "learning_rate": 0.00017533205027267558, "loss": 2.8192, "step": 12305 }, { "epoch": 0.5760898823804786, "grad_norm": 1.53125, "learning_rate": 0.00017532821120338463, "loss": 2.9133, "step": 12306 }, { "epoch": 0.5761366961203113, "grad_norm": 3.3125, "learning_rate": 0.00017532437187741713, "loss": 2.938, "step": 12307 }, { "epoch": 0.576183509860144, "grad_norm": 1.4921875, "learning_rate": 0.0001753205322947862, "loss": 3.151, "step": 12308 }, { "epoch": 0.5762303235999766, "grad_norm": 2.21875, "learning_rate": 0.00017531669245550492, "loss": 2.7483, "step": 12309 }, { "epoch": 0.5762771373398092, "grad_norm": 1.65625, "learning_rate": 0.00017531285235958635, "loss": 2.8859, "step": 12310 }, { "epoch": 0.5763239510796418, "grad_norm": 1.2578125, "learning_rate": 0.0001753090120070436, "loss": 2.4923, "step": 12311 }, { "epoch": 0.5763707648194745, "grad_norm": 1.265625, "learning_rate": 0.0001753051713978897, "loss": 2.8802, "step": 12312 }, { "epoch": 0.5764175785593072, "grad_norm": 2.21875, "learning_rate": 0.00017530133053213784, "loss": 3.1543, "step": 12313 }, { "epoch": 0.5764643922991398, "grad_norm": 1.328125, "learning_rate": 0.00017529748940980103, "loss": 3.0774, "step": 12314 }, { "epoch": 0.5765112060389724, "grad_norm": 1.484375, "learning_rate": 0.00017529364803089238, "loss": 3.1134, "step": 12315 }, { "epoch": 0.576558019778805, "grad_norm": 1.421875, "learning_rate": 0.00017528980639542496, "loss": 2.6459, "step": 12316 }, { "epoch": 0.5766048335186377, "grad_norm": 1.5390625, "learning_rate": 0.0001752859645034119, "loss": 2.5804, "step": 12317 }, { "epoch": 0.5766516472584704, "grad_norm": 1.6171875, "learning_rate": 0.00017528212235486625, "loss": 2.3802, "step": 12318 }, { "epoch": 0.576698460998303, "grad_norm": 1.28125, "learning_rate": 0.00017527827994980112, "loss": 2.7073, "step": 12319 }, { "epoch": 0.5767452747381356, "grad_norm": 1.859375, "learning_rate": 0.0001752744372882296, "loss": 2.8656, "step": 12320 }, { "epoch": 0.5767920884779683, "grad_norm": 1.4140625, "learning_rate": 0.0001752705943701648, "loss": 2.5009, "step": 12321 }, { "epoch": 0.5768389022178009, "grad_norm": 1.4296875, "learning_rate": 0.0001752667511956198, "loss": 2.6973, "step": 12322 }, { "epoch": 0.5768857159576336, "grad_norm": 1.6640625, "learning_rate": 0.0001752629077646077, "loss": 2.4979, "step": 12323 }, { "epoch": 0.5769325296974662, "grad_norm": 1.953125, "learning_rate": 0.00017525906407714155, "loss": 2.8961, "step": 12324 }, { "epoch": 0.5769793434372988, "grad_norm": 1.3203125, "learning_rate": 0.00017525522013323453, "loss": 2.4201, "step": 12325 }, { "epoch": 0.5770261571771315, "grad_norm": 1.796875, "learning_rate": 0.0001752513759328997, "loss": 2.9603, "step": 12326 }, { "epoch": 0.5770729709169641, "grad_norm": 1.4453125, "learning_rate": 0.00017524753147615013, "loss": 2.622, "step": 12327 }, { "epoch": 0.5771197846567968, "grad_norm": 1.671875, "learning_rate": 0.00017524368676299898, "loss": 2.5189, "step": 12328 }, { "epoch": 0.5771665983966294, "grad_norm": 1.3125, "learning_rate": 0.0001752398417934593, "loss": 2.7066, "step": 12329 }, { "epoch": 0.577213412136462, "grad_norm": 1.4921875, "learning_rate": 0.00017523599656754418, "loss": 2.6142, "step": 12330 }, { "epoch": 0.5772602258762947, "grad_norm": 1.328125, "learning_rate": 0.0001752321510852668, "loss": 2.8161, "step": 12331 }, { "epoch": 0.5773070396161273, "grad_norm": 1.3828125, "learning_rate": 0.00017522830534664018, "loss": 2.7492, "step": 12332 }, { "epoch": 0.57735385335596, "grad_norm": 2.15625, "learning_rate": 0.00017522445935167747, "loss": 2.4703, "step": 12333 }, { "epoch": 0.5774006670957926, "grad_norm": 1.90625, "learning_rate": 0.00017522061310039176, "loss": 2.9736, "step": 12334 }, { "epoch": 0.5774474808356252, "grad_norm": 1.2421875, "learning_rate": 0.00017521676659279614, "loss": 2.996, "step": 12335 }, { "epoch": 0.5774942945754579, "grad_norm": 1.546875, "learning_rate": 0.00017521291982890378, "loss": 3.0864, "step": 12336 }, { "epoch": 0.5775411083152905, "grad_norm": 2.515625, "learning_rate": 0.00017520907280872772, "loss": 2.1087, "step": 12337 }, { "epoch": 0.5775879220551232, "grad_norm": 1.28125, "learning_rate": 0.00017520522553228108, "loss": 2.4502, "step": 12338 }, { "epoch": 0.5776347357949558, "grad_norm": 2.953125, "learning_rate": 0.000175201377999577, "loss": 2.3188, "step": 12339 }, { "epoch": 0.5776815495347885, "grad_norm": 1.71875, "learning_rate": 0.00017519753021062856, "loss": 3.1798, "step": 12340 }, { "epoch": 0.5777283632746211, "grad_norm": 1.53125, "learning_rate": 0.00017519368216544886, "loss": 2.9246, "step": 12341 }, { "epoch": 0.5777751770144537, "grad_norm": 1.9140625, "learning_rate": 0.0001751898338640511, "loss": 2.9848, "step": 12342 }, { "epoch": 0.5778219907542864, "grad_norm": 1.6484375, "learning_rate": 0.00017518598530644829, "loss": 2.7456, "step": 12343 }, { "epoch": 0.577868804494119, "grad_norm": 1.8359375, "learning_rate": 0.00017518213649265357, "loss": 2.5158, "step": 12344 }, { "epoch": 0.5779156182339517, "grad_norm": 1.53125, "learning_rate": 0.00017517828742268007, "loss": 2.9663, "step": 12345 }, { "epoch": 0.5779624319737843, "grad_norm": 1.359375, "learning_rate": 0.0001751744380965409, "loss": 2.4925, "step": 12346 }, { "epoch": 0.5780092457136169, "grad_norm": 1.2890625, "learning_rate": 0.00017517058851424918, "loss": 2.8318, "step": 12347 }, { "epoch": 0.5780560594534496, "grad_norm": 1.515625, "learning_rate": 0.000175166738675818, "loss": 3.0127, "step": 12348 }, { "epoch": 0.5781028731932822, "grad_norm": 1.7109375, "learning_rate": 0.00017516288858126055, "loss": 2.8082, "step": 12349 }, { "epoch": 0.5781496869331149, "grad_norm": 1.828125, "learning_rate": 0.00017515903823058987, "loss": 2.6343, "step": 12350 }, { "epoch": 0.5781965006729475, "grad_norm": 1.1875, "learning_rate": 0.00017515518762381912, "loss": 2.557, "step": 12351 }, { "epoch": 0.5782433144127801, "grad_norm": 1.359375, "learning_rate": 0.00017515133676096145, "loss": 2.8088, "step": 12352 }, { "epoch": 0.5782901281526128, "grad_norm": 1.9765625, "learning_rate": 0.0001751474856420299, "loss": 2.7821, "step": 12353 }, { "epoch": 0.5783369418924454, "grad_norm": 1.296875, "learning_rate": 0.00017514363426703763, "loss": 2.6565, "step": 12354 }, { "epoch": 0.5783837556322781, "grad_norm": 1.2421875, "learning_rate": 0.00017513978263599778, "loss": 2.4823, "step": 12355 }, { "epoch": 0.5784305693721107, "grad_norm": 2.109375, "learning_rate": 0.0001751359307489235, "loss": 3.0612, "step": 12356 }, { "epoch": 0.5784773831119434, "grad_norm": 1.2734375, "learning_rate": 0.00017513207860582784, "loss": 2.6932, "step": 12357 }, { "epoch": 0.578524196851776, "grad_norm": 1.4921875, "learning_rate": 0.000175128226206724, "loss": 2.4954, "step": 12358 }, { "epoch": 0.5785710105916086, "grad_norm": 1.1953125, "learning_rate": 0.00017512437355162502, "loss": 2.6654, "step": 12359 }, { "epoch": 0.5786178243314413, "grad_norm": 1.2734375, "learning_rate": 0.0001751205206405441, "loss": 2.6057, "step": 12360 }, { "epoch": 0.5786646380712739, "grad_norm": 1.34375, "learning_rate": 0.00017511666747349434, "loss": 2.916, "step": 12361 }, { "epoch": 0.5787114518111066, "grad_norm": 1.609375, "learning_rate": 0.0001751128140504889, "loss": 3.1441, "step": 12362 }, { "epoch": 0.5787582655509392, "grad_norm": 1.203125, "learning_rate": 0.00017510896037154088, "loss": 2.6884, "step": 12363 }, { "epoch": 0.5788050792907719, "grad_norm": 1.421875, "learning_rate": 0.0001751051064366634, "loss": 2.8493, "step": 12364 }, { "epoch": 0.5788518930306045, "grad_norm": 1.1796875, "learning_rate": 0.0001751012522458696, "loss": 2.911, "step": 12365 }, { "epoch": 0.5788987067704371, "grad_norm": 3.4375, "learning_rate": 0.00017509739779917268, "loss": 3.4567, "step": 12366 }, { "epoch": 0.5789455205102698, "grad_norm": 1.65625, "learning_rate": 0.0001750935430965857, "loss": 2.9293, "step": 12367 }, { "epoch": 0.5789923342501024, "grad_norm": 1.109375, "learning_rate": 0.00017508968813812178, "loss": 2.2564, "step": 12368 }, { "epoch": 0.579039147989935, "grad_norm": 1.421875, "learning_rate": 0.00017508583292379415, "loss": 2.9382, "step": 12369 }, { "epoch": 0.5790859617297677, "grad_norm": 1.3359375, "learning_rate": 0.00017508197745361582, "loss": 2.3616, "step": 12370 }, { "epoch": 0.5791327754696003, "grad_norm": 1.4453125, "learning_rate": 0.00017507812172760004, "loss": 2.871, "step": 12371 }, { "epoch": 0.579179589209433, "grad_norm": 1.25, "learning_rate": 0.00017507426574575984, "loss": 2.5852, "step": 12372 }, { "epoch": 0.5792264029492656, "grad_norm": 1.765625, "learning_rate": 0.00017507040950810846, "loss": 2.3848, "step": 12373 }, { "epoch": 0.5792732166890983, "grad_norm": 1.3046875, "learning_rate": 0.000175066553014659, "loss": 2.7038, "step": 12374 }, { "epoch": 0.5793200304289309, "grad_norm": 1.578125, "learning_rate": 0.00017506269626542463, "loss": 3.0105, "step": 12375 }, { "epoch": 0.5793668441687635, "grad_norm": 2.546875, "learning_rate": 0.0001750588392604184, "loss": 3.0204, "step": 12376 }, { "epoch": 0.5794136579085962, "grad_norm": 1.390625, "learning_rate": 0.00017505498199965356, "loss": 2.7036, "step": 12377 }, { "epoch": 0.5794604716484288, "grad_norm": 1.5078125, "learning_rate": 0.00017505112448314316, "loss": 3.1249, "step": 12378 }, { "epoch": 0.5795072853882615, "grad_norm": 1.1796875, "learning_rate": 0.00017504726671090044, "loss": 2.5601, "step": 12379 }, { "epoch": 0.5795540991280941, "grad_norm": 1.5703125, "learning_rate": 0.00017504340868293845, "loss": 2.5574, "step": 12380 }, { "epoch": 0.5796009128679267, "grad_norm": 1.1796875, "learning_rate": 0.00017503955039927042, "loss": 2.5267, "step": 12381 }, { "epoch": 0.5796477266077594, "grad_norm": 1.59375, "learning_rate": 0.00017503569185990946, "loss": 2.8011, "step": 12382 }, { "epoch": 0.579694540347592, "grad_norm": 1.0390625, "learning_rate": 0.0001750318330648687, "loss": 2.4811, "step": 12383 }, { "epoch": 0.5797413540874247, "grad_norm": 1.328125, "learning_rate": 0.00017502797401416128, "loss": 2.455, "step": 12384 }, { "epoch": 0.5797881678272573, "grad_norm": 1.9375, "learning_rate": 0.00017502411470780044, "loss": 2.2156, "step": 12385 }, { "epoch": 0.5798349815670899, "grad_norm": 1.46875, "learning_rate": 0.0001750202551457992, "loss": 3.0519, "step": 12386 }, { "epoch": 0.5798817953069226, "grad_norm": 1.4765625, "learning_rate": 0.0001750163953281708, "loss": 2.8185, "step": 12387 }, { "epoch": 0.5799286090467553, "grad_norm": 1.3046875, "learning_rate": 0.0001750125352549284, "loss": 2.5332, "step": 12388 }, { "epoch": 0.5799754227865879, "grad_norm": 1.6953125, "learning_rate": 0.00017500867492608512, "loss": 2.9746, "step": 12389 }, { "epoch": 0.5800222365264205, "grad_norm": 1.5625, "learning_rate": 0.00017500481434165407, "loss": 3.0697, "step": 12390 }, { "epoch": 0.5800690502662531, "grad_norm": 1.625, "learning_rate": 0.00017500095350164849, "loss": 2.6063, "step": 12391 }, { "epoch": 0.5801158640060858, "grad_norm": 1.2578125, "learning_rate": 0.0001749970924060815, "loss": 2.641, "step": 12392 }, { "epoch": 0.5801626777459185, "grad_norm": 1.8046875, "learning_rate": 0.0001749932310549662, "loss": 2.8542, "step": 12393 }, { "epoch": 0.5802094914857511, "grad_norm": 1.453125, "learning_rate": 0.00017498936944831587, "loss": 2.8195, "step": 12394 }, { "epoch": 0.5802563052255837, "grad_norm": 1.71875, "learning_rate": 0.00017498550758614353, "loss": 3.0412, "step": 12395 }, { "epoch": 0.5803031189654163, "grad_norm": 1.4765625, "learning_rate": 0.00017498164546846245, "loss": 2.6673, "step": 12396 }, { "epoch": 0.580349932705249, "grad_norm": 1.984375, "learning_rate": 0.00017497778309528574, "loss": 3.228, "step": 12397 }, { "epoch": 0.5803967464450817, "grad_norm": 1.140625, "learning_rate": 0.00017497392046662654, "loss": 2.7323, "step": 12398 }, { "epoch": 0.5804435601849143, "grad_norm": 1.4921875, "learning_rate": 0.00017497005758249808, "loss": 2.6183, "step": 12399 }, { "epoch": 0.5804903739247469, "grad_norm": 1.5546875, "learning_rate": 0.00017496619444291346, "loss": 2.7371, "step": 12400 }, { "epoch": 0.5805371876645795, "grad_norm": 1.4375, "learning_rate": 0.00017496233104788588, "loss": 2.6191, "step": 12401 }, { "epoch": 0.5805840014044122, "grad_norm": 2.140625, "learning_rate": 0.0001749584673974285, "loss": 2.7234, "step": 12402 }, { "epoch": 0.5806308151442449, "grad_norm": 1.703125, "learning_rate": 0.00017495460349155442, "loss": 2.9744, "step": 12403 }, { "epoch": 0.5806776288840775, "grad_norm": 1.875, "learning_rate": 0.00017495073933027696, "loss": 2.769, "step": 12404 }, { "epoch": 0.5807244426239101, "grad_norm": 1.578125, "learning_rate": 0.00017494687491360908, "loss": 2.7911, "step": 12405 }, { "epoch": 0.5807712563637427, "grad_norm": 1.1328125, "learning_rate": 0.00017494301024156413, "loss": 2.6782, "step": 12406 }, { "epoch": 0.5808180701035754, "grad_norm": 1.40625, "learning_rate": 0.00017493914531415517, "loss": 2.5522, "step": 12407 }, { "epoch": 0.5808648838434081, "grad_norm": 1.53125, "learning_rate": 0.0001749352801313954, "loss": 2.6022, "step": 12408 }, { "epoch": 0.5809116975832407, "grad_norm": 1.3359375, "learning_rate": 0.000174931414693298, "loss": 2.7816, "step": 12409 }, { "epoch": 0.5809585113230733, "grad_norm": 1.484375, "learning_rate": 0.00017492754899987615, "loss": 2.6098, "step": 12410 }, { "epoch": 0.5810053250629059, "grad_norm": 1.15625, "learning_rate": 0.00017492368305114302, "loss": 2.316, "step": 12411 }, { "epoch": 0.5810521388027386, "grad_norm": 1.9375, "learning_rate": 0.00017491981684711173, "loss": 3.2156, "step": 12412 }, { "epoch": 0.5810989525425713, "grad_norm": 1.25, "learning_rate": 0.00017491595038779552, "loss": 2.7501, "step": 12413 }, { "epoch": 0.5811457662824039, "grad_norm": 1.6640625, "learning_rate": 0.00017491208367320752, "loss": 2.6826, "step": 12414 }, { "epoch": 0.5811925800222365, "grad_norm": 1.4453125, "learning_rate": 0.00017490821670336094, "loss": 2.7723, "step": 12415 }, { "epoch": 0.5812393937620691, "grad_norm": 1.9453125, "learning_rate": 0.00017490434947826895, "loss": 2.7091, "step": 12416 }, { "epoch": 0.5812862075019019, "grad_norm": 1.6015625, "learning_rate": 0.0001749004819979447, "loss": 3.065, "step": 12417 }, { "epoch": 0.5813330212417345, "grad_norm": 1.9296875, "learning_rate": 0.00017489661426240143, "loss": 3.0269, "step": 12418 }, { "epoch": 0.5813798349815671, "grad_norm": 1.4453125, "learning_rate": 0.00017489274627165223, "loss": 2.4268, "step": 12419 }, { "epoch": 0.5814266487213997, "grad_norm": 1.6796875, "learning_rate": 0.00017488887802571035, "loss": 3.0488, "step": 12420 }, { "epoch": 0.5814734624612323, "grad_norm": 1.6328125, "learning_rate": 0.00017488500952458894, "loss": 2.9168, "step": 12421 }, { "epoch": 0.5815202762010651, "grad_norm": 1.375, "learning_rate": 0.00017488114076830117, "loss": 2.8012, "step": 12422 }, { "epoch": 0.5815670899408977, "grad_norm": 1.8125, "learning_rate": 0.00017487727175686027, "loss": 2.8987, "step": 12423 }, { "epoch": 0.5816139036807303, "grad_norm": 1.3828125, "learning_rate": 0.00017487340249027934, "loss": 2.9149, "step": 12424 }, { "epoch": 0.5816607174205629, "grad_norm": 1.8359375, "learning_rate": 0.00017486953296857167, "loss": 3.6925, "step": 12425 }, { "epoch": 0.5817075311603955, "grad_norm": 1.1953125, "learning_rate": 0.0001748656631917504, "loss": 2.8739, "step": 12426 }, { "epoch": 0.5817543449002283, "grad_norm": 1.328125, "learning_rate": 0.0001748617931598287, "loss": 3.1364, "step": 12427 }, { "epoch": 0.5818011586400609, "grad_norm": 2.171875, "learning_rate": 0.00017485792287281977, "loss": 2.7826, "step": 12428 }, { "epoch": 0.5818479723798935, "grad_norm": 1.6015625, "learning_rate": 0.00017485405233073678, "loss": 3.1028, "step": 12429 }, { "epoch": 0.5818947861197261, "grad_norm": 1.703125, "learning_rate": 0.00017485018153359292, "loss": 3.1395, "step": 12430 }, { "epoch": 0.5819415998595587, "grad_norm": 1.4140625, "learning_rate": 0.0001748463104814014, "loss": 2.8747, "step": 12431 }, { "epoch": 0.5819884135993915, "grad_norm": 1.5, "learning_rate": 0.0001748424391741754, "loss": 2.4097, "step": 12432 }, { "epoch": 0.5820352273392241, "grad_norm": 1.1875, "learning_rate": 0.00017483856761192816, "loss": 2.9148, "step": 12433 }, { "epoch": 0.5820820410790567, "grad_norm": 1.6015625, "learning_rate": 0.0001748346957946728, "loss": 2.4607, "step": 12434 }, { "epoch": 0.5821288548188893, "grad_norm": 1.7890625, "learning_rate": 0.00017483082372242255, "loss": 2.7151, "step": 12435 }, { "epoch": 0.5821756685587219, "grad_norm": 1.671875, "learning_rate": 0.00017482695139519057, "loss": 2.5328, "step": 12436 }, { "epoch": 0.5822224822985547, "grad_norm": 1.484375, "learning_rate": 0.0001748230788129901, "loss": 2.3415, "step": 12437 }, { "epoch": 0.5822692960383873, "grad_norm": 1.5234375, "learning_rate": 0.00017481920597583428, "loss": 2.8224, "step": 12438 }, { "epoch": 0.5823161097782199, "grad_norm": 1.3515625, "learning_rate": 0.00017481533288373638, "loss": 2.5045, "step": 12439 }, { "epoch": 0.5823629235180525, "grad_norm": 1.984375, "learning_rate": 0.00017481145953670952, "loss": 2.6985, "step": 12440 }, { "epoch": 0.5824097372578851, "grad_norm": 1.21875, "learning_rate": 0.00017480758593476696, "loss": 2.8157, "step": 12441 }, { "epoch": 0.5824565509977179, "grad_norm": 1.3671875, "learning_rate": 0.00017480371207792187, "loss": 2.6679, "step": 12442 }, { "epoch": 0.5825033647375505, "grad_norm": 1.1015625, "learning_rate": 0.00017479983796618746, "loss": 2.8687, "step": 12443 }, { "epoch": 0.5825501784773831, "grad_norm": 1.2109375, "learning_rate": 0.00017479596359957692, "loss": 2.676, "step": 12444 }, { "epoch": 0.5825969922172157, "grad_norm": 1.2890625, "learning_rate": 0.00017479208897810345, "loss": 2.6569, "step": 12445 }, { "epoch": 0.5826438059570483, "grad_norm": 1.109375, "learning_rate": 0.00017478821410178027, "loss": 2.3759, "step": 12446 }, { "epoch": 0.5826906196968811, "grad_norm": 1.3984375, "learning_rate": 0.00017478433897062057, "loss": 3.0256, "step": 12447 }, { "epoch": 0.5827374334367137, "grad_norm": 1.90625, "learning_rate": 0.0001747804635846376, "loss": 3.135, "step": 12448 }, { "epoch": 0.5827842471765463, "grad_norm": 1.6875, "learning_rate": 0.00017477658794384446, "loss": 2.3758, "step": 12449 }, { "epoch": 0.5828310609163789, "grad_norm": 1.515625, "learning_rate": 0.00017477271204825444, "loss": 2.5669, "step": 12450 }, { "epoch": 0.5828778746562115, "grad_norm": 1.265625, "learning_rate": 0.00017476883589788073, "loss": 2.4931, "step": 12451 }, { "epoch": 0.5829246883960443, "grad_norm": 2.1875, "learning_rate": 0.00017476495949273653, "loss": 2.4463, "step": 12452 }, { "epoch": 0.5829715021358769, "grad_norm": 1.2421875, "learning_rate": 0.00017476108283283505, "loss": 2.5999, "step": 12453 }, { "epoch": 0.5830183158757095, "grad_norm": 1.3046875, "learning_rate": 0.0001747572059181895, "loss": 2.8915, "step": 12454 }, { "epoch": 0.5830651296155421, "grad_norm": 2.046875, "learning_rate": 0.0001747533287488131, "loss": 3.0779, "step": 12455 }, { "epoch": 0.5831119433553748, "grad_norm": 2.109375, "learning_rate": 0.00017474945132471908, "loss": 3.2201, "step": 12456 }, { "epoch": 0.5831587570952075, "grad_norm": 1.875, "learning_rate": 0.00017474557364592055, "loss": 2.6681, "step": 12457 }, { "epoch": 0.5832055708350401, "grad_norm": 2.046875, "learning_rate": 0.00017474169571243087, "loss": 3.0079, "step": 12458 }, { "epoch": 0.5832523845748727, "grad_norm": 1.90625, "learning_rate": 0.00017473781752426316, "loss": 2.4271, "step": 12459 }, { "epoch": 0.5832991983147053, "grad_norm": 1.9609375, "learning_rate": 0.00017473393908143064, "loss": 3.2063, "step": 12460 }, { "epoch": 0.583346012054538, "grad_norm": 2.125, "learning_rate": 0.00017473006038394657, "loss": 2.8677, "step": 12461 }, { "epoch": 0.5833928257943707, "grad_norm": 1.9296875, "learning_rate": 0.00017472618143182412, "loss": 2.825, "step": 12462 }, { "epoch": 0.5834396395342033, "grad_norm": 1.3203125, "learning_rate": 0.00017472230222507652, "loss": 3.0907, "step": 12463 }, { "epoch": 0.5834864532740359, "grad_norm": 1.2109375, "learning_rate": 0.00017471842276371702, "loss": 2.8695, "step": 12464 }, { "epoch": 0.5835332670138685, "grad_norm": 1.2109375, "learning_rate": 0.0001747145430477588, "loss": 2.8006, "step": 12465 }, { "epoch": 0.5835800807537012, "grad_norm": 1.7265625, "learning_rate": 0.00017471066307721508, "loss": 2.7587, "step": 12466 }, { "epoch": 0.5836268944935339, "grad_norm": 1.59375, "learning_rate": 0.0001747067828520991, "loss": 2.6953, "step": 12467 }, { "epoch": 0.5836737082333665, "grad_norm": 2.1875, "learning_rate": 0.0001747029023724241, "loss": 2.396, "step": 12468 }, { "epoch": 0.5837205219731991, "grad_norm": 1.765625, "learning_rate": 0.00017469902163820328, "loss": 2.9827, "step": 12469 }, { "epoch": 0.5837673357130317, "grad_norm": 1.0546875, "learning_rate": 0.00017469514064944982, "loss": 2.4329, "step": 12470 }, { "epoch": 0.5838141494528644, "grad_norm": 1.6796875, "learning_rate": 0.000174691259406177, "loss": 2.5644, "step": 12471 }, { "epoch": 0.5838609631926971, "grad_norm": 1.453125, "learning_rate": 0.00017468737790839805, "loss": 2.6835, "step": 12472 }, { "epoch": 0.5839077769325297, "grad_norm": 1.296875, "learning_rate": 0.00017468349615612616, "loss": 2.6828, "step": 12473 }, { "epoch": 0.5839545906723623, "grad_norm": 1.1796875, "learning_rate": 0.00017467961414937454, "loss": 2.6852, "step": 12474 }, { "epoch": 0.584001404412195, "grad_norm": 1.4140625, "learning_rate": 0.0001746757318881565, "loss": 2.7717, "step": 12475 }, { "epoch": 0.5840482181520276, "grad_norm": 1.8359375, "learning_rate": 0.0001746718493724852, "loss": 2.7278, "step": 12476 }, { "epoch": 0.5840950318918603, "grad_norm": 3.09375, "learning_rate": 0.00017466796660237392, "loss": 2.9572, "step": 12477 }, { "epoch": 0.5841418456316929, "grad_norm": 1.265625, "learning_rate": 0.00017466408357783583, "loss": 2.856, "step": 12478 }, { "epoch": 0.5841886593715255, "grad_norm": 1.5859375, "learning_rate": 0.0001746602002988842, "loss": 3.1384, "step": 12479 }, { "epoch": 0.5842354731113582, "grad_norm": 1.5234375, "learning_rate": 0.00017465631676553226, "loss": 3.132, "step": 12480 }, { "epoch": 0.5842822868511909, "grad_norm": 1.4140625, "learning_rate": 0.00017465243297779326, "loss": 3.0123, "step": 12481 }, { "epoch": 0.5843291005910235, "grad_norm": 1.5078125, "learning_rate": 0.00017464854893568034, "loss": 2.7845, "step": 12482 }, { "epoch": 0.5843759143308561, "grad_norm": 1.2734375, "learning_rate": 0.00017464466463920685, "loss": 2.6922, "step": 12483 }, { "epoch": 0.5844227280706887, "grad_norm": 1.4296875, "learning_rate": 0.00017464078008838597, "loss": 2.9769, "step": 12484 }, { "epoch": 0.5844695418105214, "grad_norm": 1.609375, "learning_rate": 0.00017463689528323097, "loss": 2.9737, "step": 12485 }, { "epoch": 0.5845163555503541, "grad_norm": 2.234375, "learning_rate": 0.00017463301022375505, "loss": 2.9209, "step": 12486 }, { "epoch": 0.5845631692901867, "grad_norm": 1.3984375, "learning_rate": 0.00017462912490997146, "loss": 2.5167, "step": 12487 }, { "epoch": 0.5846099830300193, "grad_norm": 1.453125, "learning_rate": 0.00017462523934189342, "loss": 2.468, "step": 12488 }, { "epoch": 0.5846567967698519, "grad_norm": 1.1796875, "learning_rate": 0.0001746213535195342, "loss": 2.7361, "step": 12489 }, { "epoch": 0.5847036105096846, "grad_norm": 1.4453125, "learning_rate": 0.00017461746744290704, "loss": 2.9221, "step": 12490 }, { "epoch": 0.5847504242495173, "grad_norm": 2.484375, "learning_rate": 0.00017461358111202518, "loss": 3.0989, "step": 12491 }, { "epoch": 0.5847972379893499, "grad_norm": 1.4609375, "learning_rate": 0.00017460969452690183, "loss": 2.8464, "step": 12492 }, { "epoch": 0.5848440517291825, "grad_norm": 1.578125, "learning_rate": 0.0001746058076875503, "loss": 2.6747, "step": 12493 }, { "epoch": 0.5848908654690151, "grad_norm": 1.5546875, "learning_rate": 0.00017460192059398375, "loss": 2.745, "step": 12494 }, { "epoch": 0.5849376792088478, "grad_norm": 1.5, "learning_rate": 0.0001745980332462155, "loss": 3.8698, "step": 12495 }, { "epoch": 0.5849844929486805, "grad_norm": 1.3828125, "learning_rate": 0.00017459414564425873, "loss": 2.9832, "step": 12496 }, { "epoch": 0.5850313066885131, "grad_norm": 1.203125, "learning_rate": 0.00017459025778812675, "loss": 2.3523, "step": 12497 }, { "epoch": 0.5850781204283457, "grad_norm": 1.6640625, "learning_rate": 0.00017458636967783276, "loss": 3.0182, "step": 12498 }, { "epoch": 0.5851249341681783, "grad_norm": 1.2890625, "learning_rate": 0.00017458248131339003, "loss": 2.8066, "step": 12499 }, { "epoch": 0.585171747908011, "grad_norm": 1.265625, "learning_rate": 0.00017457859269481182, "loss": 3.1829, "step": 12500 }, { "epoch": 0.5852185616478437, "grad_norm": 1.0546875, "learning_rate": 0.00017457470382211134, "loss": 2.6298, "step": 12501 }, { "epoch": 0.5852653753876763, "grad_norm": 1.84375, "learning_rate": 0.00017457081469530187, "loss": 4.7453, "step": 12502 }, { "epoch": 0.5853121891275089, "grad_norm": 1.2421875, "learning_rate": 0.0001745669253143967, "loss": 2.9627, "step": 12503 }, { "epoch": 0.5853590028673415, "grad_norm": 1.5, "learning_rate": 0.00017456303567940903, "loss": 2.8332, "step": 12504 }, { "epoch": 0.5854058166071742, "grad_norm": 1.4375, "learning_rate": 0.0001745591457903521, "loss": 2.5596, "step": 12505 }, { "epoch": 0.5854526303470069, "grad_norm": 1.3671875, "learning_rate": 0.00017455525564723918, "loss": 2.6772, "step": 12506 }, { "epoch": 0.5854994440868395, "grad_norm": 1.5, "learning_rate": 0.00017455136525008355, "loss": 2.8611, "step": 12507 }, { "epoch": 0.5855462578266721, "grad_norm": 1.578125, "learning_rate": 0.00017454747459889845, "loss": 2.8705, "step": 12508 }, { "epoch": 0.5855930715665048, "grad_norm": 1.4375, "learning_rate": 0.00017454358369369718, "loss": 2.5313, "step": 12509 }, { "epoch": 0.5856398853063374, "grad_norm": 1.296875, "learning_rate": 0.00017453969253449292, "loss": 2.9658, "step": 12510 }, { "epoch": 0.5856866990461701, "grad_norm": 1.3515625, "learning_rate": 0.00017453580112129896, "loss": 2.7558, "step": 12511 }, { "epoch": 0.5857335127860027, "grad_norm": 1.296875, "learning_rate": 0.00017453190945412858, "loss": 2.6142, "step": 12512 }, { "epoch": 0.5857803265258353, "grad_norm": 1.7734375, "learning_rate": 0.00017452801753299504, "loss": 2.8712, "step": 12513 }, { "epoch": 0.585827140265668, "grad_norm": 1.25, "learning_rate": 0.00017452412535791156, "loss": 2.8151, "step": 12514 }, { "epoch": 0.5858739540055006, "grad_norm": 2.421875, "learning_rate": 0.00017452023292889143, "loss": 2.6925, "step": 12515 }, { "epoch": 0.5859207677453333, "grad_norm": 1.1640625, "learning_rate": 0.00017451634024594795, "loss": 2.6582, "step": 12516 }, { "epoch": 0.5859675814851659, "grad_norm": 3.375, "learning_rate": 0.00017451244730909432, "loss": 3.5584, "step": 12517 }, { "epoch": 0.5860143952249985, "grad_norm": 1.296875, "learning_rate": 0.00017450855411834382, "loss": 2.6504, "step": 12518 }, { "epoch": 0.5860612089648312, "grad_norm": 1.5390625, "learning_rate": 0.00017450466067370975, "loss": 2.624, "step": 12519 }, { "epoch": 0.5861080227046638, "grad_norm": 1.25, "learning_rate": 0.00017450076697520536, "loss": 2.6833, "step": 12520 }, { "epoch": 0.5861548364444965, "grad_norm": 1.4375, "learning_rate": 0.0001744968730228439, "loss": 2.7803, "step": 12521 }, { "epoch": 0.5862016501843291, "grad_norm": 2.21875, "learning_rate": 0.00017449297881663863, "loss": 2.6081, "step": 12522 }, { "epoch": 0.5862484639241617, "grad_norm": 1.890625, "learning_rate": 0.0001744890843566029, "loss": 2.8581, "step": 12523 }, { "epoch": 0.5862952776639944, "grad_norm": 1.2265625, "learning_rate": 0.00017448518964274988, "loss": 2.9352, "step": 12524 }, { "epoch": 0.586342091403827, "grad_norm": 1.5703125, "learning_rate": 0.00017448129467509288, "loss": 2.9966, "step": 12525 }, { "epoch": 0.5863889051436597, "grad_norm": 1.6484375, "learning_rate": 0.00017447739945364519, "loss": 2.8579, "step": 12526 }, { "epoch": 0.5864357188834923, "grad_norm": 1.3046875, "learning_rate": 0.00017447350397842005, "loss": 2.6285, "step": 12527 }, { "epoch": 0.586482532623325, "grad_norm": 2.375, "learning_rate": 0.00017446960824943076, "loss": 2.9694, "step": 12528 }, { "epoch": 0.5865293463631576, "grad_norm": 1.8671875, "learning_rate": 0.0001744657122666906, "loss": 2.7077, "step": 12529 }, { "epoch": 0.5865761601029902, "grad_norm": 1.65625, "learning_rate": 0.0001744618160302128, "loss": 2.9772, "step": 12530 }, { "epoch": 0.5866229738428229, "grad_norm": 2.109375, "learning_rate": 0.00017445791954001066, "loss": 2.9428, "step": 12531 }, { "epoch": 0.5866697875826555, "grad_norm": 1.40625, "learning_rate": 0.0001744540227960975, "loss": 2.921, "step": 12532 }, { "epoch": 0.5867166013224882, "grad_norm": 3.046875, "learning_rate": 0.00017445012579848658, "loss": 2.2845, "step": 12533 }, { "epoch": 0.5867634150623208, "grad_norm": 1.140625, "learning_rate": 0.0001744462285471911, "loss": 2.744, "step": 12534 }, { "epoch": 0.5868102288021534, "grad_norm": 1.1796875, "learning_rate": 0.00017444233104222442, "loss": 2.5608, "step": 12535 }, { "epoch": 0.5868570425419861, "grad_norm": 1.4609375, "learning_rate": 0.0001744384332835998, "loss": 2.7851, "step": 12536 }, { "epoch": 0.5869038562818187, "grad_norm": 1.46875, "learning_rate": 0.00017443453527133052, "loss": 2.5103, "step": 12537 }, { "epoch": 0.5869506700216514, "grad_norm": 1.53125, "learning_rate": 0.00017443063700542987, "loss": 2.653, "step": 12538 }, { "epoch": 0.586997483761484, "grad_norm": 1.5234375, "learning_rate": 0.0001744267384859111, "loss": 2.9242, "step": 12539 }, { "epoch": 0.5870442975013166, "grad_norm": 1.3828125, "learning_rate": 0.00017442283971278756, "loss": 2.8352, "step": 12540 }, { "epoch": 0.5870911112411493, "grad_norm": 1.65625, "learning_rate": 0.00017441894068607246, "loss": 3.1187, "step": 12541 }, { "epoch": 0.5871379249809819, "grad_norm": 1.2109375, "learning_rate": 0.00017441504140577914, "loss": 3.2059, "step": 12542 }, { "epoch": 0.5871847387208146, "grad_norm": 1.4609375, "learning_rate": 0.0001744111418719209, "loss": 2.5153, "step": 12543 }, { "epoch": 0.5872315524606472, "grad_norm": 1.578125, "learning_rate": 0.00017440724208451095, "loss": 2.4309, "step": 12544 }, { "epoch": 0.5872783662004798, "grad_norm": 1.421875, "learning_rate": 0.00017440334204356264, "loss": 2.9286, "step": 12545 }, { "epoch": 0.5873251799403125, "grad_norm": 1.3359375, "learning_rate": 0.00017439944174908923, "loss": 2.5168, "step": 12546 }, { "epoch": 0.5873719936801451, "grad_norm": 1.515625, "learning_rate": 0.00017439554120110407, "loss": 2.9698, "step": 12547 }, { "epoch": 0.5874188074199778, "grad_norm": 1.3984375, "learning_rate": 0.00017439164039962035, "loss": 2.745, "step": 12548 }, { "epoch": 0.5874656211598104, "grad_norm": 1.515625, "learning_rate": 0.00017438773934465144, "loss": 2.4705, "step": 12549 }, { "epoch": 0.587512434899643, "grad_norm": 1.8828125, "learning_rate": 0.0001743838380362106, "loss": 3.0312, "step": 12550 }, { "epoch": 0.5875592486394757, "grad_norm": 1.609375, "learning_rate": 0.0001743799364743111, "loss": 2.887, "step": 12551 }, { "epoch": 0.5876060623793083, "grad_norm": 1.375, "learning_rate": 0.00017437603465896633, "loss": 2.6961, "step": 12552 }, { "epoch": 0.587652876119141, "grad_norm": 1.6328125, "learning_rate": 0.00017437213259018948, "loss": 2.3224, "step": 12553 }, { "epoch": 0.5876996898589736, "grad_norm": 1.40625, "learning_rate": 0.0001743682302679939, "loss": 2.5246, "step": 12554 }, { "epoch": 0.5877465035988062, "grad_norm": 1.1953125, "learning_rate": 0.00017436432769239285, "loss": 2.5668, "step": 12555 }, { "epoch": 0.5877933173386389, "grad_norm": 1.75, "learning_rate": 0.0001743604248633997, "loss": 2.6359, "step": 12556 }, { "epoch": 0.5878401310784716, "grad_norm": 3.40625, "learning_rate": 0.00017435652178102767, "loss": 2.3509, "step": 12557 }, { "epoch": 0.5878869448183042, "grad_norm": 2.234375, "learning_rate": 0.00017435261844529008, "loss": 2.698, "step": 12558 }, { "epoch": 0.5879337585581368, "grad_norm": 1.234375, "learning_rate": 0.00017434871485620023, "loss": 2.7159, "step": 12559 }, { "epoch": 0.5879805722979694, "grad_norm": 1.7265625, "learning_rate": 0.00017434481101377146, "loss": 2.6646, "step": 12560 }, { "epoch": 0.5880273860378021, "grad_norm": 1.5, "learning_rate": 0.00017434090691801705, "loss": 2.9446, "step": 12561 }, { "epoch": 0.5880741997776348, "grad_norm": 1.265625, "learning_rate": 0.00017433700256895028, "loss": 2.5532, "step": 12562 }, { "epoch": 0.5881210135174674, "grad_norm": 1.140625, "learning_rate": 0.00017433309796658448, "loss": 2.796, "step": 12563 }, { "epoch": 0.5881678272573, "grad_norm": 1.296875, "learning_rate": 0.00017432919311093293, "loss": 3.028, "step": 12564 }, { "epoch": 0.5882146409971326, "grad_norm": 1.46875, "learning_rate": 0.00017432528800200898, "loss": 2.9871, "step": 12565 }, { "epoch": 0.5882614547369653, "grad_norm": 1.25, "learning_rate": 0.00017432138263982587, "loss": 2.8064, "step": 12566 }, { "epoch": 0.588308268476798, "grad_norm": 1.359375, "learning_rate": 0.00017431747702439698, "loss": 2.9387, "step": 12567 }, { "epoch": 0.5883550822166306, "grad_norm": 1.609375, "learning_rate": 0.00017431357115573558, "loss": 3.0188, "step": 12568 }, { "epoch": 0.5884018959564632, "grad_norm": 1.296875, "learning_rate": 0.00017430966503385496, "loss": 2.7951, "step": 12569 }, { "epoch": 0.5884487096962958, "grad_norm": 1.453125, "learning_rate": 0.00017430575865876846, "loss": 2.4785, "step": 12570 }, { "epoch": 0.5884955234361285, "grad_norm": 1.7421875, "learning_rate": 0.00017430185203048937, "loss": 2.5888, "step": 12571 }, { "epoch": 0.5885423371759612, "grad_norm": 1.609375, "learning_rate": 0.00017429794514903105, "loss": 3.0227, "step": 12572 }, { "epoch": 0.5885891509157938, "grad_norm": 1.4921875, "learning_rate": 0.00017429403801440672, "loss": 3.0069, "step": 12573 }, { "epoch": 0.5886359646556264, "grad_norm": 1.5, "learning_rate": 0.0001742901306266298, "loss": 2.5629, "step": 12574 }, { "epoch": 0.588682778395459, "grad_norm": 1.4140625, "learning_rate": 0.00017428622298571353, "loss": 2.8672, "step": 12575 }, { "epoch": 0.5887295921352917, "grad_norm": 1.015625, "learning_rate": 0.00017428231509167126, "loss": 2.7769, "step": 12576 }, { "epoch": 0.5887764058751244, "grad_norm": 2.125, "learning_rate": 0.0001742784069445163, "loss": 3.1157, "step": 12577 }, { "epoch": 0.588823219614957, "grad_norm": 1.375, "learning_rate": 0.00017427449854426195, "loss": 2.8416, "step": 12578 }, { "epoch": 0.5888700333547896, "grad_norm": 1.6015625, "learning_rate": 0.00017427058989092153, "loss": 2.5315, "step": 12579 }, { "epoch": 0.5889168470946222, "grad_norm": 1.203125, "learning_rate": 0.00017426668098450838, "loss": 2.8083, "step": 12580 }, { "epoch": 0.588963660834455, "grad_norm": 1.359375, "learning_rate": 0.0001742627718250358, "loss": 3.1963, "step": 12581 }, { "epoch": 0.5890104745742876, "grad_norm": 1.203125, "learning_rate": 0.00017425886241251713, "loss": 2.8962, "step": 12582 }, { "epoch": 0.5890572883141202, "grad_norm": 1.4609375, "learning_rate": 0.00017425495274696567, "loss": 3.0379, "step": 12583 }, { "epoch": 0.5891041020539528, "grad_norm": 1.265625, "learning_rate": 0.00017425104282839477, "loss": 2.8598, "step": 12584 }, { "epoch": 0.5891509157937854, "grad_norm": 1.53125, "learning_rate": 0.0001742471326568177, "loss": 2.7373, "step": 12585 }, { "epoch": 0.5891977295336182, "grad_norm": 1.5078125, "learning_rate": 0.00017424322223224785, "loss": 2.9413, "step": 12586 }, { "epoch": 0.5892445432734508, "grad_norm": 1.703125, "learning_rate": 0.0001742393115546985, "loss": 2.8149, "step": 12587 }, { "epoch": 0.5892913570132834, "grad_norm": 1.3671875, "learning_rate": 0.00017423540062418299, "loss": 2.9356, "step": 12588 }, { "epoch": 0.589338170753116, "grad_norm": 1.265625, "learning_rate": 0.0001742314894407146, "loss": 3.0369, "step": 12589 }, { "epoch": 0.5893849844929486, "grad_norm": 1.5625, "learning_rate": 0.00017422757800430677, "loss": 2.5657, "step": 12590 }, { "epoch": 0.5894317982327814, "grad_norm": 1.4921875, "learning_rate": 0.00017422366631497272, "loss": 2.7868, "step": 12591 }, { "epoch": 0.589478611972614, "grad_norm": 1.640625, "learning_rate": 0.00017421975437272583, "loss": 2.4678, "step": 12592 }, { "epoch": 0.5895254257124466, "grad_norm": 1.4375, "learning_rate": 0.0001742158421775794, "loss": 3.0666, "step": 12593 }, { "epoch": 0.5895722394522792, "grad_norm": 1.6484375, "learning_rate": 0.0001742119297295468, "loss": 3.1609, "step": 12594 }, { "epoch": 0.5896190531921118, "grad_norm": 1.6796875, "learning_rate": 0.00017420801702864132, "loss": 2.4129, "step": 12595 }, { "epoch": 0.5896658669319446, "grad_norm": 1.75, "learning_rate": 0.00017420410407487634, "loss": 3.0478, "step": 12596 }, { "epoch": 0.5897126806717772, "grad_norm": 1.8359375, "learning_rate": 0.00017420019086826513, "loss": 2.4411, "step": 12597 }, { "epoch": 0.5897594944116098, "grad_norm": 1.4296875, "learning_rate": 0.00017419627740882106, "loss": 2.5881, "step": 12598 }, { "epoch": 0.5898063081514424, "grad_norm": 1.3125, "learning_rate": 0.0001741923636965575, "loss": 2.4986, "step": 12599 }, { "epoch": 0.589853121891275, "grad_norm": 0.86328125, "learning_rate": 0.0001741884497314877, "loss": 1.7337, "step": 12600 }, { "epoch": 0.5898999356311078, "grad_norm": 1.2109375, "learning_rate": 0.0001741845355136251, "loss": 2.6106, "step": 12601 }, { "epoch": 0.5899467493709404, "grad_norm": 1.640625, "learning_rate": 0.00017418062104298293, "loss": 2.991, "step": 12602 }, { "epoch": 0.589993563110773, "grad_norm": 1.34375, "learning_rate": 0.0001741767063195746, "loss": 2.8414, "step": 12603 }, { "epoch": 0.5900403768506056, "grad_norm": 1.3359375, "learning_rate": 0.00017417279134341346, "loss": 2.7401, "step": 12604 }, { "epoch": 0.5900871905904383, "grad_norm": 1.4609375, "learning_rate": 0.00017416887611451277, "loss": 2.9688, "step": 12605 }, { "epoch": 0.590134004330271, "grad_norm": 1.46875, "learning_rate": 0.00017416496063288596, "loss": 2.856, "step": 12606 }, { "epoch": 0.5901808180701036, "grad_norm": 1.2578125, "learning_rate": 0.00017416104489854632, "loss": 2.7907, "step": 12607 }, { "epoch": 0.5902276318099362, "grad_norm": 1.4609375, "learning_rate": 0.0001741571289115072, "loss": 2.3375, "step": 12608 }, { "epoch": 0.5902744455497688, "grad_norm": 1.5390625, "learning_rate": 0.00017415321267178198, "loss": 2.5027, "step": 12609 }, { "epoch": 0.5903212592896016, "grad_norm": 1.5703125, "learning_rate": 0.00017414929617938393, "loss": 2.4755, "step": 12610 }, { "epoch": 0.5903680730294342, "grad_norm": 1.5703125, "learning_rate": 0.0001741453794343265, "loss": 2.7309, "step": 12611 }, { "epoch": 0.5904148867692668, "grad_norm": 1.7890625, "learning_rate": 0.00017414146243662293, "loss": 2.7864, "step": 12612 }, { "epoch": 0.5904617005090994, "grad_norm": 1.046875, "learning_rate": 0.0001741375451862866, "loss": 1.9307, "step": 12613 }, { "epoch": 0.590508514248932, "grad_norm": 1.53125, "learning_rate": 0.0001741336276833309, "loss": 2.5994, "step": 12614 }, { "epoch": 0.5905553279887648, "grad_norm": 1.25, "learning_rate": 0.00017412970992776913, "loss": 2.578, "step": 12615 }, { "epoch": 0.5906021417285974, "grad_norm": 1.40625, "learning_rate": 0.0001741257919196147, "loss": 2.7547, "step": 12616 }, { "epoch": 0.59064895546843, "grad_norm": 1.53125, "learning_rate": 0.00017412187365888087, "loss": 2.8214, "step": 12617 }, { "epoch": 0.5906957692082626, "grad_norm": 1.3828125, "learning_rate": 0.00017411795514558104, "loss": 2.645, "step": 12618 }, { "epoch": 0.5907425829480952, "grad_norm": 1.59375, "learning_rate": 0.00017411403637972858, "loss": 2.5816, "step": 12619 }, { "epoch": 0.590789396687928, "grad_norm": 1.5703125, "learning_rate": 0.00017411011736133682, "loss": 3.6107, "step": 12620 }, { "epoch": 0.5908362104277606, "grad_norm": 2.28125, "learning_rate": 0.00017410619809041914, "loss": 2.4931, "step": 12621 }, { "epoch": 0.5908830241675932, "grad_norm": 1.40625, "learning_rate": 0.00017410227856698885, "loss": 2.7036, "step": 12622 }, { "epoch": 0.5909298379074258, "grad_norm": 1.5234375, "learning_rate": 0.00017409835879105933, "loss": 2.8199, "step": 12623 }, { "epoch": 0.5909766516472584, "grad_norm": 1.5, "learning_rate": 0.00017409443876264396, "loss": 3.103, "step": 12624 }, { "epoch": 0.5910234653870912, "grad_norm": 1.1484375, "learning_rate": 0.00017409051848175605, "loss": 2.5304, "step": 12625 }, { "epoch": 0.5910702791269238, "grad_norm": 1.734375, "learning_rate": 0.00017408659794840897, "loss": 3.0586, "step": 12626 }, { "epoch": 0.5911170928667564, "grad_norm": 1.359375, "learning_rate": 0.0001740826771626161, "loss": 2.7893, "step": 12627 }, { "epoch": 0.591163906606589, "grad_norm": 1.5546875, "learning_rate": 0.00017407875612439078, "loss": 2.9301, "step": 12628 }, { "epoch": 0.5912107203464216, "grad_norm": 1.4921875, "learning_rate": 0.00017407483483374643, "loss": 2.4504, "step": 12629 }, { "epoch": 0.5912575340862544, "grad_norm": 1.625, "learning_rate": 0.00017407091329069632, "loss": 2.6964, "step": 12630 }, { "epoch": 0.591304347826087, "grad_norm": 1.4765625, "learning_rate": 0.00017406699149525386, "loss": 2.5455, "step": 12631 }, { "epoch": 0.5913511615659196, "grad_norm": 1.40625, "learning_rate": 0.0001740630694474324, "loss": 2.6123, "step": 12632 }, { "epoch": 0.5913979753057522, "grad_norm": 1.5078125, "learning_rate": 0.00017405914714724532, "loss": 3.0022, "step": 12633 }, { "epoch": 0.5914447890455848, "grad_norm": 2.625, "learning_rate": 0.00017405522459470598, "loss": 3.4379, "step": 12634 }, { "epoch": 0.5914916027854176, "grad_norm": 2.140625, "learning_rate": 0.0001740513017898277, "loss": 2.9133, "step": 12635 }, { "epoch": 0.5915384165252502, "grad_norm": 1.40625, "learning_rate": 0.00017404737873262397, "loss": 2.9716, "step": 12636 }, { "epoch": 0.5915852302650828, "grad_norm": 1.5, "learning_rate": 0.00017404345542310803, "loss": 2.635, "step": 12637 }, { "epoch": 0.5916320440049154, "grad_norm": 1.796875, "learning_rate": 0.00017403953186129328, "loss": 2.9589, "step": 12638 }, { "epoch": 0.591678857744748, "grad_norm": 1.484375, "learning_rate": 0.00017403560804719315, "loss": 2.751, "step": 12639 }, { "epoch": 0.5917256714845808, "grad_norm": 1.7109375, "learning_rate": 0.00017403168398082093, "loss": 2.5185, "step": 12640 }, { "epoch": 0.5917724852244134, "grad_norm": 1.3125, "learning_rate": 0.00017402775966219005, "loss": 2.5776, "step": 12641 }, { "epoch": 0.591819298964246, "grad_norm": 1.3125, "learning_rate": 0.00017402383509131385, "loss": 2.8146, "step": 12642 }, { "epoch": 0.5918661127040786, "grad_norm": 1.296875, "learning_rate": 0.0001740199102682057, "loss": 2.8277, "step": 12643 }, { "epoch": 0.5919129264439112, "grad_norm": 1.546875, "learning_rate": 0.00017401598519287898, "loss": 2.8012, "step": 12644 }, { "epoch": 0.591959740183744, "grad_norm": 2.34375, "learning_rate": 0.0001740120598653471, "loss": 2.8483, "step": 12645 }, { "epoch": 0.5920065539235766, "grad_norm": 1.3984375, "learning_rate": 0.00017400813428562336, "loss": 2.9976, "step": 12646 }, { "epoch": 0.5920533676634092, "grad_norm": 1.546875, "learning_rate": 0.00017400420845372123, "loss": 3.0193, "step": 12647 }, { "epoch": 0.5921001814032418, "grad_norm": 1.21875, "learning_rate": 0.000174000282369654, "loss": 2.5137, "step": 12648 }, { "epoch": 0.5921469951430745, "grad_norm": 1.5390625, "learning_rate": 0.0001739963560334351, "loss": 3.026, "step": 12649 }, { "epoch": 0.5921938088829072, "grad_norm": 1.1953125, "learning_rate": 0.00017399242944507787, "loss": 2.9613, "step": 12650 }, { "epoch": 0.5922406226227398, "grad_norm": 1.3046875, "learning_rate": 0.00017398850260459575, "loss": 2.7453, "step": 12651 }, { "epoch": 0.5922874363625724, "grad_norm": 1.546875, "learning_rate": 0.00017398457551200206, "loss": 2.8305, "step": 12652 }, { "epoch": 0.592334250102405, "grad_norm": 2.078125, "learning_rate": 0.0001739806481673102, "loss": 2.6604, "step": 12653 }, { "epoch": 0.5923810638422377, "grad_norm": 7.40625, "learning_rate": 0.0001739767205705336, "loss": 2.9196, "step": 12654 }, { "epoch": 0.5924278775820704, "grad_norm": 1.2109375, "learning_rate": 0.00017397279272168557, "loss": 2.618, "step": 12655 }, { "epoch": 0.592474691321903, "grad_norm": 1.453125, "learning_rate": 0.0001739688646207795, "loss": 2.8141, "step": 12656 }, { "epoch": 0.5925215050617356, "grad_norm": 1.2265625, "learning_rate": 0.00017396493626782884, "loss": 2.8755, "step": 12657 }, { "epoch": 0.5925683188015682, "grad_norm": 1.65625, "learning_rate": 0.00017396100766284693, "loss": 2.5594, "step": 12658 }, { "epoch": 0.5926151325414009, "grad_norm": 1.703125, "learning_rate": 0.00017395707880584716, "loss": 2.7654, "step": 12659 }, { "epoch": 0.5926619462812336, "grad_norm": 1.859375, "learning_rate": 0.0001739531496968429, "loss": 2.5666, "step": 12660 }, { "epoch": 0.5927087600210662, "grad_norm": 1.2421875, "learning_rate": 0.00017394922033584756, "loss": 2.4518, "step": 12661 }, { "epoch": 0.5927555737608988, "grad_norm": 1.6640625, "learning_rate": 0.00017394529072287454, "loss": 2.8251, "step": 12662 }, { "epoch": 0.5928023875007314, "grad_norm": 1.2265625, "learning_rate": 0.00017394136085793722, "loss": 2.1144, "step": 12663 }, { "epoch": 0.5928492012405641, "grad_norm": 1.671875, "learning_rate": 0.000173937430741049, "loss": 2.4979, "step": 12664 }, { "epoch": 0.5928960149803968, "grad_norm": 1.5, "learning_rate": 0.0001739335003722232, "loss": 3.0358, "step": 12665 }, { "epoch": 0.5929428287202294, "grad_norm": 2.375, "learning_rate": 0.00017392956975147334, "loss": 2.7999, "step": 12666 }, { "epoch": 0.592989642460062, "grad_norm": 1.7890625, "learning_rate": 0.0001739256388788127, "loss": 2.677, "step": 12667 }, { "epoch": 0.5930364561998946, "grad_norm": 1.1484375, "learning_rate": 0.0001739217077542547, "loss": 2.9144, "step": 12668 }, { "epoch": 0.5930832699397273, "grad_norm": 1.4921875, "learning_rate": 0.0001739177763778128, "loss": 2.6735, "step": 12669 }, { "epoch": 0.59313008367956, "grad_norm": 1.546875, "learning_rate": 0.00017391384474950034, "loss": 2.8436, "step": 12670 }, { "epoch": 0.5931768974193926, "grad_norm": 1.5, "learning_rate": 0.00017390991286933072, "loss": 2.6166, "step": 12671 }, { "epoch": 0.5932237111592252, "grad_norm": 1.2578125, "learning_rate": 0.00017390598073731734, "loss": 1.736, "step": 12672 }, { "epoch": 0.5932705248990578, "grad_norm": 1.328125, "learning_rate": 0.0001739020483534736, "loss": 2.5516, "step": 12673 }, { "epoch": 0.5933173386388905, "grad_norm": 1.46875, "learning_rate": 0.0001738981157178129, "loss": 2.7452, "step": 12674 }, { "epoch": 0.5933641523787232, "grad_norm": 1.5703125, "learning_rate": 0.0001738941828303487, "loss": 2.6763, "step": 12675 }, { "epoch": 0.5934109661185558, "grad_norm": 1.9140625, "learning_rate": 0.00017389024969109427, "loss": 2.9845, "step": 12676 }, { "epoch": 0.5934577798583884, "grad_norm": 1.1875, "learning_rate": 0.00017388631630006312, "loss": 2.7455, "step": 12677 }, { "epoch": 0.593504593598221, "grad_norm": 1.484375, "learning_rate": 0.0001738823826572686, "loss": 2.6882, "step": 12678 }, { "epoch": 0.5935514073380537, "grad_norm": 1.828125, "learning_rate": 0.00017387844876272412, "loss": 3.0751, "step": 12679 }, { "epoch": 0.5935982210778864, "grad_norm": 1.1328125, "learning_rate": 0.00017387451461644312, "loss": 2.9102, "step": 12680 }, { "epoch": 0.593645034817719, "grad_norm": 1.875, "learning_rate": 0.000173870580218439, "loss": 2.8661, "step": 12681 }, { "epoch": 0.5936918485575516, "grad_norm": 1.609375, "learning_rate": 0.0001738666455687251, "loss": 2.8634, "step": 12682 }, { "epoch": 0.5937386622973843, "grad_norm": 2.21875, "learning_rate": 0.00017386271066731492, "loss": 3.0676, "step": 12683 }, { "epoch": 0.5937854760372169, "grad_norm": 1.21875, "learning_rate": 0.0001738587755142218, "loss": 2.695, "step": 12684 }, { "epoch": 0.5938322897770496, "grad_norm": 1.6015625, "learning_rate": 0.00017385484010945916, "loss": 2.75, "step": 12685 }, { "epoch": 0.5938791035168822, "grad_norm": 1.5078125, "learning_rate": 0.00017385090445304045, "loss": 2.9084, "step": 12686 }, { "epoch": 0.5939259172567148, "grad_norm": 1.09375, "learning_rate": 0.00017384696854497904, "loss": 2.8827, "step": 12687 }, { "epoch": 0.5939727309965475, "grad_norm": 1.3046875, "learning_rate": 0.00017384303238528835, "loss": 2.4814, "step": 12688 }, { "epoch": 0.5940195447363801, "grad_norm": 1.359375, "learning_rate": 0.0001738390959739818, "loss": 2.7973, "step": 12689 }, { "epoch": 0.5940663584762128, "grad_norm": 1.6171875, "learning_rate": 0.0001738351593110728, "loss": 2.7306, "step": 12690 }, { "epoch": 0.5941131722160454, "grad_norm": 1.4609375, "learning_rate": 0.00017383122239657473, "loss": 2.8801, "step": 12691 }, { "epoch": 0.594159985955878, "grad_norm": 1.390625, "learning_rate": 0.0001738272852305011, "loss": 2.7034, "step": 12692 }, { "epoch": 0.5942067996957107, "grad_norm": 1.78125, "learning_rate": 0.0001738233478128652, "loss": 3.0701, "step": 12693 }, { "epoch": 0.5942536134355433, "grad_norm": 1.1015625, "learning_rate": 0.00017381941014368054, "loss": 2.7762, "step": 12694 }, { "epoch": 0.594300427175376, "grad_norm": 1.78125, "learning_rate": 0.00017381547222296054, "loss": 3.4408, "step": 12695 }, { "epoch": 0.5943472409152086, "grad_norm": 1.3671875, "learning_rate": 0.00017381153405071853, "loss": 2.484, "step": 12696 }, { "epoch": 0.5943940546550412, "grad_norm": 1.3125, "learning_rate": 0.000173807595626968, "loss": 2.5565, "step": 12697 }, { "epoch": 0.5944408683948739, "grad_norm": 1.21875, "learning_rate": 0.00017380365695172234, "loss": 2.6197, "step": 12698 }, { "epoch": 0.5944876821347065, "grad_norm": 1.5390625, "learning_rate": 0.00017379971802499502, "loss": 2.58, "step": 12699 }, { "epoch": 0.5945344958745392, "grad_norm": 1.203125, "learning_rate": 0.00017379577884679942, "loss": 2.4847, "step": 12700 }, { "epoch": 0.5945813096143718, "grad_norm": 1.578125, "learning_rate": 0.00017379183941714893, "loss": 2.4469, "step": 12701 }, { "epoch": 0.5946281233542045, "grad_norm": 1.9765625, "learning_rate": 0.00017378789973605704, "loss": 2.6772, "step": 12702 }, { "epoch": 0.5946749370940371, "grad_norm": 1.546875, "learning_rate": 0.00017378395980353716, "loss": 3.192, "step": 12703 }, { "epoch": 0.5947217508338697, "grad_norm": 1.40625, "learning_rate": 0.0001737800196196027, "loss": 2.4678, "step": 12704 }, { "epoch": 0.5947685645737024, "grad_norm": 7.46875, "learning_rate": 0.00017377607918426708, "loss": 2.5607, "step": 12705 }, { "epoch": 0.594815378313535, "grad_norm": 1.2265625, "learning_rate": 0.00017377213849754372, "loss": 2.4646, "step": 12706 }, { "epoch": 0.5948621920533677, "grad_norm": 1.53125, "learning_rate": 0.00017376819755944608, "loss": 2.5973, "step": 12707 }, { "epoch": 0.5949090057932003, "grad_norm": 1.375, "learning_rate": 0.00017376425636998757, "loss": 2.9937, "step": 12708 }, { "epoch": 0.5949558195330329, "grad_norm": 1.6953125, "learning_rate": 0.00017376031492918163, "loss": 2.3361, "step": 12709 }, { "epoch": 0.5950026332728656, "grad_norm": 1.9453125, "learning_rate": 0.00017375637323704165, "loss": 2.8133, "step": 12710 }, { "epoch": 0.5950494470126982, "grad_norm": 1.2890625, "learning_rate": 0.00017375243129358112, "loss": 3.2039, "step": 12711 }, { "epoch": 0.5950962607525309, "grad_norm": 1.703125, "learning_rate": 0.00017374848909881342, "loss": 3.1817, "step": 12712 }, { "epoch": 0.5951430744923635, "grad_norm": 1.515625, "learning_rate": 0.00017374454665275202, "loss": 2.8176, "step": 12713 }, { "epoch": 0.5951898882321961, "grad_norm": 1.1640625, "learning_rate": 0.00017374060395541034, "loss": 2.7484, "step": 12714 }, { "epoch": 0.5952367019720288, "grad_norm": 1.640625, "learning_rate": 0.0001737366610068018, "loss": 2.6032, "step": 12715 }, { "epoch": 0.5952835157118614, "grad_norm": 1.765625, "learning_rate": 0.00017373271780693987, "loss": 2.4545, "step": 12716 }, { "epoch": 0.5953303294516941, "grad_norm": 3.34375, "learning_rate": 0.00017372877435583797, "loss": 3.3512, "step": 12717 }, { "epoch": 0.5953771431915267, "grad_norm": 1.125, "learning_rate": 0.0001737248306535095, "loss": 2.4611, "step": 12718 }, { "epoch": 0.5954239569313593, "grad_norm": 0.95703125, "learning_rate": 0.00017372088669996796, "loss": 3.4098, "step": 12719 }, { "epoch": 0.595470770671192, "grad_norm": 1.765625, "learning_rate": 0.00017371694249522675, "loss": 2.2652, "step": 12720 }, { "epoch": 0.5955175844110246, "grad_norm": 1.4921875, "learning_rate": 0.00017371299803929934, "loss": 3.3031, "step": 12721 }, { "epoch": 0.5955643981508573, "grad_norm": 1.3203125, "learning_rate": 0.00017370905333219912, "loss": 2.7479, "step": 12722 }, { "epoch": 0.5956112118906899, "grad_norm": 1.671875, "learning_rate": 0.00017370510837393956, "loss": 2.0859, "step": 12723 }, { "epoch": 0.5956580256305225, "grad_norm": 1.1953125, "learning_rate": 0.00017370116316453414, "loss": 2.8529, "step": 12724 }, { "epoch": 0.5957048393703552, "grad_norm": 1.265625, "learning_rate": 0.00017369721770399622, "loss": 2.6986, "step": 12725 }, { "epoch": 0.5957516531101879, "grad_norm": 2.03125, "learning_rate": 0.00017369327199233934, "loss": 3.235, "step": 12726 }, { "epoch": 0.5957984668500205, "grad_norm": 1.5625, "learning_rate": 0.00017368932602957683, "loss": 1.9441, "step": 12727 }, { "epoch": 0.5958452805898531, "grad_norm": 1.703125, "learning_rate": 0.00017368537981572225, "loss": 3.1739, "step": 12728 }, { "epoch": 0.5958920943296858, "grad_norm": 1.1640625, "learning_rate": 0.000173681433350789, "loss": 2.6312, "step": 12729 }, { "epoch": 0.5959389080695184, "grad_norm": 2.171875, "learning_rate": 0.0001736774866347905, "loss": 2.6972, "step": 12730 }, { "epoch": 0.595985721809351, "grad_norm": 1.671875, "learning_rate": 0.00017367353966774024, "loss": 2.7657, "step": 12731 }, { "epoch": 0.5960325355491837, "grad_norm": 2.109375, "learning_rate": 0.00017366959244965164, "loss": 2.7066, "step": 12732 }, { "epoch": 0.5960793492890163, "grad_norm": 1.328125, "learning_rate": 0.0001736656449805382, "loss": 2.0207, "step": 12733 }, { "epoch": 0.596126163028849, "grad_norm": 1.53125, "learning_rate": 0.00017366169726041328, "loss": 2.6683, "step": 12734 }, { "epoch": 0.5961729767686816, "grad_norm": 1.40625, "learning_rate": 0.00017365774928929038, "loss": 2.7228, "step": 12735 }, { "epoch": 0.5962197905085143, "grad_norm": 1.6796875, "learning_rate": 0.000173653801067183, "loss": 2.5351, "step": 12736 }, { "epoch": 0.5962666042483469, "grad_norm": 1.3515625, "learning_rate": 0.00017364985259410456, "loss": 2.4899, "step": 12737 }, { "epoch": 0.5963134179881795, "grad_norm": 1.328125, "learning_rate": 0.00017364590387006844, "loss": 2.5506, "step": 12738 }, { "epoch": 0.5963602317280122, "grad_norm": 1.484375, "learning_rate": 0.00017364195489508823, "loss": 2.6981, "step": 12739 }, { "epoch": 0.5964070454678448, "grad_norm": 1.4453125, "learning_rate": 0.00017363800566917728, "loss": 2.8669, "step": 12740 }, { "epoch": 0.5964538592076775, "grad_norm": 1.703125, "learning_rate": 0.00017363405619234907, "loss": 2.7472, "step": 12741 }, { "epoch": 0.5965006729475101, "grad_norm": 1.453125, "learning_rate": 0.00017363010646461707, "loss": 2.8505, "step": 12742 }, { "epoch": 0.5965474866873427, "grad_norm": 1.375, "learning_rate": 0.00017362615648599478, "loss": 2.7131, "step": 12743 }, { "epoch": 0.5965943004271754, "grad_norm": 1.5, "learning_rate": 0.0001736222062564956, "loss": 2.8077, "step": 12744 }, { "epoch": 0.596641114167008, "grad_norm": 1.921875, "learning_rate": 0.000173618255776133, "loss": 2.6749, "step": 12745 }, { "epoch": 0.5966879279068407, "grad_norm": 1.578125, "learning_rate": 0.00017361430504492042, "loss": 2.8416, "step": 12746 }, { "epoch": 0.5967347416466733, "grad_norm": 2.140625, "learning_rate": 0.00017361035406287137, "loss": 2.662, "step": 12747 }, { "epoch": 0.5967815553865059, "grad_norm": 1.7421875, "learning_rate": 0.00017360640282999934, "loss": 3.1442, "step": 12748 }, { "epoch": 0.5968283691263386, "grad_norm": 1.6875, "learning_rate": 0.00017360245134631768, "loss": 3.1459, "step": 12749 }, { "epoch": 0.5968751828661712, "grad_norm": 1.234375, "learning_rate": 0.00017359849961183995, "loss": 2.9293, "step": 12750 }, { "epoch": 0.5969219966060039, "grad_norm": 1.9140625, "learning_rate": 0.0001735945476265796, "loss": 2.6233, "step": 12751 }, { "epoch": 0.5969688103458365, "grad_norm": 1.484375, "learning_rate": 0.00017359059539055006, "loss": 2.9376, "step": 12752 }, { "epoch": 0.5970156240856691, "grad_norm": 1.328125, "learning_rate": 0.00017358664290376483, "loss": 2.4098, "step": 12753 }, { "epoch": 0.5970624378255018, "grad_norm": 1.34375, "learning_rate": 0.00017358269016623737, "loss": 2.8001, "step": 12754 }, { "epoch": 0.5971092515653345, "grad_norm": 2.015625, "learning_rate": 0.00017357873717798114, "loss": 2.4818, "step": 12755 }, { "epoch": 0.5971560653051671, "grad_norm": 2.359375, "learning_rate": 0.00017357478393900962, "loss": 2.9644, "step": 12756 }, { "epoch": 0.5972028790449997, "grad_norm": 1.78125, "learning_rate": 0.00017357083044933628, "loss": 3.1585, "step": 12757 }, { "epoch": 0.5972496927848323, "grad_norm": 1.484375, "learning_rate": 0.0001735668767089746, "loss": 2.755, "step": 12758 }, { "epoch": 0.597296506524665, "grad_norm": 1.4609375, "learning_rate": 0.00017356292271793802, "loss": 2.3294, "step": 12759 }, { "epoch": 0.5973433202644977, "grad_norm": 1.8125, "learning_rate": 0.00017355896847624002, "loss": 3.1951, "step": 12760 }, { "epoch": 0.5973901340043303, "grad_norm": 1.4453125, "learning_rate": 0.0001735550139838941, "loss": 2.9077, "step": 12761 }, { "epoch": 0.5974369477441629, "grad_norm": 1.1484375, "learning_rate": 0.00017355105924091375, "loss": 2.3863, "step": 12762 }, { "epoch": 0.5974837614839955, "grad_norm": 1.3125, "learning_rate": 0.00017354710424731239, "loss": 2.5818, "step": 12763 }, { "epoch": 0.5975305752238282, "grad_norm": 1.5859375, "learning_rate": 0.00017354314900310352, "loss": 2.96, "step": 12764 }, { "epoch": 0.5975773889636609, "grad_norm": 1.453125, "learning_rate": 0.00017353919350830062, "loss": 3.2227, "step": 12765 }, { "epoch": 0.5976242027034935, "grad_norm": 1.59375, "learning_rate": 0.00017353523776291718, "loss": 2.7358, "step": 12766 }, { "epoch": 0.5976710164433261, "grad_norm": 1.3203125, "learning_rate": 0.00017353128176696668, "loss": 2.7397, "step": 12767 }, { "epoch": 0.5977178301831587, "grad_norm": 1.375, "learning_rate": 0.00017352732552046256, "loss": 2.7098, "step": 12768 }, { "epoch": 0.5977646439229914, "grad_norm": 1.5390625, "learning_rate": 0.00017352336902341832, "loss": 2.7608, "step": 12769 }, { "epoch": 0.5978114576628241, "grad_norm": 1.984375, "learning_rate": 0.00017351941227584748, "loss": 2.4471, "step": 12770 }, { "epoch": 0.5978582714026567, "grad_norm": 1.1328125, "learning_rate": 0.0001735154552777635, "loss": 2.6431, "step": 12771 }, { "epoch": 0.5979050851424893, "grad_norm": 2.0625, "learning_rate": 0.00017351149802917983, "loss": 3.1146, "step": 12772 }, { "epoch": 0.5979518988823219, "grad_norm": 1.6015625, "learning_rate": 0.00017350754053010997, "loss": 2.8047, "step": 12773 }, { "epoch": 0.5979987126221546, "grad_norm": 1.296875, "learning_rate": 0.00017350358278056744, "loss": 2.6783, "step": 12774 }, { "epoch": 0.5980455263619873, "grad_norm": 1.1953125, "learning_rate": 0.0001734996247805657, "loss": 2.8486, "step": 12775 }, { "epoch": 0.5980923401018199, "grad_norm": 1.5, "learning_rate": 0.00017349566653011823, "loss": 2.9347, "step": 12776 }, { "epoch": 0.5981391538416525, "grad_norm": 1.796875, "learning_rate": 0.00017349170802923852, "loss": 3.0233, "step": 12777 }, { "epoch": 0.5981859675814851, "grad_norm": 1.2578125, "learning_rate": 0.00017348774927794007, "loss": 2.7116, "step": 12778 }, { "epoch": 0.5982327813213179, "grad_norm": 1.828125, "learning_rate": 0.00017348379027623637, "loss": 2.9088, "step": 12779 }, { "epoch": 0.5982795950611505, "grad_norm": 1.6875, "learning_rate": 0.00017347983102414087, "loss": 2.5148, "step": 12780 }, { "epoch": 0.5983264088009831, "grad_norm": 2.34375, "learning_rate": 0.00017347587152166712, "loss": 2.8619, "step": 12781 }, { "epoch": 0.5983732225408157, "grad_norm": 1.1484375, "learning_rate": 0.0001734719117688286, "loss": 2.6153, "step": 12782 }, { "epoch": 0.5984200362806483, "grad_norm": 1.125, "learning_rate": 0.00017346795176563875, "loss": 2.4402, "step": 12783 }, { "epoch": 0.5984668500204811, "grad_norm": 1.3984375, "learning_rate": 0.0001734639915121111, "loss": 2.8152, "step": 12784 }, { "epoch": 0.5985136637603137, "grad_norm": 1.9140625, "learning_rate": 0.00017346003100825918, "loss": 2.8604, "step": 12785 }, { "epoch": 0.5985604775001463, "grad_norm": 1.359375, "learning_rate": 0.00017345607025409643, "loss": 2.7554, "step": 12786 }, { "epoch": 0.5986072912399789, "grad_norm": 2.59375, "learning_rate": 0.00017345210924963637, "loss": 2.6112, "step": 12787 }, { "epoch": 0.5986541049798115, "grad_norm": 1.15625, "learning_rate": 0.00017344814799489247, "loss": 2.4983, "step": 12788 }, { "epoch": 0.5987009187196443, "grad_norm": 3.3125, "learning_rate": 0.0001734441864898783, "loss": 2.4, "step": 12789 }, { "epoch": 0.5987477324594769, "grad_norm": 1.328125, "learning_rate": 0.00017344022473460727, "loss": 2.3015, "step": 12790 }, { "epoch": 0.5987945461993095, "grad_norm": 1.3046875, "learning_rate": 0.00017343626272909294, "loss": 2.6629, "step": 12791 }, { "epoch": 0.5988413599391421, "grad_norm": 1.5234375, "learning_rate": 0.0001734323004733488, "loss": 2.6375, "step": 12792 }, { "epoch": 0.5988881736789747, "grad_norm": 1.296875, "learning_rate": 0.0001734283379673883, "loss": 2.3982, "step": 12793 }, { "epoch": 0.5989349874188075, "grad_norm": 1.7421875, "learning_rate": 0.000173424375211225, "loss": 2.8874, "step": 12794 }, { "epoch": 0.5989818011586401, "grad_norm": 1.28125, "learning_rate": 0.00017342041220487242, "loss": 2.373, "step": 12795 }, { "epoch": 0.5990286148984727, "grad_norm": 1.4375, "learning_rate": 0.000173416448948344, "loss": 2.6335, "step": 12796 }, { "epoch": 0.5990754286383053, "grad_norm": 1.4921875, "learning_rate": 0.00017341248544165328, "loss": 2.7142, "step": 12797 }, { "epoch": 0.5991222423781379, "grad_norm": 1.984375, "learning_rate": 0.00017340852168481376, "loss": 2.7366, "step": 12798 }, { "epoch": 0.5991690561179707, "grad_norm": 1.203125, "learning_rate": 0.0001734045576778389, "loss": 2.5849, "step": 12799 }, { "epoch": 0.5992158698578033, "grad_norm": 1.2265625, "learning_rate": 0.00017340059342074233, "loss": 2.6951, "step": 12800 }, { "epoch": 0.5992626835976359, "grad_norm": 1.484375, "learning_rate": 0.00017339662891353746, "loss": 2.7015, "step": 12801 }, { "epoch": 0.5993094973374685, "grad_norm": 2.53125, "learning_rate": 0.0001733926641562378, "loss": 3.1989, "step": 12802 }, { "epoch": 0.5993563110773011, "grad_norm": 2.171875, "learning_rate": 0.00017338869914885688, "loss": 3.2593, "step": 12803 }, { "epoch": 0.5994031248171339, "grad_norm": 1.3125, "learning_rate": 0.00017338473389140818, "loss": 2.6034, "step": 12804 }, { "epoch": 0.5994499385569665, "grad_norm": 2.78125, "learning_rate": 0.0001733807683839053, "loss": 3.196, "step": 12805 }, { "epoch": 0.5994967522967991, "grad_norm": 1.3046875, "learning_rate": 0.00017337680262636166, "loss": 2.6393, "step": 12806 }, { "epoch": 0.5995435660366317, "grad_norm": 1.1796875, "learning_rate": 0.00017337283661879083, "loss": 2.3875, "step": 12807 }, { "epoch": 0.5995903797764643, "grad_norm": 1.4609375, "learning_rate": 0.00017336887036120625, "loss": 2.9327, "step": 12808 }, { "epoch": 0.5996371935162971, "grad_norm": 1.8359375, "learning_rate": 0.00017336490385362155, "loss": 2.506, "step": 12809 }, { "epoch": 0.5996840072561297, "grad_norm": 1.4375, "learning_rate": 0.00017336093709605015, "loss": 2.9826, "step": 12810 }, { "epoch": 0.5997308209959623, "grad_norm": 1.546875, "learning_rate": 0.0001733569700885056, "loss": 2.9609, "step": 12811 }, { "epoch": 0.5997776347357949, "grad_norm": 1.203125, "learning_rate": 0.0001733530028310014, "loss": 2.4617, "step": 12812 }, { "epoch": 0.5998244484756275, "grad_norm": 1.2421875, "learning_rate": 0.0001733490353235511, "loss": 2.5298, "step": 12813 }, { "epoch": 0.5998712622154603, "grad_norm": 1.2578125, "learning_rate": 0.00017334506756616817, "loss": 2.6329, "step": 12814 }, { "epoch": 0.5999180759552929, "grad_norm": 1.96875, "learning_rate": 0.0001733410995588662, "loss": 2.6358, "step": 12815 }, { "epoch": 0.5999648896951255, "grad_norm": 1.1796875, "learning_rate": 0.00017333713130165864, "loss": 3.089, "step": 12816 }, { "epoch": 0.6000117034349581, "grad_norm": 1.5078125, "learning_rate": 0.0001733331627945591, "loss": 2.6637, "step": 12817 }, { "epoch": 0.6000585171747908, "grad_norm": 1.59375, "learning_rate": 0.000173329194037581, "loss": 2.6605, "step": 12818 }, { "epoch": 0.6001053309146235, "grad_norm": 1.546875, "learning_rate": 0.0001733252250307379, "loss": 2.9383, "step": 12819 }, { "epoch": 0.6001521446544561, "grad_norm": 1.4453125, "learning_rate": 0.0001733212557740434, "loss": 2.7803, "step": 12820 }, { "epoch": 0.6001989583942887, "grad_norm": 1.671875, "learning_rate": 0.0001733172862675109, "loss": 2.7904, "step": 12821 }, { "epoch": 0.6002457721341213, "grad_norm": 1.3203125, "learning_rate": 0.00017331331651115395, "loss": 2.6232, "step": 12822 }, { "epoch": 0.600292585873954, "grad_norm": 1.4140625, "learning_rate": 0.0001733093465049862, "loss": 2.7768, "step": 12823 }, { "epoch": 0.6003393996137867, "grad_norm": 1.640625, "learning_rate": 0.00017330537624902107, "loss": 2.5381, "step": 12824 }, { "epoch": 0.6003862133536193, "grad_norm": 1.328125, "learning_rate": 0.00017330140574327207, "loss": 2.4543, "step": 12825 }, { "epoch": 0.6004330270934519, "grad_norm": 1.3984375, "learning_rate": 0.00017329743498775279, "loss": 3.1966, "step": 12826 }, { "epoch": 0.6004798408332845, "grad_norm": 1.3046875, "learning_rate": 0.00017329346398247672, "loss": 3.1362, "step": 12827 }, { "epoch": 0.6005266545731172, "grad_norm": 2.296875, "learning_rate": 0.00017328949272745741, "loss": 2.8626, "step": 12828 }, { "epoch": 0.6005734683129499, "grad_norm": 1.265625, "learning_rate": 0.00017328552122270843, "loss": 2.4514, "step": 12829 }, { "epoch": 0.6006202820527825, "grad_norm": 2.296875, "learning_rate": 0.00017328154946824324, "loss": 2.8372, "step": 12830 }, { "epoch": 0.6006670957926151, "grad_norm": 1.4296875, "learning_rate": 0.0001732775774640754, "loss": 2.6294, "step": 12831 }, { "epoch": 0.6007139095324477, "grad_norm": 1.5234375, "learning_rate": 0.00017327360521021844, "loss": 4.6966, "step": 12832 }, { "epoch": 0.6007607232722804, "grad_norm": 1.7734375, "learning_rate": 0.00017326963270668594, "loss": 2.8139, "step": 12833 }, { "epoch": 0.6008075370121131, "grad_norm": 1.84375, "learning_rate": 0.00017326565995349138, "loss": 3.1284, "step": 12834 }, { "epoch": 0.6008543507519457, "grad_norm": 1.4140625, "learning_rate": 0.00017326168695064835, "loss": 2.6496, "step": 12835 }, { "epoch": 0.6009011644917783, "grad_norm": 1.4296875, "learning_rate": 0.00017325771369817032, "loss": 2.3798, "step": 12836 }, { "epoch": 0.600947978231611, "grad_norm": 1.140625, "learning_rate": 0.00017325374019607086, "loss": 2.989, "step": 12837 }, { "epoch": 0.6009947919714436, "grad_norm": 1.359375, "learning_rate": 0.00017324976644436354, "loss": 2.288, "step": 12838 }, { "epoch": 0.6010416057112763, "grad_norm": 1.4453125, "learning_rate": 0.00017324579244306187, "loss": 2.8043, "step": 12839 }, { "epoch": 0.6010884194511089, "grad_norm": 1.703125, "learning_rate": 0.0001732418181921794, "loss": 3.0927, "step": 12840 }, { "epoch": 0.6011352331909415, "grad_norm": 2.1875, "learning_rate": 0.00017323784369172965, "loss": 2.3534, "step": 12841 }, { "epoch": 0.6011820469307741, "grad_norm": 3.09375, "learning_rate": 0.0001732338689417262, "loss": 2.1606, "step": 12842 }, { "epoch": 0.6012288606706068, "grad_norm": 1.3046875, "learning_rate": 0.00017322989394218257, "loss": 2.844, "step": 12843 }, { "epoch": 0.6012756744104395, "grad_norm": 1.2890625, "learning_rate": 0.0001732259186931123, "loss": 2.8443, "step": 12844 }, { "epoch": 0.6013224881502721, "grad_norm": 1.421875, "learning_rate": 0.00017322194319452895, "loss": 2.8489, "step": 12845 }, { "epoch": 0.6013693018901047, "grad_norm": 2.078125, "learning_rate": 0.00017321796744644607, "loss": 2.3483, "step": 12846 }, { "epoch": 0.6014161156299374, "grad_norm": 1.171875, "learning_rate": 0.00017321399144887718, "loss": 2.4164, "step": 12847 }, { "epoch": 0.6014629293697701, "grad_norm": 1.5234375, "learning_rate": 0.00017321001520183583, "loss": 2.5864, "step": 12848 }, { "epoch": 0.6015097431096027, "grad_norm": 1.34375, "learning_rate": 0.00017320603870533562, "loss": 2.8463, "step": 12849 }, { "epoch": 0.6015565568494353, "grad_norm": 1.7734375, "learning_rate": 0.00017320206195939005, "loss": 2.6558, "step": 12850 }, { "epoch": 0.6016033705892679, "grad_norm": 2.203125, "learning_rate": 0.00017319808496401272, "loss": 2.7455, "step": 12851 }, { "epoch": 0.6016501843291006, "grad_norm": 1.4609375, "learning_rate": 0.00017319410771921711, "loss": 2.5055, "step": 12852 }, { "epoch": 0.6016969980689333, "grad_norm": 1.34375, "learning_rate": 0.00017319013022501682, "loss": 2.6306, "step": 12853 }, { "epoch": 0.6017438118087659, "grad_norm": 1.1640625, "learning_rate": 0.00017318615248142539, "loss": 2.7242, "step": 12854 }, { "epoch": 0.6017906255485985, "grad_norm": 1.2421875, "learning_rate": 0.00017318217448845634, "loss": 2.7597, "step": 12855 }, { "epoch": 0.6018374392884311, "grad_norm": 1.2109375, "learning_rate": 0.00017317819624612332, "loss": 2.7038, "step": 12856 }, { "epoch": 0.6018842530282638, "grad_norm": 1.1484375, "learning_rate": 0.0001731742177544398, "loss": 2.4941, "step": 12857 }, { "epoch": 0.6019310667680965, "grad_norm": 1.2890625, "learning_rate": 0.00017317023901341935, "loss": 2.7119, "step": 12858 }, { "epoch": 0.6019778805079291, "grad_norm": 4.40625, "learning_rate": 0.00017316626002307554, "loss": 2.6419, "step": 12859 }, { "epoch": 0.6020246942477617, "grad_norm": 1.359375, "learning_rate": 0.00017316228078342196, "loss": 3.0388, "step": 12860 }, { "epoch": 0.6020715079875943, "grad_norm": 1.046875, "learning_rate": 0.0001731583012944721, "loss": 2.9911, "step": 12861 }, { "epoch": 0.602118321727427, "grad_norm": 1.7734375, "learning_rate": 0.00017315432155623958, "loss": 2.7046, "step": 12862 }, { "epoch": 0.6021651354672597, "grad_norm": 1.78125, "learning_rate": 0.00017315034156873796, "loss": 2.6989, "step": 12863 }, { "epoch": 0.6022119492070923, "grad_norm": 1.6484375, "learning_rate": 0.00017314636133198072, "loss": 2.8656, "step": 12864 }, { "epoch": 0.6022587629469249, "grad_norm": 1.5859375, "learning_rate": 0.00017314238084598153, "loss": 2.8805, "step": 12865 }, { "epoch": 0.6023055766867575, "grad_norm": 1.28125, "learning_rate": 0.0001731384001107539, "loss": 2.8956, "step": 12866 }, { "epoch": 0.6023523904265902, "grad_norm": 1.328125, "learning_rate": 0.00017313441912631137, "loss": 2.0334, "step": 12867 }, { "epoch": 0.6023992041664229, "grad_norm": 1.8046875, "learning_rate": 0.00017313043789266757, "loss": 2.6783, "step": 12868 }, { "epoch": 0.6024460179062555, "grad_norm": 1.171875, "learning_rate": 0.00017312645640983601, "loss": 2.3666, "step": 12869 }, { "epoch": 0.6024928316460881, "grad_norm": 1.6953125, "learning_rate": 0.00017312247467783027, "loss": 2.4364, "step": 12870 }, { "epoch": 0.6025396453859208, "grad_norm": 1.28125, "learning_rate": 0.00017311849269666395, "loss": 2.8374, "step": 12871 }, { "epoch": 0.6025864591257534, "grad_norm": 2.015625, "learning_rate": 0.00017311451046635057, "loss": 2.608, "step": 12872 }, { "epoch": 0.6026332728655861, "grad_norm": 1.40625, "learning_rate": 0.00017311052798690374, "loss": 2.5795, "step": 12873 }, { "epoch": 0.6026800866054187, "grad_norm": 1.3671875, "learning_rate": 0.00017310654525833698, "loss": 3.0867, "step": 12874 }, { "epoch": 0.6027269003452513, "grad_norm": 2.53125, "learning_rate": 0.0001731025622806639, "loss": 3.6164, "step": 12875 }, { "epoch": 0.602773714085084, "grad_norm": 1.8828125, "learning_rate": 0.0001730985790538981, "loss": 3.1894, "step": 12876 }, { "epoch": 0.6028205278249166, "grad_norm": 1.375, "learning_rate": 0.0001730945955780531, "loss": 2.6548, "step": 12877 }, { "epoch": 0.6028673415647493, "grad_norm": 1.2109375, "learning_rate": 0.0001730906118531425, "loss": 2.841, "step": 12878 }, { "epoch": 0.6029141553045819, "grad_norm": 1.8203125, "learning_rate": 0.00017308662787917983, "loss": 1.9458, "step": 12879 }, { "epoch": 0.6029609690444145, "grad_norm": 1.3671875, "learning_rate": 0.00017308264365617872, "loss": 2.5877, "step": 12880 }, { "epoch": 0.6030077827842472, "grad_norm": 1.875, "learning_rate": 0.00017307865918415272, "loss": 2.4993, "step": 12881 }, { "epoch": 0.6030545965240798, "grad_norm": 1.265625, "learning_rate": 0.0001730746744631154, "loss": 2.3218, "step": 12882 }, { "epoch": 0.6031014102639125, "grad_norm": 1.5546875, "learning_rate": 0.00017307068949308038, "loss": 2.4991, "step": 12883 }, { "epoch": 0.6031482240037451, "grad_norm": 1.3359375, "learning_rate": 0.0001730667042740612, "loss": 3.0897, "step": 12884 }, { "epoch": 0.6031950377435777, "grad_norm": 2.203125, "learning_rate": 0.00017306271880607145, "loss": 2.2598, "step": 12885 }, { "epoch": 0.6032418514834104, "grad_norm": 1.2578125, "learning_rate": 0.00017305873308912472, "loss": 2.7146, "step": 12886 }, { "epoch": 0.603288665223243, "grad_norm": 1.484375, "learning_rate": 0.00017305474712323458, "loss": 2.6376, "step": 12887 }, { "epoch": 0.6033354789630757, "grad_norm": 1.890625, "learning_rate": 0.00017305076090841457, "loss": 3.1906, "step": 12888 }, { "epoch": 0.6033822927029083, "grad_norm": 1.3359375, "learning_rate": 0.00017304677444467833, "loss": 2.8396, "step": 12889 }, { "epoch": 0.603429106442741, "grad_norm": 1.1875, "learning_rate": 0.00017304278773203947, "loss": 2.4421, "step": 12890 }, { "epoch": 0.6034759201825736, "grad_norm": 1.125, "learning_rate": 0.00017303880077051148, "loss": 2.5506, "step": 12891 }, { "epoch": 0.6035227339224062, "grad_norm": 1.625, "learning_rate": 0.00017303481356010803, "loss": 2.8549, "step": 12892 }, { "epoch": 0.6035695476622389, "grad_norm": 1.8359375, "learning_rate": 0.00017303082610084267, "loss": 2.58, "step": 12893 }, { "epoch": 0.6036163614020715, "grad_norm": 2.671875, "learning_rate": 0.00017302683839272897, "loss": 2.3346, "step": 12894 }, { "epoch": 0.6036631751419042, "grad_norm": 1.3828125, "learning_rate": 0.00017302285043578058, "loss": 2.6748, "step": 12895 }, { "epoch": 0.6037099888817368, "grad_norm": 1.8359375, "learning_rate": 0.00017301886223001104, "loss": 2.3536, "step": 12896 }, { "epoch": 0.6037568026215694, "grad_norm": 1.4609375, "learning_rate": 0.00017301487377543393, "loss": 2.5987, "step": 12897 }, { "epoch": 0.6038036163614021, "grad_norm": 1.3828125, "learning_rate": 0.00017301088507206287, "loss": 2.7289, "step": 12898 }, { "epoch": 0.6038504301012347, "grad_norm": 1.4296875, "learning_rate": 0.00017300689611991142, "loss": 2.5517, "step": 12899 }, { "epoch": 0.6038972438410674, "grad_norm": 1.5234375, "learning_rate": 0.0001730029069189932, "loss": 2.5606, "step": 12900 }, { "epoch": 0.6039440575809, "grad_norm": 1.2421875, "learning_rate": 0.0001729989174693218, "loss": 2.0516, "step": 12901 }, { "epoch": 0.6039908713207326, "grad_norm": 1.578125, "learning_rate": 0.0001729949277709108, "loss": 3.0584, "step": 12902 }, { "epoch": 0.6040376850605653, "grad_norm": 1.40625, "learning_rate": 0.0001729909378237738, "loss": 2.6306, "step": 12903 }, { "epoch": 0.6040844988003979, "grad_norm": 1.5625, "learning_rate": 0.0001729869476279244, "loss": 2.6048, "step": 12904 }, { "epoch": 0.6041313125402306, "grad_norm": 1.2265625, "learning_rate": 0.00017298295718337623, "loss": 3.2788, "step": 12905 }, { "epoch": 0.6041781262800632, "grad_norm": 1.4765625, "learning_rate": 0.0001729789664901428, "loss": 2.7585, "step": 12906 }, { "epoch": 0.6042249400198958, "grad_norm": 1.1875, "learning_rate": 0.0001729749755482378, "loss": 2.3048, "step": 12907 }, { "epoch": 0.6042717537597285, "grad_norm": 1.21875, "learning_rate": 0.00017297098435767476, "loss": 2.8554, "step": 12908 }, { "epoch": 0.6043185674995611, "grad_norm": 1.53125, "learning_rate": 0.00017296699291846732, "loss": 2.3389, "step": 12909 }, { "epoch": 0.6043653812393938, "grad_norm": 1.3515625, "learning_rate": 0.00017296300123062906, "loss": 2.5064, "step": 12910 }, { "epoch": 0.6044121949792264, "grad_norm": 1.6015625, "learning_rate": 0.00017295900929417362, "loss": 2.5962, "step": 12911 }, { "epoch": 0.604459008719059, "grad_norm": 1.703125, "learning_rate": 0.00017295501710911455, "loss": 3.0001, "step": 12912 }, { "epoch": 0.6045058224588917, "grad_norm": 1.71875, "learning_rate": 0.0001729510246754655, "loss": 2.7413, "step": 12913 }, { "epoch": 0.6045526361987243, "grad_norm": 1.734375, "learning_rate": 0.00017294703199324002, "loss": 2.6204, "step": 12914 }, { "epoch": 0.604599449938557, "grad_norm": 1.1875, "learning_rate": 0.00017294303906245176, "loss": 2.9924, "step": 12915 }, { "epoch": 0.6046462636783896, "grad_norm": 1.3046875, "learning_rate": 0.0001729390458831143, "loss": 2.4999, "step": 12916 }, { "epoch": 0.6046930774182222, "grad_norm": 1.84375, "learning_rate": 0.00017293505245524125, "loss": 2.7347, "step": 12917 }, { "epoch": 0.6047398911580549, "grad_norm": 1.8359375, "learning_rate": 0.00017293105877884624, "loss": 2.4416, "step": 12918 }, { "epoch": 0.6047867048978876, "grad_norm": 1.40625, "learning_rate": 0.00017292706485394285, "loss": 2.9484, "step": 12919 }, { "epoch": 0.6048335186377202, "grad_norm": 1.3359375, "learning_rate": 0.00017292307068054473, "loss": 2.9812, "step": 12920 }, { "epoch": 0.6048803323775528, "grad_norm": 1.4375, "learning_rate": 0.00017291907625866543, "loss": 4.3785, "step": 12921 }, { "epoch": 0.6049271461173854, "grad_norm": 1.125, "learning_rate": 0.00017291508158831861, "loss": 2.5405, "step": 12922 }, { "epoch": 0.6049739598572181, "grad_norm": 1.234375, "learning_rate": 0.00017291108666951787, "loss": 2.5581, "step": 12923 }, { "epoch": 0.6050207735970508, "grad_norm": 1.2578125, "learning_rate": 0.0001729070915022768, "loss": 3.2739, "step": 12924 }, { "epoch": 0.6050675873368834, "grad_norm": 2.4375, "learning_rate": 0.00017290309608660904, "loss": 3.1828, "step": 12925 }, { "epoch": 0.605114401076716, "grad_norm": 1.2109375, "learning_rate": 0.0001728991004225282, "loss": 2.782, "step": 12926 }, { "epoch": 0.6051612148165486, "grad_norm": 1.203125, "learning_rate": 0.0001728951045100479, "loss": 2.7615, "step": 12927 }, { "epoch": 0.6052080285563813, "grad_norm": 1.3125, "learning_rate": 0.00017289110834918173, "loss": 2.4304, "step": 12928 }, { "epoch": 0.605254842296214, "grad_norm": 1.609375, "learning_rate": 0.0001728871119399433, "loss": 2.8636, "step": 12929 }, { "epoch": 0.6053016560360466, "grad_norm": 1.640625, "learning_rate": 0.00017288311528234626, "loss": 2.9176, "step": 12930 }, { "epoch": 0.6053484697758792, "grad_norm": 2.046875, "learning_rate": 0.00017287911837640422, "loss": 2.4451, "step": 12931 }, { "epoch": 0.6053952835157118, "grad_norm": 1.546875, "learning_rate": 0.00017287512122213082, "loss": 2.838, "step": 12932 }, { "epoch": 0.6054420972555445, "grad_norm": 1.59375, "learning_rate": 0.00017287112381953962, "loss": 2.8182, "step": 12933 }, { "epoch": 0.6054889109953772, "grad_norm": 2.015625, "learning_rate": 0.00017286712616864432, "loss": 2.28, "step": 12934 }, { "epoch": 0.6055357247352098, "grad_norm": 1.3671875, "learning_rate": 0.00017286312826945847, "loss": 2.6701, "step": 12935 }, { "epoch": 0.6055825384750424, "grad_norm": 1.4921875, "learning_rate": 0.00017285913012199573, "loss": 2.6761, "step": 12936 }, { "epoch": 0.605629352214875, "grad_norm": 1.609375, "learning_rate": 0.0001728551317262697, "loss": 2.4187, "step": 12937 }, { "epoch": 0.6056761659547077, "grad_norm": 1.1953125, "learning_rate": 0.00017285113308229403, "loss": 2.5625, "step": 12938 }, { "epoch": 0.6057229796945404, "grad_norm": 1.109375, "learning_rate": 0.00017284713419008234, "loss": 2.6996, "step": 12939 }, { "epoch": 0.605769793434373, "grad_norm": 1.2734375, "learning_rate": 0.00017284313504964827, "loss": 2.6221, "step": 12940 }, { "epoch": 0.6058166071742056, "grad_norm": 1.4453125, "learning_rate": 0.00017283913566100537, "loss": 2.5058, "step": 12941 }, { "epoch": 0.6058634209140382, "grad_norm": 1.7578125, "learning_rate": 0.00017283513602416736, "loss": 2.9053, "step": 12942 }, { "epoch": 0.605910234653871, "grad_norm": 1.8046875, "learning_rate": 0.00017283113613914786, "loss": 2.8928, "step": 12943 }, { "epoch": 0.6059570483937036, "grad_norm": 1.265625, "learning_rate": 0.00017282713600596044, "loss": 2.6909, "step": 12944 }, { "epoch": 0.6060038621335362, "grad_norm": 1.4453125, "learning_rate": 0.00017282313562461878, "loss": 2.7465, "step": 12945 }, { "epoch": 0.6060506758733688, "grad_norm": 1.5234375, "learning_rate": 0.00017281913499513646, "loss": 3.1003, "step": 12946 }, { "epoch": 0.6060974896132014, "grad_norm": 1.1953125, "learning_rate": 0.00017281513411752722, "loss": 2.4475, "step": 12947 }, { "epoch": 0.6061443033530342, "grad_norm": 1.2890625, "learning_rate": 0.00017281113299180456, "loss": 2.5034, "step": 12948 }, { "epoch": 0.6061911170928668, "grad_norm": 1.8203125, "learning_rate": 0.0001728071316179822, "loss": 3.0455, "step": 12949 }, { "epoch": 0.6062379308326994, "grad_norm": 1.765625, "learning_rate": 0.0001728031299960737, "loss": 2.8775, "step": 12950 }, { "epoch": 0.606284744572532, "grad_norm": 1.140625, "learning_rate": 0.00017279912812609278, "loss": 1.7528, "step": 12951 }, { "epoch": 0.6063315583123646, "grad_norm": 1.53125, "learning_rate": 0.00017279512600805306, "loss": 2.6318, "step": 12952 }, { "epoch": 0.6063783720521974, "grad_norm": 1.6640625, "learning_rate": 0.00017279112364196813, "loss": 2.5618, "step": 12953 }, { "epoch": 0.60642518579203, "grad_norm": 1.375, "learning_rate": 0.00017278712102785165, "loss": 2.6705, "step": 12954 }, { "epoch": 0.6064719995318626, "grad_norm": 1.4140625, "learning_rate": 0.00017278311816571726, "loss": 2.9951, "step": 12955 }, { "epoch": 0.6065188132716952, "grad_norm": 1.6640625, "learning_rate": 0.0001727791150555786, "loss": 2.4039, "step": 12956 }, { "epoch": 0.6065656270115278, "grad_norm": 1.2890625, "learning_rate": 0.00017277511169744934, "loss": 2.7349, "step": 12957 }, { "epoch": 0.6066124407513606, "grad_norm": 1.5078125, "learning_rate": 0.00017277110809134306, "loss": 2.5457, "step": 12958 }, { "epoch": 0.6066592544911932, "grad_norm": 1.4609375, "learning_rate": 0.00017276710423727346, "loss": 2.88, "step": 12959 }, { "epoch": 0.6067060682310258, "grad_norm": 1.734375, "learning_rate": 0.00017276310013525413, "loss": 2.7871, "step": 12960 }, { "epoch": 0.6067528819708584, "grad_norm": 1.375, "learning_rate": 0.00017275909578529878, "loss": 2.7422, "step": 12961 }, { "epoch": 0.606799695710691, "grad_norm": 1.4140625, "learning_rate": 0.000172755091187421, "loss": 2.9136, "step": 12962 }, { "epoch": 0.6068465094505238, "grad_norm": 1.4765625, "learning_rate": 0.00017275108634163446, "loss": 2.6118, "step": 12963 }, { "epoch": 0.6068933231903564, "grad_norm": 1.2265625, "learning_rate": 0.00017274708124795281, "loss": 2.7103, "step": 12964 }, { "epoch": 0.606940136930189, "grad_norm": 1.84375, "learning_rate": 0.00017274307590638962, "loss": 2.701, "step": 12965 }, { "epoch": 0.6069869506700216, "grad_norm": 1.5703125, "learning_rate": 0.00017273907031695868, "loss": 2.8754, "step": 12966 }, { "epoch": 0.6070337644098542, "grad_norm": 1.5078125, "learning_rate": 0.0001727350644796735, "loss": 2.7937, "step": 12967 }, { "epoch": 0.607080578149687, "grad_norm": 1.6875, "learning_rate": 0.00017273105839454785, "loss": 3.1755, "step": 12968 }, { "epoch": 0.6071273918895196, "grad_norm": 1.8203125, "learning_rate": 0.00017272705206159528, "loss": 2.7117, "step": 12969 }, { "epoch": 0.6071742056293522, "grad_norm": 1.7421875, "learning_rate": 0.0001727230454808295, "loss": 2.5846, "step": 12970 }, { "epoch": 0.6072210193691848, "grad_norm": 1.390625, "learning_rate": 0.00017271903865226417, "loss": 2.9082, "step": 12971 }, { "epoch": 0.6072678331090176, "grad_norm": 1.34375, "learning_rate": 0.00017271503157591288, "loss": 2.9641, "step": 12972 }, { "epoch": 0.6073146468488502, "grad_norm": 1.59375, "learning_rate": 0.00017271102425178936, "loss": 2.8758, "step": 12973 }, { "epoch": 0.6073614605886828, "grad_norm": 1.6484375, "learning_rate": 0.00017270701667990718, "loss": 2.6777, "step": 12974 }, { "epoch": 0.6074082743285154, "grad_norm": 1.53125, "learning_rate": 0.00017270300886028007, "loss": 2.1962, "step": 12975 }, { "epoch": 0.607455088068348, "grad_norm": 1.53125, "learning_rate": 0.00017269900079292167, "loss": 2.8044, "step": 12976 }, { "epoch": 0.6075019018081808, "grad_norm": 1.15625, "learning_rate": 0.0001726949924778456, "loss": 2.7502, "step": 12977 }, { "epoch": 0.6075487155480134, "grad_norm": 1.9375, "learning_rate": 0.00017269098391506558, "loss": 2.9228, "step": 12978 }, { "epoch": 0.607595529287846, "grad_norm": 1.2890625, "learning_rate": 0.0001726869751045952, "loss": 2.4133, "step": 12979 }, { "epoch": 0.6076423430276786, "grad_norm": 1.609375, "learning_rate": 0.00017268296604644817, "loss": 2.7831, "step": 12980 }, { "epoch": 0.6076891567675112, "grad_norm": 2.03125, "learning_rate": 0.00017267895674063813, "loss": 2.5823, "step": 12981 }, { "epoch": 0.607735970507344, "grad_norm": 1.5078125, "learning_rate": 0.00017267494718717878, "loss": 2.589, "step": 12982 }, { "epoch": 0.6077827842471766, "grad_norm": 1.3671875, "learning_rate": 0.0001726709373860837, "loss": 2.5599, "step": 12983 }, { "epoch": 0.6078295979870092, "grad_norm": 1.2734375, "learning_rate": 0.00017266692733736662, "loss": 2.6097, "step": 12984 }, { "epoch": 0.6078764117268418, "grad_norm": 1.171875, "learning_rate": 0.00017266291704104116, "loss": 2.7689, "step": 12985 }, { "epoch": 0.6079232254666744, "grad_norm": 1.265625, "learning_rate": 0.00017265890649712106, "loss": 2.8731, "step": 12986 }, { "epoch": 0.6079700392065072, "grad_norm": 1.3125, "learning_rate": 0.0001726548957056199, "loss": 2.5622, "step": 12987 }, { "epoch": 0.6080168529463398, "grad_norm": 1.859375, "learning_rate": 0.0001726508846665514, "loss": 2.9912, "step": 12988 }, { "epoch": 0.6080636666861724, "grad_norm": 8.25, "learning_rate": 0.0001726468733799292, "loss": 3.8662, "step": 12989 }, { "epoch": 0.608110480426005, "grad_norm": 1.140625, "learning_rate": 0.00017264286184576699, "loss": 2.6736, "step": 12990 }, { "epoch": 0.6081572941658376, "grad_norm": 1.609375, "learning_rate": 0.0001726388500640784, "loss": 2.6668, "step": 12991 }, { "epoch": 0.6082041079056704, "grad_norm": 1.2734375, "learning_rate": 0.00017263483803487716, "loss": 2.4866, "step": 12992 }, { "epoch": 0.608250921645503, "grad_norm": 1.3125, "learning_rate": 0.00017263082575817688, "loss": 2.5198, "step": 12993 }, { "epoch": 0.6082977353853356, "grad_norm": 1.8984375, "learning_rate": 0.00017262681323399127, "loss": 2.5992, "step": 12994 }, { "epoch": 0.6083445491251682, "grad_norm": 1.5, "learning_rate": 0.00017262280046233405, "loss": 2.7716, "step": 12995 }, { "epoch": 0.6083913628650008, "grad_norm": 1.34375, "learning_rate": 0.00017261878744321876, "loss": 2.3415, "step": 12996 }, { "epoch": 0.6084381766048336, "grad_norm": 1.953125, "learning_rate": 0.00017261477417665914, "loss": 2.5964, "step": 12997 }, { "epoch": 0.6084849903446662, "grad_norm": 1.140625, "learning_rate": 0.00017261076066266894, "loss": 2.3333, "step": 12998 }, { "epoch": 0.6085318040844988, "grad_norm": 1.4921875, "learning_rate": 0.00017260674690126174, "loss": 2.758, "step": 12999 }, { "epoch": 0.6085786178243314, "grad_norm": 1.2890625, "learning_rate": 0.0001726027328924512, "loss": 2.6198, "step": 13000 }, { "epoch": 0.608625431564164, "grad_norm": 1.6171875, "learning_rate": 0.0001725987186362511, "loss": 3.2812, "step": 13001 }, { "epoch": 0.6086722453039968, "grad_norm": 1.3359375, "learning_rate": 0.00017259470413267505, "loss": 2.4082, "step": 13002 }, { "epoch": 0.6087190590438294, "grad_norm": 2.5625, "learning_rate": 0.00017259068938173675, "loss": 3.7212, "step": 13003 }, { "epoch": 0.608765872783662, "grad_norm": 1.734375, "learning_rate": 0.00017258667438344984, "loss": 2.2403, "step": 13004 }, { "epoch": 0.6088126865234946, "grad_norm": 1.515625, "learning_rate": 0.00017258265913782807, "loss": 2.6953, "step": 13005 }, { "epoch": 0.6088595002633272, "grad_norm": 1.296875, "learning_rate": 0.00017257864364488508, "loss": 2.531, "step": 13006 }, { "epoch": 0.60890631400316, "grad_norm": 1.546875, "learning_rate": 0.00017257462790463453, "loss": 2.8299, "step": 13007 }, { "epoch": 0.6089531277429926, "grad_norm": 1.3203125, "learning_rate": 0.00017257061191709014, "loss": 2.5949, "step": 13008 }, { "epoch": 0.6089999414828252, "grad_norm": 1.34375, "learning_rate": 0.0001725665956822656, "loss": 2.7678, "step": 13009 }, { "epoch": 0.6090467552226578, "grad_norm": 1.515625, "learning_rate": 0.00017256257920017455, "loss": 2.6518, "step": 13010 }, { "epoch": 0.6090935689624905, "grad_norm": 1.1640625, "learning_rate": 0.00017255856247083075, "loss": 2.2123, "step": 13011 }, { "epoch": 0.6091403827023232, "grad_norm": 1.4453125, "learning_rate": 0.0001725545454942478, "loss": 3.0259, "step": 13012 }, { "epoch": 0.6091871964421558, "grad_norm": 1.5546875, "learning_rate": 0.00017255052827043948, "loss": 2.7442, "step": 13013 }, { "epoch": 0.6092340101819884, "grad_norm": 2.71875, "learning_rate": 0.0001725465107994194, "loss": 2.5564, "step": 13014 }, { "epoch": 0.609280823921821, "grad_norm": 1.609375, "learning_rate": 0.00017254249308120128, "loss": 2.9242, "step": 13015 }, { "epoch": 0.6093276376616537, "grad_norm": 1.3515625, "learning_rate": 0.0001725384751157988, "loss": 2.942, "step": 13016 }, { "epoch": 0.6093744514014864, "grad_norm": 1.3515625, "learning_rate": 0.00017253445690322563, "loss": 2.8849, "step": 13017 }, { "epoch": 0.609421265141319, "grad_norm": 1.2890625, "learning_rate": 0.00017253043844349554, "loss": 2.9008, "step": 13018 }, { "epoch": 0.6094680788811516, "grad_norm": 1.2421875, "learning_rate": 0.00017252641973662216, "loss": 2.5892, "step": 13019 }, { "epoch": 0.6095148926209842, "grad_norm": 1.265625, "learning_rate": 0.00017252240078261918, "loss": 2.6151, "step": 13020 }, { "epoch": 0.6095617063608169, "grad_norm": 1.5859375, "learning_rate": 0.00017251838158150034, "loss": 2.3755, "step": 13021 }, { "epoch": 0.6096085201006496, "grad_norm": 1.75, "learning_rate": 0.0001725143621332793, "loss": 2.7513, "step": 13022 }, { "epoch": 0.6096553338404822, "grad_norm": 2.90625, "learning_rate": 0.00017251034243796973, "loss": 2.3736, "step": 13023 }, { "epoch": 0.6097021475803148, "grad_norm": 1.6171875, "learning_rate": 0.00017250632249558542, "loss": 2.1301, "step": 13024 }, { "epoch": 0.6097489613201474, "grad_norm": 1.3359375, "learning_rate": 0.00017250230230613994, "loss": 2.7715, "step": 13025 }, { "epoch": 0.6097957750599801, "grad_norm": 1.3515625, "learning_rate": 0.0001724982818696471, "loss": 2.4135, "step": 13026 }, { "epoch": 0.6098425887998128, "grad_norm": 1.421875, "learning_rate": 0.00017249426118612053, "loss": 2.7625, "step": 13027 }, { "epoch": 0.6098894025396454, "grad_norm": 1.9140625, "learning_rate": 0.00017249024025557398, "loss": 2.5549, "step": 13028 }, { "epoch": 0.609936216279478, "grad_norm": 1.6796875, "learning_rate": 0.00017248621907802112, "loss": 3.0156, "step": 13029 }, { "epoch": 0.6099830300193106, "grad_norm": 1.234375, "learning_rate": 0.00017248219765347562, "loss": 2.2629, "step": 13030 }, { "epoch": 0.6100298437591433, "grad_norm": 1.3984375, "learning_rate": 0.00017247817598195126, "loss": 2.6251, "step": 13031 }, { "epoch": 0.610076657498976, "grad_norm": 1.6796875, "learning_rate": 0.0001724741540634617, "loss": 3.2583, "step": 13032 }, { "epoch": 0.6101234712388086, "grad_norm": 1.421875, "learning_rate": 0.00017247013189802064, "loss": 2.608, "step": 13033 }, { "epoch": 0.6101702849786412, "grad_norm": 1.5859375, "learning_rate": 0.00017246610948564182, "loss": 2.8404, "step": 13034 }, { "epoch": 0.6102170987184738, "grad_norm": 1.3125, "learning_rate": 0.0001724620868263389, "loss": 2.5521, "step": 13035 }, { "epoch": 0.6102639124583065, "grad_norm": 1.859375, "learning_rate": 0.0001724580639201256, "loss": 2.494, "step": 13036 }, { "epoch": 0.6103107261981392, "grad_norm": 2.171875, "learning_rate": 0.00017245404076701566, "loss": 3.0126, "step": 13037 }, { "epoch": 0.6103575399379718, "grad_norm": 2.484375, "learning_rate": 0.00017245001736702274, "loss": 2.8961, "step": 13038 }, { "epoch": 0.6104043536778044, "grad_norm": 1.4375, "learning_rate": 0.00017244599372016057, "loss": 2.7809, "step": 13039 }, { "epoch": 0.610451167417637, "grad_norm": 1.4453125, "learning_rate": 0.00017244196982644288, "loss": 2.7371, "step": 13040 }, { "epoch": 0.6104979811574697, "grad_norm": 1.34375, "learning_rate": 0.0001724379456858834, "loss": 3.0147, "step": 13041 }, { "epoch": 0.6105447948973024, "grad_norm": 1.2265625, "learning_rate": 0.00017243392129849574, "loss": 3.1644, "step": 13042 }, { "epoch": 0.610591608637135, "grad_norm": 1.5078125, "learning_rate": 0.0001724298966642937, "loss": 2.8037, "step": 13043 }, { "epoch": 0.6106384223769676, "grad_norm": 1.359375, "learning_rate": 0.00017242587178329099, "loss": 2.9126, "step": 13044 }, { "epoch": 0.6106852361168003, "grad_norm": 1.515625, "learning_rate": 0.0001724218466555013, "loss": 2.4314, "step": 13045 }, { "epoch": 0.6107320498566329, "grad_norm": 1.796875, "learning_rate": 0.00017241782128093834, "loss": 2.4335, "step": 13046 }, { "epoch": 0.6107788635964656, "grad_norm": 1.34375, "learning_rate": 0.00017241379565961586, "loss": 2.7148, "step": 13047 }, { "epoch": 0.6108256773362982, "grad_norm": 1.234375, "learning_rate": 0.00017240976979154756, "loss": 2.8405, "step": 13048 }, { "epoch": 0.6108724910761308, "grad_norm": 1.515625, "learning_rate": 0.00017240574367674715, "loss": 2.3654, "step": 13049 }, { "epoch": 0.6109193048159635, "grad_norm": 1.2734375, "learning_rate": 0.00017240171731522835, "loss": 2.4883, "step": 13050 }, { "epoch": 0.6109661185557961, "grad_norm": 1.0234375, "learning_rate": 0.00017239769070700488, "loss": 3.0478, "step": 13051 }, { "epoch": 0.6110129322956288, "grad_norm": 1.3203125, "learning_rate": 0.00017239366385209047, "loss": 2.2764, "step": 13052 }, { "epoch": 0.6110597460354614, "grad_norm": 1.5234375, "learning_rate": 0.00017238963675049885, "loss": 2.6615, "step": 13053 }, { "epoch": 0.611106559775294, "grad_norm": 1.2421875, "learning_rate": 0.0001723856094022437, "loss": 2.5688, "step": 13054 }, { "epoch": 0.6111533735151267, "grad_norm": 1.3125, "learning_rate": 0.00017238158180733879, "loss": 2.6378, "step": 13055 }, { "epoch": 0.6112001872549593, "grad_norm": 1.390625, "learning_rate": 0.0001723775539657978, "loss": 2.7323, "step": 13056 }, { "epoch": 0.611247000994792, "grad_norm": 1.4375, "learning_rate": 0.0001723735258776345, "loss": 2.6662, "step": 13057 }, { "epoch": 0.6112938147346246, "grad_norm": 1.890625, "learning_rate": 0.00017236949754286258, "loss": 2.4864, "step": 13058 }, { "epoch": 0.6113406284744572, "grad_norm": 2.015625, "learning_rate": 0.00017236546896149578, "loss": 3.228, "step": 13059 }, { "epoch": 0.6113874422142899, "grad_norm": 1.234375, "learning_rate": 0.00017236144013354782, "loss": 2.726, "step": 13060 }, { "epoch": 0.6114342559541225, "grad_norm": 1.5625, "learning_rate": 0.00017235741105903248, "loss": 2.7708, "step": 13061 }, { "epoch": 0.6114810696939552, "grad_norm": 1.3359375, "learning_rate": 0.0001723533817379634, "loss": 2.6543, "step": 13062 }, { "epoch": 0.6115278834337878, "grad_norm": 1.6640625, "learning_rate": 0.00017234935217035437, "loss": 3.1647, "step": 13063 }, { "epoch": 0.6115746971736205, "grad_norm": 1.4765625, "learning_rate": 0.00017234532235621913, "loss": 2.5033, "step": 13064 }, { "epoch": 0.6116215109134531, "grad_norm": 1.390625, "learning_rate": 0.00017234129229557136, "loss": 2.9397, "step": 13065 }, { "epoch": 0.6116683246532857, "grad_norm": 1.90625, "learning_rate": 0.0001723372619884248, "loss": 2.9226, "step": 13066 }, { "epoch": 0.6117151383931184, "grad_norm": 1.2578125, "learning_rate": 0.0001723332314347932, "loss": 2.642, "step": 13067 }, { "epoch": 0.611761952132951, "grad_norm": 1.6328125, "learning_rate": 0.00017232920063469032, "loss": 2.9718, "step": 13068 }, { "epoch": 0.6118087658727837, "grad_norm": 1.453125, "learning_rate": 0.00017232516958812986, "loss": 3.2579, "step": 13069 }, { "epoch": 0.6118555796126163, "grad_norm": 1.3984375, "learning_rate": 0.00017232113829512556, "loss": 2.8519, "step": 13070 }, { "epoch": 0.6119023933524489, "grad_norm": 1.1875, "learning_rate": 0.00017231710675569118, "loss": 1.9918, "step": 13071 }, { "epoch": 0.6119492070922816, "grad_norm": 1.640625, "learning_rate": 0.00017231307496984042, "loss": 2.6024, "step": 13072 }, { "epoch": 0.6119960208321142, "grad_norm": 1.7578125, "learning_rate": 0.00017230904293758705, "loss": 3.1192, "step": 13073 }, { "epoch": 0.6120428345719469, "grad_norm": 1.3125, "learning_rate": 0.00017230501065894477, "loss": 2.2121, "step": 13074 }, { "epoch": 0.6120896483117795, "grad_norm": 1.3984375, "learning_rate": 0.00017230097813392738, "loss": 2.3331, "step": 13075 }, { "epoch": 0.6121364620516121, "grad_norm": 1.2578125, "learning_rate": 0.00017229694536254856, "loss": 2.404, "step": 13076 }, { "epoch": 0.6121832757914448, "grad_norm": 1.65625, "learning_rate": 0.0001722929123448221, "loss": 3.2697, "step": 13077 }, { "epoch": 0.6122300895312774, "grad_norm": 1.7734375, "learning_rate": 0.0001722888790807617, "loss": 2.7696, "step": 13078 }, { "epoch": 0.6122769032711101, "grad_norm": 1.640625, "learning_rate": 0.00017228484557038114, "loss": 2.6002, "step": 13079 }, { "epoch": 0.6123237170109427, "grad_norm": 1.3515625, "learning_rate": 0.00017228081181369412, "loss": 2.9922, "step": 13080 }, { "epoch": 0.6123705307507753, "grad_norm": 1.296875, "learning_rate": 0.00017227677781071445, "loss": 2.6879, "step": 13081 }, { "epoch": 0.612417344490608, "grad_norm": 1.2890625, "learning_rate": 0.00017227274356145582, "loss": 2.8525, "step": 13082 }, { "epoch": 0.6124641582304406, "grad_norm": 1.4296875, "learning_rate": 0.000172268709065932, "loss": 2.7822, "step": 13083 }, { "epoch": 0.6125109719702733, "grad_norm": 1.453125, "learning_rate": 0.00017226467432415672, "loss": 2.5422, "step": 13084 }, { "epoch": 0.6125577857101059, "grad_norm": 1.296875, "learning_rate": 0.00017226063933614374, "loss": 2.8022, "step": 13085 }, { "epoch": 0.6126045994499385, "grad_norm": 1.1640625, "learning_rate": 0.00017225660410190682, "loss": 2.7353, "step": 13086 }, { "epoch": 0.6126514131897712, "grad_norm": 2.140625, "learning_rate": 0.0001722525686214597, "loss": 2.9338, "step": 13087 }, { "epoch": 0.6126982269296039, "grad_norm": 1.828125, "learning_rate": 0.00017224853289481612, "loss": 2.6254, "step": 13088 }, { "epoch": 0.6127450406694365, "grad_norm": 1.359375, "learning_rate": 0.0001722444969219898, "loss": 2.6992, "step": 13089 }, { "epoch": 0.6127918544092691, "grad_norm": 1.1796875, "learning_rate": 0.0001722404607029946, "loss": 2.8545, "step": 13090 }, { "epoch": 0.6128386681491017, "grad_norm": 1.390625, "learning_rate": 0.00017223642423784416, "loss": 2.6003, "step": 13091 }, { "epoch": 0.6128854818889344, "grad_norm": 1.4921875, "learning_rate": 0.0001722323875265523, "loss": 2.9624, "step": 13092 }, { "epoch": 0.612932295628767, "grad_norm": 1.375, "learning_rate": 0.00017222835056913277, "loss": 2.4214, "step": 13093 }, { "epoch": 0.6129791093685997, "grad_norm": 1.21875, "learning_rate": 0.00017222431336559927, "loss": 2.6358, "step": 13094 }, { "epoch": 0.6130259231084323, "grad_norm": 1.125, "learning_rate": 0.00017222027591596564, "loss": 2.8853, "step": 13095 }, { "epoch": 0.613072736848265, "grad_norm": 1.984375, "learning_rate": 0.00017221623822024555, "loss": 3.0206, "step": 13096 }, { "epoch": 0.6131195505880976, "grad_norm": 1.1640625, "learning_rate": 0.00017221220027845285, "loss": 2.5775, "step": 13097 }, { "epoch": 0.6131663643279303, "grad_norm": 1.59375, "learning_rate": 0.00017220816209060122, "loss": 2.2765, "step": 13098 }, { "epoch": 0.6132131780677629, "grad_norm": 1.4296875, "learning_rate": 0.00017220412365670447, "loss": 3.1557, "step": 13099 }, { "epoch": 0.6132599918075955, "grad_norm": 1.140625, "learning_rate": 0.00017220008497677633, "loss": 2.5553, "step": 13100 }, { "epoch": 0.6133068055474282, "grad_norm": 1.6953125, "learning_rate": 0.00017219604605083055, "loss": 2.9273, "step": 13101 }, { "epoch": 0.6133536192872608, "grad_norm": 1.4921875, "learning_rate": 0.00017219200687888095, "loss": 2.8526, "step": 13102 }, { "epoch": 0.6134004330270935, "grad_norm": 1.140625, "learning_rate": 0.00017218796746094124, "loss": 2.4605, "step": 13103 }, { "epoch": 0.6134472467669261, "grad_norm": 1.5390625, "learning_rate": 0.00017218392779702523, "loss": 2.6988, "step": 13104 }, { "epoch": 0.6134940605067587, "grad_norm": 1.75, "learning_rate": 0.00017217988788714665, "loss": 2.6063, "step": 13105 }, { "epoch": 0.6135408742465914, "grad_norm": 1.0703125, "learning_rate": 0.00017217584773131925, "loss": 2.7065, "step": 13106 }, { "epoch": 0.613587687986424, "grad_norm": 1.4765625, "learning_rate": 0.00017217180732955685, "loss": 2.8554, "step": 13107 }, { "epoch": 0.6136345017262567, "grad_norm": 1.2734375, "learning_rate": 0.00017216776668187316, "loss": 2.97, "step": 13108 }, { "epoch": 0.6136813154660893, "grad_norm": 2.203125, "learning_rate": 0.00017216372578828198, "loss": 2.8463, "step": 13109 }, { "epoch": 0.6137281292059219, "grad_norm": 1.3984375, "learning_rate": 0.00017215968464879712, "loss": 2.7132, "step": 13110 }, { "epoch": 0.6137749429457546, "grad_norm": 1.3203125, "learning_rate": 0.00017215564326343227, "loss": 2.9853, "step": 13111 }, { "epoch": 0.6138217566855872, "grad_norm": 1.515625, "learning_rate": 0.00017215160163220122, "loss": 2.9124, "step": 13112 }, { "epoch": 0.6138685704254199, "grad_norm": 1.6796875, "learning_rate": 0.0001721475597551178, "loss": 2.9426, "step": 13113 }, { "epoch": 0.6139153841652525, "grad_norm": 1.7265625, "learning_rate": 0.00017214351763219571, "loss": 2.78, "step": 13114 }, { "epoch": 0.6139621979050851, "grad_norm": 1.4140625, "learning_rate": 0.00017213947526344877, "loss": 3.0857, "step": 13115 }, { "epoch": 0.6140090116449178, "grad_norm": 2.640625, "learning_rate": 0.00017213543264889074, "loss": 2.5026, "step": 13116 }, { "epoch": 0.6140558253847505, "grad_norm": 1.328125, "learning_rate": 0.00017213138978853536, "loss": 2.6562, "step": 13117 }, { "epoch": 0.6141026391245831, "grad_norm": 1.265625, "learning_rate": 0.00017212734668239647, "loss": 2.8778, "step": 13118 }, { "epoch": 0.6141494528644157, "grad_norm": 1.3203125, "learning_rate": 0.0001721233033304878, "loss": 2.501, "step": 13119 }, { "epoch": 0.6141962666042483, "grad_norm": 1.40625, "learning_rate": 0.00017211925973282318, "loss": 2.6013, "step": 13120 }, { "epoch": 0.614243080344081, "grad_norm": 1.2421875, "learning_rate": 0.00017211521588941632, "loss": 2.9087, "step": 13121 }, { "epoch": 0.6142898940839137, "grad_norm": 1.6171875, "learning_rate": 0.00017211117180028103, "loss": 2.7589, "step": 13122 }, { "epoch": 0.6143367078237463, "grad_norm": 1.40625, "learning_rate": 0.00017210712746543107, "loss": 2.8801, "step": 13123 }, { "epoch": 0.6143835215635789, "grad_norm": 1.40625, "learning_rate": 0.00017210308288488028, "loss": 2.6993, "step": 13124 }, { "epoch": 0.6144303353034115, "grad_norm": 1.34375, "learning_rate": 0.00017209903805864239, "loss": 2.7775, "step": 13125 }, { "epoch": 0.6144771490432442, "grad_norm": 1.4453125, "learning_rate": 0.00017209499298673117, "loss": 2.439, "step": 13126 }, { "epoch": 0.6145239627830769, "grad_norm": 1.4140625, "learning_rate": 0.00017209094766916047, "loss": 2.6112, "step": 13127 }, { "epoch": 0.6145707765229095, "grad_norm": 1.3203125, "learning_rate": 0.00017208690210594401, "loss": 2.7425, "step": 13128 }, { "epoch": 0.6146175902627421, "grad_norm": 1.21875, "learning_rate": 0.00017208285629709556, "loss": 2.5295, "step": 13129 }, { "epoch": 0.6146644040025747, "grad_norm": 1.3515625, "learning_rate": 0.00017207881024262903, "loss": 2.6113, "step": 13130 }, { "epoch": 0.6147112177424074, "grad_norm": 1.234375, "learning_rate": 0.00017207476394255806, "loss": 3.0089, "step": 13131 }, { "epoch": 0.6147580314822401, "grad_norm": 4.3125, "learning_rate": 0.0001720707173968965, "loss": 2.6653, "step": 13132 }, { "epoch": 0.6148048452220727, "grad_norm": 1.2578125, "learning_rate": 0.00017206667060565815, "loss": 2.9135, "step": 13133 }, { "epoch": 0.6148516589619053, "grad_norm": 1.5, "learning_rate": 0.0001720626235688568, "loss": 2.5674, "step": 13134 }, { "epoch": 0.6148984727017379, "grad_norm": 1.3359375, "learning_rate": 0.00017205857628650623, "loss": 2.4601, "step": 13135 }, { "epoch": 0.6149452864415706, "grad_norm": 2.328125, "learning_rate": 0.00017205452875862018, "loss": 2.4005, "step": 13136 }, { "epoch": 0.6149921001814033, "grad_norm": 1.296875, "learning_rate": 0.0001720504809852125, "loss": 2.86, "step": 13137 }, { "epoch": 0.6150389139212359, "grad_norm": 1.640625, "learning_rate": 0.000172046432966297, "loss": 2.8695, "step": 13138 }, { "epoch": 0.6150857276610685, "grad_norm": 1.3046875, "learning_rate": 0.00017204238470188744, "loss": 2.7021, "step": 13139 }, { "epoch": 0.6151325414009011, "grad_norm": 1.6640625, "learning_rate": 0.00017203833619199762, "loss": 2.9937, "step": 13140 }, { "epoch": 0.6151793551407339, "grad_norm": 2.484375, "learning_rate": 0.00017203428743664133, "loss": 3.0447, "step": 13141 }, { "epoch": 0.6152261688805665, "grad_norm": 1.3828125, "learning_rate": 0.00017203023843583232, "loss": 2.7809, "step": 13142 }, { "epoch": 0.6152729826203991, "grad_norm": 1.625, "learning_rate": 0.0001720261891895845, "loss": 3.1387, "step": 13143 }, { "epoch": 0.6153197963602317, "grad_norm": 1.3515625, "learning_rate": 0.00017202213969791158, "loss": 2.8391, "step": 13144 }, { "epoch": 0.6153666101000643, "grad_norm": 1.25, "learning_rate": 0.00017201808996082738, "loss": 2.4407, "step": 13145 }, { "epoch": 0.6154134238398971, "grad_norm": 1.2265625, "learning_rate": 0.00017201403997834568, "loss": 2.743, "step": 13146 }, { "epoch": 0.6154602375797297, "grad_norm": 1.3671875, "learning_rate": 0.00017200998975048032, "loss": 2.6991, "step": 13147 }, { "epoch": 0.6155070513195623, "grad_norm": 2.046875, "learning_rate": 0.0001720059392772451, "loss": 2.684, "step": 13148 }, { "epoch": 0.6155538650593949, "grad_norm": 2.078125, "learning_rate": 0.0001720018885586538, "loss": 2.7294, "step": 13149 }, { "epoch": 0.6156006787992275, "grad_norm": 1.2421875, "learning_rate": 0.0001719978375947202, "loss": 2.4553, "step": 13150 }, { "epoch": 0.6156474925390603, "grad_norm": 1.2421875, "learning_rate": 0.00017199378638545817, "loss": 2.7919, "step": 13151 }, { "epoch": 0.6156943062788929, "grad_norm": 1.3515625, "learning_rate": 0.00017198973493088147, "loss": 3.025, "step": 13152 }, { "epoch": 0.6157411200187255, "grad_norm": 1.34375, "learning_rate": 0.00017198568323100388, "loss": 2.7509, "step": 13153 }, { "epoch": 0.6157879337585581, "grad_norm": 1.71875, "learning_rate": 0.00017198163128583923, "loss": 2.6038, "step": 13154 }, { "epoch": 0.6158347474983907, "grad_norm": 1.6484375, "learning_rate": 0.00017197757909540138, "loss": 2.2444, "step": 13155 }, { "epoch": 0.6158815612382235, "grad_norm": 1.0859375, "learning_rate": 0.00017197352665970406, "loss": 2.6676, "step": 13156 }, { "epoch": 0.6159283749780561, "grad_norm": 1.3515625, "learning_rate": 0.00017196947397876109, "loss": 3.0934, "step": 13157 }, { "epoch": 0.6159751887178887, "grad_norm": 1.3203125, "learning_rate": 0.00017196542105258632, "loss": 2.5049, "step": 13158 }, { "epoch": 0.6160220024577213, "grad_norm": 1.0234375, "learning_rate": 0.00017196136788119354, "loss": 2.2636, "step": 13159 }, { "epoch": 0.6160688161975539, "grad_norm": 1.5625, "learning_rate": 0.00017195731446459654, "loss": 2.4423, "step": 13160 }, { "epoch": 0.6161156299373867, "grad_norm": 1.2421875, "learning_rate": 0.00017195326080280917, "loss": 2.7787, "step": 13161 }, { "epoch": 0.6161624436772193, "grad_norm": 1.9921875, "learning_rate": 0.00017194920689584523, "loss": 2.9871, "step": 13162 }, { "epoch": 0.6162092574170519, "grad_norm": 1.3671875, "learning_rate": 0.0001719451527437185, "loss": 2.6516, "step": 13163 }, { "epoch": 0.6162560711568845, "grad_norm": 1.390625, "learning_rate": 0.0001719410983464428, "loss": 2.4367, "step": 13164 }, { "epoch": 0.6163028848967171, "grad_norm": 1.3828125, "learning_rate": 0.000171937043704032, "loss": 2.7247, "step": 13165 }, { "epoch": 0.6163496986365499, "grad_norm": 1.828125, "learning_rate": 0.00017193298881649992, "loss": 2.7231, "step": 13166 }, { "epoch": 0.6163965123763825, "grad_norm": 1.21875, "learning_rate": 0.00017192893368386027, "loss": 2.9245, "step": 13167 }, { "epoch": 0.6164433261162151, "grad_norm": 1.53125, "learning_rate": 0.00017192487830612697, "loss": 2.6178, "step": 13168 }, { "epoch": 0.6164901398560477, "grad_norm": 1.4140625, "learning_rate": 0.00017192082268331383, "loss": 3.0858, "step": 13169 }, { "epoch": 0.6165369535958803, "grad_norm": 1.40625, "learning_rate": 0.0001719167668154346, "loss": 2.4692, "step": 13170 }, { "epoch": 0.6165837673357131, "grad_norm": 1.765625, "learning_rate": 0.00017191271070250315, "loss": 3.1385, "step": 13171 }, { "epoch": 0.6166305810755457, "grad_norm": 1.53125, "learning_rate": 0.0001719086543445333, "loss": 2.2903, "step": 13172 }, { "epoch": 0.6166773948153783, "grad_norm": 1.3203125, "learning_rate": 0.00017190459774153887, "loss": 2.405, "step": 13173 }, { "epoch": 0.6167242085552109, "grad_norm": 1.2578125, "learning_rate": 0.00017190054089353366, "loss": 2.7469, "step": 13174 }, { "epoch": 0.6167710222950435, "grad_norm": 1.2890625, "learning_rate": 0.00017189648380053153, "loss": 2.5669, "step": 13175 }, { "epoch": 0.6168178360348763, "grad_norm": 1.2421875, "learning_rate": 0.00017189242646254628, "loss": 2.6855, "step": 13176 }, { "epoch": 0.6168646497747089, "grad_norm": 1.3828125, "learning_rate": 0.00017188836887959175, "loss": 2.4736, "step": 13177 }, { "epoch": 0.6169114635145415, "grad_norm": 1.4765625, "learning_rate": 0.00017188431105168178, "loss": 2.7639, "step": 13178 }, { "epoch": 0.6169582772543741, "grad_norm": 2.03125, "learning_rate": 0.00017188025297883015, "loss": 2.6596, "step": 13179 }, { "epoch": 0.6170050909942068, "grad_norm": 1.46875, "learning_rate": 0.0001718761946610507, "loss": 2.6057, "step": 13180 }, { "epoch": 0.6170519047340395, "grad_norm": 1.8046875, "learning_rate": 0.0001718721360983573, "loss": 2.4879, "step": 13181 }, { "epoch": 0.6170987184738721, "grad_norm": 1.65625, "learning_rate": 0.00017186807729076376, "loss": 3.2121, "step": 13182 }, { "epoch": 0.6171455322137047, "grad_norm": 1.9375, "learning_rate": 0.00017186401823828386, "loss": 2.8399, "step": 13183 }, { "epoch": 0.6171923459535373, "grad_norm": 1.28125, "learning_rate": 0.0001718599589409315, "loss": 2.7261, "step": 13184 }, { "epoch": 0.61723915969337, "grad_norm": 1.1328125, "learning_rate": 0.00017185589939872048, "loss": 2.6563, "step": 13185 }, { "epoch": 0.6172859734332027, "grad_norm": 1.4921875, "learning_rate": 0.0001718518396116646, "loss": 2.6798, "step": 13186 }, { "epoch": 0.6173327871730353, "grad_norm": 1.3203125, "learning_rate": 0.00017184777957977778, "loss": 2.531, "step": 13187 }, { "epoch": 0.6173796009128679, "grad_norm": 1.4375, "learning_rate": 0.00017184371930307378, "loss": 2.9059, "step": 13188 }, { "epoch": 0.6174264146527005, "grad_norm": 1.59375, "learning_rate": 0.00017183965878156648, "loss": 2.5449, "step": 13189 }, { "epoch": 0.6174732283925332, "grad_norm": 1.453125, "learning_rate": 0.00017183559801526967, "loss": 2.8527, "step": 13190 }, { "epoch": 0.6175200421323659, "grad_norm": 1.1484375, "learning_rate": 0.00017183153700419724, "loss": 2.7175, "step": 13191 }, { "epoch": 0.6175668558721985, "grad_norm": 1.546875, "learning_rate": 0.00017182747574836298, "loss": 2.4368, "step": 13192 }, { "epoch": 0.6176136696120311, "grad_norm": 1.4609375, "learning_rate": 0.00017182341424778076, "loss": 2.9224, "step": 13193 }, { "epoch": 0.6176604833518637, "grad_norm": 1.8515625, "learning_rate": 0.00017181935250246437, "loss": 2.9211, "step": 13194 }, { "epoch": 0.6177072970916964, "grad_norm": 1.8203125, "learning_rate": 0.00017181529051242772, "loss": 2.7118, "step": 13195 }, { "epoch": 0.6177541108315291, "grad_norm": 1.28125, "learning_rate": 0.0001718112282776846, "loss": 2.9089, "step": 13196 }, { "epoch": 0.6178009245713617, "grad_norm": 1.359375, "learning_rate": 0.00017180716579824889, "loss": 2.7857, "step": 13197 }, { "epoch": 0.6178477383111943, "grad_norm": 1.875, "learning_rate": 0.0001718031030741344, "loss": 2.959, "step": 13198 }, { "epoch": 0.617894552051027, "grad_norm": 1.375, "learning_rate": 0.000171799040105355, "loss": 2.6653, "step": 13199 }, { "epoch": 0.6179413657908596, "grad_norm": 1.6796875, "learning_rate": 0.00017179497689192452, "loss": 2.918, "step": 13200 }, { "epoch": 0.6179881795306923, "grad_norm": 1.1875, "learning_rate": 0.0001717909134338568, "loss": 2.3725, "step": 13201 }, { "epoch": 0.6180349932705249, "grad_norm": 1.21875, "learning_rate": 0.0001717868497311657, "loss": 2.8156, "step": 13202 }, { "epoch": 0.6180818070103575, "grad_norm": 1.6796875, "learning_rate": 0.00017178278578386508, "loss": 2.9738, "step": 13203 }, { "epoch": 0.6181286207501901, "grad_norm": 1.40625, "learning_rate": 0.0001717787215919687, "loss": 2.5172, "step": 13204 }, { "epoch": 0.6181754344900228, "grad_norm": 1.3046875, "learning_rate": 0.00017177465715549053, "loss": 2.3508, "step": 13205 }, { "epoch": 0.6182222482298555, "grad_norm": 1.078125, "learning_rate": 0.00017177059247444433, "loss": 2.776, "step": 13206 }, { "epoch": 0.6182690619696881, "grad_norm": 1.234375, "learning_rate": 0.00017176652754884403, "loss": 2.913, "step": 13207 }, { "epoch": 0.6183158757095207, "grad_norm": 1.1328125, "learning_rate": 0.00017176246237870339, "loss": 2.5636, "step": 13208 }, { "epoch": 0.6183626894493534, "grad_norm": 1.4921875, "learning_rate": 0.00017175839696403634, "loss": 2.4702, "step": 13209 }, { "epoch": 0.618409503189186, "grad_norm": 1.1171875, "learning_rate": 0.00017175433130485669, "loss": 4.4034, "step": 13210 }, { "epoch": 0.6184563169290187, "grad_norm": 1.1484375, "learning_rate": 0.0001717502654011783, "loss": 2.4918, "step": 13211 }, { "epoch": 0.6185031306688513, "grad_norm": 1.546875, "learning_rate": 0.00017174619925301504, "loss": 2.8237, "step": 13212 }, { "epoch": 0.6185499444086839, "grad_norm": 1.71875, "learning_rate": 0.00017174213286038072, "loss": 2.7261, "step": 13213 }, { "epoch": 0.6185967581485166, "grad_norm": 1.625, "learning_rate": 0.00017173806622328925, "loss": 2.5784, "step": 13214 }, { "epoch": 0.6186435718883492, "grad_norm": 1.9765625, "learning_rate": 0.00017173399934175448, "loss": 2.7413, "step": 13215 }, { "epoch": 0.6186903856281819, "grad_norm": 1.6171875, "learning_rate": 0.00017172993221579023, "loss": 3.0593, "step": 13216 }, { "epoch": 0.6187371993680145, "grad_norm": 1.1640625, "learning_rate": 0.0001717258648454104, "loss": 2.8228, "step": 13217 }, { "epoch": 0.6187840131078471, "grad_norm": 1.328125, "learning_rate": 0.00017172179723062882, "loss": 2.6344, "step": 13218 }, { "epoch": 0.6188308268476798, "grad_norm": 1.5546875, "learning_rate": 0.00017171772937145936, "loss": 2.8718, "step": 13219 }, { "epoch": 0.6188776405875125, "grad_norm": 1.3515625, "learning_rate": 0.0001717136612679159, "loss": 2.5737, "step": 13220 }, { "epoch": 0.6189244543273451, "grad_norm": 1.1015625, "learning_rate": 0.00017170959292001227, "loss": 2.7811, "step": 13221 }, { "epoch": 0.6189712680671777, "grad_norm": 1.265625, "learning_rate": 0.00017170552432776236, "loss": 2.7417, "step": 13222 }, { "epoch": 0.6190180818070103, "grad_norm": 1.2734375, "learning_rate": 0.00017170145549118, "loss": 2.672, "step": 13223 }, { "epoch": 0.619064895546843, "grad_norm": 1.203125, "learning_rate": 0.00017169738641027908, "loss": 2.8188, "step": 13224 }, { "epoch": 0.6191117092866757, "grad_norm": 1.4375, "learning_rate": 0.00017169331708507345, "loss": 2.5809, "step": 13225 }, { "epoch": 0.6191585230265083, "grad_norm": 1.015625, "learning_rate": 0.00017168924751557703, "loss": 4.5584, "step": 13226 }, { "epoch": 0.6192053367663409, "grad_norm": 1.5703125, "learning_rate": 0.0001716851777018036, "loss": 2.8534, "step": 13227 }, { "epoch": 0.6192521505061735, "grad_norm": 1.4921875, "learning_rate": 0.00017168110764376708, "loss": 3.0248, "step": 13228 }, { "epoch": 0.6192989642460062, "grad_norm": 1.3046875, "learning_rate": 0.00017167703734148135, "loss": 2.4844, "step": 13229 }, { "epoch": 0.6193457779858389, "grad_norm": 1.546875, "learning_rate": 0.00017167296679496024, "loss": 2.5713, "step": 13230 }, { "epoch": 0.6193925917256715, "grad_norm": 1.4140625, "learning_rate": 0.00017166889600421766, "loss": 2.6076, "step": 13231 }, { "epoch": 0.6194394054655041, "grad_norm": 1.4375, "learning_rate": 0.00017166482496926743, "loss": 2.8551, "step": 13232 }, { "epoch": 0.6194862192053368, "grad_norm": 1.3359375, "learning_rate": 0.00017166075369012343, "loss": 2.6047, "step": 13233 }, { "epoch": 0.6195330329451694, "grad_norm": 1.5546875, "learning_rate": 0.00017165668216679962, "loss": 2.4203, "step": 13234 }, { "epoch": 0.6195798466850021, "grad_norm": 1.421875, "learning_rate": 0.00017165261039930977, "loss": 2.5544, "step": 13235 }, { "epoch": 0.6196266604248347, "grad_norm": 1.9765625, "learning_rate": 0.0001716485383876678, "loss": 2.9472, "step": 13236 }, { "epoch": 0.6196734741646673, "grad_norm": 1.4609375, "learning_rate": 0.00017164446613188755, "loss": 2.8815, "step": 13237 }, { "epoch": 0.6197202879045, "grad_norm": 1.359375, "learning_rate": 0.00017164039363198297, "loss": 3.0127, "step": 13238 }, { "epoch": 0.6197671016443326, "grad_norm": 1.296875, "learning_rate": 0.00017163632088796783, "loss": 2.6158, "step": 13239 }, { "epoch": 0.6198139153841653, "grad_norm": 1.6796875, "learning_rate": 0.00017163224789985612, "loss": 2.4295, "step": 13240 }, { "epoch": 0.6198607291239979, "grad_norm": 1.53125, "learning_rate": 0.00017162817466766166, "loss": 2.3185, "step": 13241 }, { "epoch": 0.6199075428638305, "grad_norm": 1.7421875, "learning_rate": 0.0001716241011913983, "loss": 2.2136, "step": 13242 }, { "epoch": 0.6199543566036632, "grad_norm": 1.0625, "learning_rate": 0.00017162002747108, "loss": 2.5721, "step": 13243 }, { "epoch": 0.6200011703434958, "grad_norm": 1.265625, "learning_rate": 0.00017161595350672056, "loss": 2.6507, "step": 13244 }, { "epoch": 0.6200479840833285, "grad_norm": 1.4140625, "learning_rate": 0.00017161187929833394, "loss": 3.0784, "step": 13245 }, { "epoch": 0.6200947978231611, "grad_norm": 1.4453125, "learning_rate": 0.00017160780484593393, "loss": 2.8579, "step": 13246 }, { "epoch": 0.6201416115629937, "grad_norm": 1.2578125, "learning_rate": 0.0001716037301495345, "loss": 2.8121, "step": 13247 }, { "epoch": 0.6201884253028264, "grad_norm": 1.25, "learning_rate": 0.00017159965520914948, "loss": 2.3464, "step": 13248 }, { "epoch": 0.620235239042659, "grad_norm": 1.390625, "learning_rate": 0.00017159558002479276, "loss": 2.7616, "step": 13249 }, { "epoch": 0.6202820527824917, "grad_norm": 1.5859375, "learning_rate": 0.00017159150459647828, "loss": 2.7327, "step": 13250 }, { "epoch": 0.6203288665223243, "grad_norm": 1.3125, "learning_rate": 0.00017158742892421985, "loss": 2.7663, "step": 13251 }, { "epoch": 0.620375680262157, "grad_norm": 1.2109375, "learning_rate": 0.00017158335300803144, "loss": 3.2545, "step": 13252 }, { "epoch": 0.6204224940019896, "grad_norm": 1.59375, "learning_rate": 0.00017157927684792684, "loss": 2.6241, "step": 13253 }, { "epoch": 0.6204693077418222, "grad_norm": 1.15625, "learning_rate": 0.00017157520044392004, "loss": 2.7789, "step": 13254 }, { "epoch": 0.6205161214816549, "grad_norm": 1.2578125, "learning_rate": 0.00017157112379602484, "loss": 2.7655, "step": 13255 }, { "epoch": 0.6205629352214875, "grad_norm": 1.3359375, "learning_rate": 0.00017156704690425518, "loss": 2.5679, "step": 13256 }, { "epoch": 0.6206097489613202, "grad_norm": 1.5859375, "learning_rate": 0.00017156296976862496, "loss": 2.6816, "step": 13257 }, { "epoch": 0.6206565627011528, "grad_norm": 1.328125, "learning_rate": 0.00017155889238914805, "loss": 2.9139, "step": 13258 }, { "epoch": 0.6207033764409854, "grad_norm": 1.1796875, "learning_rate": 0.00017155481476583835, "loss": 2.909, "step": 13259 }, { "epoch": 0.6207501901808181, "grad_norm": 1.1796875, "learning_rate": 0.00017155073689870975, "loss": 2.5013, "step": 13260 }, { "epoch": 0.6207970039206507, "grad_norm": 1.25, "learning_rate": 0.00017154665878777616, "loss": 3.1677, "step": 13261 }, { "epoch": 0.6208438176604834, "grad_norm": 1.6953125, "learning_rate": 0.00017154258043305145, "loss": 2.7238, "step": 13262 }, { "epoch": 0.620890631400316, "grad_norm": 1.609375, "learning_rate": 0.00017153850183454953, "loss": 2.835, "step": 13263 }, { "epoch": 0.6209374451401486, "grad_norm": 1.2109375, "learning_rate": 0.00017153442299228433, "loss": 2.6619, "step": 13264 }, { "epoch": 0.6209842588799813, "grad_norm": 1.5078125, "learning_rate": 0.0001715303439062697, "loss": 2.6378, "step": 13265 }, { "epoch": 0.6210310726198139, "grad_norm": 1.671875, "learning_rate": 0.00017152626457651957, "loss": 2.4053, "step": 13266 }, { "epoch": 0.6210778863596466, "grad_norm": 1.6796875, "learning_rate": 0.0001715221850030478, "loss": 2.5572, "step": 13267 }, { "epoch": 0.6211247000994792, "grad_norm": 1.359375, "learning_rate": 0.00017151810518586835, "loss": 2.4232, "step": 13268 }, { "epoch": 0.6211715138393118, "grad_norm": 1.3828125, "learning_rate": 0.00017151402512499505, "loss": 2.8495, "step": 13269 }, { "epoch": 0.6212183275791445, "grad_norm": 2.03125, "learning_rate": 0.00017150994482044188, "loss": 2.5824, "step": 13270 }, { "epoch": 0.6212651413189771, "grad_norm": 1.1953125, "learning_rate": 0.0001715058642722227, "loss": 2.518, "step": 13271 }, { "epoch": 0.6213119550588098, "grad_norm": 1.1171875, "learning_rate": 0.0001715017834803514, "loss": 3.1565, "step": 13272 }, { "epoch": 0.6213587687986424, "grad_norm": 2.328125, "learning_rate": 0.00017149770244484196, "loss": 2.2457, "step": 13273 }, { "epoch": 0.621405582538475, "grad_norm": 1.3828125, "learning_rate": 0.00017149362116570816, "loss": 2.7452, "step": 13274 }, { "epoch": 0.6214523962783077, "grad_norm": 1.46875, "learning_rate": 0.000171489539642964, "loss": 2.9048, "step": 13275 }, { "epoch": 0.6214992100181403, "grad_norm": 1.5625, "learning_rate": 0.0001714854578766234, "loss": 3.2253, "step": 13276 }, { "epoch": 0.621546023757973, "grad_norm": 1.015625, "learning_rate": 0.0001714813758667002, "loss": 2.8602, "step": 13277 }, { "epoch": 0.6215928374978056, "grad_norm": 1.5625, "learning_rate": 0.00017147729361320838, "loss": 2.9137, "step": 13278 }, { "epoch": 0.6216396512376382, "grad_norm": 2.171875, "learning_rate": 0.0001714732111161618, "loss": 2.6534, "step": 13279 }, { "epoch": 0.6216864649774709, "grad_norm": 1.59375, "learning_rate": 0.00017146912837557435, "loss": 2.3504, "step": 13280 }, { "epoch": 0.6217332787173036, "grad_norm": 1.5390625, "learning_rate": 0.00017146504539146, "loss": 2.4822, "step": 13281 }, { "epoch": 0.6217800924571362, "grad_norm": 1.1640625, "learning_rate": 0.00017146096216383264, "loss": 2.9383, "step": 13282 }, { "epoch": 0.6218269061969688, "grad_norm": 1.3125, "learning_rate": 0.0001714568786927062, "loss": 2.4766, "step": 13283 }, { "epoch": 0.6218737199368014, "grad_norm": 1.1953125, "learning_rate": 0.00017145279497809455, "loss": 2.8943, "step": 13284 }, { "epoch": 0.6219205336766341, "grad_norm": 1.9296875, "learning_rate": 0.00017144871102001163, "loss": 2.7912, "step": 13285 }, { "epoch": 0.6219673474164668, "grad_norm": 1.6328125, "learning_rate": 0.00017144462681847138, "loss": 2.1678, "step": 13286 }, { "epoch": 0.6220141611562994, "grad_norm": 1.140625, "learning_rate": 0.00017144054237348767, "loss": 2.528, "step": 13287 }, { "epoch": 0.622060974896132, "grad_norm": 2.125, "learning_rate": 0.00017143645768507445, "loss": 2.9291, "step": 13288 }, { "epoch": 0.6221077886359646, "grad_norm": 1.265625, "learning_rate": 0.00017143237275324565, "loss": 2.9569, "step": 13289 }, { "epoch": 0.6221546023757973, "grad_norm": 2.015625, "learning_rate": 0.00017142828757801514, "loss": 2.7539, "step": 13290 }, { "epoch": 0.62220141611563, "grad_norm": 1.28125, "learning_rate": 0.00017142420215939688, "loss": 2.5365, "step": 13291 }, { "epoch": 0.6222482298554626, "grad_norm": 3.390625, "learning_rate": 0.0001714201164974048, "loss": 2.4106, "step": 13292 }, { "epoch": 0.6222950435952952, "grad_norm": 1.390625, "learning_rate": 0.00017141603059205276, "loss": 2.6543, "step": 13293 }, { "epoch": 0.6223418573351278, "grad_norm": 1.46875, "learning_rate": 0.00017141194444335474, "loss": 2.7955, "step": 13294 }, { "epoch": 0.6223886710749605, "grad_norm": 1.3203125, "learning_rate": 0.00017140785805132465, "loss": 2.6093, "step": 13295 }, { "epoch": 0.6224354848147932, "grad_norm": 2.34375, "learning_rate": 0.0001714037714159764, "loss": 2.2689, "step": 13296 }, { "epoch": 0.6224822985546258, "grad_norm": 1.4765625, "learning_rate": 0.00017139968453732394, "loss": 2.6491, "step": 13297 }, { "epoch": 0.6225291122944584, "grad_norm": 1.4921875, "learning_rate": 0.00017139559741538118, "loss": 2.6886, "step": 13298 }, { "epoch": 0.622575926034291, "grad_norm": 1.7578125, "learning_rate": 0.00017139151005016206, "loss": 2.6588, "step": 13299 }, { "epoch": 0.6226227397741237, "grad_norm": 1.390625, "learning_rate": 0.0001713874224416805, "loss": 2.8942, "step": 13300 }, { "epoch": 0.6226695535139564, "grad_norm": 1.4375, "learning_rate": 0.0001713833345899504, "loss": 2.4165, "step": 13301 }, { "epoch": 0.622716367253789, "grad_norm": 1.53125, "learning_rate": 0.0001713792464949857, "loss": 2.8657, "step": 13302 }, { "epoch": 0.6227631809936216, "grad_norm": 2.125, "learning_rate": 0.00017137515815680038, "loss": 3.4964, "step": 13303 }, { "epoch": 0.6228099947334542, "grad_norm": 1.78125, "learning_rate": 0.0001713710695754083, "loss": 2.3589, "step": 13304 }, { "epoch": 0.622856808473287, "grad_norm": 1.2421875, "learning_rate": 0.00017136698075082346, "loss": 2.3615, "step": 13305 }, { "epoch": 0.6229036222131196, "grad_norm": 1.75, "learning_rate": 0.00017136289168305974, "loss": 2.3716, "step": 13306 }, { "epoch": 0.6229504359529522, "grad_norm": 1.5078125, "learning_rate": 0.0001713588023721311, "loss": 2.8552, "step": 13307 }, { "epoch": 0.6229972496927848, "grad_norm": 1.203125, "learning_rate": 0.00017135471281805146, "loss": 2.8329, "step": 13308 }, { "epoch": 0.6230440634326174, "grad_norm": 2.15625, "learning_rate": 0.00017135062302083478, "loss": 3.1159, "step": 13309 }, { "epoch": 0.6230908771724502, "grad_norm": 1.5703125, "learning_rate": 0.00017134653298049495, "loss": 2.8222, "step": 13310 }, { "epoch": 0.6231376909122828, "grad_norm": 1.4921875, "learning_rate": 0.00017134244269704593, "loss": 2.8464, "step": 13311 }, { "epoch": 0.6231845046521154, "grad_norm": 1.2421875, "learning_rate": 0.00017133835217050168, "loss": 3.2479, "step": 13312 }, { "epoch": 0.623231318391948, "grad_norm": 1.4296875, "learning_rate": 0.0001713342614008761, "loss": 2.7025, "step": 13313 }, { "epoch": 0.6232781321317806, "grad_norm": 1.296875, "learning_rate": 0.00017133017038818316, "loss": 2.3328, "step": 13314 }, { "epoch": 0.6233249458716134, "grad_norm": 1.4609375, "learning_rate": 0.0001713260791324368, "loss": 2.7208, "step": 13315 }, { "epoch": 0.623371759611446, "grad_norm": 1.1796875, "learning_rate": 0.00017132198763365094, "loss": 2.6224, "step": 13316 }, { "epoch": 0.6234185733512786, "grad_norm": 1.125, "learning_rate": 0.00017131789589183951, "loss": 2.2649, "step": 13317 }, { "epoch": 0.6234653870911112, "grad_norm": 1.484375, "learning_rate": 0.00017131380390701648, "loss": 2.5783, "step": 13318 }, { "epoch": 0.6235122008309438, "grad_norm": 1.59375, "learning_rate": 0.0001713097116791958, "loss": 2.7508, "step": 13319 }, { "epoch": 0.6235590145707766, "grad_norm": 1.5390625, "learning_rate": 0.00017130561920839142, "loss": 2.5868, "step": 13320 }, { "epoch": 0.6236058283106092, "grad_norm": 1.4375, "learning_rate": 0.00017130152649461722, "loss": 2.7036, "step": 13321 }, { "epoch": 0.6236526420504418, "grad_norm": 1.5078125, "learning_rate": 0.00017129743353788717, "loss": 2.443, "step": 13322 }, { "epoch": 0.6236994557902744, "grad_norm": 1.4140625, "learning_rate": 0.00017129334033821528, "loss": 2.6544, "step": 13323 }, { "epoch": 0.623746269530107, "grad_norm": 1.4765625, "learning_rate": 0.00017128924689561545, "loss": 2.4602, "step": 13324 }, { "epoch": 0.6237930832699398, "grad_norm": 1.3203125, "learning_rate": 0.00017128515321010164, "loss": 2.6392, "step": 13325 }, { "epoch": 0.6238398970097724, "grad_norm": 1.34375, "learning_rate": 0.00017128105928168776, "loss": 2.7395, "step": 13326 }, { "epoch": 0.623886710749605, "grad_norm": 1.3046875, "learning_rate": 0.0001712769651103878, "loss": 2.9763, "step": 13327 }, { "epoch": 0.6239335244894376, "grad_norm": 1.90625, "learning_rate": 0.0001712728706962157, "loss": 2.8458, "step": 13328 }, { "epoch": 0.6239803382292702, "grad_norm": 1.2578125, "learning_rate": 0.00017126877603918544, "loss": 2.9519, "step": 13329 }, { "epoch": 0.624027151969103, "grad_norm": 1.359375, "learning_rate": 0.00017126468113931096, "loss": 2.3004, "step": 13330 }, { "epoch": 0.6240739657089356, "grad_norm": 1.546875, "learning_rate": 0.00017126058599660615, "loss": 3.0327, "step": 13331 }, { "epoch": 0.6241207794487682, "grad_norm": 1.2890625, "learning_rate": 0.00017125649061108502, "loss": 2.3922, "step": 13332 }, { "epoch": 0.6241675931886008, "grad_norm": 1.375, "learning_rate": 0.00017125239498276152, "loss": 2.7129, "step": 13333 }, { "epoch": 0.6242144069284334, "grad_norm": 1.53125, "learning_rate": 0.0001712482991116496, "loss": 3.2174, "step": 13334 }, { "epoch": 0.6242612206682662, "grad_norm": 1.3984375, "learning_rate": 0.00017124420299776321, "loss": 2.7969, "step": 13335 }, { "epoch": 0.6243080344080988, "grad_norm": 1.875, "learning_rate": 0.00017124010664111637, "loss": 2.9301, "step": 13336 }, { "epoch": 0.6243548481479314, "grad_norm": 1.4140625, "learning_rate": 0.00017123601004172294, "loss": 2.6711, "step": 13337 }, { "epoch": 0.624401661887764, "grad_norm": 1.4140625, "learning_rate": 0.00017123191319959693, "loss": 2.99, "step": 13338 }, { "epoch": 0.6244484756275966, "grad_norm": 1.6015625, "learning_rate": 0.00017122781611475228, "loss": 2.6586, "step": 13339 }, { "epoch": 0.6244952893674294, "grad_norm": 1.6953125, "learning_rate": 0.000171223718787203, "loss": 2.829, "step": 13340 }, { "epoch": 0.624542103107262, "grad_norm": 1.4296875, "learning_rate": 0.000171219621216963, "loss": 2.6614, "step": 13341 }, { "epoch": 0.6245889168470946, "grad_norm": 2.375, "learning_rate": 0.00017121552340404624, "loss": 3.1085, "step": 13342 }, { "epoch": 0.6246357305869272, "grad_norm": 1.234375, "learning_rate": 0.0001712114253484667, "loss": 2.4486, "step": 13343 }, { "epoch": 0.62468254432676, "grad_norm": 2.140625, "learning_rate": 0.00017120732705023836, "loss": 2.6374, "step": 13344 }, { "epoch": 0.6247293580665926, "grad_norm": 1.1484375, "learning_rate": 0.00017120322850937517, "loss": 2.5833, "step": 13345 }, { "epoch": 0.6247761718064252, "grad_norm": 1.4609375, "learning_rate": 0.0001711991297258911, "loss": 2.6525, "step": 13346 }, { "epoch": 0.6248229855462578, "grad_norm": 1.515625, "learning_rate": 0.0001711950306998001, "loss": 2.7054, "step": 13347 }, { "epoch": 0.6248697992860904, "grad_norm": 1.53125, "learning_rate": 0.00017119093143111615, "loss": 2.516, "step": 13348 }, { "epoch": 0.6249166130259232, "grad_norm": 1.46875, "learning_rate": 0.00017118683191985322, "loss": 2.9635, "step": 13349 }, { "epoch": 0.6249634267657558, "grad_norm": 1.5703125, "learning_rate": 0.00017118273216602524, "loss": 2.979, "step": 13350 }, { "epoch": 0.6250102405055884, "grad_norm": 1.9453125, "learning_rate": 0.00017117863216964624, "loss": 3.2243, "step": 13351 }, { "epoch": 0.625057054245421, "grad_norm": 1.5546875, "learning_rate": 0.0001711745319307302, "loss": 2.9773, "step": 13352 }, { "epoch": 0.6251038679852536, "grad_norm": 1.3203125, "learning_rate": 0.000171170431449291, "loss": 2.7999, "step": 13353 }, { "epoch": 0.6251506817250864, "grad_norm": 1.3515625, "learning_rate": 0.00017116633072534272, "loss": 2.9553, "step": 13354 }, { "epoch": 0.625197495464919, "grad_norm": 1.2578125, "learning_rate": 0.00017116222975889923, "loss": 2.6185, "step": 13355 }, { "epoch": 0.6252443092047516, "grad_norm": 1.59375, "learning_rate": 0.0001711581285499746, "loss": 2.6811, "step": 13356 }, { "epoch": 0.6252911229445842, "grad_norm": 1.2890625, "learning_rate": 0.00017115402709858276, "loss": 2.6779, "step": 13357 }, { "epoch": 0.6253379366844168, "grad_norm": 1.765625, "learning_rate": 0.00017114992540473765, "loss": 2.7679, "step": 13358 }, { "epoch": 0.6253847504242496, "grad_norm": 1.109375, "learning_rate": 0.0001711458234684533, "loss": 2.5239, "step": 13359 }, { "epoch": 0.6254315641640822, "grad_norm": 1.4296875, "learning_rate": 0.00017114172128974365, "loss": 2.4592, "step": 13360 }, { "epoch": 0.6254783779039148, "grad_norm": 1.4140625, "learning_rate": 0.00017113761886862271, "loss": 2.5652, "step": 13361 }, { "epoch": 0.6255251916437474, "grad_norm": 1.8046875, "learning_rate": 0.00017113351620510448, "loss": 3.0092, "step": 13362 }, { "epoch": 0.62557200538358, "grad_norm": 1.2265625, "learning_rate": 0.00017112941329920287, "loss": 2.7694, "step": 13363 }, { "epoch": 0.6256188191234128, "grad_norm": 1.40625, "learning_rate": 0.0001711253101509319, "loss": 2.5713, "step": 13364 }, { "epoch": 0.6256656328632454, "grad_norm": 1.3046875, "learning_rate": 0.00017112120676030554, "loss": 2.7253, "step": 13365 }, { "epoch": 0.625712446603078, "grad_norm": 1.390625, "learning_rate": 0.0001711171031273378, "loss": 2.7171, "step": 13366 }, { "epoch": 0.6257592603429106, "grad_norm": 1.2109375, "learning_rate": 0.00017111299925204264, "loss": 2.7362, "step": 13367 }, { "epoch": 0.6258060740827432, "grad_norm": 1.5703125, "learning_rate": 0.00017110889513443403, "loss": 2.5949, "step": 13368 }, { "epoch": 0.625852887822576, "grad_norm": 1.796875, "learning_rate": 0.00017110479077452596, "loss": 2.0032, "step": 13369 }, { "epoch": 0.6258997015624086, "grad_norm": 1.484375, "learning_rate": 0.00017110068617233245, "loss": 2.3492, "step": 13370 }, { "epoch": 0.6259465153022412, "grad_norm": 1.140625, "learning_rate": 0.00017109658132786747, "loss": 2.5015, "step": 13371 }, { "epoch": 0.6259933290420738, "grad_norm": 2.078125, "learning_rate": 0.00017109247624114496, "loss": 3.1608, "step": 13372 }, { "epoch": 0.6260401427819065, "grad_norm": 1.453125, "learning_rate": 0.00017108837091217897, "loss": 2.1894, "step": 13373 }, { "epoch": 0.6260869565217392, "grad_norm": 1.3515625, "learning_rate": 0.00017108426534098346, "loss": 3.0152, "step": 13374 }, { "epoch": 0.6261337702615718, "grad_norm": 1.4375, "learning_rate": 0.00017108015952757244, "loss": 3.0576, "step": 13375 }, { "epoch": 0.6261805840014044, "grad_norm": 0.94140625, "learning_rate": 0.00017107605347195986, "loss": 3.2385, "step": 13376 }, { "epoch": 0.626227397741237, "grad_norm": 1.609375, "learning_rate": 0.00017107194717415976, "loss": 2.5976, "step": 13377 }, { "epoch": 0.6262742114810697, "grad_norm": 1.7265625, "learning_rate": 0.00017106784063418607, "loss": 2.8434, "step": 13378 }, { "epoch": 0.6263210252209024, "grad_norm": 1.7421875, "learning_rate": 0.00017106373385205288, "loss": 2.6772, "step": 13379 }, { "epoch": 0.626367838960735, "grad_norm": 1.7734375, "learning_rate": 0.00017105962682777408, "loss": 3.153, "step": 13380 }, { "epoch": 0.6264146527005676, "grad_norm": 1.171875, "learning_rate": 0.00017105551956136372, "loss": 3.0902, "step": 13381 }, { "epoch": 0.6264614664404002, "grad_norm": 1.4296875, "learning_rate": 0.00017105141205283578, "loss": 2.9935, "step": 13382 }, { "epoch": 0.6265082801802329, "grad_norm": 1.2734375, "learning_rate": 0.00017104730430220427, "loss": 2.6639, "step": 13383 }, { "epoch": 0.6265550939200656, "grad_norm": 1.2734375, "learning_rate": 0.0001710431963094832, "loss": 2.7366, "step": 13384 }, { "epoch": 0.6266019076598982, "grad_norm": 1.1875, "learning_rate": 0.00017103908807468646, "loss": 2.9498, "step": 13385 }, { "epoch": 0.6266487213997308, "grad_norm": 1.359375, "learning_rate": 0.0001710349795978282, "loss": 2.9256, "step": 13386 }, { "epoch": 0.6266955351395634, "grad_norm": 1.5, "learning_rate": 0.00017103087087892238, "loss": 2.4101, "step": 13387 }, { "epoch": 0.6267423488793961, "grad_norm": 1.296875, "learning_rate": 0.00017102676191798293, "loss": 2.835, "step": 13388 }, { "epoch": 0.6267891626192288, "grad_norm": 2.328125, "learning_rate": 0.0001710226527150239, "loss": 3.0261, "step": 13389 }, { "epoch": 0.6268359763590614, "grad_norm": 1.1171875, "learning_rate": 0.00017101854327005928, "loss": 2.4318, "step": 13390 }, { "epoch": 0.626882790098894, "grad_norm": 1.5390625, "learning_rate": 0.00017101443358310308, "loss": 2.9096, "step": 13391 }, { "epoch": 0.6269296038387266, "grad_norm": 1.8125, "learning_rate": 0.0001710103236541693, "loss": 2.6836, "step": 13392 }, { "epoch": 0.6269764175785593, "grad_norm": 1.2109375, "learning_rate": 0.000171006213483272, "loss": 2.7645, "step": 13393 }, { "epoch": 0.627023231318392, "grad_norm": 1.5546875, "learning_rate": 0.00017100210307042507, "loss": 2.751, "step": 13394 }, { "epoch": 0.6270700450582246, "grad_norm": 1.140625, "learning_rate": 0.00017099799241564257, "loss": 2.2854, "step": 13395 }, { "epoch": 0.6271168587980572, "grad_norm": 1.3125, "learning_rate": 0.00017099388151893855, "loss": 2.5748, "step": 13396 }, { "epoch": 0.6271636725378898, "grad_norm": 1.4140625, "learning_rate": 0.000170989770380327, "loss": 2.6077, "step": 13397 }, { "epoch": 0.6272104862777225, "grad_norm": 1.3359375, "learning_rate": 0.00017098565899982188, "loss": 2.6369, "step": 13398 }, { "epoch": 0.6272573000175552, "grad_norm": 1.25, "learning_rate": 0.00017098154737743722, "loss": 2.4229, "step": 13399 }, { "epoch": 0.6273041137573878, "grad_norm": 1.25, "learning_rate": 0.00017097743551318706, "loss": 2.7594, "step": 13400 }, { "epoch": 0.6273509274972204, "grad_norm": 1.6328125, "learning_rate": 0.0001709733234070854, "loss": 2.8231, "step": 13401 }, { "epoch": 0.627397741237053, "grad_norm": 1.5546875, "learning_rate": 0.00017096921105914622, "loss": 3.0876, "step": 13402 }, { "epoch": 0.6274445549768857, "grad_norm": 1.8125, "learning_rate": 0.00017096509846938358, "loss": 2.8409, "step": 13403 }, { "epoch": 0.6274913687167184, "grad_norm": 1.1796875, "learning_rate": 0.00017096098563781144, "loss": 2.5458, "step": 13404 }, { "epoch": 0.627538182456551, "grad_norm": 1.7265625, "learning_rate": 0.00017095687256444387, "loss": 2.6345, "step": 13405 }, { "epoch": 0.6275849961963836, "grad_norm": 1.515625, "learning_rate": 0.00017095275924929485, "loss": 2.564, "step": 13406 }, { "epoch": 0.6276318099362163, "grad_norm": 1.2734375, "learning_rate": 0.0001709486456923784, "loss": 2.7915, "step": 13407 }, { "epoch": 0.6276786236760489, "grad_norm": 1.3671875, "learning_rate": 0.00017094453189370852, "loss": 2.6725, "step": 13408 }, { "epoch": 0.6277254374158816, "grad_norm": 1.546875, "learning_rate": 0.00017094041785329926, "loss": 2.5575, "step": 13409 }, { "epoch": 0.6277722511557142, "grad_norm": 1.6875, "learning_rate": 0.00017093630357116463, "loss": 2.736, "step": 13410 }, { "epoch": 0.6278190648955468, "grad_norm": 1.578125, "learning_rate": 0.00017093218904731868, "loss": 2.6435, "step": 13411 }, { "epoch": 0.6278658786353795, "grad_norm": 1.203125, "learning_rate": 0.00017092807428177535, "loss": 2.9097, "step": 13412 }, { "epoch": 0.6279126923752121, "grad_norm": 1.4296875, "learning_rate": 0.00017092395927454873, "loss": 3.1668, "step": 13413 }, { "epoch": 0.6279595061150448, "grad_norm": 1.765625, "learning_rate": 0.0001709198440256528, "loss": 3.1961, "step": 13414 }, { "epoch": 0.6280063198548774, "grad_norm": 1.4609375, "learning_rate": 0.00017091572853510163, "loss": 2.6643, "step": 13415 }, { "epoch": 0.62805313359471, "grad_norm": 1.375, "learning_rate": 0.00017091161280290918, "loss": 2.4449, "step": 13416 }, { "epoch": 0.6280999473345427, "grad_norm": 2.109375, "learning_rate": 0.00017090749682908954, "loss": 2.7928, "step": 13417 }, { "epoch": 0.6281467610743753, "grad_norm": 1.5390625, "learning_rate": 0.00017090338061365667, "loss": 2.8287, "step": 13418 }, { "epoch": 0.628193574814208, "grad_norm": 1.265625, "learning_rate": 0.00017089926415662468, "loss": 2.1535, "step": 13419 }, { "epoch": 0.6282403885540406, "grad_norm": 1.8515625, "learning_rate": 0.0001708951474580075, "loss": 3.3154, "step": 13420 }, { "epoch": 0.6282872022938732, "grad_norm": 1.28125, "learning_rate": 0.0001708910305178192, "loss": 2.6556, "step": 13421 }, { "epoch": 0.6283340160337059, "grad_norm": 1.484375, "learning_rate": 0.00017088691333607385, "loss": 2.9563, "step": 13422 }, { "epoch": 0.6283808297735385, "grad_norm": 1.8046875, "learning_rate": 0.0001708827959127854, "loss": 2.8851, "step": 13423 }, { "epoch": 0.6284276435133712, "grad_norm": 1.203125, "learning_rate": 0.00017087867824796792, "loss": 2.9854, "step": 13424 }, { "epoch": 0.6284744572532038, "grad_norm": 1.2421875, "learning_rate": 0.00017087456034163548, "loss": 2.6228, "step": 13425 }, { "epoch": 0.6285212709930365, "grad_norm": 1.5, "learning_rate": 0.00017087044219380205, "loss": 2.6727, "step": 13426 }, { "epoch": 0.6285680847328691, "grad_norm": 1.5078125, "learning_rate": 0.0001708663238044817, "loss": 2.4975, "step": 13427 }, { "epoch": 0.6286148984727017, "grad_norm": 1.2734375, "learning_rate": 0.00017086220517368843, "loss": 2.1981, "step": 13428 }, { "epoch": 0.6286617122125344, "grad_norm": 1.640625, "learning_rate": 0.0001708580863014363, "loss": 2.9626, "step": 13429 }, { "epoch": 0.628708525952367, "grad_norm": 1.3984375, "learning_rate": 0.00017085396718773935, "loss": 3.0528, "step": 13430 }, { "epoch": 0.6287553396921997, "grad_norm": 1.40625, "learning_rate": 0.00017084984783261156, "loss": 2.5752, "step": 13431 }, { "epoch": 0.6288021534320323, "grad_norm": 1.125, "learning_rate": 0.00017084572823606702, "loss": 2.3617, "step": 13432 }, { "epoch": 0.6288489671718649, "grad_norm": 1.2734375, "learning_rate": 0.00017084160839811978, "loss": 2.4529, "step": 13433 }, { "epoch": 0.6288957809116976, "grad_norm": 2.90625, "learning_rate": 0.00017083748831878386, "loss": 2.4915, "step": 13434 }, { "epoch": 0.6289425946515302, "grad_norm": 1.578125, "learning_rate": 0.00017083336799807327, "loss": 2.7524, "step": 13435 }, { "epoch": 0.6289894083913629, "grad_norm": 1.25, "learning_rate": 0.00017082924743600208, "loss": 2.8226, "step": 13436 }, { "epoch": 0.6290362221311955, "grad_norm": 1.7421875, "learning_rate": 0.0001708251266325843, "loss": 3.1325, "step": 13437 }, { "epoch": 0.6290830358710281, "grad_norm": 1.59375, "learning_rate": 0.00017082100558783402, "loss": 2.5395, "step": 13438 }, { "epoch": 0.6291298496108608, "grad_norm": 1.5625, "learning_rate": 0.0001708168843017653, "loss": 2.8687, "step": 13439 }, { "epoch": 0.6291766633506934, "grad_norm": 1.4296875, "learning_rate": 0.00017081276277439207, "loss": 2.5859, "step": 13440 }, { "epoch": 0.6292234770905261, "grad_norm": 1.53125, "learning_rate": 0.00017080864100572847, "loss": 2.7139, "step": 13441 }, { "epoch": 0.6292702908303587, "grad_norm": 1.5390625, "learning_rate": 0.0001708045189957885, "loss": 2.7409, "step": 13442 }, { "epoch": 0.6293171045701913, "grad_norm": 1.578125, "learning_rate": 0.00017080039674458624, "loss": 2.7682, "step": 13443 }, { "epoch": 0.629363918310024, "grad_norm": 1.4921875, "learning_rate": 0.00017079627425213573, "loss": 2.5622, "step": 13444 }, { "epoch": 0.6294107320498566, "grad_norm": 1.28125, "learning_rate": 0.000170792151518451, "loss": 2.1758, "step": 13445 }, { "epoch": 0.6294575457896893, "grad_norm": 1.2265625, "learning_rate": 0.00017078802854354609, "loss": 2.4943, "step": 13446 }, { "epoch": 0.6295043595295219, "grad_norm": 1.3046875, "learning_rate": 0.00017078390532743512, "loss": 2.4754, "step": 13447 }, { "epoch": 0.6295511732693545, "grad_norm": 1.34375, "learning_rate": 0.00017077978187013203, "loss": 2.6998, "step": 13448 }, { "epoch": 0.6295979870091872, "grad_norm": 1.9609375, "learning_rate": 0.00017077565817165096, "loss": 2.9048, "step": 13449 }, { "epoch": 0.6296448007490199, "grad_norm": 3.015625, "learning_rate": 0.0001707715342320059, "loss": 2.6073, "step": 13450 }, { "epoch": 0.6296916144888525, "grad_norm": 1.53125, "learning_rate": 0.0001707674100512109, "loss": 3.1168, "step": 13451 }, { "epoch": 0.6297384282286851, "grad_norm": 1.9609375, "learning_rate": 0.00017076328562928007, "loss": 2.7536, "step": 13452 }, { "epoch": 0.6297852419685177, "grad_norm": 1.2578125, "learning_rate": 0.00017075916096622747, "loss": 2.5828, "step": 13453 }, { "epoch": 0.6298320557083504, "grad_norm": 1.1015625, "learning_rate": 0.00017075503606206707, "loss": 2.5559, "step": 13454 }, { "epoch": 0.629878869448183, "grad_norm": 1.1640625, "learning_rate": 0.00017075091091681298, "loss": 2.5901, "step": 13455 }, { "epoch": 0.6299256831880157, "grad_norm": 1.3515625, "learning_rate": 0.00017074678553047928, "loss": 2.95, "step": 13456 }, { "epoch": 0.6299724969278483, "grad_norm": 1.2265625, "learning_rate": 0.00017074265990307995, "loss": 3.111, "step": 13457 }, { "epoch": 0.6300193106676809, "grad_norm": 1.671875, "learning_rate": 0.00017073853403462916, "loss": 2.4441, "step": 13458 }, { "epoch": 0.6300661244075136, "grad_norm": 1.4609375, "learning_rate": 0.00017073440792514087, "loss": 2.529, "step": 13459 }, { "epoch": 0.6301129381473463, "grad_norm": 1.0078125, "learning_rate": 0.00017073028157462916, "loss": 3.8952, "step": 13460 }, { "epoch": 0.6301597518871789, "grad_norm": 1.2109375, "learning_rate": 0.0001707261549831081, "loss": 1.9263, "step": 13461 }, { "epoch": 0.6302065656270115, "grad_norm": 1.1953125, "learning_rate": 0.0001707220281505918, "loss": 2.5405, "step": 13462 }, { "epoch": 0.6302533793668442, "grad_norm": 1.78125, "learning_rate": 0.00017071790107709423, "loss": 2.9945, "step": 13463 }, { "epoch": 0.6303001931066768, "grad_norm": 1.3046875, "learning_rate": 0.00017071377376262953, "loss": 2.5758, "step": 13464 }, { "epoch": 0.6303470068465095, "grad_norm": 1.5078125, "learning_rate": 0.00017070964620721175, "loss": 2.77, "step": 13465 }, { "epoch": 0.6303938205863421, "grad_norm": 1.3359375, "learning_rate": 0.00017070551841085488, "loss": 2.7147, "step": 13466 }, { "epoch": 0.6304406343261747, "grad_norm": 2.25, "learning_rate": 0.00017070139037357306, "loss": 2.8839, "step": 13467 }, { "epoch": 0.6304874480660074, "grad_norm": 1.1875, "learning_rate": 0.00017069726209538037, "loss": 2.7925, "step": 13468 }, { "epoch": 0.63053426180584, "grad_norm": 1.6640625, "learning_rate": 0.00017069313357629082, "loss": 2.3879, "step": 13469 }, { "epoch": 0.6305810755456727, "grad_norm": 1.1015625, "learning_rate": 0.0001706890048163185, "loss": 2.4846, "step": 13470 }, { "epoch": 0.6306278892855053, "grad_norm": 1.6640625, "learning_rate": 0.0001706848758154775, "loss": 2.4474, "step": 13471 }, { "epoch": 0.6306747030253379, "grad_norm": 2.03125, "learning_rate": 0.0001706807465737819, "loss": 2.9744, "step": 13472 }, { "epoch": 0.6307215167651706, "grad_norm": 1.5546875, "learning_rate": 0.00017067661709124567, "loss": 2.5415, "step": 13473 }, { "epoch": 0.6307683305050032, "grad_norm": 1.1640625, "learning_rate": 0.00017067248736788302, "loss": 2.7098, "step": 13474 }, { "epoch": 0.6308151442448359, "grad_norm": 1.8515625, "learning_rate": 0.00017066835740370793, "loss": 2.2887, "step": 13475 }, { "epoch": 0.6308619579846685, "grad_norm": 1.7890625, "learning_rate": 0.00017066422719873447, "loss": 2.5918, "step": 13476 }, { "epoch": 0.6309087717245011, "grad_norm": 1.3828125, "learning_rate": 0.00017066009675297677, "loss": 2.5781, "step": 13477 }, { "epoch": 0.6309555854643338, "grad_norm": 1.46875, "learning_rate": 0.00017065596606644884, "loss": 2.7729, "step": 13478 }, { "epoch": 0.6310023992041665, "grad_norm": 1.9765625, "learning_rate": 0.00017065183513916482, "loss": 2.7451, "step": 13479 }, { "epoch": 0.6310492129439991, "grad_norm": 1.53125, "learning_rate": 0.00017064770397113877, "loss": 2.3935, "step": 13480 }, { "epoch": 0.6310960266838317, "grad_norm": 1.4609375, "learning_rate": 0.0001706435725623847, "loss": 2.9663, "step": 13481 }, { "epoch": 0.6311428404236643, "grad_norm": 1.375, "learning_rate": 0.0001706394409129168, "loss": 2.8269, "step": 13482 }, { "epoch": 0.631189654163497, "grad_norm": 1.2734375, "learning_rate": 0.00017063530902274908, "loss": 2.5862, "step": 13483 }, { "epoch": 0.6312364679033297, "grad_norm": 1.2890625, "learning_rate": 0.0001706311768918956, "loss": 2.8373, "step": 13484 }, { "epoch": 0.6312832816431623, "grad_norm": 1.1796875, "learning_rate": 0.00017062704452037048, "loss": 2.403, "step": 13485 }, { "epoch": 0.6313300953829949, "grad_norm": 2.703125, "learning_rate": 0.0001706229119081878, "loss": 2.0954, "step": 13486 }, { "epoch": 0.6313769091228275, "grad_norm": 1.25, "learning_rate": 0.00017061877905536157, "loss": 2.3706, "step": 13487 }, { "epoch": 0.6314237228626602, "grad_norm": 1.3046875, "learning_rate": 0.000170614645961906, "loss": 2.5158, "step": 13488 }, { "epoch": 0.6314705366024929, "grad_norm": 1.5, "learning_rate": 0.00017061051262783509, "loss": 2.8183, "step": 13489 }, { "epoch": 0.6315173503423255, "grad_norm": 1.3671875, "learning_rate": 0.00017060637905316292, "loss": 2.8465, "step": 13490 }, { "epoch": 0.6315641640821581, "grad_norm": 1.375, "learning_rate": 0.0001706022452379036, "loss": 2.8621, "step": 13491 }, { "epoch": 0.6316109778219907, "grad_norm": 1.0234375, "learning_rate": 0.00017059811118207122, "loss": 2.529, "step": 13492 }, { "epoch": 0.6316577915618234, "grad_norm": 2.53125, "learning_rate": 0.00017059397688567985, "loss": 2.9711, "step": 13493 }, { "epoch": 0.6317046053016561, "grad_norm": 1.828125, "learning_rate": 0.00017058984234874362, "loss": 2.9109, "step": 13494 }, { "epoch": 0.6317514190414887, "grad_norm": 1.5390625, "learning_rate": 0.00017058570757127653, "loss": 2.7766, "step": 13495 }, { "epoch": 0.6317982327813213, "grad_norm": 1.4453125, "learning_rate": 0.00017058157255329275, "loss": 3.0211, "step": 13496 }, { "epoch": 0.6318450465211539, "grad_norm": 1.6015625, "learning_rate": 0.00017057743729480634, "loss": 2.3511, "step": 13497 }, { "epoch": 0.6318918602609866, "grad_norm": 1.5859375, "learning_rate": 0.0001705733017958314, "loss": 2.6516, "step": 13498 }, { "epoch": 0.6319386740008193, "grad_norm": 1.421875, "learning_rate": 0.00017056916605638197, "loss": 2.9556, "step": 13499 }, { "epoch": 0.6319854877406519, "grad_norm": 1.5703125, "learning_rate": 0.00017056503007647223, "loss": 2.8481, "step": 13500 }, { "epoch": 0.6320323014804845, "grad_norm": 1.7109375, "learning_rate": 0.0001705608938561162, "loss": 2.527, "step": 13501 }, { "epoch": 0.6320791152203171, "grad_norm": 1.3359375, "learning_rate": 0.00017055675739532803, "loss": 2.8095, "step": 13502 }, { "epoch": 0.6321259289601499, "grad_norm": 1.1875, "learning_rate": 0.00017055262069412178, "loss": 2.5034, "step": 13503 }, { "epoch": 0.6321727426999825, "grad_norm": 1.4453125, "learning_rate": 0.00017054848375251154, "loss": 2.3962, "step": 13504 }, { "epoch": 0.6322195564398151, "grad_norm": 1.4296875, "learning_rate": 0.00017054434657051145, "loss": 2.8536, "step": 13505 }, { "epoch": 0.6322663701796477, "grad_norm": 1.4921875, "learning_rate": 0.00017054020914813553, "loss": 2.9518, "step": 13506 }, { "epoch": 0.6323131839194803, "grad_norm": 1.4453125, "learning_rate": 0.00017053607148539795, "loss": 3.0613, "step": 13507 }, { "epoch": 0.6323599976593131, "grad_norm": 1.4375, "learning_rate": 0.0001705319335823128, "loss": 2.6774, "step": 13508 }, { "epoch": 0.6324068113991457, "grad_norm": 1.1953125, "learning_rate": 0.00017052779543889413, "loss": 2.591, "step": 13509 }, { "epoch": 0.6324536251389783, "grad_norm": 1.6875, "learning_rate": 0.00017052365705515609, "loss": 2.8382, "step": 13510 }, { "epoch": 0.6325004388788109, "grad_norm": 1.34375, "learning_rate": 0.00017051951843111276, "loss": 2.591, "step": 13511 }, { "epoch": 0.6325472526186435, "grad_norm": 1.5234375, "learning_rate": 0.00017051537956677825, "loss": 2.7338, "step": 13512 }, { "epoch": 0.6325940663584763, "grad_norm": 1.15625, "learning_rate": 0.00017051124046216668, "loss": 2.4873, "step": 13513 }, { "epoch": 0.6326408800983089, "grad_norm": 1.3046875, "learning_rate": 0.0001705071011172921, "loss": 2.6326, "step": 13514 }, { "epoch": 0.6326876938381415, "grad_norm": 1.3671875, "learning_rate": 0.00017050296153216867, "loss": 2.674, "step": 13515 }, { "epoch": 0.6327345075779741, "grad_norm": 1.1328125, "learning_rate": 0.00017049882170681047, "loss": 2.5303, "step": 13516 }, { "epoch": 0.6327813213178067, "grad_norm": 1.53125, "learning_rate": 0.00017049468164123162, "loss": 2.8184, "step": 13517 }, { "epoch": 0.6328281350576395, "grad_norm": 1.8046875, "learning_rate": 0.0001704905413354462, "loss": 2.1721, "step": 13518 }, { "epoch": 0.6328749487974721, "grad_norm": 1.640625, "learning_rate": 0.0001704864007894683, "loss": 2.5263, "step": 13519 }, { "epoch": 0.6329217625373047, "grad_norm": 1.265625, "learning_rate": 0.0001704822600033121, "loss": 2.784, "step": 13520 }, { "epoch": 0.6329685762771373, "grad_norm": 1.359375, "learning_rate": 0.00017047811897699167, "loss": 2.6375, "step": 13521 }, { "epoch": 0.6330153900169699, "grad_norm": 1.5234375, "learning_rate": 0.0001704739777105212, "loss": 2.4426, "step": 13522 }, { "epoch": 0.6330622037568027, "grad_norm": 1.8125, "learning_rate": 0.00017046983620391465, "loss": 2.9033, "step": 13523 }, { "epoch": 0.6331090174966353, "grad_norm": 1.5546875, "learning_rate": 0.0001704656944571862, "loss": 2.8155, "step": 13524 }, { "epoch": 0.6331558312364679, "grad_norm": 1.546875, "learning_rate": 0.00017046155247034997, "loss": 2.8831, "step": 13525 }, { "epoch": 0.6332026449763005, "grad_norm": 1.4609375, "learning_rate": 0.00017045741024342007, "loss": 2.6192, "step": 13526 }, { "epoch": 0.6332494587161331, "grad_norm": 1.0625, "learning_rate": 0.00017045326777641063, "loss": 2.5341, "step": 13527 }, { "epoch": 0.6332962724559659, "grad_norm": 1.8984375, "learning_rate": 0.00017044912506933575, "loss": 3.0307, "step": 13528 }, { "epoch": 0.6333430861957985, "grad_norm": 1.515625, "learning_rate": 0.00017044498212220955, "loss": 2.6403, "step": 13529 }, { "epoch": 0.6333898999356311, "grad_norm": 1.5703125, "learning_rate": 0.00017044083893504614, "loss": 3.103, "step": 13530 }, { "epoch": 0.6334367136754637, "grad_norm": 1.3828125, "learning_rate": 0.00017043669550785966, "loss": 2.8875, "step": 13531 }, { "epoch": 0.6334835274152963, "grad_norm": 1.390625, "learning_rate": 0.0001704325518406642, "loss": 2.6075, "step": 13532 }, { "epoch": 0.6335303411551291, "grad_norm": 1.6640625, "learning_rate": 0.0001704284079334739, "loss": 2.8655, "step": 13533 }, { "epoch": 0.6335771548949617, "grad_norm": 1.484375, "learning_rate": 0.00017042426378630285, "loss": 3.0176, "step": 13534 }, { "epoch": 0.6336239686347943, "grad_norm": 1.6953125, "learning_rate": 0.0001704201193991652, "loss": 2.2843, "step": 13535 }, { "epoch": 0.6336707823746269, "grad_norm": 1.1171875, "learning_rate": 0.00017041597477207508, "loss": 4.0751, "step": 13536 }, { "epoch": 0.6337175961144595, "grad_norm": 1.5234375, "learning_rate": 0.00017041182990504657, "loss": 2.9123, "step": 13537 }, { "epoch": 0.6337644098542923, "grad_norm": 1.1796875, "learning_rate": 0.00017040768479809385, "loss": 2.6374, "step": 13538 }, { "epoch": 0.6338112235941249, "grad_norm": 1.2890625, "learning_rate": 0.000170403539451231, "loss": 2.5361, "step": 13539 }, { "epoch": 0.6338580373339575, "grad_norm": 1.2734375, "learning_rate": 0.00017039939386447216, "loss": 2.3057, "step": 13540 }, { "epoch": 0.6339048510737901, "grad_norm": 1.5546875, "learning_rate": 0.00017039524803783143, "loss": 2.6667, "step": 13541 }, { "epoch": 0.6339516648136228, "grad_norm": 1.296875, "learning_rate": 0.000170391101971323, "loss": 2.5573, "step": 13542 }, { "epoch": 0.6339984785534555, "grad_norm": 1.1796875, "learning_rate": 0.00017038695566496092, "loss": 2.5809, "step": 13543 }, { "epoch": 0.6340452922932881, "grad_norm": 1.171875, "learning_rate": 0.0001703828091187594, "loss": 2.3804, "step": 13544 }, { "epoch": 0.6340921060331207, "grad_norm": 2.171875, "learning_rate": 0.0001703786623327325, "loss": 2.2386, "step": 13545 }, { "epoch": 0.6341389197729533, "grad_norm": 1.4453125, "learning_rate": 0.0001703745153068944, "loss": 2.672, "step": 13546 }, { "epoch": 0.634185733512786, "grad_norm": 1.5234375, "learning_rate": 0.00017037036804125915, "loss": 2.7491, "step": 13547 }, { "epoch": 0.6342325472526187, "grad_norm": 1.421875, "learning_rate": 0.000170366220535841, "loss": 2.5832, "step": 13548 }, { "epoch": 0.6342793609924513, "grad_norm": 1.6171875, "learning_rate": 0.000170362072790654, "loss": 3.207, "step": 13549 }, { "epoch": 0.6343261747322839, "grad_norm": 1.1875, "learning_rate": 0.00017035792480571228, "loss": 2.59, "step": 13550 }, { "epoch": 0.6343729884721165, "grad_norm": 1.3125, "learning_rate": 0.00017035377658103005, "loss": 2.7483, "step": 13551 }, { "epoch": 0.6344198022119492, "grad_norm": 1.6875, "learning_rate": 0.00017034962811662133, "loss": 3.3869, "step": 13552 }, { "epoch": 0.6344666159517819, "grad_norm": 1.703125, "learning_rate": 0.00017034547941250036, "loss": 3.0462, "step": 13553 }, { "epoch": 0.6345134296916145, "grad_norm": 1.546875, "learning_rate": 0.00017034133046868124, "loss": 2.8407, "step": 13554 }, { "epoch": 0.6345602434314471, "grad_norm": 1.46875, "learning_rate": 0.0001703371812851781, "loss": 2.8328, "step": 13555 }, { "epoch": 0.6346070571712797, "grad_norm": 1.390625, "learning_rate": 0.00017033303186200506, "loss": 2.666, "step": 13556 }, { "epoch": 0.6346538709111124, "grad_norm": 1.8828125, "learning_rate": 0.00017032888219917629, "loss": 2.3299, "step": 13557 }, { "epoch": 0.6347006846509451, "grad_norm": 1.1875, "learning_rate": 0.0001703247322967059, "loss": 2.685, "step": 13558 }, { "epoch": 0.6347474983907777, "grad_norm": 1.46875, "learning_rate": 0.00017032058215460806, "loss": 2.6603, "step": 13559 }, { "epoch": 0.6347943121306103, "grad_norm": 1.375, "learning_rate": 0.00017031643177289692, "loss": 2.8925, "step": 13560 }, { "epoch": 0.634841125870443, "grad_norm": 1.7578125, "learning_rate": 0.0001703122811515866, "loss": 2.0487, "step": 13561 }, { "epoch": 0.6348879396102756, "grad_norm": 1.46875, "learning_rate": 0.00017030813029069122, "loss": 2.2898, "step": 13562 }, { "epoch": 0.6349347533501083, "grad_norm": 1.5625, "learning_rate": 0.00017030397919022498, "loss": 2.5042, "step": 13563 }, { "epoch": 0.6349815670899409, "grad_norm": 1.328125, "learning_rate": 0.00017029982785020197, "loss": 2.6924, "step": 13564 }, { "epoch": 0.6350283808297735, "grad_norm": 1.1796875, "learning_rate": 0.00017029567627063638, "loss": 2.4259, "step": 13565 }, { "epoch": 0.6350751945696061, "grad_norm": 1.2734375, "learning_rate": 0.00017029152445154232, "loss": 2.5477, "step": 13566 }, { "epoch": 0.6351220083094388, "grad_norm": 2.234375, "learning_rate": 0.00017028737239293399, "loss": 3.1004, "step": 13567 }, { "epoch": 0.6351688220492715, "grad_norm": 1.3203125, "learning_rate": 0.00017028322009482544, "loss": 2.9567, "step": 13568 }, { "epoch": 0.6352156357891041, "grad_norm": 1.8203125, "learning_rate": 0.00017027906755723093, "loss": 2.9185, "step": 13569 }, { "epoch": 0.6352624495289367, "grad_norm": 10.375, "learning_rate": 0.00017027491478016456, "loss": 3.4837, "step": 13570 }, { "epoch": 0.6353092632687694, "grad_norm": 1.4609375, "learning_rate": 0.00017027076176364046, "loss": 2.7086, "step": 13571 }, { "epoch": 0.635356077008602, "grad_norm": 1.2890625, "learning_rate": 0.0001702666085076728, "loss": 2.4448, "step": 13572 }, { "epoch": 0.6354028907484347, "grad_norm": 1.765625, "learning_rate": 0.00017026245501227573, "loss": 1.9713, "step": 13573 }, { "epoch": 0.6354497044882673, "grad_norm": 1.28125, "learning_rate": 0.0001702583012774634, "loss": 2.7262, "step": 13574 }, { "epoch": 0.6354965182280999, "grad_norm": 1.2734375, "learning_rate": 0.00017025414730325, "loss": 2.7312, "step": 13575 }, { "epoch": 0.6355433319679326, "grad_norm": 1.40625, "learning_rate": 0.00017024999308964963, "loss": 3.0286, "step": 13576 }, { "epoch": 0.6355901457077652, "grad_norm": 1.6640625, "learning_rate": 0.00017024583863667648, "loss": 2.7209, "step": 13577 }, { "epoch": 0.6356369594475979, "grad_norm": 1.8828125, "learning_rate": 0.00017024168394434467, "loss": 2.567, "step": 13578 }, { "epoch": 0.6356837731874305, "grad_norm": 1.484375, "learning_rate": 0.00017023752901266842, "loss": 2.6956, "step": 13579 }, { "epoch": 0.6357305869272631, "grad_norm": 1.765625, "learning_rate": 0.00017023337384166186, "loss": 3.1698, "step": 13580 }, { "epoch": 0.6357774006670958, "grad_norm": 1.2109375, "learning_rate": 0.0001702292184313391, "loss": 3.0127, "step": 13581 }, { "epoch": 0.6358242144069284, "grad_norm": 1.3671875, "learning_rate": 0.00017022506278171435, "loss": 2.4884, "step": 13582 }, { "epoch": 0.6358710281467611, "grad_norm": 2.21875, "learning_rate": 0.00017022090689280175, "loss": 2.7258, "step": 13583 }, { "epoch": 0.6359178418865937, "grad_norm": 1.2421875, "learning_rate": 0.00017021675076461549, "loss": 2.3951, "step": 13584 }, { "epoch": 0.6359646556264263, "grad_norm": 1.2265625, "learning_rate": 0.00017021259439716968, "loss": 2.6255, "step": 13585 }, { "epoch": 0.636011469366259, "grad_norm": 1.2890625, "learning_rate": 0.00017020843779047852, "loss": 2.6498, "step": 13586 }, { "epoch": 0.6360582831060917, "grad_norm": 1.296875, "learning_rate": 0.0001702042809445562, "loss": 2.6864, "step": 13587 }, { "epoch": 0.6361050968459243, "grad_norm": 1.234375, "learning_rate": 0.00017020012385941683, "loss": 2.739, "step": 13588 }, { "epoch": 0.6361519105857569, "grad_norm": 2.046875, "learning_rate": 0.00017019596653507457, "loss": 2.5887, "step": 13589 }, { "epoch": 0.6361987243255895, "grad_norm": 2.046875, "learning_rate": 0.00017019180897154365, "loss": 2.6552, "step": 13590 }, { "epoch": 0.6362455380654222, "grad_norm": 1.6171875, "learning_rate": 0.0001701876511688382, "loss": 2.7633, "step": 13591 }, { "epoch": 0.6362923518052549, "grad_norm": 1.515625, "learning_rate": 0.00017018349312697236, "loss": 2.6927, "step": 13592 }, { "epoch": 0.6363391655450875, "grad_norm": 1.125, "learning_rate": 0.00017017933484596032, "loss": 2.7739, "step": 13593 }, { "epoch": 0.6363859792849201, "grad_norm": 1.328125, "learning_rate": 0.00017017517632581626, "loss": 2.7009, "step": 13594 }, { "epoch": 0.6364327930247528, "grad_norm": 1.3828125, "learning_rate": 0.00017017101756655433, "loss": 2.5336, "step": 13595 }, { "epoch": 0.6364796067645854, "grad_norm": 1.4453125, "learning_rate": 0.00017016685856818874, "loss": 2.4529, "step": 13596 }, { "epoch": 0.6365264205044181, "grad_norm": 1.5234375, "learning_rate": 0.0001701626993307336, "loss": 2.9695, "step": 13597 }, { "epoch": 0.6365732342442507, "grad_norm": 1.4453125, "learning_rate": 0.00017015853985420315, "loss": 2.858, "step": 13598 }, { "epoch": 0.6366200479840833, "grad_norm": 1.3984375, "learning_rate": 0.00017015438013861156, "loss": 2.8509, "step": 13599 }, { "epoch": 0.636666861723916, "grad_norm": 1.3515625, "learning_rate": 0.00017015022018397293, "loss": 2.9086, "step": 13600 }, { "epoch": 0.6367136754637486, "grad_norm": 1.3203125, "learning_rate": 0.0001701460599903015, "loss": 3.1879, "step": 13601 }, { "epoch": 0.6367604892035813, "grad_norm": 1.421875, "learning_rate": 0.0001701418995576114, "loss": 2.8829, "step": 13602 }, { "epoch": 0.6368073029434139, "grad_norm": 4.125, "learning_rate": 0.00017013773888591683, "loss": 2.3585, "step": 13603 }, { "epoch": 0.6368541166832465, "grad_norm": 1.3046875, "learning_rate": 0.00017013357797523198, "loss": 3.0963, "step": 13604 }, { "epoch": 0.6369009304230792, "grad_norm": 1.7109375, "learning_rate": 0.00017012941682557103, "loss": 2.8948, "step": 13605 }, { "epoch": 0.6369477441629118, "grad_norm": 1.6171875, "learning_rate": 0.00017012525543694815, "loss": 2.5851, "step": 13606 }, { "epoch": 0.6369945579027445, "grad_norm": 1.5546875, "learning_rate": 0.0001701210938093775, "loss": 2.669, "step": 13607 }, { "epoch": 0.6370413716425771, "grad_norm": 1.7109375, "learning_rate": 0.0001701169319428733, "loss": 2.6517, "step": 13608 }, { "epoch": 0.6370881853824097, "grad_norm": 1.2890625, "learning_rate": 0.0001701127698374497, "loss": 2.7683, "step": 13609 }, { "epoch": 0.6371349991222424, "grad_norm": 1.3828125, "learning_rate": 0.00017010860749312086, "loss": 2.5969, "step": 13610 }, { "epoch": 0.637181812862075, "grad_norm": 1.3828125, "learning_rate": 0.00017010444490990102, "loss": 2.4364, "step": 13611 }, { "epoch": 0.6372286266019077, "grad_norm": 1.1640625, "learning_rate": 0.0001701002820878043, "loss": 2.1125, "step": 13612 }, { "epoch": 0.6372754403417403, "grad_norm": 1.453125, "learning_rate": 0.00017009611902684495, "loss": 3.0414, "step": 13613 }, { "epoch": 0.637322254081573, "grad_norm": 1.6328125, "learning_rate": 0.00017009195572703712, "loss": 2.7214, "step": 13614 }, { "epoch": 0.6373690678214056, "grad_norm": 1.765625, "learning_rate": 0.000170087792188395, "loss": 2.7374, "step": 13615 }, { "epoch": 0.6374158815612382, "grad_norm": 1.375, "learning_rate": 0.0001700836284109328, "loss": 2.3675, "step": 13616 }, { "epoch": 0.6374626953010709, "grad_norm": 2.828125, "learning_rate": 0.00017007946439466463, "loss": 2.9174, "step": 13617 }, { "epoch": 0.6375095090409035, "grad_norm": 2.234375, "learning_rate": 0.00017007530013960482, "loss": 2.9064, "step": 13618 }, { "epoch": 0.6375563227807362, "grad_norm": 1.2734375, "learning_rate": 0.00017007113564576742, "loss": 2.759, "step": 13619 }, { "epoch": 0.6376031365205688, "grad_norm": 1.5234375, "learning_rate": 0.00017006697091316667, "loss": 2.5216, "step": 13620 }, { "epoch": 0.6376499502604014, "grad_norm": 1.4375, "learning_rate": 0.0001700628059418168, "loss": 2.6692, "step": 13621 }, { "epoch": 0.6376967640002341, "grad_norm": 1.3671875, "learning_rate": 0.00017005864073173194, "loss": 2.6101, "step": 13622 }, { "epoch": 0.6377435777400667, "grad_norm": 1.5546875, "learning_rate": 0.00017005447528292632, "loss": 2.7046, "step": 13623 }, { "epoch": 0.6377903914798994, "grad_norm": 1.4921875, "learning_rate": 0.00017005030959541415, "loss": 2.3515, "step": 13624 }, { "epoch": 0.637837205219732, "grad_norm": 1.421875, "learning_rate": 0.00017004614366920956, "loss": 2.4272, "step": 13625 }, { "epoch": 0.6378840189595646, "grad_norm": 1.7890625, "learning_rate": 0.0001700419775043268, "loss": 2.5496, "step": 13626 }, { "epoch": 0.6379308326993973, "grad_norm": 1.4921875, "learning_rate": 0.00017003781110078007, "loss": 2.9486, "step": 13627 }, { "epoch": 0.6379776464392299, "grad_norm": 1.7734375, "learning_rate": 0.00017003364445858355, "loss": 3.0232, "step": 13628 }, { "epoch": 0.6380244601790626, "grad_norm": 1.3359375, "learning_rate": 0.00017002947757775141, "loss": 2.7015, "step": 13629 }, { "epoch": 0.6380712739188952, "grad_norm": 1.21875, "learning_rate": 0.00017002531045829788, "loss": 2.7895, "step": 13630 }, { "epoch": 0.6381180876587278, "grad_norm": 1.3359375, "learning_rate": 0.00017002114310023714, "loss": 2.5806, "step": 13631 }, { "epoch": 0.6381649013985605, "grad_norm": 1.390625, "learning_rate": 0.00017001697550358342, "loss": 2.8047, "step": 13632 }, { "epoch": 0.6382117151383931, "grad_norm": 1.46875, "learning_rate": 0.00017001280766835089, "loss": 2.6691, "step": 13633 }, { "epoch": 0.6382585288782258, "grad_norm": 1.75, "learning_rate": 0.00017000863959455375, "loss": 2.8071, "step": 13634 }, { "epoch": 0.6383053426180584, "grad_norm": 0.9765625, "learning_rate": 0.00017000447128220623, "loss": 2.5755, "step": 13635 }, { "epoch": 0.638352156357891, "grad_norm": 1.1171875, "learning_rate": 0.00017000030273132255, "loss": 2.4426, "step": 13636 }, { "epoch": 0.6383989700977237, "grad_norm": 1.2578125, "learning_rate": 0.00016999613394191686, "loss": 2.4422, "step": 13637 }, { "epoch": 0.6384457838375563, "grad_norm": 1.375, "learning_rate": 0.0001699919649140034, "loss": 2.4411, "step": 13638 }, { "epoch": 0.638492597577389, "grad_norm": 1.703125, "learning_rate": 0.00016998779564759636, "loss": 2.6348, "step": 13639 }, { "epoch": 0.6385394113172216, "grad_norm": 1.390625, "learning_rate": 0.00016998362614270996, "loss": 2.7585, "step": 13640 }, { "epoch": 0.6385862250570542, "grad_norm": 1.3828125, "learning_rate": 0.00016997945639935836, "loss": 2.4137, "step": 13641 }, { "epoch": 0.6386330387968869, "grad_norm": 1.9453125, "learning_rate": 0.00016997528641755585, "loss": 2.4407, "step": 13642 }, { "epoch": 0.6386798525367196, "grad_norm": 1.71875, "learning_rate": 0.00016997111619731657, "loss": 2.5873, "step": 13643 }, { "epoch": 0.6387266662765522, "grad_norm": 2.09375, "learning_rate": 0.00016996694573865478, "loss": 2.8405, "step": 13644 }, { "epoch": 0.6387734800163848, "grad_norm": 1.2578125, "learning_rate": 0.00016996277504158463, "loss": 2.7603, "step": 13645 }, { "epoch": 0.6388202937562174, "grad_norm": 1.3359375, "learning_rate": 0.00016995860410612044, "loss": 2.5878, "step": 13646 }, { "epoch": 0.6388671074960501, "grad_norm": 1.3359375, "learning_rate": 0.00016995443293227628, "loss": 2.1925, "step": 13647 }, { "epoch": 0.6389139212358828, "grad_norm": 1.0546875, "learning_rate": 0.00016995026152006646, "loss": 2.6908, "step": 13648 }, { "epoch": 0.6389607349757154, "grad_norm": 1.46875, "learning_rate": 0.00016994608986950515, "loss": 3.265, "step": 13649 }, { "epoch": 0.639007548715548, "grad_norm": 1.3125, "learning_rate": 0.0001699419179806066, "loss": 3.0119, "step": 13650 }, { "epoch": 0.6390543624553806, "grad_norm": 0.94140625, "learning_rate": 0.00016993774585338502, "loss": 2.0578, "step": 13651 }, { "epoch": 0.6391011761952133, "grad_norm": 1.59375, "learning_rate": 0.0001699335734878546, "loss": 4.6432, "step": 13652 }, { "epoch": 0.639147989935046, "grad_norm": 1.40625, "learning_rate": 0.00016992940088402956, "loss": 2.7892, "step": 13653 }, { "epoch": 0.6391948036748786, "grad_norm": 1.2109375, "learning_rate": 0.00016992522804192413, "loss": 2.8245, "step": 13654 }, { "epoch": 0.6392416174147112, "grad_norm": 1.28125, "learning_rate": 0.00016992105496155252, "loss": 2.749, "step": 13655 }, { "epoch": 0.6392884311545438, "grad_norm": 1.4296875, "learning_rate": 0.00016991688164292898, "loss": 2.9105, "step": 13656 }, { "epoch": 0.6393352448943765, "grad_norm": 1.5859375, "learning_rate": 0.0001699127080860677, "loss": 2.6463, "step": 13657 }, { "epoch": 0.6393820586342092, "grad_norm": 1.203125, "learning_rate": 0.00016990853429098293, "loss": 2.5607, "step": 13658 }, { "epoch": 0.6394288723740418, "grad_norm": 1.8203125, "learning_rate": 0.0001699043602576888, "loss": 2.7098, "step": 13659 }, { "epoch": 0.6394756861138744, "grad_norm": 1.2578125, "learning_rate": 0.00016990018598619968, "loss": 2.7952, "step": 13660 }, { "epoch": 0.639522499853707, "grad_norm": 1.421875, "learning_rate": 0.0001698960114765297, "loss": 2.9017, "step": 13661 }, { "epoch": 0.6395693135935397, "grad_norm": 1.484375, "learning_rate": 0.00016989183672869306, "loss": 2.8109, "step": 13662 }, { "epoch": 0.6396161273333724, "grad_norm": 1.15625, "learning_rate": 0.00016988766174270406, "loss": 2.852, "step": 13663 }, { "epoch": 0.639662941073205, "grad_norm": 2.046875, "learning_rate": 0.00016988348651857685, "loss": 2.6435, "step": 13664 }, { "epoch": 0.6397097548130376, "grad_norm": 1.5625, "learning_rate": 0.00016987931105632575, "loss": 2.4891, "step": 13665 }, { "epoch": 0.6397565685528702, "grad_norm": 1.703125, "learning_rate": 0.00016987513535596493, "loss": 2.7429, "step": 13666 }, { "epoch": 0.639803382292703, "grad_norm": 1.265625, "learning_rate": 0.00016987095941750863, "loss": 2.6187, "step": 13667 }, { "epoch": 0.6398501960325356, "grad_norm": 1.125, "learning_rate": 0.00016986678324097104, "loss": 2.2741, "step": 13668 }, { "epoch": 0.6398970097723682, "grad_norm": 1.8125, "learning_rate": 0.00016986260682636645, "loss": 2.8971, "step": 13669 }, { "epoch": 0.6399438235122008, "grad_norm": 1.4921875, "learning_rate": 0.00016985843017370907, "loss": 2.8807, "step": 13670 }, { "epoch": 0.6399906372520334, "grad_norm": 1.3671875, "learning_rate": 0.00016985425328301312, "loss": 2.6686, "step": 13671 }, { "epoch": 0.6400374509918662, "grad_norm": 1.5078125, "learning_rate": 0.00016985007615429284, "loss": 2.3329, "step": 13672 }, { "epoch": 0.6400842647316988, "grad_norm": 1.3203125, "learning_rate": 0.00016984589878756245, "loss": 2.1819, "step": 13673 }, { "epoch": 0.6401310784715314, "grad_norm": 1.2109375, "learning_rate": 0.00016984172118283622, "loss": 2.8528, "step": 13674 }, { "epoch": 0.640177892211364, "grad_norm": 1.171875, "learning_rate": 0.00016983754334012836, "loss": 2.8791, "step": 13675 }, { "epoch": 0.6402247059511966, "grad_norm": 1.3828125, "learning_rate": 0.0001698333652594531, "loss": 2.8402, "step": 13676 }, { "epoch": 0.6402715196910294, "grad_norm": 1.2109375, "learning_rate": 0.0001698291869408247, "loss": 2.9074, "step": 13677 }, { "epoch": 0.640318333430862, "grad_norm": 1.1953125, "learning_rate": 0.00016982500838425734, "loss": 2.7849, "step": 13678 }, { "epoch": 0.6403651471706946, "grad_norm": 1.21875, "learning_rate": 0.00016982082958976532, "loss": 2.8275, "step": 13679 }, { "epoch": 0.6404119609105272, "grad_norm": 1.2890625, "learning_rate": 0.00016981665055736285, "loss": 2.8366, "step": 13680 }, { "epoch": 0.6404587746503598, "grad_norm": 1.765625, "learning_rate": 0.0001698124712870642, "loss": 2.6296, "step": 13681 }, { "epoch": 0.6405055883901926, "grad_norm": 1.9453125, "learning_rate": 0.00016980829177888358, "loss": 2.7966, "step": 13682 }, { "epoch": 0.6405524021300252, "grad_norm": 1.7734375, "learning_rate": 0.00016980411203283523, "loss": 2.9003, "step": 13683 }, { "epoch": 0.6405992158698578, "grad_norm": 1.203125, "learning_rate": 0.00016979993204893344, "loss": 2.1975, "step": 13684 }, { "epoch": 0.6406460296096904, "grad_norm": 1.640625, "learning_rate": 0.0001697957518271924, "loss": 3.2113, "step": 13685 }, { "epoch": 0.640692843349523, "grad_norm": 1.390625, "learning_rate": 0.00016979157136762636, "loss": 2.583, "step": 13686 }, { "epoch": 0.6407396570893558, "grad_norm": 1.3359375, "learning_rate": 0.0001697873906702496, "loss": 2.9154, "step": 13687 }, { "epoch": 0.6407864708291884, "grad_norm": 1.3671875, "learning_rate": 0.00016978320973507627, "loss": 2.6648, "step": 13688 }, { "epoch": 0.640833284569021, "grad_norm": 1.5859375, "learning_rate": 0.00016977902856212077, "loss": 2.8557, "step": 13689 }, { "epoch": 0.6408800983088536, "grad_norm": 1.5859375, "learning_rate": 0.0001697748471513972, "loss": 2.8553, "step": 13690 }, { "epoch": 0.6409269120486862, "grad_norm": 1.7734375, "learning_rate": 0.0001697706655029199, "loss": 3.8355, "step": 13691 }, { "epoch": 0.640973725788519, "grad_norm": 1.7421875, "learning_rate": 0.0001697664836167031, "loss": 3.0037, "step": 13692 }, { "epoch": 0.6410205395283516, "grad_norm": 1.203125, "learning_rate": 0.000169762301492761, "loss": 2.6132, "step": 13693 }, { "epoch": 0.6410673532681842, "grad_norm": 1.359375, "learning_rate": 0.00016975811913110794, "loss": 2.6882, "step": 13694 }, { "epoch": 0.6411141670080168, "grad_norm": 1.75, "learning_rate": 0.0001697539365317581, "loss": 2.6088, "step": 13695 }, { "epoch": 0.6411609807478494, "grad_norm": 1.3984375, "learning_rate": 0.00016974975369472576, "loss": 2.4838, "step": 13696 }, { "epoch": 0.6412077944876822, "grad_norm": 1.3046875, "learning_rate": 0.00016974557062002514, "loss": 2.7787, "step": 13697 }, { "epoch": 0.6412546082275148, "grad_norm": 0.98828125, "learning_rate": 0.00016974138730767055, "loss": 3.3737, "step": 13698 }, { "epoch": 0.6413014219673474, "grad_norm": 1.3515625, "learning_rate": 0.0001697372037576762, "loss": 2.6102, "step": 13699 }, { "epoch": 0.64134823570718, "grad_norm": 1.375, "learning_rate": 0.00016973301997005638, "loss": 2.538, "step": 13700 }, { "epoch": 0.6413950494470126, "grad_norm": 1.6953125, "learning_rate": 0.00016972883594482528, "loss": 2.7516, "step": 13701 }, { "epoch": 0.6414418631868454, "grad_norm": 1.28125, "learning_rate": 0.00016972465168199724, "loss": 2.3359, "step": 13702 }, { "epoch": 0.641488676926678, "grad_norm": 1.3671875, "learning_rate": 0.00016972046718158646, "loss": 2.5076, "step": 13703 }, { "epoch": 0.6415354906665106, "grad_norm": 1.25, "learning_rate": 0.00016971628244360725, "loss": 2.5125, "step": 13704 }, { "epoch": 0.6415823044063432, "grad_norm": 1.3984375, "learning_rate": 0.0001697120974680738, "loss": 2.537, "step": 13705 }, { "epoch": 0.6416291181461758, "grad_norm": 1.875, "learning_rate": 0.00016970791225500042, "loss": 2.7758, "step": 13706 }, { "epoch": 0.6416759318860086, "grad_norm": 1.609375, "learning_rate": 0.00016970372680440136, "loss": 2.6628, "step": 13707 }, { "epoch": 0.6417227456258412, "grad_norm": 2.9375, "learning_rate": 0.0001696995411162909, "loss": 2.8065, "step": 13708 }, { "epoch": 0.6417695593656738, "grad_norm": 1.1640625, "learning_rate": 0.00016969535519068324, "loss": 2.7524, "step": 13709 }, { "epoch": 0.6418163731055064, "grad_norm": 1.546875, "learning_rate": 0.00016969116902759273, "loss": 2.8257, "step": 13710 }, { "epoch": 0.6418631868453392, "grad_norm": 1.2265625, "learning_rate": 0.00016968698262703356, "loss": 2.532, "step": 13711 }, { "epoch": 0.6419100005851718, "grad_norm": 1.6328125, "learning_rate": 0.00016968279598902002, "loss": 2.9477, "step": 13712 }, { "epoch": 0.6419568143250044, "grad_norm": 1.2421875, "learning_rate": 0.0001696786091135664, "loss": 2.6634, "step": 13713 }, { "epoch": 0.642003628064837, "grad_norm": 1.828125, "learning_rate": 0.00016967442200068695, "loss": 2.7774, "step": 13714 }, { "epoch": 0.6420504418046696, "grad_norm": 1.453125, "learning_rate": 0.00016967023465039592, "loss": 2.9008, "step": 13715 }, { "epoch": 0.6420972555445024, "grad_norm": 1.1875, "learning_rate": 0.00016966604706270758, "loss": 2.3225, "step": 13716 }, { "epoch": 0.642144069284335, "grad_norm": 1.421875, "learning_rate": 0.00016966185923763624, "loss": 2.684, "step": 13717 }, { "epoch": 0.6421908830241676, "grad_norm": 1.4140625, "learning_rate": 0.00016965767117519613, "loss": 2.6125, "step": 13718 }, { "epoch": 0.6422376967640002, "grad_norm": 1.3671875, "learning_rate": 0.00016965348287540155, "loss": 2.8598, "step": 13719 }, { "epoch": 0.6422845105038328, "grad_norm": 1.265625, "learning_rate": 0.00016964929433826672, "loss": 2.4798, "step": 13720 }, { "epoch": 0.6423313242436656, "grad_norm": 1.6953125, "learning_rate": 0.00016964510556380597, "loss": 2.5839, "step": 13721 }, { "epoch": 0.6423781379834982, "grad_norm": 1.5546875, "learning_rate": 0.00016964091655203352, "loss": 2.8843, "step": 13722 }, { "epoch": 0.6424249517233308, "grad_norm": 2.3125, "learning_rate": 0.0001696367273029637, "loss": 2.3427, "step": 13723 }, { "epoch": 0.6424717654631634, "grad_norm": 1.2421875, "learning_rate": 0.00016963253781661074, "loss": 2.197, "step": 13724 }, { "epoch": 0.642518579202996, "grad_norm": 2.0, "learning_rate": 0.00016962834809298896, "loss": 2.8619, "step": 13725 }, { "epoch": 0.6425653929428288, "grad_norm": 1.34375, "learning_rate": 0.00016962415813211258, "loss": 2.781, "step": 13726 }, { "epoch": 0.6426122066826614, "grad_norm": 1.4375, "learning_rate": 0.0001696199679339959, "loss": 2.7841, "step": 13727 }, { "epoch": 0.642659020422494, "grad_norm": 1.9296875, "learning_rate": 0.00016961577749865324, "loss": 2.7698, "step": 13728 }, { "epoch": 0.6427058341623266, "grad_norm": 1.4609375, "learning_rate": 0.00016961158682609882, "loss": 2.8866, "step": 13729 }, { "epoch": 0.6427526479021592, "grad_norm": 2.34375, "learning_rate": 0.00016960739591634694, "loss": 2.7098, "step": 13730 }, { "epoch": 0.642799461641992, "grad_norm": 1.2734375, "learning_rate": 0.00016960320476941187, "loss": 2.246, "step": 13731 }, { "epoch": 0.6428462753818246, "grad_norm": 1.390625, "learning_rate": 0.00016959901338530796, "loss": 2.7993, "step": 13732 }, { "epoch": 0.6428930891216572, "grad_norm": 1.609375, "learning_rate": 0.00016959482176404938, "loss": 2.8753, "step": 13733 }, { "epoch": 0.6429399028614898, "grad_norm": 1.5, "learning_rate": 0.00016959062990565046, "loss": 2.8947, "step": 13734 }, { "epoch": 0.6429867166013224, "grad_norm": 1.8515625, "learning_rate": 0.0001695864378101255, "loss": 2.3309, "step": 13735 }, { "epoch": 0.6430335303411552, "grad_norm": 1.4375, "learning_rate": 0.0001695822454774888, "loss": 2.6802, "step": 13736 }, { "epoch": 0.6430803440809878, "grad_norm": 1.3125, "learning_rate": 0.0001695780529077546, "loss": 2.8384, "step": 13737 }, { "epoch": 0.6431271578208204, "grad_norm": 1.5390625, "learning_rate": 0.0001695738601009372, "loss": 2.7578, "step": 13738 }, { "epoch": 0.643173971560653, "grad_norm": 1.2421875, "learning_rate": 0.00016956966705705093, "loss": 2.6429, "step": 13739 }, { "epoch": 0.6432207853004857, "grad_norm": 1.7890625, "learning_rate": 0.00016956547377611, "loss": 2.8714, "step": 13740 }, { "epoch": 0.6432675990403184, "grad_norm": 1.390625, "learning_rate": 0.00016956128025812875, "loss": 2.0282, "step": 13741 }, { "epoch": 0.643314412780151, "grad_norm": 1.4921875, "learning_rate": 0.00016955708650312148, "loss": 2.5048, "step": 13742 }, { "epoch": 0.6433612265199836, "grad_norm": 1.5703125, "learning_rate": 0.00016955289251110246, "loss": 2.5781, "step": 13743 }, { "epoch": 0.6434080402598162, "grad_norm": 1.390625, "learning_rate": 0.00016954869828208596, "loss": 2.5638, "step": 13744 }, { "epoch": 0.6434548539996489, "grad_norm": 1.5234375, "learning_rate": 0.00016954450381608628, "loss": 2.7819, "step": 13745 }, { "epoch": 0.6435016677394816, "grad_norm": 1.34375, "learning_rate": 0.00016954030911311776, "loss": 2.6572, "step": 13746 }, { "epoch": 0.6435484814793142, "grad_norm": 1.6640625, "learning_rate": 0.0001695361141731946, "loss": 2.4616, "step": 13747 }, { "epoch": 0.6435952952191468, "grad_norm": 1.40625, "learning_rate": 0.00016953191899633118, "loss": 2.8976, "step": 13748 }, { "epoch": 0.6436421089589794, "grad_norm": 1.5390625, "learning_rate": 0.00016952772358254178, "loss": 2.4898, "step": 13749 }, { "epoch": 0.6436889226988121, "grad_norm": 2.5, "learning_rate": 0.00016952352793184064, "loss": 2.8513, "step": 13750 }, { "epoch": 0.6437357364386448, "grad_norm": 1.6171875, "learning_rate": 0.00016951933204424217, "loss": 2.6233, "step": 13751 }, { "epoch": 0.6437825501784774, "grad_norm": 1.3359375, "learning_rate": 0.00016951513591976055, "loss": 2.5909, "step": 13752 }, { "epoch": 0.64382936391831, "grad_norm": 1.609375, "learning_rate": 0.0001695109395584101, "loss": 3.0638, "step": 13753 }, { "epoch": 0.6438761776581426, "grad_norm": 1.140625, "learning_rate": 0.0001695067429602052, "loss": 2.511, "step": 13754 }, { "epoch": 0.6439229913979753, "grad_norm": 2.6875, "learning_rate": 0.00016950254612516005, "loss": 2.9872, "step": 13755 }, { "epoch": 0.643969805137808, "grad_norm": 1.2734375, "learning_rate": 0.00016949834905328895, "loss": 2.5591, "step": 13756 }, { "epoch": 0.6440166188776406, "grad_norm": 1.3125, "learning_rate": 0.00016949415174460631, "loss": 2.7075, "step": 13757 }, { "epoch": 0.6440634326174732, "grad_norm": 1.2421875, "learning_rate": 0.00016948995419912633, "loss": 2.7253, "step": 13758 }, { "epoch": 0.6441102463573058, "grad_norm": 1.2734375, "learning_rate": 0.0001694857564168634, "loss": 2.6196, "step": 13759 }, { "epoch": 0.6441570600971385, "grad_norm": 1.53125, "learning_rate": 0.00016948155839783172, "loss": 2.9048, "step": 13760 }, { "epoch": 0.6442038738369712, "grad_norm": 1.28125, "learning_rate": 0.00016947736014204568, "loss": 2.8989, "step": 13761 }, { "epoch": 0.6442506875768038, "grad_norm": 1.171875, "learning_rate": 0.00016947316164951954, "loss": 2.4186, "step": 13762 }, { "epoch": 0.6442975013166364, "grad_norm": 1.359375, "learning_rate": 0.0001694689629202676, "loss": 2.3798, "step": 13763 }, { "epoch": 0.644344315056469, "grad_norm": 1.3984375, "learning_rate": 0.00016946476395430417, "loss": 2.5711, "step": 13764 }, { "epoch": 0.6443911287963017, "grad_norm": 1.421875, "learning_rate": 0.0001694605647516436, "loss": 2.81, "step": 13765 }, { "epoch": 0.6444379425361344, "grad_norm": 1.46875, "learning_rate": 0.00016945636531230017, "loss": 2.6923, "step": 13766 }, { "epoch": 0.644484756275967, "grad_norm": 1.234375, "learning_rate": 0.0001694521656362882, "loss": 2.5483, "step": 13767 }, { "epoch": 0.6445315700157996, "grad_norm": 1.4296875, "learning_rate": 0.00016944796572362195, "loss": 2.5429, "step": 13768 }, { "epoch": 0.6445783837556323, "grad_norm": 1.28125, "learning_rate": 0.0001694437655743158, "loss": 2.705, "step": 13769 }, { "epoch": 0.6446251974954649, "grad_norm": 1.296875, "learning_rate": 0.00016943956518838404, "loss": 2.6196, "step": 13770 }, { "epoch": 0.6446720112352976, "grad_norm": 1.5703125, "learning_rate": 0.00016943536456584098, "loss": 2.6926, "step": 13771 }, { "epoch": 0.6447188249751302, "grad_norm": 1.5625, "learning_rate": 0.0001694311637067009, "loss": 2.7053, "step": 13772 }, { "epoch": 0.6447656387149628, "grad_norm": 2.0, "learning_rate": 0.00016942696261097818, "loss": 2.8436, "step": 13773 }, { "epoch": 0.6448124524547955, "grad_norm": 1.5625, "learning_rate": 0.00016942276127868703, "loss": 2.6553, "step": 13774 }, { "epoch": 0.6448592661946281, "grad_norm": 1.3984375, "learning_rate": 0.00016941855970984188, "loss": 2.6673, "step": 13775 }, { "epoch": 0.6449060799344608, "grad_norm": 1.3125, "learning_rate": 0.00016941435790445702, "loss": 2.7438, "step": 13776 }, { "epoch": 0.6449528936742934, "grad_norm": 1.5, "learning_rate": 0.0001694101558625467, "loss": 2.4685, "step": 13777 }, { "epoch": 0.644999707414126, "grad_norm": 1.6015625, "learning_rate": 0.00016940595358412534, "loss": 2.8175, "step": 13778 }, { "epoch": 0.6450465211539587, "grad_norm": 1.5078125, "learning_rate": 0.00016940175106920717, "loss": 2.7519, "step": 13779 }, { "epoch": 0.6450933348937913, "grad_norm": 2.421875, "learning_rate": 0.00016939754831780656, "loss": 2.6975, "step": 13780 }, { "epoch": 0.645140148633624, "grad_norm": 8.25, "learning_rate": 0.0001693933453299378, "loss": 2.3266, "step": 13781 }, { "epoch": 0.6451869623734566, "grad_norm": 1.3046875, "learning_rate": 0.00016938914210561524, "loss": 2.7358, "step": 13782 }, { "epoch": 0.6452337761132892, "grad_norm": 2.0625, "learning_rate": 0.00016938493864485319, "loss": 3.0648, "step": 13783 }, { "epoch": 0.6452805898531219, "grad_norm": 1.296875, "learning_rate": 0.00016938073494766597, "loss": 3.0019, "step": 13784 }, { "epoch": 0.6453274035929545, "grad_norm": 1.7265625, "learning_rate": 0.0001693765310140679, "loss": 2.8382, "step": 13785 }, { "epoch": 0.6453742173327872, "grad_norm": 2.015625, "learning_rate": 0.0001693723268440733, "loss": 3.0159, "step": 13786 }, { "epoch": 0.6454210310726198, "grad_norm": 1.2890625, "learning_rate": 0.00016936812243769652, "loss": 2.8937, "step": 13787 }, { "epoch": 0.6454678448124525, "grad_norm": 1.0390625, "learning_rate": 0.0001693639177949519, "loss": 2.129, "step": 13788 }, { "epoch": 0.6455146585522851, "grad_norm": 1.3984375, "learning_rate": 0.0001693597129158537, "loss": 2.8318, "step": 13789 }, { "epoch": 0.6455614722921177, "grad_norm": 1.7421875, "learning_rate": 0.00016935550780041632, "loss": 2.7465, "step": 13790 }, { "epoch": 0.6456082860319504, "grad_norm": 1.0859375, "learning_rate": 0.00016935130244865402, "loss": 2.6962, "step": 13791 }, { "epoch": 0.645655099771783, "grad_norm": 1.9765625, "learning_rate": 0.00016934709686058116, "loss": 3.0207, "step": 13792 }, { "epoch": 0.6457019135116157, "grad_norm": 1.4453125, "learning_rate": 0.0001693428910362121, "loss": 2.6265, "step": 13793 }, { "epoch": 0.6457487272514483, "grad_norm": 1.2890625, "learning_rate": 0.00016933868497556116, "loss": 2.8193, "step": 13794 }, { "epoch": 0.6457955409912809, "grad_norm": 1.6015625, "learning_rate": 0.00016933447867864264, "loss": 2.4729, "step": 13795 }, { "epoch": 0.6458423547311136, "grad_norm": 1.3984375, "learning_rate": 0.00016933027214547088, "loss": 2.8125, "step": 13796 }, { "epoch": 0.6458891684709462, "grad_norm": 2.015625, "learning_rate": 0.00016932606537606025, "loss": 2.3503, "step": 13797 }, { "epoch": 0.6459359822107789, "grad_norm": 1.5703125, "learning_rate": 0.00016932185837042506, "loss": 2.8467, "step": 13798 }, { "epoch": 0.6459827959506115, "grad_norm": 1.640625, "learning_rate": 0.00016931765112857962, "loss": 2.9268, "step": 13799 }, { "epoch": 0.6460296096904441, "grad_norm": 1.984375, "learning_rate": 0.00016931344365053833, "loss": 2.5475, "step": 13800 }, { "epoch": 0.6460764234302768, "grad_norm": 1.8359375, "learning_rate": 0.00016930923593631544, "loss": 2.788, "step": 13801 }, { "epoch": 0.6461232371701094, "grad_norm": 1.3046875, "learning_rate": 0.00016930502798592536, "loss": 2.5144, "step": 13802 }, { "epoch": 0.6461700509099421, "grad_norm": 1.2109375, "learning_rate": 0.0001693008197993824, "loss": 2.8939, "step": 13803 }, { "epoch": 0.6462168646497747, "grad_norm": 1.28125, "learning_rate": 0.0001692966113767009, "loss": 2.79, "step": 13804 }, { "epoch": 0.6462636783896073, "grad_norm": 1.1875, "learning_rate": 0.0001692924027178952, "loss": 2.7749, "step": 13805 }, { "epoch": 0.64631049212944, "grad_norm": 1.75, "learning_rate": 0.00016928819382297963, "loss": 3.0264, "step": 13806 }, { "epoch": 0.6463573058692726, "grad_norm": 1.1796875, "learning_rate": 0.0001692839846919686, "loss": 2.4849, "step": 13807 }, { "epoch": 0.6464041196091053, "grad_norm": 1.25, "learning_rate": 0.00016927977532487634, "loss": 2.8127, "step": 13808 }, { "epoch": 0.6464509333489379, "grad_norm": 1.2265625, "learning_rate": 0.00016927556572171728, "loss": 2.3769, "step": 13809 }, { "epoch": 0.6464977470887705, "grad_norm": 1.3515625, "learning_rate": 0.0001692713558825057, "loss": 2.5347, "step": 13810 }, { "epoch": 0.6465445608286032, "grad_norm": 1.34375, "learning_rate": 0.00016926714580725602, "loss": 2.9918, "step": 13811 }, { "epoch": 0.6465913745684359, "grad_norm": 1.2421875, "learning_rate": 0.0001692629354959825, "loss": 2.6351, "step": 13812 }, { "epoch": 0.6466381883082685, "grad_norm": 4.625, "learning_rate": 0.00016925872494869955, "loss": 2.3227, "step": 13813 }, { "epoch": 0.6466850020481011, "grad_norm": 1.125, "learning_rate": 0.00016925451416542153, "loss": 2.5, "step": 13814 }, { "epoch": 0.6467318157879337, "grad_norm": 1.5390625, "learning_rate": 0.00016925030314616273, "loss": 2.8347, "step": 13815 }, { "epoch": 0.6467786295277664, "grad_norm": 2.125, "learning_rate": 0.00016924609189093753, "loss": 2.303, "step": 13816 }, { "epoch": 0.646825443267599, "grad_norm": 1.125, "learning_rate": 0.00016924188039976024, "loss": 2.7928, "step": 13817 }, { "epoch": 0.6468722570074317, "grad_norm": 1.3125, "learning_rate": 0.0001692376686726453, "loss": 2.9271, "step": 13818 }, { "epoch": 0.6469190707472643, "grad_norm": 2.03125, "learning_rate": 0.000169233456709607, "loss": 3.0698, "step": 13819 }, { "epoch": 0.6469658844870969, "grad_norm": 1.1796875, "learning_rate": 0.00016922924451065967, "loss": 2.344, "step": 13820 }, { "epoch": 0.6470126982269296, "grad_norm": 2.03125, "learning_rate": 0.0001692250320758177, "loss": 2.8966, "step": 13821 }, { "epoch": 0.6470595119667623, "grad_norm": 1.40625, "learning_rate": 0.0001692208194050954, "loss": 2.9913, "step": 13822 }, { "epoch": 0.6471063257065949, "grad_norm": 1.546875, "learning_rate": 0.00016921660649850723, "loss": 2.7558, "step": 13823 }, { "epoch": 0.6471531394464275, "grad_norm": 1.2734375, "learning_rate": 0.0001692123933560674, "loss": 2.5578, "step": 13824 }, { "epoch": 0.6471999531862601, "grad_norm": 1.359375, "learning_rate": 0.00016920817997779039, "loss": 2.7478, "step": 13825 }, { "epoch": 0.6472467669260928, "grad_norm": 1.609375, "learning_rate": 0.0001692039663636905, "loss": 2.6889, "step": 13826 }, { "epoch": 0.6472935806659255, "grad_norm": 1.1328125, "learning_rate": 0.0001691997525137821, "loss": 2.562, "step": 13827 }, { "epoch": 0.6473403944057581, "grad_norm": 1.2890625, "learning_rate": 0.00016919553842807954, "loss": 2.5518, "step": 13828 }, { "epoch": 0.6473872081455907, "grad_norm": 1.4921875, "learning_rate": 0.00016919132410659718, "loss": 2.9232, "step": 13829 }, { "epoch": 0.6474340218854233, "grad_norm": 1.734375, "learning_rate": 0.0001691871095493494, "loss": 3.0013, "step": 13830 }, { "epoch": 0.647480835625256, "grad_norm": 1.3984375, "learning_rate": 0.0001691828947563505, "loss": 2.7714, "step": 13831 }, { "epoch": 0.6475276493650887, "grad_norm": 1.109375, "learning_rate": 0.00016917867972761493, "loss": 2.1486, "step": 13832 }, { "epoch": 0.6475744631049213, "grad_norm": 1.4609375, "learning_rate": 0.00016917446446315694, "loss": 2.2728, "step": 13833 }, { "epoch": 0.6476212768447539, "grad_norm": 1.4140625, "learning_rate": 0.00016917024896299102, "loss": 2.6741, "step": 13834 }, { "epoch": 0.6476680905845866, "grad_norm": 1.5, "learning_rate": 0.00016916603322713147, "loss": 2.6374, "step": 13835 }, { "epoch": 0.6477149043244192, "grad_norm": 1.421875, "learning_rate": 0.00016916181725559263, "loss": 2.5807, "step": 13836 }, { "epoch": 0.6477617180642519, "grad_norm": 1.484375, "learning_rate": 0.00016915760104838892, "loss": 2.3022, "step": 13837 }, { "epoch": 0.6478085318040845, "grad_norm": 1.4296875, "learning_rate": 0.0001691533846055347, "loss": 2.7893, "step": 13838 }, { "epoch": 0.6478553455439171, "grad_norm": 1.3671875, "learning_rate": 0.0001691491679270443, "loss": 2.463, "step": 13839 }, { "epoch": 0.6479021592837498, "grad_norm": 1.46875, "learning_rate": 0.0001691449510129321, "loss": 2.7205, "step": 13840 }, { "epoch": 0.6479489730235825, "grad_norm": 1.1171875, "learning_rate": 0.00016914073386321246, "loss": 2.6945, "step": 13841 }, { "epoch": 0.6479957867634151, "grad_norm": 1.390625, "learning_rate": 0.0001691365164778998, "loss": 2.629, "step": 13842 }, { "epoch": 0.6480426005032477, "grad_norm": 1.5, "learning_rate": 0.00016913229885700843, "loss": 2.5588, "step": 13843 }, { "epoch": 0.6480894142430803, "grad_norm": 1.2734375, "learning_rate": 0.00016912808100055275, "loss": 2.6502, "step": 13844 }, { "epoch": 0.648136227982913, "grad_norm": 2.375, "learning_rate": 0.00016912386290854716, "loss": 2.7516, "step": 13845 }, { "epoch": 0.6481830417227457, "grad_norm": 1.46875, "learning_rate": 0.00016911964458100597, "loss": 2.8485, "step": 13846 }, { "epoch": 0.6482298554625783, "grad_norm": 1.234375, "learning_rate": 0.00016911542601794364, "loss": 2.83, "step": 13847 }, { "epoch": 0.6482766692024109, "grad_norm": 1.2578125, "learning_rate": 0.00016911120721937445, "loss": 2.9045, "step": 13848 }, { "epoch": 0.6483234829422435, "grad_norm": 1.5078125, "learning_rate": 0.0001691069881853128, "loss": 2.5507, "step": 13849 }, { "epoch": 0.6483702966820762, "grad_norm": 1.59375, "learning_rate": 0.00016910276891577312, "loss": 3.0576, "step": 13850 }, { "epoch": 0.6484171104219089, "grad_norm": 1.734375, "learning_rate": 0.0001690985494107697, "loss": 2.3739, "step": 13851 }, { "epoch": 0.6484639241617415, "grad_norm": 1.4375, "learning_rate": 0.000169094329670317, "loss": 2.7595, "step": 13852 }, { "epoch": 0.6485107379015741, "grad_norm": 1.5625, "learning_rate": 0.00016909010969442934, "loss": 2.7796, "step": 13853 }, { "epoch": 0.6485575516414067, "grad_norm": 1.3515625, "learning_rate": 0.00016908588948312114, "loss": 2.55, "step": 13854 }, { "epoch": 0.6486043653812394, "grad_norm": 1.4921875, "learning_rate": 0.00016908166903640678, "loss": 2.9804, "step": 13855 }, { "epoch": 0.6486511791210721, "grad_norm": 1.5078125, "learning_rate": 0.00016907744835430063, "loss": 2.6843, "step": 13856 }, { "epoch": 0.6486979928609047, "grad_norm": 1.515625, "learning_rate": 0.00016907322743681707, "loss": 2.846, "step": 13857 }, { "epoch": 0.6487448066007373, "grad_norm": 11.75, "learning_rate": 0.00016906900628397043, "loss": 3.5756, "step": 13858 }, { "epoch": 0.6487916203405699, "grad_norm": 1.421875, "learning_rate": 0.00016906478489577518, "loss": 2.708, "step": 13859 }, { "epoch": 0.6488384340804026, "grad_norm": 1.9296875, "learning_rate": 0.0001690605632722457, "loss": 2.5349, "step": 13860 }, { "epoch": 0.6488852478202353, "grad_norm": 1.4296875, "learning_rate": 0.00016905634141339628, "loss": 2.6527, "step": 13861 }, { "epoch": 0.6489320615600679, "grad_norm": 1.5859375, "learning_rate": 0.0001690521193192414, "loss": 2.6491, "step": 13862 }, { "epoch": 0.6489788752999005, "grad_norm": 1.421875, "learning_rate": 0.00016904789698979542, "loss": 3.0215, "step": 13863 }, { "epoch": 0.6490256890397331, "grad_norm": 1.4453125, "learning_rate": 0.0001690436744250727, "loss": 2.6016, "step": 13864 }, { "epoch": 0.6490725027795659, "grad_norm": 1.484375, "learning_rate": 0.00016903945162508767, "loss": 2.0964, "step": 13865 }, { "epoch": 0.6491193165193985, "grad_norm": 1.1796875, "learning_rate": 0.0001690352285898547, "loss": 4.3232, "step": 13866 }, { "epoch": 0.6491661302592311, "grad_norm": 2.0, "learning_rate": 0.00016903100531938815, "loss": 2.7441, "step": 13867 }, { "epoch": 0.6492129439990637, "grad_norm": 1.3828125, "learning_rate": 0.00016902678181370247, "loss": 2.5859, "step": 13868 }, { "epoch": 0.6492597577388963, "grad_norm": 1.5703125, "learning_rate": 0.000169022558072812, "loss": 2.5616, "step": 13869 }, { "epoch": 0.649306571478729, "grad_norm": 1.21875, "learning_rate": 0.00016901833409673118, "loss": 2.6961, "step": 13870 }, { "epoch": 0.6493533852185617, "grad_norm": 1.7578125, "learning_rate": 0.00016901410988547438, "loss": 2.9068, "step": 13871 }, { "epoch": 0.6494001989583943, "grad_norm": 1.421875, "learning_rate": 0.00016900988543905597, "loss": 2.4619, "step": 13872 }, { "epoch": 0.6494470126982269, "grad_norm": 1.546875, "learning_rate": 0.00016900566075749035, "loss": 2.7975, "step": 13873 }, { "epoch": 0.6494938264380595, "grad_norm": 1.15625, "learning_rate": 0.00016900143584079195, "loss": 2.6532, "step": 13874 }, { "epoch": 0.6495406401778923, "grad_norm": 1.390625, "learning_rate": 0.00016899721068897513, "loss": 2.757, "step": 13875 }, { "epoch": 0.6495874539177249, "grad_norm": 1.4296875, "learning_rate": 0.00016899298530205433, "loss": 2.5611, "step": 13876 }, { "epoch": 0.6496342676575575, "grad_norm": 1.3984375, "learning_rate": 0.0001689887596800439, "loss": 2.8595, "step": 13877 }, { "epoch": 0.6496810813973901, "grad_norm": 1.4453125, "learning_rate": 0.00016898453382295826, "loss": 2.6406, "step": 13878 }, { "epoch": 0.6497278951372227, "grad_norm": 1.3984375, "learning_rate": 0.0001689803077308118, "loss": 2.7057, "step": 13879 }, { "epoch": 0.6497747088770555, "grad_norm": 1.5078125, "learning_rate": 0.00016897608140361894, "loss": 2.7561, "step": 13880 }, { "epoch": 0.6498215226168881, "grad_norm": 1.25, "learning_rate": 0.00016897185484139406, "loss": 2.3032, "step": 13881 }, { "epoch": 0.6498683363567207, "grad_norm": 1.3203125, "learning_rate": 0.00016896762804415158, "loss": 2.6894, "step": 13882 }, { "epoch": 0.6499151500965533, "grad_norm": 1.578125, "learning_rate": 0.00016896340101190592, "loss": 2.7303, "step": 13883 }, { "epoch": 0.6499619638363859, "grad_norm": 1.3828125, "learning_rate": 0.0001689591737446714, "loss": 2.8633, "step": 13884 }, { "epoch": 0.6500087775762187, "grad_norm": 1.515625, "learning_rate": 0.00016895494624246253, "loss": 2.7593, "step": 13885 }, { "epoch": 0.6500555913160513, "grad_norm": 1.2265625, "learning_rate": 0.00016895071850529366, "loss": 2.5286, "step": 13886 }, { "epoch": 0.6501024050558839, "grad_norm": 1.734375, "learning_rate": 0.00016894649053317919, "loss": 2.6338, "step": 13887 }, { "epoch": 0.6501492187957165, "grad_norm": 1.609375, "learning_rate": 0.00016894226232613355, "loss": 2.6172, "step": 13888 }, { "epoch": 0.6501960325355491, "grad_norm": 1.6953125, "learning_rate": 0.00016893803388417112, "loss": 2.9521, "step": 13889 }, { "epoch": 0.6502428462753819, "grad_norm": 1.625, "learning_rate": 0.00016893380520730634, "loss": 2.7039, "step": 13890 }, { "epoch": 0.6502896600152145, "grad_norm": 1.7265625, "learning_rate": 0.0001689295762955536, "loss": 2.7412, "step": 13891 }, { "epoch": 0.6503364737550471, "grad_norm": 1.40625, "learning_rate": 0.00016892534714892728, "loss": 2.8331, "step": 13892 }, { "epoch": 0.6503832874948797, "grad_norm": 1.7578125, "learning_rate": 0.00016892111776744187, "loss": 2.917, "step": 13893 }, { "epoch": 0.6504301012347123, "grad_norm": 1.40625, "learning_rate": 0.0001689168881511117, "loss": 2.5976, "step": 13894 }, { "epoch": 0.6504769149745451, "grad_norm": 1.3828125, "learning_rate": 0.00016891265829995126, "loss": 2.6641, "step": 13895 }, { "epoch": 0.6505237287143777, "grad_norm": 1.3046875, "learning_rate": 0.0001689084282139749, "loss": 2.6478, "step": 13896 }, { "epoch": 0.6505705424542103, "grad_norm": 1.3515625, "learning_rate": 0.00016890419789319702, "loss": 2.6068, "step": 13897 }, { "epoch": 0.6506173561940429, "grad_norm": 1.828125, "learning_rate": 0.0001688999673376321, "loss": 2.5042, "step": 13898 }, { "epoch": 0.6506641699338755, "grad_norm": 1.28125, "learning_rate": 0.00016889573654729452, "loss": 2.542, "step": 13899 }, { "epoch": 0.6507109836737083, "grad_norm": 1.7578125, "learning_rate": 0.00016889150552219873, "loss": 3.1772, "step": 13900 }, { "epoch": 0.6507577974135409, "grad_norm": 1.53125, "learning_rate": 0.00016888727426235905, "loss": 2.7446, "step": 13901 }, { "epoch": 0.6508046111533735, "grad_norm": 1.4609375, "learning_rate": 0.00016888304276779, "loss": 2.6639, "step": 13902 }, { "epoch": 0.6508514248932061, "grad_norm": 1.59375, "learning_rate": 0.00016887881103850599, "loss": 2.7763, "step": 13903 }, { "epoch": 0.6508982386330388, "grad_norm": 1.40625, "learning_rate": 0.0001688745790745214, "loss": 2.8699, "step": 13904 }, { "epoch": 0.6509450523728715, "grad_norm": 1.296875, "learning_rate": 0.00016887034687585066, "loss": 2.5691, "step": 13905 }, { "epoch": 0.6509918661127041, "grad_norm": 1.28125, "learning_rate": 0.00016886611444250817, "loss": 2.7139, "step": 13906 }, { "epoch": 0.6510386798525367, "grad_norm": 1.59375, "learning_rate": 0.0001688618817745084, "loss": 2.7332, "step": 13907 }, { "epoch": 0.6510854935923693, "grad_norm": 2.296875, "learning_rate": 0.00016885764887186576, "loss": 2.7686, "step": 13908 }, { "epoch": 0.651132307332202, "grad_norm": 1.3359375, "learning_rate": 0.00016885341573459462, "loss": 2.6297, "step": 13909 }, { "epoch": 0.6511791210720347, "grad_norm": 1.1640625, "learning_rate": 0.00016884918236270946, "loss": 2.4908, "step": 13910 }, { "epoch": 0.6512259348118673, "grad_norm": 1.2890625, "learning_rate": 0.0001688449487562247, "loss": 2.4807, "step": 13911 }, { "epoch": 0.6512727485516999, "grad_norm": 1.3828125, "learning_rate": 0.00016884071491515473, "loss": 2.7047, "step": 13912 }, { "epoch": 0.6513195622915325, "grad_norm": 1.546875, "learning_rate": 0.00016883648083951405, "loss": 3.007, "step": 13913 }, { "epoch": 0.6513663760313652, "grad_norm": 1.453125, "learning_rate": 0.000168832246529317, "loss": 2.2384, "step": 13914 }, { "epoch": 0.6514131897711979, "grad_norm": 1.2734375, "learning_rate": 0.0001688280119845781, "loss": 2.2724, "step": 13915 }, { "epoch": 0.6514600035110305, "grad_norm": 1.6484375, "learning_rate": 0.00016882377720531168, "loss": 2.4464, "step": 13916 }, { "epoch": 0.6515068172508631, "grad_norm": 1.9609375, "learning_rate": 0.00016881954219153222, "loss": 2.9031, "step": 13917 }, { "epoch": 0.6515536309906957, "grad_norm": 1.453125, "learning_rate": 0.00016881530694325418, "loss": 2.8871, "step": 13918 }, { "epoch": 0.6516004447305284, "grad_norm": 1.3671875, "learning_rate": 0.00016881107146049192, "loss": 2.7247, "step": 13919 }, { "epoch": 0.6516472584703611, "grad_norm": 1.5078125, "learning_rate": 0.00016880683574325994, "loss": 2.7065, "step": 13920 }, { "epoch": 0.6516940722101937, "grad_norm": 1.4453125, "learning_rate": 0.00016880259979157262, "loss": 2.8121, "step": 13921 }, { "epoch": 0.6517408859500263, "grad_norm": 1.6640625, "learning_rate": 0.00016879836360544442, "loss": 2.6369, "step": 13922 }, { "epoch": 0.651787699689859, "grad_norm": 1.53125, "learning_rate": 0.00016879412718488981, "loss": 2.7923, "step": 13923 }, { "epoch": 0.6518345134296916, "grad_norm": 1.4140625, "learning_rate": 0.00016878989052992319, "loss": 2.5579, "step": 13924 }, { "epoch": 0.6518813271695243, "grad_norm": 1.6875, "learning_rate": 0.00016878565364055893, "loss": 2.7789, "step": 13925 }, { "epoch": 0.6519281409093569, "grad_norm": 2.734375, "learning_rate": 0.00016878141651681158, "loss": 3.017, "step": 13926 }, { "epoch": 0.6519749546491895, "grad_norm": 1.3046875, "learning_rate": 0.00016877717915869553, "loss": 2.7806, "step": 13927 }, { "epoch": 0.6520217683890221, "grad_norm": 1.5234375, "learning_rate": 0.00016877294156622522, "loss": 2.604, "step": 13928 }, { "epoch": 0.6520685821288548, "grad_norm": 1.3125, "learning_rate": 0.00016876870373941504, "loss": 2.4558, "step": 13929 }, { "epoch": 0.6521153958686875, "grad_norm": 1.2578125, "learning_rate": 0.00016876446567827953, "loss": 2.4608, "step": 13930 }, { "epoch": 0.6521622096085201, "grad_norm": 1.34375, "learning_rate": 0.00016876022738283305, "loss": 2.8493, "step": 13931 }, { "epoch": 0.6522090233483527, "grad_norm": 2.46875, "learning_rate": 0.00016875598885309008, "loss": 2.6922, "step": 13932 }, { "epoch": 0.6522558370881854, "grad_norm": 1.78125, "learning_rate": 0.00016875175008906505, "loss": 2.5883, "step": 13933 }, { "epoch": 0.652302650828018, "grad_norm": 1.375, "learning_rate": 0.0001687475110907724, "loss": 2.8953, "step": 13934 }, { "epoch": 0.6523494645678507, "grad_norm": 2.359375, "learning_rate": 0.0001687432718582266, "loss": 2.5973, "step": 13935 }, { "epoch": 0.6523962783076833, "grad_norm": 1.2578125, "learning_rate": 0.00016873903239144204, "loss": 2.6786, "step": 13936 }, { "epoch": 0.6524430920475159, "grad_norm": 1.4921875, "learning_rate": 0.00016873479269043323, "loss": 2.8791, "step": 13937 }, { "epoch": 0.6524899057873486, "grad_norm": 1.4375, "learning_rate": 0.00016873055275521458, "loss": 2.7864, "step": 13938 }, { "epoch": 0.6525367195271812, "grad_norm": 1.125, "learning_rate": 0.00016872631258580054, "loss": 2.1747, "step": 13939 }, { "epoch": 0.6525835332670139, "grad_norm": 1.5234375, "learning_rate": 0.00016872207218220558, "loss": 2.6169, "step": 13940 }, { "epoch": 0.6526303470068465, "grad_norm": 1.421875, "learning_rate": 0.00016871783154444412, "loss": 2.7144, "step": 13941 }, { "epoch": 0.6526771607466791, "grad_norm": 2.0, "learning_rate": 0.0001687135906725306, "loss": 3.0084, "step": 13942 }, { "epoch": 0.6527239744865118, "grad_norm": 1.25, "learning_rate": 0.0001687093495664795, "loss": 2.3562, "step": 13943 }, { "epoch": 0.6527707882263444, "grad_norm": 1.6484375, "learning_rate": 0.00016870510822630528, "loss": 2.7265, "step": 13944 }, { "epoch": 0.6528176019661771, "grad_norm": 1.265625, "learning_rate": 0.00016870086665202237, "loss": 2.3899, "step": 13945 }, { "epoch": 0.6528644157060097, "grad_norm": 1.21875, "learning_rate": 0.00016869662484364522, "loss": 2.8161, "step": 13946 }, { "epoch": 0.6529112294458423, "grad_norm": 1.7421875, "learning_rate": 0.00016869238280118828, "loss": 2.81, "step": 13947 }, { "epoch": 0.652958043185675, "grad_norm": 1.359375, "learning_rate": 0.00016868814052466605, "loss": 2.6731, "step": 13948 }, { "epoch": 0.6530048569255076, "grad_norm": 1.578125, "learning_rate": 0.00016868389801409292, "loss": 3.5491, "step": 13949 }, { "epoch": 0.6530516706653403, "grad_norm": 1.375, "learning_rate": 0.00016867965526948342, "loss": 2.7101, "step": 13950 }, { "epoch": 0.6530984844051729, "grad_norm": 1.2421875, "learning_rate": 0.0001686754122908519, "loss": 2.4018, "step": 13951 }, { "epoch": 0.6531452981450055, "grad_norm": 1.4140625, "learning_rate": 0.00016867116907821295, "loss": 2.6974, "step": 13952 }, { "epoch": 0.6531921118848382, "grad_norm": 1.4765625, "learning_rate": 0.00016866692563158092, "loss": 2.5899, "step": 13953 }, { "epoch": 0.6532389256246709, "grad_norm": 2.21875, "learning_rate": 0.00016866268195097034, "loss": 2.4425, "step": 13954 }, { "epoch": 0.6532857393645035, "grad_norm": 1.34375, "learning_rate": 0.00016865843803639562, "loss": 3.0683, "step": 13955 }, { "epoch": 0.6533325531043361, "grad_norm": 1.6875, "learning_rate": 0.00016865419388787121, "loss": 2.5858, "step": 13956 }, { "epoch": 0.6533793668441688, "grad_norm": 2.296875, "learning_rate": 0.00016864994950541165, "loss": 2.7334, "step": 13957 }, { "epoch": 0.6534261805840014, "grad_norm": 1.4453125, "learning_rate": 0.00016864570488903132, "loss": 2.5563, "step": 13958 }, { "epoch": 0.6534729943238341, "grad_norm": 2.078125, "learning_rate": 0.00016864146003874476, "loss": 2.3497, "step": 13959 }, { "epoch": 0.6535198080636667, "grad_norm": 1.2734375, "learning_rate": 0.00016863721495456635, "loss": 2.7185, "step": 13960 }, { "epoch": 0.6535666218034993, "grad_norm": 4.84375, "learning_rate": 0.00016863296963651062, "loss": 2.9214, "step": 13961 }, { "epoch": 0.653613435543332, "grad_norm": 1.28125, "learning_rate": 0.00016862872408459202, "loss": 2.8412, "step": 13962 }, { "epoch": 0.6536602492831646, "grad_norm": 1.375, "learning_rate": 0.000168624478298825, "loss": 2.7202, "step": 13963 }, { "epoch": 0.6537070630229973, "grad_norm": 1.5625, "learning_rate": 0.00016862023227922403, "loss": 2.7511, "step": 13964 }, { "epoch": 0.6537538767628299, "grad_norm": 1.140625, "learning_rate": 0.0001686159860258036, "loss": 2.8052, "step": 13965 }, { "epoch": 0.6538006905026625, "grad_norm": 1.078125, "learning_rate": 0.00016861173953857815, "loss": 2.551, "step": 13966 }, { "epoch": 0.6538475042424952, "grad_norm": 1.296875, "learning_rate": 0.00016860749281756216, "loss": 2.4049, "step": 13967 }, { "epoch": 0.6538943179823278, "grad_norm": 1.1484375, "learning_rate": 0.0001686032458627701, "loss": 2.3189, "step": 13968 }, { "epoch": 0.6539411317221605, "grad_norm": 2.015625, "learning_rate": 0.00016859899867421646, "loss": 2.3812, "step": 13969 }, { "epoch": 0.6539879454619931, "grad_norm": 1.4296875, "learning_rate": 0.0001685947512519157, "loss": 2.583, "step": 13970 }, { "epoch": 0.6540347592018257, "grad_norm": 1.71875, "learning_rate": 0.00016859050359588232, "loss": 2.9822, "step": 13971 }, { "epoch": 0.6540815729416584, "grad_norm": 1.3828125, "learning_rate": 0.00016858625570613072, "loss": 2.4497, "step": 13972 }, { "epoch": 0.654128386681491, "grad_norm": 1.3359375, "learning_rate": 0.00016858200758267542, "loss": 2.3968, "step": 13973 }, { "epoch": 0.6541752004213237, "grad_norm": 1.5546875, "learning_rate": 0.00016857775922553088, "loss": 2.7153, "step": 13974 }, { "epoch": 0.6542220141611563, "grad_norm": 1.546875, "learning_rate": 0.00016857351063471164, "loss": 2.8759, "step": 13975 }, { "epoch": 0.654268827900989, "grad_norm": 2.34375, "learning_rate": 0.00016856926181023208, "loss": 2.6039, "step": 13976 }, { "epoch": 0.6543156416408216, "grad_norm": 1.4375, "learning_rate": 0.00016856501275210674, "loss": 2.7131, "step": 13977 }, { "epoch": 0.6543624553806542, "grad_norm": 1.4375, "learning_rate": 0.00016856076346035006, "loss": 2.6055, "step": 13978 }, { "epoch": 0.6544092691204869, "grad_norm": 1.4921875, "learning_rate": 0.00016855651393497654, "loss": 2.5842, "step": 13979 }, { "epoch": 0.6544560828603195, "grad_norm": 1.390625, "learning_rate": 0.00016855226417600072, "loss": 2.771, "step": 13980 }, { "epoch": 0.6545028966001522, "grad_norm": 4.46875, "learning_rate": 0.000168548014183437, "loss": 2.3458, "step": 13981 }, { "epoch": 0.6545497103399848, "grad_norm": 1.625, "learning_rate": 0.00016854376395729988, "loss": 2.5279, "step": 13982 }, { "epoch": 0.6545965240798174, "grad_norm": 1.2890625, "learning_rate": 0.0001685395134976038, "loss": 2.3633, "step": 13983 }, { "epoch": 0.6546433378196501, "grad_norm": 1.453125, "learning_rate": 0.00016853526280436335, "loss": 2.4095, "step": 13984 }, { "epoch": 0.6546901515594827, "grad_norm": 1.6953125, "learning_rate": 0.00016853101187759296, "loss": 2.9369, "step": 13985 }, { "epoch": 0.6547369652993154, "grad_norm": 2.546875, "learning_rate": 0.00016852676071730705, "loss": 2.4897, "step": 13986 }, { "epoch": 0.654783779039148, "grad_norm": 1.640625, "learning_rate": 0.00016852250932352018, "loss": 2.6339, "step": 13987 }, { "epoch": 0.6548305927789806, "grad_norm": 1.2890625, "learning_rate": 0.00016851825769624682, "loss": 2.9992, "step": 13988 }, { "epoch": 0.6548774065188133, "grad_norm": 1.5625, "learning_rate": 0.00016851400583550147, "loss": 2.5499, "step": 13989 }, { "epoch": 0.6549242202586459, "grad_norm": 1.53125, "learning_rate": 0.00016850975374129866, "loss": 2.4185, "step": 13990 }, { "epoch": 0.6549710339984786, "grad_norm": 1.390625, "learning_rate": 0.00016850550141365273, "loss": 2.5867, "step": 13991 }, { "epoch": 0.6550178477383112, "grad_norm": 1.640625, "learning_rate": 0.00016850124885257832, "loss": 2.7794, "step": 13992 }, { "epoch": 0.6550646614781438, "grad_norm": 1.375, "learning_rate": 0.00016849699605808984, "loss": 2.438, "step": 13993 }, { "epoch": 0.6551114752179765, "grad_norm": 1.8046875, "learning_rate": 0.0001684927430302018, "loss": 2.5958, "step": 13994 }, { "epoch": 0.6551582889578091, "grad_norm": 1.46875, "learning_rate": 0.00016848848976892873, "loss": 2.7337, "step": 13995 }, { "epoch": 0.6552051026976418, "grad_norm": 2.21875, "learning_rate": 0.00016848423627428507, "loss": 2.5117, "step": 13996 }, { "epoch": 0.6552519164374744, "grad_norm": 1.1328125, "learning_rate": 0.00016847998254628532, "loss": 2.8192, "step": 13997 }, { "epoch": 0.655298730177307, "grad_norm": 1.3203125, "learning_rate": 0.000168475728584944, "loss": 2.7385, "step": 13998 }, { "epoch": 0.6553455439171397, "grad_norm": 1.1015625, "learning_rate": 0.00016847147439027563, "loss": 2.4798, "step": 13999 }, { "epoch": 0.6553923576569723, "grad_norm": 1.765625, "learning_rate": 0.00016846721996229463, "loss": 2.898, "step": 14000 }, { "epoch": 0.655439171396805, "grad_norm": 1.28125, "learning_rate": 0.0001684629653010155, "loss": 2.4521, "step": 14001 }, { "epoch": 0.6554859851366376, "grad_norm": 1.28125, "learning_rate": 0.0001684587104064528, "loss": 2.1915, "step": 14002 }, { "epoch": 0.6555327988764702, "grad_norm": 1.3359375, "learning_rate": 0.00016845445527862105, "loss": 2.9206, "step": 14003 }, { "epoch": 0.6555796126163029, "grad_norm": 1.1796875, "learning_rate": 0.00016845019991753465, "loss": 2.7261, "step": 14004 }, { "epoch": 0.6556264263561355, "grad_norm": 1.296875, "learning_rate": 0.00016844594432320818, "loss": 2.4435, "step": 14005 }, { "epoch": 0.6556732400959682, "grad_norm": 1.3125, "learning_rate": 0.0001684416884956561, "loss": 2.6139, "step": 14006 }, { "epoch": 0.6557200538358008, "grad_norm": 1.265625, "learning_rate": 0.00016843743243489292, "loss": 2.3442, "step": 14007 }, { "epoch": 0.6557668675756334, "grad_norm": 1.71875, "learning_rate": 0.00016843317614093312, "loss": 2.4215, "step": 14008 }, { "epoch": 0.6558136813154661, "grad_norm": 2.109375, "learning_rate": 0.0001684289196137913, "loss": 2.8741, "step": 14009 }, { "epoch": 0.6558604950552988, "grad_norm": 2.0625, "learning_rate": 0.0001684246628534818, "loss": 2.6331, "step": 14010 }, { "epoch": 0.6559073087951314, "grad_norm": 1.15625, "learning_rate": 0.00016842040586001925, "loss": 2.7442, "step": 14011 }, { "epoch": 0.655954122534964, "grad_norm": 1.015625, "learning_rate": 0.00016841614863341814, "loss": 2.1471, "step": 14012 }, { "epoch": 0.6560009362747966, "grad_norm": 1.234375, "learning_rate": 0.00016841189117369293, "loss": 2.7565, "step": 14013 }, { "epoch": 0.6560477500146293, "grad_norm": 1.0625, "learning_rate": 0.00016840763348085819, "loss": 2.9743, "step": 14014 }, { "epoch": 0.656094563754462, "grad_norm": 1.4375, "learning_rate": 0.0001684033755549284, "loss": 2.8368, "step": 14015 }, { "epoch": 0.6561413774942946, "grad_norm": 1.09375, "learning_rate": 0.00016839911739591803, "loss": 2.83, "step": 14016 }, { "epoch": 0.6561881912341272, "grad_norm": 2.265625, "learning_rate": 0.00016839485900384162, "loss": 3.1096, "step": 14017 }, { "epoch": 0.6562350049739598, "grad_norm": 1.484375, "learning_rate": 0.00016839060037871368, "loss": 2.9027, "step": 14018 }, { "epoch": 0.6562818187137925, "grad_norm": 1.375, "learning_rate": 0.00016838634152054876, "loss": 2.5801, "step": 14019 }, { "epoch": 0.6563286324536252, "grad_norm": 1.28125, "learning_rate": 0.00016838208242936128, "loss": 2.4669, "step": 14020 }, { "epoch": 0.6563754461934578, "grad_norm": 1.6796875, "learning_rate": 0.00016837782310516586, "loss": 2.8235, "step": 14021 }, { "epoch": 0.6564222599332904, "grad_norm": 1.28125, "learning_rate": 0.00016837356354797693, "loss": 2.2732, "step": 14022 }, { "epoch": 0.656469073673123, "grad_norm": 1.515625, "learning_rate": 0.00016836930375780903, "loss": 2.8085, "step": 14023 }, { "epoch": 0.6565158874129557, "grad_norm": 1.6875, "learning_rate": 0.0001683650437346767, "loss": 2.2269, "step": 14024 }, { "epoch": 0.6565627011527884, "grad_norm": 1.5546875, "learning_rate": 0.00016836078347859444, "loss": 2.7654, "step": 14025 }, { "epoch": 0.656609514892621, "grad_norm": 1.4140625, "learning_rate": 0.0001683565229895767, "loss": 2.6613, "step": 14026 }, { "epoch": 0.6566563286324536, "grad_norm": 1.2265625, "learning_rate": 0.0001683522622676381, "loss": 2.5959, "step": 14027 }, { "epoch": 0.6567031423722862, "grad_norm": 1.3359375, "learning_rate": 0.00016834800131279316, "loss": 2.8133, "step": 14028 }, { "epoch": 0.656749956112119, "grad_norm": 1.3984375, "learning_rate": 0.00016834374012505632, "loss": 2.7422, "step": 14029 }, { "epoch": 0.6567967698519516, "grad_norm": 1.6171875, "learning_rate": 0.00016833947870444212, "loss": 3.0335, "step": 14030 }, { "epoch": 0.6568435835917842, "grad_norm": 1.3671875, "learning_rate": 0.0001683352170509651, "loss": 2.9802, "step": 14031 }, { "epoch": 0.6568903973316168, "grad_norm": 1.34375, "learning_rate": 0.0001683309551646398, "loss": 2.7861, "step": 14032 }, { "epoch": 0.6569372110714494, "grad_norm": 1.46875, "learning_rate": 0.00016832669304548073, "loss": 2.576, "step": 14033 }, { "epoch": 0.6569840248112822, "grad_norm": 1.5, "learning_rate": 0.00016832243069350237, "loss": 2.5413, "step": 14034 }, { "epoch": 0.6570308385511148, "grad_norm": 2.03125, "learning_rate": 0.00016831816810871928, "loss": 2.8887, "step": 14035 }, { "epoch": 0.6570776522909474, "grad_norm": 1.34375, "learning_rate": 0.000168313905291146, "loss": 2.781, "step": 14036 }, { "epoch": 0.65712446603078, "grad_norm": 1.234375, "learning_rate": 0.00016830964224079703, "loss": 2.6384, "step": 14037 }, { "epoch": 0.6571712797706126, "grad_norm": 1.46875, "learning_rate": 0.0001683053789576869, "loss": 2.6094, "step": 14038 }, { "epoch": 0.6572180935104454, "grad_norm": 1.8125, "learning_rate": 0.00016830111544183013, "loss": 2.8928, "step": 14039 }, { "epoch": 0.657264907250278, "grad_norm": 1.96875, "learning_rate": 0.00016829685169324129, "loss": 2.9099, "step": 14040 }, { "epoch": 0.6573117209901106, "grad_norm": 1.6484375, "learning_rate": 0.00016829258771193484, "loss": 3.5032, "step": 14041 }, { "epoch": 0.6573585347299432, "grad_norm": 1.3984375, "learning_rate": 0.00016828832349792533, "loss": 2.9614, "step": 14042 }, { "epoch": 0.6574053484697758, "grad_norm": 1.7578125, "learning_rate": 0.00016828405905122736, "loss": 2.8366, "step": 14043 }, { "epoch": 0.6574521622096086, "grad_norm": 1.3203125, "learning_rate": 0.00016827979437185534, "loss": 2.57, "step": 14044 }, { "epoch": 0.6574989759494412, "grad_norm": 2.046875, "learning_rate": 0.00016827552945982392, "loss": 2.7698, "step": 14045 }, { "epoch": 0.6575457896892738, "grad_norm": 1.265625, "learning_rate": 0.00016827126431514757, "loss": 2.6531, "step": 14046 }, { "epoch": 0.6575926034291064, "grad_norm": 1.921875, "learning_rate": 0.00016826699893784084, "loss": 2.8444, "step": 14047 }, { "epoch": 0.657639417168939, "grad_norm": 1.109375, "learning_rate": 0.00016826273332791822, "loss": 2.2217, "step": 14048 }, { "epoch": 0.6576862309087718, "grad_norm": 1.3359375, "learning_rate": 0.00016825846748539432, "loss": 2.7438, "step": 14049 }, { "epoch": 0.6577330446486044, "grad_norm": 1.3671875, "learning_rate": 0.0001682542014102836, "loss": 2.5291, "step": 14050 }, { "epoch": 0.657779858388437, "grad_norm": 1.25, "learning_rate": 0.00016824993510260067, "loss": 2.9901, "step": 14051 }, { "epoch": 0.6578266721282696, "grad_norm": 1.09375, "learning_rate": 0.00016824566856236004, "loss": 2.6427, "step": 14052 }, { "epoch": 0.6578734858681022, "grad_norm": 2.234375, "learning_rate": 0.00016824140178957618, "loss": 2.7659, "step": 14053 }, { "epoch": 0.657920299607935, "grad_norm": 1.578125, "learning_rate": 0.00016823713478426373, "loss": 2.7721, "step": 14054 }, { "epoch": 0.6579671133477676, "grad_norm": 1.296875, "learning_rate": 0.00016823286754643718, "loss": 2.3643, "step": 14055 }, { "epoch": 0.6580139270876002, "grad_norm": 1.4453125, "learning_rate": 0.0001682286000761111, "loss": 2.8174, "step": 14056 }, { "epoch": 0.6580607408274328, "grad_norm": 1.6015625, "learning_rate": 0.0001682243323733, "loss": 4.1302, "step": 14057 }, { "epoch": 0.6581075545672654, "grad_norm": 1.3828125, "learning_rate": 0.00016822006443801844, "loss": 2.6209, "step": 14058 }, { "epoch": 0.6581543683070982, "grad_norm": 1.453125, "learning_rate": 0.00016821579627028093, "loss": 2.4003, "step": 14059 }, { "epoch": 0.6582011820469308, "grad_norm": 1.4453125, "learning_rate": 0.00016821152787010202, "loss": 4.0084, "step": 14060 }, { "epoch": 0.6582479957867634, "grad_norm": 1.34375, "learning_rate": 0.0001682072592374963, "loss": 2.5971, "step": 14061 }, { "epoch": 0.658294809526596, "grad_norm": 1.6015625, "learning_rate": 0.00016820299037247825, "loss": 2.7124, "step": 14062 }, { "epoch": 0.6583416232664286, "grad_norm": 1.5, "learning_rate": 0.0001681987212750625, "loss": 2.4263, "step": 14063 }, { "epoch": 0.6583884370062614, "grad_norm": 1.25, "learning_rate": 0.00016819445194526353, "loss": 2.9459, "step": 14064 }, { "epoch": 0.658435250746094, "grad_norm": 1.328125, "learning_rate": 0.00016819018238309593, "loss": 3.1372, "step": 14065 }, { "epoch": 0.6584820644859266, "grad_norm": 1.6328125, "learning_rate": 0.0001681859125885742, "loss": 2.2012, "step": 14066 }, { "epoch": 0.6585288782257592, "grad_norm": 1.8671875, "learning_rate": 0.00016818164256171293, "loss": 2.5652, "step": 14067 }, { "epoch": 0.6585756919655918, "grad_norm": 1.6796875, "learning_rate": 0.00016817737230252663, "loss": 2.8346, "step": 14068 }, { "epoch": 0.6586225057054246, "grad_norm": 2.015625, "learning_rate": 0.00016817310181102988, "loss": 2.6064, "step": 14069 }, { "epoch": 0.6586693194452572, "grad_norm": 1.0625, "learning_rate": 0.00016816883108723723, "loss": 2.3474, "step": 14070 }, { "epoch": 0.6587161331850898, "grad_norm": 3.421875, "learning_rate": 0.00016816456013116324, "loss": 2.258, "step": 14071 }, { "epoch": 0.6587629469249224, "grad_norm": 1.5625, "learning_rate": 0.00016816028894282243, "loss": 2.9021, "step": 14072 }, { "epoch": 0.658809760664755, "grad_norm": 1.734375, "learning_rate": 0.0001681560175222294, "loss": 2.641, "step": 14073 }, { "epoch": 0.6588565744045878, "grad_norm": 1.3828125, "learning_rate": 0.00016815174586939867, "loss": 2.5802, "step": 14074 }, { "epoch": 0.6589033881444204, "grad_norm": 2.859375, "learning_rate": 0.0001681474739843448, "loss": 2.6474, "step": 14075 }, { "epoch": 0.658950201884253, "grad_norm": 1.5078125, "learning_rate": 0.00016814320186708237, "loss": 2.803, "step": 14076 }, { "epoch": 0.6589970156240856, "grad_norm": 1.15625, "learning_rate": 0.0001681389295176259, "loss": 2.4841, "step": 14077 }, { "epoch": 0.6590438293639184, "grad_norm": 2.21875, "learning_rate": 0.00016813465693598997, "loss": 2.3356, "step": 14078 }, { "epoch": 0.659090643103751, "grad_norm": 2.0625, "learning_rate": 0.0001681303841221891, "loss": 3.2041, "step": 14079 }, { "epoch": 0.6591374568435836, "grad_norm": 2.34375, "learning_rate": 0.00016812611107623794, "loss": 2.9027, "step": 14080 }, { "epoch": 0.6591842705834162, "grad_norm": 1.328125, "learning_rate": 0.00016812183779815098, "loss": 2.8616, "step": 14081 }, { "epoch": 0.6592310843232488, "grad_norm": 1.3828125, "learning_rate": 0.00016811756428794278, "loss": 2.7204, "step": 14082 }, { "epoch": 0.6592778980630816, "grad_norm": 1.71875, "learning_rate": 0.00016811329054562793, "loss": 3.0244, "step": 14083 }, { "epoch": 0.6593247118029142, "grad_norm": 1.703125, "learning_rate": 0.00016810901657122098, "loss": 3.2785, "step": 14084 }, { "epoch": 0.6593715255427468, "grad_norm": 1.5546875, "learning_rate": 0.0001681047423647365, "loss": 2.7352, "step": 14085 }, { "epoch": 0.6594183392825794, "grad_norm": 2.171875, "learning_rate": 0.00016810046792618903, "loss": 2.7318, "step": 14086 }, { "epoch": 0.659465153022412, "grad_norm": 1.390625, "learning_rate": 0.00016809619325559315, "loss": 2.7597, "step": 14087 }, { "epoch": 0.6595119667622448, "grad_norm": 1.078125, "learning_rate": 0.00016809191835296346, "loss": 2.6453, "step": 14088 }, { "epoch": 0.6595587805020774, "grad_norm": 1.125, "learning_rate": 0.00016808764321831445, "loss": 1.8929, "step": 14089 }, { "epoch": 0.65960559424191, "grad_norm": 1.5, "learning_rate": 0.00016808336785166075, "loss": 2.5389, "step": 14090 }, { "epoch": 0.6596524079817426, "grad_norm": 1.25, "learning_rate": 0.00016807909225301693, "loss": 2.5178, "step": 14091 }, { "epoch": 0.6596992217215752, "grad_norm": 1.5703125, "learning_rate": 0.00016807481642239748, "loss": 2.6132, "step": 14092 }, { "epoch": 0.659746035461408, "grad_norm": 1.5390625, "learning_rate": 0.0001680705403598171, "loss": 2.9295, "step": 14093 }, { "epoch": 0.6597928492012406, "grad_norm": 1.421875, "learning_rate": 0.00016806626406529022, "loss": 2.8156, "step": 14094 }, { "epoch": 0.6598396629410732, "grad_norm": 1.40625, "learning_rate": 0.00016806198753883153, "loss": 2.5155, "step": 14095 }, { "epoch": 0.6598864766809058, "grad_norm": 1.4453125, "learning_rate": 0.00016805771078045556, "loss": 2.6367, "step": 14096 }, { "epoch": 0.6599332904207384, "grad_norm": 1.21875, "learning_rate": 0.0001680534337901768, "loss": 2.6283, "step": 14097 }, { "epoch": 0.6599801041605712, "grad_norm": 1.3828125, "learning_rate": 0.00016804915656800997, "loss": 2.8941, "step": 14098 }, { "epoch": 0.6600269179004038, "grad_norm": 1.6875, "learning_rate": 0.00016804487911396956, "loss": 2.4389, "step": 14099 }, { "epoch": 0.6600737316402364, "grad_norm": 1.765625, "learning_rate": 0.00016804060142807013, "loss": 2.9214, "step": 14100 }, { "epoch": 0.660120545380069, "grad_norm": 1.3203125, "learning_rate": 0.00016803632351032632, "loss": 2.9126, "step": 14101 }, { "epoch": 0.6601673591199017, "grad_norm": 1.46875, "learning_rate": 0.00016803204536075265, "loss": 2.6997, "step": 14102 }, { "epoch": 0.6602141728597344, "grad_norm": 1.5234375, "learning_rate": 0.00016802776697936374, "loss": 2.8819, "step": 14103 }, { "epoch": 0.660260986599567, "grad_norm": 1.46875, "learning_rate": 0.0001680234883661741, "loss": 2.7351, "step": 14104 }, { "epoch": 0.6603078003393996, "grad_norm": 1.25, "learning_rate": 0.0001680192095211984, "loss": 2.6165, "step": 14105 }, { "epoch": 0.6603546140792322, "grad_norm": 2.1875, "learning_rate": 0.00016801493044445116, "loss": 2.1679, "step": 14106 }, { "epoch": 0.6604014278190649, "grad_norm": 1.6640625, "learning_rate": 0.00016801065113594696, "loss": 2.5533, "step": 14107 }, { "epoch": 0.6604482415588976, "grad_norm": 1.296875, "learning_rate": 0.0001680063715957004, "loss": 2.946, "step": 14108 }, { "epoch": 0.6604950552987302, "grad_norm": 1.4375, "learning_rate": 0.00016800209182372608, "loss": 2.6461, "step": 14109 }, { "epoch": 0.6605418690385628, "grad_norm": 1.453125, "learning_rate": 0.00016799781182003855, "loss": 2.6217, "step": 14110 }, { "epoch": 0.6605886827783954, "grad_norm": 2.609375, "learning_rate": 0.0001679935315846524, "loss": 2.6205, "step": 14111 }, { "epoch": 0.6606354965182281, "grad_norm": 1.625, "learning_rate": 0.00016798925111758225, "loss": 2.4658, "step": 14112 }, { "epoch": 0.6606823102580608, "grad_norm": 1.84375, "learning_rate": 0.00016798497041884262, "loss": 2.302, "step": 14113 }, { "epoch": 0.6607291239978934, "grad_norm": 1.5234375, "learning_rate": 0.0001679806894884482, "loss": 2.9248, "step": 14114 }, { "epoch": 0.660775937737726, "grad_norm": 1.234375, "learning_rate": 0.00016797640832641345, "loss": 2.864, "step": 14115 }, { "epoch": 0.6608227514775586, "grad_norm": 1.4609375, "learning_rate": 0.000167972126932753, "loss": 2.789, "step": 14116 }, { "epoch": 0.6608695652173913, "grad_norm": 1.21875, "learning_rate": 0.0001679678453074815, "loss": 2.7996, "step": 14117 }, { "epoch": 0.660916378957224, "grad_norm": 1.4921875, "learning_rate": 0.00016796356345061346, "loss": 2.5336, "step": 14118 }, { "epoch": 0.6609631926970566, "grad_norm": 1.7578125, "learning_rate": 0.00016795928136216354, "loss": 2.3698, "step": 14119 }, { "epoch": 0.6610100064368892, "grad_norm": 1.59375, "learning_rate": 0.0001679549990421463, "loss": 2.9014, "step": 14120 }, { "epoch": 0.6610568201767218, "grad_norm": 1.421875, "learning_rate": 0.0001679507164905763, "loss": 2.911, "step": 14121 }, { "epoch": 0.6611036339165545, "grad_norm": 1.1796875, "learning_rate": 0.00016794643370746817, "loss": 2.5802, "step": 14122 }, { "epoch": 0.6611504476563872, "grad_norm": 1.1328125, "learning_rate": 0.00016794215069283652, "loss": 2.3488, "step": 14123 }, { "epoch": 0.6611972613962198, "grad_norm": 1.59375, "learning_rate": 0.00016793786744669588, "loss": 2.6435, "step": 14124 }, { "epoch": 0.6612440751360524, "grad_norm": 1.515625, "learning_rate": 0.00016793358396906093, "loss": 2.9073, "step": 14125 }, { "epoch": 0.661290888875885, "grad_norm": 1.5, "learning_rate": 0.00016792930025994615, "loss": 2.379, "step": 14126 }, { "epoch": 0.6613377026157177, "grad_norm": 1.6171875, "learning_rate": 0.00016792501631936624, "loss": 2.6168, "step": 14127 }, { "epoch": 0.6613845163555504, "grad_norm": 2.15625, "learning_rate": 0.0001679207321473358, "loss": 2.4046, "step": 14128 }, { "epoch": 0.661431330095383, "grad_norm": 1.3515625, "learning_rate": 0.00016791644774386935, "loss": 2.4088, "step": 14129 }, { "epoch": 0.6614781438352156, "grad_norm": 2.078125, "learning_rate": 0.00016791216310898155, "loss": 3.0165, "step": 14130 }, { "epoch": 0.6615249575750483, "grad_norm": 2.078125, "learning_rate": 0.00016790787824268695, "loss": 2.6805, "step": 14131 }, { "epoch": 0.6615717713148809, "grad_norm": 1.7265625, "learning_rate": 0.0001679035931450002, "loss": 2.4728, "step": 14132 }, { "epoch": 0.6616185850547136, "grad_norm": 1.1953125, "learning_rate": 0.0001678993078159359, "loss": 2.6962, "step": 14133 }, { "epoch": 0.6616653987945462, "grad_norm": 1.4453125, "learning_rate": 0.00016789502225550862, "loss": 2.5976, "step": 14134 }, { "epoch": 0.6617122125343788, "grad_norm": 1.1484375, "learning_rate": 0.00016789073646373295, "loss": 2.6613, "step": 14135 }, { "epoch": 0.6617590262742115, "grad_norm": 1.4375, "learning_rate": 0.00016788645044062353, "loss": 2.9617, "step": 14136 }, { "epoch": 0.6618058400140441, "grad_norm": 1.46875, "learning_rate": 0.000167882164186195, "loss": 2.2024, "step": 14137 }, { "epoch": 0.6618526537538768, "grad_norm": 1.28125, "learning_rate": 0.00016787787770046188, "loss": 2.7367, "step": 14138 }, { "epoch": 0.6618994674937094, "grad_norm": 1.046875, "learning_rate": 0.0001678735909834388, "loss": 2.5433, "step": 14139 }, { "epoch": 0.661946281233542, "grad_norm": 1.3671875, "learning_rate": 0.0001678693040351404, "loss": 2.7339, "step": 14140 }, { "epoch": 0.6619930949733747, "grad_norm": 1.65625, "learning_rate": 0.00016786501685558128, "loss": 2.5965, "step": 14141 }, { "epoch": 0.6620399087132073, "grad_norm": 1.6796875, "learning_rate": 0.00016786072944477602, "loss": 2.8829, "step": 14142 }, { "epoch": 0.66208672245304, "grad_norm": 4.0, "learning_rate": 0.00016785644180273928, "loss": 3.5382, "step": 14143 }, { "epoch": 0.6621335361928726, "grad_norm": 1.1953125, "learning_rate": 0.0001678521539294856, "loss": 2.736, "step": 14144 }, { "epoch": 0.6621803499327052, "grad_norm": 1.3515625, "learning_rate": 0.00016784786582502964, "loss": 2.7282, "step": 14145 }, { "epoch": 0.6622271636725379, "grad_norm": 1.5, "learning_rate": 0.00016784357748938602, "loss": 2.8638, "step": 14146 }, { "epoch": 0.6622739774123705, "grad_norm": 1.25, "learning_rate": 0.00016783928892256928, "loss": 2.7567, "step": 14147 }, { "epoch": 0.6623207911522032, "grad_norm": 1.4921875, "learning_rate": 0.00016783500012459412, "loss": 2.0877, "step": 14148 }, { "epoch": 0.6623676048920358, "grad_norm": 1.171875, "learning_rate": 0.00016783071109547512, "loss": 2.4446, "step": 14149 }, { "epoch": 0.6624144186318685, "grad_norm": 1.90625, "learning_rate": 0.00016782642183522687, "loss": 2.7864, "step": 14150 }, { "epoch": 0.6624612323717011, "grad_norm": 1.484375, "learning_rate": 0.00016782213234386404, "loss": 3.0437, "step": 14151 }, { "epoch": 0.6625080461115337, "grad_norm": 2.484375, "learning_rate": 0.0001678178426214012, "loss": 2.5065, "step": 14152 }, { "epoch": 0.6625548598513664, "grad_norm": 1.4609375, "learning_rate": 0.000167813552667853, "loss": 2.5378, "step": 14153 }, { "epoch": 0.662601673591199, "grad_norm": 1.6484375, "learning_rate": 0.000167809262483234, "loss": 2.9748, "step": 14154 }, { "epoch": 0.6626484873310317, "grad_norm": 1.6328125, "learning_rate": 0.00016780497206755887, "loss": 2.5861, "step": 14155 }, { "epoch": 0.6626953010708643, "grad_norm": 1.4375, "learning_rate": 0.00016780068142084224, "loss": 2.9439, "step": 14156 }, { "epoch": 0.6627421148106969, "grad_norm": 1.3125, "learning_rate": 0.00016779639054309868, "loss": 2.5872, "step": 14157 }, { "epoch": 0.6627889285505296, "grad_norm": 1.3984375, "learning_rate": 0.00016779209943434284, "loss": 2.2898, "step": 14158 }, { "epoch": 0.6628357422903622, "grad_norm": 1.796875, "learning_rate": 0.00016778780809458937, "loss": 2.6923, "step": 14159 }, { "epoch": 0.6628825560301949, "grad_norm": 1.890625, "learning_rate": 0.0001677835165238528, "loss": 2.56, "step": 14160 }, { "epoch": 0.6629293697700275, "grad_norm": 0.9921875, "learning_rate": 0.00016777922472214788, "loss": 2.5153, "step": 14161 }, { "epoch": 0.6629761835098601, "grad_norm": 1.4296875, "learning_rate": 0.00016777493268948916, "loss": 2.8667, "step": 14162 }, { "epoch": 0.6630229972496928, "grad_norm": 1.1015625, "learning_rate": 0.00016777064042589125, "loss": 2.4298, "step": 14163 }, { "epoch": 0.6630698109895254, "grad_norm": 2.25, "learning_rate": 0.00016776634793136882, "loss": 2.9098, "step": 14164 }, { "epoch": 0.6631166247293581, "grad_norm": 1.4765625, "learning_rate": 0.00016776205520593646, "loss": 2.6936, "step": 14165 }, { "epoch": 0.6631634384691907, "grad_norm": 2.140625, "learning_rate": 0.00016775776224960885, "loss": 2.4513, "step": 14166 }, { "epoch": 0.6632102522090233, "grad_norm": 1.71875, "learning_rate": 0.00016775346906240053, "loss": 3.1966, "step": 14167 }, { "epoch": 0.663257065948856, "grad_norm": 1.421875, "learning_rate": 0.0001677491756443262, "loss": 2.9106, "step": 14168 }, { "epoch": 0.6633038796886886, "grad_norm": 1.125, "learning_rate": 0.0001677448819954005, "loss": 2.4258, "step": 14169 }, { "epoch": 0.6633506934285213, "grad_norm": 1.6328125, "learning_rate": 0.00016774058811563797, "loss": 2.7539, "step": 14170 }, { "epoch": 0.6633975071683539, "grad_norm": 1.3828125, "learning_rate": 0.00016773629400505336, "loss": 3.0128, "step": 14171 }, { "epoch": 0.6634443209081865, "grad_norm": 1.2265625, "learning_rate": 0.00016773199966366121, "loss": 2.5601, "step": 14172 }, { "epoch": 0.6634911346480192, "grad_norm": 1.8203125, "learning_rate": 0.0001677277050914762, "loss": 2.9338, "step": 14173 }, { "epoch": 0.6635379483878519, "grad_norm": 1.171875, "learning_rate": 0.00016772341028851293, "loss": 2.4984, "step": 14174 }, { "epoch": 0.6635847621276845, "grad_norm": 1.3515625, "learning_rate": 0.0001677191152547861, "loss": 2.5481, "step": 14175 }, { "epoch": 0.6636315758675171, "grad_norm": 1.4453125, "learning_rate": 0.00016771481999031026, "loss": 2.6651, "step": 14176 }, { "epoch": 0.6636783896073497, "grad_norm": 1.5390625, "learning_rate": 0.00016771052449510008, "loss": 2.7309, "step": 14177 }, { "epoch": 0.6637252033471824, "grad_norm": 1.5625, "learning_rate": 0.0001677062287691702, "loss": 2.8024, "step": 14178 }, { "epoch": 0.663772017087015, "grad_norm": 1.5703125, "learning_rate": 0.00016770193281253528, "loss": 2.448, "step": 14179 }, { "epoch": 0.6638188308268477, "grad_norm": 1.40625, "learning_rate": 0.00016769763662520992, "loss": 3.021, "step": 14180 }, { "epoch": 0.6638656445666803, "grad_norm": 1.5625, "learning_rate": 0.00016769334020720882, "loss": 2.4883, "step": 14181 }, { "epoch": 0.6639124583065129, "grad_norm": 1.4140625, "learning_rate": 0.00016768904355854653, "loss": 4.0274, "step": 14182 }, { "epoch": 0.6639592720463456, "grad_norm": 1.40625, "learning_rate": 0.00016768474667923776, "loss": 2.9195, "step": 14183 }, { "epoch": 0.6640060857861783, "grad_norm": 1.4296875, "learning_rate": 0.0001676804495692971, "loss": 2.8762, "step": 14184 }, { "epoch": 0.6640528995260109, "grad_norm": 1.34375, "learning_rate": 0.00016767615222873926, "loss": 2.8231, "step": 14185 }, { "epoch": 0.6640997132658435, "grad_norm": 2.5, "learning_rate": 0.0001676718546575788, "loss": 2.3316, "step": 14186 }, { "epoch": 0.6641465270056761, "grad_norm": 1.375, "learning_rate": 0.00016766755685583044, "loss": 2.8145, "step": 14187 }, { "epoch": 0.6641933407455088, "grad_norm": 1.609375, "learning_rate": 0.00016766325882350876, "loss": 2.5591, "step": 14188 }, { "epoch": 0.6642401544853415, "grad_norm": 2.171875, "learning_rate": 0.00016765896056062846, "loss": 2.5441, "step": 14189 }, { "epoch": 0.6642869682251741, "grad_norm": 1.2734375, "learning_rate": 0.00016765466206720417, "loss": 2.3598, "step": 14190 }, { "epoch": 0.6643337819650067, "grad_norm": 1.2890625, "learning_rate": 0.00016765036334325052, "loss": 2.4821, "step": 14191 }, { "epoch": 0.6643805957048393, "grad_norm": 1.203125, "learning_rate": 0.00016764606438878218, "loss": 2.4582, "step": 14192 }, { "epoch": 0.664427409444672, "grad_norm": 1.46875, "learning_rate": 0.00016764176520381375, "loss": 2.6736, "step": 14193 }, { "epoch": 0.6644742231845047, "grad_norm": 1.3828125, "learning_rate": 0.00016763746578835992, "loss": 2.8468, "step": 14194 }, { "epoch": 0.6645210369243373, "grad_norm": 1.265625, "learning_rate": 0.00016763316614243538, "loss": 2.8184, "step": 14195 }, { "epoch": 0.6645678506641699, "grad_norm": 1.53125, "learning_rate": 0.0001676288662660547, "loss": 2.6046, "step": 14196 }, { "epoch": 0.6646146644040025, "grad_norm": 1.4375, "learning_rate": 0.00016762456615923257, "loss": 2.9985, "step": 14197 }, { "epoch": 0.6646614781438352, "grad_norm": 1.25, "learning_rate": 0.00016762026582198364, "loss": 2.7897, "step": 14198 }, { "epoch": 0.6647082918836679, "grad_norm": 1.4921875, "learning_rate": 0.00016761596525432254, "loss": 2.8311, "step": 14199 }, { "epoch": 0.6647551056235005, "grad_norm": 2.796875, "learning_rate": 0.000167611664456264, "loss": 2.6516, "step": 14200 }, { "epoch": 0.6648019193633331, "grad_norm": 1.8671875, "learning_rate": 0.00016760736342782262, "loss": 2.8643, "step": 14201 }, { "epoch": 0.6648487331031658, "grad_norm": 1.9296875, "learning_rate": 0.00016760306216901302, "loss": 2.7491, "step": 14202 }, { "epoch": 0.6648955468429985, "grad_norm": 1.671875, "learning_rate": 0.00016759876067984992, "loss": 2.6658, "step": 14203 }, { "epoch": 0.6649423605828311, "grad_norm": 1.3515625, "learning_rate": 0.00016759445896034794, "loss": 2.4447, "step": 14204 }, { "epoch": 0.6649891743226637, "grad_norm": 1.59375, "learning_rate": 0.00016759015701052173, "loss": 2.5319, "step": 14205 }, { "epoch": 0.6650359880624963, "grad_norm": 1.09375, "learning_rate": 0.000167585854830386, "loss": 2.5466, "step": 14206 }, { "epoch": 0.665082801802329, "grad_norm": 1.734375, "learning_rate": 0.00016758155241995533, "loss": 2.7506, "step": 14207 }, { "epoch": 0.6651296155421617, "grad_norm": 1.8828125, "learning_rate": 0.00016757724977924446, "loss": 2.6221, "step": 14208 }, { "epoch": 0.6651764292819943, "grad_norm": 1.265625, "learning_rate": 0.00016757294690826802, "loss": 2.5584, "step": 14209 }, { "epoch": 0.6652232430218269, "grad_norm": 1.2265625, "learning_rate": 0.00016756864380704066, "loss": 3.6698, "step": 14210 }, { "epoch": 0.6652700567616595, "grad_norm": 1.3828125, "learning_rate": 0.00016756434047557705, "loss": 2.5936, "step": 14211 }, { "epoch": 0.6653168705014922, "grad_norm": 1.1171875, "learning_rate": 0.00016756003691389187, "loss": 2.5463, "step": 14212 }, { "epoch": 0.6653636842413249, "grad_norm": 2.359375, "learning_rate": 0.00016755573312199974, "loss": 2.7029, "step": 14213 }, { "epoch": 0.6654104979811575, "grad_norm": 1.3359375, "learning_rate": 0.00016755142909991537, "loss": 3.3027, "step": 14214 }, { "epoch": 0.6654573117209901, "grad_norm": 1.6484375, "learning_rate": 0.00016754712484765342, "loss": 2.8542, "step": 14215 }, { "epoch": 0.6655041254608227, "grad_norm": 1.9453125, "learning_rate": 0.00016754282036522854, "loss": 2.7053, "step": 14216 }, { "epoch": 0.6655509392006554, "grad_norm": 1.453125, "learning_rate": 0.0001675385156526554, "loss": 2.8208, "step": 14217 }, { "epoch": 0.6655977529404881, "grad_norm": 1.8203125, "learning_rate": 0.00016753421070994865, "loss": 2.5113, "step": 14218 }, { "epoch": 0.6656445666803207, "grad_norm": 1.3046875, "learning_rate": 0.00016752990553712302, "loss": 2.8619, "step": 14219 }, { "epoch": 0.6656913804201533, "grad_norm": 2.1875, "learning_rate": 0.00016752560013419315, "loss": 2.8225, "step": 14220 }, { "epoch": 0.6657381941599859, "grad_norm": 1.2578125, "learning_rate": 0.00016752129450117368, "loss": 2.8614, "step": 14221 }, { "epoch": 0.6657850078998186, "grad_norm": 1.2421875, "learning_rate": 0.00016751698863807927, "loss": 2.5518, "step": 14222 }, { "epoch": 0.6658318216396513, "grad_norm": 1.046875, "learning_rate": 0.00016751268254492464, "loss": 3.0325, "step": 14223 }, { "epoch": 0.6658786353794839, "grad_norm": 1.171875, "learning_rate": 0.00016750837622172445, "loss": 2.7091, "step": 14224 }, { "epoch": 0.6659254491193165, "grad_norm": 1.6015625, "learning_rate": 0.0001675040696684934, "loss": 2.6067, "step": 14225 }, { "epoch": 0.6659722628591491, "grad_norm": 1.15625, "learning_rate": 0.00016749976288524612, "loss": 2.8742, "step": 14226 }, { "epoch": 0.6660190765989819, "grad_norm": 1.484375, "learning_rate": 0.00016749545587199728, "loss": 2.7372, "step": 14227 }, { "epoch": 0.6660658903388145, "grad_norm": 1.5390625, "learning_rate": 0.0001674911486287616, "loss": 2.6634, "step": 14228 }, { "epoch": 0.6661127040786471, "grad_norm": 6.53125, "learning_rate": 0.00016748684115555374, "loss": 3.3429, "step": 14229 }, { "epoch": 0.6661595178184797, "grad_norm": 1.765625, "learning_rate": 0.00016748253345238832, "loss": 2.8541, "step": 14230 }, { "epoch": 0.6662063315583123, "grad_norm": 1.2890625, "learning_rate": 0.0001674782255192801, "loss": 2.455, "step": 14231 }, { "epoch": 0.666253145298145, "grad_norm": 1.4453125, "learning_rate": 0.00016747391735624372, "loss": 2.8096, "step": 14232 }, { "epoch": 0.6662999590379777, "grad_norm": 1.8203125, "learning_rate": 0.00016746960896329389, "loss": 2.6972, "step": 14233 }, { "epoch": 0.6663467727778103, "grad_norm": 1.5625, "learning_rate": 0.00016746530034044522, "loss": 3.8173, "step": 14234 }, { "epoch": 0.6663935865176429, "grad_norm": 1.25, "learning_rate": 0.0001674609914877125, "loss": 2.5076, "step": 14235 }, { "epoch": 0.6664404002574755, "grad_norm": 1.2109375, "learning_rate": 0.0001674566824051103, "loss": 2.7224, "step": 14236 }, { "epoch": 0.6664872139973083, "grad_norm": 1.6328125, "learning_rate": 0.00016745237309265338, "loss": 2.873, "step": 14237 }, { "epoch": 0.6665340277371409, "grad_norm": 1.34375, "learning_rate": 0.0001674480635503564, "loss": 2.764, "step": 14238 }, { "epoch": 0.6665808414769735, "grad_norm": 1.734375, "learning_rate": 0.00016744375377823403, "loss": 2.5334, "step": 14239 }, { "epoch": 0.6666276552168061, "grad_norm": 2.15625, "learning_rate": 0.00016743944377630094, "loss": 2.4288, "step": 14240 }, { "epoch": 0.6666744689566387, "grad_norm": 1.28125, "learning_rate": 0.0001674351335445719, "loss": 3.0451, "step": 14241 }, { "epoch": 0.6667212826964715, "grad_norm": 1.4765625, "learning_rate": 0.0001674308230830615, "loss": 2.3949, "step": 14242 }, { "epoch": 0.6667680964363041, "grad_norm": 1.640625, "learning_rate": 0.00016742651239178447, "loss": 2.3076, "step": 14243 }, { "epoch": 0.6668149101761367, "grad_norm": 1.421875, "learning_rate": 0.00016742220147075552, "loss": 2.787, "step": 14244 }, { "epoch": 0.6668617239159693, "grad_norm": 2.1875, "learning_rate": 0.0001674178903199893, "loss": 2.3984, "step": 14245 }, { "epoch": 0.6669085376558019, "grad_norm": 1.4609375, "learning_rate": 0.00016741357893950052, "loss": 2.629, "step": 14246 }, { "epoch": 0.6669553513956347, "grad_norm": 1.484375, "learning_rate": 0.00016740926732930386, "loss": 2.6513, "step": 14247 }, { "epoch": 0.6670021651354673, "grad_norm": 1.2734375, "learning_rate": 0.00016740495548941403, "loss": 2.5976, "step": 14248 }, { "epoch": 0.6670489788752999, "grad_norm": 1.4375, "learning_rate": 0.00016740064341984573, "loss": 2.8574, "step": 14249 }, { "epoch": 0.6670957926151325, "grad_norm": 1.703125, "learning_rate": 0.0001673963311206136, "loss": 3.1309, "step": 14250 }, { "epoch": 0.6671426063549651, "grad_norm": 1.2578125, "learning_rate": 0.00016739201859173236, "loss": 2.2937, "step": 14251 }, { "epoch": 0.6671894200947979, "grad_norm": 2.0625, "learning_rate": 0.00016738770583321673, "loss": 2.8912, "step": 14252 }, { "epoch": 0.6672362338346305, "grad_norm": 1.4375, "learning_rate": 0.00016738339284508137, "loss": 2.5753, "step": 14253 }, { "epoch": 0.6672830475744631, "grad_norm": 1.703125, "learning_rate": 0.00016737907962734104, "loss": 2.7486, "step": 14254 }, { "epoch": 0.6673298613142957, "grad_norm": 1.984375, "learning_rate": 0.00016737476618001033, "loss": 3.1177, "step": 14255 }, { "epoch": 0.6673766750541283, "grad_norm": 1.6875, "learning_rate": 0.00016737045250310404, "loss": 2.9475, "step": 14256 }, { "epoch": 0.6674234887939611, "grad_norm": 1.4609375, "learning_rate": 0.00016736613859663684, "loss": 2.7509, "step": 14257 }, { "epoch": 0.6674703025337937, "grad_norm": 4.0625, "learning_rate": 0.00016736182446062337, "loss": 2.174, "step": 14258 }, { "epoch": 0.6675171162736263, "grad_norm": 1.28125, "learning_rate": 0.0001673575100950784, "loss": 2.7059, "step": 14259 }, { "epoch": 0.6675639300134589, "grad_norm": 1.390625, "learning_rate": 0.00016735319550001662, "loss": 2.5825, "step": 14260 }, { "epoch": 0.6676107437532915, "grad_norm": 1.5390625, "learning_rate": 0.0001673488806754527, "loss": 2.529, "step": 14261 }, { "epoch": 0.6676575574931243, "grad_norm": 1.4140625, "learning_rate": 0.00016734456562140135, "loss": 2.6578, "step": 14262 }, { "epoch": 0.6677043712329569, "grad_norm": 1.609375, "learning_rate": 0.00016734025033787733, "loss": 2.5944, "step": 14263 }, { "epoch": 0.6677511849727895, "grad_norm": 1.4609375, "learning_rate": 0.00016733593482489524, "loss": 2.9149, "step": 14264 }, { "epoch": 0.6677979987126221, "grad_norm": 1.1875, "learning_rate": 0.00016733161908246991, "loss": 2.9783, "step": 14265 }, { "epoch": 0.6678448124524548, "grad_norm": 1.7265625, "learning_rate": 0.00016732730311061592, "loss": 2.5178, "step": 14266 }, { "epoch": 0.6678916261922875, "grad_norm": 1.734375, "learning_rate": 0.0001673229869093481, "loss": 2.6402, "step": 14267 }, { "epoch": 0.6679384399321201, "grad_norm": 1.7109375, "learning_rate": 0.00016731867047868105, "loss": 2.56, "step": 14268 }, { "epoch": 0.6679852536719527, "grad_norm": 1.7734375, "learning_rate": 0.0001673143538186295, "loss": 3.0075, "step": 14269 }, { "epoch": 0.6680320674117853, "grad_norm": 1.3984375, "learning_rate": 0.00016731003692920822, "loss": 2.3307, "step": 14270 }, { "epoch": 0.668078881151618, "grad_norm": 1.65625, "learning_rate": 0.00016730571981043188, "loss": 2.217, "step": 14271 }, { "epoch": 0.6681256948914507, "grad_norm": 1.78125, "learning_rate": 0.00016730140246231517, "loss": 2.9566, "step": 14272 }, { "epoch": 0.6681725086312833, "grad_norm": 1.1328125, "learning_rate": 0.00016729708488487285, "loss": 2.7857, "step": 14273 }, { "epoch": 0.6682193223711159, "grad_norm": 1.421875, "learning_rate": 0.00016729276707811957, "loss": 2.1433, "step": 14274 }, { "epoch": 0.6682661361109485, "grad_norm": 1.71875, "learning_rate": 0.00016728844904207008, "loss": 2.6591, "step": 14275 }, { "epoch": 0.6683129498507812, "grad_norm": 1.546875, "learning_rate": 0.00016728413077673912, "loss": 3.0046, "step": 14276 }, { "epoch": 0.6683597635906139, "grad_norm": 1.171875, "learning_rate": 0.00016727981228214134, "loss": 2.8927, "step": 14277 }, { "epoch": 0.6684065773304465, "grad_norm": 1.390625, "learning_rate": 0.0001672754935582915, "loss": 2.3331, "step": 14278 }, { "epoch": 0.6684533910702791, "grad_norm": 1.9921875, "learning_rate": 0.00016727117460520432, "loss": 2.6397, "step": 14279 }, { "epoch": 0.6685002048101117, "grad_norm": 1.265625, "learning_rate": 0.00016726685542289447, "loss": 2.9668, "step": 14280 }, { "epoch": 0.6685470185499444, "grad_norm": 1.671875, "learning_rate": 0.00016726253601137675, "loss": 2.729, "step": 14281 }, { "epoch": 0.6685938322897771, "grad_norm": 1.296875, "learning_rate": 0.00016725821637066576, "loss": 2.7826, "step": 14282 }, { "epoch": 0.6686406460296097, "grad_norm": 1.265625, "learning_rate": 0.00016725389650077633, "loss": 2.2443, "step": 14283 }, { "epoch": 0.6686874597694423, "grad_norm": 1.4765625, "learning_rate": 0.0001672495764017231, "loss": 2.7066, "step": 14284 }, { "epoch": 0.668734273509275, "grad_norm": 1.59375, "learning_rate": 0.00016724525607352084, "loss": 2.598, "step": 14285 }, { "epoch": 0.6687810872491076, "grad_norm": 1.7734375, "learning_rate": 0.0001672409355161843, "loss": 2.5754, "step": 14286 }, { "epoch": 0.6688279009889403, "grad_norm": 1.203125, "learning_rate": 0.0001672366147297281, "loss": 2.4759, "step": 14287 }, { "epoch": 0.6688747147287729, "grad_norm": 2.34375, "learning_rate": 0.00016723229371416704, "loss": 2.3976, "step": 14288 }, { "epoch": 0.6689215284686055, "grad_norm": 1.2109375, "learning_rate": 0.00016722797246951585, "loss": 2.6975, "step": 14289 }, { "epoch": 0.6689683422084381, "grad_norm": 1.1875, "learning_rate": 0.0001672236509957892, "loss": 2.7842, "step": 14290 }, { "epoch": 0.6690151559482708, "grad_norm": 1.4609375, "learning_rate": 0.00016721932929300186, "loss": 2.4574, "step": 14291 }, { "epoch": 0.6690619696881035, "grad_norm": 1.2109375, "learning_rate": 0.00016721500736116852, "loss": 2.5027, "step": 14292 }, { "epoch": 0.6691087834279361, "grad_norm": 1.4453125, "learning_rate": 0.00016721068520030396, "loss": 2.6791, "step": 14293 }, { "epoch": 0.6691555971677687, "grad_norm": 1.375, "learning_rate": 0.00016720636281042286, "loss": 2.5834, "step": 14294 }, { "epoch": 0.6692024109076014, "grad_norm": 1.3359375, "learning_rate": 0.00016720204019153996, "loss": 2.91, "step": 14295 }, { "epoch": 0.669249224647434, "grad_norm": 1.5546875, "learning_rate": 0.00016719771734367, "loss": 2.6507, "step": 14296 }, { "epoch": 0.6692960383872667, "grad_norm": 1.6640625, "learning_rate": 0.0001671933942668277, "loss": 2.7937, "step": 14297 }, { "epoch": 0.6693428521270993, "grad_norm": 2.3125, "learning_rate": 0.0001671890709610278, "loss": 2.9934, "step": 14298 }, { "epoch": 0.6693896658669319, "grad_norm": 1.2734375, "learning_rate": 0.000167184747426285, "loss": 2.9096, "step": 14299 }, { "epoch": 0.6694364796067646, "grad_norm": 1.8359375, "learning_rate": 0.0001671804236626141, "loss": 2.1649, "step": 14300 }, { "epoch": 0.6694832933465972, "grad_norm": 1.8046875, "learning_rate": 0.00016717609967002974, "loss": 2.7662, "step": 14301 }, { "epoch": 0.6695301070864299, "grad_norm": 1.6328125, "learning_rate": 0.0001671717754485467, "loss": 2.5109, "step": 14302 }, { "epoch": 0.6695769208262625, "grad_norm": 1.2890625, "learning_rate": 0.00016716745099817977, "loss": 2.3498, "step": 14303 }, { "epoch": 0.6696237345660951, "grad_norm": 1.8515625, "learning_rate": 0.00016716312631894357, "loss": 2.789, "step": 14304 }, { "epoch": 0.6696705483059278, "grad_norm": 1.203125, "learning_rate": 0.00016715880141085298, "loss": 2.929, "step": 14305 }, { "epoch": 0.6697173620457604, "grad_norm": 1.578125, "learning_rate": 0.0001671544762739226, "loss": 2.8906, "step": 14306 }, { "epoch": 0.6697641757855931, "grad_norm": 1.34375, "learning_rate": 0.00016715015090816723, "loss": 2.8116, "step": 14307 }, { "epoch": 0.6698109895254257, "grad_norm": 1.9296875, "learning_rate": 0.0001671458253136016, "loss": 2.6204, "step": 14308 }, { "epoch": 0.6698578032652583, "grad_norm": 1.0078125, "learning_rate": 0.00016714149949024045, "loss": 2.554, "step": 14309 }, { "epoch": 0.669904617005091, "grad_norm": 1.4375, "learning_rate": 0.00016713717343809853, "loss": 2.8856, "step": 14310 }, { "epoch": 0.6699514307449236, "grad_norm": 1.4140625, "learning_rate": 0.00016713284715719057, "loss": 2.3426, "step": 14311 }, { "epoch": 0.6699982444847563, "grad_norm": 2.640625, "learning_rate": 0.00016712852064753133, "loss": 2.6177, "step": 14312 }, { "epoch": 0.6700450582245889, "grad_norm": 1.359375, "learning_rate": 0.00016712419390913551, "loss": 2.5351, "step": 14313 }, { "epoch": 0.6700918719644215, "grad_norm": 1.5859375, "learning_rate": 0.0001671198669420179, "loss": 2.6173, "step": 14314 }, { "epoch": 0.6701386857042542, "grad_norm": 1.234375, "learning_rate": 0.0001671155397461932, "loss": 2.7999, "step": 14315 }, { "epoch": 0.6701854994440868, "grad_norm": 1.7421875, "learning_rate": 0.00016711121232167622, "loss": 2.8394, "step": 14316 }, { "epoch": 0.6702323131839195, "grad_norm": 1.109375, "learning_rate": 0.0001671068846684816, "loss": 2.2028, "step": 14317 }, { "epoch": 0.6702791269237521, "grad_norm": 1.3125, "learning_rate": 0.0001671025567866242, "loss": 2.7251, "step": 14318 }, { "epoch": 0.6703259406635848, "grad_norm": 1.7265625, "learning_rate": 0.0001670982286761187, "loss": 2.6688, "step": 14319 }, { "epoch": 0.6703727544034174, "grad_norm": 1.65625, "learning_rate": 0.00016709390033697988, "loss": 2.6964, "step": 14320 }, { "epoch": 0.67041956814325, "grad_norm": 1.09375, "learning_rate": 0.00016708957176922248, "loss": 2.8833, "step": 14321 }, { "epoch": 0.6704663818830827, "grad_norm": 1.375, "learning_rate": 0.00016708524297286118, "loss": 2.5178, "step": 14322 }, { "epoch": 0.6705131956229153, "grad_norm": 1.90625, "learning_rate": 0.00016708091394791085, "loss": 2.0924, "step": 14323 }, { "epoch": 0.670560009362748, "grad_norm": 1.421875, "learning_rate": 0.00016707658469438617, "loss": 2.7781, "step": 14324 }, { "epoch": 0.6706068231025806, "grad_norm": 1.390625, "learning_rate": 0.0001670722552123019, "loss": 2.8284, "step": 14325 }, { "epoch": 0.6706536368424133, "grad_norm": 1.5859375, "learning_rate": 0.0001670679255016728, "loss": 2.5163, "step": 14326 }, { "epoch": 0.6707004505822459, "grad_norm": 1.3984375, "learning_rate": 0.00016706359556251362, "loss": 2.763, "step": 14327 }, { "epoch": 0.6707472643220785, "grad_norm": 1.578125, "learning_rate": 0.0001670592653948391, "loss": 2.6722, "step": 14328 }, { "epoch": 0.6707940780619112, "grad_norm": 1.8046875, "learning_rate": 0.00016705493499866403, "loss": 2.8153, "step": 14329 }, { "epoch": 0.6708408918017438, "grad_norm": 1.2109375, "learning_rate": 0.00016705060437400312, "loss": 2.5765, "step": 14330 }, { "epoch": 0.6708877055415765, "grad_norm": 2.359375, "learning_rate": 0.0001670462735208712, "loss": 2.8866, "step": 14331 }, { "epoch": 0.6709345192814091, "grad_norm": 1.4921875, "learning_rate": 0.00016704194243928293, "loss": 2.8279, "step": 14332 }, { "epoch": 0.6709813330212417, "grad_norm": 1.71875, "learning_rate": 0.00016703761112925314, "loss": 2.5921, "step": 14333 }, { "epoch": 0.6710281467610744, "grad_norm": 1.75, "learning_rate": 0.00016703327959079656, "loss": 2.3789, "step": 14334 }, { "epoch": 0.671074960500907, "grad_norm": 3.625, "learning_rate": 0.00016702894782392796, "loss": 2.7808, "step": 14335 }, { "epoch": 0.6711217742407397, "grad_norm": 1.1875, "learning_rate": 0.0001670246158286621, "loss": 2.4795, "step": 14336 }, { "epoch": 0.6711685879805723, "grad_norm": 1.3515625, "learning_rate": 0.0001670202836050137, "loss": 2.0586, "step": 14337 }, { "epoch": 0.671215401720405, "grad_norm": 1.4921875, "learning_rate": 0.00016701595115299758, "loss": 2.6178, "step": 14338 }, { "epoch": 0.6712622154602376, "grad_norm": 1.609375, "learning_rate": 0.00016701161847262848, "loss": 3.0311, "step": 14339 }, { "epoch": 0.6713090292000702, "grad_norm": 1.53125, "learning_rate": 0.0001670072855639212, "loss": 2.5519, "step": 14340 }, { "epoch": 0.6713558429399029, "grad_norm": 1.6171875, "learning_rate": 0.0001670029524268904, "loss": 2.7979, "step": 14341 }, { "epoch": 0.6714026566797355, "grad_norm": 1.4140625, "learning_rate": 0.00016699861906155095, "loss": 2.5684, "step": 14342 }, { "epoch": 0.6714494704195682, "grad_norm": 1.59375, "learning_rate": 0.00016699428546791757, "loss": 3.0439, "step": 14343 }, { "epoch": 0.6714962841594008, "grad_norm": 1.2578125, "learning_rate": 0.00016698995164600504, "loss": 2.3106, "step": 14344 }, { "epoch": 0.6715430978992334, "grad_norm": 1.296875, "learning_rate": 0.0001669856175958281, "loss": 2.423, "step": 14345 }, { "epoch": 0.6715899116390661, "grad_norm": 1.3828125, "learning_rate": 0.0001669812833174016, "loss": 2.7314, "step": 14346 }, { "epoch": 0.6716367253788987, "grad_norm": 1.375, "learning_rate": 0.00016697694881074018, "loss": 2.3419, "step": 14347 }, { "epoch": 0.6716835391187314, "grad_norm": 1.265625, "learning_rate": 0.00016697261407585873, "loss": 2.4082, "step": 14348 }, { "epoch": 0.671730352858564, "grad_norm": 1.5625, "learning_rate": 0.00016696827911277193, "loss": 2.4479, "step": 14349 }, { "epoch": 0.6717771665983966, "grad_norm": 1.2734375, "learning_rate": 0.0001669639439214946, "loss": 2.3654, "step": 14350 }, { "epoch": 0.6718239803382293, "grad_norm": 1.5625, "learning_rate": 0.0001669596085020415, "loss": 2.7846, "step": 14351 }, { "epoch": 0.6718707940780619, "grad_norm": 2.015625, "learning_rate": 0.00016695527285442744, "loss": 2.8668, "step": 14352 }, { "epoch": 0.6719176078178946, "grad_norm": 1.5546875, "learning_rate": 0.00016695093697866712, "loss": 2.7278, "step": 14353 }, { "epoch": 0.6719644215577272, "grad_norm": 1.34375, "learning_rate": 0.00016694660087477535, "loss": 2.8017, "step": 14354 }, { "epoch": 0.6720112352975598, "grad_norm": 1.3828125, "learning_rate": 0.00016694226454276693, "loss": 2.3425, "step": 14355 }, { "epoch": 0.6720580490373925, "grad_norm": 1.4140625, "learning_rate": 0.00016693792798265662, "loss": 3.0348, "step": 14356 }, { "epoch": 0.6721048627772251, "grad_norm": 1.8359375, "learning_rate": 0.00016693359119445916, "loss": 2.7212, "step": 14357 }, { "epoch": 0.6721516765170578, "grad_norm": 1.25, "learning_rate": 0.00016692925417818936, "loss": 2.7227, "step": 14358 }, { "epoch": 0.6721984902568904, "grad_norm": 1.25, "learning_rate": 0.000166924916933862, "loss": 2.5538, "step": 14359 }, { "epoch": 0.672245303996723, "grad_norm": 1.578125, "learning_rate": 0.00016692057946149187, "loss": 2.9171, "step": 14360 }, { "epoch": 0.6722921177365557, "grad_norm": 1.5234375, "learning_rate": 0.0001669162417610937, "loss": 3.015, "step": 14361 }, { "epoch": 0.6723389314763883, "grad_norm": 1.328125, "learning_rate": 0.00016691190383268234, "loss": 2.9924, "step": 14362 }, { "epoch": 0.672385745216221, "grad_norm": 1.21875, "learning_rate": 0.00016690756567627253, "loss": 2.8409, "step": 14363 }, { "epoch": 0.6724325589560536, "grad_norm": 2.0, "learning_rate": 0.00016690322729187904, "loss": 2.8427, "step": 14364 }, { "epoch": 0.6724793726958862, "grad_norm": 1.3671875, "learning_rate": 0.00016689888867951668, "loss": 2.7659, "step": 14365 }, { "epoch": 0.6725261864357189, "grad_norm": 1.5703125, "learning_rate": 0.00016689454983920024, "loss": 2.4339, "step": 14366 }, { "epoch": 0.6725730001755515, "grad_norm": 1.2265625, "learning_rate": 0.00016689021077094448, "loss": 2.4805, "step": 14367 }, { "epoch": 0.6726198139153842, "grad_norm": 1.5859375, "learning_rate": 0.00016688587147476415, "loss": 2.3657, "step": 14368 }, { "epoch": 0.6726666276552168, "grad_norm": 2.109375, "learning_rate": 0.00016688153195067412, "loss": 2.8494, "step": 14369 }, { "epoch": 0.6727134413950494, "grad_norm": 1.1796875, "learning_rate": 0.00016687719219868913, "loss": 2.0989, "step": 14370 }, { "epoch": 0.6727602551348821, "grad_norm": 1.34375, "learning_rate": 0.00016687285221882398, "loss": 2.9554, "step": 14371 }, { "epoch": 0.6728070688747148, "grad_norm": 1.515625, "learning_rate": 0.00016686851201109346, "loss": 3.0286, "step": 14372 }, { "epoch": 0.6728538826145474, "grad_norm": 1.15625, "learning_rate": 0.00016686417157551234, "loss": 1.9997, "step": 14373 }, { "epoch": 0.67290069635438, "grad_norm": 1.359375, "learning_rate": 0.00016685983091209542, "loss": 2.332, "step": 14374 }, { "epoch": 0.6729475100942126, "grad_norm": 1.3984375, "learning_rate": 0.00016685549002085746, "loss": 2.9535, "step": 14375 }, { "epoch": 0.6729943238340453, "grad_norm": 1.796875, "learning_rate": 0.00016685114890181328, "loss": 2.1536, "step": 14376 }, { "epoch": 0.673041137573878, "grad_norm": 1.453125, "learning_rate": 0.0001668468075549777, "loss": 3.0875, "step": 14377 }, { "epoch": 0.6730879513137106, "grad_norm": 1.4765625, "learning_rate": 0.0001668424659803655, "loss": 2.8862, "step": 14378 }, { "epoch": 0.6731347650535432, "grad_norm": 2.09375, "learning_rate": 0.00016683812417799145, "loss": 2.4948, "step": 14379 }, { "epoch": 0.6731815787933758, "grad_norm": 1.3046875, "learning_rate": 0.00016683378214787036, "loss": 2.879, "step": 14380 }, { "epoch": 0.6732283925332085, "grad_norm": 1.5234375, "learning_rate": 0.00016682943989001703, "loss": 2.8579, "step": 14381 }, { "epoch": 0.6732752062730412, "grad_norm": 1.3515625, "learning_rate": 0.00016682509740444622, "loss": 2.7005, "step": 14382 }, { "epoch": 0.6733220200128738, "grad_norm": 2.4375, "learning_rate": 0.0001668207546911728, "loss": 2.7234, "step": 14383 }, { "epoch": 0.6733688337527064, "grad_norm": 1.5078125, "learning_rate": 0.00016681641175021145, "loss": 2.3536, "step": 14384 }, { "epoch": 0.673415647492539, "grad_norm": 1.8125, "learning_rate": 0.00016681206858157706, "loss": 2.5115, "step": 14385 }, { "epoch": 0.6734624612323717, "grad_norm": 1.3515625, "learning_rate": 0.00016680772518528443, "loss": 2.5585, "step": 14386 }, { "epoch": 0.6735092749722044, "grad_norm": 1.3828125, "learning_rate": 0.00016680338156134834, "loss": 2.905, "step": 14387 }, { "epoch": 0.673556088712037, "grad_norm": 1.6015625, "learning_rate": 0.0001667990377097836, "loss": 2.3723, "step": 14388 }, { "epoch": 0.6736029024518696, "grad_norm": 1.2578125, "learning_rate": 0.00016679469363060497, "loss": 2.4741, "step": 14389 }, { "epoch": 0.6736497161917022, "grad_norm": 1.34375, "learning_rate": 0.0001667903493238273, "loss": 2.698, "step": 14390 }, { "epoch": 0.673696529931535, "grad_norm": 1.609375, "learning_rate": 0.00016678600478946539, "loss": 2.5804, "step": 14391 }, { "epoch": 0.6737433436713676, "grad_norm": 1.265625, "learning_rate": 0.000166781660027534, "loss": 2.422, "step": 14392 }, { "epoch": 0.6737901574112002, "grad_norm": 1.3515625, "learning_rate": 0.00016677731503804796, "loss": 2.5544, "step": 14393 }, { "epoch": 0.6738369711510328, "grad_norm": 1.6484375, "learning_rate": 0.00016677296982102208, "loss": 2.8082, "step": 14394 }, { "epoch": 0.6738837848908654, "grad_norm": 1.171875, "learning_rate": 0.00016676862437647118, "loss": 2.7058, "step": 14395 }, { "epoch": 0.6739305986306982, "grad_norm": 1.2578125, "learning_rate": 0.00016676427870441005, "loss": 2.3969, "step": 14396 }, { "epoch": 0.6739774123705308, "grad_norm": 1.1875, "learning_rate": 0.00016675993280485348, "loss": 2.5893, "step": 14397 }, { "epoch": 0.6740242261103634, "grad_norm": 1.5390625, "learning_rate": 0.0001667555866778163, "loss": 2.4959, "step": 14398 }, { "epoch": 0.674071039850196, "grad_norm": 1.1640625, "learning_rate": 0.00016675124032331336, "loss": 2.6134, "step": 14399 }, { "epoch": 0.6741178535900286, "grad_norm": 2.28125, "learning_rate": 0.00016674689374135938, "loss": 2.8107, "step": 14400 }, { "epoch": 0.6741646673298614, "grad_norm": 1.65625, "learning_rate": 0.00016674254693196925, "loss": 2.9338, "step": 14401 }, { "epoch": 0.674211481069694, "grad_norm": 1.9453125, "learning_rate": 0.00016673819989515773, "loss": 2.2693, "step": 14402 }, { "epoch": 0.6742582948095266, "grad_norm": 1.4140625, "learning_rate": 0.00016673385263093967, "loss": 3.0269, "step": 14403 }, { "epoch": 0.6743051085493592, "grad_norm": 1.484375, "learning_rate": 0.00016672950513932984, "loss": 2.7876, "step": 14404 }, { "epoch": 0.6743519222891918, "grad_norm": 1.4140625, "learning_rate": 0.0001667251574203431, "loss": 2.2006, "step": 14405 }, { "epoch": 0.6743987360290246, "grad_norm": 1.828125, "learning_rate": 0.0001667208094739942, "loss": 2.6109, "step": 14406 }, { "epoch": 0.6744455497688572, "grad_norm": 1.65625, "learning_rate": 0.00016671646130029806, "loss": 2.6887, "step": 14407 }, { "epoch": 0.6744923635086898, "grad_norm": 2.625, "learning_rate": 0.0001667121128992694, "loss": 2.783, "step": 14408 }, { "epoch": 0.6745391772485224, "grad_norm": 1.4140625, "learning_rate": 0.00016670776427092307, "loss": 2.6839, "step": 14409 }, { "epoch": 0.674585990988355, "grad_norm": 1.7265625, "learning_rate": 0.0001667034154152739, "loss": 2.5719, "step": 14410 }, { "epoch": 0.6746328047281878, "grad_norm": 1.421875, "learning_rate": 0.0001666990663323367, "loss": 2.4283, "step": 14411 }, { "epoch": 0.6746796184680204, "grad_norm": 1.9453125, "learning_rate": 0.0001666947170221263, "loss": 2.2985, "step": 14412 }, { "epoch": 0.674726432207853, "grad_norm": 1.3828125, "learning_rate": 0.00016669036748465747, "loss": 2.4422, "step": 14413 }, { "epoch": 0.6747732459476856, "grad_norm": 1.265625, "learning_rate": 0.0001666860177199451, "loss": 2.5401, "step": 14414 }, { "epoch": 0.6748200596875182, "grad_norm": 1.4609375, "learning_rate": 0.00016668166772800395, "loss": 2.3792, "step": 14415 }, { "epoch": 0.674866873427351, "grad_norm": 1.4921875, "learning_rate": 0.00016667731750884886, "loss": 2.8817, "step": 14416 }, { "epoch": 0.6749136871671836, "grad_norm": 1.328125, "learning_rate": 0.0001666729670624947, "loss": 2.7698, "step": 14417 }, { "epoch": 0.6749605009070162, "grad_norm": 1.1484375, "learning_rate": 0.00016666861638895626, "loss": 4.1927, "step": 14418 }, { "epoch": 0.6750073146468488, "grad_norm": 1.4453125, "learning_rate": 0.00016666426548824837, "loss": 2.5342, "step": 14419 }, { "epoch": 0.6750541283866814, "grad_norm": 1.265625, "learning_rate": 0.00016665991436038582, "loss": 2.4172, "step": 14420 }, { "epoch": 0.6751009421265142, "grad_norm": 1.1796875, "learning_rate": 0.00016665556300538348, "loss": 2.8411, "step": 14421 }, { "epoch": 0.6751477558663468, "grad_norm": 1.4921875, "learning_rate": 0.00016665121142325618, "loss": 2.4326, "step": 14422 }, { "epoch": 0.6751945696061794, "grad_norm": 1.421875, "learning_rate": 0.00016664685961401867, "loss": 2.9286, "step": 14423 }, { "epoch": 0.675241383346012, "grad_norm": 1.5390625, "learning_rate": 0.0001666425075776859, "loss": 2.6387, "step": 14424 }, { "epoch": 0.6752881970858446, "grad_norm": 1.8828125, "learning_rate": 0.0001666381553142726, "loss": 3.0236, "step": 14425 }, { "epoch": 0.6753350108256774, "grad_norm": 1.8125, "learning_rate": 0.00016663380282379365, "loss": 3.2739, "step": 14426 }, { "epoch": 0.67538182456551, "grad_norm": 1.2421875, "learning_rate": 0.00016662945010626387, "loss": 2.4547, "step": 14427 }, { "epoch": 0.6754286383053426, "grad_norm": 1.34375, "learning_rate": 0.0001666250971616981, "loss": 2.7742, "step": 14428 }, { "epoch": 0.6754754520451752, "grad_norm": 1.453125, "learning_rate": 0.00016662074399011116, "loss": 3.0683, "step": 14429 }, { "epoch": 0.6755222657850078, "grad_norm": 1.515625, "learning_rate": 0.00016661639059151786, "loss": 2.7765, "step": 14430 }, { "epoch": 0.6755690795248406, "grad_norm": 1.4375, "learning_rate": 0.0001666120369659331, "loss": 2.5425, "step": 14431 }, { "epoch": 0.6756158932646732, "grad_norm": 1.234375, "learning_rate": 0.00016660768311337165, "loss": 2.5053, "step": 14432 }, { "epoch": 0.6756627070045058, "grad_norm": 1.328125, "learning_rate": 0.00016660332903384838, "loss": 2.623, "step": 14433 }, { "epoch": 0.6757095207443384, "grad_norm": 1.2890625, "learning_rate": 0.00016659897472737808, "loss": 2.734, "step": 14434 }, { "epoch": 0.675756334484171, "grad_norm": 1.640625, "learning_rate": 0.00016659462019397565, "loss": 2.9746, "step": 14435 }, { "epoch": 0.6758031482240038, "grad_norm": 1.1953125, "learning_rate": 0.00016659026543365592, "loss": 2.732, "step": 14436 }, { "epoch": 0.6758499619638364, "grad_norm": 1.5078125, "learning_rate": 0.00016658591044643365, "loss": 2.6642, "step": 14437 }, { "epoch": 0.675896775703669, "grad_norm": 1.3125, "learning_rate": 0.0001665815552323238, "loss": 2.6669, "step": 14438 }, { "epoch": 0.6759435894435016, "grad_norm": 1.5390625, "learning_rate": 0.00016657719979134113, "loss": 2.6453, "step": 14439 }, { "epoch": 0.6759904031833343, "grad_norm": 1.8515625, "learning_rate": 0.0001665728441235005, "loss": 2.6639, "step": 14440 }, { "epoch": 0.676037216923167, "grad_norm": 1.5078125, "learning_rate": 0.00016656848822881677, "loss": 2.5352, "step": 14441 }, { "epoch": 0.6760840306629996, "grad_norm": 1.453125, "learning_rate": 0.00016656413210730474, "loss": 2.8339, "step": 14442 }, { "epoch": 0.6761308444028322, "grad_norm": 1.390625, "learning_rate": 0.0001665597757589793, "loss": 2.4, "step": 14443 }, { "epoch": 0.6761776581426648, "grad_norm": 1.3828125, "learning_rate": 0.00016655541918385525, "loss": 2.476, "step": 14444 }, { "epoch": 0.6762244718824975, "grad_norm": 1.1328125, "learning_rate": 0.00016655106238194745, "loss": 2.5195, "step": 14445 }, { "epoch": 0.6762712856223302, "grad_norm": 1.4453125, "learning_rate": 0.00016654670535327077, "loss": 2.7758, "step": 14446 }, { "epoch": 0.6763180993621628, "grad_norm": 1.8984375, "learning_rate": 0.00016654234809784001, "loss": 2.5863, "step": 14447 }, { "epoch": 0.6763649131019954, "grad_norm": 1.6484375, "learning_rate": 0.0001665379906156701, "loss": 2.6692, "step": 14448 }, { "epoch": 0.676411726841828, "grad_norm": 1.1171875, "learning_rate": 0.0001665336329067758, "loss": 2.9834, "step": 14449 }, { "epoch": 0.6764585405816608, "grad_norm": 1.3671875, "learning_rate": 0.000166529274971172, "loss": 2.4885, "step": 14450 }, { "epoch": 0.6765053543214934, "grad_norm": 1.2734375, "learning_rate": 0.0001665249168088735, "loss": 2.6728, "step": 14451 }, { "epoch": 0.676552168061326, "grad_norm": 1.203125, "learning_rate": 0.00016652055841989523, "loss": 2.3422, "step": 14452 }, { "epoch": 0.6765989818011586, "grad_norm": 1.9375, "learning_rate": 0.000166516199804252, "loss": 2.4869, "step": 14453 }, { "epoch": 0.6766457955409912, "grad_norm": 1.359375, "learning_rate": 0.00016651184096195865, "loss": 2.8856, "step": 14454 }, { "epoch": 0.676692609280824, "grad_norm": 1.984375, "learning_rate": 0.00016650748189303007, "loss": 2.5003, "step": 14455 }, { "epoch": 0.6767394230206566, "grad_norm": 1.2109375, "learning_rate": 0.00016650312259748108, "loss": 2.9114, "step": 14456 }, { "epoch": 0.6767862367604892, "grad_norm": 1.4609375, "learning_rate": 0.00016649876307532654, "loss": 2.5076, "step": 14457 }, { "epoch": 0.6768330505003218, "grad_norm": 1.4921875, "learning_rate": 0.0001664944033265813, "loss": 2.6646, "step": 14458 }, { "epoch": 0.6768798642401544, "grad_norm": 1.2421875, "learning_rate": 0.00016649004335126023, "loss": 2.4454, "step": 14459 }, { "epoch": 0.6769266779799872, "grad_norm": 1.4375, "learning_rate": 0.00016648568314937817, "loss": 2.5712, "step": 14460 }, { "epoch": 0.6769734917198198, "grad_norm": 1.109375, "learning_rate": 0.00016648132272095004, "loss": 2.7212, "step": 14461 }, { "epoch": 0.6770203054596524, "grad_norm": 1.5078125, "learning_rate": 0.0001664769620659906, "loss": 2.7867, "step": 14462 }, { "epoch": 0.677067119199485, "grad_norm": 1.4765625, "learning_rate": 0.00016647260118451474, "loss": 2.7761, "step": 14463 }, { "epoch": 0.6771139329393177, "grad_norm": 1.71875, "learning_rate": 0.00016646824007653736, "loss": 2.7928, "step": 14464 }, { "epoch": 0.6771607466791504, "grad_norm": 1.2421875, "learning_rate": 0.00016646387874207327, "loss": 2.5092, "step": 14465 }, { "epoch": 0.677207560418983, "grad_norm": 1.2265625, "learning_rate": 0.0001664595171811374, "loss": 2.3657, "step": 14466 }, { "epoch": 0.6772543741588156, "grad_norm": 1.515625, "learning_rate": 0.00016645515539374453, "loss": 2.7104, "step": 14467 }, { "epoch": 0.6773011878986482, "grad_norm": 2.28125, "learning_rate": 0.00016645079337990957, "loss": 2.8492, "step": 14468 }, { "epoch": 0.6773480016384809, "grad_norm": 1.6875, "learning_rate": 0.00016644643113964737, "loss": 2.7488, "step": 14469 }, { "epoch": 0.6773948153783136, "grad_norm": 1.3671875, "learning_rate": 0.00016644206867297282, "loss": 2.6826, "step": 14470 }, { "epoch": 0.6774416291181462, "grad_norm": 1.484375, "learning_rate": 0.00016643770597990073, "loss": 2.5545, "step": 14471 }, { "epoch": 0.6774884428579788, "grad_norm": 1.4296875, "learning_rate": 0.00016643334306044603, "loss": 2.5691, "step": 14472 }, { "epoch": 0.6775352565978114, "grad_norm": 1.1484375, "learning_rate": 0.00016642897991462352, "loss": 2.5044, "step": 14473 }, { "epoch": 0.6775820703376441, "grad_norm": 1.171875, "learning_rate": 0.0001664246165424481, "loss": 1.9784, "step": 14474 }, { "epoch": 0.6776288840774768, "grad_norm": 1.28125, "learning_rate": 0.00016642025294393467, "loss": 2.9552, "step": 14475 }, { "epoch": 0.6776756978173094, "grad_norm": 1.4609375, "learning_rate": 0.00016641588911909803, "loss": 2.7306, "step": 14476 }, { "epoch": 0.677722511557142, "grad_norm": 1.1484375, "learning_rate": 0.0001664115250679531, "loss": 2.3985, "step": 14477 }, { "epoch": 0.6777693252969746, "grad_norm": 1.21875, "learning_rate": 0.00016640716079051475, "loss": 2.6722, "step": 14478 }, { "epoch": 0.6778161390368073, "grad_norm": 2.125, "learning_rate": 0.00016640279628679782, "loss": 2.9245, "step": 14479 }, { "epoch": 0.67786295277664, "grad_norm": 1.46875, "learning_rate": 0.00016639843155681722, "loss": 2.5055, "step": 14480 }, { "epoch": 0.6779097665164726, "grad_norm": 1.4609375, "learning_rate": 0.00016639406660058778, "loss": 2.451, "step": 14481 }, { "epoch": 0.6779565802563052, "grad_norm": 1.3359375, "learning_rate": 0.00016638970141812442, "loss": 2.648, "step": 14482 }, { "epoch": 0.6780033939961378, "grad_norm": 1.6640625, "learning_rate": 0.00016638533600944196, "loss": 2.3984, "step": 14483 }, { "epoch": 0.6780502077359705, "grad_norm": 1.640625, "learning_rate": 0.00016638097037455536, "loss": 2.9095, "step": 14484 }, { "epoch": 0.6780970214758032, "grad_norm": 1.21875, "learning_rate": 0.00016637660451347938, "loss": 2.046, "step": 14485 }, { "epoch": 0.6781438352156358, "grad_norm": 1.5859375, "learning_rate": 0.000166372238426229, "loss": 2.5373, "step": 14486 }, { "epoch": 0.6781906489554684, "grad_norm": 1.453125, "learning_rate": 0.000166367872112819, "loss": 2.6651, "step": 14487 }, { "epoch": 0.678237462695301, "grad_norm": 1.625, "learning_rate": 0.00016636350557326437, "loss": 2.612, "step": 14488 }, { "epoch": 0.6782842764351337, "grad_norm": 1.421875, "learning_rate": 0.0001663591388075799, "loss": 2.6559, "step": 14489 }, { "epoch": 0.6783310901749664, "grad_norm": 1.3359375, "learning_rate": 0.00016635477181578052, "loss": 3.0371, "step": 14490 }, { "epoch": 0.678377903914799, "grad_norm": 1.2578125, "learning_rate": 0.00016635040459788108, "loss": 2.8495, "step": 14491 }, { "epoch": 0.6784247176546316, "grad_norm": 1.375, "learning_rate": 0.00016634603715389646, "loss": 2.5999, "step": 14492 }, { "epoch": 0.6784715313944643, "grad_norm": 2.015625, "learning_rate": 0.00016634166948384155, "loss": 2.7664, "step": 14493 }, { "epoch": 0.6785183451342969, "grad_norm": 1.640625, "learning_rate": 0.00016633730158773126, "loss": 2.5736, "step": 14494 }, { "epoch": 0.6785651588741296, "grad_norm": 1.390625, "learning_rate": 0.00016633293346558046, "loss": 2.575, "step": 14495 }, { "epoch": 0.6786119726139622, "grad_norm": 1.796875, "learning_rate": 0.00016632856511740397, "loss": 2.9965, "step": 14496 }, { "epoch": 0.6786587863537948, "grad_norm": 1.71875, "learning_rate": 0.0001663241965432168, "loss": 2.4397, "step": 14497 }, { "epoch": 0.6787056000936275, "grad_norm": 1.8125, "learning_rate": 0.0001663198277430337, "loss": 2.4723, "step": 14498 }, { "epoch": 0.6787524138334601, "grad_norm": 1.6484375, "learning_rate": 0.00016631545871686963, "loss": 2.5402, "step": 14499 }, { "epoch": 0.6787992275732928, "grad_norm": 1.4296875, "learning_rate": 0.0001663110894647395, "loss": 2.3176, "step": 14500 }, { "epoch": 0.6788460413131254, "grad_norm": 1.75, "learning_rate": 0.00016630671998665815, "loss": 2.8243, "step": 14501 }, { "epoch": 0.678892855052958, "grad_norm": 1.6171875, "learning_rate": 0.0001663023502826405, "loss": 2.5345, "step": 14502 }, { "epoch": 0.6789396687927907, "grad_norm": 1.6015625, "learning_rate": 0.0001662979803527014, "loss": 2.7255, "step": 14503 }, { "epoch": 0.6789864825326233, "grad_norm": 1.4375, "learning_rate": 0.00016629361019685575, "loss": 2.4928, "step": 14504 }, { "epoch": 0.679033296272456, "grad_norm": 1.3359375, "learning_rate": 0.00016628923981511847, "loss": 2.4867, "step": 14505 }, { "epoch": 0.6790801100122886, "grad_norm": 1.4453125, "learning_rate": 0.00016628486920750444, "loss": 2.6738, "step": 14506 }, { "epoch": 0.6791269237521212, "grad_norm": 1.484375, "learning_rate": 0.00016628049837402858, "loss": 2.3799, "step": 14507 }, { "epoch": 0.6791737374919539, "grad_norm": 1.390625, "learning_rate": 0.0001662761273147057, "loss": 2.2445, "step": 14508 }, { "epoch": 0.6792205512317865, "grad_norm": 1.7890625, "learning_rate": 0.0001662717560295508, "loss": 2.6672, "step": 14509 }, { "epoch": 0.6792673649716192, "grad_norm": 1.8671875, "learning_rate": 0.00016626738451857868, "loss": 2.442, "step": 14510 }, { "epoch": 0.6793141787114518, "grad_norm": 1.390625, "learning_rate": 0.0001662630127818043, "loss": 2.3547, "step": 14511 }, { "epoch": 0.6793609924512845, "grad_norm": 1.5546875, "learning_rate": 0.00016625864081924253, "loss": 2.6417, "step": 14512 }, { "epoch": 0.6794078061911171, "grad_norm": 3.5625, "learning_rate": 0.00016625426863090825, "loss": 3.468, "step": 14513 }, { "epoch": 0.6794546199309497, "grad_norm": 1.5078125, "learning_rate": 0.0001662498962168164, "loss": 2.5986, "step": 14514 }, { "epoch": 0.6795014336707824, "grad_norm": 1.9609375, "learning_rate": 0.00016624552357698188, "loss": 3.0354, "step": 14515 }, { "epoch": 0.679548247410615, "grad_norm": 1.4453125, "learning_rate": 0.00016624115071141952, "loss": 2.2884, "step": 14516 }, { "epoch": 0.6795950611504477, "grad_norm": 1.21875, "learning_rate": 0.0001662367776201443, "loss": 2.789, "step": 14517 }, { "epoch": 0.6796418748902803, "grad_norm": 1.1171875, "learning_rate": 0.00016623240430317106, "loss": 2.4917, "step": 14518 }, { "epoch": 0.6796886886301129, "grad_norm": 1.7578125, "learning_rate": 0.00016622803076051478, "loss": 2.8767, "step": 14519 }, { "epoch": 0.6797355023699456, "grad_norm": 1.5, "learning_rate": 0.00016622365699219026, "loss": 2.9715, "step": 14520 }, { "epoch": 0.6797823161097782, "grad_norm": 1.515625, "learning_rate": 0.00016621928299821249, "loss": 2.6765, "step": 14521 }, { "epoch": 0.6798291298496109, "grad_norm": 1.4921875, "learning_rate": 0.0001662149087785963, "loss": 2.9131, "step": 14522 }, { "epoch": 0.6798759435894435, "grad_norm": 1.484375, "learning_rate": 0.0001662105343333567, "loss": 2.7043, "step": 14523 }, { "epoch": 0.6799227573292761, "grad_norm": 1.5390625, "learning_rate": 0.0001662061596625085, "loss": 2.6854, "step": 14524 }, { "epoch": 0.6799695710691088, "grad_norm": 1.3671875, "learning_rate": 0.00016620178476606663, "loss": 2.9321, "step": 14525 }, { "epoch": 0.6800163848089414, "grad_norm": 1.109375, "learning_rate": 0.000166197409644046, "loss": 2.1758, "step": 14526 }, { "epoch": 0.6800631985487741, "grad_norm": 1.5703125, "learning_rate": 0.00016619303429646153, "loss": 3.1067, "step": 14527 }, { "epoch": 0.6801100122886067, "grad_norm": 1.8984375, "learning_rate": 0.00016618865872332814, "loss": 2.9127, "step": 14528 }, { "epoch": 0.6801568260284393, "grad_norm": 1.4921875, "learning_rate": 0.00016618428292466072, "loss": 2.7829, "step": 14529 }, { "epoch": 0.680203639768272, "grad_norm": 1.8671875, "learning_rate": 0.00016617990690047415, "loss": 2.9135, "step": 14530 }, { "epoch": 0.6802504535081046, "grad_norm": 2.328125, "learning_rate": 0.00016617553065078337, "loss": 2.4466, "step": 14531 }, { "epoch": 0.6802972672479373, "grad_norm": 1.65625, "learning_rate": 0.00016617115417560333, "loss": 2.3015, "step": 14532 }, { "epoch": 0.6803440809877699, "grad_norm": 1.9296875, "learning_rate": 0.0001661667774749489, "loss": 2.3901, "step": 14533 }, { "epoch": 0.6803908947276025, "grad_norm": 2.109375, "learning_rate": 0.000166162400548835, "loss": 2.4986, "step": 14534 }, { "epoch": 0.6804377084674352, "grad_norm": 1.3515625, "learning_rate": 0.00016615802339727657, "loss": 2.6756, "step": 14535 }, { "epoch": 0.6804845222072679, "grad_norm": 1.71875, "learning_rate": 0.00016615364602028842, "loss": 2.6915, "step": 14536 }, { "epoch": 0.6805313359471005, "grad_norm": 1.234375, "learning_rate": 0.00016614926841788559, "loss": 2.4497, "step": 14537 }, { "epoch": 0.6805781496869331, "grad_norm": 2.046875, "learning_rate": 0.00016614489059008295, "loss": 2.5708, "step": 14538 }, { "epoch": 0.6806249634267657, "grad_norm": 1.5703125, "learning_rate": 0.00016614051253689542, "loss": 2.7828, "step": 14539 }, { "epoch": 0.6806717771665984, "grad_norm": 1.6328125, "learning_rate": 0.00016613613425833795, "loss": 2.8869, "step": 14540 }, { "epoch": 0.680718590906431, "grad_norm": 1.5078125, "learning_rate": 0.00016613175575442538, "loss": 2.7289, "step": 14541 }, { "epoch": 0.6807654046462637, "grad_norm": 1.96875, "learning_rate": 0.00016612737702517268, "loss": 2.8684, "step": 14542 }, { "epoch": 0.6808122183860963, "grad_norm": 3.171875, "learning_rate": 0.0001661229980705948, "loss": 2.7695, "step": 14543 }, { "epoch": 0.6808590321259289, "grad_norm": 1.375, "learning_rate": 0.0001661186188907066, "loss": 2.7163, "step": 14544 }, { "epoch": 0.6809058458657616, "grad_norm": 1.453125, "learning_rate": 0.00016611423948552302, "loss": 2.7726, "step": 14545 }, { "epoch": 0.6809526596055943, "grad_norm": 1.234375, "learning_rate": 0.000166109859855059, "loss": 2.3166, "step": 14546 }, { "epoch": 0.6809994733454269, "grad_norm": 1.765625, "learning_rate": 0.00016610547999932945, "loss": 2.7949, "step": 14547 }, { "epoch": 0.6810462870852595, "grad_norm": 1.3203125, "learning_rate": 0.0001661010999183493, "loss": 2.7453, "step": 14548 }, { "epoch": 0.6810931008250921, "grad_norm": 1.8828125, "learning_rate": 0.00016609671961213346, "loss": 2.7474, "step": 14549 }, { "epoch": 0.6811399145649248, "grad_norm": 1.171875, "learning_rate": 0.0001660923390806969, "loss": 2.6336, "step": 14550 }, { "epoch": 0.6811867283047575, "grad_norm": 1.4765625, "learning_rate": 0.00016608795832405448, "loss": 2.3027, "step": 14551 }, { "epoch": 0.6812335420445901, "grad_norm": 1.5078125, "learning_rate": 0.00016608357734222118, "loss": 2.5423, "step": 14552 }, { "epoch": 0.6812803557844227, "grad_norm": 2.03125, "learning_rate": 0.00016607919613521193, "loss": 2.758, "step": 14553 }, { "epoch": 0.6813271695242553, "grad_norm": 1.75, "learning_rate": 0.0001660748147030416, "loss": 2.5783, "step": 14554 }, { "epoch": 0.681373983264088, "grad_norm": 1.515625, "learning_rate": 0.0001660704330457252, "loss": 2.4691, "step": 14555 }, { "epoch": 0.6814207970039207, "grad_norm": 1.5625, "learning_rate": 0.00016606605116327759, "loss": 2.6668, "step": 14556 }, { "epoch": 0.6814676107437533, "grad_norm": 1.3984375, "learning_rate": 0.00016606166905571373, "loss": 2.7965, "step": 14557 }, { "epoch": 0.6815144244835859, "grad_norm": 1.3515625, "learning_rate": 0.00016605728672304854, "loss": 2.4127, "step": 14558 }, { "epoch": 0.6815612382234185, "grad_norm": 1.4453125, "learning_rate": 0.000166052904165297, "loss": 2.7526, "step": 14559 }, { "epoch": 0.6816080519632512, "grad_norm": 1.421875, "learning_rate": 0.00016604852138247397, "loss": 3.0895, "step": 14560 }, { "epoch": 0.6816548657030839, "grad_norm": 2.0, "learning_rate": 0.00016604413837459445, "loss": 2.4254, "step": 14561 }, { "epoch": 0.6817016794429165, "grad_norm": 1.546875, "learning_rate": 0.00016603975514167333, "loss": 2.8971, "step": 14562 }, { "epoch": 0.6817484931827491, "grad_norm": 1.3359375, "learning_rate": 0.00016603537168372555, "loss": 2.391, "step": 14563 }, { "epoch": 0.6817953069225817, "grad_norm": 1.421875, "learning_rate": 0.0001660309880007661, "loss": 2.7502, "step": 14564 }, { "epoch": 0.6818421206624145, "grad_norm": 1.4765625, "learning_rate": 0.00016602660409280985, "loss": 2.7357, "step": 14565 }, { "epoch": 0.6818889344022471, "grad_norm": 1.3203125, "learning_rate": 0.00016602221995987173, "loss": 3.6677, "step": 14566 }, { "epoch": 0.6819357481420797, "grad_norm": 1.4921875, "learning_rate": 0.00016601783560196676, "loss": 2.7022, "step": 14567 }, { "epoch": 0.6819825618819123, "grad_norm": 1.5859375, "learning_rate": 0.00016601345101910982, "loss": 3.0263, "step": 14568 }, { "epoch": 0.682029375621745, "grad_norm": 1.390625, "learning_rate": 0.00016600906621131584, "loss": 2.7371, "step": 14569 }, { "epoch": 0.6820761893615777, "grad_norm": 1.375, "learning_rate": 0.0001660046811785998, "loss": 2.5411, "step": 14570 }, { "epoch": 0.6821230031014103, "grad_norm": 1.5859375, "learning_rate": 0.0001660002959209766, "loss": 2.8819, "step": 14571 }, { "epoch": 0.6821698168412429, "grad_norm": 1.953125, "learning_rate": 0.00016599591043846125, "loss": 2.5015, "step": 14572 }, { "epoch": 0.6822166305810755, "grad_norm": 1.21875, "learning_rate": 0.00016599152473106862, "loss": 2.4869, "step": 14573 }, { "epoch": 0.6822634443209082, "grad_norm": 1.453125, "learning_rate": 0.0001659871387988137, "loss": 2.704, "step": 14574 }, { "epoch": 0.6823102580607409, "grad_norm": 1.0546875, "learning_rate": 0.00016598275264171142, "loss": 2.4656, "step": 14575 }, { "epoch": 0.6823570718005735, "grad_norm": 1.203125, "learning_rate": 0.0001659783662597767, "loss": 2.8482, "step": 14576 }, { "epoch": 0.6824038855404061, "grad_norm": 1.2421875, "learning_rate": 0.00016597397965302456, "loss": 2.8679, "step": 14577 }, { "epoch": 0.6824506992802387, "grad_norm": 1.2890625, "learning_rate": 0.00016596959282146983, "loss": 2.519, "step": 14578 }, { "epoch": 0.6824975130200714, "grad_norm": 1.4453125, "learning_rate": 0.00016596520576512758, "loss": 2.4828, "step": 14579 }, { "epoch": 0.6825443267599041, "grad_norm": 1.4375, "learning_rate": 0.00016596081848401268, "loss": 2.3004, "step": 14580 }, { "epoch": 0.6825911404997367, "grad_norm": 1.875, "learning_rate": 0.00016595643097814012, "loss": 2.482, "step": 14581 }, { "epoch": 0.6826379542395693, "grad_norm": 0.80859375, "learning_rate": 0.0001659520432475248, "loss": 3.6675, "step": 14582 }, { "epoch": 0.6826847679794019, "grad_norm": 1.484375, "learning_rate": 0.00016594765529218174, "loss": 2.545, "step": 14583 }, { "epoch": 0.6827315817192346, "grad_norm": 1.6875, "learning_rate": 0.00016594326711212584, "loss": 2.4618, "step": 14584 }, { "epoch": 0.6827783954590673, "grad_norm": 1.96875, "learning_rate": 0.00016593887870737208, "loss": 3.0803, "step": 14585 }, { "epoch": 0.6828252091988999, "grad_norm": 1.1875, "learning_rate": 0.0001659344900779354, "loss": 2.6365, "step": 14586 }, { "epoch": 0.6828720229387325, "grad_norm": 1.453125, "learning_rate": 0.00016593010122383076, "loss": 2.5983, "step": 14587 }, { "epoch": 0.6829188366785651, "grad_norm": 1.8671875, "learning_rate": 0.00016592571214507306, "loss": 2.7516, "step": 14588 }, { "epoch": 0.6829656504183979, "grad_norm": 1.5, "learning_rate": 0.00016592132284167735, "loss": 2.5989, "step": 14589 }, { "epoch": 0.6830124641582305, "grad_norm": 1.3359375, "learning_rate": 0.00016591693331365854, "loss": 2.715, "step": 14590 }, { "epoch": 0.6830592778980631, "grad_norm": 1.5546875, "learning_rate": 0.0001659125435610316, "loss": 2.489, "step": 14591 }, { "epoch": 0.6831060916378957, "grad_norm": 1.296875, "learning_rate": 0.00016590815358381148, "loss": 2.4781, "step": 14592 }, { "epoch": 0.6831529053777283, "grad_norm": 1.4765625, "learning_rate": 0.00016590376338201313, "loss": 2.629, "step": 14593 }, { "epoch": 0.683199719117561, "grad_norm": 1.3515625, "learning_rate": 0.0001658993729556515, "loss": 2.2467, "step": 14594 }, { "epoch": 0.6832465328573937, "grad_norm": 1.28125, "learning_rate": 0.0001658949823047416, "loss": 2.8396, "step": 14595 }, { "epoch": 0.6832933465972263, "grad_norm": 1.4453125, "learning_rate": 0.0001658905914292983, "loss": 2.7051, "step": 14596 }, { "epoch": 0.6833401603370589, "grad_norm": 1.5234375, "learning_rate": 0.00016588620032933666, "loss": 2.4869, "step": 14597 }, { "epoch": 0.6833869740768915, "grad_norm": 1.5546875, "learning_rate": 0.00016588180900487158, "loss": 2.5802, "step": 14598 }, { "epoch": 0.6834337878167243, "grad_norm": 1.5390625, "learning_rate": 0.00016587741745591807, "loss": 3.0285, "step": 14599 }, { "epoch": 0.6834806015565569, "grad_norm": 1.3359375, "learning_rate": 0.00016587302568249105, "loss": 2.3218, "step": 14600 }, { "epoch": 0.6835274152963895, "grad_norm": 1.796875, "learning_rate": 0.00016586863368460553, "loss": 2.6745, "step": 14601 }, { "epoch": 0.6835742290362221, "grad_norm": 1.3125, "learning_rate": 0.0001658642414622764, "loss": 2.7554, "step": 14602 }, { "epoch": 0.6836210427760547, "grad_norm": 1.2734375, "learning_rate": 0.00016585984901551872, "loss": 2.7334, "step": 14603 }, { "epoch": 0.6836678565158875, "grad_norm": 2.015625, "learning_rate": 0.0001658554563443474, "loss": 2.8915, "step": 14604 }, { "epoch": 0.6837146702557201, "grad_norm": 1.3125, "learning_rate": 0.00016585106344877738, "loss": 2.5254, "step": 14605 }, { "epoch": 0.6837614839955527, "grad_norm": 1.453125, "learning_rate": 0.00016584667032882368, "loss": 2.7686, "step": 14606 }, { "epoch": 0.6838082977353853, "grad_norm": 1.2890625, "learning_rate": 0.0001658422769845013, "loss": 2.9567, "step": 14607 }, { "epoch": 0.6838551114752179, "grad_norm": 1.171875, "learning_rate": 0.00016583788341582514, "loss": 1.9756, "step": 14608 }, { "epoch": 0.6839019252150507, "grad_norm": 1.265625, "learning_rate": 0.0001658334896228102, "loss": 2.8338, "step": 14609 }, { "epoch": 0.6839487389548833, "grad_norm": 1.4765625, "learning_rate": 0.00016582909560547147, "loss": 2.59, "step": 14610 }, { "epoch": 0.6839955526947159, "grad_norm": 1.3359375, "learning_rate": 0.00016582470136382386, "loss": 2.3646, "step": 14611 }, { "epoch": 0.6840423664345485, "grad_norm": 1.7421875, "learning_rate": 0.00016582030689788241, "loss": 2.4988, "step": 14612 }, { "epoch": 0.6840891801743811, "grad_norm": 1.4140625, "learning_rate": 0.00016581591220766207, "loss": 2.7055, "step": 14613 }, { "epoch": 0.6841359939142139, "grad_norm": 1.2734375, "learning_rate": 0.00016581151729317778, "loss": 2.3946, "step": 14614 }, { "epoch": 0.6841828076540465, "grad_norm": 1.625, "learning_rate": 0.0001658071221544446, "loss": 2.7227, "step": 14615 }, { "epoch": 0.6842296213938791, "grad_norm": 1.4296875, "learning_rate": 0.00016580272679147742, "loss": 2.2784, "step": 14616 }, { "epoch": 0.6842764351337117, "grad_norm": 1.1328125, "learning_rate": 0.00016579833120429127, "loss": 2.7373, "step": 14617 }, { "epoch": 0.6843232488735443, "grad_norm": 1.2265625, "learning_rate": 0.0001657939353929011, "loss": 2.5741, "step": 14618 }, { "epoch": 0.6843700626133771, "grad_norm": 1.3046875, "learning_rate": 0.00016578953935732194, "loss": 2.8229, "step": 14619 }, { "epoch": 0.6844168763532097, "grad_norm": 1.9453125, "learning_rate": 0.00016578514309756868, "loss": 2.3669, "step": 14620 }, { "epoch": 0.6844636900930423, "grad_norm": 1.65625, "learning_rate": 0.00016578074661365632, "loss": 2.4906, "step": 14621 }, { "epoch": 0.6845105038328749, "grad_norm": 1.765625, "learning_rate": 0.00016577634990559993, "loss": 3.0348, "step": 14622 }, { "epoch": 0.6845573175727075, "grad_norm": 1.5078125, "learning_rate": 0.0001657719529734144, "loss": 2.5807, "step": 14623 }, { "epoch": 0.6846041313125403, "grad_norm": 1.5, "learning_rate": 0.00016576755581711473, "loss": 2.976, "step": 14624 }, { "epoch": 0.6846509450523729, "grad_norm": 1.2734375, "learning_rate": 0.00016576315843671595, "loss": 2.3586, "step": 14625 }, { "epoch": 0.6846977587922055, "grad_norm": 1.2734375, "learning_rate": 0.00016575876083223297, "loss": 2.6152, "step": 14626 }, { "epoch": 0.6847445725320381, "grad_norm": 1.3203125, "learning_rate": 0.00016575436300368085, "loss": 2.535, "step": 14627 }, { "epoch": 0.6847913862718708, "grad_norm": 1.3515625, "learning_rate": 0.00016574996495107453, "loss": 2.6808, "step": 14628 }, { "epoch": 0.6848382000117035, "grad_norm": 1.265625, "learning_rate": 0.000165745566674429, "loss": 2.9308, "step": 14629 }, { "epoch": 0.6848850137515361, "grad_norm": 1.3984375, "learning_rate": 0.00016574116817375924, "loss": 2.5709, "step": 14630 }, { "epoch": 0.6849318274913687, "grad_norm": 1.359375, "learning_rate": 0.00016573676944908026, "loss": 2.7609, "step": 14631 }, { "epoch": 0.6849786412312013, "grad_norm": 1.5625, "learning_rate": 0.00016573237050040705, "loss": 2.5561, "step": 14632 }, { "epoch": 0.685025454971034, "grad_norm": 1.453125, "learning_rate": 0.00016572797132775454, "loss": 2.4333, "step": 14633 }, { "epoch": 0.6850722687108667, "grad_norm": 1.671875, "learning_rate": 0.0001657235719311378, "loss": 2.6199, "step": 14634 }, { "epoch": 0.6851190824506993, "grad_norm": 1.46875, "learning_rate": 0.0001657191723105718, "loss": 2.8583, "step": 14635 }, { "epoch": 0.6851658961905319, "grad_norm": 1.609375, "learning_rate": 0.00016571477246607152, "loss": 2.8064, "step": 14636 }, { "epoch": 0.6852127099303645, "grad_norm": 1.921875, "learning_rate": 0.00016571037239765195, "loss": 2.4515, "step": 14637 }, { "epoch": 0.6852595236701972, "grad_norm": 1.609375, "learning_rate": 0.00016570597210532805, "loss": 2.3061, "step": 14638 }, { "epoch": 0.6853063374100299, "grad_norm": 1.5625, "learning_rate": 0.0001657015715891149, "loss": 2.5719, "step": 14639 }, { "epoch": 0.6853531511498625, "grad_norm": 1.6796875, "learning_rate": 0.0001656971708490274, "loss": 2.3464, "step": 14640 }, { "epoch": 0.6853999648896951, "grad_norm": 1.8203125, "learning_rate": 0.00016569276988508063, "loss": 2.7118, "step": 14641 }, { "epoch": 0.6854467786295277, "grad_norm": 1.78125, "learning_rate": 0.00016568836869728949, "loss": 2.461, "step": 14642 }, { "epoch": 0.6854935923693604, "grad_norm": 1.9765625, "learning_rate": 0.00016568396728566906, "loss": 1.8889, "step": 14643 }, { "epoch": 0.6855404061091931, "grad_norm": 1.65625, "learning_rate": 0.0001656795656502343, "loss": 3.3437, "step": 14644 }, { "epoch": 0.6855872198490257, "grad_norm": 1.1796875, "learning_rate": 0.00016567516379100024, "loss": 2.4522, "step": 14645 }, { "epoch": 0.6856340335888583, "grad_norm": 1.2890625, "learning_rate": 0.00016567076170798185, "loss": 2.5677, "step": 14646 }, { "epoch": 0.685680847328691, "grad_norm": 1.3203125, "learning_rate": 0.0001656663594011941, "loss": 2.3173, "step": 14647 }, { "epoch": 0.6857276610685236, "grad_norm": 1.96875, "learning_rate": 0.00016566195687065207, "loss": 2.5536, "step": 14648 }, { "epoch": 0.6857744748083563, "grad_norm": 1.1484375, "learning_rate": 0.00016565755411637067, "loss": 2.5385, "step": 14649 }, { "epoch": 0.6858212885481889, "grad_norm": 1.9453125, "learning_rate": 0.000165653151138365, "loss": 2.844, "step": 14650 }, { "epoch": 0.6858681022880215, "grad_norm": 1.65625, "learning_rate": 0.00016564874793664996, "loss": 2.2707, "step": 14651 }, { "epoch": 0.6859149160278541, "grad_norm": 1.515625, "learning_rate": 0.00016564434451124064, "loss": 2.6821, "step": 14652 }, { "epoch": 0.6859617297676868, "grad_norm": 1.609375, "learning_rate": 0.00016563994086215203, "loss": 3.1599, "step": 14653 }, { "epoch": 0.6860085435075195, "grad_norm": 1.21875, "learning_rate": 0.00016563553698939907, "loss": 2.9571, "step": 14654 }, { "epoch": 0.6860553572473521, "grad_norm": 1.1796875, "learning_rate": 0.00016563113289299683, "loss": 2.2204, "step": 14655 }, { "epoch": 0.6861021709871847, "grad_norm": 1.3671875, "learning_rate": 0.00016562672857296028, "loss": 2.4227, "step": 14656 }, { "epoch": 0.6861489847270174, "grad_norm": 1.484375, "learning_rate": 0.00016562232402930448, "loss": 2.4042, "step": 14657 }, { "epoch": 0.68619579846685, "grad_norm": 1.4921875, "learning_rate": 0.0001656179192620444, "loss": 1.9912, "step": 14658 }, { "epoch": 0.6862426122066827, "grad_norm": 2.21875, "learning_rate": 0.00016561351427119505, "loss": 2.8196, "step": 14659 }, { "epoch": 0.6862894259465153, "grad_norm": 1.8125, "learning_rate": 0.00016560910905677142, "loss": 2.7084, "step": 14660 }, { "epoch": 0.6863362396863479, "grad_norm": 1.765625, "learning_rate": 0.00016560470361878858, "loss": 2.7583, "step": 14661 }, { "epoch": 0.6863830534261806, "grad_norm": 1.3203125, "learning_rate": 0.00016560029795726147, "loss": 2.69, "step": 14662 }, { "epoch": 0.6864298671660132, "grad_norm": 1.1796875, "learning_rate": 0.00016559589207220516, "loss": 2.545, "step": 14663 }, { "epoch": 0.6864766809058459, "grad_norm": 1.2890625, "learning_rate": 0.00016559148596363463, "loss": 2.6809, "step": 14664 }, { "epoch": 0.6865234946456785, "grad_norm": 1.859375, "learning_rate": 0.00016558707963156488, "loss": 3.1919, "step": 14665 }, { "epoch": 0.6865703083855111, "grad_norm": 1.453125, "learning_rate": 0.00016558267307601097, "loss": 2.4969, "step": 14666 }, { "epoch": 0.6866171221253438, "grad_norm": 1.28125, "learning_rate": 0.0001655782662969879, "loss": 2.3747, "step": 14667 }, { "epoch": 0.6866639358651764, "grad_norm": 1.21875, "learning_rate": 0.00016557385929451068, "loss": 2.5643, "step": 14668 }, { "epoch": 0.6867107496050091, "grad_norm": 1.5546875, "learning_rate": 0.00016556945206859432, "loss": 2.9729, "step": 14669 }, { "epoch": 0.6867575633448417, "grad_norm": 1.1796875, "learning_rate": 0.00016556504461925383, "loss": 1.852, "step": 14670 }, { "epoch": 0.6868043770846743, "grad_norm": 1.265625, "learning_rate": 0.00016556063694650423, "loss": 2.1933, "step": 14671 }, { "epoch": 0.686851190824507, "grad_norm": 1.140625, "learning_rate": 0.0001655562290503606, "loss": 2.3751, "step": 14672 }, { "epoch": 0.6868980045643396, "grad_norm": 1.8671875, "learning_rate": 0.00016555182093083785, "loss": 2.5756, "step": 14673 }, { "epoch": 0.6869448183041723, "grad_norm": 1.5703125, "learning_rate": 0.00016554741258795107, "loss": 2.9334, "step": 14674 }, { "epoch": 0.6869916320440049, "grad_norm": 1.3984375, "learning_rate": 0.0001655430040217153, "loss": 2.6341, "step": 14675 }, { "epoch": 0.6870384457838375, "grad_norm": 1.578125, "learning_rate": 0.0001655385952321455, "loss": 2.7235, "step": 14676 }, { "epoch": 0.6870852595236702, "grad_norm": 1.7421875, "learning_rate": 0.00016553418621925676, "loss": 2.8107, "step": 14677 }, { "epoch": 0.6871320732635028, "grad_norm": 1.1640625, "learning_rate": 0.00016552977698306408, "loss": 2.6119, "step": 14678 }, { "epoch": 0.6871788870033355, "grad_norm": 1.109375, "learning_rate": 0.00016552536752358242, "loss": 2.042, "step": 14679 }, { "epoch": 0.6872257007431681, "grad_norm": 1.453125, "learning_rate": 0.0001655209578408269, "loss": 2.9639, "step": 14680 }, { "epoch": 0.6872725144830008, "grad_norm": 1.296875, "learning_rate": 0.00016551654793481245, "loss": 2.808, "step": 14681 }, { "epoch": 0.6873193282228334, "grad_norm": 1.2890625, "learning_rate": 0.0001655121378055542, "loss": 2.642, "step": 14682 }, { "epoch": 0.687366141962666, "grad_norm": 1.4609375, "learning_rate": 0.00016550772745306712, "loss": 2.4959, "step": 14683 }, { "epoch": 0.6874129557024987, "grad_norm": 1.5546875, "learning_rate": 0.00016550331687736623, "loss": 2.6081, "step": 14684 }, { "epoch": 0.6874597694423313, "grad_norm": 1.421875, "learning_rate": 0.00016549890607846658, "loss": 2.8324, "step": 14685 }, { "epoch": 0.687506583182164, "grad_norm": 1.4921875, "learning_rate": 0.0001654944950563832, "loss": 2.4424, "step": 14686 }, { "epoch": 0.6875533969219966, "grad_norm": 1.09375, "learning_rate": 0.00016549008381113112, "loss": 2.4124, "step": 14687 }, { "epoch": 0.6876002106618292, "grad_norm": 1.9375, "learning_rate": 0.00016548567234272535, "loss": 2.8909, "step": 14688 }, { "epoch": 0.6876470244016619, "grad_norm": 1.171875, "learning_rate": 0.00016548126065118096, "loss": 2.7849, "step": 14689 }, { "epoch": 0.6876938381414945, "grad_norm": 1.0859375, "learning_rate": 0.00016547684873651291, "loss": 2.328, "step": 14690 }, { "epoch": 0.6877406518813272, "grad_norm": 1.234375, "learning_rate": 0.0001654724365987363, "loss": 2.5159, "step": 14691 }, { "epoch": 0.6877874656211598, "grad_norm": 1.4609375, "learning_rate": 0.00016546802423786616, "loss": 2.7289, "step": 14692 }, { "epoch": 0.6878342793609925, "grad_norm": 1.28125, "learning_rate": 0.00016546361165391754, "loss": 2.7195, "step": 14693 }, { "epoch": 0.6878810931008251, "grad_norm": 1.3203125, "learning_rate": 0.0001654591988469054, "loss": 2.7347, "step": 14694 }, { "epoch": 0.6879279068406577, "grad_norm": 2.3125, "learning_rate": 0.00016545478581684483, "loss": 2.5692, "step": 14695 }, { "epoch": 0.6879747205804904, "grad_norm": 1.296875, "learning_rate": 0.00016545037256375088, "loss": 2.6603, "step": 14696 }, { "epoch": 0.688021534320323, "grad_norm": 1.671875, "learning_rate": 0.00016544595908763858, "loss": 2.5621, "step": 14697 }, { "epoch": 0.6880683480601557, "grad_norm": 1.7265625, "learning_rate": 0.00016544154538852292, "loss": 2.5286, "step": 14698 }, { "epoch": 0.6881151617999883, "grad_norm": 1.2890625, "learning_rate": 0.00016543713146641902, "loss": 2.6282, "step": 14699 }, { "epoch": 0.688161975539821, "grad_norm": 1.5078125, "learning_rate": 0.00016543271732134184, "loss": 2.1191, "step": 14700 }, { "epoch": 0.6882087892796536, "grad_norm": 1.4375, "learning_rate": 0.00016542830295330647, "loss": 2.7045, "step": 14701 }, { "epoch": 0.6882556030194862, "grad_norm": 1.5546875, "learning_rate": 0.00016542388836232795, "loss": 2.3469, "step": 14702 }, { "epoch": 0.6883024167593189, "grad_norm": 1.375, "learning_rate": 0.0001654194735484213, "loss": 2.4177, "step": 14703 }, { "epoch": 0.6883492304991515, "grad_norm": 1.5546875, "learning_rate": 0.00016541505851160157, "loss": 2.655, "step": 14704 }, { "epoch": 0.6883960442389842, "grad_norm": 1.9453125, "learning_rate": 0.00016541064325188384, "loss": 2.4469, "step": 14705 }, { "epoch": 0.6884428579788168, "grad_norm": 1.4140625, "learning_rate": 0.00016540622776928308, "loss": 2.8947, "step": 14706 }, { "epoch": 0.6884896717186494, "grad_norm": 2.125, "learning_rate": 0.0001654018120638144, "loss": 3.2477, "step": 14707 }, { "epoch": 0.6885364854584821, "grad_norm": 1.40625, "learning_rate": 0.00016539739613549283, "loss": 2.8375, "step": 14708 }, { "epoch": 0.6885832991983147, "grad_norm": 1.46875, "learning_rate": 0.00016539297998433342, "loss": 2.2961, "step": 14709 }, { "epoch": 0.6886301129381474, "grad_norm": 1.4296875, "learning_rate": 0.0001653885636103512, "loss": 2.7172, "step": 14710 }, { "epoch": 0.68867692667798, "grad_norm": 1.3984375, "learning_rate": 0.0001653841470135612, "loss": 2.5258, "step": 14711 }, { "epoch": 0.6887237404178126, "grad_norm": 1.4140625, "learning_rate": 0.00016537973019397853, "loss": 2.7012, "step": 14712 }, { "epoch": 0.6887705541576453, "grad_norm": 1.515625, "learning_rate": 0.0001653753131516182, "loss": 2.2893, "step": 14713 }, { "epoch": 0.6888173678974779, "grad_norm": 1.46875, "learning_rate": 0.00016537089588649528, "loss": 2.7675, "step": 14714 }, { "epoch": 0.6888641816373106, "grad_norm": 1.1171875, "learning_rate": 0.00016536647839862482, "loss": 2.4102, "step": 14715 }, { "epoch": 0.6889109953771432, "grad_norm": 1.3515625, "learning_rate": 0.00016536206068802183, "loss": 2.5814, "step": 14716 }, { "epoch": 0.6889578091169758, "grad_norm": 1.3359375, "learning_rate": 0.00016535764275470144, "loss": 2.1798, "step": 14717 }, { "epoch": 0.6890046228568085, "grad_norm": 2.03125, "learning_rate": 0.00016535322459867863, "loss": 2.3899, "step": 14718 }, { "epoch": 0.6890514365966411, "grad_norm": 1.3671875, "learning_rate": 0.0001653488062199685, "loss": 2.5151, "step": 14719 }, { "epoch": 0.6890982503364738, "grad_norm": 1.4375, "learning_rate": 0.00016534438761858607, "loss": 2.3331, "step": 14720 }, { "epoch": 0.6891450640763064, "grad_norm": 1.6484375, "learning_rate": 0.0001653399687945464, "loss": 2.3462, "step": 14721 }, { "epoch": 0.689191877816139, "grad_norm": 1.7265625, "learning_rate": 0.00016533554974786462, "loss": 2.6036, "step": 14722 }, { "epoch": 0.6892386915559717, "grad_norm": 1.4140625, "learning_rate": 0.00016533113047855567, "loss": 2.7706, "step": 14723 }, { "epoch": 0.6892855052958043, "grad_norm": 1.34375, "learning_rate": 0.00016532671098663473, "loss": 2.4881, "step": 14724 }, { "epoch": 0.689332319035637, "grad_norm": 1.4140625, "learning_rate": 0.00016532229127211675, "loss": 2.3609, "step": 14725 }, { "epoch": 0.6893791327754696, "grad_norm": 1.7265625, "learning_rate": 0.0001653178713350169, "loss": 2.809, "step": 14726 }, { "epoch": 0.6894259465153022, "grad_norm": 1.9765625, "learning_rate": 0.0001653134511753501, "loss": 2.7331, "step": 14727 }, { "epoch": 0.6894727602551349, "grad_norm": 1.65625, "learning_rate": 0.00016530903079313154, "loss": 2.7277, "step": 14728 }, { "epoch": 0.6895195739949675, "grad_norm": 1.3046875, "learning_rate": 0.00016530461018837621, "loss": 2.3704, "step": 14729 }, { "epoch": 0.6895663877348002, "grad_norm": 1.34375, "learning_rate": 0.0001653001893610992, "loss": 2.7354, "step": 14730 }, { "epoch": 0.6896132014746328, "grad_norm": 1.921875, "learning_rate": 0.00016529576831131557, "loss": 2.5778, "step": 14731 }, { "epoch": 0.6896600152144654, "grad_norm": 1.5234375, "learning_rate": 0.00016529134703904038, "loss": 2.551, "step": 14732 }, { "epoch": 0.6897068289542981, "grad_norm": 2.140625, "learning_rate": 0.00016528692554428873, "loss": 2.582, "step": 14733 }, { "epoch": 0.6897536426941308, "grad_norm": 1.3984375, "learning_rate": 0.00016528250382707565, "loss": 2.0954, "step": 14734 }, { "epoch": 0.6898004564339634, "grad_norm": 1.25, "learning_rate": 0.00016527808188741619, "loss": 2.6923, "step": 14735 }, { "epoch": 0.689847270173796, "grad_norm": 1.1328125, "learning_rate": 0.00016527365972532544, "loss": 2.5779, "step": 14736 }, { "epoch": 0.6898940839136286, "grad_norm": 1.703125, "learning_rate": 0.00016526923734081845, "loss": 2.0703, "step": 14737 }, { "epoch": 0.6899408976534613, "grad_norm": 1.3203125, "learning_rate": 0.00016526481473391034, "loss": 2.8196, "step": 14738 }, { "epoch": 0.689987711393294, "grad_norm": 1.3125, "learning_rate": 0.0001652603919046161, "loss": 2.4615, "step": 14739 }, { "epoch": 0.6900345251331266, "grad_norm": 1.4921875, "learning_rate": 0.00016525596885295089, "loss": 2.6528, "step": 14740 }, { "epoch": 0.6900813388729592, "grad_norm": 1.359375, "learning_rate": 0.0001652515455789297, "loss": 2.8294, "step": 14741 }, { "epoch": 0.6901281526127918, "grad_norm": 1.390625, "learning_rate": 0.00016524712208256773, "loss": 2.5778, "step": 14742 }, { "epoch": 0.6901749663526245, "grad_norm": 1.2734375, "learning_rate": 0.00016524269836387987, "loss": 3.1861, "step": 14743 }, { "epoch": 0.6902217800924572, "grad_norm": 1.3671875, "learning_rate": 0.00016523827442288132, "loss": 2.4192, "step": 14744 }, { "epoch": 0.6902685938322898, "grad_norm": 1.3671875, "learning_rate": 0.0001652338502595871, "loss": 2.6994, "step": 14745 }, { "epoch": 0.6903154075721224, "grad_norm": 1.53125, "learning_rate": 0.00016522942587401235, "loss": 2.7251, "step": 14746 }, { "epoch": 0.690362221311955, "grad_norm": 2.125, "learning_rate": 0.00016522500126617205, "loss": 2.6122, "step": 14747 }, { "epoch": 0.6904090350517877, "grad_norm": 1.515625, "learning_rate": 0.00016522057643608137, "loss": 2.5866, "step": 14748 }, { "epoch": 0.6904558487916204, "grad_norm": 1.4609375, "learning_rate": 0.00016521615138375534, "loss": 2.4416, "step": 14749 }, { "epoch": 0.690502662531453, "grad_norm": 1.65625, "learning_rate": 0.00016521172610920904, "loss": 2.7994, "step": 14750 }, { "epoch": 0.6905494762712856, "grad_norm": 1.1875, "learning_rate": 0.00016520730061245754, "loss": 2.1484, "step": 14751 }, { "epoch": 0.6905962900111182, "grad_norm": 1.4609375, "learning_rate": 0.00016520287489351593, "loss": 2.7101, "step": 14752 }, { "epoch": 0.690643103750951, "grad_norm": 1.8671875, "learning_rate": 0.00016519844895239932, "loss": 2.8244, "step": 14753 }, { "epoch": 0.6906899174907836, "grad_norm": 2.03125, "learning_rate": 0.00016519402278912274, "loss": 2.2263, "step": 14754 }, { "epoch": 0.6907367312306162, "grad_norm": 1.4609375, "learning_rate": 0.00016518959640370132, "loss": 2.8921, "step": 14755 }, { "epoch": 0.6907835449704488, "grad_norm": 1.75, "learning_rate": 0.00016518516979615008, "loss": 3.9025, "step": 14756 }, { "epoch": 0.6908303587102814, "grad_norm": 2.109375, "learning_rate": 0.00016518074296648414, "loss": 2.7125, "step": 14757 }, { "epoch": 0.6908771724501142, "grad_norm": 2.03125, "learning_rate": 0.0001651763159147186, "loss": 2.7641, "step": 14758 }, { "epoch": 0.6909239861899468, "grad_norm": 1.359375, "learning_rate": 0.00016517188864086855, "loss": 2.5075, "step": 14759 }, { "epoch": 0.6909707999297794, "grad_norm": 1.5, "learning_rate": 0.00016516746114494905, "loss": 2.4121, "step": 14760 }, { "epoch": 0.691017613669612, "grad_norm": 1.1796875, "learning_rate": 0.00016516303342697518, "loss": 2.7554, "step": 14761 }, { "epoch": 0.6910644274094446, "grad_norm": 1.0859375, "learning_rate": 0.00016515860548696205, "loss": 2.4929, "step": 14762 }, { "epoch": 0.6911112411492774, "grad_norm": 1.390625, "learning_rate": 0.00016515417732492475, "loss": 2.6624, "step": 14763 }, { "epoch": 0.69115805488911, "grad_norm": 1.2421875, "learning_rate": 0.00016514974894087832, "loss": 2.6638, "step": 14764 }, { "epoch": 0.6912048686289426, "grad_norm": 1.3671875, "learning_rate": 0.00016514532033483793, "loss": 2.4423, "step": 14765 }, { "epoch": 0.6912516823687752, "grad_norm": 1.7109375, "learning_rate": 0.0001651408915068186, "loss": 2.8545, "step": 14766 }, { "epoch": 0.6912984961086078, "grad_norm": 1.703125, "learning_rate": 0.00016513646245683544, "loss": 3.0263, "step": 14767 }, { "epoch": 0.6913453098484406, "grad_norm": 1.3046875, "learning_rate": 0.00016513203318490355, "loss": 2.6787, "step": 14768 }, { "epoch": 0.6913921235882732, "grad_norm": 1.5546875, "learning_rate": 0.00016512760369103804, "loss": 2.7454, "step": 14769 }, { "epoch": 0.6914389373281058, "grad_norm": 1.4375, "learning_rate": 0.00016512317397525397, "loss": 2.7779, "step": 14770 }, { "epoch": 0.6914857510679384, "grad_norm": 1.5390625, "learning_rate": 0.00016511874403756646, "loss": 2.4319, "step": 14771 }, { "epoch": 0.691532564807771, "grad_norm": 1.5, "learning_rate": 0.0001651143138779906, "loss": 2.7506, "step": 14772 }, { "epoch": 0.6915793785476038, "grad_norm": 1.6328125, "learning_rate": 0.00016510988349654148, "loss": 2.7408, "step": 14773 }, { "epoch": 0.6916261922874364, "grad_norm": 1.671875, "learning_rate": 0.00016510545289323417, "loss": 2.6475, "step": 14774 }, { "epoch": 0.691673006027269, "grad_norm": 1.3125, "learning_rate": 0.00016510102206808384, "loss": 2.5339, "step": 14775 }, { "epoch": 0.6917198197671016, "grad_norm": 2.75, "learning_rate": 0.00016509659102110548, "loss": 2.7261, "step": 14776 }, { "epoch": 0.6917666335069342, "grad_norm": 1.671875, "learning_rate": 0.00016509215975231428, "loss": 2.7554, "step": 14777 }, { "epoch": 0.691813447246767, "grad_norm": 1.75, "learning_rate": 0.00016508772826172533, "loss": 2.6263, "step": 14778 }, { "epoch": 0.6918602609865996, "grad_norm": 1.8671875, "learning_rate": 0.00016508329654935365, "loss": 2.357, "step": 14779 }, { "epoch": 0.6919070747264322, "grad_norm": 1.9296875, "learning_rate": 0.00016507886461521447, "loss": 2.7014, "step": 14780 }, { "epoch": 0.6919538884662648, "grad_norm": 1.34375, "learning_rate": 0.00016507443245932277, "loss": 2.7625, "step": 14781 }, { "epoch": 0.6920007022060974, "grad_norm": 1.2890625, "learning_rate": 0.00016507000008169373, "loss": 2.5454, "step": 14782 }, { "epoch": 0.6920475159459302, "grad_norm": 1.9375, "learning_rate": 0.00016506556748234241, "loss": 2.6275, "step": 14783 }, { "epoch": 0.6920943296857628, "grad_norm": 1.34375, "learning_rate": 0.00016506113466128395, "loss": 2.7871, "step": 14784 }, { "epoch": 0.6921411434255954, "grad_norm": 1.609375, "learning_rate": 0.00016505670161853339, "loss": 2.8211, "step": 14785 }, { "epoch": 0.692187957165428, "grad_norm": 2.28125, "learning_rate": 0.0001650522683541059, "loss": 2.6623, "step": 14786 }, { "epoch": 0.6922347709052606, "grad_norm": 1.484375, "learning_rate": 0.00016504783486801658, "loss": 2.365, "step": 14787 }, { "epoch": 0.6922815846450934, "grad_norm": 1.5546875, "learning_rate": 0.00016504340116028053, "loss": 3.1366, "step": 14788 }, { "epoch": 0.692328398384926, "grad_norm": 1.3515625, "learning_rate": 0.00016503896723091282, "loss": 2.45, "step": 14789 }, { "epoch": 0.6923752121247586, "grad_norm": 1.359375, "learning_rate": 0.0001650345330799286, "loss": 2.7758, "step": 14790 }, { "epoch": 0.6924220258645912, "grad_norm": 1.5, "learning_rate": 0.00016503009870734298, "loss": 2.7162, "step": 14791 }, { "epoch": 0.6924688396044238, "grad_norm": 1.265625, "learning_rate": 0.00016502566411317107, "loss": 3.7522, "step": 14792 }, { "epoch": 0.6925156533442566, "grad_norm": 1.3359375, "learning_rate": 0.00016502122929742795, "loss": 2.4951, "step": 14793 }, { "epoch": 0.6925624670840892, "grad_norm": 1.4609375, "learning_rate": 0.00016501679426012873, "loss": 2.3054, "step": 14794 }, { "epoch": 0.6926092808239218, "grad_norm": 1.6328125, "learning_rate": 0.00016501235900128855, "loss": 2.8718, "step": 14795 }, { "epoch": 0.6926560945637544, "grad_norm": 1.2890625, "learning_rate": 0.00016500792352092252, "loss": 2.8549, "step": 14796 }, { "epoch": 0.692702908303587, "grad_norm": 1.28125, "learning_rate": 0.00016500348781904576, "loss": 2.6999, "step": 14797 }, { "epoch": 0.6927497220434198, "grad_norm": 1.25, "learning_rate": 0.00016499905189567334, "loss": 2.523, "step": 14798 }, { "epoch": 0.6927965357832524, "grad_norm": 1.703125, "learning_rate": 0.00016499461575082046, "loss": 2.7662, "step": 14799 }, { "epoch": 0.692843349523085, "grad_norm": 1.3125, "learning_rate": 0.00016499017938450216, "loss": 2.7161, "step": 14800 }, { "epoch": 0.6928901632629176, "grad_norm": 1.65625, "learning_rate": 0.00016498574279673358, "loss": 2.6259, "step": 14801 }, { "epoch": 0.6929369770027503, "grad_norm": 1.2578125, "learning_rate": 0.00016498130598752983, "loss": 2.5586, "step": 14802 }, { "epoch": 0.692983790742583, "grad_norm": 1.2265625, "learning_rate": 0.00016497686895690605, "loss": 2.7569, "step": 14803 }, { "epoch": 0.6930306044824156, "grad_norm": 1.3203125, "learning_rate": 0.0001649724317048773, "loss": 2.6059, "step": 14804 }, { "epoch": 0.6930774182222482, "grad_norm": 1.453125, "learning_rate": 0.00016496799423145882, "loss": 2.1761, "step": 14805 }, { "epoch": 0.6931242319620808, "grad_norm": 1.4140625, "learning_rate": 0.0001649635565366656, "loss": 2.5077, "step": 14806 }, { "epoch": 0.6931710457019135, "grad_norm": 1.3828125, "learning_rate": 0.00016495911862051283, "loss": 2.6346, "step": 14807 }, { "epoch": 0.6932178594417462, "grad_norm": 1.296875, "learning_rate": 0.00016495468048301562, "loss": 2.6525, "step": 14808 }, { "epoch": 0.6932646731815788, "grad_norm": 1.3359375, "learning_rate": 0.00016495024212418908, "loss": 2.6278, "step": 14809 }, { "epoch": 0.6933114869214114, "grad_norm": 1.3125, "learning_rate": 0.00016494580354404836, "loss": 2.5599, "step": 14810 }, { "epoch": 0.693358300661244, "grad_norm": 1.375, "learning_rate": 0.00016494136474260859, "loss": 2.404, "step": 14811 }, { "epoch": 0.6934051144010767, "grad_norm": 1.3671875, "learning_rate": 0.00016493692571988483, "loss": 2.4708, "step": 14812 }, { "epoch": 0.6934519281409094, "grad_norm": 1.375, "learning_rate": 0.00016493248647589224, "loss": 2.545, "step": 14813 }, { "epoch": 0.693498741880742, "grad_norm": 1.46875, "learning_rate": 0.000164928047010646, "loss": 3.189, "step": 14814 }, { "epoch": 0.6935455556205746, "grad_norm": 1.7890625, "learning_rate": 0.00016492360732416116, "loss": 2.5575, "step": 14815 }, { "epoch": 0.6935923693604072, "grad_norm": 1.875, "learning_rate": 0.00016491916741645288, "loss": 2.6898, "step": 14816 }, { "epoch": 0.69363918310024, "grad_norm": 1.359375, "learning_rate": 0.00016491472728753632, "loss": 2.7645, "step": 14817 }, { "epoch": 0.6936859968400726, "grad_norm": 1.8515625, "learning_rate": 0.00016491028693742656, "loss": 2.6542, "step": 14818 }, { "epoch": 0.6937328105799052, "grad_norm": 1.625, "learning_rate": 0.00016490584636613873, "loss": 2.8389, "step": 14819 }, { "epoch": 0.6937796243197378, "grad_norm": 1.3515625, "learning_rate": 0.00016490140557368804, "loss": 2.7442, "step": 14820 }, { "epoch": 0.6938264380595704, "grad_norm": 2.09375, "learning_rate": 0.00016489696456008953, "loss": 2.2852, "step": 14821 }, { "epoch": 0.6938732517994032, "grad_norm": 1.125, "learning_rate": 0.00016489252332535834, "loss": 2.6129, "step": 14822 }, { "epoch": 0.6939200655392358, "grad_norm": 1.7265625, "learning_rate": 0.00016488808186950962, "loss": 2.5168, "step": 14823 }, { "epoch": 0.6939668792790684, "grad_norm": 1.6171875, "learning_rate": 0.00016488364019255855, "loss": 2.9729, "step": 14824 }, { "epoch": 0.694013693018901, "grad_norm": 1.3671875, "learning_rate": 0.00016487919829452023, "loss": 2.6945, "step": 14825 }, { "epoch": 0.6940605067587337, "grad_norm": 1.578125, "learning_rate": 0.00016487475617540977, "loss": 2.6865, "step": 14826 }, { "epoch": 0.6941073204985664, "grad_norm": 1.9921875, "learning_rate": 0.0001648703138352423, "loss": 2.1775, "step": 14827 }, { "epoch": 0.694154134238399, "grad_norm": 1.390625, "learning_rate": 0.00016486587127403305, "loss": 2.1369, "step": 14828 }, { "epoch": 0.6942009479782316, "grad_norm": 1.3125, "learning_rate": 0.0001648614284917971, "loss": 3.9611, "step": 14829 }, { "epoch": 0.6942477617180642, "grad_norm": 1.4375, "learning_rate": 0.0001648569854885495, "loss": 2.3737, "step": 14830 }, { "epoch": 0.6942945754578969, "grad_norm": 1.359375, "learning_rate": 0.00016485254226430556, "loss": 2.6345, "step": 14831 }, { "epoch": 0.6943413891977296, "grad_norm": 1.421875, "learning_rate": 0.00016484809881908027, "loss": 2.8691, "step": 14832 }, { "epoch": 0.6943882029375622, "grad_norm": 1.265625, "learning_rate": 0.00016484365515288886, "loss": 2.4128, "step": 14833 }, { "epoch": 0.6944350166773948, "grad_norm": 1.1796875, "learning_rate": 0.00016483921126574642, "loss": 2.5474, "step": 14834 }, { "epoch": 0.6944818304172274, "grad_norm": 1.3828125, "learning_rate": 0.00016483476715766813, "loss": 2.5264, "step": 14835 }, { "epoch": 0.6945286441570601, "grad_norm": 1.3984375, "learning_rate": 0.00016483032282866908, "loss": 2.755, "step": 14836 }, { "epoch": 0.6945754578968928, "grad_norm": 1.4140625, "learning_rate": 0.0001648258782787645, "loss": 2.6666, "step": 14837 }, { "epoch": 0.6946222716367254, "grad_norm": 1.3046875, "learning_rate": 0.00016482143350796946, "loss": 2.7282, "step": 14838 }, { "epoch": 0.694669085376558, "grad_norm": 1.5, "learning_rate": 0.00016481698851629917, "loss": 2.4825, "step": 14839 }, { "epoch": 0.6947158991163906, "grad_norm": 2.28125, "learning_rate": 0.00016481254330376872, "loss": 4.4023, "step": 14840 }, { "epoch": 0.6947627128562233, "grad_norm": 1.1875, "learning_rate": 0.00016480809787039328, "loss": 2.5123, "step": 14841 }, { "epoch": 0.694809526596056, "grad_norm": 3.0625, "learning_rate": 0.000164803652216188, "loss": 2.6745, "step": 14842 }, { "epoch": 0.6948563403358886, "grad_norm": 1.109375, "learning_rate": 0.00016479920634116795, "loss": 2.6904, "step": 14843 }, { "epoch": 0.6949031540757212, "grad_norm": 1.296875, "learning_rate": 0.00016479476024534843, "loss": 2.584, "step": 14844 }, { "epoch": 0.6949499678155538, "grad_norm": 1.5859375, "learning_rate": 0.00016479031392874446, "loss": 2.5284, "step": 14845 }, { "epoch": 0.6949967815553865, "grad_norm": 2.109375, "learning_rate": 0.00016478586739137127, "loss": 2.3703, "step": 14846 }, { "epoch": 0.6950435952952192, "grad_norm": 1.234375, "learning_rate": 0.00016478142063324397, "loss": 2.6211, "step": 14847 }, { "epoch": 0.6950904090350518, "grad_norm": 1.3671875, "learning_rate": 0.00016477697365437774, "loss": 2.5551, "step": 14848 }, { "epoch": 0.6951372227748844, "grad_norm": 1.2890625, "learning_rate": 0.00016477252645478774, "loss": 2.7609, "step": 14849 }, { "epoch": 0.695184036514717, "grad_norm": 1.0859375, "learning_rate": 0.00016476807903448903, "loss": 4.0077, "step": 14850 }, { "epoch": 0.6952308502545497, "grad_norm": 1.15625, "learning_rate": 0.00016476363139349687, "loss": 2.9233, "step": 14851 }, { "epoch": 0.6952776639943824, "grad_norm": 1.90625, "learning_rate": 0.00016475918353182637, "loss": 2.392, "step": 14852 }, { "epoch": 0.695324477734215, "grad_norm": 1.34375, "learning_rate": 0.0001647547354494927, "loss": 2.4355, "step": 14853 }, { "epoch": 0.6953712914740476, "grad_norm": 1.2578125, "learning_rate": 0.00016475028714651102, "loss": 2.4549, "step": 14854 }, { "epoch": 0.6954181052138803, "grad_norm": 1.09375, "learning_rate": 0.00016474583862289646, "loss": 2.5816, "step": 14855 }, { "epoch": 0.6954649189537129, "grad_norm": 2.390625, "learning_rate": 0.00016474138987866422, "loss": 2.7534, "step": 14856 }, { "epoch": 0.6955117326935456, "grad_norm": 1.125, "learning_rate": 0.00016473694091382945, "loss": 2.0348, "step": 14857 }, { "epoch": 0.6955585464333782, "grad_norm": 1.1171875, "learning_rate": 0.0001647324917284073, "loss": 2.4619, "step": 14858 }, { "epoch": 0.6956053601732108, "grad_norm": 1.3515625, "learning_rate": 0.0001647280423224129, "loss": 2.5024, "step": 14859 }, { "epoch": 0.6956521739130435, "grad_norm": 1.453125, "learning_rate": 0.0001647235926958614, "loss": 2.8004, "step": 14860 }, { "epoch": 0.6956989876528761, "grad_norm": 1.40625, "learning_rate": 0.00016471914284876807, "loss": 2.3366, "step": 14861 }, { "epoch": 0.6957458013927088, "grad_norm": 1.5859375, "learning_rate": 0.00016471469278114795, "loss": 2.6089, "step": 14862 }, { "epoch": 0.6957926151325414, "grad_norm": 1.7109375, "learning_rate": 0.0001647102424930163, "loss": 2.5929, "step": 14863 }, { "epoch": 0.695839428872374, "grad_norm": 1.21875, "learning_rate": 0.0001647057919843882, "loss": 2.6595, "step": 14864 }, { "epoch": 0.6958862426122067, "grad_norm": 1.3671875, "learning_rate": 0.00016470134125527887, "loss": 2.6733, "step": 14865 }, { "epoch": 0.6959330563520393, "grad_norm": 1.5, "learning_rate": 0.00016469689030570347, "loss": 2.7576, "step": 14866 }, { "epoch": 0.695979870091872, "grad_norm": 1.5859375, "learning_rate": 0.00016469243913567712, "loss": 2.4268, "step": 14867 }, { "epoch": 0.6960266838317046, "grad_norm": 1.734375, "learning_rate": 0.00016468798774521505, "loss": 2.7318, "step": 14868 }, { "epoch": 0.6960734975715372, "grad_norm": 1.046875, "learning_rate": 0.0001646835361343324, "loss": 2.5478, "step": 14869 }, { "epoch": 0.6961203113113699, "grad_norm": 1.390625, "learning_rate": 0.00016467908430304435, "loss": 2.743, "step": 14870 }, { "epoch": 0.6961671250512025, "grad_norm": 1.5234375, "learning_rate": 0.00016467463225136602, "loss": 2.4741, "step": 14871 }, { "epoch": 0.6962139387910352, "grad_norm": 1.4765625, "learning_rate": 0.00016467017997931266, "loss": 2.359, "step": 14872 }, { "epoch": 0.6962607525308678, "grad_norm": 1.28125, "learning_rate": 0.0001646657274868994, "loss": 2.5455, "step": 14873 }, { "epoch": 0.6963075662707005, "grad_norm": 1.171875, "learning_rate": 0.0001646612747741414, "loss": 2.7847, "step": 14874 }, { "epoch": 0.6963543800105331, "grad_norm": 1.2265625, "learning_rate": 0.00016465682184105382, "loss": 2.632, "step": 14875 }, { "epoch": 0.6964011937503657, "grad_norm": 2.6875, "learning_rate": 0.00016465236868765188, "loss": 2.7913, "step": 14876 }, { "epoch": 0.6964480074901984, "grad_norm": 1.3515625, "learning_rate": 0.00016464791531395072, "loss": 4.2622, "step": 14877 }, { "epoch": 0.696494821230031, "grad_norm": 1.2890625, "learning_rate": 0.00016464346171996555, "loss": 2.6517, "step": 14878 }, { "epoch": 0.6965416349698637, "grad_norm": 1.0859375, "learning_rate": 0.0001646390079057115, "loss": 2.8362, "step": 14879 }, { "epoch": 0.6965884487096963, "grad_norm": 1.078125, "learning_rate": 0.00016463455387120377, "loss": 2.6849, "step": 14880 }, { "epoch": 0.6966352624495289, "grad_norm": 1.390625, "learning_rate": 0.00016463009961645755, "loss": 2.4588, "step": 14881 }, { "epoch": 0.6966820761893616, "grad_norm": 2.359375, "learning_rate": 0.00016462564514148798, "loss": 3.1497, "step": 14882 }, { "epoch": 0.6967288899291942, "grad_norm": 1.265625, "learning_rate": 0.00016462119044631025, "loss": 2.8493, "step": 14883 }, { "epoch": 0.6967757036690269, "grad_norm": 1.203125, "learning_rate": 0.00016461673553093957, "loss": 2.2121, "step": 14884 }, { "epoch": 0.6968225174088595, "grad_norm": 1.3671875, "learning_rate": 0.00016461228039539108, "loss": 2.3937, "step": 14885 }, { "epoch": 0.6968693311486921, "grad_norm": 1.21875, "learning_rate": 0.00016460782503967998, "loss": 2.4708, "step": 14886 }, { "epoch": 0.6969161448885248, "grad_norm": 1.6484375, "learning_rate": 0.0001646033694638215, "loss": 2.9425, "step": 14887 }, { "epoch": 0.6969629586283574, "grad_norm": 1.265625, "learning_rate": 0.00016459891366783072, "loss": 2.3338, "step": 14888 }, { "epoch": 0.6970097723681901, "grad_norm": 1.21875, "learning_rate": 0.0001645944576517229, "loss": 2.4895, "step": 14889 }, { "epoch": 0.6970565861080227, "grad_norm": 1.3046875, "learning_rate": 0.00016459000141551318, "loss": 2.7397, "step": 14890 }, { "epoch": 0.6971033998478553, "grad_norm": 1.6640625, "learning_rate": 0.00016458554495921677, "loss": 2.6158, "step": 14891 }, { "epoch": 0.697150213587688, "grad_norm": 1.34375, "learning_rate": 0.00016458108828284887, "loss": 2.4859, "step": 14892 }, { "epoch": 0.6971970273275206, "grad_norm": 1.75, "learning_rate": 0.00016457663138642463, "loss": 2.7829, "step": 14893 }, { "epoch": 0.6972438410673533, "grad_norm": 1.5546875, "learning_rate": 0.00016457217426995923, "loss": 2.6671, "step": 14894 }, { "epoch": 0.6972906548071859, "grad_norm": 1.640625, "learning_rate": 0.0001645677169334679, "loss": 3.2513, "step": 14895 }, { "epoch": 0.6973374685470185, "grad_norm": 1.46875, "learning_rate": 0.00016456325937696582, "loss": 2.4922, "step": 14896 }, { "epoch": 0.6973842822868512, "grad_norm": 1.6953125, "learning_rate": 0.00016455880160046814, "loss": 3.1017, "step": 14897 }, { "epoch": 0.6974310960266838, "grad_norm": 1.453125, "learning_rate": 0.0001645543436039901, "loss": 2.9169, "step": 14898 }, { "epoch": 0.6974779097665165, "grad_norm": 1.0390625, "learning_rate": 0.00016454988538754686, "loss": 2.4313, "step": 14899 }, { "epoch": 0.6975247235063491, "grad_norm": 1.328125, "learning_rate": 0.00016454542695115357, "loss": 2.0365, "step": 14900 }, { "epoch": 0.6975715372461817, "grad_norm": 1.3125, "learning_rate": 0.00016454096829482553, "loss": 2.8577, "step": 14901 }, { "epoch": 0.6976183509860144, "grad_norm": 1.4140625, "learning_rate": 0.00016453650941857783, "loss": 2.9445, "step": 14902 }, { "epoch": 0.697665164725847, "grad_norm": 1.7734375, "learning_rate": 0.00016453205032242573, "loss": 3.2501, "step": 14903 }, { "epoch": 0.6977119784656797, "grad_norm": 1.609375, "learning_rate": 0.00016452759100638438, "loss": 2.2332, "step": 14904 }, { "epoch": 0.6977587922055123, "grad_norm": 1.1875, "learning_rate": 0.000164523131470469, "loss": 2.3589, "step": 14905 }, { "epoch": 0.6978056059453449, "grad_norm": 1.4375, "learning_rate": 0.0001645186717146948, "loss": 2.89, "step": 14906 }, { "epoch": 0.6978524196851776, "grad_norm": 0.9921875, "learning_rate": 0.00016451421173907695, "loss": 2.5746, "step": 14907 }, { "epoch": 0.6978992334250103, "grad_norm": 1.40625, "learning_rate": 0.00016450975154363065, "loss": 2.5911, "step": 14908 }, { "epoch": 0.6979460471648429, "grad_norm": 1.609375, "learning_rate": 0.00016450529112837107, "loss": 2.6756, "step": 14909 }, { "epoch": 0.6979928609046755, "grad_norm": 1.46875, "learning_rate": 0.00016450083049331346, "loss": 2.985, "step": 14910 }, { "epoch": 0.6980396746445081, "grad_norm": 1.453125, "learning_rate": 0.00016449636963847302, "loss": 2.5222, "step": 14911 }, { "epoch": 0.6980864883843408, "grad_norm": 1.8046875, "learning_rate": 0.00016449190856386488, "loss": 2.8206, "step": 14912 }, { "epoch": 0.6981333021241735, "grad_norm": 1.515625, "learning_rate": 0.00016448744726950434, "loss": 2.5749, "step": 14913 }, { "epoch": 0.6981801158640061, "grad_norm": 2.40625, "learning_rate": 0.0001644829857554065, "loss": 1.9644, "step": 14914 }, { "epoch": 0.6982269296038387, "grad_norm": 1.4921875, "learning_rate": 0.00016447852402158667, "loss": 2.6364, "step": 14915 }, { "epoch": 0.6982737433436713, "grad_norm": 1.671875, "learning_rate": 0.00016447406206805996, "loss": 2.4928, "step": 14916 }, { "epoch": 0.698320557083504, "grad_norm": 1.265625, "learning_rate": 0.0001644695998948416, "loss": 2.4893, "step": 14917 }, { "epoch": 0.6983673708233367, "grad_norm": 1.125, "learning_rate": 0.00016446513750194685, "loss": 2.347, "step": 14918 }, { "epoch": 0.6984141845631693, "grad_norm": 1.5546875, "learning_rate": 0.00016446067488939083, "loss": 2.4612, "step": 14919 }, { "epoch": 0.6984609983030019, "grad_norm": 1.3125, "learning_rate": 0.00016445621205718878, "loss": 2.679, "step": 14920 }, { "epoch": 0.6985078120428345, "grad_norm": 1.2265625, "learning_rate": 0.00016445174900535595, "loss": 2.2915, "step": 14921 }, { "epoch": 0.6985546257826672, "grad_norm": 1.6875, "learning_rate": 0.00016444728573390747, "loss": 2.7872, "step": 14922 }, { "epoch": 0.6986014395224999, "grad_norm": 1.6640625, "learning_rate": 0.00016444282224285862, "loss": 2.7545, "step": 14923 }, { "epoch": 0.6986482532623325, "grad_norm": 1.46875, "learning_rate": 0.00016443835853222457, "loss": 2.644, "step": 14924 }, { "epoch": 0.6986950670021651, "grad_norm": 1.0859375, "learning_rate": 0.00016443389460202057, "loss": 4.237, "step": 14925 }, { "epoch": 0.6987418807419977, "grad_norm": 1.6328125, "learning_rate": 0.0001644294304522618, "loss": 2.4013, "step": 14926 }, { "epoch": 0.6987886944818305, "grad_norm": 1.1875, "learning_rate": 0.0001644249660829634, "loss": 2.3788, "step": 14927 }, { "epoch": 0.6988355082216631, "grad_norm": 1.1328125, "learning_rate": 0.00016442050149414068, "loss": 2.2769, "step": 14928 }, { "epoch": 0.6988823219614957, "grad_norm": 1.5546875, "learning_rate": 0.00016441603668580882, "loss": 2.5298, "step": 14929 }, { "epoch": 0.6989291357013283, "grad_norm": 2.09375, "learning_rate": 0.00016441157165798307, "loss": 2.4011, "step": 14930 }, { "epoch": 0.6989759494411609, "grad_norm": 1.0390625, "learning_rate": 0.0001644071064106786, "loss": 2.7178, "step": 14931 }, { "epoch": 0.6990227631809937, "grad_norm": 1.5234375, "learning_rate": 0.00016440264094391062, "loss": 2.5214, "step": 14932 }, { "epoch": 0.6990695769208263, "grad_norm": 2.203125, "learning_rate": 0.00016439817525769438, "loss": 3.0278, "step": 14933 }, { "epoch": 0.6991163906606589, "grad_norm": 1.890625, "learning_rate": 0.00016439370935204508, "loss": 3.3533, "step": 14934 }, { "epoch": 0.6991632044004915, "grad_norm": 1.390625, "learning_rate": 0.00016438924322697792, "loss": 2.2196, "step": 14935 }, { "epoch": 0.6992100181403241, "grad_norm": 1.5, "learning_rate": 0.00016438477688250814, "loss": 2.8177, "step": 14936 }, { "epoch": 0.6992568318801569, "grad_norm": 1.3515625, "learning_rate": 0.00016438031031865098, "loss": 2.2709, "step": 14937 }, { "epoch": 0.6993036456199895, "grad_norm": 1.3125, "learning_rate": 0.00016437584353542163, "loss": 2.6126, "step": 14938 }, { "epoch": 0.6993504593598221, "grad_norm": 2.09375, "learning_rate": 0.0001643713765328353, "loss": 2.9097, "step": 14939 }, { "epoch": 0.6993972730996547, "grad_norm": 1.25, "learning_rate": 0.00016436690931090722, "loss": 1.9969, "step": 14940 }, { "epoch": 0.6994440868394874, "grad_norm": 1.5625, "learning_rate": 0.00016436244186965264, "loss": 2.5822, "step": 14941 }, { "epoch": 0.6994909005793201, "grad_norm": 1.4453125, "learning_rate": 0.00016435797420908673, "loss": 2.5904, "step": 14942 }, { "epoch": 0.6995377143191527, "grad_norm": 1.28125, "learning_rate": 0.00016435350632922475, "loss": 2.7385, "step": 14943 }, { "epoch": 0.6995845280589853, "grad_norm": 1.2734375, "learning_rate": 0.00016434903823008194, "loss": 2.4454, "step": 14944 }, { "epoch": 0.6996313417988179, "grad_norm": 1.765625, "learning_rate": 0.00016434456991167351, "loss": 2.9389, "step": 14945 }, { "epoch": 0.6996781555386506, "grad_norm": 1.984375, "learning_rate": 0.00016434010137401464, "loss": 2.3608, "step": 14946 }, { "epoch": 0.6997249692784833, "grad_norm": 1.2734375, "learning_rate": 0.0001643356326171206, "loss": 2.5112, "step": 14947 }, { "epoch": 0.6997717830183159, "grad_norm": 1.9921875, "learning_rate": 0.00016433116364100662, "loss": 2.824, "step": 14948 }, { "epoch": 0.6998185967581485, "grad_norm": 1.609375, "learning_rate": 0.00016432669444568792, "loss": 2.5867, "step": 14949 }, { "epoch": 0.6998654104979811, "grad_norm": 1.359375, "learning_rate": 0.00016432222503117973, "loss": 2.6989, "step": 14950 }, { "epoch": 0.6999122242378139, "grad_norm": 1.296875, "learning_rate": 0.00016431775539749725, "loss": 2.7542, "step": 14951 }, { "epoch": 0.6999590379776465, "grad_norm": 2.4375, "learning_rate": 0.00016431328554465576, "loss": 3.3289, "step": 14952 }, { "epoch": 0.7000058517174791, "grad_norm": 1.875, "learning_rate": 0.00016430881547267048, "loss": 2.3375, "step": 14953 }, { "epoch": 0.7000526654573117, "grad_norm": 1.453125, "learning_rate": 0.0001643043451815566, "loss": 3.3789, "step": 14954 }, { "epoch": 0.7000994791971443, "grad_norm": 2.328125, "learning_rate": 0.0001642998746713294, "loss": 2.4557, "step": 14955 }, { "epoch": 0.700146292936977, "grad_norm": 1.296875, "learning_rate": 0.00016429540394200407, "loss": 2.9703, "step": 14956 }, { "epoch": 0.7001931066768097, "grad_norm": 1.6796875, "learning_rate": 0.00016429093299359592, "loss": 2.4557, "step": 14957 }, { "epoch": 0.7002399204166423, "grad_norm": 1.296875, "learning_rate": 0.00016428646182612008, "loss": 2.4126, "step": 14958 }, { "epoch": 0.7002867341564749, "grad_norm": 1.078125, "learning_rate": 0.00016428199043959184, "loss": 2.157, "step": 14959 }, { "epoch": 0.7003335478963075, "grad_norm": 1.1953125, "learning_rate": 0.00016427751883402644, "loss": 2.3102, "step": 14960 }, { "epoch": 0.7003803616361403, "grad_norm": 1.6015625, "learning_rate": 0.00016427304700943914, "loss": 2.3098, "step": 14961 }, { "epoch": 0.7004271753759729, "grad_norm": 1.546875, "learning_rate": 0.0001642685749658451, "loss": 2.0288, "step": 14962 }, { "epoch": 0.7004739891158055, "grad_norm": 1.7734375, "learning_rate": 0.00016426410270325965, "loss": 2.7026, "step": 14963 }, { "epoch": 0.7005208028556381, "grad_norm": 1.3828125, "learning_rate": 0.00016425963022169795, "loss": 2.7511, "step": 14964 }, { "epoch": 0.7005676165954707, "grad_norm": 1.4921875, "learning_rate": 0.00016425515752117528, "loss": 2.9346, "step": 14965 }, { "epoch": 0.7006144303353035, "grad_norm": 1.453125, "learning_rate": 0.0001642506846017069, "loss": 2.5046, "step": 14966 }, { "epoch": 0.7006612440751361, "grad_norm": 1.171875, "learning_rate": 0.00016424621146330802, "loss": 2.0944, "step": 14967 }, { "epoch": 0.7007080578149687, "grad_norm": 1.25, "learning_rate": 0.00016424173810599386, "loss": 2.283, "step": 14968 }, { "epoch": 0.7007548715548013, "grad_norm": 1.8515625, "learning_rate": 0.0001642372645297797, "loss": 3.0908, "step": 14969 }, { "epoch": 0.7008016852946339, "grad_norm": 1.1953125, "learning_rate": 0.00016423279073468077, "loss": 2.5677, "step": 14970 }, { "epoch": 0.7008484990344667, "grad_norm": 1.3828125, "learning_rate": 0.00016422831672071236, "loss": 2.9457, "step": 14971 }, { "epoch": 0.7008953127742993, "grad_norm": 1.9453125, "learning_rate": 0.00016422384248788964, "loss": 2.4803, "step": 14972 }, { "epoch": 0.7009421265141319, "grad_norm": 1.5234375, "learning_rate": 0.00016421936803622788, "loss": 2.6054, "step": 14973 }, { "epoch": 0.7009889402539645, "grad_norm": 2.5, "learning_rate": 0.00016421489336574237, "loss": 2.5674, "step": 14974 }, { "epoch": 0.7010357539937971, "grad_norm": 1.3671875, "learning_rate": 0.0001642104184764483, "loss": 2.4564, "step": 14975 }, { "epoch": 0.7010825677336299, "grad_norm": 1.546875, "learning_rate": 0.00016420594336836092, "loss": 2.6979, "step": 14976 }, { "epoch": 0.7011293814734625, "grad_norm": 1.4765625, "learning_rate": 0.00016420146804149553, "loss": 2.5186, "step": 14977 }, { "epoch": 0.7011761952132951, "grad_norm": 1.34375, "learning_rate": 0.00016419699249586734, "loss": 2.4216, "step": 14978 }, { "epoch": 0.7012230089531277, "grad_norm": 1.765625, "learning_rate": 0.0001641925167314916, "loss": 2.6133, "step": 14979 }, { "epoch": 0.7012698226929603, "grad_norm": 1.5, "learning_rate": 0.00016418804074838357, "loss": 2.4119, "step": 14980 }, { "epoch": 0.7013166364327931, "grad_norm": 1.7265625, "learning_rate": 0.0001641835645465585, "loss": 2.6216, "step": 14981 }, { "epoch": 0.7013634501726257, "grad_norm": 1.8125, "learning_rate": 0.0001641790881260317, "loss": 2.9762, "step": 14982 }, { "epoch": 0.7014102639124583, "grad_norm": 1.0703125, "learning_rate": 0.0001641746114868183, "loss": 2.7486, "step": 14983 }, { "epoch": 0.7014570776522909, "grad_norm": 1.40625, "learning_rate": 0.00016417013462893365, "loss": 2.3211, "step": 14984 }, { "epoch": 0.7015038913921235, "grad_norm": 1.4765625, "learning_rate": 0.00016416565755239296, "loss": 2.6566, "step": 14985 }, { "epoch": 0.7015507051319563, "grad_norm": 1.1796875, "learning_rate": 0.0001641611802572115, "loss": 2.9484, "step": 14986 }, { "epoch": 0.7015975188717889, "grad_norm": 2.0625, "learning_rate": 0.00016415670274340453, "loss": 3.1019, "step": 14987 }, { "epoch": 0.7016443326116215, "grad_norm": 1.328125, "learning_rate": 0.0001641522250109873, "loss": 2.6861, "step": 14988 }, { "epoch": 0.7016911463514541, "grad_norm": 1.4609375, "learning_rate": 0.0001641477470599751, "loss": 2.7604, "step": 14989 }, { "epoch": 0.7017379600912867, "grad_norm": 1.140625, "learning_rate": 0.00016414326889038313, "loss": 1.8942, "step": 14990 }, { "epoch": 0.7017847738311195, "grad_norm": 1.140625, "learning_rate": 0.0001641387905022267, "loss": 2.6739, "step": 14991 }, { "epoch": 0.7018315875709521, "grad_norm": 1.859375, "learning_rate": 0.00016413431189552105, "loss": 2.8987, "step": 14992 }, { "epoch": 0.7018784013107847, "grad_norm": 1.296875, "learning_rate": 0.0001641298330702814, "loss": 2.3977, "step": 14993 }, { "epoch": 0.7019252150506173, "grad_norm": 2.671875, "learning_rate": 0.0001641253540265231, "loss": 2.7894, "step": 14994 }, { "epoch": 0.70197202879045, "grad_norm": 1.4140625, "learning_rate": 0.00016412087476426132, "loss": 2.7077, "step": 14995 }, { "epoch": 0.7020188425302827, "grad_norm": 2.578125, "learning_rate": 0.00016411639528351137, "loss": 2.4197, "step": 14996 }, { "epoch": 0.7020656562701153, "grad_norm": 1.1953125, "learning_rate": 0.00016411191558428852, "loss": 2.8253, "step": 14997 }, { "epoch": 0.7021124700099479, "grad_norm": 1.0859375, "learning_rate": 0.00016410743566660805, "loss": 1.9269, "step": 14998 }, { "epoch": 0.7021592837497805, "grad_norm": 1.546875, "learning_rate": 0.00016410295553048515, "loss": 2.7621, "step": 14999 }, { "epoch": 0.7022060974896132, "grad_norm": 1.7734375, "learning_rate": 0.00016409847517593517, "loss": 2.6275, "step": 15000 }, { "epoch": 0.7022529112294459, "grad_norm": 1.5078125, "learning_rate": 0.00016409399460297336, "loss": 2.3592, "step": 15001 }, { "epoch": 0.7022997249692785, "grad_norm": 1.359375, "learning_rate": 0.0001640895138116149, "loss": 2.4305, "step": 15002 }, { "epoch": 0.7023465387091111, "grad_norm": 1.5859375, "learning_rate": 0.00016408503280187521, "loss": 2.6849, "step": 15003 }, { "epoch": 0.7023933524489437, "grad_norm": 1.4453125, "learning_rate": 0.00016408055157376942, "loss": 2.5239, "step": 15004 }, { "epoch": 0.7024401661887764, "grad_norm": 1.546875, "learning_rate": 0.00016407607012731287, "loss": 2.414, "step": 15005 }, { "epoch": 0.7024869799286091, "grad_norm": 1.8203125, "learning_rate": 0.0001640715884625208, "loss": 2.6901, "step": 15006 }, { "epoch": 0.7025337936684417, "grad_norm": 1.4765625, "learning_rate": 0.00016406710657940852, "loss": 2.8342, "step": 15007 }, { "epoch": 0.7025806074082743, "grad_norm": 1.6015625, "learning_rate": 0.00016406262447799127, "loss": 2.4141, "step": 15008 }, { "epoch": 0.702627421148107, "grad_norm": 1.328125, "learning_rate": 0.0001640581421582843, "loss": 2.6482, "step": 15009 }, { "epoch": 0.7026742348879396, "grad_norm": 1.1328125, "learning_rate": 0.00016405365962030295, "loss": 3.7989, "step": 15010 }, { "epoch": 0.7027210486277723, "grad_norm": 1.484375, "learning_rate": 0.00016404917686406245, "loss": 2.0495, "step": 15011 }, { "epoch": 0.7027678623676049, "grad_norm": 1.8515625, "learning_rate": 0.00016404469388957806, "loss": 2.7884, "step": 15012 }, { "epoch": 0.7028146761074375, "grad_norm": 1.8125, "learning_rate": 0.0001640402106968651, "loss": 2.6303, "step": 15013 }, { "epoch": 0.7028614898472701, "grad_norm": 1.4453125, "learning_rate": 0.00016403572728593884, "loss": 2.3235, "step": 15014 }, { "epoch": 0.7029083035871028, "grad_norm": 1.2890625, "learning_rate": 0.0001640312436568145, "loss": 2.4707, "step": 15015 }, { "epoch": 0.7029551173269355, "grad_norm": 1.875, "learning_rate": 0.00016402675980950742, "loss": 2.7433, "step": 15016 }, { "epoch": 0.7030019310667681, "grad_norm": 1.6328125, "learning_rate": 0.00016402227574403283, "loss": 2.0886, "step": 15017 }, { "epoch": 0.7030487448066007, "grad_norm": 1.8515625, "learning_rate": 0.0001640177914604061, "loss": 2.6816, "step": 15018 }, { "epoch": 0.7030955585464334, "grad_norm": 1.421875, "learning_rate": 0.00016401330695864239, "loss": 2.3125, "step": 15019 }, { "epoch": 0.703142372286266, "grad_norm": 1.671875, "learning_rate": 0.00016400882223875705, "loss": 2.9206, "step": 15020 }, { "epoch": 0.7031891860260987, "grad_norm": 1.2109375, "learning_rate": 0.00016400433730076537, "loss": 2.9118, "step": 15021 }, { "epoch": 0.7032359997659313, "grad_norm": 2.328125, "learning_rate": 0.00016399985214468258, "loss": 2.5811, "step": 15022 }, { "epoch": 0.7032828135057639, "grad_norm": 1.640625, "learning_rate": 0.000163995366770524, "loss": 2.3605, "step": 15023 }, { "epoch": 0.7033296272455966, "grad_norm": 1.5234375, "learning_rate": 0.00016399088117830492, "loss": 2.6798, "step": 15024 }, { "epoch": 0.7033764409854292, "grad_norm": 1.4921875, "learning_rate": 0.00016398639536804058, "loss": 2.2191, "step": 15025 }, { "epoch": 0.7034232547252619, "grad_norm": 1.421875, "learning_rate": 0.00016398190933974633, "loss": 3.1245, "step": 15026 }, { "epoch": 0.7034700684650945, "grad_norm": 1.4609375, "learning_rate": 0.00016397742309343742, "loss": 2.9209, "step": 15027 }, { "epoch": 0.7035168822049271, "grad_norm": 3.578125, "learning_rate": 0.00016397293662912912, "loss": 2.4738, "step": 15028 }, { "epoch": 0.7035636959447598, "grad_norm": 1.3984375, "learning_rate": 0.00016396844994683675, "loss": 2.6027, "step": 15029 }, { "epoch": 0.7036105096845924, "grad_norm": 1.953125, "learning_rate": 0.0001639639630465756, "loss": 2.7603, "step": 15030 }, { "epoch": 0.7036573234244251, "grad_norm": 2.515625, "learning_rate": 0.0001639594759283609, "loss": 2.3669, "step": 15031 }, { "epoch": 0.7037041371642577, "grad_norm": 1.1953125, "learning_rate": 0.00016395498859220802, "loss": 2.8211, "step": 15032 }, { "epoch": 0.7037509509040903, "grad_norm": 1.21875, "learning_rate": 0.00016395050103813218, "loss": 2.494, "step": 15033 }, { "epoch": 0.703797764643923, "grad_norm": 1.34375, "learning_rate": 0.00016394601326614872, "loss": 2.5904, "step": 15034 }, { "epoch": 0.7038445783837556, "grad_norm": 2.859375, "learning_rate": 0.00016394152527627297, "loss": 4.16, "step": 15035 }, { "epoch": 0.7038913921235883, "grad_norm": 1.234375, "learning_rate": 0.0001639370370685201, "loss": 2.2042, "step": 15036 }, { "epoch": 0.7039382058634209, "grad_norm": 1.609375, "learning_rate": 0.00016393254864290548, "loss": 2.4587, "step": 15037 }, { "epoch": 0.7039850196032535, "grad_norm": 1.59375, "learning_rate": 0.0001639280599994444, "loss": 2.4414, "step": 15038 }, { "epoch": 0.7040318333430862, "grad_norm": 1.5234375, "learning_rate": 0.0001639235711381522, "loss": 2.8414, "step": 15039 }, { "epoch": 0.7040786470829188, "grad_norm": 2.5, "learning_rate": 0.00016391908205904408, "loss": 2.6951, "step": 15040 }, { "epoch": 0.7041254608227515, "grad_norm": 1.6953125, "learning_rate": 0.00016391459276213536, "loss": 2.8157, "step": 15041 }, { "epoch": 0.7041722745625841, "grad_norm": 1.671875, "learning_rate": 0.0001639101032474414, "loss": 2.9447, "step": 15042 }, { "epoch": 0.7042190883024168, "grad_norm": 3.34375, "learning_rate": 0.00016390561351497742, "loss": 2.4139, "step": 15043 }, { "epoch": 0.7042659020422494, "grad_norm": 2.203125, "learning_rate": 0.0001639011235647588, "loss": 3.9415, "step": 15044 }, { "epoch": 0.704312715782082, "grad_norm": 1.484375, "learning_rate": 0.00016389663339680077, "loss": 2.4011, "step": 15045 }, { "epoch": 0.7043595295219147, "grad_norm": 1.75, "learning_rate": 0.00016389214301111866, "loss": 2.6791, "step": 15046 }, { "epoch": 0.7044063432617473, "grad_norm": 1.234375, "learning_rate": 0.00016388765240772773, "loss": 2.5408, "step": 15047 }, { "epoch": 0.70445315700158, "grad_norm": 1.25, "learning_rate": 0.00016388316158664337, "loss": 2.5124, "step": 15048 }, { "epoch": 0.7044999707414126, "grad_norm": 2.109375, "learning_rate": 0.00016387867054788084, "loss": 2.6035, "step": 15049 }, { "epoch": 0.7045467844812452, "grad_norm": 1.5, "learning_rate": 0.0001638741792914554, "loss": 1.9802, "step": 15050 }, { "epoch": 0.7045935982210779, "grad_norm": 1.5078125, "learning_rate": 0.0001638696878173824, "loss": 2.4744, "step": 15051 }, { "epoch": 0.7046404119609105, "grad_norm": 1.4765625, "learning_rate": 0.0001638651961256771, "loss": 2.5317, "step": 15052 }, { "epoch": 0.7046872257007432, "grad_norm": 2.390625, "learning_rate": 0.00016386070421635487, "loss": 2.37, "step": 15053 }, { "epoch": 0.7047340394405758, "grad_norm": 1.3203125, "learning_rate": 0.00016385621208943096, "loss": 2.6736, "step": 15054 }, { "epoch": 0.7047808531804084, "grad_norm": 1.5390625, "learning_rate": 0.00016385171974492071, "loss": 2.4678, "step": 15055 }, { "epoch": 0.7048276669202411, "grad_norm": 2.21875, "learning_rate": 0.0001638472271828394, "loss": 2.6126, "step": 15056 }, { "epoch": 0.7048744806600737, "grad_norm": 1.1015625, "learning_rate": 0.00016384273440320238, "loss": 4.1746, "step": 15057 }, { "epoch": 0.7049212943999064, "grad_norm": 1.6796875, "learning_rate": 0.00016383824140602492, "loss": 2.2006, "step": 15058 }, { "epoch": 0.704968108139739, "grad_norm": 1.34375, "learning_rate": 0.00016383374819132235, "loss": 2.5299, "step": 15059 }, { "epoch": 0.7050149218795716, "grad_norm": 1.3984375, "learning_rate": 0.00016382925475910996, "loss": 2.4457, "step": 15060 }, { "epoch": 0.7050617356194043, "grad_norm": 1.1484375, "learning_rate": 0.0001638247611094031, "loss": 2.9705, "step": 15061 }, { "epoch": 0.705108549359237, "grad_norm": 1.8828125, "learning_rate": 0.00016382026724221703, "loss": 2.4862, "step": 15062 }, { "epoch": 0.7051553630990696, "grad_norm": 2.21875, "learning_rate": 0.0001638157731575671, "loss": 2.3523, "step": 15063 }, { "epoch": 0.7052021768389022, "grad_norm": 2.109375, "learning_rate": 0.00016381127885546856, "loss": 2.5262, "step": 15064 }, { "epoch": 0.7052489905787349, "grad_norm": 1.125, "learning_rate": 0.00016380678433593684, "loss": 2.491, "step": 15065 }, { "epoch": 0.7052958043185675, "grad_norm": 1.21875, "learning_rate": 0.00016380228959898715, "loss": 2.4739, "step": 15066 }, { "epoch": 0.7053426180584002, "grad_norm": 1.203125, "learning_rate": 0.00016379779464463487, "loss": 2.1966, "step": 15067 }, { "epoch": 0.7053894317982328, "grad_norm": 1.484375, "learning_rate": 0.00016379329947289532, "loss": 2.7471, "step": 15068 }, { "epoch": 0.7054362455380654, "grad_norm": 1.265625, "learning_rate": 0.00016378880408378374, "loss": 2.0966, "step": 15069 }, { "epoch": 0.7054830592778981, "grad_norm": 1.5859375, "learning_rate": 0.00016378430847731554, "loss": 2.7499, "step": 15070 }, { "epoch": 0.7055298730177307, "grad_norm": 1.5625, "learning_rate": 0.00016377981265350595, "loss": 2.7758, "step": 15071 }, { "epoch": 0.7055766867575634, "grad_norm": 1.28125, "learning_rate": 0.00016377531661237036, "loss": 2.1892, "step": 15072 }, { "epoch": 0.705623500497396, "grad_norm": 1.453125, "learning_rate": 0.00016377082035392407, "loss": 2.7777, "step": 15073 }, { "epoch": 0.7056703142372286, "grad_norm": 1.4765625, "learning_rate": 0.0001637663238781824, "loss": 2.6133, "step": 15074 }, { "epoch": 0.7057171279770613, "grad_norm": 1.484375, "learning_rate": 0.00016376182718516065, "loss": 2.571, "step": 15075 }, { "epoch": 0.7057639417168939, "grad_norm": 1.4453125, "learning_rate": 0.00016375733027487415, "loss": 2.0701, "step": 15076 }, { "epoch": 0.7058107554567266, "grad_norm": 1.1015625, "learning_rate": 0.00016375283314733826, "loss": 2.4626, "step": 15077 }, { "epoch": 0.7058575691965592, "grad_norm": 1.515625, "learning_rate": 0.00016374833580256825, "loss": 2.4725, "step": 15078 }, { "epoch": 0.7059043829363918, "grad_norm": 1.078125, "learning_rate": 0.0001637438382405795, "loss": 2.6479, "step": 15079 }, { "epoch": 0.7059511966762245, "grad_norm": 1.765625, "learning_rate": 0.00016373934046138729, "loss": 2.9344, "step": 15080 }, { "epoch": 0.7059980104160571, "grad_norm": 1.5390625, "learning_rate": 0.00016373484246500695, "loss": 2.5171, "step": 15081 }, { "epoch": 0.7060448241558898, "grad_norm": 1.640625, "learning_rate": 0.00016373034425145382, "loss": 2.8644, "step": 15082 }, { "epoch": 0.7060916378957224, "grad_norm": 2.21875, "learning_rate": 0.00016372584582074323, "loss": 2.3397, "step": 15083 }, { "epoch": 0.706138451635555, "grad_norm": 1.5625, "learning_rate": 0.0001637213471728905, "loss": 2.3486, "step": 15084 }, { "epoch": 0.7061852653753877, "grad_norm": 1.5, "learning_rate": 0.00016371684830791098, "loss": 2.7125, "step": 15085 }, { "epoch": 0.7062320791152203, "grad_norm": 1.4296875, "learning_rate": 0.00016371234922582, "loss": 2.7911, "step": 15086 }, { "epoch": 0.706278892855053, "grad_norm": 1.4375, "learning_rate": 0.00016370784992663287, "loss": 2.5162, "step": 15087 }, { "epoch": 0.7063257065948856, "grad_norm": 1.2578125, "learning_rate": 0.00016370335041036488, "loss": 2.4465, "step": 15088 }, { "epoch": 0.7063725203347182, "grad_norm": 1.6171875, "learning_rate": 0.0001636988506770314, "loss": 2.4495, "step": 15089 }, { "epoch": 0.7064193340745509, "grad_norm": 1.4453125, "learning_rate": 0.00016369435072664782, "loss": 2.8571, "step": 15090 }, { "epoch": 0.7064661478143835, "grad_norm": 1.375, "learning_rate": 0.0001636898505592294, "loss": 2.2845, "step": 15091 }, { "epoch": 0.7065129615542162, "grad_norm": 1.4140625, "learning_rate": 0.00016368535017479148, "loss": 2.4277, "step": 15092 }, { "epoch": 0.7065597752940488, "grad_norm": 1.40625, "learning_rate": 0.00016368084957334942, "loss": 2.7399, "step": 15093 }, { "epoch": 0.7066065890338814, "grad_norm": 1.53125, "learning_rate": 0.00016367634875491855, "loss": 2.4946, "step": 15094 }, { "epoch": 0.7066534027737141, "grad_norm": 1.1484375, "learning_rate": 0.0001636718477195142, "loss": 2.5748, "step": 15095 }, { "epoch": 0.7067002165135468, "grad_norm": 1.2890625, "learning_rate": 0.00016366734646715172, "loss": 2.5143, "step": 15096 }, { "epoch": 0.7067470302533794, "grad_norm": 1.3984375, "learning_rate": 0.00016366284499784643, "loss": 2.9294, "step": 15097 }, { "epoch": 0.706793843993212, "grad_norm": 1.34375, "learning_rate": 0.00016365834331161366, "loss": 2.6263, "step": 15098 }, { "epoch": 0.7068406577330446, "grad_norm": 1.6640625, "learning_rate": 0.00016365384140846876, "loss": 2.7175, "step": 15099 }, { "epoch": 0.7068874714728773, "grad_norm": 1.0078125, "learning_rate": 0.0001636493392884271, "loss": 2.2969, "step": 15100 }, { "epoch": 0.70693428521271, "grad_norm": 1.625, "learning_rate": 0.000163644836951504, "loss": 2.7307, "step": 15101 }, { "epoch": 0.7069810989525426, "grad_norm": 1.484375, "learning_rate": 0.00016364033439771479, "loss": 2.762, "step": 15102 }, { "epoch": 0.7070279126923752, "grad_norm": 1.1875, "learning_rate": 0.00016363583162707482, "loss": 2.3952, "step": 15103 }, { "epoch": 0.7070747264322078, "grad_norm": 1.421875, "learning_rate": 0.0001636313286395994, "loss": 2.4634, "step": 15104 }, { "epoch": 0.7071215401720405, "grad_norm": 1.9765625, "learning_rate": 0.00016362682543530395, "loss": 2.5058, "step": 15105 }, { "epoch": 0.7071683539118732, "grad_norm": 1.59375, "learning_rate": 0.00016362232201420375, "loss": 2.4935, "step": 15106 }, { "epoch": 0.7072151676517058, "grad_norm": 1.3046875, "learning_rate": 0.0001636178183763142, "loss": 2.7919, "step": 15107 }, { "epoch": 0.7072619813915384, "grad_norm": 1.125, "learning_rate": 0.00016361331452165057, "loss": 2.4177, "step": 15108 }, { "epoch": 0.707308795131371, "grad_norm": 1.4375, "learning_rate": 0.00016360881045022823, "loss": 2.5511, "step": 15109 }, { "epoch": 0.7073556088712037, "grad_norm": 1.90625, "learning_rate": 0.00016360430616206258, "loss": 2.4534, "step": 15110 }, { "epoch": 0.7074024226110364, "grad_norm": 2.03125, "learning_rate": 0.00016359980165716893, "loss": 2.8837, "step": 15111 }, { "epoch": 0.707449236350869, "grad_norm": 1.5625, "learning_rate": 0.0001635952969355626, "loss": 2.7652, "step": 15112 }, { "epoch": 0.7074960500907016, "grad_norm": 1.90625, "learning_rate": 0.000163590791997259, "loss": 2.7808, "step": 15113 }, { "epoch": 0.7075428638305342, "grad_norm": 1.3125, "learning_rate": 0.00016358628684227347, "loss": 2.4304, "step": 15114 }, { "epoch": 0.707589677570367, "grad_norm": 1.2890625, "learning_rate": 0.00016358178147062133, "loss": 2.4685, "step": 15115 }, { "epoch": 0.7076364913101996, "grad_norm": 1.4375, "learning_rate": 0.00016357727588231792, "loss": 2.8362, "step": 15116 }, { "epoch": 0.7076833050500322, "grad_norm": 1.390625, "learning_rate": 0.00016357277007737867, "loss": 2.3156, "step": 15117 }, { "epoch": 0.7077301187898648, "grad_norm": 1.671875, "learning_rate": 0.00016356826405581885, "loss": 2.6847, "step": 15118 }, { "epoch": 0.7077769325296974, "grad_norm": 1.734375, "learning_rate": 0.00016356375781765382, "loss": 2.567, "step": 15119 }, { "epoch": 0.7078237462695302, "grad_norm": 1.5859375, "learning_rate": 0.00016355925136289897, "loss": 2.9145, "step": 15120 }, { "epoch": 0.7078705600093628, "grad_norm": 1.296875, "learning_rate": 0.00016355474469156967, "loss": 2.2738, "step": 15121 }, { "epoch": 0.7079173737491954, "grad_norm": 1.4609375, "learning_rate": 0.00016355023780368125, "loss": 2.6185, "step": 15122 }, { "epoch": 0.707964187489028, "grad_norm": 1.625, "learning_rate": 0.00016354573069924904, "loss": 2.2802, "step": 15123 }, { "epoch": 0.7080110012288606, "grad_norm": 1.2890625, "learning_rate": 0.00016354122337828843, "loss": 2.8866, "step": 15124 }, { "epoch": 0.7080578149686934, "grad_norm": 1.2109375, "learning_rate": 0.0001635367158408148, "loss": 2.6612, "step": 15125 }, { "epoch": 0.708104628708526, "grad_norm": 1.4140625, "learning_rate": 0.00016353220808684347, "loss": 2.5543, "step": 15126 }, { "epoch": 0.7081514424483586, "grad_norm": 1.484375, "learning_rate": 0.0001635277001163898, "loss": 2.881, "step": 15127 }, { "epoch": 0.7081982561881912, "grad_norm": 1.6796875, "learning_rate": 0.00016352319192946917, "loss": 4.0099, "step": 15128 }, { "epoch": 0.7082450699280238, "grad_norm": 1.65625, "learning_rate": 0.00016351868352609694, "loss": 2.8073, "step": 15129 }, { "epoch": 0.7082918836678566, "grad_norm": 1.9609375, "learning_rate": 0.00016351417490628847, "loss": 2.676, "step": 15130 }, { "epoch": 0.7083386974076892, "grad_norm": 1.484375, "learning_rate": 0.0001635096660700591, "loss": 2.4813, "step": 15131 }, { "epoch": 0.7083855111475218, "grad_norm": 1.5078125, "learning_rate": 0.00016350515701742425, "loss": 3.9187, "step": 15132 }, { "epoch": 0.7084323248873544, "grad_norm": 2.078125, "learning_rate": 0.0001635006477483992, "loss": 2.6465, "step": 15133 }, { "epoch": 0.708479138627187, "grad_norm": 1.34375, "learning_rate": 0.00016349613826299942, "loss": 2.5002, "step": 15134 }, { "epoch": 0.7085259523670198, "grad_norm": 1.203125, "learning_rate": 0.0001634916285612402, "loss": 2.556, "step": 15135 }, { "epoch": 0.7085727661068524, "grad_norm": 1.5, "learning_rate": 0.00016348711864313693, "loss": 2.5934, "step": 15136 }, { "epoch": 0.708619579846685, "grad_norm": 1.1875, "learning_rate": 0.00016348260850870496, "loss": 2.7274, "step": 15137 }, { "epoch": 0.7086663935865176, "grad_norm": 1.1953125, "learning_rate": 0.00016347809815795967, "loss": 2.678, "step": 15138 }, { "epoch": 0.7087132073263502, "grad_norm": 1.4609375, "learning_rate": 0.00016347358759091643, "loss": 2.6729, "step": 15139 }, { "epoch": 0.708760021066183, "grad_norm": 1.3671875, "learning_rate": 0.0001634690768075906, "loss": 2.6136, "step": 15140 }, { "epoch": 0.7088068348060156, "grad_norm": 2.046875, "learning_rate": 0.0001634645658079976, "loss": 2.4362, "step": 15141 }, { "epoch": 0.7088536485458482, "grad_norm": 1.296875, "learning_rate": 0.0001634600545921527, "loss": 2.5911, "step": 15142 }, { "epoch": 0.7089004622856808, "grad_norm": 1.515625, "learning_rate": 0.00016345554316007138, "loss": 3.0945, "step": 15143 }, { "epoch": 0.7089472760255134, "grad_norm": 1.6015625, "learning_rate": 0.00016345103151176895, "loss": 2.7614, "step": 15144 }, { "epoch": 0.7089940897653462, "grad_norm": 1.125, "learning_rate": 0.0001634465196472608, "loss": 2.7604, "step": 15145 }, { "epoch": 0.7090409035051788, "grad_norm": 1.2890625, "learning_rate": 0.00016344200756656228, "loss": 2.6223, "step": 15146 }, { "epoch": 0.7090877172450114, "grad_norm": 1.3515625, "learning_rate": 0.00016343749526968881, "loss": 2.4754, "step": 15147 }, { "epoch": 0.709134530984844, "grad_norm": 1.515625, "learning_rate": 0.00016343298275665573, "loss": 2.788, "step": 15148 }, { "epoch": 0.7091813447246766, "grad_norm": 1.890625, "learning_rate": 0.00016342847002747844, "loss": 2.7093, "step": 15149 }, { "epoch": 0.7092281584645094, "grad_norm": 1.1796875, "learning_rate": 0.00016342395708217227, "loss": 2.7958, "step": 15150 }, { "epoch": 0.709274972204342, "grad_norm": 1.515625, "learning_rate": 0.00016341944392075268, "loss": 2.985, "step": 15151 }, { "epoch": 0.7093217859441746, "grad_norm": 1.484375, "learning_rate": 0.000163414930543235, "loss": 2.0927, "step": 15152 }, { "epoch": 0.7093685996840072, "grad_norm": 1.3046875, "learning_rate": 0.00016341041694963458, "loss": 2.5942, "step": 15153 }, { "epoch": 0.7094154134238398, "grad_norm": 1.78125, "learning_rate": 0.00016340590313996685, "loss": 2.6798, "step": 15154 }, { "epoch": 0.7094622271636726, "grad_norm": 1.4453125, "learning_rate": 0.00016340138911424717, "loss": 2.3299, "step": 15155 }, { "epoch": 0.7095090409035052, "grad_norm": 1.2265625, "learning_rate": 0.00016339687487249092, "loss": 2.3846, "step": 15156 }, { "epoch": 0.7095558546433378, "grad_norm": 1.5390625, "learning_rate": 0.00016339236041471345, "loss": 2.7614, "step": 15157 }, { "epoch": 0.7096026683831704, "grad_norm": 1.2109375, "learning_rate": 0.0001633878457409302, "loss": 2.3568, "step": 15158 }, { "epoch": 0.709649482123003, "grad_norm": 1.90625, "learning_rate": 0.00016338333085115655, "loss": 2.5007, "step": 15159 }, { "epoch": 0.7096962958628358, "grad_norm": 1.5234375, "learning_rate": 0.00016337881574540782, "loss": 2.7338, "step": 15160 }, { "epoch": 0.7097431096026684, "grad_norm": 1.5703125, "learning_rate": 0.00016337430042369947, "loss": 2.7075, "step": 15161 }, { "epoch": 0.709789923342501, "grad_norm": 1.5625, "learning_rate": 0.00016336978488604685, "loss": 2.6232, "step": 15162 }, { "epoch": 0.7098367370823336, "grad_norm": 1.578125, "learning_rate": 0.00016336526913246537, "loss": 2.3321, "step": 15163 }, { "epoch": 0.7098835508221663, "grad_norm": 1.4765625, "learning_rate": 0.00016336075316297037, "loss": 1.8631, "step": 15164 }, { "epoch": 0.709930364561999, "grad_norm": 1.671875, "learning_rate": 0.00016335623697757726, "loss": 2.6958, "step": 15165 }, { "epoch": 0.7099771783018316, "grad_norm": 1.171875, "learning_rate": 0.00016335172057630146, "loss": 2.2821, "step": 15166 }, { "epoch": 0.7100239920416642, "grad_norm": 1.3515625, "learning_rate": 0.0001633472039591583, "loss": 2.5769, "step": 15167 }, { "epoch": 0.7100708057814968, "grad_norm": 1.5625, "learning_rate": 0.00016334268712616323, "loss": 2.2874, "step": 15168 }, { "epoch": 0.7101176195213295, "grad_norm": 1.3671875, "learning_rate": 0.0001633381700773316, "loss": 2.649, "step": 15169 }, { "epoch": 0.7101644332611622, "grad_norm": 1.5625, "learning_rate": 0.00016333365281267885, "loss": 2.3859, "step": 15170 }, { "epoch": 0.7102112470009948, "grad_norm": 1.796875, "learning_rate": 0.0001633291353322203, "loss": 2.9845, "step": 15171 }, { "epoch": 0.7102580607408274, "grad_norm": 1.46875, "learning_rate": 0.0001633246176359714, "loss": 2.4802, "step": 15172 }, { "epoch": 0.71030487448066, "grad_norm": 1.7734375, "learning_rate": 0.00016332009972394755, "loss": 2.4543, "step": 15173 }, { "epoch": 0.7103516882204927, "grad_norm": 1.7578125, "learning_rate": 0.00016331558159616408, "loss": 2.32, "step": 15174 }, { "epoch": 0.7103985019603254, "grad_norm": 1.578125, "learning_rate": 0.00016331106325263644, "loss": 2.4438, "step": 15175 }, { "epoch": 0.710445315700158, "grad_norm": 1.1796875, "learning_rate": 0.00016330654469338, "loss": 3.2372, "step": 15176 }, { "epoch": 0.7104921294399906, "grad_norm": 1.1875, "learning_rate": 0.0001633020259184102, "loss": 2.7006, "step": 15177 }, { "epoch": 0.7105389431798232, "grad_norm": 1.7578125, "learning_rate": 0.00016329750692774237, "loss": 2.8772, "step": 15178 }, { "epoch": 0.7105857569196559, "grad_norm": 1.5703125, "learning_rate": 0.00016329298772139198, "loss": 2.5123, "step": 15179 }, { "epoch": 0.7106325706594886, "grad_norm": 1.390625, "learning_rate": 0.00016328846829937436, "loss": 2.5318, "step": 15180 }, { "epoch": 0.7106793843993212, "grad_norm": 2.015625, "learning_rate": 0.00016328394866170495, "loss": 2.7223, "step": 15181 }, { "epoch": 0.7107261981391538, "grad_norm": 1.4765625, "learning_rate": 0.00016327942880839914, "loss": 2.4409, "step": 15182 }, { "epoch": 0.7107730118789864, "grad_norm": 1.3671875, "learning_rate": 0.00016327490873947235, "loss": 2.7595, "step": 15183 }, { "epoch": 0.7108198256188192, "grad_norm": 1.484375, "learning_rate": 0.00016327038845493995, "loss": 2.6603, "step": 15184 }, { "epoch": 0.7108666393586518, "grad_norm": 1.453125, "learning_rate": 0.00016326586795481737, "loss": 2.5408, "step": 15185 }, { "epoch": 0.7109134530984844, "grad_norm": 1.34375, "learning_rate": 0.00016326134723912002, "loss": 2.6663, "step": 15186 }, { "epoch": 0.710960266838317, "grad_norm": 1.4921875, "learning_rate": 0.00016325682630786325, "loss": 3.5048, "step": 15187 }, { "epoch": 0.7110070805781497, "grad_norm": 1.6328125, "learning_rate": 0.0001632523051610625, "loss": 2.8074, "step": 15188 }, { "epoch": 0.7110538943179824, "grad_norm": 1.4296875, "learning_rate": 0.0001632477837987332, "loss": 2.5726, "step": 15189 }, { "epoch": 0.711100708057815, "grad_norm": 1.6328125, "learning_rate": 0.00016324326222089073, "loss": 2.819, "step": 15190 }, { "epoch": 0.7111475217976476, "grad_norm": 1.25, "learning_rate": 0.0001632387404275505, "loss": 2.6184, "step": 15191 }, { "epoch": 0.7111943355374802, "grad_norm": 1.453125, "learning_rate": 0.00016323421841872793, "loss": 3.2806, "step": 15192 }, { "epoch": 0.7112411492773129, "grad_norm": 1.8671875, "learning_rate": 0.00016322969619443837, "loss": 2.6224, "step": 15193 }, { "epoch": 0.7112879630171456, "grad_norm": 1.2890625, "learning_rate": 0.0001632251737546973, "loss": 2.5508, "step": 15194 }, { "epoch": 0.7113347767569782, "grad_norm": 1.90625, "learning_rate": 0.0001632206510995201, "loss": 2.6279, "step": 15195 }, { "epoch": 0.7113815904968108, "grad_norm": 1.671875, "learning_rate": 0.0001632161282289222, "loss": 2.4646, "step": 15196 }, { "epoch": 0.7114284042366434, "grad_norm": 1.75, "learning_rate": 0.000163211605142919, "loss": 2.9139, "step": 15197 }, { "epoch": 0.7114752179764761, "grad_norm": 1.4296875, "learning_rate": 0.0001632070818415259, "loss": 2.6883, "step": 15198 }, { "epoch": 0.7115220317163088, "grad_norm": 1.2734375, "learning_rate": 0.00016320255832475832, "loss": 2.6595, "step": 15199 }, { "epoch": 0.7115688454561414, "grad_norm": 4.03125, "learning_rate": 0.00016319803459263166, "loss": 2.6851, "step": 15200 }, { "epoch": 0.711615659195974, "grad_norm": 1.5390625, "learning_rate": 0.00016319351064516136, "loss": 2.7088, "step": 15201 }, { "epoch": 0.7116624729358066, "grad_norm": 1.3671875, "learning_rate": 0.00016318898648236285, "loss": 2.2434, "step": 15202 }, { "epoch": 0.7117092866756393, "grad_norm": 1.421875, "learning_rate": 0.0001631844621042515, "loss": 2.5475, "step": 15203 }, { "epoch": 0.711756100415472, "grad_norm": 1.7109375, "learning_rate": 0.00016317993751084275, "loss": 2.97, "step": 15204 }, { "epoch": 0.7118029141553046, "grad_norm": 1.484375, "learning_rate": 0.00016317541270215202, "loss": 2.7745, "step": 15205 }, { "epoch": 0.7118497278951372, "grad_norm": 1.3046875, "learning_rate": 0.00016317088767819473, "loss": 2.4032, "step": 15206 }, { "epoch": 0.7118965416349698, "grad_norm": 1.390625, "learning_rate": 0.00016316636243898626, "loss": 2.2771, "step": 15207 }, { "epoch": 0.7119433553748025, "grad_norm": 1.328125, "learning_rate": 0.0001631618369845421, "loss": 2.2841, "step": 15208 }, { "epoch": 0.7119901691146352, "grad_norm": 1.4765625, "learning_rate": 0.00016315731131487761, "loss": 2.7984, "step": 15209 }, { "epoch": 0.7120369828544678, "grad_norm": 1.2890625, "learning_rate": 0.00016315278543000824, "loss": 2.7951, "step": 15210 }, { "epoch": 0.7120837965943004, "grad_norm": 1.734375, "learning_rate": 0.0001631482593299494, "loss": 2.1914, "step": 15211 }, { "epoch": 0.712130610334133, "grad_norm": 1.40625, "learning_rate": 0.00016314373301471653, "loss": 2.7566, "step": 15212 }, { "epoch": 0.7121774240739657, "grad_norm": 2.4375, "learning_rate": 0.000163139206484325, "loss": 2.4265, "step": 15213 }, { "epoch": 0.7122242378137984, "grad_norm": 1.7265625, "learning_rate": 0.00016313467973879032, "loss": 2.8572, "step": 15214 }, { "epoch": 0.712271051553631, "grad_norm": 1.6328125, "learning_rate": 0.00016313015277812784, "loss": 2.4551, "step": 15215 }, { "epoch": 0.7123178652934636, "grad_norm": 2.296875, "learning_rate": 0.000163125625602353, "loss": 2.5439, "step": 15216 }, { "epoch": 0.7123646790332963, "grad_norm": 1.84375, "learning_rate": 0.00016312109821148128, "loss": 2.7724, "step": 15217 }, { "epoch": 0.7124114927731289, "grad_norm": 1.4453125, "learning_rate": 0.00016311657060552804, "loss": 2.5922, "step": 15218 }, { "epoch": 0.7124583065129616, "grad_norm": 1.59375, "learning_rate": 0.00016311204278450874, "loss": 2.7345, "step": 15219 }, { "epoch": 0.7125051202527942, "grad_norm": 2.828125, "learning_rate": 0.0001631075147484388, "loss": 2.9063, "step": 15220 }, { "epoch": 0.7125519339926268, "grad_norm": 1.125, "learning_rate": 0.00016310298649733368, "loss": 2.7354, "step": 15221 }, { "epoch": 0.7125987477324595, "grad_norm": 1.40625, "learning_rate": 0.00016309845803120872, "loss": 2.5206, "step": 15222 }, { "epoch": 0.7126455614722921, "grad_norm": 1.484375, "learning_rate": 0.00016309392935007946, "loss": 2.568, "step": 15223 }, { "epoch": 0.7126923752121248, "grad_norm": 1.3828125, "learning_rate": 0.00016308940045396127, "loss": 2.6, "step": 15224 }, { "epoch": 0.7127391889519574, "grad_norm": 1.2421875, "learning_rate": 0.0001630848713428696, "loss": 2.5797, "step": 15225 }, { "epoch": 0.71278600269179, "grad_norm": 1.640625, "learning_rate": 0.00016308034201681986, "loss": 3.2676, "step": 15226 }, { "epoch": 0.7128328164316227, "grad_norm": 2.46875, "learning_rate": 0.0001630758124758275, "loss": 2.6191, "step": 15227 }, { "epoch": 0.7128796301714553, "grad_norm": 2.171875, "learning_rate": 0.000163071282719908, "loss": 2.4692, "step": 15228 }, { "epoch": 0.712926443911288, "grad_norm": 1.171875, "learning_rate": 0.0001630667527490767, "loss": 2.2876, "step": 15229 }, { "epoch": 0.7129732576511206, "grad_norm": 1.7265625, "learning_rate": 0.0001630622225633491, "loss": 2.8835, "step": 15230 }, { "epoch": 0.7130200713909532, "grad_norm": 1.390625, "learning_rate": 0.00016305769216274065, "loss": 2.6996, "step": 15231 }, { "epoch": 0.7130668851307859, "grad_norm": 1.265625, "learning_rate": 0.00016305316154726672, "loss": 2.3174, "step": 15232 }, { "epoch": 0.7131136988706185, "grad_norm": 1.296875, "learning_rate": 0.00016304863071694279, "loss": 3.0359, "step": 15233 }, { "epoch": 0.7131605126104512, "grad_norm": 1.1484375, "learning_rate": 0.0001630440996717843, "loss": 2.5281, "step": 15234 }, { "epoch": 0.7132073263502838, "grad_norm": 1.4765625, "learning_rate": 0.00016303956841180669, "loss": 2.727, "step": 15235 }, { "epoch": 0.7132541400901165, "grad_norm": 1.71875, "learning_rate": 0.0001630350369370254, "loss": 2.7443, "step": 15236 }, { "epoch": 0.7133009538299491, "grad_norm": 2.015625, "learning_rate": 0.00016303050524745584, "loss": 2.8262, "step": 15237 }, { "epoch": 0.7133477675697817, "grad_norm": 1.1796875, "learning_rate": 0.0001630259733431135, "loss": 2.5058, "step": 15238 }, { "epoch": 0.7133945813096144, "grad_norm": 2.046875, "learning_rate": 0.00016302144122401382, "loss": 2.5955, "step": 15239 }, { "epoch": 0.713441395049447, "grad_norm": 1.734375, "learning_rate": 0.0001630169088901722, "loss": 2.7717, "step": 15240 }, { "epoch": 0.7134882087892797, "grad_norm": 1.2109375, "learning_rate": 0.0001630123763416041, "loss": 2.2763, "step": 15241 }, { "epoch": 0.7135350225291123, "grad_norm": 1.15625, "learning_rate": 0.00016300784357832496, "loss": 2.3063, "step": 15242 }, { "epoch": 0.7135818362689449, "grad_norm": 1.4765625, "learning_rate": 0.00016300331060035026, "loss": 2.4867, "step": 15243 }, { "epoch": 0.7136286500087776, "grad_norm": 1.9453125, "learning_rate": 0.0001629987774076954, "loss": 2.7873, "step": 15244 }, { "epoch": 0.7136754637486102, "grad_norm": 1.2265625, "learning_rate": 0.00016299424400037586, "loss": 2.7629, "step": 15245 }, { "epoch": 0.7137222774884429, "grad_norm": 1.546875, "learning_rate": 0.00016298971037840708, "loss": 2.7053, "step": 15246 }, { "epoch": 0.7137690912282755, "grad_norm": 1.921875, "learning_rate": 0.00016298517654180446, "loss": 2.3837, "step": 15247 }, { "epoch": 0.7138159049681081, "grad_norm": 2.171875, "learning_rate": 0.00016298064249058354, "loss": 2.9008, "step": 15248 }, { "epoch": 0.7138627187079408, "grad_norm": 1.3984375, "learning_rate": 0.0001629761082247597, "loss": 2.5848, "step": 15249 }, { "epoch": 0.7139095324477734, "grad_norm": 1.4765625, "learning_rate": 0.00016297157374434843, "loss": 2.9555, "step": 15250 }, { "epoch": 0.7139563461876061, "grad_norm": 1.2109375, "learning_rate": 0.00016296703904936514, "loss": 2.5905, "step": 15251 }, { "epoch": 0.7140031599274387, "grad_norm": 1.2578125, "learning_rate": 0.00016296250413982532, "loss": 2.9934, "step": 15252 }, { "epoch": 0.7140499736672713, "grad_norm": 1.296875, "learning_rate": 0.00016295796901574441, "loss": 2.6279, "step": 15253 }, { "epoch": 0.714096787407104, "grad_norm": 1.125, "learning_rate": 0.00016295343367713784, "loss": 2.4952, "step": 15254 }, { "epoch": 0.7141436011469366, "grad_norm": 1.7421875, "learning_rate": 0.0001629488981240211, "loss": 2.617, "step": 15255 }, { "epoch": 0.7141904148867693, "grad_norm": 1.78125, "learning_rate": 0.00016294436235640963, "loss": 2.9283, "step": 15256 }, { "epoch": 0.7142372286266019, "grad_norm": 1.75, "learning_rate": 0.00016293982637431885, "loss": 2.7894, "step": 15257 }, { "epoch": 0.7142840423664345, "grad_norm": 1.2890625, "learning_rate": 0.0001629352901777643, "loss": 2.4589, "step": 15258 }, { "epoch": 0.7143308561062672, "grad_norm": 2.015625, "learning_rate": 0.00016293075376676134, "loss": 2.4621, "step": 15259 }, { "epoch": 0.7143776698460998, "grad_norm": 1.53125, "learning_rate": 0.0001629262171413255, "loss": 2.2958, "step": 15260 }, { "epoch": 0.7144244835859325, "grad_norm": 1.3046875, "learning_rate": 0.00016292168030147223, "loss": 3.0226, "step": 15261 }, { "epoch": 0.7144712973257651, "grad_norm": 1.484375, "learning_rate": 0.00016291714324721693, "loss": 2.7889, "step": 15262 }, { "epoch": 0.7145181110655977, "grad_norm": 1.75, "learning_rate": 0.00016291260597857513, "loss": 2.9044, "step": 15263 }, { "epoch": 0.7145649248054304, "grad_norm": 1.375, "learning_rate": 0.00016290806849556227, "loss": 2.5416, "step": 15264 }, { "epoch": 0.714611738545263, "grad_norm": 2.734375, "learning_rate": 0.00016290353079819378, "loss": 2.3228, "step": 15265 }, { "epoch": 0.7146585522850957, "grad_norm": 1.328125, "learning_rate": 0.00016289899288648513, "loss": 2.5591, "step": 15266 }, { "epoch": 0.7147053660249283, "grad_norm": 1.59375, "learning_rate": 0.00016289445476045183, "loss": 2.4199, "step": 15267 }, { "epoch": 0.7147521797647609, "grad_norm": 1.3203125, "learning_rate": 0.00016288991642010928, "loss": 2.7128, "step": 15268 }, { "epoch": 0.7147989935045936, "grad_norm": 1.046875, "learning_rate": 0.00016288537786547303, "loss": 2.4818, "step": 15269 }, { "epoch": 0.7148458072444263, "grad_norm": 1.2421875, "learning_rate": 0.00016288083909655842, "loss": 3.5124, "step": 15270 }, { "epoch": 0.7148926209842589, "grad_norm": 1.3203125, "learning_rate": 0.00016287630011338104, "loss": 2.2603, "step": 15271 }, { "epoch": 0.7149394347240915, "grad_norm": 1.5078125, "learning_rate": 0.00016287176091595628, "loss": 2.7219, "step": 15272 }, { "epoch": 0.7149862484639241, "grad_norm": 1.5390625, "learning_rate": 0.00016286722150429963, "loss": 2.909, "step": 15273 }, { "epoch": 0.7150330622037568, "grad_norm": 1.7578125, "learning_rate": 0.00016286268187842655, "loss": 2.6932, "step": 15274 }, { "epoch": 0.7150798759435895, "grad_norm": 1.46875, "learning_rate": 0.00016285814203835253, "loss": 2.61, "step": 15275 }, { "epoch": 0.7151266896834221, "grad_norm": 2.109375, "learning_rate": 0.00016285360198409302, "loss": 2.692, "step": 15276 }, { "epoch": 0.7151735034232547, "grad_norm": 1.5625, "learning_rate": 0.00016284906171566346, "loss": 3.1829, "step": 15277 }, { "epoch": 0.7152203171630873, "grad_norm": 1.4296875, "learning_rate": 0.00016284452123307942, "loss": 2.9432, "step": 15278 }, { "epoch": 0.71526713090292, "grad_norm": 1.1796875, "learning_rate": 0.00016283998053635626, "loss": 2.7943, "step": 15279 }, { "epoch": 0.7153139446427527, "grad_norm": 2.234375, "learning_rate": 0.0001628354396255095, "loss": 2.7376, "step": 15280 }, { "epoch": 0.7153607583825853, "grad_norm": 1.34375, "learning_rate": 0.00016283089850055465, "loss": 2.7274, "step": 15281 }, { "epoch": 0.7154075721224179, "grad_norm": 1.3125, "learning_rate": 0.0001628263571615071, "loss": 2.8341, "step": 15282 }, { "epoch": 0.7154543858622505, "grad_norm": 1.3515625, "learning_rate": 0.00016282181560838238, "loss": 2.7153, "step": 15283 }, { "epoch": 0.7155011996020832, "grad_norm": 1.40625, "learning_rate": 0.00016281727384119597, "loss": 2.3332, "step": 15284 }, { "epoch": 0.7155480133419159, "grad_norm": 3.03125, "learning_rate": 0.0001628127318599633, "loss": 2.3329, "step": 15285 }, { "epoch": 0.7155948270817485, "grad_norm": 1.328125, "learning_rate": 0.0001628081896646999, "loss": 2.6233, "step": 15286 }, { "epoch": 0.7156416408215811, "grad_norm": 1.4375, "learning_rate": 0.00016280364725542126, "loss": 3.0214, "step": 15287 }, { "epoch": 0.7156884545614137, "grad_norm": 1.2578125, "learning_rate": 0.00016279910463214278, "loss": 2.7147, "step": 15288 }, { "epoch": 0.7157352683012465, "grad_norm": 1.5, "learning_rate": 0.00016279456179487997, "loss": 2.6115, "step": 15289 }, { "epoch": 0.7157820820410791, "grad_norm": 1.4296875, "learning_rate": 0.00016279001874364835, "loss": 2.7666, "step": 15290 }, { "epoch": 0.7158288957809117, "grad_norm": 1.8203125, "learning_rate": 0.00016278547547846336, "loss": 2.8272, "step": 15291 }, { "epoch": 0.7158757095207443, "grad_norm": 1.484375, "learning_rate": 0.00016278093199934048, "loss": 2.7101, "step": 15292 }, { "epoch": 0.7159225232605769, "grad_norm": 1.578125, "learning_rate": 0.00016277638830629521, "loss": 2.8464, "step": 15293 }, { "epoch": 0.7159693370004097, "grad_norm": 1.3828125, "learning_rate": 0.00016277184439934304, "loss": 2.0182, "step": 15294 }, { "epoch": 0.7160161507402423, "grad_norm": 3.53125, "learning_rate": 0.0001627673002784994, "loss": 2.1161, "step": 15295 }, { "epoch": 0.7160629644800749, "grad_norm": 1.4765625, "learning_rate": 0.00016276275594377982, "loss": 2.6119, "step": 15296 }, { "epoch": 0.7161097782199075, "grad_norm": 1.234375, "learning_rate": 0.00016275821139519984, "loss": 2.4681, "step": 15297 }, { "epoch": 0.7161565919597401, "grad_norm": 1.625, "learning_rate": 0.00016275366663277483, "loss": 2.8516, "step": 15298 }, { "epoch": 0.7162034056995729, "grad_norm": 1.1171875, "learning_rate": 0.00016274912165652032, "loss": 2.5444, "step": 15299 }, { "epoch": 0.7162502194394055, "grad_norm": 1.4453125, "learning_rate": 0.00016274457646645182, "loss": 2.3193, "step": 15300 }, { "epoch": 0.7162970331792381, "grad_norm": 1.65625, "learning_rate": 0.00016274003106258483, "loss": 2.5729, "step": 15301 }, { "epoch": 0.7163438469190707, "grad_norm": 1.2265625, "learning_rate": 0.00016273548544493477, "loss": 2.6832, "step": 15302 }, { "epoch": 0.7163906606589033, "grad_norm": 1.59375, "learning_rate": 0.00016273093961351718, "loss": 2.7028, "step": 15303 }, { "epoch": 0.7164374743987361, "grad_norm": 1.4921875, "learning_rate": 0.00016272639356834754, "loss": 2.5533, "step": 15304 }, { "epoch": 0.7164842881385687, "grad_norm": 1.75, "learning_rate": 0.00016272184730944136, "loss": 2.1973, "step": 15305 }, { "epoch": 0.7165311018784013, "grad_norm": 1.3046875, "learning_rate": 0.00016271730083681409, "loss": 2.2083, "step": 15306 }, { "epoch": 0.7165779156182339, "grad_norm": 1.3515625, "learning_rate": 0.00016271275415048124, "loss": 2.709, "step": 15307 }, { "epoch": 0.7166247293580666, "grad_norm": 1.2734375, "learning_rate": 0.0001627082072504583, "loss": 2.5535, "step": 15308 }, { "epoch": 0.7166715430978993, "grad_norm": 1.4921875, "learning_rate": 0.00016270366013676078, "loss": 2.5307, "step": 15309 }, { "epoch": 0.7167183568377319, "grad_norm": 1.25, "learning_rate": 0.00016269911280940416, "loss": 2.7532, "step": 15310 }, { "epoch": 0.7167651705775645, "grad_norm": 1.3125, "learning_rate": 0.00016269456526840395, "loss": 2.4944, "step": 15311 }, { "epoch": 0.7168119843173971, "grad_norm": 1.3984375, "learning_rate": 0.00016269001751377562, "loss": 2.5893, "step": 15312 }, { "epoch": 0.7168587980572299, "grad_norm": 1.359375, "learning_rate": 0.00016268546954553467, "loss": 2.5748, "step": 15313 }, { "epoch": 0.7169056117970625, "grad_norm": 1.4375, "learning_rate": 0.0001626809213636966, "loss": 2.4535, "step": 15314 }, { "epoch": 0.7169524255368951, "grad_norm": 1.2734375, "learning_rate": 0.00016267637296827695, "loss": 2.6816, "step": 15315 }, { "epoch": 0.7169992392767277, "grad_norm": 1.4296875, "learning_rate": 0.00016267182435929117, "loss": 2.5955, "step": 15316 }, { "epoch": 0.7170460530165603, "grad_norm": 1.3203125, "learning_rate": 0.00016266727553675478, "loss": 2.5613, "step": 15317 }, { "epoch": 0.717092866756393, "grad_norm": 2.515625, "learning_rate": 0.00016266272650068323, "loss": 2.0798, "step": 15318 }, { "epoch": 0.7171396804962257, "grad_norm": 1.7890625, "learning_rate": 0.0001626581772510921, "loss": 2.3987, "step": 15319 }, { "epoch": 0.7171864942360583, "grad_norm": 1.4921875, "learning_rate": 0.00016265362778799682, "loss": 2.6248, "step": 15320 }, { "epoch": 0.7172333079758909, "grad_norm": 1.1328125, "learning_rate": 0.00016264907811141293, "loss": 2.2415, "step": 15321 }, { "epoch": 0.7172801217157235, "grad_norm": 2.34375, "learning_rate": 0.00016264452822135596, "loss": 2.5539, "step": 15322 }, { "epoch": 0.7173269354555563, "grad_norm": 2.28125, "learning_rate": 0.00016263997811784137, "loss": 2.4336, "step": 15323 }, { "epoch": 0.7173737491953889, "grad_norm": 1.3828125, "learning_rate": 0.00016263542780088467, "loss": 2.5727, "step": 15324 }, { "epoch": 0.7174205629352215, "grad_norm": 2.09375, "learning_rate": 0.00016263087727050137, "loss": 2.5629, "step": 15325 }, { "epoch": 0.7174673766750541, "grad_norm": 1.2734375, "learning_rate": 0.00016262632652670698, "loss": 2.4822, "step": 15326 }, { "epoch": 0.7175141904148867, "grad_norm": 1.3359375, "learning_rate": 0.000162621775569517, "loss": 2.2059, "step": 15327 }, { "epoch": 0.7175610041547195, "grad_norm": 1.203125, "learning_rate": 0.00016261722439894693, "loss": 2.9171, "step": 15328 }, { "epoch": 0.7176078178945521, "grad_norm": 1.4296875, "learning_rate": 0.00016261267301501229, "loss": 2.7608, "step": 15329 }, { "epoch": 0.7176546316343847, "grad_norm": 2.34375, "learning_rate": 0.00016260812141772859, "loss": 2.8254, "step": 15330 }, { "epoch": 0.7177014453742173, "grad_norm": 1.3515625, "learning_rate": 0.00016260356960711133, "loss": 2.3495, "step": 15331 }, { "epoch": 0.7177482591140499, "grad_norm": 1.5078125, "learning_rate": 0.00016259901758317605, "loss": 2.6221, "step": 15332 }, { "epoch": 0.7177950728538827, "grad_norm": 1.671875, "learning_rate": 0.0001625944653459382, "loss": 2.5132, "step": 15333 }, { "epoch": 0.7178418865937153, "grad_norm": 1.296875, "learning_rate": 0.00016258991289541335, "loss": 2.549, "step": 15334 }, { "epoch": 0.7178887003335479, "grad_norm": 1.640625, "learning_rate": 0.00016258536023161697, "loss": 2.2946, "step": 15335 }, { "epoch": 0.7179355140733805, "grad_norm": 1.4921875, "learning_rate": 0.00016258080735456464, "loss": 2.7224, "step": 15336 }, { "epoch": 0.7179823278132131, "grad_norm": 1.28125, "learning_rate": 0.00016257625426427177, "loss": 2.5084, "step": 15337 }, { "epoch": 0.7180291415530459, "grad_norm": 2.015625, "learning_rate": 0.00016257170096075396, "loss": 2.7959, "step": 15338 }, { "epoch": 0.7180759552928785, "grad_norm": 1.453125, "learning_rate": 0.00016256714744402668, "loss": 2.7606, "step": 15339 }, { "epoch": 0.7181227690327111, "grad_norm": 1.421875, "learning_rate": 0.00016256259371410546, "loss": 2.8796, "step": 15340 }, { "epoch": 0.7181695827725437, "grad_norm": 1.3671875, "learning_rate": 0.00016255803977100584, "loss": 2.7825, "step": 15341 }, { "epoch": 0.7182163965123763, "grad_norm": 1.34375, "learning_rate": 0.0001625534856147433, "loss": 2.5822, "step": 15342 }, { "epoch": 0.7182632102522091, "grad_norm": 1.46875, "learning_rate": 0.00016254893124533336, "loss": 2.6441, "step": 15343 }, { "epoch": 0.7183100239920417, "grad_norm": 1.453125, "learning_rate": 0.0001625443766627916, "loss": 2.1368, "step": 15344 }, { "epoch": 0.7183568377318743, "grad_norm": 1.4765625, "learning_rate": 0.00016253982186713347, "loss": 2.8024, "step": 15345 }, { "epoch": 0.7184036514717069, "grad_norm": 1.4140625, "learning_rate": 0.00016253526685837448, "loss": 2.5614, "step": 15346 }, { "epoch": 0.7184504652115395, "grad_norm": 1.6640625, "learning_rate": 0.00016253071163653023, "loss": 3.4089, "step": 15347 }, { "epoch": 0.7184972789513723, "grad_norm": 1.4765625, "learning_rate": 0.00016252615620161614, "loss": 2.6961, "step": 15348 }, { "epoch": 0.7185440926912049, "grad_norm": 1.4375, "learning_rate": 0.00016252160055364783, "loss": 2.5019, "step": 15349 }, { "epoch": 0.7185909064310375, "grad_norm": 2.703125, "learning_rate": 0.00016251704469264077, "loss": 2.4705, "step": 15350 }, { "epoch": 0.7186377201708701, "grad_norm": 1.421875, "learning_rate": 0.0001625124886186105, "loss": 2.3817, "step": 15351 }, { "epoch": 0.7186845339107027, "grad_norm": 1.40625, "learning_rate": 0.0001625079323315725, "loss": 2.5201, "step": 15352 }, { "epoch": 0.7187313476505355, "grad_norm": 1.5078125, "learning_rate": 0.00016250337583154236, "loss": 2.679, "step": 15353 }, { "epoch": 0.7187781613903681, "grad_norm": 1.6875, "learning_rate": 0.00016249881911853563, "loss": 2.8484, "step": 15354 }, { "epoch": 0.7188249751302007, "grad_norm": 1.3203125, "learning_rate": 0.0001624942621925677, "loss": 2.9255, "step": 15355 }, { "epoch": 0.7188717888700333, "grad_norm": 1.1328125, "learning_rate": 0.00016248970505365425, "loss": 2.6182, "step": 15356 }, { "epoch": 0.718918602609866, "grad_norm": 1.390625, "learning_rate": 0.0001624851477018107, "loss": 2.3158, "step": 15357 }, { "epoch": 0.7189654163496987, "grad_norm": 1.5546875, "learning_rate": 0.00016248059013705264, "loss": 2.8876, "step": 15358 }, { "epoch": 0.7190122300895313, "grad_norm": 1.6171875, "learning_rate": 0.00016247603235939557, "loss": 4.0838, "step": 15359 }, { "epoch": 0.7190590438293639, "grad_norm": 1.4140625, "learning_rate": 0.00016247147436885504, "loss": 2.6141, "step": 15360 }, { "epoch": 0.7191058575691965, "grad_norm": 1.5859375, "learning_rate": 0.0001624669161654466, "loss": 2.3109, "step": 15361 }, { "epoch": 0.7191526713090292, "grad_norm": 1.328125, "learning_rate": 0.00016246235774918568, "loss": 2.6086, "step": 15362 }, { "epoch": 0.7191994850488619, "grad_norm": 1.390625, "learning_rate": 0.00016245779912008794, "loss": 2.5783, "step": 15363 }, { "epoch": 0.7192462987886945, "grad_norm": 2.296875, "learning_rate": 0.00016245324027816887, "loss": 2.7242, "step": 15364 }, { "epoch": 0.7192931125285271, "grad_norm": 2.0625, "learning_rate": 0.00016244868122344396, "loss": 3.0058, "step": 15365 }, { "epoch": 0.7193399262683597, "grad_norm": 1.34375, "learning_rate": 0.0001624441219559288, "loss": 2.4279, "step": 15366 }, { "epoch": 0.7193867400081924, "grad_norm": 1.2890625, "learning_rate": 0.0001624395624756389, "loss": 2.6554, "step": 15367 }, { "epoch": 0.7194335537480251, "grad_norm": 1.328125, "learning_rate": 0.00016243500278258978, "loss": 2.6148, "step": 15368 }, { "epoch": 0.7194803674878577, "grad_norm": 1.4375, "learning_rate": 0.00016243044287679703, "loss": 3.1121, "step": 15369 }, { "epoch": 0.7195271812276903, "grad_norm": 1.90625, "learning_rate": 0.00016242588275827612, "loss": 2.7133, "step": 15370 }, { "epoch": 0.719573994967523, "grad_norm": 1.078125, "learning_rate": 0.00016242132242704263, "loss": 2.5121, "step": 15371 }, { "epoch": 0.7196208087073556, "grad_norm": 1.2421875, "learning_rate": 0.0001624167618831121, "loss": 2.4249, "step": 15372 }, { "epoch": 0.7196676224471883, "grad_norm": 1.2890625, "learning_rate": 0.00016241220112650007, "loss": 2.9893, "step": 15373 }, { "epoch": 0.7197144361870209, "grad_norm": 1.859375, "learning_rate": 0.0001624076401572221, "loss": 2.8446, "step": 15374 }, { "epoch": 0.7197612499268535, "grad_norm": 1.2734375, "learning_rate": 0.00016240307897529368, "loss": 2.3356, "step": 15375 }, { "epoch": 0.7198080636666861, "grad_norm": 1.734375, "learning_rate": 0.00016239851758073035, "loss": 2.2007, "step": 15376 }, { "epoch": 0.7198548774065188, "grad_norm": 1.359375, "learning_rate": 0.0001623939559735477, "loss": 2.5149, "step": 15377 }, { "epoch": 0.7199016911463515, "grad_norm": 1.4609375, "learning_rate": 0.00016238939415376123, "loss": 2.6237, "step": 15378 }, { "epoch": 0.7199485048861841, "grad_norm": 1.8984375, "learning_rate": 0.0001623848321213865, "loss": 2.6502, "step": 15379 }, { "epoch": 0.7199953186260167, "grad_norm": 1.890625, "learning_rate": 0.0001623802698764391, "loss": 3.2454, "step": 15380 }, { "epoch": 0.7200421323658494, "grad_norm": 1.546875, "learning_rate": 0.00016237570741893452, "loss": 2.5448, "step": 15381 }, { "epoch": 0.720088946105682, "grad_norm": 1.2890625, "learning_rate": 0.0001623711447488883, "loss": 2.4308, "step": 15382 }, { "epoch": 0.7201357598455147, "grad_norm": 1.625, "learning_rate": 0.00016236658186631606, "loss": 2.7259, "step": 15383 }, { "epoch": 0.7201825735853473, "grad_norm": 1.3984375, "learning_rate": 0.00016236201877123328, "loss": 2.77, "step": 15384 }, { "epoch": 0.7202293873251799, "grad_norm": 1.5390625, "learning_rate": 0.0001623574554636555, "loss": 2.9886, "step": 15385 }, { "epoch": 0.7202762010650126, "grad_norm": 1.328125, "learning_rate": 0.0001623528919435983, "loss": 2.3473, "step": 15386 }, { "epoch": 0.7203230148048452, "grad_norm": 1.3203125, "learning_rate": 0.00016234832821107725, "loss": 2.835, "step": 15387 }, { "epoch": 0.7203698285446779, "grad_norm": 1.4375, "learning_rate": 0.00016234376426610785, "loss": 2.4842, "step": 15388 }, { "epoch": 0.7204166422845105, "grad_norm": 1.78125, "learning_rate": 0.00016233920010870568, "loss": 2.3762, "step": 15389 }, { "epoch": 0.7204634560243431, "grad_norm": 1.3984375, "learning_rate": 0.00016233463573888627, "loss": 2.6052, "step": 15390 }, { "epoch": 0.7205102697641758, "grad_norm": 1.40625, "learning_rate": 0.00016233007115666522, "loss": 2.751, "step": 15391 }, { "epoch": 0.7205570835040084, "grad_norm": 1.1484375, "learning_rate": 0.00016232550636205808, "loss": 2.4403, "step": 15392 }, { "epoch": 0.7206038972438411, "grad_norm": 1.2421875, "learning_rate": 0.00016232094135508037, "loss": 2.4852, "step": 15393 }, { "epoch": 0.7206507109836737, "grad_norm": 1.3203125, "learning_rate": 0.00016231637613574763, "loss": 2.5657, "step": 15394 }, { "epoch": 0.7206975247235063, "grad_norm": 1.1484375, "learning_rate": 0.00016231181070407543, "loss": 2.9527, "step": 15395 }, { "epoch": 0.720744338463339, "grad_norm": 1.3046875, "learning_rate": 0.00016230724506007934, "loss": 2.7903, "step": 15396 }, { "epoch": 0.7207911522031716, "grad_norm": 2.546875, "learning_rate": 0.00016230267920377492, "loss": 2.2785, "step": 15397 }, { "epoch": 0.7208379659430043, "grad_norm": 1.21875, "learning_rate": 0.00016229811313517772, "loss": 2.646, "step": 15398 }, { "epoch": 0.7208847796828369, "grad_norm": 1.3828125, "learning_rate": 0.00016229354685430334, "loss": 2.11, "step": 15399 }, { "epoch": 0.7209315934226695, "grad_norm": 2.234375, "learning_rate": 0.00016228898036116726, "loss": 2.8597, "step": 15400 }, { "epoch": 0.7209784071625022, "grad_norm": 1.8515625, "learning_rate": 0.0001622844136557851, "loss": 2.7972, "step": 15401 }, { "epoch": 0.7210252209023348, "grad_norm": 1.34375, "learning_rate": 0.00016227984673817237, "loss": 2.3456, "step": 15402 }, { "epoch": 0.7210720346421675, "grad_norm": 1.40625, "learning_rate": 0.0001622752796083447, "loss": 2.0566, "step": 15403 }, { "epoch": 0.7211188483820001, "grad_norm": 1.1015625, "learning_rate": 0.00016227071226631757, "loss": 2.6284, "step": 15404 }, { "epoch": 0.7211656621218328, "grad_norm": 1.6171875, "learning_rate": 0.00016226614471210663, "loss": 2.6868, "step": 15405 }, { "epoch": 0.7212124758616654, "grad_norm": 1.4453125, "learning_rate": 0.00016226157694572738, "loss": 2.7505, "step": 15406 }, { "epoch": 0.721259289601498, "grad_norm": 1.2109375, "learning_rate": 0.0001622570089671954, "loss": 2.6367, "step": 15407 }, { "epoch": 0.7213061033413307, "grad_norm": 1.3515625, "learning_rate": 0.00016225244077652627, "loss": 2.0643, "step": 15408 }, { "epoch": 0.7213529170811633, "grad_norm": 1.609375, "learning_rate": 0.00016224787237373555, "loss": 2.512, "step": 15409 }, { "epoch": 0.721399730820996, "grad_norm": 1.6171875, "learning_rate": 0.0001622433037588388, "loss": 2.3384, "step": 15410 }, { "epoch": 0.7214465445608286, "grad_norm": 1.875, "learning_rate": 0.0001622387349318516, "loss": 2.6105, "step": 15411 }, { "epoch": 0.7214933583006612, "grad_norm": 1.1015625, "learning_rate": 0.0001622341658927895, "loss": 2.3442, "step": 15412 }, { "epoch": 0.7215401720404939, "grad_norm": 1.8515625, "learning_rate": 0.00016222959664166807, "loss": 2.2814, "step": 15413 }, { "epoch": 0.7215869857803265, "grad_norm": 1.8671875, "learning_rate": 0.0001622250271785029, "loss": 2.7649, "step": 15414 }, { "epoch": 0.7216337995201592, "grad_norm": 1.6328125, "learning_rate": 0.00016222045750330954, "loss": 2.7014, "step": 15415 }, { "epoch": 0.7216806132599918, "grad_norm": 1.390625, "learning_rate": 0.00016221588761610356, "loss": 1.9963, "step": 15416 }, { "epoch": 0.7217274269998244, "grad_norm": 1.453125, "learning_rate": 0.00016221131751690054, "loss": 2.5513, "step": 15417 }, { "epoch": 0.7217742407396571, "grad_norm": 5.96875, "learning_rate": 0.00016220674720571608, "loss": 3.2466, "step": 15418 }, { "epoch": 0.7218210544794897, "grad_norm": 2.1875, "learning_rate": 0.0001622021766825657, "loss": 3.0746, "step": 15419 }, { "epoch": 0.7218678682193224, "grad_norm": 1.5390625, "learning_rate": 0.000162197605947465, "loss": 2.8366, "step": 15420 }, { "epoch": 0.721914681959155, "grad_norm": 1.4140625, "learning_rate": 0.00016219303500042957, "loss": 2.4906, "step": 15421 }, { "epoch": 0.7219614956989876, "grad_norm": 1.3828125, "learning_rate": 0.00016218846384147493, "loss": 2.8141, "step": 15422 }, { "epoch": 0.7220083094388203, "grad_norm": 1.203125, "learning_rate": 0.0001621838924706167, "loss": 2.4058, "step": 15423 }, { "epoch": 0.722055123178653, "grad_norm": 1.4765625, "learning_rate": 0.00016217932088787048, "loss": 2.9097, "step": 15424 }, { "epoch": 0.7221019369184856, "grad_norm": 1.6875, "learning_rate": 0.00016217474909325182, "loss": 2.7186, "step": 15425 }, { "epoch": 0.7221487506583182, "grad_norm": 1.4296875, "learning_rate": 0.00016217017708677626, "loss": 2.412, "step": 15426 }, { "epoch": 0.7221955643981508, "grad_norm": 1.421875, "learning_rate": 0.00016216560486845944, "loss": 2.5275, "step": 15427 }, { "epoch": 0.7222423781379835, "grad_norm": 1.7890625, "learning_rate": 0.00016216103243831692, "loss": 3.1826, "step": 15428 }, { "epoch": 0.7222891918778162, "grad_norm": 1.5546875, "learning_rate": 0.00016215645979636426, "loss": 2.9223, "step": 15429 }, { "epoch": 0.7223360056176488, "grad_norm": 1.453125, "learning_rate": 0.00016215188694261706, "loss": 2.4891, "step": 15430 }, { "epoch": 0.7223828193574814, "grad_norm": 1.140625, "learning_rate": 0.0001621473138770909, "loss": 2.5622, "step": 15431 }, { "epoch": 0.7224296330973141, "grad_norm": 1.59375, "learning_rate": 0.00016214274059980137, "loss": 2.6101, "step": 15432 }, { "epoch": 0.7224764468371467, "grad_norm": 1.2421875, "learning_rate": 0.00016213816711076401, "loss": 3.4852, "step": 15433 }, { "epoch": 0.7225232605769794, "grad_norm": 1.375, "learning_rate": 0.00016213359340999447, "loss": 2.8676, "step": 15434 }, { "epoch": 0.722570074316812, "grad_norm": 1.1953125, "learning_rate": 0.0001621290194975083, "loss": 2.4931, "step": 15435 }, { "epoch": 0.7226168880566446, "grad_norm": 1.421875, "learning_rate": 0.0001621244453733211, "loss": 2.4503, "step": 15436 }, { "epoch": 0.7226637017964773, "grad_norm": 1.40625, "learning_rate": 0.0001621198710374484, "loss": 2.4543, "step": 15437 }, { "epoch": 0.7227105155363099, "grad_norm": 2.0, "learning_rate": 0.00016211529648990584, "loss": 2.8541, "step": 15438 }, { "epoch": 0.7227573292761426, "grad_norm": 1.3515625, "learning_rate": 0.00016211072173070903, "loss": 2.4248, "step": 15439 }, { "epoch": 0.7228041430159752, "grad_norm": 1.3984375, "learning_rate": 0.0001621061467598735, "loss": 2.6851, "step": 15440 }, { "epoch": 0.7228509567558078, "grad_norm": 1.3125, "learning_rate": 0.00016210157157741487, "loss": 2.728, "step": 15441 }, { "epoch": 0.7228977704956405, "grad_norm": 1.5390625, "learning_rate": 0.00016209699618334873, "loss": 2.532, "step": 15442 }, { "epoch": 0.7229445842354731, "grad_norm": 1.75, "learning_rate": 0.00016209242057769067, "loss": 2.111, "step": 15443 }, { "epoch": 0.7229913979753058, "grad_norm": 1.328125, "learning_rate": 0.00016208784476045627, "loss": 2.8192, "step": 15444 }, { "epoch": 0.7230382117151384, "grad_norm": 1.390625, "learning_rate": 0.00016208326873166112, "loss": 2.7472, "step": 15445 }, { "epoch": 0.723085025454971, "grad_norm": 1.8828125, "learning_rate": 0.0001620786924913208, "loss": 2.5774, "step": 15446 }, { "epoch": 0.7231318391948037, "grad_norm": 1.96875, "learning_rate": 0.00016207411603945095, "loss": 2.6919, "step": 15447 }, { "epoch": 0.7231786529346363, "grad_norm": 1.1484375, "learning_rate": 0.00016206953937606715, "loss": 2.5629, "step": 15448 }, { "epoch": 0.723225466674469, "grad_norm": 1.90625, "learning_rate": 0.000162064962501185, "loss": 2.7083, "step": 15449 }, { "epoch": 0.7232722804143016, "grad_norm": 1.8046875, "learning_rate": 0.00016206038541482002, "loss": 3.0052, "step": 15450 }, { "epoch": 0.7233190941541342, "grad_norm": 1.296875, "learning_rate": 0.0001620558081169879, "loss": 2.5684, "step": 15451 }, { "epoch": 0.7233659078939669, "grad_norm": 1.421875, "learning_rate": 0.0001620512306077042, "loss": 2.9155, "step": 15452 }, { "epoch": 0.7234127216337995, "grad_norm": 1.1796875, "learning_rate": 0.00016204665288698452, "loss": 2.8153, "step": 15453 }, { "epoch": 0.7234595353736322, "grad_norm": 2.046875, "learning_rate": 0.00016204207495484446, "loss": 2.0617, "step": 15454 }, { "epoch": 0.7235063491134648, "grad_norm": 1.609375, "learning_rate": 0.00016203749681129956, "loss": 2.5237, "step": 15455 }, { "epoch": 0.7235531628532974, "grad_norm": 1.625, "learning_rate": 0.0001620329184563655, "loss": 2.6903, "step": 15456 }, { "epoch": 0.7235999765931301, "grad_norm": 1.328125, "learning_rate": 0.0001620283398900579, "loss": 2.617, "step": 15457 }, { "epoch": 0.7236467903329628, "grad_norm": 3.515625, "learning_rate": 0.00016202376111239229, "loss": 2.7248, "step": 15458 }, { "epoch": 0.7236936040727954, "grad_norm": 1.5859375, "learning_rate": 0.0001620191821233843, "loss": 2.5479, "step": 15459 }, { "epoch": 0.723740417812628, "grad_norm": 2.40625, "learning_rate": 0.00016201460292304955, "loss": 1.9348, "step": 15460 }, { "epoch": 0.7237872315524606, "grad_norm": 1.859375, "learning_rate": 0.0001620100235114036, "loss": 2.2293, "step": 15461 }, { "epoch": 0.7238340452922933, "grad_norm": 1.796875, "learning_rate": 0.00016200544388846207, "loss": 2.1086, "step": 15462 }, { "epoch": 0.723880859032126, "grad_norm": 1.421875, "learning_rate": 0.0001620008640542406, "loss": 2.7084, "step": 15463 }, { "epoch": 0.7239276727719586, "grad_norm": 1.6171875, "learning_rate": 0.00016199628400875477, "loss": 2.7867, "step": 15464 }, { "epoch": 0.7239744865117912, "grad_norm": 1.4375, "learning_rate": 0.00016199170375202017, "loss": 2.2813, "step": 15465 }, { "epoch": 0.7240213002516238, "grad_norm": 1.609375, "learning_rate": 0.00016198712328405242, "loss": 2.7123, "step": 15466 }, { "epoch": 0.7240681139914565, "grad_norm": 1.015625, "learning_rate": 0.00016198254260486715, "loss": 2.5961, "step": 15467 }, { "epoch": 0.7241149277312892, "grad_norm": 1.390625, "learning_rate": 0.00016197796171447994, "loss": 3.0383, "step": 15468 }, { "epoch": 0.7241617414711218, "grad_norm": 1.6015625, "learning_rate": 0.00016197338061290642, "loss": 2.7995, "step": 15469 }, { "epoch": 0.7242085552109544, "grad_norm": 1.5859375, "learning_rate": 0.00016196879930016218, "loss": 2.477, "step": 15470 }, { "epoch": 0.724255368950787, "grad_norm": 1.2109375, "learning_rate": 0.0001619642177762628, "loss": 2.6462, "step": 15471 }, { "epoch": 0.7243021826906197, "grad_norm": 1.75, "learning_rate": 0.00016195963604122396, "loss": 2.8556, "step": 15472 }, { "epoch": 0.7243489964304524, "grad_norm": 1.875, "learning_rate": 0.00016195505409506125, "loss": 2.4654, "step": 15473 }, { "epoch": 0.724395810170285, "grad_norm": 1.1875, "learning_rate": 0.00016195047193779026, "loss": 1.8282, "step": 15474 }, { "epoch": 0.7244426239101176, "grad_norm": 1.2578125, "learning_rate": 0.00016194588956942662, "loss": 2.1503, "step": 15475 }, { "epoch": 0.7244894376499502, "grad_norm": 1.1796875, "learning_rate": 0.00016194130698998595, "loss": 2.4647, "step": 15476 }, { "epoch": 0.724536251389783, "grad_norm": 1.6953125, "learning_rate": 0.00016193672419948386, "loss": 2.4473, "step": 15477 }, { "epoch": 0.7245830651296156, "grad_norm": 1.5625, "learning_rate": 0.000161932141197936, "loss": 2.9692, "step": 15478 }, { "epoch": 0.7246298788694482, "grad_norm": 1.671875, "learning_rate": 0.00016192755798535787, "loss": 2.8711, "step": 15479 }, { "epoch": 0.7246766926092808, "grad_norm": 2.03125, "learning_rate": 0.0001619229745617652, "loss": 2.745, "step": 15480 }, { "epoch": 0.7247235063491134, "grad_norm": 1.703125, "learning_rate": 0.00016191839092717357, "loss": 2.479, "step": 15481 }, { "epoch": 0.7247703200889462, "grad_norm": 1.7421875, "learning_rate": 0.0001619138070815986, "loss": 2.7341, "step": 15482 }, { "epoch": 0.7248171338287788, "grad_norm": 1.4609375, "learning_rate": 0.00016190922302505593, "loss": 2.4065, "step": 15483 }, { "epoch": 0.7248639475686114, "grad_norm": 1.0625, "learning_rate": 0.00016190463875756115, "loss": 3.7704, "step": 15484 }, { "epoch": 0.724910761308444, "grad_norm": 1.546875, "learning_rate": 0.0001619000542791299, "loss": 2.5934, "step": 15485 }, { "epoch": 0.7249575750482766, "grad_norm": 1.4375, "learning_rate": 0.00016189546958977777, "loss": 2.583, "step": 15486 }, { "epoch": 0.7250043887881094, "grad_norm": 1.9921875, "learning_rate": 0.0001618908846895204, "loss": 2.7514, "step": 15487 }, { "epoch": 0.725051202527942, "grad_norm": 1.2109375, "learning_rate": 0.00016188629957837348, "loss": 2.7954, "step": 15488 }, { "epoch": 0.7250980162677746, "grad_norm": 1.921875, "learning_rate": 0.0001618817142563525, "loss": 2.5919, "step": 15489 }, { "epoch": 0.7251448300076072, "grad_norm": 1.9296875, "learning_rate": 0.00016187712872347317, "loss": 2.9928, "step": 15490 }, { "epoch": 0.7251916437474398, "grad_norm": 1.640625, "learning_rate": 0.00016187254297975114, "loss": 2.9457, "step": 15491 }, { "epoch": 0.7252384574872726, "grad_norm": 1.8515625, "learning_rate": 0.00016186795702520198, "loss": 2.548, "step": 15492 }, { "epoch": 0.7252852712271052, "grad_norm": 1.5859375, "learning_rate": 0.0001618633708598413, "loss": 2.7336, "step": 15493 }, { "epoch": 0.7253320849669378, "grad_norm": 1.2578125, "learning_rate": 0.0001618587844836848, "loss": 2.6912, "step": 15494 }, { "epoch": 0.7253788987067704, "grad_norm": 1.5234375, "learning_rate": 0.00016185419789674805, "loss": 2.7159, "step": 15495 }, { "epoch": 0.725425712446603, "grad_norm": 1.5390625, "learning_rate": 0.0001618496110990467, "loss": 2.8589, "step": 15496 }, { "epoch": 0.7254725261864358, "grad_norm": 1.1328125, "learning_rate": 0.00016184502409059636, "loss": 2.398, "step": 15497 }, { "epoch": 0.7255193399262684, "grad_norm": 1.4609375, "learning_rate": 0.00016184043687141267, "loss": 2.6567, "step": 15498 }, { "epoch": 0.725566153666101, "grad_norm": 1.28125, "learning_rate": 0.0001618358494415113, "loss": 2.6798, "step": 15499 }, { "epoch": 0.7256129674059336, "grad_norm": 1.1796875, "learning_rate": 0.00016183126180090783, "loss": 3.0664, "step": 15500 }, { "epoch": 0.7256597811457662, "grad_norm": 1.4375, "learning_rate": 0.00016182667394961793, "loss": 2.3544, "step": 15501 }, { "epoch": 0.725706594885599, "grad_norm": 1.6015625, "learning_rate": 0.00016182208588765717, "loss": 2.6808, "step": 15502 }, { "epoch": 0.7257534086254316, "grad_norm": 1.171875, "learning_rate": 0.00016181749761504127, "loss": 2.5834, "step": 15503 }, { "epoch": 0.7258002223652642, "grad_norm": 1.484375, "learning_rate": 0.0001618129091317858, "loss": 2.2585, "step": 15504 }, { "epoch": 0.7258470361050968, "grad_norm": 1.875, "learning_rate": 0.0001618083204379064, "loss": 2.8506, "step": 15505 }, { "epoch": 0.7258938498449294, "grad_norm": 1.34375, "learning_rate": 0.00016180373153341872, "loss": 2.7898, "step": 15506 }, { "epoch": 0.7259406635847622, "grad_norm": 2.25, "learning_rate": 0.00016179914241833842, "loss": 2.6544, "step": 15507 }, { "epoch": 0.7259874773245948, "grad_norm": 1.40625, "learning_rate": 0.00016179455309268113, "loss": 2.6356, "step": 15508 }, { "epoch": 0.7260342910644274, "grad_norm": 1.328125, "learning_rate": 0.00016178996355646242, "loss": 2.7203, "step": 15509 }, { "epoch": 0.72608110480426, "grad_norm": 1.4765625, "learning_rate": 0.00016178537380969802, "loss": 2.3879, "step": 15510 }, { "epoch": 0.7261279185440926, "grad_norm": 1.5, "learning_rate": 0.00016178078385240352, "loss": 2.5918, "step": 15511 }, { "epoch": 0.7261747322839254, "grad_norm": 1.5, "learning_rate": 0.00016177619368459454, "loss": 2.8385, "step": 15512 }, { "epoch": 0.726221546023758, "grad_norm": 1.5234375, "learning_rate": 0.00016177160330628677, "loss": 2.7918, "step": 15513 }, { "epoch": 0.7262683597635906, "grad_norm": 1.3046875, "learning_rate": 0.00016176701271749586, "loss": 2.2797, "step": 15514 }, { "epoch": 0.7263151735034232, "grad_norm": 1.296875, "learning_rate": 0.0001617624219182374, "loss": 2.7835, "step": 15515 }, { "epoch": 0.7263619872432558, "grad_norm": 1.578125, "learning_rate": 0.00016175783090852707, "loss": 2.4696, "step": 15516 }, { "epoch": 0.7264088009830886, "grad_norm": 1.7421875, "learning_rate": 0.00016175323968838052, "loss": 2.4141, "step": 15517 }, { "epoch": 0.7264556147229212, "grad_norm": 1.53125, "learning_rate": 0.00016174864825781334, "loss": 2.5445, "step": 15518 }, { "epoch": 0.7265024284627538, "grad_norm": 1.7265625, "learning_rate": 0.0001617440566168412, "loss": 2.5134, "step": 15519 }, { "epoch": 0.7265492422025864, "grad_norm": 1.3359375, "learning_rate": 0.00016173946476547978, "loss": 2.4746, "step": 15520 }, { "epoch": 0.726596055942419, "grad_norm": 1.75, "learning_rate": 0.0001617348727037447, "loss": 3.0819, "step": 15521 }, { "epoch": 0.7266428696822518, "grad_norm": 4.25, "learning_rate": 0.00016173028043165163, "loss": 2.3384, "step": 15522 }, { "epoch": 0.7266896834220844, "grad_norm": 1.484375, "learning_rate": 0.00016172568794921616, "loss": 2.5618, "step": 15523 }, { "epoch": 0.726736497161917, "grad_norm": 1.46875, "learning_rate": 0.000161721095256454, "loss": 2.4302, "step": 15524 }, { "epoch": 0.7267833109017496, "grad_norm": 1.5859375, "learning_rate": 0.00016171650235338074, "loss": 2.6151, "step": 15525 }, { "epoch": 0.7268301246415823, "grad_norm": 1.265625, "learning_rate": 0.00016171190924001212, "loss": 2.4919, "step": 15526 }, { "epoch": 0.726876938381415, "grad_norm": 1.828125, "learning_rate": 0.0001617073159163637, "loss": 2.4158, "step": 15527 }, { "epoch": 0.7269237521212476, "grad_norm": 1.34375, "learning_rate": 0.0001617027223824512, "loss": 2.5359, "step": 15528 }, { "epoch": 0.7269705658610802, "grad_norm": 1.546875, "learning_rate": 0.0001616981286382902, "loss": 2.7153, "step": 15529 }, { "epoch": 0.7270173796009128, "grad_norm": 1.3515625, "learning_rate": 0.0001616935346838964, "loss": 2.6318, "step": 15530 }, { "epoch": 0.7270641933407455, "grad_norm": 1.4453125, "learning_rate": 0.00016168894051928547, "loss": 2.7268, "step": 15531 }, { "epoch": 0.7271110070805782, "grad_norm": 1.21875, "learning_rate": 0.00016168434614447302, "loss": 2.3406, "step": 15532 }, { "epoch": 0.7271578208204108, "grad_norm": 1.6640625, "learning_rate": 0.00016167975155947472, "loss": 2.3643, "step": 15533 }, { "epoch": 0.7272046345602434, "grad_norm": 1.4296875, "learning_rate": 0.00016167515676430627, "loss": 2.6508, "step": 15534 }, { "epoch": 0.727251448300076, "grad_norm": 1.2109375, "learning_rate": 0.00016167056175898329, "loss": 2.6372, "step": 15535 }, { "epoch": 0.7272982620399087, "grad_norm": 1.7421875, "learning_rate": 0.0001616659665435214, "loss": 2.6164, "step": 15536 }, { "epoch": 0.7273450757797414, "grad_norm": 2.125, "learning_rate": 0.00016166137111793628, "loss": 2.2187, "step": 15537 }, { "epoch": 0.727391889519574, "grad_norm": 1.46875, "learning_rate": 0.00016165677548224362, "loss": 2.5652, "step": 15538 }, { "epoch": 0.7274387032594066, "grad_norm": 2.125, "learning_rate": 0.00016165217963645906, "loss": 3.0267, "step": 15539 }, { "epoch": 0.7274855169992392, "grad_norm": 1.34375, "learning_rate": 0.00016164758358059827, "loss": 2.6954, "step": 15540 }, { "epoch": 0.7275323307390719, "grad_norm": 1.3828125, "learning_rate": 0.0001616429873146769, "loss": 2.7067, "step": 15541 }, { "epoch": 0.7275791444789046, "grad_norm": 1.2734375, "learning_rate": 0.00016163839083871062, "loss": 2.4301, "step": 15542 }, { "epoch": 0.7276259582187372, "grad_norm": 1.3359375, "learning_rate": 0.00016163379415271508, "loss": 2.7112, "step": 15543 }, { "epoch": 0.7276727719585698, "grad_norm": 1.4453125, "learning_rate": 0.00016162919725670594, "loss": 2.7741, "step": 15544 }, { "epoch": 0.7277195856984024, "grad_norm": 1.296875, "learning_rate": 0.0001616246001506989, "loss": 2.5788, "step": 15545 }, { "epoch": 0.7277663994382351, "grad_norm": 1.109375, "learning_rate": 0.0001616200028347096, "loss": 2.7078, "step": 15546 }, { "epoch": 0.7278132131780678, "grad_norm": 1.40625, "learning_rate": 0.00016161540530875364, "loss": 2.8357, "step": 15547 }, { "epoch": 0.7278600269179004, "grad_norm": 1.5859375, "learning_rate": 0.0001616108075728468, "loss": 2.9459, "step": 15548 }, { "epoch": 0.727906840657733, "grad_norm": 1.1328125, "learning_rate": 0.0001616062096270047, "loss": 2.5342, "step": 15549 }, { "epoch": 0.7279536543975657, "grad_norm": 1.3125, "learning_rate": 0.00016160161147124298, "loss": 2.6439, "step": 15550 }, { "epoch": 0.7280004681373983, "grad_norm": 2.890625, "learning_rate": 0.00016159701310557734, "loss": 3.0411, "step": 15551 }, { "epoch": 0.728047281877231, "grad_norm": 1.625, "learning_rate": 0.00016159241453002343, "loss": 2.7192, "step": 15552 }, { "epoch": 0.7280940956170636, "grad_norm": 1.2734375, "learning_rate": 0.0001615878157445969, "loss": 2.379, "step": 15553 }, { "epoch": 0.7281409093568962, "grad_norm": 1.90625, "learning_rate": 0.0001615832167493135, "loss": 2.8197, "step": 15554 }, { "epoch": 0.7281877230967289, "grad_norm": 1.3828125, "learning_rate": 0.00016157861754418883, "loss": 2.6313, "step": 15555 }, { "epoch": 0.7282345368365616, "grad_norm": 1.65625, "learning_rate": 0.00016157401812923858, "loss": 2.6165, "step": 15556 }, { "epoch": 0.7282813505763942, "grad_norm": 1.0390625, "learning_rate": 0.0001615694185044784, "loss": 4.4257, "step": 15557 }, { "epoch": 0.7283281643162268, "grad_norm": 1.421875, "learning_rate": 0.00016156481866992403, "loss": 2.6756, "step": 15558 }, { "epoch": 0.7283749780560594, "grad_norm": 1.5390625, "learning_rate": 0.00016156021862559107, "loss": 2.8259, "step": 15559 }, { "epoch": 0.7284217917958921, "grad_norm": 1.609375, "learning_rate": 0.00016155561837149525, "loss": 2.9743, "step": 15560 }, { "epoch": 0.7284686055357248, "grad_norm": 1.078125, "learning_rate": 0.00016155101790765218, "loss": 2.2884, "step": 15561 }, { "epoch": 0.7285154192755574, "grad_norm": 1.875, "learning_rate": 0.00016154641723407757, "loss": 2.2991, "step": 15562 }, { "epoch": 0.72856223301539, "grad_norm": 2.015625, "learning_rate": 0.00016154181635078714, "loss": 2.5753, "step": 15563 }, { "epoch": 0.7286090467552226, "grad_norm": 1.53125, "learning_rate": 0.00016153721525779651, "loss": 2.3603, "step": 15564 }, { "epoch": 0.7286558604950553, "grad_norm": 1.46875, "learning_rate": 0.00016153261395512137, "loss": 2.4448, "step": 15565 }, { "epoch": 0.728702674234888, "grad_norm": 1.7890625, "learning_rate": 0.00016152801244277742, "loss": 2.6792, "step": 15566 }, { "epoch": 0.7287494879747206, "grad_norm": 1.359375, "learning_rate": 0.00016152341072078034, "loss": 2.8383, "step": 15567 }, { "epoch": 0.7287963017145532, "grad_norm": 1.6015625, "learning_rate": 0.00016151880878914574, "loss": 3.1064, "step": 15568 }, { "epoch": 0.7288431154543858, "grad_norm": 1.2109375, "learning_rate": 0.0001615142066478894, "loss": 2.3577, "step": 15569 }, { "epoch": 0.7288899291942185, "grad_norm": 1.359375, "learning_rate": 0.00016150960429702692, "loss": 2.512, "step": 15570 }, { "epoch": 0.7289367429340512, "grad_norm": 1.171875, "learning_rate": 0.00016150500173657407, "loss": 2.5158, "step": 15571 }, { "epoch": 0.7289835566738838, "grad_norm": 1.359375, "learning_rate": 0.0001615003989665464, "loss": 2.4486, "step": 15572 }, { "epoch": 0.7290303704137164, "grad_norm": 1.5625, "learning_rate": 0.00016149579598695977, "loss": 2.3776, "step": 15573 }, { "epoch": 0.729077184153549, "grad_norm": 1.9375, "learning_rate": 0.00016149119279782972, "loss": 2.8516, "step": 15574 }, { "epoch": 0.7291239978933817, "grad_norm": 1.640625, "learning_rate": 0.00016148658939917198, "loss": 2.8741, "step": 15575 }, { "epoch": 0.7291708116332144, "grad_norm": 1.3125, "learning_rate": 0.00016148198579100224, "loss": 3.5082, "step": 15576 }, { "epoch": 0.729217625373047, "grad_norm": 1.1875, "learning_rate": 0.00016147738197333622, "loss": 2.6266, "step": 15577 }, { "epoch": 0.7292644391128796, "grad_norm": 1.9921875, "learning_rate": 0.00016147277794618952, "loss": 2.3932, "step": 15578 }, { "epoch": 0.7293112528527123, "grad_norm": 1.3046875, "learning_rate": 0.00016146817370957795, "loss": 2.6762, "step": 15579 }, { "epoch": 0.7293580665925449, "grad_norm": 1.453125, "learning_rate": 0.00016146356926351709, "loss": 2.425, "step": 15580 }, { "epoch": 0.7294048803323776, "grad_norm": 1.640625, "learning_rate": 0.00016145896460802267, "loss": 3.0475, "step": 15581 }, { "epoch": 0.7294516940722102, "grad_norm": 1.109375, "learning_rate": 0.00016145435974311039, "loss": 2.5325, "step": 15582 }, { "epoch": 0.7294985078120428, "grad_norm": 1.453125, "learning_rate": 0.00016144975466879594, "loss": 2.7272, "step": 15583 }, { "epoch": 0.7295453215518755, "grad_norm": 1.7890625, "learning_rate": 0.00016144514938509498, "loss": 2.8007, "step": 15584 }, { "epoch": 0.7295921352917081, "grad_norm": 1.5078125, "learning_rate": 0.00016144054389202324, "loss": 1.8256, "step": 15585 }, { "epoch": 0.7296389490315408, "grad_norm": 1.734375, "learning_rate": 0.0001614359381895964, "loss": 2.4709, "step": 15586 }, { "epoch": 0.7296857627713734, "grad_norm": 1.8515625, "learning_rate": 0.00016143133227783016, "loss": 2.635, "step": 15587 }, { "epoch": 0.729732576511206, "grad_norm": 1.4140625, "learning_rate": 0.0001614267261567402, "loss": 2.4982, "step": 15588 }, { "epoch": 0.7297793902510387, "grad_norm": 2.703125, "learning_rate": 0.0001614221198263422, "loss": 2.1839, "step": 15589 }, { "epoch": 0.7298262039908713, "grad_norm": 1.3359375, "learning_rate": 0.00016141751328665186, "loss": 2.7212, "step": 15590 }, { "epoch": 0.729873017730704, "grad_norm": 2.0, "learning_rate": 0.00016141290653768495, "loss": 2.7404, "step": 15591 }, { "epoch": 0.7299198314705366, "grad_norm": 2.703125, "learning_rate": 0.0001614082995794571, "loss": 2.0224, "step": 15592 }, { "epoch": 0.7299666452103692, "grad_norm": 1.484375, "learning_rate": 0.000161403692411984, "loss": 2.899, "step": 15593 }, { "epoch": 0.7300134589502019, "grad_norm": 1.328125, "learning_rate": 0.0001613990850352814, "loss": 2.7414, "step": 15594 }, { "epoch": 0.7300602726900345, "grad_norm": 1.5, "learning_rate": 0.0001613944774493649, "loss": 2.5678, "step": 15595 }, { "epoch": 0.7301070864298672, "grad_norm": 1.453125, "learning_rate": 0.00016138986965425031, "loss": 2.6987, "step": 15596 }, { "epoch": 0.7301539001696998, "grad_norm": 2.90625, "learning_rate": 0.00016138526164995328, "loss": 2.7665, "step": 15597 }, { "epoch": 0.7302007139095325, "grad_norm": 1.15625, "learning_rate": 0.00016138065343648955, "loss": 2.6009, "step": 15598 }, { "epoch": 0.7302475276493651, "grad_norm": 1.1796875, "learning_rate": 0.00016137604501387473, "loss": 2.4162, "step": 15599 }, { "epoch": 0.7302943413891977, "grad_norm": 1.5078125, "learning_rate": 0.0001613714363821246, "loss": 2.6919, "step": 15600 }, { "epoch": 0.7303411551290304, "grad_norm": 1.1875, "learning_rate": 0.0001613668275412549, "loss": 2.4487, "step": 15601 }, { "epoch": 0.730387968868863, "grad_norm": 2.0625, "learning_rate": 0.00016136221849128126, "loss": 2.3068, "step": 15602 }, { "epoch": 0.7304347826086957, "grad_norm": 1.203125, "learning_rate": 0.00016135760923221938, "loss": 2.3285, "step": 15603 }, { "epoch": 0.7304815963485283, "grad_norm": 1.765625, "learning_rate": 0.00016135299976408502, "loss": 2.3064, "step": 15604 }, { "epoch": 0.7305284100883609, "grad_norm": 1.328125, "learning_rate": 0.00016134839008689384, "loss": 2.4111, "step": 15605 }, { "epoch": 0.7305752238281936, "grad_norm": 1.3515625, "learning_rate": 0.00016134378020066155, "loss": 2.6043, "step": 15606 }, { "epoch": 0.7306220375680262, "grad_norm": 1.5859375, "learning_rate": 0.00016133917010540395, "loss": 2.7884, "step": 15607 }, { "epoch": 0.7306688513078589, "grad_norm": 1.7734375, "learning_rate": 0.0001613345598011366, "loss": 2.3315, "step": 15608 }, { "epoch": 0.7307156650476915, "grad_norm": 1.1875, "learning_rate": 0.00016132994928787532, "loss": 2.6049, "step": 15609 }, { "epoch": 0.7307624787875241, "grad_norm": 1.421875, "learning_rate": 0.00016132533856563574, "loss": 2.3752, "step": 15610 }, { "epoch": 0.7308092925273568, "grad_norm": 1.4296875, "learning_rate": 0.00016132072763443364, "loss": 2.1022, "step": 15611 }, { "epoch": 0.7308561062671894, "grad_norm": 1.4453125, "learning_rate": 0.00016131611649428473, "loss": 2.7908, "step": 15612 }, { "epoch": 0.7309029200070221, "grad_norm": 1.1640625, "learning_rate": 0.00016131150514520467, "loss": 1.8784, "step": 15613 }, { "epoch": 0.7309497337468547, "grad_norm": 1.3046875, "learning_rate": 0.0001613068935872092, "loss": 2.5323, "step": 15614 }, { "epoch": 0.7309965474866873, "grad_norm": 1.5390625, "learning_rate": 0.00016130228182031407, "loss": 2.8956, "step": 15615 }, { "epoch": 0.73104336122652, "grad_norm": 1.359375, "learning_rate": 0.0001612976698445349, "loss": 2.4814, "step": 15616 }, { "epoch": 0.7310901749663526, "grad_norm": 1.2734375, "learning_rate": 0.0001612930576598875, "loss": 2.3414, "step": 15617 }, { "epoch": 0.7311369887061853, "grad_norm": 1.4296875, "learning_rate": 0.00016128844526638753, "loss": 2.4626, "step": 15618 }, { "epoch": 0.7311838024460179, "grad_norm": 2.078125, "learning_rate": 0.00016128383266405074, "loss": 2.5146, "step": 15619 }, { "epoch": 0.7312306161858505, "grad_norm": 1.3125, "learning_rate": 0.00016127921985289282, "loss": 2.5054, "step": 15620 }, { "epoch": 0.7312774299256832, "grad_norm": 1.21875, "learning_rate": 0.00016127460683292952, "loss": 2.3786, "step": 15621 }, { "epoch": 0.7313242436655158, "grad_norm": 1.1875, "learning_rate": 0.00016126999360417655, "loss": 2.3755, "step": 15622 }, { "epoch": 0.7313710574053485, "grad_norm": 1.5390625, "learning_rate": 0.00016126538016664958, "loss": 2.3707, "step": 15623 }, { "epoch": 0.7314178711451811, "grad_norm": 1.3046875, "learning_rate": 0.0001612607665203644, "loss": 2.7148, "step": 15624 }, { "epoch": 0.7314646848850137, "grad_norm": 1.2734375, "learning_rate": 0.00016125615266533667, "loss": 2.2622, "step": 15625 }, { "epoch": 0.7315114986248464, "grad_norm": 1.15625, "learning_rate": 0.00016125153860158212, "loss": 2.5642, "step": 15626 }, { "epoch": 0.731558312364679, "grad_norm": 1.328125, "learning_rate": 0.00016124692432911654, "loss": 2.553, "step": 15627 }, { "epoch": 0.7316051261045117, "grad_norm": 1.4765625, "learning_rate": 0.00016124230984795561, "loss": 2.6414, "step": 15628 }, { "epoch": 0.7316519398443443, "grad_norm": 1.8828125, "learning_rate": 0.00016123769515811502, "loss": 3.0745, "step": 15629 }, { "epoch": 0.7316987535841769, "grad_norm": 1.6640625, "learning_rate": 0.00016123308025961053, "loss": 2.6318, "step": 15630 }, { "epoch": 0.7317455673240096, "grad_norm": 1.296875, "learning_rate": 0.0001612284651524579, "loss": 2.5999, "step": 15631 }, { "epoch": 0.7317923810638423, "grad_norm": 1.4609375, "learning_rate": 0.00016122384983667277, "loss": 2.7004, "step": 15632 }, { "epoch": 0.7318391948036749, "grad_norm": 1.4296875, "learning_rate": 0.0001612192343122709, "loss": 2.6621, "step": 15633 }, { "epoch": 0.7318860085435075, "grad_norm": 1.4296875, "learning_rate": 0.00016121461857926804, "loss": 3.0577, "step": 15634 }, { "epoch": 0.7319328222833401, "grad_norm": 1.484375, "learning_rate": 0.00016121000263767993, "loss": 2.7436, "step": 15635 }, { "epoch": 0.7319796360231728, "grad_norm": 1.3984375, "learning_rate": 0.00016120538648752226, "loss": 2.5476, "step": 15636 }, { "epoch": 0.7320264497630055, "grad_norm": 1.4296875, "learning_rate": 0.00016120077012881073, "loss": 2.7183, "step": 15637 }, { "epoch": 0.7320732635028381, "grad_norm": 1.3359375, "learning_rate": 0.00016119615356156118, "loss": 2.8868, "step": 15638 }, { "epoch": 0.7321200772426707, "grad_norm": 2.328125, "learning_rate": 0.00016119153678578925, "loss": 2.5228, "step": 15639 }, { "epoch": 0.7321668909825033, "grad_norm": 4.84375, "learning_rate": 0.0001611869198015107, "loss": 3.2761, "step": 15640 }, { "epoch": 0.732213704722336, "grad_norm": 1.5703125, "learning_rate": 0.00016118230260874126, "loss": 2.5483, "step": 15641 }, { "epoch": 0.7322605184621687, "grad_norm": 1.3359375, "learning_rate": 0.00016117768520749662, "loss": 2.7132, "step": 15642 }, { "epoch": 0.7323073322020013, "grad_norm": 1.7578125, "learning_rate": 0.0001611730675977926, "loss": 2.7655, "step": 15643 }, { "epoch": 0.7323541459418339, "grad_norm": 1.1796875, "learning_rate": 0.00016116844977964484, "loss": 2.3863, "step": 15644 }, { "epoch": 0.7324009596816665, "grad_norm": 1.359375, "learning_rate": 0.00016116383175306917, "loss": 2.4634, "step": 15645 }, { "epoch": 0.7324477734214992, "grad_norm": 1.203125, "learning_rate": 0.00016115921351808127, "loss": 2.5384, "step": 15646 }, { "epoch": 0.7324945871613319, "grad_norm": 1.5, "learning_rate": 0.00016115459507469687, "loss": 2.7819, "step": 15647 }, { "epoch": 0.7325414009011645, "grad_norm": 1.1796875, "learning_rate": 0.00016114997642293174, "loss": 2.8354, "step": 15648 }, { "epoch": 0.7325882146409971, "grad_norm": 1.453125, "learning_rate": 0.0001611453575628016, "loss": 2.4876, "step": 15649 }, { "epoch": 0.7326350283808297, "grad_norm": 1.234375, "learning_rate": 0.00016114073849432216, "loss": 2.6341, "step": 15650 }, { "epoch": 0.7326818421206625, "grad_norm": 1.9140625, "learning_rate": 0.00016113611921750922, "loss": 3.4321, "step": 15651 }, { "epoch": 0.7327286558604951, "grad_norm": 1.3203125, "learning_rate": 0.00016113149973237846, "loss": 2.6442, "step": 15652 }, { "epoch": 0.7327754696003277, "grad_norm": 1.546875, "learning_rate": 0.0001611268800389457, "loss": 2.476, "step": 15653 }, { "epoch": 0.7328222833401603, "grad_norm": 1.421875, "learning_rate": 0.00016112226013722655, "loss": 2.8, "step": 15654 }, { "epoch": 0.7328690970799929, "grad_norm": 1.515625, "learning_rate": 0.0001611176400272369, "loss": 2.6903, "step": 15655 }, { "epoch": 0.7329159108198257, "grad_norm": 1.5, "learning_rate": 0.0001611130197089924, "loss": 2.7082, "step": 15656 }, { "epoch": 0.7329627245596583, "grad_norm": 1.5, "learning_rate": 0.00016110839918250883, "loss": 2.4108, "step": 15657 }, { "epoch": 0.7330095382994909, "grad_norm": 2.125, "learning_rate": 0.00016110377844780188, "loss": 2.781, "step": 15658 }, { "epoch": 0.7330563520393235, "grad_norm": 1.5234375, "learning_rate": 0.00016109915750488738, "loss": 2.3856, "step": 15659 }, { "epoch": 0.7331031657791561, "grad_norm": 1.296875, "learning_rate": 0.00016109453635378102, "loss": 2.5866, "step": 15660 }, { "epoch": 0.7331499795189889, "grad_norm": 2.125, "learning_rate": 0.00016108991499449855, "loss": 2.4989, "step": 15661 }, { "epoch": 0.7331967932588215, "grad_norm": 1.359375, "learning_rate": 0.00016108529342705573, "loss": 2.2022, "step": 15662 }, { "epoch": 0.7332436069986541, "grad_norm": 1.796875, "learning_rate": 0.0001610806716514683, "loss": 2.6371, "step": 15663 }, { "epoch": 0.7332904207384867, "grad_norm": 1.7265625, "learning_rate": 0.00016107604966775202, "loss": 2.7776, "step": 15664 }, { "epoch": 0.7333372344783193, "grad_norm": 1.4296875, "learning_rate": 0.00016107142747592262, "loss": 2.4426, "step": 15665 }, { "epoch": 0.7333840482181521, "grad_norm": 2.3125, "learning_rate": 0.0001610668050759959, "loss": 2.5514, "step": 15666 }, { "epoch": 0.7334308619579847, "grad_norm": 1.578125, "learning_rate": 0.00016106218246798753, "loss": 2.4421, "step": 15667 }, { "epoch": 0.7334776756978173, "grad_norm": 1.703125, "learning_rate": 0.0001610575596519133, "loss": 2.7932, "step": 15668 }, { "epoch": 0.7335244894376499, "grad_norm": 1.625, "learning_rate": 0.000161052936627789, "loss": 2.5935, "step": 15669 }, { "epoch": 0.7335713031774825, "grad_norm": 1.578125, "learning_rate": 0.00016104831339563034, "loss": 1.5957, "step": 15670 }, { "epoch": 0.7336181169173153, "grad_norm": 1.6484375, "learning_rate": 0.00016104368995545305, "loss": 2.6473, "step": 15671 }, { "epoch": 0.7336649306571479, "grad_norm": 1.4609375, "learning_rate": 0.00016103906630727293, "loss": 2.8851, "step": 15672 }, { "epoch": 0.7337117443969805, "grad_norm": 1.234375, "learning_rate": 0.0001610344424511057, "loss": 2.5129, "step": 15673 }, { "epoch": 0.7337585581368131, "grad_norm": 2.15625, "learning_rate": 0.00016102981838696717, "loss": 2.3704, "step": 15674 }, { "epoch": 0.7338053718766457, "grad_norm": 1.5234375, "learning_rate": 0.00016102519411487304, "loss": 2.7474, "step": 15675 }, { "epoch": 0.7338521856164785, "grad_norm": 1.2890625, "learning_rate": 0.0001610205696348391, "loss": 2.7946, "step": 15676 }, { "epoch": 0.7338989993563111, "grad_norm": 1.546875, "learning_rate": 0.0001610159449468811, "loss": 2.7008, "step": 15677 }, { "epoch": 0.7339458130961437, "grad_norm": 1.3203125, "learning_rate": 0.0001610113200510148, "loss": 2.2473, "step": 15678 }, { "epoch": 0.7339926268359763, "grad_norm": 1.5625, "learning_rate": 0.00016100669494725596, "loss": 2.6713, "step": 15679 }, { "epoch": 0.734039440575809, "grad_norm": 1.2421875, "learning_rate": 0.0001610020696356203, "loss": 2.5169, "step": 15680 }, { "epoch": 0.7340862543156417, "grad_norm": 1.2734375, "learning_rate": 0.00016099744411612364, "loss": 2.4827, "step": 15681 }, { "epoch": 0.7341330680554743, "grad_norm": 1.5859375, "learning_rate": 0.00016099281838878168, "loss": 2.3397, "step": 15682 }, { "epoch": 0.7341798817953069, "grad_norm": 1.578125, "learning_rate": 0.00016098819245361024, "loss": 2.332, "step": 15683 }, { "epoch": 0.7342266955351395, "grad_norm": 2.28125, "learning_rate": 0.0001609835663106251, "loss": 2.5761, "step": 15684 }, { "epoch": 0.7342735092749723, "grad_norm": 1.5546875, "learning_rate": 0.0001609789399598419, "loss": 2.5729, "step": 15685 }, { "epoch": 0.7343203230148049, "grad_norm": 2.5625, "learning_rate": 0.00016097431340127654, "loss": 2.5343, "step": 15686 }, { "epoch": 0.7343671367546375, "grad_norm": 1.78125, "learning_rate": 0.00016096968663494469, "loss": 2.7349, "step": 15687 }, { "epoch": 0.7344139504944701, "grad_norm": 1.3125, "learning_rate": 0.00016096505966086224, "loss": 2.8612, "step": 15688 }, { "epoch": 0.7344607642343027, "grad_norm": 1.2265625, "learning_rate": 0.0001609604324790448, "loss": 2.5297, "step": 15689 }, { "epoch": 0.7345075779741355, "grad_norm": 1.3125, "learning_rate": 0.00016095580508950822, "loss": 2.6913, "step": 15690 }, { "epoch": 0.7345543917139681, "grad_norm": 1.4921875, "learning_rate": 0.0001609511774922683, "loss": 2.3583, "step": 15691 }, { "epoch": 0.7346012054538007, "grad_norm": 1.53125, "learning_rate": 0.0001609465496873407, "loss": 2.6079, "step": 15692 }, { "epoch": 0.7346480191936333, "grad_norm": 1.328125, "learning_rate": 0.0001609419216747413, "loss": 2.9259, "step": 15693 }, { "epoch": 0.7346948329334659, "grad_norm": 1.640625, "learning_rate": 0.0001609372934544858, "loss": 2.935, "step": 15694 }, { "epoch": 0.7347416466732987, "grad_norm": 2.09375, "learning_rate": 0.00016093266502659, "loss": 2.9449, "step": 15695 }, { "epoch": 0.7347884604131313, "grad_norm": 1.2578125, "learning_rate": 0.00016092803639106967, "loss": 2.7599, "step": 15696 }, { "epoch": 0.7348352741529639, "grad_norm": 1.0859375, "learning_rate": 0.00016092340754794062, "loss": 2.4801, "step": 15697 }, { "epoch": 0.7348820878927965, "grad_norm": 1.3046875, "learning_rate": 0.00016091877849721856, "loss": 2.6266, "step": 15698 }, { "epoch": 0.7349289016326291, "grad_norm": 1.359375, "learning_rate": 0.00016091414923891925, "loss": 2.7273, "step": 15699 }, { "epoch": 0.7349757153724619, "grad_norm": 1.1796875, "learning_rate": 0.00016090951977305854, "loss": 2.7399, "step": 15700 }, { "epoch": 0.7350225291122945, "grad_norm": 1.3046875, "learning_rate": 0.0001609048900996521, "loss": 2.6061, "step": 15701 }, { "epoch": 0.7350693428521271, "grad_norm": 1.40625, "learning_rate": 0.0001609002602187158, "loss": 2.8211, "step": 15702 }, { "epoch": 0.7351161565919597, "grad_norm": 2.640625, "learning_rate": 0.00016089563013026541, "loss": 2.8081, "step": 15703 }, { "epoch": 0.7351629703317923, "grad_norm": 1.359375, "learning_rate": 0.00016089099983431667, "loss": 2.4228, "step": 15704 }, { "epoch": 0.7352097840716251, "grad_norm": 1.40625, "learning_rate": 0.00016088636933088532, "loss": 2.7743, "step": 15705 }, { "epoch": 0.7352565978114577, "grad_norm": 1.5703125, "learning_rate": 0.00016088173861998723, "loss": 2.9281, "step": 15706 }, { "epoch": 0.7353034115512903, "grad_norm": 1.453125, "learning_rate": 0.00016087710770163813, "loss": 2.2584, "step": 15707 }, { "epoch": 0.7353502252911229, "grad_norm": 1.765625, "learning_rate": 0.00016087247657585381, "loss": 2.5329, "step": 15708 }, { "epoch": 0.7353970390309555, "grad_norm": 2.0, "learning_rate": 0.00016086784524265002, "loss": 2.1462, "step": 15709 }, { "epoch": 0.7354438527707883, "grad_norm": 1.1796875, "learning_rate": 0.00016086321370204257, "loss": 3.9023, "step": 15710 }, { "epoch": 0.7354906665106209, "grad_norm": 1.6875, "learning_rate": 0.00016085858195404726, "loss": 2.8817, "step": 15711 }, { "epoch": 0.7355374802504535, "grad_norm": 1.359375, "learning_rate": 0.00016085394999867985, "loss": 2.5202, "step": 15712 }, { "epoch": 0.7355842939902861, "grad_norm": 1.21875, "learning_rate": 0.0001608493178359561, "loss": 2.3798, "step": 15713 }, { "epoch": 0.7356311077301187, "grad_norm": 2.015625, "learning_rate": 0.00016084468546589183, "loss": 2.9304, "step": 15714 }, { "epoch": 0.7356779214699515, "grad_norm": 1.7109375, "learning_rate": 0.00016084005288850278, "loss": 2.4617, "step": 15715 }, { "epoch": 0.7357247352097841, "grad_norm": 1.15625, "learning_rate": 0.00016083542010380484, "loss": 2.4149, "step": 15716 }, { "epoch": 0.7357715489496167, "grad_norm": 1.8203125, "learning_rate": 0.00016083078711181368, "loss": 2.838, "step": 15717 }, { "epoch": 0.7358183626894493, "grad_norm": 2.984375, "learning_rate": 0.00016082615391254512, "loss": 2.3111, "step": 15718 }, { "epoch": 0.735865176429282, "grad_norm": 1.6328125, "learning_rate": 0.00016082152050601495, "loss": 2.7298, "step": 15719 }, { "epoch": 0.7359119901691147, "grad_norm": 1.609375, "learning_rate": 0.00016081688689223897, "loss": 2.4344, "step": 15720 }, { "epoch": 0.7359588039089473, "grad_norm": 1.546875, "learning_rate": 0.000160812253071233, "loss": 2.8734, "step": 15721 }, { "epoch": 0.7360056176487799, "grad_norm": 1.125, "learning_rate": 0.00016080761904301274, "loss": 2.6539, "step": 15722 }, { "epoch": 0.7360524313886125, "grad_norm": 1.3828125, "learning_rate": 0.00016080298480759407, "loss": 2.6826, "step": 15723 }, { "epoch": 0.7360992451284452, "grad_norm": 1.8671875, "learning_rate": 0.00016079835036499272, "loss": 2.826, "step": 15724 }, { "epoch": 0.7361460588682779, "grad_norm": 1.265625, "learning_rate": 0.00016079371571522453, "loss": 2.5135, "step": 15725 }, { "epoch": 0.7361928726081105, "grad_norm": 1.1796875, "learning_rate": 0.00016078908085830524, "loss": 2.4389, "step": 15726 }, { "epoch": 0.7362396863479431, "grad_norm": 1.8359375, "learning_rate": 0.0001607844457942507, "loss": 2.7395, "step": 15727 }, { "epoch": 0.7362865000877757, "grad_norm": 1.8359375, "learning_rate": 0.00016077981052307666, "loss": 2.1755, "step": 15728 }, { "epoch": 0.7363333138276084, "grad_norm": 1.5, "learning_rate": 0.0001607751750447989, "loss": 2.4048, "step": 15729 }, { "epoch": 0.7363801275674411, "grad_norm": 1.34375, "learning_rate": 0.0001607705393594333, "loss": 2.309, "step": 15730 }, { "epoch": 0.7364269413072737, "grad_norm": 1.7890625, "learning_rate": 0.00016076590346699555, "loss": 2.2639, "step": 15731 }, { "epoch": 0.7364737550471063, "grad_norm": 1.4765625, "learning_rate": 0.00016076126736750155, "loss": 2.3772, "step": 15732 }, { "epoch": 0.736520568786939, "grad_norm": 1.1484375, "learning_rate": 0.00016075663106096698, "loss": 2.4254, "step": 15733 }, { "epoch": 0.7365673825267716, "grad_norm": 1.5859375, "learning_rate": 0.00016075199454740775, "loss": 3.0229, "step": 15734 }, { "epoch": 0.7366141962666043, "grad_norm": 1.296875, "learning_rate": 0.0001607473578268396, "loss": 2.4275, "step": 15735 }, { "epoch": 0.7366610100064369, "grad_norm": 1.2734375, "learning_rate": 0.00016074272089927833, "loss": 2.4902, "step": 15736 }, { "epoch": 0.7367078237462695, "grad_norm": 1.6796875, "learning_rate": 0.00016073808376473975, "loss": 2.6942, "step": 15737 }, { "epoch": 0.7367546374861021, "grad_norm": 1.6796875, "learning_rate": 0.00016073344642323966, "loss": 2.505, "step": 15738 }, { "epoch": 0.7368014512259348, "grad_norm": 1.2421875, "learning_rate": 0.00016072880887479387, "loss": 2.5171, "step": 15739 }, { "epoch": 0.7368482649657675, "grad_norm": 1.5703125, "learning_rate": 0.00016072417111941814, "loss": 2.8531, "step": 15740 }, { "epoch": 0.7368950787056001, "grad_norm": 1.796875, "learning_rate": 0.00016071953315712837, "loss": 2.8402, "step": 15741 }, { "epoch": 0.7369418924454327, "grad_norm": 1.6171875, "learning_rate": 0.00016071489498794026, "loss": 2.3067, "step": 15742 }, { "epoch": 0.7369887061852654, "grad_norm": 1.5703125, "learning_rate": 0.00016071025661186966, "loss": 2.5693, "step": 15743 }, { "epoch": 0.737035519925098, "grad_norm": 1.2890625, "learning_rate": 0.00016070561802893237, "loss": 2.5661, "step": 15744 }, { "epoch": 0.7370823336649307, "grad_norm": 1.5546875, "learning_rate": 0.0001607009792391442, "loss": 2.4191, "step": 15745 }, { "epoch": 0.7371291474047633, "grad_norm": 1.6015625, "learning_rate": 0.00016069634024252095, "loss": 2.5421, "step": 15746 }, { "epoch": 0.7371759611445959, "grad_norm": 1.453125, "learning_rate": 0.00016069170103907846, "loss": 2.4217, "step": 15747 }, { "epoch": 0.7372227748844286, "grad_norm": 1.0859375, "learning_rate": 0.00016068706162883245, "loss": 2.0793, "step": 15748 }, { "epoch": 0.7372695886242612, "grad_norm": 1.609375, "learning_rate": 0.0001606824220117988, "loss": 2.7809, "step": 15749 }, { "epoch": 0.7373164023640939, "grad_norm": 1.640625, "learning_rate": 0.00016067778218799332, "loss": 2.6501, "step": 15750 }, { "epoch": 0.7373632161039265, "grad_norm": 1.4453125, "learning_rate": 0.0001606731421574318, "loss": 2.4407, "step": 15751 }, { "epoch": 0.7374100298437591, "grad_norm": 1.2109375, "learning_rate": 0.00016066850192013004, "loss": 2.6744, "step": 15752 }, { "epoch": 0.7374568435835918, "grad_norm": 2.65625, "learning_rate": 0.00016066386147610388, "loss": 2.7946, "step": 15753 }, { "epoch": 0.7375036573234244, "grad_norm": 1.5078125, "learning_rate": 0.00016065922082536912, "loss": 2.629, "step": 15754 }, { "epoch": 0.7375504710632571, "grad_norm": 1.125, "learning_rate": 0.00016065457996794158, "loss": 2.4718, "step": 15755 }, { "epoch": 0.7375972848030897, "grad_norm": 1.7734375, "learning_rate": 0.00016064993890383707, "loss": 2.8906, "step": 15756 }, { "epoch": 0.7376440985429223, "grad_norm": 1.4140625, "learning_rate": 0.00016064529763307138, "loss": 2.8647, "step": 15757 }, { "epoch": 0.737690912282755, "grad_norm": 1.8359375, "learning_rate": 0.00016064065615566033, "loss": 2.6964, "step": 15758 }, { "epoch": 0.7377377260225876, "grad_norm": 1.734375, "learning_rate": 0.00016063601447161978, "loss": 2.8029, "step": 15759 }, { "epoch": 0.7377845397624203, "grad_norm": 1.3671875, "learning_rate": 0.0001606313725809655, "loss": 2.5102, "step": 15760 }, { "epoch": 0.7378313535022529, "grad_norm": 1.5078125, "learning_rate": 0.00016062673048371332, "loss": 2.4699, "step": 15761 }, { "epoch": 0.7378781672420855, "grad_norm": 2.484375, "learning_rate": 0.00016062208817987906, "loss": 2.8347, "step": 15762 }, { "epoch": 0.7379249809819182, "grad_norm": 3.078125, "learning_rate": 0.00016061744566947853, "loss": 2.4628, "step": 15763 }, { "epoch": 0.7379717947217508, "grad_norm": 2.25, "learning_rate": 0.0001606128029525276, "loss": 2.8298, "step": 15764 }, { "epoch": 0.7380186084615835, "grad_norm": 1.3984375, "learning_rate": 0.000160608160029042, "loss": 2.5024, "step": 15765 }, { "epoch": 0.7380654222014161, "grad_norm": 1.34375, "learning_rate": 0.00016060351689903764, "loss": 2.4379, "step": 15766 }, { "epoch": 0.7381122359412488, "grad_norm": 1.3046875, "learning_rate": 0.0001605988735625303, "loss": 2.2189, "step": 15767 }, { "epoch": 0.7381590496810814, "grad_norm": 1.453125, "learning_rate": 0.00016059423001953577, "loss": 2.2113, "step": 15768 }, { "epoch": 0.738205863420914, "grad_norm": 1.328125, "learning_rate": 0.0001605895862700699, "loss": 2.3338, "step": 15769 }, { "epoch": 0.7382526771607467, "grad_norm": 1.2578125, "learning_rate": 0.00016058494231414852, "loss": 2.9013, "step": 15770 }, { "epoch": 0.7382994909005793, "grad_norm": 1.6015625, "learning_rate": 0.00016058029815178745, "loss": 2.7774, "step": 15771 }, { "epoch": 0.738346304640412, "grad_norm": 1.03125, "learning_rate": 0.00016057565378300255, "loss": 3.6912, "step": 15772 }, { "epoch": 0.7383931183802446, "grad_norm": 1.5859375, "learning_rate": 0.00016057100920780957, "loss": 2.2803, "step": 15773 }, { "epoch": 0.7384399321200772, "grad_norm": 1.3671875, "learning_rate": 0.00016056636442622442, "loss": 2.501, "step": 15774 }, { "epoch": 0.7384867458599099, "grad_norm": 1.34375, "learning_rate": 0.00016056171943826283, "loss": 3.1185, "step": 15775 }, { "epoch": 0.7385335595997425, "grad_norm": 1.625, "learning_rate": 0.00016055707424394072, "loss": 2.7379, "step": 15776 }, { "epoch": 0.7385803733395752, "grad_norm": 1.609375, "learning_rate": 0.00016055242884327386, "loss": 2.7426, "step": 15777 }, { "epoch": 0.7386271870794078, "grad_norm": 1.671875, "learning_rate": 0.00016054778323627811, "loss": 2.6798, "step": 15778 }, { "epoch": 0.7386740008192404, "grad_norm": 1.2890625, "learning_rate": 0.0001605431374229693, "loss": 2.4132, "step": 15779 }, { "epoch": 0.7387208145590731, "grad_norm": 1.4765625, "learning_rate": 0.00016053849140336323, "loss": 2.0857, "step": 15780 }, { "epoch": 0.7387676282989057, "grad_norm": 1.2734375, "learning_rate": 0.00016053384517747575, "loss": 2.5563, "step": 15781 }, { "epoch": 0.7388144420387384, "grad_norm": 1.21875, "learning_rate": 0.0001605291987453227, "loss": 2.3962, "step": 15782 }, { "epoch": 0.738861255778571, "grad_norm": 1.7265625, "learning_rate": 0.0001605245521069199, "loss": 2.283, "step": 15783 }, { "epoch": 0.7389080695184036, "grad_norm": 1.2578125, "learning_rate": 0.0001605199052622832, "loss": 2.6668, "step": 15784 }, { "epoch": 0.7389548832582363, "grad_norm": 1.3359375, "learning_rate": 0.0001605152582114284, "loss": 2.5485, "step": 15785 }, { "epoch": 0.739001696998069, "grad_norm": 1.5859375, "learning_rate": 0.00016051061095437137, "loss": 2.5686, "step": 15786 }, { "epoch": 0.7390485107379016, "grad_norm": 1.71875, "learning_rate": 0.00016050596349112794, "loss": 2.5469, "step": 15787 }, { "epoch": 0.7390953244777342, "grad_norm": 1.15625, "learning_rate": 0.0001605013158217139, "loss": 2.4417, "step": 15788 }, { "epoch": 0.7391421382175668, "grad_norm": 1.21875, "learning_rate": 0.00016049666794614516, "loss": 2.4215, "step": 15789 }, { "epoch": 0.7391889519573995, "grad_norm": 1.359375, "learning_rate": 0.00016049201986443752, "loss": 3.1222, "step": 15790 }, { "epoch": 0.7392357656972322, "grad_norm": 1.40625, "learning_rate": 0.0001604873715766068, "loss": 2.4486, "step": 15791 }, { "epoch": 0.7392825794370648, "grad_norm": 2.234375, "learning_rate": 0.00016048272308266884, "loss": 2.7621, "step": 15792 }, { "epoch": 0.7393293931768974, "grad_norm": 1.3515625, "learning_rate": 0.00016047807438263956, "loss": 2.8034, "step": 15793 }, { "epoch": 0.73937620691673, "grad_norm": 1.5390625, "learning_rate": 0.00016047342547653468, "loss": 2.487, "step": 15794 }, { "epoch": 0.7394230206565627, "grad_norm": 1.234375, "learning_rate": 0.0001604687763643701, "loss": 2.7778, "step": 15795 }, { "epoch": 0.7394698343963954, "grad_norm": 1.40625, "learning_rate": 0.00016046412704616168, "loss": 2.2488, "step": 15796 }, { "epoch": 0.739516648136228, "grad_norm": 1.125, "learning_rate": 0.00016045947752192524, "loss": 2.8921, "step": 15797 }, { "epoch": 0.7395634618760606, "grad_norm": 1.609375, "learning_rate": 0.00016045482779167666, "loss": 2.7127, "step": 15798 }, { "epoch": 0.7396102756158933, "grad_norm": 3.046875, "learning_rate": 0.0001604501778554317, "loss": 2.6056, "step": 15799 }, { "epoch": 0.7396570893557259, "grad_norm": 1.46875, "learning_rate": 0.00016044552771320625, "loss": 2.4337, "step": 15800 }, { "epoch": 0.7397039030955586, "grad_norm": 1.1171875, "learning_rate": 0.00016044087736501617, "loss": 2.627, "step": 15801 }, { "epoch": 0.7397507168353912, "grad_norm": 1.3671875, "learning_rate": 0.0001604362268108773, "loss": 2.6375, "step": 15802 }, { "epoch": 0.7397975305752238, "grad_norm": 1.390625, "learning_rate": 0.00016043157605080547, "loss": 2.826, "step": 15803 }, { "epoch": 0.7398443443150565, "grad_norm": 1.2578125, "learning_rate": 0.00016042692508481656, "loss": 2.5877, "step": 15804 }, { "epoch": 0.7398911580548891, "grad_norm": 1.6796875, "learning_rate": 0.00016042227391292637, "loss": 2.5331, "step": 15805 }, { "epoch": 0.7399379717947218, "grad_norm": 1.4375, "learning_rate": 0.0001604176225351508, "loss": 2.9229, "step": 15806 }, { "epoch": 0.7399847855345544, "grad_norm": 1.6328125, "learning_rate": 0.00016041297095150566, "loss": 2.6737, "step": 15807 }, { "epoch": 0.740031599274387, "grad_norm": 1.2421875, "learning_rate": 0.00016040831916200681, "loss": 2.4475, "step": 15808 }, { "epoch": 0.7400784130142197, "grad_norm": 1.2109375, "learning_rate": 0.0001604036671666701, "loss": 2.9066, "step": 15809 }, { "epoch": 0.7401252267540523, "grad_norm": 1.4765625, "learning_rate": 0.0001603990149655114, "loss": 2.7129, "step": 15810 }, { "epoch": 0.740172040493885, "grad_norm": 1.7109375, "learning_rate": 0.00016039436255854654, "loss": 2.479, "step": 15811 }, { "epoch": 0.7402188542337176, "grad_norm": 1.71875, "learning_rate": 0.00016038970994579138, "loss": 2.6615, "step": 15812 }, { "epoch": 0.7402656679735502, "grad_norm": 1.8046875, "learning_rate": 0.00016038505712726177, "loss": 2.9053, "step": 15813 }, { "epoch": 0.7403124817133829, "grad_norm": 1.46875, "learning_rate": 0.00016038040410297358, "loss": 2.0368, "step": 15814 }, { "epoch": 0.7403592954532155, "grad_norm": 1.6953125, "learning_rate": 0.00016037575087294266, "loss": 2.2319, "step": 15815 }, { "epoch": 0.7404061091930482, "grad_norm": 1.4375, "learning_rate": 0.0001603710974371848, "loss": 2.5429, "step": 15816 }, { "epoch": 0.7404529229328808, "grad_norm": 1.6171875, "learning_rate": 0.000160366443795716, "loss": 2.5736, "step": 15817 }, { "epoch": 0.7404997366727134, "grad_norm": 1.4765625, "learning_rate": 0.00016036178994855197, "loss": 2.2917, "step": 15818 }, { "epoch": 0.7405465504125461, "grad_norm": 1.4296875, "learning_rate": 0.00016035713589570866, "loss": 3.2362, "step": 15819 }, { "epoch": 0.7405933641523788, "grad_norm": 1.25, "learning_rate": 0.00016035248163720188, "loss": 2.7561, "step": 15820 }, { "epoch": 0.7406401778922114, "grad_norm": 1.3359375, "learning_rate": 0.00016034782717304756, "loss": 2.6447, "step": 15821 }, { "epoch": 0.740686991632044, "grad_norm": 1.4296875, "learning_rate": 0.00016034317250326145, "loss": 2.751, "step": 15822 }, { "epoch": 0.7407338053718766, "grad_norm": 4.96875, "learning_rate": 0.00016033851762785946, "loss": 2.6365, "step": 15823 }, { "epoch": 0.7407806191117093, "grad_norm": 1.421875, "learning_rate": 0.0001603338625468575, "loss": 2.7556, "step": 15824 }, { "epoch": 0.740827432851542, "grad_norm": 1.4765625, "learning_rate": 0.00016032920726027135, "loss": 2.4974, "step": 15825 }, { "epoch": 0.7408742465913746, "grad_norm": 1.5703125, "learning_rate": 0.00016032455176811695, "loss": 2.4045, "step": 15826 }, { "epoch": 0.7409210603312072, "grad_norm": 1.59375, "learning_rate": 0.00016031989607041011, "loss": 2.6723, "step": 15827 }, { "epoch": 0.7409678740710398, "grad_norm": 2.046875, "learning_rate": 0.0001603152401671667, "loss": 2.5008, "step": 15828 }, { "epoch": 0.7410146878108725, "grad_norm": 1.1484375, "learning_rate": 0.00016031058405840262, "loss": 2.402, "step": 15829 }, { "epoch": 0.7410615015507052, "grad_norm": 1.234375, "learning_rate": 0.0001603059277441337, "loss": 2.4868, "step": 15830 }, { "epoch": 0.7411083152905378, "grad_norm": 1.5078125, "learning_rate": 0.00016030127122437584, "loss": 2.6912, "step": 15831 }, { "epoch": 0.7411551290303704, "grad_norm": 1.2265625, "learning_rate": 0.00016029661449914486, "loss": 2.2622, "step": 15832 }, { "epoch": 0.741201942770203, "grad_norm": 1.2109375, "learning_rate": 0.00016029195756845662, "loss": 2.4263, "step": 15833 }, { "epoch": 0.7412487565100357, "grad_norm": 1.8671875, "learning_rate": 0.00016028730043232708, "loss": 2.4752, "step": 15834 }, { "epoch": 0.7412955702498684, "grad_norm": 2.828125, "learning_rate": 0.00016028264309077202, "loss": 2.4124, "step": 15835 }, { "epoch": 0.741342383989701, "grad_norm": 1.4375, "learning_rate": 0.00016027798554380733, "loss": 2.9637, "step": 15836 }, { "epoch": 0.7413891977295336, "grad_norm": 1.1484375, "learning_rate": 0.0001602733277914489, "loss": 2.5594, "step": 15837 }, { "epoch": 0.7414360114693662, "grad_norm": 1.1796875, "learning_rate": 0.0001602686698337126, "loss": 2.5353, "step": 15838 }, { "epoch": 0.741482825209199, "grad_norm": 1.4296875, "learning_rate": 0.00016026401167061426, "loss": 2.9436, "step": 15839 }, { "epoch": 0.7415296389490316, "grad_norm": 1.203125, "learning_rate": 0.0001602593533021698, "loss": 2.5119, "step": 15840 }, { "epoch": 0.7415764526888642, "grad_norm": 1.3359375, "learning_rate": 0.00016025469472839507, "loss": 2.5769, "step": 15841 }, { "epoch": 0.7416232664286968, "grad_norm": 1.5859375, "learning_rate": 0.000160250035949306, "loss": 2.2961, "step": 15842 }, { "epoch": 0.7416700801685294, "grad_norm": 1.3828125, "learning_rate": 0.00016024537696491834, "loss": 2.3481, "step": 15843 }, { "epoch": 0.7417168939083622, "grad_norm": 1.9453125, "learning_rate": 0.00016024071777524806, "loss": 2.5103, "step": 15844 }, { "epoch": 0.7417637076481948, "grad_norm": 1.6015625, "learning_rate": 0.00016023605838031104, "loss": 2.4127, "step": 15845 }, { "epoch": 0.7418105213880274, "grad_norm": 1.3671875, "learning_rate": 0.00016023139878012312, "loss": 2.2967, "step": 15846 }, { "epoch": 0.74185733512786, "grad_norm": 1.703125, "learning_rate": 0.0001602267389747002, "loss": 2.2377, "step": 15847 }, { "epoch": 0.7419041488676926, "grad_norm": 2.015625, "learning_rate": 0.00016022207896405815, "loss": 2.6965, "step": 15848 }, { "epoch": 0.7419509626075254, "grad_norm": 1.5859375, "learning_rate": 0.00016021741874821282, "loss": 2.6757, "step": 15849 }, { "epoch": 0.741997776347358, "grad_norm": 1.625, "learning_rate": 0.00016021275832718015, "loss": 2.431, "step": 15850 }, { "epoch": 0.7420445900871906, "grad_norm": 1.296875, "learning_rate": 0.00016020809770097598, "loss": 2.2109, "step": 15851 }, { "epoch": 0.7420914038270232, "grad_norm": 2.75, "learning_rate": 0.00016020343686961618, "loss": 2.73, "step": 15852 }, { "epoch": 0.7421382175668558, "grad_norm": 1.6015625, "learning_rate": 0.00016019877583311668, "loss": 2.8457, "step": 15853 }, { "epoch": 0.7421850313066886, "grad_norm": 1.6328125, "learning_rate": 0.00016019411459149328, "loss": 2.4713, "step": 15854 }, { "epoch": 0.7422318450465212, "grad_norm": 2.140625, "learning_rate": 0.00016018945314476196, "loss": 2.8291, "step": 15855 }, { "epoch": 0.7422786587863538, "grad_norm": 2.328125, "learning_rate": 0.00016018479149293853, "loss": 2.6872, "step": 15856 }, { "epoch": 0.7423254725261864, "grad_norm": 1.5, "learning_rate": 0.0001601801296360389, "loss": 2.4548, "step": 15857 }, { "epoch": 0.742372286266019, "grad_norm": 1.46875, "learning_rate": 0.000160175467574079, "loss": 3.0412, "step": 15858 }, { "epoch": 0.7424191000058518, "grad_norm": 1.8984375, "learning_rate": 0.0001601708053070746, "loss": 3.0999, "step": 15859 }, { "epoch": 0.7424659137456844, "grad_norm": 1.484375, "learning_rate": 0.00016016614283504175, "loss": 2.8094, "step": 15860 }, { "epoch": 0.742512727485517, "grad_norm": 1.921875, "learning_rate": 0.00016016148015799618, "loss": 2.5303, "step": 15861 }, { "epoch": 0.7425595412253496, "grad_norm": 1.921875, "learning_rate": 0.00016015681727595387, "loss": 2.5844, "step": 15862 }, { "epoch": 0.7426063549651822, "grad_norm": 1.9296875, "learning_rate": 0.00016015215418893066, "loss": 2.3533, "step": 15863 }, { "epoch": 0.742653168705015, "grad_norm": 1.6015625, "learning_rate": 0.00016014749089694248, "loss": 2.6442, "step": 15864 }, { "epoch": 0.7426999824448476, "grad_norm": 2.015625, "learning_rate": 0.0001601428274000052, "loss": 2.5822, "step": 15865 }, { "epoch": 0.7427467961846802, "grad_norm": 1.171875, "learning_rate": 0.00016013816369813468, "loss": 2.3153, "step": 15866 }, { "epoch": 0.7427936099245128, "grad_norm": 1.25, "learning_rate": 0.0001601334997913469, "loss": 2.6085, "step": 15867 }, { "epoch": 0.7428404236643454, "grad_norm": 1.2890625, "learning_rate": 0.00016012883567965766, "loss": 2.6833, "step": 15868 }, { "epoch": 0.7428872374041782, "grad_norm": 1.4609375, "learning_rate": 0.0001601241713630829, "loss": 2.7707, "step": 15869 }, { "epoch": 0.7429340511440108, "grad_norm": 1.4140625, "learning_rate": 0.00016011950684163847, "loss": 2.5675, "step": 15870 }, { "epoch": 0.7429808648838434, "grad_norm": 1.671875, "learning_rate": 0.00016011484211534034, "loss": 2.8617, "step": 15871 }, { "epoch": 0.743027678623676, "grad_norm": 1.40625, "learning_rate": 0.00016011017718420433, "loss": 2.694, "step": 15872 }, { "epoch": 0.7430744923635086, "grad_norm": 1.4375, "learning_rate": 0.00016010551204824637, "loss": 2.6463, "step": 15873 }, { "epoch": 0.7431213061033414, "grad_norm": 1.359375, "learning_rate": 0.00016010084670748238, "loss": 2.6074, "step": 15874 }, { "epoch": 0.743168119843174, "grad_norm": 1.4296875, "learning_rate": 0.00016009618116192818, "loss": 2.8581, "step": 15875 }, { "epoch": 0.7432149335830066, "grad_norm": 1.4921875, "learning_rate": 0.00016009151541159978, "loss": 2.3149, "step": 15876 }, { "epoch": 0.7432617473228392, "grad_norm": 1.7109375, "learning_rate": 0.00016008684945651295, "loss": 2.4801, "step": 15877 }, { "epoch": 0.7433085610626718, "grad_norm": 2.125, "learning_rate": 0.0001600821832966837, "loss": 2.4791, "step": 15878 }, { "epoch": 0.7433553748025046, "grad_norm": 1.5234375, "learning_rate": 0.00016007751693212786, "loss": 2.394, "step": 15879 }, { "epoch": 0.7434021885423372, "grad_norm": 1.484375, "learning_rate": 0.00016007285036286138, "loss": 2.7627, "step": 15880 }, { "epoch": 0.7434490022821698, "grad_norm": 1.171875, "learning_rate": 0.0001600681835889001, "loss": 2.3916, "step": 15881 }, { "epoch": 0.7434958160220024, "grad_norm": 1.390625, "learning_rate": 0.00016006351661025997, "loss": 2.2396, "step": 15882 }, { "epoch": 0.743542629761835, "grad_norm": 1.4296875, "learning_rate": 0.0001600588494269569, "loss": 2.637, "step": 15883 }, { "epoch": 0.7435894435016678, "grad_norm": 1.2890625, "learning_rate": 0.00016005418203900672, "loss": 2.8066, "step": 15884 }, { "epoch": 0.7436362572415004, "grad_norm": 1.1796875, "learning_rate": 0.00016004951444642545, "loss": 2.5113, "step": 15885 }, { "epoch": 0.743683070981333, "grad_norm": 1.2734375, "learning_rate": 0.0001600448466492289, "loss": 2.2609, "step": 15886 }, { "epoch": 0.7437298847211656, "grad_norm": 1.2265625, "learning_rate": 0.000160040178647433, "loss": 3.0901, "step": 15887 }, { "epoch": 0.7437766984609983, "grad_norm": 1.046875, "learning_rate": 0.00016003551044105368, "loss": 3.5034, "step": 15888 }, { "epoch": 0.743823512200831, "grad_norm": 1.21875, "learning_rate": 0.0001600308420301068, "loss": 2.3204, "step": 15889 }, { "epoch": 0.7438703259406636, "grad_norm": 1.21875, "learning_rate": 0.0001600261734146083, "loss": 2.1519, "step": 15890 }, { "epoch": 0.7439171396804962, "grad_norm": 1.546875, "learning_rate": 0.0001600215045945741, "loss": 2.7794, "step": 15891 }, { "epoch": 0.7439639534203288, "grad_norm": 1.8828125, "learning_rate": 0.0001600168355700201, "loss": 2.7536, "step": 15892 }, { "epoch": 0.7440107671601615, "grad_norm": 1.515625, "learning_rate": 0.00016001216634096218, "loss": 2.6005, "step": 15893 }, { "epoch": 0.7440575808999942, "grad_norm": 2.0, "learning_rate": 0.00016000749690741627, "loss": 2.7607, "step": 15894 }, { "epoch": 0.7441043946398268, "grad_norm": 1.828125, "learning_rate": 0.0001600028272693983, "loss": 2.3542, "step": 15895 }, { "epoch": 0.7441512083796594, "grad_norm": 1.34375, "learning_rate": 0.00015999815742692418, "loss": 2.4815, "step": 15896 }, { "epoch": 0.744198022119492, "grad_norm": 1.5625, "learning_rate": 0.00015999348738000973, "loss": 2.5888, "step": 15897 }, { "epoch": 0.7442448358593247, "grad_norm": 1.359375, "learning_rate": 0.00015998881712867103, "loss": 2.7305, "step": 15898 }, { "epoch": 0.7442916495991574, "grad_norm": 1.359375, "learning_rate": 0.00015998414667292387, "loss": 2.4367, "step": 15899 }, { "epoch": 0.74433846333899, "grad_norm": 1.3125, "learning_rate": 0.00015997947601278418, "loss": 2.9744, "step": 15900 }, { "epoch": 0.7443852770788226, "grad_norm": 1.125, "learning_rate": 0.00015997480514826792, "loss": 2.3739, "step": 15901 }, { "epoch": 0.7444320908186552, "grad_norm": 1.8359375, "learning_rate": 0.00015997013407939096, "loss": 2.8116, "step": 15902 }, { "epoch": 0.7444789045584879, "grad_norm": 1.78125, "learning_rate": 0.00015996546280616923, "loss": 3.0283, "step": 15903 }, { "epoch": 0.7445257182983206, "grad_norm": 1.7578125, "learning_rate": 0.0001599607913286187, "loss": 2.9511, "step": 15904 }, { "epoch": 0.7445725320381532, "grad_norm": 1.3125, "learning_rate": 0.00015995611964675517, "loss": 2.7308, "step": 15905 }, { "epoch": 0.7446193457779858, "grad_norm": 1.65625, "learning_rate": 0.00015995144776059466, "loss": 2.6538, "step": 15906 }, { "epoch": 0.7446661595178184, "grad_norm": 1.953125, "learning_rate": 0.00015994677567015306, "loss": 2.4813, "step": 15907 }, { "epoch": 0.7447129732576511, "grad_norm": 1.59375, "learning_rate": 0.00015994210337544627, "loss": 2.7907, "step": 15908 }, { "epoch": 0.7447597869974838, "grad_norm": 2.03125, "learning_rate": 0.00015993743087649027, "loss": 2.6849, "step": 15909 }, { "epoch": 0.7448066007373164, "grad_norm": 1.734375, "learning_rate": 0.00015993275817330091, "loss": 2.2206, "step": 15910 }, { "epoch": 0.744853414477149, "grad_norm": 1.59375, "learning_rate": 0.00015992808526589414, "loss": 2.072, "step": 15911 }, { "epoch": 0.7449002282169817, "grad_norm": 2.734375, "learning_rate": 0.00015992341215428587, "loss": 2.217, "step": 15912 }, { "epoch": 0.7449470419568143, "grad_norm": 1.546875, "learning_rate": 0.00015991873883849204, "loss": 2.59, "step": 15913 }, { "epoch": 0.744993855696647, "grad_norm": 1.359375, "learning_rate": 0.00015991406531852857, "loss": 2.7813, "step": 15914 }, { "epoch": 0.7450406694364796, "grad_norm": 1.453125, "learning_rate": 0.00015990939159441143, "loss": 2.7994, "step": 15915 }, { "epoch": 0.7450874831763122, "grad_norm": 1.6796875, "learning_rate": 0.00015990471766615648, "loss": 2.8337, "step": 15916 }, { "epoch": 0.7451342969161449, "grad_norm": 1.3125, "learning_rate": 0.00015990004353377962, "loss": 2.6451, "step": 15917 }, { "epoch": 0.7451811106559775, "grad_norm": 1.2265625, "learning_rate": 0.00015989536919729688, "loss": 2.6862, "step": 15918 }, { "epoch": 0.7452279243958102, "grad_norm": 1.6171875, "learning_rate": 0.0001598906946567241, "loss": 2.4602, "step": 15919 }, { "epoch": 0.7452747381356428, "grad_norm": 1.3125, "learning_rate": 0.00015988601991207726, "loss": 2.5165, "step": 15920 }, { "epoch": 0.7453215518754754, "grad_norm": 1.6171875, "learning_rate": 0.00015988134496337225, "loss": 2.4319, "step": 15921 }, { "epoch": 0.7453683656153081, "grad_norm": 1.65625, "learning_rate": 0.00015987666981062503, "loss": 2.909, "step": 15922 }, { "epoch": 0.7454151793551408, "grad_norm": 1.9609375, "learning_rate": 0.00015987199445385153, "loss": 2.4022, "step": 15923 }, { "epoch": 0.7454619930949734, "grad_norm": 1.5390625, "learning_rate": 0.00015986731889306764, "loss": 2.7364, "step": 15924 }, { "epoch": 0.745508806834806, "grad_norm": 1.578125, "learning_rate": 0.00015986264312828935, "loss": 2.4793, "step": 15925 }, { "epoch": 0.7455556205746386, "grad_norm": 1.6015625, "learning_rate": 0.00015985796715953257, "loss": 2.771, "step": 15926 }, { "epoch": 0.7456024343144713, "grad_norm": 1.1484375, "learning_rate": 0.0001598532909868132, "loss": 2.3454, "step": 15927 }, { "epoch": 0.745649248054304, "grad_norm": 1.2734375, "learning_rate": 0.0001598486146101472, "loss": 2.6786, "step": 15928 }, { "epoch": 0.7456960617941366, "grad_norm": 1.640625, "learning_rate": 0.00015984393802955053, "loss": 2.7439, "step": 15929 }, { "epoch": 0.7457428755339692, "grad_norm": 1.1484375, "learning_rate": 0.00015983926124503906, "loss": 2.7138, "step": 15930 }, { "epoch": 0.7457896892738018, "grad_norm": 1.1640625, "learning_rate": 0.0001598345842566288, "loss": 2.6575, "step": 15931 }, { "epoch": 0.7458365030136345, "grad_norm": 1.296875, "learning_rate": 0.00015982990706433565, "loss": 2.7345, "step": 15932 }, { "epoch": 0.7458833167534672, "grad_norm": 1.640625, "learning_rate": 0.00015982522966817553, "loss": 2.8279, "step": 15933 }, { "epoch": 0.7459301304932998, "grad_norm": 0.9765625, "learning_rate": 0.00015982055206816443, "loss": 1.8066, "step": 15934 }, { "epoch": 0.7459769442331324, "grad_norm": 1.2578125, "learning_rate": 0.00015981587426431822, "loss": 2.4243, "step": 15935 }, { "epoch": 0.746023757972965, "grad_norm": 1.625, "learning_rate": 0.0001598111962566529, "loss": 2.6419, "step": 15936 }, { "epoch": 0.7460705717127977, "grad_norm": 3.203125, "learning_rate": 0.00015980651804518438, "loss": 2.281, "step": 15937 }, { "epoch": 0.7461173854526304, "grad_norm": 1.234375, "learning_rate": 0.0001598018396299286, "loss": 2.5989, "step": 15938 }, { "epoch": 0.746164199192463, "grad_norm": 1.4375, "learning_rate": 0.0001597971610109015, "loss": 2.9827, "step": 15939 }, { "epoch": 0.7462110129322956, "grad_norm": 1.703125, "learning_rate": 0.00015979248218811907, "loss": 2.4569, "step": 15940 }, { "epoch": 0.7462578266721283, "grad_norm": 2.28125, "learning_rate": 0.00015978780316159717, "loss": 2.6264, "step": 15941 }, { "epoch": 0.7463046404119609, "grad_norm": 2.015625, "learning_rate": 0.00015978312393135182, "loss": 2.6246, "step": 15942 }, { "epoch": 0.7463514541517936, "grad_norm": 1.3359375, "learning_rate": 0.00015977844449739892, "loss": 2.6559, "step": 15943 }, { "epoch": 0.7463982678916262, "grad_norm": 1.3125, "learning_rate": 0.0001597737648597544, "loss": 2.3146, "step": 15944 }, { "epoch": 0.7464450816314588, "grad_norm": 1.8203125, "learning_rate": 0.00015976908501843428, "loss": 2.7995, "step": 15945 }, { "epoch": 0.7464918953712915, "grad_norm": 1.4921875, "learning_rate": 0.00015976440497345444, "loss": 2.7139, "step": 15946 }, { "epoch": 0.7465387091111241, "grad_norm": 1.421875, "learning_rate": 0.00015975972472483083, "loss": 2.6848, "step": 15947 }, { "epoch": 0.7465855228509568, "grad_norm": 2.03125, "learning_rate": 0.00015975504427257942, "loss": 2.8011, "step": 15948 }, { "epoch": 0.7466323365907894, "grad_norm": 1.5859375, "learning_rate": 0.00015975036361671614, "loss": 3.0756, "step": 15949 }, { "epoch": 0.746679150330622, "grad_norm": 2.328125, "learning_rate": 0.00015974568275725694, "loss": 2.5939, "step": 15950 }, { "epoch": 0.7467259640704547, "grad_norm": 1.7734375, "learning_rate": 0.0001597410016942178, "loss": 2.1296, "step": 15951 }, { "epoch": 0.7467727778102873, "grad_norm": 1.546875, "learning_rate": 0.00015973632042761465, "loss": 2.6934, "step": 15952 }, { "epoch": 0.74681959155012, "grad_norm": 1.609375, "learning_rate": 0.00015973163895746347, "loss": 2.786, "step": 15953 }, { "epoch": 0.7468664052899526, "grad_norm": 1.3828125, "learning_rate": 0.0001597269572837801, "loss": 2.5127, "step": 15954 }, { "epoch": 0.7469132190297852, "grad_norm": 1.0546875, "learning_rate": 0.00015972227540658068, "loss": 2.5339, "step": 15955 }, { "epoch": 0.7469600327696179, "grad_norm": 1.4453125, "learning_rate": 0.000159717593325881, "loss": 2.7267, "step": 15956 }, { "epoch": 0.7470068465094505, "grad_norm": 1.3984375, "learning_rate": 0.00015971291104169706, "loss": 2.4294, "step": 15957 }, { "epoch": 0.7470536602492832, "grad_norm": 1.5859375, "learning_rate": 0.00015970822855404485, "loss": 2.5768, "step": 15958 }, { "epoch": 0.7471004739891158, "grad_norm": 2.046875, "learning_rate": 0.0001597035458629403, "loss": 2.4914, "step": 15959 }, { "epoch": 0.7471472877289485, "grad_norm": 1.4609375, "learning_rate": 0.00015969886296839935, "loss": 2.8249, "step": 15960 }, { "epoch": 0.7471941014687811, "grad_norm": 1.28125, "learning_rate": 0.000159694179870438, "loss": 1.9871, "step": 15961 }, { "epoch": 0.7472409152086137, "grad_norm": 1.28125, "learning_rate": 0.00015968949656907218, "loss": 2.3341, "step": 15962 }, { "epoch": 0.7472877289484464, "grad_norm": 1.3203125, "learning_rate": 0.00015968481306431783, "loss": 2.6468, "step": 15963 }, { "epoch": 0.747334542688279, "grad_norm": 1.484375, "learning_rate": 0.00015968012935619098, "loss": 2.5949, "step": 15964 }, { "epoch": 0.7473813564281117, "grad_norm": 1.421875, "learning_rate": 0.0001596754454447075, "loss": 2.329, "step": 15965 }, { "epoch": 0.7474281701679443, "grad_norm": 1.3828125, "learning_rate": 0.00015967076132988335, "loss": 2.508, "step": 15966 }, { "epoch": 0.7474749839077769, "grad_norm": 1.1953125, "learning_rate": 0.0001596660770117346, "loss": 2.0235, "step": 15967 }, { "epoch": 0.7475217976476096, "grad_norm": 1.5, "learning_rate": 0.00015966139249027708, "loss": 2.1569, "step": 15968 }, { "epoch": 0.7475686113874422, "grad_norm": 1.25, "learning_rate": 0.00015965670776552687, "loss": 2.8833, "step": 15969 }, { "epoch": 0.7476154251272749, "grad_norm": 1.5234375, "learning_rate": 0.00015965202283749986, "loss": 2.8722, "step": 15970 }, { "epoch": 0.7476622388671075, "grad_norm": 1.6171875, "learning_rate": 0.00015964733770621202, "loss": 2.8712, "step": 15971 }, { "epoch": 0.7477090526069401, "grad_norm": 2.046875, "learning_rate": 0.00015964265237167932, "loss": 2.8706, "step": 15972 }, { "epoch": 0.7477558663467728, "grad_norm": 1.2734375, "learning_rate": 0.00015963796683391774, "loss": 2.9923, "step": 15973 }, { "epoch": 0.7478026800866054, "grad_norm": 1.5234375, "learning_rate": 0.00015963328109294325, "loss": 2.3459, "step": 15974 }, { "epoch": 0.7478494938264381, "grad_norm": 1.1953125, "learning_rate": 0.00015962859514877177, "loss": 2.624, "step": 15975 }, { "epoch": 0.7478963075662707, "grad_norm": 1.515625, "learning_rate": 0.00015962390900141931, "loss": 2.6184, "step": 15976 }, { "epoch": 0.7479431213061033, "grad_norm": 1.3359375, "learning_rate": 0.00015961922265090184, "loss": 2.4423, "step": 15977 }, { "epoch": 0.747989935045936, "grad_norm": 2.265625, "learning_rate": 0.0001596145360972353, "loss": 2.442, "step": 15978 }, { "epoch": 0.7480367487857686, "grad_norm": 1.2265625, "learning_rate": 0.0001596098493404357, "loss": 2.4472, "step": 15979 }, { "epoch": 0.7480835625256013, "grad_norm": 1.2265625, "learning_rate": 0.00015960516238051894, "loss": 2.5025, "step": 15980 }, { "epoch": 0.7481303762654339, "grad_norm": 1.25, "learning_rate": 0.00015960047521750107, "loss": 2.5268, "step": 15981 }, { "epoch": 0.7481771900052665, "grad_norm": 1.796875, "learning_rate": 0.000159595787851398, "loss": 2.5653, "step": 15982 }, { "epoch": 0.7482240037450992, "grad_norm": 1.4140625, "learning_rate": 0.00015959110028222572, "loss": 2.6796, "step": 15983 }, { "epoch": 0.7482708174849318, "grad_norm": 4.0, "learning_rate": 0.00015958641251000025, "loss": 2.6038, "step": 15984 }, { "epoch": 0.7483176312247645, "grad_norm": 1.21875, "learning_rate": 0.00015958172453473752, "loss": 2.7568, "step": 15985 }, { "epoch": 0.7483644449645971, "grad_norm": 2.25, "learning_rate": 0.00015957703635645349, "loss": 2.6031, "step": 15986 }, { "epoch": 0.7484112587044297, "grad_norm": 1.2421875, "learning_rate": 0.00015957234797516417, "loss": 2.8085, "step": 15987 }, { "epoch": 0.7484580724442624, "grad_norm": 1.78125, "learning_rate": 0.0001595676593908855, "loss": 2.2735, "step": 15988 }, { "epoch": 0.748504886184095, "grad_norm": 1.0078125, "learning_rate": 0.00015956297060363346, "loss": 2.7866, "step": 15989 }, { "epoch": 0.7485516999239277, "grad_norm": 1.4921875, "learning_rate": 0.00015955828161342406, "loss": 2.1538, "step": 15990 }, { "epoch": 0.7485985136637603, "grad_norm": 1.2421875, "learning_rate": 0.00015955359242027326, "loss": 2.5869, "step": 15991 }, { "epoch": 0.7486453274035929, "grad_norm": 1.5234375, "learning_rate": 0.000159548903024197, "loss": 2.6273, "step": 15992 }, { "epoch": 0.7486921411434256, "grad_norm": 1.375, "learning_rate": 0.00015954421342521139, "loss": 2.4151, "step": 15993 }, { "epoch": 0.7487389548832583, "grad_norm": 1.8203125, "learning_rate": 0.0001595395236233322, "loss": 2.4245, "step": 15994 }, { "epoch": 0.7487857686230909, "grad_norm": 1.6875, "learning_rate": 0.00015953483361857558, "loss": 2.3363, "step": 15995 }, { "epoch": 0.7488325823629235, "grad_norm": 1.4140625, "learning_rate": 0.00015953014341095747, "loss": 2.2603, "step": 15996 }, { "epoch": 0.7488793961027561, "grad_norm": 1.8828125, "learning_rate": 0.00015952545300049382, "loss": 3.0418, "step": 15997 }, { "epoch": 0.7489262098425888, "grad_norm": 1.640625, "learning_rate": 0.00015952076238720062, "loss": 2.6633, "step": 15998 }, { "epoch": 0.7489730235824215, "grad_norm": 2.234375, "learning_rate": 0.00015951607157109384, "loss": 2.8472, "step": 15999 }, { "epoch": 0.7490198373222541, "grad_norm": 1.71875, "learning_rate": 0.00015951138055218954, "loss": 2.2721, "step": 16000 }, { "epoch": 0.7490666510620867, "grad_norm": 1.9296875, "learning_rate": 0.00015950668933050364, "loss": 2.3527, "step": 16001 }, { "epoch": 0.7491134648019193, "grad_norm": 1.3046875, "learning_rate": 0.0001595019979060521, "loss": 2.645, "step": 16002 }, { "epoch": 0.749160278541752, "grad_norm": 1.1328125, "learning_rate": 0.000159497306278851, "loss": 2.2968, "step": 16003 }, { "epoch": 0.7492070922815847, "grad_norm": 1.9140625, "learning_rate": 0.0001594926144489162, "loss": 2.3907, "step": 16004 }, { "epoch": 0.7492539060214173, "grad_norm": 2.328125, "learning_rate": 0.0001594879224162638, "loss": 2.4362, "step": 16005 }, { "epoch": 0.7493007197612499, "grad_norm": 1.6484375, "learning_rate": 0.00015948323018090974, "loss": 2.6863, "step": 16006 }, { "epoch": 0.7493475335010825, "grad_norm": 1.8125, "learning_rate": 0.00015947853774287, "loss": 2.616, "step": 16007 }, { "epoch": 0.7493943472409152, "grad_norm": 1.5546875, "learning_rate": 0.0001594738451021606, "loss": 2.613, "step": 16008 }, { "epoch": 0.7494411609807479, "grad_norm": 1.875, "learning_rate": 0.00015946915225879748, "loss": 2.6892, "step": 16009 }, { "epoch": 0.7494879747205805, "grad_norm": 1.53125, "learning_rate": 0.00015946445921279667, "loss": 2.4725, "step": 16010 }, { "epoch": 0.7495347884604131, "grad_norm": 1.5625, "learning_rate": 0.00015945976596417415, "loss": 2.5408, "step": 16011 }, { "epoch": 0.7495816022002457, "grad_norm": 1.625, "learning_rate": 0.00015945507251294595, "loss": 2.8869, "step": 16012 }, { "epoch": 0.7496284159400785, "grad_norm": 1.484375, "learning_rate": 0.000159450378859128, "loss": 2.6795, "step": 16013 }, { "epoch": 0.7496752296799111, "grad_norm": 1.609375, "learning_rate": 0.00015944568500273629, "loss": 2.5577, "step": 16014 }, { "epoch": 0.7497220434197437, "grad_norm": 1.7578125, "learning_rate": 0.00015944099094378688, "loss": 2.3738, "step": 16015 }, { "epoch": 0.7497688571595763, "grad_norm": 1.4609375, "learning_rate": 0.00015943629668229572, "loss": 2.2402, "step": 16016 }, { "epoch": 0.7498156708994089, "grad_norm": 1.1875, "learning_rate": 0.00015943160221827882, "loss": 2.4003, "step": 16017 }, { "epoch": 0.7498624846392417, "grad_norm": 2.796875, "learning_rate": 0.00015942690755175218, "loss": 1.9254, "step": 16018 }, { "epoch": 0.7499092983790743, "grad_norm": 1.5078125, "learning_rate": 0.0001594222126827318, "loss": 2.6172, "step": 16019 }, { "epoch": 0.7499561121189069, "grad_norm": 1.828125, "learning_rate": 0.00015941751761123363, "loss": 2.3949, "step": 16020 }, { "epoch": 0.7500029258587395, "grad_norm": 1.546875, "learning_rate": 0.00015941282233727369, "loss": 2.1318, "step": 16021 }, { "epoch": 0.7500497395985721, "grad_norm": 1.40625, "learning_rate": 0.00015940812686086805, "loss": 2.5411, "step": 16022 }, { "epoch": 0.7500965533384049, "grad_norm": 1.3046875, "learning_rate": 0.0001594034311820326, "loss": 2.5629, "step": 16023 }, { "epoch": 0.7501433670782375, "grad_norm": 1.4453125, "learning_rate": 0.0001593987353007834, "loss": 2.7122, "step": 16024 }, { "epoch": 0.7501901808180701, "grad_norm": 1.5546875, "learning_rate": 0.00015939403921713647, "loss": 2.6812, "step": 16025 }, { "epoch": 0.7502369945579027, "grad_norm": 1.609375, "learning_rate": 0.00015938934293110773, "loss": 2.1512, "step": 16026 }, { "epoch": 0.7502838082977353, "grad_norm": 1.46875, "learning_rate": 0.0001593846464427133, "loss": 2.2695, "step": 16027 }, { "epoch": 0.7503306220375681, "grad_norm": 1.609375, "learning_rate": 0.0001593799497519691, "loss": 2.6767, "step": 16028 }, { "epoch": 0.7503774357774007, "grad_norm": 1.390625, "learning_rate": 0.00015937525285889114, "loss": 2.6196, "step": 16029 }, { "epoch": 0.7504242495172333, "grad_norm": 1.4453125, "learning_rate": 0.00015937055576349543, "loss": 2.2704, "step": 16030 }, { "epoch": 0.7504710632570659, "grad_norm": 1.5390625, "learning_rate": 0.00015936585846579803, "loss": 2.3832, "step": 16031 }, { "epoch": 0.7505178769968985, "grad_norm": 1.2734375, "learning_rate": 0.00015936116096581485, "loss": 2.7053, "step": 16032 }, { "epoch": 0.7505646907367313, "grad_norm": 1.3203125, "learning_rate": 0.00015935646326356196, "loss": 2.4447, "step": 16033 }, { "epoch": 0.7506115044765639, "grad_norm": 1.4140625, "learning_rate": 0.00015935176535905536, "loss": 2.5997, "step": 16034 }, { "epoch": 0.7506583182163965, "grad_norm": 1.546875, "learning_rate": 0.00015934706725231106, "loss": 2.8899, "step": 16035 }, { "epoch": 0.7507051319562291, "grad_norm": 1.28125, "learning_rate": 0.00015934236894334504, "loss": 2.4135, "step": 16036 }, { "epoch": 0.7507519456960617, "grad_norm": 1.3359375, "learning_rate": 0.0001593376704321733, "loss": 2.7846, "step": 16037 }, { "epoch": 0.7507987594358945, "grad_norm": 1.640625, "learning_rate": 0.00015933297171881195, "loss": 2.5132, "step": 16038 }, { "epoch": 0.7508455731757271, "grad_norm": 1.3828125, "learning_rate": 0.00015932827280327688, "loss": 2.68, "step": 16039 }, { "epoch": 0.7508923869155597, "grad_norm": 2.203125, "learning_rate": 0.00015932357368558415, "loss": 2.777, "step": 16040 }, { "epoch": 0.7509392006553923, "grad_norm": 1.65625, "learning_rate": 0.0001593188743657498, "loss": 3.2586, "step": 16041 }, { "epoch": 0.7509860143952249, "grad_norm": 1.9453125, "learning_rate": 0.00015931417484378978, "loss": 2.6915, "step": 16042 }, { "epoch": 0.7510328281350577, "grad_norm": 1.1875, "learning_rate": 0.00015930947511972015, "loss": 2.5548, "step": 16043 }, { "epoch": 0.7510796418748903, "grad_norm": 1.3046875, "learning_rate": 0.00015930477519355692, "loss": 2.5549, "step": 16044 }, { "epoch": 0.7511264556147229, "grad_norm": 1.2421875, "learning_rate": 0.0001593000750653161, "loss": 2.2082, "step": 16045 }, { "epoch": 0.7511732693545555, "grad_norm": 1.28125, "learning_rate": 0.0001592953747350137, "loss": 2.6254, "step": 16046 }, { "epoch": 0.7512200830943883, "grad_norm": 1.109375, "learning_rate": 0.00015929067420266573, "loss": 2.8754, "step": 16047 }, { "epoch": 0.7512668968342209, "grad_norm": 1.2265625, "learning_rate": 0.0001592859734682882, "loss": 2.5651, "step": 16048 }, { "epoch": 0.7513137105740535, "grad_norm": 1.4453125, "learning_rate": 0.00015928127253189716, "loss": 2.6575, "step": 16049 }, { "epoch": 0.7513605243138861, "grad_norm": 1.328125, "learning_rate": 0.00015927657139350862, "loss": 2.5729, "step": 16050 }, { "epoch": 0.7514073380537187, "grad_norm": 1.3046875, "learning_rate": 0.00015927187005313856, "loss": 2.735, "step": 16051 }, { "epoch": 0.7514541517935515, "grad_norm": 1.2734375, "learning_rate": 0.00015926716851080307, "loss": 2.1274, "step": 16052 }, { "epoch": 0.7515009655333841, "grad_norm": 1.5546875, "learning_rate": 0.0001592624667665181, "loss": 2.5635, "step": 16053 }, { "epoch": 0.7515477792732167, "grad_norm": 1.6875, "learning_rate": 0.0001592577648202997, "loss": 2.9839, "step": 16054 }, { "epoch": 0.7515945930130493, "grad_norm": 2.140625, "learning_rate": 0.0001592530626721639, "loss": 2.6199, "step": 16055 }, { "epoch": 0.7516414067528819, "grad_norm": 1.46875, "learning_rate": 0.0001592483603221267, "loss": 2.5523, "step": 16056 }, { "epoch": 0.7516882204927147, "grad_norm": 1.921875, "learning_rate": 0.00015924365777020416, "loss": 2.2738, "step": 16057 }, { "epoch": 0.7517350342325473, "grad_norm": 1.1015625, "learning_rate": 0.00015923895501641227, "loss": 2.4035, "step": 16058 }, { "epoch": 0.7517818479723799, "grad_norm": 1.1796875, "learning_rate": 0.00015923425206076702, "loss": 2.6566, "step": 16059 }, { "epoch": 0.7518286617122125, "grad_norm": 1.1796875, "learning_rate": 0.00015922954890328455, "loss": 2.5677, "step": 16060 }, { "epoch": 0.7518754754520451, "grad_norm": 1.296875, "learning_rate": 0.00015922484554398077, "loss": 2.5703, "step": 16061 }, { "epoch": 0.7519222891918779, "grad_norm": 1.5625, "learning_rate": 0.00015922014198287179, "loss": 2.5203, "step": 16062 }, { "epoch": 0.7519691029317105, "grad_norm": 1.84375, "learning_rate": 0.0001592154382199736, "loss": 2.7114, "step": 16063 }, { "epoch": 0.7520159166715431, "grad_norm": 1.3515625, "learning_rate": 0.0001592107342553022, "loss": 2.6993, "step": 16064 }, { "epoch": 0.7520627304113757, "grad_norm": 1.5, "learning_rate": 0.00015920603008887362, "loss": 2.825, "step": 16065 }, { "epoch": 0.7521095441512083, "grad_norm": 1.375, "learning_rate": 0.00015920132572070397, "loss": 2.4165, "step": 16066 }, { "epoch": 0.7521563578910411, "grad_norm": 1.4296875, "learning_rate": 0.00015919662115080919, "loss": 2.5865, "step": 16067 }, { "epoch": 0.7522031716308737, "grad_norm": 2.078125, "learning_rate": 0.00015919191637920536, "loss": 2.6982, "step": 16068 }, { "epoch": 0.7522499853707063, "grad_norm": 1.2890625, "learning_rate": 0.00015918721140590852, "loss": 2.3287, "step": 16069 }, { "epoch": 0.7522967991105389, "grad_norm": 1.4140625, "learning_rate": 0.00015918250623093465, "loss": 2.4375, "step": 16070 }, { "epoch": 0.7523436128503715, "grad_norm": 1.3359375, "learning_rate": 0.0001591778008542998, "loss": 2.7829, "step": 16071 }, { "epoch": 0.7523904265902043, "grad_norm": 1.734375, "learning_rate": 0.00015917309527602, "loss": 2.8216, "step": 16072 }, { "epoch": 0.7524372403300369, "grad_norm": 1.546875, "learning_rate": 0.00015916838949611134, "loss": 2.3837, "step": 16073 }, { "epoch": 0.7524840540698695, "grad_norm": 1.34375, "learning_rate": 0.00015916368351458982, "loss": 2.5483, "step": 16074 }, { "epoch": 0.7525308678097021, "grad_norm": 1.2890625, "learning_rate": 0.00015915897733147146, "loss": 2.5305, "step": 16075 }, { "epoch": 0.7525776815495347, "grad_norm": 1.15625, "learning_rate": 0.00015915427094677226, "loss": 2.4409, "step": 16076 }, { "epoch": 0.7526244952893675, "grad_norm": 1.8125, "learning_rate": 0.0001591495643605083, "loss": 2.5643, "step": 16077 }, { "epoch": 0.7526713090292001, "grad_norm": 1.359375, "learning_rate": 0.00015914485757269567, "loss": 2.5244, "step": 16078 }, { "epoch": 0.7527181227690327, "grad_norm": 1.15625, "learning_rate": 0.00015914015058335037, "loss": 2.2383, "step": 16079 }, { "epoch": 0.7527649365088653, "grad_norm": 1.21875, "learning_rate": 0.00015913544339248842, "loss": 2.4708, "step": 16080 }, { "epoch": 0.752811750248698, "grad_norm": 1.5390625, "learning_rate": 0.0001591307360001258, "loss": 2.4332, "step": 16081 }, { "epoch": 0.7528585639885307, "grad_norm": 1.296875, "learning_rate": 0.00015912602840627865, "loss": 2.2867, "step": 16082 }, { "epoch": 0.7529053777283633, "grad_norm": 1.5234375, "learning_rate": 0.00015912132061096296, "loss": 2.5324, "step": 16083 }, { "epoch": 0.7529521914681959, "grad_norm": 1.6328125, "learning_rate": 0.00015911661261419483, "loss": 2.7697, "step": 16084 }, { "epoch": 0.7529990052080285, "grad_norm": 2.109375, "learning_rate": 0.00015911190441599023, "loss": 2.1777, "step": 16085 }, { "epoch": 0.7530458189478612, "grad_norm": 1.3125, "learning_rate": 0.00015910719601636524, "loss": 2.3135, "step": 16086 }, { "epoch": 0.7530926326876939, "grad_norm": 1.609375, "learning_rate": 0.0001591024874153359, "loss": 2.4316, "step": 16087 }, { "epoch": 0.7531394464275265, "grad_norm": 1.359375, "learning_rate": 0.00015909777861291822, "loss": 2.6494, "step": 16088 }, { "epoch": 0.7531862601673591, "grad_norm": 1.453125, "learning_rate": 0.00015909306960912832, "loss": 2.8084, "step": 16089 }, { "epoch": 0.7532330739071917, "grad_norm": 1.421875, "learning_rate": 0.0001590883604039822, "loss": 2.5794, "step": 16090 }, { "epoch": 0.7532798876470244, "grad_norm": 1.1953125, "learning_rate": 0.00015908365099749587, "loss": 2.3328, "step": 16091 }, { "epoch": 0.7533267013868571, "grad_norm": 0.703125, "learning_rate": 0.0001590789413896854, "loss": 2.8248, "step": 16092 }, { "epoch": 0.7533735151266897, "grad_norm": 1.4140625, "learning_rate": 0.0001590742315805669, "loss": 2.7978, "step": 16093 }, { "epoch": 0.7534203288665223, "grad_norm": 1.3515625, "learning_rate": 0.00015906952157015636, "loss": 2.341, "step": 16094 }, { "epoch": 0.753467142606355, "grad_norm": 1.34375, "learning_rate": 0.00015906481135846985, "loss": 2.633, "step": 16095 }, { "epoch": 0.7535139563461876, "grad_norm": 2.734375, "learning_rate": 0.00015906010094552338, "loss": 2.4673, "step": 16096 }, { "epoch": 0.7535607700860203, "grad_norm": 1.3125, "learning_rate": 0.00015905539033133305, "loss": 2.579, "step": 16097 }, { "epoch": 0.7536075838258529, "grad_norm": 1.265625, "learning_rate": 0.0001590506795159149, "loss": 2.7279, "step": 16098 }, { "epoch": 0.7536543975656855, "grad_norm": 1.421875, "learning_rate": 0.00015904596849928494, "loss": 2.8983, "step": 16099 }, { "epoch": 0.7537012113055181, "grad_norm": 1.46875, "learning_rate": 0.00015904125728145928, "loss": 2.686, "step": 16100 }, { "epoch": 0.7537480250453508, "grad_norm": 1.359375, "learning_rate": 0.00015903654586245397, "loss": 2.8714, "step": 16101 }, { "epoch": 0.7537948387851835, "grad_norm": 1.265625, "learning_rate": 0.000159031834242285, "loss": 2.6445, "step": 16102 }, { "epoch": 0.7538416525250161, "grad_norm": 1.3671875, "learning_rate": 0.00015902712242096847, "loss": 2.8051, "step": 16103 }, { "epoch": 0.7538884662648487, "grad_norm": 1.3515625, "learning_rate": 0.00015902241039852043, "loss": 2.6204, "step": 16104 }, { "epoch": 0.7539352800046814, "grad_norm": 1.359375, "learning_rate": 0.000159017698174957, "loss": 2.8712, "step": 16105 }, { "epoch": 0.753982093744514, "grad_norm": 1.8515625, "learning_rate": 0.0001590129857502941, "loss": 2.6463, "step": 16106 }, { "epoch": 0.7540289074843467, "grad_norm": 1.5078125, "learning_rate": 0.00015900827312454786, "loss": 2.7117, "step": 16107 }, { "epoch": 0.7540757212241793, "grad_norm": 1.578125, "learning_rate": 0.0001590035602977344, "loss": 2.4192, "step": 16108 }, { "epoch": 0.7541225349640119, "grad_norm": 1.53125, "learning_rate": 0.0001589988472698697, "loss": 2.7325, "step": 16109 }, { "epoch": 0.7541693487038446, "grad_norm": 1.34375, "learning_rate": 0.00015899413404096981, "loss": 2.6435, "step": 16110 }, { "epoch": 0.7542161624436772, "grad_norm": 1.359375, "learning_rate": 0.00015898942061105085, "loss": 2.2043, "step": 16111 }, { "epoch": 0.7542629761835099, "grad_norm": 1.4296875, "learning_rate": 0.00015898470698012883, "loss": 2.7496, "step": 16112 }, { "epoch": 0.7543097899233425, "grad_norm": 1.640625, "learning_rate": 0.00015897999314821984, "loss": 2.5574, "step": 16113 }, { "epoch": 0.7543566036631751, "grad_norm": 1.4609375, "learning_rate": 0.00015897527911533992, "loss": 2.8269, "step": 16114 }, { "epoch": 0.7544034174030078, "grad_norm": 1.2890625, "learning_rate": 0.00015897056488150516, "loss": 2.7044, "step": 16115 }, { "epoch": 0.7544502311428404, "grad_norm": 1.8203125, "learning_rate": 0.00015896585044673157, "loss": 2.5647, "step": 16116 }, { "epoch": 0.7544970448826731, "grad_norm": 1.9609375, "learning_rate": 0.00015896113581103532, "loss": 2.3355, "step": 16117 }, { "epoch": 0.7545438586225057, "grad_norm": 1.2890625, "learning_rate": 0.00015895642097443234, "loss": 2.4051, "step": 16118 }, { "epoch": 0.7545906723623383, "grad_norm": 1.1171875, "learning_rate": 0.0001589517059369388, "loss": 2.1615, "step": 16119 }, { "epoch": 0.754637486102171, "grad_norm": 1.4296875, "learning_rate": 0.00015894699069857074, "loss": 2.6037, "step": 16120 }, { "epoch": 0.7546842998420036, "grad_norm": 1.15625, "learning_rate": 0.00015894227525934418, "loss": 2.5111, "step": 16121 }, { "epoch": 0.7547311135818363, "grad_norm": 1.2734375, "learning_rate": 0.00015893755961927526, "loss": 2.5818, "step": 16122 }, { "epoch": 0.7547779273216689, "grad_norm": 1.1953125, "learning_rate": 0.00015893284377838001, "loss": 3.0541, "step": 16123 }, { "epoch": 0.7548247410615015, "grad_norm": 1.5859375, "learning_rate": 0.00015892812773667448, "loss": 1.9611, "step": 16124 }, { "epoch": 0.7548715548013342, "grad_norm": 1.3359375, "learning_rate": 0.00015892341149417475, "loss": 2.5003, "step": 16125 }, { "epoch": 0.7549183685411668, "grad_norm": 2.109375, "learning_rate": 0.0001589186950508969, "loss": 2.6503, "step": 16126 }, { "epoch": 0.7549651822809995, "grad_norm": 1.4453125, "learning_rate": 0.00015891397840685705, "loss": 2.9836, "step": 16127 }, { "epoch": 0.7550119960208321, "grad_norm": 1.390625, "learning_rate": 0.00015890926156207123, "loss": 2.6181, "step": 16128 }, { "epoch": 0.7550588097606648, "grad_norm": 1.5, "learning_rate": 0.00015890454451655546, "loss": 2.5192, "step": 16129 }, { "epoch": 0.7551056235004974, "grad_norm": 1.375, "learning_rate": 0.00015889982727032586, "loss": 2.243, "step": 16130 }, { "epoch": 0.75515243724033, "grad_norm": 2.484375, "learning_rate": 0.00015889510982339852, "loss": 2.151, "step": 16131 }, { "epoch": 0.7551992509801627, "grad_norm": 1.0703125, "learning_rate": 0.0001588903921757895, "loss": 2.7572, "step": 16132 }, { "epoch": 0.7552460647199953, "grad_norm": 1.4921875, "learning_rate": 0.00015888567432751486, "loss": 2.794, "step": 16133 }, { "epoch": 0.755292878459828, "grad_norm": 1.359375, "learning_rate": 0.00015888095627859068, "loss": 2.0712, "step": 16134 }, { "epoch": 0.7553396921996606, "grad_norm": 1.078125, "learning_rate": 0.00015887623802903308, "loss": 2.2242, "step": 16135 }, { "epoch": 0.7553865059394932, "grad_norm": 1.8984375, "learning_rate": 0.00015887151957885806, "loss": 2.84, "step": 16136 }, { "epoch": 0.7554333196793259, "grad_norm": 1.34375, "learning_rate": 0.00015886680092808177, "loss": 2.901, "step": 16137 }, { "epoch": 0.7554801334191585, "grad_norm": 1.515625, "learning_rate": 0.00015886208207672027, "loss": 2.743, "step": 16138 }, { "epoch": 0.7555269471589912, "grad_norm": 1.3125, "learning_rate": 0.0001588573630247896, "loss": 2.441, "step": 16139 }, { "epoch": 0.7555737608988238, "grad_norm": 1.7890625, "learning_rate": 0.00015885264377230585, "loss": 2.4926, "step": 16140 }, { "epoch": 0.7556205746386564, "grad_norm": 1.359375, "learning_rate": 0.00015884792431928517, "loss": 2.5587, "step": 16141 }, { "epoch": 0.7556673883784891, "grad_norm": 1.28125, "learning_rate": 0.00015884320466574356, "loss": 3.7949, "step": 16142 }, { "epoch": 0.7557142021183217, "grad_norm": 1.25, "learning_rate": 0.00015883848481169715, "loss": 2.4941, "step": 16143 }, { "epoch": 0.7557610158581544, "grad_norm": 1.2265625, "learning_rate": 0.00015883376475716196, "loss": 2.4142, "step": 16144 }, { "epoch": 0.755807829597987, "grad_norm": 1.71875, "learning_rate": 0.00015882904450215414, "loss": 2.8189, "step": 16145 }, { "epoch": 0.7558546433378196, "grad_norm": 1.6796875, "learning_rate": 0.0001588243240466898, "loss": 2.9235, "step": 16146 }, { "epoch": 0.7559014570776523, "grad_norm": 1.3046875, "learning_rate": 0.00015881960339078489, "loss": 2.7132, "step": 16147 }, { "epoch": 0.755948270817485, "grad_norm": 1.3671875, "learning_rate": 0.00015881488253445563, "loss": 2.6277, "step": 16148 }, { "epoch": 0.7559950845573176, "grad_norm": 2.0, "learning_rate": 0.00015881016147771808, "loss": 2.7629, "step": 16149 }, { "epoch": 0.7560418982971502, "grad_norm": 1.734375, "learning_rate": 0.00015880544022058826, "loss": 2.7246, "step": 16150 }, { "epoch": 0.7560887120369828, "grad_norm": 1.46875, "learning_rate": 0.00015880071876308232, "loss": 2.619, "step": 16151 }, { "epoch": 0.7561355257768155, "grad_norm": 1.4765625, "learning_rate": 0.00015879599710521633, "loss": 2.5221, "step": 16152 }, { "epoch": 0.7561823395166481, "grad_norm": 1.5, "learning_rate": 0.00015879127524700637, "loss": 2.5936, "step": 16153 }, { "epoch": 0.7562291532564808, "grad_norm": 1.2109375, "learning_rate": 0.00015878655318846856, "loss": 2.4161, "step": 16154 }, { "epoch": 0.7562759669963134, "grad_norm": 1.6796875, "learning_rate": 0.00015878183092961896, "loss": 2.6393, "step": 16155 }, { "epoch": 0.756322780736146, "grad_norm": 1.2890625, "learning_rate": 0.00015877710847047368, "loss": 1.919, "step": 16156 }, { "epoch": 0.7563695944759787, "grad_norm": 1.5625, "learning_rate": 0.00015877238581104878, "loss": 2.1932, "step": 16157 }, { "epoch": 0.7564164082158114, "grad_norm": 1.3515625, "learning_rate": 0.00015876766295136035, "loss": 2.3882, "step": 16158 }, { "epoch": 0.756463221955644, "grad_norm": 1.21875, "learning_rate": 0.0001587629398914245, "loss": 2.4547, "step": 16159 }, { "epoch": 0.7565100356954766, "grad_norm": 1.671875, "learning_rate": 0.0001587582166312574, "loss": 2.3584, "step": 16160 }, { "epoch": 0.7565568494353092, "grad_norm": 2.046875, "learning_rate": 0.000158753493170875, "loss": 2.8536, "step": 16161 }, { "epoch": 0.7566036631751419, "grad_norm": 1.7109375, "learning_rate": 0.00015874876951029352, "loss": 2.6226, "step": 16162 }, { "epoch": 0.7566504769149746, "grad_norm": 1.328125, "learning_rate": 0.00015874404564952897, "loss": 2.2451, "step": 16163 }, { "epoch": 0.7566972906548072, "grad_norm": 1.296875, "learning_rate": 0.0001587393215885975, "loss": 2.581, "step": 16164 }, { "epoch": 0.7567441043946398, "grad_norm": 1.359375, "learning_rate": 0.0001587345973275152, "loss": 2.5533, "step": 16165 }, { "epoch": 0.7567909181344724, "grad_norm": 1.234375, "learning_rate": 0.00015872987286629813, "loss": 2.5711, "step": 16166 }, { "epoch": 0.7568377318743051, "grad_norm": 1.8203125, "learning_rate": 0.0001587251482049624, "loss": 2.4094, "step": 16167 }, { "epoch": 0.7568845456141378, "grad_norm": 1.5625, "learning_rate": 0.00015872042334352413, "loss": 2.7769, "step": 16168 }, { "epoch": 0.7569313593539704, "grad_norm": 1.8203125, "learning_rate": 0.00015871569828199942, "loss": 2.1592, "step": 16169 }, { "epoch": 0.756978173093803, "grad_norm": 1.5078125, "learning_rate": 0.00015871097302040438, "loss": 2.382, "step": 16170 }, { "epoch": 0.7570249868336357, "grad_norm": 1.3515625, "learning_rate": 0.00015870624755875507, "loss": 2.5212, "step": 16171 }, { "epoch": 0.7570718005734683, "grad_norm": 1.3203125, "learning_rate": 0.0001587015218970676, "loss": 2.5426, "step": 16172 }, { "epoch": 0.757118614313301, "grad_norm": 2.78125, "learning_rate": 0.00015869679603535813, "loss": 2.6542, "step": 16173 }, { "epoch": 0.7571654280531336, "grad_norm": 1.4609375, "learning_rate": 0.00015869206997364273, "loss": 1.7894, "step": 16174 }, { "epoch": 0.7572122417929662, "grad_norm": 1.484375, "learning_rate": 0.00015868734371193743, "loss": 1.9311, "step": 16175 }, { "epoch": 0.7572590555327989, "grad_norm": 1.1640625, "learning_rate": 0.00015868261725025844, "loss": 2.5392, "step": 16176 }, { "epoch": 0.7573058692726315, "grad_norm": 1.71875, "learning_rate": 0.0001586778905886218, "loss": 2.3813, "step": 16177 }, { "epoch": 0.7573526830124642, "grad_norm": 1.4921875, "learning_rate": 0.00015867316372704369, "loss": 2.6812, "step": 16178 }, { "epoch": 0.7573994967522968, "grad_norm": 1.484375, "learning_rate": 0.00015866843666554012, "loss": 2.2128, "step": 16179 }, { "epoch": 0.7574463104921294, "grad_norm": 1.9140625, "learning_rate": 0.00015866370940412723, "loss": 2.923, "step": 16180 }, { "epoch": 0.7574931242319621, "grad_norm": 1.8203125, "learning_rate": 0.00015865898194282116, "loss": 2.8102, "step": 16181 }, { "epoch": 0.7575399379717948, "grad_norm": 1.25, "learning_rate": 0.00015865425428163803, "loss": 2.5994, "step": 16182 }, { "epoch": 0.7575867517116274, "grad_norm": 1.9921875, "learning_rate": 0.00015864952642059388, "loss": 3.089, "step": 16183 }, { "epoch": 0.75763356545146, "grad_norm": 1.5, "learning_rate": 0.0001586447983597049, "loss": 2.562, "step": 16184 }, { "epoch": 0.7576803791912926, "grad_norm": 1.171875, "learning_rate": 0.00015864007009898712, "loss": 2.2016, "step": 16185 }, { "epoch": 0.7577271929311253, "grad_norm": 1.6796875, "learning_rate": 0.00015863534163845672, "loss": 3.1143, "step": 16186 }, { "epoch": 0.757774006670958, "grad_norm": 1.625, "learning_rate": 0.00015863061297812973, "loss": 2.5642, "step": 16187 }, { "epoch": 0.7578208204107906, "grad_norm": 1.234375, "learning_rate": 0.00015862588411802238, "loss": 2.4753, "step": 16188 }, { "epoch": 0.7578676341506232, "grad_norm": 1.609375, "learning_rate": 0.00015862115505815068, "loss": 2.7079, "step": 16189 }, { "epoch": 0.7579144478904558, "grad_norm": 1.59375, "learning_rate": 0.0001586164257985308, "loss": 2.6646, "step": 16190 }, { "epoch": 0.7579612616302885, "grad_norm": 1.15625, "learning_rate": 0.00015861169633917883, "loss": 2.3581, "step": 16191 }, { "epoch": 0.7580080753701212, "grad_norm": 1.3515625, "learning_rate": 0.00015860696668011087, "loss": 2.3188, "step": 16192 }, { "epoch": 0.7580548891099538, "grad_norm": 1.875, "learning_rate": 0.0001586022368213431, "loss": 2.8057, "step": 16193 }, { "epoch": 0.7581017028497864, "grad_norm": 1.40625, "learning_rate": 0.0001585975067628916, "loss": 2.5407, "step": 16194 }, { "epoch": 0.758148516589619, "grad_norm": 1.2734375, "learning_rate": 0.00015859277650477242, "loss": 2.5825, "step": 16195 }, { "epoch": 0.7581953303294517, "grad_norm": 1.125, "learning_rate": 0.0001585880460470018, "loss": 2.4889, "step": 16196 }, { "epoch": 0.7582421440692844, "grad_norm": 1.2734375, "learning_rate": 0.00015858331538959577, "loss": 2.6224, "step": 16197 }, { "epoch": 0.758288957809117, "grad_norm": 1.9140625, "learning_rate": 0.00015857858453257047, "loss": 3.0006, "step": 16198 }, { "epoch": 0.7583357715489496, "grad_norm": 1.4140625, "learning_rate": 0.00015857385347594206, "loss": 2.4737, "step": 16199 }, { "epoch": 0.7583825852887822, "grad_norm": 1.359375, "learning_rate": 0.00015856912221972661, "loss": 2.5434, "step": 16200 }, { "epoch": 0.758429399028615, "grad_norm": 1.8515625, "learning_rate": 0.00015856439076394028, "loss": 2.6752, "step": 16201 }, { "epoch": 0.7584762127684476, "grad_norm": 1.3046875, "learning_rate": 0.00015855965910859914, "loss": 2.4526, "step": 16202 }, { "epoch": 0.7585230265082802, "grad_norm": 1.265625, "learning_rate": 0.0001585549272537194, "loss": 1.8852, "step": 16203 }, { "epoch": 0.7585698402481128, "grad_norm": 1.9765625, "learning_rate": 0.00015855019519931708, "loss": 2.9373, "step": 16204 }, { "epoch": 0.7586166539879454, "grad_norm": 1.4140625, "learning_rate": 0.0001585454629454084, "loss": 2.4306, "step": 16205 }, { "epoch": 0.7586634677277782, "grad_norm": 2.078125, "learning_rate": 0.00015854073049200942, "loss": 3.1048, "step": 16206 }, { "epoch": 0.7587102814676108, "grad_norm": 1.71875, "learning_rate": 0.00015853599783913626, "loss": 2.5813, "step": 16207 }, { "epoch": 0.7587570952074434, "grad_norm": 1.46875, "learning_rate": 0.0001585312649868051, "loss": 2.4972, "step": 16208 }, { "epoch": 0.758803908947276, "grad_norm": 1.21875, "learning_rate": 0.00015852653193503203, "loss": 2.5846, "step": 16209 }, { "epoch": 0.7588507226871086, "grad_norm": 1.265625, "learning_rate": 0.00015852179868383316, "loss": 2.6163, "step": 16210 }, { "epoch": 0.7588975364269414, "grad_norm": 2.796875, "learning_rate": 0.00015851706523322467, "loss": 2.6729, "step": 16211 }, { "epoch": 0.758944350166774, "grad_norm": 2.6875, "learning_rate": 0.00015851233158322266, "loss": 1.8453, "step": 16212 }, { "epoch": 0.7589911639066066, "grad_norm": 1.421875, "learning_rate": 0.00015850759773384328, "loss": 2.2827, "step": 16213 }, { "epoch": 0.7590379776464392, "grad_norm": 1.265625, "learning_rate": 0.0001585028636851026, "loss": 2.4573, "step": 16214 }, { "epoch": 0.7590847913862718, "grad_norm": 1.625, "learning_rate": 0.00015849812943701682, "loss": 2.6149, "step": 16215 }, { "epoch": 0.7591316051261046, "grad_norm": 1.2734375, "learning_rate": 0.00015849339498960206, "loss": 3.6636, "step": 16216 }, { "epoch": 0.7591784188659372, "grad_norm": 1.2734375, "learning_rate": 0.0001584886603428744, "loss": 2.5535, "step": 16217 }, { "epoch": 0.7592252326057698, "grad_norm": 1.4375, "learning_rate": 0.00015848392549685004, "loss": 2.5948, "step": 16218 }, { "epoch": 0.7592720463456024, "grad_norm": 1.3046875, "learning_rate": 0.00015847919045154508, "loss": 2.5848, "step": 16219 }, { "epoch": 0.759318860085435, "grad_norm": 1.3984375, "learning_rate": 0.00015847445520697563, "loss": 2.6244, "step": 16220 }, { "epoch": 0.7593656738252678, "grad_norm": 1.2890625, "learning_rate": 0.00015846971976315788, "loss": 2.3942, "step": 16221 }, { "epoch": 0.7594124875651004, "grad_norm": 1.0703125, "learning_rate": 0.00015846498412010795, "loss": 1.9872, "step": 16222 }, { "epoch": 0.759459301304933, "grad_norm": 2.15625, "learning_rate": 0.00015846024827784197, "loss": 2.9039, "step": 16223 }, { "epoch": 0.7595061150447656, "grad_norm": 1.6484375, "learning_rate": 0.00015845551223637604, "loss": 3.2804, "step": 16224 }, { "epoch": 0.7595529287845982, "grad_norm": 1.2890625, "learning_rate": 0.00015845077599572636, "loss": 2.6012, "step": 16225 }, { "epoch": 0.759599742524431, "grad_norm": 1.265625, "learning_rate": 0.00015844603955590902, "loss": 2.4223, "step": 16226 }, { "epoch": 0.7596465562642636, "grad_norm": 1.515625, "learning_rate": 0.00015844130291694017, "loss": 2.1588, "step": 16227 }, { "epoch": 0.7596933700040962, "grad_norm": 1.3359375, "learning_rate": 0.00015843656607883596, "loss": 2.3234, "step": 16228 }, { "epoch": 0.7597401837439288, "grad_norm": 2.59375, "learning_rate": 0.00015843182904161257, "loss": 2.6134, "step": 16229 }, { "epoch": 0.7597869974837614, "grad_norm": 1.2734375, "learning_rate": 0.00015842709180528605, "loss": 2.4695, "step": 16230 }, { "epoch": 0.7598338112235942, "grad_norm": 2.0, "learning_rate": 0.00015842235436987263, "loss": 2.002, "step": 16231 }, { "epoch": 0.7598806249634268, "grad_norm": 1.4453125, "learning_rate": 0.0001584176167353884, "loss": 2.2701, "step": 16232 }, { "epoch": 0.7599274387032594, "grad_norm": 2.5625, "learning_rate": 0.00015841287890184952, "loss": 2.6518, "step": 16233 }, { "epoch": 0.759974252443092, "grad_norm": 1.140625, "learning_rate": 0.00015840814086927213, "loss": 2.5349, "step": 16234 }, { "epoch": 0.7600210661829246, "grad_norm": 2.0, "learning_rate": 0.00015840340263767238, "loss": 2.492, "step": 16235 }, { "epoch": 0.7600678799227574, "grad_norm": 1.4140625, "learning_rate": 0.0001583986642070664, "loss": 2.7049, "step": 16236 }, { "epoch": 0.76011469366259, "grad_norm": 1.125, "learning_rate": 0.00015839392557747033, "loss": 2.765, "step": 16237 }, { "epoch": 0.7601615074024226, "grad_norm": 1.546875, "learning_rate": 0.00015838918674890038, "loss": 2.5879, "step": 16238 }, { "epoch": 0.7602083211422552, "grad_norm": 1.3515625, "learning_rate": 0.0001583844477213726, "loss": 2.1215, "step": 16239 }, { "epoch": 0.7602551348820878, "grad_norm": 1.4296875, "learning_rate": 0.00015837970849490326, "loss": 2.7178, "step": 16240 }, { "epoch": 0.7603019486219206, "grad_norm": 1.5234375, "learning_rate": 0.0001583749690695084, "loss": 2.4664, "step": 16241 }, { "epoch": 0.7603487623617532, "grad_norm": 1.4921875, "learning_rate": 0.0001583702294452042, "loss": 2.4683, "step": 16242 }, { "epoch": 0.7603955761015858, "grad_norm": 1.2109375, "learning_rate": 0.0001583654896220068, "loss": 2.2651, "step": 16243 }, { "epoch": 0.7604423898414184, "grad_norm": 1.234375, "learning_rate": 0.00015836074959993236, "loss": 2.6456, "step": 16244 }, { "epoch": 0.760489203581251, "grad_norm": 1.0, "learning_rate": 0.00015835600937899708, "loss": 1.9479, "step": 16245 }, { "epoch": 0.7605360173210838, "grad_norm": 1.734375, "learning_rate": 0.00015835126895921706, "loss": 2.8973, "step": 16246 }, { "epoch": 0.7605828310609164, "grad_norm": 1.2578125, "learning_rate": 0.00015834652834060848, "loss": 2.5585, "step": 16247 }, { "epoch": 0.760629644800749, "grad_norm": 1.640625, "learning_rate": 0.00015834178752318744, "loss": 2.7684, "step": 16248 }, { "epoch": 0.7606764585405816, "grad_norm": 1.2890625, "learning_rate": 0.00015833704650697014, "loss": 2.3923, "step": 16249 }, { "epoch": 0.7607232722804143, "grad_norm": 1.4765625, "learning_rate": 0.00015833230529197273, "loss": 2.4588, "step": 16250 }, { "epoch": 0.760770086020247, "grad_norm": 1.671875, "learning_rate": 0.0001583275638782114, "loss": 2.4138, "step": 16251 }, { "epoch": 0.7608168997600796, "grad_norm": 2.078125, "learning_rate": 0.00015832282226570224, "loss": 2.2148, "step": 16252 }, { "epoch": 0.7608637134999122, "grad_norm": 1.7578125, "learning_rate": 0.0001583180804544614, "loss": 2.4836, "step": 16253 }, { "epoch": 0.7609105272397448, "grad_norm": 1.6796875, "learning_rate": 0.0001583133384445051, "loss": 2.5296, "step": 16254 }, { "epoch": 0.7609573409795775, "grad_norm": 1.734375, "learning_rate": 0.00015830859623584947, "loss": 3.0286, "step": 16255 }, { "epoch": 0.7610041547194102, "grad_norm": 1.6640625, "learning_rate": 0.0001583038538285107, "loss": 2.6927, "step": 16256 }, { "epoch": 0.7610509684592428, "grad_norm": 1.25, "learning_rate": 0.00015829911122250488, "loss": 2.653, "step": 16257 }, { "epoch": 0.7610977821990754, "grad_norm": 1.640625, "learning_rate": 0.00015829436841784822, "loss": 2.596, "step": 16258 }, { "epoch": 0.761144595938908, "grad_norm": 1.46875, "learning_rate": 0.00015828962541455686, "loss": 2.3287, "step": 16259 }, { "epoch": 0.7611914096787407, "grad_norm": 2.109375, "learning_rate": 0.000158284882212647, "loss": 2.5748, "step": 16260 }, { "epoch": 0.7612382234185734, "grad_norm": 1.4921875, "learning_rate": 0.00015828013881213473, "loss": 2.7548, "step": 16261 }, { "epoch": 0.761285037158406, "grad_norm": 1.78125, "learning_rate": 0.00015827539521303627, "loss": 2.7714, "step": 16262 }, { "epoch": 0.7613318508982386, "grad_norm": 1.2578125, "learning_rate": 0.0001582706514153678, "loss": 2.4784, "step": 16263 }, { "epoch": 0.7613786646380712, "grad_norm": 1.9140625, "learning_rate": 0.0001582659074191454, "loss": 2.7187, "step": 16264 }, { "epoch": 0.7614254783779039, "grad_norm": 1.9375, "learning_rate": 0.00015826116322438532, "loss": 2.6435, "step": 16265 }, { "epoch": 0.7614722921177366, "grad_norm": 1.3984375, "learning_rate": 0.00015825641883110368, "loss": 2.6175, "step": 16266 }, { "epoch": 0.7615191058575692, "grad_norm": 2.09375, "learning_rate": 0.00015825167423931665, "loss": 2.9377, "step": 16267 }, { "epoch": 0.7615659195974018, "grad_norm": 1.7734375, "learning_rate": 0.00015824692944904045, "loss": 2.7194, "step": 16268 }, { "epoch": 0.7616127333372344, "grad_norm": 1.9296875, "learning_rate": 0.00015824218446029118, "loss": 3.0547, "step": 16269 }, { "epoch": 0.7616595470770671, "grad_norm": 2.078125, "learning_rate": 0.00015823743927308507, "loss": 2.8116, "step": 16270 }, { "epoch": 0.7617063608168998, "grad_norm": 2.40625, "learning_rate": 0.0001582326938874382, "loss": 2.7845, "step": 16271 }, { "epoch": 0.7617531745567324, "grad_norm": 1.4375, "learning_rate": 0.0001582279483033668, "loss": 2.4797, "step": 16272 }, { "epoch": 0.761799988296565, "grad_norm": 1.265625, "learning_rate": 0.00015822320252088706, "loss": 2.4655, "step": 16273 }, { "epoch": 0.7618468020363977, "grad_norm": 1.5, "learning_rate": 0.00015821845654001508, "loss": 2.4906, "step": 16274 }, { "epoch": 0.7618936157762303, "grad_norm": 1.5390625, "learning_rate": 0.00015821371036076707, "loss": 2.5955, "step": 16275 }, { "epoch": 0.761940429516063, "grad_norm": 1.375, "learning_rate": 0.00015820896398315926, "loss": 2.5895, "step": 16276 }, { "epoch": 0.7619872432558956, "grad_norm": 1.8125, "learning_rate": 0.00015820421740720777, "loss": 2.3735, "step": 16277 }, { "epoch": 0.7620340569957282, "grad_norm": 1.3515625, "learning_rate": 0.00015819947063292871, "loss": 2.5515, "step": 16278 }, { "epoch": 0.7620808707355609, "grad_norm": 1.28125, "learning_rate": 0.0001581947236603384, "loss": 2.5473, "step": 16279 }, { "epoch": 0.7621276844753935, "grad_norm": 1.859375, "learning_rate": 0.00015818997648945287, "loss": 2.447, "step": 16280 }, { "epoch": 0.7621744982152262, "grad_norm": 1.3203125, "learning_rate": 0.00015818522912028837, "loss": 2.4746, "step": 16281 }, { "epoch": 0.7622213119550588, "grad_norm": 1.578125, "learning_rate": 0.0001581804815528611, "loss": 2.791, "step": 16282 }, { "epoch": 0.7622681256948914, "grad_norm": 1.875, "learning_rate": 0.00015817573378718716, "loss": 3.0572, "step": 16283 }, { "epoch": 0.7623149394347241, "grad_norm": 1.484375, "learning_rate": 0.00015817098582328277, "loss": 2.5773, "step": 16284 }, { "epoch": 0.7623617531745567, "grad_norm": 1.6171875, "learning_rate": 0.0001581662376611641, "loss": 2.8447, "step": 16285 }, { "epoch": 0.7624085669143894, "grad_norm": 1.390625, "learning_rate": 0.0001581614893008474, "loss": 2.5064, "step": 16286 }, { "epoch": 0.762455380654222, "grad_norm": 2.140625, "learning_rate": 0.00015815674074234873, "loss": 2.0699, "step": 16287 }, { "epoch": 0.7625021943940546, "grad_norm": 2.171875, "learning_rate": 0.00015815199198568436, "loss": 2.5995, "step": 16288 }, { "epoch": 0.7625490081338873, "grad_norm": 2.53125, "learning_rate": 0.00015814724303087044, "loss": 2.6105, "step": 16289 }, { "epoch": 0.76259582187372, "grad_norm": 1.453125, "learning_rate": 0.0001581424938779231, "loss": 2.5534, "step": 16290 }, { "epoch": 0.7626426356135526, "grad_norm": 1.2578125, "learning_rate": 0.00015813774452685863, "loss": 2.7129, "step": 16291 }, { "epoch": 0.7626894493533852, "grad_norm": 1.375, "learning_rate": 0.00015813299497769313, "loss": 2.6533, "step": 16292 }, { "epoch": 0.7627362630932178, "grad_norm": 1.359375, "learning_rate": 0.0001581282452304428, "loss": 2.6824, "step": 16293 }, { "epoch": 0.7627830768330505, "grad_norm": 1.3671875, "learning_rate": 0.00015812349528512387, "loss": 4.1395, "step": 16294 }, { "epoch": 0.7628298905728832, "grad_norm": 1.171875, "learning_rate": 0.00015811874514175248, "loss": 2.5408, "step": 16295 }, { "epoch": 0.7628767043127158, "grad_norm": 1.3125, "learning_rate": 0.00015811399480034478, "loss": 2.4626, "step": 16296 }, { "epoch": 0.7629235180525484, "grad_norm": 1.875, "learning_rate": 0.00015810924426091702, "loss": 2.5988, "step": 16297 }, { "epoch": 0.762970331792381, "grad_norm": 1.859375, "learning_rate": 0.0001581044935234854, "loss": 2.5162, "step": 16298 }, { "epoch": 0.7630171455322137, "grad_norm": 1.3515625, "learning_rate": 0.00015809974258806605, "loss": 2.6815, "step": 16299 }, { "epoch": 0.7630639592720464, "grad_norm": 1.703125, "learning_rate": 0.0001580949914546752, "loss": 2.3817, "step": 16300 }, { "epoch": 0.763110773011879, "grad_norm": 1.875, "learning_rate": 0.00015809024012332902, "loss": 2.4234, "step": 16301 }, { "epoch": 0.7631575867517116, "grad_norm": 1.4921875, "learning_rate": 0.0001580854885940437, "loss": 2.7642, "step": 16302 }, { "epoch": 0.7632044004915443, "grad_norm": 1.6328125, "learning_rate": 0.00015808073686683546, "loss": 2.7487, "step": 16303 }, { "epoch": 0.7632512142313769, "grad_norm": 1.4921875, "learning_rate": 0.00015807598494172045, "loss": 2.4252, "step": 16304 }, { "epoch": 0.7632980279712096, "grad_norm": 2.15625, "learning_rate": 0.00015807123281871485, "loss": 2.2447, "step": 16305 }, { "epoch": 0.7633448417110422, "grad_norm": 1.71875, "learning_rate": 0.0001580664804978349, "loss": 2.9145, "step": 16306 }, { "epoch": 0.7633916554508748, "grad_norm": 1.15625, "learning_rate": 0.0001580617279790968, "loss": 2.6108, "step": 16307 }, { "epoch": 0.7634384691907075, "grad_norm": 2.296875, "learning_rate": 0.0001580569752625167, "loss": 2.6825, "step": 16308 }, { "epoch": 0.7634852829305401, "grad_norm": 1.3671875, "learning_rate": 0.0001580522223481108, "loss": 2.7428, "step": 16309 }, { "epoch": 0.7635320966703728, "grad_norm": 1.953125, "learning_rate": 0.00015804746923589534, "loss": 2.8296, "step": 16310 }, { "epoch": 0.7635789104102054, "grad_norm": 1.7421875, "learning_rate": 0.00015804271592588647, "loss": 2.4972, "step": 16311 }, { "epoch": 0.763625724150038, "grad_norm": 1.375, "learning_rate": 0.00015803796241810035, "loss": 2.5581, "step": 16312 }, { "epoch": 0.7636725378898707, "grad_norm": 1.6015625, "learning_rate": 0.00015803320871255329, "loss": 2.8918, "step": 16313 }, { "epoch": 0.7637193516297033, "grad_norm": 1.421875, "learning_rate": 0.00015802845480926138, "loss": 2.5457, "step": 16314 }, { "epoch": 0.763766165369536, "grad_norm": 1.1953125, "learning_rate": 0.00015802370070824092, "loss": 2.6579, "step": 16315 }, { "epoch": 0.7638129791093686, "grad_norm": 1.1640625, "learning_rate": 0.000158018946409508, "loss": 2.346, "step": 16316 }, { "epoch": 0.7638597928492012, "grad_norm": 2.328125, "learning_rate": 0.0001580141919130789, "loss": 2.3623, "step": 16317 }, { "epoch": 0.7639066065890339, "grad_norm": 1.484375, "learning_rate": 0.00015800943721896982, "loss": 2.6731, "step": 16318 }, { "epoch": 0.7639534203288665, "grad_norm": 1.640625, "learning_rate": 0.00015800468232719691, "loss": 2.3451, "step": 16319 }, { "epoch": 0.7640002340686992, "grad_norm": 1.5390625, "learning_rate": 0.0001579999272377764, "loss": 2.4861, "step": 16320 }, { "epoch": 0.7640470478085318, "grad_norm": 1.3203125, "learning_rate": 0.0001579951719507245, "loss": 2.1721, "step": 16321 }, { "epoch": 0.7640938615483645, "grad_norm": 1.4609375, "learning_rate": 0.00015799041646605736, "loss": 2.757, "step": 16322 }, { "epoch": 0.7641406752881971, "grad_norm": 1.40625, "learning_rate": 0.00015798566078379124, "loss": 2.6377, "step": 16323 }, { "epoch": 0.7641874890280297, "grad_norm": 1.4765625, "learning_rate": 0.00015798090490394236, "loss": 2.2534, "step": 16324 }, { "epoch": 0.7642343027678624, "grad_norm": 1.34375, "learning_rate": 0.0001579761488265269, "loss": 2.6416, "step": 16325 }, { "epoch": 0.764281116507695, "grad_norm": 1.3046875, "learning_rate": 0.00015797139255156105, "loss": 2.6101, "step": 16326 }, { "epoch": 0.7643279302475277, "grad_norm": 1.3984375, "learning_rate": 0.00015796663607906103, "loss": 2.3644, "step": 16327 }, { "epoch": 0.7643747439873603, "grad_norm": 1.34375, "learning_rate": 0.00015796187940904306, "loss": 2.6841, "step": 16328 }, { "epoch": 0.7644215577271929, "grad_norm": 1.3515625, "learning_rate": 0.00015795712254152333, "loss": 2.5019, "step": 16329 }, { "epoch": 0.7644683714670256, "grad_norm": 1.6484375, "learning_rate": 0.00015795236547651805, "loss": 2.8399, "step": 16330 }, { "epoch": 0.7645151852068582, "grad_norm": 1.34375, "learning_rate": 0.00015794760821404344, "loss": 2.4654, "step": 16331 }, { "epoch": 0.7645619989466909, "grad_norm": 1.484375, "learning_rate": 0.0001579428507541157, "loss": 2.7003, "step": 16332 }, { "epoch": 0.7646088126865235, "grad_norm": 1.6484375, "learning_rate": 0.00015793809309675106, "loss": 2.9316, "step": 16333 }, { "epoch": 0.7646556264263561, "grad_norm": 1.5546875, "learning_rate": 0.00015793333524196566, "loss": 2.4428, "step": 16334 }, { "epoch": 0.7647024401661888, "grad_norm": 1.1484375, "learning_rate": 0.00015792857718977582, "loss": 2.3327, "step": 16335 }, { "epoch": 0.7647492539060214, "grad_norm": 1.5, "learning_rate": 0.00015792381894019772, "loss": 2.4277, "step": 16336 }, { "epoch": 0.7647960676458541, "grad_norm": 2.125, "learning_rate": 0.00015791906049324753, "loss": 2.8355, "step": 16337 }, { "epoch": 0.7648428813856867, "grad_norm": 1.2890625, "learning_rate": 0.0001579143018489415, "loss": 2.5899, "step": 16338 }, { "epoch": 0.7648896951255193, "grad_norm": 2.265625, "learning_rate": 0.0001579095430072958, "loss": 4.413, "step": 16339 }, { "epoch": 0.764936508865352, "grad_norm": 1.765625, "learning_rate": 0.0001579047839683267, "loss": 2.6169, "step": 16340 }, { "epoch": 0.7649833226051846, "grad_norm": 1.2578125, "learning_rate": 0.0001579000247320504, "loss": 2.3886, "step": 16341 }, { "epoch": 0.7650301363450173, "grad_norm": 1.34375, "learning_rate": 0.00015789526529848312, "loss": 2.8997, "step": 16342 }, { "epoch": 0.7650769500848499, "grad_norm": 1.4609375, "learning_rate": 0.00015789050566764105, "loss": 2.3932, "step": 16343 }, { "epoch": 0.7651237638246825, "grad_norm": 1.1015625, "learning_rate": 0.00015788574583954046, "loss": 2.367, "step": 16344 }, { "epoch": 0.7651705775645152, "grad_norm": 4.0625, "learning_rate": 0.0001578809858141975, "loss": 2.5544, "step": 16345 }, { "epoch": 0.7652173913043478, "grad_norm": 1.7890625, "learning_rate": 0.00015787622559162846, "loss": 2.5532, "step": 16346 }, { "epoch": 0.7652642050441805, "grad_norm": 1.1640625, "learning_rate": 0.0001578714651718495, "loss": 2.3875, "step": 16347 }, { "epoch": 0.7653110187840131, "grad_norm": 1.2578125, "learning_rate": 0.0001578667045548769, "loss": 2.5846, "step": 16348 }, { "epoch": 0.7653578325238457, "grad_norm": 1.5, "learning_rate": 0.00015786194374072681, "loss": 2.2987, "step": 16349 }, { "epoch": 0.7654046462636784, "grad_norm": 1.2421875, "learning_rate": 0.00015785718272941552, "loss": 2.5035, "step": 16350 }, { "epoch": 0.765451460003511, "grad_norm": 2.15625, "learning_rate": 0.0001578524215209592, "loss": 2.9992, "step": 16351 }, { "epoch": 0.7654982737433437, "grad_norm": 1.6328125, "learning_rate": 0.00015784766011537411, "loss": 2.1094, "step": 16352 }, { "epoch": 0.7655450874831763, "grad_norm": 1.2890625, "learning_rate": 0.00015784289851267645, "loss": 2.4835, "step": 16353 }, { "epoch": 0.7655919012230089, "grad_norm": 1.484375, "learning_rate": 0.00015783813671288249, "loss": 2.5862, "step": 16354 }, { "epoch": 0.7656387149628416, "grad_norm": 1.6640625, "learning_rate": 0.00015783337471600838, "loss": 2.6844, "step": 16355 }, { "epoch": 0.7656855287026743, "grad_norm": 2.109375, "learning_rate": 0.0001578286125220704, "loss": 2.8139, "step": 16356 }, { "epoch": 0.7657323424425069, "grad_norm": 1.6328125, "learning_rate": 0.00015782385013108477, "loss": 2.417, "step": 16357 }, { "epoch": 0.7657791561823395, "grad_norm": 1.4453125, "learning_rate": 0.00015781908754306773, "loss": 2.5517, "step": 16358 }, { "epoch": 0.7658259699221721, "grad_norm": 1.09375, "learning_rate": 0.00015781432475803548, "loss": 2.3377, "step": 16359 }, { "epoch": 0.7658727836620048, "grad_norm": 1.578125, "learning_rate": 0.00015780956177600426, "loss": 2.5316, "step": 16360 }, { "epoch": 0.7659195974018375, "grad_norm": 1.4765625, "learning_rate": 0.0001578047985969903, "loss": 2.6246, "step": 16361 }, { "epoch": 0.7659664111416701, "grad_norm": 1.703125, "learning_rate": 0.00015780003522100982, "loss": 2.5206, "step": 16362 }, { "epoch": 0.7660132248815027, "grad_norm": 1.546875, "learning_rate": 0.00015779527164807907, "loss": 2.3579, "step": 16363 }, { "epoch": 0.7660600386213353, "grad_norm": 1.2890625, "learning_rate": 0.00015779050787821428, "loss": 2.4749, "step": 16364 }, { "epoch": 0.766106852361168, "grad_norm": 1.2265625, "learning_rate": 0.0001577857439114317, "loss": 2.4277, "step": 16365 }, { "epoch": 0.7661536661010007, "grad_norm": 1.0, "learning_rate": 0.00015778097974774747, "loss": 3.8304, "step": 16366 }, { "epoch": 0.7662004798408333, "grad_norm": 1.203125, "learning_rate": 0.00015777621538717792, "loss": 2.1216, "step": 16367 }, { "epoch": 0.7662472935806659, "grad_norm": 2.015625, "learning_rate": 0.00015777145082973928, "loss": 2.5907, "step": 16368 }, { "epoch": 0.7662941073204985, "grad_norm": 1.40625, "learning_rate": 0.00015776668607544772, "loss": 2.2506, "step": 16369 }, { "epoch": 0.7663409210603312, "grad_norm": 2.484375, "learning_rate": 0.00015776192112431956, "loss": 2.7969, "step": 16370 }, { "epoch": 0.7663877348001639, "grad_norm": 1.5546875, "learning_rate": 0.00015775715597637097, "loss": 2.4707, "step": 16371 }, { "epoch": 0.7664345485399965, "grad_norm": 1.7265625, "learning_rate": 0.0001577523906316182, "loss": 2.8136, "step": 16372 }, { "epoch": 0.7664813622798291, "grad_norm": 1.9921875, "learning_rate": 0.0001577476250900775, "loss": 2.344, "step": 16373 }, { "epoch": 0.7665281760196617, "grad_norm": 1.25, "learning_rate": 0.00015774285935176514, "loss": 2.8486, "step": 16374 }, { "epoch": 0.7665749897594945, "grad_norm": 1.328125, "learning_rate": 0.0001577380934166973, "loss": 2.6787, "step": 16375 }, { "epoch": 0.7666218034993271, "grad_norm": 1.7109375, "learning_rate": 0.00015773332728489026, "loss": 2.2003, "step": 16376 }, { "epoch": 0.7666686172391597, "grad_norm": 1.734375, "learning_rate": 0.0001577285609563602, "loss": 2.4361, "step": 16377 }, { "epoch": 0.7667154309789923, "grad_norm": 1.8203125, "learning_rate": 0.00015772379443112345, "loss": 3.0238, "step": 16378 }, { "epoch": 0.7667622447188249, "grad_norm": 1.9296875, "learning_rate": 0.00015771902770919617, "loss": 1.9444, "step": 16379 }, { "epoch": 0.7668090584586577, "grad_norm": 1.296875, "learning_rate": 0.00015771426079059465, "loss": 2.2207, "step": 16380 }, { "epoch": 0.7668558721984903, "grad_norm": 1.5234375, "learning_rate": 0.0001577094936753351, "loss": 2.712, "step": 16381 }, { "epoch": 0.7669026859383229, "grad_norm": 1.53125, "learning_rate": 0.00015770472636343386, "loss": 2.9588, "step": 16382 }, { "epoch": 0.7669494996781555, "grad_norm": 1.828125, "learning_rate": 0.00015769995885490704, "loss": 2.6785, "step": 16383 }, { "epoch": 0.7669963134179881, "grad_norm": 1.328125, "learning_rate": 0.000157695191149771, "loss": 2.2853, "step": 16384 }, { "epoch": 0.7670431271578209, "grad_norm": 1.4765625, "learning_rate": 0.00015769042324804188, "loss": 2.5478, "step": 16385 }, { "epoch": 0.7670899408976535, "grad_norm": 2.03125, "learning_rate": 0.000157685655149736, "loss": 2.8182, "step": 16386 }, { "epoch": 0.7671367546374861, "grad_norm": 1.859375, "learning_rate": 0.00015768088685486957, "loss": 2.4259, "step": 16387 }, { "epoch": 0.7671835683773187, "grad_norm": 1.234375, "learning_rate": 0.00015767611836345886, "loss": 2.8367, "step": 16388 }, { "epoch": 0.7672303821171513, "grad_norm": 1.4140625, "learning_rate": 0.00015767134967552008, "loss": 2.9139, "step": 16389 }, { "epoch": 0.7672771958569841, "grad_norm": 1.5078125, "learning_rate": 0.00015766658079106955, "loss": 2.6197, "step": 16390 }, { "epoch": 0.7673240095968167, "grad_norm": 1.15625, "learning_rate": 0.00015766181171012345, "loss": 2.3517, "step": 16391 }, { "epoch": 0.7673708233366493, "grad_norm": 1.3671875, "learning_rate": 0.0001576570424326981, "loss": 2.6854, "step": 16392 }, { "epoch": 0.7674176370764819, "grad_norm": 1.125, "learning_rate": 0.00015765227295880968, "loss": 2.7563, "step": 16393 }, { "epoch": 0.7674644508163145, "grad_norm": 2.140625, "learning_rate": 0.0001576475032884745, "loss": 2.6157, "step": 16394 }, { "epoch": 0.7675112645561473, "grad_norm": 1.296875, "learning_rate": 0.00015764273342170877, "loss": 2.4218, "step": 16395 }, { "epoch": 0.7675580782959799, "grad_norm": 1.15625, "learning_rate": 0.00015763796335852874, "loss": 3.7479, "step": 16396 }, { "epoch": 0.7676048920358125, "grad_norm": 1.1875, "learning_rate": 0.0001576331930989507, "loss": 2.4388, "step": 16397 }, { "epoch": 0.7676517057756451, "grad_norm": 1.4296875, "learning_rate": 0.00015762842264299088, "loss": 2.5171, "step": 16398 }, { "epoch": 0.7676985195154777, "grad_norm": 1.234375, "learning_rate": 0.00015762365199066552, "loss": 2.8575, "step": 16399 }, { "epoch": 0.7677453332553105, "grad_norm": 1.46875, "learning_rate": 0.00015761888114199094, "loss": 2.4261, "step": 16400 }, { "epoch": 0.7677921469951431, "grad_norm": 1.28125, "learning_rate": 0.00015761411009698333, "loss": 2.8307, "step": 16401 }, { "epoch": 0.7678389607349757, "grad_norm": 1.1953125, "learning_rate": 0.00015760933885565893, "loss": 2.2661, "step": 16402 }, { "epoch": 0.7678857744748083, "grad_norm": 1.4296875, "learning_rate": 0.0001576045674180341, "loss": 2.0962, "step": 16403 }, { "epoch": 0.7679325882146409, "grad_norm": 1.34375, "learning_rate": 0.00015759979578412503, "loss": 2.4597, "step": 16404 }, { "epoch": 0.7679794019544737, "grad_norm": 1.2421875, "learning_rate": 0.00015759502395394797, "loss": 2.5071, "step": 16405 }, { "epoch": 0.7680262156943063, "grad_norm": 1.4140625, "learning_rate": 0.0001575902519275192, "loss": 2.6434, "step": 16406 }, { "epoch": 0.7680730294341389, "grad_norm": 1.40625, "learning_rate": 0.00015758547970485496, "loss": 2.5058, "step": 16407 }, { "epoch": 0.7681198431739715, "grad_norm": 1.09375, "learning_rate": 0.00015758070728597154, "loss": 2.7168, "step": 16408 }, { "epoch": 0.7681666569138041, "grad_norm": 1.3359375, "learning_rate": 0.00015757593467088517, "loss": 2.5286, "step": 16409 }, { "epoch": 0.7682134706536369, "grad_norm": 1.296875, "learning_rate": 0.00015757116185961215, "loss": 2.3969, "step": 16410 }, { "epoch": 0.7682602843934695, "grad_norm": 1.296875, "learning_rate": 0.00015756638885216871, "loss": 2.3964, "step": 16411 }, { "epoch": 0.7683070981333021, "grad_norm": 1.1953125, "learning_rate": 0.00015756161564857115, "loss": 2.5227, "step": 16412 }, { "epoch": 0.7683539118731347, "grad_norm": 1.171875, "learning_rate": 0.0001575568422488357, "loss": 2.5907, "step": 16413 }, { "epoch": 0.7684007256129675, "grad_norm": 1.1640625, "learning_rate": 0.00015755206865297866, "loss": 2.6955, "step": 16414 }, { "epoch": 0.7684475393528001, "grad_norm": 1.2421875, "learning_rate": 0.00015754729486101623, "loss": 2.692, "step": 16415 }, { "epoch": 0.7684943530926327, "grad_norm": 2.3125, "learning_rate": 0.00015754252087296473, "loss": 3.0106, "step": 16416 }, { "epoch": 0.7685411668324653, "grad_norm": 1.78125, "learning_rate": 0.00015753774668884042, "loss": 2.5569, "step": 16417 }, { "epoch": 0.7685879805722979, "grad_norm": 1.5078125, "learning_rate": 0.00015753297230865958, "loss": 2.3738, "step": 16418 }, { "epoch": 0.7686347943121307, "grad_norm": 1.3515625, "learning_rate": 0.00015752819773243846, "loss": 2.5619, "step": 16419 }, { "epoch": 0.7686816080519633, "grad_norm": 1.4609375, "learning_rate": 0.0001575234229601933, "loss": 2.3573, "step": 16420 }, { "epoch": 0.7687284217917959, "grad_norm": 1.9921875, "learning_rate": 0.00015751864799194043, "loss": 2.9325, "step": 16421 }, { "epoch": 0.7687752355316285, "grad_norm": 1.5078125, "learning_rate": 0.0001575138728276961, "loss": 2.4469, "step": 16422 }, { "epoch": 0.7688220492714611, "grad_norm": 1.375, "learning_rate": 0.00015750909746747654, "loss": 2.8597, "step": 16423 }, { "epoch": 0.7688688630112939, "grad_norm": 1.265625, "learning_rate": 0.00015750432191129806, "loss": 2.5608, "step": 16424 }, { "epoch": 0.7689156767511265, "grad_norm": 1.609375, "learning_rate": 0.00015749954615917696, "loss": 2.8853, "step": 16425 }, { "epoch": 0.7689624904909591, "grad_norm": 1.4375, "learning_rate": 0.00015749477021112943, "loss": 2.5143, "step": 16426 }, { "epoch": 0.7690093042307917, "grad_norm": 2.484375, "learning_rate": 0.00015748999406717184, "loss": 2.4816, "step": 16427 }, { "epoch": 0.7690561179706243, "grad_norm": 1.1796875, "learning_rate": 0.0001574852177273204, "loss": 2.7286, "step": 16428 }, { "epoch": 0.7691029317104571, "grad_norm": 1.203125, "learning_rate": 0.00015748044119159142, "loss": 2.4472, "step": 16429 }, { "epoch": 0.7691497454502897, "grad_norm": 1.6171875, "learning_rate": 0.00015747566446000115, "loss": 2.5053, "step": 16430 }, { "epoch": 0.7691965591901223, "grad_norm": 1.4921875, "learning_rate": 0.00015747088753256585, "loss": 2.48, "step": 16431 }, { "epoch": 0.7692433729299549, "grad_norm": 1.296875, "learning_rate": 0.00015746611040930186, "loss": 2.7477, "step": 16432 }, { "epoch": 0.7692901866697875, "grad_norm": 1.1875, "learning_rate": 0.0001574613330902254, "loss": 2.4983, "step": 16433 }, { "epoch": 0.7693370004096203, "grad_norm": 1.9296875, "learning_rate": 0.00015745655557535278, "loss": 2.6984, "step": 16434 }, { "epoch": 0.7693838141494529, "grad_norm": 1.4609375, "learning_rate": 0.00015745177786470024, "loss": 2.7148, "step": 16435 }, { "epoch": 0.7694306278892855, "grad_norm": 1.3125, "learning_rate": 0.00015744699995828411, "loss": 2.503, "step": 16436 }, { "epoch": 0.7694774416291181, "grad_norm": 1.1015625, "learning_rate": 0.00015744222185612065, "loss": 2.3344, "step": 16437 }, { "epoch": 0.7695242553689507, "grad_norm": 1.3359375, "learning_rate": 0.0001574374435582261, "loss": 2.6043, "step": 16438 }, { "epoch": 0.7695710691087835, "grad_norm": 1.5234375, "learning_rate": 0.00015743266506461684, "loss": 2.9524, "step": 16439 }, { "epoch": 0.7696178828486161, "grad_norm": 1.734375, "learning_rate": 0.00015742788637530908, "loss": 2.5727, "step": 16440 }, { "epoch": 0.7696646965884487, "grad_norm": 1.9140625, "learning_rate": 0.0001574231074903191, "loss": 2.5515, "step": 16441 }, { "epoch": 0.7697115103282813, "grad_norm": 1.5234375, "learning_rate": 0.00015741832840966322, "loss": 2.2426, "step": 16442 }, { "epoch": 0.769758324068114, "grad_norm": 1.4296875, "learning_rate": 0.00015741354913335767, "loss": 2.7452, "step": 16443 }, { "epoch": 0.7698051378079467, "grad_norm": 1.21875, "learning_rate": 0.00015740876966141882, "loss": 2.5581, "step": 16444 }, { "epoch": 0.7698519515477793, "grad_norm": 1.4921875, "learning_rate": 0.00015740398999386285, "loss": 2.8893, "step": 16445 }, { "epoch": 0.7698987652876119, "grad_norm": 4.40625, "learning_rate": 0.00015739921013070615, "loss": 3.4747, "step": 16446 }, { "epoch": 0.7699455790274445, "grad_norm": 1.96875, "learning_rate": 0.00015739443007196493, "loss": 2.7608, "step": 16447 }, { "epoch": 0.7699923927672772, "grad_norm": 1.3984375, "learning_rate": 0.00015738964981765552, "loss": 2.5373, "step": 16448 }, { "epoch": 0.7700392065071099, "grad_norm": 1.2890625, "learning_rate": 0.0001573848693677942, "loss": 2.9555, "step": 16449 }, { "epoch": 0.7700860202469425, "grad_norm": 2.171875, "learning_rate": 0.00015738008872239723, "loss": 2.3896, "step": 16450 }, { "epoch": 0.7701328339867751, "grad_norm": 1.390625, "learning_rate": 0.00015737530788148096, "loss": 2.3822, "step": 16451 }, { "epoch": 0.7701796477266077, "grad_norm": 1.5234375, "learning_rate": 0.00015737052684506163, "loss": 2.6732, "step": 16452 }, { "epoch": 0.7702264614664404, "grad_norm": 2.40625, "learning_rate": 0.00015736574561315552, "loss": 2.6737, "step": 16453 }, { "epoch": 0.7702732752062731, "grad_norm": 1.2734375, "learning_rate": 0.00015736096418577899, "loss": 2.4561, "step": 16454 }, { "epoch": 0.7703200889461057, "grad_norm": 2.0625, "learning_rate": 0.00015735618256294825, "loss": 2.6303, "step": 16455 }, { "epoch": 0.7703669026859383, "grad_norm": 1.4140625, "learning_rate": 0.00015735140074467967, "loss": 2.2101, "step": 16456 }, { "epoch": 0.770413716425771, "grad_norm": 1.7734375, "learning_rate": 0.00015734661873098948, "loss": 2.4318, "step": 16457 }, { "epoch": 0.7704605301656036, "grad_norm": 1.375, "learning_rate": 0.00015734183652189401, "loss": 2.2568, "step": 16458 }, { "epoch": 0.7705073439054363, "grad_norm": 1.3515625, "learning_rate": 0.00015733705411740954, "loss": 2.7638, "step": 16459 }, { "epoch": 0.7705541576452689, "grad_norm": 1.6640625, "learning_rate": 0.00015733227151755238, "loss": 2.2393, "step": 16460 }, { "epoch": 0.7706009713851015, "grad_norm": 1.3984375, "learning_rate": 0.00015732748872233882, "loss": 2.6243, "step": 16461 }, { "epoch": 0.7706477851249341, "grad_norm": 1.5234375, "learning_rate": 0.00015732270573178515, "loss": 2.7277, "step": 16462 }, { "epoch": 0.7706945988647668, "grad_norm": 1.359375, "learning_rate": 0.0001573179225459077, "loss": 2.2338, "step": 16463 }, { "epoch": 0.7707414126045995, "grad_norm": 1.3203125, "learning_rate": 0.00015731313916472273, "loss": 2.3413, "step": 16464 }, { "epoch": 0.7707882263444321, "grad_norm": 1.96875, "learning_rate": 0.0001573083555882465, "loss": 2.7092, "step": 16465 }, { "epoch": 0.7708350400842647, "grad_norm": 1.40625, "learning_rate": 0.00015730357181649543, "loss": 2.3369, "step": 16466 }, { "epoch": 0.7708818538240974, "grad_norm": 1.1953125, "learning_rate": 0.0001572987878494857, "loss": 2.6418, "step": 16467 }, { "epoch": 0.77092866756393, "grad_norm": 1.46875, "learning_rate": 0.0001572940036872337, "loss": 2.7607, "step": 16468 }, { "epoch": 0.7709754813037627, "grad_norm": 1.5859375, "learning_rate": 0.00015728921932975566, "loss": 2.3705, "step": 16469 }, { "epoch": 0.7710222950435953, "grad_norm": 1.671875, "learning_rate": 0.00015728443477706797, "loss": 2.2453, "step": 16470 }, { "epoch": 0.7710691087834279, "grad_norm": 2.015625, "learning_rate": 0.00015727965002918686, "loss": 2.4636, "step": 16471 }, { "epoch": 0.7711159225232606, "grad_norm": 1.5390625, "learning_rate": 0.00015727486508612863, "loss": 2.5431, "step": 16472 }, { "epoch": 0.7711627362630932, "grad_norm": 1.3046875, "learning_rate": 0.0001572700799479096, "loss": 3.7448, "step": 16473 }, { "epoch": 0.7712095500029259, "grad_norm": 1.25, "learning_rate": 0.0001572652946145461, "loss": 2.6876, "step": 16474 }, { "epoch": 0.7712563637427585, "grad_norm": 1.3984375, "learning_rate": 0.00015726050908605442, "loss": 2.5819, "step": 16475 }, { "epoch": 0.7713031774825911, "grad_norm": 1.390625, "learning_rate": 0.00015725572336245086, "loss": 2.5789, "step": 16476 }, { "epoch": 0.7713499912224238, "grad_norm": 1.4140625, "learning_rate": 0.00015725093744375175, "loss": 2.7312, "step": 16477 }, { "epoch": 0.7713968049622564, "grad_norm": 1.3046875, "learning_rate": 0.00015724615132997337, "loss": 2.8114, "step": 16478 }, { "epoch": 0.7714436187020891, "grad_norm": 1.46875, "learning_rate": 0.00015724136502113202, "loss": 2.5418, "step": 16479 }, { "epoch": 0.7714904324419217, "grad_norm": 1.4765625, "learning_rate": 0.00015723657851724406, "loss": 2.3592, "step": 16480 }, { "epoch": 0.7715372461817543, "grad_norm": 2.6875, "learning_rate": 0.00015723179181832575, "loss": 2.0679, "step": 16481 }, { "epoch": 0.771584059921587, "grad_norm": 1.1796875, "learning_rate": 0.00015722700492439343, "loss": 2.2437, "step": 16482 }, { "epoch": 0.7716308736614196, "grad_norm": 1.4765625, "learning_rate": 0.0001572222178354634, "loss": 2.2929, "step": 16483 }, { "epoch": 0.7716776874012523, "grad_norm": 2.0625, "learning_rate": 0.00015721743055155193, "loss": 2.592, "step": 16484 }, { "epoch": 0.7717245011410849, "grad_norm": 1.4453125, "learning_rate": 0.00015721264307267538, "loss": 2.3698, "step": 16485 }, { "epoch": 0.7717713148809175, "grad_norm": 1.4921875, "learning_rate": 0.0001572078553988501, "loss": 2.6202, "step": 16486 }, { "epoch": 0.7718181286207502, "grad_norm": 1.3515625, "learning_rate": 0.0001572030675300923, "loss": 2.779, "step": 16487 }, { "epoch": 0.7718649423605828, "grad_norm": 1.3515625, "learning_rate": 0.0001571982794664184, "loss": 2.3846, "step": 16488 }, { "epoch": 0.7719117561004155, "grad_norm": 1.484375, "learning_rate": 0.0001571934912078447, "loss": 2.6915, "step": 16489 }, { "epoch": 0.7719585698402481, "grad_norm": 1.6640625, "learning_rate": 0.00015718870275438742, "loss": 2.3783, "step": 16490 }, { "epoch": 0.7720053835800808, "grad_norm": 1.4140625, "learning_rate": 0.00015718391410606298, "loss": 2.6067, "step": 16491 }, { "epoch": 0.7720521973199134, "grad_norm": 1.3046875, "learning_rate": 0.00015717912526288762, "loss": 2.257, "step": 16492 }, { "epoch": 0.772099011059746, "grad_norm": 1.390625, "learning_rate": 0.00015717433622487772, "loss": 4.2734, "step": 16493 }, { "epoch": 0.7721458247995787, "grad_norm": 1.2890625, "learning_rate": 0.00015716954699204955, "loss": 2.6166, "step": 16494 }, { "epoch": 0.7721926385394113, "grad_norm": 2.1875, "learning_rate": 0.00015716475756441948, "loss": 2.446, "step": 16495 }, { "epoch": 0.772239452279244, "grad_norm": 1.3671875, "learning_rate": 0.00015715996794200382, "loss": 2.7388, "step": 16496 }, { "epoch": 0.7722862660190766, "grad_norm": 1.3671875, "learning_rate": 0.00015715517812481885, "loss": 2.3614, "step": 16497 }, { "epoch": 0.7723330797589092, "grad_norm": 1.4765625, "learning_rate": 0.0001571503881128809, "loss": 2.4319, "step": 16498 }, { "epoch": 0.7723798934987419, "grad_norm": 1.234375, "learning_rate": 0.00015714559790620634, "loss": 2.8827, "step": 16499 }, { "epoch": 0.7724267072385745, "grad_norm": 1.5546875, "learning_rate": 0.00015714080750481142, "loss": 2.4965, "step": 16500 }, { "epoch": 0.7724735209784072, "grad_norm": 1.375, "learning_rate": 0.00015713601690871252, "loss": 2.6999, "step": 16501 }, { "epoch": 0.7725203347182398, "grad_norm": 2.375, "learning_rate": 0.00015713122611792595, "loss": 2.8295, "step": 16502 }, { "epoch": 0.7725671484580724, "grad_norm": 0.97265625, "learning_rate": 0.00015712643513246803, "loss": 2.5539, "step": 16503 }, { "epoch": 0.7726139621979051, "grad_norm": 1.625, "learning_rate": 0.00015712164395235506, "loss": 2.312, "step": 16504 }, { "epoch": 0.7726607759377377, "grad_norm": 1.3984375, "learning_rate": 0.0001571168525776034, "loss": 2.4388, "step": 16505 }, { "epoch": 0.7727075896775704, "grad_norm": 1.3359375, "learning_rate": 0.00015711206100822936, "loss": 2.8685, "step": 16506 }, { "epoch": 0.772754403417403, "grad_norm": 1.359375, "learning_rate": 0.00015710726924424932, "loss": 2.4947, "step": 16507 }, { "epoch": 0.7728012171572356, "grad_norm": 1.4921875, "learning_rate": 0.00015710247728567954, "loss": 2.6448, "step": 16508 }, { "epoch": 0.7728480308970683, "grad_norm": 1.84375, "learning_rate": 0.00015709768513253635, "loss": 2.6962, "step": 16509 }, { "epoch": 0.772894844636901, "grad_norm": 1.828125, "learning_rate": 0.0001570928927848361, "loss": 2.3956, "step": 16510 }, { "epoch": 0.7729416583767336, "grad_norm": 1.3671875, "learning_rate": 0.00015708810024259512, "loss": 2.2958, "step": 16511 }, { "epoch": 0.7729884721165662, "grad_norm": 1.3515625, "learning_rate": 0.00015708330750582975, "loss": 4.4342, "step": 16512 }, { "epoch": 0.7730352858563988, "grad_norm": 1.4296875, "learning_rate": 0.0001570785145745563, "loss": 2.5041, "step": 16513 }, { "epoch": 0.7730820995962315, "grad_norm": 1.6875, "learning_rate": 0.00015707372144879112, "loss": 2.406, "step": 16514 }, { "epoch": 0.7731289133360641, "grad_norm": 1.2734375, "learning_rate": 0.00015706892812855053, "loss": 2.8408, "step": 16515 }, { "epoch": 0.7731757270758968, "grad_norm": 1.6171875, "learning_rate": 0.00015706413461385085, "loss": 2.4555, "step": 16516 }, { "epoch": 0.7732225408157294, "grad_norm": 1.3671875, "learning_rate": 0.00015705934090470844, "loss": 2.8161, "step": 16517 }, { "epoch": 0.773269354555562, "grad_norm": 1.875, "learning_rate": 0.00015705454700113965, "loss": 2.9043, "step": 16518 }, { "epoch": 0.7733161682953947, "grad_norm": 1.6875, "learning_rate": 0.00015704975290316075, "loss": 2.8049, "step": 16519 }, { "epoch": 0.7733629820352274, "grad_norm": 1.7265625, "learning_rate": 0.00015704495861078814, "loss": 2.9529, "step": 16520 }, { "epoch": 0.77340979577506, "grad_norm": 1.9921875, "learning_rate": 0.00015704016412403812, "loss": 2.7543, "step": 16521 }, { "epoch": 0.7734566095148926, "grad_norm": 1.234375, "learning_rate": 0.00015703536944292705, "loss": 2.935, "step": 16522 }, { "epoch": 0.7735034232547252, "grad_norm": 1.2265625, "learning_rate": 0.00015703057456747122, "loss": 2.4013, "step": 16523 }, { "epoch": 0.7735502369945579, "grad_norm": 2.09375, "learning_rate": 0.00015702577949768706, "loss": 2.3173, "step": 16524 }, { "epoch": 0.7735970507343906, "grad_norm": 1.3515625, "learning_rate": 0.0001570209842335908, "loss": 2.294, "step": 16525 }, { "epoch": 0.7736438644742232, "grad_norm": 1.25, "learning_rate": 0.00015701618877519884, "loss": 2.5875, "step": 16526 }, { "epoch": 0.7736906782140558, "grad_norm": 1.40625, "learning_rate": 0.00015701139312252754, "loss": 2.555, "step": 16527 }, { "epoch": 0.7737374919538884, "grad_norm": 1.40625, "learning_rate": 0.0001570065972755932, "loss": 2.3742, "step": 16528 }, { "epoch": 0.7737843056937211, "grad_norm": 1.3359375, "learning_rate": 0.00015700180123441216, "loss": 2.3118, "step": 16529 }, { "epoch": 0.7738311194335538, "grad_norm": 1.5, "learning_rate": 0.00015699700499900078, "loss": 2.8055, "step": 16530 }, { "epoch": 0.7738779331733864, "grad_norm": 1.5703125, "learning_rate": 0.00015699220856937545, "loss": 2.5425, "step": 16531 }, { "epoch": 0.773924746913219, "grad_norm": 1.453125, "learning_rate": 0.0001569874119455524, "loss": 2.9455, "step": 16532 }, { "epoch": 0.7739715606530516, "grad_norm": 2.09375, "learning_rate": 0.00015698261512754806, "loss": 2.237, "step": 16533 }, { "epoch": 0.7740183743928843, "grad_norm": 2.15625, "learning_rate": 0.00015697781811537876, "loss": 2.3421, "step": 16534 }, { "epoch": 0.774065188132717, "grad_norm": 1.453125, "learning_rate": 0.00015697302090906084, "loss": 2.8339, "step": 16535 }, { "epoch": 0.7741120018725496, "grad_norm": 1.6484375, "learning_rate": 0.00015696822350861063, "loss": 2.6084, "step": 16536 }, { "epoch": 0.7741588156123822, "grad_norm": 1.25, "learning_rate": 0.0001569634259140445, "loss": 2.7766, "step": 16537 }, { "epoch": 0.7742056293522149, "grad_norm": 1.2734375, "learning_rate": 0.00015695862812537881, "loss": 2.763, "step": 16538 }, { "epoch": 0.7742524430920475, "grad_norm": 1.453125, "learning_rate": 0.00015695383014262984, "loss": 2.4766, "step": 16539 }, { "epoch": 0.7742992568318802, "grad_norm": 1.46875, "learning_rate": 0.000156949031965814, "loss": 2.4803, "step": 16540 }, { "epoch": 0.7743460705717128, "grad_norm": 1.2890625, "learning_rate": 0.00015694423359494765, "loss": 2.5311, "step": 16541 }, { "epoch": 0.7743928843115454, "grad_norm": 1.328125, "learning_rate": 0.0001569394350300471, "loss": 2.6821, "step": 16542 }, { "epoch": 0.7744396980513781, "grad_norm": 1.25, "learning_rate": 0.00015693463627112873, "loss": 2.5819, "step": 16543 }, { "epoch": 0.7744865117912108, "grad_norm": 1.6171875, "learning_rate": 0.00015692983731820884, "loss": 2.6267, "step": 16544 }, { "epoch": 0.7745333255310434, "grad_norm": 1.34375, "learning_rate": 0.00015692503817130385, "loss": 2.5363, "step": 16545 }, { "epoch": 0.774580139270876, "grad_norm": 1.609375, "learning_rate": 0.0001569202388304301, "loss": 2.559, "step": 16546 }, { "epoch": 0.7746269530107086, "grad_norm": 1.3984375, "learning_rate": 0.0001569154392956039, "loss": 2.5894, "step": 16547 }, { "epoch": 0.7746737667505413, "grad_norm": 1.6015625, "learning_rate": 0.00015691063956684164, "loss": 2.5988, "step": 16548 }, { "epoch": 0.774720580490374, "grad_norm": 1.5, "learning_rate": 0.00015690583964415966, "loss": 2.1629, "step": 16549 }, { "epoch": 0.7747673942302066, "grad_norm": 2.25, "learning_rate": 0.0001569010395275743, "loss": 2.4663, "step": 16550 }, { "epoch": 0.7748142079700392, "grad_norm": 1.3515625, "learning_rate": 0.00015689623921710198, "loss": 2.1031, "step": 16551 }, { "epoch": 0.7748610217098718, "grad_norm": 1.265625, "learning_rate": 0.00015689143871275898, "loss": 2.6275, "step": 16552 }, { "epoch": 0.7749078354497045, "grad_norm": 1.28125, "learning_rate": 0.0001568866380145617, "loss": 2.4101, "step": 16553 }, { "epoch": 0.7749546491895372, "grad_norm": 1.2734375, "learning_rate": 0.00015688183712252648, "loss": 2.3734, "step": 16554 }, { "epoch": 0.7750014629293698, "grad_norm": 1.1640625, "learning_rate": 0.00015687703603666972, "loss": 2.7252, "step": 16555 }, { "epoch": 0.7750482766692024, "grad_norm": 1.09375, "learning_rate": 0.00015687223475700774, "loss": 2.6538, "step": 16556 }, { "epoch": 0.775095090409035, "grad_norm": 1.3125, "learning_rate": 0.0001568674332835569, "loss": 2.5155, "step": 16557 }, { "epoch": 0.7751419041488677, "grad_norm": 1.296875, "learning_rate": 0.00015686263161633355, "loss": 2.1087, "step": 16558 }, { "epoch": 0.7751887178887004, "grad_norm": 1.390625, "learning_rate": 0.00015685782975535405, "loss": 2.5333, "step": 16559 }, { "epoch": 0.775235531628533, "grad_norm": 1.7578125, "learning_rate": 0.00015685302770063485, "loss": 2.6889, "step": 16560 }, { "epoch": 0.7752823453683656, "grad_norm": 1.6640625, "learning_rate": 0.0001568482254521922, "loss": 2.1893, "step": 16561 }, { "epoch": 0.7753291591081982, "grad_norm": 1.3671875, "learning_rate": 0.0001568434230100425, "loss": 2.5656, "step": 16562 }, { "epoch": 0.775375972848031, "grad_norm": 1.2578125, "learning_rate": 0.00015683862037420214, "loss": 2.669, "step": 16563 }, { "epoch": 0.7754227865878636, "grad_norm": 2.046875, "learning_rate": 0.00015683381754468746, "loss": 2.8708, "step": 16564 }, { "epoch": 0.7754696003276962, "grad_norm": 1.40625, "learning_rate": 0.00015682901452151486, "loss": 2.4815, "step": 16565 }, { "epoch": 0.7755164140675288, "grad_norm": 1.6875, "learning_rate": 0.00015682421130470067, "loss": 2.3379, "step": 16566 }, { "epoch": 0.7755632278073614, "grad_norm": 1.9609375, "learning_rate": 0.00015681940789426122, "loss": 2.1524, "step": 16567 }, { "epoch": 0.7756100415471942, "grad_norm": 1.34375, "learning_rate": 0.00015681460429021294, "loss": 2.8401, "step": 16568 }, { "epoch": 0.7756568552870268, "grad_norm": 1.1328125, "learning_rate": 0.0001568098004925722, "loss": 2.7137, "step": 16569 }, { "epoch": 0.7757036690268594, "grad_norm": 1.359375, "learning_rate": 0.00015680499650135533, "loss": 2.6205, "step": 16570 }, { "epoch": 0.775750482766692, "grad_norm": 1.734375, "learning_rate": 0.00015680019231657875, "loss": 2.4021, "step": 16571 }, { "epoch": 0.7757972965065246, "grad_norm": 1.5546875, "learning_rate": 0.0001567953879382588, "loss": 2.4806, "step": 16572 }, { "epoch": 0.7758441102463574, "grad_norm": 1.203125, "learning_rate": 0.00015679058336641179, "loss": 2.4043, "step": 16573 }, { "epoch": 0.77589092398619, "grad_norm": 1.2890625, "learning_rate": 0.00015678577860105418, "loss": 2.5652, "step": 16574 }, { "epoch": 0.7759377377260226, "grad_norm": 1.109375, "learning_rate": 0.00015678097364220234, "loss": 2.0464, "step": 16575 }, { "epoch": 0.7759845514658552, "grad_norm": 1.328125, "learning_rate": 0.00015677616848987257, "loss": 2.856, "step": 16576 }, { "epoch": 0.7760313652056878, "grad_norm": 1.5390625, "learning_rate": 0.00015677136314408131, "loss": 2.3467, "step": 16577 }, { "epoch": 0.7760781789455206, "grad_norm": 1.453125, "learning_rate": 0.00015676655760484492, "loss": 2.5388, "step": 16578 }, { "epoch": 0.7761249926853532, "grad_norm": 1.3203125, "learning_rate": 0.00015676175187217975, "loss": 2.5916, "step": 16579 }, { "epoch": 0.7761718064251858, "grad_norm": 1.15625, "learning_rate": 0.00015675694594610218, "loss": 2.3883, "step": 16580 }, { "epoch": 0.7762186201650184, "grad_norm": 1.5390625, "learning_rate": 0.00015675213982662865, "loss": 2.4743, "step": 16581 }, { "epoch": 0.776265433904851, "grad_norm": 1.9140625, "learning_rate": 0.00015674733351377543, "loss": 2.7765, "step": 16582 }, { "epoch": 0.7763122476446838, "grad_norm": 2.6875, "learning_rate": 0.00015674252700755896, "loss": 1.9701, "step": 16583 }, { "epoch": 0.7763590613845164, "grad_norm": 1.3046875, "learning_rate": 0.00015673772030799562, "loss": 2.5823, "step": 16584 }, { "epoch": 0.776405875124349, "grad_norm": 1.65625, "learning_rate": 0.00015673291341510178, "loss": 2.5048, "step": 16585 }, { "epoch": 0.7764526888641816, "grad_norm": 1.71875, "learning_rate": 0.00015672810632889382, "loss": 2.4639, "step": 16586 }, { "epoch": 0.7764995026040142, "grad_norm": 1.59375, "learning_rate": 0.0001567232990493881, "loss": 2.6078, "step": 16587 }, { "epoch": 0.776546316343847, "grad_norm": 1.328125, "learning_rate": 0.00015671849157660104, "loss": 2.4061, "step": 16588 }, { "epoch": 0.7765931300836796, "grad_norm": 1.3828125, "learning_rate": 0.00015671368391054896, "loss": 2.5863, "step": 16589 }, { "epoch": 0.7766399438235122, "grad_norm": 1.046875, "learning_rate": 0.00015670887605124834, "loss": 2.6519, "step": 16590 }, { "epoch": 0.7766867575633448, "grad_norm": 1.53125, "learning_rate": 0.00015670406799871546, "loss": 2.3853, "step": 16591 }, { "epoch": 0.7767335713031774, "grad_norm": 1.4296875, "learning_rate": 0.00015669925975296675, "loss": 2.6852, "step": 16592 }, { "epoch": 0.7767803850430102, "grad_norm": 1.296875, "learning_rate": 0.00015669445131401857, "loss": 2.4749, "step": 16593 }, { "epoch": 0.7768271987828428, "grad_norm": 1.6640625, "learning_rate": 0.0001566896426818874, "loss": 2.5544, "step": 16594 }, { "epoch": 0.7768740125226754, "grad_norm": 1.2109375, "learning_rate": 0.00015668483385658947, "loss": 2.3473, "step": 16595 }, { "epoch": 0.776920826262508, "grad_norm": 1.3203125, "learning_rate": 0.00015668002483814128, "loss": 2.6235, "step": 16596 }, { "epoch": 0.7769676400023406, "grad_norm": 1.125, "learning_rate": 0.00015667521562655917, "loss": 3.3849, "step": 16597 }, { "epoch": 0.7770144537421734, "grad_norm": 1.375, "learning_rate": 0.00015667040622185954, "loss": 2.5007, "step": 16598 }, { "epoch": 0.777061267482006, "grad_norm": 2.140625, "learning_rate": 0.00015666559662405878, "loss": 2.8418, "step": 16599 }, { "epoch": 0.7771080812218386, "grad_norm": 1.6484375, "learning_rate": 0.00015666078683317328, "loss": 2.3005, "step": 16600 }, { "epoch": 0.7771548949616712, "grad_norm": 1.4609375, "learning_rate": 0.00015665597684921942, "loss": 2.7726, "step": 16601 }, { "epoch": 0.7772017087015038, "grad_norm": 2.1875, "learning_rate": 0.0001566511666722136, "loss": 2.42, "step": 16602 }, { "epoch": 0.7772485224413366, "grad_norm": 1.3515625, "learning_rate": 0.00015664635630217218, "loss": 2.5765, "step": 16603 }, { "epoch": 0.7772953361811692, "grad_norm": 1.59375, "learning_rate": 0.0001566415457391116, "loss": 2.6244, "step": 16604 }, { "epoch": 0.7773421499210018, "grad_norm": 1.484375, "learning_rate": 0.00015663673498304823, "loss": 2.6405, "step": 16605 }, { "epoch": 0.7773889636608344, "grad_norm": 1.203125, "learning_rate": 0.00015663192403399844, "loss": 2.5705, "step": 16606 }, { "epoch": 0.777435777400667, "grad_norm": 1.4609375, "learning_rate": 0.00015662711289197865, "loss": 2.522, "step": 16607 }, { "epoch": 0.7774825911404998, "grad_norm": 2.125, "learning_rate": 0.00015662230155700523, "loss": 2.8352, "step": 16608 }, { "epoch": 0.7775294048803324, "grad_norm": 1.7421875, "learning_rate": 0.00015661749002909462, "loss": 2.7055, "step": 16609 }, { "epoch": 0.777576218620165, "grad_norm": 1.40625, "learning_rate": 0.0001566126783082632, "loss": 2.2052, "step": 16610 }, { "epoch": 0.7776230323599976, "grad_norm": 1.765625, "learning_rate": 0.0001566078663945273, "loss": 2.3853, "step": 16611 }, { "epoch": 0.7776698460998303, "grad_norm": 1.359375, "learning_rate": 0.0001566030542879034, "loss": 2.6147, "step": 16612 }, { "epoch": 0.777716659839663, "grad_norm": 1.6484375, "learning_rate": 0.00015659824198840783, "loss": 2.8804, "step": 16613 }, { "epoch": 0.7777634735794956, "grad_norm": 1.5234375, "learning_rate": 0.00015659342949605706, "loss": 2.5581, "step": 16614 }, { "epoch": 0.7778102873193282, "grad_norm": 1.5546875, "learning_rate": 0.00015658861681086745, "loss": 2.3266, "step": 16615 }, { "epoch": 0.7778571010591608, "grad_norm": 2.859375, "learning_rate": 0.00015658380393285536, "loss": 3.1682, "step": 16616 }, { "epoch": 0.7779039147989935, "grad_norm": 1.5546875, "learning_rate": 0.0001565789908620373, "loss": 2.3952, "step": 16617 }, { "epoch": 0.7779507285388262, "grad_norm": 2.609375, "learning_rate": 0.00015657417759842952, "loss": 2.5029, "step": 16618 }, { "epoch": 0.7779975422786588, "grad_norm": 1.046875, "learning_rate": 0.00015656936414204853, "loss": 1.865, "step": 16619 }, { "epoch": 0.7780443560184914, "grad_norm": 1.265625, "learning_rate": 0.0001565645504929107, "loss": 2.3475, "step": 16620 }, { "epoch": 0.778091169758324, "grad_norm": 1.59375, "learning_rate": 0.00015655973665103243, "loss": 2.7116, "step": 16621 }, { "epoch": 0.7781379834981567, "grad_norm": 1.3671875, "learning_rate": 0.00015655492261643013, "loss": 2.3716, "step": 16622 }, { "epoch": 0.7781847972379894, "grad_norm": 1.71875, "learning_rate": 0.00015655010838912023, "loss": 4.2489, "step": 16623 }, { "epoch": 0.778231610977822, "grad_norm": 1.3515625, "learning_rate": 0.0001565452939691191, "loss": 2.4619, "step": 16624 }, { "epoch": 0.7782784247176546, "grad_norm": 1.5703125, "learning_rate": 0.00015654047935644312, "loss": 2.6543, "step": 16625 }, { "epoch": 0.7783252384574872, "grad_norm": 1.296875, "learning_rate": 0.00015653566455110874, "loss": 2.8191, "step": 16626 }, { "epoch": 0.7783720521973199, "grad_norm": 1.21875, "learning_rate": 0.00015653084955313232, "loss": 2.6562, "step": 16627 }, { "epoch": 0.7784188659371526, "grad_norm": 1.921875, "learning_rate": 0.00015652603436253033, "loss": 2.6167, "step": 16628 }, { "epoch": 0.7784656796769852, "grad_norm": 1.34375, "learning_rate": 0.00015652121897931915, "loss": 2.4826, "step": 16629 }, { "epoch": 0.7785124934168178, "grad_norm": 1.2890625, "learning_rate": 0.00015651640340351517, "loss": 2.5207, "step": 16630 }, { "epoch": 0.7785593071566504, "grad_norm": 1.90625, "learning_rate": 0.0001565115876351348, "loss": 2.6875, "step": 16631 }, { "epoch": 0.7786061208964831, "grad_norm": 1.2109375, "learning_rate": 0.0001565067716741945, "loss": 2.1662, "step": 16632 }, { "epoch": 0.7786529346363158, "grad_norm": 1.2734375, "learning_rate": 0.00015650195552071062, "loss": 2.2677, "step": 16633 }, { "epoch": 0.7786997483761484, "grad_norm": 1.3984375, "learning_rate": 0.00015649713917469956, "loss": 3.1016, "step": 16634 }, { "epoch": 0.778746562115981, "grad_norm": 1.5234375, "learning_rate": 0.0001564923226361778, "loss": 2.7738, "step": 16635 }, { "epoch": 0.7787933758558137, "grad_norm": 1.515625, "learning_rate": 0.00015648750590516172, "loss": 2.4405, "step": 16636 }, { "epoch": 0.7788401895956463, "grad_norm": 1.5234375, "learning_rate": 0.00015648268898166775, "loss": 1.968, "step": 16637 }, { "epoch": 0.778887003335479, "grad_norm": 1.3046875, "learning_rate": 0.00015647787186571222, "loss": 2.518, "step": 16638 }, { "epoch": 0.7789338170753116, "grad_norm": 1.2578125, "learning_rate": 0.00015647305455731165, "loss": 2.592, "step": 16639 }, { "epoch": 0.7789806308151442, "grad_norm": 1.328125, "learning_rate": 0.0001564682370564824, "loss": 2.5064, "step": 16640 }, { "epoch": 0.7790274445549769, "grad_norm": 1.3828125, "learning_rate": 0.00015646341936324088, "loss": 2.4608, "step": 16641 }, { "epoch": 0.7790742582948095, "grad_norm": 1.421875, "learning_rate": 0.00015645860147760353, "loss": 2.6974, "step": 16642 }, { "epoch": 0.7791210720346422, "grad_norm": 1.328125, "learning_rate": 0.0001564537833995868, "loss": 2.2687, "step": 16643 }, { "epoch": 0.7791678857744748, "grad_norm": 1.578125, "learning_rate": 0.00015644896512920703, "loss": 2.39, "step": 16644 }, { "epoch": 0.7792146995143074, "grad_norm": 2.140625, "learning_rate": 0.00015644414666648066, "loss": 2.8085, "step": 16645 }, { "epoch": 0.7792615132541401, "grad_norm": 1.421875, "learning_rate": 0.00015643932801142414, "loss": 2.6556, "step": 16646 }, { "epoch": 0.7793083269939727, "grad_norm": 1.59375, "learning_rate": 0.00015643450916405386, "loss": 3.9697, "step": 16647 }, { "epoch": 0.7793551407338054, "grad_norm": 1.1484375, "learning_rate": 0.0001564296901243863, "loss": 2.0487, "step": 16648 }, { "epoch": 0.779401954473638, "grad_norm": 2.171875, "learning_rate": 0.00015642487089243776, "loss": 3.0087, "step": 16649 }, { "epoch": 0.7794487682134706, "grad_norm": 1.3828125, "learning_rate": 0.00015642005146822478, "loss": 2.9021, "step": 16650 }, { "epoch": 0.7794955819533033, "grad_norm": 1.171875, "learning_rate": 0.00015641523185176373, "loss": 2.5236, "step": 16651 }, { "epoch": 0.7795423956931359, "grad_norm": 1.625, "learning_rate": 0.00015641041204307102, "loss": 2.6171, "step": 16652 }, { "epoch": 0.7795892094329686, "grad_norm": 1.3046875, "learning_rate": 0.00015640559204216313, "loss": 2.6463, "step": 16653 }, { "epoch": 0.7796360231728012, "grad_norm": 1.1796875, "learning_rate": 0.0001564007718490564, "loss": 2.2304, "step": 16654 }, { "epoch": 0.7796828369126338, "grad_norm": 1.2578125, "learning_rate": 0.00015639595146376732, "loss": 2.5218, "step": 16655 }, { "epoch": 0.7797296506524665, "grad_norm": 1.25, "learning_rate": 0.00015639113088631231, "loss": 2.0962, "step": 16656 }, { "epoch": 0.7797764643922991, "grad_norm": 1.7578125, "learning_rate": 0.00015638631011670776, "loss": 2.1884, "step": 16657 }, { "epoch": 0.7798232781321318, "grad_norm": 1.1953125, "learning_rate": 0.00015638148915497014, "loss": 3.7134, "step": 16658 }, { "epoch": 0.7798700918719644, "grad_norm": 1.328125, "learning_rate": 0.00015637666800111583, "loss": 2.8013, "step": 16659 }, { "epoch": 0.779916905611797, "grad_norm": 1.5703125, "learning_rate": 0.0001563718466551613, "loss": 2.6365, "step": 16660 }, { "epoch": 0.7799637193516297, "grad_norm": 1.5546875, "learning_rate": 0.00015636702511712296, "loss": 2.5888, "step": 16661 }, { "epoch": 0.7800105330914624, "grad_norm": 2.21875, "learning_rate": 0.00015636220338701725, "loss": 2.5472, "step": 16662 }, { "epoch": 0.780057346831295, "grad_norm": 1.4375, "learning_rate": 0.00015635738146486055, "loss": 2.4658, "step": 16663 }, { "epoch": 0.7801041605711276, "grad_norm": 1.1640625, "learning_rate": 0.00015635255935066934, "loss": 2.6838, "step": 16664 }, { "epoch": 0.7801509743109603, "grad_norm": 1.171875, "learning_rate": 0.00015634773704446006, "loss": 2.4315, "step": 16665 }, { "epoch": 0.7801977880507929, "grad_norm": 1.484375, "learning_rate": 0.00015634291454624913, "loss": 2.2426, "step": 16666 }, { "epoch": 0.7802446017906256, "grad_norm": 1.3046875, "learning_rate": 0.00015633809185605294, "loss": 2.7118, "step": 16667 }, { "epoch": 0.7802914155304582, "grad_norm": 1.203125, "learning_rate": 0.000156333268973888, "loss": 2.7031, "step": 16668 }, { "epoch": 0.7803382292702908, "grad_norm": 1.8046875, "learning_rate": 0.0001563284458997707, "loss": 2.6681, "step": 16669 }, { "epoch": 0.7803850430101235, "grad_norm": 1.5859375, "learning_rate": 0.00015632362263371747, "loss": 2.5933, "step": 16670 }, { "epoch": 0.7804318567499561, "grad_norm": 1.8125, "learning_rate": 0.00015631879917574475, "loss": 2.401, "step": 16671 }, { "epoch": 0.7804786704897888, "grad_norm": 1.2265625, "learning_rate": 0.00015631397552586895, "loss": 1.9057, "step": 16672 }, { "epoch": 0.7805254842296214, "grad_norm": 1.2578125, "learning_rate": 0.00015630915168410656, "loss": 2.7271, "step": 16673 }, { "epoch": 0.780572297969454, "grad_norm": 1.296875, "learning_rate": 0.00015630432765047399, "loss": 2.4168, "step": 16674 }, { "epoch": 0.7806191117092867, "grad_norm": 1.7265625, "learning_rate": 0.00015629950342498767, "loss": 2.4748, "step": 16675 }, { "epoch": 0.7806659254491193, "grad_norm": 1.4921875, "learning_rate": 0.00015629467900766402, "loss": 2.7557, "step": 16676 }, { "epoch": 0.780712739188952, "grad_norm": 1.921875, "learning_rate": 0.00015628985439851957, "loss": 2.9341, "step": 16677 }, { "epoch": 0.7807595529287846, "grad_norm": 1.3984375, "learning_rate": 0.00015628502959757064, "loss": 2.7035, "step": 16678 }, { "epoch": 0.7808063666686172, "grad_norm": 1.609375, "learning_rate": 0.00015628020460483371, "loss": 2.282, "step": 16679 }, { "epoch": 0.7808531804084499, "grad_norm": 1.21875, "learning_rate": 0.0001562753794203253, "loss": 2.5667, "step": 16680 }, { "epoch": 0.7808999941482825, "grad_norm": 1.3828125, "learning_rate": 0.00015627055404406173, "loss": 2.3913, "step": 16681 }, { "epoch": 0.7809468078881152, "grad_norm": 1.1171875, "learning_rate": 0.00015626572847605955, "loss": 2.2661, "step": 16682 }, { "epoch": 0.7809936216279478, "grad_norm": 1.3984375, "learning_rate": 0.00015626090271633514, "loss": 2.6291, "step": 16683 }, { "epoch": 0.7810404353677805, "grad_norm": 1.46875, "learning_rate": 0.00015625607676490491, "loss": 2.5152, "step": 16684 }, { "epoch": 0.7810872491076131, "grad_norm": 1.5234375, "learning_rate": 0.00015625125062178542, "loss": 2.3409, "step": 16685 }, { "epoch": 0.7811340628474457, "grad_norm": 1.25, "learning_rate": 0.000156246424286993, "loss": 2.1152, "step": 16686 }, { "epoch": 0.7811808765872784, "grad_norm": 1.234375, "learning_rate": 0.00015624159776054416, "loss": 2.6889, "step": 16687 }, { "epoch": 0.781227690327111, "grad_norm": 1.28125, "learning_rate": 0.0001562367710424553, "loss": 2.5205, "step": 16688 }, { "epoch": 0.7812745040669437, "grad_norm": 1.5, "learning_rate": 0.00015623194413274294, "loss": 2.6352, "step": 16689 }, { "epoch": 0.7813213178067763, "grad_norm": 1.5, "learning_rate": 0.00015622711703142342, "loss": 2.1612, "step": 16690 }, { "epoch": 0.7813681315466089, "grad_norm": 1.375, "learning_rate": 0.0001562222897385133, "loss": 2.3679, "step": 16691 }, { "epoch": 0.7814149452864416, "grad_norm": 2.3125, "learning_rate": 0.00015621746225402894, "loss": 2.51, "step": 16692 }, { "epoch": 0.7814617590262742, "grad_norm": 1.9140625, "learning_rate": 0.00015621263457798684, "loss": 2.2687, "step": 16693 }, { "epoch": 0.7815085727661069, "grad_norm": 1.6328125, "learning_rate": 0.00015620780671040345, "loss": 2.4319, "step": 16694 }, { "epoch": 0.7815553865059395, "grad_norm": 1.53125, "learning_rate": 0.0001562029786512952, "loss": 2.3344, "step": 16695 }, { "epoch": 0.7816022002457721, "grad_norm": 1.15625, "learning_rate": 0.00015619815040067856, "loss": 2.4076, "step": 16696 }, { "epoch": 0.7816490139856048, "grad_norm": 1.3125, "learning_rate": 0.00015619332195856994, "loss": 2.7126, "step": 16697 }, { "epoch": 0.7816958277254374, "grad_norm": 1.140625, "learning_rate": 0.00015618849332498585, "loss": 2.4154, "step": 16698 }, { "epoch": 0.7817426414652701, "grad_norm": 1.5, "learning_rate": 0.00015618366449994273, "loss": 2.1195, "step": 16699 }, { "epoch": 0.7817894552051027, "grad_norm": 1.421875, "learning_rate": 0.000156178835483457, "loss": 2.4871, "step": 16700 }, { "epoch": 0.7818362689449353, "grad_norm": 1.828125, "learning_rate": 0.00015617400627554513, "loss": 2.7737, "step": 16701 }, { "epoch": 0.781883082684768, "grad_norm": 1.578125, "learning_rate": 0.00015616917687622358, "loss": 2.3812, "step": 16702 }, { "epoch": 0.7819298964246006, "grad_norm": 1.2109375, "learning_rate": 0.0001561643472855088, "loss": 2.5613, "step": 16703 }, { "epoch": 0.7819767101644333, "grad_norm": 2.203125, "learning_rate": 0.00015615951750341727, "loss": 3.0451, "step": 16704 }, { "epoch": 0.7820235239042659, "grad_norm": 1.9296875, "learning_rate": 0.00015615468752996542, "loss": 2.4476, "step": 16705 }, { "epoch": 0.7820703376440985, "grad_norm": 1.3203125, "learning_rate": 0.0001561498573651697, "loss": 2.563, "step": 16706 }, { "epoch": 0.7821171513839312, "grad_norm": 2.703125, "learning_rate": 0.00015614502700904664, "loss": 3.0363, "step": 16707 }, { "epoch": 0.7821639651237638, "grad_norm": 1.3984375, "learning_rate": 0.0001561401964616126, "loss": 2.6191, "step": 16708 }, { "epoch": 0.7822107788635965, "grad_norm": 1.5703125, "learning_rate": 0.00015613536572288411, "loss": 2.2095, "step": 16709 }, { "epoch": 0.7822575926034291, "grad_norm": 2.609375, "learning_rate": 0.00015613053479287764, "loss": 2.3834, "step": 16710 }, { "epoch": 0.7823044063432617, "grad_norm": 1.359375, "learning_rate": 0.00015612570367160955, "loss": 2.8071, "step": 16711 }, { "epoch": 0.7823512200830944, "grad_norm": 1.4609375, "learning_rate": 0.00015612087235909638, "loss": 2.655, "step": 16712 }, { "epoch": 0.782398033822927, "grad_norm": 1.2890625, "learning_rate": 0.00015611604085535462, "loss": 2.6849, "step": 16713 }, { "epoch": 0.7824448475627597, "grad_norm": 1.484375, "learning_rate": 0.00015611120916040068, "loss": 2.5946, "step": 16714 }, { "epoch": 0.7824916613025923, "grad_norm": 1.078125, "learning_rate": 0.000156106377274251, "loss": 2.3655, "step": 16715 }, { "epoch": 0.7825384750424249, "grad_norm": 1.9140625, "learning_rate": 0.0001561015451969221, "loss": 2.3797, "step": 16716 }, { "epoch": 0.7825852887822576, "grad_norm": 1.796875, "learning_rate": 0.00015609671292843045, "loss": 2.5195, "step": 16717 }, { "epoch": 0.7826321025220903, "grad_norm": 1.234375, "learning_rate": 0.00015609188046879249, "loss": 2.3203, "step": 16718 }, { "epoch": 0.7826789162619229, "grad_norm": 1.65625, "learning_rate": 0.00015608704781802467, "loss": 2.5638, "step": 16719 }, { "epoch": 0.7827257300017555, "grad_norm": 1.4375, "learning_rate": 0.0001560822149761435, "loss": 2.3371, "step": 16720 }, { "epoch": 0.7827725437415881, "grad_norm": 1.5703125, "learning_rate": 0.00015607738194316543, "loss": 2.7632, "step": 16721 }, { "epoch": 0.7828193574814208, "grad_norm": 1.3359375, "learning_rate": 0.00015607254871910687, "loss": 2.4432, "step": 16722 }, { "epoch": 0.7828661712212535, "grad_norm": 1.9375, "learning_rate": 0.00015606771530398441, "loss": 2.5361, "step": 16723 }, { "epoch": 0.7829129849610861, "grad_norm": 1.4296875, "learning_rate": 0.0001560628816978144, "loss": 2.7635, "step": 16724 }, { "epoch": 0.7829597987009187, "grad_norm": 1.328125, "learning_rate": 0.00015605804790061337, "loss": 2.4912, "step": 16725 }, { "epoch": 0.7830066124407513, "grad_norm": 1.59375, "learning_rate": 0.0001560532139123978, "loss": 2.7229, "step": 16726 }, { "epoch": 0.783053426180584, "grad_norm": 1.5625, "learning_rate": 0.0001560483797331841, "loss": 2.9033, "step": 16727 }, { "epoch": 0.7831002399204167, "grad_norm": 2.296875, "learning_rate": 0.00015604354536298884, "loss": 2.8867, "step": 16728 }, { "epoch": 0.7831470536602493, "grad_norm": 2.59375, "learning_rate": 0.00015603871080182845, "loss": 2.2606, "step": 16729 }, { "epoch": 0.7831938674000819, "grad_norm": 1.3046875, "learning_rate": 0.00015603387604971934, "loss": 2.6973, "step": 16730 }, { "epoch": 0.7832406811399145, "grad_norm": 2.359375, "learning_rate": 0.00015602904110667803, "loss": 2.8452, "step": 16731 }, { "epoch": 0.7832874948797472, "grad_norm": 2.1875, "learning_rate": 0.00015602420597272105, "loss": 2.8667, "step": 16732 }, { "epoch": 0.7833343086195799, "grad_norm": 1.3515625, "learning_rate": 0.0001560193706478648, "loss": 2.3728, "step": 16733 }, { "epoch": 0.7833811223594125, "grad_norm": 1.84375, "learning_rate": 0.0001560145351321258, "loss": 2.7401, "step": 16734 }, { "epoch": 0.7834279360992451, "grad_norm": 2.515625, "learning_rate": 0.00015600969942552052, "loss": 2.2621, "step": 16735 }, { "epoch": 0.7834747498390777, "grad_norm": 2.09375, "learning_rate": 0.00015600486352806536, "loss": 2.7562, "step": 16736 }, { "epoch": 0.7835215635789105, "grad_norm": 1.4609375, "learning_rate": 0.00015600002743977693, "loss": 2.3005, "step": 16737 }, { "epoch": 0.7835683773187431, "grad_norm": 1.546875, "learning_rate": 0.00015599519116067163, "loss": 2.5854, "step": 16738 }, { "epoch": 0.7836151910585757, "grad_norm": 1.3125, "learning_rate": 0.00015599035469076594, "loss": 2.4777, "step": 16739 }, { "epoch": 0.7836620047984083, "grad_norm": 0.96484375, "learning_rate": 0.00015598551803007636, "loss": 2.1644, "step": 16740 }, { "epoch": 0.7837088185382409, "grad_norm": 1.34375, "learning_rate": 0.00015598068117861935, "loss": 2.3801, "step": 16741 }, { "epoch": 0.7837556322780737, "grad_norm": 1.265625, "learning_rate": 0.00015597584413641142, "loss": 2.6367, "step": 16742 }, { "epoch": 0.7838024460179063, "grad_norm": 1.2890625, "learning_rate": 0.000155971006903469, "loss": 2.5451, "step": 16743 }, { "epoch": 0.7838492597577389, "grad_norm": 1.4375, "learning_rate": 0.00015596616947980866, "loss": 2.5165, "step": 16744 }, { "epoch": 0.7838960734975715, "grad_norm": 1.9140625, "learning_rate": 0.0001559613318654468, "loss": 2.9761, "step": 16745 }, { "epoch": 0.7839428872374041, "grad_norm": 5.3125, "learning_rate": 0.00015595649406039997, "loss": 3.0676, "step": 16746 }, { "epoch": 0.7839897009772369, "grad_norm": 1.4375, "learning_rate": 0.0001559516560646846, "loss": 2.634, "step": 16747 }, { "epoch": 0.7840365147170695, "grad_norm": 1.0859375, "learning_rate": 0.0001559468178783172, "loss": 2.4681, "step": 16748 }, { "epoch": 0.7840833284569021, "grad_norm": 1.578125, "learning_rate": 0.00015594197950131425, "loss": 2.1337, "step": 16749 }, { "epoch": 0.7841301421967347, "grad_norm": 1.3671875, "learning_rate": 0.00015593714093369221, "loss": 2.7115, "step": 16750 }, { "epoch": 0.7841769559365673, "grad_norm": 3.234375, "learning_rate": 0.00015593230217546763, "loss": 2.3141, "step": 16751 }, { "epoch": 0.7842237696764001, "grad_norm": 1.359375, "learning_rate": 0.00015592746322665697, "loss": 2.3018, "step": 16752 }, { "epoch": 0.7842705834162327, "grad_norm": 1.5390625, "learning_rate": 0.0001559226240872767, "loss": 2.8354, "step": 16753 }, { "epoch": 0.7843173971560653, "grad_norm": 1.5859375, "learning_rate": 0.0001559177847573433, "loss": 2.0796, "step": 16754 }, { "epoch": 0.7843642108958979, "grad_norm": 1.5234375, "learning_rate": 0.00015591294523687333, "loss": 2.5086, "step": 16755 }, { "epoch": 0.7844110246357305, "grad_norm": 2.5, "learning_rate": 0.0001559081055258832, "loss": 2.3508, "step": 16756 }, { "epoch": 0.7844578383755633, "grad_norm": 1.21875, "learning_rate": 0.00015590326562438942, "loss": 2.5985, "step": 16757 }, { "epoch": 0.7845046521153959, "grad_norm": 1.140625, "learning_rate": 0.0001558984255324085, "loss": 2.2269, "step": 16758 }, { "epoch": 0.7845514658552285, "grad_norm": 1.453125, "learning_rate": 0.00015589358524995694, "loss": 2.5625, "step": 16759 }, { "epoch": 0.7845982795950611, "grad_norm": 1.3359375, "learning_rate": 0.00015588874477705122, "loss": 2.5598, "step": 16760 }, { "epoch": 0.7846450933348937, "grad_norm": 1.1484375, "learning_rate": 0.0001558839041137078, "loss": 3.8755, "step": 16761 }, { "epoch": 0.7846919070747265, "grad_norm": 2.078125, "learning_rate": 0.00015587906325994325, "loss": 2.5588, "step": 16762 }, { "epoch": 0.7847387208145591, "grad_norm": 1.5, "learning_rate": 0.00015587422221577399, "loss": 1.8934, "step": 16763 }, { "epoch": 0.7847855345543917, "grad_norm": 1.234375, "learning_rate": 0.00015586938098121657, "loss": 2.6348, "step": 16764 }, { "epoch": 0.7848323482942243, "grad_norm": 1.5703125, "learning_rate": 0.00015586453955628745, "loss": 2.7489, "step": 16765 }, { "epoch": 0.7848791620340569, "grad_norm": 1.3984375, "learning_rate": 0.00015585969794100314, "loss": 2.7945, "step": 16766 }, { "epoch": 0.7849259757738897, "grad_norm": 1.640625, "learning_rate": 0.00015585485613538015, "loss": 2.6902, "step": 16767 }, { "epoch": 0.7849727895137223, "grad_norm": 1.3046875, "learning_rate": 0.00015585001413943495, "loss": 2.8267, "step": 16768 }, { "epoch": 0.7850196032535549, "grad_norm": 1.5703125, "learning_rate": 0.00015584517195318406, "loss": 2.6504, "step": 16769 }, { "epoch": 0.7850664169933875, "grad_norm": 1.40625, "learning_rate": 0.000155840329576644, "loss": 2.4469, "step": 16770 }, { "epoch": 0.7851132307332201, "grad_norm": 1.3046875, "learning_rate": 0.00015583548700983123, "loss": 2.6815, "step": 16771 }, { "epoch": 0.7851600444730529, "grad_norm": 1.65625, "learning_rate": 0.00015583064425276223, "loss": 2.642, "step": 16772 }, { "epoch": 0.7852068582128855, "grad_norm": 1.2890625, "learning_rate": 0.00015582580130545356, "loss": 2.6555, "step": 16773 }, { "epoch": 0.7852536719527181, "grad_norm": 1.546875, "learning_rate": 0.0001558209581679217, "loss": 2.5333, "step": 16774 }, { "epoch": 0.7853004856925507, "grad_norm": 1.90625, "learning_rate": 0.00015581611484018318, "loss": 2.3822, "step": 16775 }, { "epoch": 0.7853472994323834, "grad_norm": 1.765625, "learning_rate": 0.00015581127132225446, "loss": 2.3309, "step": 16776 }, { "epoch": 0.7853941131722161, "grad_norm": 1.234375, "learning_rate": 0.00015580642761415205, "loss": 2.4483, "step": 16777 }, { "epoch": 0.7854409269120487, "grad_norm": 1.28125, "learning_rate": 0.00015580158371589247, "loss": 2.3902, "step": 16778 }, { "epoch": 0.7854877406518813, "grad_norm": 1.1484375, "learning_rate": 0.00015579673962749218, "loss": 2.6133, "step": 16779 }, { "epoch": 0.7855345543917139, "grad_norm": 1.34375, "learning_rate": 0.00015579189534896778, "loss": 2.606, "step": 16780 }, { "epoch": 0.7855813681315466, "grad_norm": 2.65625, "learning_rate": 0.0001557870508803357, "loss": 2.1231, "step": 16781 }, { "epoch": 0.7856281818713793, "grad_norm": 1.53125, "learning_rate": 0.00015578220622161247, "loss": 2.3842, "step": 16782 }, { "epoch": 0.7856749956112119, "grad_norm": 1.40625, "learning_rate": 0.00015577736137281458, "loss": 2.5973, "step": 16783 }, { "epoch": 0.7857218093510445, "grad_norm": 2.078125, "learning_rate": 0.00015577251633395857, "loss": 2.7428, "step": 16784 }, { "epoch": 0.7857686230908771, "grad_norm": 1.484375, "learning_rate": 0.00015576767110506097, "loss": 2.7547, "step": 16785 }, { "epoch": 0.7858154368307099, "grad_norm": 2.0625, "learning_rate": 0.0001557628256861382, "loss": 2.4834, "step": 16786 }, { "epoch": 0.7858622505705425, "grad_norm": 1.3125, "learning_rate": 0.00015575798007720683, "loss": 3.7972, "step": 16787 }, { "epoch": 0.7859090643103751, "grad_norm": 1.4765625, "learning_rate": 0.00015575313427828338, "loss": 2.627, "step": 16788 }, { "epoch": 0.7859558780502077, "grad_norm": 1.3671875, "learning_rate": 0.00015574828828938438, "loss": 2.5994, "step": 16789 }, { "epoch": 0.7860026917900403, "grad_norm": 1.65625, "learning_rate": 0.00015574344211052624, "loss": 2.465, "step": 16790 }, { "epoch": 0.7860495055298731, "grad_norm": 1.2890625, "learning_rate": 0.00015573859574172558, "loss": 2.667, "step": 16791 }, { "epoch": 0.7860963192697057, "grad_norm": 1.1171875, "learning_rate": 0.0001557337491829989, "loss": 2.3028, "step": 16792 }, { "epoch": 0.7861431330095383, "grad_norm": 1.7734375, "learning_rate": 0.00015572890243436267, "loss": 2.6646, "step": 16793 }, { "epoch": 0.7861899467493709, "grad_norm": 1.7578125, "learning_rate": 0.00015572405549583342, "loss": 2.7454, "step": 16794 }, { "epoch": 0.7862367604892035, "grad_norm": 1.890625, "learning_rate": 0.00015571920836742766, "loss": 2.3137, "step": 16795 }, { "epoch": 0.7862835742290363, "grad_norm": 1.2265625, "learning_rate": 0.00015571436104916194, "loss": 2.7487, "step": 16796 }, { "epoch": 0.7863303879688689, "grad_norm": 2.09375, "learning_rate": 0.00015570951354105278, "loss": 2.464, "step": 16797 }, { "epoch": 0.7863772017087015, "grad_norm": 1.265625, "learning_rate": 0.00015570466584311662, "loss": 2.7817, "step": 16798 }, { "epoch": 0.7864240154485341, "grad_norm": 1.5, "learning_rate": 0.00015569981795537005, "loss": 2.8465, "step": 16799 }, { "epoch": 0.7864708291883667, "grad_norm": 2.59375, "learning_rate": 0.00015569496987782956, "loss": 2.8709, "step": 16800 }, { "epoch": 0.7865176429281995, "grad_norm": 1.1640625, "learning_rate": 0.0001556901216105117, "loss": 2.354, "step": 16801 }, { "epoch": 0.7865644566680321, "grad_norm": 1.328125, "learning_rate": 0.00015568527315343298, "loss": 2.5363, "step": 16802 }, { "epoch": 0.7866112704078647, "grad_norm": 1.8125, "learning_rate": 0.00015568042450660988, "loss": 2.4177, "step": 16803 }, { "epoch": 0.7866580841476973, "grad_norm": 1.3046875, "learning_rate": 0.00015567557567005896, "loss": 2.4312, "step": 16804 }, { "epoch": 0.78670489788753, "grad_norm": 1.2578125, "learning_rate": 0.00015567072664379676, "loss": 2.3939, "step": 16805 }, { "epoch": 0.7867517116273627, "grad_norm": 1.3671875, "learning_rate": 0.00015566587742783976, "loss": 2.3498, "step": 16806 }, { "epoch": 0.7867985253671953, "grad_norm": 1.0234375, "learning_rate": 0.00015566102802220448, "loss": 2.7881, "step": 16807 }, { "epoch": 0.7868453391070279, "grad_norm": 1.5, "learning_rate": 0.0001556561784269075, "loss": 2.3521, "step": 16808 }, { "epoch": 0.7868921528468605, "grad_norm": 1.609375, "learning_rate": 0.0001556513286419653, "loss": 2.8651, "step": 16809 }, { "epoch": 0.7869389665866932, "grad_norm": 1.3671875, "learning_rate": 0.00015564647866739438, "loss": 2.0421, "step": 16810 }, { "epoch": 0.7869857803265259, "grad_norm": 1.71875, "learning_rate": 0.00015564162850321132, "loss": 2.6946, "step": 16811 }, { "epoch": 0.7870325940663585, "grad_norm": 1.953125, "learning_rate": 0.00015563677814943262, "loss": 2.5351, "step": 16812 }, { "epoch": 0.7870794078061911, "grad_norm": 1.5234375, "learning_rate": 0.00015563192760607484, "loss": 2.7231, "step": 16813 }, { "epoch": 0.7871262215460237, "grad_norm": 1.5078125, "learning_rate": 0.00015562707687315447, "loss": 2.4556, "step": 16814 }, { "epoch": 0.7871730352858564, "grad_norm": 1.3671875, "learning_rate": 0.00015562222595068805, "loss": 2.6006, "step": 16815 }, { "epoch": 0.7872198490256891, "grad_norm": 1.6640625, "learning_rate": 0.00015561737483869213, "loss": 2.4637, "step": 16816 }, { "epoch": 0.7872666627655217, "grad_norm": 1.6953125, "learning_rate": 0.00015561252353718318, "loss": 2.4664, "step": 16817 }, { "epoch": 0.7873134765053543, "grad_norm": 2.046875, "learning_rate": 0.00015560767204617778, "loss": 2.6919, "step": 16818 }, { "epoch": 0.787360290245187, "grad_norm": 1.3203125, "learning_rate": 0.00015560282036569247, "loss": 2.4648, "step": 16819 }, { "epoch": 0.7874071039850196, "grad_norm": 1.0390625, "learning_rate": 0.00015559796849574378, "loss": 2.065, "step": 16820 }, { "epoch": 0.7874539177248523, "grad_norm": 1.390625, "learning_rate": 0.00015559311643634818, "loss": 2.4165, "step": 16821 }, { "epoch": 0.7875007314646849, "grad_norm": 1.5546875, "learning_rate": 0.00015558826418752227, "loss": 2.6185, "step": 16822 }, { "epoch": 0.7875475452045175, "grad_norm": 1.921875, "learning_rate": 0.00015558341174928258, "loss": 2.4252, "step": 16823 }, { "epoch": 0.7875943589443501, "grad_norm": 1.59375, "learning_rate": 0.0001555785591216456, "loss": 2.6107, "step": 16824 }, { "epoch": 0.7876411726841828, "grad_norm": 1.328125, "learning_rate": 0.0001555737063046279, "loss": 2.0765, "step": 16825 }, { "epoch": 0.7876879864240155, "grad_norm": 1.2734375, "learning_rate": 0.00015556885329824603, "loss": 2.354, "step": 16826 }, { "epoch": 0.7877348001638481, "grad_norm": 1.65625, "learning_rate": 0.00015556400010251648, "loss": 2.4988, "step": 16827 }, { "epoch": 0.7877816139036807, "grad_norm": 2.28125, "learning_rate": 0.0001555591467174558, "loss": 2.3784, "step": 16828 }, { "epoch": 0.7878284276435134, "grad_norm": 1.1640625, "learning_rate": 0.00015555429314308058, "loss": 2.8314, "step": 16829 }, { "epoch": 0.787875241383346, "grad_norm": 1.796875, "learning_rate": 0.00015554943937940729, "loss": 2.3922, "step": 16830 }, { "epoch": 0.7879220551231787, "grad_norm": 1.484375, "learning_rate": 0.0001555445854264525, "loss": 2.6181, "step": 16831 }, { "epoch": 0.7879688688630113, "grad_norm": 1.1796875, "learning_rate": 0.00015553973128423274, "loss": 2.0721, "step": 16832 }, { "epoch": 0.7880156826028439, "grad_norm": 1.3828125, "learning_rate": 0.00015553487695276456, "loss": 2.484, "step": 16833 }, { "epoch": 0.7880624963426766, "grad_norm": 1.7421875, "learning_rate": 0.0001555300224320645, "loss": 2.7103, "step": 16834 }, { "epoch": 0.7881093100825092, "grad_norm": 1.2890625, "learning_rate": 0.0001555251677221491, "loss": 2.2883, "step": 16835 }, { "epoch": 0.7881561238223419, "grad_norm": 1.1484375, "learning_rate": 0.0001555203128230349, "loss": 2.4685, "step": 16836 }, { "epoch": 0.7882029375621745, "grad_norm": 1.1875, "learning_rate": 0.00015551545773473844, "loss": 2.3509, "step": 16837 }, { "epoch": 0.7882497513020071, "grad_norm": 1.203125, "learning_rate": 0.00015551060245727627, "loss": 2.4439, "step": 16838 }, { "epoch": 0.7882965650418398, "grad_norm": 15.625, "learning_rate": 0.00015550574699066494, "loss": 4.9098, "step": 16839 }, { "epoch": 0.7883433787816724, "grad_norm": 1.4140625, "learning_rate": 0.00015550089133492095, "loss": 2.802, "step": 16840 }, { "epoch": 0.7883901925215051, "grad_norm": 1.7109375, "learning_rate": 0.0001554960354900609, "loss": 2.4067, "step": 16841 }, { "epoch": 0.7884370062613377, "grad_norm": 1.2734375, "learning_rate": 0.00015549117945610135, "loss": 2.536, "step": 16842 }, { "epoch": 0.7884838200011703, "grad_norm": 1.390625, "learning_rate": 0.0001554863232330588, "loss": 2.5816, "step": 16843 }, { "epoch": 0.788530633741003, "grad_norm": 1.484375, "learning_rate": 0.00015548146682094977, "loss": 2.2215, "step": 16844 }, { "epoch": 0.7885774474808356, "grad_norm": 1.5234375, "learning_rate": 0.00015547661021979086, "loss": 2.8531, "step": 16845 }, { "epoch": 0.7886242612206683, "grad_norm": 1.28125, "learning_rate": 0.00015547175342959862, "loss": 2.3033, "step": 16846 }, { "epoch": 0.7886710749605009, "grad_norm": 1.1015625, "learning_rate": 0.0001554668964503896, "loss": 3.298, "step": 16847 }, { "epoch": 0.7887178887003335, "grad_norm": 1.3984375, "learning_rate": 0.0001554620392821803, "loss": 2.5618, "step": 16848 }, { "epoch": 0.7887647024401662, "grad_norm": 4.9375, "learning_rate": 0.00015545718192498735, "loss": 2.8329, "step": 16849 }, { "epoch": 0.7888115161799988, "grad_norm": 2.828125, "learning_rate": 0.00015545232437882725, "loss": 2.2523, "step": 16850 }, { "epoch": 0.7888583299198315, "grad_norm": 1.4921875, "learning_rate": 0.00015544746664371655, "loss": 2.6206, "step": 16851 }, { "epoch": 0.7889051436596641, "grad_norm": 1.640625, "learning_rate": 0.00015544260871967183, "loss": 2.7269, "step": 16852 }, { "epoch": 0.7889519573994968, "grad_norm": 2.1875, "learning_rate": 0.0001554377506067096, "loss": 2.3265, "step": 16853 }, { "epoch": 0.7889987711393294, "grad_norm": 1.4921875, "learning_rate": 0.00015543289230484644, "loss": 2.7132, "step": 16854 }, { "epoch": 0.789045584879162, "grad_norm": 1.2265625, "learning_rate": 0.00015542803381409892, "loss": 2.5284, "step": 16855 }, { "epoch": 0.7890923986189947, "grad_norm": 2.234375, "learning_rate": 0.00015542317513448354, "loss": 2.3541, "step": 16856 }, { "epoch": 0.7891392123588273, "grad_norm": 1.296875, "learning_rate": 0.00015541831626601695, "loss": 2.3311, "step": 16857 }, { "epoch": 0.78918602609866, "grad_norm": 1.359375, "learning_rate": 0.00015541345720871563, "loss": 2.3406, "step": 16858 }, { "epoch": 0.7892328398384926, "grad_norm": 2.625, "learning_rate": 0.00015540859796259616, "loss": 2.4244, "step": 16859 }, { "epoch": 0.7892796535783252, "grad_norm": 1.578125, "learning_rate": 0.00015540373852767508, "loss": 2.6169, "step": 16860 }, { "epoch": 0.7893264673181579, "grad_norm": 1.421875, "learning_rate": 0.00015539887890396898, "loss": 2.578, "step": 16861 }, { "epoch": 0.7893732810579905, "grad_norm": 1.4453125, "learning_rate": 0.0001553940190914944, "loss": 4.2083, "step": 16862 }, { "epoch": 0.7894200947978232, "grad_norm": 1.375, "learning_rate": 0.0001553891590902679, "loss": 3.3321, "step": 16863 }, { "epoch": 0.7894669085376558, "grad_norm": 1.1796875, "learning_rate": 0.000155384298900306, "loss": 2.6617, "step": 16864 }, { "epoch": 0.7895137222774884, "grad_norm": 1.71875, "learning_rate": 0.00015537943852162535, "loss": 2.8775, "step": 16865 }, { "epoch": 0.7895605360173211, "grad_norm": 2.46875, "learning_rate": 0.00015537457795424247, "loss": 2.4968, "step": 16866 }, { "epoch": 0.7896073497571537, "grad_norm": 1.453125, "learning_rate": 0.00015536971719817388, "loss": 2.8912, "step": 16867 }, { "epoch": 0.7896541634969864, "grad_norm": 1.171875, "learning_rate": 0.00015536485625343622, "loss": 2.2832, "step": 16868 }, { "epoch": 0.789700977236819, "grad_norm": 1.359375, "learning_rate": 0.00015535999512004598, "loss": 2.4808, "step": 16869 }, { "epoch": 0.7897477909766516, "grad_norm": 1.1796875, "learning_rate": 0.0001553551337980198, "loss": 3.3377, "step": 16870 }, { "epoch": 0.7897946047164843, "grad_norm": 1.7109375, "learning_rate": 0.00015535027228737413, "loss": 2.7798, "step": 16871 }, { "epoch": 0.789841418456317, "grad_norm": 1.5390625, "learning_rate": 0.00015534541058812566, "loss": 2.6136, "step": 16872 }, { "epoch": 0.7898882321961496, "grad_norm": 1.5078125, "learning_rate": 0.0001553405487002909, "loss": 2.2656, "step": 16873 }, { "epoch": 0.7899350459359822, "grad_norm": 1.6484375, "learning_rate": 0.0001553356866238864, "loss": 2.6996, "step": 16874 }, { "epoch": 0.7899818596758148, "grad_norm": 1.7265625, "learning_rate": 0.00015533082435892874, "loss": 2.7329, "step": 16875 }, { "epoch": 0.7900286734156475, "grad_norm": 1.5234375, "learning_rate": 0.00015532596190543453, "loss": 2.6597, "step": 16876 }, { "epoch": 0.7900754871554801, "grad_norm": 1.140625, "learning_rate": 0.00015532109926342028, "loss": 2.4254, "step": 16877 }, { "epoch": 0.7901223008953128, "grad_norm": 1.578125, "learning_rate": 0.0001553162364329026, "loss": 2.2448, "step": 16878 }, { "epoch": 0.7901691146351454, "grad_norm": 1.53125, "learning_rate": 0.00015531137341389802, "loss": 2.2301, "step": 16879 }, { "epoch": 0.790215928374978, "grad_norm": 2.203125, "learning_rate": 0.00015530651020642314, "loss": 2.5381, "step": 16880 }, { "epoch": 0.7902627421148107, "grad_norm": 1.328125, "learning_rate": 0.0001553016468104945, "loss": 2.0518, "step": 16881 }, { "epoch": 0.7903095558546434, "grad_norm": 1.546875, "learning_rate": 0.00015529678322612873, "loss": 2.4003, "step": 16882 }, { "epoch": 0.790356369594476, "grad_norm": 1.4921875, "learning_rate": 0.00015529191945334235, "loss": 2.5867, "step": 16883 }, { "epoch": 0.7904031833343086, "grad_norm": 1.4296875, "learning_rate": 0.00015528705549215194, "loss": 2.7769, "step": 16884 }, { "epoch": 0.7904499970741412, "grad_norm": 1.4375, "learning_rate": 0.0001552821913425741, "loss": 2.5509, "step": 16885 }, { "epoch": 0.7904968108139739, "grad_norm": 1.4921875, "learning_rate": 0.00015527732700462542, "loss": 2.5152, "step": 16886 }, { "epoch": 0.7905436245538066, "grad_norm": 1.4375, "learning_rate": 0.00015527246247832238, "loss": 2.8137, "step": 16887 }, { "epoch": 0.7905904382936392, "grad_norm": 2.34375, "learning_rate": 0.00015526759776368168, "loss": 2.1265, "step": 16888 }, { "epoch": 0.7906372520334718, "grad_norm": 1.6875, "learning_rate": 0.00015526273286071976, "loss": 2.9387, "step": 16889 }, { "epoch": 0.7906840657733044, "grad_norm": 1.3046875, "learning_rate": 0.00015525786776945333, "loss": 2.5127, "step": 16890 }, { "epoch": 0.7907308795131371, "grad_norm": 1.6796875, "learning_rate": 0.0001552530024898989, "loss": 2.55, "step": 16891 }, { "epoch": 0.7907776932529698, "grad_norm": 1.515625, "learning_rate": 0.00015524813702207308, "loss": 2.6482, "step": 16892 }, { "epoch": 0.7908245069928024, "grad_norm": 2.078125, "learning_rate": 0.0001552432713659924, "loss": 2.2037, "step": 16893 }, { "epoch": 0.790871320732635, "grad_norm": 1.8671875, "learning_rate": 0.00015523840552167346, "loss": 2.4878, "step": 16894 }, { "epoch": 0.7909181344724676, "grad_norm": 1.2578125, "learning_rate": 0.00015523353948913285, "loss": 2.2774, "step": 16895 }, { "epoch": 0.7909649482123003, "grad_norm": 1.3515625, "learning_rate": 0.00015522867326838715, "loss": 2.5779, "step": 16896 }, { "epoch": 0.791011761952133, "grad_norm": 1.3203125, "learning_rate": 0.00015522380685945294, "loss": 2.8616, "step": 16897 }, { "epoch": 0.7910585756919656, "grad_norm": 1.421875, "learning_rate": 0.00015521894026234681, "loss": 2.4304, "step": 16898 }, { "epoch": 0.7911053894317982, "grad_norm": 1.71875, "learning_rate": 0.00015521407347708532, "loss": 2.6496, "step": 16899 }, { "epoch": 0.7911522031716308, "grad_norm": 1.203125, "learning_rate": 0.00015520920650368507, "loss": 2.5304, "step": 16900 }, { "epoch": 0.7911990169114635, "grad_norm": 1.2890625, "learning_rate": 0.00015520433934216262, "loss": 2.7659, "step": 16901 }, { "epoch": 0.7912458306512962, "grad_norm": 1.5, "learning_rate": 0.00015519947199253462, "loss": 2.0148, "step": 16902 }, { "epoch": 0.7912926443911288, "grad_norm": 1.21875, "learning_rate": 0.00015519460445481757, "loss": 2.3932, "step": 16903 }, { "epoch": 0.7913394581309614, "grad_norm": 1.421875, "learning_rate": 0.0001551897367290281, "loss": 2.618, "step": 16904 }, { "epoch": 0.7913862718707941, "grad_norm": 1.546875, "learning_rate": 0.00015518486881518283, "loss": 2.5211, "step": 16905 }, { "epoch": 0.7914330856106268, "grad_norm": 1.3046875, "learning_rate": 0.00015518000071329825, "loss": 2.5776, "step": 16906 }, { "epoch": 0.7914798993504594, "grad_norm": 1.359375, "learning_rate": 0.00015517513242339104, "loss": 2.2597, "step": 16907 }, { "epoch": 0.791526713090292, "grad_norm": 1.3359375, "learning_rate": 0.00015517026394547775, "loss": 2.4089, "step": 16908 }, { "epoch": 0.7915735268301246, "grad_norm": 1.21875, "learning_rate": 0.000155165395279575, "loss": 2.1179, "step": 16909 }, { "epoch": 0.7916203405699573, "grad_norm": 1.2421875, "learning_rate": 0.0001551605264256993, "loss": 2.5361, "step": 16910 }, { "epoch": 0.79166715430979, "grad_norm": 1.296875, "learning_rate": 0.00015515565738386734, "loss": 2.6075, "step": 16911 }, { "epoch": 0.7917139680496226, "grad_norm": 2.625, "learning_rate": 0.00015515078815409565, "loss": 2.5448, "step": 16912 }, { "epoch": 0.7917607817894552, "grad_norm": 1.6640625, "learning_rate": 0.00015514591873640082, "loss": 2.3382, "step": 16913 }, { "epoch": 0.7918075955292878, "grad_norm": 1.546875, "learning_rate": 0.0001551410491307995, "loss": 2.3243, "step": 16914 }, { "epoch": 0.7918544092691205, "grad_norm": 1.34375, "learning_rate": 0.00015513617933730821, "loss": 2.3624, "step": 16915 }, { "epoch": 0.7919012230089532, "grad_norm": 1.34375, "learning_rate": 0.0001551313093559436, "loss": 2.4507, "step": 16916 }, { "epoch": 0.7919480367487858, "grad_norm": 1.2265625, "learning_rate": 0.0001551264391867222, "loss": 2.3933, "step": 16917 }, { "epoch": 0.7919948504886184, "grad_norm": 2.421875, "learning_rate": 0.00015512156882966068, "loss": 2.5552, "step": 16918 }, { "epoch": 0.792041664228451, "grad_norm": 1.3359375, "learning_rate": 0.0001551166982847756, "loss": 2.199, "step": 16919 }, { "epoch": 0.7920884779682837, "grad_norm": 1.546875, "learning_rate": 0.00015511182755208357, "loss": 2.3696, "step": 16920 }, { "epoch": 0.7921352917081164, "grad_norm": 1.21875, "learning_rate": 0.00015510695663160115, "loss": 2.6256, "step": 16921 }, { "epoch": 0.792182105447949, "grad_norm": 1.2890625, "learning_rate": 0.00015510208552334495, "loss": 2.4317, "step": 16922 }, { "epoch": 0.7922289191877816, "grad_norm": 1.6953125, "learning_rate": 0.00015509721422733158, "loss": 2.3828, "step": 16923 }, { "epoch": 0.7922757329276142, "grad_norm": 1.6640625, "learning_rate": 0.00015509234274357767, "loss": 2.3251, "step": 16924 }, { "epoch": 0.792322546667447, "grad_norm": 1.4921875, "learning_rate": 0.00015508747107209975, "loss": 2.7381, "step": 16925 }, { "epoch": 0.7923693604072796, "grad_norm": 1.484375, "learning_rate": 0.0001550825992129145, "loss": 2.493, "step": 16926 }, { "epoch": 0.7924161741471122, "grad_norm": 2.0, "learning_rate": 0.00015507772716603842, "loss": 2.5088, "step": 16927 }, { "epoch": 0.7924629878869448, "grad_norm": 2.46875, "learning_rate": 0.0001550728549314882, "loss": 2.4867, "step": 16928 }, { "epoch": 0.7925098016267774, "grad_norm": 1.2734375, "learning_rate": 0.00015506798250928042, "loss": 2.2001, "step": 16929 }, { "epoch": 0.7925566153666102, "grad_norm": 2.0, "learning_rate": 0.00015506310989943164, "loss": 2.7251, "step": 16930 }, { "epoch": 0.7926034291064428, "grad_norm": 1.2265625, "learning_rate": 0.00015505823710195853, "loss": 2.6748, "step": 16931 }, { "epoch": 0.7926502428462754, "grad_norm": 1.1875, "learning_rate": 0.00015505336411687762, "loss": 2.9126, "step": 16932 }, { "epoch": 0.792697056586108, "grad_norm": 1.6953125, "learning_rate": 0.0001550484909442056, "loss": 2.5994, "step": 16933 }, { "epoch": 0.7927438703259406, "grad_norm": 1.3203125, "learning_rate": 0.000155043617583959, "loss": 2.1797, "step": 16934 }, { "epoch": 0.7927906840657734, "grad_norm": 1.46875, "learning_rate": 0.00015503874403615447, "loss": 2.5083, "step": 16935 }, { "epoch": 0.792837497805606, "grad_norm": 1.546875, "learning_rate": 0.00015503387030080858, "loss": 2.6606, "step": 16936 }, { "epoch": 0.7928843115454386, "grad_norm": 1.171875, "learning_rate": 0.000155028996377938, "loss": 2.3252, "step": 16937 }, { "epoch": 0.7929311252852712, "grad_norm": 1.734375, "learning_rate": 0.00015502412226755928, "loss": 2.3439, "step": 16938 }, { "epoch": 0.7929779390251038, "grad_norm": 1.328125, "learning_rate": 0.00015501924796968903, "loss": 2.6584, "step": 16939 }, { "epoch": 0.7930247527649366, "grad_norm": 1.21875, "learning_rate": 0.00015501437348434385, "loss": 2.6211, "step": 16940 }, { "epoch": 0.7930715665047692, "grad_norm": 1.109375, "learning_rate": 0.0001550094988115404, "loss": 2.4108, "step": 16941 }, { "epoch": 0.7931183802446018, "grad_norm": 1.7109375, "learning_rate": 0.00015500462395129526, "loss": 2.5788, "step": 16942 }, { "epoch": 0.7931651939844344, "grad_norm": 1.75, "learning_rate": 0.00015499974890362502, "loss": 2.7449, "step": 16943 }, { "epoch": 0.793212007724267, "grad_norm": 1.4921875, "learning_rate": 0.00015499487366854635, "loss": 2.3231, "step": 16944 }, { "epoch": 0.7932588214640998, "grad_norm": 1.8828125, "learning_rate": 0.0001549899982460758, "loss": 2.8527, "step": 16945 }, { "epoch": 0.7933056352039324, "grad_norm": 1.109375, "learning_rate": 0.00015498512263623004, "loss": 2.0186, "step": 16946 }, { "epoch": 0.793352448943765, "grad_norm": 1.5, "learning_rate": 0.00015498024683902566, "loss": 2.5281, "step": 16947 }, { "epoch": 0.7933992626835976, "grad_norm": 1.1640625, "learning_rate": 0.00015497537085447922, "loss": 2.3529, "step": 16948 }, { "epoch": 0.7934460764234302, "grad_norm": 1.328125, "learning_rate": 0.00015497049468260742, "loss": 2.4549, "step": 16949 }, { "epoch": 0.793492890163263, "grad_norm": 1.8515625, "learning_rate": 0.0001549656183234268, "loss": 2.303, "step": 16950 }, { "epoch": 0.7935397039030956, "grad_norm": 1.7734375, "learning_rate": 0.00015496074177695404, "loss": 2.447, "step": 16951 }, { "epoch": 0.7935865176429282, "grad_norm": 1.4453125, "learning_rate": 0.00015495586504320574, "loss": 2.5192, "step": 16952 }, { "epoch": 0.7936333313827608, "grad_norm": 1.8984375, "learning_rate": 0.00015495098812219848, "loss": 2.4723, "step": 16953 }, { "epoch": 0.7936801451225934, "grad_norm": 1.234375, "learning_rate": 0.00015494611101394892, "loss": 2.4312, "step": 16954 }, { "epoch": 0.7937269588624262, "grad_norm": 2.125, "learning_rate": 0.00015494123371847363, "loss": 2.4497, "step": 16955 }, { "epoch": 0.7937737726022588, "grad_norm": 1.1640625, "learning_rate": 0.00015493635623578932, "loss": 1.953, "step": 16956 }, { "epoch": 0.7938205863420914, "grad_norm": 1.1484375, "learning_rate": 0.00015493147856591254, "loss": 1.9993, "step": 16957 }, { "epoch": 0.793867400081924, "grad_norm": 1.75, "learning_rate": 0.00015492660070885988, "loss": 2.4943, "step": 16958 }, { "epoch": 0.7939142138217566, "grad_norm": 1.25, "learning_rate": 0.00015492172266464804, "loss": 2.5214, "step": 16959 }, { "epoch": 0.7939610275615894, "grad_norm": 1.078125, "learning_rate": 0.00015491684443329359, "loss": 2.3487, "step": 16960 }, { "epoch": 0.794007841301422, "grad_norm": 1.2890625, "learning_rate": 0.00015491196601481315, "loss": 2.3645, "step": 16961 }, { "epoch": 0.7940546550412546, "grad_norm": 1.4453125, "learning_rate": 0.00015490708740922336, "loss": 2.8379, "step": 16962 }, { "epoch": 0.7941014687810872, "grad_norm": 1.25, "learning_rate": 0.00015490220861654087, "loss": 2.8206, "step": 16963 }, { "epoch": 0.7941482825209198, "grad_norm": 1.3984375, "learning_rate": 0.00015489732963678228, "loss": 2.8875, "step": 16964 }, { "epoch": 0.7941950962607526, "grad_norm": 1.265625, "learning_rate": 0.0001548924504699642, "loss": 2.3463, "step": 16965 }, { "epoch": 0.7942419100005852, "grad_norm": 1.5234375, "learning_rate": 0.00015488757111610326, "loss": 2.4502, "step": 16966 }, { "epoch": 0.7942887237404178, "grad_norm": 1.3515625, "learning_rate": 0.00015488269157521613, "loss": 2.9353, "step": 16967 }, { "epoch": 0.7943355374802504, "grad_norm": 1.3984375, "learning_rate": 0.00015487781184731937, "loss": 2.577, "step": 16968 }, { "epoch": 0.794382351220083, "grad_norm": 2.015625, "learning_rate": 0.00015487293193242963, "loss": 2.5581, "step": 16969 }, { "epoch": 0.7944291649599158, "grad_norm": 1.46875, "learning_rate": 0.00015486805183056353, "loss": 2.232, "step": 16970 }, { "epoch": 0.7944759786997484, "grad_norm": 1.1796875, "learning_rate": 0.00015486317154173778, "loss": 3.4787, "step": 16971 }, { "epoch": 0.794522792439581, "grad_norm": 1.2734375, "learning_rate": 0.0001548582910659689, "loss": 2.4564, "step": 16972 }, { "epoch": 0.7945696061794136, "grad_norm": 1.671875, "learning_rate": 0.00015485341040327356, "loss": 2.5926, "step": 16973 }, { "epoch": 0.7946164199192463, "grad_norm": 1.109375, "learning_rate": 0.0001548485295536684, "loss": 2.6804, "step": 16974 }, { "epoch": 0.794663233659079, "grad_norm": 1.328125, "learning_rate": 0.00015484364851717005, "loss": 2.3323, "step": 16975 }, { "epoch": 0.7947100473989116, "grad_norm": 1.75, "learning_rate": 0.00015483876729379515, "loss": 3.0499, "step": 16976 }, { "epoch": 0.7947568611387442, "grad_norm": 2.03125, "learning_rate": 0.00015483388588356033, "loss": 2.2456, "step": 16977 }, { "epoch": 0.7948036748785768, "grad_norm": 1.40625, "learning_rate": 0.00015482900428648218, "loss": 2.6512, "step": 16978 }, { "epoch": 0.7948504886184095, "grad_norm": 2.109375, "learning_rate": 0.00015482412250257738, "loss": 2.5736, "step": 16979 }, { "epoch": 0.7948973023582422, "grad_norm": 1.1953125, "learning_rate": 0.00015481924053186257, "loss": 2.4463, "step": 16980 }, { "epoch": 0.7949441160980748, "grad_norm": 1.8203125, "learning_rate": 0.00015481435837435433, "loss": 2.2596, "step": 16981 }, { "epoch": 0.7949909298379074, "grad_norm": 1.5078125, "learning_rate": 0.00015480947603006934, "loss": 2.4096, "step": 16982 }, { "epoch": 0.79503774357774, "grad_norm": 1.3828125, "learning_rate": 0.00015480459349902426, "loss": 2.7236, "step": 16983 }, { "epoch": 0.7950845573175727, "grad_norm": 4.8125, "learning_rate": 0.0001547997107812357, "loss": 2.1222, "step": 16984 }, { "epoch": 0.7951313710574054, "grad_norm": 1.15625, "learning_rate": 0.00015479482787672026, "loss": 2.5866, "step": 16985 }, { "epoch": 0.795178184797238, "grad_norm": 1.484375, "learning_rate": 0.0001547899447854946, "loss": 2.4547, "step": 16986 }, { "epoch": 0.7952249985370706, "grad_norm": 1.2890625, "learning_rate": 0.0001547850615075754, "loss": 2.6208, "step": 16987 }, { "epoch": 0.7952718122769032, "grad_norm": 1.3359375, "learning_rate": 0.00015478017804297926, "loss": 2.6496, "step": 16988 }, { "epoch": 0.7953186260167359, "grad_norm": 1.265625, "learning_rate": 0.0001547752943917228, "loss": 2.5755, "step": 16989 }, { "epoch": 0.7953654397565686, "grad_norm": 1.546875, "learning_rate": 0.00015477041055382275, "loss": 2.6998, "step": 16990 }, { "epoch": 0.7954122534964012, "grad_norm": 1.34375, "learning_rate": 0.00015476552652929565, "loss": 2.5139, "step": 16991 }, { "epoch": 0.7954590672362338, "grad_norm": 2.40625, "learning_rate": 0.00015476064231815818, "loss": 2.4871, "step": 16992 }, { "epoch": 0.7955058809760664, "grad_norm": 1.296875, "learning_rate": 0.000154755757920427, "loss": 2.2368, "step": 16993 }, { "epoch": 0.7955526947158991, "grad_norm": 1.2265625, "learning_rate": 0.00015475087333611875, "loss": 2.5206, "step": 16994 }, { "epoch": 0.7955995084557318, "grad_norm": 1.125, "learning_rate": 0.00015474598856525006, "loss": 2.5992, "step": 16995 }, { "epoch": 0.7956463221955644, "grad_norm": 2.546875, "learning_rate": 0.00015474110360783757, "loss": 2.5552, "step": 16996 }, { "epoch": 0.795693135935397, "grad_norm": 1.3359375, "learning_rate": 0.00015473621846389795, "loss": 2.5937, "step": 16997 }, { "epoch": 0.7957399496752297, "grad_norm": 1.6640625, "learning_rate": 0.00015473133313344782, "loss": 2.5344, "step": 16998 }, { "epoch": 0.7957867634150623, "grad_norm": 1.15625, "learning_rate": 0.00015472644761650384, "loss": 2.4859, "step": 16999 }, { "epoch": 0.795833577154895, "grad_norm": 1.28125, "learning_rate": 0.00015472156191308263, "loss": 2.3624, "step": 17000 }, { "epoch": 0.7958803908947276, "grad_norm": 1.40625, "learning_rate": 0.00015471667602320088, "loss": 2.5393, "step": 17001 }, { "epoch": 0.7959272046345602, "grad_norm": 1.2890625, "learning_rate": 0.0001547117899468752, "loss": 2.6434, "step": 17002 }, { "epoch": 0.7959740183743929, "grad_norm": 1.25, "learning_rate": 0.00015470690368412228, "loss": 2.4966, "step": 17003 }, { "epoch": 0.7960208321142255, "grad_norm": 1.421875, "learning_rate": 0.0001547020172349588, "loss": 2.7337, "step": 17004 }, { "epoch": 0.7960676458540582, "grad_norm": 1.890625, "learning_rate": 0.00015469713059940127, "loss": 2.5612, "step": 17005 }, { "epoch": 0.7961144595938908, "grad_norm": 1.6875, "learning_rate": 0.00015469224377746645, "loss": 2.4792, "step": 17006 }, { "epoch": 0.7961612733337234, "grad_norm": 1.4375, "learning_rate": 0.000154687356769171, "loss": 2.4073, "step": 17007 }, { "epoch": 0.7962080870735561, "grad_norm": 1.3203125, "learning_rate": 0.00015468246957453153, "loss": 2.5762, "step": 17008 }, { "epoch": 0.7962549008133887, "grad_norm": 1.46875, "learning_rate": 0.00015467758219356466, "loss": 3.2739, "step": 17009 }, { "epoch": 0.7963017145532214, "grad_norm": 1.2890625, "learning_rate": 0.00015467269462628714, "loss": 2.4036, "step": 17010 }, { "epoch": 0.796348528293054, "grad_norm": 1.375, "learning_rate": 0.00015466780687271555, "loss": 2.051, "step": 17011 }, { "epoch": 0.7963953420328866, "grad_norm": 1.671875, "learning_rate": 0.00015466291893286659, "loss": 2.7299, "step": 17012 }, { "epoch": 0.7964421557727193, "grad_norm": 1.9921875, "learning_rate": 0.0001546580308067569, "loss": 2.9536, "step": 17013 }, { "epoch": 0.7964889695125519, "grad_norm": 1.265625, "learning_rate": 0.00015465314249440312, "loss": 2.2664, "step": 17014 }, { "epoch": 0.7965357832523846, "grad_norm": 1.6484375, "learning_rate": 0.0001546482539958219, "loss": 2.6244, "step": 17015 }, { "epoch": 0.7965825969922172, "grad_norm": 1.4921875, "learning_rate": 0.00015464336531102992, "loss": 2.3349, "step": 17016 }, { "epoch": 0.7966294107320498, "grad_norm": 1.6015625, "learning_rate": 0.00015463847644004382, "loss": 2.2099, "step": 17017 }, { "epoch": 0.7966762244718825, "grad_norm": 1.625, "learning_rate": 0.00015463358738288029, "loss": 2.54, "step": 17018 }, { "epoch": 0.7967230382117151, "grad_norm": 1.515625, "learning_rate": 0.00015462869813955596, "loss": 2.5664, "step": 17019 }, { "epoch": 0.7967698519515478, "grad_norm": 1.4296875, "learning_rate": 0.0001546238087100875, "loss": 2.6616, "step": 17020 }, { "epoch": 0.7968166656913804, "grad_norm": 1.7734375, "learning_rate": 0.00015461891909449154, "loss": 2.1473, "step": 17021 }, { "epoch": 0.796863479431213, "grad_norm": 1.484375, "learning_rate": 0.00015461402929278483, "loss": 2.3711, "step": 17022 }, { "epoch": 0.7969102931710457, "grad_norm": 2.5625, "learning_rate": 0.00015460913930498394, "loss": 2.3187, "step": 17023 }, { "epoch": 0.7969571069108783, "grad_norm": 2.015625, "learning_rate": 0.00015460424913110558, "loss": 2.3467, "step": 17024 }, { "epoch": 0.797003920650711, "grad_norm": 2.09375, "learning_rate": 0.00015459935877116637, "loss": 2.436, "step": 17025 }, { "epoch": 0.7970507343905436, "grad_norm": 1.9453125, "learning_rate": 0.00015459446822518302, "loss": 2.7256, "step": 17026 }, { "epoch": 0.7970975481303763, "grad_norm": 1.1796875, "learning_rate": 0.00015458957749317219, "loss": 2.4544, "step": 17027 }, { "epoch": 0.7971443618702089, "grad_norm": 1.1640625, "learning_rate": 0.00015458468657515047, "loss": 2.275, "step": 17028 }, { "epoch": 0.7971911756100416, "grad_norm": 1.34375, "learning_rate": 0.00015457979547113464, "loss": 2.1852, "step": 17029 }, { "epoch": 0.7972379893498742, "grad_norm": 1.796875, "learning_rate": 0.00015457490418114132, "loss": 2.4622, "step": 17030 }, { "epoch": 0.7972848030897068, "grad_norm": 1.3671875, "learning_rate": 0.00015457001270518714, "loss": 2.5698, "step": 17031 }, { "epoch": 0.7973316168295395, "grad_norm": 1.1171875, "learning_rate": 0.0001545651210432888, "loss": 2.3219, "step": 17032 }, { "epoch": 0.7973784305693721, "grad_norm": 1.5859375, "learning_rate": 0.00015456022919546297, "loss": 2.5369, "step": 17033 }, { "epoch": 0.7974252443092048, "grad_norm": 1.5703125, "learning_rate": 0.0001545553371617263, "loss": 2.3536, "step": 17034 }, { "epoch": 0.7974720580490374, "grad_norm": 2.03125, "learning_rate": 0.0001545504449420955, "loss": 2.6028, "step": 17035 }, { "epoch": 0.79751887178887, "grad_norm": 1.8984375, "learning_rate": 0.00015454555253658717, "loss": 2.3965, "step": 17036 }, { "epoch": 0.7975656855287027, "grad_norm": 1.375, "learning_rate": 0.00015454065994521807, "loss": 2.409, "step": 17037 }, { "epoch": 0.7976124992685353, "grad_norm": 1.34375, "learning_rate": 0.00015453576716800477, "loss": 2.5148, "step": 17038 }, { "epoch": 0.797659313008368, "grad_norm": 1.9140625, "learning_rate": 0.00015453087420496403, "loss": 2.6045, "step": 17039 }, { "epoch": 0.7977061267482006, "grad_norm": 1.46875, "learning_rate": 0.00015452598105611252, "loss": 2.6486, "step": 17040 }, { "epoch": 0.7977529404880332, "grad_norm": 1.40625, "learning_rate": 0.0001545210877214668, "loss": 2.2913, "step": 17041 }, { "epoch": 0.7977997542278659, "grad_norm": 1.3203125, "learning_rate": 0.0001545161942010437, "loss": 2.6992, "step": 17042 }, { "epoch": 0.7978465679676985, "grad_norm": 1.4140625, "learning_rate": 0.0001545113004948598, "loss": 2.2417, "step": 17043 }, { "epoch": 0.7978933817075312, "grad_norm": 1.40625, "learning_rate": 0.00015450640660293177, "loss": 2.6484, "step": 17044 }, { "epoch": 0.7979401954473638, "grad_norm": 1.421875, "learning_rate": 0.00015450151252527632, "loss": 2.7253, "step": 17045 }, { "epoch": 0.7979870091871964, "grad_norm": 1.703125, "learning_rate": 0.00015449661826191013, "loss": 2.7578, "step": 17046 }, { "epoch": 0.7980338229270291, "grad_norm": 1.2265625, "learning_rate": 0.00015449172381284986, "loss": 2.4276, "step": 17047 }, { "epoch": 0.7980806366668617, "grad_norm": 1.53125, "learning_rate": 0.00015448682917811217, "loss": 2.4727, "step": 17048 }, { "epoch": 0.7981274504066944, "grad_norm": 1.421875, "learning_rate": 0.0001544819343577138, "loss": 2.1853, "step": 17049 }, { "epoch": 0.798174264146527, "grad_norm": 1.2109375, "learning_rate": 0.00015447703935167131, "loss": 2.353, "step": 17050 }, { "epoch": 0.7982210778863597, "grad_norm": 1.2890625, "learning_rate": 0.00015447214416000154, "loss": 2.8089, "step": 17051 }, { "epoch": 0.7982678916261923, "grad_norm": 1.6796875, "learning_rate": 0.00015446724878272105, "loss": 2.8317, "step": 17052 }, { "epoch": 0.7983147053660249, "grad_norm": 1.171875, "learning_rate": 0.0001544623532198466, "loss": 1.9299, "step": 17053 }, { "epoch": 0.7983615191058576, "grad_norm": 1.25, "learning_rate": 0.00015445745747139477, "loss": 2.5763, "step": 17054 }, { "epoch": 0.7984083328456902, "grad_norm": 1.3125, "learning_rate": 0.0001544525615373823, "loss": 2.2564, "step": 17055 }, { "epoch": 0.7984551465855229, "grad_norm": 1.5625, "learning_rate": 0.00015444766541782592, "loss": 2.8202, "step": 17056 }, { "epoch": 0.7985019603253555, "grad_norm": 1.3671875, "learning_rate": 0.00015444276911274224, "loss": 2.3023, "step": 17057 }, { "epoch": 0.7985487740651881, "grad_norm": 1.25, "learning_rate": 0.000154437872622148, "loss": 2.3158, "step": 17058 }, { "epoch": 0.7985955878050208, "grad_norm": 1.3671875, "learning_rate": 0.00015443297594605984, "loss": 2.4851, "step": 17059 }, { "epoch": 0.7986424015448534, "grad_norm": 1.40625, "learning_rate": 0.00015442807908449443, "loss": 2.4633, "step": 17060 }, { "epoch": 0.7986892152846861, "grad_norm": 1.2734375, "learning_rate": 0.00015442318203746854, "loss": 2.4105, "step": 17061 }, { "epoch": 0.7987360290245187, "grad_norm": 1.71875, "learning_rate": 0.00015441828480499876, "loss": 2.7747, "step": 17062 }, { "epoch": 0.7987828427643513, "grad_norm": 1.8515625, "learning_rate": 0.00015441338738710184, "loss": 2.1877, "step": 17063 }, { "epoch": 0.798829656504184, "grad_norm": 1.453125, "learning_rate": 0.00015440848978379442, "loss": 3.022, "step": 17064 }, { "epoch": 0.7988764702440166, "grad_norm": 1.4453125, "learning_rate": 0.00015440359199509325, "loss": 2.5249, "step": 17065 }, { "epoch": 0.7989232839838493, "grad_norm": 1.3203125, "learning_rate": 0.00015439869402101495, "loss": 2.723, "step": 17066 }, { "epoch": 0.7989700977236819, "grad_norm": 2.078125, "learning_rate": 0.0001543937958615763, "loss": 2.6109, "step": 17067 }, { "epoch": 0.7990169114635145, "grad_norm": 1.203125, "learning_rate": 0.0001543888975167939, "loss": 3.5812, "step": 17068 }, { "epoch": 0.7990637252033472, "grad_norm": 1.2265625, "learning_rate": 0.0001543839989866845, "loss": 2.4873, "step": 17069 }, { "epoch": 0.7991105389431798, "grad_norm": 1.3671875, "learning_rate": 0.0001543791002712647, "loss": 2.6311, "step": 17070 }, { "epoch": 0.7991573526830125, "grad_norm": 1.4765625, "learning_rate": 0.00015437420137055132, "loss": 2.4822, "step": 17071 }, { "epoch": 0.7992041664228451, "grad_norm": 1.265625, "learning_rate": 0.00015436930228456097, "loss": 2.6663, "step": 17072 }, { "epoch": 0.7992509801626777, "grad_norm": 1.28125, "learning_rate": 0.00015436440301331037, "loss": 2.5498, "step": 17073 }, { "epoch": 0.7992977939025104, "grad_norm": 1.3203125, "learning_rate": 0.00015435950355681622, "loss": 2.1837, "step": 17074 }, { "epoch": 0.799344607642343, "grad_norm": 1.4375, "learning_rate": 0.0001543546039150952, "loss": 2.4408, "step": 17075 }, { "epoch": 0.7993914213821757, "grad_norm": 1.6640625, "learning_rate": 0.000154349704088164, "loss": 2.478, "step": 17076 }, { "epoch": 0.7994382351220083, "grad_norm": 1.53125, "learning_rate": 0.0001543448040760393, "loss": 1.8653, "step": 17077 }, { "epoch": 0.7994850488618409, "grad_norm": 1.5859375, "learning_rate": 0.00015433990387873786, "loss": 2.7632, "step": 17078 }, { "epoch": 0.7995318626016736, "grad_norm": 1.46875, "learning_rate": 0.0001543350034962763, "loss": 2.7326, "step": 17079 }, { "epoch": 0.7995786763415063, "grad_norm": 1.453125, "learning_rate": 0.0001543301029286714, "loss": 2.6258, "step": 17080 }, { "epoch": 0.7996254900813389, "grad_norm": 2.734375, "learning_rate": 0.00015432520217593983, "loss": 2.1626, "step": 17081 }, { "epoch": 0.7996723038211715, "grad_norm": 1.28125, "learning_rate": 0.00015432030123809824, "loss": 2.38, "step": 17082 }, { "epoch": 0.7997191175610041, "grad_norm": 2.203125, "learning_rate": 0.00015431540011516336, "loss": 2.5843, "step": 17083 }, { "epoch": 0.7997659313008368, "grad_norm": 1.6484375, "learning_rate": 0.0001543104988071519, "loss": 2.2379, "step": 17084 }, { "epoch": 0.7998127450406695, "grad_norm": 1.3359375, "learning_rate": 0.00015430559731408056, "loss": 2.6087, "step": 17085 }, { "epoch": 0.7998595587805021, "grad_norm": 1.28125, "learning_rate": 0.000154300695635966, "loss": 2.4137, "step": 17086 }, { "epoch": 0.7999063725203347, "grad_norm": 1.2578125, "learning_rate": 0.00015429579377282502, "loss": 2.5856, "step": 17087 }, { "epoch": 0.7999531862601673, "grad_norm": 1.28125, "learning_rate": 0.0001542908917246742, "loss": 2.5729, "step": 17088 }, { "epoch": 0.8, "grad_norm": 1.46875, "learning_rate": 0.00015428598949153034, "loss": 2.6124, "step": 17089 }, { "epoch": 0.8000468137398327, "grad_norm": 3.546875, "learning_rate": 0.00015428108707341012, "loss": 2.7043, "step": 17090 }, { "epoch": 0.8000936274796653, "grad_norm": 1.6953125, "learning_rate": 0.00015427618447033022, "loss": 2.1812, "step": 17091 }, { "epoch": 0.8001404412194979, "grad_norm": 1.6640625, "learning_rate": 0.00015427128168230735, "loss": 2.3414, "step": 17092 }, { "epoch": 0.8001872549593305, "grad_norm": 1.4921875, "learning_rate": 0.00015426637870935821, "loss": 2.7542, "step": 17093 }, { "epoch": 0.8002340686991632, "grad_norm": 1.5, "learning_rate": 0.00015426147555149954, "loss": 2.4474, "step": 17094 }, { "epoch": 0.8002808824389959, "grad_norm": 1.28125, "learning_rate": 0.00015425657220874804, "loss": 2.4915, "step": 17095 }, { "epoch": 0.8003276961788285, "grad_norm": 1.765625, "learning_rate": 0.0001542516686811204, "loss": 2.9619, "step": 17096 }, { "epoch": 0.8003745099186611, "grad_norm": 2.203125, "learning_rate": 0.00015424676496863336, "loss": 2.463, "step": 17097 }, { "epoch": 0.8004213236584937, "grad_norm": 1.0703125, "learning_rate": 0.00015424186107130354, "loss": 2.6254, "step": 17098 }, { "epoch": 0.8004681373983265, "grad_norm": 1.2109375, "learning_rate": 0.00015423695698914776, "loss": 2.0971, "step": 17099 }, { "epoch": 0.8005149511381591, "grad_norm": 2.125, "learning_rate": 0.00015423205272218268, "loss": 2.4818, "step": 17100 }, { "epoch": 0.8005617648779917, "grad_norm": 1.3203125, "learning_rate": 0.000154227148270425, "loss": 2.4852, "step": 17101 }, { "epoch": 0.8006085786178243, "grad_norm": 1.46875, "learning_rate": 0.00015422224363389146, "loss": 2.4455, "step": 17102 }, { "epoch": 0.8006553923576569, "grad_norm": 1.2734375, "learning_rate": 0.00015421733881259875, "loss": 2.4699, "step": 17103 }, { "epoch": 0.8007022060974897, "grad_norm": 1.6953125, "learning_rate": 0.0001542124338065636, "loss": 2.5776, "step": 17104 }, { "epoch": 0.8007490198373223, "grad_norm": 1.203125, "learning_rate": 0.00015420752861580272, "loss": 2.3776, "step": 17105 }, { "epoch": 0.8007958335771549, "grad_norm": 1.734375, "learning_rate": 0.0001542026232403328, "loss": 2.743, "step": 17106 }, { "epoch": 0.8008426473169875, "grad_norm": 1.21875, "learning_rate": 0.0001541977176801706, "loss": 2.7854, "step": 17107 }, { "epoch": 0.8008894610568201, "grad_norm": 1.546875, "learning_rate": 0.00015419281193533276, "loss": 2.6883, "step": 17108 }, { "epoch": 0.8009362747966529, "grad_norm": 1.296875, "learning_rate": 0.0001541879060058361, "loss": 2.589, "step": 17109 }, { "epoch": 0.8009830885364855, "grad_norm": 1.4375, "learning_rate": 0.00015418299989169727, "loss": 2.6093, "step": 17110 }, { "epoch": 0.8010299022763181, "grad_norm": 1.15625, "learning_rate": 0.00015417809359293298, "loss": 2.9406, "step": 17111 }, { "epoch": 0.8010767160161507, "grad_norm": 2.046875, "learning_rate": 0.00015417318710955999, "loss": 2.6565, "step": 17112 }, { "epoch": 0.8011235297559833, "grad_norm": 1.34375, "learning_rate": 0.00015416828044159496, "loss": 2.1624, "step": 17113 }, { "epoch": 0.8011703434958161, "grad_norm": 1.75, "learning_rate": 0.00015416337358905468, "loss": 2.7049, "step": 17114 }, { "epoch": 0.8012171572356487, "grad_norm": 1.8984375, "learning_rate": 0.00015415846655195584, "loss": 2.4797, "step": 17115 }, { "epoch": 0.8012639709754813, "grad_norm": 1.34375, "learning_rate": 0.00015415355933031514, "loss": 2.5714, "step": 17116 }, { "epoch": 0.8013107847153139, "grad_norm": 1.8828125, "learning_rate": 0.00015414865192414932, "loss": 2.6686, "step": 17117 }, { "epoch": 0.8013575984551465, "grad_norm": 1.140625, "learning_rate": 0.0001541437443334751, "loss": 2.2777, "step": 17118 }, { "epoch": 0.8014044121949793, "grad_norm": 1.9375, "learning_rate": 0.0001541388365583092, "loss": 2.4575, "step": 17119 }, { "epoch": 0.8014512259348119, "grad_norm": 1.0234375, "learning_rate": 0.00015413392859866835, "loss": 2.4826, "step": 17120 }, { "epoch": 0.8014980396746445, "grad_norm": 1.234375, "learning_rate": 0.00015412902045456925, "loss": 2.6625, "step": 17121 }, { "epoch": 0.8015448534144771, "grad_norm": 3.09375, "learning_rate": 0.00015412411212602865, "loss": 2.8446, "step": 17122 }, { "epoch": 0.8015916671543097, "grad_norm": 1.484375, "learning_rate": 0.0001541192036130633, "loss": 2.5621, "step": 17123 }, { "epoch": 0.8016384808941425, "grad_norm": 1.5, "learning_rate": 0.00015411429491568983, "loss": 2.715, "step": 17124 }, { "epoch": 0.8016852946339751, "grad_norm": 1.34375, "learning_rate": 0.00015410938603392505, "loss": 3.3796, "step": 17125 }, { "epoch": 0.8017321083738077, "grad_norm": 1.9296875, "learning_rate": 0.0001541044769677857, "loss": 2.064, "step": 17126 }, { "epoch": 0.8017789221136403, "grad_norm": 1.359375, "learning_rate": 0.00015409956771728845, "loss": 2.6214, "step": 17127 }, { "epoch": 0.8018257358534729, "grad_norm": 3.25, "learning_rate": 0.00015409465828245005, "loss": 2.4735, "step": 17128 }, { "epoch": 0.8018725495933057, "grad_norm": 1.453125, "learning_rate": 0.0001540897486632872, "loss": 2.6128, "step": 17129 }, { "epoch": 0.8019193633331383, "grad_norm": 1.40625, "learning_rate": 0.0001540848388598167, "loss": 2.7427, "step": 17130 }, { "epoch": 0.8019661770729709, "grad_norm": 1.6171875, "learning_rate": 0.00015407992887205523, "loss": 2.4383, "step": 17131 }, { "epoch": 0.8020129908128035, "grad_norm": 1.53125, "learning_rate": 0.0001540750187000195, "loss": 2.6795, "step": 17132 }, { "epoch": 0.8020598045526361, "grad_norm": 1.34375, "learning_rate": 0.0001540701083437263, "loss": 2.1801, "step": 17133 }, { "epoch": 0.8021066182924689, "grad_norm": 1.4140625, "learning_rate": 0.0001540651978031923, "loss": 2.7837, "step": 17134 }, { "epoch": 0.8021534320323015, "grad_norm": 1.75, "learning_rate": 0.0001540602870784343, "loss": 3.0355, "step": 17135 }, { "epoch": 0.8022002457721341, "grad_norm": 1.3203125, "learning_rate": 0.00015405537616946898, "loss": 2.5467, "step": 17136 }, { "epoch": 0.8022470595119667, "grad_norm": 1.859375, "learning_rate": 0.00015405046507631308, "loss": 2.3354, "step": 17137 }, { "epoch": 0.8022938732517993, "grad_norm": 1.578125, "learning_rate": 0.00015404555379898338, "loss": 2.2813, "step": 17138 }, { "epoch": 0.8023406869916321, "grad_norm": 1.8671875, "learning_rate": 0.00015404064233749654, "loss": 2.4448, "step": 17139 }, { "epoch": 0.8023875007314647, "grad_norm": 1.4921875, "learning_rate": 0.00015403573069186933, "loss": 2.6259, "step": 17140 }, { "epoch": 0.8024343144712973, "grad_norm": 1.6171875, "learning_rate": 0.0001540308188621185, "loss": 2.602, "step": 17141 }, { "epoch": 0.8024811282111299, "grad_norm": 1.3515625, "learning_rate": 0.00015402590684826078, "loss": 2.572, "step": 17142 }, { "epoch": 0.8025279419509626, "grad_norm": 1.3671875, "learning_rate": 0.00015402099465031295, "loss": 2.639, "step": 17143 }, { "epoch": 0.8025747556907953, "grad_norm": 1.234375, "learning_rate": 0.00015401608226829166, "loss": 2.3799, "step": 17144 }, { "epoch": 0.8026215694306279, "grad_norm": 1.4140625, "learning_rate": 0.0001540111697022137, "loss": 2.3677, "step": 17145 }, { "epoch": 0.8026683831704605, "grad_norm": 1.53125, "learning_rate": 0.0001540062569520958, "loss": 2.2793, "step": 17146 }, { "epoch": 0.8027151969102931, "grad_norm": 1.5859375, "learning_rate": 0.00015400134401795469, "loss": 3.0494, "step": 17147 }, { "epoch": 0.8027620106501258, "grad_norm": 1.4765625, "learning_rate": 0.00015399643089980715, "loss": 2.3435, "step": 17148 }, { "epoch": 0.8028088243899585, "grad_norm": 1.4609375, "learning_rate": 0.00015399151759766988, "loss": 2.329, "step": 17149 }, { "epoch": 0.8028556381297911, "grad_norm": 1.8046875, "learning_rate": 0.00015398660411155963, "loss": 2.1887, "step": 17150 }, { "epoch": 0.8029024518696237, "grad_norm": 1.640625, "learning_rate": 0.0001539816904414931, "loss": 2.91, "step": 17151 }, { "epoch": 0.8029492656094563, "grad_norm": 2.390625, "learning_rate": 0.00015397677658748714, "loss": 2.6404, "step": 17152 }, { "epoch": 0.8029960793492891, "grad_norm": 1.5, "learning_rate": 0.00015397186254955845, "loss": 2.2855, "step": 17153 }, { "epoch": 0.8030428930891217, "grad_norm": 1.4375, "learning_rate": 0.0001539669483277237, "loss": 2.5273, "step": 17154 }, { "epoch": 0.8030897068289543, "grad_norm": 1.4140625, "learning_rate": 0.00015396203392199973, "loss": 2.5442, "step": 17155 }, { "epoch": 0.8031365205687869, "grad_norm": 2.265625, "learning_rate": 0.00015395711933240326, "loss": 2.3891, "step": 17156 }, { "epoch": 0.8031833343086195, "grad_norm": 2.734375, "learning_rate": 0.000153952204558951, "loss": 2.3469, "step": 17157 }, { "epoch": 0.8032301480484523, "grad_norm": 1.3828125, "learning_rate": 0.00015394728960165972, "loss": 2.4135, "step": 17158 }, { "epoch": 0.8032769617882849, "grad_norm": 2.078125, "learning_rate": 0.0001539423744605462, "loss": 3.0951, "step": 17159 }, { "epoch": 0.8033237755281175, "grad_norm": 1.265625, "learning_rate": 0.00015393745913562713, "loss": 2.4135, "step": 17160 }, { "epoch": 0.8033705892679501, "grad_norm": 1.625, "learning_rate": 0.00015393254362691927, "loss": 2.7914, "step": 17161 }, { "epoch": 0.8034174030077827, "grad_norm": 1.6328125, "learning_rate": 0.0001539276279344394, "loss": 2.6483, "step": 17162 }, { "epoch": 0.8034642167476155, "grad_norm": 2.5625, "learning_rate": 0.00015392271205820425, "loss": 2.4479, "step": 17163 }, { "epoch": 0.8035110304874481, "grad_norm": 1.453125, "learning_rate": 0.00015391779599823057, "loss": 2.5389, "step": 17164 }, { "epoch": 0.8035578442272807, "grad_norm": 1.828125, "learning_rate": 0.00015391287975453514, "loss": 2.5343, "step": 17165 }, { "epoch": 0.8036046579671133, "grad_norm": 1.734375, "learning_rate": 0.0001539079633271347, "loss": 2.3524, "step": 17166 }, { "epoch": 0.803651471706946, "grad_norm": 2.0625, "learning_rate": 0.00015390304671604596, "loss": 2.6132, "step": 17167 }, { "epoch": 0.8036982854467787, "grad_norm": 1.1640625, "learning_rate": 0.00015389812992128575, "loss": 2.761, "step": 17168 }, { "epoch": 0.8037450991866113, "grad_norm": 1.7734375, "learning_rate": 0.00015389321294287072, "loss": 2.613, "step": 17169 }, { "epoch": 0.8037919129264439, "grad_norm": 1.4296875, "learning_rate": 0.0001538882957808177, "loss": 2.6225, "step": 17170 }, { "epoch": 0.8038387266662765, "grad_norm": 1.3046875, "learning_rate": 0.00015388337843514341, "loss": 2.34, "step": 17171 }, { "epoch": 0.8038855404061092, "grad_norm": 1.8515625, "learning_rate": 0.00015387846090586467, "loss": 2.5095, "step": 17172 }, { "epoch": 0.8039323541459419, "grad_norm": 1.4453125, "learning_rate": 0.00015387354319299816, "loss": 2.5856, "step": 17173 }, { "epoch": 0.8039791678857745, "grad_norm": 1.3203125, "learning_rate": 0.00015386862529656066, "loss": 2.8261, "step": 17174 }, { "epoch": 0.8040259816256071, "grad_norm": 1.140625, "learning_rate": 0.00015386370721656893, "loss": 2.4482, "step": 17175 }, { "epoch": 0.8040727953654397, "grad_norm": 1.6171875, "learning_rate": 0.00015385878895303977, "loss": 2.8459, "step": 17176 }, { "epoch": 0.8041196091052724, "grad_norm": 1.6640625, "learning_rate": 0.00015385387050598985, "loss": 2.5617, "step": 17177 }, { "epoch": 0.8041664228451051, "grad_norm": 1.3125, "learning_rate": 0.00015384895187543602, "loss": 2.6211, "step": 17178 }, { "epoch": 0.8042132365849377, "grad_norm": 1.296875, "learning_rate": 0.00015384403306139497, "loss": 2.378, "step": 17179 }, { "epoch": 0.8042600503247703, "grad_norm": 1.2578125, "learning_rate": 0.0001538391140638835, "loss": 2.404, "step": 17180 }, { "epoch": 0.8043068640646029, "grad_norm": 2.125, "learning_rate": 0.00015383419488291836, "loss": 2.1071, "step": 17181 }, { "epoch": 0.8043536778044356, "grad_norm": 1.609375, "learning_rate": 0.0001538292755185163, "loss": 2.9536, "step": 17182 }, { "epoch": 0.8044004915442683, "grad_norm": 1.4921875, "learning_rate": 0.00015382435597069413, "loss": 2.43, "step": 17183 }, { "epoch": 0.8044473052841009, "grad_norm": 2.609375, "learning_rate": 0.00015381943623946854, "loss": 2.3573, "step": 17184 }, { "epoch": 0.8044941190239335, "grad_norm": 1.4921875, "learning_rate": 0.00015381451632485638, "loss": 2.4621, "step": 17185 }, { "epoch": 0.8045409327637661, "grad_norm": 1.7109375, "learning_rate": 0.00015380959622687433, "loss": 2.1971, "step": 17186 }, { "epoch": 0.8045877465035988, "grad_norm": 1.2734375, "learning_rate": 0.00015380467594553919, "loss": 2.7625, "step": 17187 }, { "epoch": 0.8046345602434315, "grad_norm": 1.2578125, "learning_rate": 0.00015379975548086773, "loss": 2.4632, "step": 17188 }, { "epoch": 0.8046813739832641, "grad_norm": 2.015625, "learning_rate": 0.00015379483483287672, "loss": 2.3066, "step": 17189 }, { "epoch": 0.8047281877230967, "grad_norm": 1.375, "learning_rate": 0.0001537899140015829, "loss": 2.6782, "step": 17190 }, { "epoch": 0.8047750014629294, "grad_norm": 1.21875, "learning_rate": 0.00015378499298700308, "loss": 2.312, "step": 17191 }, { "epoch": 0.804821815202762, "grad_norm": 1.359375, "learning_rate": 0.000153780071789154, "loss": 2.4534, "step": 17192 }, { "epoch": 0.8048686289425947, "grad_norm": 1.3359375, "learning_rate": 0.00015377515040805242, "loss": 2.4306, "step": 17193 }, { "epoch": 0.8049154426824273, "grad_norm": 1.296875, "learning_rate": 0.00015377022884371515, "loss": 2.6181, "step": 17194 }, { "epoch": 0.8049622564222599, "grad_norm": 1.4609375, "learning_rate": 0.00015376530709615893, "loss": 2.3041, "step": 17195 }, { "epoch": 0.8050090701620926, "grad_norm": 1.5234375, "learning_rate": 0.0001537603851654005, "loss": 2.3559, "step": 17196 }, { "epoch": 0.8050558839019252, "grad_norm": 2.1875, "learning_rate": 0.00015375546305145668, "loss": 2.1893, "step": 17197 }, { "epoch": 0.8051026976417579, "grad_norm": 1.90625, "learning_rate": 0.00015375054075434424, "loss": 1.9414, "step": 17198 }, { "epoch": 0.8051495113815905, "grad_norm": 1.46875, "learning_rate": 0.00015374561827407996, "loss": 2.1768, "step": 17199 }, { "epoch": 0.8051963251214231, "grad_norm": 1.359375, "learning_rate": 0.00015374069561068055, "loss": 2.3917, "step": 17200 }, { "epoch": 0.8052431388612558, "grad_norm": 1.40625, "learning_rate": 0.00015373577276416283, "loss": 2.7158, "step": 17201 }, { "epoch": 0.8052899526010884, "grad_norm": 1.65625, "learning_rate": 0.0001537308497345436, "loss": 2.49, "step": 17202 }, { "epoch": 0.8053367663409211, "grad_norm": 1.390625, "learning_rate": 0.00015372592652183957, "loss": 2.5535, "step": 17203 }, { "epoch": 0.8053835800807537, "grad_norm": 1.4453125, "learning_rate": 0.00015372100312606758, "loss": 2.6322, "step": 17204 }, { "epoch": 0.8054303938205863, "grad_norm": 1.2265625, "learning_rate": 0.00015371607954724436, "loss": 2.4343, "step": 17205 }, { "epoch": 0.805477207560419, "grad_norm": 1.375, "learning_rate": 0.00015371115578538674, "loss": 2.3238, "step": 17206 }, { "epoch": 0.8055240213002516, "grad_norm": 2.3125, "learning_rate": 0.0001537062318405114, "loss": 2.4851, "step": 17207 }, { "epoch": 0.8055708350400843, "grad_norm": 1.4921875, "learning_rate": 0.0001537013077126352, "loss": 2.131, "step": 17208 }, { "epoch": 0.8056176487799169, "grad_norm": 1.6953125, "learning_rate": 0.00015369638340177491, "loss": 2.9643, "step": 17209 }, { "epoch": 0.8056644625197495, "grad_norm": 1.6640625, "learning_rate": 0.0001536914589079473, "loss": 2.6453, "step": 17210 }, { "epoch": 0.8057112762595822, "grad_norm": 1.328125, "learning_rate": 0.00015368653423116917, "loss": 2.2164, "step": 17211 }, { "epoch": 0.8057580899994148, "grad_norm": 1.9609375, "learning_rate": 0.00015368160937145723, "loss": 2.101, "step": 17212 }, { "epoch": 0.8058049037392475, "grad_norm": 2.484375, "learning_rate": 0.00015367668432882833, "loss": 2.4958, "step": 17213 }, { "epoch": 0.8058517174790801, "grad_norm": 1.0625, "learning_rate": 0.00015367175910329926, "loss": 2.4116, "step": 17214 }, { "epoch": 0.8058985312189128, "grad_norm": 1.609375, "learning_rate": 0.00015366683369488676, "loss": 2.3931, "step": 17215 }, { "epoch": 0.8059453449587454, "grad_norm": 1.4765625, "learning_rate": 0.0001536619081036076, "loss": 2.4354, "step": 17216 }, { "epoch": 0.805992158698578, "grad_norm": 1.109375, "learning_rate": 0.0001536569823294786, "loss": 1.819, "step": 17217 }, { "epoch": 0.8060389724384107, "grad_norm": 1.15625, "learning_rate": 0.00015365205637251652, "loss": 2.315, "step": 17218 }, { "epoch": 0.8060857861782433, "grad_norm": 1.4375, "learning_rate": 0.00015364713023273817, "loss": 2.8422, "step": 17219 }, { "epoch": 0.806132599918076, "grad_norm": 1.390625, "learning_rate": 0.00015364220391016033, "loss": 2.3905, "step": 17220 }, { "epoch": 0.8061794136579086, "grad_norm": 1.25, "learning_rate": 0.0001536372774047998, "loss": 2.5416, "step": 17221 }, { "epoch": 0.8062262273977412, "grad_norm": 1.5546875, "learning_rate": 0.0001536323507166733, "loss": 2.4871, "step": 17222 }, { "epoch": 0.8062730411375739, "grad_norm": 1.53125, "learning_rate": 0.0001536274238457977, "loss": 2.4356, "step": 17223 }, { "epoch": 0.8063198548774065, "grad_norm": 1.5859375, "learning_rate": 0.00015362249679218973, "loss": 2.3744, "step": 17224 }, { "epoch": 0.8063666686172392, "grad_norm": 1.4765625, "learning_rate": 0.00015361756955586625, "loss": 2.6206, "step": 17225 }, { "epoch": 0.8064134823570718, "grad_norm": 1.828125, "learning_rate": 0.00015361264213684393, "loss": 2.6315, "step": 17226 }, { "epoch": 0.8064602960969044, "grad_norm": 1.515625, "learning_rate": 0.00015360771453513968, "loss": 2.1796, "step": 17227 }, { "epoch": 0.8065071098367371, "grad_norm": 1.3125, "learning_rate": 0.00015360278675077024, "loss": 2.4301, "step": 17228 }, { "epoch": 0.8065539235765697, "grad_norm": 1.1328125, "learning_rate": 0.0001535978587837524, "loss": 2.2661, "step": 17229 }, { "epoch": 0.8066007373164024, "grad_norm": 1.3203125, "learning_rate": 0.00015359293063410293, "loss": 2.5886, "step": 17230 }, { "epoch": 0.806647551056235, "grad_norm": 1.3203125, "learning_rate": 0.00015358800230183867, "loss": 2.234, "step": 17231 }, { "epoch": 0.8066943647960676, "grad_norm": 3.359375, "learning_rate": 0.0001535830737869764, "loss": 2.2954, "step": 17232 }, { "epoch": 0.8067411785359003, "grad_norm": 2.28125, "learning_rate": 0.0001535781450895329, "loss": 2.2325, "step": 17233 }, { "epoch": 0.806787992275733, "grad_norm": 1.34375, "learning_rate": 0.00015357321620952495, "loss": 2.4435, "step": 17234 }, { "epoch": 0.8068348060155656, "grad_norm": 2.03125, "learning_rate": 0.00015356828714696935, "loss": 2.5827, "step": 17235 }, { "epoch": 0.8068816197553982, "grad_norm": 2.015625, "learning_rate": 0.00015356335790188291, "loss": 1.9234, "step": 17236 }, { "epoch": 0.8069284334952308, "grad_norm": 1.40625, "learning_rate": 0.00015355842847428246, "loss": 2.4185, "step": 17237 }, { "epoch": 0.8069752472350635, "grad_norm": 1.171875, "learning_rate": 0.00015355349886418473, "loss": 2.1821, "step": 17238 }, { "epoch": 0.8070220609748961, "grad_norm": 1.5703125, "learning_rate": 0.00015354856907160656, "loss": 2.3782, "step": 17239 }, { "epoch": 0.8070688747147288, "grad_norm": 2.15625, "learning_rate": 0.00015354363909656472, "loss": 2.9309, "step": 17240 }, { "epoch": 0.8071156884545614, "grad_norm": 2.203125, "learning_rate": 0.00015353870893907603, "loss": 2.8925, "step": 17241 }, { "epoch": 0.807162502194394, "grad_norm": 1.328125, "learning_rate": 0.00015353377859915728, "loss": 2.6123, "step": 17242 }, { "epoch": 0.8072093159342267, "grad_norm": 1.328125, "learning_rate": 0.00015352884807682532, "loss": 2.6894, "step": 17243 }, { "epoch": 0.8072561296740594, "grad_norm": 1.3125, "learning_rate": 0.00015352391737209685, "loss": 2.4096, "step": 17244 }, { "epoch": 0.807302943413892, "grad_norm": 1.6328125, "learning_rate": 0.00015351898648498878, "loss": 2.6492, "step": 17245 }, { "epoch": 0.8073497571537246, "grad_norm": 4.0625, "learning_rate": 0.0001535140554155178, "loss": 2.0209, "step": 17246 }, { "epoch": 0.8073965708935572, "grad_norm": 1.75, "learning_rate": 0.0001535091241637008, "loss": 2.71, "step": 17247 }, { "epoch": 0.8074433846333899, "grad_norm": 1.3046875, "learning_rate": 0.00015350419272955454, "loss": 2.3888, "step": 17248 }, { "epoch": 0.8074901983732226, "grad_norm": 1.234375, "learning_rate": 0.00015349926111309586, "loss": 2.6493, "step": 17249 }, { "epoch": 0.8075370121130552, "grad_norm": 1.3984375, "learning_rate": 0.0001534943293143415, "loss": 2.4684, "step": 17250 }, { "epoch": 0.8075838258528878, "grad_norm": 1.3125, "learning_rate": 0.00015348939733330833, "loss": 2.6211, "step": 17251 }, { "epoch": 0.8076306395927204, "grad_norm": 1.2421875, "learning_rate": 0.00015348446517001314, "loss": 2.8858, "step": 17252 }, { "epoch": 0.8076774533325531, "grad_norm": 1.3046875, "learning_rate": 0.0001534795328244727, "loss": 2.3984, "step": 17253 }, { "epoch": 0.8077242670723858, "grad_norm": 1.40625, "learning_rate": 0.00015347460029670388, "loss": 2.4085, "step": 17254 }, { "epoch": 0.8077710808122184, "grad_norm": 1.40625, "learning_rate": 0.00015346966758672341, "loss": 3.044, "step": 17255 }, { "epoch": 0.807817894552051, "grad_norm": 1.5625, "learning_rate": 0.00015346473469454817, "loss": 2.4673, "step": 17256 }, { "epoch": 0.8078647082918836, "grad_norm": 1.1953125, "learning_rate": 0.00015345980162019492, "loss": 2.2566, "step": 17257 }, { "epoch": 0.8079115220317163, "grad_norm": 1.421875, "learning_rate": 0.0001534548683636805, "loss": 2.356, "step": 17258 }, { "epoch": 0.807958335771549, "grad_norm": 1.7578125, "learning_rate": 0.00015344993492502168, "loss": 2.8999, "step": 17259 }, { "epoch": 0.8080051495113816, "grad_norm": 1.4375, "learning_rate": 0.0001534450013042353, "loss": 2.4798, "step": 17260 }, { "epoch": 0.8080519632512142, "grad_norm": 1.7890625, "learning_rate": 0.00015344006750133817, "loss": 2.5709, "step": 17261 }, { "epoch": 0.8080987769910468, "grad_norm": 2.921875, "learning_rate": 0.00015343513351634712, "loss": 2.6196, "step": 17262 }, { "epoch": 0.8081455907308795, "grad_norm": 1.515625, "learning_rate": 0.00015343019934927895, "loss": 2.4161, "step": 17263 }, { "epoch": 0.8081924044707122, "grad_norm": 1.3828125, "learning_rate": 0.0001534252650001504, "loss": 2.4165, "step": 17264 }, { "epoch": 0.8082392182105448, "grad_norm": 1.5625, "learning_rate": 0.0001534203304689784, "loss": 2.7819, "step": 17265 }, { "epoch": 0.8082860319503774, "grad_norm": 1.296875, "learning_rate": 0.0001534153957557797, "loss": 2.4919, "step": 17266 }, { "epoch": 0.80833284569021, "grad_norm": 1.1640625, "learning_rate": 0.00015341046086057112, "loss": 2.3837, "step": 17267 }, { "epoch": 0.8083796594300428, "grad_norm": 1.6015625, "learning_rate": 0.0001534055257833695, "loss": 2.6283, "step": 17268 }, { "epoch": 0.8084264731698754, "grad_norm": 1.2265625, "learning_rate": 0.00015340059052419163, "loss": 2.3713, "step": 17269 }, { "epoch": 0.808473286909708, "grad_norm": 2.28125, "learning_rate": 0.00015339565508305433, "loss": 2.2479, "step": 17270 }, { "epoch": 0.8085201006495406, "grad_norm": 1.171875, "learning_rate": 0.00015339071945997442, "loss": 2.2831, "step": 17271 }, { "epoch": 0.8085669143893732, "grad_norm": 1.359375, "learning_rate": 0.00015338578365496874, "loss": 2.4677, "step": 17272 }, { "epoch": 0.808613728129206, "grad_norm": 1.5, "learning_rate": 0.00015338084766805405, "loss": 2.6083, "step": 17273 }, { "epoch": 0.8086605418690386, "grad_norm": 1.6953125, "learning_rate": 0.00015337591149924723, "loss": 2.6063, "step": 17274 }, { "epoch": 0.8087073556088712, "grad_norm": 1.4921875, "learning_rate": 0.0001533709751485651, "loss": 2.5494, "step": 17275 }, { "epoch": 0.8087541693487038, "grad_norm": 1.5546875, "learning_rate": 0.0001533660386160244, "loss": 2.4834, "step": 17276 }, { "epoch": 0.8088009830885365, "grad_norm": 1.578125, "learning_rate": 0.00015336110190164207, "loss": 2.8809, "step": 17277 }, { "epoch": 0.8088477968283692, "grad_norm": 1.625, "learning_rate": 0.00015335616500543483, "loss": 2.7532, "step": 17278 }, { "epoch": 0.8088946105682018, "grad_norm": 1.546875, "learning_rate": 0.00015335122792741954, "loss": 2.6423, "step": 17279 }, { "epoch": 0.8089414243080344, "grad_norm": 1.53125, "learning_rate": 0.00015334629066761303, "loss": 2.585, "step": 17280 }, { "epoch": 0.808988238047867, "grad_norm": 1.2421875, "learning_rate": 0.00015334135322603217, "loss": 2.4491, "step": 17281 }, { "epoch": 0.8090350517876997, "grad_norm": 1.2890625, "learning_rate": 0.00015333641560269365, "loss": 2.455, "step": 17282 }, { "epoch": 0.8090818655275324, "grad_norm": 1.453125, "learning_rate": 0.00015333147779761442, "loss": 2.7497, "step": 17283 }, { "epoch": 0.809128679267365, "grad_norm": 1.5546875, "learning_rate": 0.00015332653981081127, "loss": 2.4697, "step": 17284 }, { "epoch": 0.8091754930071976, "grad_norm": 1.6015625, "learning_rate": 0.000153321601642301, "loss": 2.2504, "step": 17285 }, { "epoch": 0.8092223067470302, "grad_norm": 1.6875, "learning_rate": 0.00015331666329210046, "loss": 2.4184, "step": 17286 }, { "epoch": 0.809269120486863, "grad_norm": 2.015625, "learning_rate": 0.00015331172476022647, "loss": 2.0126, "step": 17287 }, { "epoch": 0.8093159342266956, "grad_norm": 1.5625, "learning_rate": 0.00015330678604669586, "loss": 2.3386, "step": 17288 }, { "epoch": 0.8093627479665282, "grad_norm": 1.796875, "learning_rate": 0.00015330184715152544, "loss": 3.3827, "step": 17289 }, { "epoch": 0.8094095617063608, "grad_norm": 2.09375, "learning_rate": 0.00015329690807473209, "loss": 2.5734, "step": 17290 }, { "epoch": 0.8094563754461934, "grad_norm": 1.390625, "learning_rate": 0.0001532919688163326, "loss": 2.575, "step": 17291 }, { "epoch": 0.8095031891860262, "grad_norm": 1.6640625, "learning_rate": 0.00015328702937634378, "loss": 2.7589, "step": 17292 }, { "epoch": 0.8095500029258588, "grad_norm": 1.4140625, "learning_rate": 0.00015328208975478252, "loss": 2.2341, "step": 17293 }, { "epoch": 0.8095968166656914, "grad_norm": 1.3828125, "learning_rate": 0.0001532771499516656, "loss": 2.3055, "step": 17294 }, { "epoch": 0.809643630405524, "grad_norm": 1.6015625, "learning_rate": 0.00015327220996700985, "loss": 2.4339, "step": 17295 }, { "epoch": 0.8096904441453566, "grad_norm": 1.5, "learning_rate": 0.00015326726980083218, "loss": 2.6526, "step": 17296 }, { "epoch": 0.8097372578851894, "grad_norm": 1.515625, "learning_rate": 0.00015326232945314932, "loss": 2.6799, "step": 17297 }, { "epoch": 0.809784071625022, "grad_norm": 1.1796875, "learning_rate": 0.00015325738892397816, "loss": 2.2101, "step": 17298 }, { "epoch": 0.8098308853648546, "grad_norm": 2.734375, "learning_rate": 0.00015325244821333555, "loss": 3.0005, "step": 17299 }, { "epoch": 0.8098776991046872, "grad_norm": 1.359375, "learning_rate": 0.0001532475073212383, "loss": 2.7055, "step": 17300 }, { "epoch": 0.8099245128445198, "grad_norm": 1.265625, "learning_rate": 0.0001532425662477032, "loss": 2.2822, "step": 17301 }, { "epoch": 0.8099713265843526, "grad_norm": 2.046875, "learning_rate": 0.0001532376249927472, "loss": 2.9397, "step": 17302 }, { "epoch": 0.8100181403241852, "grad_norm": 1.515625, "learning_rate": 0.000153232683556387, "loss": 2.9234, "step": 17303 }, { "epoch": 0.8100649540640178, "grad_norm": 1.1796875, "learning_rate": 0.00015322774193863957, "loss": 2.2158, "step": 17304 }, { "epoch": 0.8101117678038504, "grad_norm": 1.3984375, "learning_rate": 0.00015322280013952165, "loss": 2.6553, "step": 17305 }, { "epoch": 0.810158581543683, "grad_norm": 1.4609375, "learning_rate": 0.00015321785815905012, "loss": 2.2247, "step": 17306 }, { "epoch": 0.8102053952835158, "grad_norm": 1.4140625, "learning_rate": 0.0001532129159972418, "loss": 2.5279, "step": 17307 }, { "epoch": 0.8102522090233484, "grad_norm": 3.875, "learning_rate": 0.00015320797365411356, "loss": 2.5237, "step": 17308 }, { "epoch": 0.810299022763181, "grad_norm": 1.2578125, "learning_rate": 0.00015320303112968225, "loss": 2.6335, "step": 17309 }, { "epoch": 0.8103458365030136, "grad_norm": 1.40625, "learning_rate": 0.00015319808842396464, "loss": 2.5828, "step": 17310 }, { "epoch": 0.8103926502428462, "grad_norm": 1.4609375, "learning_rate": 0.00015319314553697766, "loss": 2.6002, "step": 17311 }, { "epoch": 0.810439463982679, "grad_norm": 1.578125, "learning_rate": 0.00015318820246873808, "loss": 2.2902, "step": 17312 }, { "epoch": 0.8104862777225116, "grad_norm": 1.390625, "learning_rate": 0.0001531832592192628, "loss": 2.2145, "step": 17313 }, { "epoch": 0.8105330914623442, "grad_norm": 1.2109375, "learning_rate": 0.00015317831578856862, "loss": 2.5834, "step": 17314 }, { "epoch": 0.8105799052021768, "grad_norm": 1.6640625, "learning_rate": 0.00015317337217667242, "loss": 2.5656, "step": 17315 }, { "epoch": 0.8106267189420094, "grad_norm": 1.2734375, "learning_rate": 0.000153168428383591, "loss": 2.4246, "step": 17316 }, { "epoch": 0.8106735326818422, "grad_norm": 1.28125, "learning_rate": 0.00015316348440934125, "loss": 2.2479, "step": 17317 }, { "epoch": 0.8107203464216748, "grad_norm": 1.703125, "learning_rate": 0.00015315854025393997, "loss": 2.3751, "step": 17318 }, { "epoch": 0.8107671601615074, "grad_norm": 1.6953125, "learning_rate": 0.0001531535959174041, "loss": 2.4246, "step": 17319 }, { "epoch": 0.81081397390134, "grad_norm": 1.3203125, "learning_rate": 0.00015314865139975042, "loss": 2.3379, "step": 17320 }, { "epoch": 0.8108607876411726, "grad_norm": 1.4296875, "learning_rate": 0.00015314370670099574, "loss": 2.4526, "step": 17321 }, { "epoch": 0.8109076013810054, "grad_norm": 1.5078125, "learning_rate": 0.00015313876182115695, "loss": 2.6473, "step": 17322 }, { "epoch": 0.810954415120838, "grad_norm": 2.15625, "learning_rate": 0.00015313381676025092, "loss": 2.5032, "step": 17323 }, { "epoch": 0.8110012288606706, "grad_norm": 1.359375, "learning_rate": 0.0001531288715182945, "loss": 2.6508, "step": 17324 }, { "epoch": 0.8110480426005032, "grad_norm": 1.65625, "learning_rate": 0.0001531239260953045, "loss": 2.6068, "step": 17325 }, { "epoch": 0.8110948563403358, "grad_norm": 1.84375, "learning_rate": 0.0001531189804912978, "loss": 2.6311, "step": 17326 }, { "epoch": 0.8111416700801686, "grad_norm": 2.484375, "learning_rate": 0.0001531140347062912, "loss": 2.3771, "step": 17327 }, { "epoch": 0.8111884838200012, "grad_norm": 1.7421875, "learning_rate": 0.00015310908874030163, "loss": 2.615, "step": 17328 }, { "epoch": 0.8112352975598338, "grad_norm": 2.09375, "learning_rate": 0.00015310414259334592, "loss": 2.2862, "step": 17329 }, { "epoch": 0.8112821112996664, "grad_norm": 2.75, "learning_rate": 0.0001530991962654409, "loss": 2.2943, "step": 17330 }, { "epoch": 0.811328925039499, "grad_norm": 1.4140625, "learning_rate": 0.00015309424975660347, "loss": 2.9053, "step": 17331 }, { "epoch": 0.8113757387793318, "grad_norm": 1.2265625, "learning_rate": 0.00015308930306685042, "loss": 2.3706, "step": 17332 }, { "epoch": 0.8114225525191644, "grad_norm": 1.8828125, "learning_rate": 0.00015308435619619862, "loss": 2.8392, "step": 17333 }, { "epoch": 0.811469366258997, "grad_norm": 1.3515625, "learning_rate": 0.00015307940914466495, "loss": 1.934, "step": 17334 }, { "epoch": 0.8115161799988296, "grad_norm": 1.5703125, "learning_rate": 0.00015307446191226634, "loss": 2.1732, "step": 17335 }, { "epoch": 0.8115629937386623, "grad_norm": 1.2265625, "learning_rate": 0.00015306951449901946, "loss": 2.9105, "step": 17336 }, { "epoch": 0.811609807478495, "grad_norm": 1.2421875, "learning_rate": 0.00015306456690494132, "loss": 2.5992, "step": 17337 }, { "epoch": 0.8116566212183276, "grad_norm": 1.3046875, "learning_rate": 0.00015305961913004878, "loss": 2.4412, "step": 17338 }, { "epoch": 0.8117034349581602, "grad_norm": 1.265625, "learning_rate": 0.00015305467117435862, "loss": 2.5578, "step": 17339 }, { "epoch": 0.8117502486979928, "grad_norm": 1.8203125, "learning_rate": 0.00015304972303788775, "loss": 2.9472, "step": 17340 }, { "epoch": 0.8117970624378255, "grad_norm": 1.4921875, "learning_rate": 0.00015304477472065297, "loss": 2.5516, "step": 17341 }, { "epoch": 0.8118438761776582, "grad_norm": 1.3984375, "learning_rate": 0.00015303982622267122, "loss": 2.3491, "step": 17342 }, { "epoch": 0.8118906899174908, "grad_norm": 2.5, "learning_rate": 0.00015303487754395932, "loss": 2.8196, "step": 17343 }, { "epoch": 0.8119375036573234, "grad_norm": 1.75, "learning_rate": 0.00015302992868453414, "loss": 2.5343, "step": 17344 }, { "epoch": 0.811984317397156, "grad_norm": 1.6953125, "learning_rate": 0.00015302497964441252, "loss": 2.7014, "step": 17345 }, { "epoch": 0.8120311311369887, "grad_norm": 1.828125, "learning_rate": 0.0001530200304236114, "loss": 2.3673, "step": 17346 }, { "epoch": 0.8120779448768214, "grad_norm": 1.5390625, "learning_rate": 0.00015301508102214757, "loss": 2.3554, "step": 17347 }, { "epoch": 0.812124758616654, "grad_norm": 1.4296875, "learning_rate": 0.0001530101314400379, "loss": 2.8313, "step": 17348 }, { "epoch": 0.8121715723564866, "grad_norm": 1.4296875, "learning_rate": 0.0001530051816772993, "loss": 2.3401, "step": 17349 }, { "epoch": 0.8122183860963192, "grad_norm": 1.28125, "learning_rate": 0.00015300023173394855, "loss": 2.2164, "step": 17350 }, { "epoch": 0.8122651998361519, "grad_norm": 1.1484375, "learning_rate": 0.00015299528161000266, "loss": 2.0878, "step": 17351 }, { "epoch": 0.8123120135759846, "grad_norm": 1.3046875, "learning_rate": 0.00015299033130547834, "loss": 2.8345, "step": 17352 }, { "epoch": 0.8123588273158172, "grad_norm": 1.25, "learning_rate": 0.00015298538082039254, "loss": 2.538, "step": 17353 }, { "epoch": 0.8124056410556498, "grad_norm": 1.9453125, "learning_rate": 0.00015298043015476214, "loss": 2.1333, "step": 17354 }, { "epoch": 0.8124524547954824, "grad_norm": 1.1953125, "learning_rate": 0.00015297547930860397, "loss": 2.3592, "step": 17355 }, { "epoch": 0.8124992685353151, "grad_norm": 1.4140625, "learning_rate": 0.00015297052828193493, "loss": 2.5029, "step": 17356 }, { "epoch": 0.8125460822751478, "grad_norm": 1.375, "learning_rate": 0.00015296557707477186, "loss": 2.7643, "step": 17357 }, { "epoch": 0.8125928960149804, "grad_norm": 1.671875, "learning_rate": 0.00015296062568713166, "loss": 2.4184, "step": 17358 }, { "epoch": 0.812639709754813, "grad_norm": 1.171875, "learning_rate": 0.0001529556741190312, "loss": 2.2786, "step": 17359 }, { "epoch": 0.8126865234946457, "grad_norm": 1.8515625, "learning_rate": 0.0001529507223704873, "loss": 2.6166, "step": 17360 }, { "epoch": 0.8127333372344783, "grad_norm": 1.40625, "learning_rate": 0.00015294577044151693, "loss": 2.8519, "step": 17361 }, { "epoch": 0.812780150974311, "grad_norm": 2.25, "learning_rate": 0.00015294081833213685, "loss": 2.4453, "step": 17362 }, { "epoch": 0.8128269647141436, "grad_norm": 1.6953125, "learning_rate": 0.00015293586604236403, "loss": 2.9546, "step": 17363 }, { "epoch": 0.8128737784539762, "grad_norm": 1.9921875, "learning_rate": 0.00015293091357221526, "loss": 2.3326, "step": 17364 }, { "epoch": 0.8129205921938089, "grad_norm": 0.984375, "learning_rate": 0.0001529259609217075, "loss": 3.501, "step": 17365 }, { "epoch": 0.8129674059336415, "grad_norm": 1.8671875, "learning_rate": 0.0001529210080908576, "loss": 2.3257, "step": 17366 }, { "epoch": 0.8130142196734742, "grad_norm": 1.40625, "learning_rate": 0.00015291605507968242, "loss": 2.4981, "step": 17367 }, { "epoch": 0.8130610334133068, "grad_norm": 1.375, "learning_rate": 0.00015291110188819885, "loss": 2.564, "step": 17368 }, { "epoch": 0.8131078471531394, "grad_norm": 1.4296875, "learning_rate": 0.00015290614851642373, "loss": 2.1617, "step": 17369 }, { "epoch": 0.8131546608929721, "grad_norm": 1.4453125, "learning_rate": 0.00015290119496437398, "loss": 2.8848, "step": 17370 }, { "epoch": 0.8132014746328047, "grad_norm": 1.4375, "learning_rate": 0.00015289624123206646, "loss": 2.5904, "step": 17371 }, { "epoch": 0.8132482883726374, "grad_norm": 1.1328125, "learning_rate": 0.00015289128731951808, "loss": 2.587, "step": 17372 }, { "epoch": 0.81329510211247, "grad_norm": 1.609375, "learning_rate": 0.00015288633322674565, "loss": 2.5202, "step": 17373 }, { "epoch": 0.8133419158523026, "grad_norm": 1.859375, "learning_rate": 0.0001528813789537661, "loss": 2.6875, "step": 17374 }, { "epoch": 0.8133887295921353, "grad_norm": 1.265625, "learning_rate": 0.00015287642450059633, "loss": 2.5297, "step": 17375 }, { "epoch": 0.8134355433319679, "grad_norm": 1.796875, "learning_rate": 0.0001528714698672532, "loss": 2.4476, "step": 17376 }, { "epoch": 0.8134823570718006, "grad_norm": 1.59375, "learning_rate": 0.00015286651505375362, "loss": 2.6822, "step": 17377 }, { "epoch": 0.8135291708116332, "grad_norm": 1.2265625, "learning_rate": 0.0001528615600601144, "loss": 2.5928, "step": 17378 }, { "epoch": 0.8135759845514658, "grad_norm": 1.234375, "learning_rate": 0.0001528566048863525, "loss": 2.6554, "step": 17379 }, { "epoch": 0.8136227982912985, "grad_norm": 1.8671875, "learning_rate": 0.00015285164953248475, "loss": 2.7244, "step": 17380 }, { "epoch": 0.8136696120311311, "grad_norm": 1.28125, "learning_rate": 0.00015284669399852807, "loss": 2.3709, "step": 17381 }, { "epoch": 0.8137164257709638, "grad_norm": 1.3671875, "learning_rate": 0.0001528417382844993, "loss": 2.487, "step": 17382 }, { "epoch": 0.8137632395107964, "grad_norm": 1.6640625, "learning_rate": 0.00015283678239041545, "loss": 2.688, "step": 17383 }, { "epoch": 0.813810053250629, "grad_norm": 1.5234375, "learning_rate": 0.00015283182631629325, "loss": 2.3135, "step": 17384 }, { "epoch": 0.8138568669904617, "grad_norm": 1.5234375, "learning_rate": 0.00015282687006214966, "loss": 2.6769, "step": 17385 }, { "epoch": 0.8139036807302943, "grad_norm": 1.953125, "learning_rate": 0.00015282191362800157, "loss": 2.5341, "step": 17386 }, { "epoch": 0.813950494470127, "grad_norm": 1.4921875, "learning_rate": 0.00015281695701386587, "loss": 2.7792, "step": 17387 }, { "epoch": 0.8139973082099596, "grad_norm": 1.203125, "learning_rate": 0.00015281200021975946, "loss": 2.3045, "step": 17388 }, { "epoch": 0.8140441219497923, "grad_norm": 1.3046875, "learning_rate": 0.00015280704324569918, "loss": 2.6373, "step": 17389 }, { "epoch": 0.8140909356896249, "grad_norm": 1.5, "learning_rate": 0.00015280208609170197, "loss": 2.3415, "step": 17390 }, { "epoch": 0.8141377494294575, "grad_norm": 1.9609375, "learning_rate": 0.0001527971287577847, "loss": 2.5026, "step": 17391 }, { "epoch": 0.8141845631692902, "grad_norm": 1.3515625, "learning_rate": 0.00015279217124396425, "loss": 2.4518, "step": 17392 }, { "epoch": 0.8142313769091228, "grad_norm": 1.5703125, "learning_rate": 0.00015278721355025754, "loss": 2.4072, "step": 17393 }, { "epoch": 0.8142781906489555, "grad_norm": 1.2421875, "learning_rate": 0.00015278225567668145, "loss": 2.3005, "step": 17394 }, { "epoch": 0.8143250043887881, "grad_norm": 1.34375, "learning_rate": 0.00015277729762325288, "loss": 2.3886, "step": 17395 }, { "epoch": 0.8143718181286207, "grad_norm": 1.65625, "learning_rate": 0.00015277233938998868, "loss": 2.7091, "step": 17396 }, { "epoch": 0.8144186318684534, "grad_norm": 1.5, "learning_rate": 0.00015276738097690586, "loss": 2.5267, "step": 17397 }, { "epoch": 0.814465445608286, "grad_norm": 1.2265625, "learning_rate": 0.00015276242238402118, "loss": 2.4896, "step": 17398 }, { "epoch": 0.8145122593481187, "grad_norm": 1.5390625, "learning_rate": 0.00015275746361135162, "loss": 2.422, "step": 17399 }, { "epoch": 0.8145590730879513, "grad_norm": 1.359375, "learning_rate": 0.00015275250465891405, "loss": 2.486, "step": 17400 }, { "epoch": 0.814605886827784, "grad_norm": 1.2578125, "learning_rate": 0.00015274754552672536, "loss": 2.7399, "step": 17401 }, { "epoch": 0.8146527005676166, "grad_norm": 1.3984375, "learning_rate": 0.00015274258621480246, "loss": 2.7632, "step": 17402 }, { "epoch": 0.8146995143074492, "grad_norm": 1.859375, "learning_rate": 0.00015273762672316226, "loss": 2.0605, "step": 17403 }, { "epoch": 0.8147463280472819, "grad_norm": 1.5546875, "learning_rate": 0.00015273266705182162, "loss": 2.4061, "step": 17404 }, { "epoch": 0.8147931417871145, "grad_norm": 1.125, "learning_rate": 0.00015272770720079748, "loss": 2.4024, "step": 17405 }, { "epoch": 0.8148399555269472, "grad_norm": 1.4921875, "learning_rate": 0.00015272274717010676, "loss": 2.4464, "step": 17406 }, { "epoch": 0.8148867692667798, "grad_norm": 1.5625, "learning_rate": 0.00015271778695976623, "loss": 2.7681, "step": 17407 }, { "epoch": 0.8149335830066124, "grad_norm": 1.3828125, "learning_rate": 0.00015271282656979297, "loss": 2.6249, "step": 17408 }, { "epoch": 0.8149803967464451, "grad_norm": 1.4296875, "learning_rate": 0.00015270786600020378, "loss": 2.3451, "step": 17409 }, { "epoch": 0.8150272104862777, "grad_norm": 1.2578125, "learning_rate": 0.00015270290525101562, "loss": 2.351, "step": 17410 }, { "epoch": 0.8150740242261104, "grad_norm": 1.109375, "learning_rate": 0.0001526979443222453, "loss": 3.0087, "step": 17411 }, { "epoch": 0.815120837965943, "grad_norm": 2.109375, "learning_rate": 0.0001526929832139098, "loss": 2.5757, "step": 17412 }, { "epoch": 0.8151676517057757, "grad_norm": 1.03125, "learning_rate": 0.00015268802192602602, "loss": 4.902, "step": 17413 }, { "epoch": 0.8152144654456083, "grad_norm": 1.7109375, "learning_rate": 0.00015268306045861083, "loss": 2.8298, "step": 17414 }, { "epoch": 0.8152612791854409, "grad_norm": 1.2578125, "learning_rate": 0.00015267809881168115, "loss": 2.9069, "step": 17415 }, { "epoch": 0.8153080929252736, "grad_norm": 1.5703125, "learning_rate": 0.00015267313698525393, "loss": 1.9523, "step": 17416 }, { "epoch": 0.8153549066651062, "grad_norm": 1.5546875, "learning_rate": 0.000152668174979346, "loss": 2.6804, "step": 17417 }, { "epoch": 0.8154017204049389, "grad_norm": 1.6328125, "learning_rate": 0.00015266321279397432, "loss": 2.3042, "step": 17418 }, { "epoch": 0.8154485341447715, "grad_norm": 1.34375, "learning_rate": 0.0001526582504291558, "loss": 2.3292, "step": 17419 }, { "epoch": 0.8154953478846041, "grad_norm": 1.171875, "learning_rate": 0.0001526532878849073, "loss": 2.6209, "step": 17420 }, { "epoch": 0.8155421616244368, "grad_norm": 1.546875, "learning_rate": 0.00015264832516124582, "loss": 2.5959, "step": 17421 }, { "epoch": 0.8155889753642694, "grad_norm": 1.203125, "learning_rate": 0.00015264336225818815, "loss": 1.9175, "step": 17422 }, { "epoch": 0.8156357891041021, "grad_norm": 1.4453125, "learning_rate": 0.00015263839917575132, "loss": 2.3777, "step": 17423 }, { "epoch": 0.8156826028439347, "grad_norm": 1.6484375, "learning_rate": 0.0001526334359139522, "loss": 2.1185, "step": 17424 }, { "epoch": 0.8157294165837673, "grad_norm": 1.484375, "learning_rate": 0.00015262847247280765, "loss": 2.3221, "step": 17425 }, { "epoch": 0.8157762303236, "grad_norm": 1.328125, "learning_rate": 0.00015262350885233462, "loss": 2.2903, "step": 17426 }, { "epoch": 0.8158230440634326, "grad_norm": 1.3671875, "learning_rate": 0.00015261854505255003, "loss": 2.2023, "step": 17427 }, { "epoch": 0.8158698578032653, "grad_norm": 1.2578125, "learning_rate": 0.00015261358107347084, "loss": 2.52, "step": 17428 }, { "epoch": 0.8159166715430979, "grad_norm": 1.2734375, "learning_rate": 0.00015260861691511384, "loss": 2.4416, "step": 17429 }, { "epoch": 0.8159634852829305, "grad_norm": 1.296875, "learning_rate": 0.00015260365257749604, "loss": 2.5878, "step": 17430 }, { "epoch": 0.8160102990227632, "grad_norm": 1.609375, "learning_rate": 0.00015259868806063437, "loss": 2.596, "step": 17431 }, { "epoch": 0.8160571127625958, "grad_norm": 1.5, "learning_rate": 0.00015259372336454567, "loss": 2.3999, "step": 17432 }, { "epoch": 0.8161039265024285, "grad_norm": 1.2109375, "learning_rate": 0.0001525887584892469, "loss": 2.7462, "step": 17433 }, { "epoch": 0.8161507402422611, "grad_norm": 2.03125, "learning_rate": 0.00015258379343475502, "loss": 2.7147, "step": 17434 }, { "epoch": 0.8161975539820937, "grad_norm": 1.7265625, "learning_rate": 0.00015257882820108687, "loss": 2.8198, "step": 17435 }, { "epoch": 0.8162443677219264, "grad_norm": 1.3203125, "learning_rate": 0.0001525738627882594, "loss": 2.3114, "step": 17436 }, { "epoch": 0.816291181461759, "grad_norm": 1.28125, "learning_rate": 0.00015256889719628952, "loss": 2.8677, "step": 17437 }, { "epoch": 0.8163379952015917, "grad_norm": 1.4765625, "learning_rate": 0.00015256393142519417, "loss": 2.8085, "step": 17438 }, { "epoch": 0.8163848089414243, "grad_norm": 1.125, "learning_rate": 0.00015255896547499028, "loss": 2.3262, "step": 17439 }, { "epoch": 0.8164316226812569, "grad_norm": 1.59375, "learning_rate": 0.00015255399934569475, "loss": 2.475, "step": 17440 }, { "epoch": 0.8164784364210896, "grad_norm": 1.4375, "learning_rate": 0.00015254903303732448, "loss": 2.7243, "step": 17441 }, { "epoch": 0.8165252501609223, "grad_norm": 1.5, "learning_rate": 0.00015254406654989643, "loss": 3.0671, "step": 17442 }, { "epoch": 0.8165720639007549, "grad_norm": 1.2421875, "learning_rate": 0.00015253909988342752, "loss": 2.7923, "step": 17443 }, { "epoch": 0.8166188776405875, "grad_norm": 1.390625, "learning_rate": 0.00015253413303793466, "loss": 2.1366, "step": 17444 }, { "epoch": 0.8166656913804201, "grad_norm": 1.3515625, "learning_rate": 0.00015252916601343479, "loss": 2.6099, "step": 17445 }, { "epoch": 0.8167125051202528, "grad_norm": 1.5390625, "learning_rate": 0.0001525241988099448, "loss": 2.7903, "step": 17446 }, { "epoch": 0.8167593188600855, "grad_norm": 1.296875, "learning_rate": 0.00015251923142748166, "loss": 2.5756, "step": 17447 }, { "epoch": 0.8168061325999181, "grad_norm": 3.09375, "learning_rate": 0.00015251426386606224, "loss": 2.5893, "step": 17448 }, { "epoch": 0.8168529463397507, "grad_norm": 1.3125, "learning_rate": 0.00015250929612570352, "loss": 2.6245, "step": 17449 }, { "epoch": 0.8168997600795833, "grad_norm": 1.1953125, "learning_rate": 0.0001525043282064224, "loss": 2.217, "step": 17450 }, { "epoch": 0.816946573819416, "grad_norm": 1.2734375, "learning_rate": 0.00015249936010823585, "loss": 2.5359, "step": 17451 }, { "epoch": 0.8169933875592487, "grad_norm": 1.2578125, "learning_rate": 0.00015249439183116073, "loss": 2.4984, "step": 17452 }, { "epoch": 0.8170402012990813, "grad_norm": 2.34375, "learning_rate": 0.00015248942337521405, "loss": 2.8921, "step": 17453 }, { "epoch": 0.8170870150389139, "grad_norm": 1.6484375, "learning_rate": 0.00015248445474041266, "loss": 2.8148, "step": 17454 }, { "epoch": 0.8171338287787465, "grad_norm": 1.46875, "learning_rate": 0.00015247948592677353, "loss": 2.3421, "step": 17455 }, { "epoch": 0.8171806425185792, "grad_norm": 1.5859375, "learning_rate": 0.0001524745169343136, "loss": 2.508, "step": 17456 }, { "epoch": 0.8172274562584119, "grad_norm": 1.421875, "learning_rate": 0.00015246954776304973, "loss": 2.4869, "step": 17457 }, { "epoch": 0.8172742699982445, "grad_norm": 1.640625, "learning_rate": 0.00015246457841299897, "loss": 2.5641, "step": 17458 }, { "epoch": 0.8173210837380771, "grad_norm": 1.34375, "learning_rate": 0.00015245960888417813, "loss": 2.4428, "step": 17459 }, { "epoch": 0.8173678974779097, "grad_norm": 2.21875, "learning_rate": 0.00015245463917660428, "loss": 2.5603, "step": 17460 }, { "epoch": 0.8174147112177425, "grad_norm": 2.234375, "learning_rate": 0.00015244966929029422, "loss": 2.7405, "step": 17461 }, { "epoch": 0.8174615249575751, "grad_norm": 1.5, "learning_rate": 0.000152444699225265, "loss": 2.6971, "step": 17462 }, { "epoch": 0.8175083386974077, "grad_norm": 2.015625, "learning_rate": 0.00015243972898153346, "loss": 2.5343, "step": 17463 }, { "epoch": 0.8175551524372403, "grad_norm": 1.734375, "learning_rate": 0.00015243475855911657, "loss": 2.8239, "step": 17464 }, { "epoch": 0.8176019661770729, "grad_norm": 1.1796875, "learning_rate": 0.0001524297879580313, "loss": 2.2468, "step": 17465 }, { "epoch": 0.8176487799169057, "grad_norm": 1.53125, "learning_rate": 0.00015242481717829453, "loss": 3.1652, "step": 17466 }, { "epoch": 0.8176955936567383, "grad_norm": 1.3828125, "learning_rate": 0.00015241984621992325, "loss": 2.6664, "step": 17467 }, { "epoch": 0.8177424073965709, "grad_norm": 1.4921875, "learning_rate": 0.00015241487508293436, "loss": 2.6052, "step": 17468 }, { "epoch": 0.8177892211364035, "grad_norm": 1.234375, "learning_rate": 0.0001524099037673448, "loss": 2.0745, "step": 17469 }, { "epoch": 0.8178360348762361, "grad_norm": 1.828125, "learning_rate": 0.00015240493227317153, "loss": 2.0604, "step": 17470 }, { "epoch": 0.8178828486160689, "grad_norm": 1.4453125, "learning_rate": 0.0001523999606004315, "loss": 2.592, "step": 17471 }, { "epoch": 0.8179296623559015, "grad_norm": 1.1015625, "learning_rate": 0.00015239498874914162, "loss": 2.1066, "step": 17472 }, { "epoch": 0.8179764760957341, "grad_norm": 1.7265625, "learning_rate": 0.00015239001671931882, "loss": 2.9381, "step": 17473 }, { "epoch": 0.8180232898355667, "grad_norm": 1.796875, "learning_rate": 0.0001523850445109801, "loss": 2.5331, "step": 17474 }, { "epoch": 0.8180701035753993, "grad_norm": 1.4375, "learning_rate": 0.00015238007212414235, "loss": 2.4271, "step": 17475 }, { "epoch": 0.8181169173152321, "grad_norm": 1.890625, "learning_rate": 0.00015237509955882256, "loss": 2.7615, "step": 17476 }, { "epoch": 0.8181637310550647, "grad_norm": 1.5390625, "learning_rate": 0.00015237012681503762, "loss": 2.7256, "step": 17477 }, { "epoch": 0.8182105447948973, "grad_norm": 1.4140625, "learning_rate": 0.00015236515389280451, "loss": 3.4693, "step": 17478 }, { "epoch": 0.8182573585347299, "grad_norm": 2.296875, "learning_rate": 0.00015236018079214017, "loss": 2.4393, "step": 17479 }, { "epoch": 0.8183041722745625, "grad_norm": 2.015625, "learning_rate": 0.00015235520751306152, "loss": 2.9884, "step": 17480 }, { "epoch": 0.8183509860143953, "grad_norm": 1.859375, "learning_rate": 0.00015235023405558559, "loss": 3.2988, "step": 17481 }, { "epoch": 0.8183977997542279, "grad_norm": 1.390625, "learning_rate": 0.00015234526041972918, "loss": 2.6227, "step": 17482 }, { "epoch": 0.8184446134940605, "grad_norm": 1.3046875, "learning_rate": 0.00015234028660550934, "loss": 2.8643, "step": 17483 }, { "epoch": 0.8184914272338931, "grad_norm": 1.2578125, "learning_rate": 0.00015233531261294302, "loss": 2.6271, "step": 17484 }, { "epoch": 0.8185382409737257, "grad_norm": 1.296875, "learning_rate": 0.00015233033844204715, "loss": 2.6772, "step": 17485 }, { "epoch": 0.8185850547135585, "grad_norm": 1.875, "learning_rate": 0.00015232536409283864, "loss": 2.482, "step": 17486 }, { "epoch": 0.8186318684533911, "grad_norm": 1.1640625, "learning_rate": 0.00015232038956533453, "loss": 2.6356, "step": 17487 }, { "epoch": 0.8186786821932237, "grad_norm": 1.6015625, "learning_rate": 0.00015231541485955167, "loss": 2.6297, "step": 17488 }, { "epoch": 0.8187254959330563, "grad_norm": 1.6328125, "learning_rate": 0.0001523104399755071, "loss": 2.497, "step": 17489 }, { "epoch": 0.8187723096728889, "grad_norm": 1.5625, "learning_rate": 0.0001523054649132177, "loss": 2.0648, "step": 17490 }, { "epoch": 0.8188191234127217, "grad_norm": 1.3671875, "learning_rate": 0.00015230048967270046, "loss": 2.5125, "step": 17491 }, { "epoch": 0.8188659371525543, "grad_norm": 1.4296875, "learning_rate": 0.00015229551425397232, "loss": 2.5943, "step": 17492 }, { "epoch": 0.8189127508923869, "grad_norm": 1.8359375, "learning_rate": 0.00015229053865705025, "loss": 2.1279, "step": 17493 }, { "epoch": 0.8189595646322195, "grad_norm": 1.546875, "learning_rate": 0.0001522855628819512, "loss": 2.5704, "step": 17494 }, { "epoch": 0.8190063783720521, "grad_norm": 1.140625, "learning_rate": 0.00015228058692869206, "loss": 2.5052, "step": 17495 }, { "epoch": 0.8190531921118849, "grad_norm": 1.171875, "learning_rate": 0.0001522756107972899, "loss": 2.206, "step": 17496 }, { "epoch": 0.8191000058517175, "grad_norm": 2.59375, "learning_rate": 0.00015227063448776157, "loss": 2.1383, "step": 17497 }, { "epoch": 0.8191468195915501, "grad_norm": 1.1953125, "learning_rate": 0.00015226565800012412, "loss": 2.4092, "step": 17498 }, { "epoch": 0.8191936333313827, "grad_norm": 1.2421875, "learning_rate": 0.00015226068133439443, "loss": 2.7222, "step": 17499 }, { "epoch": 0.8192404470712153, "grad_norm": 1.3984375, "learning_rate": 0.00015225570449058955, "loss": 2.8657, "step": 17500 }, { "epoch": 0.8192872608110481, "grad_norm": 1.6875, "learning_rate": 0.00015225072746872633, "loss": 2.6507, "step": 17501 }, { "epoch": 0.8193340745508807, "grad_norm": 1.3671875, "learning_rate": 0.00015224575026882175, "loss": 2.6115, "step": 17502 }, { "epoch": 0.8193808882907133, "grad_norm": 1.8125, "learning_rate": 0.00015224077289089285, "loss": 2.5789, "step": 17503 }, { "epoch": 0.8194277020305459, "grad_norm": 1.671875, "learning_rate": 0.0001522357953349565, "loss": 2.2421, "step": 17504 }, { "epoch": 0.8194745157703786, "grad_norm": 1.6875, "learning_rate": 0.00015223081760102974, "loss": 2.1231, "step": 17505 }, { "epoch": 0.8195213295102113, "grad_norm": 1.7734375, "learning_rate": 0.00015222583968912943, "loss": 2.3477, "step": 17506 }, { "epoch": 0.8195681432500439, "grad_norm": 1.3984375, "learning_rate": 0.0001522208615992726, "loss": 2.2102, "step": 17507 }, { "epoch": 0.8196149569898765, "grad_norm": 1.15625, "learning_rate": 0.00015221588333147622, "loss": 2.397, "step": 17508 }, { "epoch": 0.8196617707297091, "grad_norm": 1.890625, "learning_rate": 0.00015221090488575723, "loss": 2.9328, "step": 17509 }, { "epoch": 0.8197085844695418, "grad_norm": 1.6953125, "learning_rate": 0.00015220592626213262, "loss": 2.1377, "step": 17510 }, { "epoch": 0.8197553982093745, "grad_norm": 1.4375, "learning_rate": 0.00015220094746061933, "loss": 2.6143, "step": 17511 }, { "epoch": 0.8198022119492071, "grad_norm": 1.4453125, "learning_rate": 0.00015219596848123433, "loss": 2.5163, "step": 17512 }, { "epoch": 0.8198490256890397, "grad_norm": 1.2421875, "learning_rate": 0.00015219098932399457, "loss": 2.5926, "step": 17513 }, { "epoch": 0.8198958394288723, "grad_norm": 1.8203125, "learning_rate": 0.000152186009988917, "loss": 2.8068, "step": 17514 }, { "epoch": 0.819942653168705, "grad_norm": 1.453125, "learning_rate": 0.00015218103047601864, "loss": 2.7687, "step": 17515 }, { "epoch": 0.8199894669085377, "grad_norm": 1.1328125, "learning_rate": 0.00015217605078531645, "loss": 3.8105, "step": 17516 }, { "epoch": 0.8200362806483703, "grad_norm": 1.4609375, "learning_rate": 0.0001521710709168274, "loss": 2.6464, "step": 17517 }, { "epoch": 0.8200830943882029, "grad_norm": 1.3828125, "learning_rate": 0.0001521660908705684, "loss": 2.6843, "step": 17518 }, { "epoch": 0.8201299081280355, "grad_norm": 1.5, "learning_rate": 0.0001521611106465565, "loss": 2.3969, "step": 17519 }, { "epoch": 0.8201767218678683, "grad_norm": 0.953125, "learning_rate": 0.0001521561302448086, "loss": 3.2767, "step": 17520 }, { "epoch": 0.8202235356077009, "grad_norm": 1.359375, "learning_rate": 0.00015215114966534173, "loss": 2.91, "step": 17521 }, { "epoch": 0.8202703493475335, "grad_norm": 1.4453125, "learning_rate": 0.0001521461689081728, "loss": 2.712, "step": 17522 }, { "epoch": 0.8203171630873661, "grad_norm": 1.2734375, "learning_rate": 0.00015214118797331883, "loss": 2.7221, "step": 17523 }, { "epoch": 0.8203639768271987, "grad_norm": 1.34375, "learning_rate": 0.00015213620686079678, "loss": 2.1127, "step": 17524 }, { "epoch": 0.8204107905670315, "grad_norm": 1.4375, "learning_rate": 0.00015213122557062358, "loss": 2.4334, "step": 17525 }, { "epoch": 0.8204576043068641, "grad_norm": 1.3828125, "learning_rate": 0.0001521262441028163, "loss": 2.344, "step": 17526 }, { "epoch": 0.8205044180466967, "grad_norm": 1.9375, "learning_rate": 0.0001521212624573918, "loss": 2.5886, "step": 17527 }, { "epoch": 0.8205512317865293, "grad_norm": 1.6640625, "learning_rate": 0.00015211628063436714, "loss": 2.6541, "step": 17528 }, { "epoch": 0.820598045526362, "grad_norm": 1.5703125, "learning_rate": 0.0001521112986337593, "loss": 2.9354, "step": 17529 }, { "epoch": 0.8206448592661947, "grad_norm": 1.25, "learning_rate": 0.00015210631645558515, "loss": 2.409, "step": 17530 }, { "epoch": 0.8206916730060273, "grad_norm": 1.34375, "learning_rate": 0.0001521013340998618, "loss": 2.2267, "step": 17531 }, { "epoch": 0.8207384867458599, "grad_norm": 1.5234375, "learning_rate": 0.00015209635156660612, "loss": 2.6168, "step": 17532 }, { "epoch": 0.8207853004856925, "grad_norm": 1.34375, "learning_rate": 0.00015209136885583518, "loss": 2.5006, "step": 17533 }, { "epoch": 0.8208321142255252, "grad_norm": 1.6953125, "learning_rate": 0.00015208638596756586, "loss": 2.4243, "step": 17534 }, { "epoch": 0.8208789279653579, "grad_norm": 1.5234375, "learning_rate": 0.00015208140290181525, "loss": 2.2516, "step": 17535 }, { "epoch": 0.8209257417051905, "grad_norm": 1.2890625, "learning_rate": 0.00015207641965860023, "loss": 2.4395, "step": 17536 }, { "epoch": 0.8209725554450231, "grad_norm": 1.421875, "learning_rate": 0.00015207143623793782, "loss": 2.5637, "step": 17537 }, { "epoch": 0.8210193691848557, "grad_norm": 1.265625, "learning_rate": 0.00015206645263984503, "loss": 2.5276, "step": 17538 }, { "epoch": 0.8210661829246884, "grad_norm": 1.3671875, "learning_rate": 0.00015206146886433879, "loss": 2.6895, "step": 17539 }, { "epoch": 0.8211129966645211, "grad_norm": 1.9453125, "learning_rate": 0.0001520564849114361, "loss": 2.9245, "step": 17540 }, { "epoch": 0.8211598104043537, "grad_norm": 5.6875, "learning_rate": 0.00015205150078115396, "loss": 2.6028, "step": 17541 }, { "epoch": 0.8212066241441863, "grad_norm": 1.46875, "learning_rate": 0.00015204651647350933, "loss": 2.6199, "step": 17542 }, { "epoch": 0.8212534378840189, "grad_norm": 1.6640625, "learning_rate": 0.0001520415319885192, "loss": 2.6664, "step": 17543 }, { "epoch": 0.8213002516238516, "grad_norm": 1.4921875, "learning_rate": 0.00015203654732620056, "loss": 2.4922, "step": 17544 }, { "epoch": 0.8213470653636843, "grad_norm": 1.4375, "learning_rate": 0.00015203156248657038, "loss": 2.7384, "step": 17545 }, { "epoch": 0.8213938791035169, "grad_norm": 2.8125, "learning_rate": 0.0001520265774696457, "loss": 2.0638, "step": 17546 }, { "epoch": 0.8214406928433495, "grad_norm": 1.4453125, "learning_rate": 0.0001520215922754434, "loss": 2.7149, "step": 17547 }, { "epoch": 0.8214875065831821, "grad_norm": 1.703125, "learning_rate": 0.00015201660690398062, "loss": 2.4743, "step": 17548 }, { "epoch": 0.8215343203230148, "grad_norm": 1.4296875, "learning_rate": 0.0001520116213552742, "loss": 2.8633, "step": 17549 }, { "epoch": 0.8215811340628475, "grad_norm": 1.4140625, "learning_rate": 0.00015200663562934118, "loss": 2.5717, "step": 17550 }, { "epoch": 0.8216279478026801, "grad_norm": 1.25, "learning_rate": 0.0001520016497261986, "loss": 2.7065, "step": 17551 }, { "epoch": 0.8216747615425127, "grad_norm": 1.3515625, "learning_rate": 0.00015199666364586336, "loss": 2.2531, "step": 17552 }, { "epoch": 0.8217215752823454, "grad_norm": 1.390625, "learning_rate": 0.00015199167738835252, "loss": 2.2341, "step": 17553 }, { "epoch": 0.821768389022178, "grad_norm": 4.125, "learning_rate": 0.000151986690953683, "loss": 2.5087, "step": 17554 }, { "epoch": 0.8218152027620107, "grad_norm": 1.28125, "learning_rate": 0.0001519817043418719, "loss": 2.5569, "step": 17555 }, { "epoch": 0.8218620165018433, "grad_norm": 1.515625, "learning_rate": 0.0001519767175529361, "loss": 2.6944, "step": 17556 }, { "epoch": 0.8219088302416759, "grad_norm": 1.2421875, "learning_rate": 0.00015197173058689267, "loss": 2.6367, "step": 17557 }, { "epoch": 0.8219556439815086, "grad_norm": 1.921875, "learning_rate": 0.00015196674344375856, "loss": 2.3343, "step": 17558 }, { "epoch": 0.8220024577213412, "grad_norm": 1.6328125, "learning_rate": 0.00015196175612355078, "loss": 2.5919, "step": 17559 }, { "epoch": 0.8220492714611739, "grad_norm": 1.875, "learning_rate": 0.00015195676862628628, "loss": 2.7237, "step": 17560 }, { "epoch": 0.8220960852010065, "grad_norm": 1.4921875, "learning_rate": 0.00015195178095198214, "loss": 2.1786, "step": 17561 }, { "epoch": 0.8221428989408391, "grad_norm": 1.4765625, "learning_rate": 0.00015194679310065532, "loss": 2.3834, "step": 17562 }, { "epoch": 0.8221897126806718, "grad_norm": 1.8046875, "learning_rate": 0.00015194180507232277, "loss": 2.7989, "step": 17563 }, { "epoch": 0.8222365264205044, "grad_norm": 2.140625, "learning_rate": 0.00015193681686700153, "loss": 2.4519, "step": 17564 }, { "epoch": 0.8222833401603371, "grad_norm": 1.3125, "learning_rate": 0.0001519318284847086, "loss": 2.2687, "step": 17565 }, { "epoch": 0.8223301539001697, "grad_norm": 2.875, "learning_rate": 0.00015192683992546098, "loss": 2.3272, "step": 17566 }, { "epoch": 0.8223769676400023, "grad_norm": 2.171875, "learning_rate": 0.00015192185118927562, "loss": 2.8859, "step": 17567 }, { "epoch": 0.822423781379835, "grad_norm": 1.5390625, "learning_rate": 0.00015191686227616956, "loss": 2.0737, "step": 17568 }, { "epoch": 0.8224705951196676, "grad_norm": 1.2890625, "learning_rate": 0.00015191187318615981, "loss": 2.1264, "step": 17569 }, { "epoch": 0.8225174088595003, "grad_norm": 1.125, "learning_rate": 0.00015190688391926332, "loss": 2.3377, "step": 17570 }, { "epoch": 0.8225642225993329, "grad_norm": 1.6015625, "learning_rate": 0.00015190189447549716, "loss": 2.6572, "step": 17571 }, { "epoch": 0.8226110363391655, "grad_norm": 1.5, "learning_rate": 0.00015189690485487825, "loss": 2.3978, "step": 17572 }, { "epoch": 0.8226578500789982, "grad_norm": 1.4375, "learning_rate": 0.00015189191505742371, "loss": 2.6968, "step": 17573 }, { "epoch": 0.8227046638188308, "grad_norm": 1.5078125, "learning_rate": 0.0001518869250831504, "loss": 2.6619, "step": 17574 }, { "epoch": 0.8227514775586635, "grad_norm": 1.65625, "learning_rate": 0.00015188193493207542, "loss": 2.4278, "step": 17575 }, { "epoch": 0.8227982912984961, "grad_norm": 1.5390625, "learning_rate": 0.00015187694460421574, "loss": 2.7731, "step": 17576 }, { "epoch": 0.8228451050383288, "grad_norm": 1.7421875, "learning_rate": 0.00015187195409958838, "loss": 2.6322, "step": 17577 }, { "epoch": 0.8228919187781614, "grad_norm": 1.2421875, "learning_rate": 0.00015186696341821035, "loss": 2.6778, "step": 17578 }, { "epoch": 0.822938732517994, "grad_norm": 1.734375, "learning_rate": 0.0001518619725600986, "loss": 2.4931, "step": 17579 }, { "epoch": 0.8229855462578267, "grad_norm": 1.5234375, "learning_rate": 0.0001518569815252702, "loss": 2.2794, "step": 17580 }, { "epoch": 0.8230323599976593, "grad_norm": 1.9453125, "learning_rate": 0.00015185199031374214, "loss": 3.0597, "step": 17581 }, { "epoch": 0.823079173737492, "grad_norm": 1.953125, "learning_rate": 0.00015184699892553137, "loss": 3.3291, "step": 17582 }, { "epoch": 0.8231259874773246, "grad_norm": 1.421875, "learning_rate": 0.00015184200736065501, "loss": 2.5462, "step": 17583 }, { "epoch": 0.8231728012171572, "grad_norm": 1.5859375, "learning_rate": 0.00015183701561912995, "loss": 2.5286, "step": 17584 }, { "epoch": 0.8232196149569899, "grad_norm": 1.71875, "learning_rate": 0.0001518320237009733, "loss": 2.0423, "step": 17585 }, { "epoch": 0.8232664286968225, "grad_norm": 2.0625, "learning_rate": 0.00015182703160620205, "loss": 2.051, "step": 17586 }, { "epoch": 0.8233132424366552, "grad_norm": 1.4296875, "learning_rate": 0.00015182203933483313, "loss": 2.7263, "step": 17587 }, { "epoch": 0.8233600561764878, "grad_norm": 1.0546875, "learning_rate": 0.00015181704688688362, "loss": 2.4393, "step": 17588 }, { "epoch": 0.8234068699163204, "grad_norm": 1.6484375, "learning_rate": 0.00015181205426237053, "loss": 2.5731, "step": 17589 }, { "epoch": 0.8234536836561531, "grad_norm": 1.5703125, "learning_rate": 0.00015180706146131085, "loss": 2.6162, "step": 17590 }, { "epoch": 0.8235004973959857, "grad_norm": 1.1875, "learning_rate": 0.00015180206848372158, "loss": 2.1977, "step": 17591 }, { "epoch": 0.8235473111358184, "grad_norm": 1.765625, "learning_rate": 0.0001517970753296198, "loss": 2.5597, "step": 17592 }, { "epoch": 0.823594124875651, "grad_norm": 1.328125, "learning_rate": 0.00015179208199902244, "loss": 2.4817, "step": 17593 }, { "epoch": 0.8236409386154836, "grad_norm": 1.5234375, "learning_rate": 0.00015178708849194661, "loss": 2.367, "step": 17594 }, { "epoch": 0.8236877523553163, "grad_norm": 1.484375, "learning_rate": 0.00015178209480840922, "loss": 2.2249, "step": 17595 }, { "epoch": 0.823734566095149, "grad_norm": 1.890625, "learning_rate": 0.00015177710094842737, "loss": 2.7438, "step": 17596 }, { "epoch": 0.8237813798349816, "grad_norm": 1.546875, "learning_rate": 0.00015177210691201803, "loss": 2.4232, "step": 17597 }, { "epoch": 0.8238281935748142, "grad_norm": 1.8046875, "learning_rate": 0.0001517671126991982, "loss": 2.4312, "step": 17598 }, { "epoch": 0.8238750073146468, "grad_norm": 1.703125, "learning_rate": 0.00015176211830998494, "loss": 2.2893, "step": 17599 }, { "epoch": 0.8239218210544795, "grad_norm": 1.6640625, "learning_rate": 0.00015175712374439527, "loss": 2.5281, "step": 17600 }, { "epoch": 0.8239686347943121, "grad_norm": 1.3984375, "learning_rate": 0.00015175212900244618, "loss": 2.5386, "step": 17601 }, { "epoch": 0.8240154485341448, "grad_norm": 1.7734375, "learning_rate": 0.00015174713408415468, "loss": 2.8061, "step": 17602 }, { "epoch": 0.8240622622739774, "grad_norm": 1.2421875, "learning_rate": 0.00015174213898953785, "loss": 4.0535, "step": 17603 }, { "epoch": 0.82410907601381, "grad_norm": 1.4140625, "learning_rate": 0.00015173714371861265, "loss": 2.4607, "step": 17604 }, { "epoch": 0.8241558897536427, "grad_norm": 1.7421875, "learning_rate": 0.00015173214827139618, "loss": 2.4384, "step": 17605 }, { "epoch": 0.8242027034934754, "grad_norm": 1.390625, "learning_rate": 0.00015172715264790532, "loss": 2.2802, "step": 17606 }, { "epoch": 0.824249517233308, "grad_norm": 1.2890625, "learning_rate": 0.0001517221568481572, "loss": 2.2994, "step": 17607 }, { "epoch": 0.8242963309731406, "grad_norm": 1.4921875, "learning_rate": 0.00015171716087216886, "loss": 2.1047, "step": 17608 }, { "epoch": 0.8243431447129732, "grad_norm": 1.4765625, "learning_rate": 0.00015171216471995724, "loss": 2.6627, "step": 17609 }, { "epoch": 0.8243899584528059, "grad_norm": 5.5625, "learning_rate": 0.00015170716839153943, "loss": 2.2351, "step": 17610 }, { "epoch": 0.8244367721926386, "grad_norm": 1.5625, "learning_rate": 0.00015170217188693243, "loss": 2.5649, "step": 17611 }, { "epoch": 0.8244835859324712, "grad_norm": 1.5078125, "learning_rate": 0.00015169717520615325, "loss": 2.7993, "step": 17612 }, { "epoch": 0.8245303996723038, "grad_norm": 1.375, "learning_rate": 0.00015169217834921895, "loss": 2.1804, "step": 17613 }, { "epoch": 0.8245772134121364, "grad_norm": 1.3359375, "learning_rate": 0.00015168718131614654, "loss": 2.3528, "step": 17614 }, { "epoch": 0.8246240271519691, "grad_norm": 1.15625, "learning_rate": 0.00015168218410695306, "loss": 2.1479, "step": 17615 }, { "epoch": 0.8246708408918018, "grad_norm": 2.765625, "learning_rate": 0.0001516771867216555, "loss": 2.5292, "step": 17616 }, { "epoch": 0.8247176546316344, "grad_norm": 1.96875, "learning_rate": 0.00015167218916027094, "loss": 2.3001, "step": 17617 }, { "epoch": 0.824764468371467, "grad_norm": 1.734375, "learning_rate": 0.00015166719142281637, "loss": 3.8206, "step": 17618 }, { "epoch": 0.8248112821112996, "grad_norm": 1.3359375, "learning_rate": 0.0001516621935093088, "loss": 2.392, "step": 17619 }, { "epoch": 0.8248580958511323, "grad_norm": 0.98828125, "learning_rate": 0.00015165719541976533, "loss": 1.3774, "step": 17620 }, { "epoch": 0.824904909590965, "grad_norm": 1.796875, "learning_rate": 0.00015165219715420296, "loss": 2.5718, "step": 17621 }, { "epoch": 0.8249517233307976, "grad_norm": 1.640625, "learning_rate": 0.0001516471987126387, "loss": 2.3752, "step": 17622 }, { "epoch": 0.8249985370706302, "grad_norm": 1.3359375, "learning_rate": 0.00015164220009508957, "loss": 2.4256, "step": 17623 }, { "epoch": 0.8250453508104628, "grad_norm": 1.71875, "learning_rate": 0.00015163720130157267, "loss": 2.7161, "step": 17624 }, { "epoch": 0.8250921645502955, "grad_norm": 1.28125, "learning_rate": 0.00015163220233210497, "loss": 2.3849, "step": 17625 }, { "epoch": 0.8251389782901282, "grad_norm": 1.296875, "learning_rate": 0.00015162720318670354, "loss": 2.5529, "step": 17626 }, { "epoch": 0.8251857920299608, "grad_norm": 2.046875, "learning_rate": 0.00015162220386538537, "loss": 2.509, "step": 17627 }, { "epoch": 0.8252326057697934, "grad_norm": 1.5390625, "learning_rate": 0.00015161720436816757, "loss": 2.639, "step": 17628 }, { "epoch": 0.825279419509626, "grad_norm": 1.9921875, "learning_rate": 0.00015161220469506707, "loss": 2.6232, "step": 17629 }, { "epoch": 0.8253262332494588, "grad_norm": 1.6015625, "learning_rate": 0.00015160720484610102, "loss": 2.3398, "step": 17630 }, { "epoch": 0.8253730469892914, "grad_norm": 1.2890625, "learning_rate": 0.00015160220482128635, "loss": 2.7886, "step": 17631 }, { "epoch": 0.825419860729124, "grad_norm": 1.5546875, "learning_rate": 0.0001515972046206402, "loss": 2.5693, "step": 17632 }, { "epoch": 0.8254666744689566, "grad_norm": 2.453125, "learning_rate": 0.00015159220424417952, "loss": 2.5264, "step": 17633 }, { "epoch": 0.8255134882087892, "grad_norm": 1.4453125, "learning_rate": 0.00015158720369192144, "loss": 2.3088, "step": 17634 }, { "epoch": 0.825560301948622, "grad_norm": 1.9453125, "learning_rate": 0.00015158220296388288, "loss": 2.5867, "step": 17635 }, { "epoch": 0.8256071156884546, "grad_norm": 1.4375, "learning_rate": 0.00015157720206008098, "loss": 2.3299, "step": 17636 }, { "epoch": 0.8256539294282872, "grad_norm": 1.90625, "learning_rate": 0.00015157220098053274, "loss": 1.8739, "step": 17637 }, { "epoch": 0.8257007431681198, "grad_norm": 1.1796875, "learning_rate": 0.00015156719972525518, "loss": 2.5659, "step": 17638 }, { "epoch": 0.8257475569079524, "grad_norm": 1.578125, "learning_rate": 0.00015156219829426544, "loss": 2.3893, "step": 17639 }, { "epoch": 0.8257943706477852, "grad_norm": 1.25, "learning_rate": 0.00015155719668758042, "loss": 2.3403, "step": 17640 }, { "epoch": 0.8258411843876178, "grad_norm": 1.4921875, "learning_rate": 0.00015155219490521727, "loss": 2.708, "step": 17641 }, { "epoch": 0.8258879981274504, "grad_norm": 2.375, "learning_rate": 0.00015154719294719295, "loss": 2.8466, "step": 17642 }, { "epoch": 0.825934811867283, "grad_norm": 1.2578125, "learning_rate": 0.0001515421908135246, "loss": 2.4793, "step": 17643 }, { "epoch": 0.8259816256071157, "grad_norm": 1.328125, "learning_rate": 0.0001515371885042292, "loss": 2.53, "step": 17644 }, { "epoch": 0.8260284393469484, "grad_norm": 1.6484375, "learning_rate": 0.00015153218601932376, "loss": 2.4576, "step": 17645 }, { "epoch": 0.826075253086781, "grad_norm": 1.671875, "learning_rate": 0.00015152718335882545, "loss": 2.4572, "step": 17646 }, { "epoch": 0.8261220668266136, "grad_norm": 1.3203125, "learning_rate": 0.00015152218052275122, "loss": 2.5552, "step": 17647 }, { "epoch": 0.8261688805664462, "grad_norm": 1.703125, "learning_rate": 0.0001515171775111181, "loss": 2.6873, "step": 17648 }, { "epoch": 0.826215694306279, "grad_norm": 1.3828125, "learning_rate": 0.0001515121743239432, "loss": 2.5881, "step": 17649 }, { "epoch": 0.8262625080461116, "grad_norm": 1.1875, "learning_rate": 0.0001515071709612436, "loss": 2.1804, "step": 17650 }, { "epoch": 0.8263093217859442, "grad_norm": 1.484375, "learning_rate": 0.0001515021674230362, "loss": 2.6154, "step": 17651 }, { "epoch": 0.8263561355257768, "grad_norm": 1.609375, "learning_rate": 0.00015149716370933816, "loss": 2.8377, "step": 17652 }, { "epoch": 0.8264029492656094, "grad_norm": 3.15625, "learning_rate": 0.00015149215982016657, "loss": 4.6235, "step": 17653 }, { "epoch": 0.8264497630054422, "grad_norm": 2.109375, "learning_rate": 0.00015148715575553836, "loss": 2.6576, "step": 17654 }, { "epoch": 0.8264965767452748, "grad_norm": 1.40625, "learning_rate": 0.0001514821515154707, "loss": 2.3238, "step": 17655 }, { "epoch": 0.8265433904851074, "grad_norm": 2.03125, "learning_rate": 0.0001514771470999805, "loss": 2.6252, "step": 17656 }, { "epoch": 0.82659020422494, "grad_norm": 1.625, "learning_rate": 0.00015147214250908498, "loss": 2.4521, "step": 17657 }, { "epoch": 0.8266370179647726, "grad_norm": 1.7109375, "learning_rate": 0.00015146713774280107, "loss": 2.5865, "step": 17658 }, { "epoch": 0.8266838317046054, "grad_norm": 1.296875, "learning_rate": 0.00015146213280114586, "loss": 2.7755, "step": 17659 }, { "epoch": 0.826730645444438, "grad_norm": 1.3359375, "learning_rate": 0.00015145712768413643, "loss": 2.5195, "step": 17660 }, { "epoch": 0.8267774591842706, "grad_norm": 1.4765625, "learning_rate": 0.0001514521223917898, "loss": 2.4258, "step": 17661 }, { "epoch": 0.8268242729241032, "grad_norm": 1.4453125, "learning_rate": 0.00015144711692412303, "loss": 2.7537, "step": 17662 }, { "epoch": 0.8268710866639358, "grad_norm": 3.46875, "learning_rate": 0.0001514421112811532, "loss": 2.4351, "step": 17663 }, { "epoch": 0.8269179004037686, "grad_norm": 1.28125, "learning_rate": 0.00015143710546289735, "loss": 2.2996, "step": 17664 }, { "epoch": 0.8269647141436012, "grad_norm": 1.4453125, "learning_rate": 0.0001514320994693725, "loss": 2.5584, "step": 17665 }, { "epoch": 0.8270115278834338, "grad_norm": 1.171875, "learning_rate": 0.00015142709330059578, "loss": 2.8167, "step": 17666 }, { "epoch": 0.8270583416232664, "grad_norm": 1.71875, "learning_rate": 0.0001514220869565842, "loss": 2.6364, "step": 17667 }, { "epoch": 0.827105155363099, "grad_norm": 1.59375, "learning_rate": 0.00015141708043735482, "loss": 2.6968, "step": 17668 }, { "epoch": 0.8271519691029318, "grad_norm": 1.671875, "learning_rate": 0.0001514120737429247, "loss": 2.3907, "step": 17669 }, { "epoch": 0.8271987828427644, "grad_norm": 1.4453125, "learning_rate": 0.00015140706687331095, "loss": 2.4152, "step": 17670 }, { "epoch": 0.827245596582597, "grad_norm": 1.3515625, "learning_rate": 0.00015140205982853058, "loss": 2.4856, "step": 17671 }, { "epoch": 0.8272924103224296, "grad_norm": 1.21875, "learning_rate": 0.00015139705260860066, "loss": 2.3685, "step": 17672 }, { "epoch": 0.8273392240622622, "grad_norm": 1.53125, "learning_rate": 0.00015139204521353827, "loss": 2.3064, "step": 17673 }, { "epoch": 0.827386037802095, "grad_norm": 3.484375, "learning_rate": 0.0001513870376433604, "loss": 2.3709, "step": 17674 }, { "epoch": 0.8274328515419276, "grad_norm": 4.09375, "learning_rate": 0.0001513820298980842, "loss": 3.0362, "step": 17675 }, { "epoch": 0.8274796652817602, "grad_norm": 1.953125, "learning_rate": 0.00015137702197772672, "loss": 2.6119, "step": 17676 }, { "epoch": 0.8275264790215928, "grad_norm": 1.953125, "learning_rate": 0.000151372013882305, "loss": 2.7549, "step": 17677 }, { "epoch": 0.8275732927614254, "grad_norm": 1.6015625, "learning_rate": 0.0001513670056118361, "loss": 2.3989, "step": 17678 }, { "epoch": 0.8276201065012582, "grad_norm": 1.71875, "learning_rate": 0.0001513619971663371, "loss": 2.1201, "step": 17679 }, { "epoch": 0.8276669202410908, "grad_norm": 1.296875, "learning_rate": 0.00015135698854582506, "loss": 3.3225, "step": 17680 }, { "epoch": 0.8277137339809234, "grad_norm": 1.4140625, "learning_rate": 0.0001513519797503171, "loss": 2.7314, "step": 17681 }, { "epoch": 0.827760547720756, "grad_norm": 1.6015625, "learning_rate": 0.0001513469707798302, "loss": 2.5764, "step": 17682 }, { "epoch": 0.8278073614605886, "grad_norm": 1.3984375, "learning_rate": 0.00015134196163438146, "loss": 2.3155, "step": 17683 }, { "epoch": 0.8278541752004214, "grad_norm": 1.6484375, "learning_rate": 0.00015133695231398794, "loss": 2.5149, "step": 17684 }, { "epoch": 0.827900988940254, "grad_norm": 1.5546875, "learning_rate": 0.00015133194281866677, "loss": 2.5432, "step": 17685 }, { "epoch": 0.8279478026800866, "grad_norm": 1.453125, "learning_rate": 0.0001513269331484349, "loss": 2.8315, "step": 17686 }, { "epoch": 0.8279946164199192, "grad_norm": 1.6875, "learning_rate": 0.00015132192330330954, "loss": 2.9497, "step": 17687 }, { "epoch": 0.8280414301597518, "grad_norm": 1.1875, "learning_rate": 0.00015131691328330765, "loss": 2.698, "step": 17688 }, { "epoch": 0.8280882438995846, "grad_norm": 1.0625, "learning_rate": 0.00015131190308844637, "loss": 2.1881, "step": 17689 }, { "epoch": 0.8281350576394172, "grad_norm": 1.3671875, "learning_rate": 0.0001513068927187427, "loss": 2.782, "step": 17690 }, { "epoch": 0.8281818713792498, "grad_norm": 1.6875, "learning_rate": 0.0001513018821742138, "loss": 2.4793, "step": 17691 }, { "epoch": 0.8282286851190824, "grad_norm": 1.390625, "learning_rate": 0.00015129687145487672, "loss": 2.2444, "step": 17692 }, { "epoch": 0.828275498858915, "grad_norm": 1.46875, "learning_rate": 0.0001512918605607485, "loss": 2.8516, "step": 17693 }, { "epoch": 0.8283223125987478, "grad_norm": 1.2890625, "learning_rate": 0.0001512868494918462, "loss": 2.2816, "step": 17694 }, { "epoch": 0.8283691263385804, "grad_norm": 1.2109375, "learning_rate": 0.00015128183824818694, "loss": 2.6154, "step": 17695 }, { "epoch": 0.828415940078413, "grad_norm": 2.25, "learning_rate": 0.00015127682682978782, "loss": 2.8849, "step": 17696 }, { "epoch": 0.8284627538182456, "grad_norm": 1.421875, "learning_rate": 0.00015127181523666582, "loss": 2.6148, "step": 17697 }, { "epoch": 0.8285095675580783, "grad_norm": 1.390625, "learning_rate": 0.00015126680346883812, "loss": 2.4513, "step": 17698 }, { "epoch": 0.828556381297911, "grad_norm": 1.1875, "learning_rate": 0.0001512617915263217, "loss": 2.2352, "step": 17699 }, { "epoch": 0.8286031950377436, "grad_norm": 1.25, "learning_rate": 0.00015125677940913375, "loss": 2.9732, "step": 17700 }, { "epoch": 0.8286500087775762, "grad_norm": 1.1875, "learning_rate": 0.00015125176711729124, "loss": 2.5399, "step": 17701 }, { "epoch": 0.8286968225174088, "grad_norm": 1.2734375, "learning_rate": 0.0001512467546508113, "loss": 2.6354, "step": 17702 }, { "epoch": 0.8287436362572415, "grad_norm": 3.640625, "learning_rate": 0.000151241742009711, "loss": 2.8331, "step": 17703 }, { "epoch": 0.8287904499970742, "grad_norm": 1.5546875, "learning_rate": 0.00015123672919400746, "loss": 2.5939, "step": 17704 }, { "epoch": 0.8288372637369068, "grad_norm": 2.328125, "learning_rate": 0.00015123171620371769, "loss": 2.0957, "step": 17705 }, { "epoch": 0.8288840774767394, "grad_norm": 1.2578125, "learning_rate": 0.00015122670303885884, "loss": 2.6796, "step": 17706 }, { "epoch": 0.828930891216572, "grad_norm": 1.21875, "learning_rate": 0.00015122168969944797, "loss": 2.0333, "step": 17707 }, { "epoch": 0.8289777049564047, "grad_norm": 1.6875, "learning_rate": 0.0001512166761855021, "loss": 2.7446, "step": 17708 }, { "epoch": 0.8290245186962374, "grad_norm": 1.375, "learning_rate": 0.00015121166249703843, "loss": 2.6908, "step": 17709 }, { "epoch": 0.82907133243607, "grad_norm": 1.609375, "learning_rate": 0.00015120664863407392, "loss": 2.4717, "step": 17710 }, { "epoch": 0.8291181461759026, "grad_norm": 1.578125, "learning_rate": 0.00015120163459662574, "loss": 2.9552, "step": 17711 }, { "epoch": 0.8291649599157352, "grad_norm": 1.3984375, "learning_rate": 0.00015119662038471095, "loss": 2.4252, "step": 17712 }, { "epoch": 0.8292117736555679, "grad_norm": 1.1875, "learning_rate": 0.00015119160599834662, "loss": 2.4587, "step": 17713 }, { "epoch": 0.8292585873954006, "grad_norm": 1.28125, "learning_rate": 0.0001511865914375499, "loss": 2.7187, "step": 17714 }, { "epoch": 0.8293054011352332, "grad_norm": 1.6171875, "learning_rate": 0.00015118157670233776, "loss": 2.4711, "step": 17715 }, { "epoch": 0.8293522148750658, "grad_norm": 1.875, "learning_rate": 0.0001511765617927274, "loss": 2.6966, "step": 17716 }, { "epoch": 0.8293990286148984, "grad_norm": 1.296875, "learning_rate": 0.00015117154670873587, "loss": 2.58, "step": 17717 }, { "epoch": 0.8294458423547311, "grad_norm": 1.25, "learning_rate": 0.00015116653145038025, "loss": 3.9914, "step": 17718 }, { "epoch": 0.8294926560945638, "grad_norm": 1.3515625, "learning_rate": 0.0001511615160176776, "loss": 2.4326, "step": 17719 }, { "epoch": 0.8295394698343964, "grad_norm": 1.2578125, "learning_rate": 0.00015115650041064505, "loss": 2.4264, "step": 17720 }, { "epoch": 0.829586283574229, "grad_norm": 1.4765625, "learning_rate": 0.00015115148462929972, "loss": 2.528, "step": 17721 }, { "epoch": 0.8296330973140617, "grad_norm": 1.40625, "learning_rate": 0.00015114646867365862, "loss": 2.2716, "step": 17722 }, { "epoch": 0.8296799110538943, "grad_norm": 1.328125, "learning_rate": 0.0001511414525437389, "loss": 2.0254, "step": 17723 }, { "epoch": 0.829726724793727, "grad_norm": 1.3515625, "learning_rate": 0.00015113643623955762, "loss": 2.3021, "step": 17724 }, { "epoch": 0.8297735385335596, "grad_norm": 1.34375, "learning_rate": 0.00015113141976113195, "loss": 2.1611, "step": 17725 }, { "epoch": 0.8298203522733922, "grad_norm": 1.390625, "learning_rate": 0.00015112640310847885, "loss": 2.1806, "step": 17726 }, { "epoch": 0.8298671660132249, "grad_norm": 1.328125, "learning_rate": 0.00015112138628161552, "loss": 2.5269, "step": 17727 }, { "epoch": 0.8299139797530575, "grad_norm": 1.8671875, "learning_rate": 0.000151116369280559, "loss": 3.5285, "step": 17728 }, { "epoch": 0.8299607934928902, "grad_norm": 1.5625, "learning_rate": 0.00015111135210532645, "loss": 2.5771, "step": 17729 }, { "epoch": 0.8300076072327228, "grad_norm": 1.1796875, "learning_rate": 0.00015110633475593489, "loss": 3.2029, "step": 17730 }, { "epoch": 0.8300544209725554, "grad_norm": 1.0078125, "learning_rate": 0.00015110131723240143, "loss": 1.6776, "step": 17731 }, { "epoch": 0.8301012347123881, "grad_norm": 2.125, "learning_rate": 0.00015109629953474323, "loss": 2.6576, "step": 17732 }, { "epoch": 0.8301480484522207, "grad_norm": 1.4296875, "learning_rate": 0.0001510912816629773, "loss": 2.57, "step": 17733 }, { "epoch": 0.8301948621920534, "grad_norm": 1.375, "learning_rate": 0.00015108626361712082, "loss": 2.339, "step": 17734 }, { "epoch": 0.830241675931886, "grad_norm": 1.328125, "learning_rate": 0.0001510812453971908, "loss": 2.4118, "step": 17735 }, { "epoch": 0.8302884896717186, "grad_norm": 1.4921875, "learning_rate": 0.00015107622700320444, "loss": 2.0345, "step": 17736 }, { "epoch": 0.8303353034115513, "grad_norm": 1.875, "learning_rate": 0.00015107120843517878, "loss": 2.7658, "step": 17737 }, { "epoch": 0.8303821171513839, "grad_norm": 1.78125, "learning_rate": 0.0001510661896931309, "loss": 2.7215, "step": 17738 }, { "epoch": 0.8304289308912166, "grad_norm": 1.546875, "learning_rate": 0.00015106117077707797, "loss": 2.4528, "step": 17739 }, { "epoch": 0.8304757446310492, "grad_norm": 1.46875, "learning_rate": 0.00015105615168703703, "loss": 2.3384, "step": 17740 }, { "epoch": 0.8305225583708818, "grad_norm": 1.265625, "learning_rate": 0.0001510511324230252, "loss": 2.5667, "step": 17741 }, { "epoch": 0.8305693721107145, "grad_norm": 1.625, "learning_rate": 0.0001510461129850596, "loss": 2.1706, "step": 17742 }, { "epoch": 0.8306161858505471, "grad_norm": 2.8125, "learning_rate": 0.00015104109337315733, "loss": 2.7483, "step": 17743 }, { "epoch": 0.8306629995903798, "grad_norm": 1.90625, "learning_rate": 0.00015103607358733548, "loss": 2.4523, "step": 17744 }, { "epoch": 0.8307098133302124, "grad_norm": 1.5703125, "learning_rate": 0.00015103105362761113, "loss": 2.7856, "step": 17745 }, { "epoch": 0.830756627070045, "grad_norm": 1.9921875, "learning_rate": 0.00015102603349400142, "loss": 2.5292, "step": 17746 }, { "epoch": 0.8308034408098777, "grad_norm": 1.3203125, "learning_rate": 0.00015102101318652345, "loss": 2.5311, "step": 17747 }, { "epoch": 0.8308502545497103, "grad_norm": 3.421875, "learning_rate": 0.00015101599270519436, "loss": 3.0545, "step": 17748 }, { "epoch": 0.830897068289543, "grad_norm": 1.46875, "learning_rate": 0.0001510109720500312, "loss": 2.421, "step": 17749 }, { "epoch": 0.8309438820293756, "grad_norm": 2.78125, "learning_rate": 0.0001510059512210511, "loss": 2.1587, "step": 17750 }, { "epoch": 0.8309906957692083, "grad_norm": 1.7265625, "learning_rate": 0.00015100093021827118, "loss": 3.2059, "step": 17751 }, { "epoch": 0.8310375095090409, "grad_norm": 2.015625, "learning_rate": 0.00015099590904170854, "loss": 2.3911, "step": 17752 }, { "epoch": 0.8310843232488735, "grad_norm": 1.25, "learning_rate": 0.00015099088769138027, "loss": 2.521, "step": 17753 }, { "epoch": 0.8311311369887062, "grad_norm": 1.375, "learning_rate": 0.0001509858661673035, "loss": 2.7503, "step": 17754 }, { "epoch": 0.8311779507285388, "grad_norm": 1.515625, "learning_rate": 0.00015098084446949533, "loss": 2.9233, "step": 17755 }, { "epoch": 0.8312247644683715, "grad_norm": 1.4375, "learning_rate": 0.0001509758225979729, "loss": 2.21, "step": 17756 }, { "epoch": 0.8312715782082041, "grad_norm": 1.4140625, "learning_rate": 0.0001509708005527533, "loss": 2.4948, "step": 17757 }, { "epoch": 0.8313183919480367, "grad_norm": 1.734375, "learning_rate": 0.00015096577833385367, "loss": 2.1615, "step": 17758 }, { "epoch": 0.8313652056878694, "grad_norm": 2.234375, "learning_rate": 0.00015096075594129104, "loss": 2.6954, "step": 17759 }, { "epoch": 0.831412019427702, "grad_norm": 1.5, "learning_rate": 0.00015095573337508262, "loss": 2.6222, "step": 17760 }, { "epoch": 0.8314588331675347, "grad_norm": 1.7109375, "learning_rate": 0.00015095071063524546, "loss": 2.6792, "step": 17761 }, { "epoch": 0.8315056469073673, "grad_norm": 1.609375, "learning_rate": 0.0001509456877217967, "loss": 2.3889, "step": 17762 }, { "epoch": 0.8315524606471999, "grad_norm": 2.015625, "learning_rate": 0.00015094066463475344, "loss": 2.8515, "step": 17763 }, { "epoch": 0.8315992743870326, "grad_norm": 1.328125, "learning_rate": 0.00015093564137413288, "loss": 2.538, "step": 17764 }, { "epoch": 0.8316460881268652, "grad_norm": 1.25, "learning_rate": 0.00015093061793995197, "loss": 2.6339, "step": 17765 }, { "epoch": 0.8316929018666979, "grad_norm": 4.53125, "learning_rate": 0.00015092559433222799, "loss": 2.3442, "step": 17766 }, { "epoch": 0.8317397156065305, "grad_norm": 1.65625, "learning_rate": 0.000150920570550978, "loss": 2.3909, "step": 17767 }, { "epoch": 0.8317865293463632, "grad_norm": 1.171875, "learning_rate": 0.00015091554659621905, "loss": 2.4444, "step": 17768 }, { "epoch": 0.8318333430861958, "grad_norm": 1.3671875, "learning_rate": 0.00015091052246796836, "loss": 2.5849, "step": 17769 }, { "epoch": 0.8318801568260284, "grad_norm": 1.15625, "learning_rate": 0.000150905498166243, "loss": 2.2234, "step": 17770 }, { "epoch": 0.8319269705658611, "grad_norm": 2.203125, "learning_rate": 0.0001509004736910601, "loss": 2.7818, "step": 17771 }, { "epoch": 0.8319737843056937, "grad_norm": 1.4921875, "learning_rate": 0.00015089544904243676, "loss": 2.5569, "step": 17772 }, { "epoch": 0.8320205980455264, "grad_norm": 1.625, "learning_rate": 0.0001508904242203901, "loss": 2.8474, "step": 17773 }, { "epoch": 0.832067411785359, "grad_norm": 1.296875, "learning_rate": 0.0001508853992249373, "loss": 2.7022, "step": 17774 }, { "epoch": 0.8321142255251917, "grad_norm": 1.375, "learning_rate": 0.00015088037405609547, "loss": 2.8606, "step": 17775 }, { "epoch": 0.8321610392650243, "grad_norm": 1.625, "learning_rate": 0.00015087534871388166, "loss": 2.7416, "step": 17776 }, { "epoch": 0.8322078530048569, "grad_norm": 1.3359375, "learning_rate": 0.00015087032319831308, "loss": 2.4964, "step": 17777 }, { "epoch": 0.8322546667446896, "grad_norm": 1.75, "learning_rate": 0.00015086529750940679, "loss": 2.6324, "step": 17778 }, { "epoch": 0.8323014804845222, "grad_norm": 1.328125, "learning_rate": 0.0001508602716471799, "loss": 2.3542, "step": 17779 }, { "epoch": 0.8323482942243549, "grad_norm": 1.6640625, "learning_rate": 0.00015085524561164962, "loss": 1.9224, "step": 17780 }, { "epoch": 0.8323951079641875, "grad_norm": 1.1015625, "learning_rate": 0.000150850219402833, "loss": 2.5405, "step": 17781 }, { "epoch": 0.8324419217040201, "grad_norm": 1.4453125, "learning_rate": 0.00015084519302074724, "loss": 2.2302, "step": 17782 }, { "epoch": 0.8324887354438528, "grad_norm": 2.015625, "learning_rate": 0.0001508401664654094, "loss": 2.5801, "step": 17783 }, { "epoch": 0.8325355491836854, "grad_norm": 1.640625, "learning_rate": 0.00015083513973683662, "loss": 3.0721, "step": 17784 }, { "epoch": 0.8325823629235181, "grad_norm": 1.453125, "learning_rate": 0.00015083011283504604, "loss": 2.4629, "step": 17785 }, { "epoch": 0.8326291766633507, "grad_norm": 1.3984375, "learning_rate": 0.00015082508576005484, "loss": 2.3775, "step": 17786 }, { "epoch": 0.8326759904031833, "grad_norm": 0.953125, "learning_rate": 0.00015082005851188007, "loss": 2.5183, "step": 17787 }, { "epoch": 0.832722804143016, "grad_norm": 1.3125, "learning_rate": 0.00015081503109053883, "loss": 2.2836, "step": 17788 }, { "epoch": 0.8327696178828486, "grad_norm": 1.2265625, "learning_rate": 0.00015081000349604837, "loss": 2.6583, "step": 17789 }, { "epoch": 0.8328164316226813, "grad_norm": 1.234375, "learning_rate": 0.00015080497572842572, "loss": 2.2774, "step": 17790 }, { "epoch": 0.8328632453625139, "grad_norm": 1.140625, "learning_rate": 0.0001507999477876881, "loss": 2.5796, "step": 17791 }, { "epoch": 0.8329100591023465, "grad_norm": 1.59375, "learning_rate": 0.00015079491967385256, "loss": 2.8111, "step": 17792 }, { "epoch": 0.8329568728421792, "grad_norm": 1.5, "learning_rate": 0.0001507898913869363, "loss": 2.9107, "step": 17793 }, { "epoch": 0.8330036865820118, "grad_norm": 1.546875, "learning_rate": 0.00015078486292695637, "loss": 2.8087, "step": 17794 }, { "epoch": 0.8330505003218445, "grad_norm": 1.375, "learning_rate": 0.00015077983429393, "loss": 2.6258, "step": 17795 }, { "epoch": 0.8330973140616771, "grad_norm": 1.6015625, "learning_rate": 0.0001507748054878743, "loss": 2.7971, "step": 17796 }, { "epoch": 0.8331441278015097, "grad_norm": 1.2265625, "learning_rate": 0.00015076977650880632, "loss": 2.5893, "step": 17797 }, { "epoch": 0.8331909415413424, "grad_norm": 1.6796875, "learning_rate": 0.00015076474735674329, "loss": 2.8948, "step": 17798 }, { "epoch": 0.833237755281175, "grad_norm": 1.9609375, "learning_rate": 0.00015075971803170233, "loss": 1.9303, "step": 17799 }, { "epoch": 0.8332845690210077, "grad_norm": 1.4921875, "learning_rate": 0.00015075468853370055, "loss": 2.7195, "step": 17800 }, { "epoch": 0.8333313827608403, "grad_norm": 1.65625, "learning_rate": 0.0001507496588627551, "loss": 2.4019, "step": 17801 }, { "epoch": 0.8333781965006729, "grad_norm": 1.765625, "learning_rate": 0.00015074462901888314, "loss": 2.7582, "step": 17802 }, { "epoch": 0.8334250102405056, "grad_norm": 1.6328125, "learning_rate": 0.00015073959900210174, "loss": 2.547, "step": 17803 }, { "epoch": 0.8334718239803383, "grad_norm": 1.1640625, "learning_rate": 0.00015073456881242815, "loss": 2.1382, "step": 17804 }, { "epoch": 0.8335186377201709, "grad_norm": 1.984375, "learning_rate": 0.00015072953844987944, "loss": 2.7455, "step": 17805 }, { "epoch": 0.8335654514600035, "grad_norm": 1.515625, "learning_rate": 0.00015072450791447274, "loss": 2.5635, "step": 17806 }, { "epoch": 0.8336122651998361, "grad_norm": 1.578125, "learning_rate": 0.00015071947720622522, "loss": 2.3422, "step": 17807 }, { "epoch": 0.8336590789396688, "grad_norm": 1.40625, "learning_rate": 0.000150714446325154, "loss": 2.4223, "step": 17808 }, { "epoch": 0.8337058926795015, "grad_norm": 1.5078125, "learning_rate": 0.00015070941527127625, "loss": 2.2858, "step": 17809 }, { "epoch": 0.8337527064193341, "grad_norm": 1.6015625, "learning_rate": 0.0001507043840446091, "loss": 2.3487, "step": 17810 }, { "epoch": 0.8337995201591667, "grad_norm": 1.3828125, "learning_rate": 0.00015069935264516967, "loss": 2.2194, "step": 17811 }, { "epoch": 0.8338463338989993, "grad_norm": 1.8515625, "learning_rate": 0.00015069432107297514, "loss": 2.522, "step": 17812 }, { "epoch": 0.833893147638832, "grad_norm": 2.0, "learning_rate": 0.00015068928932804264, "loss": 2.6734, "step": 17813 }, { "epoch": 0.8339399613786647, "grad_norm": 1.5546875, "learning_rate": 0.0001506842574103893, "loss": 2.6542, "step": 17814 }, { "epoch": 0.8339867751184973, "grad_norm": 1.296875, "learning_rate": 0.00015067922532003232, "loss": 2.365, "step": 17815 }, { "epoch": 0.8340335888583299, "grad_norm": 1.46875, "learning_rate": 0.00015067419305698883, "loss": 2.5474, "step": 17816 }, { "epoch": 0.8340804025981625, "grad_norm": 1.6875, "learning_rate": 0.00015066916062127589, "loss": 2.3872, "step": 17817 }, { "epoch": 0.8341272163379952, "grad_norm": 1.796875, "learning_rate": 0.00015066412801291077, "loss": 2.2463, "step": 17818 }, { "epoch": 0.8341740300778279, "grad_norm": 1.84375, "learning_rate": 0.0001506590952319105, "loss": 2.5021, "step": 17819 }, { "epoch": 0.8342208438176605, "grad_norm": 1.375, "learning_rate": 0.00015065406227829232, "loss": 2.4434, "step": 17820 }, { "epoch": 0.8342676575574931, "grad_norm": 1.8203125, "learning_rate": 0.00015064902915207337, "loss": 2.4097, "step": 17821 }, { "epoch": 0.8343144712973257, "grad_norm": 1.609375, "learning_rate": 0.00015064399585327075, "loss": 2.2849, "step": 17822 }, { "epoch": 0.8343612850371585, "grad_norm": 1.1015625, "learning_rate": 0.00015063896238190169, "loss": 2.147, "step": 17823 }, { "epoch": 0.8344080987769911, "grad_norm": 1.4296875, "learning_rate": 0.00015063392873798325, "loss": 2.0301, "step": 17824 }, { "epoch": 0.8344549125168237, "grad_norm": 1.671875, "learning_rate": 0.00015062889492153264, "loss": 2.4229, "step": 17825 }, { "epoch": 0.8345017262566563, "grad_norm": 1.296875, "learning_rate": 0.000150623860932567, "loss": 2.484, "step": 17826 }, { "epoch": 0.8345485399964889, "grad_norm": 1.9765625, "learning_rate": 0.00015061882677110347, "loss": 2.5736, "step": 17827 }, { "epoch": 0.8345953537363217, "grad_norm": 1.2265625, "learning_rate": 0.0001506137924371592, "loss": 2.5495, "step": 17828 }, { "epoch": 0.8346421674761543, "grad_norm": 2.203125, "learning_rate": 0.00015060875793075138, "loss": 2.3171, "step": 17829 }, { "epoch": 0.8346889812159869, "grad_norm": 1.734375, "learning_rate": 0.00015060372325189714, "loss": 2.5249, "step": 17830 }, { "epoch": 0.8347357949558195, "grad_norm": 1.6640625, "learning_rate": 0.00015059868840061364, "loss": 2.685, "step": 17831 }, { "epoch": 0.8347826086956521, "grad_norm": 1.1640625, "learning_rate": 0.00015059365337691803, "loss": 2.5835, "step": 17832 }, { "epoch": 0.8348294224354849, "grad_norm": 1.171875, "learning_rate": 0.00015058861818082747, "loss": 2.4389, "step": 17833 }, { "epoch": 0.8348762361753175, "grad_norm": 1.8125, "learning_rate": 0.00015058358281235914, "loss": 2.5354, "step": 17834 }, { "epoch": 0.8349230499151501, "grad_norm": 1.4921875, "learning_rate": 0.00015057854727153013, "loss": 2.4776, "step": 17835 }, { "epoch": 0.8349698636549827, "grad_norm": 1.5703125, "learning_rate": 0.00015057351155835767, "loss": 2.1498, "step": 17836 }, { "epoch": 0.8350166773948153, "grad_norm": 1.359375, "learning_rate": 0.0001505684756728589, "loss": 2.2321, "step": 17837 }, { "epoch": 0.8350634911346481, "grad_norm": 1.28125, "learning_rate": 0.00015056343961505095, "loss": 2.6155, "step": 17838 }, { "epoch": 0.8351103048744807, "grad_norm": 2.046875, "learning_rate": 0.00015055840338495103, "loss": 2.5869, "step": 17839 }, { "epoch": 0.8351571186143133, "grad_norm": 1.1875, "learning_rate": 0.00015055336698257624, "loss": 2.2017, "step": 17840 }, { "epoch": 0.8352039323541459, "grad_norm": 1.375, "learning_rate": 0.0001505483304079438, "loss": 2.3549, "step": 17841 }, { "epoch": 0.8352507460939785, "grad_norm": 1.703125, "learning_rate": 0.00015054329366107083, "loss": 2.7353, "step": 17842 }, { "epoch": 0.8352975598338113, "grad_norm": 1.5390625, "learning_rate": 0.0001505382567419745, "loss": 2.3287, "step": 17843 }, { "epoch": 0.8353443735736439, "grad_norm": 1.9453125, "learning_rate": 0.00015053321965067203, "loss": 2.6615, "step": 17844 }, { "epoch": 0.8353911873134765, "grad_norm": 1.578125, "learning_rate": 0.00015052818238718048, "loss": 2.2846, "step": 17845 }, { "epoch": 0.8354380010533091, "grad_norm": 1.2578125, "learning_rate": 0.0001505231449515171, "loss": 2.5916, "step": 17846 }, { "epoch": 0.8354848147931417, "grad_norm": 1.9375, "learning_rate": 0.00015051810734369902, "loss": 2.0853, "step": 17847 }, { "epoch": 0.8355316285329745, "grad_norm": 1.734375, "learning_rate": 0.0001505130695637434, "loss": 2.7426, "step": 17848 }, { "epoch": 0.8355784422728071, "grad_norm": 1.3984375, "learning_rate": 0.00015050803161166743, "loss": 2.2828, "step": 17849 }, { "epoch": 0.8356252560126397, "grad_norm": 1.34375, "learning_rate": 0.00015050299348748824, "loss": 2.2924, "step": 17850 }, { "epoch": 0.8356720697524723, "grad_norm": 1.296875, "learning_rate": 0.00015049795519122303, "loss": 2.5118, "step": 17851 }, { "epoch": 0.8357188834923049, "grad_norm": 1.4140625, "learning_rate": 0.00015049291672288896, "loss": 2.129, "step": 17852 }, { "epoch": 0.8357656972321377, "grad_norm": 2.015625, "learning_rate": 0.0001504878780825032, "loss": 2.7266, "step": 17853 }, { "epoch": 0.8358125109719703, "grad_norm": 1.8671875, "learning_rate": 0.00015048283927008291, "loss": 2.3131, "step": 17854 }, { "epoch": 0.8358593247118029, "grad_norm": 1.2109375, "learning_rate": 0.00015047780028564525, "loss": 2.3971, "step": 17855 }, { "epoch": 0.8359061384516355, "grad_norm": 1.5859375, "learning_rate": 0.0001504727611292074, "loss": 2.3743, "step": 17856 }, { "epoch": 0.8359529521914681, "grad_norm": 1.9140625, "learning_rate": 0.00015046772180078656, "loss": 2.4207, "step": 17857 }, { "epoch": 0.8359997659313009, "grad_norm": 1.3203125, "learning_rate": 0.00015046268230039982, "loss": 2.571, "step": 17858 }, { "epoch": 0.8360465796711335, "grad_norm": 1.2734375, "learning_rate": 0.00015045764262806448, "loss": 2.3959, "step": 17859 }, { "epoch": 0.8360933934109661, "grad_norm": 1.296875, "learning_rate": 0.0001504526027837976, "loss": 2.5746, "step": 17860 }, { "epoch": 0.8361402071507987, "grad_norm": 1.6796875, "learning_rate": 0.00015044756276761642, "loss": 2.6321, "step": 17861 }, { "epoch": 0.8361870208906313, "grad_norm": 1.875, "learning_rate": 0.00015044252257953805, "loss": 2.6914, "step": 17862 }, { "epoch": 0.8362338346304641, "grad_norm": 1.4140625, "learning_rate": 0.0001504374822195797, "loss": 2.3753, "step": 17863 }, { "epoch": 0.8362806483702967, "grad_norm": 1.921875, "learning_rate": 0.0001504324416877586, "loss": 2.346, "step": 17864 }, { "epoch": 0.8363274621101293, "grad_norm": 1.4375, "learning_rate": 0.0001504274009840918, "loss": 2.7944, "step": 17865 }, { "epoch": 0.8363742758499619, "grad_norm": 1.9375, "learning_rate": 0.0001504223601085966, "loss": 2.6351, "step": 17866 }, { "epoch": 0.8364210895897946, "grad_norm": 1.5546875, "learning_rate": 0.00015041731906129008, "loss": 2.5168, "step": 17867 }, { "epoch": 0.8364679033296273, "grad_norm": 1.453125, "learning_rate": 0.0001504122778421895, "loss": 2.4035, "step": 17868 }, { "epoch": 0.8365147170694599, "grad_norm": 1.2421875, "learning_rate": 0.00015040723645131195, "loss": 1.966, "step": 17869 }, { "epoch": 0.8365615308092925, "grad_norm": 1.375, "learning_rate": 0.0001504021948886747, "loss": 2.381, "step": 17870 }, { "epoch": 0.8366083445491251, "grad_norm": 1.3203125, "learning_rate": 0.00015039715315429486, "loss": 3.4191, "step": 17871 }, { "epoch": 0.8366551582889578, "grad_norm": 1.3984375, "learning_rate": 0.00015039211124818965, "loss": 2.5346, "step": 17872 }, { "epoch": 0.8367019720287905, "grad_norm": 1.4296875, "learning_rate": 0.00015038706917037625, "loss": 2.5029, "step": 17873 }, { "epoch": 0.8367487857686231, "grad_norm": 1.7734375, "learning_rate": 0.00015038202692087177, "loss": 2.3138, "step": 17874 }, { "epoch": 0.8367955995084557, "grad_norm": 1.53125, "learning_rate": 0.0001503769844996935, "loss": 2.4401, "step": 17875 }, { "epoch": 0.8368424132482883, "grad_norm": 2.015625, "learning_rate": 0.0001503719419068585, "loss": 2.4677, "step": 17876 }, { "epoch": 0.836889226988121, "grad_norm": 2.046875, "learning_rate": 0.00015036689914238406, "loss": 2.581, "step": 17877 }, { "epoch": 0.8369360407279537, "grad_norm": 1.703125, "learning_rate": 0.0001503618562062873, "loss": 2.8299, "step": 17878 }, { "epoch": 0.8369828544677863, "grad_norm": 1.5234375, "learning_rate": 0.00015035681309858546, "loss": 3.8117, "step": 17879 }, { "epoch": 0.8370296682076189, "grad_norm": 1.328125, "learning_rate": 0.0001503517698192957, "loss": 2.3225, "step": 17880 }, { "epoch": 0.8370764819474515, "grad_norm": 1.328125, "learning_rate": 0.00015034672636843514, "loss": 2.2319, "step": 17881 }, { "epoch": 0.8371232956872842, "grad_norm": 1.75, "learning_rate": 0.00015034168274602109, "loss": 2.6927, "step": 17882 }, { "epoch": 0.8371701094271169, "grad_norm": 1.71875, "learning_rate": 0.0001503366389520706, "loss": 2.6762, "step": 17883 }, { "epoch": 0.8372169231669495, "grad_norm": 1.5703125, "learning_rate": 0.000150331594986601, "loss": 2.7679, "step": 17884 }, { "epoch": 0.8372637369067821, "grad_norm": 1.6171875, "learning_rate": 0.00015032655084962934, "loss": 2.3372, "step": 17885 }, { "epoch": 0.8373105506466147, "grad_norm": 1.71875, "learning_rate": 0.00015032150654117288, "loss": 2.6325, "step": 17886 }, { "epoch": 0.8373573643864474, "grad_norm": 1.5078125, "learning_rate": 0.00015031646206124878, "loss": 2.8002, "step": 17887 }, { "epoch": 0.8374041781262801, "grad_norm": 1.8125, "learning_rate": 0.00015031141740987427, "loss": 2.4102, "step": 17888 }, { "epoch": 0.8374509918661127, "grad_norm": 1.0078125, "learning_rate": 0.00015030637258706649, "loss": 2.1218, "step": 17889 }, { "epoch": 0.8374978056059453, "grad_norm": 2.171875, "learning_rate": 0.00015030132759284265, "loss": 2.8114, "step": 17890 }, { "epoch": 0.837544619345778, "grad_norm": 1.2421875, "learning_rate": 0.00015029628242721997, "loss": 2.5005, "step": 17891 }, { "epoch": 0.8375914330856107, "grad_norm": 1.28125, "learning_rate": 0.00015029123709021559, "loss": 2.4936, "step": 17892 }, { "epoch": 0.8376382468254433, "grad_norm": 2.5, "learning_rate": 0.00015028619158184676, "loss": 2.5988, "step": 17893 }, { "epoch": 0.8376850605652759, "grad_norm": 1.375, "learning_rate": 0.0001502811459021306, "loss": 2.3778, "step": 17894 }, { "epoch": 0.8377318743051085, "grad_norm": 1.53125, "learning_rate": 0.00015027610005108438, "loss": 2.4371, "step": 17895 }, { "epoch": 0.8377786880449412, "grad_norm": 1.59375, "learning_rate": 0.00015027105402872523, "loss": 2.226, "step": 17896 }, { "epoch": 0.8378255017847739, "grad_norm": 1.3515625, "learning_rate": 0.00015026600783507035, "loss": 2.4407, "step": 17897 }, { "epoch": 0.8378723155246065, "grad_norm": 1.359375, "learning_rate": 0.000150260961470137, "loss": 2.7509, "step": 17898 }, { "epoch": 0.8379191292644391, "grad_norm": 1.2734375, "learning_rate": 0.0001502559149339423, "loss": 2.1583, "step": 17899 }, { "epoch": 0.8379659430042717, "grad_norm": 1.609375, "learning_rate": 0.0001502508682265035, "loss": 2.1254, "step": 17900 }, { "epoch": 0.8380127567441044, "grad_norm": 1.3515625, "learning_rate": 0.00015024582134783775, "loss": 2.5563, "step": 17901 }, { "epoch": 0.8380595704839371, "grad_norm": 1.8359375, "learning_rate": 0.00015024077429796225, "loss": 2.2032, "step": 17902 }, { "epoch": 0.8381063842237697, "grad_norm": 2.03125, "learning_rate": 0.00015023572707689424, "loss": 2.78, "step": 17903 }, { "epoch": 0.8381531979636023, "grad_norm": 1.59375, "learning_rate": 0.00015023067968465088, "loss": 2.5707, "step": 17904 }, { "epoch": 0.8382000117034349, "grad_norm": 1.2421875, "learning_rate": 0.0001502256321212494, "loss": 2.7001, "step": 17905 }, { "epoch": 0.8382468254432676, "grad_norm": 1.75, "learning_rate": 0.00015022058438670698, "loss": 2.4545, "step": 17906 }, { "epoch": 0.8382936391831003, "grad_norm": 3.109375, "learning_rate": 0.00015021553648104084, "loss": 2.4251, "step": 17907 }, { "epoch": 0.8383404529229329, "grad_norm": 1.6171875, "learning_rate": 0.0001502104884042681, "loss": 2.4025, "step": 17908 }, { "epoch": 0.8383872666627655, "grad_norm": 1.625, "learning_rate": 0.00015020544015640608, "loss": 2.6086, "step": 17909 }, { "epoch": 0.8384340804025981, "grad_norm": 3.984375, "learning_rate": 0.0001502003917374719, "loss": 2.4789, "step": 17910 }, { "epoch": 0.8384808941424308, "grad_norm": 1.6796875, "learning_rate": 0.00015019534314748284, "loss": 2.9617, "step": 17911 }, { "epoch": 0.8385277078822635, "grad_norm": 1.2578125, "learning_rate": 0.00015019029438645596, "loss": 2.4996, "step": 17912 }, { "epoch": 0.8385745216220961, "grad_norm": 1.546875, "learning_rate": 0.0001501852454544086, "loss": 2.4584, "step": 17913 }, { "epoch": 0.8386213353619287, "grad_norm": 1.359375, "learning_rate": 0.00015018019635135792, "loss": 2.6506, "step": 17914 }, { "epoch": 0.8386681491017614, "grad_norm": 2.296875, "learning_rate": 0.00015017514707732108, "loss": 2.0847, "step": 17915 }, { "epoch": 0.838714962841594, "grad_norm": 1.3046875, "learning_rate": 0.00015017009763231538, "loss": 2.4943, "step": 17916 }, { "epoch": 0.8387617765814267, "grad_norm": 1.453125, "learning_rate": 0.00015016504801635794, "loss": 2.1628, "step": 17917 }, { "epoch": 0.8388085903212593, "grad_norm": 1.4296875, "learning_rate": 0.00015015999822946602, "loss": 2.7893, "step": 17918 }, { "epoch": 0.8388554040610919, "grad_norm": 1.453125, "learning_rate": 0.00015015494827165678, "loss": 2.7849, "step": 17919 }, { "epoch": 0.8389022178009246, "grad_norm": 1.3125, "learning_rate": 0.0001501498981429475, "loss": 1.9934, "step": 17920 }, { "epoch": 0.8389490315407572, "grad_norm": 1.7265625, "learning_rate": 0.00015014484784335531, "loss": 2.405, "step": 17921 }, { "epoch": 0.8389958452805899, "grad_norm": 1.3203125, "learning_rate": 0.00015013979737289743, "loss": 2.44, "step": 17922 }, { "epoch": 0.8390426590204225, "grad_norm": 1.515625, "learning_rate": 0.00015013474673159115, "loss": 2.4183, "step": 17923 }, { "epoch": 0.8390894727602551, "grad_norm": 1.4921875, "learning_rate": 0.00015012969591945353, "loss": 2.3448, "step": 17924 }, { "epoch": 0.8391362865000878, "grad_norm": 1.9375, "learning_rate": 0.00015012464493650192, "loss": 2.3636, "step": 17925 }, { "epoch": 0.8391831002399204, "grad_norm": 1.7421875, "learning_rate": 0.00015011959378275348, "loss": 2.637, "step": 17926 }, { "epoch": 0.8392299139797531, "grad_norm": 1.7421875, "learning_rate": 0.00015011454245822544, "loss": 3.1061, "step": 17927 }, { "epoch": 0.8392767277195857, "grad_norm": 1.1640625, "learning_rate": 0.00015010949096293493, "loss": 2.4471, "step": 17928 }, { "epoch": 0.8393235414594183, "grad_norm": 1.5390625, "learning_rate": 0.00015010443929689928, "loss": 2.2604, "step": 17929 }, { "epoch": 0.839370355199251, "grad_norm": 1.703125, "learning_rate": 0.00015009938746013565, "loss": 2.0557, "step": 17930 }, { "epoch": 0.8394171689390836, "grad_norm": 1.3671875, "learning_rate": 0.00015009433545266123, "loss": 2.6983, "step": 17931 }, { "epoch": 0.8394639826789163, "grad_norm": 1.21875, "learning_rate": 0.00015008928327449328, "loss": 2.51, "step": 17932 }, { "epoch": 0.8395107964187489, "grad_norm": 1.5, "learning_rate": 0.00015008423092564897, "loss": 2.5087, "step": 17933 }, { "epoch": 0.8395576101585815, "grad_norm": 1.6328125, "learning_rate": 0.00015007917840614553, "loss": 2.5092, "step": 17934 }, { "epoch": 0.8396044238984142, "grad_norm": 1.421875, "learning_rate": 0.0001500741257160002, "loss": 2.3083, "step": 17935 }, { "epoch": 0.8396512376382468, "grad_norm": 1.671875, "learning_rate": 0.0001500690728552302, "loss": 2.1336, "step": 17936 }, { "epoch": 0.8396980513780795, "grad_norm": 1.453125, "learning_rate": 0.0001500640198238527, "loss": 2.5686, "step": 17937 }, { "epoch": 0.8397448651179121, "grad_norm": 1.5234375, "learning_rate": 0.00015005896662188497, "loss": 2.6429, "step": 17938 }, { "epoch": 0.8397916788577447, "grad_norm": 1.59375, "learning_rate": 0.00015005391324934418, "loss": 2.6908, "step": 17939 }, { "epoch": 0.8398384925975774, "grad_norm": 1.2890625, "learning_rate": 0.00015004885970624758, "loss": 2.5825, "step": 17940 }, { "epoch": 0.83988530633741, "grad_norm": 1.484375, "learning_rate": 0.0001500438059926124, "loss": 2.5996, "step": 17941 }, { "epoch": 0.8399321200772427, "grad_norm": 1.890625, "learning_rate": 0.00015003875210845583, "loss": 2.6324, "step": 17942 }, { "epoch": 0.8399789338170753, "grad_norm": 1.21875, "learning_rate": 0.00015003369805379512, "loss": 2.7621, "step": 17943 } ], "logging_steps": 1, "max_steps": 53829, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 17943, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.3550485044304282e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }