diff --git "a/checkpoint-1145/trainer_state.json" "b/checkpoint-1145/trainer_state.json"
new file mode 100644--- /dev/null
+++ "b/checkpoint-1145/trainer_state.json"
@@ -0,0 +1,8328 @@
+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.9901989045834535,
+  "eval_steps": 115,
+  "global_step": 1145,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0008648025367541078,
+      "grad_norm": NaN,
+      "learning_rate": 0.0,
+      "loss": 8.1889,
+      "step": 1
+    },
+    {
+      "epoch": 0.0017296050735082155,
+      "grad_norm": 25.332334518432617,
+      "learning_rate": 0.0,
+      "loss": 9.3962,
+      "step": 2
+    },
+    {
+      "epoch": 0.0025944076102623233,
+      "grad_norm": 21.9357967376709,
+      "learning_rate": 1.9193857965451055e-07,
+      "loss": 9.4132,
+      "step": 3
+    },
+    {
+      "epoch": 0.003459210147016431,
+      "grad_norm": 26.18132781982422,
+      "learning_rate": 3.838771593090211e-07,
+      "loss": 9.1807,
+      "step": 4
+    },
+    {
+      "epoch": 0.004324012683770539,
+      "grad_norm": 24.46787452697754,
+      "learning_rate": 5.758157389635317e-07,
+      "loss": 8.1559,
+      "step": 5
+    },
+    {
+      "epoch": 0.0051888152205246466,
+      "grad_norm": 25.57164764404297,
+      "learning_rate": 7.677543186180422e-07,
+      "loss": 9.9007,
+      "step": 6
+    },
+    {
+      "epoch": 0.006053617757278754,
+      "grad_norm": 19.92409324645996,
+      "learning_rate": 9.596928982725527e-07,
+      "loss": 9.634,
+      "step": 7
+    },
+    {
+      "epoch": 0.006918420294032862,
+      "grad_norm": 23.451889038085938,
+      "learning_rate": 1.1516314779270634e-06,
+      "loss": 10.8722,
+      "step": 8
+    },
+    {
+      "epoch": 0.007783222830786971,
+      "grad_norm": 22.753061294555664,
+      "learning_rate": 1.343570057581574e-06,
+      "loss": 8.6969,
+      "step": 9
+    },
+    {
+      "epoch": 0.008648025367541078,
+      "grad_norm": 23.012781143188477,
+      "learning_rate": 1.5355086372360844e-06,
+      "loss": 8.9552,
+      "step": 10
+    },
+    {
+      "epoch": 0.009512827904295185,
+      "grad_norm": 20.173051834106445,
+      "learning_rate": 1.7274472168905951e-06,
+      "loss": 7.5245,
+      "step": 11
+    },
+    {
+      "epoch": 0.010377630441049293,
+      "grad_norm": 24.979217529296875,
+      "learning_rate": 1.9193857965451054e-06,
+      "loss": 9.1573,
+      "step": 12
+    },
+    {
+      "epoch": 0.011242432977803401,
+      "grad_norm": 24.23455810546875,
+      "learning_rate": 2.1113243761996164e-06,
+      "loss": 9.2615,
+      "step": 13
+    },
+    {
+      "epoch": 0.012107235514557509,
+      "grad_norm": 25.147851943969727,
+      "learning_rate": 2.303262955854127e-06,
+      "loss": 9.1465,
+      "step": 14
+    },
+    {
+      "epoch": 0.012972038051311616,
+      "grad_norm": 21.937841415405273,
+      "learning_rate": 2.4952015355086374e-06,
+      "loss": 9.3845,
+      "step": 15
+    },
+    {
+      "epoch": 0.013836840588065724,
+      "grad_norm": 24.25821304321289,
+      "learning_rate": 2.687140115163148e-06,
+      "loss": 9.3638,
+      "step": 16
+    },
+    {
+      "epoch": 0.014701643124819834,
+      "grad_norm": 22.018434524536133,
+      "learning_rate": 2.879078694817659e-06,
+      "loss": 9.3365,
+      "step": 17
+    },
+    {
+      "epoch": 0.015566445661573941,
+      "grad_norm": 19.021236419677734,
+      "learning_rate": 3.071017274472169e-06,
+      "loss": 8.267,
+      "step": 18
+    },
+    {
+      "epoch": 0.016431248198328047,
+      "grad_norm": 24.68037986755371,
+      "learning_rate": 3.2629558541266794e-06,
+      "loss": 9.8727,
+      "step": 19
+    },
+    {
+      "epoch": 0.017296050735082155,
+      "grad_norm": 18.95473861694336,
+      "learning_rate": 3.4548944337811903e-06,
+      "loss": 9.6234,
+      "step": 20
+    },
+    {
+      "epoch": 0.018160853271836263,
+      "grad_norm": 20.910001754760742,
+      "learning_rate": 3.646833013435701e-06,
+      "loss": 9.0452,
+      "step": 21
+    },
+    {
+      "epoch": 0.01902565580859037,
+      "grad_norm": 23.27020835876465,
+      "learning_rate": 3.838771593090211e-06,
+      "loss": 8.5295,
+      "step": 22
+    },
+    {
+      "epoch": 0.01989045834534448,
+      "grad_norm": 20.173105239868164,
+      "learning_rate": 4.030710172744722e-06,
+      "loss": 7.8237,
+      "step": 23
+    },
+    {
+      "epoch": 0.020755260882098586,
+      "grad_norm": 21.863664627075195,
+      "learning_rate": 4.222648752399233e-06,
+      "loss": 8.5162,
+      "step": 24
+    },
+    {
+      "epoch": 0.021620063418852694,
+      "grad_norm": 19.267335891723633,
+      "learning_rate": 4.414587332053743e-06,
+      "loss": 8.312,
+      "step": 25
+    },
+    {
+      "epoch": 0.022484865955606802,
+      "grad_norm": 18.204317092895508,
+      "learning_rate": 4.606525911708254e-06,
+      "loss": 9.2816,
+      "step": 26
+    },
+    {
+      "epoch": 0.02334966849236091,
+      "grad_norm": 18.67437744140625,
+      "learning_rate": 4.798464491362764e-06,
+      "loss": 7.9128,
+      "step": 27
+    },
+    {
+      "epoch": 0.024214471029115017,
+      "grad_norm": 18.21224594116211,
+      "learning_rate": 4.990403071017275e-06,
+      "loss": 7.6773,
+      "step": 28
+    },
+    {
+      "epoch": 0.025079273565869125,
+      "grad_norm": 18.150562286376953,
+      "learning_rate": 5.182341650671786e-06,
+      "loss": 7.4793,
+      "step": 29
+    },
+    {
+      "epoch": 0.025944076102623233,
+      "grad_norm": 17.31494140625,
+      "learning_rate": 5.374280230326296e-06,
+      "loss": 8.2437,
+      "step": 30
+    },
+    {
+      "epoch": 0.02680887863937734,
+      "grad_norm": 16.86028289794922,
+      "learning_rate": 5.566218809980806e-06,
+      "loss": 8.6053,
+      "step": 31
+    },
+    {
+      "epoch": 0.02767368117613145,
+      "grad_norm": 15.361474990844727,
+      "learning_rate": 5.758157389635318e-06,
+      "loss": 7.4762,
+      "step": 32
+    },
+    {
+      "epoch": 0.02853848371288556,
+      "grad_norm": 18.730810165405273,
+      "learning_rate": 5.950095969289828e-06,
+      "loss": 10.5289,
+      "step": 33
+    },
+    {
+      "epoch": 0.029403286249639667,
+      "grad_norm": 15.356877326965332,
+      "learning_rate": 6.142034548944338e-06,
+      "loss": 7.8911,
+      "step": 34
+    },
+    {
+      "epoch": 0.030268088786393775,
+      "grad_norm": 17.622791290283203,
+      "learning_rate": 6.333973128598848e-06,
+      "loss": 7.9708,
+      "step": 35
+    },
+    {
+      "epoch": 0.031132891323147883,
+      "grad_norm": 17.50615882873535,
+      "learning_rate": 6.525911708253359e-06,
+      "loss": 7.2581,
+      "step": 36
+    },
+    {
+      "epoch": 0.03199769385990199,
+      "grad_norm": 16.068561553955078,
+      "learning_rate": 6.7178502879078705e-06,
+      "loss": 8.6747,
+      "step": 37
+    },
+    {
+      "epoch": 0.032862496396656095,
+      "grad_norm": 13.518677711486816,
+      "learning_rate": 6.909788867562381e-06,
+      "loss": 7.308,
+      "step": 38
+    },
+    {
+      "epoch": 0.033727298933410206,
+      "grad_norm": 18.713558197021484,
+      "learning_rate": 7.101727447216891e-06,
+      "loss": 8.6224,
+      "step": 39
+    },
+    {
+      "epoch": 0.03459210147016431,
+      "grad_norm": 20.201255798339844,
+      "learning_rate": 7.293666026871402e-06,
+      "loss": 12.3613,
+      "step": 40
+    },
+    {
+      "epoch": 0.03545690400691842,
+      "grad_norm": 13.44450855255127,
+      "learning_rate": 7.485604606525912e-06,
+      "loss": 7.2108,
+      "step": 41
+    },
+    {
+      "epoch": 0.036321706543672526,
+      "grad_norm": 15.1000394821167,
+      "learning_rate": 7.677543186180422e-06,
+      "loss": 7.831,
+      "step": 42
+    },
+    {
+      "epoch": 0.03718650908042664,
+      "grad_norm": 14.707894325256348,
+      "learning_rate": 7.869481765834934e-06,
+      "loss": 6.9282,
+      "step": 43
+    },
+    {
+      "epoch": 0.03805131161718074,
+      "grad_norm": 13.331870079040527,
+      "learning_rate": 8.061420345489444e-06,
+      "loss": 6.9092,
+      "step": 44
+    },
+    {
+      "epoch": 0.03891611415393485,
+      "grad_norm": 14.49152660369873,
+      "learning_rate": 8.253358925143954e-06,
+      "loss": 8.9053,
+      "step": 45
+    },
+    {
+      "epoch": 0.03978091669068896,
+      "grad_norm": 13.79437255859375,
+      "learning_rate": 8.445297504798465e-06,
+      "loss": 7.5276,
+      "step": 46
+    },
+    {
+      "epoch": 0.04064571922744307,
+      "grad_norm": 15.470795631408691,
+      "learning_rate": 8.637236084452976e-06,
+      "loss": 7.4793,
+      "step": 47
+    },
+    {
+      "epoch": 0.04151052176419717,
+      "grad_norm": 13.469670295715332,
+      "learning_rate": 8.829174664107486e-06,
+      "loss": 7.4401,
+      "step": 48
+    },
+    {
+      "epoch": 0.042375324300951284,
+      "grad_norm": 12.38973617553711,
+      "learning_rate": 9.021113243761997e-06,
+      "loss": 6.6742,
+      "step": 49
+    },
+    {
+      "epoch": 0.04324012683770539,
+      "grad_norm": 14.353404998779297,
+      "learning_rate": 9.213051823416507e-06,
+      "loss": 8.89,
+      "step": 50
+    },
+    {
+      "epoch": 0.0441049293744595,
+      "grad_norm": 12.149626731872559,
+      "learning_rate": 9.404990403071018e-06,
+      "loss": 8.6311,
+      "step": 51
+    },
+    {
+      "epoch": 0.044969731911213603,
+      "grad_norm": 12.504135131835938,
+      "learning_rate": 9.596928982725528e-06,
+      "loss": 6.9648,
+      "step": 52
+    },
+    {
+      "epoch": 0.045834534447967715,
+      "grad_norm": 12.439926147460938,
+      "learning_rate": 9.78886756238004e-06,
+      "loss": 7.0633,
+      "step": 53
+    },
+    {
+      "epoch": 0.04669933698472182,
+      "grad_norm": 13.445518493652344,
+      "learning_rate": 9.98080614203455e-06,
+      "loss": 8.1331,
+      "step": 54
+    },
+    {
+      "epoch": 0.04756413952147593,
+      "grad_norm": 12.668989181518555,
+      "learning_rate": 1.0172744721689061e-05,
+      "loss": 8.4931,
+      "step": 55
+    },
+    {
+      "epoch": 0.048428942058230035,
+      "grad_norm": 11.86841869354248,
+      "learning_rate": 1.0364683301343571e-05,
+      "loss": 6.9534,
+      "step": 56
+    },
+    {
+      "epoch": 0.049293744594984146,
+      "grad_norm": 12.336670875549316,
+      "learning_rate": 1.0556621880998081e-05,
+      "loss": 6.9585,
+      "step": 57
+    },
+    {
+      "epoch": 0.05015854713173825,
+      "grad_norm": 12.496221542358398,
+      "learning_rate": 1.0748560460652591e-05,
+      "loss": 7.6699,
+      "step": 58
+    },
+    {
+      "epoch": 0.05102334966849236,
+      "grad_norm": 11.765594482421875,
+      "learning_rate": 1.0940499040307102e-05,
+      "loss": 6.5076,
+      "step": 59
+    },
+    {
+      "epoch": 0.051888152205246466,
+      "grad_norm": 13.426615715026855,
+      "learning_rate": 1.1132437619961612e-05,
+      "loss": 9.5443,
+      "step": 60
+    },
+    {
+      "epoch": 0.05275295474200058,
+      "grad_norm": 12.127195358276367,
+      "learning_rate": 1.1324376199616123e-05,
+      "loss": 6.7481,
+      "step": 61
+    },
+    {
+      "epoch": 0.05361775727875468,
+      "grad_norm": 10.69729232788086,
+      "learning_rate": 1.1516314779270635e-05,
+      "loss": 6.4521,
+      "step": 62
+    },
+    {
+      "epoch": 0.05448255981550879,
+      "grad_norm": 12.042082786560059,
+      "learning_rate": 1.1708253358925145e-05,
+      "loss": 8.1839,
+      "step": 63
+    },
+    {
+      "epoch": 0.0553473623522629,
+      "grad_norm": 13.164307594299316,
+      "learning_rate": 1.1900191938579655e-05,
+      "loss": 7.1924,
+      "step": 64
+    },
+    {
+      "epoch": 0.05621216488901701,
+      "grad_norm": 10.799245834350586,
+      "learning_rate": 1.2092130518234165e-05,
+      "loss": 7.5767,
+      "step": 65
+    },
+    {
+      "epoch": 0.05707696742577112,
+      "grad_norm": 10.165273666381836,
+      "learning_rate": 1.2284069097888675e-05,
+      "loss": 7.2645,
+      "step": 66
+    },
+    {
+      "epoch": 0.05794176996252522,
+      "grad_norm": 12.342886924743652,
+      "learning_rate": 1.2476007677543186e-05,
+      "loss": 6.175,
+      "step": 67
+    },
+    {
+      "epoch": 0.058806572499279335,
+      "grad_norm": 10.652329444885254,
+      "learning_rate": 1.2667946257197696e-05,
+      "loss": 6.5491,
+      "step": 68
+    },
+    {
+      "epoch": 0.05967137503603344,
+      "grad_norm": 10.688251495361328,
+      "learning_rate": 1.2859884836852207e-05,
+      "loss": 6.7543,
+      "step": 69
+    },
+    {
+      "epoch": 0.06053617757278755,
+      "grad_norm": 11.341581344604492,
+      "learning_rate": 1.3051823416506717e-05,
+      "loss": 6.98,
+      "step": 70
+    },
+    {
+      "epoch": 0.061400980109541654,
+      "grad_norm": 10.539051055908203,
+      "learning_rate": 1.3243761996161231e-05,
+      "loss": 6.76,
+      "step": 71
+    },
+    {
+      "epoch": 0.062265782646295766,
+      "grad_norm": 10.746752738952637,
+      "learning_rate": 1.3435700575815741e-05,
+      "loss": 7.2167,
+      "step": 72
+    },
+    {
+      "epoch": 0.06313058518304987,
+      "grad_norm": 12.96174144744873,
+      "learning_rate": 1.3627639155470251e-05,
+      "loss": 9.219,
+      "step": 73
+    },
+    {
+      "epoch": 0.06399538771980398,
+      "grad_norm": 10.668299674987793,
+      "learning_rate": 1.3819577735124761e-05,
+      "loss": 7.3113,
+      "step": 74
+    },
+    {
+      "epoch": 0.06486019025655809,
+      "grad_norm": 10.878615379333496,
+      "learning_rate": 1.4011516314779271e-05,
+      "loss": 6.4098,
+      "step": 75
+    },
+    {
+      "epoch": 0.06572499279331219,
+      "grad_norm": 12.29603099822998,
+      "learning_rate": 1.4203454894433781e-05,
+      "loss": 8.3399,
+      "step": 76
+    },
+    {
+      "epoch": 0.0665897953300663,
+      "grad_norm": 13.01440143585205,
+      "learning_rate": 1.4395393474088293e-05,
+      "loss": 8.6991,
+      "step": 77
+    },
+    {
+      "epoch": 0.06745459786682041,
+      "grad_norm": 10.999458312988281,
+      "learning_rate": 1.4587332053742803e-05,
+      "loss": 9.5087,
+      "step": 78
+    },
+    {
+      "epoch": 0.06831940040357452,
+      "grad_norm": 11.303417205810547,
+      "learning_rate": 1.4779270633397313e-05,
+      "loss": 7.3491,
+      "step": 79
+    },
+    {
+      "epoch": 0.06918420294032862,
+      "grad_norm": 10.507055282592773,
+      "learning_rate": 1.4971209213051823e-05,
+      "loss": 6.8214,
+      "step": 80
+    },
+    {
+      "epoch": 0.07004900547708273,
+      "grad_norm": 11.467567443847656,
+      "learning_rate": 1.5163147792706333e-05,
+      "loss": 6.5489,
+      "step": 81
+    },
+    {
+      "epoch": 0.07091380801383684,
+      "grad_norm": 10.555798530578613,
+      "learning_rate": 1.5355086372360844e-05,
+      "loss": 6.7692,
+      "step": 82
+    },
+    {
+      "epoch": 0.07177861055059095,
+      "grad_norm": 12.266429901123047,
+      "learning_rate": 1.5547024952015357e-05,
+      "loss": 8.8059,
+      "step": 83
+    },
+    {
+      "epoch": 0.07264341308734505,
+      "grad_norm": 9.898346900939941,
+      "learning_rate": 1.5738963531669867e-05,
+      "loss": 6.4811,
+      "step": 84
+    },
+    {
+      "epoch": 0.07350821562409916,
+      "grad_norm": 11.04404067993164,
+      "learning_rate": 1.5930902111324377e-05,
+      "loss": 7.0495,
+      "step": 85
+    },
+    {
+      "epoch": 0.07437301816085327,
+      "grad_norm": 11.240497589111328,
+      "learning_rate": 1.6122840690978887e-05,
+      "loss": 5.8256,
+      "step": 86
+    },
+    {
+      "epoch": 0.07523782069760739,
+      "grad_norm": 10.409235000610352,
+      "learning_rate": 1.6314779270633397e-05,
+      "loss": 5.7203,
+      "step": 87
+    },
+    {
+      "epoch": 0.07610262323436148,
+      "grad_norm": 11.557363510131836,
+      "learning_rate": 1.6506717850287907e-05,
+      "loss": 6.5094,
+      "step": 88
+    },
+    {
+      "epoch": 0.0769674257711156,
+      "grad_norm": 9.760974884033203,
+      "learning_rate": 1.669865642994242e-05,
+      "loss": 5.7523,
+      "step": 89
+    },
+    {
+      "epoch": 0.0778322283078697,
+      "grad_norm": 9.31316089630127,
+      "learning_rate": 1.689059500959693e-05,
+      "loss": 6.0464,
+      "step": 90
+    },
+    {
+      "epoch": 0.07869703084462382,
+      "grad_norm": 11.943814277648926,
+      "learning_rate": 1.708253358925144e-05,
+      "loss": 6.5233,
+      "step": 91
+    },
+    {
+      "epoch": 0.07956183338137791,
+      "grad_norm": 9.126127243041992,
+      "learning_rate": 1.727447216890595e-05,
+      "loss": 6.8966,
+      "step": 92
+    },
+    {
+      "epoch": 0.08042663591813203,
+      "grad_norm": 9.386579513549805,
+      "learning_rate": 1.746641074856046e-05,
+      "loss": 6.3621,
+      "step": 93
+    },
+    {
+      "epoch": 0.08129143845488614,
+      "grad_norm": 10.63054370880127,
+      "learning_rate": 1.765834932821497e-05,
+      "loss": 6.0194,
+      "step": 94
+    },
+    {
+      "epoch": 0.08215624099164025,
+      "grad_norm": 10.119132995605469,
+      "learning_rate": 1.785028790786948e-05,
+      "loss": 6.6797,
+      "step": 95
+    },
+    {
+      "epoch": 0.08302104352839434,
+      "grad_norm": 10.746257781982422,
+      "learning_rate": 1.8042226487523995e-05,
+      "loss": 5.6214,
+      "step": 96
+    },
+    {
+      "epoch": 0.08388584606514846,
+      "grad_norm": 10.64887809753418,
+      "learning_rate": 1.8234165067178505e-05,
+      "loss": 6.4946,
+      "step": 97
+    },
+    {
+      "epoch": 0.08475064860190257,
+      "grad_norm": 11.115398406982422,
+      "learning_rate": 1.8426103646833015e-05,
+      "loss": 5.9069,
+      "step": 98
+    },
+    {
+      "epoch": 0.08561545113865668,
+      "grad_norm": 11.452004432678223,
+      "learning_rate": 1.8618042226487525e-05,
+      "loss": 6.8848,
+      "step": 99
+    },
+    {
+      "epoch": 0.08648025367541078,
+      "grad_norm": 12.722066879272461,
+      "learning_rate": 1.8809980806142035e-05,
+      "loss": 7.7248,
+      "step": 100
+    },
+    {
+      "epoch": 0.08734505621216489,
+      "grad_norm": 10.500570297241211,
+      "learning_rate": 1.9001919385796545e-05,
+      "loss": 6.9069,
+      "step": 101
+    },
+    {
+      "epoch": 0.088209858748919,
+      "grad_norm": 10.750312805175781,
+      "learning_rate": 1.9193857965451055e-05,
+      "loss": 6.3612,
+      "step": 102
+    },
+    {
+      "epoch": 0.08907466128567311,
+      "grad_norm": 12.96158218383789,
+      "learning_rate": 1.9385796545105565e-05,
+      "loss": 7.6664,
+      "step": 103
+    },
+    {
+      "epoch": 0.08993946382242721,
+      "grad_norm": 11.477307319641113,
+      "learning_rate": 1.957773512476008e-05,
+      "loss": 5.4654,
+      "step": 104
+    },
+    {
+      "epoch": 0.09080426635918132,
+      "grad_norm": 13.458792686462402,
+      "learning_rate": 1.976967370441459e-05,
+      "loss": 6.7583,
+      "step": 105
+    },
+    {
+      "epoch": 0.09166906889593543,
+      "grad_norm": 11.862403869628906,
+      "learning_rate": 1.99616122840691e-05,
+      "loss": 6.354,
+      "step": 106
+    },
+    {
+      "epoch": 0.09253387143268954,
+      "grad_norm": 15.43807601928711,
+      "learning_rate": 2.015355086372361e-05,
+      "loss": 5.0476,
+      "step": 107
+    },
+    {
+      "epoch": 0.09339867396944364,
+      "grad_norm": 15.703176498413086,
+      "learning_rate": 2.0345489443378122e-05,
+      "loss": 5.535,
+      "step": 108
+    },
+    {
+      "epoch": 0.09426347650619775,
+      "grad_norm": 15.830728530883789,
+      "learning_rate": 2.0537428023032633e-05,
+      "loss": 5.125,
+      "step": 109
+    },
+    {
+      "epoch": 0.09512827904295186,
+      "grad_norm": 18.535364151000977,
+      "learning_rate": 2.0729366602687143e-05,
+      "loss": 5.3941,
+      "step": 110
+    },
+    {
+      "epoch": 0.09599308157970597,
+      "grad_norm": 20.664087295532227,
+      "learning_rate": 2.0921305182341653e-05,
+      "loss": 7.6313,
+      "step": 111
+    },
+    {
+      "epoch": 0.09685788411646007,
+      "grad_norm": 26.702512741088867,
+      "learning_rate": 2.1113243761996163e-05,
+      "loss": 5.584,
+      "step": 112
+    },
+    {
+      "epoch": 0.09772268665321418,
+      "grad_norm": 24.893169403076172,
+      "learning_rate": 2.1305182341650673e-05,
+      "loss": 6.7148,
+      "step": 113
+    },
+    {
+      "epoch": 0.09858748918996829,
+      "grad_norm": 23.61020278930664,
+      "learning_rate": 2.1497120921305183e-05,
+      "loss": 4.3739,
+      "step": 114
+    },
+    {
+      "epoch": 0.0994522917267224,
+      "grad_norm": 30.567276000976562,
+      "learning_rate": 2.1689059500959693e-05,
+      "loss": 7.8202,
+      "step": 115
+    },
+    {
+      "epoch": 0.0994522917267224,
+      "eval_Qnli-dev-1024_cosine_accuracy": 0.6979166666666666,
+      "eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.9675614833831787,
+      "eval_Qnli-dev-1024_cosine_ap": 0.688956658941829,
+      "eval_Qnli-dev-1024_cosine_f1": 0.6881720430107527,
+      "eval_Qnli-dev-1024_cosine_f1_threshold": 0.9675614833831787,
+      "eval_Qnli-dev-1024_cosine_mcc": 0.3966087176872613,
+      "eval_Qnli-dev-1024_cosine_precision": 0.6666666666666666,
+      "eval_Qnli-dev-1024_cosine_recall": 0.7111111111111111,
+      "eval_Qnli-dev_cosine_accuracy": 0.7395833333333334,
+      "eval_Qnli-dev_cosine_accuracy_threshold": 0.8765031099319458,
+      "eval_Qnli-dev_cosine_ap": 0.760920950345153,
+      "eval_Qnli-dev_cosine_f1": 0.7272727272727272,
+      "eval_Qnli-dev_cosine_f1_threshold": 0.8635396957397461,
+      "eval_Qnli-dev_cosine_mcc": 0.4497120149145933,
+      "eval_Qnli-dev_cosine_precision": 0.6666666666666666,
+      "eval_Qnli-dev_cosine_recall": 0.8,
+      "eval_allNLI--triplets-1024_cosine_accuracy": 0.8020833134651184,
+      "eval_allNLI-triplets_cosine_accuracy": 0.9791666865348816,
+      "eval_global_dataset_loss": 1.940317153930664,
+      "eval_global_dataset_runtime": 67.8871,
+      "eval_global_dataset_samples_per_second": 14.318,
+      "eval_global_dataset_steps_per_second": 0.309,
+      "eval_sequential_score": 0.8020833134651184,
+      "eval_sts-test-1024_pearson_cosine": 0.6710405361187501,
+      "eval_sts-test-1024_spearman_cosine": 0.8309834676298848,
+      "eval_sts-test_pearson_cosine": 0.9054066453363472,
+      "eval_sts-test_spearman_cosine": 0.9155824166550393,
+      "step": 115
+    },
+    {
+      "epoch": 0.1003170942634765,
+      "grad_norm": 31.343202590942383,
+      "learning_rate": 2.1880998080614203e-05,
+      "loss": 4.42,
+      "step": 116
+    },
+    {
+      "epoch": 0.10118189680023061,
+      "grad_norm": 28.399757385253906,
+      "learning_rate": 2.2072936660268713e-05,
+      "loss": 4.5047,
+      "step": 117
+    },
+    {
+      "epoch": 0.10204669933698472,
+      "grad_norm": 32.25544357299805,
+      "learning_rate": 2.2264875239923223e-05,
+      "loss": 4.427,
+      "step": 118
+    },
+    {
+      "epoch": 0.10291150187373883,
+      "grad_norm": 27.07774543762207,
+      "learning_rate": 2.2456813819577733e-05,
+      "loss": 3.1201,
+      "step": 119
+    },
+    {
+      "epoch": 0.10377630441049293,
+      "grad_norm": 31.4462833404541,
+      "learning_rate": 2.2648752399232247e-05,
+      "loss": 4.3632,
+      "step": 120
+    },
+    {
+      "epoch": 0.10464110694724704,
+      "grad_norm": 27.67288589477539,
+      "learning_rate": 2.2840690978886757e-05,
+      "loss": 3.5101,
+      "step": 121
+    },
+    {
+      "epoch": 0.10550590948400115,
+      "grad_norm": 29.23362922668457,
+      "learning_rate": 2.303262955854127e-05,
+      "loss": 4.7499,
+      "step": 122
+    },
+    {
+      "epoch": 0.10637071202075526,
+      "grad_norm": 27.85274887084961,
+      "learning_rate": 2.322456813819578e-05,
+      "loss": 4.5242,
+      "step": 123
+    },
+    {
+      "epoch": 0.10723551455750936,
+      "grad_norm": 21.893939971923828,
+      "learning_rate": 2.341650671785029e-05,
+      "loss": 3.379,
+      "step": 124
+    },
+    {
+      "epoch": 0.10810031709426347,
+      "grad_norm": 18.63385772705078,
+      "learning_rate": 2.36084452975048e-05,
+      "loss": 2.8004,
+      "step": 125
+    },
+    {
+      "epoch": 0.10896511963101758,
+      "grad_norm": 16.17616844177246,
+      "learning_rate": 2.380038387715931e-05,
+      "loss": 2.8855,
+      "step": 126
+    },
+    {
+      "epoch": 0.1098299221677717,
+      "grad_norm": 17.123281478881836,
+      "learning_rate": 2.399232245681382e-05,
+      "loss": 3.937,
+      "step": 127
+    },
+    {
+      "epoch": 0.1106947247045258,
+      "grad_norm": 14.539612770080566,
+      "learning_rate": 2.418426103646833e-05,
+      "loss": 3.5914,
+      "step": 128
+    },
+    {
+      "epoch": 0.1115595272412799,
+      "grad_norm": 12.644956588745117,
+      "learning_rate": 2.437619961612284e-05,
+      "loss": 2.6459,
+      "step": 129
+    },
+    {
+      "epoch": 0.11242432977803402,
+      "grad_norm": 10.95170783996582,
+      "learning_rate": 2.456813819577735e-05,
+      "loss": 2.3887,
+      "step": 130
+    },
+    {
+      "epoch": 0.11328913231478813,
+      "grad_norm": 12.561387062072754,
+      "learning_rate": 2.476007677543186e-05,
+      "loss": 4.1043,
+      "step": 131
+    },
+    {
+      "epoch": 0.11415393485154224,
+      "grad_norm": 9.273588180541992,
+      "learning_rate": 2.495201535508637e-05,
+      "loss": 2.2758,
+      "step": 132
+    },
+    {
+      "epoch": 0.11501873738829634,
+      "grad_norm": 9.219544410705566,
+      "learning_rate": 2.514395393474088e-05,
+      "loss": 2.859,
+      "step": 133
+    },
+    {
+      "epoch": 0.11588353992505045,
+      "grad_norm": 8.443903923034668,
+      "learning_rate": 2.533589251439539e-05,
+      "loss": 2.0162,
+      "step": 134
+    },
+    {
+      "epoch": 0.11674834246180456,
+      "grad_norm": 9.522578239440918,
+      "learning_rate": 2.5527831094049905e-05,
+      "loss": 2.7069,
+      "step": 135
+    },
+    {
+      "epoch": 0.11761314499855867,
+      "grad_norm": 8.184837341308594,
+      "learning_rate": 2.5719769673704415e-05,
+      "loss": 1.9536,
+      "step": 136
+    },
+    {
+      "epoch": 0.11847794753531277,
+      "grad_norm": 9.079197883605957,
+      "learning_rate": 2.5911708253358925e-05,
+      "loss": 2.3063,
+      "step": 137
+    },
+    {
+      "epoch": 0.11934275007206688,
+      "grad_norm": 9.438823699951172,
+      "learning_rate": 2.6103646833013435e-05,
+      "loss": 3.3783,
+      "step": 138
+    },
+    {
+      "epoch": 0.12020755260882099,
+      "grad_norm": 8.003981590270996,
+      "learning_rate": 2.6295585412667952e-05,
+      "loss": 1.9538,
+      "step": 139
+    },
+    {
+      "epoch": 0.1210723551455751,
+      "grad_norm": 8.199268341064453,
+      "learning_rate": 2.6487523992322462e-05,
+      "loss": 2.8959,
+      "step": 140
+    },
+    {
+      "epoch": 0.1219371576823292,
+      "grad_norm": 9.071074485778809,
+      "learning_rate": 2.6679462571976972e-05,
+      "loss": 2.3064,
+      "step": 141
+    },
+    {
+      "epoch": 0.12280196021908331,
+      "grad_norm": 10.237217903137207,
+      "learning_rate": 2.6871401151631482e-05,
+      "loss": 2.4625,
+      "step": 142
+    },
+    {
+      "epoch": 0.12366676275583742,
+      "grad_norm": 7.96627950668335,
+      "learning_rate": 2.7063339731285992e-05,
+      "loss": 2.4083,
+      "step": 143
+    },
+    {
+      "epoch": 0.12453156529259153,
+      "grad_norm": 8.751070022583008,
+      "learning_rate": 2.7255278310940502e-05,
+      "loss": 1.5914,
+      "step": 144
+    },
+    {
+      "epoch": 0.12539636782934563,
+      "grad_norm": 6.843534469604492,
+      "learning_rate": 2.7447216890595012e-05,
+      "loss": 1.5798,
+      "step": 145
+    },
+    {
+      "epoch": 0.12626117036609974,
+      "grad_norm": 7.700779438018799,
+      "learning_rate": 2.7639155470249522e-05,
+      "loss": 1.5194,
+      "step": 146
+    },
+    {
+      "epoch": 0.12712597290285385,
+      "grad_norm": 8.954259872436523,
+      "learning_rate": 2.7831094049904032e-05,
+      "loss": 1.5924,
+      "step": 147
+    },
+    {
+      "epoch": 0.12799077543960796,
+      "grad_norm": 10.815597534179688,
+      "learning_rate": 2.8023032629558543e-05,
+      "loss": 3.1143,
+      "step": 148
+    },
+    {
+      "epoch": 0.12885557797636207,
+      "grad_norm": 9.539572715759277,
+      "learning_rate": 2.8214971209213053e-05,
+      "loss": 1.8632,
+      "step": 149
+    },
+    {
+      "epoch": 0.12972038051311618,
+      "grad_norm": 6.322872638702393,
+      "learning_rate": 2.8406909788867563e-05,
+      "loss": 2.0489,
+      "step": 150
+    },
+    {
+      "epoch": 0.13058518304987027,
+      "grad_norm": 6.538212776184082,
+      "learning_rate": 2.8598848368522073e-05,
+      "loss": 1.5573,
+      "step": 151
+    },
+    {
+      "epoch": 0.13144998558662438,
+      "grad_norm": 6.798872470855713,
+      "learning_rate": 2.8790786948176586e-05,
+      "loss": 2.8024,
+      "step": 152
+    },
+    {
+      "epoch": 0.1323147881233785,
+      "grad_norm": 8.393974304199219,
+      "learning_rate": 2.8982725527831096e-05,
+      "loss": 1.9423,
+      "step": 153
+    },
+    {
+      "epoch": 0.1331795906601326,
+      "grad_norm": 8.043729782104492,
+      "learning_rate": 2.9174664107485606e-05,
+      "loss": 3.1444,
+      "step": 154
+    },
+    {
+      "epoch": 0.1340443931968867,
+      "grad_norm": 9.158576965332031,
+      "learning_rate": 2.9366602687140116e-05,
+      "loss": 2.5482,
+      "step": 155
+    },
+    {
+      "epoch": 0.13490919573364082,
+      "grad_norm": 6.786825180053711,
+      "learning_rate": 2.9558541266794627e-05,
+      "loss": 1.0428,
+      "step": 156
+    },
+    {
+      "epoch": 0.13577399827039494,
+      "grad_norm": 12.157453536987305,
+      "learning_rate": 2.9750479846449137e-05,
+      "loss": 5.8267,
+      "step": 157
+    },
+    {
+      "epoch": 0.13663880080714905,
+      "grad_norm": 10.719176292419434,
+      "learning_rate": 2.9942418426103647e-05,
+      "loss": 1.9785,
+      "step": 158
+    },
+    {
+      "epoch": 0.13750360334390313,
+      "grad_norm": 8.25823974609375,
+      "learning_rate": 3.0134357005758157e-05,
+      "loss": 2.5306,
+      "step": 159
+    },
+    {
+      "epoch": 0.13836840588065724,
+      "grad_norm": 8.451217651367188,
+      "learning_rate": 3.0326295585412667e-05,
+      "loss": 1.8271,
+      "step": 160
+    },
+    {
+      "epoch": 0.13923320841741135,
+      "grad_norm": 9.387060165405273,
+      "learning_rate": 3.051823416506718e-05,
+      "loss": 2.6579,
+      "step": 161
+    },
+    {
+      "epoch": 0.14009801095416546,
+      "grad_norm": 8.968480110168457,
+      "learning_rate": 3.071017274472169e-05,
+      "loss": 3.0193,
+      "step": 162
+    },
+    {
+      "epoch": 0.14096281349091958,
+      "grad_norm": 8.816688537597656,
+      "learning_rate": 3.09021113243762e-05,
+      "loss": 1.5596,
+      "step": 163
+    },
+    {
+      "epoch": 0.1418276160276737,
+      "grad_norm": 5.402006149291992,
+      "learning_rate": 3.1094049904030714e-05,
+      "loss": 1.4505,
+      "step": 164
+    },
+    {
+      "epoch": 0.1426924185644278,
+      "grad_norm": 7.654393196105957,
+      "learning_rate": 3.128598848368523e-05,
+      "loss": 2.5331,
+      "step": 165
+    },
+    {
+      "epoch": 0.1435572211011819,
+      "grad_norm": 6.393066883087158,
+      "learning_rate": 3.1477927063339734e-05,
+      "loss": 1.384,
+      "step": 166
+    },
+    {
+      "epoch": 0.144422023637936,
+      "grad_norm": 8.975717544555664,
+      "learning_rate": 3.166986564299425e-05,
+      "loss": 3.3553,
+      "step": 167
+    },
+    {
+      "epoch": 0.1452868261746901,
+      "grad_norm": 8.812336921691895,
+      "learning_rate": 3.1861804222648754e-05,
+      "loss": 2.2541,
+      "step": 168
+    },
+    {
+      "epoch": 0.14615162871144421,
+      "grad_norm": 7.189652919769287,
+      "learning_rate": 3.205374280230327e-05,
+      "loss": 1.1827,
+      "step": 169
+    },
+    {
+      "epoch": 0.14701643124819833,
+      "grad_norm": 7.888529300689697,
+      "learning_rate": 3.2245681381957774e-05,
+      "loss": 1.3643,
+      "step": 170
+    },
+    {
+      "epoch": 0.14788123378495244,
+      "grad_norm": 6.611407279968262,
+      "learning_rate": 3.243761996161229e-05,
+      "loss": 1.9817,
+      "step": 171
+    },
+    {
+      "epoch": 0.14874603632170655,
+      "grad_norm": 6.734430313110352,
+      "learning_rate": 3.2629558541266795e-05,
+      "loss": 2.3332,
+      "step": 172
+    },
+    {
+      "epoch": 0.14961083885846066,
+      "grad_norm": 6.5995306968688965,
+      "learning_rate": 3.282149712092131e-05,
+      "loss": 1.4638,
+      "step": 173
+    },
+    {
+      "epoch": 0.15047564139521477,
+      "grad_norm": 7.57749605178833,
+      "learning_rate": 3.3013435700575815e-05,
+      "loss": 1.7929,
+      "step": 174
+    },
+    {
+      "epoch": 0.15134044393196885,
+      "grad_norm": 4.956903457641602,
+      "learning_rate": 3.320537428023033e-05,
+      "loss": 1.0457,
+      "step": 175
+    },
+    {
+      "epoch": 0.15220524646872297,
+      "grad_norm": 9.929686546325684,
+      "learning_rate": 3.339731285988484e-05,
+      "loss": 1.4866,
+      "step": 176
+    },
+    {
+      "epoch": 0.15307004900547708,
+      "grad_norm": 7.194726467132568,
+      "learning_rate": 3.358925143953935e-05,
+      "loss": 1.7834,
+      "step": 177
+    },
+    {
+      "epoch": 0.1539348515422312,
+      "grad_norm": 6.916417598724365,
+      "learning_rate": 3.378119001919386e-05,
+      "loss": 1.1396,
+      "step": 178
+    },
+    {
+      "epoch": 0.1547996540789853,
+      "grad_norm": 9.47856330871582,
+      "learning_rate": 3.397312859884837e-05,
+      "loss": 1.9811,
+      "step": 179
+    },
+    {
+      "epoch": 0.1556644566157394,
+      "grad_norm": 7.894885540008545,
+      "learning_rate": 3.416506717850288e-05,
+      "loss": 1.1859,
+      "step": 180
+    },
+    {
+      "epoch": 0.15652925915249352,
+      "grad_norm": 7.631194114685059,
+      "learning_rate": 3.435700575815739e-05,
+      "loss": 1.5481,
+      "step": 181
+    },
+    {
+      "epoch": 0.15739406168924763,
+      "grad_norm": 5.6157073974609375,
+      "learning_rate": 3.45489443378119e-05,
+      "loss": 1.5954,
+      "step": 182
+    },
+    {
+      "epoch": 0.15825886422600172,
+      "grad_norm": 9.201720237731934,
+      "learning_rate": 3.474088291746641e-05,
+      "loss": 2.2163,
+      "step": 183
+    },
+    {
+      "epoch": 0.15912366676275583,
+      "grad_norm": 5.702026844024658,
+      "learning_rate": 3.493282149712092e-05,
+      "loss": 1.475,
+      "step": 184
+    },
+    {
+      "epoch": 0.15998846929950994,
+      "grad_norm": 5.93116569519043,
+      "learning_rate": 3.512476007677543e-05,
+      "loss": 1.2394,
+      "step": 185
+    },
+    {
+      "epoch": 0.16085327183626405,
+      "grad_norm": 3.9884233474731445,
+      "learning_rate": 3.531669865642994e-05,
+      "loss": 1.2713,
+      "step": 186
+    },
+    {
+      "epoch": 0.16171807437301816,
+      "grad_norm": 7.569946765899658,
+      "learning_rate": 3.550863723608445e-05,
+      "loss": 1.435,
+      "step": 187
+    },
+    {
+      "epoch": 0.16258287690977227,
+      "grad_norm": 7.594637393951416,
+      "learning_rate": 3.570057581573896e-05,
+      "loss": 1.1762,
+      "step": 188
+    },
+    {
+      "epoch": 0.16344767944652638,
+      "grad_norm": 7.092876434326172,
+      "learning_rate": 3.5892514395393476e-05,
+      "loss": 2.3349,
+      "step": 189
+    },
+    {
+      "epoch": 0.1643124819832805,
+      "grad_norm": 6.997330188751221,
+      "learning_rate": 3.608445297504799e-05,
+      "loss": 1.1459,
+      "step": 190
+    },
+    {
+      "epoch": 0.16517728452003458,
+      "grad_norm": 9.205595016479492,
+      "learning_rate": 3.6276391554702496e-05,
+      "loss": 1.313,
+      "step": 191
+    },
+    {
+      "epoch": 0.1660420870567887,
+      "grad_norm": 6.776134014129639,
+      "learning_rate": 3.646833013435701e-05,
+      "loss": 1.1422,
+      "step": 192
+    },
+    {
+      "epoch": 0.1669068895935428,
+      "grad_norm": 9.902478218078613,
+      "learning_rate": 3.6660268714011516e-05,
+      "loss": 1.4937,
+      "step": 193
+    },
+    {
+      "epoch": 0.1677716921302969,
+      "grad_norm": 8.630653381347656,
+      "learning_rate": 3.685220729366603e-05,
+      "loss": 1.351,
+      "step": 194
+    },
+    {
+      "epoch": 0.16863649466705102,
+      "grad_norm": 8.957950592041016,
+      "learning_rate": 3.704414587332054e-05,
+      "loss": 1.1581,
+      "step": 195
+    },
+    {
+      "epoch": 0.16950129720380513,
+      "grad_norm": 8.303983688354492,
+      "learning_rate": 3.723608445297505e-05,
+      "loss": 2.1473,
+      "step": 196
+    },
+    {
+      "epoch": 0.17036609974055925,
+      "grad_norm": 8.272674560546875,
+      "learning_rate": 3.7428023032629563e-05,
+      "loss": 0.8801,
+      "step": 197
+    },
+    {
+      "epoch": 0.17123090227731336,
+      "grad_norm": 7.904557228088379,
+      "learning_rate": 3.761996161228407e-05,
+      "loss": 1.3985,
+      "step": 198
+    },
+    {
+      "epoch": 0.17209570481406747,
+      "grad_norm": 5.652804851531982,
+      "learning_rate": 3.7811900191938584e-05,
+      "loss": 0.8468,
+      "step": 199
+    },
+    {
+      "epoch": 0.17296050735082155,
+      "grad_norm": 5.771730422973633,
+      "learning_rate": 3.800383877159309e-05,
+      "loss": 1.0563,
+      "step": 200
+    },
+    {
+      "epoch": 0.17382530988757566,
+      "grad_norm": 6.634278297424316,
+      "learning_rate": 3.8195777351247604e-05,
+      "loss": 0.9612,
+      "step": 201
+    },
+    {
+      "epoch": 0.17469011242432977,
+      "grad_norm": 8.659712791442871,
+      "learning_rate": 3.838771593090211e-05,
+      "loss": 1.665,
+      "step": 202
+    },
+    {
+      "epoch": 0.17555491496108389,
+      "grad_norm": 6.617002487182617,
+      "learning_rate": 3.8579654510556624e-05,
+      "loss": 1.1505,
+      "step": 203
+    },
+    {
+      "epoch": 0.176419717497838,
+      "grad_norm": 10.3783597946167,
+      "learning_rate": 3.877159309021113e-05,
+      "loss": 1.7958,
+      "step": 204
+    },
+    {
+      "epoch": 0.1772845200345921,
+      "grad_norm": 9.473942756652832,
+      "learning_rate": 3.8963531669865644e-05,
+      "loss": 1.3115,
+      "step": 205
+    },
+    {
+      "epoch": 0.17814932257134622,
+      "grad_norm": 7.500204563140869,
+      "learning_rate": 3.915547024952016e-05,
+      "loss": 1.0855,
+      "step": 206
+    },
+    {
+      "epoch": 0.17901412510810033,
+      "grad_norm": 6.897130012512207,
+      "learning_rate": 3.9347408829174664e-05,
+      "loss": 1.1051,
+      "step": 207
+    },
+    {
+      "epoch": 0.17987892764485441,
+      "grad_norm": 9.034842491149902,
+      "learning_rate": 3.953934740882918e-05,
+      "loss": 2.5371,
+      "step": 208
+    },
+    {
+      "epoch": 0.18074373018160853,
+      "grad_norm": 9.812570571899414,
+      "learning_rate": 3.9731285988483684e-05,
+      "loss": 1.7992,
+      "step": 209
+    },
+    {
+      "epoch": 0.18160853271836264,
+      "grad_norm": 7.528004169464111,
+      "learning_rate": 3.99232245681382e-05,
+      "loss": 1.7798,
+      "step": 210
+    },
+    {
+      "epoch": 0.18247333525511675,
+      "grad_norm": 7.52139139175415,
+      "learning_rate": 4.0115163147792705e-05,
+      "loss": 0.7093,
+      "step": 211
+    },
+    {
+      "epoch": 0.18333813779187086,
+      "grad_norm": 9.2921142578125,
+      "learning_rate": 4.030710172744722e-05,
+      "loss": 1.2681,
+      "step": 212
+    },
+    {
+      "epoch": 0.18420294032862497,
+      "grad_norm": 4.883711814880371,
+      "learning_rate": 4.049904030710173e-05,
+      "loss": 0.911,
+      "step": 213
+    },
+    {
+      "epoch": 0.18506774286537908,
+      "grad_norm": 8.103593826293945,
+      "learning_rate": 4.0690978886756245e-05,
+      "loss": 1.1144,
+      "step": 214
+    },
+    {
+      "epoch": 0.1859325454021332,
+      "grad_norm": 6.5846381187438965,
+      "learning_rate": 4.088291746641075e-05,
+      "loss": 0.8362,
+      "step": 215
+    },
+    {
+      "epoch": 0.18679734793888728,
+      "grad_norm": 5.238864421844482,
+      "learning_rate": 4.1074856046065265e-05,
+      "loss": 0.838,
+      "step": 216
+    },
+    {
+      "epoch": 0.1876621504756414,
+      "grad_norm": 7.091164588928223,
+      "learning_rate": 4.126679462571977e-05,
+      "loss": 1.3143,
+      "step": 217
+    },
+    {
+      "epoch": 0.1885269530123955,
+      "grad_norm": 4.529580116271973,
+      "learning_rate": 4.1458733205374285e-05,
+      "loss": 0.8799,
+      "step": 218
+    },
+    {
+      "epoch": 0.1893917555491496,
+      "grad_norm": 5.912927627563477,
+      "learning_rate": 4.165067178502879e-05,
+      "loss": 0.7928,
+      "step": 219
+    },
+    {
+      "epoch": 0.19025655808590372,
+      "grad_norm": 7.802720069885254,
+      "learning_rate": 4.1842610364683305e-05,
+      "loss": 0.7077,
+      "step": 220
+    },
+    {
+      "epoch": 0.19112136062265783,
+      "grad_norm": 7.49670934677124,
+      "learning_rate": 4.203454894433781e-05,
+      "loss": 1.7815,
+      "step": 221
+    },
+    {
+      "epoch": 0.19198616315941194,
+      "grad_norm": 5.978695392608643,
+      "learning_rate": 4.2226487523992326e-05,
+      "loss": 1.3599,
+      "step": 222
+    },
+    {
+      "epoch": 0.19285096569616605,
+      "grad_norm": 8.289727210998535,
+      "learning_rate": 4.241842610364683e-05,
+      "loss": 0.7413,
+      "step": 223
+    },
+    {
+      "epoch": 0.19371576823292014,
+      "grad_norm": 7.663917541503906,
+      "learning_rate": 4.2610364683301346e-05,
+      "loss": 1.9959,
+      "step": 224
+    },
+    {
+      "epoch": 0.19458057076967425,
+      "grad_norm": 9.845619201660156,
+      "learning_rate": 4.280230326295586e-05,
+      "loss": 1.9112,
+      "step": 225
+    },
+    {
+      "epoch": 0.19544537330642836,
+      "grad_norm": 5.703056812286377,
+      "learning_rate": 4.2994241842610366e-05,
+      "loss": 0.5033,
+      "step": 226
+    },
+    {
+      "epoch": 0.19631017584318247,
+      "grad_norm": 9.209814071655273,
+      "learning_rate": 4.318618042226488e-05,
+      "loss": 1.1669,
+      "step": 227
+    },
+    {
+      "epoch": 0.19717497837993658,
+      "grad_norm": 8.577181816101074,
+      "learning_rate": 4.3378119001919386e-05,
+      "loss": 1.2109,
+      "step": 228
+    },
+    {
+      "epoch": 0.1980397809166907,
+      "grad_norm": 7.078784942626953,
+      "learning_rate": 4.35700575815739e-05,
+      "loss": 0.781,
+      "step": 229
+    },
+    {
+      "epoch": 0.1989045834534448,
+      "grad_norm": 9.162598609924316,
+      "learning_rate": 4.3761996161228406e-05,
+      "loss": 1.5895,
+      "step": 230
+    },
+    {
+      "epoch": 0.1989045834534448,
+      "eval_Qnli-dev-1024_cosine_accuracy": 0.7395833333333334,
+      "eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.867725133895874,
+      "eval_Qnli-dev-1024_cosine_ap": 0.713229712410124,
+      "eval_Qnli-dev-1024_cosine_f1": 0.7291666666666667,
+      "eval_Qnli-dev-1024_cosine_f1_threshold": 0.8537728786468506,
+      "eval_Qnli-dev-1024_cosine_mcc": 0.46405228758169936,
+      "eval_Qnli-dev-1024_cosine_precision": 0.6862745098039216,
+      "eval_Qnli-dev-1024_cosine_recall": 0.7777777777777778,
+      "eval_Qnli-dev_cosine_accuracy": 0.7291666666666666,
+      "eval_Qnli-dev_cosine_accuracy_threshold": 0.8137844204902649,
+      "eval_Qnli-dev_cosine_ap": 0.7513782450871136,
+      "eval_Qnli-dev_cosine_f1": 0.7222222222222222,
+      "eval_Qnli-dev_cosine_f1_threshold": 0.7686975002288818,
+      "eval_Qnli-dev_cosine_mcc": 0.41614558708189836,
+      "eval_Qnli-dev_cosine_precision": 0.6190476190476191,
+      "eval_Qnli-dev_cosine_recall": 0.8666666666666667,
+      "eval_allNLI--triplets-1024_cosine_accuracy": 0.9270833134651184,
+      "eval_allNLI-triplets_cosine_accuracy": 0.9791666865348816,
+      "eval_global_dataset_loss": 0.7282267808914185,
+      "eval_global_dataset_runtime": 67.7277,
+      "eval_global_dataset_samples_per_second": 14.352,
+      "eval_global_dataset_steps_per_second": 0.31,
+      "eval_sequential_score": 0.9270833134651184,
+      "eval_sts-test-1024_pearson_cosine": 0.8484219629681994,
+      "eval_sts-test-1024_spearman_cosine": 0.8984444397927454,
+      "eval_sts-test_pearson_cosine": 0.9066337545995211,
+      "eval_sts-test_spearman_cosine": 0.9170443296862147,
+      "step": 230
+    },
+    {
+      "epoch": 0.19976938599019892,
+      "grad_norm": 6.589021682739258,
+      "learning_rate": 4.395393474088292e-05,
+      "loss": 1.2439,
+      "step": 231
+    },
+    {
+      "epoch": 0.200634188526953,
+      "grad_norm": 4.9722089767456055,
+      "learning_rate": 4.4145873320537426e-05,
+      "loss": 0.5947,
+      "step": 232
+    },
+    {
+      "epoch": 0.2014989910637071,
+      "grad_norm": 6.424257278442383,
+      "learning_rate": 4.433781190019194e-05,
+      "loss": 1.1687,
+      "step": 233
+    },
+    {
+      "epoch": 0.20236379360046122,
+      "grad_norm": 10.21776008605957,
+      "learning_rate": 4.4529750479846447e-05,
+      "loss": 2.082,
+      "step": 234
+    },
+    {
+      "epoch": 0.20322859613721533,
+      "grad_norm": 6.3251633644104,
+      "learning_rate": 4.472168905950096e-05,
+      "loss": 0.521,
+      "step": 235
+    },
+    {
+      "epoch": 0.20409339867396945,
+      "grad_norm": 6.459076881408691,
+      "learning_rate": 4.491362763915547e-05,
+      "loss": 1.2406,
+      "step": 236
+    },
+    {
+      "epoch": 0.20495820121072356,
+      "grad_norm": 6.254432201385498,
+      "learning_rate": 4.510556621880998e-05,
+      "loss": 0.6586,
+      "step": 237
+    },
+    {
+      "epoch": 0.20582300374747767,
+      "grad_norm": 6.352238655090332,
+      "learning_rate": 4.5297504798464494e-05,
+      "loss": 0.6746,
+      "step": 238
+    },
+    {
+      "epoch": 0.20668780628423178,
+      "grad_norm": 4.247053146362305,
+      "learning_rate": 4.548944337811901e-05,
+      "loss": 0.3925,
+      "step": 239
+    },
+    {
+      "epoch": 0.20755260882098586,
+      "grad_norm": 6.61681604385376,
+      "learning_rate": 4.5681381957773514e-05,
+      "loss": 0.8654,
+      "step": 240
+    },
+    {
+      "epoch": 0.20841741135773997,
+      "grad_norm": 7.9061408042907715,
+      "learning_rate": 4.587332053742803e-05,
+      "loss": 0.6723,
+      "step": 241
+    },
+    {
+      "epoch": 0.20928221389449408,
+      "grad_norm": 3.9183671474456787,
+      "learning_rate": 4.606525911708254e-05,
+      "loss": 0.4345,
+      "step": 242
+    },
+    {
+      "epoch": 0.2101470164312482,
+      "grad_norm": 8.863993644714355,
+      "learning_rate": 4.625719769673705e-05,
+      "loss": 1.0822,
+      "step": 243
+    },
+    {
+      "epoch": 0.2110118189680023,
+      "grad_norm": 8.070558547973633,
+      "learning_rate": 4.644913627639156e-05,
+      "loss": 1.5697,
+      "step": 244
+    },
+    {
+      "epoch": 0.21187662150475642,
+      "grad_norm": 3.8370699882507324,
+      "learning_rate": 4.664107485604607e-05,
+      "loss": 0.3771,
+      "step": 245
+    },
+    {
+      "epoch": 0.21274142404151053,
+      "grad_norm": 4.8743486404418945,
+      "learning_rate": 4.683301343570058e-05,
+      "loss": 0.484,
+      "step": 246
+    },
+    {
+      "epoch": 0.21360622657826464,
+      "grad_norm": 6.827274322509766,
+      "learning_rate": 4.702495201535509e-05,
+      "loss": 1.0994,
+      "step": 247
+    },
+    {
+      "epoch": 0.21447102911501872,
+      "grad_norm": 6.400326251983643,
+      "learning_rate": 4.72168905950096e-05,
+      "loss": 1.0543,
+      "step": 248
+    },
+    {
+      "epoch": 0.21533583165177284,
+      "grad_norm": 9.760299682617188,
+      "learning_rate": 4.740882917466411e-05,
+      "loss": 1.1132,
+      "step": 249
+    },
+    {
+      "epoch": 0.21620063418852695,
+      "grad_norm": 9.413398742675781,
+      "learning_rate": 4.760076775431862e-05,
+      "loss": 1.7051,
+      "step": 250
+    },
+    {
+      "epoch": 0.21706543672528106,
+      "grad_norm": 6.986111164093018,
+      "learning_rate": 4.779270633397313e-05,
+      "loss": 0.5029,
+      "step": 251
+    },
+    {
+      "epoch": 0.21793023926203517,
+      "grad_norm": 11.26386547088623,
+      "learning_rate": 4.798464491362764e-05,
+      "loss": 1.5927,
+      "step": 252
+    },
+    {
+      "epoch": 0.21879504179878928,
+      "grad_norm": 5.758693695068359,
+      "learning_rate": 4.817658349328215e-05,
+      "loss": 0.9221,
+      "step": 253
+    },
+    {
+      "epoch": 0.2196598443355434,
+      "grad_norm": 6.061553478240967,
+      "learning_rate": 4.836852207293666e-05,
+      "loss": 0.989,
+      "step": 254
+    },
+    {
+      "epoch": 0.2205246468722975,
+      "grad_norm": 7.509443759918213,
+      "learning_rate": 4.8560460652591175e-05,
+      "loss": 1.9468,
+      "step": 255
+    },
+    {
+      "epoch": 0.2213894494090516,
+      "grad_norm": 7.857194900512695,
+      "learning_rate": 4.875239923224568e-05,
+      "loss": 0.7299,
+      "step": 256
+    },
+    {
+      "epoch": 0.2222542519458057,
+      "grad_norm": 9.96574592590332,
+      "learning_rate": 4.8944337811900195e-05,
+      "loss": 1.0199,
+      "step": 257
+    },
+    {
+      "epoch": 0.2231190544825598,
+      "grad_norm": 8.403667449951172,
+      "learning_rate": 4.91362763915547e-05,
+      "loss": 1.0238,
+      "step": 258
+    },
+    {
+      "epoch": 0.22398385701931392,
+      "grad_norm": 8.612835884094238,
+      "learning_rate": 4.9328214971209215e-05,
+      "loss": 1.8386,
+      "step": 259
+    },
+    {
+      "epoch": 0.22484865955606803,
+      "grad_norm": 7.690261363983154,
+      "learning_rate": 4.952015355086372e-05,
+      "loss": 0.7887,
+      "step": 260
+    },
+    {
+      "epoch": 0.22571346209282214,
+      "grad_norm": 9.24271011352539,
+      "learning_rate": 4.9712092130518236e-05,
+      "loss": 1.0248,
+      "step": 261
+    },
+    {
+      "epoch": 0.22657826462957625,
+      "grad_norm": 6.5738525390625,
+      "learning_rate": 4.990403071017274e-05,
+      "loss": 0.9891,
+      "step": 262
+    },
+    {
+      "epoch": 0.22744306716633036,
+      "grad_norm": 10.909134864807129,
+      "learning_rate": 5.009596928982726e-05,
+      "loss": 2.007,
+      "step": 263
+    },
+    {
+      "epoch": 0.22830786970308448,
+      "grad_norm": 7.512816905975342,
+      "learning_rate": 5.028790786948176e-05,
+      "loss": 1.6522,
+      "step": 264
+    },
+    {
+      "epoch": 0.22917267223983856,
+      "grad_norm": 4.3134446144104,
+      "learning_rate": 5.047984644913628e-05,
+      "loss": 0.8482,
+      "step": 265
+    },
+    {
+      "epoch": 0.23003747477659267,
+      "grad_norm": 6.679250240325928,
+      "learning_rate": 5.067178502879078e-05,
+      "loss": 0.7231,
+      "step": 266
+    },
+    {
+      "epoch": 0.23090227731334678,
+      "grad_norm": 8.060896873474121,
+      "learning_rate": 5.08637236084453e-05,
+      "loss": 0.9017,
+      "step": 267
+    },
+    {
+      "epoch": 0.2317670798501009,
+      "grad_norm": 10.473666191101074,
+      "learning_rate": 5.105566218809981e-05,
+      "loss": 1.2073,
+      "step": 268
+    },
+    {
+      "epoch": 0.232631882386855,
+      "grad_norm": 5.640207290649414,
+      "learning_rate": 5.124760076775432e-05,
+      "loss": 0.3825,
+      "step": 269
+    },
+    {
+      "epoch": 0.23349668492360912,
+      "grad_norm": 7.310571193695068,
+      "learning_rate": 5.143953934740883e-05,
+      "loss": 0.6634,
+      "step": 270
+    },
+    {
+      "epoch": 0.23436148746036323,
+      "grad_norm": 10.224222183227539,
+      "learning_rate": 5.163147792706334e-05,
+      "loss": 1.3564,
+      "step": 271
+    },
+    {
+      "epoch": 0.23522628999711734,
+      "grad_norm": 4.993323802947998,
+      "learning_rate": 5.182341650671785e-05,
+      "loss": 1.1294,
+      "step": 272
+    },
+    {
+      "epoch": 0.23609109253387142,
+      "grad_norm": 6.149577617645264,
+      "learning_rate": 5.201535508637236e-05,
+      "loss": 0.5599,
+      "step": 273
+    },
+    {
+      "epoch": 0.23695589507062553,
+      "grad_norm": 6.756112098693848,
+      "learning_rate": 5.220729366602687e-05,
+      "loss": 0.6844,
+      "step": 274
+    },
+    {
+      "epoch": 0.23782069760737964,
+      "grad_norm": 8.450921058654785,
+      "learning_rate": 5.2399232245681383e-05,
+      "loss": 0.7783,
+      "step": 275
+    },
+    {
+      "epoch": 0.23868550014413376,
+      "grad_norm": 7.2079267501831055,
+      "learning_rate": 5.2591170825335904e-05,
+      "loss": 1.101,
+      "step": 276
+    },
+    {
+      "epoch": 0.23955030268088787,
+      "grad_norm": 6.447202205657959,
+      "learning_rate": 5.2783109404990404e-05,
+      "loss": 0.9447,
+      "step": 277
+    },
+    {
+      "epoch": 0.24041510521764198,
+      "grad_norm": 10.80993366241455,
+      "learning_rate": 5.2975047984644924e-05,
+      "loss": 2.4452,
+      "step": 278
+    },
+    {
+      "epoch": 0.2412799077543961,
+      "grad_norm": 7.458428859710693,
+      "learning_rate": 5.3166986564299424e-05,
+      "loss": 1.2032,
+      "step": 279
+    },
+    {
+      "epoch": 0.2421447102911502,
+      "grad_norm": 11.762413024902344,
+      "learning_rate": 5.3358925143953944e-05,
+      "loss": 1.9775,
+      "step": 280
+    },
+    {
+      "epoch": 0.24300951282790428,
+      "grad_norm": 6.029952049255371,
+      "learning_rate": 5.3550863723608444e-05,
+      "loss": 0.523,
+      "step": 281
+    },
+    {
+      "epoch": 0.2438743153646584,
+      "grad_norm": 7.083131313323975,
+      "learning_rate": 5.3742802303262964e-05,
+      "loss": 0.6166,
+      "step": 282
+    },
+    {
+      "epoch": 0.2447391179014125,
+      "grad_norm": 8.343469619750977,
+      "learning_rate": 5.3934740882917464e-05,
+      "loss": 0.7902,
+      "step": 283
+    },
+    {
+      "epoch": 0.24560392043816662,
+      "grad_norm": 11.58956241607666,
+      "learning_rate": 5.4126679462571984e-05,
+      "loss": 1.1019,
+      "step": 284
+    },
+    {
+      "epoch": 0.24646872297492073,
+      "grad_norm": 6.451682090759277,
+      "learning_rate": 5.431861804222649e-05,
+      "loss": 1.1185,
+      "step": 285
+    },
+    {
+      "epoch": 0.24733352551167484,
+      "grad_norm": 8.293807983398438,
+      "learning_rate": 5.4510556621881004e-05,
+      "loss": 0.7051,
+      "step": 286
+    },
+    {
+      "epoch": 0.24819832804842895,
+      "grad_norm": 6.799464702606201,
+      "learning_rate": 5.470249520153551e-05,
+      "loss": 1.076,
+      "step": 287
+    },
+    {
+      "epoch": 0.24906313058518306,
+      "grad_norm": 6.457718849182129,
+      "learning_rate": 5.4894433781190025e-05,
+      "loss": 1.5065,
+      "step": 288
+    },
+    {
+      "epoch": 0.24992793312193715,
+      "grad_norm": 8.503544807434082,
+      "learning_rate": 5.508637236084453e-05,
+      "loss": 0.9986,
+      "step": 289
+    },
+    {
+      "epoch": 0.25079273565869126,
+      "grad_norm": 8.062347412109375,
+      "learning_rate": 5.5278310940499045e-05,
+      "loss": 1.1196,
+      "step": 290
+    },
+    {
+      "epoch": 0.2516575381954454,
+      "grad_norm": 5.3419508934021,
+      "learning_rate": 5.547024952015355e-05,
+      "loss": 0.7055,
+      "step": 291
+    },
+    {
+      "epoch": 0.2525223407321995,
+      "grad_norm": 3.2817585468292236,
+      "learning_rate": 5.5662188099808065e-05,
+      "loss": 0.2865,
+      "step": 292
+    },
+    {
+      "epoch": 0.25338714326895356,
+      "grad_norm": 8.452672004699707,
+      "learning_rate": 5.585412667946257e-05,
+      "loss": 0.6973,
+      "step": 293
+    },
+    {
+      "epoch": 0.2542519458057077,
+      "grad_norm": 9.172618865966797,
+      "learning_rate": 5.6046065259117085e-05,
+      "loss": 1.0347,
+      "step": 294
+    },
+    {
+      "epoch": 0.2551167483424618,
+      "grad_norm": 7.101957321166992,
+      "learning_rate": 5.623800383877159e-05,
+      "loss": 0.5065,
+      "step": 295
+    },
+    {
+      "epoch": 0.2559815508792159,
+      "grad_norm": 8.655692100524902,
+      "learning_rate": 5.6429942418426105e-05,
+      "loss": 0.7479,
+      "step": 296
+    },
+    {
+      "epoch": 0.25684635341597,
+      "grad_norm": 6.224137306213379,
+      "learning_rate": 5.662188099808061e-05,
+      "loss": 0.5214,
+      "step": 297
+    },
+    {
+      "epoch": 0.25771115595272415,
+      "grad_norm": 5.057961463928223,
+      "learning_rate": 5.6813819577735125e-05,
+      "loss": 0.4925,
+      "step": 298
+    },
+    {
+      "epoch": 0.25857595848947823,
+      "grad_norm": 5.989309787750244,
+      "learning_rate": 5.700575815738963e-05,
+      "loss": 0.9331,
+      "step": 299
+    },
+    {
+      "epoch": 0.25944076102623237,
+      "grad_norm": 5.4001336097717285,
+      "learning_rate": 5.7197696737044146e-05,
+      "loss": 0.4239,
+      "step": 300
+    },
+    {
+      "epoch": 0.26030556356298645,
+      "grad_norm": 8.392406463623047,
+      "learning_rate": 5.7389635316698666e-05,
+      "loss": 0.7426,
+      "step": 301
+    },
+    {
+      "epoch": 0.26117036609974054,
+      "grad_norm": 9.140869140625,
+      "learning_rate": 5.758157389635317e-05,
+      "loss": 1.292,
+      "step": 302
+    },
+    {
+      "epoch": 0.2620351686364947,
+      "grad_norm": 5.900636196136475,
+      "learning_rate": 5.7773512476007686e-05,
+      "loss": 1.1471,
+      "step": 303
+    },
+    {
+      "epoch": 0.26289997117324876,
+      "grad_norm": 2.76983904838562,
+      "learning_rate": 5.796545105566219e-05,
+      "loss": 0.5639,
+      "step": 304
+    },
+    {
+      "epoch": 0.2637647737100029,
+      "grad_norm": 8.212996482849121,
+      "learning_rate": 5.8157389635316706e-05,
+      "loss": 1.592,
+      "step": 305
+    },
+    {
+      "epoch": 0.264629576246757,
+      "grad_norm": 6.7358174324035645,
+      "learning_rate": 5.834932821497121e-05,
+      "loss": 0.6063,
+      "step": 306
+    },
+    {
+      "epoch": 0.2654943787835111,
+      "grad_norm": 9.422693252563477,
+      "learning_rate": 5.8541266794625726e-05,
+      "loss": 0.665,
+      "step": 307
+    },
+    {
+      "epoch": 0.2663591813202652,
+      "grad_norm": 10.346942901611328,
+      "learning_rate": 5.873320537428023e-05,
+      "loss": 0.7966,
+      "step": 308
+    },
+    {
+      "epoch": 0.2672239838570193,
+      "grad_norm": 8.950202941894531,
+      "learning_rate": 5.8925143953934746e-05,
+      "loss": 0.6255,
+      "step": 309
+    },
+    {
+      "epoch": 0.2680887863937734,
+      "grad_norm": 6.519852638244629,
+      "learning_rate": 5.911708253358925e-05,
+      "loss": 0.7197,
+      "step": 310
+    },
+    {
+      "epoch": 0.2689535889305275,
+      "grad_norm": 12.285760879516602,
+      "learning_rate": 5.9309021113243767e-05,
+      "loss": 2.22,
+      "step": 311
+    },
+    {
+      "epoch": 0.26981839146728165,
+      "grad_norm": 9.598986625671387,
+      "learning_rate": 5.950095969289827e-05,
+      "loss": 0.7472,
+      "step": 312
+    },
+    {
+      "epoch": 0.27068319400403573,
+      "grad_norm": 13.030138969421387,
+      "learning_rate": 5.969289827255279e-05,
+      "loss": 1.0278,
+      "step": 313
+    },
+    {
+      "epoch": 0.27154799654078987,
+      "grad_norm": 9.371500015258789,
+      "learning_rate": 5.9884836852207293e-05,
+      "loss": 0.6434,
+      "step": 314
+    },
+    {
+      "epoch": 0.27241279907754395,
+      "grad_norm": 7.387608528137207,
+      "learning_rate": 6.007677543186181e-05,
+      "loss": 0.4596,
+      "step": 315
+    },
+    {
+      "epoch": 0.2732776016142981,
+      "grad_norm": 6.994756698608398,
+      "learning_rate": 6.0268714011516314e-05,
+      "loss": 0.5547,
+      "step": 316
+    },
+    {
+      "epoch": 0.2741424041510522,
+      "grad_norm": 7.713170528411865,
+      "learning_rate": 6.046065259117083e-05,
+      "loss": 1.2906,
+      "step": 317
+    },
+    {
+      "epoch": 0.27500720668780626,
+      "grad_norm": 12.936992645263672,
+      "learning_rate": 6.0652591170825334e-05,
+      "loss": 2.2893,
+      "step": 318
+    },
+    {
+      "epoch": 0.2758720092245604,
+      "grad_norm": 12.210866928100586,
+      "learning_rate": 6.084452975047985e-05,
+      "loss": 2.0067,
+      "step": 319
+    },
+    {
+      "epoch": 0.2767368117613145,
+      "grad_norm": 9.767999649047852,
+      "learning_rate": 6.103646833013436e-05,
+      "loss": 1.0523,
+      "step": 320
+    },
+    {
+      "epoch": 0.2776016142980686,
+      "grad_norm": 10.349803924560547,
+      "learning_rate": 6.122840690978887e-05,
+      "loss": 1.425,
+      "step": 321
+    },
+    {
+      "epoch": 0.2784664168348227,
+      "grad_norm": 8.848223686218262,
+      "learning_rate": 6.142034548944337e-05,
+      "loss": 1.0846,
+      "step": 322
+    },
+    {
+      "epoch": 0.27933121937157684,
+      "grad_norm": 12.004369735717773,
+      "learning_rate": 6.16122840690979e-05,
+      "loss": 1.614,
+      "step": 323
+    },
+    {
+      "epoch": 0.28019602190833093,
+      "grad_norm": 4.841424465179443,
+      "learning_rate": 6.18042226487524e-05,
+      "loss": 0.831,
+      "step": 324
+    },
+    {
+      "epoch": 0.281060824445085,
+      "grad_norm": 10.002786636352539,
+      "learning_rate": 6.199616122840691e-05,
+      "loss": 0.8297,
+      "step": 325
+    },
+    {
+      "epoch": 0.28192562698183915,
+      "grad_norm": 6.301035404205322,
+      "learning_rate": 6.218809980806143e-05,
+      "loss": 0.5425,
+      "step": 326
+    },
+    {
+      "epoch": 0.28279042951859323,
+      "grad_norm": 5.8098626136779785,
+      "learning_rate": 6.238003838771593e-05,
+      "loss": 0.6583,
+      "step": 327
+    },
+    {
+      "epoch": 0.2836552320553474,
+      "grad_norm": 5.272045135498047,
+      "learning_rate": 6.257197696737045e-05,
+      "loss": 1.0148,
+      "step": 328
+    },
+    {
+      "epoch": 0.28452003459210146,
+      "grad_norm": 8.22673511505127,
+      "learning_rate": 6.276391554702495e-05,
+      "loss": 1.4798,
+      "step": 329
+    },
+    {
+      "epoch": 0.2853848371288556,
+      "grad_norm": 3.6933820247650146,
+      "learning_rate": 6.295585412667947e-05,
+      "loss": 0.3907,
+      "step": 330
+    },
+    {
+      "epoch": 0.2862496396656097,
+      "grad_norm": 9.97194766998291,
+      "learning_rate": 6.314779270633397e-05,
+      "loss": 1.2206,
+      "step": 331
+    },
+    {
+      "epoch": 0.2871144422023638,
+      "grad_norm": 3.41243577003479,
+      "learning_rate": 6.33397312859885e-05,
+      "loss": 0.6509,
+      "step": 332
+    },
+    {
+      "epoch": 0.2879792447391179,
+      "grad_norm": 5.184510231018066,
+      "learning_rate": 6.3531669865643e-05,
+      "loss": 0.5982,
+      "step": 333
+    },
+    {
+      "epoch": 0.288844047275872,
+      "grad_norm": 6.894106864929199,
+      "learning_rate": 6.372360844529751e-05,
+      "loss": 1.066,
+      "step": 334
+    },
+    {
+      "epoch": 0.2897088498126261,
+      "grad_norm": 6.806879997253418,
+      "learning_rate": 6.391554702495202e-05,
+      "loss": 0.6874,
+      "step": 335
+    },
+    {
+      "epoch": 0.2905736523493802,
+      "grad_norm": 4.7376933097839355,
+      "learning_rate": 6.410748560460654e-05,
+      "loss": 0.2232,
+      "step": 336
+    },
+    {
+      "epoch": 0.29143845488613435,
+      "grad_norm": 7.3895745277404785,
+      "learning_rate": 6.429942418426104e-05,
+      "loss": 0.8978,
+      "step": 337
+    },
+    {
+      "epoch": 0.29230325742288843,
+      "grad_norm": 4.52320671081543,
+      "learning_rate": 6.449136276391555e-05,
+      "loss": 0.5689,
+      "step": 338
+    },
+    {
+      "epoch": 0.29316805995964257,
+      "grad_norm": 10.309342384338379,
+      "learning_rate": 6.468330134357006e-05,
+      "loss": 1.1131,
+      "step": 339
+    },
+    {
+      "epoch": 0.29403286249639665,
+      "grad_norm": 7.698537826538086,
+      "learning_rate": 6.487523992322458e-05,
+      "loss": 0.4493,
+      "step": 340
+    },
+    {
+      "epoch": 0.2948976650331508,
+      "grad_norm": 9.31425952911377,
+      "learning_rate": 6.506717850287908e-05,
+      "loss": 0.5409,
+      "step": 341
+    },
+    {
+      "epoch": 0.2957624675699049,
+      "grad_norm": 3.6749117374420166,
+      "learning_rate": 6.525911708253359e-05,
+      "loss": 0.5921,
+      "step": 342
+    },
+    {
+      "epoch": 0.29662727010665896,
+      "grad_norm": 8.300640106201172,
+      "learning_rate": 6.54510556621881e-05,
+      "loss": 0.6657,
+      "step": 343
+    },
+    {
+      "epoch": 0.2974920726434131,
+      "grad_norm": 7.509027481079102,
+      "learning_rate": 6.564299424184262e-05,
+      "loss": 0.8345,
+      "step": 344
+    },
+    {
+      "epoch": 0.2983568751801672,
+      "grad_norm": 6.161888122558594,
+      "learning_rate": 6.583493282149712e-05,
+      "loss": 0.9418,
+      "step": 345
+    },
+    {
+      "epoch": 0.2983568751801672,
+      "eval_Qnli-dev-1024_cosine_accuracy": 0.7291666666666666,
+      "eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.8395562767982483,
+      "eval_Qnli-dev-1024_cosine_ap": 0.753054394869091,
+      "eval_Qnli-dev-1024_cosine_f1": 0.7216494845360825,
+      "eval_Qnli-dev-1024_cosine_f1_threshold": 0.8208398818969727,
+      "eval_Qnli-dev-1024_cosine_mcc": 0.44512380090846426,
+      "eval_Qnli-dev-1024_cosine_precision": 0.6730769230769231,
+      "eval_Qnli-dev-1024_cosine_recall": 0.7777777777777778,
+      "eval_Qnli-dev_cosine_accuracy": 0.7291666666666666,
+      "eval_Qnli-dev_cosine_accuracy_threshold": 0.7856455445289612,
+      "eval_Qnli-dev_cosine_ap": 0.7529763141762885,
+      "eval_Qnli-dev_cosine_f1": 0.7169811320754719,
+      "eval_Qnli-dev_cosine_f1_threshold": 0.7426920533180237,
+      "eval_Qnli-dev_cosine_mcc": 0.4079411028893153,
+      "eval_Qnli-dev_cosine_precision": 0.6229508196721312,
+      "eval_Qnli-dev_cosine_recall": 0.8444444444444444,
+      "eval_allNLI--triplets-1024_cosine_accuracy": 0.9270833134651184,
+      "eval_allNLI-triplets_cosine_accuracy": 0.9791666865348816,
+      "eval_global_dataset_loss": 0.6650346517562866,
+      "eval_global_dataset_runtime": 67.8938,
+      "eval_global_dataset_samples_per_second": 14.316,
+      "eval_global_dataset_steps_per_second": 0.309,
+      "eval_sequential_score": 0.9270833134651184,
+      "eval_sts-test-1024_pearson_cosine": 0.8520971782224942,
+      "eval_sts-test-1024_spearman_cosine": 0.894236476710775,
+      "eval_sts-test_pearson_cosine": 0.9080363785366253,
+      "eval_sts-test_spearman_cosine": 0.9193020252854658,
+      "step": 345
+    },
+    {
+      "epoch": 0.2992216777169213,
+      "grad_norm": 3.2981910705566406,
+      "learning_rate": 6.602687140115163e-05,
+      "loss": 0.3752,
+      "step": 346
+    },
+    {
+      "epoch": 0.3000864802536754,
+      "grad_norm": 5.650154113769531,
+      "learning_rate": 6.621880998080614e-05,
+      "loss": 0.362,
+      "step": 347
+    },
+    {
+      "epoch": 0.30095128279042954,
+      "grad_norm": 7.166718482971191,
+      "learning_rate": 6.641074856046066e-05,
+      "loss": 0.99,
+      "step": 348
+    },
+    {
+      "epoch": 0.3018160853271836,
+      "grad_norm": 5.6671295166015625,
+      "learning_rate": 6.660268714011516e-05,
+      "loss": 0.4309,
+      "step": 349
+    },
+    {
+      "epoch": 0.3026808878639377,
+      "grad_norm": 7.15688943862915,
+      "learning_rate": 6.679462571976968e-05,
+      "loss": 0.4101,
+      "step": 350
+    },
+    {
+      "epoch": 0.30354569040069185,
+      "grad_norm": 4.845415115356445,
+      "learning_rate": 6.698656429942419e-05,
+      "loss": 0.357,
+      "step": 351
+    },
+    {
+      "epoch": 0.30441049293744593,
+      "grad_norm": 6.766101360321045,
+      "learning_rate": 6.71785028790787e-05,
+      "loss": 0.4257,
+      "step": 352
+    },
+    {
+      "epoch": 0.30527529547420007,
+      "grad_norm": 9.900660514831543,
+      "learning_rate": 6.737044145873322e-05,
+      "loss": 0.6665,
+      "step": 353
+    },
+    {
+      "epoch": 0.30614009801095415,
+      "grad_norm": 4.632408142089844,
+      "learning_rate": 6.756238003838772e-05,
+      "loss": 0.4523,
+      "step": 354
+    },
+    {
+      "epoch": 0.3070049005477083,
+      "grad_norm": 9.697669982910156,
+      "learning_rate": 6.775431861804223e-05,
+      "loss": 1.4959,
+      "step": 355
+    },
+    {
+      "epoch": 0.3078697030844624,
+      "grad_norm": 9.970297813415527,
+      "learning_rate": 6.794625719769674e-05,
+      "loss": 0.833,
+      "step": 356
+    },
+    {
+      "epoch": 0.3087345056212165,
+      "grad_norm": 9.964993476867676,
+      "learning_rate": 6.813819577735126e-05,
+      "loss": 0.7928,
+      "step": 357
+    },
+    {
+      "epoch": 0.3095993081579706,
+      "grad_norm": 3.866377353668213,
+      "learning_rate": 6.833013435700576e-05,
+      "loss": 0.3277,
+      "step": 358
+    },
+    {
+      "epoch": 0.3104641106947247,
+      "grad_norm": 6.179882526397705,
+      "learning_rate": 6.852207293666027e-05,
+      "loss": 0.5336,
+      "step": 359
+    },
+    {
+      "epoch": 0.3113289132314788,
+      "grad_norm": 5.517486095428467,
+      "learning_rate": 6.871401151631478e-05,
+      "loss": 0.4663,
+      "step": 360
+    },
+    {
+      "epoch": 0.3121937157682329,
+      "grad_norm": 6.7118306159973145,
+      "learning_rate": 6.89059500959693e-05,
+      "loss": 0.5869,
+      "step": 361
+    },
+    {
+      "epoch": 0.31305851830498704,
+      "grad_norm": 8.203336715698242,
+      "learning_rate": 6.90978886756238e-05,
+      "loss": 0.6056,
+      "step": 362
+    },
+    {
+      "epoch": 0.3139233208417411,
+      "grad_norm": 4.762539863586426,
+      "learning_rate": 6.928982725527831e-05,
+      "loss": 0.5402,
+      "step": 363
+    },
+    {
+      "epoch": 0.31478812337849527,
+      "grad_norm": 5.39819860458374,
+      "learning_rate": 6.948176583493282e-05,
+      "loss": 1.046,
+      "step": 364
+    },
+    {
+      "epoch": 0.31565292591524935,
+      "grad_norm": 4.130873680114746,
+      "learning_rate": 6.967370441458734e-05,
+      "loss": 0.3102,
+      "step": 365
+    },
+    {
+      "epoch": 0.31651772845200343,
+      "grad_norm": 7.361220359802246,
+      "learning_rate": 6.986564299424184e-05,
+      "loss": 0.412,
+      "step": 366
+    },
+    {
+      "epoch": 0.31738253098875757,
+      "grad_norm": 7.686898708343506,
+      "learning_rate": 7.005758157389636e-05,
+      "loss": 0.5703,
+      "step": 367
+    },
+    {
+      "epoch": 0.31824733352551166,
+      "grad_norm": 10.829538345336914,
+      "learning_rate": 7.024952015355086e-05,
+      "loss": 1.6531,
+      "step": 368
+    },
+    {
+      "epoch": 0.3191121360622658,
+      "grad_norm": 5.71692419052124,
+      "learning_rate": 7.044145873320538e-05,
+      "loss": 0.4314,
+      "step": 369
+    },
+    {
+      "epoch": 0.3199769385990199,
+      "grad_norm": 8.669037818908691,
+      "learning_rate": 7.063339731285988e-05,
+      "loss": 0.7062,
+      "step": 370
+    },
+    {
+      "epoch": 0.320841741135774,
+      "grad_norm": 5.996104717254639,
+      "learning_rate": 7.08253358925144e-05,
+      "loss": 0.5788,
+      "step": 371
+    },
+    {
+      "epoch": 0.3217065436725281,
+      "grad_norm": 12.612412452697754,
+      "learning_rate": 7.10172744721689e-05,
+      "loss": 1.8529,
+      "step": 372
+    },
+    {
+      "epoch": 0.32257134620928224,
+      "grad_norm": 8.934858322143555,
+      "learning_rate": 7.120921305182342e-05,
+      "loss": 0.6606,
+      "step": 373
+    },
+    {
+      "epoch": 0.3234361487460363,
+      "grad_norm": 10.218025207519531,
+      "learning_rate": 7.140115163147793e-05,
+      "loss": 0.8089,
+      "step": 374
+    },
+    {
+      "epoch": 0.3243009512827904,
+      "grad_norm": 5.20566987991333,
+      "learning_rate": 7.159309021113245e-05,
+      "loss": 0.3905,
+      "step": 375
+    },
+    {
+      "epoch": 0.32516575381954455,
+      "grad_norm": 10.471417427062988,
+      "learning_rate": 7.178502879078695e-05,
+      "loss": 1.2417,
+      "step": 376
+    },
+    {
+      "epoch": 0.32603055635629863,
+      "grad_norm": 7.703388690948486,
+      "learning_rate": 7.197696737044146e-05,
+      "loss": 0.8738,
+      "step": 377
+    },
+    {
+      "epoch": 0.32689535889305277,
+      "grad_norm": 8.099038124084473,
+      "learning_rate": 7.216890595009598e-05,
+      "loss": 0.8544,
+      "step": 378
+    },
+    {
+      "epoch": 0.32776016142980685,
+      "grad_norm": 6.550043106079102,
+      "learning_rate": 7.236084452975049e-05,
+      "loss": 0.4667,
+      "step": 379
+    },
+    {
+      "epoch": 0.328624963966561,
+      "grad_norm": 10.672149658203125,
+      "learning_rate": 7.255278310940499e-05,
+      "loss": 0.8825,
+      "step": 380
+    },
+    {
+      "epoch": 0.3294897665033151,
+      "grad_norm": 7.584779262542725,
+      "learning_rate": 7.27447216890595e-05,
+      "loss": 0.6003,
+      "step": 381
+    },
+    {
+      "epoch": 0.33035456904006916,
+      "grad_norm": 5.818914890289307,
+      "learning_rate": 7.293666026871402e-05,
+      "loss": 0.4643,
+      "step": 382
+    },
+    {
+      "epoch": 0.3312193715768233,
+      "grad_norm": 6.871515274047852,
+      "learning_rate": 7.312859884836853e-05,
+      "loss": 0.5097,
+      "step": 383
+    },
+    {
+      "epoch": 0.3320841741135774,
+      "grad_norm": 3.9484200477600098,
+      "learning_rate": 7.332053742802303e-05,
+      "loss": 0.4679,
+      "step": 384
+    },
+    {
+      "epoch": 0.3329489766503315,
+      "grad_norm": 3.8606741428375244,
+      "learning_rate": 7.351247600767754e-05,
+      "loss": 0.3732,
+      "step": 385
+    },
+    {
+      "epoch": 0.3338137791870856,
+      "grad_norm": 10.65389347076416,
+      "learning_rate": 7.370441458733206e-05,
+      "loss": 0.9031,
+      "step": 386
+    },
+    {
+      "epoch": 0.33467858172383974,
+      "grad_norm": 10.56472396850586,
+      "learning_rate": 7.389635316698657e-05,
+      "loss": 0.6668,
+      "step": 387
+    },
+    {
+      "epoch": 0.3355433842605938,
+      "grad_norm": 9.798723220825195,
+      "learning_rate": 7.408829174664109e-05,
+      "loss": 0.7715,
+      "step": 388
+    },
+    {
+      "epoch": 0.33640818679734796,
+      "grad_norm": 8.35350227355957,
+      "learning_rate": 7.428023032629558e-05,
+      "loss": 0.8536,
+      "step": 389
+    },
+    {
+      "epoch": 0.33727298933410205,
+      "grad_norm": 7.99412727355957,
+      "learning_rate": 7.44721689059501e-05,
+      "loss": 0.9303,
+      "step": 390
+    },
+    {
+      "epoch": 0.33813779187085613,
+      "grad_norm": 8.098565101623535,
+      "learning_rate": 7.46641074856046e-05,
+      "loss": 0.3704,
+      "step": 391
+    },
+    {
+      "epoch": 0.33900259440761027,
+      "grad_norm": 7.83499002456665,
+      "learning_rate": 7.485604606525913e-05,
+      "loss": 0.3678,
+      "step": 392
+    },
+    {
+      "epoch": 0.33986739694436435,
+      "grad_norm": 9.846261978149414,
+      "learning_rate": 7.504798464491363e-05,
+      "loss": 1.6854,
+      "step": 393
+    },
+    {
+      "epoch": 0.3407321994811185,
+      "grad_norm": 10.261216163635254,
+      "learning_rate": 7.523992322456814e-05,
+      "loss": 0.7636,
+      "step": 394
+    },
+    {
+      "epoch": 0.3415970020178726,
+      "grad_norm": 5.547618389129639,
+      "learning_rate": 7.543186180422265e-05,
+      "loss": 0.3462,
+      "step": 395
+    },
+    {
+      "epoch": 0.3424618045546267,
+      "grad_norm": 6.500753402709961,
+      "learning_rate": 7.562380038387717e-05,
+      "loss": 0.644,
+      "step": 396
+    },
+    {
+      "epoch": 0.3433266070913808,
+      "grad_norm": 8.669839859008789,
+      "learning_rate": 7.581573896353167e-05,
+      "loss": 0.7317,
+      "step": 397
+    },
+    {
+      "epoch": 0.34419140962813494,
+      "grad_norm": 6.280559062957764,
+      "learning_rate": 7.600767754318618e-05,
+      "loss": 0.7023,
+      "step": 398
+    },
+    {
+      "epoch": 0.345056212164889,
+      "grad_norm": 7.725942611694336,
+      "learning_rate": 7.61996161228407e-05,
+      "loss": 0.7164,
+      "step": 399
+    },
+    {
+      "epoch": 0.3459210147016431,
+      "grad_norm": 7.478891849517822,
+      "learning_rate": 7.639155470249521e-05,
+      "loss": 0.4271,
+      "step": 400
+    },
+    {
+      "epoch": 0.34678581723839724,
+      "grad_norm": 4.877331256866455,
+      "learning_rate": 7.658349328214971e-05,
+      "loss": 0.7332,
+      "step": 401
+    },
+    {
+      "epoch": 0.3476506197751513,
+      "grad_norm": 8.025667190551758,
+      "learning_rate": 7.677543186180422e-05,
+      "loss": 0.3978,
+      "step": 402
+    },
+    {
+      "epoch": 0.34851542231190547,
+      "grad_norm": 7.804194450378418,
+      "learning_rate": 7.696737044145874e-05,
+      "loss": 0.5208,
+      "step": 403
+    },
+    {
+      "epoch": 0.34938022484865955,
+      "grad_norm": 5.8793230056762695,
+      "learning_rate": 7.715930902111325e-05,
+      "loss": 0.4889,
+      "step": 404
+    },
+    {
+      "epoch": 0.3502450273854137,
+      "grad_norm": 8.609319686889648,
+      "learning_rate": 7.735124760076777e-05,
+      "loss": 0.769,
+      "step": 405
+    },
+    {
+      "epoch": 0.35110982992216777,
+      "grad_norm": 6.56134033203125,
+      "learning_rate": 7.754318618042226e-05,
+      "loss": 0.3932,
+      "step": 406
+    },
+    {
+      "epoch": 0.35197463245892185,
+      "grad_norm": 8.588756561279297,
+      "learning_rate": 7.773512476007678e-05,
+      "loss": 0.5919,
+      "step": 407
+    },
+    {
+      "epoch": 0.352839434995676,
+      "grad_norm": 7.530106067657471,
+      "learning_rate": 7.792706333973129e-05,
+      "loss": 0.6037,
+      "step": 408
+    },
+    {
+      "epoch": 0.3537042375324301,
+      "grad_norm": 7.5281853675842285,
+      "learning_rate": 7.811900191938581e-05,
+      "loss": 0.4321,
+      "step": 409
+    },
+    {
+      "epoch": 0.3545690400691842,
+      "grad_norm": 8.16552448272705,
+      "learning_rate": 7.831094049904032e-05,
+      "loss": 1.1022,
+      "step": 410
+    },
+    {
+      "epoch": 0.3554338426059383,
+      "grad_norm": 8.752754211425781,
+      "learning_rate": 7.850287907869482e-05,
+      "loss": 0.5996,
+      "step": 411
+    },
+    {
+      "epoch": 0.35629864514269244,
+      "grad_norm": 7.659090995788574,
+      "learning_rate": 7.869481765834933e-05,
+      "loss": 0.5673,
+      "step": 412
+    },
+    {
+      "epoch": 0.3571634476794465,
+      "grad_norm": 6.884600639343262,
+      "learning_rate": 7.888675623800385e-05,
+      "loss": 0.3437,
+      "step": 413
+    },
+    {
+      "epoch": 0.35802825021620066,
+      "grad_norm": 5.328488349914551,
+      "learning_rate": 7.907869481765836e-05,
+      "loss": 0.519,
+      "step": 414
+    },
+    {
+      "epoch": 0.35889305275295474,
+      "grad_norm": 10.308977127075195,
+      "learning_rate": 7.927063339731286e-05,
+      "loss": 1.5373,
+      "step": 415
+    },
+    {
+      "epoch": 0.35975785528970883,
+      "grad_norm": 7.618837356567383,
+      "learning_rate": 7.946257197696737e-05,
+      "loss": 0.764,
+      "step": 416
+    },
+    {
+      "epoch": 0.36062265782646297,
+      "grad_norm": 8.787110328674316,
+      "learning_rate": 7.965451055662189e-05,
+      "loss": 0.6131,
+      "step": 417
+    },
+    {
+      "epoch": 0.36148746036321705,
+      "grad_norm": 6.432898998260498,
+      "learning_rate": 7.98464491362764e-05,
+      "loss": 0.6826,
+      "step": 418
+    },
+    {
+      "epoch": 0.3623522628999712,
+      "grad_norm": 8.762993812561035,
+      "learning_rate": 8.00383877159309e-05,
+      "loss": 0.9631,
+      "step": 419
+    },
+    {
+      "epoch": 0.3632170654367253,
+      "grad_norm": 5.939430236816406,
+      "learning_rate": 8.023032629558541e-05,
+      "loss": 0.4283,
+      "step": 420
+    },
+    {
+      "epoch": 0.3640818679734794,
+      "grad_norm": 8.092362403869629,
+      "learning_rate": 8.042226487523993e-05,
+      "loss": 1.2001,
+      "step": 421
+    },
+    {
+      "epoch": 0.3649466705102335,
+      "grad_norm": 7.594040870666504,
+      "learning_rate": 8.061420345489444e-05,
+      "loss": 0.4499,
+      "step": 422
+    },
+    {
+      "epoch": 0.3658114730469876,
+      "grad_norm": 12.614463806152344,
+      "learning_rate": 8.080614203454894e-05,
+      "loss": 1.4073,
+      "step": 423
+    },
+    {
+      "epoch": 0.3666762755837417,
+      "grad_norm": 6.807295322418213,
+      "learning_rate": 8.099808061420346e-05,
+      "loss": 0.8035,
+      "step": 424
+    },
+    {
+      "epoch": 0.3675410781204958,
+      "grad_norm": 3.6670141220092773,
+      "learning_rate": 8.119001919385797e-05,
+      "loss": 0.3207,
+      "step": 425
+    },
+    {
+      "epoch": 0.36840588065724994,
+      "grad_norm": 7.3801445960998535,
+      "learning_rate": 8.138195777351249e-05,
+      "loss": 0.4752,
+      "step": 426
+    },
+    {
+      "epoch": 0.369270683194004,
+      "grad_norm": 9.895638465881348,
+      "learning_rate": 8.157389635316698e-05,
+      "loss": 1.1256,
+      "step": 427
+    },
+    {
+      "epoch": 0.37013548573075816,
+      "grad_norm": 6.200985431671143,
+      "learning_rate": 8.17658349328215e-05,
+      "loss": 0.4226,
+      "step": 428
+    },
+    {
+      "epoch": 0.37100028826751225,
+      "grad_norm": 9.858406066894531,
+      "learning_rate": 8.195777351247601e-05,
+      "loss": 1.123,
+      "step": 429
+    },
+    {
+      "epoch": 0.3718650908042664,
+      "grad_norm": 7.274184703826904,
+      "learning_rate": 8.214971209213053e-05,
+      "loss": 0.4425,
+      "step": 430
+    },
+    {
+      "epoch": 0.37272989334102047,
+      "grad_norm": 4.712157249450684,
+      "learning_rate": 8.234165067178504e-05,
+      "loss": 0.4242,
+      "step": 431
+    },
+    {
+      "epoch": 0.37359469587777455,
+      "grad_norm": 7.515327453613281,
+      "learning_rate": 8.253358925143954e-05,
+      "loss": 1.0072,
+      "step": 432
+    },
+    {
+      "epoch": 0.3744594984145287,
+      "grad_norm": 3.97876238822937,
+      "learning_rate": 8.272552783109405e-05,
+      "loss": 0.241,
+      "step": 433
+    },
+    {
+      "epoch": 0.3753243009512828,
+      "grad_norm": 7.888240337371826,
+      "learning_rate": 8.291746641074857e-05,
+      "loss": 0.7359,
+      "step": 434
+    },
+    {
+      "epoch": 0.3761891034880369,
+      "grad_norm": 6.10671329498291,
+      "learning_rate": 8.310940499040308e-05,
+      "loss": 0.4583,
+      "step": 435
+    },
+    {
+      "epoch": 0.377053906024791,
+      "grad_norm": 6.102023601531982,
+      "learning_rate": 8.330134357005758e-05,
+      "loss": 0.9001,
+      "step": 436
+    },
+    {
+      "epoch": 0.37791870856154514,
+      "grad_norm": 7.122408390045166,
+      "learning_rate": 8.349328214971209e-05,
+      "loss": 0.4614,
+      "step": 437
+    },
+    {
+      "epoch": 0.3787835110982992,
+      "grad_norm": 9.432422637939453,
+      "learning_rate": 8.368522072936661e-05,
+      "loss": 1.238,
+      "step": 438
+    },
+    {
+      "epoch": 0.3796483136350533,
+      "grad_norm": 9.530061721801758,
+      "learning_rate": 8.387715930902112e-05,
+      "loss": 1.5289,
+      "step": 439
+    },
+    {
+      "epoch": 0.38051311617180744,
+      "grad_norm": 7.045010566711426,
+      "learning_rate": 8.406909788867562e-05,
+      "loss": 0.3283,
+      "step": 440
+    },
+    {
+      "epoch": 0.3813779187085615,
+      "grad_norm": 6.275206089019775,
+      "learning_rate": 8.426103646833013e-05,
+      "loss": 0.4147,
+      "step": 441
+    },
+    {
+      "epoch": 0.38224272124531566,
+      "grad_norm": 4.124218940734863,
+      "learning_rate": 8.445297504798465e-05,
+      "loss": 0.4956,
+      "step": 442
+    },
+    {
+      "epoch": 0.38310752378206975,
+      "grad_norm": 5.8184895515441895,
+      "learning_rate": 8.464491362763916e-05,
+      "loss": 0.5166,
+      "step": 443
+    },
+    {
+      "epoch": 0.3839723263188239,
+      "grad_norm": 2.6442999839782715,
+      "learning_rate": 8.483685220729366e-05,
+      "loss": 0.2486,
+      "step": 444
+    },
+    {
+      "epoch": 0.38483712885557797,
+      "grad_norm": 3.8425562381744385,
+      "learning_rate": 8.502879078694817e-05,
+      "loss": 0.4493,
+      "step": 445
+    },
+    {
+      "epoch": 0.3857019313923321,
+      "grad_norm": 9.125511169433594,
+      "learning_rate": 8.522072936660269e-05,
+      "loss": 1.0439,
+      "step": 446
+    },
+    {
+      "epoch": 0.3865667339290862,
+      "grad_norm": 9.67273998260498,
+      "learning_rate": 8.54126679462572e-05,
+      "loss": 1.249,
+      "step": 447
+    },
+    {
+      "epoch": 0.3874315364658403,
+      "grad_norm": 7.822050094604492,
+      "learning_rate": 8.560460652591172e-05,
+      "loss": 0.8329,
+      "step": 448
+    },
+    {
+      "epoch": 0.3882963390025944,
+      "grad_norm": 5.747166633605957,
+      "learning_rate": 8.579654510556623e-05,
+      "loss": 0.3256,
+      "step": 449
+    },
+    {
+      "epoch": 0.3891611415393485,
+      "grad_norm": 7.257145404815674,
+      "learning_rate": 8.598848368522073e-05,
+      "loss": 1.0333,
+      "step": 450
+    },
+    {
+      "epoch": 0.39002594407610264,
+      "grad_norm": 7.6516642570495605,
+      "learning_rate": 8.618042226487525e-05,
+      "loss": 0.3821,
+      "step": 451
+    },
+    {
+      "epoch": 0.3908907466128567,
+      "grad_norm": 6.943114757537842,
+      "learning_rate": 8.637236084452976e-05,
+      "loss": 0.4578,
+      "step": 452
+    },
+    {
+      "epoch": 0.39175554914961086,
+      "grad_norm": 6.90556526184082,
+      "learning_rate": 8.656429942418427e-05,
+      "loss": 0.6716,
+      "step": 453
+    },
+    {
+      "epoch": 0.39262035168636494,
+      "grad_norm": 5.005017280578613,
+      "learning_rate": 8.675623800383877e-05,
+      "loss": 0.2694,
+      "step": 454
+    },
+    {
+      "epoch": 0.393485154223119,
+      "grad_norm": 9.84821605682373,
+      "learning_rate": 8.694817658349329e-05,
+      "loss": 1.7739,
+      "step": 455
+    },
+    {
+      "epoch": 0.39434995675987317,
+      "grad_norm": 7.2032647132873535,
+      "learning_rate": 8.71401151631478e-05,
+      "loss": 0.7109,
+      "step": 456
+    },
+    {
+      "epoch": 0.39521475929662725,
+      "grad_norm": 10.030957221984863,
+      "learning_rate": 8.73320537428023e-05,
+      "loss": 0.5733,
+      "step": 457
+    },
+    {
+      "epoch": 0.3960795618333814,
+      "grad_norm": 3.6352131366729736,
+      "learning_rate": 8.752399232245681e-05,
+      "loss": 0.283,
+      "step": 458
+    },
+    {
+      "epoch": 0.39694436437013547,
+      "grad_norm": 3.4260525703430176,
+      "learning_rate": 8.771593090211133e-05,
+      "loss": 0.4214,
+      "step": 459
+    },
+    {
+      "epoch": 0.3978091669068896,
+      "grad_norm": 4.595706462860107,
+      "learning_rate": 8.790786948176584e-05,
+      "loss": 0.6332,
+      "step": 460
+    },
+    {
+      "epoch": 0.3978091669068896,
+      "eval_Qnli-dev-1024_cosine_accuracy": 0.75,
+      "eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.879416823387146,
+      "eval_Qnli-dev-1024_cosine_ap": 0.781841863547347,
+      "eval_Qnli-dev-1024_cosine_f1": 0.7346938775510203,
+      "eval_Qnli-dev-1024_cosine_f1_threshold": 0.8326764106750488,
+      "eval_Qnli-dev-1024_cosine_mcc": 0.4683019469005233,
+      "eval_Qnli-dev-1024_cosine_precision": 0.6792452830188679,
+      "eval_Qnli-dev-1024_cosine_recall": 0.8,
+      "eval_Qnli-dev_cosine_accuracy": 0.7291666666666666,
+      "eval_Qnli-dev_cosine_accuracy_threshold": 0.8109143972396851,
+      "eval_Qnli-dev_cosine_ap": 0.7568700790587495,
+      "eval_Qnli-dev_cosine_f1": 0.7346938775510203,
+      "eval_Qnli-dev_cosine_f1_threshold": 0.742037296295166,
+      "eval_Qnli-dev_cosine_mcc": 0.4683019469005233,
+      "eval_Qnli-dev_cosine_precision": 0.6792452830188679,
+      "eval_Qnli-dev_cosine_recall": 0.8,
+      "eval_allNLI--triplets-1024_cosine_accuracy": 0.9375,
+      "eval_allNLI-triplets_cosine_accuracy": 0.9791666865348816,
+      "eval_global_dataset_loss": 0.7834931015968323,
+      "eval_global_dataset_runtime": 68.0114,
+      "eval_global_dataset_samples_per_second": 14.292,
+      "eval_global_dataset_steps_per_second": 0.309,
+      "eval_sequential_score": 0.9375,
+      "eval_sts-test-1024_pearson_cosine": 0.8881975899886265,
+      "eval_sts-test-1024_spearman_cosine": 0.9114913957523785,
+      "eval_sts-test_pearson_cosine": 0.9096311897411768,
+      "eval_sts-test_spearman_cosine": 0.9200152476526354,
+      "step": 460
+    },
+    {
+      "epoch": 0.3986739694436437,
+      "grad_norm": 10.936285018920898,
+      "learning_rate": 8.809980806142035e-05,
+      "loss": 1.2458,
+      "step": 461
+    },
+    {
+      "epoch": 0.39953877198039783,
+      "grad_norm": 5.99333381652832,
+      "learning_rate": 8.829174664107485e-05,
+      "loss": 0.3104,
+      "step": 462
+    },
+    {
+      "epoch": 0.4004035745171519,
+      "grad_norm": 6.260789394378662,
+      "learning_rate": 8.848368522072937e-05,
+      "loss": 0.4634,
+      "step": 463
+    },
+    {
+      "epoch": 0.401268377053906,
+      "grad_norm": 4.397698879241943,
+      "learning_rate": 8.867562380038388e-05,
+      "loss": 0.5893,
+      "step": 464
+    },
+    {
+      "epoch": 0.40213317959066014,
+      "grad_norm": 4.650321960449219,
+      "learning_rate": 8.88675623800384e-05,
+      "loss": 0.377,
+      "step": 465
+    },
+    {
+      "epoch": 0.4029979821274142,
+      "grad_norm": 9.407013893127441,
+      "learning_rate": 8.905950095969289e-05,
+      "loss": 0.5403,
+      "step": 466
+    },
+    {
+      "epoch": 0.40386278466416836,
+      "grad_norm": 10.255672454833984,
+      "learning_rate": 8.925143953934741e-05,
+      "loss": 0.9454,
+      "step": 467
+    },
+    {
+      "epoch": 0.40472758720092245,
+      "grad_norm": 11.186202049255371,
+      "learning_rate": 8.944337811900192e-05,
+      "loss": 0.9292,
+      "step": 468
+    },
+    {
+      "epoch": 0.4055923897376766,
+      "grad_norm": 6.13421630859375,
+      "learning_rate": 8.963531669865644e-05,
+      "loss": 0.4132,
+      "step": 469
+    },
+    {
+      "epoch": 0.40645719227443067,
+      "grad_norm": 12.439327239990234,
+      "learning_rate": 8.982725527831093e-05,
+      "loss": 0.9805,
+      "step": 470
+    },
+    {
+      "epoch": 0.4073219948111848,
+      "grad_norm": 10.574874877929688,
+      "learning_rate": 9.001919385796545e-05,
+      "loss": 0.7784,
+      "step": 471
+    },
+    {
+      "epoch": 0.4081867973479389,
+      "grad_norm": 5.993617057800293,
+      "learning_rate": 9.021113243761996e-05,
+      "loss": 0.369,
+      "step": 472
+    },
+    {
+      "epoch": 0.409051599884693,
+      "grad_norm": 10.213888168334961,
+      "learning_rate": 9.040307101727448e-05,
+      "loss": 1.4911,
+      "step": 473
+    },
+    {
+      "epoch": 0.4099164024214471,
+      "grad_norm": 7.043622016906738,
+      "learning_rate": 9.059500959692899e-05,
+      "loss": 0.4223,
+      "step": 474
+    },
+    {
+      "epoch": 0.4107812049582012,
+      "grad_norm": 8.350674629211426,
+      "learning_rate": 9.07869481765835e-05,
+      "loss": 1.2959,
+      "step": 475
+    },
+    {
+      "epoch": 0.41164600749495533,
+      "grad_norm": 8.64110279083252,
+      "learning_rate": 9.097888675623801e-05,
+      "loss": 0.4928,
+      "step": 476
+    },
+    {
+      "epoch": 0.4125108100317094,
+      "grad_norm": 6.388255596160889,
+      "learning_rate": 9.117082533589252e-05,
+      "loss": 1.02,
+      "step": 477
+    },
+    {
+      "epoch": 0.41337561256846356,
+      "grad_norm": 4.513554096221924,
+      "learning_rate": 9.136276391554703e-05,
+      "loss": 0.5034,
+      "step": 478
+    },
+    {
+      "epoch": 0.41424041510521764,
+      "grad_norm": 10.509414672851562,
+      "learning_rate": 9.155470249520153e-05,
+      "loss": 1.6119,
+      "step": 479
+    },
+    {
+      "epoch": 0.4151052176419717,
+      "grad_norm": 5.085805416107178,
+      "learning_rate": 9.174664107485605e-05,
+      "loss": 0.59,
+      "step": 480
+    },
+    {
+      "epoch": 0.41597002017872586,
+      "grad_norm": 8.275995254516602,
+      "learning_rate": 9.193857965451056e-05,
+      "loss": 0.5589,
+      "step": 481
+    },
+    {
+      "epoch": 0.41683482271547995,
+      "grad_norm": 9.266075134277344,
+      "learning_rate": 9.213051823416508e-05,
+      "loss": 0.8402,
+      "step": 482
+    },
+    {
+      "epoch": 0.4176996252522341,
+      "grad_norm": 9.998162269592285,
+      "learning_rate": 9.232245681381957e-05,
+      "loss": 0.5293,
+      "step": 483
+    },
+    {
+      "epoch": 0.41856442778898817,
+      "grad_norm": 6.49897575378418,
+      "learning_rate": 9.25143953934741e-05,
+      "loss": 0.5911,
+      "step": 484
+    },
+    {
+      "epoch": 0.4194292303257423,
+      "grad_norm": 2.814267158508301,
+      "learning_rate": 9.27063339731286e-05,
+      "loss": 0.1819,
+      "step": 485
+    },
+    {
+      "epoch": 0.4202940328624964,
+      "grad_norm": 6.657732009887695,
+      "learning_rate": 9.289827255278312e-05,
+      "loss": 0.6963,
+      "step": 486
+    },
+    {
+      "epoch": 0.42115883539925053,
+      "grad_norm": 10.121885299682617,
+      "learning_rate": 9.309021113243761e-05,
+      "loss": 0.6004,
+      "step": 487
+    },
+    {
+      "epoch": 0.4220236379360046,
+      "grad_norm": 15.206952095031738,
+      "learning_rate": 9.328214971209214e-05,
+      "loss": 2.0825,
+      "step": 488
+    },
+    {
+      "epoch": 0.4228884404727587,
+      "grad_norm": 11.911534309387207,
+      "learning_rate": 9.347408829174664e-05,
+      "loss": 1.6674,
+      "step": 489
+    },
+    {
+      "epoch": 0.42375324300951284,
+      "grad_norm": 6.184067726135254,
+      "learning_rate": 9.366602687140116e-05,
+      "loss": 0.5685,
+      "step": 490
+    },
+    {
+      "epoch": 0.4246180455462669,
+      "grad_norm": 7.771515846252441,
+      "learning_rate": 9.385796545105567e-05,
+      "loss": 0.7835,
+      "step": 491
+    },
+    {
+      "epoch": 0.42548284808302106,
+      "grad_norm": 8.338656425476074,
+      "learning_rate": 9.404990403071018e-05,
+      "loss": 0.4613,
+      "step": 492
+    },
+    {
+      "epoch": 0.42634765061977514,
+      "grad_norm": 9.678628921508789,
+      "learning_rate": 9.424184261036468e-05,
+      "loss": 1.0492,
+      "step": 493
+    },
+    {
+      "epoch": 0.4272124531565293,
+      "grad_norm": 5.315983772277832,
+      "learning_rate": 9.44337811900192e-05,
+      "loss": 0.3512,
+      "step": 494
+    },
+    {
+      "epoch": 0.42807725569328337,
+      "grad_norm": 7.20918607711792,
+      "learning_rate": 9.462571976967371e-05,
+      "loss": 0.8227,
+      "step": 495
+    },
+    {
+      "epoch": 0.42894205823003745,
+      "grad_norm": 8.044875144958496,
+      "learning_rate": 9.481765834932822e-05,
+      "loss": 0.7849,
+      "step": 496
+    },
+    {
+      "epoch": 0.4298068607667916,
+      "grad_norm": 8.14607048034668,
+      "learning_rate": 9.500959692898272e-05,
+      "loss": 1.2823,
+      "step": 497
+    },
+    {
+      "epoch": 0.43067166330354567,
+      "grad_norm": 9.731268882751465,
+      "learning_rate": 9.520153550863724e-05,
+      "loss": 1.2799,
+      "step": 498
+    },
+    {
+      "epoch": 0.4315364658402998,
+      "grad_norm": 9.654071807861328,
+      "learning_rate": 9.539347408829176e-05,
+      "loss": 0.596,
+      "step": 499
+    },
+    {
+      "epoch": 0.4324012683770539,
+      "grad_norm": 9.026534080505371,
+      "learning_rate": 9.558541266794626e-05,
+      "loss": 0.6793,
+      "step": 500
+    },
+    {
+      "epoch": 0.43326607091380803,
+      "grad_norm": 7.325682163238525,
+      "learning_rate": 9.577735124760078e-05,
+      "loss": 0.575,
+      "step": 501
+    },
+    {
+      "epoch": 0.4341308734505621,
+      "grad_norm": 4.846238136291504,
+      "learning_rate": 9.596928982725528e-05,
+      "loss": 0.2631,
+      "step": 502
+    },
+    {
+      "epoch": 0.43499567598731625,
+      "grad_norm": 8.93980598449707,
+      "learning_rate": 9.61612284069098e-05,
+      "loss": 0.5173,
+      "step": 503
+    },
+    {
+      "epoch": 0.43586047852407034,
+      "grad_norm": 11.70151138305664,
+      "learning_rate": 9.63531669865643e-05,
+      "loss": 0.9963,
+      "step": 504
+    },
+    {
+      "epoch": 0.4367252810608244,
+      "grad_norm": 6.328804016113281,
+      "learning_rate": 9.654510556621882e-05,
+      "loss": 0.9315,
+      "step": 505
+    },
+    {
+      "epoch": 0.43759008359757856,
+      "grad_norm": 9.678471565246582,
+      "learning_rate": 9.673704414587332e-05,
+      "loss": 0.7878,
+      "step": 506
+    },
+    {
+      "epoch": 0.43845488613433264,
+      "grad_norm": 6.569301128387451,
+      "learning_rate": 9.692898272552784e-05,
+      "loss": 0.4346,
+      "step": 507
+    },
+    {
+      "epoch": 0.4393196886710868,
+      "grad_norm": 6.5204596519470215,
+      "learning_rate": 9.712092130518235e-05,
+      "loss": 0.7662,
+      "step": 508
+    },
+    {
+      "epoch": 0.44018449120784087,
+      "grad_norm": 8.459349632263184,
+      "learning_rate": 9.731285988483686e-05,
+      "loss": 0.5221,
+      "step": 509
+    },
+    {
+      "epoch": 0.441049293744595,
+      "grad_norm": 8.08749008178711,
+      "learning_rate": 9.750479846449136e-05,
+      "loss": 0.9803,
+      "step": 510
+    },
+    {
+      "epoch": 0.4419140962813491,
+      "grad_norm": 8.031821250915527,
+      "learning_rate": 9.769673704414588e-05,
+      "loss": 1.1605,
+      "step": 511
+    },
+    {
+      "epoch": 0.4427788988181032,
+      "grad_norm": 9.393692016601562,
+      "learning_rate": 9.788867562380039e-05,
+      "loss": 0.6801,
+      "step": 512
+    },
+    {
+      "epoch": 0.4436437013548573,
+      "grad_norm": 5.011040687561035,
+      "learning_rate": 9.80806142034549e-05,
+      "loss": 0.3935,
+      "step": 513
+    },
+    {
+      "epoch": 0.4445085038916114,
+      "grad_norm": 2.235301971435547,
+      "learning_rate": 9.82725527831094e-05,
+      "loss": 0.1377,
+      "step": 514
+    },
+    {
+      "epoch": 0.44537330642836553,
+      "grad_norm": 5.642356872558594,
+      "learning_rate": 9.846449136276392e-05,
+      "loss": 0.985,
+      "step": 515
+    },
+    {
+      "epoch": 0.4462381089651196,
+      "grad_norm": 9.386540412902832,
+      "learning_rate": 9.865642994241843e-05,
+      "loss": 0.7949,
+      "step": 516
+    },
+    {
+      "epoch": 0.44710291150187376,
+      "grad_norm": 5.782979965209961,
+      "learning_rate": 9.884836852207294e-05,
+      "loss": 0.5974,
+      "step": 517
+    },
+    {
+      "epoch": 0.44796771403862784,
+      "grad_norm": 5.323793888092041,
+      "learning_rate": 9.904030710172744e-05,
+      "loss": 0.6797,
+      "step": 518
+    },
+    {
+      "epoch": 0.448832516575382,
+      "grad_norm": 8.012255668640137,
+      "learning_rate": 9.923224568138196e-05,
+      "loss": 0.6953,
+      "step": 519
+    },
+    {
+      "epoch": 0.44969731911213606,
+      "grad_norm": 6.930400371551514,
+      "learning_rate": 9.942418426103647e-05,
+      "loss": 0.4729,
+      "step": 520
+    },
+    {
+      "epoch": 0.45056212164889015,
+      "grad_norm": 10.408514976501465,
+      "learning_rate": 9.961612284069098e-05,
+      "loss": 0.5509,
+      "step": 521
+    },
+    {
+      "epoch": 0.4514269241856443,
+      "grad_norm": 5.082659721374512,
+      "learning_rate": 9.980806142034548e-05,
+      "loss": 0.4549,
+      "step": 522
+    },
+    {
+      "epoch": 0.45229172672239837,
+      "grad_norm": 10.625167846679688,
+      "learning_rate": 0.0001,
+      "loss": 1.6151,
+      "step": 523
+    },
+    {
+      "epoch": 0.4531565292591525,
+      "grad_norm": 7.423165798187256,
+      "learning_rate": 9.999974430536151e-05,
+      "loss": 0.4466,
+      "step": 524
+    },
+    {
+      "epoch": 0.4540213317959066,
+      "grad_norm": 10.48806095123291,
+      "learning_rate": 9.999897722406126e-05,
+      "loss": 1.3489,
+      "step": 525
+    },
+    {
+      "epoch": 0.45488613433266073,
+      "grad_norm": 8.526479721069336,
+      "learning_rate": 9.999769876394478e-05,
+      "loss": 0.6699,
+      "step": 526
+    },
+    {
+      "epoch": 0.4557509368694148,
+      "grad_norm": 7.596718788146973,
+      "learning_rate": 9.999590893808788e-05,
+      "loss": 0.5189,
+      "step": 527
+    },
+    {
+      "epoch": 0.45661573940616895,
+      "grad_norm": 10.089831352233887,
+      "learning_rate": 9.999360776479651e-05,
+      "loss": 0.7617,
+      "step": 528
+    },
+    {
+      "epoch": 0.45748054194292304,
+      "grad_norm": 7.766354560852051,
+      "learning_rate": 9.999079526760659e-05,
+      "loss": 0.5148,
+      "step": 529
+    },
+    {
+      "epoch": 0.4583453444796771,
+      "grad_norm": 6.268951892852783,
+      "learning_rate": 9.998747147528374e-05,
+      "loss": 0.5564,
+      "step": 530
+    },
+    {
+      "epoch": 0.45921014701643126,
+      "grad_norm": 5.794777870178223,
+      "learning_rate": 9.9983636421823e-05,
+      "loss": 0.4038,
+      "step": 531
+    },
+    {
+      "epoch": 0.46007494955318534,
+      "grad_norm": 8.995209693908691,
+      "learning_rate": 9.997929014644845e-05,
+      "loss": 0.6968,
+      "step": 532
+    },
+    {
+      "epoch": 0.4609397520899395,
+      "grad_norm": 6.833916187286377,
+      "learning_rate": 9.997443269361289e-05,
+      "loss": 0.4393,
+      "step": 533
+    },
+    {
+      "epoch": 0.46180455462669356,
+      "grad_norm": 9.531277656555176,
+      "learning_rate": 9.996906411299726e-05,
+      "loss": 0.7228,
+      "step": 534
+    },
+    {
+      "epoch": 0.4626693571634477,
+      "grad_norm": 11.1766939163208,
+      "learning_rate": 9.996318445951032e-05,
+      "loss": 0.898,
+      "step": 535
+    },
+    {
+      "epoch": 0.4635341597002018,
+      "grad_norm": 4.982804298400879,
+      "learning_rate": 9.995679379328785e-05,
+      "loss": 0.3461,
+      "step": 536
+    },
+    {
+      "epoch": 0.46439896223695587,
+      "grad_norm": 3.0458362102508545,
+      "learning_rate": 9.994989217969224e-05,
+      "loss": 0.4753,
+      "step": 537
+    },
+    {
+      "epoch": 0.46526376477371,
+      "grad_norm": 7.552469253540039,
+      "learning_rate": 9.99424796893117e-05,
+      "loss": 0.4446,
+      "step": 538
+    },
+    {
+      "epoch": 0.4661285673104641,
+      "grad_norm": 10.52206039428711,
+      "learning_rate": 9.99345563979596e-05,
+      "loss": 0.8696,
+      "step": 539
+    },
+    {
+      "epoch": 0.46699336984721823,
+      "grad_norm": 9.044191360473633,
+      "learning_rate": 9.992612238667368e-05,
+      "loss": 1.0505,
+      "step": 540
+    },
+    {
+      "epoch": 0.4678581723839723,
+      "grad_norm": 7.528494834899902,
+      "learning_rate": 9.991717774171514e-05,
+      "loss": 0.3523,
+      "step": 541
+    },
+    {
+      "epoch": 0.46872297492072645,
+      "grad_norm": 8.00634765625,
+      "learning_rate": 9.990772255456797e-05,
+      "loss": 0.6452,
+      "step": 542
+    },
+    {
+      "epoch": 0.46958777745748054,
+      "grad_norm": 6.528989315032959,
+      "learning_rate": 9.989775692193773e-05,
+      "loss": 0.5005,
+      "step": 543
+    },
+    {
+      "epoch": 0.4704525799942347,
+      "grad_norm": 7.66871452331543,
+      "learning_rate": 9.988728094575082e-05,
+      "loss": 0.5364,
+      "step": 544
+    },
+    {
+      "epoch": 0.47131738253098876,
+      "grad_norm": 6.3178558349609375,
+      "learning_rate": 9.987629473315325e-05,
+      "loss": 0.6121,
+      "step": 545
+    },
+    {
+      "epoch": 0.47218218506774284,
+      "grad_norm": 3.713564872741699,
+      "learning_rate": 9.986479839650966e-05,
+      "loss": 0.2326,
+      "step": 546
+    },
+    {
+      "epoch": 0.473046987604497,
+      "grad_norm": 11.291918754577637,
+      "learning_rate": 9.98527920534021e-05,
+      "loss": 1.2339,
+      "step": 547
+    },
+    {
+      "epoch": 0.47391179014125107,
+      "grad_norm": 8.482532501220703,
+      "learning_rate": 9.984027582662892e-05,
+      "loss": 0.8196,
+      "step": 548
+    },
+    {
+      "epoch": 0.4747765926780052,
+      "grad_norm": 2.9724512100219727,
+      "learning_rate": 9.982724984420333e-05,
+      "loss": 0.2354,
+      "step": 549
+    },
+    {
+      "epoch": 0.4756413952147593,
+      "grad_norm": 9.461052894592285,
+      "learning_rate": 9.981371423935233e-05,
+      "loss": 0.6666,
+      "step": 550
+    },
+    {
+      "epoch": 0.4765061977515134,
+      "grad_norm": 5.076896667480469,
+      "learning_rate": 9.979966915051517e-05,
+      "loss": 0.3125,
+      "step": 551
+    },
+    {
+      "epoch": 0.4773710002882675,
+      "grad_norm": 8.995684623718262,
+      "learning_rate": 9.978511472134203e-05,
+      "loss": 0.7455,
+      "step": 552
+    },
+    {
+      "epoch": 0.4782358028250216,
+      "grad_norm": 2.971757173538208,
+      "learning_rate": 9.977005110069245e-05,
+      "loss": 0.32,
+      "step": 553
+    },
+    {
+      "epoch": 0.47910060536177573,
+      "grad_norm": 7.4964399337768555,
+      "learning_rate": 9.975447844263395e-05,
+      "loss": 0.9793,
+      "step": 554
+    },
+    {
+      "epoch": 0.4799654078985298,
+      "grad_norm": 6.13850736618042,
+      "learning_rate": 9.973839690644032e-05,
+      "loss": 0.7821,
+      "step": 555
+    },
+    {
+      "epoch": 0.48083021043528396,
+      "grad_norm": 8.951305389404297,
+      "learning_rate": 9.972180665659004e-05,
+      "loss": 0.6022,
+      "step": 556
+    },
+    {
+      "epoch": 0.48169501297203804,
+      "grad_norm": 6.228058338165283,
+      "learning_rate": 9.970470786276467e-05,
+      "loss": 0.8369,
+      "step": 557
+    },
+    {
+      "epoch": 0.4825598155087922,
+      "grad_norm": 10.346866607666016,
+      "learning_rate": 9.968710069984698e-05,
+      "loss": 0.8025,
+      "step": 558
+    },
+    {
+      "epoch": 0.48342461804554626,
+      "grad_norm": 2.9348461627960205,
+      "learning_rate": 9.966898534791926e-05,
+      "loss": 0.1631,
+      "step": 559
+    },
+    {
+      "epoch": 0.4842894205823004,
+      "grad_norm": 8.404128074645996,
+      "learning_rate": 9.965036199226147e-05,
+      "loss": 0.7858,
+      "step": 560
+    },
+    {
+      "epoch": 0.4851542231190545,
+      "grad_norm": 3.0906944274902344,
+      "learning_rate": 9.963123082334925e-05,
+      "loss": 0.3223,
+      "step": 561
+    },
+    {
+      "epoch": 0.48601902565580857,
+      "grad_norm": 4.46307373046875,
+      "learning_rate": 9.961159203685212e-05,
+      "loss": 0.2361,
+      "step": 562
+    },
+    {
+      "epoch": 0.4868838281925627,
+      "grad_norm": 7.367444038391113,
+      "learning_rate": 9.959144583363141e-05,
+      "loss": 1.2893,
+      "step": 563
+    },
+    {
+      "epoch": 0.4877486307293168,
+      "grad_norm": 4.720983505249023,
+      "learning_rate": 9.957079241973809e-05,
+      "loss": 0.5666,
+      "step": 564
+    },
+    {
+      "epoch": 0.48861343326607093,
+      "grad_norm": 5.1994829177856445,
+      "learning_rate": 9.95496320064109e-05,
+      "loss": 0.2794,
+      "step": 565
+    },
+    {
+      "epoch": 0.489478235802825,
+      "grad_norm": 8.899139404296875,
+      "learning_rate": 9.952796481007401e-05,
+      "loss": 0.6303,
+      "step": 566
+    },
+    {
+      "epoch": 0.49034303833957915,
+      "grad_norm": 4.118505477905273,
+      "learning_rate": 9.950579105233483e-05,
+      "loss": 0.1724,
+      "step": 567
+    },
+    {
+      "epoch": 0.49120784087633323,
+      "grad_norm": 6.728652477264404,
+      "learning_rate": 9.948311095998181e-05,
+      "loss": 0.662,
+      "step": 568
+    },
+    {
+      "epoch": 0.4920726434130873,
+      "grad_norm": 7.761811256408691,
+      "learning_rate": 9.945992476498209e-05,
+      "loss": 0.4051,
+      "step": 569
+    },
+    {
+      "epoch": 0.49293744594984146,
+      "grad_norm": 10.437024116516113,
+      "learning_rate": 9.943623270447909e-05,
+      "loss": 0.7596,
+      "step": 570
+    },
+    {
+      "epoch": 0.49380224848659554,
+      "grad_norm": 8.579437255859375,
+      "learning_rate": 9.94120350207901e-05,
+      "loss": 0.4666,
+      "step": 571
+    },
+    {
+      "epoch": 0.4946670510233497,
+      "grad_norm": 11.050808906555176,
+      "learning_rate": 9.938733196140386e-05,
+      "loss": 0.8923,
+      "step": 572
+    },
+    {
+      "epoch": 0.49553185356010376,
+      "grad_norm": 6.367518901824951,
+      "learning_rate": 9.936212377897798e-05,
+      "loss": 0.3065,
+      "step": 573
+    },
+    {
+      "epoch": 0.4963966560968579,
+      "grad_norm": 5.786684036254883,
+      "learning_rate": 9.933641073133631e-05,
+      "loss": 0.6386,
+      "step": 574
+    },
+    {
+      "epoch": 0.497261458633612,
+      "grad_norm": 3.814639091491699,
+      "learning_rate": 9.93101930814664e-05,
+      "loss": 0.2868,
+      "step": 575
+    },
+    {
+      "epoch": 0.497261458633612,
+      "eval_Qnli-dev-1024_cosine_accuracy": 0.6979166666666666,
+      "eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.8905854225158691,
+      "eval_Qnli-dev-1024_cosine_ap": 0.7246322873104885,
+      "eval_Qnli-dev-1024_cosine_f1": 0.6909090909090909,
+      "eval_Qnli-dev-1024_cosine_f1_threshold": 0.805380642414093,
+      "eval_Qnli-dev-1024_cosine_mcc": 0.33620907137955974,
+      "eval_Qnli-dev-1024_cosine_precision": 0.5846153846153846,
+      "eval_Qnli-dev-1024_cosine_recall": 0.8444444444444444,
+      "eval_Qnli-dev_cosine_accuracy": 0.6979166666666666,
+      "eval_Qnli-dev_cosine_accuracy_threshold": 0.8145653009414673,
+      "eval_Qnli-dev_cosine_ap": 0.7254668033788828,
+      "eval_Qnli-dev_cosine_f1": 0.7289719626168225,
+      "eval_Qnli-dev_cosine_f1_threshold": 0.7076575756072998,
+      "eval_Qnli-dev_cosine_mcc": 0.43373226132862797,
+      "eval_Qnli-dev_cosine_precision": 0.6290322580645161,
+      "eval_Qnli-dev_cosine_recall": 0.8666666666666667,
+      "eval_allNLI--triplets-1024_cosine_accuracy": 0.9479166865348816,
+      "eval_allNLI-triplets_cosine_accuracy": 0.96875,
+      "eval_global_dataset_loss": 0.5409160852432251,
+      "eval_global_dataset_runtime": 68.0813,
+      "eval_global_dataset_samples_per_second": 14.277,
+      "eval_global_dataset_steps_per_second": 0.308,
+      "eval_sequential_score": 0.9479166865348816,
+      "eval_sts-test-1024_pearson_cosine": 0.8845045352861245,
+      "eval_sts-test-1024_spearman_cosine": 0.9123160743907711,
+      "eval_sts-test_pearson_cosine": 0.9122846955191348,
+      "eval_sts-test_spearman_cosine": 0.920479051307594,
+      "step": 575
+    },
+    {
+      "epoch": 0.4981262611703661,
+      "grad_norm": 9.594101905822754,
+      "learning_rate": 9.928347109751677e-05,
+      "loss": 0.6007,
+      "step": 576
+    },
+    {
+      "epoch": 0.4989910637071202,
+      "grad_norm": 5.121261119842529,
+      "learning_rate": 9.925624505279411e-05,
+      "loss": 0.2853,
+      "step": 577
+    },
+    {
+      "epoch": 0.4998558662438743,
+      "grad_norm": 4.0415215492248535,
+      "learning_rate": 9.922851522576058e-05,
+      "loss": 0.3982,
+      "step": 578
+    },
+    {
+      "epoch": 0.5007206687806284,
+      "grad_norm": 11.199448585510254,
+      "learning_rate": 9.92002819000309e-05,
+      "loss": 0.7686,
+      "step": 579
+    },
+    {
+      "epoch": 0.5015854713173825,
+      "grad_norm": 5.71658992767334,
+      "learning_rate": 9.917154536436948e-05,
+      "loss": 0.3809,
+      "step": 580
+    },
+    {
+      "epoch": 0.5024502738541367,
+      "grad_norm": 10.616915702819824,
+      "learning_rate": 9.914230591268743e-05,
+      "loss": 1.1228,
+      "step": 581
+    },
+    {
+      "epoch": 0.5033150763908908,
+      "grad_norm": 4.5049028396606445,
+      "learning_rate": 9.911256384403961e-05,
+      "loss": 0.3737,
+      "step": 582
+    },
+    {
+      "epoch": 0.5041798789276448,
+      "grad_norm": 5.022185325622559,
+      "learning_rate": 9.90823194626215e-05,
+      "loss": 0.2141,
+      "step": 583
+    },
+    {
+      "epoch": 0.505044681464399,
+      "grad_norm": 10.781139373779297,
+      "learning_rate": 9.905157307776616e-05,
+      "loss": 1.2942,
+      "step": 584
+    },
+    {
+      "epoch": 0.5059094840011531,
+      "grad_norm": 10.425268173217773,
+      "learning_rate": 9.902032500394103e-05,
+      "loss": 1.4177,
+      "step": 585
+    },
+    {
+      "epoch": 0.5067742865379071,
+      "grad_norm": 6.989367961883545,
+      "learning_rate": 9.898857556074468e-05,
+      "loss": 0.5832,
+      "step": 586
+    },
+    {
+      "epoch": 0.5076390890746613,
+      "grad_norm": 6.156850814819336,
+      "learning_rate": 9.895632507290362e-05,
+      "loss": 0.4419,
+      "step": 587
+    },
+    {
+      "epoch": 0.5085038916114154,
+      "grad_norm": 6.66822624206543,
+      "learning_rate": 9.892357387026892e-05,
+      "loss": 0.5903,
+      "step": 588
+    },
+    {
+      "epoch": 0.5093686941481695,
+      "grad_norm": 8.24500560760498,
+      "learning_rate": 9.889032228781285e-05,
+      "loss": 0.44,
+      "step": 589
+    },
+    {
+      "epoch": 0.5102334966849236,
+      "grad_norm": 6.062635898590088,
+      "learning_rate": 9.88565706656255e-05,
+      "loss": 0.3002,
+      "step": 590
+    },
+    {
+      "epoch": 0.5110982992216777,
+      "grad_norm": 8.822070121765137,
+      "learning_rate": 9.882231934891119e-05,
+      "loss": 0.6883,
+      "step": 591
+    },
+    {
+      "epoch": 0.5119631017584318,
+      "grad_norm": 6.581031322479248,
+      "learning_rate": 9.878756868798504e-05,
+      "loss": 0.7068,
+      "step": 592
+    },
+    {
+      "epoch": 0.512827904295186,
+      "grad_norm": 6.801186561584473,
+      "learning_rate": 9.875231903826936e-05,
+      "loss": 0.5245,
+      "step": 593
+    },
+    {
+      "epoch": 0.51369270683194,
+      "grad_norm": 8.146296501159668,
+      "learning_rate": 9.871657076029003e-05,
+      "loss": 0.7089,
+      "step": 594
+    },
+    {
+      "epoch": 0.5145575093686942,
+      "grad_norm": 12.6628999710083,
+      "learning_rate": 9.868032421967275e-05,
+      "loss": 1.8026,
+      "step": 595
+    },
+    {
+      "epoch": 0.5154223119054483,
+      "grad_norm": 3.0164332389831543,
+      "learning_rate": 9.864357978713936e-05,
+      "loss": 0.2736,
+      "step": 596
+    },
+    {
+      "epoch": 0.5162871144422023,
+      "grad_norm": 3.916259527206421,
+      "learning_rate": 9.860633783850406e-05,
+      "loss": 0.3196,
+      "step": 597
+    },
+    {
+      "epoch": 0.5171519169789565,
+      "grad_norm": 8.493870735168457,
+      "learning_rate": 9.856859875466948e-05,
+      "loss": 0.7005,
+      "step": 598
+    },
+    {
+      "epoch": 0.5180167195157106,
+      "grad_norm": 8.802308082580566,
+      "learning_rate": 9.853036292162291e-05,
+      "loss": 0.4239,
+      "step": 599
+    },
+    {
+      "epoch": 0.5188815220524647,
+      "grad_norm": 10.11483383178711,
+      "learning_rate": 9.849163073043223e-05,
+      "loss": 0.5686,
+      "step": 600
+    },
+    {
+      "epoch": 0.5197463245892188,
+      "grad_norm": 7.787915229797363,
+      "learning_rate": 9.845240257724198e-05,
+      "loss": 0.6015,
+      "step": 601
+    },
+    {
+      "epoch": 0.5206111271259729,
+      "grad_norm": 3.49916410446167,
+      "learning_rate": 9.841267886326932e-05,
+      "loss": 0.1611,
+      "step": 602
+    },
+    {
+      "epoch": 0.521475929662727,
+      "grad_norm": 8.411331176757812,
+      "learning_rate": 9.837245999479985e-05,
+      "loss": 0.6458,
+      "step": 603
+    },
+    {
+      "epoch": 0.5223407321994811,
+      "grad_norm": 7.405316352844238,
+      "learning_rate": 9.833174638318356e-05,
+      "loss": 0.7173,
+      "step": 604
+    },
+    {
+      "epoch": 0.5232055347362352,
+      "grad_norm": 8.42251968383789,
+      "learning_rate": 9.829053844483052e-05,
+      "loss": 0.8808,
+      "step": 605
+    },
+    {
+      "epoch": 0.5240703372729894,
+      "grad_norm": 6.8583269119262695,
+      "learning_rate": 9.824883660120667e-05,
+      "loss": 0.625,
+      "step": 606
+    },
+    {
+      "epoch": 0.5249351398097435,
+      "grad_norm": 6.834749698638916,
+      "learning_rate": 9.820664127882957e-05,
+      "loss": 0.4378,
+      "step": 607
+    },
+    {
+      "epoch": 0.5257999423464975,
+      "grad_norm": 5.739812850952148,
+      "learning_rate": 9.81639529092639e-05,
+      "loss": 0.7798,
+      "step": 608
+    },
+    {
+      "epoch": 0.5266647448832517,
+      "grad_norm": 7.9455084800720215,
+      "learning_rate": 9.812077192911713e-05,
+      "loss": 0.6586,
+      "step": 609
+    },
+    {
+      "epoch": 0.5275295474200058,
+      "grad_norm": 7.959743499755859,
+      "learning_rate": 9.80770987800351e-05,
+      "loss": 0.8475,
+      "step": 610
+    },
+    {
+      "epoch": 0.5283943499567598,
+      "grad_norm": 5.485658168792725,
+      "learning_rate": 9.803293390869739e-05,
+      "loss": 0.4095,
+      "step": 611
+    },
+    {
+      "epoch": 0.529259152493514,
+      "grad_norm": 7.284278392791748,
+      "learning_rate": 9.798827776681286e-05,
+      "loss": 0.4946,
+      "step": 612
+    },
+    {
+      "epoch": 0.5301239550302681,
+      "grad_norm": 8.508416175842285,
+      "learning_rate": 9.79431308111149e-05,
+      "loss": 0.3962,
+      "step": 613
+    },
+    {
+      "epoch": 0.5309887575670222,
+      "grad_norm": 5.56104850769043,
+      "learning_rate": 9.789749350335693e-05,
+      "loss": 0.7191,
+      "step": 614
+    },
+    {
+      "epoch": 0.5318535601037763,
+      "grad_norm": 11.444177627563477,
+      "learning_rate": 9.785136631030755e-05,
+      "loss": 0.6589,
+      "step": 615
+    },
+    {
+      "epoch": 0.5327183626405304,
+      "grad_norm": 8.934037208557129,
+      "learning_rate": 9.780474970374578e-05,
+      "loss": 0.5603,
+      "step": 616
+    },
+    {
+      "epoch": 0.5335831651772845,
+      "grad_norm": 12.182479858398438,
+      "learning_rate": 9.775764416045628e-05,
+      "loss": 1.3667,
+      "step": 617
+    },
+    {
+      "epoch": 0.5344479677140386,
+      "grad_norm": 6.506429195404053,
+      "learning_rate": 9.771005016222446e-05,
+      "loss": 0.5623,
+      "step": 618
+    },
+    {
+      "epoch": 0.5353127702507927,
+      "grad_norm": 8.439187049865723,
+      "learning_rate": 9.766196819583149e-05,
+      "loss": 0.6174,
+      "step": 619
+    },
+    {
+      "epoch": 0.5361775727875469,
+      "grad_norm": 9.493589401245117,
+      "learning_rate": 9.761339875304945e-05,
+      "loss": 0.6462,
+      "step": 620
+    },
+    {
+      "epoch": 0.537042375324301,
+      "grad_norm": 2.347870111465454,
+      "learning_rate": 9.756434233063616e-05,
+      "loss": 0.1693,
+      "step": 621
+    },
+    {
+      "epoch": 0.537907177861055,
+      "grad_norm": 8.565069198608398,
+      "learning_rate": 9.751479943033019e-05,
+      "loss": 0.4887,
+      "step": 622
+    },
+    {
+      "epoch": 0.5387719803978092,
+      "grad_norm": 8.762991905212402,
+      "learning_rate": 9.746477055884571e-05,
+      "loss": 0.9039,
+      "step": 623
+    },
+    {
+      "epoch": 0.5396367829345633,
+      "grad_norm": 5.132269382476807,
+      "learning_rate": 9.741425622786728e-05,
+      "loss": 0.3159,
+      "step": 624
+    },
+    {
+      "epoch": 0.5405015854713174,
+      "grad_norm": 6.715843677520752,
+      "learning_rate": 9.736325695404464e-05,
+      "loss": 0.6409,
+      "step": 625
+    },
+    {
+      "epoch": 0.5413663880080715,
+      "grad_norm": 2.351118803024292,
+      "learning_rate": 9.731177325898746e-05,
+      "loss": 0.1413,
+      "step": 626
+    },
+    {
+      "epoch": 0.5422311905448256,
+      "grad_norm": 5.473691940307617,
+      "learning_rate": 9.725980566925989e-05,
+      "loss": 0.3963,
+      "step": 627
+    },
+    {
+      "epoch": 0.5430959930815797,
+      "grad_norm": 6.525996685028076,
+      "learning_rate": 9.72073547163753e-05,
+      "loss": 0.4283,
+      "step": 628
+    },
+    {
+      "epoch": 0.5439607956183338,
+      "grad_norm": 9.671774864196777,
+      "learning_rate": 9.71544209367908e-05,
+      "loss": 0.8147,
+      "step": 629
+    },
+    {
+      "epoch": 0.5448255981550879,
+      "grad_norm": 7.720305919647217,
+      "learning_rate": 9.710100487190173e-05,
+      "loss": 0.7238,
+      "step": 630
+    },
+    {
+      "epoch": 0.545690400691842,
+      "grad_norm": 6.962470531463623,
+      "learning_rate": 9.704710706803613e-05,
+      "loss": 0.3583,
+      "step": 631
+    },
+    {
+      "epoch": 0.5465552032285962,
+      "grad_norm": 7.1871819496154785,
+      "learning_rate": 9.699272807644921e-05,
+      "loss": 0.5934,
+      "step": 632
+    },
+    {
+      "epoch": 0.5474200057653502,
+      "grad_norm": 8.43585205078125,
+      "learning_rate": 9.693786845331761e-05,
+      "loss": 0.3339,
+      "step": 633
+    },
+    {
+      "epoch": 0.5482848083021044,
+      "grad_norm": 8.839116096496582,
+      "learning_rate": 9.68825287597338e-05,
+      "loss": 0.5551,
+      "step": 634
+    },
+    {
+      "epoch": 0.5491496108388585,
+      "grad_norm": 7.399514675140381,
+      "learning_rate": 9.68267095617003e-05,
+      "loss": 0.7277,
+      "step": 635
+    },
+    {
+      "epoch": 0.5500144133756125,
+      "grad_norm": 3.7421650886535645,
+      "learning_rate": 9.677041143012391e-05,
+      "loss": 0.3276,
+      "step": 636
+    },
+    {
+      "epoch": 0.5508792159123667,
+      "grad_norm": 6.863941669464111,
+      "learning_rate": 9.67136349408098e-05,
+      "loss": 0.3983,
+      "step": 637
+    },
+    {
+      "epoch": 0.5517440184491208,
+      "grad_norm": 8.192028999328613,
+      "learning_rate": 9.665638067445577e-05,
+      "loss": 0.5536,
+      "step": 638
+    },
+    {
+      "epoch": 0.5526088209858749,
+      "grad_norm": 6.802035331726074,
+      "learning_rate": 9.659864921664617e-05,
+      "loss": 0.4256,
+      "step": 639
+    },
+    {
+      "epoch": 0.553473623522629,
+      "grad_norm": 8.902397155761719,
+      "learning_rate": 9.654044115784594e-05,
+      "loss": 0.6132,
+      "step": 640
+    },
+    {
+      "epoch": 0.5543384260593831,
+      "grad_norm": 3.023282289505005,
+      "learning_rate": 9.648175709339465e-05,
+      "loss": 0.1601,
+      "step": 641
+    },
+    {
+      "epoch": 0.5552032285961372,
+      "grad_norm": 6.913763523101807,
+      "learning_rate": 9.642259762350032e-05,
+      "loss": 0.8637,
+      "step": 642
+    },
+    {
+      "epoch": 0.5560680311328913,
+      "grad_norm": 5.186830043792725,
+      "learning_rate": 9.636296335323334e-05,
+      "loss": 0.2678,
+      "step": 643
+    },
+    {
+      "epoch": 0.5569328336696454,
+      "grad_norm": 8.123047828674316,
+      "learning_rate": 9.63028548925202e-05,
+      "loss": 0.4715,
+      "step": 644
+    },
+    {
+      "epoch": 0.5577976362063995,
+      "grad_norm": 8.248505592346191,
+      "learning_rate": 9.624227285613736e-05,
+      "loss": 0.4066,
+      "step": 645
+    },
+    {
+      "epoch": 0.5586624387431537,
+      "grad_norm": 7.174196243286133,
+      "learning_rate": 9.618121786370491e-05,
+      "loss": 0.2985,
+      "step": 646
+    },
+    {
+      "epoch": 0.5595272412799077,
+      "grad_norm": 9.055746078491211,
+      "learning_rate": 9.61196905396802e-05,
+      "loss": 0.4818,
+      "step": 647
+    },
+    {
+      "epoch": 0.5603920438166619,
+      "grad_norm": 5.331139087677002,
+      "learning_rate": 9.605769151335151e-05,
+      "loss": 0.3297,
+      "step": 648
+    },
+    {
+      "epoch": 0.561256846353416,
+      "grad_norm": 4.492726802825928,
+      "learning_rate": 9.59952214188316e-05,
+      "loss": 0.2309,
+      "step": 649
+    },
+    {
+      "epoch": 0.56212164889017,
+      "grad_norm": 7.451852798461914,
+      "learning_rate": 9.593228089505117e-05,
+      "loss": 0.3733,
+      "step": 650
+    },
+    {
+      "epoch": 0.5629864514269242,
+      "grad_norm": 9.455964088439941,
+      "learning_rate": 9.586887058575243e-05,
+      "loss": 0.471,
+      "step": 651
+    },
+    {
+      "epoch": 0.5638512539636783,
+      "grad_norm": 4.70458984375,
+      "learning_rate": 9.58049911394824e-05,
+      "loss": 0.1841,
+      "step": 652
+    },
+    {
+      "epoch": 0.5647160565004324,
+      "grad_norm": 3.027376413345337,
+      "learning_rate": 9.574064320958637e-05,
+      "loss": 0.1042,
+      "step": 653
+    },
+    {
+      "epoch": 0.5655808590371865,
+      "grad_norm": 13.047475814819336,
+      "learning_rate": 9.567582745420117e-05,
+      "loss": 1.7486,
+      "step": 654
+    },
+    {
+      "epoch": 0.5664456615739406,
+      "grad_norm": 5.038949489593506,
+      "learning_rate": 9.561054453624842e-05,
+      "loss": 0.7092,
+      "step": 655
+    },
+    {
+      "epoch": 0.5673104641106947,
+      "grad_norm": 6.817296981811523,
+      "learning_rate": 9.554479512342784e-05,
+      "loss": 0.4515,
+      "step": 656
+    },
+    {
+      "epoch": 0.5681752666474489,
+      "grad_norm": 6.715672969818115,
+      "learning_rate": 9.54785798882103e-05,
+      "loss": 0.5267,
+      "step": 657
+    },
+    {
+      "epoch": 0.5690400691842029,
+      "grad_norm": 12.338273048400879,
+      "learning_rate": 9.541189950783104e-05,
+      "loss": 0.8779,
+      "step": 658
+    },
+    {
+      "epoch": 0.569904871720957,
+      "grad_norm": 6.969177722930908,
+      "learning_rate": 9.534475466428267e-05,
+      "loss": 0.3105,
+      "step": 659
+    },
+    {
+      "epoch": 0.5707696742577112,
+      "grad_norm": 4.153381824493408,
+      "learning_rate": 9.527714604430827e-05,
+      "loss": 0.2972,
+      "step": 660
+    },
+    {
+      "epoch": 0.5716344767944652,
+      "grad_norm": 9.585479736328125,
+      "learning_rate": 9.52090743393943e-05,
+      "loss": 0.7349,
+      "step": 661
+    },
+    {
+      "epoch": 0.5724992793312194,
+      "grad_norm": 8.285649299621582,
+      "learning_rate": 9.514054024576356e-05,
+      "loss": 0.3054,
+      "step": 662
+    },
+    {
+      "epoch": 0.5733640818679735,
+      "grad_norm": 8.23316764831543,
+      "learning_rate": 9.507154446436805e-05,
+      "loss": 0.3722,
+      "step": 663
+    },
+    {
+      "epoch": 0.5742288844047276,
+      "grad_norm": 3.4087507724761963,
+      "learning_rate": 9.500208770088183e-05,
+      "loss": 0.3515,
+      "step": 664
+    },
+    {
+      "epoch": 0.5750936869414817,
+      "grad_norm": 11.583375930786133,
+      "learning_rate": 9.49321706656938e-05,
+      "loss": 1.0321,
+      "step": 665
+    },
+    {
+      "epoch": 0.5759584894782358,
+      "grad_norm": 9.680198669433594,
+      "learning_rate": 9.48617940739004e-05,
+      "loss": 0.6996,
+      "step": 666
+    },
+    {
+      "epoch": 0.5768232920149899,
+      "grad_norm": 5.860654354095459,
+      "learning_rate": 9.479095864529828e-05,
+      "loss": 0.584,
+      "step": 667
+    },
+    {
+      "epoch": 0.577688094551744,
+      "grad_norm": 8.714286804199219,
+      "learning_rate": 9.471966510437704e-05,
+      "loss": 0.8377,
+      "step": 668
+    },
+    {
+      "epoch": 0.5785528970884981,
+      "grad_norm": 5.863884925842285,
+      "learning_rate": 9.464791418031172e-05,
+      "loss": 0.3194,
+      "step": 669
+    },
+    {
+      "epoch": 0.5794176996252522,
+      "grad_norm": 3.8105716705322266,
+      "learning_rate": 9.457570660695541e-05,
+      "loss": 0.2197,
+      "step": 670
+    },
+    {
+      "epoch": 0.5802825021620064,
+      "grad_norm": 7.818668842315674,
+      "learning_rate": 9.450304312283164e-05,
+      "loss": 0.5296,
+      "step": 671
+    },
+    {
+      "epoch": 0.5811473046987604,
+      "grad_norm": 3.5748655796051025,
+      "learning_rate": 9.442992447112697e-05,
+      "loss": 0.2199,
+      "step": 672
+    },
+    {
+      "epoch": 0.5820121072355146,
+      "grad_norm": 9.74962043762207,
+      "learning_rate": 9.435635139968328e-05,
+      "loss": 0.7576,
+      "step": 673
+    },
+    {
+      "epoch": 0.5828769097722687,
+      "grad_norm": 5.957652568817139,
+      "learning_rate": 9.428232466099018e-05,
+      "loss": 0.4388,
+      "step": 674
+    },
+    {
+      "epoch": 0.5837417123090227,
+      "grad_norm": 1.4129705429077148,
+      "learning_rate": 9.420784501217726e-05,
+      "loss": 0.0997,
+      "step": 675
+    },
+    {
+      "epoch": 0.5846065148457769,
+      "grad_norm": 6.296298503875732,
+      "learning_rate": 9.41329132150064e-05,
+      "loss": 0.4806,
+      "step": 676
+    },
+    {
+      "epoch": 0.585471317382531,
+      "grad_norm": 8.789826393127441,
+      "learning_rate": 9.405753003586395e-05,
+      "loss": 0.7328,
+      "step": 677
+    },
+    {
+      "epoch": 0.5863361199192851,
+      "grad_norm": 9.228763580322266,
+      "learning_rate": 9.39816962457529e-05,
+      "loss": 0.4772,
+      "step": 678
+    },
+    {
+      "epoch": 0.5872009224560392,
+      "grad_norm": 5.72409725189209,
+      "learning_rate": 9.3905412620285e-05,
+      "loss": 0.3285,
+      "step": 679
+    },
+    {
+      "epoch": 0.5880657249927933,
+      "grad_norm": 10.633530616760254,
+      "learning_rate": 9.382867993967281e-05,
+      "loss": 0.9213,
+      "step": 680
+    },
+    {
+      "epoch": 0.5889305275295474,
+      "grad_norm": 10.06709098815918,
+      "learning_rate": 9.375149898872172e-05,
+      "loss": 0.5335,
+      "step": 681
+    },
+    {
+      "epoch": 0.5897953300663016,
+      "grad_norm": 5.641694068908691,
+      "learning_rate": 9.367387055682197e-05,
+      "loss": 0.3178,
+      "step": 682
+    },
+    {
+      "epoch": 0.5906601326030556,
+      "grad_norm": 8.637955665588379,
+      "learning_rate": 9.359579543794048e-05,
+      "loss": 0.7194,
+      "step": 683
+    },
+    {
+      "epoch": 0.5915249351398097,
+      "grad_norm": 5.672209739685059,
+      "learning_rate": 9.351727443061283e-05,
+      "loss": 0.5559,
+      "step": 684
+    },
+    {
+      "epoch": 0.5923897376765639,
+      "grad_norm": 6.293837547302246,
+      "learning_rate": 9.343830833793505e-05,
+      "loss": 0.489,
+      "step": 685
+    },
+    {
+      "epoch": 0.5932545402133179,
+      "grad_norm": 5.788215160369873,
+      "learning_rate": 9.335889796755541e-05,
+      "loss": 0.2563,
+      "step": 686
+    },
+    {
+      "epoch": 0.594119342750072,
+      "grad_norm": 8.539923667907715,
+      "learning_rate": 9.327904413166615e-05,
+      "loss": 0.8217,
+      "step": 687
+    },
+    {
+      "epoch": 0.5949841452868262,
+      "grad_norm": 4.539181709289551,
+      "learning_rate": 9.319874764699515e-05,
+      "loss": 0.371,
+      "step": 688
+    },
+    {
+      "epoch": 0.5958489478235803,
+      "grad_norm": 4.926830291748047,
+      "learning_rate": 9.311800933479764e-05,
+      "loss": 0.3217,
+      "step": 689
+    },
+    {
+      "epoch": 0.5967137503603344,
+      "grad_norm": 8.856836318969727,
+      "learning_rate": 9.30368300208478e-05,
+      "loss": 0.5505,
+      "step": 690
+    },
+    {
+      "epoch": 0.5967137503603344,
+      "eval_Qnli-dev-1024_cosine_accuracy": 0.71875,
+      "eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.868374228477478,
+      "eval_Qnli-dev-1024_cosine_ap": 0.7082660274050915,
+      "eval_Qnli-dev-1024_cosine_f1": 0.6938775510204082,
+      "eval_Qnli-dev-1024_cosine_f1_threshold": 0.8440404534339905,
+      "eval_Qnli-dev-1024_cosine_mcc": 0.3843486566998693,
+      "eval_Qnli-dev-1024_cosine_precision": 0.6415094339622641,
+      "eval_Qnli-dev-1024_cosine_recall": 0.7555555555555555,
+      "eval_Qnli-dev_cosine_accuracy": 0.7291666666666666,
+      "eval_Qnli-dev_cosine_accuracy_threshold": 0.7942297458648682,
+      "eval_Qnli-dev_cosine_ap": 0.7468079642036429,
+      "eval_Qnli-dev_cosine_f1": 0.7222222222222222,
+      "eval_Qnli-dev_cosine_f1_threshold": 0.6964967250823975,
+      "eval_Qnli-dev_cosine_mcc": 0.41614558708189836,
+      "eval_Qnli-dev_cosine_precision": 0.6190476190476191,
+      "eval_Qnli-dev_cosine_recall": 0.8666666666666667,
+      "eval_allNLI--triplets-1024_cosine_accuracy": 0.9375,
+      "eval_allNLI-triplets_cosine_accuracy": 0.96875,
+      "eval_global_dataset_loss": 0.6163961887359619,
+      "eval_global_dataset_runtime": 68.0763,
+      "eval_global_dataset_samples_per_second": 14.278,
+      "eval_global_dataset_steps_per_second": 0.308,
+      "eval_sequential_score": 0.9375,
+      "eval_sts-test-1024_pearson_cosine": 0.8687143218667199,
+      "eval_sts-test-1024_spearman_cosine": 0.9060527968336128,
+      "eval_sts-test_pearson_cosine": 0.9081393663808583,
+      "eval_sts-test_spearman_cosine": 0.920736019932914,
+      "step": 690
+    },
+    {
+      "epoch": 0.5975785528970885,
+      "grad_norm": 7.396856784820557,
+      "learning_rate": 9.295521053543019e-05,
+      "loss": 0.5105,
+      "step": 691
+    },
+    {
+      "epoch": 0.5984433554338426,
+      "grad_norm": 8.075766563415527,
+      "learning_rate": 9.287315171333144e-05,
+      "loss": 0.5877,
+      "step": 692
+    },
+    {
+      "epoch": 0.5993081579705967,
+      "grad_norm": 8.688477516174316,
+      "learning_rate": 9.279065439383157e-05,
+      "loss": 0.7346,
+      "step": 693
+    },
+    {
+      "epoch": 0.6001729605073508,
+      "grad_norm": 5.448639869689941,
+      "learning_rate": 9.27077194206955e-05,
+      "loss": 0.3644,
+      "step": 694
+    },
+    {
+      "epoch": 0.601037763044105,
+      "grad_norm": 5.420974254608154,
+      "learning_rate": 9.262434764216428e-05,
+      "loss": 0.2205,
+      "step": 695
+    },
+    {
+      "epoch": 0.6019025655808591,
+      "grad_norm": 6.542895793914795,
+      "learning_rate": 9.254053991094666e-05,
+      "loss": 0.2832,
+      "step": 696
+    },
+    {
+      "epoch": 0.6027673681176131,
+      "grad_norm": 3.4809961318969727,
+      "learning_rate": 9.245629708421008e-05,
+      "loss": 0.145,
+      "step": 697
+    },
+    {
+      "epoch": 0.6036321706543673,
+      "grad_norm": 10.80398941040039,
+      "learning_rate": 9.237162002357214e-05,
+      "loss": 0.8998,
+      "step": 698
+    },
+    {
+      "epoch": 0.6044969731911214,
+      "grad_norm": 2.6544158458709717,
+      "learning_rate": 9.228650959509166e-05,
+      "loss": 0.1194,
+      "step": 699
+    },
+    {
+      "epoch": 0.6053617757278754,
+      "grad_norm": 4.051424980163574,
+      "learning_rate": 9.220096666925982e-05,
+      "loss": 0.1845,
+      "step": 700
+    },
+    {
+      "epoch": 0.6062265782646296,
+      "grad_norm": 10.206416130065918,
+      "learning_rate": 9.211499212099135e-05,
+      "loss": 0.6004,
+      "step": 701
+    },
+    {
+      "epoch": 0.6070913808013837,
+      "grad_norm": 11.007821083068848,
+      "learning_rate": 9.202858682961545e-05,
+      "loss": 0.5262,
+      "step": 702
+    },
+    {
+      "epoch": 0.6079561833381378,
+      "grad_norm": 9.616263389587402,
+      "learning_rate": 9.194175167886698e-05,
+      "loss": 1.3073,
+      "step": 703
+    },
+    {
+      "epoch": 0.6088209858748919,
+      "grad_norm": 7.583075523376465,
+      "learning_rate": 9.185448755687717e-05,
+      "loss": 0.4977,
+      "step": 704
+    },
+    {
+      "epoch": 0.609685788411646,
+      "grad_norm": 3.0454254150390625,
+      "learning_rate": 9.176679535616477e-05,
+      "loss": 0.1434,
+      "step": 705
+    },
+    {
+      "epoch": 0.6105505909484001,
+      "grad_norm": 5.383974075317383,
+      "learning_rate": 9.167867597362682e-05,
+      "loss": 0.1923,
+      "step": 706
+    },
+    {
+      "epoch": 0.6114153934851542,
+      "grad_norm": 10.157812118530273,
+      "learning_rate": 9.159013031052943e-05,
+      "loss": 0.5597,
+      "step": 707
+    },
+    {
+      "epoch": 0.6122801960219083,
+      "grad_norm": 12.371292114257812,
+      "learning_rate": 9.150115927249869e-05,
+      "loss": 0.8295,
+      "step": 708
+    },
+    {
+      "epoch": 0.6131449985586624,
+      "grad_norm": 9.299467086791992,
+      "learning_rate": 9.141176376951128e-05,
+      "loss": 0.5907,
+      "step": 709
+    },
+    {
+      "epoch": 0.6140098010954166,
+      "grad_norm": 7.16170597076416,
+      "learning_rate": 9.132194471588522e-05,
+      "loss": 0.5436,
+      "step": 710
+    },
+    {
+      "epoch": 0.6148746036321706,
+      "grad_norm": 6.266456127166748,
+      "learning_rate": 9.123170303027055e-05,
+      "loss": 0.309,
+      "step": 711
+    },
+    {
+      "epoch": 0.6157394061689248,
+      "grad_norm": 10.73092269897461,
+      "learning_rate": 9.114103963563985e-05,
+      "loss": 0.7257,
+      "step": 712
+    },
+    {
+      "epoch": 0.6166042087056789,
+      "grad_norm": 8.290569305419922,
+      "learning_rate": 9.104995545927893e-05,
+      "loss": 0.6665,
+      "step": 713
+    },
+    {
+      "epoch": 0.617469011242433,
+      "grad_norm": 6.256021499633789,
+      "learning_rate": 9.095845143277714e-05,
+      "loss": 0.281,
+      "step": 714
+    },
+    {
+      "epoch": 0.6183338137791871,
+      "grad_norm": 3.134965419769287,
+      "learning_rate": 9.086652849201807e-05,
+      "loss": 0.2241,
+      "step": 715
+    },
+    {
+      "epoch": 0.6191986163159412,
+      "grad_norm": 8.62253475189209,
+      "learning_rate": 9.077418757716988e-05,
+      "loss": 0.7341,
+      "step": 716
+    },
+    {
+      "epoch": 0.6200634188526953,
+      "grad_norm": 8.627028465270996,
+      "learning_rate": 9.06814296326756e-05,
+      "loss": 0.3946,
+      "step": 717
+    },
+    {
+      "epoch": 0.6209282213894494,
+      "grad_norm": 5.673067092895508,
+      "learning_rate": 9.05882556072436e-05,
+      "loss": 0.4353,
+      "step": 718
+    },
+    {
+      "epoch": 0.6217930239262035,
+      "grad_norm": 5.314984321594238,
+      "learning_rate": 9.049466645383784e-05,
+      "loss": 0.3919,
+      "step": 719
+    },
+    {
+      "epoch": 0.6226578264629576,
+      "grad_norm": 8.689918518066406,
+      "learning_rate": 9.040066312966811e-05,
+      "loss": 0.5087,
+      "step": 720
+    },
+    {
+      "epoch": 0.6235226289997118,
+      "grad_norm": 5.046836853027344,
+      "learning_rate": 9.030624659618023e-05,
+      "loss": 0.2345,
+      "step": 721
+    },
+    {
+      "epoch": 0.6243874315364658,
+      "grad_norm": 12.160417556762695,
+      "learning_rate": 9.021141781904627e-05,
+      "loss": 0.8855,
+      "step": 722
+    },
+    {
+      "epoch": 0.62525223407322,
+      "grad_norm": 9.182302474975586,
+      "learning_rate": 9.011617776815464e-05,
+      "loss": 0.7187,
+      "step": 723
+    },
+    {
+      "epoch": 0.6261170366099741,
+      "grad_norm": 6.717326641082764,
+      "learning_rate": 9.002052741760015e-05,
+      "loss": 0.5225,
+      "step": 724
+    },
+    {
+      "epoch": 0.6269818391467281,
+      "grad_norm": 11.271307945251465,
+      "learning_rate": 8.992446774567405e-05,
+      "loss": 0.9725,
+      "step": 725
+    },
+    {
+      "epoch": 0.6278466416834823,
+      "grad_norm": 12.319371223449707,
+      "learning_rate": 8.982799973485407e-05,
+      "loss": 0.6209,
+      "step": 726
+    },
+    {
+      "epoch": 0.6287114442202364,
+      "grad_norm": 7.424941062927246,
+      "learning_rate": 8.973112437179436e-05,
+      "loss": 0.478,
+      "step": 727
+    },
+    {
+      "epoch": 0.6295762467569905,
+      "grad_norm": 6.208258628845215,
+      "learning_rate": 8.963384264731533e-05,
+      "loss": 0.2833,
+      "step": 728
+    },
+    {
+      "epoch": 0.6304410492937446,
+      "grad_norm": 4.718559265136719,
+      "learning_rate": 8.95361555563936e-05,
+      "loss": 0.2356,
+      "step": 729
+    },
+    {
+      "epoch": 0.6313058518304987,
+      "grad_norm": 9.238673210144043,
+      "learning_rate": 8.943806409815181e-05,
+      "loss": 0.6937,
+      "step": 730
+    },
+    {
+      "epoch": 0.6321706543672528,
+      "grad_norm": 11.935426712036133,
+      "learning_rate": 8.933956927584832e-05,
+      "loss": 0.8793,
+      "step": 731
+    },
+    {
+      "epoch": 0.6330354569040069,
+      "grad_norm": 8.183321952819824,
+      "learning_rate": 8.924067209686709e-05,
+      "loss": 0.6845,
+      "step": 732
+    },
+    {
+      "epoch": 0.633900259440761,
+      "grad_norm": 4.494237422943115,
+      "learning_rate": 8.914137357270723e-05,
+      "loss": 0.2744,
+      "step": 733
+    },
+    {
+      "epoch": 0.6347650619775151,
+      "grad_norm": 10.111383438110352,
+      "learning_rate": 8.904167471897274e-05,
+      "loss": 0.8681,
+      "step": 734
+    },
+    {
+      "epoch": 0.6356298645142693,
+      "grad_norm": 10.407071113586426,
+      "learning_rate": 8.894157655536216e-05,
+      "loss": 1.0385,
+      "step": 735
+    },
+    {
+      "epoch": 0.6364946670510233,
+      "grad_norm": 6.472255706787109,
+      "learning_rate": 8.884108010565797e-05,
+      "loss": 0.2331,
+      "step": 736
+    },
+    {
+      "epoch": 0.6373594695877774,
+      "grad_norm": 4.348916530609131,
+      "learning_rate": 8.874018639771637e-05,
+      "loss": 0.3183,
+      "step": 737
+    },
+    {
+      "epoch": 0.6382242721245316,
+      "grad_norm": 3.087089776992798,
+      "learning_rate": 8.863889646345653e-05,
+      "loss": 0.1691,
+      "step": 738
+    },
+    {
+      "epoch": 0.6390890746612857,
+      "grad_norm": 5.743144512176514,
+      "learning_rate": 8.85372113388502e-05,
+      "loss": 0.4625,
+      "step": 739
+    },
+    {
+      "epoch": 0.6399538771980398,
+      "grad_norm": 4.561880111694336,
+      "learning_rate": 8.843513206391101e-05,
+      "loss": 0.2338,
+      "step": 740
+    },
+    {
+      "epoch": 0.6408186797347939,
+      "grad_norm": 10.266475677490234,
+      "learning_rate": 8.83326596826839e-05,
+      "loss": 1.1701,
+      "step": 741
+    },
+    {
+      "epoch": 0.641683482271548,
+      "grad_norm": 8.521928787231445,
+      "learning_rate": 8.822979524323441e-05,
+      "loss": 0.7673,
+      "step": 742
+    },
+    {
+      "epoch": 0.6425482848083021,
+      "grad_norm": 8.54457950592041,
+      "learning_rate": 8.812653979763795e-05,
+      "loss": 0.5481,
+      "step": 743
+    },
+    {
+      "epoch": 0.6434130873450562,
+      "grad_norm": 5.748913288116455,
+      "learning_rate": 8.802289440196908e-05,
+      "loss": 0.3357,
+      "step": 744
+    },
+    {
+      "epoch": 0.6442778898818103,
+      "grad_norm": 4.804452896118164,
+      "learning_rate": 8.791886011629068e-05,
+      "loss": 0.263,
+      "step": 745
+    },
+    {
+      "epoch": 0.6451426924185645,
+      "grad_norm": 3.707672119140625,
+      "learning_rate": 8.781443800464316e-05,
+      "loss": 0.1461,
+      "step": 746
+    },
+    {
+      "epoch": 0.6460074949553185,
+      "grad_norm": 7.357616901397705,
+      "learning_rate": 8.77096291350334e-05,
+      "loss": 0.3193,
+      "step": 747
+    },
+    {
+      "epoch": 0.6468722974920726,
+      "grad_norm": 4.722273349761963,
+      "learning_rate": 8.760443457942408e-05,
+      "loss": 0.2647,
+      "step": 748
+    },
+    {
+      "epoch": 0.6477371000288268,
+      "grad_norm": 5.43215799331665,
+      "learning_rate": 8.749885541372257e-05,
+      "loss": 0.2494,
+      "step": 749
+    },
+    {
+      "epoch": 0.6486019025655808,
+      "grad_norm": 4.395086765289307,
+      "learning_rate": 8.739289271776991e-05,
+      "loss": 0.1905,
+      "step": 750
+    },
+    {
+      "epoch": 0.649466705102335,
+      "grad_norm": 6.617416858673096,
+      "learning_rate": 8.728654757532984e-05,
+      "loss": 0.6302,
+      "step": 751
+    },
+    {
+      "epoch": 0.6503315076390891,
+      "grad_norm": 3.7228050231933594,
+      "learning_rate": 8.717982107407768e-05,
+      "loss": 0.3397,
+      "step": 752
+    },
+    {
+      "epoch": 0.6511963101758432,
+      "grad_norm": 9.654953002929688,
+      "learning_rate": 8.707271430558919e-05,
+      "loss": 0.6679,
+      "step": 753
+    },
+    {
+      "epoch": 0.6520611127125973,
+      "grad_norm": 4.019669532775879,
+      "learning_rate": 8.69652283653294e-05,
+      "loss": 0.3372,
+      "step": 754
+    },
+    {
+      "epoch": 0.6529259152493514,
+      "grad_norm": 7.510921478271484,
+      "learning_rate": 8.68573643526415e-05,
+      "loss": 0.6676,
+      "step": 755
+    },
+    {
+      "epoch": 0.6537907177861055,
+      "grad_norm": 13.126535415649414,
+      "learning_rate": 8.674912337073544e-05,
+      "loss": 1.2867,
+      "step": 756
+    },
+    {
+      "epoch": 0.6546555203228596,
+      "grad_norm": 9.412704467773438,
+      "learning_rate": 8.66405065266768e-05,
+      "loss": 0.8248,
+      "step": 757
+    },
+    {
+      "epoch": 0.6555203228596137,
+      "grad_norm": 6.785587787628174,
+      "learning_rate": 8.653151493137536e-05,
+      "loss": 0.4971,
+      "step": 758
+    },
+    {
+      "epoch": 0.6563851253963678,
+      "grad_norm": 12.77095890045166,
+      "learning_rate": 8.642214969957376e-05,
+      "loss": 1.4049,
+      "step": 759
+    },
+    {
+      "epoch": 0.657249927933122,
+      "grad_norm": 6.501046180725098,
+      "learning_rate": 8.631241194983616e-05,
+      "loss": 0.3086,
+      "step": 760
+    },
+    {
+      "epoch": 0.658114730469876,
+      "grad_norm": 6.871536731719971,
+      "learning_rate": 8.620230280453673e-05,
+      "loss": 0.6796,
+      "step": 761
+    },
+    {
+      "epoch": 0.6589795330066301,
+      "grad_norm": 6.746383190155029,
+      "learning_rate": 8.609182338984818e-05,
+      "loss": 0.4314,
+      "step": 762
+    },
+    {
+      "epoch": 0.6598443355433843,
+      "grad_norm": 4.454339504241943,
+      "learning_rate": 8.598097483573029e-05,
+      "loss": 0.2843,
+      "step": 763
+    },
+    {
+      "epoch": 0.6607091380801383,
+      "grad_norm": 5.15504789352417,
+      "learning_rate": 8.586975827591825e-05,
+      "loss": 0.4569,
+      "step": 764
+    },
+    {
+      "epoch": 0.6615739406168925,
+      "grad_norm": 6.545773506164551,
+      "learning_rate": 8.575817484791127e-05,
+      "loss": 0.3931,
+      "step": 765
+    },
+    {
+      "epoch": 0.6624387431536466,
+      "grad_norm": 4.9794511795043945,
+      "learning_rate": 8.564622569296063e-05,
+      "loss": 0.2155,
+      "step": 766
+    },
+    {
+      "epoch": 0.6633035456904007,
+      "grad_norm": 8.013479232788086,
+      "learning_rate": 8.553391195605833e-05,
+      "loss": 0.3245,
+      "step": 767
+    },
+    {
+      "epoch": 0.6641683482271548,
+      "grad_norm": 9.687097549438477,
+      "learning_rate": 8.542123478592518e-05,
+      "loss": 0.7824,
+      "step": 768
+    },
+    {
+      "epoch": 0.6650331507639089,
+      "grad_norm": 5.516420364379883,
+      "learning_rate": 8.530819533499909e-05,
+      "loss": 0.3537,
+      "step": 769
+    },
+    {
+      "epoch": 0.665897953300663,
+      "grad_norm": 6.398399353027344,
+      "learning_rate": 8.519479475942334e-05,
+      "loss": 0.2212,
+      "step": 770
+    },
+    {
+      "epoch": 0.6667627558374172,
+      "grad_norm": 6.814426898956299,
+      "learning_rate": 8.508103421903468e-05,
+      "loss": 0.5911,
+      "step": 771
+    },
+    {
+      "epoch": 0.6676275583741712,
+      "grad_norm": 6.5453410148620605,
+      "learning_rate": 8.496691487735156e-05,
+      "loss": 0.4524,
+      "step": 772
+    },
+    {
+      "epoch": 0.6684923609109253,
+      "grad_norm": 3.5740625858306885,
+      "learning_rate": 8.485243790156208e-05,
+      "loss": 0.2604,
+      "step": 773
+    },
+    {
+      "epoch": 0.6693571634476795,
+      "grad_norm": 12.454208374023438,
+      "learning_rate": 8.473760446251221e-05,
+      "loss": 0.8186,
+      "step": 774
+    },
+    {
+      "epoch": 0.6702219659844335,
+      "grad_norm": 5.322040557861328,
+      "learning_rate": 8.462241573469379e-05,
+      "loss": 0.4612,
+      "step": 775
+    },
+    {
+      "epoch": 0.6710867685211876,
+      "grad_norm": 7.373685359954834,
+      "learning_rate": 8.450687289623235e-05,
+      "loss": 0.5306,
+      "step": 776
+    },
+    {
+      "epoch": 0.6719515710579418,
+      "grad_norm": 11.016031265258789,
+      "learning_rate": 8.439097712887531e-05,
+      "loss": 1.0424,
+      "step": 777
+    },
+    {
+      "epoch": 0.6728163735946959,
+      "grad_norm": 8.017274856567383,
+      "learning_rate": 8.427472961797971e-05,
+      "loss": 0.473,
+      "step": 778
+    },
+    {
+      "epoch": 0.67368117613145,
+      "grad_norm": 5.788976669311523,
+      "learning_rate": 8.415813155250017e-05,
+      "loss": 0.2846,
+      "step": 779
+    },
+    {
+      "epoch": 0.6745459786682041,
+      "grad_norm": 4.2314558029174805,
+      "learning_rate": 8.404118412497666e-05,
+      "loss": 0.4083,
+      "step": 780
+    },
+    {
+      "epoch": 0.6754107812049582,
+      "grad_norm": 3.476349115371704,
+      "learning_rate": 8.392388853152245e-05,
+      "loss": 0.236,
+      "step": 781
+    },
+    {
+      "epoch": 0.6762755837417123,
+      "grad_norm": 10.38036823272705,
+      "learning_rate": 8.380624597181165e-05,
+      "loss": 0.6732,
+      "step": 782
+    },
+    {
+      "epoch": 0.6771403862784664,
+      "grad_norm": 7.326548099517822,
+      "learning_rate": 8.368825764906716e-05,
+      "loss": 0.6798,
+      "step": 783
+    },
+    {
+      "epoch": 0.6780051888152205,
+      "grad_norm": 8.5910062789917,
+      "learning_rate": 8.356992477004828e-05,
+      "loss": 0.75,
+      "step": 784
+    },
+    {
+      "epoch": 0.6788699913519747,
+      "grad_norm": 4.450828552246094,
+      "learning_rate": 8.345124854503825e-05,
+      "loss": 0.2198,
+      "step": 785
+    },
+    {
+      "epoch": 0.6797347938887287,
+      "grad_norm": 3.15915584564209,
+      "learning_rate": 8.33322301878321e-05,
+      "loss": 0.1629,
+      "step": 786
+    },
+    {
+      "epoch": 0.6805995964254828,
+      "grad_norm": 3.2538440227508545,
+      "learning_rate": 8.321287091572403e-05,
+      "loss": 0.1949,
+      "step": 787
+    },
+    {
+      "epoch": 0.681464398962237,
+      "grad_norm": 8.031615257263184,
+      "learning_rate": 8.309317194949509e-05,
+      "loss": 0.3901,
+      "step": 788
+    },
+    {
+      "epoch": 0.682329201498991,
+      "grad_norm": 2.7871859073638916,
+      "learning_rate": 8.297313451340064e-05,
+      "loss": 0.2184,
+      "step": 789
+    },
+    {
+      "epoch": 0.6831940040357452,
+      "grad_norm": 6.6741204261779785,
+      "learning_rate": 8.285275983515783e-05,
+      "loss": 0.3516,
+      "step": 790
+    },
+    {
+      "epoch": 0.6840588065724993,
+      "grad_norm": 9.924346923828125,
+      "learning_rate": 8.273204914593304e-05,
+      "loss": 0.9001,
+      "step": 791
+    },
+    {
+      "epoch": 0.6849236091092534,
+      "grad_norm": 2.0380783081054688,
+      "learning_rate": 8.261100368032934e-05,
+      "loss": 0.0729,
+      "step": 792
+    },
+    {
+      "epoch": 0.6857884116460075,
+      "grad_norm": 4.190455913543701,
+      "learning_rate": 8.248962467637378e-05,
+      "loss": 0.1484,
+      "step": 793
+    },
+    {
+      "epoch": 0.6866532141827616,
+      "grad_norm": 10.513288497924805,
+      "learning_rate": 8.236791337550478e-05,
+      "loss": 0.8013,
+      "step": 794
+    },
+    {
+      "epoch": 0.6875180167195157,
+      "grad_norm": 5.367727279663086,
+      "learning_rate": 8.22458710225594e-05,
+      "loss": 0.2315,
+      "step": 795
+    },
+    {
+      "epoch": 0.6883828192562699,
+      "grad_norm": 4.737613201141357,
+      "learning_rate": 8.21234988657607e-05,
+      "loss": 0.2135,
+      "step": 796
+    },
+    {
+      "epoch": 0.6892476217930239,
+      "grad_norm": 7.230178356170654,
+      "learning_rate": 8.20007981567048e-05,
+      "loss": 0.6123,
+      "step": 797
+    },
+    {
+      "epoch": 0.690112424329778,
+      "grad_norm": 5.188995361328125,
+      "learning_rate": 8.18777701503483e-05,
+      "loss": 0.2533,
+      "step": 798
+    },
+    {
+      "epoch": 0.6909772268665322,
+      "grad_norm": 9.257750511169434,
+      "learning_rate": 8.175441610499522e-05,
+      "loss": 0.6212,
+      "step": 799
+    },
+    {
+      "epoch": 0.6918420294032862,
+      "grad_norm": 1.5883065462112427,
+      "learning_rate": 8.163073728228427e-05,
+      "loss": 0.0883,
+      "step": 800
+    },
+    {
+      "epoch": 0.6927068319400403,
+      "grad_norm": 8.530162811279297,
+      "learning_rate": 8.150673494717597e-05,
+      "loss": 0.3946,
+      "step": 801
+    },
+    {
+      "epoch": 0.6935716344767945,
+      "grad_norm": 7.668551445007324,
+      "learning_rate": 8.138241036793958e-05,
+      "loss": 0.4277,
+      "step": 802
+    },
+    {
+      "epoch": 0.6944364370135486,
+      "grad_norm": 8.265761375427246,
+      "learning_rate": 8.125776481614024e-05,
+      "loss": 0.5575,
+      "step": 803
+    },
+    {
+      "epoch": 0.6953012395503027,
+      "grad_norm": 7.973784446716309,
+      "learning_rate": 8.113279956662594e-05,
+      "loss": 0.4164,
+      "step": 804
+    },
+    {
+      "epoch": 0.6961660420870568,
+      "grad_norm": 4.912955284118652,
+      "learning_rate": 8.100751589751442e-05,
+      "loss": 0.1826,
+      "step": 805
+    },
+    {
+      "epoch": 0.6961660420870568,
+      "eval_Qnli-dev-1024_cosine_accuracy": 0.71875,
+      "eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.8731638193130493,
+      "eval_Qnli-dev-1024_cosine_ap": 0.724535579920194,
+      "eval_Qnli-dev-1024_cosine_f1": 0.7037037037037037,
+      "eval_Qnli-dev-1024_cosine_f1_threshold": 0.781539261341095,
+      "eval_Qnli-dev-1024_cosine_mcc": 0.3721962181491566,
+      "eval_Qnli-dev-1024_cosine_precision": 0.6031746031746031,
+      "eval_Qnli-dev-1024_cosine_recall": 0.8444444444444444,
+      "eval_Qnli-dev_cosine_accuracy": 0.7291666666666666,
+      "eval_Qnli-dev_cosine_accuracy_threshold": 0.778366208076477,
+      "eval_Qnli-dev_cosine_ap": 0.7396468214578549,
+      "eval_Qnli-dev_cosine_f1": 0.7142857142857142,
+      "eval_Qnli-dev_cosine_f1_threshold": 0.7083452939987183,
+      "eval_Qnli-dev_cosine_mcc": 0.4263253018001963,
+      "eval_Qnli-dev_cosine_precision": 0.660377358490566,
+      "eval_Qnli-dev_cosine_recall": 0.7777777777777778,
+      "eval_allNLI--triplets-1024_cosine_accuracy": 0.9583333134651184,
+      "eval_allNLI-triplets_cosine_accuracy": 0.9791666865348816,
+      "eval_global_dataset_loss": 0.6504772901535034,
+      "eval_global_dataset_runtime": 67.87,
+      "eval_global_dataset_samples_per_second": 14.321,
+      "eval_global_dataset_steps_per_second": 0.309,
+      "eval_sequential_score": 0.9583333134651184,
+      "eval_sts-test-1024_pearson_cosine": 0.8671751973788917,
+      "eval_sts-test-1024_spearman_cosine": 0.9100830925358703,
+      "eval_sts-test_pearson_cosine": 0.9084514358185803,
+      "eval_sts-test_spearman_cosine": 0.9218648677325396,
+      "step": 805
+    },
+    {
+      "epoch": 0.6970308446238109,
+      "grad_norm": 6.679697513580322,
+      "learning_rate": 8.08819150901802e-05,
+      "loss": 0.1903,
+      "step": 806
+    },
+    {
+      "epoch": 0.697895647160565,
+      "grad_norm": 5.046963214874268,
+      "learning_rate": 8.075599842924139e-05,
+      "loss": 0.2428,
+      "step": 807
+    },
+    {
+      "epoch": 0.6987604496973191,
+      "grad_norm": 2.3785789012908936,
+      "learning_rate": 8.06297672025466e-05,
+      "loss": 0.0856,
+      "step": 808
+    },
+    {
+      "epoch": 0.6996252522340732,
+      "grad_norm": 3.804054021835327,
+      "learning_rate": 8.050322270116174e-05,
+      "loss": 0.1363,
+      "step": 809
+    },
+    {
+      "epoch": 0.7004900547708274,
+      "grad_norm": 7.834051132202148,
+      "learning_rate": 8.037636621935685e-05,
+      "loss": 0.3135,
+      "step": 810
+    },
+    {
+      "epoch": 0.7013548573075814,
+      "grad_norm": 9.44914722442627,
+      "learning_rate": 8.02491990545928e-05,
+      "loss": 0.758,
+      "step": 811
+    },
+    {
+      "epoch": 0.7022196598443355,
+      "grad_norm": 4.096058368682861,
+      "learning_rate": 8.012172250750807e-05,
+      "loss": 0.3242,
+      "step": 812
+    },
+    {
+      "epoch": 0.7030844623810897,
+      "grad_norm": 9.295559883117676,
+      "learning_rate": 7.999393788190548e-05,
+      "loss": 0.5513,
+      "step": 813
+    },
+    {
+      "epoch": 0.7039492649178437,
+      "grad_norm": 8.359066009521484,
+      "learning_rate": 7.986584648473874e-05,
+      "loss": 0.8495,
+      "step": 814
+    },
+    {
+      "epoch": 0.7048140674545978,
+      "grad_norm": 11.649872779846191,
+      "learning_rate": 7.973744962609921e-05,
+      "loss": 0.6741,
+      "step": 815
+    },
+    {
+      "epoch": 0.705678869991352,
+      "grad_norm": 7.094292163848877,
+      "learning_rate": 7.960874861920242e-05,
+      "loss": 0.3723,
+      "step": 816
+    },
+    {
+      "epoch": 0.7065436725281061,
+      "grad_norm": 7.240077495574951,
+      "learning_rate": 7.947974478037468e-05,
+      "loss": 0.5434,
+      "step": 817
+    },
+    {
+      "epoch": 0.7074084750648602,
+      "grad_norm": 2.250290632247925,
+      "learning_rate": 7.935043942903955e-05,
+      "loss": 0.1101,
+      "step": 818
+    },
+    {
+      "epoch": 0.7082732776016143,
+      "grad_norm": 4.252150058746338,
+      "learning_rate": 7.922083388770447e-05,
+      "loss": 0.1888,
+      "step": 819
+    },
+    {
+      "epoch": 0.7091380801383684,
+      "grad_norm": 4.072676181793213,
+      "learning_rate": 7.90909294819471e-05,
+      "loss": 0.2415,
+      "step": 820
+    },
+    {
+      "epoch": 0.7100028826751225,
+      "grad_norm": 9.136191368103027,
+      "learning_rate": 7.896072754040186e-05,
+      "loss": 0.4492,
+      "step": 821
+    },
+    {
+      "epoch": 0.7108676852118766,
+      "grad_norm": 10.30457878112793,
+      "learning_rate": 7.883022939474626e-05,
+      "loss": 0.9007,
+      "step": 822
+    },
+    {
+      "epoch": 0.7117324877486307,
+      "grad_norm": 5.737984657287598,
+      "learning_rate": 7.869943637968738e-05,
+      "loss": 0.5574,
+      "step": 823
+    },
+    {
+      "epoch": 0.7125972902853849,
+      "grad_norm": 9.1240234375,
+      "learning_rate": 7.85683498329481e-05,
+      "loss": 0.9687,
+      "step": 824
+    },
+    {
+      "epoch": 0.7134620928221389,
+      "grad_norm": 8.149517059326172,
+      "learning_rate": 7.843697109525352e-05,
+      "loss": 0.8161,
+      "step": 825
+    },
+    {
+      "epoch": 0.714326895358893,
+      "grad_norm": 10.62049674987793,
+      "learning_rate": 7.830530151031719e-05,
+      "loss": 0.6275,
+      "step": 826
+    },
+    {
+      "epoch": 0.7151916978956472,
+      "grad_norm": 4.933554172515869,
+      "learning_rate": 7.817334242482738e-05,
+      "loss": 0.361,
+      "step": 827
+    },
+    {
+      "epoch": 0.7160565004324013,
+      "grad_norm": 4.892520427703857,
+      "learning_rate": 7.804109518843334e-05,
+      "loss": 0.2424,
+      "step": 828
+    },
+    {
+      "epoch": 0.7169213029691553,
+      "grad_norm": 8.320906639099121,
+      "learning_rate": 7.790856115373142e-05,
+      "loss": 0.7132,
+      "step": 829
+    },
+    {
+      "epoch": 0.7177861055059095,
+      "grad_norm": 8.682563781738281,
+      "learning_rate": 7.77757416762513e-05,
+      "loss": 0.5795,
+      "step": 830
+    },
+    {
+      "epoch": 0.7186509080426636,
+      "grad_norm": 1.6696056127548218,
+      "learning_rate": 7.764263811444215e-05,
+      "loss": 0.0414,
+      "step": 831
+    },
+    {
+      "epoch": 0.7195157105794177,
+      "grad_norm": 10.681838989257812,
+      "learning_rate": 7.75092518296586e-05,
+      "loss": 0.8467,
+      "step": 832
+    },
+    {
+      "epoch": 0.7203805131161718,
+      "grad_norm": 5.933515548706055,
+      "learning_rate": 7.737558418614699e-05,
+      "loss": 0.3639,
+      "step": 833
+    },
+    {
+      "epoch": 0.7212453156529259,
+      "grad_norm": 3.935758352279663,
+      "learning_rate": 7.724163655103131e-05,
+      "loss": 0.3737,
+      "step": 834
+    },
+    {
+      "epoch": 0.7221101181896801,
+      "grad_norm": 9.584526062011719,
+      "learning_rate": 7.710741029429926e-05,
+      "loss": 0.6802,
+      "step": 835
+    },
+    {
+      "epoch": 0.7229749207264341,
+      "grad_norm": 13.70799446105957,
+      "learning_rate": 7.697290678878819e-05,
+      "loss": 1.0565,
+      "step": 836
+    },
+    {
+      "epoch": 0.7238397232631882,
+      "grad_norm": 8.689953804016113,
+      "learning_rate": 7.683812741017112e-05,
+      "loss": 0.5841,
+      "step": 837
+    },
+    {
+      "epoch": 0.7247045257999424,
+      "grad_norm": 13.601666450500488,
+      "learning_rate": 7.670307353694262e-05,
+      "loss": 1.0172,
+      "step": 838
+    },
+    {
+      "epoch": 0.7255693283366964,
+      "grad_norm": 3.734889507293701,
+      "learning_rate": 7.656774655040472e-05,
+      "loss": 0.3109,
+      "step": 839
+    },
+    {
+      "epoch": 0.7264341308734505,
+      "grad_norm": 10.951227188110352,
+      "learning_rate": 7.643214783465286e-05,
+      "loss": 0.6902,
+      "step": 840
+    },
+    {
+      "epoch": 0.7272989334102047,
+      "grad_norm": 2.3853259086608887,
+      "learning_rate": 7.62962787765616e-05,
+      "loss": 0.1287,
+      "step": 841
+    },
+    {
+      "epoch": 0.7281637359469588,
+      "grad_norm": 10.43367862701416,
+      "learning_rate": 7.616014076577054e-05,
+      "loss": 0.6679,
+      "step": 842
+    },
+    {
+      "epoch": 0.7290285384837129,
+      "grad_norm": 5.164660453796387,
+      "learning_rate": 7.602373519467005e-05,
+      "loss": 0.3239,
+      "step": 843
+    },
+    {
+      "epoch": 0.729893341020467,
+      "grad_norm": 6.129587650299072,
+      "learning_rate": 7.588706345838705e-05,
+      "loss": 0.1646,
+      "step": 844
+    },
+    {
+      "epoch": 0.7307581435572211,
+      "grad_norm": 5.64245080947876,
+      "learning_rate": 7.575012695477076e-05,
+      "loss": 0.3594,
+      "step": 845
+    },
+    {
+      "epoch": 0.7316229460939752,
+      "grad_norm": 5.245384216308594,
+      "learning_rate": 7.561292708437838e-05,
+      "loss": 0.2795,
+      "step": 846
+    },
+    {
+      "epoch": 0.7324877486307293,
+      "grad_norm": 6.762210369110107,
+      "learning_rate": 7.547546525046073e-05,
+      "loss": 0.3268,
+      "step": 847
+    },
+    {
+      "epoch": 0.7333525511674834,
+      "grad_norm": 9.87009048461914,
+      "learning_rate": 7.533774285894798e-05,
+      "loss": 0.8067,
+      "step": 848
+    },
+    {
+      "epoch": 0.7342173537042376,
+      "grad_norm": 4.286474704742432,
+      "learning_rate": 7.519976131843522e-05,
+      "loss": 0.3708,
+      "step": 849
+    },
+    {
+      "epoch": 0.7350821562409916,
+      "grad_norm": 9.59669303894043,
+      "learning_rate": 7.506152204016807e-05,
+      "loss": 0.5467,
+      "step": 850
+    },
+    {
+      "epoch": 0.7359469587777457,
+      "grad_norm": 3.928433895111084,
+      "learning_rate": 7.492302643802821e-05,
+      "loss": 0.15,
+      "step": 851
+    },
+    {
+      "epoch": 0.7368117613144999,
+      "grad_norm": 7.317601203918457,
+      "learning_rate": 7.478427592851893e-05,
+      "loss": 0.4525,
+      "step": 852
+    },
+    {
+      "epoch": 0.737676563851254,
+      "grad_norm": 6.541726589202881,
+      "learning_rate": 7.464527193075073e-05,
+      "loss": 0.3871,
+      "step": 853
+    },
+    {
+      "epoch": 0.738541366388008,
+      "grad_norm": 12.070144653320312,
+      "learning_rate": 7.450601586642664e-05,
+      "loss": 0.8351,
+      "step": 854
+    },
+    {
+      "epoch": 0.7394061689247622,
+      "grad_norm": 8.084358215332031,
+      "learning_rate": 7.436650915982785e-05,
+      "loss": 0.3939,
+      "step": 855
+    },
+    {
+      "epoch": 0.7402709714615163,
+      "grad_norm": 6.941904067993164,
+      "learning_rate": 7.422675323779907e-05,
+      "loss": 0.4311,
+      "step": 856
+    },
+    {
+      "epoch": 0.7411357739982704,
+      "grad_norm": 8.018699645996094,
+      "learning_rate": 7.408674952973382e-05,
+      "loss": 0.4675,
+      "step": 857
+    },
+    {
+      "epoch": 0.7420005765350245,
+      "grad_norm": 7.949825763702393,
+      "learning_rate": 7.394649946756004e-05,
+      "loss": 0.5963,
+      "step": 858
+    },
+    {
+      "epoch": 0.7428653790717786,
+      "grad_norm": 6.355823040008545,
+      "learning_rate": 7.38060044857253e-05,
+      "loss": 0.3415,
+      "step": 859
+    },
+    {
+      "epoch": 0.7437301816085328,
+      "grad_norm": 7.31845760345459,
+      "learning_rate": 7.366526602118214e-05,
+      "loss": 0.3599,
+      "step": 860
+    },
+    {
+      "epoch": 0.7445949841452868,
+      "grad_norm": 4.008370876312256,
+      "learning_rate": 7.352428551337338e-05,
+      "loss": 0.3354,
+      "step": 861
+    },
+    {
+      "epoch": 0.7454597866820409,
+      "grad_norm": 6.440021991729736,
+      "learning_rate": 7.338306440421743e-05,
+      "loss": 0.2971,
+      "step": 862
+    },
+    {
+      "epoch": 0.7463245892187951,
+      "grad_norm": 11.389256477355957,
+      "learning_rate": 7.32416041380935e-05,
+      "loss": 0.6679,
+      "step": 863
+    },
+    {
+      "epoch": 0.7471893917555491,
+      "grad_norm": 2.519818067550659,
+      "learning_rate": 7.309990616182685e-05,
+      "loss": 0.1211,
+      "step": 864
+    },
+    {
+      "epoch": 0.7480541942923032,
+      "grad_norm": 7.607640743255615,
+      "learning_rate": 7.2957971924674e-05,
+      "loss": 0.2407,
+      "step": 865
+    },
+    {
+      "epoch": 0.7489189968290574,
+      "grad_norm": 7.118372917175293,
+      "learning_rate": 7.28158028783079e-05,
+      "loss": 0.3254,
+      "step": 866
+    },
+    {
+      "epoch": 0.7497837993658115,
+      "grad_norm": 2.883557081222534,
+      "learning_rate": 7.267340047680305e-05,
+      "loss": 0.1074,
+      "step": 867
+    },
+    {
+      "epoch": 0.7506486019025655,
+      "grad_norm": 4.721225738525391,
+      "learning_rate": 7.253076617662065e-05,
+      "loss": 0.1904,
+      "step": 868
+    },
+    {
+      "epoch": 0.7515134044393197,
+      "grad_norm": 2.654787302017212,
+      "learning_rate": 7.23879014365938e-05,
+      "loss": 0.182,
+      "step": 869
+    },
+    {
+      "epoch": 0.7523782069760738,
+      "grad_norm": 7.568452835083008,
+      "learning_rate": 7.224480771791235e-05,
+      "loss": 0.4094,
+      "step": 870
+    },
+    {
+      "epoch": 0.7532430095128279,
+      "grad_norm": 8.068111419677734,
+      "learning_rate": 7.210148648410821e-05,
+      "loss": 0.8455,
+      "step": 871
+    },
+    {
+      "epoch": 0.754107812049582,
+      "grad_norm": 6.598762512207031,
+      "learning_rate": 7.195793920104023e-05,
+      "loss": 0.4085,
+      "step": 872
+    },
+    {
+      "epoch": 0.7549726145863361,
+      "grad_norm": 6.5393829345703125,
+      "learning_rate": 7.18141673368792e-05,
+      "loss": 0.4978,
+      "step": 873
+    },
+    {
+      "epoch": 0.7558374171230903,
+      "grad_norm": 4.241705894470215,
+      "learning_rate": 7.167017236209292e-05,
+      "loss": 0.2777,
+      "step": 874
+    },
+    {
+      "epoch": 0.7567022196598443,
+      "grad_norm": 5.239429950714111,
+      "learning_rate": 7.152595574943113e-05,
+      "loss": 0.3822,
+      "step": 875
+    },
+    {
+      "epoch": 0.7575670221965984,
+      "grad_norm": 10.576812744140625,
+      "learning_rate": 7.138151897391041e-05,
+      "loss": 0.5127,
+      "step": 876
+    },
+    {
+      "epoch": 0.7584318247333526,
+      "grad_norm": 4.40622615814209,
+      "learning_rate": 7.123686351279914e-05,
+      "loss": 0.2795,
+      "step": 877
+    },
+    {
+      "epoch": 0.7592966272701066,
+      "grad_norm": 8.214874267578125,
+      "learning_rate": 7.10919908456023e-05,
+      "loss": 0.4,
+      "step": 878
+    },
+    {
+      "epoch": 0.7601614298068607,
+      "grad_norm": 5.674429893493652,
+      "learning_rate": 7.094690245404652e-05,
+      "loss": 0.3919,
+      "step": 879
+    },
+    {
+      "epoch": 0.7610262323436149,
+      "grad_norm": 7.315159797668457,
+      "learning_rate": 7.080159982206471e-05,
+      "loss": 0.3323,
+      "step": 880
+    },
+    {
+      "epoch": 0.761891034880369,
+      "grad_norm": 5.864488124847412,
+      "learning_rate": 7.065608443578105e-05,
+      "loss": 0.5407,
+      "step": 881
+    },
+    {
+      "epoch": 0.762755837417123,
+      "grad_norm": 9.524258613586426,
+      "learning_rate": 7.05103577834957e-05,
+      "loss": 0.8925,
+      "step": 882
+    },
+    {
+      "epoch": 0.7636206399538772,
+      "grad_norm": 2.4174962043762207,
+      "learning_rate": 7.036442135566961e-05,
+      "loss": 0.116,
+      "step": 883
+    },
+    {
+      "epoch": 0.7644854424906313,
+      "grad_norm": 5.054670810699463,
+      "learning_rate": 7.021827664490928e-05,
+      "loss": 0.382,
+      "step": 884
+    },
+    {
+      "epoch": 0.7653502450273855,
+      "grad_norm": 4.311699867248535,
+      "learning_rate": 7.007192514595141e-05,
+      "loss": 0.2573,
+      "step": 885
+    },
+    {
+      "epoch": 0.7662150475641395,
+      "grad_norm": 5.006008625030518,
+      "learning_rate": 6.992536835564782e-05,
+      "loss": 0.2442,
+      "step": 886
+    },
+    {
+      "epoch": 0.7670798501008936,
+      "grad_norm": 4.521592140197754,
+      "learning_rate": 6.977860777294988e-05,
+      "loss": 0.2122,
+      "step": 887
+    },
+    {
+      "epoch": 0.7679446526376478,
+      "grad_norm": 7.981561183929443,
+      "learning_rate": 6.963164489889337e-05,
+      "loss": 0.3405,
+      "step": 888
+    },
+    {
+      "epoch": 0.7688094551744018,
+      "grad_norm": 10.011691093444824,
+      "learning_rate": 6.948448123658308e-05,
+      "loss": 0.4895,
+      "step": 889
+    },
+    {
+      "epoch": 0.7696742577111559,
+      "grad_norm": 6.9324517250061035,
+      "learning_rate": 6.933711829117733e-05,
+      "loss": 0.4046,
+      "step": 890
+    },
+    {
+      "epoch": 0.7705390602479101,
+      "grad_norm": 5.044534683227539,
+      "learning_rate": 6.918955756987275e-05,
+      "loss": 0.3365,
+      "step": 891
+    },
+    {
+      "epoch": 0.7714038627846642,
+      "grad_norm": 6.062309265136719,
+      "learning_rate": 6.904180058188877e-05,
+      "loss": 0.3073,
+      "step": 892
+    },
+    {
+      "epoch": 0.7722686653214182,
+      "grad_norm": 9.762418746948242,
+      "learning_rate": 6.889384883845214e-05,
+      "loss": 0.7621,
+      "step": 893
+    },
+    {
+      "epoch": 0.7731334678581724,
+      "grad_norm": 8.496923446655273,
+      "learning_rate": 6.874570385278158e-05,
+      "loss": 0.4088,
+      "step": 894
+    },
+    {
+      "epoch": 0.7739982703949265,
+      "grad_norm": 9.173744201660156,
+      "learning_rate": 6.859736714007226e-05,
+      "loss": 0.6372,
+      "step": 895
+    },
+    {
+      "epoch": 0.7748630729316806,
+      "grad_norm": 8.595545768737793,
+      "learning_rate": 6.844884021748019e-05,
+      "loss": 0.7089,
+      "step": 896
+    },
+    {
+      "epoch": 0.7757278754684347,
+      "grad_norm": 7.156553268432617,
+      "learning_rate": 6.830012460410697e-05,
+      "loss": 0.5503,
+      "step": 897
+    },
+    {
+      "epoch": 0.7765926780051888,
+      "grad_norm": 5.894566059112549,
+      "learning_rate": 6.815122182098394e-05,
+      "loss": 0.5239,
+      "step": 898
+    },
+    {
+      "epoch": 0.777457480541943,
+      "grad_norm": 5.80053186416626,
+      "learning_rate": 6.800213339105683e-05,
+      "loss": 0.1838,
+      "step": 899
+    },
+    {
+      "epoch": 0.778322283078697,
+      "grad_norm": 2.8142247200012207,
+      "learning_rate": 6.785286083917017e-05,
+      "loss": 0.1141,
+      "step": 900
+    },
+    {
+      "epoch": 0.7791870856154511,
+      "grad_norm": 5.2369537353515625,
+      "learning_rate": 6.770340569205157e-05,
+      "loss": 0.4552,
+      "step": 901
+    },
+    {
+      "epoch": 0.7800518881522053,
+      "grad_norm": 7.276421070098877,
+      "learning_rate": 6.755376947829625e-05,
+      "loss": 0.4267,
+      "step": 902
+    },
+    {
+      "epoch": 0.7809166906889593,
+      "grad_norm": 10.988953590393066,
+      "learning_rate": 6.74039537283513e-05,
+      "loss": 1.0252,
+      "step": 903
+    },
+    {
+      "epoch": 0.7817814932257134,
+      "grad_norm": 10.337282180786133,
+      "learning_rate": 6.725395997450008e-05,
+      "loss": 0.6281,
+      "step": 904
+    },
+    {
+      "epoch": 0.7826462957624676,
+      "grad_norm": 10.337082862854004,
+      "learning_rate": 6.710378975084652e-05,
+      "loss": 0.6716,
+      "step": 905
+    },
+    {
+      "epoch": 0.7835110982992217,
+      "grad_norm": 3.361793279647827,
+      "learning_rate": 6.695344459329948e-05,
+      "loss": 0.1769,
+      "step": 906
+    },
+    {
+      "epoch": 0.7843759008359757,
+      "grad_norm": 8.392909049987793,
+      "learning_rate": 6.6802926039557e-05,
+      "loss": 0.428,
+      "step": 907
+    },
+    {
+      "epoch": 0.7852407033727299,
+      "grad_norm": 5.3866729736328125,
+      "learning_rate": 6.665223562909058e-05,
+      "loss": 0.335,
+      "step": 908
+    },
+    {
+      "epoch": 0.786105505909484,
+      "grad_norm": 8.97474479675293,
+      "learning_rate": 6.650137490312935e-05,
+      "loss": 0.6272,
+      "step": 909
+    },
+    {
+      "epoch": 0.786970308446238,
+      "grad_norm": 9.634217262268066,
+      "learning_rate": 6.635034540464456e-05,
+      "loss": 0.6253,
+      "step": 910
+    },
+    {
+      "epoch": 0.7878351109829922,
+      "grad_norm": 3.891382932662964,
+      "learning_rate": 6.619914867833343e-05,
+      "loss": 0.2603,
+      "step": 911
+    },
+    {
+      "epoch": 0.7886999135197463,
+      "grad_norm": 6.183927059173584,
+      "learning_rate": 6.60477862706037e-05,
+      "loss": 0.5737,
+      "step": 912
+    },
+    {
+      "epoch": 0.7895647160565005,
+      "grad_norm": 7.62052583694458,
+      "learning_rate": 6.589625972955764e-05,
+      "loss": 0.3792,
+      "step": 913
+    },
+    {
+      "epoch": 0.7904295185932545,
+      "grad_norm": 8.527345657348633,
+      "learning_rate": 6.574457060497618e-05,
+      "loss": 0.308,
+      "step": 914
+    },
+    {
+      "epoch": 0.7912943211300086,
+      "grad_norm": 4.892148494720459,
+      "learning_rate": 6.559272044830317e-05,
+      "loss": 0.2018,
+      "step": 915
+    },
+    {
+      "epoch": 0.7921591236667628,
+      "grad_norm": 3.214404582977295,
+      "learning_rate": 6.544071081262943e-05,
+      "loss": 0.1299,
+      "step": 916
+    },
+    {
+      "epoch": 0.7930239262035169,
+      "grad_norm": 7.314729690551758,
+      "learning_rate": 6.528854325267692e-05,
+      "loss": 0.4338,
+      "step": 917
+    },
+    {
+      "epoch": 0.7938887287402709,
+      "grad_norm": 6.503054618835449,
+      "learning_rate": 6.513621932478282e-05,
+      "loss": 0.2775,
+      "step": 918
+    },
+    {
+      "epoch": 0.7947535312770251,
+      "grad_norm": 3.8166730403900146,
+      "learning_rate": 6.498374058688359e-05,
+      "loss": 0.2077,
+      "step": 919
+    },
+    {
+      "epoch": 0.7956183338137792,
+      "grad_norm": 3.5877130031585693,
+      "learning_rate": 6.483110859849907e-05,
+      "loss": 0.2204,
+      "step": 920
+    },
+    {
+      "epoch": 0.7956183338137792,
+      "eval_Qnli-dev-1024_cosine_accuracy": 0.7604166666666666,
+      "eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.8248084783554077,
+      "eval_Qnli-dev-1024_cosine_ap": 0.7343586316206616,
+      "eval_Qnli-dev-1024_cosine_f1": 0.7628865979381444,
+      "eval_Qnli-dev-1024_cosine_f1_threshold": 0.8248084783554077,
+      "eval_Qnli-dev-1024_cosine_mcc": 0.528911810491234,
+      "eval_Qnli-dev-1024_cosine_precision": 0.7115384615384616,
+      "eval_Qnli-dev-1024_cosine_recall": 0.8222222222222222,
+      "eval_Qnli-dev_cosine_accuracy": 0.71875,
+      "eval_Qnli-dev_cosine_accuracy_threshold": 0.7927051782608032,
+      "eval_Qnli-dev_cosine_ap": 0.7346717053497452,
+      "eval_Qnli-dev_cosine_f1": 0.7254901960784313,
+      "eval_Qnli-dev_cosine_f1_threshold": 0.7089404463768005,
+      "eval_Qnli-dev_cosine_mcc": 0.43697448216965834,
+      "eval_Qnli-dev_cosine_precision": 0.6491228070175439,
+      "eval_Qnli-dev_cosine_recall": 0.8222222222222222,
+      "eval_allNLI--triplets-1024_cosine_accuracy": 0.9375,
+      "eval_allNLI-triplets_cosine_accuracy": 0.9583333134651184,
+      "eval_global_dataset_loss": 0.5776817798614502,
+      "eval_global_dataset_runtime": 67.9151,
+      "eval_global_dataset_samples_per_second": 14.312,
+      "eval_global_dataset_steps_per_second": 0.309,
+      "eval_sequential_score": 0.9375,
+      "eval_sts-test-1024_pearson_cosine": 0.8700957313588291,
+      "eval_sts-test-1024_spearman_cosine": 0.9050692984403192,
+      "eval_sts-test_pearson_cosine": 0.9076253073025099,
+      "eval_sts-test_spearman_cosine": 0.9206752404165891,
+      "step": 920
+    },
+    {
+      "epoch": 0.7964831363505332,
+      "grad_norm": 8.318743705749512,
+      "learning_rate": 6.467832492071649e-05,
+      "loss": 0.6926,
+      "step": 921
+    },
+    {
+      "epoch": 0.7973479388872874,
+      "grad_norm": 3.8544118404388428,
+      "learning_rate": 6.452539111617453e-05,
+      "loss": 0.287,
+      "step": 922
+    },
+    {
+      "epoch": 0.7982127414240415,
+      "grad_norm": 5.1298441886901855,
+      "learning_rate": 6.437230874904739e-05,
+      "loss": 0.119,
+      "step": 923
+    },
+    {
+      "epoch": 0.7990775439607957,
+      "grad_norm": 6.161678791046143,
+      "learning_rate": 6.421907938502865e-05,
+      "loss": 0.2366,
+      "step": 924
+    },
+    {
+      "epoch": 0.7999423464975497,
+      "grad_norm": 6.632068634033203,
+      "learning_rate": 6.406570459131538e-05,
+      "loss": 0.2398,
+      "step": 925
+    },
+    {
+      "epoch": 0.8008071490343038,
+      "grad_norm": 7.237907886505127,
+      "learning_rate": 6.39121859365921e-05,
+      "loss": 0.5197,
+      "step": 926
+    },
+    {
+      "epoch": 0.801671951571058,
+      "grad_norm": 4.103116035461426,
+      "learning_rate": 6.375852499101467e-05,
+      "loss": 0.2894,
+      "step": 927
+    },
+    {
+      "epoch": 0.802536754107812,
+      "grad_norm": 4.985617160797119,
+      "learning_rate": 6.36047233261943e-05,
+      "loss": 0.3327,
+      "step": 928
+    },
+    {
+      "epoch": 0.8034015566445661,
+      "grad_norm": 3.32094144821167,
+      "learning_rate": 6.345078251518143e-05,
+      "loss": 0.1615,
+      "step": 929
+    },
+    {
+      "epoch": 0.8042663591813203,
+      "grad_norm": 2.774306297302246,
+      "learning_rate": 6.329670413244967e-05,
+      "loss": 0.181,
+      "step": 930
+    },
+    {
+      "epoch": 0.8051311617180744,
+      "grad_norm": 5.269750595092773,
+      "learning_rate": 6.314248975387965e-05,
+      "loss": 0.2791,
+      "step": 931
+    },
+    {
+      "epoch": 0.8059959642548284,
+      "grad_norm": 6.187272548675537,
+      "learning_rate": 6.298814095674297e-05,
+      "loss": 0.4581,
+      "step": 932
+    },
+    {
+      "epoch": 0.8068607667915826,
+      "grad_norm": 5.671879291534424,
+      "learning_rate": 6.283365931968603e-05,
+      "loss": 0.2483,
+      "step": 933
+    },
+    {
+      "epoch": 0.8077255693283367,
+      "grad_norm": 12.671977043151855,
+      "learning_rate": 6.26790464227139e-05,
+      "loss": 1.2848,
+      "step": 934
+    },
+    {
+      "epoch": 0.8085903718650908,
+      "grad_norm": 4.673594951629639,
+      "learning_rate": 6.252430384717412e-05,
+      "loss": 0.2594,
+      "step": 935
+    },
+    {
+      "epoch": 0.8094551744018449,
+      "grad_norm": 6.6575117111206055,
+      "learning_rate": 6.236943317574056e-05,
+      "loss": 0.3473,
+      "step": 936
+    },
+    {
+      "epoch": 0.810319976938599,
+      "grad_norm": 3.377204656600952,
+      "learning_rate": 6.221443599239721e-05,
+      "loss": 0.2606,
+      "step": 937
+    },
+    {
+      "epoch": 0.8111847794753532,
+      "grad_norm": 7.629633903503418,
+      "learning_rate": 6.205931388242207e-05,
+      "loss": 0.3551,
+      "step": 938
+    },
+    {
+      "epoch": 0.8120495820121072,
+      "grad_norm": 7.514203071594238,
+      "learning_rate": 6.190406843237078e-05,
+      "loss": 0.3383,
+      "step": 939
+    },
+    {
+      "epoch": 0.8129143845488613,
+      "grad_norm": 6.37880277633667,
+      "learning_rate": 6.174870123006051e-05,
+      "loss": 0.2142,
+      "step": 940
+    },
+    {
+      "epoch": 0.8137791870856155,
+      "grad_norm": 5.619572639465332,
+      "learning_rate": 6.159321386455372e-05,
+      "loss": 0.2147,
+      "step": 941
+    },
+    {
+      "epoch": 0.8146439896223696,
+      "grad_norm": 4.535106658935547,
+      "learning_rate": 6.143760792614179e-05,
+      "loss": 0.2273,
+      "step": 942
+    },
+    {
+      "epoch": 0.8155087921591236,
+      "grad_norm": 9.820999145507812,
+      "learning_rate": 6.128188500632892e-05,
+      "loss": 0.9269,
+      "step": 943
+    },
+    {
+      "epoch": 0.8163735946958778,
+      "grad_norm": 13.05849838256836,
+      "learning_rate": 6.112604669781572e-05,
+      "loss": 1.5161,
+      "step": 944
+    },
+    {
+      "epoch": 0.8172383972326319,
+      "grad_norm": 5.841894626617432,
+      "learning_rate": 6.0970094594483004e-05,
+      "loss": 0.1962,
+      "step": 945
+    },
+    {
+      "epoch": 0.818103199769386,
+      "grad_norm": 7.9914069175720215,
+      "learning_rate": 6.0814030291375424e-05,
+      "loss": 0.5516,
+      "step": 946
+    },
+    {
+      "epoch": 0.8189680023061401,
+      "grad_norm": 4.961643695831299,
+      "learning_rate": 6.0657855384685215e-05,
+      "loss": 0.3498,
+      "step": 947
+    },
+    {
+      "epoch": 0.8198328048428942,
+      "grad_norm": 5.379317283630371,
+      "learning_rate": 6.050157147173581e-05,
+      "loss": 0.3962,
+      "step": 948
+    },
+    {
+      "epoch": 0.8206976073796484,
+      "grad_norm": 4.794488430023193,
+      "learning_rate": 6.0345180150965576e-05,
+      "loss": 0.2953,
+      "step": 949
+    },
+    {
+      "epoch": 0.8215624099164024,
+      "grad_norm": 3.9415969848632812,
+      "learning_rate": 6.0188683021911396e-05,
+      "loss": 0.1737,
+      "step": 950
+    },
+    {
+      "epoch": 0.8224272124531565,
+      "grad_norm": 5.720635890960693,
+      "learning_rate": 6.003208168519233e-05,
+      "loss": 0.1876,
+      "step": 951
+    },
+    {
+      "epoch": 0.8232920149899107,
+      "grad_norm": 5.777576923370361,
+      "learning_rate": 5.9875377742493276e-05,
+      "loss": 0.2037,
+      "step": 952
+    },
+    {
+      "epoch": 0.8241568175266647,
+      "grad_norm": 7.2347798347473145,
+      "learning_rate": 5.971857279654854e-05,
+      "loss": 0.6859,
+      "step": 953
+    },
+    {
+      "epoch": 0.8250216200634188,
+      "grad_norm": 9.168425559997559,
+      "learning_rate": 5.956166845112552e-05,
+      "loss": 0.5139,
+      "step": 954
+    },
+    {
+      "epoch": 0.825886422600173,
+      "grad_norm": 8.480242729187012,
+      "learning_rate": 5.9404666311008175e-05,
+      "loss": 0.4557,
+      "step": 955
+    },
+    {
+      "epoch": 0.8267512251369271,
+      "grad_norm": 7.415064811706543,
+      "learning_rate": 5.924756798198075e-05,
+      "loss": 0.418,
+      "step": 956
+    },
+    {
+      "epoch": 0.8276160276736811,
+      "grad_norm": 5.769486427307129,
+      "learning_rate": 5.909037507081121e-05,
+      "loss": 0.3326,
+      "step": 957
+    },
+    {
+      "epoch": 0.8284808302104353,
+      "grad_norm": 9.98505687713623,
+      "learning_rate": 5.893308918523498e-05,
+      "loss": 0.6773,
+      "step": 958
+    },
+    {
+      "epoch": 0.8293456327471894,
+      "grad_norm": 6.003732681274414,
+      "learning_rate": 5.877571193393837e-05,
+      "loss": 0.1938,
+      "step": 959
+    },
+    {
+      "epoch": 0.8302104352839434,
+      "grad_norm": 6.989200115203857,
+      "learning_rate": 5.8618244926542156e-05,
+      "loss": 0.2502,
+      "step": 960
+    },
+    {
+      "epoch": 0.8310752378206976,
+      "grad_norm": 5.944050312042236,
+      "learning_rate": 5.84606897735851e-05,
+      "loss": 0.1686,
+      "step": 961
+    },
+    {
+      "epoch": 0.8319400403574517,
+      "grad_norm": 2.568422794342041,
+      "learning_rate": 5.830304808650753e-05,
+      "loss": 0.1174,
+      "step": 962
+    },
+    {
+      "epoch": 0.8328048428942059,
+      "grad_norm": 6.347965717315674,
+      "learning_rate": 5.814532147763478e-05,
+      "loss": 0.3688,
+      "step": 963
+    },
+    {
+      "epoch": 0.8336696454309599,
+      "grad_norm": 9.391959190368652,
+      "learning_rate": 5.798751156016085e-05,
+      "loss": 0.4529,
+      "step": 964
+    },
+    {
+      "epoch": 0.834534447967714,
+      "grad_norm": 7.86402702331543,
+      "learning_rate": 5.7829619948131654e-05,
+      "loss": 0.5973,
+      "step": 965
+    },
+    {
+      "epoch": 0.8353992505044682,
+      "grad_norm": 9.44655990600586,
+      "learning_rate": 5.767164825642879e-05,
+      "loss": 0.7635,
+      "step": 966
+    },
+    {
+      "epoch": 0.8362640530412222,
+      "grad_norm": 7.177609920501709,
+      "learning_rate": 5.751359810075284e-05,
+      "loss": 0.5631,
+      "step": 967
+    },
+    {
+      "epoch": 0.8371288555779763,
+      "grad_norm": 5.718000411987305,
+      "learning_rate": 5.735547109760686e-05,
+      "loss": 0.313,
+      "step": 968
+    },
+    {
+      "epoch": 0.8379936581147305,
+      "grad_norm": 6.908907413482666,
+      "learning_rate": 5.719726886427998e-05,
+      "loss": 0.4425,
+      "step": 969
+    },
+    {
+      "epoch": 0.8388584606514846,
+      "grad_norm": 6.510931491851807,
+      "learning_rate": 5.7038993018830675e-05,
+      "loss": 0.3581,
+      "step": 970
+    },
+    {
+      "epoch": 0.8397232631882386,
+      "grad_norm": 6.836475372314453,
+      "learning_rate": 5.688064518007036e-05,
+      "loss": 0.2542,
+      "step": 971
+    },
+    {
+      "epoch": 0.8405880657249928,
+      "grad_norm": 6.765063762664795,
+      "learning_rate": 5.6722226967546764e-05,
+      "loss": 0.2576,
+      "step": 972
+    },
+    {
+      "epoch": 0.8414528682617469,
+      "grad_norm": 2.587757110595703,
+      "learning_rate": 5.65637400015274e-05,
+      "loss": 0.1156,
+      "step": 973
+    },
+    {
+      "epoch": 0.8423176707985011,
+      "grad_norm": 11.528030395507812,
+      "learning_rate": 5.640518590298298e-05,
+      "loss": 0.8184,
+      "step": 974
+    },
+    {
+      "epoch": 0.8431824733352551,
+      "grad_norm": 7.4515790939331055,
+      "learning_rate": 5.624656629357081e-05,
+      "loss": 0.3536,
+      "step": 975
+    },
+    {
+      "epoch": 0.8440472758720092,
+      "grad_norm": 6.2617082595825195,
+      "learning_rate": 5.6087882795618216e-05,
+      "loss": 0.3023,
+      "step": 976
+    },
+    {
+      "epoch": 0.8449120784087634,
+      "grad_norm": 4.997031211853027,
+      "learning_rate": 5.5929137032106005e-05,
+      "loss": 0.418,
+      "step": 977
+    },
+    {
+      "epoch": 0.8457768809455174,
+      "grad_norm": 7.6783671379089355,
+      "learning_rate": 5.577033062665179e-05,
+      "loss": 0.3036,
+      "step": 978
+    },
+    {
+      "epoch": 0.8466416834822715,
+      "grad_norm": 7.0620436668396,
+      "learning_rate": 5.561146520349343e-05,
+      "loss": 0.55,
+      "step": 979
+    },
+    {
+      "epoch": 0.8475064860190257,
+      "grad_norm": 8.351699829101562,
+      "learning_rate": 5.5452542387472416e-05,
+      "loss": 0.6477,
+      "step": 980
+    },
+    {
+      "epoch": 0.8483712885557798,
+      "grad_norm": 7.685431480407715,
+      "learning_rate": 5.529356380401722e-05,
+      "loss": 0.3518,
+      "step": 981
+    },
+    {
+      "epoch": 0.8492360910925338,
+      "grad_norm": 9.351055145263672,
+      "learning_rate": 5.5134531079126704e-05,
+      "loss": 0.7033,
+      "step": 982
+    },
+    {
+      "epoch": 0.850100893629288,
+      "grad_norm": 9.499361038208008,
+      "learning_rate": 5.497544583935347e-05,
+      "loss": 0.6931,
+      "step": 983
+    },
+    {
+      "epoch": 0.8509656961660421,
+      "grad_norm": 10.090303421020508,
+      "learning_rate": 5.481630971178721e-05,
+      "loss": 0.9278,
+      "step": 984
+    },
+    {
+      "epoch": 0.8518304987027961,
+      "grad_norm": 4.208652019500732,
+      "learning_rate": 5.465712432403812e-05,
+      "loss": 0.3061,
+      "step": 985
+    },
+    {
+      "epoch": 0.8526953012395503,
+      "grad_norm": 9.341512680053711,
+      "learning_rate": 5.4497891304220225e-05,
+      "loss": 0.8352,
+      "step": 986
+    },
+    {
+      "epoch": 0.8535601037763044,
+      "grad_norm": 1.4906487464904785,
+      "learning_rate": 5.433861228093471e-05,
+      "loss": 0.125,
+      "step": 987
+    },
+    {
+      "epoch": 0.8544249063130586,
+      "grad_norm": 2.660661458969116,
+      "learning_rate": 5.417928888325324e-05,
+      "loss": 0.2284,
+      "step": 988
+    },
+    {
+      "epoch": 0.8552897088498126,
+      "grad_norm": 10.015325546264648,
+      "learning_rate": 5.401992274070136e-05,
+      "loss": 0.838,
+      "step": 989
+    },
+    {
+      "epoch": 0.8561545113865667,
+      "grad_norm": 8.29864501953125,
+      "learning_rate": 5.386051548324179e-05,
+      "loss": 0.5318,
+      "step": 990
+    },
+    {
+      "epoch": 0.8570193139233209,
+      "grad_norm": 4.587142467498779,
+      "learning_rate": 5.3701068741257796e-05,
+      "loss": 0.1618,
+      "step": 991
+    },
+    {
+      "epoch": 0.8578841164600749,
+      "grad_norm": 1.8213179111480713,
+      "learning_rate": 5.354158414553646e-05,
+      "loss": 0.0871,
+      "step": 992
+    },
+    {
+      "epoch": 0.858748918996829,
+      "grad_norm": 8.93700122833252,
+      "learning_rate": 5.3382063327252017e-05,
+      "loss": 0.6915,
+      "step": 993
+    },
+    {
+      "epoch": 0.8596137215335832,
+      "grad_norm": 4.793188095092773,
+      "learning_rate": 5.322250791794916e-05,
+      "loss": 0.3728,
+      "step": 994
+    },
+    {
+      "epoch": 0.8604785240703373,
+      "grad_norm": 4.624011516571045,
+      "learning_rate": 5.3062919549526436e-05,
+      "loss": 0.2403,
+      "step": 995
+    },
+    {
+      "epoch": 0.8613433266070913,
+      "grad_norm": 1.8955051898956299,
+      "learning_rate": 5.2903299854219435e-05,
+      "loss": 0.0651,
+      "step": 996
+    },
+    {
+      "epoch": 0.8622081291438455,
+      "grad_norm": 10.889961242675781,
+      "learning_rate": 5.274365046458416e-05,
+      "loss": 0.5783,
+      "step": 997
+    },
+    {
+      "epoch": 0.8630729316805996,
+      "grad_norm": 4.15156888961792,
+      "learning_rate": 5.258397301348035e-05,
+      "loss": 0.2061,
+      "step": 998
+    },
+    {
+      "epoch": 0.8639377342173538,
+      "grad_norm": 3.9485700130462646,
+      "learning_rate": 5.2424269134054694e-05,
+      "loss": 0.154,
+      "step": 999
+    },
+    {
+      "epoch": 0.8648025367541078,
+      "grad_norm": 9.996199607849121,
+      "learning_rate": 5.2264540459724276e-05,
+      "loss": 0.4689,
+      "step": 1000
+    },
+    {
+      "epoch": 0.8656673392908619,
+      "grad_norm": 7.154214382171631,
+      "learning_rate": 5.21047886241597e-05,
+      "loss": 0.2088,
+      "step": 1001
+    },
+    {
+      "epoch": 0.8665321418276161,
+      "grad_norm": 8.80577564239502,
+      "learning_rate": 5.194501526126842e-05,
+      "loss": 0.5299,
+      "step": 1002
+    },
+    {
+      "epoch": 0.8673969443643701,
+      "grad_norm": 5.227262020111084,
+      "learning_rate": 5.1785222005178224e-05,
+      "loss": 0.2689,
+      "step": 1003
+    },
+    {
+      "epoch": 0.8682617469011242,
+      "grad_norm": 6.6007843017578125,
+      "learning_rate": 5.162541049022019e-05,
+      "loss": 0.3098,
+      "step": 1004
+    },
+    {
+      "epoch": 0.8691265494378784,
+      "grad_norm": 6.239222526550293,
+      "learning_rate": 5.146558235091225e-05,
+      "loss": 0.3478,
+      "step": 1005
+    },
+    {
+      "epoch": 0.8699913519746325,
+      "grad_norm": 2.814821243286133,
+      "learning_rate": 5.1305739221942364e-05,
+      "loss": 0.1841,
+      "step": 1006
+    },
+    {
+      "epoch": 0.8708561545113865,
+      "grad_norm": 1.4831047058105469,
+      "learning_rate": 5.114588273815173e-05,
+      "loss": 0.0862,
+      "step": 1007
+    },
+    {
+      "epoch": 0.8717209570481407,
+      "grad_norm": 8.568103790283203,
+      "learning_rate": 5.09860145345182e-05,
+      "loss": 0.5991,
+      "step": 1008
+    },
+    {
+      "epoch": 0.8725857595848948,
+      "grad_norm": 9.560081481933594,
+      "learning_rate": 5.082613624613946e-05,
+      "loss": 0.4026,
+      "step": 1009
+    },
+    {
+      "epoch": 0.8734505621216488,
+      "grad_norm": 7.88618803024292,
+      "learning_rate": 5.066624950821637e-05,
+      "loss": 0.4991,
+      "step": 1010
+    },
+    {
+      "epoch": 0.874315364658403,
+      "grad_norm": 5.938468933105469,
+      "learning_rate": 5.05063559560362e-05,
+      "loss": 0.2786,
+      "step": 1011
+    },
+    {
+      "epoch": 0.8751801671951571,
+      "grad_norm": 9.075552940368652,
+      "learning_rate": 5.0346457224955903e-05,
+      "loss": 0.4708,
+      "step": 1012
+    },
+    {
+      "epoch": 0.8760449697319113,
+      "grad_norm": 8.848043441772461,
+      "learning_rate": 5.018655495038541e-05,
+      "loss": 0.4201,
+      "step": 1013
+    },
+    {
+      "epoch": 0.8769097722686653,
+      "grad_norm": 5.168188095092773,
+      "learning_rate": 5.002665076777091e-05,
+      "loss": 0.2089,
+      "step": 1014
+    },
+    {
+      "epoch": 0.8777745748054194,
+      "grad_norm": 4.413999557495117,
+      "learning_rate": 4.986674631257804e-05,
+      "loss": 0.3158,
+      "step": 1015
+    },
+    {
+      "epoch": 0.8786393773421736,
+      "grad_norm": 9.610701560974121,
+      "learning_rate": 4.970684322027534e-05,
+      "loss": 0.7363,
+      "step": 1016
+    },
+    {
+      "epoch": 0.8795041798789276,
+      "grad_norm": 6.793404579162598,
+      "learning_rate": 4.9546943126317274e-05,
+      "loss": 0.2885,
+      "step": 1017
+    },
+    {
+      "epoch": 0.8803689824156817,
+      "grad_norm": 9.434625625610352,
+      "learning_rate": 4.9387047666127786e-05,
+      "loss": 0.4937,
+      "step": 1018
+    },
+    {
+      "epoch": 0.8812337849524359,
+      "grad_norm": 6.130424499511719,
+      "learning_rate": 4.9227158475083304e-05,
+      "loss": 0.1684,
+      "step": 1019
+    },
+    {
+      "epoch": 0.88209858748919,
+      "grad_norm": 4.250467777252197,
+      "learning_rate": 4.9067277188496185e-05,
+      "loss": 0.2749,
+      "step": 1020
+    },
+    {
+      "epoch": 0.882963390025944,
+      "grad_norm": 3.2336244583129883,
+      "learning_rate": 4.890740544159796e-05,
+      "loss": 0.2789,
+      "step": 1021
+    },
+    {
+      "epoch": 0.8838281925626982,
+      "grad_norm": 7.7692084312438965,
+      "learning_rate": 4.874754486952255e-05,
+      "loss": 0.5868,
+      "step": 1022
+    },
+    {
+      "epoch": 0.8846929950994523,
+      "grad_norm": 7.071033954620361,
+      "learning_rate": 4.8587697107289626e-05,
+      "loss": 0.5894,
+      "step": 1023
+    },
+    {
+      "epoch": 0.8855577976362063,
+      "grad_norm": 6.448328971862793,
+      "learning_rate": 4.84278637897878e-05,
+      "loss": 0.1635,
+      "step": 1024
+    },
+    {
+      "epoch": 0.8864226001729605,
+      "grad_norm": 11.375746726989746,
+      "learning_rate": 4.826804655175795e-05,
+      "loss": 0.6829,
+      "step": 1025
+    },
+    {
+      "epoch": 0.8872874027097146,
+      "grad_norm": 1.4379364252090454,
+      "learning_rate": 4.8108247027776565e-05,
+      "loss": 0.1124,
+      "step": 1026
+    },
+    {
+      "epoch": 0.8881522052464688,
+      "grad_norm": 11.497692108154297,
+      "learning_rate": 4.794846685223886e-05,
+      "loss": 1.2642,
+      "step": 1027
+    },
+    {
+      "epoch": 0.8890170077832228,
+      "grad_norm": 6.77423620223999,
+      "learning_rate": 4.778870765934221e-05,
+      "loss": 0.6585,
+      "step": 1028
+    },
+    {
+      "epoch": 0.8898818103199769,
+      "grad_norm": 6.651241779327393,
+      "learning_rate": 4.762897108306939e-05,
+      "loss": 0.2622,
+      "step": 1029
+    },
+    {
+      "epoch": 0.8907466128567311,
+      "grad_norm": 7.476505279541016,
+      "learning_rate": 4.7469258757171854e-05,
+      "loss": 0.2797,
+      "step": 1030
+    },
+    {
+      "epoch": 0.8916114153934852,
+      "grad_norm": 7.375949382781982,
+      "learning_rate": 4.7309572315152976e-05,
+      "loss": 0.2747,
+      "step": 1031
+    },
+    {
+      "epoch": 0.8924762179302392,
+      "grad_norm": 4.907548427581787,
+      "learning_rate": 4.7149913390251494e-05,
+      "loss": 0.3748,
+      "step": 1032
+    },
+    {
+      "epoch": 0.8933410204669934,
+      "grad_norm": 7.232724189758301,
+      "learning_rate": 4.6990283615424605e-05,
+      "loss": 0.1792,
+      "step": 1033
+    },
+    {
+      "epoch": 0.8942058230037475,
+      "grad_norm": 6.12727165222168,
+      "learning_rate": 4.6830684623331446e-05,
+      "loss": 0.342,
+      "step": 1034
+    },
+    {
+      "epoch": 0.8950706255405015,
+      "grad_norm": 4.968775272369385,
+      "learning_rate": 4.667111804631626e-05,
+      "loss": 0.3287,
+      "step": 1035
+    },
+    {
+      "epoch": 0.8950706255405015,
+      "eval_Qnli-dev-1024_cosine_accuracy": 0.7083333333333334,
+      "eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.8668882846832275,
+      "eval_Qnli-dev-1024_cosine_ap": 0.6999822477767415,
+      "eval_Qnli-dev-1024_cosine_f1": 0.7090909090909091,
+      "eval_Qnli-dev-1024_cosine_f1_threshold": 0.7944933772087097,
+      "eval_Qnli-dev-1024_cosine_mcc": 0.3808509397785054,
+      "eval_Qnli-dev-1024_cosine_precision": 0.6,
+      "eval_Qnli-dev-1024_cosine_recall": 0.8666666666666667,
+      "eval_Qnli-dev_cosine_accuracy": 0.71875,
+      "eval_Qnli-dev_cosine_accuracy_threshold": 0.7880457639694214,
+      "eval_Qnli-dev_cosine_ap": 0.7340095878922616,
+      "eval_Qnli-dev_cosine_f1": 0.7238095238095237,
+      "eval_Qnli-dev_cosine_f1_threshold": 0.703315019607544,
+      "eval_Qnli-dev_cosine_mcc": 0.42578476395267345,
+      "eval_Qnli-dev_cosine_precision": 0.6333333333333333,
+      "eval_Qnli-dev_cosine_recall": 0.8444444444444444,
+      "eval_allNLI--triplets-1024_cosine_accuracy": 0.9375,
+      "eval_allNLI-triplets_cosine_accuracy": 0.9791666865348816,
+      "eval_global_dataset_loss": 0.5188027620315552,
+      "eval_global_dataset_runtime": 67.9093,
+      "eval_global_dataset_samples_per_second": 14.313,
+      "eval_global_dataset_steps_per_second": 0.309,
+      "eval_sequential_score": 0.9375,
+      "eval_sts-test-1024_pearson_cosine": 0.8797952712975998,
+      "eval_sts-test-1024_spearman_cosine": 0.9135158587840699,
+      "eval_sts-test_pearson_cosine": 0.9084511864603124,
+      "eval_sts-test_spearman_cosine": 0.9222536610997011,
+      "step": 1035
+    },
+    {
+      "epoch": 0.8959354280772557,
+      "grad_norm": 8.467510223388672,
+      "learning_rate": 4.651158551639177e-05,
+      "loss": 0.5348,
+      "step": 1036
+    },
+    {
+      "epoch": 0.8968002306140098,
+      "grad_norm": 6.454378604888916,
+      "learning_rate": 4.635208866522251e-05,
+      "loss": 0.6588,
+      "step": 1037
+    },
+    {
+      "epoch": 0.897665033150764,
+      "grad_norm": 6.776357650756836,
+      "learning_rate": 4.619262912410804e-05,
+      "loss": 0.5132,
+      "step": 1038
+    },
+    {
+      "epoch": 0.898529835687518,
+      "grad_norm": 10.85428237915039,
+      "learning_rate": 4.603320852396637e-05,
+      "loss": 1.1707,
+      "step": 1039
+    },
+    {
+      "epoch": 0.8993946382242721,
+      "grad_norm": 3.449406862258911,
+      "learning_rate": 4.587382849531717e-05,
+      "loss": 0.1442,
+      "step": 1040
+    },
+    {
+      "epoch": 0.9002594407610263,
+      "grad_norm": 2.9549670219421387,
+      "learning_rate": 4.5714490668265245e-05,
+      "loss": 0.2243,
+      "step": 1041
+    },
+    {
+      "epoch": 0.9011242432977803,
+      "grad_norm": 4.6617817878723145,
+      "learning_rate": 4.5555196672483685e-05,
+      "loss": 0.3099,
+      "step": 1042
+    },
+    {
+      "epoch": 0.9019890458345344,
+      "grad_norm": 6.141875267028809,
+      "learning_rate": 4.5395948137197296e-05,
+      "loss": 0.1839,
+      "step": 1043
+    },
+    {
+      "epoch": 0.9028538483712886,
+      "grad_norm": 12.232782363891602,
+      "learning_rate": 4.5236746691166e-05,
+      "loss": 0.6248,
+      "step": 1044
+    },
+    {
+      "epoch": 0.9037186509080427,
+      "grad_norm": 5.728059768676758,
+      "learning_rate": 4.507759396266802e-05,
+      "loss": 0.4605,
+      "step": 1045
+    },
+    {
+      "epoch": 0.9045834534447967,
+      "grad_norm": 8.688108444213867,
+      "learning_rate": 4.49184915794833e-05,
+      "loss": 0.4857,
+      "step": 1046
+    },
+    {
+      "epoch": 0.9054482559815509,
+      "grad_norm": 8.695257186889648,
+      "learning_rate": 4.475944116887695e-05,
+      "loss": 0.3966,
+      "step": 1047
+    },
+    {
+      "epoch": 0.906313058518305,
+      "grad_norm": 5.200995922088623,
+      "learning_rate": 4.460044435758241e-05,
+      "loss": 0.4439,
+      "step": 1048
+    },
+    {
+      "epoch": 0.907177861055059,
+      "grad_norm": 12.601680755615234,
+      "learning_rate": 4.4441502771785003e-05,
+      "loss": 0.6051,
+      "step": 1049
+    },
+    {
+      "epoch": 0.9080426635918132,
+      "grad_norm": 9.575990676879883,
+      "learning_rate": 4.428261803710516e-05,
+      "loss": 0.3982,
+      "step": 1050
+    },
+    {
+      "epoch": 0.9089074661285673,
+      "grad_norm": 2.344109058380127,
+      "learning_rate": 4.4123791778581865e-05,
+      "loss": 0.1718,
+      "step": 1051
+    },
+    {
+      "epoch": 0.9097722686653215,
+      "grad_norm": 7.567986488342285,
+      "learning_rate": 4.3965025620656065e-05,
+      "loss": 0.2641,
+      "step": 1052
+    },
+    {
+      "epoch": 0.9106370712020755,
+      "grad_norm": 8.634700775146484,
+      "learning_rate": 4.3806321187153934e-05,
+      "loss": 0.3788,
+      "step": 1053
+    },
+    {
+      "epoch": 0.9115018737388296,
+      "grad_norm": 8.53459644317627,
+      "learning_rate": 4.3647680101270416e-05,
+      "loss": 0.4456,
+      "step": 1054
+    },
+    {
+      "epoch": 0.9123666762755838,
+      "grad_norm": 10.249025344848633,
+      "learning_rate": 4.348910398555249e-05,
+      "loss": 1.0234,
+      "step": 1055
+    },
+    {
+      "epoch": 0.9132314788123379,
+      "grad_norm": 10.008344650268555,
+      "learning_rate": 4.333059446188269e-05,
+      "loss": 0.6228,
+      "step": 1056
+    },
+    {
+      "epoch": 0.9140962813490919,
+      "grad_norm": 8.067853927612305,
+      "learning_rate": 4.317215315146238e-05,
+      "loss": 0.4588,
+      "step": 1057
+    },
+    {
+      "epoch": 0.9149610838858461,
+      "grad_norm": 10.182132720947266,
+      "learning_rate": 4.301378167479532e-05,
+      "loss": 0.8651,
+      "step": 1058
+    },
+    {
+      "epoch": 0.9158258864226002,
+      "grad_norm": 11.363606452941895,
+      "learning_rate": 4.285548165167105e-05,
+      "loss": 0.8571,
+      "step": 1059
+    },
+    {
+      "epoch": 0.9166906889593542,
+      "grad_norm": 10.103208541870117,
+      "learning_rate": 4.2697254701148235e-05,
+      "loss": 0.6446,
+      "step": 1060
+    },
+    {
+      "epoch": 0.9175554914961084,
+      "grad_norm": 6.2334418296813965,
+      "learning_rate": 4.253910244153817e-05,
+      "loss": 0.2193,
+      "step": 1061
+    },
+    {
+      "epoch": 0.9184202940328625,
+      "grad_norm": 5.234436511993408,
+      "learning_rate": 4.2381026490388245e-05,
+      "loss": 0.258,
+      "step": 1062
+    },
+    {
+      "epoch": 0.9192850965696167,
+      "grad_norm": 8.499395370483398,
+      "learning_rate": 4.222302846446544e-05,
+      "loss": 0.4164,
+      "step": 1063
+    },
+    {
+      "epoch": 0.9201498991063707,
+      "grad_norm": 5.450392723083496,
+      "learning_rate": 4.206510997973963e-05,
+      "loss": 0.4783,
+      "step": 1064
+    },
+    {
+      "epoch": 0.9210147016431248,
+      "grad_norm": 5.65176248550415,
+      "learning_rate": 4.1907272651367226e-05,
+      "loss": 0.246,
+      "step": 1065
+    },
+    {
+      "epoch": 0.921879504179879,
+      "grad_norm": 8.317374229431152,
+      "learning_rate": 4.1749518093674566e-05,
+      "loss": 0.3821,
+      "step": 1066
+    },
+    {
+      "epoch": 0.922744306716633,
+      "grad_norm": 4.983073711395264,
+      "learning_rate": 4.159184792014145e-05,
+      "loss": 0.182,
+      "step": 1067
+    },
+    {
+      "epoch": 0.9236091092533871,
+      "grad_norm": 10.939299583435059,
+      "learning_rate": 4.143426374338459e-05,
+      "loss": 0.6648,
+      "step": 1068
+    },
+    {
+      "epoch": 0.9244739117901413,
+      "grad_norm": 5.333117485046387,
+      "learning_rate": 4.1276767175141125e-05,
+      "loss": 0.5405,
+      "step": 1069
+    },
+    {
+      "epoch": 0.9253387143268954,
+      "grad_norm": 6.263637542724609,
+      "learning_rate": 4.1119359826252226e-05,
+      "loss": 0.3681,
+      "step": 1070
+    },
+    {
+      "epoch": 0.9262035168636494,
+      "grad_norm": 5.16562032699585,
+      "learning_rate": 4.0962043306646455e-05,
+      "loss": 0.2323,
+      "step": 1071
+    },
+    {
+      "epoch": 0.9270683194004036,
+      "grad_norm": 6.132068634033203,
+      "learning_rate": 4.080481922532348e-05,
+      "loss": 0.4676,
+      "step": 1072
+    },
+    {
+      "epoch": 0.9279331219371577,
+      "grad_norm": 8.957972526550293,
+      "learning_rate": 4.064768919033746e-05,
+      "loss": 0.5141,
+      "step": 1073
+    },
+    {
+      "epoch": 0.9287979244739117,
+      "grad_norm": 7.958962440490723,
+      "learning_rate": 4.0490654808780685e-05,
+      "loss": 0.3067,
+      "step": 1074
+    },
+    {
+      "epoch": 0.9296627270106659,
+      "grad_norm": 6.653066158294678,
+      "learning_rate": 4.033371768676716e-05,
+      "loss": 0.4638,
+      "step": 1075
+    },
+    {
+      "epoch": 0.93052752954742,
+      "grad_norm": 5.897211074829102,
+      "learning_rate": 4.0176879429416086e-05,
+      "loss": 0.3082,
+      "step": 1076
+    },
+    {
+      "epoch": 0.9313923320841742,
+      "grad_norm": 8.102348327636719,
+      "learning_rate": 4.002014164083552e-05,
+      "loss": 0.4003,
+      "step": 1077
+    },
+    {
+      "epoch": 0.9322571346209282,
+      "grad_norm": 7.730281829833984,
+      "learning_rate": 3.9863505924105995e-05,
+      "loss": 0.3053,
+      "step": 1078
+    },
+    {
+      "epoch": 0.9331219371576823,
+      "grad_norm": 5.675047397613525,
+      "learning_rate": 3.970697388126397e-05,
+      "loss": 0.1876,
+      "step": 1079
+    },
+    {
+      "epoch": 0.9339867396944365,
+      "grad_norm": 9.553377151489258,
+      "learning_rate": 3.9550547113285665e-05,
+      "loss": 0.569,
+      "step": 1080
+    },
+    {
+      "epoch": 0.9348515422311905,
+      "grad_norm": 10.86451244354248,
+      "learning_rate": 3.9394227220070466e-05,
+      "loss": 0.8728,
+      "step": 1081
+    },
+    {
+      "epoch": 0.9357163447679446,
+      "grad_norm": 9.33718204498291,
+      "learning_rate": 3.923801580042476e-05,
+      "loss": 0.4347,
+      "step": 1082
+    },
+    {
+      "epoch": 0.9365811473046988,
+      "grad_norm": 8.696025848388672,
+      "learning_rate": 3.90819144520454e-05,
+      "loss": 0.8919,
+      "step": 1083
+    },
+    {
+      "epoch": 0.9374459498414529,
+      "grad_norm": 7.635885238647461,
+      "learning_rate": 3.892592477150352e-05,
+      "loss": 0.4828,
+      "step": 1084
+    },
+    {
+      "epoch": 0.9383107523782069,
+      "grad_norm": 7.686861038208008,
+      "learning_rate": 3.877004835422815e-05,
+      "loss": 0.4338,
+      "step": 1085
+    },
+    {
+      "epoch": 0.9391755549149611,
+      "grad_norm": 6.8635029792785645,
+      "learning_rate": 3.861428679448983e-05,
+      "loss": 0.359,
+      "step": 1086
+    },
+    {
+      "epoch": 0.9400403574517152,
+      "grad_norm": 4.335479736328125,
+      "learning_rate": 3.845864168538437e-05,
+      "loss": 0.3828,
+      "step": 1087
+    },
+    {
+      "epoch": 0.9409051599884694,
+      "grad_norm": 7.711667537689209,
+      "learning_rate": 3.8303114618816577e-05,
+      "loss": 0.5294,
+      "step": 1088
+    },
+    {
+      "epoch": 0.9417699625252234,
+      "grad_norm": 6.784587383270264,
+      "learning_rate": 3.814770718548396e-05,
+      "loss": 0.4212,
+      "step": 1089
+    },
+    {
+      "epoch": 0.9426347650619775,
+      "grad_norm": 8.687413215637207,
+      "learning_rate": 3.7992420974860384e-05,
+      "loss": 0.5723,
+      "step": 1090
+    },
+    {
+      "epoch": 0.9434995675987317,
+      "grad_norm": 3.785308361053467,
+      "learning_rate": 3.783725757517994e-05,
+      "loss": 0.2047,
+      "step": 1091
+    },
+    {
+      "epoch": 0.9443643701354857,
+      "grad_norm": 8.60908031463623,
+      "learning_rate": 3.7682218573420576e-05,
+      "loss": 0.4359,
+      "step": 1092
+    },
+    {
+      "epoch": 0.9452291726722398,
+      "grad_norm": 3.608921527862549,
+      "learning_rate": 3.7527305555287976e-05,
+      "loss": 0.2121,
+      "step": 1093
+    },
+    {
+      "epoch": 0.946093975208994,
+      "grad_norm": 7.160829544067383,
+      "learning_rate": 3.737252010519925e-05,
+      "loss": 0.461,
+      "step": 1094
+    },
+    {
+      "epoch": 0.9469587777457481,
+      "grad_norm": 2.5925629138946533,
+      "learning_rate": 3.721786380626675e-05,
+      "loss": 0.1127,
+      "step": 1095
+    },
+    {
+      "epoch": 0.9478235802825021,
+      "grad_norm": 9.759129524230957,
+      "learning_rate": 3.706333824028201e-05,
+      "loss": 0.5365,
+      "step": 1096
+    },
+    {
+      "epoch": 0.9486883828192563,
+      "grad_norm": 9.999465942382812,
+      "learning_rate": 3.690894498769933e-05,
+      "loss": 1.0112,
+      "step": 1097
+    },
+    {
+      "epoch": 0.9495531853560104,
+      "grad_norm": 9.034364700317383,
+      "learning_rate": 3.675468562761982e-05,
+      "loss": 0.6563,
+      "step": 1098
+    },
+    {
+      "epoch": 0.9504179878927644,
+      "grad_norm": 2.152198076248169,
+      "learning_rate": 3.6600561737775106e-05,
+      "loss": 0.0732,
+      "step": 1099
+    },
+    {
+      "epoch": 0.9512827904295186,
+      "grad_norm": 4.004874229431152,
+      "learning_rate": 3.6446574894511265e-05,
+      "loss": 0.1631,
+      "step": 1100
+    },
+    {
+      "epoch": 0.9521475929662727,
+      "grad_norm": 7.518155097961426,
+      "learning_rate": 3.629272667277274e-05,
+      "loss": 0.4512,
+      "step": 1101
+    },
+    {
+      "epoch": 0.9530123955030269,
+      "grad_norm": 5.864679336547852,
+      "learning_rate": 3.613901864608611e-05,
+      "loss": 0.2731,
+      "step": 1102
+    },
+    {
+      "epoch": 0.9538771980397809,
+      "grad_norm": 7.249544620513916,
+      "learning_rate": 3.598545238654416e-05,
+      "loss": 0.4866,
+      "step": 1103
+    },
+    {
+      "epoch": 0.954742000576535,
+      "grad_norm": 2.4601848125457764,
+      "learning_rate": 3.583202946478963e-05,
+      "loss": 0.2007,
+      "step": 1104
+    },
+    {
+      "epoch": 0.9556068031132892,
+      "grad_norm": 7.753067970275879,
+      "learning_rate": 3.567875144999925e-05,
+      "loss": 0.501,
+      "step": 1105
+    },
+    {
+      "epoch": 0.9564716056500432,
+      "grad_norm": 11.398188591003418,
+      "learning_rate": 3.5525619909867704e-05,
+      "loss": 0.8343,
+      "step": 1106
+    },
+    {
+      "epoch": 0.9573364081867973,
+      "grad_norm": 3.151561975479126,
+      "learning_rate": 3.537263641059152e-05,
+      "loss": 0.1781,
+      "step": 1107
+    },
+    {
+      "epoch": 0.9582012107235515,
+      "grad_norm": 5.797046184539795,
+      "learning_rate": 3.521980251685315e-05,
+      "loss": 0.3011,
+      "step": 1108
+    },
+    {
+      "epoch": 0.9590660132603056,
+      "grad_norm": 8.037071228027344,
+      "learning_rate": 3.506711979180485e-05,
+      "loss": 0.423,
+      "step": 1109
+    },
+    {
+      "epoch": 0.9599308157970596,
+      "grad_norm": 8.320140838623047,
+      "learning_rate": 3.49145897970528e-05,
+      "loss": 0.6317,
+      "step": 1110
+    },
+    {
+      "epoch": 0.9607956183338138,
+      "grad_norm": 7.24954080581665,
+      "learning_rate": 3.47622140926411e-05,
+      "loss": 0.3058,
+      "step": 1111
+    },
+    {
+      "epoch": 0.9616604208705679,
+      "grad_norm": 2.021778106689453,
+      "learning_rate": 3.4609994237035746e-05,
+      "loss": 0.1734,
+      "step": 1112
+    },
+    {
+      "epoch": 0.962525223407322,
+      "grad_norm": 4.613988876342773,
+      "learning_rate": 3.4457931787108774e-05,
+      "loss": 0.1935,
+      "step": 1113
+    },
+    {
+      "epoch": 0.9633900259440761,
+      "grad_norm": 4.552547454833984,
+      "learning_rate": 3.4306028298122316e-05,
+      "loss": 0.1533,
+      "step": 1114
+    },
+    {
+      "epoch": 0.9642548284808302,
+      "grad_norm": 10.501197814941406,
+      "learning_rate": 3.415428532371271e-05,
+      "loss": 0.9337,
+      "step": 1115
+    },
+    {
+      "epoch": 0.9651196310175844,
+      "grad_norm": 7.168083190917969,
+      "learning_rate": 3.40027044158745e-05,
+      "loss": 0.3695,
+      "step": 1116
+    },
+    {
+      "epoch": 0.9659844335543384,
+      "grad_norm": 10.598306655883789,
+      "learning_rate": 3.3851287124944756e-05,
+      "loss": 0.7095,
+      "step": 1117
+    },
+    {
+      "epoch": 0.9668492360910925,
+      "grad_norm": 5.203083038330078,
+      "learning_rate": 3.370003499958703e-05,
+      "loss": 0.4206,
+      "step": 1118
+    },
+    {
+      "epoch": 0.9677140386278467,
+      "grad_norm": 5.217127323150635,
+      "learning_rate": 3.3548949586775624e-05,
+      "loss": 0.235,
+      "step": 1119
+    },
+    {
+      "epoch": 0.9685788411646008,
+      "grad_norm": 4.155709266662598,
+      "learning_rate": 3.339803243177972e-05,
+      "loss": 0.1233,
+      "step": 1120
+    },
+    {
+      "epoch": 0.9694436437013548,
+      "grad_norm": 2.8669726848602295,
+      "learning_rate": 3.324728507814764e-05,
+      "loss": 0.1605,
+      "step": 1121
+    },
+    {
+      "epoch": 0.970308446238109,
+      "grad_norm": 3.5733962059020996,
+      "learning_rate": 3.3096709067691006e-05,
+      "loss": 0.1095,
+      "step": 1122
+    },
+    {
+      "epoch": 0.9711732487748631,
+      "grad_norm": 4.109647274017334,
+      "learning_rate": 3.294630594046892e-05,
+      "loss": 0.2737,
+      "step": 1123
+    },
+    {
+      "epoch": 0.9720380513116171,
+      "grad_norm": 7.015890121459961,
+      "learning_rate": 3.279607723477234e-05,
+      "loss": 0.3482,
+      "step": 1124
+    },
+    {
+      "epoch": 0.9729028538483713,
+      "grad_norm": 6.006662368774414,
+      "learning_rate": 3.2646024487108236e-05,
+      "loss": 0.4144,
+      "step": 1125
+    },
+    {
+      "epoch": 0.9737676563851254,
+      "grad_norm": 8.346697807312012,
+      "learning_rate": 3.249614923218391e-05,
+      "loss": 0.4055,
+      "step": 1126
+    },
+    {
+      "epoch": 0.9746324589218796,
+      "grad_norm": 6.663881778717041,
+      "learning_rate": 3.234645300289137e-05,
+      "loss": 0.5001,
+      "step": 1127
+    },
+    {
+      "epoch": 0.9754972614586336,
+      "grad_norm": 7.918451309204102,
+      "learning_rate": 3.21969373302915e-05,
+      "loss": 0.6129,
+      "step": 1128
+    },
+    {
+      "epoch": 0.9763620639953877,
+      "grad_norm": 5.889848709106445,
+      "learning_rate": 3.204760374359857e-05,
+      "loss": 0.3793,
+      "step": 1129
+    },
+    {
+      "epoch": 0.9772268665321419,
+      "grad_norm": 13.368314743041992,
+      "learning_rate": 3.189845377016448e-05,
+      "loss": 1.0901,
+      "step": 1130
+    },
+    {
+      "epoch": 0.9780916690688959,
+      "grad_norm": 2.977189064025879,
+      "learning_rate": 3.1749488935463145e-05,
+      "loss": 0.1219,
+      "step": 1131
+    },
+    {
+      "epoch": 0.97895647160565,
+      "grad_norm": 9.539501190185547,
+      "learning_rate": 3.160071076307497e-05,
+      "loss": 0.5126,
+      "step": 1132
+    },
+    {
+      "epoch": 0.9798212741424042,
+      "grad_norm": 2.8723487854003906,
+      "learning_rate": 3.145212077467118e-05,
+      "loss": 0.2261,
+      "step": 1133
+    },
+    {
+      "epoch": 0.9806860766791583,
+      "grad_norm": 4.65241813659668,
+      "learning_rate": 3.1303720489998326e-05,
+      "loss": 0.1636,
+      "step": 1134
+    },
+    {
+      "epoch": 0.9815508792159123,
+      "grad_norm": 6.660006999969482,
+      "learning_rate": 3.1155511426862654e-05,
+      "loss": 0.2711,
+      "step": 1135
+    },
+    {
+      "epoch": 0.9824156817526665,
+      "grad_norm": 4.477895259857178,
+      "learning_rate": 3.100749510111471e-05,
+      "loss": 0.3117,
+      "step": 1136
+    },
+    {
+      "epoch": 0.9832804842894206,
+      "grad_norm": 13.759649276733398,
+      "learning_rate": 3.085967302663375e-05,
+      "loss": 0.8633,
+      "step": 1137
+    },
+    {
+      "epoch": 0.9841452868261746,
+      "grad_norm": 5.8578948974609375,
+      "learning_rate": 3.071204671531221e-05,
+      "loss": 0.3619,
+      "step": 1138
+    },
+    {
+      "epoch": 0.9850100893629288,
+      "grad_norm": 2.4084582328796387,
+      "learning_rate": 3.056461767704037e-05,
+      "loss": 0.1079,
+      "step": 1139
+    },
+    {
+      "epoch": 0.9858748918996829,
+      "grad_norm": 7.170529842376709,
+      "learning_rate": 3.041738741969078e-05,
+      "loss": 0.4303,
+      "step": 1140
+    },
+    {
+      "epoch": 0.986739694436437,
+      "grad_norm": 4.021960735321045,
+      "learning_rate": 3.027035744910298e-05,
+      "loss": 0.1799,
+      "step": 1141
+    },
+    {
+      "epoch": 0.9876044969731911,
+      "grad_norm": 4.080975532531738,
+      "learning_rate": 3.012352926906794e-05,
+      "loss": 0.3902,
+      "step": 1142
+    },
+    {
+      "epoch": 0.9884692995099452,
+      "grad_norm": 2.9526562690734863,
+      "learning_rate": 2.9976904381312835e-05,
+      "loss": 0.1557,
+      "step": 1143
+    },
+    {
+      "epoch": 0.9893341020466994,
+      "grad_norm": 5.068524360656738,
+      "learning_rate": 2.9830484285485544e-05,
+      "loss": 0.1057,
+      "step": 1144
+    },
+    {
+      "epoch": 0.9901989045834535,
+      "grad_norm": 4.044787883758545,
+      "learning_rate": 2.968427047913942e-05,
+      "loss": 0.1142,
+      "step": 1145
+    }
+  ],
+  "logging_steps": 1,
+  "max_steps": 3468,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 1145,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 0.0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}