Model save

Browse files

Files changed (5) hide show

README.md +58 -0
all_results.json +8 -0
generation_config.json +14 -0
train_results.json +8 -0
trainer_state.json +1442 -0

README.md ADDED Viewed

	@@ -0,0 +1,58 @@

+---
+base_model: Qwen/Qwen2.5-1.5B-Instruct
+library_name: transformers
+model_name: Qwen2.5-1.5B-Open-R1-Distill
+tags:
+- generated_from_trainer
+- trl
+- sft
+licence: license
+---
+# Model Card for Qwen2.5-1.5B-Open-R1-Distill
+This model is a fine-tuned version of [Qwen/Qwen2.5-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct).
+It has been trained using [TRL](https://github.com/huggingface/trl).
+## Quick start
+```python
+from transformers import pipeline
+question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
+generator = pipeline("text-generation", model="yjh00/Qwen2.5-1.5B-Open-R1-Distill", device="cuda")
+output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
+print(output["generated_text"])
+```
+## Training procedure
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/liangzizhangluo-hohai-university/huggingface/runs/4cxrwklk)
+This model was trained with supervised fine-tuning (SFT).
+### Framework versions
+- TRL: 0.16.0.dev0
+- Transformers: 4.49.0.dev0
+- PyTorch: 2.5.1
+- Datasets: 3.3.0
+- Tokenizers: 0.21.0
+## Citations
+Cite TRL as:
+```bibtex
+@misc{vonwerra2022trl,
+	title        = {{TRL: Transformer Reinforcement Learning}},
+	author       = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallouédec},
+	year         = 2020,
+	journal      = {GitHub repository},
+	publisher    = {GitHub},
+	howpublished = {\url{https://github.com/huggingface/trl}}
+}
+```

all_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "total_flos": 28487712768000.0,
+    "train_loss": 0.920774197101593,
+    "train_runtime": 1759.2244,
+    "train_samples": 16610,
+    "train_samples_per_second": 18.19,
+    "train_steps_per_second": 0.568
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "bos_token_id": 151643,
+  "do_sample": true,
+  "eos_token_id": [
+    151645,
+    151643
+  ],
+  "pad_token_id": 151643,
+  "repetition_penalty": 1.1,
+  "temperature": 0.7,
+  "top_k": 20,
+  "top_p": 0.8,
+  "transformers_version": "4.49.0.dev0"
+}

train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "total_flos": 28487712768000.0,
+    "train_loss": 0.920774197101593,
+    "train_runtime": 1759.2244,
+    "train_samples": 16610,
+    "train_samples_per_second": 18.19,
+    "train_steps_per_second": 0.568
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,1442 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.3699593044765076,
+  "eval_steps": 500,
+  "global_step": 1000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.001849796522382538,
+      "grad_norm": 3.3279617254334535,
+      "learning_rate": 5e-06,
+      "loss": 1.3195,
+      "step": 5
+    },
+    {
+      "epoch": 0.003699593044765076,
+      "grad_norm": 1.9430817685675177,
+      "learning_rate": 1e-05,
+      "loss": 1.2699,
+      "step": 10
+    },
+    {
+      "epoch": 0.005549389567147614,
+      "grad_norm": 2.1226078391974967,
+      "learning_rate": 1.5e-05,
+      "loss": 1.1007,
+      "step": 15
+    },
+    {
+      "epoch": 0.007399186089530152,
+      "grad_norm": 1.864202847273668,
+      "learning_rate": 2e-05,
+      "loss": 1.0631,
+      "step": 20
+    },
+    {
+      "epoch": 0.00924898261191269,
+      "grad_norm": 1.5790964667081508,
+      "learning_rate": 2.5e-05,
+      "loss": 1.0944,
+      "step": 25
+    },
+    {
+      "epoch": 0.011098779134295227,
+      "grad_norm": 1.3616177321523328,
+      "learning_rate": 3e-05,
+      "loss": 0.9914,
+      "step": 30
+    },
+    {
+      "epoch": 0.012948575656677765,
+      "grad_norm": 1.404161383781241,
+      "learning_rate": 3.5e-05,
+      "loss": 0.9922,
+      "step": 35
+    },
+    {
+      "epoch": 0.014798372179060304,
+      "grad_norm": 1.4817289036292525,
+      "learning_rate": 4e-05,
+      "loss": 0.992,
+      "step": 40
+    },
+    {
+      "epoch": 0.016648168701442843,
+      "grad_norm": 1.4532039393449283,
+      "learning_rate": 4.5e-05,
+      "loss": 0.9724,
+      "step": 45
+    },
+    {
+      "epoch": 0.01849796522382538,
+      "grad_norm": 1.459454732453025,
+      "learning_rate": 5e-05,
+      "loss": 0.9584,
+      "step": 50
+    },
+    {
+      "epoch": 0.020347761746207917,
+      "grad_norm": 1.3435610444029484,
+      "learning_rate": 4.9996924362330555e-05,
+      "loss": 0.9809,
+      "step": 55
+    },
+    {
+      "epoch": 0.022197558268590455,
+      "grad_norm": 1.507556200878351,
+      "learning_rate": 4.998769829017084e-05,
+      "loss": 1.0205,
+      "step": 60
+    },
+    {
+      "epoch": 0.024047354790972992,
+      "grad_norm": 1.74928123764404,
+      "learning_rate": 4.997232430583686e-05,
+      "loss": 1.0078,
+      "step": 65
+    },
+    {
+      "epoch": 0.02589715131335553,
+      "grad_norm": 1.3343301345399712,
+      "learning_rate": 4.995080661242243e-05,
+      "loss": 0.9691,
+      "step": 70
+    },
+    {
+      "epoch": 0.02774694783573807,
+      "grad_norm": 1.383358561734251,
+      "learning_rate": 4.992315109265007e-05,
+      "loss": 1.0215,
+      "step": 75
+    },
+    {
+      "epoch": 0.029596744358120607,
+      "grad_norm": 1.220791806067286,
+      "learning_rate": 4.988936530726276e-05,
+      "loss": 0.9797,
+      "step": 80
+    },
+    {
+      "epoch": 0.031446540880503145,
+      "grad_norm": 1.7288941628239176,
+      "learning_rate": 4.984945849295686e-05,
+      "loss": 0.9667,
+      "step": 85
+    },
+    {
+      "epoch": 0.033296337402885685,
+      "grad_norm": 1.3374822253676133,
+      "learning_rate": 4.980344155985695e-05,
+      "loss": 0.9725,
+      "step": 90
+    },
+    {
+      "epoch": 0.03514613392526822,
+      "grad_norm": 1.258960837768452,
+      "learning_rate": 4.975132708853304e-05,
+      "loss": 0.9877,
+      "step": 95
+    },
+    {
+      "epoch": 0.03699593044765076,
+      "grad_norm": 1.438691891865278,
+      "learning_rate": 4.9693129326561254e-05,
+      "loss": 1.0251,
+      "step": 100
+    },
+    {
+      "epoch": 0.038845726970033294,
+      "grad_norm": 1.3123491113835215,
+      "learning_rate": 4.96288641846286e-05,
+      "loss": 0.9805,
+      "step": 105
+    },
+    {
+      "epoch": 0.040695523492415835,
+      "grad_norm": 1.3868311724825364,
+      "learning_rate": 4.955854923218321e-05,
+      "loss": 0.9866,
+      "step": 110
+    },
+    {
+      "epoch": 0.042545320014798375,
+      "grad_norm": 1.1867492456163233,
+      "learning_rate": 4.948220369263099e-05,
+      "loss": 0.9671,
+      "step": 115
+    },
+    {
+      "epoch": 0.04439511653718091,
+      "grad_norm": 1.3516961645638472,
+      "learning_rate": 4.939984843808013e-05,
+      "loss": 0.9712,
+      "step": 120
+    },
+    {
+      "epoch": 0.04624491305956345,
+      "grad_norm": 1.3818877123258775,
+      "learning_rate": 4.931150598363494e-05,
+      "loss": 0.9973,
+      "step": 125
+    },
+    {
+      "epoch": 0.048094709581945984,
+      "grad_norm": 1.2461243722970794,
+      "learning_rate": 4.921720048124035e-05,
+      "loss": 0.9721,
+      "step": 130
+    },
+    {
+      "epoch": 0.049944506104328525,
+      "grad_norm": 1.3531497904983025,
+      "learning_rate": 4.9116957713079084e-05,
+      "loss": 0.9947,
+      "step": 135
+    },
+    {
+      "epoch": 0.05179430262671106,
+      "grad_norm": 1.3909365555868392,
+      "learning_rate": 4.901080508452305e-05,
+      "loss": 0.9886,
+      "step": 140
+    },
+    {
+      "epoch": 0.0536440991490936,
+      "grad_norm": 1.3632639733726144,
+      "learning_rate": 4.889877161664096e-05,
+      "loss": 0.9856,
+      "step": 145
+    },
+    {
+      "epoch": 0.05549389567147614,
+      "grad_norm": 1.376746573930021,
+      "learning_rate": 4.878088793826428e-05,
+      "loss": 1.0124,
+      "step": 150
+    },
+    {
+      "epoch": 0.057343692193858674,
+      "grad_norm": 1.2051488416511116,
+      "learning_rate": 4.865718627761363e-05,
+      "loss": 0.9502,
+      "step": 155
+    },
+    {
+      "epoch": 0.059193488716241215,
+      "grad_norm": 1.4444748706679968,
+      "learning_rate": 4.8527700453487873e-05,
+      "loss": 0.976,
+      "step": 160
+    },
+    {
+      "epoch": 0.06104328523862375,
+      "grad_norm": 1.5080951955787747,
+      "learning_rate": 4.839246586601841e-05,
+      "loss": 0.9858,
+      "step": 165
+    },
+    {
+      "epoch": 0.06289308176100629,
+      "grad_norm": 1.218432538840472,
+      "learning_rate": 4.825151948699116e-05,
+      "loss": 0.985,
+      "step": 170
+    },
+    {
+      "epoch": 0.06474287828338883,
+      "grad_norm": 1.1799037097271943,
+      "learning_rate": 4.8104899849738795e-05,
+      "loss": 0.9378,
+      "step": 175
+    },
+    {
+      "epoch": 0.06659267480577137,
+      "grad_norm": 1.2779121353402405,
+      "learning_rate": 4.795264703860616e-05,
+      "loss": 0.9683,
+      "step": 180
+    },
+    {
+      "epoch": 0.0684424713281539,
+      "grad_norm": 1.4722942379840678,
+      "learning_rate": 4.779480267799158e-05,
+      "loss": 0.9904,
+      "step": 185
+    },
+    {
+      "epoch": 0.07029226785053644,
+      "grad_norm": 1.3984538759925134,
+      "learning_rate": 4.763140992096718e-05,
+      "loss": 0.9702,
+      "step": 190
+    },
+    {
+      "epoch": 0.07214206437291898,
+      "grad_norm": 1.2519064402447997,
+      "learning_rate": 4.7462513437481266e-05,
+      "loss": 0.9671,
+      "step": 195
+    },
+    {
+      "epoch": 0.07399186089530152,
+      "grad_norm": 1.281644209465442,
+      "learning_rate": 4.7288159402146e-05,
+      "loss": 0.9653,
+      "step": 200
+    },
+    {
+      "epoch": 0.07584165741768406,
+      "grad_norm": 1.3861942696753027,
+      "learning_rate": 4.7108395481613736e-05,
+      "loss": 0.989,
+      "step": 205
+    },
+    {
+      "epoch": 0.07769145394006659,
+      "grad_norm": 1.2692148251690907,
+      "learning_rate": 4.692327082154542e-05,
+      "loss": 0.9695,
+      "step": 210
+    },
+    {
+      "epoch": 0.07954125046244913,
+      "grad_norm": 1.2503784332879924,
+      "learning_rate": 4.6732836033174634e-05,
+      "loss": 0.9442,
+      "step": 215
+    },
+    {
+      "epoch": 0.08139104698483167,
+      "grad_norm": 1.2071851181770694,
+      "learning_rate": 4.653714317947105e-05,
+      "loss": 0.9664,
+      "step": 220
+    },
+    {
+      "epoch": 0.08324084350721421,
+      "grad_norm": 1.3973898482595015,
+      "learning_rate": 4.6336245760906896e-05,
+      "loss": 1.0037,
+      "step": 225
+    },
+    {
+      "epoch": 0.08509064002959675,
+      "grad_norm": 1.1885748014842101,
+      "learning_rate": 4.6130198700830455e-05,
+      "loss": 0.9523,
+      "step": 230
+    },
+    {
+      "epoch": 0.08694043655197928,
+      "grad_norm": 1.1676588738483402,
+      "learning_rate": 4.591905833045059e-05,
+      "loss": 0.9317,
+      "step": 235
+    },
+    {
+      "epoch": 0.08879023307436182,
+      "grad_norm": 1.2482898560322035,
+      "learning_rate": 4.570288237343632e-05,
+      "loss": 0.9315,
+      "step": 240
+    },
+    {
+      "epoch": 0.09064002959674436,
+      "grad_norm": 1.122744743080674,
+      "learning_rate": 4.5481729930135805e-05,
+      "loss": 0.9582,
+      "step": 245
+    },
+    {
+      "epoch": 0.0924898261191269,
+      "grad_norm": 1.1964791201941039,
+      "learning_rate": 4.5255661461418854e-05,
+      "loss": 0.9613,
+      "step": 250
+    },
+    {
+      "epoch": 0.09433962264150944,
+      "grad_norm": 1.313613853504735,
+      "learning_rate": 4.502473877214754e-05,
+      "loss": 0.9627,
+      "step": 255
+    },
+    {
+      "epoch": 0.09618941916389197,
+      "grad_norm": 1.2998325386052085,
+      "learning_rate": 4.478902499427933e-05,
+      "loss": 0.9435,
+      "step": 260
+    },
+    {
+      "epoch": 0.09803921568627451,
+      "grad_norm": 1.1188167692403637,
+      "learning_rate": 4.454858456960754e-05,
+      "loss": 0.9531,
+      "step": 265
+    },
+    {
+      "epoch": 0.09988901220865705,
+      "grad_norm": 1.245499023500759,
+      "learning_rate": 4.430348323214347e-05,
+      "loss": 0.9615,
+      "step": 270
+    },
+    {
+      "epoch": 0.10173880873103959,
+      "grad_norm": 1.2731826201754,
+      "learning_rate": 4.4053787990145465e-05,
+      "loss": 0.9852,
+      "step": 275
+    },
+    {
+      "epoch": 0.10358860525342212,
+      "grad_norm": 1.0779984323652068,
+      "learning_rate": 4.379956710779951e-05,
+      "loss": 0.9649,
+      "step": 280
+    },
+    {
+      "epoch": 0.10543840177580466,
+      "grad_norm": 1.3267240330921417,
+      "learning_rate": 4.3540890086556435e-05,
+      "loss": 0.967,
+      "step": 285
+    },
+    {
+      "epoch": 0.1072881982981872,
+      "grad_norm": 1.194869135121013,
+      "learning_rate": 4.327782764613099e-05,
+      "loss": 0.9745,
+      "step": 290
+    },
+    {
+      "epoch": 0.10913799482056974,
+      "grad_norm": 1.092064798384306,
+      "learning_rate": 4.301045170516773e-05,
+      "loss": 0.9354,
+      "step": 295
+    },
+    {
+      "epoch": 0.11098779134295228,
+      "grad_norm": 1.1564826438676266,
+      "learning_rate": 4.2738835361579175e-05,
+      "loss": 0.9301,
+      "step": 300
+    },
+    {
+      "epoch": 0.1128375878653348,
+      "grad_norm": 1.1034286954430845,
+      "learning_rate": 4.2463052872561584e-05,
+      "loss": 0.9536,
+      "step": 305
+    },
+    {
+      "epoch": 0.11468738438771735,
+      "grad_norm": 1.1101601059988588,
+      "learning_rate": 4.2183179634293794e-05,
+      "loss": 0.9302,
+      "step": 310
+    },
+    {
+      "epoch": 0.11653718091009989,
+      "grad_norm": 1.2757853825149674,
+      "learning_rate": 4.1899292161324627e-05,
+      "loss": 0.9547,
+      "step": 315
+    },
+    {
+      "epoch": 0.11838697743248243,
+      "grad_norm": 1.1087176065384894,
+      "learning_rate": 4.1611468065654586e-05,
+      "loss": 0.9764,
+      "step": 320
+    },
+    {
+      "epoch": 0.12023677395486497,
+      "grad_norm": 1.2270496050163748,
+      "learning_rate": 4.1319786035517534e-05,
+      "loss": 0.9551,
+      "step": 325
+    },
+    {
+      "epoch": 0.1220865704772475,
+      "grad_norm": 1.2025590067136398,
+      "learning_rate": 4.1024325813868065e-05,
+      "loss": 0.9238,
+      "step": 330
+    },
+    {
+      "epoch": 0.12393636699963004,
+      "grad_norm": 1.1631866711130052,
+      "learning_rate": 4.072516817658065e-05,
+      "loss": 0.9573,
+      "step": 335
+    },
+    {
+      "epoch": 0.12578616352201258,
+      "grad_norm": 1.064949988724441,
+      "learning_rate": 4.0422394910366236e-05,
+      "loss": 0.9184,
+      "step": 340
+    },
+    {
+      "epoch": 0.12763596004439512,
+      "grad_norm": 1.4077051494514279,
+      "learning_rate": 4.0116088790412645e-05,
+      "loss": 0.9518,
+      "step": 345
+    },
+    {
+      "epoch": 0.12948575656677766,
+      "grad_norm": 1.2486218937453641,
+      "learning_rate": 3.980633355775461e-05,
+      "loss": 0.9518,
+      "step": 350
+    },
+    {
+      "epoch": 0.1313355530891602,
+      "grad_norm": 1.2416341494672163,
+      "learning_rate": 3.949321389637986e-05,
+      "loss": 0.9238,
+      "step": 355
+    },
+    {
+      "epoch": 0.13318534961154274,
+      "grad_norm": 1.18527481990195,
+      "learning_rate": 3.917681541007734e-05,
+      "loss": 0.9319,
+      "step": 360
+    },
+    {
+      "epoch": 0.13503514613392525,
+      "grad_norm": 1.1856076287802888,
+      "learning_rate": 3.885722459903399e-05,
+      "loss": 0.941,
+      "step": 365
+    },
+    {
+      "epoch": 0.1368849426563078,
+      "grad_norm": 1.3096185432838516,
+      "learning_rate": 3.853452883618644e-05,
+      "loss": 0.9531,
+      "step": 370
+    },
+    {
+      "epoch": 0.13873473917869034,
+      "grad_norm": 1.0667136184374695,
+      "learning_rate": 3.8208816343334156e-05,
+      "loss": 0.9648,
+      "step": 375
+    },
+    {
+      "epoch": 0.14058453570107288,
+      "grad_norm": 1.3556343012265513,
+      "learning_rate": 3.788017616702048e-05,
+      "loss": 0.9622,
+      "step": 380
+    },
+    {
+      "epoch": 0.14243433222345542,
+      "grad_norm": 1.2826346418353032,
+      "learning_rate": 3.754869815418815e-05,
+      "loss": 0.9243,
+      "step": 385
+    },
+    {
+      "epoch": 0.14428412874583796,
+      "grad_norm": 1.4908193033444779,
+      "learning_rate": 3.721447292761609e-05,
+      "loss": 0.9402,
+      "step": 390
+    },
+    {
+      "epoch": 0.1461339252682205,
+      "grad_norm": 1.2092388640077134,
+      "learning_rate": 3.687759186114403e-05,
+      "loss": 0.8881,
+      "step": 395
+    },
+    {
+      "epoch": 0.14798372179060304,
+      "grad_norm": 1.2289784753028945,
+      "learning_rate": 3.6538147054691817e-05,
+      "loss": 0.9171,
+      "step": 400
+    },
+    {
+      "epoch": 0.14983351831298558,
+      "grad_norm": 1.2686814293232442,
+      "learning_rate": 3.619623130908018e-05,
+      "loss": 0.9487,
+      "step": 405
+    },
+    {
+      "epoch": 0.15168331483536812,
+      "grad_norm": 1.0701718673112672,
+      "learning_rate": 3.5851938100659964e-05,
+      "loss": 0.9099,
+      "step": 410
+    },
+    {
+      "epoch": 0.15353311135775063,
+      "grad_norm": 1.090800531105658,
+      "learning_rate": 3.550536155575662e-05,
+      "loss": 0.8853,
+      "step": 415
+    },
+    {
+      "epoch": 0.15538290788013318,
+      "grad_norm": 1.2313388220061225,
+      "learning_rate": 3.515659642493697e-05,
+      "loss": 0.9385,
+      "step": 420
+    },
+    {
+      "epoch": 0.15723270440251572,
+      "grad_norm": 1.2536700830777894,
+      "learning_rate": 3.480573805710538e-05,
+      "loss": 0.9143,
+      "step": 425
+    },
+    {
+      "epoch": 0.15908250092489826,
+      "grad_norm": 1.1514860580661626,
+      "learning_rate": 3.4452882373436316e-05,
+      "loss": 0.8996,
+      "step": 430
+    },
+    {
+      "epoch": 0.1609322974472808,
+      "grad_norm": 1.3731197382175357,
+      "learning_rate": 3.4098125841150466e-05,
+      "loss": 0.9243,
+      "step": 435
+    },
+    {
+      "epoch": 0.16278209396966334,
+      "grad_norm": 1.0232827661891992,
+      "learning_rate": 3.37415654471415e-05,
+      "loss": 0.9369,
+      "step": 440
+    },
+    {
+      "epoch": 0.16463189049204588,
+      "grad_norm": 1.1305694231647365,
+      "learning_rate": 3.3383298671460944e-05,
+      "loss": 0.9142,
+      "step": 445
+    },
+    {
+      "epoch": 0.16648168701442842,
+      "grad_norm": 1.1398463628627697,
+      "learning_rate": 3.3023423460667985e-05,
+      "loss": 0.9094,
+      "step": 450
+    },
+    {
+      "epoch": 0.16833148353681096,
+      "grad_norm": 1.0590670030120253,
+      "learning_rate": 3.2662038201051914e-05,
+      "loss": 0.9013,
+      "step": 455
+    },
+    {
+      "epoch": 0.1701812800591935,
+      "grad_norm": 1.1536632313067792,
+      "learning_rate": 3.2299241691734304e-05,
+      "loss": 0.9233,
+      "step": 460
+    },
+    {
+      "epoch": 0.17203107658157601,
+      "grad_norm": 1.167439193543511,
+      "learning_rate": 3.1935133117658306e-05,
+      "loss": 0.952,
+      "step": 465
+    },
+    {
+      "epoch": 0.17388087310395856,
+      "grad_norm": 1.1956939577700672,
+      "learning_rate": 3.156981202247248e-05,
+      "loss": 0.9247,
+      "step": 470
+    },
+    {
+      "epoch": 0.1757306696263411,
+      "grad_norm": 1.1593001767390334,
+      "learning_rate": 3.1203378281316515e-05,
+      "loss": 0.9012,
+      "step": 475
+    },
+    {
+      "epoch": 0.17758046614872364,
+      "grad_norm": 1.0435734696221646,
+      "learning_rate": 3.0835932073516444e-05,
+      "loss": 0.9154,
+      "step": 480
+    },
+    {
+      "epoch": 0.17943026267110618,
+      "grad_norm": 1.1108156220083871,
+      "learning_rate": 3.0467573855196558e-05,
+      "loss": 0.899,
+      "step": 485
+    },
+    {
+      "epoch": 0.18128005919348872,
+      "grad_norm": 1.0167505348335453,
+      "learning_rate": 3.0098404331815695e-05,
+      "loss": 0.9174,
+      "step": 490
+    },
+    {
+      "epoch": 0.18312985571587126,
+      "grad_norm": 0.965632547470374,
+      "learning_rate": 2.9728524430635417e-05,
+      "loss": 0.9143,
+      "step": 495
+    },
+    {
+      "epoch": 0.1849796522382538,
+      "grad_norm": 1.1035315117198525,
+      "learning_rate": 2.9358035273127483e-05,
+      "loss": 0.9261,
+      "step": 500
+    },
+    {
+      "epoch": 0.18682944876063634,
+      "grad_norm": 1.1169822035484065,
+      "learning_rate": 2.8987038147328238e-05,
+      "loss": 0.9313,
+      "step": 505
+    },
+    {
+      "epoch": 0.18867924528301888,
+      "grad_norm": 0.9769048796586226,
+      "learning_rate": 2.86156344801475e-05,
+      "loss": 0.9132,
+      "step": 510
+    },
+    {
+      "epoch": 0.1905290418054014,
+      "grad_norm": 1.030443130117634,
+      "learning_rate": 2.824392580963944e-05,
+      "loss": 0.9115,
+      "step": 515
+    },
+    {
+      "epoch": 0.19237883832778394,
+      "grad_norm": 1.1062439088724974,
+      "learning_rate": 2.787201375724307e-05,
+      "loss": 0.884,
+      "step": 520
+    },
+    {
+      "epoch": 0.19422863485016648,
+      "grad_norm": 1.0597749922719453,
+      "learning_rate": 2.7500000000000004e-05,
+      "loss": 0.8783,
+      "step": 525
+    },
+    {
+      "epoch": 0.19607843137254902,
+      "grad_norm": 1.1259858455874245,
+      "learning_rate": 2.7127986242756936e-05,
+      "loss": 0.8982,
+      "step": 530
+    },
+    {
+      "epoch": 0.19792822789493156,
+      "grad_norm": 1.1500074530646092,
+      "learning_rate": 2.6756074190360563e-05,
+      "loss": 0.9127,
+      "step": 535
+    },
+    {
+      "epoch": 0.1997780244173141,
+      "grad_norm": 1.0395549080430184,
+      "learning_rate": 2.63843655198525e-05,
+      "loss": 0.8975,
+      "step": 540
+    },
+    {
+      "epoch": 0.20162782093969664,
+      "grad_norm": 1.083025795794125,
+      "learning_rate": 2.6012961852671767e-05,
+      "loss": 0.9231,
+      "step": 545
+    },
+    {
+      "epoch": 0.20347761746207918,
+      "grad_norm": 1.0839550256360933,
+      "learning_rate": 2.5641964726872526e-05,
+      "loss": 0.9459,
+      "step": 550
+    },
+    {
+      "epoch": 0.20532741398446172,
+      "grad_norm": 1.0874256330778362,
+      "learning_rate": 2.527147556936459e-05,
+      "loss": 0.891,
+      "step": 555
+    },
+    {
+      "epoch": 0.20717721050684423,
+      "grad_norm": 1.19750486880985,
+      "learning_rate": 2.4901595668184314e-05,
+      "loss": 0.8838,
+      "step": 560
+    },
+    {
+      "epoch": 0.20902700702922677,
+      "grad_norm": 1.092576941889689,
+      "learning_rate": 2.453242614480345e-05,
+      "loss": 0.9442,
+      "step": 565
+    },
+    {
+      "epoch": 0.21087680355160932,
+      "grad_norm": 1.227755705501534,
+      "learning_rate": 2.416406792648355e-05,
+      "loss": 0.8934,
+      "step": 570
+    },
+    {
+      "epoch": 0.21272660007399186,
+      "grad_norm": 1.2087319750759349,
+      "learning_rate": 2.3796621718683487e-05,
+      "loss": 0.9035,
+      "step": 575
+    },
+    {
+      "epoch": 0.2145763965963744,
+      "grad_norm": 1.1471923615461788,
+      "learning_rate": 2.3430187977527533e-05,
+      "loss": 0.8942,
+      "step": 580
+    },
+    {
+      "epoch": 0.21642619311875694,
+      "grad_norm": 0.9623686085619546,
+      "learning_rate": 2.3064866882341696e-05,
+      "loss": 0.8907,
+      "step": 585
+    },
+    {
+      "epoch": 0.21827598964113948,
+      "grad_norm": 1.1021625170926432,
+      "learning_rate": 2.27007583082657e-05,
+      "loss": 0.8488,
+      "step": 590
+    },
+    {
+      "epoch": 0.22012578616352202,
+      "grad_norm": 1.0507739548200454,
+      "learning_rate": 2.233796179894809e-05,
+      "loss": 0.9243,
+      "step": 595
+    },
+    {
+      "epoch": 0.22197558268590456,
+      "grad_norm": 1.0241361415980714,
+      "learning_rate": 2.1976576539332024e-05,
+      "loss": 0.8746,
+      "step": 600
+    },
+    {
+      "epoch": 0.2238253792082871,
+      "grad_norm": 1.1308505070134895,
+      "learning_rate": 2.1616701328539057e-05,
+      "loss": 0.9094,
+      "step": 605
+    },
+    {
+      "epoch": 0.2256751757306696,
+      "grad_norm": 1.0945099014034436,
+      "learning_rate": 2.1258434552858502e-05,
+      "loss": 0.9125,
+      "step": 610
+    },
+    {
+      "epoch": 0.22752497225305215,
+      "grad_norm": 1.116483983505738,
+      "learning_rate": 2.090187415884955e-05,
+      "loss": 0.9148,
+      "step": 615
+    },
+    {
+      "epoch": 0.2293747687754347,
+      "grad_norm": 0.9728451267819282,
+      "learning_rate": 2.054711762656369e-05,
+      "loss": 0.8596,
+      "step": 620
+    },
+    {
+      "epoch": 0.23122456529781724,
+      "grad_norm": 1.0747467852239778,
+      "learning_rate": 2.0194261942894628e-05,
+      "loss": 0.8733,
+      "step": 625
+    },
+    {
+      "epoch": 0.23307436182019978,
+      "grad_norm": 1.0108414553651262,
+      "learning_rate": 1.984340357506303e-05,
+      "loss": 0.8476,
+      "step": 630
+    },
+    {
+      "epoch": 0.23492415834258232,
+      "grad_norm": 1.0393062726091873,
+      "learning_rate": 1.949463844424338e-05,
+      "loss": 0.8926,
+      "step": 635
+    },
+    {
+      "epoch": 0.23677395486496486,
+      "grad_norm": 1.017203795362952,
+      "learning_rate": 1.914806189934003e-05,
+      "loss": 0.8905,
+      "step": 640
+    },
+    {
+      "epoch": 0.2386237513873474,
+      "grad_norm": 1.0799681224310813,
+      "learning_rate": 1.8803768690919832e-05,
+      "loss": 0.8897,
+      "step": 645
+    },
+    {
+      "epoch": 0.24047354790972994,
+      "grad_norm": 1.0781213573400381,
+      "learning_rate": 1.8461852945308196e-05,
+      "loss": 0.8637,
+      "step": 650
+    },
+    {
+      "epoch": 0.24232334443211248,
+      "grad_norm": 1.1249251204928792,
+      "learning_rate": 1.8122408138855974e-05,
+      "loss": 0.8802,
+      "step": 655
+    },
+    {
+      "epoch": 0.244173140954495,
+      "grad_norm": 1.1923220257639287,
+      "learning_rate": 1.778552707238391e-05,
+      "loss": 0.9033,
+      "step": 660
+    },
+    {
+      "epoch": 0.24602293747687753,
+      "grad_norm": 1.0225563377110887,
+      "learning_rate": 1.7451301845811857e-05,
+      "loss": 0.8605,
+      "step": 665
+    },
+    {
+      "epoch": 0.24787273399926008,
+      "grad_norm": 1.0757446768121959,
+      "learning_rate": 1.711982383297953e-05,
+      "loss": 0.8906,
+      "step": 670
+    },
+    {
+      "epoch": 0.24972253052164262,
+      "grad_norm": 1.0589738260259443,
+      "learning_rate": 1.6791183656665846e-05,
+      "loss": 0.8938,
+      "step": 675
+    },
+    {
+      "epoch": 0.25157232704402516,
+      "grad_norm": 0.9884710814924109,
+      "learning_rate": 1.6465471163813574e-05,
+      "loss": 0.9306,
+      "step": 680
+    },
+    {
+      "epoch": 0.2534221235664077,
+      "grad_norm": 1.0526270648654439,
+      "learning_rate": 1.6142775400966023e-05,
+      "loss": 0.9038,
+      "step": 685
+    },
+    {
+      "epoch": 0.25527192008879024,
+      "grad_norm": 1.0276777351490343,
+      "learning_rate": 1.582318458992267e-05,
+      "loss": 0.8701,
+      "step": 690
+    },
+    {
+      "epoch": 0.2571217166111728,
+      "grad_norm": 1.1308701537231916,
+      "learning_rate": 1.5506786103620146e-05,
+      "loss": 0.9146,
+      "step": 695
+    },
+    {
+      "epoch": 0.2589715131335553,
+      "grad_norm": 1.1220390082042924,
+      "learning_rate": 1.5193666442245402e-05,
+      "loss": 0.894,
+      "step": 700
+    },
+    {
+      "epoch": 0.26082130965593786,
+      "grad_norm": 0.9872150958917795,
+      "learning_rate": 1.4883911209587368e-05,
+      "loss": 0.8846,
+      "step": 705
+    },
+    {
+      "epoch": 0.2626711061783204,
+      "grad_norm": 1.0255612887775873,
+      "learning_rate": 1.4577605089633773e-05,
+      "loss": 0.8751,
+      "step": 710
+    },
+    {
+      "epoch": 0.26452090270070294,
+      "grad_norm": 1.0397487279123176,
+      "learning_rate": 1.427483182341936e-05,
+      "loss": 0.8553,
+      "step": 715
+    },
+    {
+      "epoch": 0.2663706992230855,
+      "grad_norm": 0.9811002147501025,
+      "learning_rate": 1.3975674186131937e-05,
+      "loss": 0.8811,
+      "step": 720
+    },
+    {
+      "epoch": 0.268220495745468,
+      "grad_norm": 0.9912452879027507,
+      "learning_rate": 1.3680213964482475e-05,
+      "loss": 0.882,
+      "step": 725
+    },
+    {
+      "epoch": 0.2700702922678505,
+      "grad_norm": 1.085598582193152,
+      "learning_rate": 1.3388531934345416e-05,
+      "loss": 0.8663,
+      "step": 730
+    },
+    {
+      "epoch": 0.27192008879023305,
+      "grad_norm": 1.0976908092393123,
+      "learning_rate": 1.3100707838675377e-05,
+      "loss": 0.8568,
+      "step": 735
+    },
+    {
+      "epoch": 0.2737698853126156,
+      "grad_norm": 1.110399579286176,
+      "learning_rate": 1.2816820365706206e-05,
+      "loss": 0.861,
+      "step": 740
+    },
+    {
+      "epoch": 0.27561968183499813,
+      "grad_norm": 0.9972527883665196,
+      "learning_rate": 1.2536947127438415e-05,
+      "loss": 0.8583,
+      "step": 745
+    },
+    {
+      "epoch": 0.27746947835738067,
+      "grad_norm": 0.9846092199040459,
+      "learning_rate": 1.2261164638420832e-05,
+      "loss": 0.8869,
+      "step": 750
+    },
+    {
+      "epoch": 0.2793192748797632,
+      "grad_norm": 1.0377584915303817,
+      "learning_rate": 1.198954829483227e-05,
+      "loss": 0.8689,
+      "step": 755
+    },
+    {
+      "epoch": 0.28116907140214575,
+      "grad_norm": 1.0080689260720659,
+      "learning_rate": 1.1722172353869008e-05,
+      "loss": 0.8494,
+      "step": 760
+    },
+    {
+      "epoch": 0.2830188679245283,
+      "grad_norm": 1.1377623343596663,
+      "learning_rate": 1.1459109913443567e-05,
+      "loss": 0.8789,
+      "step": 765
+    },
+    {
+      "epoch": 0.28486866444691084,
+      "grad_norm": 1.0844127152295038,
+      "learning_rate": 1.12004328922005e-05,
+      "loss": 0.8714,
+      "step": 770
+    },
+    {
+      "epoch": 0.2867184609692934,
+      "grad_norm": 0.9818291742586884,
+      "learning_rate": 1.094621200985454e-05,
+      "loss": 0.8762,
+      "step": 775
+    },
+    {
+      "epoch": 0.2885682574916759,
+      "grad_norm": 1.0561402094886105,
+      "learning_rate": 1.0696516767856546e-05,
+      "loss": 0.8602,
+      "step": 780
+    },
+    {
+      "epoch": 0.29041805401405846,
+      "grad_norm": 0.9507833621657122,
+      "learning_rate": 1.0451415430392474e-05,
+      "loss": 0.8493,
+      "step": 785
+    },
+    {
+      "epoch": 0.292267850536441,
+      "grad_norm": 1.0142419832874243,
+      "learning_rate": 1.0210975005720677e-05,
+      "loss": 0.8621,
+      "step": 790
+    },
+    {
+      "epoch": 0.29411764705882354,
+      "grad_norm": 1.0414075410194514,
+      "learning_rate": 9.975261227852472e-06,
+      "loss": 0.8867,
+      "step": 795
+    },
+    {
+      "epoch": 0.2959674435812061,
+      "grad_norm": 1.0172601224221116,
+      "learning_rate": 9.744338538581147e-06,
+      "loss": 0.861,
+      "step": 800
+    },
+    {
+      "epoch": 0.2978172401035886,
+      "grad_norm": 1.0520376994102576,
+      "learning_rate": 9.518270069864195e-06,
+      "loss": 0.8693,
+      "step": 805
+    },
+    {
+      "epoch": 0.29966703662597116,
+      "grad_norm": 1.0297669417176623,
+      "learning_rate": 9.297117626563687e-06,
+      "loss": 0.8556,
+      "step": 810
+    },
+    {
+      "epoch": 0.3015168331483537,
+      "grad_norm": 1.1288942237451884,
+      "learning_rate": 9.080941669549423e-06,
+      "loss": 0.8636,
+      "step": 815
+    },
+    {
+      "epoch": 0.30336662967073624,
+      "grad_norm": 1.0700035713287315,
+      "learning_rate": 8.86980129916955e-06,
+      "loss": 0.8735,
+      "step": 820
+    },
+    {
+      "epoch": 0.3052164261931188,
+      "grad_norm": 1.0433713434057506,
+      "learning_rate": 8.663754239093109e-06,
+      "loss": 0.8745,
+      "step": 825
+    },
+    {
+      "epoch": 0.30706622271550127,
+      "grad_norm": 0.9399395136256045,
+      "learning_rate": 8.462856820528952e-06,
+      "loss": 0.8625,
+      "step": 830
+    },
+    {
+      "epoch": 0.3089160192378838,
+      "grad_norm": 1.012084107494069,
+      "learning_rate": 8.26716396682537e-06,
+      "loss": 0.8575,
+      "step": 835
+    },
+    {
+      "epoch": 0.31076581576026635,
+      "grad_norm": 1.0479666608180018,
+      "learning_rate": 8.076729178454588e-06,
+      "loss": 0.8766,
+      "step": 840
+    },
+    {
+      "epoch": 0.3126156122826489,
+      "grad_norm": 1.137450242844205,
+      "learning_rate": 7.89160451838626e-06,
+      "loss": 0.8544,
+      "step": 845
+    },
+    {
+      "epoch": 0.31446540880503143,
+      "grad_norm": 1.0197993824595155,
+      "learning_rate": 7.711840597853998e-06,
+      "loss": 0.8583,
+      "step": 850
+    },
+    {
+      "epoch": 0.316315205327414,
+      "grad_norm": 1.0686381595688783,
+      "learning_rate": 7.537486562518735e-06,
+      "loss": 0.8518,
+      "step": 855
+    },
+    {
+      "epoch": 0.3181650018497965,
+      "grad_norm": 0.9978231071140236,
+      "learning_rate": 7.368590079032822e-06,
+      "loss": 0.8648,
+      "step": 860
+    },
+    {
+      "epoch": 0.32001479837217905,
+      "grad_norm": 1.0560393356842295,
+      "learning_rate": 7.205197322008425e-06,
+      "loss": 0.8349,
+      "step": 865
+    },
+    {
+      "epoch": 0.3218645948945616,
+      "grad_norm": 0.9842318437238298,
+      "learning_rate": 7.047352961393844e-06,
+      "loss": 0.8595,
+      "step": 870
+    },
+    {
+      "epoch": 0.32371439141694414,
+      "grad_norm": 0.9952118512469338,
+      "learning_rate": 6.8951001502612065e-06,
+      "loss": 0.869,
+      "step": 875
+    },
+    {
+      "epoch": 0.3255641879393267,
+      "grad_norm": 0.9424925973460067,
+      "learning_rate": 6.748480513008844e-06,
+      "loss": 0.8745,
+      "step": 880
+    },
+    {
+      "epoch": 0.3274139844617092,
+      "grad_norm": 1.0535831291700837,
+      "learning_rate": 6.607534133981594e-06,
+      "loss": 0.8748,
+      "step": 885
+    },
+    {
+      "epoch": 0.32926378098409176,
+      "grad_norm": 1.0723567866928634,
+      "learning_rate": 6.472299546512134e-06,
+      "loss": 0.8815,
+      "step": 890
+    },
+    {
+      "epoch": 0.3311135775064743,
+      "grad_norm": 1.0482717572477003,
+      "learning_rate": 6.342813722386374e-06,
+      "loss": 0.8543,
+      "step": 895
+    },
+    {
+      "epoch": 0.33296337402885684,
+      "grad_norm": 1.046401991122096,
+      "learning_rate": 6.219112061735721e-06,
+      "loss": 0.8785,
+      "step": 900
+    },
+    {
+      "epoch": 0.3348131705512394,
+      "grad_norm": 1.0211208893623536,
+      "learning_rate": 6.1012283833590465e-06,
+      "loss": 0.8757,
+      "step": 905
+    },
+    {
+      "epoch": 0.3366629670736219,
+      "grad_norm": 1.0035173287590444,
+      "learning_rate": 5.989194915476954e-06,
+      "loss": 0.8442,
+      "step": 910
+    },
+    {
+      "epoch": 0.33851276359600446,
+      "grad_norm": 0.9878066592821019,
+      "learning_rate": 5.883042286920918e-06,
+      "loss": 0.8638,
+      "step": 915
+    },
+    {
+      "epoch": 0.340362560118387,
+      "grad_norm": 1.05735602894325,
+      "learning_rate": 5.782799518759658e-06,
+      "loss": 0.8593,
+      "step": 920
+    },
+    {
+      "epoch": 0.3422123566407695,
+      "grad_norm": 1.0649111496758787,
+      "learning_rate": 5.688494016365067e-06,
+      "loss": 0.845,
+      "step": 925
+    },
+    {
+      "epoch": 0.34406215316315203,
+      "grad_norm": 0.957225274905496,
+      "learning_rate": 5.600151561919871e-06,
+      "loss": 0.8134,
+      "step": 930
+    },
+    {
+      "epoch": 0.34591194968553457,
+      "grad_norm": 1.0690464205878594,
+      "learning_rate": 5.517796307369017e-06,
+      "loss": 0.8364,
+      "step": 935
+    },
+    {
+      "epoch": 0.3477617462079171,
+      "grad_norm": 1.0520479367896434,
+      "learning_rate": 5.44145076781679e-06,
+      "loss": 0.8697,
+      "step": 940
+    },
+    {
+      "epoch": 0.34961154273029965,
+      "grad_norm": 0.9648256295717741,
+      "learning_rate": 5.371135815371398e-06,
+      "loss": 0.8334,
+      "step": 945
+    },
+    {
+      "epoch": 0.3514613392526822,
+      "grad_norm": 1.0090319357515105,
+      "learning_rate": 5.3068706734387484e-06,
+      "loss": 0.866,
+      "step": 950
+    },
+    {
+      "epoch": 0.35331113577506473,
+      "grad_norm": 1.0792234217081147,
+      "learning_rate": 5.248672911466959e-06,
+      "loss": 0.8796,
+      "step": 955
+    },
+    {
+      "epoch": 0.3551609322974473,
+      "grad_norm": 0.9703249482629162,
+      "learning_rate": 5.196558440143059e-06,
+      "loss": 0.8329,
+      "step": 960
+    },
+    {
+      "epoch": 0.3570107288198298,
+      "grad_norm": 1.0041999264211245,
+      "learning_rate": 5.150541507043143e-06,
+      "loss": 0.8871,
+      "step": 965
+    },
+    {
+      "epoch": 0.35886052534221236,
+      "grad_norm": 0.9816980969927207,
+      "learning_rate": 5.110634692737244e-06,
+      "loss": 0.8391,
+      "step": 970
+    },
+    {
+      "epoch": 0.3607103218645949,
+      "grad_norm": 0.9903601458320098,
+      "learning_rate": 5.0768489073499295e-06,
+      "loss": 0.8707,
+      "step": 975
+    },
+    {
+      "epoch": 0.36256011838697744,
+      "grad_norm": 0.9605358480816072,
+      "learning_rate": 5.049193387577574e-06,
+      "loss": 0.8677,
+      "step": 980
+    },
+    {
+      "epoch": 0.36440991490936,
+      "grad_norm": 1.0149824944311472,
+      "learning_rate": 5.027675694163144e-06,
+      "loss": 0.8252,
+      "step": 985
+    },
+    {
+      "epoch": 0.3662597114317425,
+      "grad_norm": 1.0514282882320594,
+      "learning_rate": 5.012301709829164e-06,
+      "loss": 0.8361,
+      "step": 990
+    },
+    {
+      "epoch": 0.36810950795412506,
+      "grad_norm": 1.0144426282975678,
+      "learning_rate": 5.003075637669448e-06,
+      "loss": 0.8604,
+      "step": 995
+    },
+    {
+      "epoch": 0.3699593044765076,
+      "grad_norm": 1.041222716272992,
+      "learning_rate": 5e-06,
+      "loss": 0.8502,
+      "step": 1000
+    },
+    {
+      "epoch": 0.3699593044765076,
+      "step": 1000,
+      "total_flos": 28487712768000.0,
+      "train_loss": 0.920774197101593,
+      "train_runtime": 1759.2244,
+      "train_samples_per_second": 18.19,
+      "train_steps_per_second": 0.568
+    }
+  ],
+  "logging_steps": 5,
+  "max_steps": 1000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 100,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 28487712768000.0,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}