{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9166666666666665, "global_step": 70, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.42, "learning_rate": 8.611111111111112e-05, "loss": 1.4322, "step": 10 }, { "epoch": 0.42, "eval_accuracy": 0.3125, "eval_loss": 1.3030599355697632, "eval_runtime": 9.8316, "eval_samples_per_second": 4.882, "eval_steps_per_second": 1.221, "step": 10 }, { "epoch": 0.83, "learning_rate": 7.222222222222222e-05, "loss": 1.2975, "step": 20 }, { "epoch": 0.83, "eval_accuracy": 0.3541666567325592, "eval_loss": 1.2456461191177368, "eval_runtime": 12.762, "eval_samples_per_second": 3.761, "eval_steps_per_second": 0.94, "step": 20 }, { "epoch": 1.25, "learning_rate": 5.833333333333334e-05, "loss": 1.3053, "step": 30 }, { "epoch": 1.25, "eval_accuracy": 0.375, "eval_loss": 1.235076904296875, "eval_runtime": 9.8907, "eval_samples_per_second": 4.853, "eval_steps_per_second": 1.213, "step": 30 }, { "epoch": 1.67, "learning_rate": 4.4444444444444447e-05, "loss": 1.2042, "step": 40 }, { "epoch": 1.67, "eval_accuracy": 0.375, "eval_loss": 1.1900380849838257, "eval_runtime": 12.1455, "eval_samples_per_second": 3.952, "eval_steps_per_second": 0.988, "step": 40 }, { "epoch": 2.08, "learning_rate": 3.055555555555556e-05, "loss": 1.0192, "step": 50 }, { "epoch": 2.08, "eval_accuracy": 0.5416666865348816, "eval_loss": 1.006090760231018, "eval_runtime": 9.9246, "eval_samples_per_second": 4.836, "eval_steps_per_second": 1.209, "step": 50 }, { "epoch": 2.5, "learning_rate": 1.6666666666666667e-05, "loss": 0.8874, "step": 60 }, { "epoch": 2.5, "eval_accuracy": 0.625, "eval_loss": 0.8699900507926941, "eval_runtime": 11.1183, "eval_samples_per_second": 4.317, "eval_steps_per_second": 1.079, "step": 60 }, { "epoch": 2.92, "learning_rate": 2.777777777777778e-06, "loss": 0.855, "step": 70 }, { "epoch": 2.92, "eval_accuracy": 0.5833333134651184, "eval_loss": 0.9461669921875, "eval_runtime": 9.9807, "eval_samples_per_second": 4.809, "eval_steps_per_second": 1.202, "step": 70 } ], "max_steps": 72, "num_train_epochs": 3, "total_flos": 2.022508856471088e+17, "trial_name": null, "trial_params": null }