|
{ |
|
"best_metric": 1.1274107694625854, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-200", |
|
"epoch": 0.004597436928912131, |
|
"eval_steps": 100, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 2.2987184644560657e-05, |
|
"eval_loss": 2.365870714187622, |
|
"eval_runtime": 520.553, |
|
"eval_samples_per_second": 35.188, |
|
"eval_steps_per_second": 17.595, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00022987184644560657, |
|
"grad_norm": 6.934662342071533, |
|
"learning_rate": 2.5e-06, |
|
"loss": 2.1972, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.00045974369289121313, |
|
"grad_norm": 6.904849052429199, |
|
"learning_rate": 5e-06, |
|
"loss": 2.213, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0006896155393368198, |
|
"grad_norm": 4.1402997970581055, |
|
"learning_rate": 7.5e-06, |
|
"loss": 2.1982, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0009194873857824263, |
|
"grad_norm": 4.010258674621582, |
|
"learning_rate": 1e-05, |
|
"loss": 1.6513, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0011493592322280328, |
|
"grad_norm": 4.718471527099609, |
|
"learning_rate": 1.25e-05, |
|
"loss": 1.2747, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0013792310786736395, |
|
"grad_norm": 3.8932864665985107, |
|
"learning_rate": 1.5e-05, |
|
"loss": 1.2147, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.001609102925119246, |
|
"grad_norm": 5.198622703552246, |
|
"learning_rate": 1.75e-05, |
|
"loss": 1.2063, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.0018389747715648525, |
|
"grad_norm": 4.243978500366211, |
|
"learning_rate": 2e-05, |
|
"loss": 1.1345, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.0020688466180104593, |
|
"grad_norm": 4.224928379058838, |
|
"learning_rate": 2.25e-05, |
|
"loss": 1.1293, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.0022987184644560656, |
|
"grad_norm": 5.764342784881592, |
|
"learning_rate": 2.5e-05, |
|
"loss": 1.1988, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0022987184644560656, |
|
"eval_loss": 1.184658408164978, |
|
"eval_runtime": 520.0761, |
|
"eval_samples_per_second": 35.22, |
|
"eval_steps_per_second": 17.611, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0025285903109016723, |
|
"grad_norm": 3.7343056201934814, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 1.0981, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.002758462157347279, |
|
"grad_norm": 4.652059078216553, |
|
"learning_rate": 3e-05, |
|
"loss": 1.2341, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.0029883340037928853, |
|
"grad_norm": 4.9458231925964355, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 1.1237, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.003218205850238492, |
|
"grad_norm": 3.449315309524536, |
|
"learning_rate": 3.5e-05, |
|
"loss": 1.0449, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.0034480776966840988, |
|
"grad_norm": 4.949376106262207, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 1.1911, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.003677949543129705, |
|
"grad_norm": 5.409412384033203, |
|
"learning_rate": 4e-05, |
|
"loss": 1.0661, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.003907821389575312, |
|
"grad_norm": 3.166673421859741, |
|
"learning_rate": 4.25e-05, |
|
"loss": 1.1751, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.0041376932360209185, |
|
"grad_norm": 3.628138303756714, |
|
"learning_rate": 4.5e-05, |
|
"loss": 1.1335, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.004367565082466525, |
|
"grad_norm": 3.710859775543213, |
|
"learning_rate": 4.75e-05, |
|
"loss": 1.1087, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.004597436928912131, |
|
"grad_norm": 3.711989164352417, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2195, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.004597436928912131, |
|
"eval_loss": 1.1274107694625854, |
|
"eval_runtime": 521.4205, |
|
"eval_samples_per_second": 35.129, |
|
"eval_steps_per_second": 17.565, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.13314785247232e+16, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|