Training in progress, step 2750, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1370666272
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:45a6dc7811d0780d24458a5c92ede92e93f2f4528983f11ed322dff31c53b943
|
3 |
size 1370666272
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 697294462
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bb00e6d73ff026c0ba65053478c15df649da25fa90ee6e337c488730339ed699
|
3 |
size 697294462
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:790e3eb032b3621396d9aae2e147bc1026c5e0e652ee7a1b6ebb9eb3f2b37b4b
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -18907,6 +18907,356 @@
|
|
18907 |
"learning_rate": 1.4419552353869964e-05,
|
18908 |
"loss": 1.6192,
|
18909 |
"step": 2700
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18910 |
}
|
18911 |
],
|
18912 |
"logging_steps": 1,
|
@@ -18926,7 +19276,7 @@
|
|
18926 |
"attributes": {}
|
18927 |
}
|
18928 |
},
|
18929 |
-
"total_flos": 1.
|
18930 |
"train_batch_size": 32,
|
18931 |
"trial_name": null,
|
18932 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.6516587677725119,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 2750,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
18907 |
"learning_rate": 1.4419552353869964e-05,
|
18908 |
"loss": 1.6192,
|
18909 |
"step": 2700
|
18910 |
+
},
|
18911 |
+
{
|
18912 |
+
"epoch": 0.6400473933649289,
|
18913 |
+
"grad_norm": 0.7438738942146301,
|
18914 |
+
"learning_rate": 1.4402692901787481e-05,
|
18915 |
+
"loss": 1.6959,
|
18916 |
+
"step": 2701
|
18917 |
+
},
|
18918 |
+
{
|
18919 |
+
"epoch": 0.6402843601895735,
|
18920 |
+
"grad_norm": 0.7854204773902893,
|
18921 |
+
"learning_rate": 1.4385839322844863e-05,
|
18922 |
+
"loss": 1.6625,
|
18923 |
+
"step": 2702
|
18924 |
+
},
|
18925 |
+
{
|
18926 |
+
"epoch": 0.640521327014218,
|
18927 |
+
"grad_norm": 0.9514130353927612,
|
18928 |
+
"learning_rate": 1.436899162638255e-05,
|
18929 |
+
"loss": 1.6547,
|
18930 |
+
"step": 2703
|
18931 |
+
},
|
18932 |
+
{
|
18933 |
+
"epoch": 0.6407582938388625,
|
18934 |
+
"grad_norm": 0.8762110471725464,
|
18935 |
+
"learning_rate": 1.4352149821737692e-05,
|
18936 |
+
"loss": 1.5567,
|
18937 |
+
"step": 2704
|
18938 |
+
},
|
18939 |
+
{
|
18940 |
+
"epoch": 0.6409952606635071,
|
18941 |
+
"grad_norm": 0.8398658633232117,
|
18942 |
+
"learning_rate": 1.4335313918244209e-05,
|
18943 |
+
"loss": 1.7205,
|
18944 |
+
"step": 2705
|
18945 |
+
},
|
18946 |
+
{
|
18947 |
+
"epoch": 0.6412322274881517,
|
18948 |
+
"grad_norm": 0.9590023756027222,
|
18949 |
+
"learning_rate": 1.4318483925232734e-05,
|
18950 |
+
"loss": 1.5485,
|
18951 |
+
"step": 2706
|
18952 |
+
},
|
18953 |
+
{
|
18954 |
+
"epoch": 0.6414691943127963,
|
18955 |
+
"grad_norm": 0.9123085737228394,
|
18956 |
+
"learning_rate": 1.4301659852030642e-05,
|
18957 |
+
"loss": 1.5367,
|
18958 |
+
"step": 2707
|
18959 |
+
},
|
18960 |
+
{
|
18961 |
+
"epoch": 0.6417061611374407,
|
18962 |
+
"grad_norm": 0.8524888753890991,
|
18963 |
+
"learning_rate": 1.4284841707961989e-05,
|
18964 |
+
"loss": 1.5718,
|
18965 |
+
"step": 2708
|
18966 |
+
},
|
18967 |
+
{
|
18968 |
+
"epoch": 0.6419431279620853,
|
18969 |
+
"grad_norm": 0.7618995308876038,
|
18970 |
+
"learning_rate": 1.426802950234758e-05,
|
18971 |
+
"loss": 1.6583,
|
18972 |
+
"step": 2709
|
18973 |
+
},
|
18974 |
+
{
|
18975 |
+
"epoch": 0.6421800947867299,
|
18976 |
+
"grad_norm": 0.8648304343223572,
|
18977 |
+
"learning_rate": 1.4251223244504918e-05,
|
18978 |
+
"loss": 1.5905,
|
18979 |
+
"step": 2710
|
18980 |
+
},
|
18981 |
+
{
|
18982 |
+
"epoch": 0.6424170616113745,
|
18983 |
+
"grad_norm": 0.9064821004867554,
|
18984 |
+
"learning_rate": 1.4234422943748207e-05,
|
18985 |
+
"loss": 1.6898,
|
18986 |
+
"step": 2711
|
18987 |
+
},
|
18988 |
+
{
|
18989 |
+
"epoch": 0.6426540284360189,
|
18990 |
+
"grad_norm": 0.8449388742446899,
|
18991 |
+
"learning_rate": 1.4217628609388356e-05,
|
18992 |
+
"loss": 1.6486,
|
18993 |
+
"step": 2712
|
18994 |
+
},
|
18995 |
+
{
|
18996 |
+
"epoch": 0.6428909952606635,
|
18997 |
+
"grad_norm": 0.9291089773178101,
|
18998 |
+
"learning_rate": 1.4200840250732956e-05,
|
18999 |
+
"loss": 1.6963,
|
19000 |
+
"step": 2713
|
19001 |
+
},
|
19002 |
+
{
|
19003 |
+
"epoch": 0.6431279620853081,
|
19004 |
+
"grad_norm": 0.9232550859451294,
|
19005 |
+
"learning_rate": 1.4184057877086299e-05,
|
19006 |
+
"loss": 1.6709,
|
19007 |
+
"step": 2714
|
19008 |
+
},
|
19009 |
+
{
|
19010 |
+
"epoch": 0.6433649289099526,
|
19011 |
+
"grad_norm": 1.0231064558029175,
|
19012 |
+
"learning_rate": 1.4167281497749362e-05,
|
19013 |
+
"loss": 1.5898,
|
19014 |
+
"step": 2715
|
19015 |
+
},
|
19016 |
+
{
|
19017 |
+
"epoch": 0.6436018957345971,
|
19018 |
+
"grad_norm": 0.9170055985450745,
|
19019 |
+
"learning_rate": 1.4150511122019771e-05,
|
19020 |
+
"loss": 1.5688,
|
19021 |
+
"step": 2716
|
19022 |
+
},
|
19023 |
+
{
|
19024 |
+
"epoch": 0.6438388625592417,
|
19025 |
+
"grad_norm": 0.8643277287483215,
|
19026 |
+
"learning_rate": 1.4133746759191862e-05,
|
19027 |
+
"loss": 1.6283,
|
19028 |
+
"step": 2717
|
19029 |
+
},
|
19030 |
+
{
|
19031 |
+
"epoch": 0.6440758293838863,
|
19032 |
+
"grad_norm": 0.8216496706008911,
|
19033 |
+
"learning_rate": 1.4116988418556626e-05,
|
19034 |
+
"loss": 1.6397,
|
19035 |
+
"step": 2718
|
19036 |
+
},
|
19037 |
+
{
|
19038 |
+
"epoch": 0.6443127962085308,
|
19039 |
+
"grad_norm": 0.8264020085334778,
|
19040 |
+
"learning_rate": 1.4100236109401694e-05,
|
19041 |
+
"loss": 1.5473,
|
19042 |
+
"step": 2719
|
19043 |
+
},
|
19044 |
+
{
|
19045 |
+
"epoch": 0.6445497630331753,
|
19046 |
+
"grad_norm": 0.9171279072761536,
|
19047 |
+
"learning_rate": 1.4083489841011383e-05,
|
19048 |
+
"loss": 1.5799,
|
19049 |
+
"step": 2720
|
19050 |
+
},
|
19051 |
+
{
|
19052 |
+
"epoch": 0.6447867298578199,
|
19053 |
+
"grad_norm": 0.8693802952766418,
|
19054 |
+
"learning_rate": 1.4066749622666655e-05,
|
19055 |
+
"loss": 1.565,
|
19056 |
+
"step": 2721
|
19057 |
+
},
|
19058 |
+
{
|
19059 |
+
"epoch": 0.6450236966824645,
|
19060 |
+
"grad_norm": 0.845255970954895,
|
19061 |
+
"learning_rate": 1.4050015463645127e-05,
|
19062 |
+
"loss": 1.5896,
|
19063 |
+
"step": 2722
|
19064 |
+
},
|
19065 |
+
{
|
19066 |
+
"epoch": 0.645260663507109,
|
19067 |
+
"grad_norm": 0.9638903141021729,
|
19068 |
+
"learning_rate": 1.4033287373221022e-05,
|
19069 |
+
"loss": 1.6578,
|
19070 |
+
"step": 2723
|
19071 |
+
},
|
19072 |
+
{
|
19073 |
+
"epoch": 0.6454976303317536,
|
19074 |
+
"grad_norm": 0.8678495287895203,
|
19075 |
+
"learning_rate": 1.401656536066524e-05,
|
19076 |
+
"loss": 1.5692,
|
19077 |
+
"step": 2724
|
19078 |
+
},
|
19079 |
+
{
|
19080 |
+
"epoch": 0.6457345971563981,
|
19081 |
+
"grad_norm": 0.8614510297775269,
|
19082 |
+
"learning_rate": 1.3999849435245293e-05,
|
19083 |
+
"loss": 1.5025,
|
19084 |
+
"step": 2725
|
19085 |
+
},
|
19086 |
+
{
|
19087 |
+
"epoch": 0.6459715639810426,
|
19088 |
+
"grad_norm": 0.8298974633216858,
|
19089 |
+
"learning_rate": 1.3983139606225338e-05,
|
19090 |
+
"loss": 1.5499,
|
19091 |
+
"step": 2726
|
19092 |
+
},
|
19093 |
+
{
|
19094 |
+
"epoch": 0.6462085308056872,
|
19095 |
+
"grad_norm": 0.8978272676467896,
|
19096 |
+
"learning_rate": 1.3966435882866118e-05,
|
19097 |
+
"loss": 1.6624,
|
19098 |
+
"step": 2727
|
19099 |
+
},
|
19100 |
+
{
|
19101 |
+
"epoch": 0.6464454976303318,
|
19102 |
+
"grad_norm": 0.8641113638877869,
|
19103 |
+
"learning_rate": 1.3949738274425023e-05,
|
19104 |
+
"loss": 1.6375,
|
19105 |
+
"step": 2728
|
19106 |
+
},
|
19107 |
+
{
|
19108 |
+
"epoch": 0.6466824644549763,
|
19109 |
+
"grad_norm": 0.8167167901992798,
|
19110 |
+
"learning_rate": 1.3933046790156056e-05,
|
19111 |
+
"loss": 1.5251,
|
19112 |
+
"step": 2729
|
19113 |
+
},
|
19114 |
+
{
|
19115 |
+
"epoch": 0.6469194312796208,
|
19116 |
+
"grad_norm": 0.8559361696243286,
|
19117 |
+
"learning_rate": 1.3916361439309792e-05,
|
19118 |
+
"loss": 1.5547,
|
19119 |
+
"step": 2730
|
19120 |
+
},
|
19121 |
+
{
|
19122 |
+
"epoch": 0.6471563981042654,
|
19123 |
+
"grad_norm": 0.8385886549949646,
|
19124 |
+
"learning_rate": 1.3899682231133437e-05,
|
19125 |
+
"loss": 1.5326,
|
19126 |
+
"step": 2731
|
19127 |
+
},
|
19128 |
+
{
|
19129 |
+
"epoch": 0.64739336492891,
|
19130 |
+
"grad_norm": 0.8604897260665894,
|
19131 |
+
"learning_rate": 1.3883009174870793e-05,
|
19132 |
+
"loss": 1.5613,
|
19133 |
+
"step": 2732
|
19134 |
+
},
|
19135 |
+
{
|
19136 |
+
"epoch": 0.6476303317535546,
|
19137 |
+
"grad_norm": 0.7844538688659668,
|
19138 |
+
"learning_rate": 1.386634227976224e-05,
|
19139 |
+
"loss": 1.5588,
|
19140 |
+
"step": 2733
|
19141 |
+
},
|
19142 |
+
{
|
19143 |
+
"epoch": 0.647867298578199,
|
19144 |
+
"grad_norm": 0.8636563420295715,
|
19145 |
+
"learning_rate": 1.3849681555044744e-05,
|
19146 |
+
"loss": 1.7389,
|
19147 |
+
"step": 2734
|
19148 |
+
},
|
19149 |
+
{
|
19150 |
+
"epoch": 0.6481042654028436,
|
19151 |
+
"grad_norm": 0.8014503717422485,
|
19152 |
+
"learning_rate": 1.383302700995186e-05,
|
19153 |
+
"loss": 1.5395,
|
19154 |
+
"step": 2735
|
19155 |
+
},
|
19156 |
+
{
|
19157 |
+
"epoch": 0.6483412322274882,
|
19158 |
+
"grad_norm": 0.8970300555229187,
|
19159 |
+
"learning_rate": 1.3816378653713713e-05,
|
19160 |
+
"loss": 1.5465,
|
19161 |
+
"step": 2736
|
19162 |
+
},
|
19163 |
+
{
|
19164 |
+
"epoch": 0.6485781990521327,
|
19165 |
+
"grad_norm": 0.8757097721099854,
|
19166 |
+
"learning_rate": 1.3799736495557003e-05,
|
19167 |
+
"loss": 1.5238,
|
19168 |
+
"step": 2737
|
19169 |
+
},
|
19170 |
+
{
|
19171 |
+
"epoch": 0.6488151658767772,
|
19172 |
+
"grad_norm": 1.0647528171539307,
|
19173 |
+
"learning_rate": 1.378310054470498e-05,
|
19174 |
+
"loss": 1.6296,
|
19175 |
+
"step": 2738
|
19176 |
+
},
|
19177 |
+
{
|
19178 |
+
"epoch": 0.6490521327014218,
|
19179 |
+
"grad_norm": 0.7840937376022339,
|
19180 |
+
"learning_rate": 1.3766470810377468e-05,
|
19181 |
+
"loss": 1.6038,
|
19182 |
+
"step": 2739
|
19183 |
+
},
|
19184 |
+
{
|
19185 |
+
"epoch": 0.6492890995260664,
|
19186 |
+
"grad_norm": 0.8857571482658386,
|
19187 |
+
"learning_rate": 1.374984730179085e-05,
|
19188 |
+
"loss": 1.5821,
|
19189 |
+
"step": 2740
|
19190 |
+
},
|
19191 |
+
{
|
19192 |
+
"epoch": 0.6495260663507109,
|
19193 |
+
"grad_norm": 0.9384831786155701,
|
19194 |
+
"learning_rate": 1.3733230028158034e-05,
|
19195 |
+
"loss": 1.6555,
|
19196 |
+
"step": 2741
|
19197 |
+
},
|
19198 |
+
{
|
19199 |
+
"epoch": 0.6497630331753554,
|
19200 |
+
"grad_norm": 0.9250389337539673,
|
19201 |
+
"learning_rate": 1.3716618998688502e-05,
|
19202 |
+
"loss": 1.6025,
|
19203 |
+
"step": 2742
|
19204 |
+
},
|
19205 |
+
{
|
19206 |
+
"epoch": 0.65,
|
19207 |
+
"grad_norm": 0.8577538132667542,
|
19208 |
+
"learning_rate": 1.3700014222588255e-05,
|
19209 |
+
"loss": 1.5963,
|
19210 |
+
"step": 2743
|
19211 |
+
},
|
19212 |
+
{
|
19213 |
+
"epoch": 0.6502369668246446,
|
19214 |
+
"grad_norm": 0.8080734014511108,
|
19215 |
+
"learning_rate": 1.3683415709059852e-05,
|
19216 |
+
"loss": 1.6265,
|
19217 |
+
"step": 2744
|
19218 |
+
},
|
19219 |
+
{
|
19220 |
+
"epoch": 0.6504739336492891,
|
19221 |
+
"grad_norm": 1.0959298610687256,
|
19222 |
+
"learning_rate": 1.3666823467302341e-05,
|
19223 |
+
"loss": 1.54,
|
19224 |
+
"step": 2745
|
19225 |
+
},
|
19226 |
+
{
|
19227 |
+
"epoch": 0.6507109004739337,
|
19228 |
+
"grad_norm": 0.7990492582321167,
|
19229 |
+
"learning_rate": 1.3650237506511331e-05,
|
19230 |
+
"loss": 1.5411,
|
19231 |
+
"step": 2746
|
19232 |
+
},
|
19233 |
+
{
|
19234 |
+
"epoch": 0.6509478672985782,
|
19235 |
+
"grad_norm": 0.886177122592926,
|
19236 |
+
"learning_rate": 1.3633657835878938e-05,
|
19237 |
+
"loss": 1.7162,
|
19238 |
+
"step": 2747
|
19239 |
+
},
|
19240 |
+
{
|
19241 |
+
"epoch": 0.6511848341232227,
|
19242 |
+
"grad_norm": 0.885360598564148,
|
19243 |
+
"learning_rate": 1.3617084464593799e-05,
|
19244 |
+
"loss": 1.597,
|
19245 |
+
"step": 2748
|
19246 |
+
},
|
19247 |
+
{
|
19248 |
+
"epoch": 0.6514218009478673,
|
19249 |
+
"grad_norm": 1.0608346462249756,
|
19250 |
+
"learning_rate": 1.3600517401841029e-05,
|
19251 |
+
"loss": 1.6312,
|
19252 |
+
"step": 2749
|
19253 |
+
},
|
19254 |
+
{
|
19255 |
+
"epoch": 0.6516587677725119,
|
19256 |
+
"grad_norm": 0.7951533198356628,
|
19257 |
+
"learning_rate": 1.3583956656802289e-05,
|
19258 |
+
"loss": 1.5896,
|
19259 |
+
"step": 2750
|
19260 |
}
|
19261 |
],
|
19262 |
"logging_steps": 1,
|
|
|
19276 |
"attributes": {}
|
19277 |
}
|
19278 |
},
|
19279 |
+
"total_flos": 1.220660969865216e+18,
|
19280 |
"train_batch_size": 32,
|
19281 |
"trial_name": null,
|
19282 |
"trial_params": null
|