mosama commited on
Commit
64957bf
·
verified ·
1 Parent(s): 5159f04

Training in progress, step 2750, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4e6e64cd0f65a1b2b00c95c82731ef88ea000d66c62bf6137f7e1a473aacf8ef
3
  size 1370666272
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45a6dc7811d0780d24458a5c92ede92e93f2f4528983f11ed322dff31c53b943
3
  size 1370666272
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4e437bdbea15b052c2946672282387fce8ebaa3eec7db8105928623b5d6c489b
3
  size 697294462
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb00e6d73ff026c0ba65053478c15df649da25fa90ee6e337c488730339ed699
3
  size 697294462
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b35ab382e0f59869aa7c89a602bbec3425d3ead1f6c99394c8d4c3ad9639e0d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:790e3eb032b3621396d9aae2e147bc1026c5e0e652ee7a1b6ebb9eb3f2b37b4b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.6398104265402843,
5
  "eval_steps": 500,
6
- "global_step": 2700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -18907,6 +18907,356 @@
18907
  "learning_rate": 1.4419552353869964e-05,
18908
  "loss": 1.6192,
18909
  "step": 2700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18910
  }
18911
  ],
18912
  "logging_steps": 1,
@@ -18926,7 +19276,7 @@
18926
  "attributes": {}
18927
  }
18928
  },
18929
- "total_flos": 1.1984671340494848e+18,
18930
  "train_batch_size": 32,
18931
  "trial_name": null,
18932
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.6516587677725119,
5
  "eval_steps": 500,
6
+ "global_step": 2750,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
18907
  "learning_rate": 1.4419552353869964e-05,
18908
  "loss": 1.6192,
18909
  "step": 2700
18910
+ },
18911
+ {
18912
+ "epoch": 0.6400473933649289,
18913
+ "grad_norm": 0.7438738942146301,
18914
+ "learning_rate": 1.4402692901787481e-05,
18915
+ "loss": 1.6959,
18916
+ "step": 2701
18917
+ },
18918
+ {
18919
+ "epoch": 0.6402843601895735,
18920
+ "grad_norm": 0.7854204773902893,
18921
+ "learning_rate": 1.4385839322844863e-05,
18922
+ "loss": 1.6625,
18923
+ "step": 2702
18924
+ },
18925
+ {
18926
+ "epoch": 0.640521327014218,
18927
+ "grad_norm": 0.9514130353927612,
18928
+ "learning_rate": 1.436899162638255e-05,
18929
+ "loss": 1.6547,
18930
+ "step": 2703
18931
+ },
18932
+ {
18933
+ "epoch": 0.6407582938388625,
18934
+ "grad_norm": 0.8762110471725464,
18935
+ "learning_rate": 1.4352149821737692e-05,
18936
+ "loss": 1.5567,
18937
+ "step": 2704
18938
+ },
18939
+ {
18940
+ "epoch": 0.6409952606635071,
18941
+ "grad_norm": 0.8398658633232117,
18942
+ "learning_rate": 1.4335313918244209e-05,
18943
+ "loss": 1.7205,
18944
+ "step": 2705
18945
+ },
18946
+ {
18947
+ "epoch": 0.6412322274881517,
18948
+ "grad_norm": 0.9590023756027222,
18949
+ "learning_rate": 1.4318483925232734e-05,
18950
+ "loss": 1.5485,
18951
+ "step": 2706
18952
+ },
18953
+ {
18954
+ "epoch": 0.6414691943127963,
18955
+ "grad_norm": 0.9123085737228394,
18956
+ "learning_rate": 1.4301659852030642e-05,
18957
+ "loss": 1.5367,
18958
+ "step": 2707
18959
+ },
18960
+ {
18961
+ "epoch": 0.6417061611374407,
18962
+ "grad_norm": 0.8524888753890991,
18963
+ "learning_rate": 1.4284841707961989e-05,
18964
+ "loss": 1.5718,
18965
+ "step": 2708
18966
+ },
18967
+ {
18968
+ "epoch": 0.6419431279620853,
18969
+ "grad_norm": 0.7618995308876038,
18970
+ "learning_rate": 1.426802950234758e-05,
18971
+ "loss": 1.6583,
18972
+ "step": 2709
18973
+ },
18974
+ {
18975
+ "epoch": 0.6421800947867299,
18976
+ "grad_norm": 0.8648304343223572,
18977
+ "learning_rate": 1.4251223244504918e-05,
18978
+ "loss": 1.5905,
18979
+ "step": 2710
18980
+ },
18981
+ {
18982
+ "epoch": 0.6424170616113745,
18983
+ "grad_norm": 0.9064821004867554,
18984
+ "learning_rate": 1.4234422943748207e-05,
18985
+ "loss": 1.6898,
18986
+ "step": 2711
18987
+ },
18988
+ {
18989
+ "epoch": 0.6426540284360189,
18990
+ "grad_norm": 0.8449388742446899,
18991
+ "learning_rate": 1.4217628609388356e-05,
18992
+ "loss": 1.6486,
18993
+ "step": 2712
18994
+ },
18995
+ {
18996
+ "epoch": 0.6428909952606635,
18997
+ "grad_norm": 0.9291089773178101,
18998
+ "learning_rate": 1.4200840250732956e-05,
18999
+ "loss": 1.6963,
19000
+ "step": 2713
19001
+ },
19002
+ {
19003
+ "epoch": 0.6431279620853081,
19004
+ "grad_norm": 0.9232550859451294,
19005
+ "learning_rate": 1.4184057877086299e-05,
19006
+ "loss": 1.6709,
19007
+ "step": 2714
19008
+ },
19009
+ {
19010
+ "epoch": 0.6433649289099526,
19011
+ "grad_norm": 1.0231064558029175,
19012
+ "learning_rate": 1.4167281497749362e-05,
19013
+ "loss": 1.5898,
19014
+ "step": 2715
19015
+ },
19016
+ {
19017
+ "epoch": 0.6436018957345971,
19018
+ "grad_norm": 0.9170055985450745,
19019
+ "learning_rate": 1.4150511122019771e-05,
19020
+ "loss": 1.5688,
19021
+ "step": 2716
19022
+ },
19023
+ {
19024
+ "epoch": 0.6438388625592417,
19025
+ "grad_norm": 0.8643277287483215,
19026
+ "learning_rate": 1.4133746759191862e-05,
19027
+ "loss": 1.6283,
19028
+ "step": 2717
19029
+ },
19030
+ {
19031
+ "epoch": 0.6440758293838863,
19032
+ "grad_norm": 0.8216496706008911,
19033
+ "learning_rate": 1.4116988418556626e-05,
19034
+ "loss": 1.6397,
19035
+ "step": 2718
19036
+ },
19037
+ {
19038
+ "epoch": 0.6443127962085308,
19039
+ "grad_norm": 0.8264020085334778,
19040
+ "learning_rate": 1.4100236109401694e-05,
19041
+ "loss": 1.5473,
19042
+ "step": 2719
19043
+ },
19044
+ {
19045
+ "epoch": 0.6445497630331753,
19046
+ "grad_norm": 0.9171279072761536,
19047
+ "learning_rate": 1.4083489841011383e-05,
19048
+ "loss": 1.5799,
19049
+ "step": 2720
19050
+ },
19051
+ {
19052
+ "epoch": 0.6447867298578199,
19053
+ "grad_norm": 0.8693802952766418,
19054
+ "learning_rate": 1.4066749622666655e-05,
19055
+ "loss": 1.565,
19056
+ "step": 2721
19057
+ },
19058
+ {
19059
+ "epoch": 0.6450236966824645,
19060
+ "grad_norm": 0.845255970954895,
19061
+ "learning_rate": 1.4050015463645127e-05,
19062
+ "loss": 1.5896,
19063
+ "step": 2722
19064
+ },
19065
+ {
19066
+ "epoch": 0.645260663507109,
19067
+ "grad_norm": 0.9638903141021729,
19068
+ "learning_rate": 1.4033287373221022e-05,
19069
+ "loss": 1.6578,
19070
+ "step": 2723
19071
+ },
19072
+ {
19073
+ "epoch": 0.6454976303317536,
19074
+ "grad_norm": 0.8678495287895203,
19075
+ "learning_rate": 1.401656536066524e-05,
19076
+ "loss": 1.5692,
19077
+ "step": 2724
19078
+ },
19079
+ {
19080
+ "epoch": 0.6457345971563981,
19081
+ "grad_norm": 0.8614510297775269,
19082
+ "learning_rate": 1.3999849435245293e-05,
19083
+ "loss": 1.5025,
19084
+ "step": 2725
19085
+ },
19086
+ {
19087
+ "epoch": 0.6459715639810426,
19088
+ "grad_norm": 0.8298974633216858,
19089
+ "learning_rate": 1.3983139606225338e-05,
19090
+ "loss": 1.5499,
19091
+ "step": 2726
19092
+ },
19093
+ {
19094
+ "epoch": 0.6462085308056872,
19095
+ "grad_norm": 0.8978272676467896,
19096
+ "learning_rate": 1.3966435882866118e-05,
19097
+ "loss": 1.6624,
19098
+ "step": 2727
19099
+ },
19100
+ {
19101
+ "epoch": 0.6464454976303318,
19102
+ "grad_norm": 0.8641113638877869,
19103
+ "learning_rate": 1.3949738274425023e-05,
19104
+ "loss": 1.6375,
19105
+ "step": 2728
19106
+ },
19107
+ {
19108
+ "epoch": 0.6466824644549763,
19109
+ "grad_norm": 0.8167167901992798,
19110
+ "learning_rate": 1.3933046790156056e-05,
19111
+ "loss": 1.5251,
19112
+ "step": 2729
19113
+ },
19114
+ {
19115
+ "epoch": 0.6469194312796208,
19116
+ "grad_norm": 0.8559361696243286,
19117
+ "learning_rate": 1.3916361439309792e-05,
19118
+ "loss": 1.5547,
19119
+ "step": 2730
19120
+ },
19121
+ {
19122
+ "epoch": 0.6471563981042654,
19123
+ "grad_norm": 0.8385886549949646,
19124
+ "learning_rate": 1.3899682231133437e-05,
19125
+ "loss": 1.5326,
19126
+ "step": 2731
19127
+ },
19128
+ {
19129
+ "epoch": 0.64739336492891,
19130
+ "grad_norm": 0.8604897260665894,
19131
+ "learning_rate": 1.3883009174870793e-05,
19132
+ "loss": 1.5613,
19133
+ "step": 2732
19134
+ },
19135
+ {
19136
+ "epoch": 0.6476303317535546,
19137
+ "grad_norm": 0.7844538688659668,
19138
+ "learning_rate": 1.386634227976224e-05,
19139
+ "loss": 1.5588,
19140
+ "step": 2733
19141
+ },
19142
+ {
19143
+ "epoch": 0.647867298578199,
19144
+ "grad_norm": 0.8636563420295715,
19145
+ "learning_rate": 1.3849681555044744e-05,
19146
+ "loss": 1.7389,
19147
+ "step": 2734
19148
+ },
19149
+ {
19150
+ "epoch": 0.6481042654028436,
19151
+ "grad_norm": 0.8014503717422485,
19152
+ "learning_rate": 1.383302700995186e-05,
19153
+ "loss": 1.5395,
19154
+ "step": 2735
19155
+ },
19156
+ {
19157
+ "epoch": 0.6483412322274882,
19158
+ "grad_norm": 0.8970300555229187,
19159
+ "learning_rate": 1.3816378653713713e-05,
19160
+ "loss": 1.5465,
19161
+ "step": 2736
19162
+ },
19163
+ {
19164
+ "epoch": 0.6485781990521327,
19165
+ "grad_norm": 0.8757097721099854,
19166
+ "learning_rate": 1.3799736495557003e-05,
19167
+ "loss": 1.5238,
19168
+ "step": 2737
19169
+ },
19170
+ {
19171
+ "epoch": 0.6488151658767772,
19172
+ "grad_norm": 1.0647528171539307,
19173
+ "learning_rate": 1.378310054470498e-05,
19174
+ "loss": 1.6296,
19175
+ "step": 2738
19176
+ },
19177
+ {
19178
+ "epoch": 0.6490521327014218,
19179
+ "grad_norm": 0.7840937376022339,
19180
+ "learning_rate": 1.3766470810377468e-05,
19181
+ "loss": 1.6038,
19182
+ "step": 2739
19183
+ },
19184
+ {
19185
+ "epoch": 0.6492890995260664,
19186
+ "grad_norm": 0.8857571482658386,
19187
+ "learning_rate": 1.374984730179085e-05,
19188
+ "loss": 1.5821,
19189
+ "step": 2740
19190
+ },
19191
+ {
19192
+ "epoch": 0.6495260663507109,
19193
+ "grad_norm": 0.9384831786155701,
19194
+ "learning_rate": 1.3733230028158034e-05,
19195
+ "loss": 1.6555,
19196
+ "step": 2741
19197
+ },
19198
+ {
19199
+ "epoch": 0.6497630331753554,
19200
+ "grad_norm": 0.9250389337539673,
19201
+ "learning_rate": 1.3716618998688502e-05,
19202
+ "loss": 1.6025,
19203
+ "step": 2742
19204
+ },
19205
+ {
19206
+ "epoch": 0.65,
19207
+ "grad_norm": 0.8577538132667542,
19208
+ "learning_rate": 1.3700014222588255e-05,
19209
+ "loss": 1.5963,
19210
+ "step": 2743
19211
+ },
19212
+ {
19213
+ "epoch": 0.6502369668246446,
19214
+ "grad_norm": 0.8080734014511108,
19215
+ "learning_rate": 1.3683415709059852e-05,
19216
+ "loss": 1.6265,
19217
+ "step": 2744
19218
+ },
19219
+ {
19220
+ "epoch": 0.6504739336492891,
19221
+ "grad_norm": 1.0959298610687256,
19222
+ "learning_rate": 1.3666823467302341e-05,
19223
+ "loss": 1.54,
19224
+ "step": 2745
19225
+ },
19226
+ {
19227
+ "epoch": 0.6507109004739337,
19228
+ "grad_norm": 0.7990492582321167,
19229
+ "learning_rate": 1.3650237506511331e-05,
19230
+ "loss": 1.5411,
19231
+ "step": 2746
19232
+ },
19233
+ {
19234
+ "epoch": 0.6509478672985782,
19235
+ "grad_norm": 0.886177122592926,
19236
+ "learning_rate": 1.3633657835878938e-05,
19237
+ "loss": 1.7162,
19238
+ "step": 2747
19239
+ },
19240
+ {
19241
+ "epoch": 0.6511848341232227,
19242
+ "grad_norm": 0.885360598564148,
19243
+ "learning_rate": 1.3617084464593799e-05,
19244
+ "loss": 1.597,
19245
+ "step": 2748
19246
+ },
19247
+ {
19248
+ "epoch": 0.6514218009478673,
19249
+ "grad_norm": 1.0608346462249756,
19250
+ "learning_rate": 1.3600517401841029e-05,
19251
+ "loss": 1.6312,
19252
+ "step": 2749
19253
+ },
19254
+ {
19255
+ "epoch": 0.6516587677725119,
19256
+ "grad_norm": 0.7951533198356628,
19257
+ "learning_rate": 1.3583956656802289e-05,
19258
+ "loss": 1.5896,
19259
+ "step": 2750
19260
  }
19261
  ],
19262
  "logging_steps": 1,
 
19276
  "attributes": {}
19277
  }
19278
  },
19279
+ "total_flos": 1.220660969865216e+18,
19280
  "train_batch_size": 32,
19281
  "trial_name": null,
19282
  "trial_params": null