ranarag committed on
Commit
dd6c8af
·
verified ·
1 Parent(s): b9a7d27

Update README.md

Browse files

Removed the BigBenchHard (BBH) and DROP columns for now. Will add them back after fixing them.

Files changed (1) hide show
  1. README.md +23 -22
README.md CHANGED
@@ -190,8 +190,8 @@ By redesigning a common household item like the plastic bottle, we can create a
190
  <th style="text-align:center; background-color: #001d6c; color: white;">MMLU</th>
191
  <th style="text-align:center; background-color: #001d6c; color: white;">PopQA</th>
192
  <th style="text-align:center; background-color: #001d6c; color: white;">TruthfulQA</th>
193
- <th style="text-align:center; background-color: #001d6c; color: white;">BigBenchHard</th>
194
- <th style="text-align:center; background-color: #001d6c; color: white;">DROP</th>
195
  <th style="text-align:center; background-color: #001d6c; color: white;">GSM8K</th>
196
  <th style="text-align:center; background-color: #001d6c; color: white;">HumanEval</th>
197
  <th style="text-align:center; background-color: #001d6c; color: white;">HumanEval+</th>
@@ -206,8 +206,8 @@ By redesigning a common household item like the plastic bottle, we can create a
206
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">57.11</td>
207
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">20.55</td>
208
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">59.79</td>
209
- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">54.46</td>
210
- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">18.68</td>
211
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">67.55</td>
212
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">79.45</td>
213
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">75.26</td>
@@ -221,8 +221,8 @@ By redesigning a common household item like the plastic bottle, we can create a
221
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">57.18</td>
222
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">20.56</td>
223
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">59.8</td>
224
- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">52.27</td>
225
- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">21.12</td>
226
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">67.02</td>
227
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">80.13</td>
228
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">73.39</td>
@@ -236,8 +236,8 @@ By redesigning a common household item like the plastic bottle, we can create a
236
  <td style="text-align:center; background-color: #DAE8FF; color: black;"> 55.88 </td>
237
  <td style="text-align:center; background-color: #DAE8FF; color: black;"> 18.4 </td>
238
  <td style="text-align:center; background-color: #DAE8FF; color: black;"> 58.97 </td>
239
- <td style="text-align:center; background-color: #DAE8FF; color: black;"> 52.51 </td>
240
- <td style="text-align:center; background-color: #DAE8FF; color: black;"> 35.98 </td>
241
  <td style="text-align:center; background-color: #DAE8FF; color: black;"> 72.48 </td>
242
  <td style="text-align:center; background-color: #DAE8FF; color: black;"> 80.51 </td>
243
  <td style="text-align:center; background-color: #DAE8FF; color: black;"> 75.68 </td>
@@ -252,8 +252,8 @@ By redesigning a common household item like the plastic bottle, we can create a
252
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">69.15</td>
253
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">28.79</td>
254
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">52.79</td>
255
- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">72.66</td>
256
- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">61.48</td>
257
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">83.24</td>
258
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">85.32</td>
259
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">80.15</td>
@@ -268,8 +268,8 @@ By redesigning a common household item like the plastic bottle, we can create a
268
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">45.80</td>
269
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">13.25</td>
270
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">47.43</td>
271
- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">65.71</td>
272
- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">44.46</td>
273
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">72.18</td>
274
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">67.54</td>
275
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">62.91</td>
@@ -284,8 +284,8 @@ By redesigning a common household item like the plastic bottle, we can create a
284
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">74.30</td>
285
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">18.12</td>
286
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">63.06</td>
287
- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">70.40</td>
288
- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">54.71</td>
289
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">84.46</td>
290
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">93.35</td>
291
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">89.91</td>
@@ -300,8 +300,8 @@ By redesigning a common household item like the plastic bottle, we can create a
300
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">50.72</td>
301
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">9.94</td>
302
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">47.14</td>
303
- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">65.04</td>
304
- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">42.76</td>
305
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">78.47</td>
306
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">79.89</td>
307
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">78.43</td>
@@ -315,8 +315,8 @@ By redesigning a common household item like the plastic bottle, we can create a
315
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">66.77</td>
316
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">28.7</td>
317
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">65.84</td>
318
- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">68.55</td>
319
- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">50.78</td>
320
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">79.15</td>
321
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">89.63</td>
322
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">85.79</td>
@@ -331,8 +331,8 @@ By redesigning a common household item like the plastic bottle, we can create a
331
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">66.79</td>
332
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">28.04</td>
333
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">66.92</td>
334
- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">64.77</td>
335
- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">50.95</td>
336
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">81.65</td>
337
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">89.35</td>
338
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">85.72</td>
@@ -346,8 +346,8 @@ By redesigning a common household item like the plastic bottle, we can create a
346
  <td style="text-align:center; background-color: #DAE8FF; color: black;"> 65.54 </td>
347
  <td style="text-align:center; background-color: #DAE8FF; color: black;"> 26.17 </td>
348
  <td style="text-align:center; background-color: #DAE8FF; color: black;"> 66.86 </td>
349
- <td style="text-align:center; background-color: #DAE8FF; color: black;"> 59.01 </td>
350
- <td style="text-align:center; background-color: #DAE8FF; color: black;"> 41.53 </td>
351
  <td style="text-align:center; background-color: #DAE8FF; color: black;"> 80.89 </td>
352
  <td style="text-align:center; background-color: #DAE8FF; color: black;"> 89.73 </td>
353
  <td style="text-align:center; background-color: #DAE8FF; color: black;"> 86.09 </td>
@@ -355,6 +355,7 @@ By redesigning a common household item like the plastic bottle, we can create a
355
  <td style="text-align:center; background-color: #DAE8FF; color: black;">88.5</td>
356
  </tr>
357
  </tbody></table>
 
358
  <table>
359
  <caption style="text-align:center"><b>Math Benchmarks</b></caption>
360
  <thead>
 
190
  <th style="text-align:center; background-color: #001d6c; color: white;">MMLU</th>
191
  <th style="text-align:center; background-color: #001d6c; color: white;">PopQA</th>
192
  <th style="text-align:center; background-color: #001d6c; color: white;">TruthfulQA</th>
193
+ <!-- <th style="text-align:center; background-color: #001d6c; color: white;">BigBenchHard</th> -->
194
+ <!-- <th style="text-align:center; background-color: #001d6c; color: white;">DROP</th> -->
195
  <th style="text-align:center; background-color: #001d6c; color: white;">GSM8K</th>
196
  <th style="text-align:center; background-color: #001d6c; color: white;">HumanEval</th>
197
  <th style="text-align:center; background-color: #001d6c; color: white;">HumanEval+</th>
 
206
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">57.11</td>
207
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">20.55</td>
208
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">59.79</td>
209
+ <!-- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">54.46</td> -->
210
+ <!-- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">18.68</td> -->
211
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">67.55</td>
212
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">79.45</td>
213
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">75.26</td>
 
221
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">57.18</td>
222
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">20.56</td>
223
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">59.8</td>
224
+ <!-- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">52.27</td> -->
225
+ <!-- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">21.12</td> -->
226
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">67.02</td>
227
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">80.13</td>
228
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">73.39</td>
 
236
  <td style="text-align:center; background-color: #DAE8FF; color: black;"> 55.88 </td>
237
  <td style="text-align:center; background-color: #DAE8FF; color: black;"> 18.4 </td>
238
  <td style="text-align:center; background-color: #DAE8FF; color: black;"> 58.97 </td>
239
+ <!-- <td style="text-align:center; background-color: #DAE8FF; color: black;"> 52.51 </td> -->
240
+ <!-- <td style="text-align:center; background-color: #DAE8FF; color: black;"> 35.98 </td> -->
241
  <td style="text-align:center; background-color: #DAE8FF; color: black;"> 72.48 </td>
242
  <td style="text-align:center; background-color: #DAE8FF; color: black;"> 80.51 </td>
243
  <td style="text-align:center; background-color: #DAE8FF; color: black;"> 75.68 </td>
 
252
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">69.15</td>
253
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">28.79</td>
254
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">52.79</td>
255
+ <!-- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">72.66</td> -->
256
+ <!-- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">61.48</td> -->
257
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">83.24</td>
258
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">85.32</td>
259
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">80.15</td>
 
268
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">45.80</td>
269
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">13.25</td>
270
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">47.43</td>
271
+ <!-- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">65.71</td> -->
272
+ <!-- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">44.46</td> -->
273
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">72.18</td>
274
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">67.54</td>
275
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">62.91</td>
 
284
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">74.30</td>
285
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">18.12</td>
286
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">63.06</td>
287
+ <!-- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">70.40</td> -->
288
+ <!-- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">54.71</td> -->
289
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">84.46</td>
290
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">93.35</td>
291
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">89.91</td>
 
300
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">50.72</td>
301
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">9.94</td>
302
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">47.14</td>
303
+ <!-- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">65.04</td> -->
304
+ <!-- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">42.76</td> -->
305
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">78.47</td>
306
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">79.89</td>
307
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">78.43</td>
 
315
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">66.77</td>
316
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">28.7</td>
317
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">65.84</td>
318
+ <!-- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">68.55</td> -->
319
+ <!-- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">50.78</td> -->
320
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">79.15</td>
321
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">89.63</td>
322
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">85.79</td>
 
331
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">66.79</td>
332
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">28.04</td>
333
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">66.92</td>
334
+ <!-- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">64.77</td> -->
335
+ <!-- <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">50.95</td> -->
336
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">81.65</td>
337
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">89.35</td>
338
  <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">85.72</td>
 
346
  <td style="text-align:center; background-color: #DAE8FF; color: black;"> 65.54 </td>
347
  <td style="text-align:center; background-color: #DAE8FF; color: black;"> 26.17 </td>
348
  <td style="text-align:center; background-color: #DAE8FF; color: black;"> 66.86 </td>
349
+ <!-- <td style="text-align:center; background-color: #DAE8FF; color: black;"> 59.01 </td> -->
350
+ <!-- <td style="text-align:center; background-color: #DAE8FF; color: black;"> 41.53 </td> -->
351
  <td style="text-align:center; background-color: #DAE8FF; color: black;"> 80.89 </td>
352
  <td style="text-align:center; background-color: #DAE8FF; color: black;"> 89.73 </td>
353
  <td style="text-align:center; background-color: #DAE8FF; color: black;"> 86.09 </td>
 
355
  <td style="text-align:center; background-color: #DAE8FF; color: black;">88.5</td>
356
  </tr>
357
  </tbody></table>
358
+
359
  <table>
360
  <caption style="text-align:center"><b>Math Benchmarks</b></caption>
361
  <thead>