ranarag committed on
Commit
9cb34ad
·
verified ·
1 Parent(s): ea56e83

Update README.md

Browse files

updated results in model card.

Files changed (1) hide show
  1. README.md +77 -75
README.md CHANGED
@@ -42,7 +42,7 @@ This model is designed to handle general instruction-following tasks and can be
42
 
43
 
44
  **Generation:**
45
- This is a simple example of how to use Granite-3.3-2B-Instruct model.
46
 
47
  Install the following libraries:
48
 
@@ -184,10 +184,11 @@ By redesigning a common household item like the plastic bottle, we can create a
184
  <table>
185
 
186
  <thead>
 
187
  <tr>
188
  <th style="text-align:left; background-color: #001d6c; color: white;">Models</th>
189
  <th style="text-align:center; background-color: #001d6c; color: white;">ArenaHard</th>
190
- <th style="text-align:center; background-color: #001d6c; color: white;">Alpaca-Eval-2</th>
191
  <th style="text-align:center; background-color: #001d6c; color: white;">MMLU</th>
192
  <th style="text-align:center; background-color: #001d6c; color: white;">PopQA</th>
193
  <th style="text-align:center; background-color: #001d6c; color: white;">TruthfulQA</th>
@@ -197,8 +198,55 @@ By redesigning a common household item like the plastic bottle, we can create a
197
  <th style="text-align:center; background-color: #001d6c; color: white;">HumanEval</th>
198
  <th style="text-align:center; background-color: #001d6c; color: white;">HumanEval+</th>
199
  <th style="text-align:center; background-color: #001d6c; color: white;">IFEval</th>
 
200
  </tr></thead>
201
  <tbody>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
202
  <tr>
203
  <td style="text-align:left; background-color: #DAE8FF; color: black;">Llama-3.1-8B-Instruct</td>
204
  <td style="text-align:center; background-color: #DAE8FF; color: black;">36.43</td>
@@ -212,7 +260,7 @@ By redesigning a common household item like the plastic bottle, we can create a
212
  <td style="text-align:center; background-color: #DAE8FF; color: black;">85.32</td>
213
  <td style="text-align:center; background-color: #DAE8FF; color: black;">80.15</td>
214
  <td style="text-align:center; background-color: #DAE8FF; color: black;">79.10</td>
215
-
216
  </tr>
217
 
218
  <tr>
@@ -228,7 +276,7 @@ By redesigning a common household item like the plastic bottle, we can create a
228
  <td style="text-align:center; background-color: #DAE8FF; color: black;">67.54</td>
229
  <td style="text-align:center; background-color: #DAE8FF; color: black;">62.91</td>
230
  <td style="text-align:center; background-color: #DAE8FF; color: black;">66.50</td>
231
-
232
  </tr>
233
 
234
  <tr>
@@ -244,7 +292,7 @@ By redesigning a common household item like the plastic bottle, we can create a
244
  <td style="text-align:center; background-color: #DAE8FF; color: black;">93.35</td>
245
  <td style="text-align:center; background-color: #DAE8FF; color: black;">89.91</td>
246
  <td style="text-align:center; background-color: #DAE8FF; color: black;">74.90</td>
247
-
248
  </tr>
249
 
250
  <tr>
@@ -260,9 +308,8 @@ By redesigning a common household item like the plastic bottle, we can create a
260
  <td style="text-align:center; background-color: #DAE8FF; color: black;">79.89</td>
261
  <td style="text-align:center; background-color: #DAE8FF; color: black;">78.43</td>
262
  <td style="text-align:center; background-color: #DAE8FF; color: black;">59.10</td>
263
-
264
  </tr>
265
-
266
  <tr>
267
  <td style="text-align:left; background-color: #DAE8FF; color: black;">Granite-3.1-8B-Instruct</td>
268
  <td style="text-align:center; background-color: #DAE8FF; color: black;">37.58</td>
@@ -276,26 +323,9 @@ By redesigning a common household item like the plastic bottle, we can create a
276
  <td style="text-align:center; background-color: #DAE8FF; color: black;">89.63</td>
277
  <td style="text-align:center; background-color: #DAE8FF; color: black;">85.79</td>
278
  <td style="text-align:center; background-color: #DAE8FF; color: black;">73.20</td>
279
-
280
  </tr>
281
-
282
-
283
- <tr>
284
- <td style="text-align:left; background-color: #DAE8FF; color: black;">Granite-3.1-2B-Instruct</td>
285
- <td style="text-align:center; background-color: #DAE8FF; color: black;">23.3</td>
286
- <td style="text-align:center; background-color: #DAE8FF; color: black;">27.17</td>
287
- <td style="text-align:center; background-color: #DAE8FF; color: black;">57.11</td>
288
- <td style="text-align:center; background-color: #DAE8FF; color: black;">20.55</td>
289
- <td style="text-align:center; background-color: #DAE8FF; color: black;">59.79</td>
290
- <td style="text-align:center; background-color: #DAE8FF; color: black;">54.46</td>
291
- <td style="text-align:center; background-color: #DAE8FF; color: black;">18.68</td>
292
- <td style="text-align:center; background-color: #DAE8FF; color: black;">67.55</td>
293
- <td style="text-align:center; background-color: #DAE8FF; color: black;">79.45</td>
294
- <td style="text-align:center; background-color: #DAE8FF; color: black;">75.26</td>
295
- <td style="text-align:center; background-color: #DAE8FF; color: black;">63.59</td>
296
-
297
- </tr>
298
-
299
  <tr>
300
  <td style="text-align:left; background-color: #DAE8FF; color: black;">Granite-3.2-8B-Instruct</td>
301
  <td style="text-align:center; background-color: #DAE8FF; color: black;">55.25</td>
@@ -309,24 +339,8 @@ By redesigning a common household item like the plastic bottle, we can create a
309
  <td style="text-align:center; background-color: #DAE8FF; color: black;">89.35</td>
310
  <td style="text-align:center; background-color: #DAE8FF; color: black;">85.72</td>
311
  <td style="text-align:center; background-color: #DAE8FF; color: black;">74.31</td>
312
-
313
  </tr>
314
-
315
- <tr>
316
- <td style="text-align:left; background-color: #DAE8FF; color: black;"><b>Granite-3.2-2B-Instruct</b></td>
317
- <td style="text-align:center; background-color: #DAE8FF; color: black;">24.86</td>
318
- <td style="text-align:center; background-color: #DAE8FF; color: black;">34.51</td>
319
- <td style="text-align:center; background-color: #DAE8FF; color: black;">57.18</td>
320
- <td style="text-align:center; background-color: #DAE8FF; color: black;">20.56</td>
321
- <td style="text-align:center; background-color: #DAE8FF; color: black;">59.8</td>
322
- <td style="text-align:center; background-color: #DAE8FF; color: black;">52.27</td>
323
- <td style="text-align:center; background-color: #DAE8FF; color: black;">21.12</td>
324
- <td style="text-align:center; background-color: #DAE8FF; color: black;">67.02</td>
325
- <td style="text-align:center; background-color: #DAE8FF; color: black;">80.13</td>
326
- <td style="text-align:center; background-color: #DAE8FF; color: black;">73.39</td>
327
- <td style="text-align:center; background-color: #DAE8FF; color: black;">61.55</td>
328
- </tr>
329
-
330
  <tr>
331
  <td style="text-align:left; background-color: #DAE8FF; color: black;"><b>Granite-3.3-8B-Instruct</b></td>
332
  <td style="text-align:center; background-color: #DAE8FF; color: black;"> 57.56 </td>
@@ -334,28 +348,14 @@ By redesigning a common household item like the plastic bottle, we can create a
334
  <td style="text-align:center; background-color: #DAE8FF; color: black;"> 65.54 </td>
335
  <td style="text-align:center; background-color: #DAE8FF; color: black;"> 26.17 </td>
336
  <td style="text-align:center; background-color: #DAE8FF; color: black;"> 66.86 </td>
337
- <td style="text-align:center; background-color: #DAE8FF; color: black;"> 14.19 </td>
338
  <td style="text-align:center; background-color: #DAE8FF; color: black;"> 41.53 </td>
339
  <td style="text-align:center; background-color: #DAE8FF; color: black;"> 80.89 </td>
340
  <td style="text-align:center; background-color: #DAE8FF; color: black;"> 89.73 </td>
341
  <td style="text-align:center; background-color: #DAE8FF; color: black;"> 86.09 </td>
342
  <td style="text-align:center; background-color: #DAE8FF; color: black;"> 74.82 </td>
343
- </tr>
344
-
345
- <tr>
346
- <td style="text-align:left; background-color: #DAE8FF; color: black;"><b>Granite-3.3-2B-Instruct</b></td>
347
- <td style="text-align:center; background-color: #DAE8FF; color: black;"> 28.86 </td>
348
- <td style="text-align:center; background-color: #DAE8FF; color: black;"> 43.45 </td>
349
- <td style="text-align:center; background-color: #DAE8FF; color: black;"> 55.88 </td>
350
- <td style="text-align:center; background-color: #DAE8FF; color: black;"> 18.4 </td>
351
- <td style="text-align:center; background-color: #DAE8FF; color: black;"> 58.97 </td>
352
- <td style="text-align:center; background-color: #DAE8FF; color: black;"> 5.41 </td>
353
- <td style="text-align:center; background-color: #DAE8FF; color: black;"> 35.98 </td>
354
- <td style="text-align:center; background-color: #DAE8FF; color: black;"> 72.48 </td>
355
- <td style="text-align:center; background-color: #DAE8FF; color: black;"> 80.51 </td>
356
- <td style="text-align:center; background-color: #DAE8FF; color: black;"> 75.68 </td>
357
- <td style="text-align:center; background-color: #DAE8FF; color: black;"> 65.8 </td>
358
- </tr>
359
  </tbody></table>
360
 
361
 
@@ -368,37 +368,39 @@ By redesigning a common household item like the plastic bottle, we can create a
368
  <th style="text-align:center; background-color: #001d6c; color: white;">MATH500</th>
369
  </tr></thead>
370
  <tbody>
371
- <tr>
372
- <td style="text-align:left; background-color: #DAE8FF; color: black;">Granite-3.1-8B-Instruct</td>
373
- <td style="text-align:center; background-color: #DAE8FF; color: black;"> 1.97 </td>
374
- <td style="text-align:center; background-color: #DAE8FF; color: black;"> 48.73 </td>
375
- </tr>
376
  <tr>
377
  <td style="text-align:left; background-color: #DAE8FF; color: black;">Granite-3.1-2B-Instruct</td>
378
  <td style="text-align:center; background-color: #DAE8FF; color: black;"> 0.89 </td>
379
  <td style="text-align:center; background-color: #DAE8FF; color: black;"> 35.07 </td>
380
  </tr>
381
- <tr>
382
- <td style="text-align:left; background-color: #DAE8FF; color: black;">Granite-3.2-8B-Instruct</td>
383
- <td style="text-align:center; background-color: #DAE8FF; color: black;"> 2.43 </td>
384
- <td style="text-align:center; background-color: #DAE8FF; color: black;"> 52.8 </td>
385
- </tr>
386
  <tr>
387
  <td style="text-align:left; background-color: #DAE8FF; color: black;">Granite-3.2-2B-Instruct</td>
388
  <td style="text-align:center; background-color: #DAE8FF; color: black;"> 0.89 </td>
389
  <td style="text-align:center; background-color: #DAE8FF; color: black;"> 35.54 </td>
390
  </tr>
391
- <tr>
392
- <td style="text-align:left; background-color: #DAE8FF; color: black;"><b>Granite-3.3-8B-Instruct</b></td>
393
- <td style="text-align:center; background-color: #DAE8FF; color: black;"> 8.12 </td>
394
- <td style="text-align:center; background-color: #DAE8FF; color: black;"> 69.02 </td>
395
- </tr>
396
  <tr>
397
  <td style="text-align:left; background-color: #DAE8FF; color: black;"><b>Granite-3.3-2B-Instruct</b></td>
398
  <td style="text-align:center; background-color: #DAE8FF; color: black;"> 3.28 </td>
399
  <td style="text-align:center; background-color: #DAE8FF; color: black;"> 58.09 </td>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
400
  </tr>
401
  </tbody></table>
 
 
402
 
403
 
404
  **Training Data:**
 
42
 
43
 
44
  **Generation:**
45
+ This is a simple example of how to use the Granite-3.3-8B-Instruct model.
46
 
47
  Install the following libraries:
48
 
 
184
  <table>
185
 
186
+ <caption style="text-align:center"><b>Comparison with different models over various benchmarks. Scores of AlpacaEval-2.0 and Arena-Hard are calculated with thinking=True.</b></caption>
187
  <thead>
188
  <tr>
189
  <th style="text-align:left; background-color: #001d6c; color: white;">Models</th>
190
  <th style="text-align:center; background-color: #001d6c; color: white;">ArenaHard</th>
191
+ <th style="text-align:center; background-color: #001d6c; color: white;">AlpacaEval-2.0</th>
192
  <th style="text-align:center; background-color: #001d6c; color: white;">MMLU</th>
193
  <th style="text-align:center; background-color: #001d6c; color: white;">PopQA</th>
194
  <th style="text-align:center; background-color: #001d6c; color: white;">TruthfulQA</th>
 
198
  <th style="text-align:center; background-color: #001d6c; color: white;">HumanEval</th>
199
  <th style="text-align:center; background-color: #001d6c; color: white;">HumanEval+</th>
200
  <th style="text-align:center; background-color: #001d6c; color: white;">IFEval</th>
201
+ <th style="text-align:center; background-color: #001d6c; color: white;">AttaQ</th>
202
  </tr></thead>
203
  <tbody>
204
+ <tr>
205
+ <td style="text-align:left; background-color: #DAE8FF; color: black;">Granite-3.1-2B-Instruct</td>
206
+ <td style="text-align:center; background-color: #DAE8FF; color: black;">23.3</td>
207
+ <td style="text-align:center; background-color: #DAE8FF; color: black;">27.17</td>
208
+ <td style="text-align:center; background-color: #DAE8FF; color: black;">57.11</td>
209
+ <td style="text-align:center; background-color: #DAE8FF; color: black;">20.55</td>
210
+ <td style="text-align:center; background-color: #DAE8FF; color: black;">59.79</td>
211
+ <td style="text-align:center; background-color: #DAE8FF; color: black;">54.46</td>
212
+ <td style="text-align:center; background-color: #DAE8FF; color: black;">18.68</td>
213
+ <td style="text-align:center; background-color: #DAE8FF; color: black;">67.55</td>
214
+ <td style="text-align:center; background-color: #DAE8FF; color: black;">79.45</td>
215
+ <td style="text-align:center; background-color: #DAE8FF; color: black;">75.26</td>
216
+ <td style="text-align:center; background-color: #DAE8FF; color: black;">63.59</td>
217
+ <td style="text-align:center; background-color: #DAE8FF; color: black;">84.7</td>
218
+ </tr>
219
+ <tr>
220
+ <td style="text-align:left; background-color: #DAE8FF; color: black;">Granite-3.2-2B-Instruct</td>
221
+ <td style="text-align:center; background-color: #DAE8FF; color: black;">24.86</td>
222
+ <td style="text-align:center; background-color: #DAE8FF; color: black;">34.51</td>
223
+ <td style="text-align:center; background-color: #DAE8FF; color: black;">57.18</td>
224
+ <td style="text-align:center; background-color: #DAE8FF; color: black;">20.56</td>
225
+ <td style="text-align:center; background-color: #DAE8FF; color: black;">59.8</td>
226
+ <td style="text-align:center; background-color: #DAE8FF; color: black;">52.27</td>
227
+ <td style="text-align:center; background-color: #DAE8FF; color: black;">21.12</td>
228
+ <td style="text-align:center; background-color: #DAE8FF; color: black;">67.02</td>
229
+ <td style="text-align:center; background-color: #DAE8FF; color: black;">80.13</td>
230
+ <td style="text-align:center; background-color: #DAE8FF; color: black;">73.39</td>
231
+ <td style="text-align:center; background-color: #DAE8FF; color: black;">61.55</td>
232
+ <td style="text-align:center; background-color: #DAE8FF; color: black;">83.23</td>
233
+ </tr>
234
+ <tr>
235
+ <td style="text-align:left; background-color: #DAE8FF; color: black;"><b>Granite-3.3-2B-Instruct</b></td>
236
+ <td style="text-align:center; background-color: #DAE8FF; color: black;"> 28.86 </td>
237
+ <td style="text-align:center; background-color: #DAE8FF; color: black;"> 43.45 </td>
238
+ <td style="text-align:center; background-color: #DAE8FF; color: black;"> 55.88 </td>
239
+ <td style="text-align:center; background-color: #DAE8FF; color: black;"> 18.4 </td>
240
+ <td style="text-align:center; background-color: #DAE8FF; color: black;"> 58.97 </td>
241
+ <td style="text-align:center; background-color: #DAE8FF; color: black;"> 52.51 </td>
242
+ <td style="text-align:center; background-color: #DAE8FF; color: black;"> 35.98 </td>
243
+ <td style="text-align:center; background-color: #DAE8FF; color: black;"> 72.48 </td>
244
+ <td style="text-align:center; background-color: #DAE8FF; color: black;"> 80.51 </td>
245
+ <td style="text-align:center; background-color: #DAE8FF; color: black;"> 75.68 </td>
246
+ <td style="text-align:center; background-color: #DAE8FF; color: black;"> 65.8 </td>
247
+ <td style="text-align:center; background-color: #DAE8FF; color: black;">87.47</td>
248
+ </tr>
249
+
250
  <tr>
251
  <td style="text-align:left; background-color: #DAE8FF; color: black;">Llama-3.1-8B-Instruct</td>
252
  <td style="text-align:center; background-color: #DAE8FF; color: black;">36.43</td>
 
260
  <td style="text-align:center; background-color: #DAE8FF; color: black;">85.32</td>
261
  <td style="text-align:center; background-color: #DAE8FF; color: black;">80.15</td>
262
  <td style="text-align:center; background-color: #DAE8FF; color: black;">79.10</td>
263
+ <td style="text-align:center; background-color: #DAE8FF; color: black;">83.43</td>
264
  </tr>
265
 
266
  <tr>
 
276
  <td style="text-align:center; background-color: #DAE8FF; color: black;">67.54</td>
277
  <td style="text-align:center; background-color: #DAE8FF; color: black;">62.91</td>
278
  <td style="text-align:center; background-color: #DAE8FF; color: black;">66.50</td>
279
+ <td style="text-align:center; background-color: #DAE8FF; color: black;">42.87</td>
280
  </tr>
281
 
282
  <tr>
 
292
  <td style="text-align:center; background-color: #DAE8FF; color: black;">93.35</td>
293
  <td style="text-align:center; background-color: #DAE8FF; color: black;">89.91</td>
294
  <td style="text-align:center; background-color: #DAE8FF; color: black;">74.90</td>
295
+ <td style="text-align:center; background-color: #DAE8FF; color: black;">81.90</td>
296
  </tr>
297
 
298
  <tr>
 
308
  <td style="text-align:center; background-color: #DAE8FF; color: black;">79.89</td>
309
  <td style="text-align:center; background-color: #DAE8FF; color: black;">78.43</td>
310
  <td style="text-align:center; background-color: #DAE8FF; color: black;">59.10</td>
311
+ <td style="text-align:center; background-color: #DAE8FF; color: black;">42.45</td>
312
  </tr>
 
313
  <tr>
314
  <td style="text-align:left; background-color: #DAE8FF; color: black;">Granite-3.1-8B-Instruct</td>
315
  <td style="text-align:center; background-color: #DAE8FF; color: black;">37.58</td>
 
323
  <td style="text-align:center; background-color: #DAE8FF; color: black;">89.63</td>
324
  <td style="text-align:center; background-color: #DAE8FF; color: black;">85.79</td>
325
  <td style="text-align:center; background-color: #DAE8FF; color: black;">73.20</td>
326
+ <td style="text-align:center; background-color: #DAE8FF; color: black;">85.73</td>
327
  </tr>
328
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
329
  <tr>
330
  <td style="text-align:left; background-color: #DAE8FF; color: black;">Granite-3.2-8B-Instruct</td>
331
  <td style="text-align:center; background-color: #DAE8FF; color: black;">55.25</td>
 
339
  <td style="text-align:center; background-color: #DAE8FF; color: black;">89.35</td>
340
  <td style="text-align:center; background-color: #DAE8FF; color: black;">85.72</td>
341
  <td style="text-align:center; background-color: #DAE8FF; color: black;">74.31</td>
342
+ <td style="text-align:center; background-color: #DAE8FF; color: black;">84.7</td>
343
  </tr>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
344
  <tr>
345
  <td style="text-align:left; background-color: #DAE8FF; color: black;"><b>Granite-3.3-8B-Instruct</b></td>
346
  <td style="text-align:center; background-color: #DAE8FF; color: black;"> 57.56 </td>
 
348
  <td style="text-align:center; background-color: #DAE8FF; color: black;"> 65.54 </td>
349
  <td style="text-align:center; background-color: #DAE8FF; color: black;"> 26.17 </td>
350
  <td style="text-align:center; background-color: #DAE8FF; color: black;"> 66.86 </td>
351
+ <td style="text-align:center; background-color: #DAE8FF; color: black;"> 59.01 </td>
352
  <td style="text-align:center; background-color: #DAE8FF; color: black;"> 41.53 </td>
353
  <td style="text-align:center; background-color: #DAE8FF; color: black;"> 80.89 </td>
354
  <td style="text-align:center; background-color: #DAE8FF; color: black;"> 89.73 </td>
355
  <td style="text-align:center; background-color: #DAE8FF; color: black;"> 86.09 </td>
356
  <td style="text-align:center; background-color: #DAE8FF; color: black;"> 74.82 </td>
357
+ <td style="text-align:center; background-color: #DAE8FF; color: black;">88.5</td>
358
+ </tr>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
359
  </tbody></table>
360
 
361
 
 
368
  <th style="text-align:center; background-color: #001d6c; color: white;">MATH500</th>
369
  </tr></thead>
370
  <tbody>
 
 
 
 
 
371
  <tr>
372
  <td style="text-align:left; background-color: #DAE8FF; color: black;">Granite-3.1-2B-Instruct</td>
373
  <td style="text-align:center; background-color: #DAE8FF; color: black;"> 0.89 </td>
374
  <td style="text-align:center; background-color: #DAE8FF; color: black;"> 35.07 </td>
375
  </tr>
 
 
 
 
 
376
  <tr>
377
  <td style="text-align:left; background-color: #DAE8FF; color: black;">Granite-3.2-2B-Instruct</td>
378
  <td style="text-align:center; background-color: #DAE8FF; color: black;"> 0.89 </td>
379
  <td style="text-align:center; background-color: #DAE8FF; color: black;"> 35.54 </td>
380
  </tr>
 
 
 
 
 
381
  <tr>
382
  <td style="text-align:left; background-color: #DAE8FF; color: black;"><b>Granite-3.3-2B-Instruct</b></td>
383
  <td style="text-align:center; background-color: #DAE8FF; color: black;"> 3.28 </td>
384
  <td style="text-align:center; background-color: #DAE8FF; color: black;"> 58.09 </td>
385
+ </tr>
386
+ <tr>
387
+ <td style="text-align:left; background-color: #DAE8FF; color: black;">Granite-3.1-8B-Instruct</td>
388
+ <td style="text-align:center; background-color: #DAE8FF; color: black;"> 1.97 </td>
389
+ <td style="text-align:center; background-color: #DAE8FF; color: black;"> 48.73 </td>
390
+ </tr>
391
+ <tr>
392
+ <td style="text-align:left; background-color: #DAE8FF; color: black;">Granite-3.2-8B-Instruct</td>
393
+ <td style="text-align:center; background-color: #DAE8FF; color: black;"> 2.43 </td>
394
+ <td style="text-align:center; background-color: #DAE8FF; color: black;"> 52.8 </td>
395
+ </tr>
396
+ <tr>
397
+ <td style="text-align:left; background-color: #DAE8FF; color: black;"><b>Granite-3.3-8B-Instruct</b></td>
398
+ <td style="text-align:center; background-color: #DAE8FF; color: black;"> 8.12 </td>
399
+ <td style="text-align:center; background-color: #DAE8FF; color: black;"> 69.02 </td>
400
  </tr>
401
  </tbody></table>
402
+
403
+
404
 
405
 
406
  **Training Data:**