Commit a1a4522
Parent(s): 7051c52
Update README.md
README.md CHANGED
@@ -274,10 +274,10 @@ This model is a fine-tuned version of [google/flan-t5-large](https://huggingface

- **NLP Paper's Abstract + Introduction --> {Question} [SEP] {Answer}**

-## How to
+## (1) How to use: Inference on CPU (Code Snippets)


-###
+### Load model directly
```python
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

@@ -305,14 +305,46 @@ summaries = model.generate(input_ids =inputs["input_ids"], max_new_tokens=100, d
summaries = model.generate(input_ids =inputs["input_ids"], max_new_tokens=100, do_sample = True, top_p = 0.95)
```

-
-
```
decoded_summaries = [tokenizer.decode(s, skip_special_tokens=False, clean_up_tokenization_spaces=True) for s in summaries]
decoded_summaries = [d.replace("<n>", " ").replace(tokenizer.pad_token, "").replace(tokenizer.eos_token, "") for d in decoded_summaries]

```

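The diff skips README lines 284-304, where the checkpoint is loaded and `inputs` is built. A minimal sketch of that setup, with `paper_text` as a hypothetical placeholder for the paper's abstract + introduction:

```python
# Sketch only: how `inputs` used above might be prepared (this part is not
# shown in the diff). `paper_text` stands in for the paper's abstract +
# introduction.
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

tokenizer = AutoTokenizer.from_pretrained("UNIST-Eunchan/FLAN-T5-NLP-Paper-to-Question-Generation")
model = AutoModelForSeq2SeqLM.from_pretrained("UNIST-Eunchan/FLAN-T5-NLP-Paper-to-Question-Generation")

paper_text = "..."  # abstract + introduction of an NLP paper
inputs = tokenizer(paper_text, return_tensors="pt", truncation=True)
```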
+## (2) Faster Inference on GPU
+- roughly 60x faster than (1) (CPU baseline vs. Colab T4 GPU)
+
+### Additional Installation
+```python
+!pip install accelerate -q
+!pip install bitsandbytes -q
+!pip install optimum -q
+```
+
+### Load model directly
+```python
+import torch
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, BitsAndBytesConfig
+from optimum.bettertransformer import BetterTransformer
+
+# load model in 4-bit
+quantization_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_compute_dtype=torch.bfloat16
+)
+
+tokenizer = AutoTokenizer.from_pretrained("UNIST-Eunchan/FLAN-T5-NLP-Paper-to-Question-Generation")
+model = AutoModelForSeq2SeqLM.from_pretrained("UNIST-Eunchan/FLAN-T5-NLP-Paper-to-Question-Generation", quantization_config=quantization_config)
+model = BetterTransformer.transform(model)
+```
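The 4-bit weights typically end up on the GPU, so the `device` used in the generation snippet below can be read off the model itself. A minimal sketch, assuming `paper_text` from the CPU sketch above:

```python
# Sketch: take the device from the quantized model (usually cuda:0) so the
# inputs can be moved to it before generation; `paper_text` is the
# hypothetical input string from the CPU sketch above.
device = next(model.parameters()).device
inputs = tokenizer(paper_text, return_tensors="pt", truncation=True)
```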
+
+
+### For Multiple Question Generation (👍)
+```python
+# use to(device)
+summaries = model.generate(input_ids =inputs["input_ids"].to(device), max_new_tokens=100, do_sample = True, top_p = 0.95, num_return_sequences = 4)
+```
+
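With `num_return_sequences=4`, `summaries` holds four sampled outputs; the decoding step from section (1) applies to each one, for example:

```python
# Decode the four sampled question-answer candidates, reusing the cleanup
# from section (1).
decoded_summaries = [tokenizer.decode(s, skip_special_tokens=False, clean_up_tokenization_spaces=True) for s in summaries]
decoded_summaries = [d.replace("<n>", " ").replace(tokenizer.pad_token, "").replace(tokenizer.eos_token, "") for d in decoded_summaries]
for i, qa in enumerate(decoded_summaries):
    print(f"[{i}] {qa.strip()}")
```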

### Training results
