Update README.md
README.md CHANGED
@@ -30,7 +30,7 @@ language:
 
 
 
-
+Example code for creating the results jsonl
 ```
 from transformers import (
   AutoModelForCausalLM,
@@ -62,7 +62,7 @@ model = AutoModelForCausalLM.from_pretrained(
 # Load tokenizer
 tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, token = HF_TOKEN)
 
-#
+# Load Questions
 datasets = []
 with open("./elyza-tasks-100-TV_0.jsonl", "r") as f:
   item = ""
@@ -73,6 +73,30 @@ with open("./elyza-tasks-100-TV_0.jsonl", "r") as f:
       datasets.append(json.loads(item))
      item = ""
 
+# Generate results using loaded model
+results = []
+for data in tqdm(datasets):
+
+  input = data["input"]
+
+  prompt = f"""### 指示
+{input}
+### 回答:
+"""
+
+  tokenized_input = tokenizer.encode(prompt, add_special_tokens=False, return_tensors="pt").to(model.device)
+  with torch.no_grad():
+    outputs = model.generate(
+      tokenized_input,
+      max_new_tokens=100,
+      do_sample=False,
+      repetition_penalty=1.2
+    )[0]
+  output = tokenizer.decode(outputs[tokenized_input.size(1):], skip_special_tokens=True)
+
+  results.append({"task_id": data["task_id"], "input": input, "output": output})
+
+
 # Generate jsonl
 import re
 model_name = re.sub(".*/", "", model_name)
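The question-loading loop is split across the second and third hunks, so its middle lines do not appear in this diff. A minimal sketch of how such a loader could look, accumulating lines until a complete JSON object is closed; this is an assumption about the unshown lines, not the README's exact code:

```
import json

datasets = []
with open("./elyza-tasks-100-TV_0.jsonl", "r") as f:
    item = ""
    for line in f:
        line = line.strip()
        item += line
        # Assume a record is complete once the accumulated text ends with "}"
        if item.endswith("}"):
            datasets.append(json.loads(item))
            item = ""
```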
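The third hunk's context ends right after `model_name = re.sub(".*/", "", model_name)`, so the actual jsonl-writing step is not part of this diff. A minimal sketch of how the collected `results` could be serialized, assuming an output filename derived from the stripped model name (the `_output.jsonl` suffix is an assumption, not taken from the diff):

```
import json
import re

# Keep only the model name after the organization prefix, as in the snippet above
model_name = re.sub(".*/", "", model_name)

# Assumed output path; the real filename is not shown in this diff
with open(f"./{model_name}_output.jsonl", "w", encoding="utf-8") as f:
    for result in results:
        # One JSON object per line, keeping Japanese text readable
        json.dump(result, f, ensure_ascii=False)
        f.write("\n")
```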