AlHfac commited on
Commit
1ca276c
·
verified ·
1 Parent(s): 8636c9b

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +26 -2
README.md CHANGED
@@ -30,7 +30,7 @@ language:
30
 
31
 
32
 
33
- コード例
34
  ```
35
  from transformers import (
36
  AutoModelForCausalLM,
@@ -62,7 +62,7 @@ model = AutoModelForCausalLM.from_pretrained(
62
  # Load tokenizer
63
  tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, token = HF_TOKEN)
64
 
65
- # Evaluate
66
  datasets = []
67
  with open("./elyza-tasks-100-TV_0.jsonl", "r") as f:
68
  item = ""
@@ -73,6 +73,30 @@ with open("./elyza-tasks-100-TV_0.jsonl", "r") as f:
73
  datasets.append(json.loads(item))
74
  item = ""
75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  # Generate jsonl
77
  import re
78
  model_name = re.sub(".*/", "", model_name)
 
30
 
31
 
32
 
33
+ 結果jsonlを作成するためのコード例
34
  ```
35
  from transformers import (
36
  AutoModelForCausalLM,
 
62
  # Load tokenizer
63
  tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, token = HF_TOKEN)
64
 
65
+ # Load Questions
66
  datasets = []
67
  with open("./elyza-tasks-100-TV_0.jsonl", "r") as f:
68
  item = ""
 
73
  datasets.append(json.loads(item))
74
  item = ""
75
 
76
+ # Generate results using loaded model
77
+ results = []
78
+ for data in tqdm(datasets):
79
+
80
+ input = data["input"]
81
+
82
+ prompt = f"""### 指示
83
+ {input}
84
+ ### 回答:
85
+ """
86
+
87
+ tokenized_input = tokenizer.encode(prompt, add_special_tokens=False, return_tensors="pt").to(model.device)
88
+ with torch.no_grad():
89
+ outputs = model.generate(
90
+ tokenized_input,
91
+ max_new_tokens=100,
92
+ do_sample=False,
93
+ repetition_penalty=1.2
94
+ )[0]
95
+ output = tokenizer.decode(outputs[tokenized_input.size(1):], skip_special_tokens=True)
96
+
97
+ results.append({"task_id": data["task_id"], "input": input, "output": output})
98
+
99
+
100
  # Generate jsonl
101
  import re
102
  model_name = re.sub(".*/", "", model_name)