rishanthrajendhran committed
Commit a0c598a · verified · 1 Parent(s): fd686a5

Updated example code in README.md

Files changed (1)
  1. README.md +45 -4
README.md CHANGED
@@ -72,10 +72,51 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
 tokenizer = AutoTokenizer.from_pretrained("rishanthrajendhran/VeriFastScore")
 model = AutoModelForCausalLM.from_pretrained("rishanthrajendhran/VeriFastScore")
 
-prompt = "<your prompt with evidence and response>"
-inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-outputs = model.generate(**inputs, max_new_tokens=512)
-print(tokenizer.decode(outputs[0], skip_special_tokens=True))
+system_prompt = '''You are trying to verify how factual a response is by extracting fine-grained, verifiable claims. Each claim must describe one single event or one single state (for example, “Nvidia was founded in 1993 in Sunnyvale, California, U.S.”) in one sentence with at most one embedded clause. Each fact should be understandable on its own and require no additional context. This means that all entities must be referred to by name but not by pronoun. Use the name of entities rather than definite noun phrases (e.g., “the teacher”) whenever possible. If a definite noun phrase is used, be sure to add modifiers (e.g., an embedded clause or a prepositional phrase). Each fact must be situated within relevant temporal and location details whenever needed.
+
+All necessary specific details—including entities, dates, and locations—must be explicitly named, and verify here means that every detail of a claim is directly confirmed by the provided evidence. The verification process involves cross-checking each detail against the evidence; a detail is considered verified if it is clearly confirmed by the evidence.
+
+Avoid extracting stories, personal experiences, hypotheticals (e.g., those using “would be” or the subjunctive mood), subjective opinions, suggestions, advice, instructions, or similarly non-factual content; however, biographical, historical, scientific, and similar texts are acceptable. Also, ignore any listed references.
+
+For each extracted claim, classify it as follows:
+
+Supported: Every detail of the claim (including entities, dates, and locations) is directly confirmed by the provided evidence with no contradictions.
+Unsupported: One or more details of the claim are either missing from or contradicted by the provided evidence, even though the claim remains verifiable using external sources.
+
+You do not need to justify what you extract.
+
+Output format:
+<fact 1>: <your judgment of fact 1>
+<fact 2>: <your judgment of fact 2>
+
+<fact n>: <your judgment of fact n>
+
+If no verifiable claim can be extracted, simply output "No verifiable claim."'''
+
+# `response` is the text to be scored and `evidence` the retrieved evidence passages.
+prompt = "### Response\n{response}\n### Evidence\n{evidence}".format(
+    response=response,
+    evidence=evidence
+)
+
+conversation_history = [
+    {
+        "role": "system",
+        "content": system_prompt,
+    }, {
+        "role": "user",
+        "content": prompt
+    }
+]
+inputs = tokenizer.apply_chat_template(
+    conversation=conversation_history,
+    add_generation_prompt=True,
+    tokenize=True,
+    truncation=False,
+    padding="do_not_pad",
+    return_dict=True,
+    return_tensors="pt"
+).to(model.device)
+
+outputs = model.generate(**inputs, max_new_tokens=2048)
+print(tokenizer.decode(outputs[0], skip_special_tokens=True))
 ```
 
 ## Training Details
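
Under the output format specified in the system prompt, the decoded text lists one claim per line, each followed by a Supported or Unsupported judgment. A minimal post-processing sketch for turning that text into a claim-level factuality score might look like the following; the token slicing, the line parsing, and the supported-fraction score are illustrative assumptions, not part of the model card:

```python
# Illustrative sketch (assumed post-processing, not part of the VeriFastScore release).
# Decode only the newly generated tokens, then split "<fact>: <judgment>" lines into pairs.
generated = outputs[0][inputs["input_ids"].shape[-1]:]
decoded = tokenizer.decode(generated, skip_special_tokens=True)

claims = []
for line in decoded.strip().splitlines():
    line = line.strip()
    if not line or line == "No verifiable claim." or ":" not in line:
        continue
    claim, judgment = line.rsplit(":", 1)  # judgments are single words, so split on the last colon
    claims.append((claim.strip(), judgment.strip()))

if claims:
    # One possible aggregate: the fraction of extracted claims judged Supported.
    score = sum(j.lower().startswith("supported") for _, j in claims) / len(claims)
    print(f"{len(claims)} claims extracted, factuality score {score:.2f}")
else:
    print("No verifiable claim extracted.")
```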