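"""Binoculars: zero-shot detection of machine-generated text.

A text is scored as the ratio of the performer model's perplexity to the
observer/performer cross-perplexity; scores below a tuned threshold are
classified as likely AI-generated (see ``predict``).
"""
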
from typing import Union

import numpy as np
import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer

from config import huggingface_config
from .utils import assert_tokenizer_consistency
from .metrics import perplexity, entropy

torch.set_grad_enabled(False)

# Thresholds selected using Falcon-7B and Falcon-7B-Instruct at bfloat16.
BINOCULARS_ACCURACY_THRESHOLD = 0.9015310749276843  # optimized for F1 score
BINOCULARS_FPR_THRESHOLD = 0.8536432310785527  # optimized for low false-positive rate

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

class Binoculars:
    def __init__(self,
                 observer_name_or_path: str = "tiiuae/falcon-7b",
                 performer_name_or_path: str = "tiiuae/falcon-7b-instruct",
                 use_bfloat16: bool = True,
                 max_token_observed: int = 512,
                 mode: str = "low-fpr",
                 ) -> None:
        # Both models must share a tokenizer so the same encodings can be fed to each.
        assert_tokenizer_consistency(observer_name_or_path, performer_name_or_path)

        if mode == "low-fpr":
            self.threshold = BINOCULARS_FPR_THRESHOLD
        elif mode == "accuracy":
            self.threshold = BINOCULARS_ACCURACY_THRESHOLD
        else:
            raise ValueError(f"Invalid mode: {mode}")

        # Load models with memory-efficient settings: 8-bit quantized weights,
        # with non-quantized modules kept in bfloat16 when requested.
        model_kwargs = {
            "device_map": "auto",
            "load_in_8bit": True,
            "torch_dtype": torch.bfloat16 if use_bfloat16 else torch.float32,
            "trust_remote_code": True,
            "token": huggingface_config["TOKEN"],
        }
        self.observer_model = AutoModelForCausalLM.from_pretrained(observer_name_or_path, **model_kwargs)
        self.performer_model = AutoModelForCausalLM.from_pretrained(performer_name_or_path, **model_kwargs)
        self.observer_model.eval()
        self.performer_model.eval()

        self.tokenizer = AutoTokenizer.from_pretrained(observer_name_or_path)
        if not self.tokenizer.pad_token:
            self.tokenizer.pad_token = self.tokenizer.eos_token
        self.max_token_observed = max_token_observed

    def change_mode(self, mode: str) -> None:
        if mode == "low-fpr":
            self.threshold = BINOCULARS_FPR_THRESHOLD
        elif mode == "accuracy":
            self.threshold = BINOCULARS_ACCURACY_THRESHOLD
        else:
            raise ValueError(f"Invalid mode: {mode}")

    def _tokenize(self, batch: list[str]) -> transformers.BatchEncoding:
        batch_size = len(batch)
        # Move encodings to the compute device so they match the model inputs.
        encodings = self.tokenizer(
            batch,
            return_tensors="pt",
            padding="longest" if batch_size > 1 else False,
            truncation=True,
            max_length=self.max_token_observed,
            return_token_type_ids=False).to(DEVICE)
        return encodings

    def _get_logits(self, encodings: transformers.BatchEncoding) -> tuple[torch.Tensor, torch.Tensor]:
        observer_logits = self.observer_model(**encodings).logits
        performer_logits = self.performer_model(**encodings).logits
        return observer_logits, performer_logits

    def compute_score(self, input_text: Union[list[str], str]) -> Union[float, list[float]]:
        batch = [input_text] if isinstance(input_text, str) else input_text
        encodings = self._tokenize(batch)
        observer_logits, performer_logits = self._get_logits(encodings)
        # Binoculars score = performer perplexity / observer-performer
        # cross-perplexity (computed by the entropy metric). Lower scores
        # fall below the threshold and are classified as AI-generated.
        ppl = perplexity(encodings, performer_logits)
        x_ppl = entropy(observer_logits, performer_logits, encodings, self.tokenizer.pad_token_id)
        binoculars_scores = (ppl / x_ppl).tolist()
        return binoculars_scores[0] if isinstance(input_text, str) else binoculars_scores

    def predict(self, input_text: Union[list[str], str]) -> Union[list[str], str]:
        binoculars_scores = np.array(self.compute_score(input_text))
        pred = np.where(binoculars_scores < self.threshold,
                        "Most likely AI-generated",
                        "Most likely human-generated"
                        ).tolist()
        return pred[0] if isinstance(input_text, str) else pred
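
# A minimal usage sketch. Because this file uses relative imports, it must be
# run as a module (e.g. `python -m binoculars.detector`; the package and module
# names here are illustrative, not part of this repo's documented layout). It
# also assumes a GPU with enough memory for the two 8-bit Falcon-7B checkpoints
# and a valid token in config.huggingface_config. The sample string is
# hypothetical.
if __name__ == "__main__":
    bino = Binoculars(mode="low-fpr")
    sample = "The quick brown fox jumps over the lazy dog."
    print(f"binoculars score: {bino.compute_score(sample):.4f}")
    print(f"prediction: {bino.predict(sample)}")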