from typing import Union

import numpy as np
import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer

from config import huggingface_config
from .utils import assert_tokenizer_consistency
from .metrics import perplexity, entropy

torch.set_grad_enabled(False)

# selected using Falcon-7B and Falcon-7B-Instruct at bfloat16
BINOCULARS_ACCURACY_THRESHOLD = 0.9015310749276843  # optimized for f1-score
BINOCULARS_FPR_THRESHOLD = 0.8536432310785527  # optimized for low-fpr

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"


class Binoculars(object):
    """Score text with an observer/performer pair of causal language models.

    The score is the result of ``perplexity(...)`` on the performer logits
    divided by the result of ``entropy(...)`` on both models' logits. A score
    below the active threshold is labelled "Most likely AI-generated".
    """

    def __init__(self,
                 observer_name_or_path: str = "tiiuae/falcon-7b",
                 performer_name_or_path: str = "tiiuae/falcon-7b-instruct",
                 use_bfloat16: bool = True,
                 max_token_observed: int = 512,
                 mode: str = "low-fpr",
                 quantize: bool = True
                 ) -> None:
        """Load both models and the shared tokenizer.

        Args:
            observer_name_or_path: Model id or path of the observer model.
            performer_name_or_path: Model id or path of the performer model.
            use_bfloat16: Load weights as bfloat16 (otherwise float32) when
                8-bit quantization is not used.
            max_token_observed: Truncation length passed to the tokenizer.
            mode: ``"low-fpr"`` or ``"accuracy"``; selects the threshold.
            quantize: Request 8-bit loading; silently degrades to the
                non-quantized dtype if ``bitsandbytes`` cannot be imported.

        Raises:
            ValueError: If ``mode`` is not a recognised mode name.
        """
        assert_tokenizer_consistency(observer_name_or_path, performer_name_or_path)

        # Single source of truth for mode validation / threshold selection.
        self.change_mode(mode)

        # Load models with memory-efficient settings
        model_kwargs = {
            "device_map": "auto",
            "trust_remote_code": True,
            "token": huggingface_config["TOKEN"],
        }

        use_8bit = False
        if quantize:
            try:
                # Imported only to probe availability; the module is not used here.
                import bitsandbytes  # noqa: F401
            except ImportError:
                print("bitsandbytes not available. Falling back to full precision.")
            else:
                use_8bit = True

        if use_8bit:
            model_kwargs["load_in_8bit"] = True
        else:
            model_kwargs["torch_dtype"] = torch.bfloat16 if use_bfloat16 else torch.float32

        self.observer_model = AutoModelForCausalLM.from_pretrained(observer_name_or_path, **model_kwargs)
        self.performer_model = AutoModelForCausalLM.from_pretrained(performer_name_or_path, **model_kwargs)

        self.observer_model.eval()
        self.performer_model.eval()

        # Use the same access token as the model loads so that a gated
        # repository does not succeed for the weights but fail for the tokenizer.
        self.tokenizer = AutoTokenizer.from_pretrained(
            observer_name_or_path, token=huggingface_config["TOKEN"])
        if not self.tokenizer.pad_token:
            self.tokenizer.pad_token = self.tokenizer.eos_token
        self.max_token_observed = max_token_observed

    def change_mode(self, mode: str) -> None:
        """Select the decision threshold for ``predict``.

        Args:
            mode: ``"low-fpr"`` or ``"accuracy"``.

        Raises:
            ValueError: If ``mode`` is neither of the supported values.
        """
        if mode == "low-fpr":
            self.threshold = BINOCULARS_FPR_THRESHOLD
        elif mode == "accuracy":
            self.threshold = BINOCULARS_ACCURACY_THRESHOLD
        else:
            raise ValueError(f"Invalid mode: {mode}")

    def _tokenize(self, batch: list[str]) -> transformers.BatchEncoding:
        """Tokenize ``batch`` into PyTorch tensors, truncated to ``max_token_observed``.

        Padding to the longest sequence is only enabled for batches with more
        than one item.
        """
        batch_size = len(batch)
        encodings = self.tokenizer(
            batch,
            return_tensors="pt",
            padding="longest" if batch_size > 1 else False,
            truncation=True,
            max_length=self.max_token_observed,
            return_token_type_ids=False)
        return encodings

    @staticmethod
    def _model_logits(model, encodings: transformers.BatchEncoding) -> torch.Tensor:
        """Run ``model`` on ``encodings`` with the inputs placed on the model's device."""
        device = getattr(model, "device", None)
        inputs = dict(encodings.items())
        # A model with offloaded weights can report the "meta" device; in that
        # case leave the inputs where they are instead of moving them to a
        # device that cannot hold data.
        if device is not None and device.type != "meta":
            inputs = {name: tensor.to(device) for name, tensor in inputs.items()}
        return model(**inputs).logits

    @torch.inference_mode()
    def _get_logits(self, encodings: transformers.BatchEncoding) -> tuple[torch.Tensor, torch.Tensor]:
        """Return ``(observer_logits, performer_logits)`` for ``encodings``.

        Both tensors are returned on the observer logits' device so they can
        be combined directly. ``encodings`` itself is not modified.
        """
        observer_logits = self._model_logits(self.observer_model, encodings)
        performer_logits = self._model_logits(self.performer_model, encodings)
        return observer_logits, performer_logits.to(observer_logits.device)

    def compute_score(self, input_text: Union[list[str], str]) -> Union[float, list[float]]:
        """Compute the Binoculars score for one text or a batch of texts.

        Args:
            input_text: A single string or a list of strings.

        Returns:
            A single score for a string input, otherwise a list with one score
            per input text (an empty list for an empty batch).
        """
        batch = [input_text] if isinstance(input_text, str) else input_text
        if not batch:
            # Nothing to score; avoid handing an empty batch to the tokenizer.
            return []

        encodings = self._tokenize(batch)
        observer_logits, performer_logits = self._get_logits(encodings)
        # Keep token ids / attention mask on the same device as the logits so
        # the metric helpers never mix devices.
        encodings = encodings.to(observer_logits.device)

        ppl = perplexity(encodings, performer_logits)
        x_ppl = entropy(observer_logits, performer_logits, encodings, self.tokenizer.pad_token_id)
        # NOTE(review): relies on the metric helpers returning array-likes that
        # support `/` and `.tolist()` — confirm against .metrics.
        binoculars_scores = ppl / x_ppl
        binoculars_scores = binoculars_scores.tolist()
        return binoculars_scores[0] if isinstance(input_text, str) else binoculars_scores

    def _label(self, score) -> str:
        """Map a single score to its label using the active threshold."""
        if score < self.threshold:
            return "Most likely AI-generated"
        return "Most likely human-generated"

    def predict(self, input_text: Union[list[str], str]) -> Union[str, list[str]]:
        """Classify one text or a batch of texts.

        Args:
            input_text: A single string or a list of strings.

        Returns:
            One label for a string input, otherwise a list of labels in input
            order. Scores strictly below ``self.threshold`` are labelled
            "Most likely AI-generated".
        """
        binoculars_scores = np.array(self.compute_score(input_text))
        if isinstance(input_text, str):
            return self._label(binoculars_scores)
        return [self._label(score) for score in binoculars_scores]