import json

import pandas as pd
from huggingface_hub import InferenceClient

from config import BASE_MODEL, MY_MODEL, HF_TOKEN


class SchoolChatbot:
    """
    Chatbot that answers questions about Boston Public Schools.

    It first tries to answer from a structured CSV dataset (keyword lookup);
    if no structured match is found, it falls back to a hosted language model
    via the Hugging Face Inference API.

    Example usage:
        chatbot = SchoolChatbot()
        response = chatbot.get_response("What schools offer Spanish programs?")
    """

    def __init__(self):
        """
        Initialize the chatbot: pick a model, open an inference client,
        and load the structured BPS data plus the keyword->column map.

        Raises:
            FileNotFoundError: if "bps_data.csv" or
                "keyword_to_column_map.json" is missing from the CWD.
        """
        # Prefer a custom fine-tuned model if one is configured; otherwise
        # fall back to the base model. Define MY_MODEL in config.py if you
        # create a new model in the HuggingFace Hub.
        model_id = MY_MODEL if MY_MODEL else BASE_MODEL
        self.client = InferenceClient(model=model_id, token=HF_TOKEN)

        # Structured school data used for direct (non-LLM) answers.
        self.df = pd.read_csv("bps_data.csv")
        # Maps query keywords (e.g. "uniform") to dataframe column names.
        with open("keyword_to_column_map.json") as f:
            self.keyword_map = json.load(f)

    def format_prompt(self, user_input):
        """
        Wrap the user's question in the chat template the model expects.

        Args:
            user_input (str): The user's question about Boston schools.

        Returns:
            str: A prompt with system/user/assistant role tokens, ready
            for text generation.
        """
        return (
            "<|system|>You are a helpful assistant that specializes in Boston public school enrollment.<|end|>\n"
            f"<|user|>{user_input}<|end|>\n"
            "<|assistant|>"
        )

    def lookup_structured_data(self, query, school_name=None):
        """
        Search the structured BPS dataset for facts relevant to `query`.

        Args:
            query (str): The user's question; matched case-insensitively
                against the keys of the keyword map.
            school_name (str | None): Optional school name used to filter
                rows (case-insensitive substring match on
                "BPS_School_Name"). If None, the whole dataframe is used.

        Returns:
            str | None: Newline-joined "Keyword: value" lines for each
            keyword found in the query, or None if the filter matched no
            rows or no keyword produced a non-null value.
        """
        results = []
        df_filtered = self.df
        if school_name:
            df_filtered = self.df[
                self.df["BPS_School_Name"].str.contains(school_name, case=False, na=False)
            ]
        if df_filtered.empty:
            return None

        # NOTE: only the first matching row is consulted; if several schools
        # match the name filter, later matches are ignored.
        row = df_filtered.iloc[0]
        for key, col in self.keyword_map.items():
            if key in query.lower():
                val = row.get(col, "N/A")
                if pd.notna(val):
                    results.append(f"{key.title()}: {val}")
        return "\n".join(results) if results else None

    def get_response(self, user_input):
        """
        Answer a user question, preferring structured data over the LLM.

        Args:
            user_input (str): The user's question about Boston schools.

        Returns:
            str: Either a formatted answer built from the CSV data, or the
            model's generated (and stripped) text.
        """
        # Try to recognize a school name mentioned anywhere in the question
        # (first match wins).
        matched_school = None
        for name in self.df["BPS_School_Name"].dropna():
            if name.lower() in user_input.lower():
                matched_school = name
                break

        structured_response = self.lookup_structured_data(user_input, matched_school)
        if structured_response:
            return f"Here’s what I found based on school data:\n{structured_response}"

        # No structured hit: fall back to the hosted model.
        prompt = self.format_prompt(user_input)
        response = self.client.text_generation(
            prompt,
            max_new_tokens=512,
            temperature=0.7,
            top_p=0.9,
            stop_sequences=["<|end|>"]
        )
        return response.strip()