Spaces:
Sleeping
Sleeping
Update src/chat.py
Browse files- src/chat.py +41 -66
src/chat.py
CHANGED
@@ -1,103 +1,78 @@
|
|
|
|
1 |
from huggingface_hub import InferenceClient
|
2 |
from config import BASE_MODEL, MY_MODEL, HF_TOKEN
|
3 |
import pandas as pd
|
4 |
import json
|
|
|
|
|
5 |
|
6 |
class SchoolChatbot:
|
7 |
"""
|
8 |
-
|
9 |
-
|
10 |
-
Example usage:
|
11 |
-
chatbot = SchoolChatbot()
|
12 |
-
response = chatbot.get_response("What schools offer Spanish programs?")
|
13 |
"""
|
14 |
|
15 |
def __init__(self):
|
16 |
-
|
17 |
-
Initialize the chatbot with a HF model ID
|
18 |
-
"""
|
19 |
-
model_id = MY_MODEL if MY_MODEL else BASE_MODEL # define MY_MODEL in config.py if you create a new model in the HuggingFace Hub
|
20 |
self.client = InferenceClient(model=model_id, token=HF_TOKEN)
|
21 |
self.df = pd.read_csv("bps_data.csv")
|
22 |
-
with open("
|
23 |
self.keyword_map = json.load(f)
|
24 |
-
|
25 |
-
def format_prompt(self, user_input):
|
26 |
-
"""
|
27 |
-
TODO: Implement this method to format the user's input into a proper prompt.
|
28 |
-
|
29 |
-
This method should:
|
30 |
-
1. Add any necessary system context or instructions
|
31 |
-
2. Format the user's input appropriately
|
32 |
-
3. Add any special tokens or formatting the model expects
|
33 |
|
34 |
-
|
35 |
-
user_input (str): The user's question about Boston schools
|
36 |
-
|
37 |
-
Returns:
|
38 |
-
str: A formatted prompt ready for the model
|
39 |
-
|
40 |
-
Example prompt format:
|
41 |
-
"You are a helpful assistant that specializes in Boston schools...
|
42 |
-
User: {user_input}
|
43 |
-
Assistant:"
|
44 |
-
"""
|
45 |
return (
|
46 |
"<|system|>You are a helpful assistant that specializes in Boston public school enrollment.<|end|>\n"
|
47 |
f"<|user|>{user_input}<|end|>\n"
|
48 |
"<|assistant|>"
|
49 |
)
|
50 |
|
51 |
-
def
|
52 |
-
|
53 |
-
|
54 |
-
|
|
|
|
|
|
|
55 |
|
|
|
|
|
56 |
if school_name:
|
57 |
df_filtered = self.df[self.df["BPS_School_Name"].str.contains(school_name, case=False, na=False)]
|
58 |
-
|
59 |
if df_filtered.empty:
|
60 |
-
return
|
61 |
|
62 |
row = df_filtered.iloc[0]
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
def get_response(self, user_input):
|
71 |
-
"""
|
72 |
-
TODO: Implement this method to generate responses to user questions.
|
73 |
-
|
74 |
-
This method should:
|
75 |
-
1. Use format_prompt() to prepare the input
|
76 |
-
2. Generate a response using the model
|
77 |
-
3. Clean up and return the response
|
78 |
|
79 |
-
|
80 |
-
user_input (str): The user's question about Boston schools
|
81 |
-
|
82 |
-
Returns:
|
83 |
-
str: The chatbot's response
|
84 |
-
|
85 |
-
Implementation tips:
|
86 |
-
- Use self.format_prompt() to format the user's input
|
87 |
-
- Use self.client to generate responses
|
88 |
-
"""
|
89 |
matched_school = None
|
90 |
for name in self.df["BPS_School_Name"].dropna():
|
91 |
if name.lower() in user_input.lower():
|
92 |
matched_school = name
|
93 |
break
|
94 |
|
95 |
-
|
96 |
|
97 |
-
if
|
98 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
99 |
|
100 |
-
prompt = self.format_prompt(user_input)
|
101 |
response = self.client.text_generation(
|
102 |
prompt,
|
103 |
max_new_tokens=512,
|
@@ -105,4 +80,4 @@ class SchoolChatbot:
|
|
105 |
top_p=0.9,
|
106 |
stop_sequences=["<|end|>"]
|
107 |
)
|
108 |
-
return response.strip()
|
|
|
1 |
+
|
2 |
from huggingface_hub import InferenceClient
|
3 |
from config import BASE_MODEL, MY_MODEL, HF_TOKEN
|
4 |
import pandas as pd
|
5 |
import json
|
6 |
+
import re
|
7 |
+
from difflib import get_close_matches
|
8 |
|
9 |
class SchoolChatbot:
|
10 |
"""
|
11 |
+
A chatbot that integrates structured school data and language generation to assist with Boston Public School queries.
|
|
|
|
|
|
|
|
|
12 |
"""
|
13 |
|
14 |
def __init__(self):
    """Set up the inference client, the school dataframe, and the keyword map.

    Prefers the fine-tuned MY_MODEL from config when it is defined,
    otherwise falls back to BASE_MODEL.
    """
    # `or` picks BASE_MODEL whenever MY_MODEL is empty/None.
    self.client = InferenceClient(model=MY_MODEL or BASE_MODEL, token=HF_TOKEN)
    self.df = pd.read_csv("bps_data.csv")
    with open("cleaned_keyword_to_column_map.json") as f:
        self.keyword_map = json.load(f)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
|
21 |
+
def format_prompt(self, user_input):
    """Wrap *user_input* in the chat-template special tokens the model expects.

    Args:
        user_input (str): The user's question about Boston schools.

    Returns:
        str: A prompt string ending with the assistant turn marker, ready
        for text generation.
    """
    system_msg = "You are a helpful assistant that specializes in Boston public school enrollment."
    return f"<|system|>{system_msg}<|end|>\n<|user|>{user_input}<|end|>\n<|assistant|>"
|
27 |
|
28 |
+
def extract_context_with_keywords(self, prompt, school_name=None):
    """Pull structured facts from the school dataframe that match *prompt*.

    Tokenizes the prompt, fuzzy-matches each token against the
    keyword-to-column map, and renders every matching column value of the
    first matching school row as a short English sentence.

    Args:
        prompt (str): The user's question.
        school_name (str | None): Optional case-insensitive substring
            filter on the "BPS_School_Name" column.

    Returns:
        list[str]: Fact sentences (possibly empty), in sorted-keyword order.
    """
    def _extract_keywords(text):
        # \b\w+\b keeps alphanumeric word tokens; cutoff=0.85 tolerates
        # small misspellings without matching unrelated words.
        tokens = re.findall(r'\b\w+\b', text.lower())
        keys = list(self.keyword_map.keys())  # hoisted out of the token loop
        matched = set()
        for token in tokens:
            matched.update(get_close_matches(token, keys, cutoff=0.85))
        return matched

    matched_keywords = _extract_keywords(prompt)

    df_filtered = self.df
    if school_name:
        df_filtered = self.df[self.df["BPS_School_Name"].str.contains(school_name, case=False, na=False)]
    if df_filtered.empty:
        return []

    row = df_filtered.iloc[0]
    context_items = []
    # sorted(): sets are unordered, so sort for deterministic output.
    for kw in sorted(matched_keywords):
        col = self.keyword_map.get(kw)
        val = row.get(col) if col else None
        if col and pd.notna(val):
            # str(val): columns may hold numbers; calling .lower() directly
            # on a non-string raised AttributeError in the original.
            context_items.append(f"The school's {kw} is {str(val).lower()}.")
    return context_items
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
|
52 |
+
def get_response(self, user_input):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
matched_school = None
|
54 |
for name in self.df["BPS_School_Name"].dropna():
|
55 |
if name.lower() in user_input.lower():
|
56 |
matched_school = name
|
57 |
break
|
58 |
|
59 |
+
structured_facts = self.extract_context_with_keywords(user_input, matched_school)
|
60 |
|
61 |
+
if structured_facts:
|
62 |
+
natural_context = (
|
63 |
+
f"You know the following facts about {matched_school or 'a Boston public school'}:\n"
|
64 |
+
+ "\n".join(f"- {fact}" for fact in structured_facts)
|
65 |
+
)
|
66 |
+
prompt = (
|
67 |
+
"<|system|>You are a helpful assistant that specializes in Boston public school enrollment. "
|
68 |
+
"Use any known facts about the school to answer helpfully.<|end|>\n"
|
69 |
+
f"<|user|>{user_input}<|end|>\n"
|
70 |
+
f"<|context|>{natural_context}<|end|>\n"
|
71 |
+
"<|assistant|>"
|
72 |
+
)
|
73 |
+
else:
|
74 |
+
prompt = self.format_prompt(user_input)
|
75 |
|
|
|
76 |
response = self.client.text_generation(
|
77 |
prompt,
|
78 |
max_new_tokens=512,
|
|
|
80 |
top_p=0.9,
|
81 |
stop_sequences=["<|end|>"]
|
82 |
)
|
83 |
+
return response.strip()
|