import json

import pandas as pd
from huggingface_hub import InferenceClient

from config import BASE_MODEL, MY_MODEL, HF_TOKEN

class SchoolChatbot:
    """
    This class is extra scaffolding around a model. Modify this class to
    specify how the model receives prompts and generates responses.

    Example usage:
        chatbot = SchoolChatbot()
        response = chatbot.get_response("What schools offer Spanish programs?")
    """
    def __init__(self):
        """
        Initialize the chatbot with a HF model ID.
        """
        # Define MY_MODEL in config.py if you create a new model on the Hugging Face Hub.
        model_id = MY_MODEL if MY_MODEL else BASE_MODEL
        self.client = InferenceClient(model=model_id, token=HF_TOKEN)
        self.df = pd.read_csv("bps_data.csv")
        with open("keyword_to_column_map.json") as f:
            self.keyword_map = json.load(f)
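        # Assumed shape of keyword_to_column_map.json (an illustrative guess,
        # not taken from the actual file in this repo):
        #   {"start time": "School_Start_Time", "uniform": "Uniform_Policy"}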

    def format_prompt(self, user_input):
        """
        TODO: Implement this method to format the user's input into a proper prompt.

        This method should:
        1. Add any necessary system context or instructions
        2. Format the user's input appropriately
        3. Add any special tokens or formatting the model expects

        Args:
            user_input (str): The user's question about Boston schools

        Returns:
            str: A formatted prompt ready for the model

        Example prompt format:
            "You are a helpful assistant that specializes in Boston schools...
            User: {user_input}
            Assistant:"
        """
        return (
            "<|system|>You are a helpful assistant that specializes in Boston public school enrollment.<|end|>\n"
            f"<|user|>{user_input}<|end|>\n"
            "<|assistant|>"
        )
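
    # NOTE: the <|system|>/<|user|>/<|assistant|>/<|end|> markers above follow a
    # Phi-3-style chat template. That is an assumption about BASE_MODEL; if your
    # model expects a different template (e.g. Llama or ChatML), swap the markers
    # or build the prompt from the model's own chat template.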

    def lookup_structured_data(self, query, school_name=None):
        """Search the structured BPS dataset for relevant information."""
        results = []
        df_filtered = self.df
        if school_name:
            # Narrow the frame to rows whose school name contains the match.
            df_filtered = self.df[
                self.df["BPS_School_Name"].str.contains(school_name, case=False, na=False)
            ]
        if df_filtered.empty:
            return None
        row = df_filtered.iloc[0]
        # Report every mapped column whose keyword appears in the query.
        # (Default to None rather than "N/A" so a missing column is skipped by
        # the pd.notna check instead of being reported as the literal "N/A".)
        for key, col in self.keyword_map.items():
            if key in query.lower():
                val = row.get(col)
                if pd.notna(val):
                    results.append(f"{key.title()}: {val}")
        return "\n".join(results) if results else None

    def get_response(self, user_input):
        """
        TODO: Implement this method to generate responses to user questions.

        This method should:
        1. Use format_prompt() to prepare the input
        2. Generate a response using the model
        3. Clean up and return the response

        Args:
            user_input (str): The user's question about Boston schools

        Returns:
            str: The chatbot's response

        Implementation tips:
        - Use self.format_prompt() to format the user's input
        - Use self.client to generate responses
        """
        # First, try to answer directly from the structured dataset: scan for a
        # school name mentioned in the question, then look up mapped columns.
        matched_school = None
        for name in self.df["BPS_School_Name"].dropna():
            if name.lower() in user_input.lower():
                matched_school = name
                break
        structured_response = self.lookup_structured_data(user_input, matched_school)
        if structured_response:
            return f"Here’s what I found based on school data:\n{structured_response}"

        # Otherwise, fall back to free-form generation from the model.
        prompt = self.format_prompt(user_input)
        response = self.client.text_generation(
            prompt,
            max_new_tokens=512,
            temperature=0.7,
            top_p=0.9,
            stop_sequences=["<|end|>"],
        )
        return response.strip()
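

# Minimal usage sketch, mirroring the class docstring (assumes config.py
# provides BASE_MODEL, MY_MODEL, and HF_TOKEN, and that bps_data.csv and
# keyword_to_column_map.json sit next to this file):
if __name__ == "__main__":
    chatbot = SchoolChatbot()
    print(chatbot.get_response("What schools offer Spanish programs?"))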