ellawang9 committed on
Commit
bd3d7ac
·
verified ·
1 Parent(s): eb189bb

Update src/chat.py

Browse files
Files changed (1) hide show
  1. src/chat.py +41 -66
src/chat.py CHANGED
@@ -1,103 +1,78 @@
 
1
  from huggingface_hub import InferenceClient
2
  from config import BASE_MODEL, MY_MODEL, HF_TOKEN
3
  import pandas as pd
4
  import json
 
 
5
 
6
  class SchoolChatbot:
7
  """
8
- This class is extra scaffolding around a model. Modify this class to specify how the model recieves prompts and generates responses.
9
-
10
- Example usage:
11
- chatbot = SchoolChatbot()
12
- response = chatbot.get_response("What schools offer Spanish programs?")
13
  """
14
 
15
  def __init__(self):
16
- """
17
- Initialize the chatbot with a HF model ID
18
- """
19
- model_id = MY_MODEL if MY_MODEL else BASE_MODEL # define MY_MODEL in config.py if you create a new model in the HuggingFace Hub
20
  self.client = InferenceClient(model=model_id, token=HF_TOKEN)
21
  self.df = pd.read_csv("bps_data.csv")
22
- with open("keyword_to_column_map.json") as f:
23
  self.keyword_map = json.load(f)
24
-
25
- def format_prompt(self, user_input):
26
- """
27
- TODO: Implement this method to format the user's input into a proper prompt.
28
-
29
- This method should:
30
- 1. Add any necessary system context or instructions
31
- 2. Format the user's input appropriately
32
- 3. Add any special tokens or formatting the model expects
33
 
34
- Args:
35
- user_input (str): The user's question about Boston schools
36
-
37
- Returns:
38
- str: A formatted prompt ready for the model
39
-
40
- Example prompt format:
41
- "You are a helpful assistant that specializes in Boston schools...
42
- User: {user_input}
43
- Assistant:"
44
- """
45
  return (
46
  "<|system|>You are a helpful assistant that specializes in Boston public school enrollment.<|end|>\n"
47
  f"<|user|>{user_input}<|end|>\n"
48
  "<|assistant|>"
49
  )
50
 
51
- def lookup_structured_data(self, query, school_name=None):
52
- """Search the structured BPS dataset for relevant information"""
53
- results = []
54
- df_filtered = self.df
 
 
 
55
 
 
 
56
  if school_name:
57
  df_filtered = self.df[self.df["BPS_School_Name"].str.contains(school_name, case=False, na=False)]
58
-
59
  if df_filtered.empty:
60
- return None
61
 
62
  row = df_filtered.iloc[0]
63
- for key, col in self.keyword_map.items():
64
- if key in query.lower():
65
- val = row.get(col, "N/A")
66
- if pd.notna(val):
67
- results.append(f"{key.title()}: {val}")
68
- return "\n".join(results) if results else None
69
-
70
- def get_response(self, user_input):
71
- """
72
- TODO: Implement this method to generate responses to user questions.
73
-
74
- This method should:
75
- 1. Use format_prompt() to prepare the input
76
- 2. Generate a response using the model
77
- 3. Clean up and return the response
78
 
79
- Args:
80
- user_input (str): The user's question about Boston schools
81
-
82
- Returns:
83
- str: The chatbot's response
84
-
85
- Implementation tips:
86
- - Use self.format_prompt() to format the user's input
87
- - Use self.client to generate responses
88
- """
89
  matched_school = None
90
  for name in self.df["BPS_School_Name"].dropna():
91
  if name.lower() in user_input.lower():
92
  matched_school = name
93
  break
94
 
95
- structured_response = self.lookup_structured_data(user_input, matched_school)
96
 
97
- if structured_response:
98
- return f"Here’s what I found based on school data:\n{structured_response}"
 
 
 
 
 
 
 
 
 
 
 
 
99
 
100
- prompt = self.format_prompt(user_input)
101
  response = self.client.text_generation(
102
  prompt,
103
  max_new_tokens=512,
@@ -105,4 +80,4 @@ class SchoolChatbot:
105
  top_p=0.9,
106
  stop_sequences=["<|end|>"]
107
  )
108
- return response.strip()
 
1
+
2
  from huggingface_hub import InferenceClient
3
  from config import BASE_MODEL, MY_MODEL, HF_TOKEN
4
  import pandas as pd
5
  import json
6
+ import re
7
+ from difflib import get_close_matches
8
 
9
  class SchoolChatbot:
10
  """
11
+ A chatbot that integrates structured school data and language generation to assist with Boston Public School queries.
 
 
 
 
12
  """
13
 
14
  def __init__(self):
15
+ model_id = MY_MODEL if MY_MODEL else BASE_MODEL
 
 
 
16
  self.client = InferenceClient(model=model_id, token=HF_TOKEN)
17
  self.df = pd.read_csv("bps_data.csv")
18
+ with open("cleaned_keyword_to_column_map.json") as f:
19
  self.keyword_map = json.load(f)
 
 
 
 
 
 
 
 
 
20
 
21
def format_prompt(self, user_input):
    """Build the chat prompt for a question that has no structured context.

    The user's text is untrusted, so the template's own control tokens are
    removed from it before interpolation. Otherwise a message containing
    e.g. ``<|end|>`` followed by ``<|system|>`` could close the user turn
    and inject a forged system turn.

    Args:
        user_input: The user's question. It is converted with ``str()``.

    Returns:
        The prompt string: system turn, user turn, then an open assistant
        turn for the model to complete.
    """
    control_tokens = (
        "<|system|>",
        "<|user|>",
        "<|assistant|>",
        "<|context|>",
        "<|end|>",
    )
    cleaned = str(user_input)
    # Repeat until stable: removing one token can splice a new one together
    # (e.g. "<|e<|end|>nd|>" becomes "<|end|>").
    previous = None
    while previous != cleaned:
        previous = cleaned
        for token in control_tokens:
            cleaned = cleaned.replace(token, "")
    return (
        "<|system|>You are a helpful assistant that specializes in Boston public school enrollment.<|end|>\n"
        f"<|user|>{cleaned}<|end|>\n"
        "<|assistant|>"
    )
27
 
28
def extract_context_with_keywords(self, prompt, school_name=None):
    """Collect facts from the school table that the prompt asks about.

    Each word of ``prompt`` is fuzzy-matched (similarity >= 0.85) against
    the keys of ``self.keyword_map``. For every matched keyword, the mapped
    column is read from the first row whose ``BPS_School_Name`` contains
    ``school_name``.

    Args:
        prompt: The user's question. ``None`` or empty yields no facts.
        school_name: Name (or fragment) to filter on. It is matched
            literally and case-insensitively.

    Returns:
        A list of sentences of the form ``"The school's <kw> is <value>."``,
        ordered alphabetically by keyword. Empty when no row matches or no
        keyword maps to a non-missing value.
    """
    tokens = re.findall(r"\b\w+\b", (prompt or "").lower())
    known_keywords = list(self.keyword_map)
    matched_keywords = set()
    for token in tokens:
        matched_keywords.update(
            get_close_matches(token, known_keywords, cutoff=0.85)
        )

    df_filtered = self.df
    if school_name:
        # regex=False: school names may contain characters such as "(" or
        # "+" that must be matched literally, not parsed as a pattern.
        name_mask = self.df["BPS_School_Name"].str.contains(
            school_name, case=False, na=False, regex=False
        )
        df_filtered = self.df[name_mask]
    # NOTE(review): when school_name is falsy the first row of the whole
    # table is used, so its facts are reported for an unnamed school.
    # Kept for backward compatibility; confirm this is intended.
    if df_filtered.empty:
        return []

    row = df_filtered.iloc[0]
    context_items = []
    # Sort so the facts appear in the same order on every run; plain set
    # iteration order of strings varies between interpreter processes.
    for kw in sorted(matched_keywords):
        col = self.keyword_map.get(kw)
        if not col:
            continue
        val = row.get(col)
        if pd.isna(val):
            continue
        # str() first: numeric or boolean cells have no .lower() method.
        context_items.append(f"The school's {kw} is {str(val).lower()}.")
    return context_items
 
 
 
 
 
 
 
 
51
 
52
+ def get_response(self, user_input):
 
 
 
 
 
 
 
 
 
53
  matched_school = None
54
  for name in self.df["BPS_School_Name"].dropna():
55
  if name.lower() in user_input.lower():
56
  matched_school = name
57
  break
58
 
59
+ structured_facts = self.extract_context_with_keywords(user_input, matched_school)
60
 
61
+ if structured_facts:
62
+ natural_context = (
63
+ f"You know the following facts about {matched_school or 'a Boston public school'}:\n"
64
+ + "\n".join(f"- {fact}" for fact in structured_facts)
65
+ )
66
+ prompt = (
67
+ "<|system|>You are a helpful assistant that specializes in Boston public school enrollment. "
68
+ "Use any known facts about the school to answer helpfully.<|end|>\n"
69
+ f"<|user|>{user_input}<|end|>\n"
70
+ f"<|context|>{natural_context}<|end|>\n"
71
+ "<|assistant|>"
72
+ )
73
+ else:
74
+ prompt = self.format_prompt(user_input)
75
 
 
76
  response = self.client.text_generation(
77
  prompt,
78
  max_new_tokens=512,
 
80
  top_p=0.9,
81
  stop_sequences=["<|end|>"]
82
  )
83
+ return response.strip()