ellawang9 commited on
Commit
8e036bb
·
verified ·
1 Parent(s): f902b26

Update src/chat.py

Browse files
Files changed (1) hide show
  1. src/chat.py +30 -7
src/chat.py CHANGED
@@ -15,9 +15,31 @@ class SchoolChatbot:
15
  model_id = MY_MODEL if MY_MODEL else BASE_MODEL
16
  self.client = InferenceClient(model=model_id, token=HF_TOKEN)
17
  self.df = pd.read_csv("bps_data.csv")
18
- with open("keyword_to_column_map.json") as f:
19
  self.keyword_map = json.load(f)
20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  def format_prompt(self, user_input):
22
  return (
23
  "<|system|>You are a helpful assistant that specializes in Boston public school enrollment.<|end|>\n"
@@ -25,6 +47,12 @@ class SchoolChatbot:
25
  "<|assistant|>"
26
  )
27
 
 
 
 
 
 
 
28
  def extract_context_with_keywords(self, prompt, school_name=None):
29
  def extract_keywords(text):
30
  tokens = re.findall(r'\b\w+\b', text.lower())
@@ -50,12 +78,7 @@ class SchoolChatbot:
50
  return context_items
51
 
52
  def get_response(self, user_input):
53
- matched_school = None
54
- for name in self.df["BPS_School_Name"].dropna():
55
- if name.lower() in user_input.lower():
56
- matched_school = name
57
- break
58
-
59
  structured_facts = self.extract_context_with_keywords(user_input, matched_school)
60
 
61
  if structured_facts:
 
15
  model_id = MY_MODEL if MY_MODEL else BASE_MODEL
16
  self.client = InferenceClient(model=model_id, token=HF_TOKEN)
17
  self.df = pd.read_csv("bps_data.csv")
18
+ with open("cleaned_keyword_to_column_map.json") as f:
19
  self.keyword_map = json.load(f)
20
 
21
+ # Create school name map with aliases
22
+ self.school_name_map = {}
23
+ for _, row in self.df.iterrows():
24
+ primary = row.get("BPS_School_Name")
25
+ hist = row.get("BPS_Historical_Name")
26
+ abbrev = row.get("SMMA_Abbreviated_Name")
27
+ if pd.notna(primary):
28
+ self.school_name_map[primary.lower()] = primary
29
+ if pd.notna(hist):
30
+ self.school_name_map[hist.lower()] = primary
31
+ if pd.notna(abbrev):
32
+ self.school_name_map[abbrev.lower()] = primary
33
+
34
+ # Add custom aliases
35
+ self.school_name_map.update({
36
+ "acc": "Another Course to College*",
37
+ "baldwin": "Baldwin Early Learning Pilot Academy",
38
+ "adams elementary": "Adams, Samuel Elementary",
39
+ "alighieri montessori": "Alighieri, Dante Montessori School",
40
+ "phineas bates": "Bates, Phineas Elementary"
41
+ })
42
+
43
  def format_prompt(self, user_input):
44
  return (
45
  "<|system|>You are a helpful assistant that specializes in Boston public school enrollment.<|end|>\n"
 
47
  "<|assistant|>"
48
  )
49
 
50
+ def match_school_name(self, query):
51
+ for key in self.school_name_map:
52
+ if key in query.lower():
53
+ return self.school_name_map[key]
54
+ return None
55
+
56
  def extract_context_with_keywords(self, prompt, school_name=None):
57
  def extract_keywords(text):
58
  tokens = re.findall(r'\b\w+\b', text.lower())
 
78
  return context_items
79
 
80
  def get_response(self, user_input):
81
+ matched_school = self.match_school_name(user_input)
 
 
 
 
 
82
  structured_facts = self.extract_context_with_keywords(user_input, matched_school)
83
 
84
  if structured_facts: