Spaces:
Running
Running
ahmadgenus
committed on
Commit
·
8721562
1
Parent(s):
5896773
all
Browse files
- app.py +5 -1
- chatbot.py +3 -10
app.py
CHANGED
@@ -92,7 +92,11 @@ with posts_col:
|
|
92 |
comments_list = json.loads(comments) if isinstance(comments, str) else comments
|
93 |
except Exception:
|
94 |
comments_list = comments
|
95 |
-
|
|
|
|
|
|
|
|
|
96 |
post_url = metadata.get('url', "#")
|
97 |
subreddit = metadata.get('subreddit', 'N/A')
|
98 |
created_str = created_at.strftime('%Y-%m-%d %H:%M:%S')
|
|
|
92 |
comments_list = json.loads(comments) if isinstance(comments, str) else comments
|
93 |
except Exception:
|
94 |
comments_list = comments
|
95 |
+
# Convert metadata from JSON string to dict
|
96 |
+
try:
|
97 |
+
metadata = json.loads(metadata) if isinstance(metadata, str) else metadata
|
98 |
+
except Exception as e:
|
99 |
+
metadata = {}
|
100 |
post_url = metadata.get('url', "#")
|
101 |
subreddit = metadata.get('subreddit', 'N/A')
|
102 |
created_str = created_at.strftime('%Y-%m-%d %H:%M:%S')
|
chatbot.py
CHANGED
@@ -29,7 +29,7 @@ reddit = praw.Reddit(
|
|
29 |
client_secret=os.getenv("REDDIT_CLIENT_SECRET"),
|
30 |
user_agent=os.getenv("REDDIT_USER_AGENT")
|
31 |
)
|
32 |
-
|
33 |
# SQLite DB Connection
|
34 |
def get_db_conn():
|
35 |
return sqlite3.connect("reddit_data.db", check_same_thread=False)
|
@@ -60,7 +60,6 @@ def setup_db():
|
|
60 |
conn.close()
|
61 |
|
62 |
# Keyword filter
|
63 |
-
|
64 |
def keyword_in_post_or_comments(post, keyword):
|
65 |
keyword_lower = keyword.lower()
|
66 |
combined_text = (post.title + " " + post.selftext).lower()
|
@@ -73,7 +72,6 @@ def keyword_in_post_or_comments(post, keyword):
|
|
73 |
return False
|
74 |
|
75 |
# Fetch and process Reddit data
|
76 |
-
|
77 |
def fetch_reddit_data(keyword, days=7, limit=None):
|
78 |
end_time = datetime.utcnow()
|
79 |
start_time = end_time - timedelta(days=days)
|
@@ -110,7 +108,6 @@ def fetch_reddit_data(keyword, days=7, limit=None):
|
|
110 |
save_to_db(data)
|
111 |
|
112 |
# Save data into SQLite
|
113 |
-
|
114 |
def save_to_db(posts):
|
115 |
conn = get_db_conn()
|
116 |
cur = conn.cursor()
|
@@ -137,7 +134,6 @@ def save_to_db(posts):
|
|
137 |
conn.close()
|
138 |
|
139 |
# Retrieve similar context from DB
|
140 |
-
|
141 |
def retrieve_context(question, keyword, reddit_id=None, top_k=10):
|
142 |
lower_q = question.lower()
|
143 |
requested_top_k = 50 if any(word in lower_q for word in ["summarize", "overview", "all posts"]) else top_k
|
@@ -153,7 +149,7 @@ def retrieve_context(question, keyword, reddit_id=None, top_k=10):
|
|
153 |
else:
|
154 |
cur.execute("""
|
155 |
SELECT title, post_text, comments FROM reddit_posts
|
156 |
-
WHERE keyword = ? ORDER BY created_at DESC LIMIT ?;
|
157 |
""", (keyword, requested_top_k))
|
158 |
|
159 |
results = cur.fetchall()
|
@@ -162,7 +158,6 @@ def retrieve_context(question, keyword, reddit_id=None, top_k=10):
|
|
162 |
return results
|
163 |
|
164 |
# Summarizer
|
165 |
-
|
166 |
summarize_prompt = ChatPromptTemplate.from_template("""
|
167 |
You are a summarizer. Summarize the following context from Reddit posts into a concise summary that preserves the key insights. Do not add extra commentary.
|
168 |
|
@@ -174,7 +169,6 @@ Summary:
|
|
174 |
summarize_chain = LLMChain(llm=llm, prompt=summarize_prompt)
|
175 |
|
176 |
# Chatbot memory and prompt
|
177 |
-
|
178 |
memory = ConversationBufferMemory(memory_key="chat_history")
|
179 |
chat_prompt = ChatPromptTemplate.from_template("""
|
180 |
Chat History:
|
@@ -194,7 +188,6 @@ chat_chain = LLMChain(
|
|
194 |
)
|
195 |
|
196 |
# Chatbot response
|
197 |
-
|
198 |
def get_chatbot_response(question, keyword, reddit_id=None):
|
199 |
context_posts = retrieve_context(question, keyword, reddit_id)
|
200 |
context = "\n\n".join([f"{p[0]}:\n{p[1]}" for p in context_posts])
|
@@ -202,4 +195,4 @@ def get_chatbot_response(question, keyword, reddit_id=None):
|
|
202 |
context = summarize_chain.run({"context": context})
|
203 |
combined_input = f"Context:\n{context}\n\nUser Question: {question}"
|
204 |
response = chat_chain.run({"input": combined_input})
|
205 |
-
return response, context_posts
|
|
|
29 |
client_secret=os.getenv("REDDIT_CLIENT_SECRET"),
|
30 |
user_agent=os.getenv("REDDIT_USER_AGENT")
|
31 |
)
|
32 |
+
|
33 |
# SQLite DB Connection
|
34 |
def get_db_conn():
|
35 |
return sqlite3.connect("reddit_data.db", check_same_thread=False)
|
|
|
60 |
conn.close()
|
61 |
|
62 |
# Keyword filter
|
|
|
63 |
def keyword_in_post_or_comments(post, keyword):
|
64 |
keyword_lower = keyword.lower()
|
65 |
combined_text = (post.title + " " + post.selftext).lower()
|
|
|
72 |
return False
|
73 |
|
74 |
# Fetch and process Reddit data
|
|
|
75 |
def fetch_reddit_data(keyword, days=7, limit=None):
|
76 |
end_time = datetime.utcnow()
|
77 |
start_time = end_time - timedelta(days=days)
|
|
|
108 |
save_to_db(data)
|
109 |
|
110 |
# Save data into SQLite
|
|
|
111 |
def save_to_db(posts):
|
112 |
conn = get_db_conn()
|
113 |
cur = conn.cursor()
|
|
|
134 |
conn.close()
|
135 |
|
136 |
# Retrieve similar context from DB
|
|
|
137 |
def retrieve_context(question, keyword, reddit_id=None, top_k=10):
|
138 |
lower_q = question.lower()
|
139 |
requested_top_k = 50 if any(word in lower_q for word in ["summarize", "overview", "all posts"]) else top_k
|
|
|
149 |
else:
|
150 |
cur.execute("""
|
151 |
SELECT title, post_text, comments FROM reddit_posts
|
152 |
+
WHERE keyword = ? ORDER BY datetime(created_at) DESC LIMIT ?;
|
153 |
""", (keyword, requested_top_k))
|
154 |
|
155 |
results = cur.fetchall()
|
|
|
158 |
return results
|
159 |
|
160 |
# Summarizer
|
|
|
161 |
summarize_prompt = ChatPromptTemplate.from_template("""
|
162 |
You are a summarizer. Summarize the following context from Reddit posts into a concise summary that preserves the key insights. Do not add extra commentary.
|
163 |
|
|
|
169 |
summarize_chain = LLMChain(llm=llm, prompt=summarize_prompt)
|
170 |
|
171 |
# Chatbot memory and prompt
|
|
|
172 |
memory = ConversationBufferMemory(memory_key="chat_history")
|
173 |
chat_prompt = ChatPromptTemplate.from_template("""
|
174 |
Chat History:
|
|
|
188 |
)
|
189 |
|
190 |
# Chatbot response
|
|
|
191 |
def get_chatbot_response(question, keyword, reddit_id=None):
|
192 |
context_posts = retrieve_context(question, keyword, reddit_id)
|
193 |
context = "\n\n".join([f"{p[0]}:\n{p[1]}" for p in context_posts])
|
|
|
195 |
context = summarize_chain.run({"context": context})
|
196 |
combined_input = f"Context:\n{context}\n\nUser Question: {question}"
|
197 |
response = chat_chain.run({"input": combined_input})
|
198 |
+
return response, context_posts
|