Commit 4d88a84
Parent(s): aedbc59

updated profile data

Files changed:
- README.md +127 -11
- Vector_storing.py +270 -0
- all_chunks.json +0 -0
- app.py +60 -75
- faiss_store/v61_600_150/index.faiss +0 -0
README.md
CHANGED
@@ -1,14 +1,130 @@
# 🧠 Krishna's Personal AI Chatbot

A memory-grounded, retrieval-augmented AI assistant built with LangChain, FAISS, BM25, and Llama3 — personalized to Krishna Vamsi Dhulipalla’s career, projects, and technical profile.

> ⚡️ Ask me anything about Krishna — skills, experience, goals, or even what tools he used at Virginia Tech.

---

## 📌 Features

- ✅ **Hybrid Retrieval**: Combines dense vector search (FAISS) and keyword search (BM25) for precise, high-recall chunk selection
- 🤖 **LLM-Powered Pipelines**: Uses OpenAI GPT-4o and NVIDIA NIMs (e.g., LLaMA-3, Mixtral) for query rewriting, scope validation, and final answer generation
- 🧠 **Memory Module**: Stores user preferences, recent topics, and inferred tone using a structured `KnowledgeBase` schema (a sketch follows this list)
- 🛠️ **Custom Architecture**:
  - Query → Rewriting → Hybrid Retriever → Scope Validator → LLM Answer
  - Fallback humor model (Mixtral) for out-of-scope queries
- 🧩 **Document Grounding**: Powered by Krishna’s actual markdown files, such as `profile.md`, `goals.md`, and `chatbot_architecture.md`
- 📊 **Enriched Vector Store**: Chunks include LLM-generated summaries and synthetic queries for better search performance
- 🎛️ **Gradio Frontend**: Responsive, markdown-formatted interface for natural, real-time interaction
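The `KnowledgeBase` schema mentioned above is defined in `app.py` and populated via a `PydanticOutputParser`. Below is a minimal sketch of what such a model could look like; the field names are illustrative assumptions, not the exact definition:

```python
from typing import List
from pydantic import BaseModel, Field

# Illustrative sketch of a structured memory schema; the real model in
# app.py may differ. Assumed fields: user_preferences, recent_topics,
# inferred_tone (matching the feature description above).
class KnowledgeBase(BaseModel):
    user_preferences: List[str] = Field(default_factory=list, description="Likes/dislikes the user has stated")
    recent_topics: List[str] = Field(default_factory=list, description="Topics raised in recent turns")
    inferred_tone: str = Field(default="neutral", description="e.g., casual, technical, recruiter-style")
```

A `PydanticOutputParser` (imported in `app.py`) can then validate an LLM's JSON output directly into this model after each turn.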

---

## 🏗️ Architecture

```text
User Query
    ↓
[LLM1] → Rephrase into 3 diverse subqueries
    ↓
Hybrid Retrieval (BM25 + FAISS)
    ↓
[LLM2] → Classify: In-scope or Out-of-scope
    ↓
 ├─ In-scope → Top-k Chunks → GPT-4o
 └─ Out-of-scope → Mixtral (funny fallback)
    ↓
Final Answer + Async Memory Update
```
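The in-scope/out-of-scope fork maps naturally onto LangChain's `RunnableBranch`. A minimal, self-contained sketch of that control flow follows; the lambda stages are stand-ins for the real LLM chains (a keyword check replaces LLM2), so only the branching shape matches `app.py`:

```python
from langchain_core.runnables import RunnableBranch, RunnableLambda

# Stub stages so the fork is runnable offline; app.py wires real LLM chains here.
classify = RunnableLambda(lambda q: {"query": q, "in_scope": "krishna" in q.lower()})
grounded = RunnableLambda(lambda d: f"[GPT-4o] grounded answer for: {d['query']}")
humorous = RunnableLambda(lambda d: f"[Mixtral] witty deflection for: {d['query']}")

pipeline = classify | RunnableBranch(
    (lambda d: d["in_scope"], grounded),
    humorous,  # default branch: out-of-scope queries
)

print(pipeline.invoke("What did Krishna build at Virginia Tech?"))
```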

---

## 📂 Project Structure

```
.
├── app.py                 # Main Gradio app and pipeline logic
├── Vector_storing.py      # Chunking, LLM-based enrichment, and FAISS store creation
├── requirements.txt       # Python package dependencies
├── faiss_store/           # Saved FAISS vector index
├── all_chunks.json        # JSON of enriched document chunks
├── personal_data/         # Source markdown files (currently excluded from the repo)
├── README.md
```

---

## 🧠 Knowledge Sources

All answers are grounded in curated markdown files:

| File Name                 | Description                                    |
| ------------------------- | ---------------------------------------------- |
| `profile.md`              | Krishna’s full technical profile and education |
| `goals.md`                | Short- and long-term personal goals            |
| `chatbot_architecture.md` | System-level breakdown of this AI assistant    |
| `personal_interests.md`   | Hobbies, cultural identity, food preferences   |
| `conversations.md`        | Sample queries and expected response tone      |

---

## 🧪 How It Works

1. **User input** is rewritten into subqueries (LLM1)
2. **Retriever** fetches relevant chunks using BM25 and FAISS, blending both scores (see the sketch after this list)
3. **Classifier LLM** decides whether the retrieved chunks are relevant to Krishna
4. **GPT-4o** generates the final answer from the top-k chunks
5. **Memory is updated** asynchronously with every turn
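The score blending in step 2 condenses the fusion logic in `hybrid_retrieve` (see `app.py`): per-chunk vector scores are averaged and min-max normalized, BM25 contributes a reciprocal-rank score averaged over the subqueries, and `alpha` weights the two. A runnable sketch, with the normalization bounds fixed as parameters for illustration (in `app.py` they come from the full retrieved set):

```python
import numpy as np

def fuse_scores(vector_scores: list[float], bm25_score: float,
                n_queries: int, alpha: float = 0.5,
                min_vec: float = 0.0, max_vec: float = 1.0) -> float:
    """Blend dense and keyword evidence for one chunk (sketch of app.py's logic)."""
    vec_score = float(np.mean(vector_scores)) if vector_scores else 0.0
    norm_vec = 0.5 if max_vec == min_vec else (vec_score - min_vec) / (max_vec - min_vec)
    return alpha * norm_vec + (1 - alpha) * (bm25_score / n_queries)

# e.g. fuse_scores([0.82, 0.76], bm25_score=1.5, n_queries=3)
```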

---

## 💬 Example Queries

- What programming languages does Krishna know?
- Tell me about Krishna’s chatbot architecture
- Can this chatbot explain Krishna's work at Virginia Tech?
- What tools has Krishna used for data engineering?

---

## 🚀 Setup & Usage

```bash
# 1. Clone the repo
git clone https://github.com/krishna-creator/krishna-personal-chatbot.git
cd krishna-personal-chatbot

# 2. Install dependencies
pip install -r requirements.txt

# 3. Set your API keys (OpenAI, NVIDIA)
export OPENAI_API_KEY=...
export NVIDIA_API_KEY=...

# 4. Launch the chatbot
python app.py
```
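Since `Vector_storing.py` calls `load_dotenv`, the keys can alternatively live in a `.env` file at the project root when running the indexing script locally (placeholder values shown):

```bash
# .env -- placeholder values, not real keys
OPENAI_API_KEY=sk-...
NVIDIA_API_KEY=nvapi-...
```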

---

## 🔮 Model Stack

| Purpose            | Model Name               | Provider |
| ------------------ | ------------------------ | -------- |
| Query Rewriting    | `phi-3-mini-4k-instruct` | NVIDIA   |
| Scope Classifier   | `llama-3-70b-instruct`   | NVIDIA   |
| Answer Generator   | `gpt-4o`                 | OpenAI   |
| Fallback Humor LLM | `mixtral-8x22b-instruct` | NVIDIA   |
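These models are reached through the LangChain wrappers already imported in `app.py` (`ChatNVIDIA`, `ChatOpenAI`). A sketch of the instantiation, assuming the NVIDIA catalog IDs below (the catalog prefixes model names with an org such as `meta/` or `microsoft/`, so the exact strings may differ from the short names in the table):

```python
from langchain_openai import ChatOpenAI
from langchain_nvidia_ai_endpoints import ChatNVIDIA

# Assumed catalog IDs -- verify against the NVIDIA API catalog before use.
rewriter   = ChatNVIDIA(model="microsoft/phi-3-mini-4k-instruct")        # query rewriting
classifier = ChatNVIDIA(model="meta/llama3-70b-instruct")                # scope classification
answerer   = ChatOpenAI(model="gpt-4o", streaming=True)                  # grounded answers
fallback   = ChatNVIDIA(model="mistralai/mixtral-8x22b-instruct-v0.1")   # out-of-scope humor
```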

---

## 📌 Acknowledgments

- Built as part of Krishna's exploration into **LLM orchestration and agentic RAG**
- Inspired by LangChain, SentenceTransformers, and NVIDIA RAG Agents Course

---

## 📜 License

MIT License © Krishna Vamsi Dhulipalla
Vector_storing.py
ADDED
@@ -0,0 +1,270 @@
```python
import os
import re
import json
import hashlib
from pathlib import Path
from dotenv import load_dotenv
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_nvidia_ai_endpoints import ChatNVIDIA

# === UTILS ===
def hash_text(text):
    return hashlib.md5(text.encode()).hexdigest()[:8]

def fix_json_text(text):
    # Normalize curly quotes and extract the outermost JSON object
    text = text.replace("“", '"').replace("”", '"').replace("‘", "'").replace("’", "'")
    match = re.search(r'\{.*\}', text, re.DOTALL)
    return match.group(0) if match else text

def enrich_chunk_with_llm(text, llm):
    prompt = f"""You're a helpful assistant optimizing document retrieval.

Every document you see is about Krishna Vamsi Dhulipalla.

Here’s a document chunk:
{text}

1. Summarize the key content of this chunk in 1–2 sentences, assuming the overall context is about Krishna.
2. Generate 3 natural-language questions that a user might ask to which this chunk would be a relevant answer, focusing on Krishna-related topics.

Respond in JSON:
{{
  "summary": "...",
  "synthetic_queries": ["...", "...", "..."]
}}"""

    response = llm.invoke(prompt)
    content = getattr(response, "content", "").strip()

    if not content:
        raise ValueError("⚠️ LLM returned empty response")

    fixed = fix_json_text(content)
    try:
        return json.loads(fixed)
    except Exception as e:
        raise ValueError(f"Invalid JSON from LLM: {e}\n--- Raw Output ---\n{content}")

# === MAIN FUNCTION ===
def create_faiss_store(
    md_dir="./personal_data",
    chunk_size=600,
    chunk_overlap=150,
    persist_dir="./faiss_store",
    chunk_save_path="all_chunks.json",
    llm=None
):
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        separators=["\n# ", "\n## ", "\n### ", "\n#### ", "\n\n", "\n- ", "\n", ". ", " "],
        keep_separator=True,
        length_function=len,  # Consider switching to tokenizer-based later
        is_separator_regex=False
    )

    docs, all_chunks, failed_chunks = [], [], []

    # Read each markdown file, keeping its first line as a header hint
    for md_file in Path(md_dir).glob("*.md"):
        with open(md_file, "r", encoding="utf-8") as f:
            content = f.read().strip()
        if not content:
            continue
        content = re.sub(r'\n#+(\w)', r'\n# \1', content)  # ensure a space after heading markers
        docs.append({
            "content": content,
            "metadata": {
                "source": md_file.name,
                "header": content.split('\n')[0]
            }
        })

    for doc in docs:
        try:
            chunks = splitter.split_text(doc["content"])
        except Exception as e:
            print(f"❌ Error splitting {doc['metadata']['source']}: {e}")
            continue

        for i, chunk in enumerate(chunks):
            chunk = chunk.strip()
            if len(chunk) < 50:  # skip fragments too short to be useful
                continue

            chunk_id = f"{doc['metadata']['source']}_#{i}_{hash_text(chunk)}"
            metadata = {
                **doc["metadata"],
                "chunk_id": chunk_id,
                "has_header": chunk.startswith("#"),
                "word_count": len(chunk.split())
            }

            try:
                print("🔍 Processing chunk:", chunk_id)
                enriched = enrich_chunk_with_llm(chunk, llm)
                summary = enriched.get("summary", "")
                questions = enriched.get("synthetic_queries", [])

                metadata.update({
                    "summary": summary,
                    "synthetic_queries": questions
                })

                # Embed the summary and synthetic queries alongside the raw text
                enriched_text = (
                    f"{chunk}\n\n"
                    f"---\n"
                    f"🔹 Summary:\n{summary}\n\n"
                    f"🔸 Related Questions:\n" + "\n".join(f"- {q}" for q in questions)
                )

                all_chunks.append({
                    "text": enriched_text,
                    "metadata": metadata
                })
            except Exception as e:
                print(f"⚠️ LLM failed for {chunk_id}: {e}")
                failed_chunks.append(f"{chunk_id} → {str(e)}")

    print(f"✅ Markdown files processed: {len(docs)}")
    print(f"✅ Chunks created: {len(all_chunks)} | ⚠️ Failed: {len(failed_chunks)}")

    # Save enriched chunks
    with open(chunk_save_path, "w", encoding="utf-8") as f:
        json.dump(all_chunks, f, indent=2, ensure_ascii=False)
    print(f"📁 Saved enriched chunks → {chunk_save_path}")

    os.makedirs(persist_dir, exist_ok=True)
    version_tag = f"v{len(all_chunks)}_{chunk_size}_{chunk_overlap}"
    save_path = os.path.join(persist_dir, version_tag)
    os.makedirs(save_path, exist_ok=True)

    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        model_kwargs={"device": "cpu"},
        encode_kwargs={"normalize_embeddings": True}
    )

    vector_store = FAISS.from_texts(
        texts=[chunk["text"] for chunk in all_chunks],
        embedding=embeddings,
        metadatas=[chunk["metadata"] for chunk in all_chunks]
    )
    vector_store.save_local(save_path)

    print(f"✅ FAISS index saved at: {save_path}")
    avg_len = sum(len(c['text']) for c in all_chunks) / len(all_chunks) if all_chunks else 0
    print(f"📊 Stats → Chunks: {len(all_chunks)} | Avg length: {avg_len:.1f} characters")

    if failed_chunks:
        with open("failed_chunks.txt", "w") as f:
            for line in failed_chunks:
                f.write(line + "\n")
        print("📝 Failed chunk IDs saved to failed_chunks.txt")

dotenv_path = os.path.join(os.getcwd(), ".env")
load_dotenv(dotenv_path)
api_key = os.getenv("NVIDIA_API_KEY")
if not api_key:
    raise RuntimeError("NVIDIA_API_KEY not found in environment or .env")
os.environ["NVIDIA_API_KEY"] = api_key
# Initialize the enrichment model
llm = ChatNVIDIA(model="nvidia/llama-3.1-nemotron-70b-instruct")

create_faiss_store(
    md_dir="./personal_data",
    chunk_size=600,
    chunk_overlap=150,
    persist_dir="./faiss_store",
    llm=llm
)


# --- Earlier tokenizer-based alternative, kept for reference ---
# from langchain.text_splitter import (
#     RecursiveCharacterTextSplitter,
#     MarkdownHeaderTextSplitter
# )
# from langchain.embeddings import HuggingFaceEmbeddings
# from langchain.vectorstores import FAISS
# from langchain.docstore.document import Document
# from transformers import AutoTokenizer
# from pathlib import Path
# import os
# from typing import List

# def prepare_vectorstore(
#     base_path: str,
#     faiss_path: str,
#     use_markdown_headers: bool = True,
#     chunk_size: int = 600,
#     chunk_overlap: int = 150,
#     model_name: str = "sentence-transformers/all-MiniLM-L6-v2",
#     verbose: bool = True
# ) -> FAISS:
#     docs = []
#     for md_file in Path(base_path).glob("*.md"):
#         with open(md_file, "r", encoding="utf-8") as f:
#             content = f.read()
#         metadata = {
#             "source": md_file.name,
#             "file_type": "markdown",
#             "created_at": md_file.stat().st_ctime
#         }
#         docs.append(Document(page_content=content, metadata=metadata))

#     # Optional Markdown-aware splitting
#     if use_markdown_headers:
#         header_splitter = MarkdownHeaderTextSplitter(headers_to_split_on=[
#             ("#", "h1"), ("##", "h2"), ("###", "h3")
#         ])
#         structured_chunks = []
#         for doc in docs:
#             splits = header_splitter.split_text(doc.page_content)
#             for chunk in splits:
#                 chunk.metadata.update(doc.metadata)
#                 structured_chunks.append(chunk)
#     else:
#         structured_chunks = docs

#     # Tokenizer-based recursive splitting
#     tokenizer = AutoTokenizer.from_pretrained(model_name)
#     recursive_splitter = RecursiveCharacterTextSplitter(
#         chunk_size=chunk_size,
#         chunk_overlap=chunk_overlap,
#         length_function=lambda text: len(tokenizer.encode(text)),
#         separators=["\n## ", "\n### ", "\n\n", "\n", ". "]
#     )

#     final_chunks: List[Document] = []
#     for chunk in structured_chunks:
#         sub_chunks = recursive_splitter.split_text(chunk.page_content)
#         for i, sub in enumerate(sub_chunks):
#             final_chunks.append(Document(
#                 page_content=sub,
#                 metadata={**chunk.metadata, "sub_chunk": i}
#             ))

#     if verbose:
#         print(f"✅ Total chunks after splitting: {len(final_chunks)}")
#         print(f"📁 Storing to: {faiss_path}")

#     embedding_model = HuggingFaceEmbeddings(model_name=model_name)
#     vectorstore = FAISS.from_documents(final_chunks, embedding_model)
#     vectorstore.save_local(faiss_path)

#     if verbose:
#         print(f"✅ FAISS vectorstore saved at: {os.path.abspath(faiss_path)}")

#     return vectorstore

# vectorstore = prepare_vectorstore(
#     base_path="./personal_data",
#     faiss_path="krishna_vectorstore_hybrid",
#     use_markdown_headers=True,
#     chunk_size=600,
#     chunk_overlap=150,
#     verbose=True
# )
```
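Each record written to `all_chunks.json` (whose diff is too large to render below) pairs the enriched text with its metadata. An illustrative, made-up example of the shape `create_faiss_store` produces:

```json
{
  "text": "## Research\nKrishna's work on circadian transcription...\n\n---\n🔹 Summary:\n...\n\n🔸 Related Questions:\n- ...",
  "metadata": {
    "source": "profile.md",
    "header": "# Krishna Vamsi Dhulipalla",
    "chunk_id": "profile.md_#0_a1b2c3d4",
    "has_header": true,
    "word_count": 112,
    "summary": "...",
    "synthetic_queries": ["...", "...", "..."]
  }
}
```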
all_chunks.json
CHANGED
The diff for this file is too large to render.
app.py
CHANGED
```diff
@@ -3,9 +3,8 @@ import json
 import re
 import hashlib
 import gradio as gr
-import threading
 from functools import partial
-import
+import threading
 from collections import defaultdict
 from pathlib import Path
 from typing import List, Dict, Any, Optional, List, Literal, Type
@@ -19,13 +18,10 @@ from langchain_nvidia_ai_endpoints import ChatNVIDIA
 from langchain_core.output_parsers import StrOutputParser
 from langchain_core.prompts import ChatPromptTemplate
 from langchain.schema.runnable.passthrough import RunnableAssign
-from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_huggingface import HuggingFaceEmbeddings
 from langchain.vectorstores import FAISS
-from langchain.docstore.document import Document
 from langchain.retrievers import BM25Retriever
 from langchain_openai import ChatOpenAI
-from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
 from langchain.output_parsers import PydanticOutputParser

 #dotenv_path = os.path.join(os.getcwd(), ".env")
@@ -38,7 +34,7 @@ if not api_key:
     raise RuntimeError("🚨 NVIDIA_API_KEY not found in environment! Please add it in Hugging Face Secrets.")

 # Constants
-FAISS_PATH = "faiss_store/
+FAISS_PATH = "faiss_store/v61_600_150"
 CHUNKS_PATH = "all_chunks.json"

 if not Path(FAISS_PATH).exists():
@@ -47,13 +43,16 @@ if not Path(FAISS_PATH).exists():
 if not Path(CHUNKS_PATH).exists():
     raise FileNotFoundError(f"Chunks file not found at {CHUNKS_PATH}")

-KRISHNA_BIO = """Krishna Vamsi Dhulipalla completed
+KRISHNA_BIO = """Krishna Vamsi Dhulipalla completed his master's in Computer Science at Virginia Tech (degree awarded December 2024), with over 3 years of experience across data engineering, machine learning research, and real-time analytics. He specializes in building scalable data systems and intelligent LLM-powered applications, with strong expertise in Python, PyTorch, Hugging Face Transformers, and end-to-end ML pipelines.

 He has led projects involving retrieval-augmented generation (RAG), feature selection for genomic classification, fine-tuning domain-specific LLMs (e.g., DNABERT, HyenaDNA), and real-time forecasting systems using Kafka, Spark, and Airflow. His cloud proficiency spans AWS (S3, SageMaker, ECS, CloudWatch), GCP (BigQuery, Cloud Composer), and DevOps tools like Docker, Kubernetes, and MLflow.

 Krishna’s research has focused on genomic sequence modeling, transformer optimization, MLOps automation, and cross-domain generalization. He has published work in bioinformatics and machine learning applications for circadian transcription prediction and transcription factor binding.

-He holds certifications in NVIDIA’s RAG Agents with LLMs, Google Cloud Data Engineering, and AWS ML Specialization. Krishna is passionate about scalable LLM infrastructure, data-centric AI, and domain-adaptive ML solutions — combining deep technical expertise with real-world engineering impact.
+He holds certifications in NVIDIA’s RAG Agents with LLMs, Google Cloud Data Engineering, and AWS ML Specialization. Krishna is passionate about scalable LLM infrastructure, data-centric AI, and domain-adaptive ML solutions — combining deep technical expertise with real-world engineering impact.
+\n\n
+Besides his career, Krishna loves hiking, cricket, and exploring new technologies. He is a big fan of Marvel movies and space exploration.
+"""

 def initialize_console():
     console = Console()
@@ -62,6 +61,12 @@ def initialize_console():

 pprint = initialize_console()

+def PPrint(preface="State: "):
+    def print_and_return(x, preface=""):
+        pprint(preface, x)
+        return x
+    return RunnableLambda(partial(print_and_return, preface=preface))
+
 def load_chunks_from_json(path: str = CHUNKS_PATH) -> List[Dict]:
     with open(path, "r", encoding="utf-8") as f:
         return json.load(f)
@@ -111,12 +116,16 @@ answer_llm = ChatOpenAI(

 # Prompts
 repharser_prompt = ChatPromptTemplate.from_template(
-    "
-    "
-    "\n
-    "
+    "You are a smart retrieval assistant. Rewrite the user's question into 2 different variants optimized for hybrid retrieval systems (BM25 + dense vectors).\n\n"
+    "Your rewrites should:\n"
+    "- Vary tone and phrasing\n"
+    "- Expand or clarify intent if implicit\n"
+    "- Include helpful keywords, synonyms, or topic-specific terms if possible\n"
+    "- Be semantically close but diverse enough to match different chunks in the knowledge base\n\n"
+    "Original Question:\n{query}\n\n"
+    "Rewrites:\n"
+    "1.\n"
     "2."
-    "3."
 )

 relevance_prompt = ChatPromptTemplate.from_template("""
@@ -201,19 +210,11 @@ answer_prompt_relevant = ChatPromptTemplate.from_template(
     "- You may use general knowledge to briefly explain tools (like PyTorch or Kafka), but **do not invent any new facts** about Krishna.\n"
     "- Avoid filler phrases, repetition, or generic praise (e.g., strengths) unless directly asked.\n"
     "- End with a friendly follow-up question (no subheading needed here).\n\n"
-    "Example:\n"
-    "**Q: What work experience does Krishna have?**\n"
-    "**A:**\n"
-    "**🔧 Work Experience Overview**\n"
-    "**1. UJR Technologies** – Migrated batch ETL to real-time (Kafka/Spark), Dockerized services, and optimized Snowflake queries.\n"
-    "**2. Virginia Tech** – Built real-time IoT forecasting pipeline (10K sensors, GPT-4), achieving 91% accuracy and 15% energy savings.\n\n"
-    "_Would you like to dive into Krishna’s cloud deployment work using SageMaker and MLflow?_\n\n"
     "Now generate the answer for the following:\n\n"
     "User Question:\n{query}\n\n"
     "Answer:"
 )

-
 answer_prompt_fallback = ChatPromptTemplate.from_template(
     "You are Krishna’s personal AI assistant. The user asked a question unrelated to Krishna’s background.\n"
     "Respond with a touch of humor, then guide the conversation back to Krishna’s actual skills, experiences, or projects.\n\n"
@@ -239,17 +240,15 @@ parser_prompt = ChatPromptTemplate.from_template(
 # Helper Functions
 def parse_rewrites(raw_response: str) -> list[str]:
     lines = raw_response.strip().split("\n")
-    return [line.strip("0123456789. ").strip() for line in lines if line.strip()][:
+    return [line.strip("0123456789. ").strip() for line in lines if line.strip()][:2]

 def hybrid_retrieve(inputs, exclude_terms=None):
-    # if exclude_terms is None:
-    #     exclude_terms = ["cgpa", "university", "b.tech", "m.s.", "certification", "coursera", "edx", "goal", "aspiration", "linkedin", "publication", "ieee", "doi", "degree"]
     bm25_retriever = inputs["bm25_retriever"]
     all_queries = inputs["all_queries"]
     bm25_retriever.k = inputs["k_per_query"]
     vectorstore = inputs["vectorstore"]
     alpha = inputs["alpha"]
-    top_k = inputs.get("top_k",
+    top_k = inputs.get("top_k", 30)
     k_per_query = inputs["k_per_query"]

 scored_chunks = defaultdict(lambda: {
@@ -258,45 +257,37 @@ def hybrid_retrieve(inputs, exclude_terms=None):
         "content": None,
         "metadata": None,
     })
-
-    # Vector retrieval
-    vec_hits = vectorstore.similarity_search_with_score(subquery, k=k_per_query)
-    vec_results = []
-    for doc, score in vec_hits:
-        key = hashlib.md5(doc.page_content.encode("utf-8")).hexdigest()
-        vec_results.append((key, doc, score))
-
-    # BM25 retrieval
+
+    def process_subquery(subquery, k=k_per_query):
+        vec_hits = vectorstore.similarity_search_with_score(subquery, k=k)
         bm_hits = bm25_retriever.invoke(subquery)
+
+        vec_results = [
+            (hashlib.md5(doc.page_content.encode("utf-8")).hexdigest(), doc, score)
+            for doc, score in vec_hits
+        ]
+
+        bm_results = [
+            (hashlib.md5(doc.page_content.encode("utf-8")).hexdigest(), doc, 1.0 / (rank + 1))
+            for rank, doc in enumerate(bm_hits)
+        ]
+
         return vec_results, bm_results

-        scored_chunks[key]["bm25_score"] += bm_score
-        scored_chunks[key]["content"] = doc.page_content
-        scored_chunks[key]["metadata"] = doc.metadata
-
-    # Rest of the scoring and filtering logic remains the same
+    # Process each subquery serially
+    for subquery in all_queries:
+        vec_results, bm_results = process_subquery(subquery)
+
+        for key, doc, vec_score in vec_results:
+            scored_chunks[key]["vector_scores"].append(vec_score)
+            scored_chunks[key]["content"] = doc.page_content
+            scored_chunks[key]["metadata"] = doc.metadata
+
+        for key, doc, bm_score in bm_results:
+            scored_chunks[key]["bm25_score"] += bm_score
+            scored_chunks[key]["content"] = doc.page_content
+            scored_chunks[key]["metadata"] = doc.metadata
+
     all_vec_means = [np.mean(v["vector_scores"]) for v in scored_chunks.values() if v["vector_scores"]]
     max_vec = max(all_vec_means) if all_vec_means else 1
     min_vec = min(all_vec_means) if all_vec_means else 0
@@ -304,23 +295,18 @@ def hybrid_retrieve(inputs, exclude_terms=None):
     final_results = []
     for chunk in scored_chunks.values():
         vec_score = np.mean(chunk["vector_scores"]) if chunk["vector_scores"] else 0.0
-        norm_vec = (vec_score - min_vec) / (max_vec - min_vec)
+        norm_vec = 0.5 if max_vec == min_vec else (vec_score - min_vec) / (max_vec - min_vec)
         bm25_score = chunk["bm25_score"] / len(all_queries)
         final_score = alpha * norm_vec + (1 - alpha) * bm25_score

         content = chunk["content"].lower()
-        if final_score < 0.
+        if final_score < 0.01 or len(content.strip()) < 40:
             continue

         final_results.append({
             "content": chunk["content"],
             "source": chunk["metadata"].get("source", ""),
-            "final_score": float(round(final_score, 4)),
-            "vector_score": float(round(vec_score, 4)),
-            "bm25_score": float(round(bm25_score, 4)),
-            "metadata": chunk["metadata"],
-            "summary": chunk["metadata"].get("summary", ""),
-            "synthetic_queries": chunk["metadata"].get("synthetic_queries", [])
+            "final_score": float(round(final_score, 4))
         })

     final_results = sorted(final_results, key=lambda x: x["final_score"], reverse=True)
@@ -477,8 +463,8 @@ def chat_interface(message, history):
         "query": message,
         "all_queries": [message],
         "all_texts": all_chunks,
-        "k_per_query":
-        "alpha": 0.
+        "k_per_query": 10,
+        "alpha": 0.5,
         "vectorstore": vectorstore,
         "bm25_retriever": bm25_retriever,
     }
@@ -497,7 +483,6 @@ def chat_interface(message, history):

     # After streaming completes, update KB in background thread
     if full_response:
-        import threading
         update_thread = threading.Thread(
             target=update_knowledge_base,
             args=(message, full_response),
@@ -549,9 +534,9 @@ demo = gr.ChatInterface(
     title="💬 Ask Krishna's AI Assistant",
     description="💡 Ask anything about Krishna Vamsi Dhulipalla",
     examples=[
-        "
-        "What
-        "
+        "Give me an overview of Krishna Vamsi Dhulipalla’s work experience across different roles?",
+        "What programming languages and tools does Krishna use for data science?",
+        "Can this chatbot tell me what Krishna's chatbot architecture looks like and how it works?"
     ],
 )
```
faiss_store/v61_600_150/index.faiss
ADDED
Binary file (93.7 kB)