iamseyhmus7 committed on
Commit
70d956a
·
verified ·
1 Parent(s): 035a8ae

Upload 17 files

Browse files
Dockerfile ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.10
2
+
3
+ WORKDIR /code
4
+
5
+ COPY requirements.txt .
6
+ RUN pip install --no-cache-dir -r requirements.txt
7
+
8
+ COPY . .
9
+
10
+ CMD ["uvicorn", "app.model:app", "--host", "0.0.0.0", "--port", "7860"]
RAG/.env ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ PINECONE_API_KEY=your-pinecone-api-key-here
2
+ PINECONE_INDEX_NAME=turkishgpt2-rag
3
+ PINECONE_DIMENSION=1024
4
+ PINECONE_ENVIRONMENT=us-west-2
RAG/VektorDataBase/chunkingg.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def chunk_text(text: str, max_words: int = 100, stride: int = 50) -> list[str]:
    """Split text into word chunks using a sliding window.

    Args:
        text: Input text; it is split on whitespace.
        max_words: Maximum number of words in each chunk (must be positive).
        stride: Number of words the window advances per chunk (must be positive).

    Returns:
        The chunks in order, each one a space-joined run of words. Empty or
        whitespace-only text yields an empty list.

    Raises:
        ValueError: If max_words or stride is not positive. A non-positive
            stride would otherwise never advance the window.
    """
    if max_words <= 0:
        raise ValueError("max_words must be a positive integer")
    if stride <= 0:
        raise ValueError("stride must be a positive integer")

    words = text.split()
    chunks = []
    for start in range(0, len(words), stride):
        chunks.append(" ".join(words[start:start + max_words]))
        # Once a window reaches the last word, every later window would only
        # repeat a tail of this chunk, so stop here.
        if start + max_words >= len(words):
            break
    return chunks
RAG/VektorDataBase/embedder.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # embedder.py
2
+
3
+ from sentence_transformers import SentenceTransformer
4
+
5
# Load the multilingual-e5-large model once at import time.
model = SentenceTransformer("intfloat/multilingual-e5-large")


def get_embedding(text: str) -> list[float]:
    """Embed text as a "passage: "-prefixed input and return the vector as a list.

    Any exception raised while encoding is printed and None is returned
    instead, so callers must handle a None result.
    """
    try:
        passage = "passage: " + text.strip()
        vector = model.encode(passage, convert_to_numpy=True)
        return vector.tolist()
    except Exception as err:
        print(f"Embed hatası: {err}")
    return None
RAG/VektorDataBase/pinecone_client.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import hashlib
3
+ from dotenv import load_dotenv
4
+ from pinecone import Pinecone, ServerlessSpec
5
+ from chunkingg import chunk_text
6
+
7
# Load variables from a .env file into the process environment.
load_dotenv()

# Read the Pinecone settings from the environment.
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
INDEX_NAME = os.getenv("PINECONE_INDEX_NAME")
DIMENSION = int(os.getenv("PINECONE_DIMENSION", "1024"))
ENVIRONMENT = os.getenv("PINECONE_ENVIRONMENT", "us-west-2")

# Fail fast with a clear message instead of passing None to the Pinecone client
# (same check and message as app/model.py).
if not all([PINECONE_API_KEY, INDEX_NAME]):
    raise ValueError("Pinecone ortam değişkenleri eksik!")

# Pinecone connection.
pc = Pinecone(api_key=PINECONE_API_KEY)

# Create the index if it does not exist yet.
if INDEX_NAME not in pc.list_indexes().names():
    pc.create_index(
        name=INDEX_NAME,
        dimension=DIMENSION,
        metric="cosine",
        spec=ServerlessSpec(
            cloud="aws",
            region=ENVIRONMENT,
        ),
    )

# Index handle used by the functions in this module.
index = pc.Index(INDEX_NAME)
33
+
34
def get_id_from_url(url: str) -> str:
    """Return the hexadecimal MD5 digest of the URL, used as a stable ID."""
    digest = hashlib.md5()
    digest.update(url.encode())
    return digest.hexdigest()
37
+
38
def upsert_article_chunks(article_id_base: str, url: str, title: str, text: str, timestamp: str, embed_func, batch_size: int = 100):
    """Chunk an article, embed each chunk and upsert the vectors to Pinecone.

    Chunks that are empty, shorter than 30 characters, or identical to an
    earlier chunk of the same article are skipped, as are chunks for which
    embed_func does not return a non-empty list.

    Args:
        article_id_base: Prefix of every vector ID ("<base>-chunk-<i>").
        url: Article URL, stored in the vector metadata.
        title: Article title, stored in the vector metadata.
        text: Full article text to be chunked.
        timestamp: Timestamp string stored in the vector metadata.
        embed_func: Callable mapping a chunk string to a list of floats.
        batch_size: Maximum number of vectors sent per upsert request, so a
            long article does not produce one oversized request.

    Upsert errors are printed rather than raised.
    """
    vectors = []
    seen_chunks = set()  # chunk texts already queued for this article

    for i, chunk in enumerate(chunk_text(text)):
        chunk = chunk.strip()

        # Filtering: the length check also covers empty chunks.
        if len(chunk) < 30 or chunk in seen_chunks:
            continue

        embedding = embed_func(chunk)
        if not embedding or not isinstance(embedding, list):
            continue

        vectors.append({
            "id": f"{article_id_base}-chunk-{i}",
            "values": embedding,
            "metadata": {
                "url": url,
                "title": title,
                "text": chunk,
                "timestamp": timestamp,
            },
        })
        seen_chunks.add(chunk)

    if not vectors:
        print(f"[UYARI] '{title[:50]}...' için geçerli chunk bulunamadı.")
        return

    step = max(1, batch_size)  # guard against a zero or negative batch size
    try:
        for start in range(0, len(vectors), step):
            index.upsert(vectors[start:start + step])
        print(f"{len(vectors)} chunk '{title[:50]}...' için Pinecone'a yüklendi.")
    except Exception as e:
        print(f"[HATA] Pinecone upsert hatası: {e}")
84
+
85
def search_pinecone(query_embedding, top_k=3):
    """Query the Pinecone index with an embedding vector.

    Returns the 'matches' entry of the query result; the original docstring
    describes it as [{'id': ..., 'score': ..., 'metadata': {...}}, ...].
    On any error the exception is printed and an empty list is returned.
    """
    try:
        # Objects exposing .tolist() (e.g. numpy arrays) are converted with it;
        # anything else is copied into a plain list.
        if hasattr(query_embedding, 'tolist'):
            vector = query_embedding.tolist()
        else:
            vector = list(query_embedding)
        response = index.query(vector=vector, top_k=top_k, include_metadata=True)
        return response['matches']
    except Exception as err:
        print(f"[HATA] Pinecone arama hatası: {err}")
        return []
RAG/auto-runner.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
from pipeline import run_pipeline
import time

print("Sürekli çalışan haber botu başladı...")

# Run the pipeline forever, pausing five minutes between runs.
while True:
    try:
        run_pipeline()
    except Exception as e:
        # A single failed run (e.g. a transient network error) must not stop
        # the bot; report it and retry on the next cycle. KeyboardInterrupt is
        # not an Exception subclass, so Ctrl+C still stops the loop.
        print(f"[HATA] Pipeline çalıştırılırken hata oluştu: {e}")
    print("5 dakika bekleniyor...")
    time.sleep(300)  # 300 seconds = 5 minutes
RAG/pipeline.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from scraper.milliyet import get_sondakika_links as milliyet_links, get_news_content as milliyet_parse
2
+ from scraper.haberler import scrape_haberler
3
+ from VektorDataBase.pinecone_client import upsert_article_chunks , get_id_from_url
4
+ from VektorDataBase.embedder import get_embedding
5
+ from datetime import datetime
6
+
7
# Contents already processed by this process (kept in memory only).
processed_contents = set()


def is_duplicate_content(content: str) -> bool:
    """Return True if the stripped content is already in processed_contents."""
    normalized = content.strip()
    return normalized in processed_contents
13
+
14
def process_news_item(source: str, url: str, parse_func):
    """Parse one news URL and upsert its chunks unless it is empty or a duplicate.

    Args:
        source: Source name, used only in log messages.
        url: Article URL; also hashed into the vector ID prefix.
        parse_func: Callable taking the URL and returning a dict with
            "title" and "content" entries.

    Every exception is caught and printed so one bad article does not stop
    the caller's loop.
    """
    try:
        news = parse_func(url)
        # `or ""` keeps a None title/content from failing on .strip().
        title = (news.get("title") or "").strip()
        content = (news.get("content") or "").strip()

        if not title or not content:
            print(f"{source} boş içerik veya başlık atlandı → {url}")
            return

        if is_duplicate_content(content):
            print(f"{source} aynı içerik atlandı (dupe) → {url}")
            return

        upsert_article_chunks(
            article_id_base=get_id_from_url(url),
            url=url,
            title=title,
            text=content,
            # UTC, matching the timestamps produced by the haberler scraper.
            timestamp=datetime.utcnow().isoformat(),
            embed_func=get_embedding,
        )
        processed_contents.add(content)

    except Exception as e:
        print(f"Error processing {source} news item: {e}")
40
+
41
def run_pipeline():
    """Fetch news from both sources and upsert new articles into Pinecone.

    Each source is handled independently: a failure while fetching one source
    is printed and the other source is still processed. A failure on a single
    haberler.com article is printed and the remaining articles continue.
    """
    print(f"\nPipeline çalışıyor... {datetime.utcnow().isoformat()}")

    # --- Milliyet ---
    print("\nMilliyet haberleri çekiliyor...")
    try:
        links = milliyet_links()
    except Exception as e:
        print(f"Error fetching milliyet.com.tr links: {e}")
        links = []
    for link in links:
        process_news_item("milliyet.com.tr", link, milliyet_parse)

    # --- Haberler.com ---
    print("\nHaberler.com içerikleri işleniyor...")
    try:
        haberler_articles = scrape_haberler()
    except Exception as e:
        print(f"Error fetching haberler.com articles: {e}")
        haberler_articles = []

    for article in haberler_articles:
        try:
            title = article["title"].strip()
            content = article["content"].strip()

            if not title or not content:
                print(f"haberler.com boş içerik atlandı → {article['url']}")
                continue

            if is_duplicate_content(content):
                print(f"haberler.com aynı içerik atlandı (dupe) → {article['url']}")
                continue

            upsert_article_chunks(
                article_id_base=article["id"],
                url=article["url"],
                title=title,
                text=content,
                timestamp=article["timestamp"],
                embed_func=get_embedding,
            )
            processed_contents.add(content)
        except Exception as e:
            print(f"Error processing haberler.com news item: {e}")

    print(f"\nPipeline tamamlandı: {datetime.utcnow().isoformat()}")


if __name__ == "__main__":
    run_pipeline()
RAG/scraper/general_scraper.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from bs4 import BeautifulSoup
3
+ from urllib.parse import urlparse
4
+
5
def scrape_webpage(url: str) -> dict:
    """Fetch a web page and extract its title and paragraph text.

    Args:
        url: Address of the page to download.

    Returns:
        A dict with "title" (text of the <title> tag, or a placeholder when
        the tag is missing) and "content" (the texts of all <p> elements
        longer than 30 characters, joined by newlines).

    Raises:
        Exception: If the request fails or returns an HTTP error status (the
            original error is chained as the cause), or if no paragraph
            passes the length filter.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }

    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()  # surface HTTP error statuses
    except Exception as e:
        # Chain the cause so the original traceback is preserved.
        raise Exception(f"Web sayfası alınamadı: {e}") from e

    soup = BeautifulSoup(response.text, "html.parser")

    # Page title
    title_tag = soup.find("title")
    title = title_tag.get_text(strip=True) if title_tag else "Başlık bulunamadı"

    # Page content: extract each paragraph's text once, then filter by length.
    paragraph_texts = (p.get_text(strip=True) for p in soup.find_all("p"))
    content = "\n".join(t for t in paragraph_texts if len(t) > 30)

    if not content:
        raise Exception("Sayfa içeriği bulunamadı")
    return {
        "title": title,
        "content": content,
    }
RAG/scraper/haberler.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from bs4 import BeautifulSoup
3
+ from datetime import datetime
4
+ import hashlib
5
+
6
def get_article_id(url):
    """Return the hexadecimal MD5 digest of the URL as the article ID."""
    hasher = hashlib.md5()
    hasher.update(url.encode())
    return hasher.hexdigest()
8
+
9
def clean_haberler_content(content):
    """Drop lines containing copyright or site-promotion phrases.

    The text is split on newlines; a line is removed when it contains any of
    the blacklist phrases, compared case-insensitively via str.lower(). The
    remaining lines are re-joined and the result is stripped.
    """
    blacklist_phrases = (
        "© Copyright",
        "Haberler.com:",
        "Haber:",
        "bildirimlerimize izin vererek",
        "masaüstü",
        "Tüm Hakları Gizlidir",
        "Haberler.com’da",
    )
    lowered_phrases = [phrase.lower() for phrase in blacklist_phrases]

    def is_boilerplate(line):
        lowered_line = line.lower()
        return any(phrase in lowered_line for phrase in lowered_phrases)

    kept = [line for line in content.split("\n") if not is_boilerplate(line)]
    return "\n".join(kept).strip()
26
+
27
def extract_full_content(soup):
    """Extract article text from a parsed page, trying several page layouts.

    Lookup order:
      1. The first candidate container (known div classes/ids, then
         <article>, <section>, <main>) whose <p> texts exceed 50 characters.
      2. All <p> elements of the page.
      3. The text of every <div> on the page, joined by newlines.

    Returns an empty string when no step yields more than 50 characters.
    """

    def paragraph_text(node):
        # Newline-joined text of the non-empty <p> elements under node.
        texts = (p.get_text(strip=True) for p in node.find_all("p"))
        return "\n".join(t for t in texts if t)

    def long_enough(value):
        return bool(value) and len(value.strip()) > 50

    candidate_containers = [
        # Site-specific div classes / ids
        ("div", {"class": "haber-metin"}),
        ("div", {"class": "article-content"}),
        ("div", {"class": "news-content"}),
        ("div", {"class": "detail-text"}),
        ("div", {"class": "content-text"}),
        ("div", {"id": "content"}),
        # Article
        ("article", {}),
        # Section / main
        ("section", {}),
        ("main", {}),
    ]

    for tag, attr in candidate_containers:
        container = soup.find(tag, attr)
        if not container:
            continue
        content = paragraph_text(container)
        if long_enough(content):
            return content

    # Last resort: scan every <p> element on the page.
    content = paragraph_text(soup)
    if long_enough(content):
        return content

    # Extra fallback: some articles are a single text block inside <div>s.
    div_texts = (div.get_text(strip=True) for div in soup.find_all("div"))
    fallback_content = "\n".join(t for t in div_texts if t)
    if long_enough(fallback_content):
        return fallback_content

    return ""  # nothing usable was found
67
+
68
def scrape_haberler():
    """Scrape the haberler.com breaking-news listing and each linked article.

    Links are taken from every <a> element whose href contains "haberi";
    relative hrefs are prefixed with the site root and duplicates are skipped.

    Returns:
        A list of dicts with the keys "id", "title", "content", "url",
        "source" and "timestamp" (UTC, ISO format). Articles without an <h1>
        or with 50 or fewer characters of cleaned content are skipped. An
        empty list is returned when the listing page cannot be fetched.
    """
    url = "https://www.haberler.com/son-dakika/"
    headers = {"User-Agent": "Mozilla/5.0"}

    try:
        # Same timeout as the detail requests so a stalled connection cannot
        # hang the scraper indefinitely.
        response = requests.get(url, headers=headers, timeout=10)
    except requests.RequestException as e:
        print(f"Hata ({url}): {e}")
        return []
    response.encoding = "utf-8"
    soup = BeautifulSoup(response.text, "html.parser")

    articles = []
    seen = set()

    for a_tag in soup.select("a"):
        href = a_tag.get("href", "")
        text = a_tag.get_text(strip=True)

        if not href or not text or "haberi" not in href:
            continue

        if not href.startswith("http"):
            href = "https://www.haberler.com" + href

        if href in seen:
            continue
        seen.add(href)

        try:
            detail_resp = requests.get(href, headers=headers, timeout=10)
            # Skip HTTP error pages instead of parsing them as articles.
            detail_resp.raise_for_status()
            detail_resp.encoding = "utf-8"
            detail_soup = BeautifulSoup(detail_resp.text, "html.parser")

            title_tag = detail_soup.select_one("h1")
            full_content = extract_full_content(detail_soup)
            full_content = clean_haberler_content(full_content)

            if title_tag and full_content and len(full_content.strip()) > 50:
                article = {
                    "id": get_article_id(href),
                    "title": title_tag.get_text(strip=True),
                    "content": full_content,
                    "url": href,
                    "source": "haberler.com",
                    "timestamp": datetime.utcnow().isoformat(),
                }
                articles.append(article)
                print(f"{article['title']} → {href}")
            else:
                print(f"İçerik bulunamadı → {href}")

        except Exception as e:
            print(f"Hata ({href}): {e}")

    print(f"\nToplam {len(articles)} haber çekildi.")
    return articles
120
+
121
+ # Test / terminal çıktısı
122
if __name__ == "__main__":
    # Manual test run: scrape once and print a summary of every article.
    print("Haberler.com sitesinden son dakika haberleri çekiliyor...\n")

    articles = scrape_haberler()

    print("\nÇekilen Haber Özeti:")
    for i, article in enumerate(articles, start=1):
        report_lines = [
            f"\n{i}. Haber",
            f"Başlık: {article['title']}",
            f"Link: {article['url']}",
            f"İçerik Uzunluğu: {len(article['content'])} karakter",
            f"Zaman Damgası: {article['timestamp']}",
            f"\nİçerik:\n{article['content']}",
            "-" * 120,
        ]
        print("\n".join(report_lines))
RAG/scraper/milliyet.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # milliyet_link_scraper.py
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
+
5
def get_sondakika_links():
    """Collect news-article links from the Milliyet breaking-news page.

    Returns:
        A de-duplicated list of absolute URLs, in page order, that contain
        both "-737" and "milliyet.com.tr".

    Raises:
        requests.RequestException: If the listing page cannot be fetched, the
            request times out, or the server returns an HTTP error status.
    """
    url = "https://www.milliyet.com.tr/son-dakika/"
    headers = {
        "User-Agent": "Mozilla/5.0"
    }
    base_url = "https://www.milliyet.com.tr"

    # Timeout added so a stalled connection cannot hang the caller forever.
    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    seen = set()
    news_links = []

    for a in soup.find_all("a", href=True):
        href = a["href"].strip()

        if href.startswith("/"):
            href = base_url + href
        elif not href.startswith("http"):
            continue

        # Keep only links containing "-737" (the original comment describes
        # this as the news-ID marker) on the Milliyet domain.
        if "-737" in href and "milliyet.com.tr" in href and href not in seen:
            seen.add(href)
            news_links.append(href)

    return news_links
34
def get_news_content(url):
    """Download a Milliyet article and extract its title and body text.

    Args:
        url: Article URL.

    Returns:
        A dict with "title" (a placeholder string when no heading is found)
        and "content" (newline-joined paragraph text, possibly empty).

    Raises:
        requests.RequestException: If the page cannot be fetched, the request
            times out, or the server returns an HTTP error status.
    """
    headers = {"User-Agent": "Mozilla/5.0"}
    # Timeout added so a stalled connection cannot hang the caller forever.
    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    def _paragraph_text(node):
        # Newline-joined text of the non-empty <p> elements under node.
        texts = (p.get_text(strip=True) for p in node.find_all("p"))
        return "\n".join(t for t in texts if t)

    # Title lookup: try several HTML layouts, most specific first.
    title = None
    for tag_name, attrs in [
        ("h1", {"id": "title"}),
        ("h1", {"class": "news-title"}),
        ("h1", {}),
    ]:
        found = soup.find(tag_name, attrs)
        if found:
            title = found.get_text(strip=True)
            break
    if not title:
        title = "Başlık bulunamadı"

    # Content lookup: prefer the article container, then fall back to every
    # paragraph on the page when the container is missing or has no text.
    article_div = soup.find("div", class_="articleBox") or soup.find("div", class_="news-content")
    content = _paragraph_text(article_div) if article_div else ""
    if not content:
        content = _paragraph_text(soup)

    return {
        "title": title,
        "content": content.strip(),
    }
app/config.yaml ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Pinecone Arama Ayarları
2
+ pinecone:
3
+ top_k: 10 # İlk aşamada getirilecek maksimum kaynak sayısı
4
+ rerank_top: 5 # Yeniden sıralamadan sonra seçilecek kaynak sayısı
5
+ batch_size: 32 # Cross-encoder için batch boyutu
6
+
7
+ # Model Üretim Ayarları
8
+ model:
9
+ max_new_tokens: 124 # Maksimum üretilecek token sayısı
10
+ temperature: 0.7 # Yaratıcılık seviyesi (0-1 arası)
11
+ top_p: 0.9 # Nucleus sampling parametresi
12
+ top_k: 50 # Top-k sampling parametresi
13
+ repetition_penalty: 1.2 # Tekrar ceza katsayısı
14
+
15
+ # Önbellek Ayarları
16
+ cache:
17
+ maxsize: 100 # LRU cache maksimum boyutu
18
+ ttl: 300 # Önbellek ömrü (saniye)
19
+
20
+ # İsteğe Bağlı Gelişmiş Ayarlar
21
+ advanced:
22
+ enable_fallback: true # Kaynak yoksa fallback mekanizmasını aktif et
23
+ truncate_sources: true # Kaynakları otomatik kısalt
24
+ log_level: "INFO" # Log seviyesi (DEBUG, INFO, WARNING, ERROR)
app/model.py ADDED
@@ -0,0 +1,255 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import asyncio
3
+ import logging
4
+ import re
5
+ import yaml
6
+ import torch
7
+ import numpy as np
8
+ from functools import lru_cache
9
+ from fastapi import FastAPI, Request
10
+ from fastapi.responses import JSONResponse
11
+ from fastapi.staticfiles import StaticFiles
12
+ from fastapi.templating import Jinja2Templates
13
+ from pydantic import BaseModel
14
+ from transformers import AutoTokenizer, AutoModelForCausalLM
15
+ from sentence_transformers import SentenceTransformer, CrossEncoder
16
+ from pinecone import Pinecone
17
+ from pathlib import Path
18
+ from dotenv import load_dotenv
19
+ from typing import Dict
20
+
21
+ # === LOGGING ===
22
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
23
+ logger = logging.getLogger(__name__)
24
+
25
+ # === CONFIG LOAD ===
26
+ CONFIG_PATH = Path(__file__).resolve().parent / "config.yaml"
27
def load_config() -> Dict:
    """Load config.yaml and fill any missing settings with built-in defaults.

    Returns:
        The parsed configuration. The built-in defaults are returned when the
        file cannot be read or parsed, or when it does not contain a mapping
        (for example an empty file). Sections or keys missing from the file
        are filled from the defaults, so later lookups such as
        config["cache"]["maxsize"] cannot fail on a partial file.
    """
    defaults = {
        "pinecone": {"top_k": 10, "rerank_top": 5, "batch_size": 32},
        "model": {"max_new_tokens": 50, "temperature": 0.7},
        "cache": {"maxsize": 100},
    }
    try:
        with open(CONFIG_PATH, 'r', encoding='utf-8') as f:
            loaded = yaml.safe_load(f)
    except Exception as e:
        logger.error(f"Konfigürasyon dosyası yüklenemedi: {e}")
        return defaults

    if not isinstance(loaded, dict):
        # safe_load returns None for an empty file and may return a scalar or
        # a list for malformed content.
        logger.error("Konfigürasyon dosyası yüklenemedi: geçerli bir sözlük içermiyor")
        return defaults

    for section, section_defaults in defaults.items():
        current = loaded.get(section)
        if isinstance(current, dict):
            for key, value in section_defaults.items():
                current.setdefault(key, value)
        else:
            loaded[section] = dict(section_defaults)
    return loaded
38
+ config = load_config()
39
+
40
# === ENV LOAD ===
# The .env file sits in the sibling RAG directory, one level above this package.
env_path = Path(__file__).resolve().parents[1] / "RAG" / ".env"
load_dotenv(dotenv_path=env_path)

PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
PINECONE_ENV = os.getenv("PINECONE_ENVIRONMENT")
PINECONE_INDEX_NAME = os.getenv("PINECONE_INDEX_NAME")
# Refuse to start when any Pinecone setting is missing or empty.
if not (PINECONE_API_KEY and PINECONE_ENV and PINECONE_INDEX_NAME):
    raise ValueError("Pinecone ortam değişkenleri eksik!")

# === PINECONE CONNECT ===
pinecone_client = Pinecone(api_key=PINECONE_API_KEY, environment=PINECONE_ENV)
try:
    index = pinecone_client.Index(PINECONE_INDEX_NAME)
    # Fetching the stats also verifies the connection at startup.
    index_stats = index.describe_index_stats()
    logger.info(f"Pinecone index stats: {index_stats}")
except Exception as e:
    logger.error(f"Pinecone bağlantı hatası: {e}")
    raise
59
+
60
# === MODEL LOAD ===
MODEL_PATH = "iamseyhmus7/GenerationTurkishGPT2_final"
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
    model = AutoModelForCausalLM.from_pretrained(MODEL_PATH)
    # Reuse the EOS token for padding, on both the tokenizer and the model config.
    tokenizer.pad_token = tokenizer.eos_token
    model.config.pad_token_id = tokenizer.pad_token_id
    model.eval()
    # Use the GPU when one is available, otherwise the CPU.
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    model.to(device)
    logger.info(f"Model {MODEL_PATH} Hugging Face Hub'dan yüklendi, cihaz: {device}")
except Exception as e:
    logger.error(f"Model yükleme hatası: {e}")
    raise

# === EMBEDDING MODELS ===
# The query embedder and the reranking cross-encoder are both pinned to the CPU.
embedder = SentenceTransformer("intfloat/multilingual-e5-large", device="cpu")
cross_encoder = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2", device="cpu")
logger.info("Embedding ve reranking modelleri yüklendi")
79
+
80
# === FASTAPI ===
app = FastAPI()
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
# Static assets and HTML templates live next to this module.
_static_dir = os.path.join(BASE_DIR, "static")
_templates_dir = os.path.join(BASE_DIR, "templates")
app.mount("/static", StaticFiles(directory=_static_dir), name="static")
templates = Jinja2Templates(directory=_templates_dir)


# Request body of POST /api/ask: a single `query` string.
class QuestionRequest(BaseModel):
    query: str
88
+
89
def clean_text_output(text: str) -> str:
    """Strip prompt text, instructions, links and list markers from an answer.

    A fixed sequence of regex removals is applied in order, then all
    whitespace runs are collapsed to single spaces and the result is stripped.
    """
    # Instruction sentence that the prompt places at the start of the output.
    instruction = r"^(Sadece doğru, kısa ve açık bilgi ver\.? Ekstra açıklama veya kaynak ekleme\.?)"
    removal_rules = (
        # Leading instruction sentence.
        (instruction, re.IGNORECASE),
        # Everything up to and including the first Metin:/output:/Cevap: label.
        (r"^.*?(Metin:|output:|Cevap:)", re.IGNORECASE | re.DOTALL),
        # Leftover single-line instructions or labels at the start of a line.
        (r"^(Aşağıdaki haber.*|Yalnızca olay özeti.*|Cevapta sadece.*|Metin:|output:|Cevap:)", re.IGNORECASE | re.MULTILINE),
        # "More info" phrases, Wikipedia/source mentions, and bare links.
        (r"(Detaylı bilgi için.*|Daha fazla bilgi için.*|Wikipedia.*|Kaynak:.*|https?://\S+)", re.IGNORECASE),
        # Bullet markers and leading list numbers.
        (r"^\- ", re.MULTILINE),
        (r"^\d+[\.\)]?\s+", re.MULTILINE),
        # Instruction sentence again, in case the removals above exposed it.
        (instruction, re.IGNORECASE),
    )
    for pattern, flags in removal_rules:
        text = re.sub(pattern, "", text, flags=flags)
    # Collapse repeated whitespace and trim both ends.
    return re.sub(r"\s+", " ", text).strip()
116
+
117
@lru_cache(maxsize=config["cache"]["maxsize"])
def get_embedding(text: str, max_length: int = 512) -> np.ndarray:
    """Embed a query string; results are memoized per (text, max_length).

    The stripped text is prefixed with "query: " and the prefixed string is
    cut to max_length characters before encoding with normalized embeddings.
    """
    query_text = "query: " + text.strip()
    return embedder.encode(query_text[:max_length], normalize_embeddings=True)
121
+
122
@lru_cache(maxsize=32)
def _pinecone_query_bucketed(query: str, top_k: int, ttl_bucket: int) -> tuple:
    """Run the Pinecone query; ttl_bucket only serves as part of the cache key."""
    query_embedding = get_embedding(query)
    result = index.query(vector=query_embedding.tolist(), top_k=top_k, include_metadata=True)
    output = []
    for m in result.get("matches", []):
        metadata = m.get("metadata") or {}  # tolerate matches without metadata
        text = metadata.get("text", "").strip()
        url = metadata.get("url", "")
        if text:
            output.append((text, url))
    return tuple(output)


def pinecone_query_cached(query: str, top_k: int) -> tuple:
    """Return (text, url) pairs for the query, cached for a limited time.

    Results are cached per (query, top_k) but expire after the configured
    `cache.ttl` seconds (default 300). The index is refreshed continuously by
    the ingestion pipeline, so a cache that never expires would keep serving
    stale or empty results.

    Args:
        query: User question.
        top_k: Number of matches requested from Pinecone.

    Returns:
        A tuple of (text, url) pairs for matches with non-empty text.
    """
    import time  # local import: only needed for the TTL bucket

    ttl = config.get("cache", {}).get("ttl", 300)
    try:
        ttl = max(1, int(ttl))
    except (TypeError, ValueError):
        ttl = 300
    # All calls within the same ttl-second window share one cache entry.
    ttl_bucket = int(time.time() // ttl)
    return _pinecone_query_bucketed(query, top_k, ttl_bucket)
134
+
135
async def retrieve_sources_from_pinecone(query: str, top_k: int = None) -> Dict[str, object]:
    """Retrieve passages for a query and keep the best one after reranking.

    Args:
        query: User question.
        top_k: Number of candidates fetched from Pinecone; defaults to
            config["pinecone"]["top_k"] when omitted or falsy.

    Returns:
        A dict with "sources" (text of the top reranked passage), "results"
        (list of (score, text, url) tuples for that passage) and "source_url".
        All values are empty when nothing is retrieved.
    """
    top_k = top_k or config["pinecone"]["top_k"]
    # The Pinecone query (and, on a cache miss, the embedding) is blocking
    # work; run it in a worker thread so the event loop keeps serving requests.
    output = await asyncio.to_thread(pinecone_query_cached, query, top_k)
    if not output:
        return {"sources": "", "results": [], "source_url": ""}

    # Rerank the candidates with the cross-encoder.
    sentence_pairs = [[query, text] for text, url in output]
    scores = await asyncio.to_thread(cross_encoder.predict, sentence_pairs)
    reranked = [(float(score), text, url) for score, (text, url) in zip(scores, output)]
    reranked.sort(key=lambda x: x[0], reverse=True)

    # Only the single best passage is kept.
    top_results = reranked[:1]
    top_texts = [text for _, text, _ in top_results]
    source_url = top_results[0][2] if top_results else ""
    return {"sources": "\n".join(top_texts), "results": top_results, "source_url": source_url}
149
+
150
async def generate_model_response(question: str) -> str:
    """Generate an answer to the question with the causal language model.

    The prompt is truncated to 256 tokens and decoded with 5-beam search for
    up to 64 new tokens.

    Returns:
        The decoded sequence outputs[0] with special tokens removed;
        extract_self_answer is applied to it afterwards to isolate the answer.
    """
    prompt = (
        f"input: {question}\noutput:"
        "Sadece doğru, kısa ve açık bilgi ver. Ekstra açıklama veya kaynak ekleme."
    )

    def _generate() -> str:
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=256).to(device)
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=64,
                do_sample=False,
                num_beams=5,
                no_repeat_ngram_size=3,
                early_stopping=True,
                pad_token_id=tokenizer.pad_token_id,
                eos_token_id=tokenizer.eos_token_id,
            )
        return tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Generation is blocking, compute-heavy work; run it in a worker thread
    # (as the cross-encoder calls already do) so the event loop is not stalled.
    return await asyncio.to_thread(_generate)
169
+
170
def extract_self_answer(output: str) -> str:
    """Return the answer part of a raw model output, stripped.

    Precedence: the text after the first "output:" label (case-insensitive),
    otherwise the text after the last "Cevap:" label, otherwise the whole
    string.
    """
    tagged = re.search(r"output:(.*)", output, flags=re.IGNORECASE | re.DOTALL)
    if tagged is not None:
        answer = tagged.group(1)
    elif "Cevap:" in output:
        # Text after the last "Cevap:" occurrence.
        answer = output.rpartition("Cevap:")[2]
    else:
        answer = output
    return answer.strip()
179
+
180
async def selfrag_agent(question: str):
    """Answer a question from the vector database or from the language model.

    Both candidates (the best retrieved passage and the model's generated
    answer) are cleaned and scored against the question with the
    cross-encoder. The retrieved passage wins unless the model's score is
    clearly higher.

    Returns:
        A dict with "answer" (str) and "source_url" (the passage URL, or None
        when the model answer or no answer is returned).
    """
    # 1. Vector-database answer and its source URL.
    result = await retrieve_sources_from_pinecone(question)
    vdb_paragraph = result.get("sources", "").strip()
    source_url = result.get("source_url", "")

    # 2. Model answer.
    model_paragraph = await generate_model_response(question)
    model_paragraph = extract_self_answer(model_paragraph)

    # 3. Clean both texts.
    vdb_paragraph = clean_text_output(vdb_paragraph)
    model_paragraph = clean_text_output(model_paragraph)

    # 4. Collect the non-empty candidates for cross-encoder scoring.
    candidates = []
    candidate_urls = []
    label_names = []
    if vdb_paragraph:
        candidates.append(vdb_paragraph)
        candidate_urls.append(source_url)
        label_names.append("VDB")
    if model_paragraph:
        candidates.append(model_paragraph)
        candidate_urls.append(None)
        label_names.append("MODEL")

    if not candidates:
        return {"answer": "Cevap bulunamadı.", "source_url": None}

    sentence_pairs = [[question, cand] for cand in candidates]
    scores = await asyncio.to_thread(cross_encoder.predict, sentence_pairs)
    # Log each score under the label of the candidate it belongs to.
    for label, score in zip(label_names, scores):
        logger.info(f"{label} Skor: {float(score):.4f}")

    # === Selection rules ===
    if len(scores) == 2:
        vdb_score = float(scores[0])
        model_score = float(scores[1])
        # Pick the model only when its score exceeds 1.5x the VDB score AND
        # the VDB score itself. The second condition matters for negative
        # scores, where 1.5 * vdb_score is lower than vdb_score and the plain
        # ratio test would select a worse-scoring model answer.
        if model_score > max(vdb_score, 1.5 * vdb_score):
            best_idx = 1
        else:
            best_idx = 0
    else:
        # Only one candidate exists, so it is the answer.
        best_idx = 0

    return {
        "answer": candidates[best_idx],
        "source_url": candidate_urls[best_idx],
    }
236
+
237
+
238
# Serve the chat UI: render templates/index.html for the site root.
@app.get("/")
async def home(request: Request):
    template_context = {"request": request}
    return templates.TemplateResponse("index.html", template_context)
241
+
242
# POST /api/ask: answer a question and return an HTML fragment in "answer".
# Responds 400 for an empty query and 500 when answering fails.
@app.post("/api/ask")
async def ask_question(request: QuestionRequest):
    import html  # local import: used only to escape the HTML response

    try:
        question = request.query.strip()
        if not question:
            return JSONResponse(status_code=400, content={"error": "Sorgu boş olamaz."})
        result = await selfrag_agent(question)

        # The answer text comes from scraped pages or model output and is sent
        # inside an HTML fragment, so escape it before adding markup.
        response_text = html.escape(result["answer"])
        source_url = result["source_url"]
        # Link only http(s) URLs so the anchor cannot carry e.g. javascript: URLs.
        if source_url and source_url.startswith(("http://", "https://")):
            safe_url = html.escape(source_url, quote=True)
            response_text += (
                f'<br><br>Daha fazla bilgi için: '
                f'<a href="{safe_url}" target="_blank" rel="noopener noreferrer">{safe_url}</a>'
            )
        return JSONResponse(content={"answer": response_text})
    except Exception as e:
        logger.error(f"API hatası: {e}")
        return JSONResponse(status_code=500, content={"error": f"Sunucu hatası: {str(e)}"})
app/static/script.js ADDED
@@ -0,0 +1,262 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // === Yardımcı Fonksiyonlar ===
2
// Short aliases for the DOM query methods.
const qs = (sel) => document.querySelector(sel);
const qsa = (sel) => document.querySelectorAll(sel);

// === DOM elements ===
// Sidebar controls
const sidebar = qs('#sidebar');
const openSidebarBtn = qs('#open-sidebar');
const closeSidebarBtn = qs('#close-sidebar');
// Chat form and message list
const chatForm = qs('#chat-form');
const messageInput = qs('#message-input');
const messagesContainer = qs('#messages');
const newChatBtn = qs('#new-chat');
const chatHistoryContainer = qs('#chat-history');
// Appearance and user settings
const toggleThemeBtn = qs('#toggle-theme');
const emojiPickerBtn = qs('#emoji-picker-btn');
const emojiPicker = qs('#emoji-picker');
const usernameInput = qs('#username-input');
const themeSelector = qs('#theme-selector');
const themePreview = qs('#theme-preview');
const chatSearch = qs('#chat-search');
const bubbleStyle = qs('#bubble-style');
// Falls back to the message list when no #chat-container element exists.
const chatContainer = qs('#chat-container') || messagesContainer;
23
+
24
// Saved chats. Corrupt or non-array data in localStorage falls back to an
// empty list instead of throwing and aborting the whole script.
let chats = [];
try {
  const storedChats = JSON.parse(localStorage.getItem('chats'));
  if (Array.isArray(storedChats)) chats = storedChats;
} catch (err) {
  console.warn('Stored chats could not be parsed; starting with an empty list.', err);
}
let currentChatId = null;
let currentUser = localStorage.getItem('username') || 'Kullanıcı';
let lastSender = null;
let messageDraft = localStorage.getItem('messageDraft') || '';

// === Theme colors ===
const themeColors = {
  default: '#4f46e5', blue: '#3b82f6', green: '#10b981', purple: '#8b5cf6', red: '#ef4444'
};
34
+
35
+ // === Başlangıç Ayarları ===
36
// Restore the saved username, message draft, dark mode, color theme and
// bubble style from localStorage into the UI.
function initializeSettings() {
  const savedColor = localStorage.getItem('colorTheme') || 'default';
  const savedBubble = localStorage.getItem('bubbleStyle') || 'rounded';
  const darkModeSaved = localStorage.getItem('theme') === 'dark';
  const bubbleRadius = savedBubble === 'rounded' ? '12px' : '4px';

  usernameInput.value = currentUser;
  messageInput.value = messageDraft;
  if (darkModeSaved) {
    document.body.classList.add('dark');
  }

  themeSelector.value = savedColor;
  bubbleStyle.value = savedBubble;
  themePreview.style.background = themeColors[savedColor];
  document.body.classList.add(`theme-${savedColor}`);
  document.documentElement.style.setProperty('--bubble-radius', bubbleRadius);
}
48
+
49
+ // === Tema Değiştirici ===
50
// Switch the color theme: remove any existing theme-* class text from <body>,
// add the new class, update the preview swatch and persist the choice.
function applyTheme(theme) {
  const body = document.body;
  const withoutTheme = body.className.replace(/theme-\w+/g, '');
  body.className = withoutTheme;
  body.classList.add(`theme-${theme}`);
  themePreview.style.background = themeColors[theme];
  localStorage.setItem('colorTheme', theme);
}
56
+
57
+ // === Karanlık Mod Değiştirici ===
58
+ toggleThemeBtn?.addEventListener('click', () => {
59
+ document.body.classList.toggle('dark');
60
+ localStorage.setItem('theme', document.body.classList.contains('dark') ? 'dark' : 'light');
61
+ });
62
+
63
+ // === Event Listeners ===
64
// Username: an empty or whitespace-only value falls back to 'Kullanıcı'.
usernameInput.addEventListener('change', () => {
  const typedName = usernameInput.value.trim();
  currentUser = typedName || 'Kullanıcı';
  localStorage.setItem('username', currentUser);
  renderChatHistory();
});

// Persist the unsent message on every keystroke.
messageInput.addEventListener('input', () => {
  const draft = messageInput.value;
  localStorage.setItem('messageDraft', draft);
});

// Theme selector: apply on change, recolour the preview dot on hover.
themeSelector.addEventListener('change', () => {
  applyTheme(themeSelector.value);
});
themeSelector.addEventListener('mouseover', () => {
  themePreview.style.background = themeColors[themeSelector.value];
});

// Bubble style: 'rounded' maps to 12px corners, anything else to 4px.
bubbleStyle.addEventListener('change', () => {
  const choice = bubbleStyle.value;
  document.documentElement.style.setProperty('--bubble-radius', choice === 'rounded' ? '12px' : '4px');
  localStorage.setItem('bubbleStyle', choice);
});

// Sidebar and emoji-picker visibility; these buttons may be absent.
openSidebarBtn?.addEventListener('click', () => {
  sidebar.classList.remove('hidden');
});
closeSidebarBtn?.addEventListener('click', () => {
  sidebar.classList.add('hidden');
});
emojiPickerBtn?.addEventListener('click', () => {
  emojiPicker.classList.toggle('hidden');
});

// Filter the chat list as the user types; the query is lower-cased here.
chatSearch.addEventListener('input', () => {
  const needle = chatSearch.value.toLowerCase();
  renderChatHistory(needle);
});
87
+
88
// Keyboard shortcuts in the message box:
//   Escape              -> hide the emoji picker
//   Enter without Shift -> submit the chat form
messageInput.addEventListener('keydown', (e) => {
  if (e.key === 'Escape') {
    emojiPicker.classList.add('hidden');
    return;
  }
  if (e.key !== 'Enter' || e.shiftKey) {
    return;
  }
  e.preventDefault();
  chatForm.dispatchEvent(new Event('submit'));
});
96
+
97
+ // === Yeni Sohbet Başlat ===
98
// Start a new, empty chat: register it under a timestamp-based id, persist
// it, refresh the history list and clear the message pane.
newChatBtn.addEventListener('click', () => {
  const freshId = Date.now().toString();
  currentChatId = freshId;
  chats.push({ id: freshId, messages: [] });

  saveChats();
  renderChatHistory();

  // Reset the pane so the first message starts a new sender group.
  lastSender = null;
  messagesContainer.innerHTML = '';

  showNotification('Yeni sohbet başlatıldı!', 'success');
});
107
+
108
+ // === Mesaj Gönderme ===
109
// True while a request to /api/ask is in flight; blocks overlapping submits.
let isWaitingResponse = false;

/**
 * Send the typed message to the backend and show the reply.
 *
 * The user message is rendered AND persisted before the request is made, so
 * the stored history keeps the order user -> bot (it was previously saved
 * after the bot reply, which reversed the pair on reload and made the chat
 * title the bot's answer). The input and its draft are cleared immediately,
 * so text typed while waiting is not wiped when the reply arrives.
 */
chatForm.addEventListener('submit', async (e) => {
  e.preventDefault();
  if (isWaitingResponse) return; // only one request at a time

  const query = messageInput.value.trim();
  if (!query) return showNotification('Lütfen bir mesaj yazın!', 'error');

  isWaitingResponse = true;
  addMessage(query, 'user');
  saveMessage(query, 'user');
  messageInput.value = '';
  localStorage.removeItem('messageDraft');
  showTypingIndicator();

  let reply;
  try {
    const res = await fetch('/api/ask', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ query: query })
    });
    const data = await res.json();
    // A response without an `answer` field is reported as a generic error.
    reply = data.answer || 'Bir hata oluştu.';
  } catch (err) {
    // Network failure or a non-JSON body.
    console.error('Request to /api/ask failed:', err);
    reply = 'Cevap alınamadı.';
  } finally {
    // Always drop the indicator and release the lock, whatever happened.
    removeTypingIndicator();
    isWaitingResponse = false;
  }

  addMessage(reply, 'bot');
  saveMessage(reply, 'bot');
  scrollToBottom();
});
143
+
144
+
145
+ // === Mesajları Kaydetme ===
146
/**
 * Append a message to the active chat and persist all chats.
 *
 * If there is no active chat, or the active id no longer matches any stored
 * chat (previously this crashed on `chat.messages`), a new chat record is
 * created on the fly.
 *
 * @param {string} text - Message text.
 * @param {string} sender - 'user' or 'bot'.
 */
function saveMessage(text, sender) {
  let chat = currentChatId ? chats.find(c => c.id === currentChatId) : undefined;
  if (!chat) {
    // Reuse a stale id if one is set; otherwise mint a timestamp-based id.
    currentChatId = currentChatId || Date.now().toString();
    chat = { id: currentChatId, messages: [] };
    chats.push(chat);
  }

  chat.messages.push({
    id: Date.now().toString(),
    text,
    sender,
    timestamp: new Date().toLocaleTimeString(),
    isPinned: false,
    reactions: ''
  });

  saveChats();
  renderChatHistory();
}
156
+
157
/** Serialise the `chats` array to JSON and store it under the 'chats' key. */
function saveChats() {
  const serialized = JSON.stringify(chats);
  localStorage.setItem('chats', serialized);
}
160
+
161
+ // === Görsel İşlevler ===
162
/** Scroll the chat container down to its full scroll height. */
function scrollToBottom() {
  const { scrollHeight } = chatContainer;
  chatContainer.scrollTop = scrollHeight;
}
165
+
166
/**
 * Append the three-dot "typing" placeholder (id `typing-indicator`) to the
 * message list and scroll it into view.
 */
function showTypingIndicator() {
  const indicator = Object.assign(document.createElement('div'), {
    id: 'typing-indicator',
    className: 'typing',
    innerHTML: '<span></span><span></span><span></span>'
  });
  messagesContainer.appendChild(indicator);
  scrollToBottom();
}
174
+
175
/** Remove the typing placeholder if one is currently in the document. */
function removeTypingIndicator() {
  const indicator = qs('#typing-indicator');
  if (indicator) {
    indicator.remove();
  }
}
178
+
179
/**
 * Show a toast in the bottom-left corner and remove it after 3 seconds.
 *
 * @param {string} text - Message to display (set as plain text).
 * @param {string} [type='success'] - 'success' gives a green toast; any
 *   other value gives a red one.
 */
function showNotification(text, type = 'success') {
  const toneClass = type === 'success' ? 'bg-green-500' : 'bg-red-500';

  const toast = document.createElement('div');
  toast.className = `notification fixed bottom-4 left-4 p-3 rounded-lg text-white ${toneClass}`;
  toast.textContent = text;
  document.body.appendChild(toast);

  setTimeout(() => {
    toast.remove();
  }, 3000);
}
186
+
187
+ // === Mesaj Ekleme ===
188
/**
 * Render one chat bubble at the end of the message list.
 *
 * The message text and the user name are untrusted (typed by the user or
 * returned by the backend), so they are written with `textContent` instead
 * of being interpolated into `innerHTML`; only static markup and class names
 * chosen here go through the HTML parser.
 *
 * @param {string} text - Message body, shown as plain text.
 * @param {string} sender - 'user' for the user's bubble, anything else for the bot.
 * @param {string} [id] - Value stored in `data-id`; defaults to the current epoch ms.
 * @param {boolean} [isPinned=false] - Adds the pinned styling and keeps the avatar visible.
 * @param {string} [timestamp] - Time label to display; defaults to the current
 *   local time. Pass the stored value when re-rendering saved history.
 */
function addMessage(text, sender, id = Date.now().toString(), isPinned = false, timestamp = new Date().toLocaleTimeString()) {
  const isUser = sender === 'user';
  // Consecutive messages from the same sender are grouped: tighter spacing
  // and a hidden avatar (unless the message is pinned).
  const isSameSender = sender === lastSender;

  const div = document.createElement('div');
  div.className = `message chat-group ${isUser ? 'user-message' : 'bot-message'}${isSameSender ? ' mt-1' : ''}${isPinned ? ' pinned-message' : ''}`;
  div.dataset.id = id;

  // Static skeleton only; every dynamic string is filled in below as text.
  div.innerHTML = `
    <img src="${isUser ? 'https://img.icons8.com/color/48/000000/user.png' : 'https://img.icons8.com/color/48/000000/bot.png'}" class="w-8 h-8 rounded-full ${isSameSender && !isPinned ? 'invisible' : ''}">
    <div class="flex-1">
      <div class="flex justify-between items-baseline">
        <span data-role="author" class="text-sm font-semibold ${isUser ? 'text-white' : 'text-gray-700 dark:text-gray-300'}"></span>
        <span data-role="time" class="text-xs text-gray-400"></span>
      </div>
      <div data-role="body" class="break-words"></div>
    </div>`;
  div.querySelector('[data-role="author"]').textContent = isUser ? currentUser : 'Bot';
  div.querySelector('[data-role="time"]').textContent = timestamp;
  div.querySelector('[data-role="body"]').textContent = text;

  messagesContainer.appendChild(div);
  lastSender = sender;
  scrollToBottom();
}
206
+
207
+ // === Sohbet Geçmişi ===
208
/**
 * Rebuild the chat history list.
 *
 * Entries are built with DOM APIs and `textContent`, and clicks are wired
 * with `addEventListener`: stored message text and chat ids come from
 * localStorage and must not be spliced into HTML or inline `onclick` code.
 *
 * @param {string} [query=''] - Lower-cased search text. When non-empty, only
 *   chats with at least one message containing it are listed.
 */
function renderChatHistory(query = '') {
  chatHistoryContainer.innerHTML = '';

  // Coerce to string so a malformed stored message cannot crash the filter.
  const matchesQuery = (chat) =>
    !query || chat.messages.some(m => String(m.text ?? '').toLowerCase().includes(query));

  chats.filter(matchesQuery).forEach(chat => {
    const last = chat.messages.at(-1);

    const item = document.createElement('div');
    item.className = `p-3 rounded-lg flex justify-between items-center cursor-pointer ${currentChatId === chat.id ? 'bg-indigo-100' : 'bg-gray-100'} hover:bg-indigo-50`;

    // Clickable summary: first 25 characters of the first message + timestamp.
    const summary = document.createElement('div');
    summary.className = 'flex-1';
    summary.addEventListener('click', () => window.loadChat(chat.id));

    const title = document.createElement('div');
    title.className = 'text-sm font-semibold text-gray-800';
    title.textContent = `${String(chat.messages[0]?.text ?? '').substring(0, 25) || 'Yeni Sohbet'}...`;

    const time = document.createElement('div');
    time.className = 'text-xs text-gray-500';
    time.textContent = last?.timestamp || '';

    summary.append(title, time);

    // Delete button; its markup is a fixed icon, so innerHTML is safe here.
    const removeBtn = document.createElement('button');
    removeBtn.type = 'button';
    removeBtn.className = 'text-red-500 hover:text-red-700';
    removeBtn.innerHTML = '<i class="fas fa-trash"></i>';
    removeBtn.addEventListener('click', () => window.deleteChat(chat.id));

    item.append(summary, removeBtn);
    chatHistoryContainer.appendChild(item);
  });
}
225
+
226
+ // === Global Fonksiyonlar ===
227
/**
 * Open a stored chat in the message pane.
 *
 * Exposed on `window` so it can be called from outside this script.
 * An unknown id (e.g. a chat that has already been deleted) now shows an
 * error toast instead of throwing on `chat.messages`, and the current chat
 * is left untouched.
 *
 * @param {string} id - Id of the chat to load.
 */
window.loadChat = function(id) {
  const chat = chats.find(c => c.id === id);
  if (!chat) {
    showNotification('Sohbet bulunamadı!', 'error');
    return;
  }

  currentChatId = id;
  messagesContainer.innerHTML = '';
  lastSender = null; // first replayed message starts a fresh sender group

  // Forward the stored timestamp so history can show when each message was
  // saved; an addMessage that takes only four parameters ignores it.
  chat.messages.forEach(msg => addMessage(msg.text, msg.sender, msg.id, msg.isPinned, msg.timestamp));

  scrollToBottom();
  sidebar.classList.add('hidden');
};
236
+
237
/**
 * Delete a stored chat.
 *
 * Exposed on `window`. Removes the chat, persists the remaining chats and
 * refreshes the history list; if the deleted chat was the open one, the
 * message pane and the active-chat state are cleared as well.
 *
 * @param {string} id - Id of the chat to remove.
 */
window.deleteChat = function(id) {
  const wasOpen = currentChatId === id;

  const remaining = chats.filter(({ id: chatId }) => chatId !== id);
  chats = remaining;
  saveChats();
  renderChatHistory();

  if (wasOpen) {
    messagesContainer.innerHTML = '';
    currentChatId = null;
    lastSender = null;
  }

  showNotification('Sohbet silindi!', 'success');
};
248
+
249
+ // === Emoji ve Quick Reply ===
250
/**
 * Append an emoji to the message input and return focus to it.
 *
 * @param {string} emoji - Text to append to the current input value.
 */
function addEmoji(emoji) {
  messageInput.value += emoji;
  messageInput.focus();
}
// index.html calls this from inline `onclick` attributes, which resolve names
// on the global object. Export it explicitly, like loadChat/deleteChat, so the
// handler works even if this script body runs inside a function scope.
// NOTE(review): the enclosing scope is not visible from here; the assignment
// is harmless if the function is already global.
window.addEmoji = addEmoji;
254
+
255
/**
 * Replace the input with a canned reply and submit the chat form.
 *
 * @param {string} text - Reply text to send.
 */
function quickReply(text) {
  messageInput.value = text;
  chatForm.dispatchEvent(new Event('submit'));
}
// index.html calls this from inline `onclick` attributes, which resolve names
// on the global object. Export it explicitly, like loadChat/deleteChat, so the
// handler works even if this script body runs inside a function scope.
// NOTE(review): the enclosing scope is not visible from here; the assignment
// is harmless if the function is already global.
window.quickReply = quickReply;
259
+
260
+ // === Başlat ===
261
+ initializeSettings();
262
+ renderChatHistory();
app/static/styles.css ADDED
@@ -0,0 +1,265 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ body {
2
+ font-family: 'Inter', sans-serif;
3
+ background-color: #f8f9fb;
4
+ color: #1f2937;
5
+ transition: background-color 0.3s ease, color 0.3s ease;
6
+ }
7
+
8
+ /* Sidebar */
9
+ #sidebar {
10
+ transition: transform 0.4s cubic-bezier(0.4, 0, 0.2, 1);
11
+ backdrop-filter: blur(10px);
12
+ -webkit-backdrop-filter: blur(10px);
13
+ box-shadow: 2px 0 12px rgba(0, 0, 0, 0.15);
14
+ }
15
+
16
+ #sidebar.hidden {
17
+ transform: translateX(-100%);
18
+ }
19
+
20
+ /* Chat alanı */
21
+ .chat-container {
22
+ height: calc(100vh - 140px);
23
+ overflow-y: auto;
24
+ background: url("data:image/svg+xml;utf8,<svg xmlns='http://www.w3.org/2000/svg' width='40' height='40' viewBox='0 0 40 40'><path fill='rgba(0,0,0,0.05)' d='M20 4a2 2 0 1 0 0 4 2 2 0 0 0 0-4zm-16 16a2 2 0 1 0 4 0 2 2 0 0 0-4 0zm32 0a2 2 0 1 0 4 0 2 2 0 0 0-4 0zm-16 16a2 2 0 1 0 0 4 2 2 0 0 0 0-4z'/></svg>") repeat;
25
+ }
26
+
27
+ /* Mesaj kutuları */
28
+ .message {
29
+ max-width: 75%;
30
+ margin: 8px 16px;
31
+ padding: 12px 16px;
32
+ border-radius: var(--bubble-radius, 12px);
33
+ line-height: 1.6;
34
+ animation: bubblePop 0.3s ease-out;
35
+ position: relative;
36
+ display: flex;
37
+ align-items: flex-start;
38
+ gap: 8px;
39
+ box-shadow: 0 3px 6px rgba(0, 0, 0, 0.1);
40
+ transition: transform 0.2s ease;
41
+ }
42
+
43
+ .message:hover {
44
+ transform: translateY(-2px);
45
+ }
46
+
47
+ .user-message {
48
+ margin-left: auto;
49
+ border-bottom-right-radius: 2px;
50
+ }
51
+
52
+ .bot-message {
53
+ border-bottom-left-radius: 2px;
54
+ }
55
+
56
+ /* Sabitlenen mesaj */
57
+ .pinned-message {
58
+ background: #fefcbf !important;
59
+ border: 1px solid #facc15;
60
+ position: sticky;
61
+ top: 0;
62
+ z-index: 10;
63
+ animation: none;
64
+ }
65
+
66
+ /* Yazıyor animasyonu */
67
+ .typing {
68
+ display: flex;
69
+ align-items: center;
70
+ gap: 6px;
71
+ padding: 12px 16px;
72
+ margin: 8px 16px;
73
+ }
74
+
75
+ .typing span {
76
+ width: 10px;
77
+ height: 10px;
78
+ border-radius: 50%;
79
+ animation: pulse 0.8s infinite alternate;
80
+ background: #4f46e5;
81
+ }
82
+
83
+ .typing span:nth-child(2) { animation-delay: 0.2s; }
84
+ .typing span:nth-child(3) { animation-delay: 0.4s; }
85
+
86
+ @keyframes bubblePop {
87
+ from { transform: scale(0.9) translateY(10px); opacity: 0; }
88
+ to { transform: scale(1) translateY(0); opacity: 1; }
89
+ }
90
+
91
+ @keyframes pulse {
92
+ to { transform: scale(1.3); opacity: 0.6; }
93
+ }
94
+
95
+ /* Bildirim */
96
+ .notification {
97
+ animation: slideInLeft 0.3s ease-out, fadeOut 0.3s ease-out 2.7s forwards;
98
+ border-left: 4px solid;
99
+ backdrop-filter: blur(5px);
100
+ }
101
+
102
+ @keyframes slideInLeft {
103
+ from { transform: translateX(-100%); opacity: 0; }
104
+ to { transform: translateX(0); opacity: 1; }
105
+ }
106
+
107
+ @keyframes fadeOut {
108
+ to { opacity: 0; }
109
+ }
110
+
111
+ /* Scrollbar */
112
+ .chat-container::-webkit-scrollbar {
113
+ width: 6px;
114
+ }
115
+
116
+ .chat-container::-webkit-scrollbar-thumb {
117
+ background: rgba(0, 0, 0, 0.1);
118
+ border-radius: 6px;
119
+ }
120
+
121
+ .chat-container::-webkit-scrollbar-thumb:hover {
122
+ background: rgba(0, 0, 0, 0.3);
123
+ }
124
+
125
+ /* Ripple Efekti */
126
+ button {
127
+ cursor: pointer;
128
+ position: relative;
129
+ transition: transform 0.2s, background 0.2s;
130
+ }
131
+
132
+ button:hover {
133
+ transform: scale(1.05);
134
+ }
135
+
136
+ button:active::after {
137
+ content: '';
138
+ position: absolute;
139
+ width: 100px;
140
+ height: 100px;
141
+ background: rgba(255, 255, 255, 0.3);
142
+ border-radius: 50%;
143
+ transform: scale(0);
144
+ animation: rippleEffect 0.6s ease-out;
145
+ top: 50%;
146
+ left: 50%;
147
+ transform-origin: center;
148
+ }
149
+
150
+ @keyframes rippleEffect {
151
+ to { transform: scale(4); opacity: 0; }
152
+ }
153
+
154
+ /* Input Alanı */
155
+ #message-input {
156
+ background: rgba(255, 255, 255, 0.8);
157
+ backdrop-filter: blur(5px);
158
+ }
159
+
160
+ .dark #message-input {
161
+ background: rgba(31, 41, 55, 0.8);
162
+ color: #f3f4f6;
163
+ }
164
+
165
+ /* Temalar */
166
+ :root {
167
+ --bubble-radius: 12px;
168
+ }
169
+
170
+ .theme-default .user-message {
171
+ background: linear-gradient(135deg, #6366f1, #7c3aed);
172
+ color: white;
173
+ }
174
+
175
+ .theme-default .bot-message {
176
+ background: linear-gradient(135deg, #e5e7eb, #d1d5db);
177
+ color: #1f2937;
178
+ }
179
+
180
+ .theme-blue .user-message {
181
+ background: linear-gradient(135deg, #3b82f6, #60a5fa);
182
+ color: white;
183
+ }
184
+
185
+ .theme-blue .bot-message {
186
+ background: linear-gradient(135deg, #bfdbfe, #93c5fd);
187
+ color: #1e3a8a;
188
+ }
189
+
190
+ .theme-green .user-message {
191
+ background: linear-gradient(135deg, #10b981, #34d399);
192
+ color: white;
193
+ }
194
+
195
+ .theme-green .bot-message {
196
+ background: linear-gradient(135deg, #a7f3d0, #6ee7b7);
197
+ color: #064e3b;
198
+ }
199
+
200
+ .theme-purple .user-message {
201
+ background: linear-gradient(135deg, #8b5cf6, #a78bfa);
202
+ color: white;
203
+ }
204
+
205
+ .theme-purple .bot-message {
206
+ background: linear-gradient(135deg, #ddd6fe, #c4b5fd);
207
+ color: #4c1d95;
208
+ }
209
+
210
+ .theme-red .user-message {
211
+ background: linear-gradient(135deg, #ef4444, #f87171);
212
+ color: white;
213
+ }
214
+
215
+ .theme-red .bot-message {
216
+ background: linear-gradient(135deg, #fecaca, #f9a8a8);
217
+ color: #991b1b;
218
+ }
219
+
220
+ /* Koyu mod (yumuşak koyu gri) */
221
+ .dark {
222
+ background-color: #1e1e1e;
223
+ color: #e5e7eb;
224
+ }
225
+
226
+ .dark .chat-container {
227
+ background: linear-gradient(135deg, #1e1e1e, #2c2c2c);
228
+ }
229
+
230
+ .dark .bg-white {
231
+ background: #2a2a2a;
232
+ color: #e5e7eb;
233
+ }
234
+ /* === Tema Uyumu: Sidebar ve Arka Plan === */
235
+
236
+ /* Aydınlık tema için sidebar ve yazılar */
237
+ .theme-default #sidebar {
238
+ background: #f8fafc;
239
+ color: #1e293b;
240
+ }
241
+ .theme-default .chat-container {
242
+ background: url("https://www.transparenttextures.com/patterns/pw-maze-white.png") repeat;
243
+ }
244
+
245
+ /* Karanlık tema için sidebar ve yazılar */
246
+ .dark #sidebar {
247
+ background: #1f2937;
248
+ color: #f8fafc;
249
+ }
250
+ .dark .chat-container {
251
+ background: url("https://www.transparenttextures.com/patterns/asfalt-dark.png") repeat;
252
+ }
253
+
254
+ /* Sidebar içindeki input/select'ler temaya uyum sağlar */
255
+ #sidebar input,
256
+ #sidebar select {
257
+ color: inherit;
258
+ background-color: transparent;
259
+ }
260
+
261
+ .dark #sidebar input,
262
+ .dark #sidebar select {
263
+ background-color: #374151;
264
+ color: #f1f5f9;
265
+ }
app/templates/index.html ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="tr">
3
+ <head>
4
+ <meta charset="UTF-8" />
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0"/>
6
+ <title>Sohbet Chatbot</title>
7
+ <script src="https://cdn.tailwindcss.com"></script>
8
+ <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/all.min.css"/>
9
+ <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap" rel="stylesheet"/>
10
+ <link rel="stylesheet" href="/static/styles.css"/>
11
+ </head>
12
+ <body class="bg-gray-100 dark:bg-[#1e1e1e] text-gray-900 dark:text-gray-100 transition-colors duration-300">
13
+ <div class="flex h-screen">
14
+ <!-- Sidebar -->
15
+ <div id="sidebar" class="w-80 bg-white dark:bg-[#2a2a2a] shadow-lg p-5 absolute md:static z-20 h-full" role="navigation" aria-label="Sohbet Geçmişi">
16
+ <!-- Profesyonel başlık + ikonlar -->
17
+ <div class="flex items-center justify-between mb-6">
18
+ <h2 class="text-2xl font-bold text-gray-800 dark:text-white">Sohbetler</h2>
19
+ <div class="flex items-center gap-2">
20
+ <button id="new-chat" class="text-indigo-600 hover:text-indigo-800 text-xl" title="Yeni Sohbet">
21
+ <i class="fas fa-plus-circle"></i>
22
+ </button>
23
+ <button id="toggle-theme" class="text-gray-600 hover:text-gray-800 dark:text-gray-300 text-xl ml-2" title="Tema Değiştir">
24
+ <i class="fas fa-moon"></i>
25
+ </button>
26
+ <button id="close-sidebar" class="md:hidden text-gray-600 hover:text-gray-800 dark:text-gray-300 text-xl" title="Kapat">
27
+ <i class="fas fa-times-circle"></i>
28
+ </button>
29
+ </div>
30
+ </div>
31
+
32
+ <!-- Arama -->
33
+ <div class="mb-4">
34
+ <input id="chat-search" type="text" placeholder="Sohbetlerde Ara..."
35
+ class="w-full p-2 border rounded-lg focus:outline-none focus:ring-2 focus:ring-indigo-500 bg-white text-gray-800 dark:bg-gray-700 dark:text-white"
36
+ aria-label="Sohbet Arama">
37
+ </div>
38
+
39
+ <!-- Kullanıcı adı -->
40
+ <div class="mb-4">
41
+ <input id="username-input" type="text" placeholder="Kullanıcı Adı Girin"
42
+ class="w-full p-2 border rounded-lg focus:outline-none focus:ring-2 focus:ring-indigo-500 bg-white text-gray-800 dark:bg-gray-700 dark:text-white"
43
+ aria-label="Kullanıcı Adı">
44
+ </div>
45
+
46
+ <!-- Tema Seçici -->
47
+ <div class="mb-4 relative">
48
+ <select id="theme-selector"
49
+ class="w-full p-2 border rounded-lg focus:outline-none focus:ring-2 focus:ring-indigo-500 bg-white text-gray-800 dark:bg-gray-700 dark:text-white"
50
+ aria-label="Tema Seç">
51
+ <option value="default">Varsayılan Tema</option>
52
+ <option value="blue">Mavi Tema</option>
53
+ <option value="green">Yeşil Tema</option>
54
+ <option value="purple">Mor Tema</option>
55
+ <option value="red">Kırmızı Tema</option>
56
+ </select>
57
+ <div id="theme-preview" class="absolute w-4 h-4 rounded-full top-3 right-3"></div>
58
+ </div>
59
+
60
+ <!-- Mesaj Stili Seçici -->
61
+ <div class="mb-4">
62
+ <select id="bubble-style" class="w-full p-2 border rounded-lg focus:outline-none focus:ring-2 focus:ring-indigo-500" aria-label="Mesaj Balonu Stili">
63
+ <option value="rounded">Yuvarlak Kenarlı</option>
64
+ <option value="sharp">Keskin Kenarlı</option>
65
+ </select>
66
+ </div>
67
+
68
+ <!-- Sohbet Geçmişi -->
69
+ <div id="chat-history" class="space-y-3" role="list" aria-label="Sohbet Geçmişi Listesi">
70
+ <!-- Dinamik sohbet geçmişi -->
71
+ </div>
72
+ </div>
73
+
74
+ <!-- Ana Sohbet Alanı -->
75
+ <div class="flex-1 flex flex-col">
76
+ <!-- Başlık -->
77
+ <div class="bg-white dark:bg-[#2a2a2a] shadow p-4 flex justify-between items-center">
78
+ <button id="open-sidebar" class="md:hidden text-gray-600 hover:text-gray-800 dark:text-gray-300 ripple" aria-label="Sidebar'ı Aç">
79
+ <i class="fas fa-bars"></i>
80
+ </button>
81
+ <h1 class="text-2xl font-bold text-gray-800 dark:text-gray-200">Chatbot 🤖</h1>
82
+ </div>
83
+
84
+ <!-- Mesajlar -->
85
+ <div id="chat-container" class="chat-container p-6 flex-1 bg-white dark:bg-[#1e1e1e]" role="main" aria-label="Sohbet Alanı">
86
+ <div id="messages" class="flex flex-col space-y-2"></div>
87
+ </div>
88
+
89
+ <!-- Mesaj Giriş Alanı -->
90
+ <div class="bg-white dark:bg-[#2a2a2a] p-4 shadow">
91
+ <div id="quick-replies" class="flex gap-2 mb-2 flex-wrap">
92
+ <button class="px-3 py-1 bg-gray-200 dark:bg-gray-700 rounded-full text-sm hover:bg-gray-300 dark:hover:bg-gray-600 ripple" onclick="quickReply('Merhaba! 😊')">Merhaba!</button>
93
+ <button class="px-3 py-1 bg-gray-200 dark:bg-gray-700 rounded-full text-sm hover:bg-gray-300 dark:hover:bg-gray-600 ripple" onclick="quickReply('Nasılsın?')">Nasılsın?</button>
94
+ <button class="px-3 py-1 bg-gray-200 dark:bg-gray-700 rounded-full text-sm hover:bg-gray-300 dark:hover:bg-gray-600 ripple" onclick="quickReply('Teşekkürler!')">Teşekkürler!</button>
95
+ </div>
96
+ <form id="chat-form" class="flex items-center gap-3">
97
+ <div class="relative flex-1">
98
+ <input id="message-input" type="text" placeholder="Mesajınızı yazın... 😊" class="w-full p-3 border rounded-lg focus:outline-none focus:ring-2 focus:ring-indigo-500 dark:bg-gray-700 dark:text-white" aria-label="Mesaj Girişi">
99
+ <button type="button" id="emoji-picker-btn" class="absolute right-3 top-1/2 transform -translate-y-1/2 text-gray-500 hover:text-gray-700 ripple" aria-label="Emoji Seçici">
100
+ <i class="fas fa-smile"></i>
101
+ </button>
102
+ <div id="emoji-picker" class="hidden absolute bottom-14 right-0 bg-white dark:bg-gray-800 border rounded-lg p-3 shadow-xl max-w-sm z-10">
103
+ <div class="flex gap-2 flex-wrap text-xl">
104
+ <span class="cursor-pointer hover:bg-gray-100 dark:hover:bg-gray-600 p-1 rounded" onclick="addEmoji('😊')">😊</span>
105
+ <span class="cursor-pointer hover:bg-gray-100 dark:hover:bg-gray-600 p-1 rounded" onclick="addEmoji('😂')">😂</span>
106
+ <span class="cursor-pointer hover:bg-gray-100 dark:hover:bg-gray-600 p-1 rounded" onclick="addEmoji('👍')">👍</span>
107
+ <span class="cursor-pointer hover:bg-gray-100 dark:hover:bg-gray-600 p-1 rounded" onclick="addEmoji('❤️')">❤️</span>
108
+ <span class="cursor-pointer hover:bg-gray-100 dark:hover:bg-gray-600 p-1 rounded" onclick="addEmoji('🚀')">🚀</span>
109
+ <span class="cursor-pointer hover:bg-gray-100 dark:hover:bg-gray-600 p-1 rounded" onclick="addEmoji('🌟')">🌟</span>
110
+ <span class="cursor-pointer hover:bg-gray-100 dark:hover:bg-gray-600 p-1 rounded" onclick="addEmoji('😎')">😎</span>
111
+ <span class="cursor-pointer hover:bg-gray-100 dark:hover:bg-gray-600 p-1 rounded" onclick="addEmoji('🎉')">🎉</span>
112
+ <span class="cursor-pointer hover:bg-gray-100 dark:hover:bg-gray-600 p-1 rounded" onclick="addEmoji('🙌')">🙌</span>
113
+ <span class="cursor-pointer hover:bg-gray-100 dark:hover:bg-gray-600 p-1 rounded" onclick="addEmoji('🔥')">🔥</span>
114
+ </div>
115
+ </div>
116
+ </div>
117
+ <button type="submit" class="bg-indigo-600 text-white p-3 rounded-lg hover:bg-indigo-700 transition-colors ripple" aria-label="Mesaj Gönder">
118
+ <i class="fas fa-paper-plane"></i>
119
+ </button>
120
+ </form>
121
+ </div>
122
+ </div>
123
+ </div>
124
+
125
+ <script src="/static/script.js"></script>
126
+ </body>
127
+ </html>
docker-compose.yml ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
version: '3.8'

services:
  news-bot:
    build: .
    container_name: news-pipeline-bot
    restart: always
    env_file:
      - .env
    # Publish the port uvicorn binds to in the Dockerfile CMD (--port 7860);
    # without this mapping the app is unreachable from the host.
    ports:
      - "7860:7860"
    volumes:
      # The Dockerfile sets WORKDIR /code and copies the project there, so the
      # bind mount must target /code; a mount at /app is never read by the app.
      - .:/code
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
fastapi
uvicorn
transformers
torch
pinecone
requests
beautifulsoup4
# Imported by RAG/VektorDataBase/embedder.py (SentenceTransformer)
sentence-transformers
# Imported by RAG/VektorDataBase/pinecone_client.py (load_dotenv)
python-dotenv
# Add every other module the project uses here