"""FastAPI service exposing a small Qdrant-backed helpdesk knowledge base.

At import time the module opens an on-disk Qdrant instance, makes sure the
``my_documents`` collection exists, seeds it with ten sample helpdesk
tickets (complaints, requests and incidents) and wires it into a LangChain
``QdrantVectorStore``.  Three HTTP endpoints are exposed:

* ``GET /get_data``  - similarity search over the stored documents.
* ``POST /add_data`` - add one document or a list of documents.
* ``GET /``          - trivial greeting.
"""

from typing import Any, Dict, List, Union
from uuid import uuid4

from fastapi import FastAPI, HTTPException
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
from langchain_core.documents import Document
# FastEmbedSparse, RetrievalMode, models and SparseVectorParams are not used
# below; the imports are kept so that re-enabling sparse/hybrid retrieval
# does not require touching the import block.
from langchain_qdrant import FastEmbedSparse, QdrantVectorStore, RetrievalMode
from pydantic import BaseModel, Field
from qdrant_client import QdrantClient, models
from qdrant_client.http.models import Distance, SparseVectorParams, VectorParams


class Data(BaseModel):
    """Request body of ``POST /add_data``."""

    # Each dictionary is unpacked into ``Document(**item)``, so it is expected
    # to carry ``page_content`` and, optionally, ``metadata``.
    items: Union[Dict[str, Any], List[Dict[str, Any]]] = Field(
        ..., description="Either a dictionary or a list of dictionaries."
    )


# ---------------------------------------------------------------------------
# Seed data: sample helpdesk tickets with their answers.
# ---------------------------------------------------------------------------
document_1 = Document(
    page_content=(
        "Aduan: Saya tidak bisa login ke sistem e-learning.\n"
        "Jawaban: Kami menemukan bahwa akun Anda terkunci setelah tiga kali "
        "gagal login. Kami telah membuka kunci akun dan menyarankan Anda "
        "untuk melakukan reset password menggunakan fitur 'Lupa Kata Sandi'."
    ),
    metadata={"source": "Aduan"},
)
document_2 = Document(
    page_content=(
        "Request: Mohon bantuannya untuk mendapatkan akses ke folder tim di "
        "server.\n"
        "Jawaban: Kami telah menambahkan akun Anda ke grup pengguna 'Tim IT' "
        "di Active Directory. Akses ke folder sekarang dapat dilakukan "
        "setelah Anda login ulang."
    ),
    metadata={"source": "Request"},
)
document_3 = Document(
    page_content=(
        "Incident: Laporan printer di lantai 3 tidak bisa mencetak.\n"
        "Jawaban: Kami melakukan restart pada spooler service di perangkat "
        "printer dan membersihkan antrian cetak yang bermasalah. Printer "
        "sudah kembali normal."
    ),
    metadata={"source": "Incident"},
)
document_4 = Document(
    page_content=(
        "Aduan: Email saya sering masuk ke folder spam penerima.\n"
        "Jawaban: Kami periksa konfigurasi SPF, DKIM, dan DMARC pada domain "
        "Anda. Ternyata ada konfigurasi SPF yang tidak lengkap. Kami telah "
        "memperbaikinya dan hasil pengujian sudah menunjukkan pengiriman "
        "email berjalan normal."
    ),
    metadata={"source": "Aduan"},
)
document_5 = Document(
    page_content=(
        "Request: Saya membutuhkan instalasi software AutoCAD untuk proyek "
        "desain.\n"
        "Jawaban: Kami telah mengunduh versi terbaru dari situs resmi "
        "AutoDesk dan melakukan instalasi di laptop Anda. Lisensi telah "
        "diaktivasi menggunakan akun universitas."
    ),
    metadata={"source": "Request"},
)
document_6 = Document(
    page_content=(
        "Incident: Sistem ERP tidak bisa mengakses modul keuangan sejak "
        "pagi.\n"
        "Jawaban: Kami temukan bahwa service database MySQL berhenti secara "
        "tiba-tiba. Service telah kami nyalakan kembali dan modul keuangan "
        "kini dapat diakses kembali."
    ),
    metadata={"source": "Incident"},
)
document_7 = Document(
    page_content=(
        "Aduan: Aplikasi mobile sering crash saat dibuka.\n"
        "Jawaban: Kami analisis log error dan menemukan bug pada fitur "
        "notifikasi. Kami telah melakukan patch pada versi 1.2.3 dan "
        "memperbarui aplikasi Anda melalui MDM."
    ),
    metadata={"source": "Aduan"},
)
document_8 = Document(
    page_content=(
        "Request: Mohon dibuatkan email dinas baru untuk staf baru di "
        "departemen HR.\n"
        "Jawaban: Email telah dibuat dengan format nama.staf@institusi.ac.id "
        "dan password default. Informasi login telah kami kirimkan melalui "
        "email pribadi yang terdaftar."
    ),
    metadata={"source": "Request"},
)
document_9 = Document(
    page_content=(
        "Incident: Koneksi internet putus-putus di gedung B.\n"
        "Jawaban: Kami lakukan pengecekan router dan mengganti kabel jaringan "
        "yang rusak di lantai 2. Koneksi kini stabil dan normal."
    ),
    metadata={"source": "Incident"},
)
document_10 = Document(
    page_content=(
        "Aduan: Layar laptop saya berkedip-kedip.\n"
        "Jawaban: Masalah disebabkan oleh driver grafis yang tidak "
        "kompatibel. Kami telah menginstal versi driver yang sesuai dengan "
        "perangkat Anda dan masalah layar sudah tidak muncul lagi."
    ),
    metadata={"source": "Aduan"},
)

documents = [
    document_1,
    document_2,
    document_3,
    document_4,
    document_5,
    document_6,
    document_7,
    document_8,
    document_9,
    document_10,
]
uuids = [str(uuid4()) for _ in documents]
docs = documents

# ---------------------------------------------------------------------------
# Vector store setup.
# ---------------------------------------------------------------------------
COLLECTION_NAME = "my_documents"
EMBEDDING_MODEL = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"

# This is a dense sentence-embedding model, so it has to be handed to the
# vector store as ``embedding`` (dense retrieval), not ``sparse_embedding``.
embeddings = FastEmbedEmbeddings(model_name=EMBEDDING_MODEL)
# Backward-compatible alias for code that still refers to the old name.
sparse_embeddings = embeddings

client = QdrantClient(path="tmp/langchain_qdrant")

# The storage is persisted on disk, so the collection may already exist from
# a previous run; creating it unconditionally would fail on the second start.
# NOTE: a collection left behind by an older revision of this script (created
# without a vector configuration) must be removed manually before starting.
_is_new_collection = not client.collection_exists(COLLECTION_NAME)
if _is_new_collection:
    client.create_collection(
        collection_name=COLLECTION_NAME,
        vectors_config=VectorParams(
            # Probe the model once instead of hard-coding its output size.
            size=len(embeddings.embed_query("dimension probe")),
            distance=Distance.COSINE,
        ),
    )

qdrant = QdrantVectorStore(
    client=client,
    collection_name=COLLECTION_NAME,
    embedding=embeddings,
)

# Seed only a freshly created collection: the ids are random, so re-adding on
# every start would store duplicate copies of the sample documents.
if _is_new_collection:
    qdrant.add_documents(documents=documents, ids=uuids)

app = FastAPI()


@app.get("/get_data")
def get_data(query: str):
    """Return the stored documents most similar to ``query``.

    Each hit is serialised to a plain dictionary with its ``id`` removed and
    with every metadata key that starts with an underscore stripped.

    Args:
        query: Free-text search query.

    Returns:
        ``{"data": [...]}`` where the list holds the serialised documents.
    """
    found_docs = [hit.model_dump() for hit in qdrant.similarity_search(query)]
    for doc in found_docs:
        doc.pop("id", None)
        # ``startswith`` (unlike ``k[0]``) is safe for an empty key.
        doc["metadata"] = {
            key: value
            for key, value in doc["metadata"].items()
            if not key.startswith("_")
        }
    return {"data": found_docs}


@app.post("/add_data")
def add_data(data: Data):
    """Add one document or a list of documents to the vector store.

    Args:
        data: Request body; ``data.items`` is either a single dictionary or a
            list of dictionaries, each unpacked into ``Document(**item)``.

    Returns:
        A confirmation payload.

    Raises:
        HTTPException: 422 when an item cannot be turned into a ``Document``
            (for example when ``page_content`` is missing).
    """
    payloads = [data.items] if isinstance(data.items, dict) else data.items
    try:
        new_documents = [Document(**payload) for payload in payloads]
    except (TypeError, ValueError) as err:
        raise HTTPException(
            status_code=422, detail=f"Invalid document payload: {err}"
        ) from err

    # Nothing to embed or store for an empty list.
    if new_documents:
        qdrant.add_documents(documents=new_documents)
    return {"message": "Create data successfully!", "status_code": 201}


@app.get("/")
def greet_json():
    """Return a static greeting."""
    return {"Hello": "World!"}