# Import necessary libraries
import os
import json
import time
import re
import base64
from datetime import datetime
from io import BytesIO

import numpy as np
import pandas as pd
import gradio as gr
from gradio import ChatMessage
from gradio_modal import Modal
from sentence_transformers import CrossEncoder
from azure.storage.fileshare import ShareServiceClient

# Import custom modules
from climateqa.engine.embeddings import get_embeddings_function
from climateqa.engine.llm import get_llm
from climateqa.engine.vectorstore import get_pinecone_vectorstore
from climateqa.engine.reranker import get_reranker
from climateqa.sample_questions import QUESTIONS
from climateqa.constants import POSSIBLE_REPORTS
from climateqa.utils import get_image_from_azure_blob_storage
from climateqa.engine.graph import make_graph_agent
from climateqa.engine.chains.retrieve_papers import find_papers
from climateqa.event_handler import (
    init_audience,
    handle_retrieved_documents,
    stream_answer,
    handle_retrieved_owid_graphs,
)
from utils import create_user_id

# Load environment variables in local mode
try:
    from dotenv import load_dotenv
    load_dotenv()
except Exception:
    pass

# Set up Gradio Theme
theme = gr.themes.Base(
    primary_hue="blue",
    secondary_hue="red",
    font=[gr.themes.GoogleFont("Poppins"), "ui-sans-serif", "system-ui", "sans-serif"],
)

# Initialize prompt and system template
init_prompt = """
Hello, I am ClimateQ&A, a conversational assistant designed to help you understand climate change and biodiversity loss. I will answer your questions by **sifting through the IPCC and IPBES scientific reports**.

❓ How to use
- **Language**: You can ask me your questions in any language.
- **Audience**: You can specify your audience (children, general public, experts) to get a more adapted answer.
- **Sources**: You can choose to search in the IPCC or IPBES reports, or both.
- **Relevant content sources**: You can choose to search for figures, papers, or graphs that can be relevant for your question.

⚠️ Limitations
*Please note that the AI is not perfect and may sometimes give irrelevant answers. If you are not satisfied with the answer, please ask a more specific question or report your feedback to help us improve the system.*

🛈 Information
Please note that we log your questions for meta-analysis purposes, so avoid sharing any sensitive or personal information.

What do you want to learn?
"""
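
# Hedged sketch (an assumption, not part of the original source): elsewhere in
# this app, `init_prompt` is presumably rendered as the chatbot's opening
# assistant message, e.g. something along the lines of:
#
#     chatbot = gr.Chatbot(
#         value=[ChatMessage(role="assistant", content=init_prompt)],
#         type="messages",
#     )
#
# The exact component wiring lives outside this section.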
""" # Azure Blob Storage credentials account_key = os.environ["BLOB_ACCOUNT_KEY"] if len(account_key) == 86: account_key += "==" credential = { "account_key": account_key, "account_name": os.environ["BLOB_ACCOUNT_NAME"], } account_url = os.environ["BLOB_ACCOUNT_URL"] file_share_name = "climateqa" service = ShareServiceClient(account_url=account_url, credential=credential) share_client = service.get_share_client(file_share_name) user_id = create_user_id() # Citation information CITATION_LABEL = "BibTeX citation for ClimateQ&A" CITATION_TEXT = r"""@misc{climateqa, author={Théo Alves Da Costa, Timothée Bohe}, title={ClimateQ&A, AI-powered conversational assistant for climate change and biodiversity loss}, year={2024}, howpublished= {\url{https://climateqa.com}}, } @software{climateqa, author = {Théo Alves Da Costa, Timothée Bohe}, publisher = {ClimateQ&A}, title = {ClimateQ&A, AI-powered conversational assistant for climate change and biodiversity loss}, } """ # Create vectorstore and retriever embeddings_function = get_embeddings_function() vectorstore = get_pinecone_vectorstore(embeddings_function, index_name=os.getenv("PINECONE_API_INDEX")) vectorstore_graphs = get_pinecone_vectorstore(embeddings_function, index_name=os.getenv("PINECONE_API_INDEX_OWID"), text_key="description") llm = get_llm(provider="openai", max_tokens=1024, temperature=0.0) reranker = get_reranker("nano") agent = make_graph_agent(llm=llm, vectorstore_ipcc=vectorstore, vectorstore_graphs=vectorstore_graphs, reranker=reranker) # Function to update modal visibility def update_config_modal_visibility(config_open): new_config_visibility_status = not config_open return gr.update(visible=new_config_visibility_status), new_config_visibility_status # Main chat function # async def chat(query, history, audience, sources, reports, relevant_content_sources, search_only): async def chat( query: str, history: list[ChatMessage], audience: str, sources: list[str], reports: list[str], relevant_content_sources: list[str], search_only: bool ) -> tuple[list, str, str, str, list, str]: """Process a chat query and return response with relevant sources and visualizations. Args: query (str): The user's question history (list): Chat message history audience (str): Target audience type sources (list): Knowledge base sources to search reports (list): Specific reports to search within sources relevant_content_sources (list): Types of content to retrieve (figures, papers, etc) search_only (bool): Whether to only search without generating answer Yields: tuple: Contains: - history: Updated chat history - docs_html: HTML of retrieved documents - output_query: Processed query - output_language: Detected language - related_contents: Related content - graphs_html: HTML of relevant graphs """ # Log incoming question date_now = datetime.now().strftime("%Y-%m-%d %H:%M:%S") print(f">> NEW QUESTION ({date_now}) : {query}") audience_prompt = init_audience(audience) sources = sources or ["IPCC", "IPBES", "IPOS"] reports = reports or [] # Prepare inputs for agent inputs = { "user_input": query, "audience": audience_prompt, "sources_input": sources, "relevant_content_sources": relevant_content_sources, "search_only": search_only } # Get streaming events from agent result = agent.astream_events(inputs, version="v1") # Initialize state variables docs = [] used_figures = [] related_contents = [] docs_html = "" output_query = "" output_language = "" output_keywords = "" start_streaming = False graphs_html = "" figures = '