Spaces:

Sebbe33
/

complex_rag

Sleeping

App Files Files Community

Sebbe33 committed on Mar 8

Commit

3f48d1f

verified ·

1 Parent(s): fb3f158

Create app.py

Browse files

Files changed (1) hide show

app.py +80 -0

app.py ADDED Viewed

	@@ -0,0 +1,80 @@

+import os
+import chainlit as cl
+from dotenv import load_dotenv
+# LangChain imports for retrieval and generation
+from langchain.document_loaders import WebBaseLoader
+from langchain.text_splitter import CharacterTextSplitter
+from langchain.embeddings import OpenAIEmbeddings
+from langchain.vectorstores import FAISS
+from langchain.chains import RetrievalQA
+from langchain.llms import OpenAI
+# Load environment variables (e.g., OPENAI_API_KEY) via python-dotenv before any OpenAI client is constructed.
+load_dotenv()
+# Module-level RetrievalQA chain: assigned by start_chat(), read by process_question(); None until it has been built.
+qa_chain = None
+@cl.on_chat_start
+async def start_chat():
+    """
+    When the chat starts, load the document using WebBaseLoader, split it into chunks,
+    create embeddings, build a vector store, and finally initialize a RetrievalQA chain.
+    This chain will serve as the backend for our RAG system.
+    """
+    global qa_chain
+    # URL to crawl (German Wikipedia page on Künstliche Intelligenz)
+    url = "https://de.wikipedia.org/wiki/K%C3%BCnstliche_Intelligenz"
+    # Retrieve the document from the webpage
+    loader = WebBaseLoader(url)
+    documents = loader.load()  # returns a list of Document objects
+    # Split the document into manageable chunks for better retrieval
+    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
+    docs = text_splitter.split_documents(documents)
+    # Create embeddings (make sure your OPENAI_API_KEY is set in your environment)
+    embeddings = OpenAIEmbeddings()
+    # Build a vector store from the documents using FAISS
+    vectorstore = FAISS.from_documents(docs, embeddings)
+    # Configure the retriever: retrieve the top 3 most relevant chunks
+    retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
+    # Set up the language model (using OpenAI LLM here) with desired parameters
+    llm = OpenAI(temperature=0)
+    # Create a RetrievalQA chain that first retrieves relevant context and then generates an answer.
+    qa_chain = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)
+    await cl.Message(
+        content="✅ Document loaded and processed successfully! "
+                "You can now ask me questions about 'Künstliche Intelligenz'."
+    ).send()
+@cl.on_message
+async def process_question(message: cl.Message):
+    """
+    When a message is received, use the QA chain to process the query. The chain:
+    1. Retrieves relevant document chunks.
+    2. Augments your query with the retrieved context.
+    3. Generates an answer via the language model.
+    """
+    global qa_chain
+    if qa_chain is None:
+        await cl.Message(content="❌ The document has not been loaded yet.").send()
+        return
+    # Get the user's query
+    query = message.content.strip()
+    # Process the query using the RetrievalQA chain
+    result = qa_chain.run(query)
+    # Send the answer back to the user
+    await cl.Message(content=result).send()