Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -3,11 +3,10 @@ from PyPDF2 import PdfReader
|
|
3 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
4 |
from langchain.embeddings import HuggingFaceEmbeddings
|
5 |
from langchain.vectorstores import FAISS
|
6 |
-
from langchain.llms import HuggingFaceHub
|
7 |
-
from langchain.chains import RetrievalQAWithSourcesChain
|
8 |
import pandas as pd
|
9 |
import os
|
10 |
import io
|
|
|
11 |
|
12 |
# --- 1. Data Loading and Preprocessing ---
|
13 |
|
@@ -48,44 +47,85 @@ def create_vectorstore(chunks):
|
|
48 |
vectorstore = FAISS.from_texts(chunks, embeddings)
|
49 |
return vectorstore
|
50 |
|
51 |
-
# --- 2. Question Answering with
|
52 |
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
|
59 |
-
def
|
60 |
-
"""Performs retrieval
|
61 |
-
|
62 |
-
|
63 |
-
|
|
|
|
|
64 |
|
65 |
# --- 3. Streamlit UI ---
|
66 |
|
67 |
def main():
|
68 |
-
st.title("PDF Q&A with Local Docs")
|
69 |
st.info("Make sure you have a 'docs' folder in the same directory as this script containing your PDF files.")
|
70 |
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
with st.spinner("Loading and processing PDF(s)..."):
|
72 |
all_text, all_tables = load_and_process_pdfs_from_folder()
|
73 |
|
74 |
if all_text:
|
75 |
with st.spinner("Creating knowledge base..."):
|
76 |
chunks = split_text_into_chunks(all_text)
|
77 |
-
|
78 |
-
|
|
|
|
|
79 |
|
80 |
query = st.text_input("Ask a question about the documents:")
|
81 |
if query:
|
82 |
with st.spinner("Searching for answer..."):
|
83 |
-
result =
|
84 |
st.subheader("Answer:")
|
85 |
st.write(result["answer"])
|
86 |
if "sources" in result:
|
87 |
st.subheader("Source:")
|
88 |
-
st.write(result["sources"])
|
89 |
|
90 |
if all_tables:
|
91 |
st.subheader("Extracted Tables:")
|
|
|
3 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
4 |
from langchain.embeddings import HuggingFaceEmbeddings
|
5 |
from langchain.vectorstores import FAISS
|
|
|
|
|
6 |
import pandas as pd
|
7 |
import os
|
8 |
import io
|
9 |
+
import requests
|
10 |
|
11 |
# --- 1. Data Loading and Preprocessing ---
|
12 |
|
|
|
47 |
vectorstore = FAISS.from_texts(chunks, embeddings)
|
48 |
return vectorstore
|
49 |
|
50 |
+
# --- 2. Question Answering with Groq ---
|
51 |
|
52 |
+
def generate_answer_with_groq(question, context):
    """Generates an answer using the Groq chat-completions API.

    Args:
        question: The customer's question as a plain string.
        context: Retrieved document text used to ground the answer.

    Returns:
        The model's reply as a stripped string, or a fixed fallback
        error message if the API call or response parsing fails.
    """
    url = "https://api.groq.com/openai/v1/chat/completions"
    api_key = os.environ.get("GROQ_API_KEY")
    if not api_key:
        # Fail fast with a clear message instead of sending an
        # unauthenticated request that is guaranteed to be rejected.
        st.error("GROQ_API_KEY is not set.")
        return "An error occurred while trying to get the answer."
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    prompt = (
        f"Customer asked: '{question}'\n\n"
        f"Here is the relevant product or policy info to help:\n{context}\n\n"
        "Respond in a friendly and helpful tone as a toy shop support agent."
    )
    payload = {
        "model": "llama3-8b-8192",
        "messages": [
            {
                "role": "system",
                "content": (
                    "You are ToyBot, a friendly and helpful WhatsApp assistant for an online toy shop. "
                    "Your goal is to politely answer customer questions, help them choose the right toys, "
                    "provide order or delivery information, explain return policies, and guide them through purchases."
                )
            },
            {"role": "user", "content": prompt},
        ],
        "temperature": 0.5,
        "max_tokens": 300,
    }
    try:
        # timeout= keeps the Streamlit app from hanging indefinitely
        # when the API is slow or unreachable.
        response = requests.post(url, headers=headers, json=payload, timeout=60)
        response.raise_for_status()  # Raise an exception for bad status codes
        return response.json()['choices'][0]['message']['content'].strip()
    except requests.exceptions.RequestException as e:
        st.error(f"Error communicating with Groq API: {e}")
        return "An error occurred while trying to get the answer."
    except (KeyError, IndexError, ValueError) as e:
        # The API responded, but not in the expected JSON shape.
        st.error(f"Unexpected response from Groq API: {e}")
        return "An error occurred while trying to get the answer."
|
88 |
|
89 |
+
def perform_rag_groq(vectorstore, query):
    """Performs retrieval over the vector store and answers via Groq.

    Args:
        vectorstore: A LangChain vector store (e.g. FAISS) that
            supports ``as_retriever()``.
        query: The user's question.

    Returns:
        A dict with an "answer" string and a "sources" list naming the
        documents the context was drawn from.
    """
    retriever = vectorstore.as_retriever()
    relevant_docs = retriever.get_relevant_documents(query)
    context = "\n\n".join(doc.page_content for doc in relevant_docs)
    answer = generate_answer_with_groq(query, context)
    # Use .get so a chunk that was indexed without a "source" metadata
    # entry degrades to "unknown" instead of raising KeyError and
    # discarding the whole answer.
    sources = [doc.metadata.get("source", "unknown") for doc in relevant_docs]
    return {"answer": answer, "sources": sources}
|
96 |
|
97 |
# --- 3. Streamlit UI ---
|
98 |
|
99 |
def main():
|
100 |
+
st.title("PDF Q&A with Local Docs (Powered by Groq)")
|
101 |
st.info("Make sure you have a 'docs' folder in the same directory as this script containing your PDF files.")
|
102 |
|
103 |
+
groq_api_key = st.text_input("Enter your Groq API Key:", type="password")
|
104 |
+
if not groq_api_key:
|
105 |
+
st.warning("Please enter your Groq API key to ask questions.")
|
106 |
+
return
|
107 |
+
os.environ["GROQ_API_KEY"] = groq_api_key
|
108 |
+
|
109 |
with st.spinner("Loading and processing PDF(s)..."):
|
110 |
all_text, all_tables = load_and_process_pdfs_from_folder()
|
111 |
|
112 |
if all_text:
|
113 |
with st.spinner("Creating knowledge base..."):
|
114 |
chunks = split_text_into_chunks(all_text)
|
115 |
+
# We need to add metadata (source) to the chunks for accurate source tracking
|
116 |
+
metadatas = [{"source": f"doc_{i+1}"} for i in range(len(chunks))] # Basic source tracking
|
117 |
+
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
|
118 |
+
vectorstore = FAISS.from_texts(chunks, embeddings, metadatas=metadatas)
|
119 |
|
120 |
query = st.text_input("Ask a question about the documents:")
|
121 |
if query:
|
122 |
with st.spinner("Searching for answer..."):
|
123 |
+
result = perform_rag_groq(vectorstore, query)
|
124 |
st.subheader("Answer:")
|
125 |
st.write(result["answer"])
|
126 |
if "sources" in result:
|
127 |
st.subheader("Source:")
|
128 |
+
st.write(", ".join(result["sources"])) # Display sources
|
129 |
|
130 |
if all_tables:
|
131 |
st.subheader("Extracted Tables:")
|