Spaces:

bainskarman
/

AllAboutRAG

Build error

App Files Files Community

bainskarman committed on Mar 13, 2025

Commit

5e06280

verified ·

1 Parent(s): d2c0564

Update app.py

Browse files

Files changed (1) hide show

app.py +47 -7

app.py CHANGED Viewed

@@ -2,6 +2,7 @@ import streamlit as st
 import os
 import requests
 from langdetect import detect
 # Load the Hugging Face token from environment variables (secrets)
 token = os.environ.get("Key2")  # Replace "Key2" with your secret key name
@@ -33,6 +34,32 @@ def detect_language(text):
     except:
         return "en"  # Default to English if detection fails
 # Streamlit App
 def main():
     st.title("RAG Model with Advanced Query Translation and Indexing")
@@ -41,6 +68,10 @@ def main():
     # Sidebar for options
     st.sidebar.title("Options")
     # Query Translation Options
     st.sidebar.header("Query Translation")
     query_translation = st.sidebar.selectbox(
@@ -63,7 +94,7 @@ def main():
     # System Prompt
     st.sidebar.header("System Prompt")
-    default_system_prompt = "You are a helpful assistant."
     system_prompt = st.sidebar.text_area("System Prompt", default_system_prompt)
     # Main Content
@@ -79,11 +110,14 @@ def main():
         # Query Translation
         if st.button("Apply Query Translation"):
             st.write(f"**Applied Query Translation Method:** {query_translation}")
-            # Implement query translation logic here
-            # Example: Generate multiple queries for Multi-Query
-            if query_translation == "Multi-Query":
-                queries = [f"{prompt} - Query {i}" for i in range(3)]
-                st.write("**Generated Queries:**", queries)
         # Indexing
         if st.button("Apply Indexing"):
@@ -93,11 +127,17 @@ def main():
             if indexing_method == "ColBERT":
                 st.write("Indexing with ColBERT...")
-        # Query the Hugging Face API
         if st.button("Generate Response"):
             response = query_huggingface_api(prompt, max_new_tokens, temperature, top_k)
             if response:
                 st.write("**Response:**", response)
 if __name__ == "__main__":
     main()

 import os
 import requests
 from langdetect import detect
+from PyPDF2 import PdfReader
 # Load the Hugging Face token from environment variables (secrets)
 token = os.environ.get("Key2")  # Replace "Key2" with your secret key name
     except:
         return "en"  # Default to English if detection fails
+# Function to extract text from PDF
+def extract_text_from_pdf(pdf_file):
+    pdf_reader = PdfReader(pdf_file)
+    text = ""
+    for page in pdf_reader.pages:
+        text += page.extract_text()
+    return text
+# Default system prompt templates, keyed by query translation method name; each template contains a {question} placeholder that is filled in with str.format(question=...) before the prompt is sent
+DEFAULT_SYSTEM_PROMPTS = {
+    "Multi-Query": """You are an AI language model assistant. Your task is to generate five
+different versions of the given user question to retrieve relevant documents from a vector
+database. By generating multiple perspectives on the user question, your goal is to help
+the user overcome some of the limitations of the distance-based similarity search.
+Provide these alternative questions separated by newlines. Original question: {question}""",
+    "RAG Fusion": """You are an AI language model assistant. Your task is to combine multiple
+queries into a single, refined query to improve retrieval accuracy. Original question: {question}""",
+    "Decomposition": """You are an AI language model assistant. Your task is to break down
+the given user question into simpler sub-questions. Provide these sub-questions separated
+by newlines. Original question: {question}""",
+    "Step Back": """You are an AI language model assistant. Your task is to refine the given
+user question by taking a step back and asking a more general question. Original question: {question}""",
+    "HyDE": """You are an AI language model assistant. Your task is to generate a hypothetical
+document that would be relevant to the given user question. Original question: {question}""",
+}
 # Streamlit App
 def main():
     st.title("RAG Model with Advanced Query Translation and Indexing")
     # Sidebar for options
     st.sidebar.title("Options")
+    # PDF Upload
+    st.sidebar.header("Upload PDF")
+    pdf_file = st.sidebar.file_uploader("Upload a PDF file", type="pdf")
     # Query Translation Options
     st.sidebar.header("Query Translation")
     query_translation = st.sidebar.selectbox(
     # System Prompt
     st.sidebar.header("System Prompt")
+    default_system_prompt = DEFAULT_SYSTEM_PROMPTS[query_translation]
     system_prompt = st.sidebar.text_area("System Prompt", default_system_prompt)
     # Main Content
         # Query Translation
         if st.button("Apply Query Translation"):
             st.write(f"**Applied Query Translation Method:** {query_translation}")
+            # Format the system prompt with the user's question
+            formatted_prompt = system_prompt.format(question=prompt)
+            st.write("**Formatted System Prompt:**", formatted_prompt)
+            # Query the Hugging Face API for query translation
+            translated_queries = query_huggingface_api(formatted_prompt, max_new_tokens, temperature, top_k)
+            if translated_queries:
+                st.write("**Translated Queries:**", translated_queries)
         # Indexing
         if st.button("Apply Indexing"):
             if indexing_method == "ColBERT":
                 st.write("Indexing with ColBERT...")
+        # Query the Hugging Face API for final response
         if st.button("Generate Response"):
             response = query_huggingface_api(prompt, max_new_tokens, temperature, top_k)
             if response:
                 st.write("**Response:**", response)
+    # Display PDF text if uploaded
+    if pdf_file is not None:
+        st.header("PDF Content")
+        pdf_text = extract_text_from_pdf(pdf_file)
+        st.write(pdf_text)
 if __name__ == "__main__":
     main()