Spaces:

sunbal7
/

PDFQueryApplication

Sleeping

App Files Files Community

sunbal7 committed on Jun 20

Commit

ba3ef77

verified ·

1 Parent(s): 6c9740a

Update app.py

Browse files

Files changed (1) hide show

app.py +130 -124

app.py CHANGED Viewed

@@ -5,14 +5,12 @@ from langchain_community.document_loaders import PyPDFLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.vectorstores import FAISS
 from langchain_community.embeddings import HuggingFaceEmbeddings
-from langchain_community.chat_models import ChatOllama
-from langchain.chains import RetrievalQA
-from langchain.prompts import PromptTemplate
-from langchain_core.runnables import RunnablePassthrough
-from langchain_core.output_parsers import StrOutputParser
 import base64
-# Set page config
 st.set_page_config(
     page_title="EduQuery - Smart PDF Assistant",
     page_icon="📚",
@@ -20,11 +18,19 @@ st.set_page_config(
     initial_sidebar_state="collapsed"
 )
-# Embedded CSS for colorful UI
 st.markdown("""
 <style>
 body {
-    background-color: #f0f2f6;
 }
 .stApp {
@@ -34,37 +40,47 @@ body {
 }
 .header {
-    background: linear-gradient(135deg, #6a11cb 0%, #2575fc 100%);
     color: white;
     padding: 2rem;
     border-radius: 15px;
     margin-bottom: 2rem;
     text-align: center;
 }
 .header h1 {
-    font-size: 2.5rem;
     margin-bottom: 0.5rem;
 }
 .stButton>button {
-    background: linear-gradient(135deg, #6a11cb 0%, #2575fc 100%);
     color: white;
     border: none;
     border-radius: 25px;
-    padding: 0.5rem 1.5rem;
     font-weight: bold;
     transition: all 0.3s ease;
 }
 .stButton>button:hover {
     transform: scale(1.05);
-    box-shadow: 0 5px 15px rgba(0,0,0,0.1);
 }
 .stTextInput>div>div>input {
     border-radius: 25px;
-    padding: 0.75rem 1.5rem;
 }
 .stChatMessage {
@@ -72,31 +88,55 @@ body {
     border-radius: 20px;
     margin-bottom: 1rem;
     max-width: 80%;
 }
 .stChatMessage[data-testid="user"] {
-    background: linear-gradient(135deg, #43e97b 0%, #38f9d7 100%);
     margin-left: auto;
 }
 .stChatMessage[data-testid="assistant"] {
-    background: linear-gradient(135deg, #4facfe 0%, #00f2fe 100%);
     margin-right: auto;
 }
-.qa-box {
-    background: linear-gradient(135deg, #fff1eb 0%, #ace0f9 100%);
-    padding: 1.5rem;
     border-radius: 15px;
-    margin-top: 1rem;
-    box-shadow: 0 5px 15px rgba(0,0,0,0.05);
 }
 .footer {
     text-align: center;
-    color: #6c757d;
-    padding-top: 1.5rem;
     font-size: 0.9rem;
 }
 </style>
 """, unsafe_allow_html=True)
@@ -112,11 +152,10 @@ st.markdown("""
 # Initialize session state
 if "vector_store" not in st.session_state:
     st.session_state.vector_store = None
-if "messages" not in st.session_state:
-    st.session_state.messages = []
-# Model selection
-MODEL_NAME = "nous-hermes2"  # Best open-source model for instruction following
 # PDF Processing
 def process_pdf(pdf_file):
@@ -125,129 +164,96 @@ def process_pdf(pdf_file):
         tmp_path = tmp_file.name
     loader = PyPDFLoader(tmp_path)
-    docs = loader.load()
     text_splitter = RecursiveCharacterTextSplitter(
-        chunk_size=1000,
-        chunk_overlap=200,
-        length_function=len
     )
-    chunks = text_splitter.split_documents(docs)
-    embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-base-en-v1.5")
     vector_store = FAISS.from_documents(chunks, embeddings)
     os.unlink(tmp_path)
     return vector_store
-# RAG Setup
 def setup_qa_chain(vector_store):
-    llm = ChatOllama(model=MODEL_NAME, temperature=0.3)
-    custom_prompt = """
-    You are an expert academic assistant. Answer the question based only on the following context:
-    {context}
-    Question: {question}
-    Provide a clear, concise answer with page number references. If unsure, say "I couldn't find this information in the document".
-    """
-    prompt = PromptTemplate(
-        template=custom_prompt,
-        input_variables=["context", "question"]
     )
-    retriever = vector_store.as_retriever(search_kwargs={"k": 3})
-    qa_chain = (
-        {"context": retriever, "question": RunnablePassthrough()}
-        | prompt
-        | llm
-        | StrOutputParser()
     )
-    return qa_chain
-# Generate questions from chapter
-def generate_chapter_questions(vector_store, chapter_title):
-    llm = ChatOllama(model=MODEL_NAME, temperature=0.7)
-    prompt = PromptTemplate(
-        input_variables=["chapter_title"],
-        template="""
-        You are an expert educator. Generate 5 important questions and answers about '{chapter_title}'
-        that would help students understand key concepts. Format as:
-        Q1: [Question]
-        A1: [Answer with page reference]
-        Q2: [Question]
-        A2: [Answer with page reference]
-        ..."""
     )
-    chain = prompt | llm | StrOutputParser()
-    return chain.invoke({"chapter_title": chapter_title})
 # File upload section
-st.subheader("📤 Upload Your Textbook/Notes")
 uploaded_file = st.file_uploader("", type="pdf", accept_multiple_files=False, label_visibility="collapsed")
 if uploaded_file:
     with st.spinner("Processing PDF..."):
         st.session_state.vector_store = process_pdf(uploaded_file)
     st.success("PDF processed successfully! You can now ask questions.")
-# Main content columns
-col1, col2 = st.columns([1, 2])
-# Chapter-based Q&A Generator
-with col1:
-    st.subheader("🔍 Generate Chapter Questions")
-    chapter_title = st.text_input("Enter chapter title/section name:", key="chapter_input")
-    if st.button("Generate Q&A", key="generate_btn") and chapter_title and st.session_state.vector_store:
-        with st.spinner(f"Generating questions about {chapter_title}..."):
-            questions = generate_chapter_questions(
-                st.session_state.vector_store,
-                chapter_title
-            )
-            st.markdown(f"<div class='qa-box'>{questions}</div>", unsafe_allow_html=True)
-    elif chapter_title and not st.session_state.vector_store:
         st.warning("Please upload a PDF first")
-# Chat interface
-with col2:
-    st.subheader("💬 Ask Anything About the Document")
-    for message in st.session_state.messages:
-        with st.chat_message(message["role"]):
-            st.markdown(message["content"])
-    if prompt := st.chat_input("Your question..."):
-        if not st.session_state.vector_store:
-            st.warning("Please upload a PDF first")
-            st.stop()
-        st.session_state.messages.append({"role": "user", "content": prompt})
-        with st.chat_message("user"):
-            st.markdown(prompt)
-        with st.chat_message("assistant"):
-            with st.spinner("Thinking..."):
-                qa_chain = setup_qa_chain(st.session_state.vector_store)
-                response = qa_chain.invoke(prompt)
-            st.markdown(response)
-            st.session_state.messages.append({"role": "assistant", "content": response})
 # Footer
-st.markdown("---")
-st.markdown(
-    """
-    <div class="footer">
-        <p>EduQuery - Helping students learn smarter • Powered by Nous-Hermes2 and LangChain</p>
-    </div>
-    """,
-    unsafe_allow_html=True
-)

 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.vectorstores import FAISS
 from langchain_community.embeddings import HuggingFaceEmbeddings
+from langchain.chains import ConversationalRetrievalChain
+from langchain.memory import ConversationBufferMemory
+from langchain_community.llms import HuggingFaceHub
 import base64
+# Set page config with light purple theme
 st.set_page_config(
     page_title="EduQuery - Smart PDF Assistant",
     page_icon="📚",
     initial_sidebar_state="collapsed"
 )
+# Embedded CSS for light purple UI
 st.markdown("""
 <style>
+:root {
+    --primary: #8a4fff;
+    --secondary: #d0bcff;
+    --light: #f3edff;
+    --dark: #4a2b80;
+}
 body {
+    background-color: #f8f5ff;
+    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
 }
 .stApp {
 }
 .header {
+    background: linear-gradient(135deg, var(--primary) 0%, var(--dark) 100%);
     color: white;
     padding: 2rem;
     border-radius: 15px;
     margin-bottom: 2rem;
     text-align: center;
+    box-shadow: 0 4px 20px rgba(138, 79, 255, 0.2);
 }
 .header h1 {
+    font-size: 2.8rem;
     margin-bottom: 0.5rem;
 }
 .stButton>button {
+    background: linear-gradient(135deg, var(--primary) 0%, var(--dark) 100%);
     color: white;
     border: none;
     border-radius: 25px;
+    padding: 0.75rem 2rem;
     font-weight: bold;
+    font-size: 1rem;
     transition: all 0.3s ease;
+    margin-top: 1rem;
 }
 .stButton>button:hover {
     transform: scale(1.05);
+    box-shadow: 0 5px 15px rgba(138, 79, 255, 0.3);
 }
 .stTextInput>div>div>input {
     border-radius: 25px;
+    padding: 0.9rem 1.5rem;
+    border: 1px solid var(--secondary);
+    background-color: var(--light);
+}
+.stTextInput>div>div>input:focus {
+    border-color: var(--primary);
+    box-shadow: 0 0 0 2px rgba(138, 79, 255, 0.2);
 }
 .stChatMessage {
     border-radius: 20px;
     margin-bottom: 1rem;
     max-width: 80%;
+    box-shadow: 0 4px 12px rgba(0,0,0,0.05);
 }
 .stChatMessage[data-testid="user"] {
+    background: linear-gradient(135deg, #d0bcff 0%, #b8a1ff 100%);
     margin-left: auto;
+    color: #4a2b80;
 }
 .stChatMessage[data-testid="assistant"] {
+    background: linear-gradient(135deg, #e6dcff 0%, #f3edff 100%);
     margin-right: auto;
+    color: #4a2b80;
+    border: 1px solid var(--secondary);
 }
+.upload-area {
+    background: linear-gradient(135deg, #f3edff 0%, #e6dcff 100%);
+    padding: 2rem;
     border-radius: 15px;
+    text-align: center;
+    border: 2px dashed var(--primary);
+    margin-bottom: 2rem;
+}
+.chat-area {
+    background: white;
+    padding: 2rem;
+    border-radius: 15px;
+    box-shadow: 0 4px 20px rgba(138, 79, 255, 0.1);
+    height: 500px;
+    overflow-y: auto;
 }
 .footer {
     text-align: center;
+    color: #8a4fff;
+    padding-top: 2rem;
     font-size: 0.9rem;
+    margin-top: 2rem;
+    border-top: 1px solid var(--secondary);
+}
+.spinner {
+    color: var(--primary) !important;
+}
+.stSpinner > div > div {
+    border-top-color: var(--primary) !important;
 }
 </style>
 """, unsafe_allow_html=True)
 # Initialize session state
 if "vector_store" not in st.session_state:
     st.session_state.vector_store = None
+if "chat_history" not in st.session_state:
+    st.session_state.chat_history = []
+if "qa_chain" not in st.session_state:
+    st.session_state.qa_chain = None
 # PDF Processing
 def process_pdf(pdf_file):
         tmp_path = tmp_file.name
     loader = PyPDFLoader(tmp_path)
+    pages = loader.load_and_split()
     text_splitter = RecursiveCharacterTextSplitter(
+        chunk_size=800,
+        chunk_overlap=150
     )
+    chunks = text_splitter.split_documents(pages)
+    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
     vector_store = FAISS.from_documents(chunks, embeddings)
     os.unlink(tmp_path)
     return vector_store
+# Setup QA Chain
 def setup_qa_chain(vector_store):
+    # Use Mistral-7B from Hugging Face Hub
+    repo_id = "mistralai/Mistral-7B-Instruct-v0.1"
+    llm = HuggingFaceHub(
+        repo_id=repo_id,
+        model_kwargs={"temperature": 0.5, "max_new_tokens": 500}
     )
+    memory = ConversationBufferMemory(
+        memory_key="chat_history",
+        return_messages=True
     )
+    qa_chain = ConversationalRetrievalChain.from_llm(
+        llm=llm,
+        retriever=vector_store.as_retriever(search_kwargs={"k": 3}),
+        memory=memory,
+        chain_type="stuff"
     )
+    return qa_chain
 # File upload section
+st.markdown("""
+<div class="upload-area">
+    <h3>📤 Upload Your Textbook/Notes</h3>
+""", unsafe_allow_html=True)
 uploaded_file = st.file_uploader("", type="pdf", accept_multiple_files=False, label_visibility="collapsed")
+st.markdown("</div>", unsafe_allow_html=True)
 if uploaded_file:
     with st.spinner("Processing PDF..."):
         st.session_state.vector_store = process_pdf(uploaded_file)
+        st.session_state.qa_chain = setup_qa_chain(st.session_state.vector_store)
     st.success("PDF processed successfully! You can now ask questions.")
+# Chat interface
+st.markdown("""
+<div class="chat-area">
+    <h3>💬 Ask Anything About the Document</h3>
+""", unsafe_allow_html=True)
+# Display chat history
+for message in st.session_state.chat_history:
+    with st.chat_message(message["role"]):
+        st.markdown(message["content"])
+# User input
+if prompt := st.chat_input("Your question..."):
+    if not st.session_state.vector_store:
         st.warning("Please upload a PDF first")
+        st.stop()
+    # Add user message to chat history
+    st.session_state.chat_history.append({"role": "user", "content": prompt})
+    with st.chat_message("user"):
+        st.markdown(prompt)
+    # Get assistant response
+    with st.chat_message("assistant"):
+        with st.spinner("Thinking..."):
+            response = st.session_state.qa_chain({"question": prompt})
+            answer = response["answer"]
+        st.markdown(answer)
+    # Add assistant response to chat history
+    st.session_state.chat_history.append({"role": "assistant", "content": answer})
+st.markdown("</div>", unsafe_allow_html=True)
 # Footer
+st.markdown("""
+<div class="footer">
+    <p>EduQuery - Helping students learn smarter • Powered by Mistral-7B and LangChain</p>
+</div>
+""", unsafe_allow_html=True)