import streamlit as st
import os
import tempfile
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain_community.llms import HuggingFaceHub

# Set page config with light purple theme
st.set_page_config(
    page_title="EduQuery - Smart PDF Assistant",
    page_icon="📚",
    layout="wide",
    initial_sidebar_state="collapsed"
)

# Embedded CSS for light purple UI
st.markdown("""
<style>
/* Light purple theme for the header, sections, and footer */
</style>
""", unsafe_allow_html=True)

# Header with gradient
st.markdown("""
<div class="header">
    <h1>📚 EduQuery</h1>
    <p>Smart PDF Assistant for Students</p>
</div>
""", unsafe_allow_html=True)

# Initialize session state
if "vector_store" not in st.session_state:
    st.session_state.vector_store = None
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []
if "qa_chain" not in st.session_state:
    st.session_state.qa_chain = None

# PDF Processing
def process_pdf(pdf_file):
    # Write the upload to a temporary file so PyPDFLoader can read it from disk
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
        tmp_file.write(pdf_file.getvalue())
        tmp_path = tmp_file.name

    loader = PyPDFLoader(tmp_path)
    pages = loader.load()

    # Split pages into overlapping chunks sized for retrieval
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=800,
        chunk_overlap=150
    )
    chunks = text_splitter.split_documents(pages)

    # Embed the chunks and index them in FAISS
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    vector_store = FAISS.from_documents(chunks, embeddings)

    os.unlink(tmp_path)
    return vector_store

# Setup QA Chain
def setup_qa_chain(vector_store):
    # Use Mistral-7B-Instruct via the Hugging Face Hub inference API
    repo_id = "mistralai/Mistral-7B-Instruct-v0.1"
    llm = HuggingFaceHub(
        repo_id=repo_id,
        model_kwargs={"temperature": 0.5, "max_new_tokens": 500}
    )

    # Buffer memory keeps the running conversation so follow-ups have context
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True
    )

    qa_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vector_store.as_retriever(search_kwargs={"k": 3}),
        memory=memory,
        chain_type="stuff"
    )
    return qa_chain

# File upload section
st.markdown("""
<div class="section">
    <h3>📤 Upload Your Textbook/Notes</h3>
""", unsafe_allow_html=True)

uploaded_file = st.file_uploader(
    "Upload a PDF", type="pdf", accept_multiple_files=False, label_visibility="collapsed"
)

st.markdown("</div>", unsafe_allow_html=True)

if uploaded_file:
    with st.spinner("Processing PDF..."):
        st.session_state.vector_store = process_pdf(uploaded_file)
        st.session_state.qa_chain = setup_qa_chain(st.session_state.vector_store)
    st.success("PDF processed successfully! You can now ask questions.")

# Chat interface
st.markdown("""
<div class="section">
    <h3>💬 Ask Anything About the Document</h3>
""", unsafe_allow_html=True)

# Display chat history
for message in st.session_state.chat_history:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# User input
if prompt := st.chat_input("Your question..."):
    if st.session_state.vector_store is None:
        st.warning("Please upload a PDF first")
        st.stop()

    # Add user message to chat history
    st.session_state.chat_history.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    # Get assistant response
    with st.chat_message("assistant"):
        with st.spinner("Thinking..."):
            response = st.session_state.qa_chain({"question": prompt})
            answer = response["answer"]
            st.markdown(answer)

    # Add assistant response to chat history
    st.session_state.chat_history.append({"role": "assistant", "content": answer})

st.markdown("</div>", unsafe_allow_html=True)
", unsafe_allow_html=True) # Footer st.markdown(""" """, unsafe_allow_html=True)