import streamlit as st
import os
import tempfile
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.chat_models import ChatOllama
from langchain.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

# Set page config (must be the first Streamlit call)
st.set_page_config(
    page_title="EduQuery - Smart PDF Assistant",
    page_icon="📚",
    layout="wide",
    initial_sidebar_state="collapsed",
)

# Custom CSS for colorful UI
def local_css(file_name):
    with open(file_name) as f:
        st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)

local_css("style.css")

# Header with gradient; the "header" class is expected to be styled in style.css
st.markdown(
    """
    <div class="header">
        <h1>📚 EduQuery</h1>
        <p>Smart PDF Assistant for Students</p>
    </div>
    """,
    unsafe_allow_html=True,
)

# Initialize session state
if "vector_store" not in st.session_state:
    st.session_state.vector_store = None
if "messages" not in st.session_state:
    st.session_state.messages = []

# Model selection: a strong open-source instruction-following model served by Ollama
MODEL_NAME = "nous-hermes2"


# PDF Processing
def process_pdf(pdf_file):
    """Load an uploaded PDF, split it into chunks, and index them in FAISS."""
    # PyPDFLoader needs a path on disk, so spool the upload to a temp file first
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
        tmp_file.write(pdf_file.getvalue())
        tmp_path = tmp_file.name

    loader = PyPDFLoader(tmp_path)
    docs = loader.load()

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    chunks = text_splitter.split_documents(docs)

    embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-base-en-v1.5")
    vector_store = FAISS.from_documents(chunks, embeddings)

    os.unlink(tmp_path)
    return vector_store


def format_docs(docs):
    """Join retrieved chunks, prefixing each with its page so the model can cite pages."""
    return "\n\n".join(
        f"[Page {doc.metadata.get('page', '?')}] {doc.page_content}" for doc in docs
    )


# RAG Setup
def setup_qa_chain(vector_store):
    llm = ChatOllama(model=MODEL_NAME, temperature=0.3)

    custom_prompt = """You are an expert academic assistant. Answer the question based only on the following context:

{context}

Question: {question}

Provide a clear, concise answer with page number references.
If unsure, say "I couldn't find this information in the document"."""

    prompt = PromptTemplate(
        template=custom_prompt,
        input_variables=["context", "question"],
    )

    retriever = vector_store.as_retriever(search_kwargs={"k": 3})

    qa_chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )
    return qa_chain


# Generate questions from chapter
def generate_chapter_questions(vector_store, chapter_title):
    llm = ChatOllama(model=MODEL_NAME, temperature=0.7)

    # Retrieve chunks relevant to the chapter so the Q&A is grounded in the
    # document and page references are actually available to the model.
    retriever = vector_store.as_retriever(search_kwargs={"k": 5})
    context = format_docs(retriever.invoke(chapter_title))

    prompt = PromptTemplate(
        input_variables=["chapter_title", "context"],
        template="""You are an expert educator. Using only the context below, generate 5 important questions and answers about '{chapter_title}' that would help students understand key concepts.

Context:
{context}

Format as:
Q1: [Question]
A1: [Answer with page reference]
Q2: [Question]
A2: [Answer with page reference]
...""",
    )

    chain = prompt | llm | StrOutputParser()
    return chain.invoke({"chapter_title": chapter_title, "context": context})


# File upload section
st.subheader("📤 Upload Your Textbook/Notes")
uploaded_file = st.file_uploader(
    "Upload a PDF", type="pdf", accept_multiple_files=False,
    label_visibility="collapsed",
)

if uploaded_file:
    # Streamlit reruns the whole script on every interaction, so only re-index
    # when a new file arrives instead of re-embedding the same PDF each time.
    if st.session_state.get("processed_file") != uploaded_file.name:
        with st.spinner("Processing PDF..."):
            st.session_state.vector_store = process_pdf(uploaded_file)
        st.session_state.processed_file = uploaded_file.name
        st.success("PDF processed successfully! You can now ask questions.")

# Main content columns
col1, col2 = st.columns([1, 2])

# Chapter-based Q&A Generator
with col1:
    st.subheader("🔍 Generate Chapter Questions")
    chapter_title = st.text_input("Enter chapter title/section name:")

    if st.button("Generate Q&A") and chapter_title and st.session_state.vector_store:
        with st.spinner(f"Generating questions about {chapter_title}..."):
            questions = generate_chapter_questions(
                st.session_state.vector_store, chapter_title
            )
        # The "qa-box" wrapper class is expected to be styled in style.css
        st.markdown(f'<div class="qa-box">{questions}</div>', unsafe_allow_html=True)
    elif chapter_title and not st.session_state.vector_store:
        st.warning("Please upload a PDF first")

# Chat interface
with col2:
    st.subheader("💬 Ask Anything About the Document")

    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    if prompt := st.chat_input("Your question..."):
        if not st.session_state.vector_store:
            st.warning("Please upload a PDF first")
            st.stop()

        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)

        with st.chat_message("assistant"):
            with st.spinner("Thinking..."):
                qa_chain = setup_qa_chain(st.session_state.vector_store)
                response = qa_chain.invoke(prompt)
                st.markdown(response)
        st.session_state.messages.append({"role": "assistant", "content": response})

# Footer
st.markdown("---")