import streamlit as st
from streamlit_option_menu import option_menu
import fitz  # PyMuPDF
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.llms import HuggingFaceHub
from langchain.chains import RetrievalQA
import tempfile
import os
import base64

# Page configuration
st.set_page_config(
    page_title="PDF Study Assistant",
    page_icon="📚",
    layout="wide",
    initial_sidebar_state="collapsed"
)

# Custom CSS for colorful design (style rules omitted here)
st.markdown("""
<style>
</style>
""", unsafe_allow_html=True)

# Initialize session state
if 'pdf_processed' not in st.session_state:
    st.session_state.pdf_processed = False
if 'qa_chain' not in st.session_state:
    st.session_state.qa_chain = None
if 'pages' not in st.session_state:
    st.session_state.pages = []

# Load models with caching so they are created only once per session
@st.cache_resource
def load_embedding_model():
    return HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

@st.cache_resource
def load_qa_model():
    return HuggingFaceHub(
        repo_id="google/flan-t5-xxl",
        model_kwargs={"temperature": 0.5, "max_length": 512},
        huggingfacehub_api_token=os.getenv("HF_API_KEY")
    )

def process_pdf(pdf_file):
    """Extract text from the PDF and build the retrieval QA chain."""
    with st.spinner("📖 Reading PDF..."):
        doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
        text = ""
        st.session_state.pages = []
        for page in doc:
            page_text = page.get_text()
            text += page_text
            st.session_state.pages.append(page_text)
        doc.close()

    with st.spinner("🔍 Processing text..."):
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            length_function=len
        )
        chunks = text_splitter.split_text(text)

        embeddings = load_embedding_model()
        vector_store = FAISS.from_texts(chunks, embeddings)

        qa_model = load_qa_model()
        st.session_state.qa_chain = RetrievalQA.from_chain_type(
            llm=qa_model,
            chain_type="stuff",
            retriever=vector_store.as_retriever(search_kwargs={"k": 3}),
            return_source_documents=True
        )

    st.session_state.pdf_processed = True
    st.success("✅ PDF processed successfully!")

def generate_qa_for_chapter(start_page, end_page):
    """Generate Q&A pairs for a specific range of pages."""
    if start_page < 1 or end_page > len(st.session_state.pages) or start_page > end_page:
        st.error("Invalid page range")
        return []

    chapter_text = "\n".join(st.session_state.pages[start_page-1:end_page])
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=800,
        chunk_overlap=100,
        length_function=len
    )
    chunks = text_splitter.split_text(chapter_text)

    qa_pairs = []
    qa_model = load_qa_model()

    with st.spinner(f"🧠 Generating Q&A for pages {start_page}-{end_page}..."):
        for i, chunk in enumerate(chunks):
            if i % 2 == 0:
                # Even chunks: generate a question and store it with an empty answer
                prompt = f"Generate a study question based on: {chunk[:500]}"
                question = qa_model(prompt)[:120] + "?"
                qa_pairs.append((question, ""))
            else:
                # Odd chunks: answer the most recent question using this chunk as context
                prompt = f"Answer the question: {qa_pairs[-1][0]} using context: {chunk[:500]}"
                answer = qa_model(prompt)
                qa_pairs[-1] = (qa_pairs[-1][0], answer)

    # Drop a trailing question that never received an answer (odd number of chunks)
    return [(q, a) for q, a in qa_pairs if a]

# App header
st.markdown("""
<h1>📚 PDF Study Assistant</h1>
""", unsafe_allow_html=True)

# PDF Upload Section
with st.container():
    st.subheader("📤 Upload Your Textbook/Notes")
    pdf_file = st.file_uploader("Upload a PDF", type="pdf", label_visibility="collapsed")

# Main content
if pdf_file:
    if not st.session_state.pdf_processed:
        process_pdf(pdf_file)

    if st.session_state.pdf_processed:
        # Navigation tabs
        selected_tab = option_menu(
            None,
            ["Ask Questions", "Generate Chapter Q&A"],
            icons=["chat", "book"],
            menu_icon="cast",
            default_index=0,
            orientation="horizontal",
            styles={
                "container": {"padding": "0!important", "background-color": "#f9f9f9"},
                "nav-link": {"font-size": "16px", "font-weight": "bold"},
                "nav-link-selected": {"background": "linear-gradient(to right, #3a86ff, #ff4b4b)"},
            }
        )

        # Question Answering Tab
        if selected_tab == "Ask Questions":
            st.markdown("### 💬 Ask Questions About Your Document")
            user_question = st.text_input("Type your question here:", key="user_question")

            if user_question:
                with st.spinner("🤔 Thinking..."):
                    result = st.session_state.qa_chain({"query": user_question})
                st.markdown(
                    f"<div><strong>Answer:</strong> {result['result']}</div>",
                    unsafe_allow_html=True
                )

                with st.expander("🔍 See source passages"):
                    for i, doc in enumerate(result["source_documents"]):
                        st.markdown(f"**Passage {i+1}:** {doc.page_content[:500]}...")

        # Chapter Q&A Generation Tab
        elif selected_tab == "Generate Chapter Q&A":
            st.markdown("### 📝 Generate Q&A for Specific Chapter")

            col1, col2 = st.columns(2)
            with col1:
                start_page = st.number_input("Start Page", min_value=1,
                                             max_value=len(st.session_state.pages), value=1)
            with col2:
                end_page = st.number_input("End Page", min_value=1,
                                           max_value=len(st.session_state.pages),
                                           value=min(5, len(st.session_state.pages)))

            if st.button("Generate Q&A", key="generate_qa"):
                qa_pairs = generate_qa_for_chapter(start_page, end_page)

                if qa_pairs:
                    st.markdown(
                        f"<h3>📖 Generated Questions for Pages {start_page}-{end_page}</h3>",
                        unsafe_allow_html=True
                    )

                    for i, (question, answer) in enumerate(qa_pairs):
                        st.markdown(
                            f"""
                            <div>
                            <strong>Q{i+1}:</strong> {question}<br>
                            <strong>A{i+1}:</strong> {answer}
                            </div>
                            """,
                            unsafe_allow_html=True
                        )
                else:
                    st.warning("No Q&A pairs generated. Try a different page range.")

# Footer
st.markdown("---")
st.markdown("""
<div>Built with ❤️ for students | PDF Study Assistant v1.0</div>
""", unsafe_allow_html=True)