import os

import streamlit as st
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains import ConversationalRetrievalChain
from langchain_community.llms import HuggingFaceHub
from langchain.memory import ConversationBufferMemory

# App title and color theme
st.set_page_config(page_title="📘 PDF Q&A Agent", layout="centered", page_icon="📘")
st.markdown(
    """
    <div style="text-align: center;">
        <h1>📘 Student PDF Assistant</h1>
        <p>Ask questions from your uploaded PDF and generate Q&A for chapters!</p>
    </div>
    """,
    unsafe_allow_html=True,
)
\"\"\", unsafe_allow_html=True ) # Upload PDF uploaded_file = st.file_uploader("📎 Upload your PDF file", type=["pdf"]) if uploaded_file: # Save PDF temporarily with open("uploaded.pdf", "wb") as f: f.write(uploaded_file.read()) st.success("✅ PDF uploaded successfully!") # Load and split PDF loader = PyPDFLoader("uploaded.pdf") pages = loader.load_and_split() text_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=150) chunks = text_splitter.split_documents(pages) # Embedding embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2") vectordb = FAISS.from_documents(chunks, embeddings) # Load Open Source LLM from Hugging Face (Mistral or any lightweight LLM) repo_id = "mistralai/Mistral-7B-Instruct-v0.1" llm = HuggingFaceHub(repo_id=repo_id, model_kwargs={"temperature":0.5, "max_new_tokens":500}) # Memory and Chain memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True) qa_chain = ConversationalRetrievalChain.from_llm( llm, retriever=vectordb.as_retriever(), memory=memory ) # Chat Interface st.markdown("---") st.markdown("💬 **Ask a question from the PDF:**") if "chat_history" not in st.session_state: st.session_state.chat_history = [] question = st.text_input("Type your question here...", key="user_input") if question: result = qa_chain.run(question) st.session_state.chat_history.append(("You", question)) st.session_state.chat_history.append(("Bot", result)) # Show chat history for sender, msg in st.session_state.chat_history[::-1]: st.markdown(f"**{sender}:** {msg}") # Question Generation Button st.markdown("---") if st.button("📚 Generate Q&A from all chapters"): st.info("Generating questions and answers from the content...") questions = [ "What is the main idea of this chapter?", "What are the key points discussed?", "Can you summarize this section?", "Are there any definitions or terms introduced?" ] for i, chunk in enumerate(chunks[:3]): # Limit to first 3 chunks for demo st.markdown(f"**Chapter Section {i+1}:**") for q in questions: answer = llm.invoke(q + "\\n" + chunk.page_content[:1000]) st.markdown(f"**Q:** {q}") st.markdown(f"**A:** {answer}") st.markdown("---") """ # Save both files to /mnt/data for user download or deployment with open("/mnt/data/requirements.txt", "w") as f: f.write(requirements_txt.strip()) with open("/mnt/data/app.py", "w") as f: f.write(app_py.strip())