import streamlit as st
from streamlit_option_menu import option_menu
import fitz # PyMuPDF
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.llms import HuggingFaceHub
from langchain.chains import RetrievalQA
import os
# Page configuration
st.set_page_config(
page_title="PDF Study Assistant",
page_icon="πŸ“š",
layout="wide",
initial_sidebar_state="collapsed"
)
# Custom CSS for colorful design
st.markdown("""
<style>
:root {
--primary: #ff4b4b;
--secondary: #ff9a3d;
--accent1: #ffcb74;
--accent2: #3a86ff;
--background: #f0f2f6;
--card: #ffffff;
}
.stApp {
background: linear-gradient(135deg, var(--background) 0%, #e0e5ec 100%);
}
.stButton>button {
background: linear-gradient(to right, var(--secondary), var(--primary));
color: white;
border-radius: 12px;
padding: 8px 20px;
font-weight: 600;
}
.stTextInput>div>div>input {
border-radius: 12px;
border: 2px solid var(--accent2);
padding: 10px;
}
.card {
background: var(--card);
border-radius: 15px;
box-shadow: 0 8px 16px rgba(0,0,0,0.1);
padding: 20px;
margin-bottom: 20px;
}
.header {
background: linear-gradient(to right, var(--accent2), var(--primary));
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
text-align: center;
margin-bottom: 30px;
}
.tab-content {
animation: fadeIn 0.5s ease-in-out;
}
@keyframes fadeIn {
from { opacity: 0; }
to { opacity: 1; }
}
</style>
""", unsafe_allow_html=True)
# Initialize session state
if 'pdf_processed' not in st.session_state:
st.session_state.pdf_processed = False
if 'qa_chain' not in st.session_state:
st.session_state.qa_chain = None
if 'pages' not in st.session_state:
st.session_state.pages = []
# Load models with caching
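# st.cache_resource keeps the embedding model and LLM in memory across Streamlit reruns,
# so they are loaded only once per server process.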
@st.cache_resource
def load_embedding_model():
return HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
@st.cache_resource
def load_qa_model():
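    # Calls google/flan-t5-xxl through the Hugging Face Inference API;
    # an HF_API_KEY environment variable (a Hugging Face access token) must be set.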
return HuggingFaceHub(
repo_id="google/flan-t5-xxl",
model_kwargs={"temperature": 0.5, "max_length": 512},
huggingfacehub_api_token=os.getenv("HF_API_KEY")
)
def process_pdf(pdf_file):
"""Extract text from PDF and create vector store"""
with st.spinner("πŸ“– Reading PDF..."):
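        # PyMuPDF opens the uploaded file directly from its in-memory bytes, so no temp file is written.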
doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
text = ""
st.session_state.pages = []
        for page in doc:
            page_text = page.get_text()  # extract text once per page
            text += page_text
            st.session_state.pages.append(page_text)
with st.spinner("πŸ” Processing text..."):
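        # Split the text into ~1000-character chunks with 200 characters of overlap,
        # so passages that straddle a chunk boundary still appear intact in at least one chunk.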
text_splitter = RecursiveCharacterTextSplitter(
chunk_size=1000,
chunk_overlap=200,
length_function=len
)
chunks = text_splitter.split_text(text)
embeddings = load_embedding_model()
vector_store = FAISS.from_texts(chunks, embeddings)
qa_model = load_qa_model()
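        # "stuff" chain: the top-3 retrieved chunks are concatenated into a single prompt for the LLM;
        # return_source_documents=True exposes those chunks for the "See source passages" expander.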
st.session_state.qa_chain = RetrievalQA.from_chain_type(
llm=qa_model,
chain_type="stuff",
retriever=vector_store.as_retriever(search_kwargs={"k": 3}),
return_source_documents=True
)
st.session_state.pdf_processed = True
st.success("βœ… PDF processed successfully!")
def generate_qa_for_chapter(start_page, end_page):
"""Generate Q&A for specific chapter pages"""
if start_page < 1 or end_page > len(st.session_state.pages) or start_page > end_page:
st.error("Invalid page range")
return []
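    # Page numbers are 1-indexed in the UI; the slice converts them to 0-indexed list positions.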
chapter_text = "\n".join(st.session_state.pages[start_page-1:end_page])
text_splitter = RecursiveCharacterTextSplitter(
chunk_size=800,
chunk_overlap=100,
length_function=len
)
chunks = text_splitter.split_text(chapter_text)
qa_pairs = []
qa_model = load_qa_model()
with st.spinner(f"🧠 Generating Q&A for pages {start_page}-{end_page}..."):
        for i, chunk in enumerate(chunks):
            if i % 2 == 0:  # Even-indexed chunk: generate a study question
                prompt = f"Generate a study question based on: {chunk[:500]}"
                question = qa_model(prompt)[:120] + "?"
                qa_pairs.append((question, ""))
            elif qa_pairs:  # Odd-indexed chunk: answer the most recent question
                prompt = f"Answer the question: {qa_pairs[-1][0]} using context: {chunk[:500]}"
                answer = qa_model(prompt)
                qa_pairs[-1] = (qa_pairs[-1][0], answer)
return qa_pairs
# App header
st.markdown("<h1 class='header'>πŸ“š PDF Study Assistant</h1>", unsafe_allow_html=True)
# PDF Upload Section
with st.container():
st.subheader("πŸ“€ Upload Your Textbook/Notes")
    pdf_file = st.file_uploader("Upload a PDF", type="pdf", label_visibility="collapsed")
# Main content
if pdf_file:
    # Re-process when a different file is uploaded; the processed file's name is tracked in session state
    if not st.session_state.pdf_processed or st.session_state.get("processed_file_name") != pdf_file.name:
        st.session_state.processed_file_name = pdf_file.name
        process_pdf(pdf_file)
if st.session_state.pdf_processed:
# Navigation tabs
selected_tab = option_menu(
None,
["Ask Questions", "Generate Chapter Q&A"],
icons=["chat", "book"],
menu_icon="cast",
default_index=0,
orientation="horizontal",
styles={
"container": {"padding": "0!important", "background-color": "#f9f9f9"},
"nav-link": {"font-size": "16px", "font-weight": "bold"},
"nav-link-selected": {"background": "linear-gradient(to right, #3a86ff, #ff4b4b)"},
}
)
# Question Answering Tab
if selected_tab == "Ask Questions":
st.markdown("### πŸ’¬ Ask Questions About Your Document")
user_question = st.text_input("Type your question here:", key="user_question")
if user_question:
with st.spinner("πŸ€” Thinking..."):
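                # The chain returns a dict containing the generated "result" and the retrieved "source_documents".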
result = st.session_state.qa_chain({"query": user_question})
st.markdown(f"<div class='card'><b>Answer:</b> {result['result']}</div>", unsafe_allow_html=True)
with st.expander("πŸ” See source passages"):
for i, doc in enumerate(result["source_documents"]):
st.markdown(f"**Passage {i+1}:** {doc.page_content[:500]}...")
# Chapter Q&A Generation Tab
elif selected_tab == "Generate Chapter Q&A":
st.markdown("### πŸ“ Generate Q&A for Specific Chapter")
col1, col2 = st.columns(2)
with col1:
start_page = st.number_input("Start Page", min_value=1, max_value=len(st.session_state.pages), value=1)
with col2:
end_page = st.number_input("End Page", min_value=1, max_value=len(st.session_state.pages), value=min(5, len(st.session_state.pages)))
if st.button("Generate Q&A", key="generate_qa"):
qa_pairs = generate_qa_for_chapter(start_page, end_page)
if qa_pairs:
st.markdown(f"<h4>πŸ“– Generated Questions for Pages {start_page}-{end_page}</h4>", unsafe_allow_html=True)
for i, (question, answer) in enumerate(qa_pairs):
st.markdown(f"""
<div class='card'>
<b>Q{i+1}:</b> {question}<br>
<b>A{i+1}:</b> {answer}
</div>
""", unsafe_allow_html=True)
else:
st.warning("No Q&A pairs generated. Try a different page range.")
# Footer
st.markdown("---")
st.markdown("""
<div style="text-align: center; padding: 20px;">
Built with ❀️ for students | PDF Study Assistant v1.0
</div>
""", unsafe_allow_html=True)