# Hugging Face Space page residue, preserved as a comment so the file parses:
# sunbal7 — "Update app.py" — commit 66d14e0 (verified)
import streamlit as st
import os
import tempfile
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain_community.llms import HuggingFaceHub
import base64
# Set page config with light purple theme
st.set_page_config(
page_title="EduQuery - Smart PDF Assistant",
page_icon="πŸ“š",
layout="wide",
initial_sidebar_state="collapsed"
)
# Embedded CSS for light purple UI, injected via markdown with HTML enabled.
# The [data-testid] selectors target Streamlit's internal chat-message DOM and
# may break across Streamlit versions — revisit on upgrade.
st.markdown("""
<style>
:root {
--primary: #8a4fff;
--secondary: #d0bcff;
--light: #f3edff;
--dark: #4a2b80;
}
body {
background-color: #f8f5ff;
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
}
.stApp {
max-width: 1200px;
margin: 0 auto;
padding: 2rem;
}
.header {
background: linear-gradient(135deg, var(--primary) 0%, var(--dark) 100%);
color: white;
padding: 2rem;
border-radius: 15px;
margin-bottom: 2rem;
text-align: center;
box-shadow: 0 4px 20px rgba(138, 79, 255, 0.2);
}
.header h1 {
font-size: 2.8rem;
margin-bottom: 0.5rem;
}
.stButton>button {
background: linear-gradient(135deg, var(--primary) 0%, var(--dark) 100%);
color: white;
border: none;
border-radius: 25px;
padding: 0.75rem 2rem;
font-weight: bold;
font-size: 1rem;
transition: all 0.3s ease;
margin-top: 1rem;
}
.stButton>button:hover {
transform: scale(1.05);
box-shadow: 0 5px 15px rgba(138, 79, 255, 0.3);
}
.stTextInput>div>div>input {
border-radius: 25px;
padding: 0.9rem 1.5rem;
border: 1px solid var(--secondary);
background-color: var(--light);
}
.stTextInput>div>div>input:focus {
border-color: var(--primary);
box-shadow: 0 0 0 2px rgba(138, 79, 255, 0.2);
}
.stChatMessage {
padding: 1.5rem;
border-radius: 20px;
margin-bottom: 1rem;
max-width: 80%;
box-shadow: 0 4px 12px rgba(0,0,0,0.05);
}
.stChatMessage[data-testid="user"] {
background: linear-gradient(135deg, #d0bcff 0%, #b8a1ff 100%);
margin-left: auto;
color: #4a2b80;
}
.stChatMessage[data-testid="assistant"] {
background: linear-gradient(135deg, #e6dcff 0%, #f3edff 100%);
margin-right: auto;
color: #4a2b80;
border: 1px solid var(--secondary);
}
.upload-area {
background: linear-gradient(135deg, #f3edff 0%, #e6dcff 100%);
padding: 2rem;
border-radius: 15px;
text-align: center;
border: 2px dashed var(--primary);
margin-bottom: 2rem;
}
.chat-area {
background: white;
padding: 2rem;
border-radius: 15px;
box-shadow: 0 4px 20px rgba(138, 79, 255, 0.1);
height: 500px;
overflow-y: auto;
}
.footer {
text-align: center;
color: #8a4fff;
padding-top: 2rem;
font-size: 0.9rem;
margin-top: 2rem;
border-top: 1px solid var(--secondary);
}
.spinner {
color: var(--primary) !important;
}
.stSpinner > div > div {
border-top-color: var(--primary) !important;
}
.token-input {
background: var(--light);
padding: 1rem;
border-radius: 15px;
margin-bottom: 1rem;
}
</style>
""", unsafe_allow_html=True)
# Header with gradient
st.markdown("""
<div class="header">
<h1>πŸ“š EduQuery</h1>
<p>Smart PDF Assistant for Students</p>
</div>
""", unsafe_allow_html=True)
# Initialize session state
# Seed each per-session slot exactly once; Streamlit keeps st.session_state
# alive across script reruns, so existing values are never clobbered here.
for _slot, _default in (
    ("vector_store", None),   # FAISS index built from the uploaded PDF
    ("chat_history", []),     # list of {"role": ..., "content": ...} dicts
    ("qa_chain", None),       # ConversationalRetrievalChain, once configured
):
    if _slot not in st.session_state:
        st.session_state[_slot] = _default
# PDF Processing
def process_pdf(pdf_file):
    """Build a FAISS vector store from an uploaded PDF.

    Args:
        pdf_file: Streamlit UploadedFile holding the PDF bytes.

    Returns:
        A FAISS vector store over overlapping ~800-character chunks of the
        document, embedded with all-MiniLM-L6-v2.
    """
    # PyPDFLoader needs a real filesystem path, so spill the upload to a
    # temp file (delete=False: the path must outlive this `with` block).
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
        tmp_file.write(pdf_file.getvalue())
        tmp_path = tmp_file.name
    try:
        loader = PyPDFLoader(tmp_path)
        pages = loader.load_and_split()
        # Overlap keeps sentences that straddle a chunk boundary retrievable.
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=800,
            chunk_overlap=150
        )
        chunks = text_splitter.split_documents(pages)
        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        return FAISS.from_documents(chunks, embeddings)
    finally:
        # Bug fix: the original only unlinked on success, leaking the temp
        # file whenever loading/splitting/embedding raised.
        os.unlink(tmp_path)
# Setup QA Chain
def setup_qa_chain(vector_store, hf_token=None):
    """Wire a conversational retrieval chain over *vector_store*.

    Args:
        vector_store: FAISS store produced by process_pdf.
        hf_token: optional Hugging Face API token for the hosted model.

    Returns:
        A ConversationalRetrievalChain, or None when the LLM cannot be built
        (an error banner is shown in that case).
    """
    # Use free open-source model that doesn't require authentication.
    repo_id = "google/flan-t5-xxl"  # free model that doesn't require token
    llm_kwargs = {
        "repo_id": repo_id,
        "model_kwargs": {"temperature": 0.5, "max_new_tokens": 500},
    }
    if hf_token:
        llm_kwargs["huggingfacehub_api_token"] = hf_token
    try:
        # Without a token this still works for some open models.
        llm = HuggingFaceHub(**llm_kwargs)
    except Exception as e:
        st.error(f"Error loading model: {str(e)}")
        return None
    conversation_memory = ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True
    )
    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vector_store.as_retriever(search_kwargs={"k": 3}),
        memory=conversation_memory,
        chain_type="stuff"
    )
# Hugging Face Token Input
st.markdown("""
<div class="token-input">
<h3>πŸ”‘ Hugging Face Token (Optional)</h3>
<p>For better models like Mistral, enter your <a href="https://huggingface.co/settings/tokens" target="_blank">Hugging Face token</a></p>
""", unsafe_allow_html=True)
hf_token = st.text_input("", type="password", label_visibility="collapsed", placeholder="hf_xxxxxxxxxxxxxxxxxx")
st.markdown("</div>", unsafe_allow_html=True)
# File upload section
st.markdown("""
<div class="upload-area">
<h3>📤 Upload Your Textbook/Notes</h3>
""", unsafe_allow_html=True)
uploaded_file = st.file_uploader("", type="pdf", accept_multiple_files=False, label_visibility="collapsed")
st.markdown("</div>", unsafe_allow_html=True)
if uploaded_file:
    # Bug fix: Streamlit reruns this whole script on every interaction, and
    # uploaded_file stays set across reruns — the original re-embedded the
    # entire PDF and rebuilt the chain after every chat message. Only
    # reprocess when the file (or the token, which the chain depends on)
    # actually changes.
    file_signature = (uploaded_file.name, uploaded_file.size, hf_token)
    if st.session_state.get("processed_file") != file_signature:
        with st.spinner("Processing PDF..."):
            st.session_state.vector_store = process_pdf(uploaded_file)
            st.session_state.qa_chain = setup_qa_chain(st.session_state.vector_store, hf_token)
        st.session_state.processed_file = file_signature
        if st.session_state.qa_chain:
            st.success("PDF processed successfully! You can now ask questions.")
# Chat interface
st.markdown("""
<div class="chat-area">
<h3>💬 Ask Anything About the Document</h3>
""", unsafe_allow_html=True)
# Replay the stored transcript first so the conversation renders in order
# on every script rerun.
for message in st.session_state.chat_history:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])
# User input (walrus: block runs only when a non-empty question is submitted).
if prompt := st.chat_input("Your question..."):
    if not st.session_state.vector_store:
        st.warning("Please upload a PDF first")
        st.stop()  # abort this rerun; nothing below executes
    if not st.session_state.qa_chain:
        st.error("Model not initialized. Please check your Hugging Face token or try again.")
        st.stop()
    # Add user message to chat history
    st.session_state.chat_history.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)
    # Get assistant response
    with st.chat_message("assistant"):
        with st.spinner("Thinking..."):
            try:
                response = st.session_state.qa_chain({"question": prompt})
                answer = response["answer"]
            except Exception as e:
                # Surface the failure inside the chat bubble instead of
                # crashing the app; the error text also lands in history.
                answer = f"Error: {str(e)}"
        st.markdown(answer)
    # Add assistant response to chat history
    st.session_state.chat_history.append({"role": "assistant", "content": answer})
st.markdown("</div>", unsafe_allow_html=True)
# Footer
st.markdown("""
<div class="footer">
<p>EduQuery - Helping students learn smarter β€’ Powered by Flan-T5 and LangChain</p>
</div>
""", unsafe_allow_html=True)