import streamlit as st
import os
import tempfile
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain_community.llms import HuggingFaceHub
import base64
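# App flow (as implemented below): an uploaded PDF is split into overlapping
# chunks, embedded with a local sentence-transformers model into a FAISS index,
# and queried through a ConversationalRetrievalChain backed by a Hugging Face
# hosted LLM, with chat history kept in Streamlit session state.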
# Set page config with light purple theme
st.set_page_config(
    page_title="EduQuery - Smart PDF Assistant",
    page_icon="📚",
    layout="wide",
    initial_sidebar_state="collapsed"
)
# Embedded CSS for light purple UI
st.markdown("""
<style>
:root {
    --primary: #8a4fff;
    --secondary: #d0bcff;
    --light: #f3edff;
    --dark: #4a2b80;
}
body {
    background-color: #f8f5ff;
    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
}
.stApp {
    max-width: 1200px;
    margin: 0 auto;
    padding: 2rem;
}
.header {
    background: linear-gradient(135deg, var(--primary) 0%, var(--dark) 100%);
    color: white;
    padding: 2rem;
    border-radius: 15px;
    margin-bottom: 2rem;
    text-align: center;
    box-shadow: 0 4px 20px rgba(138, 79, 255, 0.2);
}
.header h1 {
    font-size: 2.8rem;
    margin-bottom: 0.5rem;
}
.stButton>button {
    background: linear-gradient(135deg, var(--primary) 0%, var(--dark) 100%);
    color: white;
    border: none;
    border-radius: 25px;
    padding: 0.75rem 2rem;
    font-weight: bold;
    font-size: 1rem;
    transition: all 0.3s ease;
    margin-top: 1rem;
}
.stButton>button:hover {
    transform: scale(1.05);
    box-shadow: 0 5px 15px rgba(138, 79, 255, 0.3);
}
.stTextInput>div>div>input {
    border-radius: 25px;
    padding: 0.9rem 1.5rem;
    border: 1px solid var(--secondary);
    background-color: var(--light);
}
.stTextInput>div>div>input:focus {
    border-color: var(--primary);
    box-shadow: 0 0 0 2px rgba(138, 79, 255, 0.2);
}
.stChatMessage {
    padding: 1.5rem;
    border-radius: 20px;
    margin-bottom: 1rem;
    max-width: 80%;
    box-shadow: 0 4px 12px rgba(0,0,0,0.05);
}
.stChatMessage[data-testid="user"] {
    background: linear-gradient(135deg, #d0bcff 0%, #b8a1ff 100%);
    margin-left: auto;
    color: #4a2b80;
}
.stChatMessage[data-testid="assistant"] {
    background: linear-gradient(135deg, #e6dcff 0%, #f3edff 100%);
    margin-right: auto;
    color: #4a2b80;
    border: 1px solid var(--secondary);
}
.upload-area {
    background: linear-gradient(135deg, #f3edff 0%, #e6dcff 100%);
    padding: 2rem;
    border-radius: 15px;
    text-align: center;
    border: 2px dashed var(--primary);
    margin-bottom: 2rem;
}
.chat-area {
    background: white;
    padding: 2rem;
    border-radius: 15px;
    box-shadow: 0 4px 20px rgba(138, 79, 255, 0.1);
    height: 500px;
    overflow-y: auto;
}
.footer {
    text-align: center;
    color: #8a4fff;
    padding-top: 2rem;
    font-size: 0.9rem;
    margin-top: 2rem;
    border-top: 1px solid var(--secondary);
}
.spinner {
    color: var(--primary) !important;
}
.stSpinner > div > div {
    border-top-color: var(--primary) !important;
}
.token-input {
    background: var(--light);
    padding: 1rem;
    border-radius: 15px;
    margin-bottom: 1rem;
}
</style>
""", unsafe_allow_html=True)
# Header with gradient
st.markdown("""
<div class="header">
    <h1>📚 EduQuery</h1>
    <p>Smart PDF Assistant for Students</p>
</div>
""", unsafe_allow_html=True)
# Initialize session state
if "vector_store" not in st.session_state:
    st.session_state.vector_store = None
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []
if "qa_chain" not in st.session_state:
    st.session_state.qa_chain = None
# PDF Processing
def process_pdf(pdf_file):
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
        tmp_file.write(pdf_file.getvalue())
        tmp_path = tmp_file.name
    loader = PyPDFLoader(tmp_path)
    pages = loader.load_and_split()
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=800,
        chunk_overlap=150
    )
    chunks = text_splitter.split_documents(pages)
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    vector_store = FAISS.from_documents(chunks, embeddings)
    os.unlink(tmp_path)
    return vector_store
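# Note: HuggingFaceEmbeddings runs sentence-transformers/all-MiniLM-L6-v2 locally
# (the model is downloaded on first use), so chunking, embedding, and retrieval
# need no API token; only the hosted LLM configured below does.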
# Setup QA Chain
def setup_qa_chain(vector_store, hf_token=None):
    # Hosted open-source model; calls go through the Hugging Face Inference API
    repo_id = "google/flan-t5-xxl"
    try:
        if hf_token:
            llm = HuggingFaceHub(
                repo_id=repo_id,
                huggingfacehub_api_token=hf_token,
                model_kwargs={"temperature": 0.5, "max_new_tokens": 500}
            )
        else:
            # Without an explicit token this relies on HUGGINGFACEHUB_API_TOKEN being
            # set in the environment; if it is not, the error is surfaced below
            llm = HuggingFaceHub(
                repo_id=repo_id,
                model_kwargs={"temperature": 0.5, "max_new_tokens": 500}
            )
    except Exception as e:
        st.error(f"Error loading model: {str(e)}")
        return None
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True
    )
    qa_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vector_store.as_retriever(search_kwargs={"k": 3}),
        memory=memory,
        chain_type="stuff"
    )
    return qa_chain
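# How a query flows through the chain: ConversationalRetrievalChain first condenses
# the new question with the buffered chat history into a standalone question,
# retrieves the k=3 most similar chunks from FAISS, and "stuff"s them into a
# single prompt for the LLM.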
# Hugging Face Token Input
st.markdown("""
<div class="token-input">
    <h3>🔑 Hugging Face Token (Optional)</h3>
    <p>Enter your <a href="https://huggingface.co/settings/tokens" target="_blank">Hugging Face token</a> to authenticate calls to the hosted model</p>
""", unsafe_allow_html=True)
hf_token = st.text_input("Hugging Face token", type="password", label_visibility="collapsed", placeholder="hf_xxxxxxxxxxxxxxxxxx")
st.markdown("</div>", unsafe_allow_html=True)
# File upload section
st.markdown("""
<div class="upload-area">
    <h3>📤 Upload Your Textbook/Notes</h3>
""", unsafe_allow_html=True)
uploaded_file = st.file_uploader("Upload a PDF", type="pdf", accept_multiple_files=False, label_visibility="collapsed")
st.markdown("</div>", unsafe_allow_html=True)
if uploaded_file:
    # Only rebuild the index for a new file, so chat reruns don't re-embed the same PDF
    if st.session_state.get("processed_file") != uploaded_file.name:
        with st.spinner("Processing PDF..."):
            st.session_state.vector_store = process_pdf(uploaded_file)
            st.session_state.qa_chain = setup_qa_chain(st.session_state.vector_store, hf_token)
            st.session_state.processed_file = uploaded_file.name
        if st.session_state.qa_chain:
            st.success("PDF processed successfully! You can now ask questions.")
# Chat interface
st.markdown("""
<div class="chat-area">
    <h3>💬 Ask Anything About the Document</h3>
""", unsafe_allow_html=True)
# Display chat history
for message in st.session_state.chat_history:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])
# User input
if prompt := st.chat_input("Your question..."):
    if not st.session_state.vector_store:
        st.warning("Please upload a PDF first")
        st.stop()
    if not st.session_state.qa_chain:
        st.error("Model not initialized. Please check your Hugging Face token or try again.")
        st.stop()
    # Add user message to chat history
    st.session_state.chat_history.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)
    # Get assistant response
    with st.chat_message("assistant"):
        with st.spinner("Thinking..."):
            try:
                response = st.session_state.qa_chain({"question": prompt})
                answer = response["answer"]
            except Exception as e:
                answer = f"Error: {str(e)}"
            st.markdown(answer)
    # Add assistant response to chat history
    st.session_state.chat_history.append({"role": "assistant", "content": answer})
st.markdown("</div>", unsafe_allow_html=True) | |
# Footer | |
st.markdown(""" | |
<div class="footer"> | |
<p>EduQuery - Helping students learn smarter β’ Powered by Flan-T5 and LangChain</p> | |
</div> | |
""", unsafe_allow_html=True) |