import streamlit as st
import os
import tempfile
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain_community.llms import HuggingFaceHub
import base64
# ---- Page configuration: light purple theme ----
st.set_page_config(
    page_title="EduQuery - Smart PDF Assistant",
    page_icon="📚",
    layout="wide",
    initial_sidebar_state="collapsed",
)

# Embedded CSS for light purple UI
st.markdown("""
""", unsafe_allow_html=True)

# Header with gradient
st.markdown("""
""", unsafe_allow_html=True)

# ---- Session-state defaults (survive Streamlit's per-interaction reruns) ----
for _key, _default in (("vector_store", None), ("chat_history", []), ("qa_chain", None)):
    if _key not in st.session_state:
        st.session_state[_key] = _default
# PDF Processing
def process_pdf(pdf_file):
    """Build a FAISS vector store from an uploaded PDF.

    Args:
        pdf_file: Streamlit ``UploadedFile`` holding the PDF bytes.

    Returns:
        FAISS vector store over ~800-character overlapping chunks of the
        document, embedded with all-MiniLM-L6-v2.
    """
    # PyPDFLoader needs a filesystem path, so spill the upload to a temp file.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
        tmp_file.write(pdf_file.getvalue())
        tmp_path = tmp_file.name
    try:
        loader = PyPDFLoader(tmp_path)
        pages = loader.load_and_split()
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=800,
            chunk_overlap=150,
        )
        chunks = text_splitter.split_documents(pages)
        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        vector_store = FAISS.from_documents(chunks, embeddings)
    finally:
        # Delete the temp file even when loading/embedding raises — the
        # original only unlinked on the success path, leaking the file.
        os.unlink(tmp_path)
    return vector_store
# Setup QA Chain
def setup_qa_chain(vector_store, hf_token=None):
    """Create a ConversationalRetrievalChain backed by a free HF-hosted LLM.

    Args:
        vector_store: FAISS store produced by ``process_pdf``.
        hf_token: Optional Hugging Face API token. When omitted, the hub
            client is tried without an explicit token (works for some open
            models / falls back to the HUGGINGFACEHUB_API_TOKEN env var).

    Returns:
        The configured chain, or ``None`` when the model failed to load
        (an error is shown in the UI in that case).
    """
    # Use a free open-source model that doesn't require authentication.
    repo_id = "google/flan-t5-xxl"  # free model that doesn't require token
    try:
        # Single construction site — the original duplicated this call in
        # both branches with only the token kwarg differing.
        llm_kwargs = {
            "repo_id": repo_id,
            "model_kwargs": {"temperature": 0.5, "max_new_tokens": 500},
        }
        if hf_token:
            llm_kwargs["huggingfacehub_api_token"] = hf_token
        llm = HuggingFaceHub(**llm_kwargs)
    except Exception as e:
        st.error(f"Error loading model: {str(e)}")
        return None

    # Keep the running conversation so follow-up questions have context.
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True,
    )
    qa_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vector_store.as_retriever(search_kwargs={"k": 3}),
        memory=memory,
        chain_type="stuff",
    )
    return qa_chain
# ---- Hugging Face token input ----
# NOTE(review): the original referenced `hf_token` below without ever
# defining it (NameError at runtime); the widget that should define it was
# lost in the garbled markdown at this spot. Restore a password input here.
hf_token = st.text_input(
    "Hugging Face API token (optional)",
    type="password",
    help="Leave blank to try anonymous access to the open model.",
)

# ---- File upload section ----
st.markdown("""
📤 Upload Your Textbook/Notes
""", unsafe_allow_html=True)
uploaded_file = st.file_uploader("", type="pdf", accept_multiple_files=False, label_visibility="collapsed")
st.markdown("", unsafe_allow_html=True)

if uploaded_file:
    # Re-embed only when a new file arrives: Streamlit reruns the whole
    # script on every interaction, and processing a PDF is expensive.
    if st.session_state.get("processed_file") != uploaded_file.name:
        with st.spinner("Processing PDF..."):
            st.session_state.vector_store = process_pdf(uploaded_file)
            st.session_state.qa_chain = setup_qa_chain(st.session_state.vector_store, hf_token)
            st.session_state.processed_file = uploaded_file.name
        if st.session_state.qa_chain:
            st.success("PDF processed successfully! You can now ask questions.")
# ---- Chat interface ----
st.markdown("""
💬 Ask Anything About the Document
""", unsafe_allow_html=True)

# Replay the conversation so far (the script reruns on every interaction).
for past_message in st.session_state.chat_history:
    with st.chat_message(past_message["role"]):
        st.markdown(past_message["content"])

# Handle a new question from the user.
if prompt := st.chat_input("Your question..."):
    # Guard clauses: an indexed PDF and a working chain are prerequisites.
    if not st.session_state.vector_store:
        st.warning("Please upload a PDF first")
        st.stop()
    if not st.session_state.qa_chain:
        st.error("Model not initialized. Please check your Hugging Face token or try again.")
        st.stop()

    # Record and echo the user's turn.
    st.session_state.chat_history.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    # Produce the assistant's turn; surface failures as the answer text
    # rather than crashing the app.
    with st.chat_message("assistant"):
        with st.spinner("Thinking..."):
            try:
                result = st.session_state.qa_chain({"question": prompt})
                answer = result["answer"]
            except Exception as exc:
                answer = f"Error: {str(exc)}"
        st.markdown(answer)

    st.session_state.chat_history.append({"role": "assistant", "content": answer})

st.markdown("", unsafe_allow_html=True)

# Footer
st.markdown("""
""", unsafe_allow_html=True)