import streamlit as st
import os
import tempfile
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain_community.llms import HuggingFaceHub
import base64
# ---- Page configuration: light purple theme ----
st.set_page_config(
    page_title="EduQuery - Smart PDF Assistant",
    page_icon="📚",
    layout="wide",
    initial_sidebar_state="collapsed",
)

# Embedded CSS for light purple UI
st.markdown("""
""", unsafe_allow_html=True)

# Header with gradient
st.markdown("""
""", unsafe_allow_html=True)

# ---- Session-state defaults (survive Streamlit's per-interaction reruns) ----
for _key, _default in (("vector_store", None), ("chat_history", []), ("qa_chain", None)):
    if _key not in st.session_state:
        st.session_state[_key] = _default
# PDF Processing
def process_pdf(pdf_file):
    """Build a FAISS vector store from an uploaded PDF.

    Args:
        pdf_file: Streamlit ``UploadedFile`` holding the PDF bytes.

    Returns:
        FAISS vector store over ~800-character overlapping chunks of the
        document, embedded with all-MiniLM-L6-v2.
    """
    # PyPDFLoader needs a filesystem path, so spill the upload to a temp file.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
        tmp_file.write(pdf_file.getvalue())
        tmp_path = tmp_file.name
    try:
        loader = PyPDFLoader(tmp_path)
        pages = loader.load_and_split()
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=800,
            chunk_overlap=150,
        )
        chunks = text_splitter.split_documents(pages)
        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        vector_store = FAISS.from_documents(chunks, embeddings)
    finally:
        # Delete the temp file even when loading/embedding raises — the
        # original only unlinked on the success path, leaking the file.
        os.unlink(tmp_path)
    return vector_store
# Setup QA Chain
def setup_qa_chain(vector_store, hf_token=None):
    """Create a ConversationalRetrievalChain backed by a free HF-hosted LLM.

    Args:
        vector_store: FAISS store produced by ``process_pdf``.
        hf_token: Optional Hugging Face API token. When omitted, the hub
            client is tried without an explicit token (works for some open
            models / falls back to the HUGGINGFACEHUB_API_TOKEN env var).

    Returns:
        The configured chain, or ``None`` when the model failed to load
        (an error is shown in the UI in that case).
    """
    # Use a free open-source model that doesn't require authentication.
    repo_id = "google/flan-t5-xxl"  # free model that doesn't require token
    try:
        # Single construction site — the original duplicated this call in
        # both branches with only the token kwarg differing.
        llm_kwargs = {
            "repo_id": repo_id,
            "model_kwargs": {"temperature": 0.5, "max_new_tokens": 500},
        }
        if hf_token:
            llm_kwargs["huggingfacehub_api_token"] = hf_token
        llm = HuggingFaceHub(**llm_kwargs)
    except Exception as e:
        st.error(f"Error loading model: {str(e)}")
        return None

    # Keep the running conversation so follow-up questions have context.
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True,
    )
    qa_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vector_store.as_retriever(search_kwargs={"k": 3}),
        memory=memory,
        chain_type="stuff",
    )
    return qa_chain
# ---- Hugging Face token input ----
# NOTE(review): the original referenced `hf_token` below without ever
# defining it (NameError at runtime); the widget that should define it was
# lost in the garbled markdown at this spot. Restore a password input here.
hf_token = st.text_input(
    "Hugging Face API token (optional)",
    type="password",
    help="Leave blank to try anonymous access to the open model.",
)

# ---- File upload section ----
st.markdown("""
📤 Upload Your Textbook/Notes
""", unsafe_allow_html=True)
uploaded_file = st.file_uploader("", type="pdf", accept_multiple_files=False, label_visibility="collapsed")
st.markdown("", unsafe_allow_html=True)

if uploaded_file:
    # Re-embed only when a new file arrives: Streamlit reruns the whole
    # script on every interaction, and processing a PDF is expensive.
    if st.session_state.get("processed_file") != uploaded_file.name:
        with st.spinner("Processing PDF..."):
            st.session_state.vector_store = process_pdf(uploaded_file)
            st.session_state.qa_chain = setup_qa_chain(st.session_state.vector_store, hf_token)
            st.session_state.processed_file = uploaded_file.name
        if st.session_state.qa_chain:
            st.success("PDF processed successfully! You can now ask questions.")
# ---- Chat interface ----
st.markdown("""
💬 Ask Anything About the Document
""", unsafe_allow_html=True)

# Replay the conversation so far (the script reruns on every interaction).
for past_message in st.session_state.chat_history:
    with st.chat_message(past_message["role"]):
        st.markdown(past_message["content"])

# Handle a new question from the user.
if prompt := st.chat_input("Your question..."):
    # Guard clauses: an indexed PDF and a working chain are prerequisites.
    if not st.session_state.vector_store:
        st.warning("Please upload a PDF first")
        st.stop()
    if not st.session_state.qa_chain:
        st.error("Model not initialized. Please check your Hugging Face token or try again.")
        st.stop()

    # Record and echo the user's turn.
    st.session_state.chat_history.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    # Produce the assistant's turn; surface failures as the answer text
    # rather than crashing the app.
    with st.chat_message("assistant"):
        with st.spinner("Thinking..."):
            try:
                result = st.session_state.qa_chain({"question": prompt})
                answer = result["answer"]
            except Exception as exc:
                answer = f"Error: {str(exc)}"
        st.markdown(answer)

    st.session_state.chat_history.append({"role": "assistant", "content": answer})

st.markdown("", unsafe_allow_html=True)

# Footer
st.markdown("""
""", unsafe_allow_html=True)