Spaces:

sunbal7
/

PDFQueryApplication

Sleeping

File size: 8,162 Bytes

1c7a288
6c9740a
 
e6bfac3
24ba781
6c9740a
 
ba3ef77
 
 
6c9740a
 
ba3ef77
6c9740a
 
 
 
 
6648f74
 
ba3ef77
6c9740a
 
ba3ef77
 
 
 
 
 
 
6c9740a
ba3ef77
 
6c9740a
 
 
 
 
 
 
 
 
ba3ef77
6c9740a
 
 
 
 
ba3ef77
6c9740a
 
 
ba3ef77
6c9740a
 
 
 
ba3ef77
6c9740a
 
 
ba3ef77
6c9740a
ba3ef77
6c9740a
ba3ef77
6c9740a
 
 
 
ba3ef77
6c9740a
 
 
 
ba3ef77
 
 
 
 
 
 
 
6c9740a
 
 
 
 
 
 
ba3ef77
6c9740a
 
 
ba3ef77
6c9740a
ba3ef77
6c9740a
 
 
ba3ef77
6c9740a
ba3ef77
 
6c9740a
 
ba3ef77
 
 
6c9740a
ba3ef77
 
 
 
 
 
 
 
 
 
 
 
6c9740a
 
 
 
ba3ef77
 
6c9740a
ba3ef77
 
 
 
 
 
 
 
 
 
6c9740a
66d14e0
 
 
 
 
 
 
6c9740a
 
 
 
 
 
 
 
 
 
 
 
 
 
ba3ef77
 
 
 
6c9740a
 
 
 
 
 
 
 
ba3ef77
6c9740a
 
ba3ef77
 
6c9740a
ba3ef77
6c9740a
ba3ef77
6c9740a
 
 
 
 
ba3ef77
66d14e0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6c9740a
ba3ef77
 
 
6c9740a
 
ba3ef77
 
 
 
 
e6bfac3
6c9740a
ba3ef77
24ba781
66d14e0
 
 
 
 
 
 
 
 
6c9740a
ba3ef77
 
 
 
 
6c9740a
24ba781
ba3ef77
 
6c9740a
 
 
66d14e0
 
 
6c9740a
ba3ef77
 
 
 
 
6c9740a
ba3ef77
 
 
 
 
 
 
 
6c9740a
ba3ef77
66d14e0
 
 
 
ba3ef77
 
 
 
 
 
 
 
 
66d14e0
 
 
 
 
ba3ef77
 
 
 
6c9740a
ba3ef77
6c9740a
 
ba3ef77
 
66d14e0
ba3ef77

import streamlit as st
import os
import tempfile
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain_community.llms import HuggingFaceHub
import base64

# Page-level settings: browser tab title and icon, wide layout, and a sidebar
# that starts collapsed.
_page_settings = {
    "page_title": "EduQuery - Smart PDF Assistant",
    "page_icon": "📚",
    "layout": "wide",
    "initial_sidebar_state": "collapsed",
}
st.set_page_config(**_page_settings)

# Stylesheet for the light purple UI. It declares the colour palette as CSS
# custom properties and styles the Streamlit widgets plus the custom classes
# used by the HTML snippets further down (.header, .upload-area, .chat-area,
# .footer, .token-input).
_PAGE_CSS = """
<style>
:root {
    --primary: #8a4fff;
    --secondary: #d0bcff;
    --light: #f3edff;
    --dark: #4a2b80;
}

body {
    background-color: #f8f5ff;
    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
}

.stApp {
    max-width: 1200px;
    margin: 0 auto;
    padding: 2rem;
}

.header {
    background: linear-gradient(135deg, var(--primary) 0%, var(--dark) 100%);
    color: white;
    padding: 2rem;
    border-radius: 15px;
    margin-bottom: 2rem;
    text-align: center;
    box-shadow: 0 4px 20px rgba(138, 79, 255, 0.2);
}

.header h1 {
    font-size: 2.8rem;
    margin-bottom: 0.5rem;
}

.stButton>button {
    background: linear-gradient(135deg, var(--primary) 0%, var(--dark) 100%);
    color: white;
    border: none;
    border-radius: 25px;
    padding: 0.75rem 2rem;
    font-weight: bold;
    font-size: 1rem;
    transition: all 0.3s ease;
    margin-top: 1rem;
}

.stButton>button:hover {
    transform: scale(1.05);
    box-shadow: 0 5px 15px rgba(138, 79, 255, 0.3);
}

.stTextInput>div>div>input {
    border-radius: 25px;
    padding: 0.9rem 1.5rem;
    border: 1px solid var(--secondary);
    background-color: var(--light);
}

.stTextInput>div>div>input:focus {
    border-color: var(--primary);
    box-shadow: 0 0 0 2px rgba(138, 79, 255, 0.2);
}

.stChatMessage {
    padding: 1.5rem;
    border-radius: 20px;
    margin-bottom: 1rem;
    max-width: 80%;
    box-shadow: 0 4px 12px rgba(0,0,0,0.05);
}

.stChatMessage[data-testid="user"] {
    background: linear-gradient(135deg, #d0bcff 0%, #b8a1ff 100%);
    margin-left: auto;
    color: #4a2b80;
}

.stChatMessage[data-testid="assistant"] {
    background: linear-gradient(135deg, #e6dcff 0%, #f3edff 100%);
    margin-right: auto;
    color: #4a2b80;
    border: 1px solid var(--secondary);
}

.upload-area {
    background: linear-gradient(135deg, #f3edff 0%, #e6dcff 100%);
    padding: 2rem;
    border-radius: 15px;
    text-align: center;
    border: 2px dashed var(--primary);
    margin-bottom: 2rem;
}

.chat-area {
    background: white;
    padding: 2rem;
    border-radius: 15px;
    box-shadow: 0 4px 20px rgba(138, 79, 255, 0.1);
    height: 500px;
    overflow-y: auto;
}

.footer {
    text-align: center;
    color: #8a4fff;
    padding-top: 2rem;
    font-size: 0.9rem;
    margin-top: 2rem;
    border-top: 1px solid var(--secondary);
}

.spinner {
    color: var(--primary) !important;
}

.stSpinner > div > div {
    border-top-color: var(--primary) !important;
}

.token-input {
    background: var(--light);
    padding: 1rem;
    border-radius: 15px;
    margin-bottom: 1rem;
}
</style>
"""

# Raw HTML must be explicitly allowed, otherwise the <style> tag is escaped.
st.markdown(_PAGE_CSS, unsafe_allow_html=True)

# Title banner; the gradient comes from the ".header" CSS class.
_header_html = """
<div class="header">
    <h1>📚 EduQuery</h1>
    <p>Smart PDF Assistant for Students</p>
</div>
"""
st.markdown(_header_html, unsafe_allow_html=True)

# Seed the per-session slots exactly once. Existing values are left alone so
# the vector store, chat transcript and QA chain survive script reruns.
_session_defaults = {
    "vector_store": None,
    "chat_history": [],
    "qa_chain": None,
}
for _slot, _initial in _session_defaults.items():
    if _slot not in st.session_state:
        st.session_state[_slot] = _initial

# PDF Processing
def process_pdf(pdf_file):
    """Build a FAISS vector store from an uploaded PDF.

    The upload is written to a temporary ``.pdf`` file so ``PyPDFLoader`` can
    read it from disk. The loaded documents are split into overlapping
    chunks, embedded with ``sentence-transformers/all-MiniLM-L6-v2`` and
    indexed with FAISS.

    Args:
        pdf_file: Uploaded file object whose ``getvalue()`` returns the PDF
            bytes.

    Returns:
        The FAISS vector store built from the document chunks.

    Raises:
        ValueError: If no text chunks could be extracted from the PDF.
    """
    # delete=False because the loader reopens the file by path after this
    # handle is closed; the file is removed explicitly below.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
        tmp_file.write(pdf_file.getvalue())
        tmp_path = tmp_file.name

    try:
        # Use load() rather than load_and_split(): the latter already applies
        # a default text splitter, so the documents would be split twice.
        pages = PyPDFLoader(tmp_path).load()
    finally:
        # Always remove the temp file, even when parsing the PDF fails.
        os.unlink(tmp_path)

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=800,
        chunk_overlap=150
    )
    chunks = text_splitter.split_documents(pages)

    # Fail early with a clear message instead of handing FAISS an empty list
    # (e.g. for a scanned, image-only PDF).
    if not chunks:
        raise ValueError("No extractable text was found in the PDF.")

    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return FAISS.from_documents(chunks, embeddings)

# Setup QA Chain
def setup_qa_chain(vector_store, hf_token=None):
    """Create a conversational retrieval chain over ``vector_store``.

    The chain answers with the ``google/flan-t5-xxl`` model through
    ``HuggingFaceHub``, retrieves the 3 most similar chunks per question and
    keeps the running conversation in a ``ConversationBufferMemory``.

    Args:
        vector_store: Vector store exposing ``as_retriever()``.
        hf_token: Optional Hugging Face API token. Blank or whitespace-only
            values are treated as "no token supplied".

    Returns:
        The configured ``ConversationalRetrievalChain``, or ``None`` when the
        model client could not be created (the error is shown via
        ``st.error``).
    """
    repo_id = "google/flan-t5-xxl"

    # Build the constructor arguments once; the token is only passed when the
    # user actually provided one.
    # NOTE(review): without an explicit token the client presumably falls back
    # to an environment variable -- confirm against the HuggingFaceHub docs.
    llm_kwargs = {
        "repo_id": repo_id,
        "model_kwargs": {"temperature": 0.5, "max_new_tokens": 500},
    }
    token = hf_token.strip() if hf_token else ""
    if token:
        llm_kwargs["huggingfacehub_api_token"] = token

    try:
        llm = HuggingFaceHub(**llm_kwargs)
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing.
        st.error(f"Error loading model: {str(e)}")
        return None

    memory = ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True
    )

    qa_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vector_store.as_retriever(search_kwargs={"k": 3}),
        memory=memory,
        chain_type="stuff"
    )

    return qa_chain

# Hugging Face Token Input
# NOTE(review): the opening <div> below and the closing </div> after the
# widget are emitted by separate st.markdown calls; presumably each call is
# rendered in its own container, so the div may not actually wrap the input.
# Confirm in the browser before relying on the ".token-input" styling.
# NOTE(review): the hint mentions Mistral, but setup_qa_chain always uses
# "google/flan-t5-xxl" regardless of the token.
st.markdown("""
<div class="token-input">
    <h3>🔑 Hugging Face Token (Optional)</h3>
    <p>For better models like Mistral, enter your <a href="https://huggingface.co/settings/tokens" target="_blank">Hugging Face token</a></p>
""", unsafe_allow_html=True)
# Give the widget a real label (kept hidden) instead of an empty string, so
# it stays accessible and Streamlit does not warn about an empty label.
hf_token = st.text_input(
    "Hugging Face token",
    type="password",
    label_visibility="collapsed",
    placeholder="hf_xxxxxxxxxxxxxxxxxx",
)
st.markdown("</div>", unsafe_allow_html=True)

# File upload section
st.markdown("""
<div class="upload-area">
    <h3>📤 Upload Your Textbook/Notes</h3>
""", unsafe_allow_html=True)

# Real (hidden) label instead of an empty string, for accessibility.
uploaded_file = st.file_uploader(
    "Upload a PDF",
    type="pdf",
    accept_multiple_files=False,
    label_visibility="collapsed",
)

st.markdown("</div>", unsafe_allow_html=True)

if uploaded_file:
    # Streamlit re-executes this script on every interaction, including each
    # chat message. Rebuilding the index and chain every time would re-embed
    # the PDF and discard the chain's conversation memory, so the work is
    # redone only when the file or the token changes.
    # The key is name + size, so a different file with the same name and size
    # is treated as already processed.
    upload_key = (uploaded_file.name, uploaded_file.size, hf_token)
    if st.session_state.get("processed_upload_key") != upload_key:
        # Cleared first so a failed attempt is retried on the next rerun.
        st.session_state.processed_upload_key = None
        with st.spinner("Processing PDF..."):
            try:
                st.session_state.vector_store = process_pdf(uploaded_file)
            except Exception as e:
                # UI boundary: show the problem instead of a raw traceback and
                # drop any state that belonged to a previous document.
                st.session_state.vector_store = None
                st.session_state.qa_chain = None
                st.error(f"Error processing PDF: {str(e)}")
            else:
                st.session_state.qa_chain = setup_qa_chain(
                    st.session_state.vector_store, hf_token
                )
                if st.session_state.qa_chain:
                    st.session_state.processed_upload_key = upload_key

    if st.session_state.qa_chain:
        st.success("PDF processed successfully! You can now ask questions.")

# Chat interface
st.markdown("""
<div class="chat-area">
    <h3>💬 Ask Anything About the Document</h3>
""", unsafe_allow_html=True)

# Replay the stored transcript; the script reruns from the top on every
# interaction, so earlier messages must be redrawn each time.
for message in st.session_state.chat_history:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# User input
if prompt := st.chat_input("Your question..."):
    # Compare against None explicitly: these slots hold library objects whose
    # truthiness is not part of their contract.
    if st.session_state.vector_store is None:
        st.warning("Please upload a PDF first")
        st.stop()

    if st.session_state.qa_chain is None:
        st.error("Model not initialized. Please check your Hugging Face token or try again.")
        st.stop()

    # Add user message to chat history
    st.session_state.chat_history.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    # Get assistant response
    with st.chat_message("assistant"):
        with st.spinner("Thinking..."):
            try:
                # invoke() replaces the deprecated direct call on the chain.
                response = st.session_state.qa_chain.invoke({"question": prompt})
                answer = response["answer"]
            except Exception as e:
                # UI boundary: surface the failure as the assistant's reply.
                answer = f"Error: {str(e)}"
        st.markdown(answer)

    # Add assistant response to chat history
    st.session_state.chat_history.append({"role": "assistant", "content": answer})

st.markdown("</div>", unsafe_allow_html=True)

# Footer credit line, styled by the ".footer" CSS class.
_footer_html = """
<div class="footer">
    <p>EduQuery - Helping students learn smarter • Powered by Flan-T5 and LangChain</p>
</div>
"""
st.markdown(_footer_html, unsafe_allow_html=True)