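# Build a simple Streamlit "PDF Q&A" study assistant and write the two files
# (requirements.txt and app.py) to /mnt/data.

# NOTE: package list inferred from the imports in app.py below (unpinned);
# adjust versions as needed for your environment.
requirements_txt = """
streamlit
langchain
langchain-community
pypdf
sentence-transformers
faiss-cpu
huggingface_hub
"""

app_py = """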
import streamlit as st
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains import ConversationalRetrievalChain
from langchain_community.llms import HuggingFaceHub
from langchain.memory import ConversationBufferMemory
import os

# App title and color theme
st.set_page_config(page_title="πŸ“˜ PDF Q&A Agent", layout="centered", page_icon="πŸ“˜")
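
# The hosted LLM used below reads its API token from the environment; warn
# early if it is missing (assumes the standard HUGGINGFACEHUB_API_TOKEN variable).
if not os.environ.get("HUGGINGFACEHUB_API_TOKEN"):
    st.warning("⚠️ Set the HUGGINGFACEHUB_API_TOKEN environment variable to query the Hugging Face LLM.")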

st.markdown(
    \"\"\"
    <div style="background-color:#E3E8FF;padding:10px;border-radius:10px">
    <h2 style="color:#3C3C88;text-align:center">πŸ“˜ Student PDF Assistant</h2>
    <p style="color:#444;text-align:center">Ask questions from your uploaded PDF and generate Q&A for chapters!</p>
    </div>
    \"\"\", unsafe_allow_html=True
)

# Upload PDF
uploaded_file = st.file_uploader("πŸ“Ž Upload your PDF file", type=["pdf"])

if uploaded_file:
    # Save PDF temporarily
    with open("uploaded.pdf", "wb") as f:
        f.write(uploaded_file.read())

    st.success("βœ… PDF uploaded successfully!")

    # Build the index and chain once per session; Streamlit reruns this whole
    # script on every interaction, so rebuilding here would re-embed the PDF
    # and wipe the conversation memory each time.
    if "qa_chain" not in st.session_state:
        # Load the PDF and split it into overlapping chunks for retrieval
        loader = PyPDFLoader("uploaded.pdf")
        pages = loader.load()
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=150)
        chunks = text_splitter.split_documents(pages)

        # Embed the chunks and index them with FAISS
        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        vectordb = FAISS.from_documents(chunks, embeddings)

        # Open-source LLM served via the Hugging Face Hub inference API
        # (Mistral here, but any lightweight instruct model works)
        repo_id = "mistralai/Mistral-7B-Instruct-v0.1"
        llm = HuggingFaceHub(repo_id=repo_id, model_kwargs={"temperature": 0.5, "max_new_tokens": 500})

        # Conversation memory plus retrieval chain over the vector store
        memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
        st.session_state.qa_chain = ConversationalRetrievalChain.from_llm(
            llm, retriever=vectordb.as_retriever(), memory=memory
        )
        # Keep these around for the Q&A generation button below
        st.session_state.chunks = chunks
        st.session_state.llm = llm

    qa_chain = st.session_state.qa_chain
    chunks = st.session_state.chunks
    llm = st.session_state.llm

    # Chat Interface
    st.markdown("---")
    st.markdown("πŸ’¬ **Ask a question from the PDF:**")

    if "chat_history" not in st.session_state:
        st.session_state.chat_history = []

    question = st.text_input("Type your question here...", key="user_input")

    # Only answer when the question changes; otherwise every Streamlit rerun
    # (e.g. a button click) would re-submit the same input to the LLM.
    if question and question != st.session_state.get("last_question"):
        result = qa_chain.run(question)
        st.session_state.last_question = question
        st.session_state.chat_history.append(("You", question))
        st.session_state.chat_history.append(("Bot", result))

    # Show chat history, newest exchange first (question above its answer)
    history = st.session_state.chat_history
    for i in range(len(history) - 2, -1, -2):
        for sender, msg in history[i:i + 2]:
            st.markdown(f"**{sender}:** {msg}")

    # Question Generation Button
    st.markdown("---")
    if st.button("πŸ“š Generate Q&A from all chapters"):
        st.info("Generating questions and answers from the content...")
        questions = [
            "What is the main idea of this chapter?",
            "What are the key points discussed?",
            "Can you summarize this section?",
            "Are there any definitions or terms introduced?"
        ]
        for i, chunk in enumerate(chunks[:3]):  # Limit to first 3 chunks for demo
            st.markdown(f"**Chapter Section {i+1}:**")
            for q in questions:
                # Give the model the excerpt first, then the question
                answer = llm.invoke(chunk.page_content[:1000] + "\\n\\nQuestion: " + q)
                st.markdown(f"**Q:** {q}")
                st.markdown(f"**A:** {answer}")
                st.markdown("---")

"""

# Save both files to /mnt/data for user download or deployment
with open("/mnt/data/requirements.txt", "w") as f:
    f.write(requirements_txt.strip())

with open("/mnt/data/app.py", "w") as f:
    f.write(app_py.strip())
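
# Quick usage sketch (assuming the inferred requirements above are correct):
#   pip install -r /mnt/data/requirements.txt
#   streamlit run /mnt/data/app.py
print("Saved /mnt/data/requirements.txt and /mnt/data/app.py")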