import streamlit as st
import os
import tempfile
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.chat_models import ChatOllama
from langchain.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
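# Assumed dependencies (the usual PyPI package names; adjust to your
# environment): streamlit, langchain, langchain-community, faiss-cpu,
# sentence-transformers (backing HuggingFaceEmbeddings), and pypdf (backing
# PyPDFLoader), plus a running Ollama server for ChatOllama.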
# Set page config
st.set_page_config(
    page_title="EduQuery - Smart PDF Assistant",
    page_icon="📚",
    layout="wide",
    initial_sidebar_state="collapsed"
)
# Custom CSS for colorful UI
def local_css(file_name):
    # Inject the stylesheet only if it ships alongside the app; a missing
    # style.css should not crash the page.
    if os.path.exists(file_name):
        with open(file_name) as f:
            st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)

local_css("style.css")
# Header with gradient
st.markdown("""
<div class="header">
    <h1>📚 EduQuery</h1>
    <p>Smart PDF Assistant for Students</p>
</div>
""", unsafe_allow_html=True)
# Initialize session state
if "vector_store" not in st.session_state:
    st.session_state.vector_store = None
if "messages" not in st.session_state:
    st.session_state.messages = []
# Model selection
MODEL_NAME = "nous-hermes2"  # A strong open-source instruction-following model served via Ollama
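# Assumes the model has already been pulled locally, e.g. `ollama pull nous-hermes2`;
# any chat model available to your Ollama install can be substituted here.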
# PDF Processing
def process_pdf(pdf_file):
    # PyPDFLoader reads from a filesystem path, so persist the upload to a
    # temporary file first.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
        tmp_file.write(pdf_file.getvalue())
        tmp_path = tmp_file.name

    loader = PyPDFLoader(tmp_path)
    docs = loader.load()

    # Overlapping chunks keep sentences that straddle a boundary retrievable.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len
    )
    chunks = text_splitter.split_documents(docs)

    embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-base-en-v1.5")
    vector_store = FAISS.from_documents(chunks, embeddings)

    os.unlink(tmp_path)  # Clean up the temporary file
    return vector_store
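# Flatten retrieved Documents into plain text for the prompts below, tagging
# each chunk with the (zero-based) page number that PyPDFLoader stores in
# metadata, so the model can actually cite pages as the prompts request.
def format_docs(docs):
    return "\n\n".join(
        f"[page {doc.metadata.get('page', 'unknown')}] {doc.page_content}"
        for doc in docs
    )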
# RAG Setup
def setup_qa_chain(vector_store):
    llm = ChatOllama(model=MODEL_NAME, temperature=0.3)

    custom_prompt = """
You are an expert academic assistant. Answer the question based only on the following context:

{context}

Question: {question}

Provide a clear, concise answer with page number references. If unsure, say "I couldn't find this information in the document".
"""
    prompt = PromptTemplate(
        template=custom_prompt,
        input_variables=["context", "question"]
    )

    retriever = vector_store.as_retriever(search_kwargs={"k": 3})

    # LCEL pipeline: retrieve chunks, format them into the prompt's {context}
    # slot, pass the raw question through, then parse the LLM reply to a string.
    qa_chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )
    return qa_chain
# Generate questions from chapter
def generate_chapter_questions(vector_store, chapter_title):
    llm = ChatOllama(model=MODEL_NAME, temperature=0.7)

    # Ground the questions in the uploaded document: pull the chunks most
    # relevant to the chapter title rather than relying on the model's
    # general knowledge.
    docs = vector_store.similarity_search(chapter_title, k=5)
    context = format_docs(docs)

    prompt = PromptTemplate(
        input_variables=["context", "chapter_title"],
        template="""
You are an expert educator. Using only the context below, generate 5 important questions and answers about '{chapter_title}' that would help students understand key concepts.

Context:
{context}

Format as:
Q1: [Question]
A1: [Answer with page reference]
Q2: [Question]
A2: [Answer with page reference]
..."""
    )

    chain = prompt | llm | StrOutputParser()
    return chain.invoke({"context": context, "chapter_title": chapter_title})
# File upload section | |
st.subheader("π€ Upload Your Textbook/Notes") | |
uploaded_file = st.file_uploader("", type="pdf", accept_multiple_files=False) | |
if uploaded_file:
    # Streamlit reruns the whole script on every interaction, so re-embed only
    # when a different file is uploaded.
    if st.session_state.get("processed_file") != uploaded_file.name:
        with st.spinner("Processing PDF..."):
            st.session_state.vector_store = process_pdf(uploaded_file)
            st.session_state.processed_file = uploaded_file.name
        st.success("PDF processed successfully! You can now ask questions.")
# Main content columns
col1, col2 = st.columns([1, 2])
# Chapter-based Q&A Generator | |
with col1: | |
st.subheader("π Generate Chapter Questions") | |
chapter_title = st.text_input("Enter chapter title/section name:") | |
if st.button("Generate Q&A") and chapter_title and st.session_state.vector_store: | |
with st.spinner(f"Generating questions about {chapter_title}..."): | |
questions = generate_chapter_questions( | |
st.session_state.vector_store, | |
chapter_title | |
) | |
st.markdown(f"<div class='qa-box'>{questions}</div>", unsafe_allow_html=True) | |
elif chapter_title and not st.session_state.vector_store: | |
st.warning("Please upload a PDF first") | |
# Chat interface
with col2:
    st.subheader("💬 Ask Anything About the Document")

    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    if prompt := st.chat_input("Your question..."):
        if not st.session_state.vector_store:
            st.warning("Please upload a PDF first")
            st.stop()

        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)

        with st.chat_message("assistant"):
            with st.spinner("Thinking..."):
                qa_chain = setup_qa_chain(st.session_state.vector_store)
                response = qa_chain.invoke(prompt)
                st.markdown(response)

        st.session_state.messages.append({"role": "assistant", "content": response})
# Footer
st.markdown("---")
st.markdown(
    """
    <div class="footer">
        <p>EduQuery - Helping students learn smarter • Powered by Nous-Hermes2 and LangChain</p>
    </div>
    """,
    unsafe_allow_html=True
)
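# To try the app locally (assuming this file is saved as app.py, the
# dependencies listed above are installed, and the Ollama server is running):
#   streamlit run app.py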