import os

import streamlit as st
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains import ConversationalRetrievalChain
from langchain_community.llms import HuggingFaceHub
from langchain.memory import ConversationBufferMemory
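# NOTE: HuggingFaceHub calls the Hugging Face Inference API, so a valid token must be
# available in the HUGGINGFACEHUB_API_TOKEN environment variable (e.g. as a Space secret).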
# App title and color theme
st.set_page_config(page_title="📄 PDF Q&A Agent", layout="centered", page_icon="📄")
st.markdown(
    """
    <div style="background-color:#E3E8FF;padding:10px;border-radius:10px">
        <h2 style="color:#3C3C88;text-align:center">🎓 Student PDF Assistant</h2>
        <p style="color:#444;text-align:center">Ask questions from your uploaded PDF and generate Q&A for chapters!</p>
    </div>
    """,
    unsafe_allow_html=True,
)
# Upload PDF
uploaded_file = st.file_uploader("📄 Upload your PDF file", type=["pdf"])

if uploaded_file:
    # Save the uploaded PDF to a temporary local file
    with open("uploaded.pdf", "wb") as f:
        f.write(uploaded_file.read())
    st.success("✅ PDF uploaded successfully!")
    # Load and split the PDF into overlapping chunks
    loader = PyPDFLoader("uploaded.pdf")
    pages = loader.load_and_split()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=150)
    chunks = text_splitter.split_documents(pages)

    # Embed the chunks and index them in FAISS
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    vectordb = FAISS.from_documents(chunks, embeddings)
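    # Note: the PDF is re-parsed and re-embedded on every Streamlit rerun. Wrapping this
    # step in a function decorated with @st.cache_resource (keyed on the uploaded file)
    # would avoid recomputing the FAISS index each time the user interacts with the page.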
    # Load an open-source LLM from the Hugging Face Hub (Mistral or any lightweight LLM)
    repo_id = "mistralai/Mistral-7B-Instruct-v0.1"
    llm = HuggingFaceHub(repo_id=repo_id, model_kwargs={"temperature": 0.5, "max_new_tokens": 500})

    # Memory and conversational retrieval chain
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
    qa_chain = ConversationalRetrievalChain.from_llm(
        llm, retriever=vectordb.as_retriever(), memory=memory
    )
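    # The memory and chain above are rebuilt on every rerun, so the LLM-side conversation
    # memory resets between questions; storing them in st.session_state would preserve the
    # conversational context across reruns.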
    # Chat interface
    st.markdown("---")
    st.markdown("💬 **Ask a question from the PDF:**")

    if "chat_history" not in st.session_state:
        st.session_state.chat_history = []

    question = st.text_input("Type your question here...", key="user_input")
    if question:
        result = qa_chain.run(question)
        st.session_state.chat_history.append(("You", question))
        st.session_state.chat_history.append(("Bot", result))

    # Show chat history, newest exchange first
    for sender, msg in st.session_state.chat_history[::-1]:
        st.markdown(f"**{sender}:** {msg}")
    # Question generation button
    st.markdown("---")
    if st.button("📝 Generate Q&A from all chapters"):
        st.info("Generating questions and answers from the content...")
        questions = [
            "What is the main idea of this chapter?",
            "What are the key points discussed?",
            "Can you summarize this section?",
            "Are there any definitions or terms introduced?",
        ]
        for i, chunk in enumerate(chunks[:3]):  # Limit to the first 3 chunks for the demo
            st.markdown(f"**Chapter Section {i + 1}:**")
            for q in questions:
                answer = llm.invoke(q + "\n" + chunk.page_content[:1000])
                st.markdown(f"**Q:** {q}")
                st.markdown(f"**A:** {answer}")
                st.markdown("---")
""" | |
# Save both files to /mnt/data for user download or deployment | |
with open("/mnt/data/requirements.txt", "w") as f: | |
f.write(requirements_txt.strip()) | |
with open("/mnt/data/app.py", "w") as f: | |
f.write(app_py.strip()) | |