import streamlit as st
import os
import tempfile
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.chat_models import ChatOllama
from langchain.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
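# Assumed dependencies (the usual PyPI package names; adjust to your
# environment): streamlit, langchain, langchain-community, faiss-cpu,
# sentence-transformers (backing HuggingFaceEmbeddings), and pypdf (backing
# PyPDFLoader), plus a running Ollama server for ChatOllama.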
# Set page config
st.set_page_config(
    page_title="EduQuery - Smart PDF Assistant",
    page_icon="📚",
    layout="wide",
    initial_sidebar_state="collapsed"
)
# Custom CSS for colorful UI
def local_css(file_name):
    # Inject the stylesheet only if it ships alongside the app; a missing
    # style.css should not crash the page.
    if os.path.exists(file_name):
        with open(file_name) as f:
            st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)

local_css("style.css")
# Header with gradient
st.markdown("""
<div class="header">
    <h1>📚 EduQuery</h1>
    <p>Smart PDF Assistant for Students</p>
</div>
""", unsafe_allow_html=True)
# Initialize session state
if "vector_store" not in st.session_state:
    st.session_state.vector_store = None
if "messages" not in st.session_state:
    st.session_state.messages = []
# Model selection
MODEL_NAME = "nous-hermes2"  # A strong open-source instruction-following model served via Ollama
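# Assumes the model has already been pulled locally, e.g. `ollama pull nous-hermes2`;
# any chat model available to your Ollama install can be substituted here.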
# PDF Processing
def process_pdf(pdf_file):
    # PyPDFLoader reads from a filesystem path, so persist the upload to a
    # temporary file first.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
        tmp_file.write(pdf_file.getvalue())
        tmp_path = tmp_file.name

    loader = PyPDFLoader(tmp_path)
    docs = loader.load()

    # Overlapping chunks keep sentences that straddle a boundary retrievable.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len
    )
    chunks = text_splitter.split_documents(docs)

    embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-base-en-v1.5")
    vector_store = FAISS.from_documents(chunks, embeddings)

    os.unlink(tmp_path)  # Clean up the temporary file
    return vector_store
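# Flatten retrieved Documents into plain text for the prompts below, tagging
# each chunk with the (zero-based) page number that PyPDFLoader stores in
# metadata, so the model can actually cite pages as the prompts request.
def format_docs(docs):
    return "\n\n".join(
        f"[page {doc.metadata.get('page', 'unknown')}] {doc.page_content}"
        for doc in docs
    )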
# RAG Setup
def setup_qa_chain(vector_store):
    llm = ChatOllama(model=MODEL_NAME, temperature=0.3)

    custom_prompt = """
You are an expert academic assistant. Answer the question based only on the following context:

{context}

Question: {question}

Provide a clear, concise answer with page number references. If unsure, say "I couldn't find this information in the document".
"""
    prompt = PromptTemplate(
        template=custom_prompt,
        input_variables=["context", "question"]
    )

    retriever = vector_store.as_retriever(search_kwargs={"k": 3})

    # LCEL pipeline: retrieve chunks, format them into the prompt's {context}
    # slot, pass the raw question through, then parse the LLM reply to a string.
    qa_chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )
    return qa_chain
# Generate questions from chapter
def generate_chapter_questions(vector_store, chapter_title):
    llm = ChatOllama(model=MODEL_NAME, temperature=0.7)

    # Ground the questions in the uploaded document: pull the chunks most
    # relevant to the chapter title rather than relying on the model's
    # general knowledge.
    docs = vector_store.similarity_search(chapter_title, k=5)
    context = format_docs(docs)

    prompt = PromptTemplate(
        input_variables=["context", "chapter_title"],
        template="""
You are an expert educator. Using only the context below, generate 5 important questions and answers about '{chapter_title}' that would help students understand key concepts.

Context:
{context}

Format as:
Q1: [Question]
A1: [Answer with page reference]
Q2: [Question]
A2: [Answer with page reference]
..."""
    )

    chain = prompt | llm | StrOutputParser()
    return chain.invoke({"context": context, "chapter_title": chapter_title})
# File upload section | |
st.subheader("π€ Upload Your Textbook/Notes") | |
uploaded_file = st.file_uploader("", type="pdf", accept_multiple_files=False) | |
if uploaded_file:
    # Streamlit reruns the whole script on every interaction, so re-embed only
    # when a different file is uploaded.
    if st.session_state.get("processed_file") != uploaded_file.name:
        with st.spinner("Processing PDF..."):
            st.session_state.vector_store = process_pdf(uploaded_file)
            st.session_state.processed_file = uploaded_file.name
        st.success("PDF processed successfully! You can now ask questions.")
# Main content columns
col1, col2 = st.columns([1, 2])
# Chapter-based Q&A Generator | |
with col1: | |
st.subheader("π Generate Chapter Questions") | |
chapter_title = st.text_input("Enter chapter title/section name:") | |
if st.button("Generate Q&A") and chapter_title and st.session_state.vector_store: | |
with st.spinner(f"Generating questions about {chapter_title}..."): | |
questions = generate_chapter_questions( | |
st.session_state.vector_store, | |
chapter_title | |
) | |
st.markdown(f"<div class='qa-box'>{questions}</div>", unsafe_allow_html=True) | |
elif chapter_title and not st.session_state.vector_store: | |
st.warning("Please upload a PDF first") | |
# Chat interface
with col2:
    st.subheader("💬 Ask Anything About the Document")

    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    if prompt := st.chat_input("Your question..."):
        if not st.session_state.vector_store:
            st.warning("Please upload a PDF first")
            st.stop()

        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)

        with st.chat_message("assistant"):
            with st.spinner("Thinking..."):
                qa_chain = setup_qa_chain(st.session_state.vector_store)
                response = qa_chain.invoke(prompt)
                st.markdown(response)

        st.session_state.messages.append({"role": "assistant", "content": response})
# Footer
st.markdown("---")
st.markdown(
    """
    <div class="footer">
        <p>EduQuery - Helping students learn smarter • Powered by Nous-Hermes2 and LangChain</p>
    </div>
    """,
    unsafe_allow_html=True
)
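# To try the app locally (assuming this file is saved as app.py, the
# dependencies listed above are installed, and the Ollama server is running):
#   streamlit run app.py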