sunbal7 committed
Commit 6c9740a · verified · 1 Parent(s): 2f15a0b

Update app.py

Files changed (1): app.py (+245 -89)
app.py CHANGED
@@ -1,97 +1,253 @@
 import streamlit as st
 from langchain_community.document_loaders import PyPDFLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain.embeddings import HuggingFaceEmbeddings
-from langchain.vectorstores import FAISS
-from langchain.chains import ConversationalRetrievalChain
-from langchain.llms import HuggingFaceHub
-from langchain.memory import ConversationBufferMemory
-import os
-
-# App title and color theme
-st.set_page_config(page_title="📘 PDF Q&A Agent", layout="centered", page_icon="📘")
-
-st.markdown(
-    \"\"\"
-    <div style="background-color:#E3E8FF;padding:10px;border-radius:10px">
-    <h2 style="color:#3C3C88;text-align:center">📘 Student PDF Assistant</h2>
-    <p style="color:#444;text-align:center">Ask questions from your uploaded PDF and generate Q&A for chapters!</p>
-    </div>
-    \"\"\", unsafe_allow_html=True
 )

-# Upload PDF
-uploaded_file = st.file_uploader("📎 Upload your PDF file", type=["pdf"])
-
-if uploaded_file:
-    # Save PDF temporarily
-    with open("uploaded.pdf", "wb") as f:
-        f.write(uploaded_file.read())
-
-    st.success("✅ PDF uploaded successfully!")
-
-    # Load and split PDF
-    loader = PyPDFLoader("uploaded.pdf")
-    pages = loader.load_and_split()
-    text_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=150)
-    chunks = text_splitter.split_documents(pages)
-
-    # Embedding
-    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
-    vectordb = FAISS.from_documents(chunks, embeddings)
-
-    # Load Open Source LLM from Hugging Face (Mistral or any lightweight LLM)
-    repo_id = "mistralai/Mistral-7B-Instruct-v0.1"
-    llm = HuggingFaceHub(repo_id=repo_id, model_kwargs={"temperature":0.5, "max_new_tokens":500})
-
-    # Memory and Chain
-    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
-    qa_chain = ConversationalRetrievalChain.from_llm(
-        llm, retriever=vectordb.as_retriever(), memory=memory
     )

-    # Chat Interface
-    st.markdown("---")
-    st.markdown("💬 **Ask a question from the PDF:**")
-
-    if "chat_history" not in st.session_state:
-        st.session_state.chat_history = []
-
-    question = st.text_input("Type your question here...", key="user_input")
-
-    if question:
-        result = qa_chain.run(question)
-        st.session_state.chat_history.append(("You", question))
-        st.session_state.chat_history.append(("Bot", result))
-
-    # Show chat history
-    for sender, msg in st.session_state.chat_history[::-1]:
-        st.markdown(f"**{sender}:** {msg}")
-
-    # Question Generation Button
-    st.markdown("---")
-    if st.button("📚 Generate Q&A from all chapters"):
-        st.info("Generating questions and answers from the content...")
-        questions = [
-            "What is the main idea of this chapter?",
-            "What are the key points discussed?",
-            "Can you summarize this section?",
-            "Are there any definitions or terms introduced?"
-        ]
-        for i, chunk in enumerate(chunks[:3]):  # Limit to first 3 chunks for demo
-            st.markdown(f"**Chapter Section {i+1}:**")
-            for q in questions:
-                answer = llm.invoke(q + "\\n" + chunk.page_content[:1000])
-                st.markdown(f"**Q:** {q}")
-                st.markdown(f"**A:** {answer}")
-                st.markdown("---")
-
-"""
-
-# Save both files to /mnt/data for user download or deployment
-with open("/mnt/data/requirements.txt", "w") as f:
-    f.write(requirements_txt.strip())
-
-with open("/mnt/data/app.py", "w") as f:
-    f.write(app_py.strip())

 import streamlit as st
+import os
+import tempfile
 from langchain_community.document_loaders import PyPDFLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_community.vectorstores import FAISS
+from langchain_community.embeddings import HuggingFaceEmbeddings
+from langchain_community.chat_models import ChatOllama
+from langchain.chains import RetrievalQA
+from langchain.prompts import PromptTemplate
+from langchain_core.runnables import RunnablePassthrough
+from langchain_core.output_parsers import StrOutputParser
+import base64
+
+# Set page config
+st.set_page_config(
+    page_title="EduQuery - Smart PDF Assistant",
+    page_icon="📚",
+    layout="wide",
+    initial_sidebar_state="collapsed"
 )

+# Embedded CSS for colorful UI
+st.markdown("""
+<style>
+body {
+    background-color: #f0f2f6;
+}
+
+.stApp {
+    max-width: 1200px;
+    margin: 0 auto;
+    padding: 2rem;
+}
+
+.header {
+    background: linear-gradient(135deg, #6a11cb 0%, #2575fc 100%);
+    color: white;
+    padding: 2rem;
+    border-radius: 15px;
+    margin-bottom: 2rem;
+    text-align: center;
+}
+
+.header h1 {
+    font-size: 2.5rem;
+    margin-bottom: 0.5rem;
+}
+
+.stButton>button {
+    background: linear-gradient(135deg, #6a11cb 0%, #2575fc 100%);
+    color: white;
+    border: none;
+    border-radius: 25px;
+    padding: 0.5rem 1.5rem;
+    font-weight: bold;
+    transition: all 0.3s ease;
+}
+
+.stButton>button:hover {
+    transform: scale(1.05);
+    box-shadow: 0 5px 15px rgba(0,0,0,0.1);
+}
+
+.stTextInput>div>div>input {
+    border-radius: 25px;
+    padding: 0.75rem 1.5rem;
+}
+
+.stChatMessage {
+    padding: 1.5rem;
+    border-radius: 20px;
+    margin-bottom: 1rem;
+    max-width: 80%;
+}
+
+.stChatMessage[data-testid="user"] {
+    background: linear-gradient(135deg, #43e97b 0%, #38f9d7 100%);
+    margin-left: auto;
+}
+
+.stChatMessage[data-testid="assistant"] {
+    background: linear-gradient(135deg, #4facfe 0%, #00f2fe 100%);
+    margin-right: auto;
+}
+
+.qa-box {
+    background: linear-gradient(135deg, #fff1eb 0%, #ace0f9 100%);
+    padding: 1.5rem;
+    border-radius: 15px;
+    margin-top: 1rem;
+    box-shadow: 0 5px 15px rgba(0,0,0,0.05);
+}
+
+.footer {
+    text-align: center;
+    color: #6c757d;
+    padding-top: 1.5rem;
+    font-size: 0.9rem;
+}
+</style>
+""", unsafe_allow_html=True)
+
+# Header with gradient
+st.markdown("""
+<div class="header">
+    <h1>📚 EduQuery</h1>
+    <p>Smart PDF Assistant for Students</p>
+</div>
+""", unsafe_allow_html=True)
+
+# Initialize session state
+if "vector_store" not in st.session_state:
+    st.session_state.vector_store = None
+if "messages" not in st.session_state:
+    st.session_state.messages = []
+
+# Model selection
+MODEL_NAME = "nous-hermes2"  # instruction-tuned open model served via Ollama
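+# Note: ChatOllama talks to a local Ollama server (default http://localhost:11434);
+# the model must already be pulled, e.g. `ollama pull nous-hermes2`.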
+
+# PDF Processing
+def process_pdf(pdf_file):
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
+        tmp_file.write(pdf_file.getvalue())
+        tmp_path = tmp_file.name
+
+    loader = PyPDFLoader(tmp_path)
+    docs = loader.load()
+
+    text_splitter = RecursiveCharacterTextSplitter(
+        chunk_size=1000,
+        chunk_overlap=200,
+        length_function=len
+    )
+    chunks = text_splitter.split_documents(docs)
+
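+    # First call downloads the BGE embedding model from the Hugging Face Hub
+    # (assumes network access; it is cached locally afterwards)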
+    embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-base-en-v1.5")
+    vector_store = FAISS.from_documents(chunks, embeddings)
+
+    os.unlink(tmp_path)
+    return vector_store
+
+# RAG Setup
+def format_docs(docs):
+    # Keep page metadata visible so the model can actually cite page numbers
+    return "\n\n".join(f"[page {d.metadata.get('page', '?')}] {d.page_content}" for d in docs)
+
+def setup_qa_chain(vector_store):
+    llm = ChatOllama(model=MODEL_NAME, temperature=0.3)
+
+    custom_prompt = """
+    You are an expert academic assistant. Answer the question based only on the following context:
+    {context}
+
+    Question: {question}
+
+    Provide a clear, concise answer with page number references. If unsure, say "I couldn't find this information in the document".
+    """
+
+    prompt = PromptTemplate(
+        template=custom_prompt,
+        input_variables=["context", "question"]
+    )
+
+    retriever = vector_store.as_retriever(search_kwargs={"k": 3})
+
+    qa_chain = (
+        {"context": retriever | format_docs, "question": RunnablePassthrough()}
+        | prompt
+        | llm
+        | StrOutputParser()
+    )
+
+    return qa_chain
+
+# Generate questions from chapter
+def generate_chapter_questions(vector_store, chapter_title):
+    llm = ChatOllama(model=MODEL_NAME, temperature=0.7)
+
+    # Ground the Q&A in the document itself: fetch the chunks most relevant to the chapter title
+    docs = vector_store.similarity_search(chapter_title, k=4)
+    context = "\n\n".join(f"[page {d.metadata.get('page', '?')}] {d.page_content}" for d in docs)
+
+    prompt = PromptTemplate(
+        input_variables=["chapter_title", "context"],
+        template="""
+        You are an expert educator. Using only the context below, generate 5 important questions
+        and answers about '{chapter_title}' that would help students understand key concepts.
+
+        Context:
+        {context}
+
+        Format as:
+
+        Q1: [Question]
+        A1: [Answer with page reference]
+
+        Q2: [Question]
+        A2: [Answer with page reference]
+        ..."""
     )
+
+    chain = prompt | llm | StrOutputParser()
+    return chain.invoke({"chapter_title": chapter_title, "context": context})

+# File upload section
+st.subheader("📤 Upload Your Textbook/Notes")
+uploaded_file = st.file_uploader("Upload PDF", type="pdf", accept_multiple_files=False, label_visibility="collapsed")

+if uploaded_file:
+    # Only re-embed when a new file arrives; Streamlit reruns this script on every interaction
+    if st.session_state.get("processed_file") != uploaded_file.name:
+        with st.spinner("Processing PDF..."):
+            st.session_state.vector_store = process_pdf(uploaded_file)
+            st.session_state.processed_file = uploaded_file.name
+        st.success("PDF processed successfully! You can now ask questions.")
+
+# Main content columns
+col1, col2 = st.columns([1, 2])
+
+# Chapter-based Q&A Generator
+with col1:
+    st.subheader("🔍 Generate Chapter Questions")
+    chapter_title = st.text_input("Enter chapter title/section name:", key="chapter_input")
+
+    if st.button("Generate Q&A", key="generate_btn") and chapter_title and st.session_state.vector_store:
+        with st.spinner(f"Generating questions about {chapter_title}..."):
+            questions = generate_chapter_questions(
+                st.session_state.vector_store,
+                chapter_title
+            )
+            st.markdown(f"<div class='qa-box'>{questions}</div>", unsafe_allow_html=True)
+    elif chapter_title and not st.session_state.vector_store:
+        st.warning("Please upload a PDF first")
+
+# Chat interface
+with col2:
+    st.subheader("💬 Ask Anything About the Document")
+
+    for message in st.session_state.messages:
+        with st.chat_message(message["role"]):
+            st.markdown(message["content"])
+
+    if prompt := st.chat_input("Your question..."):
+        if not st.session_state.vector_store:
+            st.warning("Please upload a PDF first")
+            st.stop()
+
+        st.session_state.messages.append({"role": "user", "content": prompt})
+        with st.chat_message("user"):
+            st.markdown(prompt)
+
+        with st.chat_message("assistant"):
+            with st.spinner("Thinking..."):
+                qa_chain = setup_qa_chain(st.session_state.vector_store)
+                response = qa_chain.invoke(prompt)
+                st.markdown(response)
+        st.session_state.messages.append({"role": "assistant", "content": response})
+
+# Footer
+st.markdown("---")
+st.markdown(
+    """
+    <div class="footer">
+        <p>EduQuery - Helping students learn smarter • Powered by Nous-Hermes2 and LangChain</p>
+    </div>
+    """,
+    unsafe_allow_html=True
+)
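
For reference, a plausible requirements.txt for this revision (an assumption, not part of the commit; the Space's actual pins may differ):

    streamlit
    langchain
    langchain-community
    langchain-core
    faiss-cpu
    sentence-transformers
    pypdf

A local run is then expected to look like `ollama pull nous-hermes2` followed by `streamlit run app.py`, with the Ollama server already running.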