sunbal7 committed on
Commit
ba3ef77
·
verified ·
1 Parent(s): 6c9740a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +130 -124
app.py CHANGED
@@ -5,14 +5,12 @@ from langchain_community.document_loaders import PyPDFLoader
5
  from langchain.text_splitter import RecursiveCharacterTextSplitter
6
  from langchain_community.vectorstores import FAISS
7
  from langchain_community.embeddings import HuggingFaceEmbeddings
8
- from langchain_community.chat_models import ChatOllama
9
- from langchain.chains import RetrievalQA
10
- from langchain.prompts import PromptTemplate
11
- from langchain_core.runnables import RunnablePassthrough
12
- from langchain_core.output_parsers import StrOutputParser
13
  import base64
14
 
15
- # Set page config
16
  st.set_page_config(
17
  page_title="EduQuery - Smart PDF Assistant",
18
  page_icon="📚",
@@ -20,11 +18,19 @@ st.set_page_config(
20
  initial_sidebar_state="collapsed"
21
  )
22
 
23
- # Embedded CSS for colorful UI
24
  st.markdown("""
25
  <style>
 
 
 
 
 
 
 
26
  body {
27
- background-color: #f0f2f6;
 
28
  }
29
 
30
  .stApp {
@@ -34,37 +40,47 @@ body {
34
  }
35
 
36
  .header {
37
- background: linear-gradient(135deg, #6a11cb 0%, #2575fc 100%);
38
  color: white;
39
  padding: 2rem;
40
  border-radius: 15px;
41
  margin-bottom: 2rem;
42
  text-align: center;
 
43
  }
44
 
45
  .header h1 {
46
- font-size: 2.5rem;
47
  margin-bottom: 0.5rem;
48
  }
49
 
50
  .stButton>button {
51
- background: linear-gradient(135deg, #6a11cb 0%, #2575fc 100%);
52
  color: white;
53
  border: none;
54
  border-radius: 25px;
55
- padding: 0.5rem 1.5rem;
56
  font-weight: bold;
 
57
  transition: all 0.3s ease;
 
58
  }
59
 
60
  .stButton>button:hover {
61
  transform: scale(1.05);
62
- box-shadow: 0 5px 15px rgba(0,0,0,0.1);
63
  }
64
 
65
  .stTextInput>div>div>input {
66
  border-radius: 25px;
67
- padding: 0.75rem 1.5rem;
 
 
 
 
 
 
 
68
  }
69
 
70
  .stChatMessage {
@@ -72,31 +88,55 @@ body {
72
  border-radius: 20px;
73
  margin-bottom: 1rem;
74
  max-width: 80%;
 
75
  }
76
 
77
  .stChatMessage[data-testid="user"] {
78
- background: linear-gradient(135deg, #43e97b 0%, #38f9d7 100%);
79
  margin-left: auto;
 
80
  }
81
 
82
  .stChatMessage[data-testid="assistant"] {
83
- background: linear-gradient(135deg, #4facfe 0%, #00f2fe 100%);
84
  margin-right: auto;
 
 
85
  }
86
 
87
- .qa-box {
88
- background: linear-gradient(135deg, #fff1eb 0%, #ace0f9 100%);
89
- padding: 1.5rem;
90
  border-radius: 15px;
91
- margin-top: 1rem;
92
- box-shadow: 0 5px 15px rgba(0,0,0,0.05);
 
 
 
 
 
 
 
 
 
 
93
  }
94
 
95
  .footer {
96
  text-align: center;
97
- color: #6c757d;
98
- padding-top: 1.5rem;
99
  font-size: 0.9rem;
 
 
 
 
 
 
 
 
 
 
100
  }
101
  </style>
102
  """, unsafe_allow_html=True)
@@ -112,11 +152,10 @@ st.markdown("""
112
  # Initialize session state
113
  if "vector_store" not in st.session_state:
114
  st.session_state.vector_store = None
115
- if "messages" not in st.session_state:
116
- st.session_state.messages = []
117
-
118
- # Model selection
119
- MODEL_NAME = "nous-hermes2" # Best open-source model for instruction following
120
 
121
  # PDF Processing
122
  def process_pdf(pdf_file):
@@ -125,129 +164,96 @@ def process_pdf(pdf_file):
125
  tmp_path = tmp_file.name
126
 
127
  loader = PyPDFLoader(tmp_path)
128
- docs = loader.load()
129
 
130
  text_splitter = RecursiveCharacterTextSplitter(
131
- chunk_size=1000,
132
- chunk_overlap=200,
133
- length_function=len
134
  )
135
- chunks = text_splitter.split_documents(docs)
136
 
137
- embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-base-en-v1.5")
138
  vector_store = FAISS.from_documents(chunks, embeddings)
139
 
140
  os.unlink(tmp_path)
141
  return vector_store
142
 
143
- # RAG Setup
144
  def setup_qa_chain(vector_store):
145
- llm = ChatOllama(model=MODEL_NAME, temperature=0.3)
146
-
147
- custom_prompt = """
148
- You are an expert academic assistant. Answer the question based only on the following context:
149
- {context}
150
-
151
- Question: {question}
152
-
153
- Provide a clear, concise answer with page number references. If unsure, say "I couldn't find this information in the document".
154
- """
155
-
156
- prompt = PromptTemplate(
157
- template=custom_prompt,
158
- input_variables=["context", "question"]
159
  )
160
 
161
- retriever = vector_store.as_retriever(search_kwargs={"k": 3})
162
-
163
- qa_chain = (
164
- {"context": retriever, "question": RunnablePassthrough()}
165
- | prompt
166
- | llm
167
- | StrOutputParser()
168
  )
169
 
170
- return qa_chain
171
-
172
- # Generate questions from chapter
173
- def generate_chapter_questions(vector_store, chapter_title):
174
- llm = ChatOllama(model=MODEL_NAME, temperature=0.7)
175
-
176
- prompt = PromptTemplate(
177
- input_variables=["chapter_title"],
178
- template="""
179
- You are an expert educator. Generate 5 important questions and answers about '{chapter_title}'
180
- that would help students understand key concepts. Format as:
181
-
182
- Q1: [Question]
183
- A1: [Answer with page reference]
184
-
185
- Q2: [Question]
186
- A2: [Answer with page reference]
187
- ..."""
188
  )
189
 
190
- chain = prompt | llm | StrOutputParser()
191
- return chain.invoke({"chapter_title": chapter_title})
192
 
193
  # File upload section
194
- st.subheader("📀 Upload Your Textbook/Notes")
 
 
 
 
195
  uploaded_file = st.file_uploader("", type="pdf", accept_multiple_files=False, label_visibility="collapsed")
196
 
 
 
197
  if uploaded_file:
198
  with st.spinner("Processing PDF..."):
199
  st.session_state.vector_store = process_pdf(uploaded_file)
 
200
  st.success("PDF processed successfully! You can now ask questions.")
201
 
202
- # Main content columns
203
- col1, col2 = st.columns([1, 2])
 
 
 
204
 
205
- # Chapter-based Q&A Generator
206
- with col1:
207
- st.subheader("🔍 Generate Chapter Questions")
208
- chapter_title = st.text_input("Enter chapter title/section name:", key="chapter_input")
209
-
210
- if st.button("Generate Q&A", key="generate_btn") and chapter_title and st.session_state.vector_store:
211
- with st.spinner(f"Generating questions about {chapter_title}..."):
212
- questions = generate_chapter_questions(
213
- st.session_state.vector_store,
214
- chapter_title
215
- )
216
- st.markdown(f"<div class='qa-box'>{questions}</div>", unsafe_allow_html=True)
217
- elif chapter_title and not st.session_state.vector_store:
218
  st.warning("Please upload a PDF first")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
219
 
220
- # Chat interface
221
- with col2:
222
- st.subheader("💬 Ask Anything About the Document")
223
-
224
- for message in st.session_state.messages:
225
- with st.chat_message(message["role"]):
226
- st.markdown(message["content"])
227
-
228
- if prompt := st.chat_input("Your question..."):
229
- if not st.session_state.vector_store:
230
- st.warning("Please upload a PDF first")
231
- st.stop()
232
-
233
- st.session_state.messages.append({"role": "user", "content": prompt})
234
- with st.chat_message("user"):
235
- st.markdown(prompt)
236
-
237
- with st.chat_message("assistant"):
238
- with st.spinner("Thinking..."):
239
- qa_chain = setup_qa_chain(st.session_state.vector_store)
240
- response = qa_chain.invoke(prompt)
241
- st.markdown(response)
242
- st.session_state.messages.append({"role": "assistant", "content": response})
243
 
244
  # Footer
245
- st.markdown("---")
246
- st.markdown(
247
- """
248
- <div class="footer">
249
- <p>EduQuery - Helping students learn smarter ‒ Powered by Nous-Hermes2 and LangChain</p>
250
- </div>
251
- """,
252
- unsafe_allow_html=True
253
- )
 
5
  from langchain.text_splitter import RecursiveCharacterTextSplitter
6
  from langchain_community.vectorstores import FAISS
7
  from langchain_community.embeddings import HuggingFaceEmbeddings
8
+ from langchain.chains import ConversationalRetrievalChain
9
+ from langchain.memory import ConversationBufferMemory
10
+ from langchain_community.llms import HuggingFaceHub
 
 
11
  import base64
12
 
13
+ # Set page config with light purple theme
14
  st.set_page_config(
15
  page_title="EduQuery - Smart PDF Assistant",
16
  page_icon="📚",
 
18
  initial_sidebar_state="collapsed"
19
  )
20
 
21
+ # Embedded CSS for light purple UI
22
  st.markdown("""
23
  <style>
24
+ :root {
25
+ --primary: #8a4fff;
26
+ --secondary: #d0bcff;
27
+ --light: #f3edff;
28
+ --dark: #4a2b80;
29
+ }
30
+
31
  body {
32
+ background-color: #f8f5ff;
33
+ font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
34
  }
35
 
36
  .stApp {
 
40
  }
41
 
42
  .header {
43
+ background: linear-gradient(135deg, var(--primary) 0%, var(--dark) 100%);
44
  color: white;
45
  padding: 2rem;
46
  border-radius: 15px;
47
  margin-bottom: 2rem;
48
  text-align: center;
49
+ box-shadow: 0 4px 20px rgba(138, 79, 255, 0.2);
50
  }
51
 
52
  .header h1 {
53
+ font-size: 2.8rem;
54
  margin-bottom: 0.5rem;
55
  }
56
 
57
  .stButton>button {
58
+ background: linear-gradient(135deg, var(--primary) 0%, var(--dark) 100%);
59
  color: white;
60
  border: none;
61
  border-radius: 25px;
62
+ padding: 0.75rem 2rem;
63
  font-weight: bold;
64
+ font-size: 1rem;
65
  transition: all 0.3s ease;
66
+ margin-top: 1rem;
67
  }
68
 
69
  .stButton>button:hover {
70
  transform: scale(1.05);
71
+ box-shadow: 0 5px 15px rgba(138, 79, 255, 0.3);
72
  }
73
 
74
  .stTextInput>div>div>input {
75
  border-radius: 25px;
76
+ padding: 0.9rem 1.5rem;
77
+ border: 1px solid var(--secondary);
78
+ background-color: var(--light);
79
+ }
80
+
81
+ .stTextInput>div>div>input:focus {
82
+ border-color: var(--primary);
83
+ box-shadow: 0 0 0 2px rgba(138, 79, 255, 0.2);
84
  }
85
 
86
  .stChatMessage {
 
88
  border-radius: 20px;
89
  margin-bottom: 1rem;
90
  max-width: 80%;
91
+ box-shadow: 0 4px 12px rgba(0,0,0,0.05);
92
  }
93
 
94
  .stChatMessage[data-testid="user"] {
95
+ background: linear-gradient(135deg, #d0bcff 0%, #b8a1ff 100%);
96
  margin-left: auto;
97
+ color: #4a2b80;
98
  }
99
 
100
  .stChatMessage[data-testid="assistant"] {
101
+ background: linear-gradient(135deg, #e6dcff 0%, #f3edff 100%);
102
  margin-right: auto;
103
+ color: #4a2b80;
104
+ border: 1px solid var(--secondary);
105
  }
106
 
107
+ .upload-area {
108
+ background: linear-gradient(135deg, #f3edff 0%, #e6dcff 100%);
109
+ padding: 2rem;
110
  border-radius: 15px;
111
+ text-align: center;
112
+ border: 2px dashed var(--primary);
113
+ margin-bottom: 2rem;
114
+ }
115
+
116
+ .chat-area {
117
+ background: white;
118
+ padding: 2rem;
119
+ border-radius: 15px;
120
+ box-shadow: 0 4px 20px rgba(138, 79, 255, 0.1);
121
+ height: 500px;
122
+ overflow-y: auto;
123
  }
124
 
125
  .footer {
126
  text-align: center;
127
+ color: #8a4fff;
128
+ padding-top: 2rem;
129
  font-size: 0.9rem;
130
+ margin-top: 2rem;
131
+ border-top: 1px solid var(--secondary);
132
+ }
133
+
134
+ .spinner {
135
+ color: var(--primary) !important;
136
+ }
137
+
138
+ .stSpinner > div > div {
139
+ border-top-color: var(--primary) !important;
140
  }
141
  </style>
142
  """, unsafe_allow_html=True)
 
152
  # Initialize session state
153
  if "vector_store" not in st.session_state:
154
  st.session_state.vector_store = None
155
+ if "chat_history" not in st.session_state:
156
+ st.session_state.chat_history = []
157
+ if "qa_chain" not in st.session_state:
158
+ st.session_state.qa_chain = None
 
159
 
160
  # PDF Processing
161
  def process_pdf(pdf_file):
 
164
  tmp_path = tmp_file.name
165
 
166
  loader = PyPDFLoader(tmp_path)
167
+ pages = loader.load_and_split()
168
 
169
  text_splitter = RecursiveCharacterTextSplitter(
170
+ chunk_size=800,
171
+ chunk_overlap=150
 
172
  )
173
+ chunks = text_splitter.split_documents(pages)
174
 
175
+ embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
176
  vector_store = FAISS.from_documents(chunks, embeddings)
177
 
178
  os.unlink(tmp_path)
179
  return vector_store
180
 
181
+ # Setup QA Chain
182
  def setup_qa_chain(vector_store):
183
+ # Use Mistral-7B from Hugging Face Hub
184
+ repo_id = "mistralai/Mistral-7B-Instruct-v0.1"
185
+ llm = HuggingFaceHub(
186
+ repo_id=repo_id,
187
+ model_kwargs={"temperature": 0.5, "max_new_tokens": 500}
 
 
 
 
 
 
 
 
 
188
  )
189
 
190
+ memory = ConversationBufferMemory(
191
+ memory_key="chat_history",
192
+ return_messages=True
 
 
 
 
193
  )
194
 
195
+ qa_chain = ConversationalRetrievalChain.from_llm(
196
+ llm=llm,
197
+ retriever=vector_store.as_retriever(search_kwargs={"k": 3}),
198
+ memory=memory,
199
+ chain_type="stuff"
 
 
 
 
 
 
 
 
 
 
 
 
 
200
  )
201
 
202
+ return qa_chain
 
203
 
204
  # File upload section
205
+ st.markdown("""
206
+ <div class="upload-area">
207
+ <h3>📀 Upload Your Textbook/Notes</h3>
208
+ """, unsafe_allow_html=True)
209
+
210
  uploaded_file = st.file_uploader("", type="pdf", accept_multiple_files=False, label_visibility="collapsed")
211
 
212
+ st.markdown("</div>", unsafe_allow_html=True)
213
+
214
  if uploaded_file:
215
  with st.spinner("Processing PDF..."):
216
  st.session_state.vector_store = process_pdf(uploaded_file)
217
+ st.session_state.qa_chain = setup_qa_chain(st.session_state.vector_store)
218
  st.success("PDF processed successfully! You can now ask questions.")
219
 
220
+ # Chat interface
221
+ st.markdown("""
222
+ <div class="chat-area">
223
+ <h3>💬 Ask Anything About the Document</h3>
224
+ """, unsafe_allow_html=True)
225
 
226
+ # Display chat history
227
+ for message in st.session_state.chat_history:
228
+ with st.chat_message(message["role"]):
229
+ st.markdown(message["content"])
230
+
231
+ # User input
232
+ if prompt := st.chat_input("Your question..."):
233
+ if not st.session_state.vector_store:
 
 
 
 
 
234
  st.warning("Please upload a PDF first")
235
+ st.stop()
236
+
237
+ # Add user message to chat history
238
+ st.session_state.chat_history.append({"role": "user", "content": prompt})
239
+ with st.chat_message("user"):
240
+ st.markdown(prompt)
241
+
242
+ # Get assistant response
243
+ with st.chat_message("assistant"):
244
+ with st.spinner("Thinking..."):
245
+ response = st.session_state.qa_chain({"question": prompt})
246
+ answer = response["answer"]
247
+ st.markdown(answer)
248
+
249
+ # Add assistant response to chat history
250
+ st.session_state.chat_history.append({"role": "assistant", "content": answer})
251
 
252
+ st.markdown("</div>", unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
253
 
254
  # Footer
255
+ st.markdown("""
256
+ <div class="footer">
257
+ <p>EduQuery - Helping students learn smarter ‒ Powered by Mistral-7B and LangChain</p>
258
+ </div>
259
+ """, unsafe_allow_html=True)