sunbal7 committed
Commit 351c135 · verified · Parent: 245f6f3

Update app.py

Files changed (1): app.py (+111 −35)
app.py CHANGED
@@ -7,6 +7,7 @@ from langchain_community.vectorstores import FAISS
 import requests
 import os
 import time
+import base64
 
 # Page configuration
 st.set_page_config(
@@ -70,12 +71,21 @@ st.markdown("""
     background-color: #ffebee;
     border-left: 4px solid #f44336;
     padding: 10px;
+    margin: 10px 0;
 }
 
 .info {
     background-color: #e3f2fd;
     border-left: 4px solid #2196f3;
     padding: 10px;
+    margin: 10px 0;
+}
+
+.success {
+    background-color: #e8f5e9;
+    border-left: 4px solid #4caf50;
+    padding: 10px;
+    margin: 10px 0;
 }
 
 @keyframes fadeIn {
@@ -94,17 +104,32 @@ if 'pages' not in st.session_state:
     st.session_state.pages = []
 if 'history' not in st.session_state:
     st.session_state.history = []
+if 'token_valid' not in st.session_state:
+    st.session_state.token_valid = None
 
 # Load embedding model with caching
 @st.cache_resource
 def load_embedding_model():
     return HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
 
+def check_token_validity():
+    """Check if the token is valid by making a simple API call"""
+    if not os.getenv("HF_API_KEY"):
+        return False
+
+    try:
+        headers = {"Authorization": f"Bearer {os.getenv('HF_API_KEY')}"}
+        response = requests.get("https://huggingface.co/api/whoami", headers=headers)
+        return response.status_code == 200
+    except requests.RequestException:
+        return False
+
-def query_hf_inference_api(prompt, max_tokens=200):
-    """Query Hugging Face Inference API with error handling and retry"""
-    MODEL = "google/flan-t5-large"  # Smaller, freely accessible model
+def query_hf_inference_api(prompt, max_tokens=200, model="google/flan-t5-base"):
+    """Query Hugging Face Inference API with better error handling"""
+    MODEL = model  # defaults to the base model for better accessibility
     API_URL = f"https://api-inference.huggingface.co/models/{MODEL}"
     headers = {"Authorization": f"Bearer {os.getenv('HF_API_KEY')}"} if os.getenv('HF_API_KEY') else {}
+
     payload = {
         "inputs": prompt,
         "parameters": {
@@ -122,22 +147,30 @@ def query_hf_inference_api(prompt, max_tokens=200):
         return result[0]['generated_text'] if result else ""
 
     elif response.status_code == 403:
-        st.error("403 Forbidden: Please check your Hugging Face API token and model access")
-        st.markdown("""
-        <div class="info">
-        <h4>How to fix this:</h4>
+        # Detailed debug information
+        st.session_state.token_valid = check_token_validity()
+
+        debug_info = f"""
+        <div class="error">
+        <h4>403 Forbidden Error</h4>
+        <p>Token is set: <strong>{'Yes' if os.getenv('HF_API_KEY') else 'No'}</strong></p>
+        <p>Token valid: <strong>{'Yes' if st.session_state.token_valid else 'No'}</strong></p>
+        <p>Model: {MODEL}</p>
+        <p>Possible solutions:</p>
         <ol>
-        <li>Get your free Hugging Face token from <a href="https://huggingface.co/settings/tokens" target="_blank">https://huggingface.co/settings/tokens</a></li>
-        <li>Add it to your Space secrets as <code>HF_API_KEY</code></li>
-        <li>Accept terms for the model: <a href="https://huggingface.co/google/flan-t5-large" target="_blank">https://huggingface.co/google/flan-t5-large</a></li>
+        <li>Visit the <a href="https://huggingface.co/google/flan-t5-base" target="_blank">model page</a> and click "Agree and access repository"</li>
+        <li>Ensure your token has "read" permissions</li>
+        <li>Wait 5-10 minutes after accepting terms</li>
+        <li>Try a different model using the dropdown below</li>
         </ol>
         </div>
-        """, unsafe_allow_html=True)
+        """
+        st.markdown(debug_info, unsafe_allow_html=True)
         return ""
 
     elif response.status_code == 429:
         st.warning("Rate limit exceeded. Waiting and retrying...")
-        time.sleep(5)  # Wait 5 seconds before retrying
-        return query_hf_inference_api(prompt, max_tokens)
+        time.sleep(3)
+        return query_hf_inference_api(prompt, max_tokens, model)
 
     else:
@@ -173,14 +206,14 @@ def process_pdf(pdf_file):
         st.session_state.pdf_processed = True
         st.success("✅ PDF processed successfully!")
 
-def ask_question(question):
+def ask_question(question, model_choice):
     """Answer a question using the vector store and Hugging Face API"""
     if not st.session_state.vector_store:
         return "PDF not processed yet", []
 
     # Find relevant passages
     docs = st.session_state.vector_store.similarity_search(question, k=3)
-    context = "\n\n".join([doc.page_content for doc in docs])
+    context = "\n\n".join([doc.page_content[:500] for doc in docs])
 
     # Format prompt for the model
     prompt = f"""
@@ -195,18 +228,19 @@ def ask_question(question):
     """
 
     # Query the model
-    answer = query_hf_inference_api(prompt)
+    answer = query_hf_inference_api(prompt, model=model_choice)
 
     # Add to history
     st.session_state.history.append({
         "question": question,
         "answer": answer,
-        "sources": [doc.page_content for doc in docs]
+        "sources": [doc.page_content for doc in docs],
+        "model": model_choice
     })
 
     return answer, docs
 
-def generate_qa_for_chapter(start_page, end_page):
+def generate_qa_for_chapter(start_page, end_page, model_choice):
     """Generate Q&A for specific chapter pages"""
     if start_page < 1 or end_page > len(st.session_state.pages) or start_page > end_page:
         st.error("Invalid page range")
@@ -227,7 +261,7 @@ def generate_qa_for_chapter(start_page, end_page):
     for i, chunk in enumerate(chunks):
         if i % 2 == 0:  # Generate question
             prompt = f"Based on this text, generate one study question: {chunk[:500]}"
-            question = query_hf_inference_api(prompt, max_tokens=100)
+            question = query_hf_inference_api(prompt, model=model_choice, max_tokens=100)
             if question and not question.endswith("?"):
                 question += "?"
             if question:  # Only add if we got a valid question
@@ -235,7 +269,7 @@ def generate_qa_for_chapter(start_page, end_page):
         else:  # Generate answer
             if qa_pairs:  # Ensure we have a question to answer
                 prompt = f"Answer this question: {qa_pairs[-1][0]} using this context: {chunk[:500]}"
-                answer = query_hf_inference_api(prompt, max_tokens=200)
+                answer = query_hf_inference_api(prompt, model=model_choice, max_tokens=200)
                 qa_pairs[-1] = (qa_pairs[-1][0], answer)
 
     return qa_pairs
@@ -243,19 +277,52 @@ def generate_qa_for_chapter(start_page, end_page):
 # App header
 st.markdown("<h1 class='header'>📚 PDF Study Assistant</h1>", unsafe_allow_html=True)
 
-# API Token Instructions
-if not os.getenv("HF_API_KEY"):
-    st.markdown("""
-    <div class="info">
-    <h4>Setup Required:</h4>
-    <p>This app requires a free Hugging Face API token to work:</p>
-    <ol>
-    <li>Get your token from <a href="https://huggingface.co/settings/tokens" target="_blank">https://huggingface.co/settings/tokens</a></li>
-    <li>Add it to your Space secrets as <code>HF_API_KEY</code></li>
-    <li>Accept terms for the model: <a href="https://huggingface.co/google/flan-t5-large" target="_blank">google/flan-t5-large</a></li>
-    </ol>
-    </div>
-    """, unsafe_allow_html=True)
+# Model selection
+MODEL_OPTIONS = {
+    "google/flan-t5-base": "T5 Base (Recommended)",
+    "google/flan-t5-large": "T5 Large (Requires Auth)",
+    "mrm8488/t5-base-finetuned-question-generation-ap": "Question Generation",
+    "declare-lab/flan-alpaca-base": "Alpaca Base"
+}
+
+# Debug info panel
+with st.expander("🔧 Debug Information", expanded=False):
+    st.subheader("Hugging Face Token Status")
+
+    # Check token validity
+    token_valid = check_token_validity()
+    st.session_state.token_valid = token_valid
+
+    col1, col2 = st.columns(2)
+    with col1:
+        st.write(f"Token is set: {'✅ Yes' if os.getenv('HF_API_KEY') else '❌ No'}")
+    with col2:
+        st.write(f"Token is valid: {'✅ Yes' if token_valid else '❌ No'}")
+
+    if os.getenv('HF_API_KEY'):
+        st.markdown("""
+        <div class="info">
+        <p>Your token is set but we're still having issues. Try these steps:</p>
+        <ol>
+        <li>Visit the <a href="https://huggingface.co/google/flan-t5-base" target="_blank">model page</a></li>
+        <li>Click "Agree and access repository"</li>
+        <li>Wait 5-10 minutes for changes to propagate</li>
+        <li>Try a different model from the dropdown</li>
+        </ol>
+        </div>
+        """, unsafe_allow_html=True)
+    else:
+        st.markdown("""
+        <div class="error">
+        <p>Token is not set! Add it in your Space secrets:</p>
+        <ol>
+        <li>Go to your Space → Settings → Secrets</li>
+        <li>Add <code>HF_API_KEY</code> with your token</li>
+        <li>Redeploy the Space</li>
+        </ol>
+        <p>Get your token: <a href="https://huggingface.co/settings/tokens" target="_blank">https://huggingface.co/settings/tokens</a></p>
+        </div>
+        """, unsafe_allow_html=True)
 
 # PDF Upload Section
 with st.container():
@@ -268,6 +335,15 @@ if pdf_file:
         process_pdf(pdf_file)
 
 if st.session_state.pdf_processed:
+    # Model selection
+    st.subheader("Model Selection")
+    model_choice = st.selectbox(
+        "Choose AI model:",
+        options=list(MODEL_OPTIONS.keys()),
+        format_func=lambda x: MODEL_OPTIONS[x],
+        help="Some models require accepting terms on Hugging Face"
+    )
+
     # Navigation tabs
     selected_tab = option_menu(
         None,
@@ -290,7 +366,7 @@ if pdf_file:
 
         if user_question:
             with st.spinner("🤔 Thinking..."):
-                answer, docs = ask_question(user_question)
+                answer, docs = ask_question(user_question, model_choice)
                 if answer:
                     st.markdown(f"<div class='card'><b>Answer:</b> {answer}</div>", unsafe_allow_html=True)
 
@@ -308,7 +384,7 @@ if pdf_file:
         end_page = st.number_input("End Page", min_value=1, max_value=len(st.session_state.pages), value=min(5, len(st.session_state.pages)))
 
         if st.button("Generate Q&A", key="generate_qa"):
-            qa_pairs = generate_qa_for_chapter(start_page, end_page)
+            qa_pairs = generate_qa_for_chapter(start_page, end_page, model_choice)
 
             if qa_pairs:
                 st.markdown(f"<h4>📖 Generated Questions for Pages {start_page}-{end_page}</h4>", unsafe_allow_html=True)
@@ -329,7 +405,7 @@ if pdf_file:
             st.info("No questions asked yet.")
         else:
            for i, item in enumerate(reversed(st.session_state.history)):
-                with st.expander(f"Q{i+1}: {item['question']}"):
+                with st.expander(f"Q{i+1}: {item['question']} ({MODEL_OPTIONS.get(item['model'], item['model'])})"):
                     st.markdown(f"**Answer:** {item['answer']}")
                     st.markdown("**Source Passages:**")
                     for j, source in enumerate(item['sources']):
@@ -339,6 +415,6 @@ if pdf_file:
 st.markdown("---")
 st.markdown("""
 <div style="text-align: center; padding: 20px;">
-    Built with ❤️ for students | PDF Study Assistant v3.0
+    Built with ❤️ for students | PDF Study Assistant v4.0
 </div>
 """, unsafe_allow_html=True)
 