tatianija committed on
Commit
87f7811
·
verified ·
1 Parent(s): b25a056

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -229
app.py CHANGED
@@ -185,9 +185,9 @@ class IntelligentAgent:
185
 
186
  return "\n\n".join(media_content) if media_content else ""
187
 
188
- def _should_search(self, question: str, media_context: str) -> bool:
189
  """
190
- Use LLM to determine if search is needed for the question.
191
  Returns True if search is recommended, False otherwise.
192
  """
193
  decision_prompt = f"""Analyze this question and decide if it requires real-time information, recent data, or specific facts that might not be in your training data.
@@ -208,14 +208,18 @@ SEARCH IS NOT NEEDED for:
208
  - Definitions of well-established concepts
209
  - How-to instructions for common tasks
210
  - Creative writing or opinion-based responses
 
211
 
212
  Question: "{question}"
213
 
 
 
214
  Respond with only "SEARCH" or "NO_SEARCH" followed by a brief reason (max 20 words).
215
 
216
  Example responses:
217
  - "SEARCH - Current weather data needed"
218
  - "NO_SEARCH - Mathematical concept, general knowledge sufficient"
 
219
  """
220
 
221
  try:
@@ -236,12 +240,16 @@ Example responses:
236
  # Default to search if decision fails
237
  return True
238
 
239
- def _answer_with_llm(self, question: str) -> str:
240
  """
241
- Generate answer using LLM without search.
242
  """
 
 
243
  answer_prompt = f"""You are a general AI assistant. I will ask you a question. YOUR ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
244
 
 
 
245
  Question: {question}
246
 
247
  Answer:"""
@@ -253,9 +261,9 @@ Answer:"""
253
  except Exception as e:
254
  return f"Sorry, I encountered an error generating the response: {e}"
255
 
256
- def _answer_with_search(self, question: str) -> str:
257
  """
258
- Generate answer using search results and LLM.
259
  """
260
  try:
261
  # Perform search
@@ -264,17 +272,11 @@ Answer:"""
264
 
265
  if self.debug:
266
  print(f"Search results type: {type(search_results)}")
267
- #print(f"Search results: {search_results}")
268
 
269
  if not search_results:
270
- return "No search results found. Let me try to answer based on my knowledge:\n\n" + self._answer_with_llm(question)
271
 
272
  # Format search results - handle different result formats
273
- if self.debug:
274
- print(f"First result type: {type(search_results[0]) if search_results else 'None'}")
275
- print(f"First result: {search_results[0] if search_results else 'None'}")
276
-
277
- # If search_results is a string, use it directly
278
  if isinstance(search_results, str):
279
  search_context = search_results
280
  else:
@@ -287,22 +289,25 @@ Answer:"""
287
  link = result.get("link", "")
288
  formatted_results.append(f"Title: {title}\nContent: {snippet}\nSource: {link}")
289
  elif isinstance(result, str):
290
- # If result is a string, use it directly
291
  formatted_results.append(result)
292
  else:
293
- # Handle other formats
294
  formatted_results.append(str(result))
295
 
296
  search_context = "\n\n".join(formatted_results)
297
 
298
- # Generate answer using search context
 
 
299
  answer_prompt = f"""You are a general AI assistant. I will ask you a question. Based on the search results below, provide an answer to the question. If the search results don't fully answer the question, you can supplement with your general knowledge.
300
  Your ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
 
301
  Question: {question}
302
 
303
  Search Results:
304
  {search_context}
305
 
 
 
306
  Answer:"""
307
 
308
  try:
@@ -331,29 +336,37 @@ Answer:"""
331
  return "Search completed but no usable results found."
332
 
333
  except Exception as e:
334
- return f"Search failed: {e}. Let me try to answer based on my knowledge:\n\n" + self._answer_with_llm(question)
335
 
336
- def __call__(self, question: str) -> str:
337
  """
338
- Main entry point - decide whether to search and generate appropriate response.
339
  """
340
  if self.debug:
341
  print(f"Agent received question: {question}")
 
 
342
 
343
  # Early validation
344
  if not question or not question.strip():
345
  return "Please provide a valid question."
346
 
347
  try:
 
 
 
 
 
 
348
  # Decide whether to search
349
- if self._should_search(question):
350
  if self.debug:
351
  print("Using search-based approach")
352
- answer = self._answer_with_search(question)
353
  else:
354
  if self.debug:
355
  print("Using LLM-only approach")
356
- answer = self._answer_with_llm(question)
357
 
358
  except Exception as e:
359
  answer = f"Sorry, I encountered an error: {e}"
@@ -474,210 +487,4 @@ def start_answer_generation(model_choice: str):
474
 
475
  return f"Answer generation started using {model_choice}. Check progress."
476
 
477
- def get_generation_progress():
478
- """
479
- Get the current progress of answer generation.
480
- """
481
- if not processing_status["is_processing"] and processing_status["progress"] == 0:
482
- return "Not started"
483
-
484
- if processing_status["is_processing"]:
485
- progress = processing_status["progress"]
486
- total = processing_status["total"]
487
- status_msg = f"Generating answers... {progress}/{total} completed"
488
- return status_msg
489
- else:
490
- # Generation completed
491
- if cached_answers:
492
- # Create DataFrame with results
493
- display_data = []
494
- for task_id, data in cached_answers.items():
495
- display_data.append({
496
- "Task ID": task_id,
497
- "Question": data["question"][:100] + "..." if len(data["question"]) > 100 else data["question"],
498
- "Generated Answer": data["answer"][:200] + "..." if len(data["answer"]) > 200 else data["answer"]
499
- })
500
-
501
- df = pd.DataFrame(display_data)
502
- status_msg = f"Answer generation completed! {len(cached_answers)} answers ready for submission."
503
- return status_msg, df
504
- else:
505
- return "Answer generation completed but no answers were generated."
506
-
507
- def submit_cached_answers(profile: gr.OAuthProfile | None):
508
- """
509
- Submit the cached answers to the evaluation API.
510
- """
511
- global cached_answers
512
-
513
- if not profile:
514
- return "Please log in to Hugging Face first.", None
515
-
516
- if not cached_answers:
517
- return "No cached answers available. Please generate answers first.", None
518
-
519
- username = profile.username
520
- space_id = os.getenv("SPACE_ID")
521
- agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Unknown"
522
-
523
- # Prepare submission payload
524
- answers_payload = []
525
- for task_id, data in cached_answers.items():
526
- answers_payload.append({
527
- "task_id": task_id,
528
- "submitted_answer": data["answer"]
529
- })
530
-
531
- submission_data = {
532
- "username": username.strip(),
533
- "agent_code": agent_code,
534
- "answers": answers_payload
535
- }
536
-
537
- # Submit to API
538
- api_url = DEFAULT_API_URL
539
- submit_url = f"{api_url}/submit"
540
-
541
- print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
542
-
543
- try:
544
- response = requests.post(submit_url, json=submission_data, timeout=60)
545
- response.raise_for_status()
546
- result_data = response.json()
547
-
548
- final_status = (
549
- f"Submission Successful!\n"
550
- f"User: {result_data.get('username')}\n"
551
- f"Overall Score: {result_data.get('score', 'N/A')}% "
552
- f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
553
- f"Message: {result_data.get('message', 'No message received.')}"
554
- )
555
-
556
- # Create results DataFrame
557
- results_log = []
558
- for task_id, data in cached_answers.items():
559
- results_log.append({
560
- "Task ID": task_id,
561
- "Question": data["question"],
562
- "Submitted Answer": data["answer"]
563
- })
564
-
565
- results_df = pd.DataFrame(results_log)
566
- return final_status, results_df
567
-
568
- except requests.exceptions.HTTPError as e:
569
- error_detail = f"Server responded with status {e.response.status_code}."
570
- try:
571
- error_json = e.response.json()
572
- error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
573
- except:
574
- error_detail += f" Response: {e.response.text[:500]}"
575
- return f"Submission Failed: {error_detail}", None
576
-
577
- except requests.exceptions.Timeout:
578
- return "Submission Failed: The request timed out.", None
579
-
580
- except Exception as e:
581
- return f"Submission Failed: {e}", None
582
-
583
- def clear_cache():
584
- """
585
- Clear all cached data.
586
- """
587
- global cached_answers, cached_questions, processing_status
588
- cached_answers = {}
589
- cached_questions = []
590
- processing_status = {"is_processing": False, "progress": 0, "total": 0}
591
- return "Cache cleared successfully.", None
592
-
593
- # --- Enhanced Gradio Interface ---
594
- with gr.Blocks(title="Intelligent Agent with Conditional Search") as demo:
595
- gr.Markdown("# Intelligent Agent with Conditional Search")
596
- gr.Markdown("This agent uses an LLM to decide when search is needed, optimizing for both accuracy and efficiency.")
597
-
598
- with gr.Row():
599
- gr.LoginButton()
600
- clear_btn = gr.Button("Clear Cache", variant="secondary")
601
-
602
- with gr.Tab("Step 1: Fetch Questions"):
603
- gr.Markdown("### Fetch Questions from API")
604
- fetch_btn = gr.Button("Fetch Questions", variant="primary")
605
- fetch_status = gr.Textbox(label="Fetch Status", lines=2, interactive=False)
606
- questions_table = gr.DataFrame(label="Available Questions", wrap=True)
607
-
608
- fetch_btn.click(
609
- fn=fetch_questions,
610
- outputs=[fetch_status, questions_table]
611
- )
612
-
613
- with gr.Tab("Step 2: Generate Answers"):
614
- gr.Markdown("### Generate Answers with Intelligent Search Decision")
615
-
616
- with gr.Row():
617
- model_choice = gr.Dropdown(
618
- choices=["Llama 3.1 8B", "Mistral 7B"],
619
- value="Llama 3.1 8B",
620
- label="Select Model"
621
- )
622
- generate_btn = gr.Button("Start Answer Generation", variant="primary")
623
- refresh_btn = gr.Button("Refresh Progress", variant="secondary")
624
-
625
- generation_status = gr.Textbox(label="Generation Status", lines=2, interactive=False)
626
-
627
- generate_btn.click(
628
- fn=start_answer_generation,
629
- inputs=[model_choice],
630
- outputs=[generation_status]
631
- )
632
-
633
- refresh_btn.click(
634
- fn=get_generation_progress,
635
- outputs=[generation_status]
636
- )
637
-
638
- with gr.Tab("Step 3: Submit Results"):
639
- gr.Markdown("### Submit Generated Answers")
640
- submit_btn = gr.Button("Submit Cached Answers", variant="primary")
641
- submission_status = gr.Textbox(label="Submission Status", lines=5, interactive=False)
642
- final_results = gr.DataFrame(label="Final Submission Results", wrap=True)
643
-
644
- submit_btn.click(
645
- fn=submit_cached_answers,
646
- outputs=[submission_status, final_results]
647
- )
648
-
649
- # Clear cache functionality
650
- clear_btn.click(
651
- fn=clear_cache,
652
- outputs=[fetch_status, questions_table]
653
- )
654
-
655
- # Auto-refresh progress every 5 seconds when generation is active
656
- demo.load(
657
- fn=get_generation_progress,
658
- outputs=[generation_status]
659
- )
660
-
661
- if __name__ == "__main__":
662
- print("\n" + "-"*30 + " Intelligent Agent Starting " + "-"*30)
663
-
664
- space_host_startup = os.getenv("SPACE_HOST")
665
- space_id_startup = os.getenv("SPACE_ID")
666
-
667
- if space_host_startup:
668
- print(f"✅ SPACE_HOST found: {space_host_startup}")
669
- print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
670
- else:
671
- print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
672
-
673
- if space_id_startup:
674
- print(f"✅ SPACE_ID found: {space_id_startup}")
675
- print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
676
- print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
677
- else:
678
- print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
679
-
680
- print("-"*(60 + len(" Intelligent Agent Starting ")) + "\n")
681
-
682
- print("Launching Intelligent Agent Interface...")
683
- demo.launch(debug=True, share=False)
 
185
 
186
  return "\n\n".join(media_content) if media_content else ""
187
 
188
+ def _should_search(self, question: str, media_context: str = "") -> bool:
189
  """
190
+ Use LLM to determine if search is needed for the question, considering media context.
191
  Returns True if search is recommended, False otherwise.
192
  """
193
  decision_prompt = f"""Analyze this question and decide if it requires real-time information, recent data, or specific facts that might not be in your training data.
 
208
  - Definitions of well-established concepts
209
  - How-to instructions for common tasks
210
  - Creative writing or opinion-based responses
211
+ - Questions that can be answered from attached media content
212
 
213
  Question: "{question}"
214
 
215
+ {f"Media Context Available: {media_context[:500]}..." if media_context else "No media context available."}
216
+
217
  Respond with only "SEARCH" or "NO_SEARCH" followed by a brief reason (max 20 words).
218
 
219
  Example responses:
220
  - "SEARCH - Current weather data needed"
221
  - "NO_SEARCH - Mathematical concept, general knowledge sufficient"
222
+ - "NO_SEARCH - Can be answered from attached image content"
223
  """
224
 
225
  try:
 
240
  # Default to search if decision fails
241
  return True
242
 
243
+ def _answer_with_llm(self, question: str, media_context: str = "") -> str:
244
  """
245
+ Generate answer using LLM without search, considering media context.
246
  """
247
+ context_section = f"\n\nMedia Context:\n{media_context}" if media_context else ""
248
+
249
  answer_prompt = f"""You are a general AI assistant. I will ask you a question. YOUR ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
250
 
251
+ {context_section}
252
+
253
  Question: {question}
254
 
255
  Answer:"""
 
261
  except Exception as e:
262
  return f"Sorry, I encountered an error generating the response: {e}"
263
 
264
+ def _answer_with_search(self, question: str, media_context: str = "") -> str:
265
  """
266
+ Generate answer using search results and LLM, considering media context.
267
  """
268
  try:
269
  # Perform search
 
272
 
273
  if self.debug:
274
  print(f"Search results type: {type(search_results)}")
 
275
 
276
  if not search_results:
277
+ return "No search results found. Let me try to answer based on my knowledge:\n\n" + self._answer_with_llm(question, media_context)
278
 
279
  # Format search results - handle different result formats
 
 
 
 
 
280
  if isinstance(search_results, str):
281
  search_context = search_results
282
  else:
 
289
  link = result.get("link", "")
290
  formatted_results.append(f"Title: {title}\nContent: {snippet}\nSource: {link}")
291
  elif isinstance(result, str):
 
292
  formatted_results.append(result)
293
  else:
 
294
  formatted_results.append(str(result))
295
 
296
  search_context = "\n\n".join(formatted_results)
297
 
298
+ # Generate answer using search context and media context
299
+ context_section = f"\n\nMedia Context:\n{media_context}" if media_context else ""
300
+
301
  answer_prompt = f"""You are a general AI assistant. I will ask you a question. Based on the search results below, provide an answer to the question. If the search results don't fully answer the question, you can supplement with your general knowledge.
302
  Your ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
303
+
304
  Question: {question}
305
 
306
  Search Results:
307
  {search_context}
308
 
309
+ {context_section}
310
+
311
  Answer:"""
312
 
313
  try:
 
336
  return "Search completed but no usable results found."
337
 
338
  except Exception as e:
339
+ return f"Search failed: {e}. Let me try to answer based on my knowledge:\n\n" + self._answer_with_llm(question, media_context)
340
 
341
+ def __call__(self, question: str, image_files: List[str] = None, audio_files: List[str] = None) -> str:
342
  """
343
+ Main entry point - process media files, decide whether to search, and generate appropriate response.
344
  """
345
  if self.debug:
346
  print(f"Agent received question: {question}")
347
+ print(f"Image files: {image_files}")
348
+ print(f"Audio files: {audio_files}")
349
 
350
  # Early validation
351
  if not question or not question.strip():
352
  return "Please provide a valid question."
353
 
354
  try:
355
+ # Process media files first
356
+ media_context = self._process_media_files(image_files, audio_files)
357
+
358
+ if self.debug and media_context:
359
+ print(f"Media context: {media_context[:200]}...")
360
+
361
  # Decide whether to search
362
+ if self._should_search(question, media_context):
363
  if self.debug:
364
  print("Using search-based approach")
365
+ answer = self._answer_with_search(question, media_context)
366
  else:
367
  if self.debug:
368
  print("Using LLM-only approach")
369
+ answer = self._answer_with_llm(question, media_context)
370
 
371
  except Exception as e:
372
  answer = f"Sorry, I encountered an error: {e}"
 
487
 
488
  return f"Answer generation started using {model_choice}. Check progress."
489
 
490
+ def get_generation_prog