tatianija committed on
Commit
81ee216
·
verified ·
1 Parent(s): b0ffe80

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +252 -122
app.py CHANGED
@@ -172,13 +172,6 @@ def save_attachment_to_file(attachment_data: Union[str, bytes, dict], temp_dir:
172
  Save attachment data to a temporary file.
173
  Returns the local file path if successful, None otherwise.
174
  """
175
-
176
-
177
-
178
-
179
-
180
-
181
-
182
  try:
183
  # Determine file name and extension
184
  if not file_name:
@@ -272,8 +265,6 @@ def save_attachment_to_file(attachment_data: Union[str, bytes, dict], temp_dir:
272
  print(f"Failed to save attachment: {e}")
273
  return None
274
 
275
-
276
-
277
  # --- Code Processing Tool ---
278
  class CodeAnalysisTool:
279
  def __init__(self, model_name: str = "meta-llama/Llama-3.1-8B-Instruct"):
@@ -302,23 +293,8 @@ Code:
302
  {code_content}
303
  ```
304
 
305
-
306
-
307
-
308
-
309
-
310
-
311
  Provide a brief, focused analysis:"""
312
 
313
-
314
-
315
-
316
-
317
-
318
-
319
-
320
-
321
-
322
  messages = [{"role": "user", "content": analysis_prompt}]
323
  response = self.client.chat_completion(
324
  messages=messages,
@@ -493,128 +469,282 @@ class IntelligentAgent:
493
 
494
  return "\n\n" + "="*50 + "\n".join(formatted_content) + "\n" + "="*50
495
 
496
- def _detect_and_process_direct_attachments(self, file_name: str) -> Tuple[List[str], List[str], List[str]]:
497
- """
498
- Detect and process a single attachment directly attached to a question (not as a URL).
499
- Returns (image_files, audio_files, code_files)
500
- """
501
- image_files = []
502
- audio_files = []
503
- code_files = []
504
-
505
- if not file_name:
506
- return image_files, audio_files, code_files
507
 
508
- try:
509
- # Construct the file path (assuming file is in current directory)
510
- file_path = os.path.join(os.getcwd(), file_name)
511
-
512
- # Check if file exists
513
- if not os.path.exists(file_path):
514
- if self.debug:
515
- print(f"File not found: {file_path}")
516
  return image_files, audio_files, code_files
517
 
518
- # Get file extension
519
- file_ext = Path(file_name).suffix.lower()
520
-
521
- # Determine category
522
- is_image = (
523
- file_ext in ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.tiff']
524
- )
525
- is_audio = (
526
- file_ext in ['.mp3', '.wav', '.m4a', '.ogg', '.flac', '.aac']
527
- )
528
- is_code = (
529
- file_ext in ['.py', '.txt', '.js', '.html', '.css', '.json', '.xml', '.md', '.c', '.cpp', '.java']
530
- )
531
 
532
- # Categorize the file
533
- if is_image:
534
- image_files.append(file_path)
535
- elif is_audio:
536
- audio_files.append(file_path)
537
- elif is_code:
538
- code_files.append(file_path)
539
- else:
540
- # Default to code/text for unknown types
541
- code_files.append(file_path)
542
 
543
- if self.debug:
544
- print(f"Processed file: {file_name} -> {'image' if is_image else 'audio' if is_audio else 'code'}")
 
 
 
 
 
 
 
 
545
 
546
- except Exception as e:
547
- if self.debug:
548
- print(f"Error processing attachment {file_name}: {e}")
 
 
 
 
 
 
 
549
 
550
- if self.debug:
551
- print(f"Processed attachment: {len(image_files)} images, {len(audio_files)} audio, {len(code_files)} code files")
552
-
553
- return image_files, audio_files, code_files
554
 
555
- def process_question_with_attachments(self, question_data: dict) -> str:
556
- """
557
- Process a question that may have attachments and URLs.
558
- """
559
- question_text = question_data.get('question', '')
560
- if self.debug:
561
- print(f"Question data keys: {list(question_data.keys())}")
562
- print(f"\n1. Processing question with potential attachments and URLs: {question_text[:300]}...")
563
 
564
- try:
565
- # Detect and process URLs
566
  if self.debug:
567
- print(f"2. Detecting and processing URLs...")
 
 
 
 
 
 
 
 
568
 
569
- url_context = self._extract_and_process_urls(question_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
570
 
571
- if self.debug and url_context:
572
- print(f"URL context found: {len(url_context)} characters")
573
- except Exception as e:
574
- if self.debug:
575
- print(f"Error extracting URLs: {e}")
576
- url_context = ""
 
 
 
 
 
 
577
 
578
- try:
579
- # Detect and download attachments
580
- if self.debug:
581
- print(f"3. Searching for images, audio or code attachments...")
 
 
 
 
 
 
 
 
582
 
583
- attachment_name = question_data.get('file_name', '')
584
- if self.debug:
585
- print(f"Attachment name from question_data: '{attachment_name}'")
586
-
587
- image_files, audio_files, code_files = self._detect_and_process_direct_attachments(attachment_name)
 
 
 
588
 
589
- # Process attachments to get context
590
- attachment_context = self._process_attachments(image_files, audio_files, code_files)
 
 
 
 
 
591
 
592
- if self.debug and attachment_context:
593
- print(f"Attachment context: {attachment_context[:200]}...")
 
 
 
 
 
 
 
 
 
 
 
594
 
595
- # Decide whether to search
596
- if self._should_search(question_text, attachment_context, url_context):
 
 
 
 
597
  if self.debug:
598
- print("5. Using search-based approach")
599
- answer = self._answer_with_search(question_text, attachment_context, url_context)
600
- else:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
601
  if self.debug:
602
- print("5. Using LLM-only approach")
603
- answer = self._answer_with_llm(question_text, attachment_context, url_context)
 
 
 
 
 
604
  if self.debug:
605
- print(f"LLM answer: {answer}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
606
 
607
- # Note: We don't cleanup files here since they're not temporary files we created
608
- # They are actual files in the working directory
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
609
 
610
- except Exception as e:
611
  if self.debug:
612
- print(f"Error in attachment processing: {e}")
613
- answer = f"Sorry, I encountered an error: {e}"
614
 
615
- if self.debug:
616
- print(f"6. Agent returning answer: {answer[:100]}...")
617
- return answer
618
  def fetch_questions() -> Tuple[str, Optional[pd.DataFrame]]:
619
  """
620
  Fetch questions from the API and cache them.
 
172
  Save attachment data to a temporary file.
173
  Returns the local file path if successful, None otherwise.
174
  """
 
 
 
 
 
 
 
175
  try:
176
  # Determine file name and extension
177
  if not file_name:
 
265
  print(f"Failed to save attachment: {e}")
266
  return None
267
 
 
 
268
  # --- Code Processing Tool ---
269
  class CodeAnalysisTool:
270
  def __init__(self, model_name: str = "meta-llama/Llama-3.1-8B-Instruct"):
 
293
  {code_content}
294
  ```
295
 
 
 
 
 
 
 
296
  Provide a brief, focused analysis:"""
297
 
 
 
 
 
 
 
 
 
 
298
  messages = [{"role": "user", "content": analysis_prompt}]
299
  response = self.client.chat_completion(
300
  messages=messages,
 
469
 
470
  return "\n\n" + "="*50 + "\n".join(formatted_content) + "\n" + "="*50
471
 
472
+ def _detect_and_process_direct_attachments(self, file_name: str) -> Tuple[List[str], List[str], List[str]]:
473
+ """
474
+ Detect and process a single attachment directly attached to a question (not as a URL).
475
+ Returns (image_files, audio_files, code_files)
476
+ """
477
+ image_files = []
478
+ audio_files = []
479
+ code_files = []
 
 
 
480
 
481
+ if not file_name:
 
 
 
 
 
 
 
482
  return image_files, audio_files, code_files
483
 
484
+ try:
485
+ # Construct the file path (assuming file is in current directory)
486
+ file_path = os.path.join(os.getcwd(), file_name)
487
+
488
+ # Check if file exists
489
+ if not os.path.exists(file_path):
490
+ if self.debug:
491
+ print(f"File not found: {file_path}")
492
+ return image_files, audio_files, code_files
 
 
 
 
493
 
494
+ # Get file extension
495
+ file_ext = Path(file_name).suffix.lower()
 
 
 
 
 
 
 
 
496
 
497
+ # Determine category
498
+ is_image = (
499
+ file_ext in ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.tiff']
500
+ )
501
+ is_audio = (
502
+ file_ext in ['.mp3', '.wav', '.m4a', '.ogg', '.flac', '.aac']
503
+ )
504
+ is_code = (
505
+ file_ext in ['.py', '.txt', '.js', '.html', '.css', '.json', '.xml', '.md', '.c', '.cpp', '.java']
506
+ )
507
 
508
+ # Categorize the file
509
+ if is_image:
510
+ image_files.append(file_path)
511
+ elif is_audio:
512
+ audio_files.append(file_path)
513
+ elif is_code:
514
+ code_files.append(file_path)
515
+ else:
516
+ # Default to code/text for unknown types
517
+ code_files.append(file_path)
518
 
519
+ if self.debug:
520
+ print(f"Processed file: {file_name} -> {'image' if is_image else 'audio' if is_audio else 'code'}")
 
 
521
 
522
+ except Exception as e:
523
+ if self.debug:
524
+ print(f"Error processing attachment {file_name}: {e}")
 
 
 
 
 
525
 
 
 
526
  if self.debug:
527
+ print(f"Processed attachment: {len(image_files)} images, {len(audio_files)} audio, {len(code_files)} code files")
528
+
529
+ return image_files, audio_files, code_files
530
+
531
+ def _process_attachments(self, image_files: List[str], audio_files: List[str], code_files: List[str]) -> str:
532
+ """
533
+ Process different types of attachments and return consolidated context.
534
+ """
535
+ attachment_context = ""
536
 
537
+ # Process images
538
+ for image_file in image_files:
539
+ if self.debug:
540
+ print(f"Processing image: {image_file}")
541
+ try:
542
+ image_description = self.image_tool.analyze_image(image_file)
543
+ ocr_text = self.image_tool.extract_text_from_image(image_file)
544
+
545
+ attachment_context += f"\n\nIMAGE ANALYSIS ({image_file}):\n"
546
+ attachment_context += f"Description: {image_description}\n"
547
+ if ocr_text and "No text found" not in ocr_text and "OCR failed" not in ocr_text:
548
+ attachment_context += f"Text extracted: {ocr_text}\n"
549
+
550
+ except Exception as e:
551
+ if self.debug:
552
+ print(f"Error processing image {image_file}: {e}")
553
+ attachment_context += f"\n\nIMAGE PROCESSING ERROR ({image_file}): {e}\n"
554
 
555
+ # Process audio files
556
+ for audio_file in audio_files:
557
+ if self.debug:
558
+ print(f"Processing audio: {audio_file}")
559
+ try:
560
+ transcription = self.audio_tool.transcribe_audio(audio_file)
561
+ attachment_context += f"\n\nAUDIO TRANSCRIPTION ({audio_file}):\n{transcription}\n"
562
+
563
+ except Exception as e:
564
+ if self.debug:
565
+ print(f"Error processing audio {audio_file}: {e}")
566
+ attachment_context += f"\n\nAUDIO PROCESSING ERROR ({audio_file}): {e}\n"
567
 
568
+ # Process code/text files
569
+ for code_file in code_files:
570
+ if self.debug:
571
+ print(f"Processing code/text: {code_file}")
572
+ try:
573
+ code_analysis = self.code_tool.analyze_code(code_file)
574
+ attachment_context += f"\n\nCODE ANALYSIS ({code_file}):\n{code_analysis}\n"
575
+
576
+ except Exception as e:
577
+ if self.debug:
578
+ print(f"Error processing code {code_file}: {e}")
579
+ attachment_context += f"\n\nCODE PROCESSING ERROR ({code_file}): {e}\n"
580
 
581
+ return attachment_context
582
+
583
+ def _should_search(self, question: str, attachment_context: str, url_context: str) -> bool:
584
+ """
585
+ Decide whether to use search based on the question and available context.
586
+ """
587
+ # If we have rich context from attachments or URLs, we might not need search
588
+ has_rich_context = bool(attachment_context.strip() or url_context.strip())
589
 
590
+ # Keywords that typically indicate search is needed
591
+ search_keywords = [
592
+ "latest", "recent", "current", "today", "now", "2024", "2025",
593
+ "news", "update", "breaking", "trending", "happening",
594
+ "who is", "what is", "where is", "when did", "how many",
595
+ "price", "stock", "weather", "forecast"
596
+ ]
597
 
598
+ question_lower = question.lower()
599
+ needs_search = any(keyword in question_lower for keyword in search_keywords)
600
+
601
+ # Use LLM to make a more nuanced decision
602
+ try:
603
+ decision_prompt = f"""
604
+ Given this question and available context, should I search the web for additional information?
605
+
606
+ Question: {question}
607
+
608
+ Available context: {"Yes - rich context from attachments/URLs" if has_rich_context else "No additional context"}
609
+
610
+ Context preview: {(attachment_context + url_context)[:500]}...
611
 
612
+ Answer with just "YES" if web search would be helpful, or "NO" if the available context is sufficient or if this is a general knowledge question that doesn't require current information.
613
+ """
614
+
615
+ decision = self._chat_completion(decision_prompt, max_tokens=10, temperature=0.1)
616
+ should_search = "YES" in decision.upper()
617
+
618
  if self.debug:
619
+ print(f"Search decision: {should_search} (LLM said: {decision})")
620
+
621
+ return should_search
622
+
623
+ except Exception as e:
624
+ if self.debug:
625
+ print(f"Error in search decision: {e}, falling back to keyword-based decision")
626
+ return needs_search and not has_rich_context
627
+
628
+ def _answer_with_search(self, question: str, attachment_context: str, url_context: str) -> str:
629
+ """
630
+ Answer the question using search + LLM.
631
+ """
632
+ try:
633
+ # Perform search
634
+ search_results = self.search.call(question)
635
+
636
+ # Combine all contexts
637
+ full_context = f"""
638
+ Question: {question}
639
+
640
+ Search Results: {search_results}
641
+
642
+ {attachment_context}
643
+
644
+ {url_context}
645
+ """
646
+
647
+ answer_prompt = f"""Based on the search results and additional context provided, answer this question comprehensively and accurately:
648
+
649
+ {full_context}
650
+
651
+ Provide a clear, well-structured answer:"""
652
+
653
+ return self._chat_completion(answer_prompt, max_tokens=800, temperature=0.3)
654
+
655
+ except Exception as e:
656
+ if self.debug:
657
+ print(f"Search-based answer failed: {e}")
658
+ return self._answer_with_llm(question, attachment_context, url_context)
659
+
660
+ def _answer_with_llm(self, question: str, attachment_context: str, url_context: str) -> str:
661
+ """
662
+ Answer the question using only the LLM and available context.
663
+ """
664
+ try:
665
+ full_context = f"""
666
+ Question: {question}
667
+
668
+ {attachment_context}
669
+
670
+ {url_context}
671
+ """
672
+
673
+ answer_prompt = f"""Answer this question based on your knowledge and the provided context:
674
+
675
+ {full_context}
676
+
677
+ Provide a clear, comprehensive answer:"""
678
+
679
+ return self._chat_completion(answer_prompt, max_tokens=800, temperature=0.3)
680
+
681
+ except Exception as e:
682
+ return f"I apologize, but I encountered an error while processing your question: {e}"
683
+
684
def process_question_with_attachments(self, question_data: dict) -> str:
    """
    Process a question that may have attachments and URLs.

    Steps: gather context from URLs in the question text, classify and
    process the attachment named by question_data['file_name'], then answer
    through the search-based or LLM-only path as chosen by _should_search.

    Args:
        question_data: Mapping read for the keys 'question' (text) and
            'file_name' (attachment name). Both are optional.

    Returns:
        The answer produced by the chosen path, or a
        "Sorry, I encountered an error: ..." string if attachment handling
        or answering raises.
    """
    # A missing or None question becomes "" so the debug slice below and the
    # URL extraction always receive a string.
    question_text = question_data.get('question') or ''
    if self.debug:
        print(f"Question data keys: {list(question_data.keys())}")
        print(f"\n1. Processing question with potential attachments and URLs: {question_text[:300]}...")

    try:
        # Detect and process URLs
        if self.debug:
            print("2. Detecting and processing URLs...")

        url_context = self._extract_and_process_urls(question_text)

        if self.debug and url_context:
            print(f"URL context found: {len(url_context)} characters")
    except Exception as e:
        # URL context is optional; continue without it.
        if self.debug:
            print(f"Error extracting URLs: {e}")
        url_context = ""

    try:
        # Detect and process the attachment named in the question data
        if self.debug:
            print("3. Searching for images, audio or code attachments...")

        attachment_name = question_data.get('file_name', '')
        if self.debug:
            print(f"Attachment name from question_data: '{attachment_name}'")

        image_files, audio_files, code_files = self._detect_and_process_direct_attachments(attachment_name)

        # Process attachments to get context
        attachment_context = self._process_attachments(image_files, audio_files, code_files)

        if self.debug and attachment_context:
            print(f"Attachment context: {attachment_context[:200]}...")

        # Decide whether to search
        if self._should_search(question_text, attachment_context, url_context):
            if self.debug:
                print("5. Using search-based approach")
            answer = self._answer_with_search(question_text, attachment_context, url_context)
        else:
            if self.debug:
                print("5. Using LLM-only approach")
            answer = self._answer_with_llm(question_text, attachment_context, url_context)
            if self.debug:
                print(f"LLM answer: {answer}")

        # Note: We don't cleanup files here since they're not temporary files we created
        # They are actual files in the working directory

    except Exception as e:
        if self.debug:
            print(f"Error in attachment processing: {e}")
        answer = f"Sorry, I encountered an error: {e}"

    if self.debug:
        # str() keeps this debug line from raising if a path returned a non-string.
        print(f"6. Agent returning answer: {str(answer)[:100]}...")
    return answer
747
 
 
 
 
748
  def fetch_questions() -> Tuple[str, Optional[pd.DataFrame]]:
749
  """
750
  Fetch questions from the API and cache them.