Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -172,13 +172,6 @@ def save_attachment_to_file(attachment_data: Union[str, bytes, dict], temp_dir:
|
|
172 |
Save attachment data to a temporary file.
|
173 |
Returns the local file path if successful, None otherwise.
|
174 |
"""
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
try:
|
183 |
# Determine file name and extension
|
184 |
if not file_name:
|
@@ -272,8 +265,6 @@ def save_attachment_to_file(attachment_data: Union[str, bytes, dict], temp_dir:
|
|
272 |
print(f"Failed to save attachment: {e}")
|
273 |
return None
|
274 |
|
275 |
-
|
276 |
-
|
277 |
# --- Code Processing Tool ---
|
278 |
class CodeAnalysisTool:
|
279 |
def __init__(self, model_name: str = "meta-llama/Llama-3.1-8B-Instruct"):
|
@@ -302,23 +293,8 @@ Code:
|
|
302 |
{code_content}
|
303 |
```
|
304 |
|
305 |
-
|
306 |
-
|
307 |
-
|
308 |
-
|
309 |
-
|
310 |
-
|
311 |
Provide a brief, focused analysis:"""
|
312 |
|
313 |
-
|
314 |
-
|
315 |
-
|
316 |
-
|
317 |
-
|
318 |
-
|
319 |
-
|
320 |
-
|
321 |
-
|
322 |
messages = [{"role": "user", "content": analysis_prompt}]
|
323 |
response = self.client.chat_completion(
|
324 |
messages=messages,
|
@@ -493,128 +469,282 @@ class IntelligentAgent:
|
|
493 |
|
494 |
return "\n\n" + "="*50 + "\n".join(formatted_content) + "\n" + "="*50
|
495 |
|
496 |
-
def _detect_and_process_direct_attachments(self, file_name: str) -> Tuple[List[str], List[str], List[str]]:
|
497 |
-
|
498 |
-
|
499 |
-
|
500 |
-
|
501 |
-
|
502 |
-
|
503 |
-
|
504 |
-
|
505 |
-
if not file_name:
|
506 |
-
return image_files, audio_files, code_files
|
507 |
|
508 |
-
|
509 |
-
# Construct the file path (assuming file is in current directory)
|
510 |
-
file_path = os.path.join(os.getcwd(), file_name)
|
511 |
-
|
512 |
-
# Check if file exists
|
513 |
-
if not os.path.exists(file_path):
|
514 |
-
if self.debug:
|
515 |
-
print(f"File not found: {file_path}")
|
516 |
return image_files, audio_files, code_files
|
517 |
|
518 |
-
|
519 |
-
|
520 |
-
|
521 |
-
|
522 |
-
|
523 |
-
|
524 |
-
|
525 |
-
|
526 |
-
|
527 |
-
)
|
528 |
-
is_code = (
|
529 |
-
file_ext in ['.py', '.txt', '.js', '.html', '.css', '.json', '.xml', '.md', '.c', '.cpp', '.java']
|
530 |
-
)
|
531 |
|
532 |
-
|
533 |
-
|
534 |
-
image_files.append(file_path)
|
535 |
-
elif is_audio:
|
536 |
-
audio_files.append(file_path)
|
537 |
-
elif is_code:
|
538 |
-
code_files.append(file_path)
|
539 |
-
else:
|
540 |
-
# Default to code/text for unknown types
|
541 |
-
code_files.append(file_path)
|
542 |
|
543 |
-
|
544 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
545 |
|
546 |
-
|
547 |
-
|
548 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
549 |
|
550 |
-
|
551 |
-
|
552 |
-
|
553 |
-
return image_files, audio_files, code_files
|
554 |
|
555 |
-
|
556 |
-
|
557 |
-
|
558 |
-
"""
|
559 |
-
question_text = question_data.get('question', '')
|
560 |
-
if self.debug:
|
561 |
-
print(f"Question data keys: {list(question_data.keys())}")
|
562 |
-
print(f"\n1. Processing question with potential attachments and URLs: {question_text[:300]}...")
|
563 |
|
564 |
-
try:
|
565 |
-
# Detect and process URLs
|
566 |
if self.debug:
|
567 |
-
print(f"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
568 |
|
569 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
570 |
|
571 |
-
|
572 |
-
|
573 |
-
|
574 |
-
|
575 |
-
|
576 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
577 |
|
578 |
-
|
579 |
-
|
580 |
-
|
581 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
582 |
|
583 |
-
|
584 |
-
|
585 |
-
|
586 |
-
|
587 |
-
|
|
|
|
|
|
|
588 |
|
589 |
-
#
|
590 |
-
|
|
|
|
|
|
|
|
|
|
|
591 |
|
592 |
-
|
593 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
594 |
|
595 |
-
|
596 |
-
|
|
|
|
|
|
|
|
|
597 |
if self.debug:
|
598 |
-
print("
|
599 |
-
|
600 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
601 |
if self.debug:
|
602 |
-
print("
|
603 |
-
|
|
|
|
|
|
|
|
|
|
|
604 |
if self.debug:
|
605 |
-
print(f"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
606 |
|
607 |
-
|
608 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
609 |
|
610 |
-
except Exception as e:
|
611 |
if self.debug:
|
612 |
-
print(f"
|
613 |
-
answer
|
614 |
|
615 |
-
if self.debug:
|
616 |
-
print(f"6. Agent returning answer: {answer[:100]}...")
|
617 |
-
return answer
|
618 |
def fetch_questions() -> Tuple[str, Optional[pd.DataFrame]]:
|
619 |
"""
|
620 |
Fetch questions from the API and cache them.
|
|
|
172 |
Save attachment data to a temporary file.
|
173 |
Returns the local file path if successful, None otherwise.
|
174 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
175 |
try:
|
176 |
# Determine file name and extension
|
177 |
if not file_name:
|
|
|
265 |
print(f"Failed to save attachment: {e}")
|
266 |
return None
|
267 |
|
|
|
|
|
268 |
# --- Code Processing Tool ---
|
269 |
class CodeAnalysisTool:
|
270 |
def __init__(self, model_name: str = "meta-llama/Llama-3.1-8B-Instruct"):
|
|
|
293 |
{code_content}
|
294 |
```
|
295 |
|
|
|
|
|
|
|
|
|
|
|
|
|
296 |
Provide a brief, focused analysis:"""
|
297 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
298 |
messages = [{"role": "user", "content": analysis_prompt}]
|
299 |
response = self.client.chat_completion(
|
300 |
messages=messages,
|
|
|
469 |
|
470 |
return "\n\n" + "="*50 + "\n".join(formatted_content) + "\n" + "="*50
|
471 |
|
472 |
+
def _detect_and_process_direct_attachments(self, file_name: str) -> Tuple[List[str], List[str], List[str]]:
    """
    Classify a single attachment that ships directly with a question (not as a URL).

    The name is joined onto the current working directory (an absolute
    ``file_name`` therefore wins over the working directory) and the file is
    placed in one of three buckets according to its lower-cased extension.
    Unknown extensions are treated as code/text.

    Args:
        file_name: Name of the attached file; a falsy value means "no attachment".

    Returns:
        (image_files, audio_files, code_files). At most one list contains one
        path. All three are empty when there is no attachment, when the path
        is not a regular file, or when classification raised.
    """
    image_exts = ('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.tiff')
    audio_exts = ('.mp3', '.wav', '.m4a', '.ogg', '.flac', '.aac')
    code_exts = ('.py', '.txt', '.js', '.html', '.css', '.json', '.xml', '.md', '.c', '.cpp', '.java')

    image_files: List[str] = []
    audio_files: List[str] = []
    code_files: List[str] = []

    if not file_name:
        return image_files, audio_files, code_files

    try:
        # Construct the file path (assuming file is in current directory)
        file_path = os.path.join(os.getcwd(), file_name)

        # isfile() instead of exists(): a directory with the attachment's name
        # must not be handed to the image/audio/code tools as if it were a file.
        if not os.path.isfile(file_path):
            if self.debug:
                print(f"File not found: {file_path}")
            return image_files, audio_files, code_files

        file_ext = Path(file_name).suffix.lower()

        is_image = file_ext in image_exts
        is_audio = file_ext in audio_exts
        is_code = file_ext in code_exts

        if is_image:
            image_files.append(file_path)
        elif is_audio:
            audio_files.append(file_path)
        elif is_code:
            code_files.append(file_path)
        else:
            # Default to code/text for unknown types
            code_files.append(file_path)

        if self.debug:
            print(f"Processed file: {file_name} -> {'image' if is_image else 'audio' if is_audio else 'code'}")

    except Exception as e:
        # Best effort: a classification failure yields empty buckets, not a crash.
        if self.debug:
            print(f"Error processing attachment {file_name}: {e}")

    if self.debug:
        print(f"Processed attachment: {len(image_files)} images, {len(audio_files)} audio, {len(code_files)} code files")

    return image_files, audio_files, code_files
|
530 |
+
|
531 |
+
def _process_attachments(self, image_files: List[str], audio_files: List[str], code_files: List[str]) -> str:
    """
    Run every attachment through its tool and return one consolidated context string.

    Images are passed to ``self.image_tool`` (description, then OCR), audio to
    ``self.audio_tool.transcribe_audio`` and code/text to
    ``self.code_tool.analyze_code``. A failure for one file is reported inline
    as a ``... PROCESSING ERROR`` section and does not stop the other files.

    Args:
        image_files: Paths of image attachments.
        audio_files: Paths of audio attachments.
        code_files: Paths of code/text attachments.

    Returns:
        The concatenated sections (images, then audio, then code), or ``""``
        when no files were given.
    """
    # Collect sections and join once instead of growing a string with +=.
    sections: List[str] = []

    # Process images
    for image_file in image_files:
        if self.debug:
            print(f"Processing image: {image_file}")
        try:
            image_description = self.image_tool.analyze_image(image_file)
        except Exception as e:
            if self.debug:
                print(f"Error processing image {image_file}: {e}")
            sections.append(f"\n\nIMAGE PROCESSING ERROR ({image_file}): {e}\n")
            continue

        section = f"\n\nIMAGE ANALYSIS ({image_file}):\n"
        section += f"Description: {image_description}\n"

        # OCR only supplements the description: if it fails, keep the
        # description instead of replacing the whole section with an error.
        try:
            ocr_text = self.image_tool.extract_text_from_image(image_file)
            if ocr_text and "No text found" not in ocr_text and "OCR failed" not in ocr_text:
                section += f"Text extracted: {ocr_text}\n"
        except Exception as e:
            if self.debug:
                print(f"Error processing image {image_file}: {e}")

        sections.append(section)

    # Process audio files
    for audio_file in audio_files:
        if self.debug:
            print(f"Processing audio: {audio_file}")
        try:
            transcription = self.audio_tool.transcribe_audio(audio_file)
            sections.append(f"\n\nAUDIO TRANSCRIPTION ({audio_file}):\n{transcription}\n")
        except Exception as e:
            if self.debug:
                print(f"Error processing audio {audio_file}: {e}")
            sections.append(f"\n\nAUDIO PROCESSING ERROR ({audio_file}): {e}\n")

    # Process code/text files
    for code_file in code_files:
        if self.debug:
            print(f"Processing code/text: {code_file}")
        try:
            code_analysis = self.code_tool.analyze_code(code_file)
            sections.append(f"\n\nCODE ANALYSIS ({code_file}):\n{code_analysis}\n")
        except Exception as e:
            if self.debug:
                print(f"Error processing code {code_file}: {e}")
            sections.append(f"\n\nCODE PROCESSING ERROR ({code_file}): {e}\n")

    return "".join(sections)
|
582 |
+
|
583 |
+
def _should_search(self, question: str, attachment_context: str, url_context: str) -> bool:
    """
    Decide whether to use web search based on the question and available context.

    The LLM is asked for a YES/NO verdict via ``self._chat_completion``. If
    that call raises, or its reply contains neither word, a keyword heuristic
    is used instead: search only when the question contains a
    "current information" keyword and no attachment/URL context is available.

    Args:
        question: The user's question.
        attachment_context: Context text produced from attachments (may be empty).
        url_context: Context text produced from URLs (may be empty).

    Returns:
        True when a web search should be performed.
    """
    import re  # local import: this block cannot assume the file imports re at the top

    # If we have rich context from attachments or URLs, we might not need search
    has_rich_context = bool(attachment_context.strip() or url_context.strip())

    # Keywords that typically indicate search is needed
    search_keywords = [
        "latest", "recent", "current", "today", "now", "2024", "2025",
        "news", "update", "breaking", "trending", "happening",
        "who is", "what is", "where is", "when did", "how many",
        "price", "stock", "weather", "forecast"
    ]

    # Anchor each keyword at a word start so "now" no longer fires on
    # "know"/"snow"; the end is left open so "prices"/"recently" still match.
    keyword_pattern = re.compile(r"\b(?:" + "|".join(re.escape(k) for k in search_keywords) + ")")
    needs_search = bool(keyword_pattern.search(question.lower()))

    # Use LLM to make a more nuanced decision
    try:
        context_status = (
            "Yes - rich context from attachments/URLs" if has_rich_context else "No additional context"
        )
        decision_prompt = (
            "\n"
            "Given this question and available context, should I search the web for additional information?\n"
            "\n"
            f"Question: {question}\n"
            "\n"
            f"Available context: {context_status}\n"
            "\n"
            f"Context preview: {(attachment_context + url_context)[:500]}...\n"
            "\n"
            "Answer with just \"YES\" if web search would be helpful, or \"NO\" if the available context is sufficient or if this is a general knowledge question that doesn't require current information.\n"
        )

        decision = self._chat_completion(decision_prompt, max_tokens=10, temperature=0.1)

        # Take the first whole-word verdict. A plain substring test would
        # read "EYES" or "No, yes is not needed" as a YES.
        verdict = re.search(r"\b(YES|NO)\b", decision.upper())
        if verdict is None:
            raise ValueError(f"unparseable search decision: {decision!r}")
        should_search = verdict.group(1) == "YES"

        if self.debug:
            print(f"Search decision: {should_search} (LLM said: {decision})")

        return should_search

    except Exception as e:
        if self.debug:
            print(f"Error in search decision: {e}, falling back to keyword-based decision")
        return needs_search and not has_rich_context
|
627 |
+
|
628 |
+
def _answer_with_search(self, question: str, attachment_context: str, url_context: str) -> str:
    """
    Answer the question using search + LLM.

    ``self.search.call(question)`` supplies the search results, which are
    combined with the attachment and URL context into one prompt for
    ``self._chat_completion``. If the search returns nothing, or any step
    raises, the question is answered by ``self._answer_with_llm`` instead.

    Args:
        question: The user's question.
        attachment_context: Context text produced from attachments (may be empty).
        url_context: Context text produced from URLs (may be empty).

    Returns:
        The answer text from the search-based prompt or from the LLM-only fallback.
    """
    try:
        # Perform search
        search_results = self.search.call(question)

        # With no results there is nothing to ground a "based on the search
        # results" prompt on, so use the LLM-only prompt instead.
        if not search_results:
            if self.debug:
                print("Search returned no results, using LLM-only approach")
            return self._answer_with_llm(question, attachment_context, url_context)

        # Combine all contexts
        full_context = (
            "\n"
            f"Question: {question}\n"
            "\n"
            f"Search Results: {search_results}\n"
            "\n"
            f"{attachment_context}\n"
            "\n"
            f"{url_context}\n"
        )

        answer_prompt = (
            "Based on the search results and additional context provided, answer this question comprehensively and accurately:\n"
            "\n"
            f"{full_context}\n"
            "\n"
            "Provide a clear, well-structured answer:"
        )

        return self._chat_completion(answer_prompt, max_tokens=800, temperature=0.3)

    except Exception as e:
        if self.debug:
            print(f"Search-based answer failed: {e}")
        return self._answer_with_llm(question, attachment_context, url_context)
|
659 |
+
|
660 |
+
def _answer_with_llm(self, question: str, attachment_context: str, url_context: str) -> str:
    """
    Answer the question from the LLM alone, using whatever context is available.

    The question, attachment context and URL context are laid out in a single
    prompt that is sent through ``self._chat_completion``. Any exception is
    converted into an apology message, so this method does not raise.

    Args:
        question: The user's question.
        attachment_context: Context text produced from attachments (may be empty).
        url_context: Context text produced from URLs (may be empty).

    Returns:
        The completion returned by ``self._chat_completion``, or an apology
        string containing the error.
    """
    try:
        # The leading and trailing empty items give the block its surrounding newlines.
        context_block = "\n".join([
            "",
            f"Question: {question}",
            "",
            f"{attachment_context}",
            "",
            f"{url_context}",
            "",
        ])

        prompt_parts = (
            "Answer this question based on your knowledge and the provided context:",
            context_block,
            "Provide a clear, comprehensive answer:",
        )
        answer_prompt = "\n\n".join(prompt_parts)

        reply = self._chat_completion(answer_prompt, max_tokens=800, temperature=0.3)
        return reply

    except Exception as err:
        return f"I apologize, but I encountered an error while processing your question: {err}"
|
683 |
+
|
684 |
+
def process_question_with_attachments(self, question_data: dict) -> str:
    """
    Process a question that may have attachments and URLs.

    Steps: extract URL context from the question text, classify and analyse
    the attachment named by ``question_data['file_name']``, decide via
    ``self._should_search`` whether to search, then answer with the search or
    LLM-only helper. Failures are reported in the returned text, not raised.

    Args:
        question_data: Mapping read for the keys ``'question'`` and
            ``'file_name'``; both are optional and may be ``None``.

    Returns:
        The answer produced by the chosen helper, or a
        ``"Sorry, I encountered an error: ..."`` message.
    """
    # `or ''` also covers a key that is present but None, which would
    # otherwise crash the debug slicing below before any try block is entered.
    question_text = question_data.get('question') or ''
    if self.debug:
        print(f"Question data keys: {list(question_data.keys())}")
        print(f"\n1. Processing question with potential attachments and URLs: {question_text[:300]}...")

    try:
        # Detect and process URLs
        if self.debug:
            print("2. Detecting and processing URLs...")

        # Normalise a None result so the .strip() calls downstream stay safe.
        url_context = self._extract_and_process_urls(question_text) or ""

        if self.debug and url_context:
            print(f"URL context found: {len(url_context)} characters")
    except Exception as e:
        if self.debug:
            print(f"Error extracting URLs: {e}")
        url_context = ""

    try:
        # Detect and download attachments
        if self.debug:
            print("3. Searching for images, audio or code attachments...")

        attachment_name = question_data.get('file_name', '')
        if self.debug:
            print(f"Attachment name from question_data: '{attachment_name}'")

        image_files, audio_files, code_files = self._detect_and_process_direct_attachments(attachment_name)

        # Process attachments to get context
        attachment_context = self._process_attachments(image_files, audio_files, code_files)

        if self.debug and attachment_context:
            print(f"Attachment context: {attachment_context[:200]}...")

        # Decide whether to search
        if self._should_search(question_text, attachment_context, url_context):
            if self.debug:
                print("5. Using search-based approach")
            answer = self._answer_with_search(question_text, attachment_context, url_context)
        else:
            if self.debug:
                print("5. Using LLM-only approach")
            answer = self._answer_with_llm(question_text, attachment_context, url_context)
            if self.debug:
                print(f"LLM answer: {answer}")

        # Note: We don't cleanup files here since they're not temporary files we created
        # They are actual files in the working directory

    except Exception as e:
        if self.debug:
            print(f"Error in attachment processing: {e}")
        answer = f"Sorry, I encountered an error: {e}"

    if self.debug:
        # str() keeps this preview from raising if a helper returned a non-string.
        print(f"6. Agent returning answer: {str(answer)[:100]}...")
    return answer
|
747 |
|
|
|
|
|
|
|
748 |
def fetch_questions() -> Tuple[str, Optional[pd.DataFrame]]:
|
749 |
"""
|
750 |
Fetch questions from the API and cache them.
|