Final_Assignment_Template

Sleeping

App Files Files Community

tatianija committed on Jul 1

Commit

81ee216

verified ·

1 Parent(s): b0ffe80

Update app.py

Browse files

Files changed (1) hide show

app.py +252 -122

app.py CHANGED Viewed

@@ -172,13 +172,6 @@ def save_attachment_to_file(attachment_data: Union[str, bytes, dict], temp_dir:
     Save attachment data to a temporary file.
     Returns the local file path if successful, None otherwise.
     """
     try:
         # Determine file name and extension
         if not file_name:
@@ -272,8 +265,6 @@ def save_attachment_to_file(attachment_data: Union[str, bytes, dict], temp_dir:
         print(f"Failed to save attachment: {e}")
         return None
 # --- Code Processing Tool ---
 class CodeAnalysisTool:
     def __init__(self, model_name: str = "meta-llama/Llama-3.1-8B-Instruct"):
@@ -302,23 +293,8 @@ Code:
 {code_content}
 ```
 Provide a brief, focused analysis:"""
             messages = [{"role": "user", "content": analysis_prompt}]
             response = self.client.chat_completion(
                 messages=messages,
@@ -493,128 +469,282 @@ class IntelligentAgent:
         return "\n\n" + "="*50 + "\n".join(formatted_content) + "\n" + "="*50
-def _detect_and_process_direct_attachments(self, file_name: str) -> Tuple[List[str], List[str], List[str]]:
-    """
-    Detect and process a single attachment directly attached to a question (not as a URL).
-    Returns (image_files, audio_files, code_files)
-    """
-    image_files = []
-    audio_files = []
-    code_files = []
-    if not file_name:
-        return image_files, audio_files, code_files
-    try:
-        # Construct the file path (assuming file is in current directory)
-        file_path = os.path.join(os.getcwd(), file_name)
-        # Check if file exists
-        if not os.path.exists(file_path):
-            if self.debug:
-                print(f"File not found: {file_path}")
             return image_files, audio_files, code_files
-        # Get file extension
-        file_ext = Path(file_name).suffix.lower()
-        # Determine category
-        is_image = (
-            file_ext in ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.tiff']
-        )
-        is_audio = (
-            file_ext in ['.mp3', '.wav', '.m4a', '.ogg', '.flac', '.aac']
-        )
-        is_code = (
-            file_ext in ['.py', '.txt', '.js', '.html', '.css', '.json', '.xml', '.md', '.c', '.cpp', '.java']
-        )
-        # Categorize the file
-        if is_image:
-            image_files.append(file_path)
-        elif is_audio:
-            audio_files.append(file_path)
-        elif is_code:
-            code_files.append(file_path)
-        else:
-            # Default to code/text for unknown types
-            code_files.append(file_path)
-        if self.debug:
-            print(f"Processed file: {file_name} -> {'image' if is_image else 'audio' if is_audio else 'code'}")
-    except Exception as e:
-        if self.debug:
-            print(f"Error processing attachment {file_name}: {e}")
-    if self.debug:
-        print(f"Processed attachment: {len(image_files)} images, {len(audio_files)} audio, {len(code_files)} code files")
-    return image_files, audio_files, code_files
-def process_question_with_attachments(self, question_data: dict) -> str:
-    """
-    Process a question that may have attachments and URLs.
-    """
-    question_text = question_data.get('question', '')
-    if self.debug:
-        print(f"Question data keys: {list(question_data.keys())}")
-        print(f"\n1. Processing question with potential attachments and URLs: {question_text[:300]}...")
-    try:
-        # Detect and process URLs
         if self.debug:
-            print(f"2. Detecting and processing URLs...")
-        url_context = self._extract_and_process_urls(question_text)
-        if self.debug and url_context:
-            print(f"URL context found: {len(url_context)} characters")
-    except Exception as e:
-        if self.debug:
-            print(f"Error extracting URLs: {e}")
-        url_context = ""
-    try:
-        # Detect and download attachments
-        if self.debug:
-            print(f"3. Searching for images, audio or code attachments...")
-        attachment_name = question_data.get('file_name', '')
-        if self.debug:
-            print(f"Attachment name from question_data: '{attachment_name}'")
-        image_files, audio_files, code_files = self._detect_and_process_direct_attachments(attachment_name)
-        # Process attachments to get context
-        attachment_context = self._process_attachments(image_files, audio_files, code_files)
-        if self.debug and attachment_context:
-            print(f"Attachment context: {attachment_context[:200]}...")
-        # Decide whether to search
-        if self._should_search(question_text, attachment_context, url_context):
             if self.debug:
-                print("5. Using search-based approach")
-            answer = self._answer_with_search(question_text, attachment_context, url_context)
-        else:
             if self.debug:
-                print("5. Using LLM-only approach")
-            answer = self._answer_with_llm(question_text, attachment_context, url_context)
             if self.debug:
-                print(f"LLM answer: {answer}")
-        # Note: We don't cleanup files here since they're not temporary files we created
-        # They are actual files in the working directory
-    except Exception as e:
         if self.debug:
-            print(f"Error in attachment processing: {e}")
-        answer = f"Sorry, I encountered an error: {e}"
-    if self.debug:
-        print(f"6. Agent returning answer: {answer[:100]}...")
-    return answer
 def fetch_questions() -> Tuple[str, Optional[pd.DataFrame]]:
     """
     Fetch questions from the API and cache them.

     Save attachment data to a temporary file.
     Returns the local file path if successful, None otherwise.
     """
     try:
         # Determine file name and extension
         if not file_name:
         print(f"Failed to save attachment: {e}")
         return None
 # --- Code Processing Tool ---
 class CodeAnalysisTool:
     def __init__(self, model_name: str = "meta-llama/Llama-3.1-8B-Instruct"):
 {code_content}
 ```
 Provide a brief, focused analysis:"""
             messages = [{"role": "user", "content": analysis_prompt}]
             response = self.client.chat_completion(
                 messages=messages,
         return "\n\n" + "="*50 + "\n".join(formatted_content) + "\n" + "="*50
+    def _detect_and_process_direct_attachments(self, file_name: str) -> Tuple[List[str], List[str], List[str]]:
+        """
+        Detect and process a single attachment directly attached to a question (not as a URL).
+        Returns (image_files, audio_files, code_files)
+        """
+        image_files = []
+        audio_files = []
+        code_files = []
+        if not file_name:
             return image_files, audio_files, code_files
+        try:
+            # Construct the file path (assuming file is in current directory)
+            file_path = os.path.join(os.getcwd(), file_name)
+            # Check if file exists
+            if not os.path.exists(file_path):
+                if self.debug:
+                    print(f"File not found: {file_path}")
+                return image_files, audio_files, code_files
+            # Get file extension
+            file_ext = Path(file_name).suffix.lower()
+            # Determine category
+            is_image = (
+                file_ext in ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.tiff']
+            )
+            is_audio = (
+                file_ext in ['.mp3', '.wav', '.m4a', '.ogg', '.flac', '.aac']
+            )
+            is_code = (
+                file_ext in ['.py', '.txt', '.js', '.html', '.css', '.json', '.xml', '.md', '.c', '.cpp', '.java']
+            )
+            # Categorize the file
+            if is_image:
+                image_files.append(file_path)
+            elif is_audio:
+                audio_files.append(file_path)
+            elif is_code:
+                code_files.append(file_path)
+            else:
+                # Default to code/text for unknown types
+                code_files.append(file_path)
+            if self.debug:
+                print(f"Processed file: {file_name} -> {'image' if is_image else 'audio' if is_audio else 'code'}")
+        except Exception as e:
+            if self.debug:
+                print(f"Error processing attachment {file_name}: {e}")
         if self.debug:
+            print(f"Processed attachment: {len(image_files)} images, {len(audio_files)} audio, {len(code_files)} code files")
+        return image_files, audio_files, code_files
+    def _process_attachments(self, image_files: List[str], audio_files: List[str], code_files: List[str]) -> str:
+        """
+        Process different types of attachments and return consolidated context.
+        """
+        attachment_context = ""
+        # Process images
+        for image_file in image_files:
+            if self.debug:
+                print(f"Processing image: {image_file}")
+            try:
+                image_description = self.image_tool.analyze_image(image_file)
+                ocr_text = self.image_tool.extract_text_from_image(image_file)
+                attachment_context += f"\n\nIMAGE ANALYSIS ({image_file}):\n"
+                attachment_context += f"Description: {image_description}\n"
+                if ocr_text and "No text found" not in ocr_text and "OCR failed" not in ocr_text:
+                    attachment_context += f"Text extracted: {ocr_text}\n"
+            except Exception as e:
+                if self.debug:
+                    print(f"Error processing image {image_file}: {e}")
+                attachment_context += f"\n\nIMAGE PROCESSING ERROR ({image_file}): {e}\n"
+        # Process audio files
+        for audio_file in audio_files:
+            if self.debug:
+                print(f"Processing audio: {audio_file}")
+            try:
+                transcription = self.audio_tool.transcribe_audio(audio_file)
+                attachment_context += f"\n\nAUDIO TRANSCRIPTION ({audio_file}):\n{transcription}\n"
+            except Exception as e:
+                if self.debug:
+                    print(f"Error processing audio {audio_file}: {e}")
+                attachment_context += f"\n\nAUDIO PROCESSING ERROR ({audio_file}): {e}\n"
+        # Process code/text files
+        for code_file in code_files:
+            if self.debug:
+                print(f"Processing code/text: {code_file}")
+            try:
+                code_analysis = self.code_tool.analyze_code(code_file)
+                attachment_context += f"\n\nCODE ANALYSIS ({code_file}):\n{code_analysis}\n"
+            except Exception as e:
+                if self.debug:
+                    print(f"Error processing code {code_file}: {e}")
+                attachment_context += f"\n\nCODE PROCESSING ERROR ({code_file}): {e}\n"
+        return attachment_context
+    def _should_search(self, question: str, attachment_context: str, url_context: str) -> bool:
+        """
+        Decide whether to use search based on the question and available context.
+        """
+        # If we have rich context from attachments or URLs, we might not need search
+        has_rich_context = bool(attachment_context.strip() or url_context.strip())
+        # Keywords that typically indicate search is needed
+        search_keywords = [
+            "latest", "recent", "current", "today", "now", "2024", "2025",
+            "news", "update", "breaking", "trending", "happening",
+            "who is", "what is", "where is", "when did", "how many",
+            "price", "stock", "weather", "forecast"
+        ]
+        question_lower = question.lower()
+        needs_search = any(keyword in question_lower for keyword in search_keywords)
+        # Use LLM to make a more nuanced decision
+        try:
+            decision_prompt = f"""
+Given this question and available context, should I search the web for additional information?
+Question: {question}
+Available context: {"Yes - rich context from attachments/URLs" if has_rich_context else "No additional context"}
+Context preview: {(attachment_context + url_context)[:500]}...
+Answer with just "YES" if web search would be helpful, or "NO" if the available context is sufficient or if this is a general knowledge question that doesn't require current information.
+"""
+            decision = self._chat_completion(decision_prompt, max_tokens=10, temperature=0.1)
+            should_search = "YES" in decision.upper()
             if self.debug:
+                print(f"Search decision: {should_search} (LLM said: {decision})")
+            return should_search
+        except Exception as e:
+            if self.debug:
+                print(f"Error in search decision: {e}, falling back to keyword-based decision")
+            return needs_search and not has_rich_context
+    def _answer_with_search(self, question: str, attachment_context: str, url_context: str) -> str:
+        """
+        Answer the question using search + LLM.
+        """
+        try:
+            # Perform search
+            search_results = self.search.call(question)
+            # Combine all contexts
+            full_context = f"""
+Question: {question}
+Search Results: {search_results}
+{attachment_context}
+{url_context}
+"""
+            answer_prompt = f"""Based on the search results and additional context provided, answer this question comprehensively and accurately:
+{full_context}
+Provide a clear, well-structured answer:"""
+            return self._chat_completion(answer_prompt, max_tokens=800, temperature=0.3)
+        except Exception as e:
+            if self.debug:
+                print(f"Search-based answer failed: {e}")
+            return self._answer_with_llm(question, attachment_context, url_context)
+    def _answer_with_llm(self, question: str, attachment_context: str, url_context: str) -> str:
+        """
+        Answer the question using only the LLM and available context.
+        """
+        try:
+            full_context = f"""
+Question: {question}
+{attachment_context}
+{url_context}
+"""
+            answer_prompt = f"""Answer this question based on your knowledge and the provided context:
+{full_context}
+Provide a clear, comprehensive answer:"""
+            return self._chat_completion(answer_prompt, max_tokens=800, temperature=0.3)
+        except Exception as e:
+            return f"I apologize, but I encountered an error while processing your question: {e}"
+    def process_question_with_attachments(self, question_data: dict) -> str:
+        """
+        Process a question that may have attachments and URLs.
+        """
+        question_text = question_data.get('question', '')
+        if self.debug:
+            print(f"Question data keys: {list(question_data.keys())}")
+            print(f"\n1. Processing question with potential attachments and URLs: {question_text[:300]}...")
+        try:
+            # Detect and process URLs
             if self.debug:
+                print(f"2. Detecting and processing URLs...")
+            url_context = self._extract_and_process_urls(question_text)
+            if self.debug and url_context:
+                print(f"URL context found: {len(url_context)} characters")
+        except Exception as e:
             if self.debug:
+                print(f"Error extracting URLs: {e}")
+            url_context = ""
+        try:
+            # Detect and download attachments
+            if self.debug:
+                print(f"3. Searching for images, audio or code attachments...")
+            attachment_name = question_data.get('file_name', '')
+            if self.debug:
+                print(f"Attachment name from question_data: '{attachment_name}'")
+            image_files, audio_files, code_files = self._detect_and_process_direct_attachments(attachment_name)
+            # Process attachments to get context
+            attachment_context = self._process_attachments(image_files, audio_files, code_files)
+            if self.debug and attachment_context:
+                print(f"Attachment context: {attachment_context[:200]}...")
+            # Decide whether to search
+            if self._should_search(question_text, attachment_context, url_context):
+                if self.debug:
+                    print("5. Using search-based approach")
+                answer = self._answer_with_search(question_text, attachment_context, url_context)
+            else:
+                if self.debug:
+                    print("5. Using LLM-only approach")
+                answer = self._answer_with_llm(question_text, attachment_context, url_context)
+                if self.debug:
+                    print(f"LLM answer: {answer}")
+            # Note: We don't cleanup files here since they're not temporary files we created
+            # They are actual files in the working directory
+        except Exception as e:
+            if self.debug:
+                print(f"Error in attachment processing: {e}")
+            answer = f"Sorry, I encountered an error: {e}"
         if self.debug:
+            print(f"6. Agent returning answer: {answer[:100]}...")
+        return answer
 def fetch_questions() -> Tuple[str, Optional[pd.DataFrame]]:
     """
     Fetch questions from the API and cache them.