Final_Assignment_Template

Sleeping

App Files Files Community

tatianija committed on Jun 30

Commit

56f59a5

verified ·

1 Parent(s): 7c48eb3

Update app.py

Browse files

Files changed (1) hide show

app.py +137 -83

app.py CHANGED Viewed

@@ -165,53 +165,104 @@ class WebContentFetcher:
             results.append(result)
             time.sleep(1)  # Be respectful to servers
         return results
-# --- File Download Utility ---
def download_attachment(url: str, temp_dir: str) -> Optional[str]:
    """
    Fetch the attachment at ``url`` and store it inside ``temp_dir``.

    The file name is the last path component of the URL. When that component
    is empty or has no dot, a timestamped name is generated whose extension is
    chosen from the response's ``content-type`` header.

    Returns the local file path if successful, None otherwise. Any exception
    raised while fetching or writing is printed and reported as None.
    """

    def _name_from_content_type(content_type: str) -> str:
        # Build a timestamped fallback name for URLs that carry no usable file name.
        if 'image' in content_type:
            if 'jpeg' in content_type or 'jpg' in content_type:
                return f"attachment_{int(time.time())}.jpg"
            if 'png' in content_type:
                return f"attachment_{int(time.time())}.png"
            return f"attachment_{int(time.time())}.img"
        if 'audio' in content_type:
            if 'mp3' in content_type:
                return f"attachment_{int(time.time())}.mp3"
            if 'wav' in content_type:
                return f"attachment_{int(time.time())}.wav"
            return f"attachment_{int(time.time())}.audio"
        if 'python' in content_type or 'text' in content_type:
            return f"attachment_{int(time.time())}.py"
        return f"attachment_{int(time.time())}.file"

    try:
        reply = requests.get(url, timeout=30)
        reply.raise_for_status()

        # Prefer the name embedded in the URL path; an empty name has no dot
        # either, so a single membership test covers both fallback conditions.
        filename = os.path.basename(urllib.parse.urlparse(url).path)
        if '.' not in filename:
            filename = _name_from_content_type(
                reply.headers.get('content-type', '').lower()
            )

        file_path = os.path.join(temp_dir, filename)
        with open(file_path, 'wb') as sink:
            sink.write(reply.content)
        print(f"Downloaded attachment: {url} -> {file_path}")
        return file_path
    except Exception as e:
        print(f"Failed to download attachment {url}: {e}")
        return None
 # --- Code Processing Tool ---
@@ -339,7 +390,7 @@ class AudioTranscriptionTool:
             except:
                 return f"Audio transcription failed: {e}"
-# --- Enhanced Intelligent Agent with URL Processing ---
 class IntelligentAgent:
     def __init__(self, debug: bool = True, model_name: str = "meta-llama/Llama-3.1-8B-Instruct"):
         self.search = DuckDuckGoSearchTool()
@@ -418,81 +469,87 @@ class IntelligentAgent:
         return "\n\n" + "="*50 + "\n".join(formatted_content) + "\n" + "="*50
-    def _detect_and_download_attachments(self, question_data: dict) -> Tuple[List[str], List[str], List[str]]:
         """
-        Detect and download attachments from question data.
         Returns (image_files, audio_files, code_files)
         """
         image_files = []
         audio_files = []
         code_files = []
-        # Create temporary directory for downloads
         temp_dir = tempfile.mkdtemp(prefix="agent_attachments_")
         # Check for attachments in various fields
         attachments = []
         # Common fields where attachments might be found
-        attachment_fields = ['attachments', 'files', 'media', 'resources']
         for field in attachment_fields:
             if field in question_data:
                 field_data = question_data[field]
                 if isinstance(field_data, list):
                     attachments.extend(field_data)
-                elif isinstance(field_data, str):
                     attachments.append(field_data)
-        # Also check if the question text contains file URLs (not web URLs)
-        question_text = question_data.get('question', '')
-        if 'http' in question_text:
-            # Only consider URLs that likely point to files, not web pages
-            urls = re.findall(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', question_text)
-            for url in urls:
-                # Check if URL likely points to a file (has file extension)
-                parsed = urllib.parse.urlparse(url)
-                path = parsed.path.lower()
-                if any(path.endswith(ext) for ext in ['.jpg', '.jpeg', '.png', '.gif', '.mp3', '.wav', '.py', '.txt', '.pdf']):
-                    attachments.append(url)
-        # Download and categorize attachments
-        for attachment in attachments:
-            if isinstance(attachment, dict):
-                url = attachment.get('url') or attachment.get('link') or attachment.get('file_url')
-                file_type = attachment.get('type', '').lower()
-            else:
-                url = attachment
                 file_type = ''
-            if not url:
-                continue
-            # Download the file
-            file_path = download_attachment(url, temp_dir)
-            if not file_path:
-                continue
-            # Categorize based on extension or type
-            file_ext = Path(file_path).suffix.lower()
-            if file_type:
-                if 'image' in file_type or file_ext in ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp']:
                     image_files.append(file_path)
-                elif 'audio' in file_type or file_ext in ['.mp3', '.wav', '.m4a', '.ogg', '.flac']:
                     audio_files.append(file_path)
-                elif 'python' in file_type or 'code' in file_type or file_ext in ['.py', '.txt']:
                     code_files.append(file_path)
-            else:
-                # Auto-detect based on extension
-                if file_ext in ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp']:
-                    image_files.append(file_path)
-                elif file_ext in ['.mp3', '.wav', '.m4a', '.ogg', '.flac']:
-                    audio_files.append(file_path)
-                elif file_ext in ['.py', '.txt']:
                     code_files.append(file_path)
         if self.debug:
-            print(f"...Found attachments: {len(image_files)} images, {len(audio_files)} audio, {len(code_files)} code files")
         return image_files, audio_files, code_files
@@ -659,9 +716,6 @@ Answer:"""
             time.sleep(10)
             search_results = self.search(question)
-            #if self.debug:
-            #    print(f"Search results type: {type(search_results)}")
             if not search_results:
                 return "No search results found. Let me try to answer based on my knowledge:\n\n" + self._answer_with_llm(question, attachment_context, url_context)

             results.append(result)
             time.sleep(1)  # Be respectful to servers
         return results
+# --- File Processing Utility ---
def _extension_for_type(file_type: str) -> str:
    """Return the file extension to append for a lower-cased MIME-like type string."""
    if 'image' in file_type:
        if 'jpeg' in file_type or 'jpg' in file_type:
            return '.jpg'
        if 'png' in file_type:
            return '.png'
        return '.img'
    if 'audio' in file_type:
        # 'audio/mpeg' is the registered MIME type for MP3 data.
        if 'mp3' in file_type or 'mpeg' in file_type:
            return '.mp3'
        if 'wav' in file_type:
            return '.wav'
        return '.audio'
    if 'python' in file_type or 'text' in file_type:
        return '.py'
    return '.file'


def _decode_base64_payload(text: str) -> Optional[bytes]:
    """Return the decoded bytes when *text* is strictly valid base64, else None."""
    # Encoders commonly wrap base64 at a fixed column, so drop line breaks only.
    # Spaces are kept so ordinary prose or source code fails strict validation.
    compact = text.strip().replace('\r', '').replace('\n', '')
    # Short strings are treated as text: many short words are valid base64 by accident.
    if len(compact) <= 100:
        return None
    try:
        # validate=True rejects characters outside the base64 alphabet instead of
        # silently discarding them, which would turn plain text into garbage bytes.
        return base64.b64decode(compact, validate=True)
    except ValueError:  # binascii.Error and the non-ASCII error are both ValueErrors
        return None


def save_attachment_to_file(attachment_data: Union[str, bytes, dict], temp_dir: str, file_name: Optional[str] = None) -> Optional[str]:
    """
    Save attachment data to a file inside ``temp_dir``.

    Args:
        attachment_data: Raw bytes, a string (base64 or plain text), or a dict
            holding the payload under 'data' (with optional 'type'/'name') or
            'content' (with optional 'mime_type'/'filename'). A dict with
            neither key is saved as its ``str()`` representation.
        temp_dir: Directory the file is written into.
        file_name: Preferred file name; a name supplied inside a dict attachment
            takes precedence. Defaults to a timestamped name.

    Returns the local file path if successful, None otherwise.
    """
    try:
        default_name = f"attachment_{int(time.time())}"
        if not file_name:
            file_name = default_name

        file_type = ''
        if isinstance(attachment_data, dict):
            if 'data' in attachment_data:
                file_data = attachment_data['data']
                # `or ''` guards against an explicit None value in the metadata.
                file_type = str(attachment_data.get('type') or '').lower()
                file_name = attachment_data.get('name') or file_name
            elif 'content' in attachment_data:
                file_data = attachment_data['content']
                file_type = str(attachment_data.get('mime_type') or '').lower()
                file_name = attachment_data.get('filename') or file_name
            else:
                # No recognised payload key: persist the dict's text form.
                file_data = str(attachment_data)
        elif isinstance(attachment_data, (str, bytes)):
            file_data = attachment_data
        else:
            print(f"Unknown attachment data type: {type(attachment_data)}")
            return None

        # Attachment names are untrusted: keep only the final path component so a
        # name such as '../../x' or '/etc/x' cannot escape temp_dir.
        safe_name = os.path.basename(str(file_name).replace('\\', '/'))
        if safe_name in ('', '.', '..'):
            safe_name = default_name

        # Ensure file has an extension
        if '.' not in safe_name:
            safe_name += _extension_for_type(file_type)

        # Resolve the payload before creating the file so an unsupported payload
        # does not leave an empty file behind.
        if isinstance(file_data, str):
            decoded = _decode_base64_payload(file_data)
            payload = file_data if decoded is None else decoded
        elif isinstance(file_data, (bytes, bytearray, memoryview)):
            payload = bytes(file_data)
        else:
            print(f"Unsupported attachment payload type: {type(file_data)}")
            return None

        file_path = os.path.join(temp_dir, safe_name)
        if isinstance(payload, str):
            with open(file_path, 'w', encoding='utf-8') as f:
                f.write(payload)
        else:
            with open(file_path, 'wb') as f:
                f.write(payload)

        print(f"Saved attachment: {file_path}")
        return file_path
    except Exception as e:
        # Best-effort boundary: callers rely on None (not an exception) on failure.
        print(f"Failed to save attachment: {e}")
        return None
 # --- Code Processing Tool ---
             except:
                 return f"Audio transcription failed: {e}"
+# --- Enhanced Intelligent Agent with Direct Attachment Processing ---
 class IntelligentAgent:
     def __init__(self, debug: bool = True, model_name: str = "meta-llama/Llama-3.1-8B-Instruct"):
         self.search = DuckDuckGoSearchTool()
         return "\n\n" + "="*50 + "\n".join(formatted_content) + "\n" + "="*50
+    def _detect_and_process_direct_attachments(self, question_data: dict) -> Tuple[List[str], List[str], List[str]]:
         """
+        Detect and process attachments that are directly attached to questions (not as URLs).
         Returns (image_files, audio_files, code_files)
         """
         image_files = []
         audio_files = []
         code_files = []
+        # Create temporary directory for attachments
         temp_dir = tempfile.mkdtemp(prefix="agent_attachments_")
         # Check for attachments in various fields
         attachments = []
         # Common fields where attachments might be found
+        attachment_fields = ['attachments', 'files', 'media', 'resources', 'file_data', 'file_content']
         for field in attachment_fields:
             if field in question_data:
                 field_data = question_data[field]
                 if isinstance(field_data, list):
                     attachments.extend(field_data)
+                elif field_data:  # Non-empty data
                     attachments.append(field_data)
+        # Process each attachment
+        for i, attachment in enumerate(attachments):
+            try:
+                # Determine file name
+                file_name = None
                 file_type = ''
+                if isinstance(attachment, dict):
+                    # Extract metadata if available
+                    file_name = attachment.get('name') or attachment.get('filename') or f"attachment_{i}"
+                    file_type = attachment.get('type', '').lower() or attachment.get('mime_type', '').lower()
+                else:
+                    file_name = f"attachment_{i}"
+                # Save attachment to file
+                file_path = save_attachment_to_file(attachment, temp_dir, file_name)
+                if not file_path:
+                    continue
+                # Categorize based on extension or type
+                file_ext = Path(file_path).suffix.lower()
+                # Determine category
+                is_image = (
+                    'image' in file_type or
+                    file_ext in ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.tiff']
+                )
+                is_audio = (
+                    'audio' in file_type or
+                    file_ext in ['.mp3', '.wav', '.m4a', '.ogg', '.flac', '.aac']
+                )
+                is_code = (
+                    'python' in file_type or 'code' in file_type or 'text' in file_type or
+                    file_ext in ['.py', '.txt', '.js', '.html', '.css', '.json', '.xml']
+                )
+                # Categorize the file
+                if is_image:
                     image_files.append(file_path)
+                elif is_audio:
                     audio_files.append(file_path)
+                elif is_code:
                     code_files.append(file_path)
+                else:
+                    # Default to code/text for unknown types
                     code_files.append(file_path)
+            except Exception as e:
+                if self.debug:
+                    print(f"Error processing attachment {i}: {e}")
+                continue
         if self.debug:
+            print(f"...Found direct attachments: {len(image_files)} images, {len(audio_files)} audio, {len(code_files)} code files")
         return image_files, audio_files, code_files
             time.sleep(10)
             search_results = self.search(question)
             if not search_results:
                 return "No search results found. Let me try to answer based on my knowledge:\n\n" + self._answer_with_llm(question, attachment_context, url_context)