tatianija committed on
Commit
56f59a5
·
verified ·
1 Parent(s): 7c48eb3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +137 -83
app.py CHANGED
@@ -165,53 +165,104 @@ class WebContentFetcher:
165
  results.append(result)
166
  time.sleep(1) # Be respectful to servers
167
  return results
168
-
169
- # --- File Download Utility ---
170
def _extension_from_content_type(content_type: str) -> str:
    """Return the file extension to use for a Content-Type header value.

    Matching is by substring so parameters such as ``; charset=utf-8`` and
    vendor prefixes such as ``audio/x-wav`` do not need special handling.
    """
    content_type = content_type.lower()
    if 'image' in content_type:
        if 'jpeg' in content_type or 'jpg' in content_type:
            return '.jpg'
        if 'png' in content_type:
            return '.png'
        if 'gif' in content_type:
            return '.gif'
        return '.img'
    if 'audio' in content_type:
        # MP3 is served as "audio/mpeg", which does not contain "mp3".
        if 'mp3' in content_type or 'mpeg' in content_type:
            return '.mp3'
        if 'wav' in content_type:
            return '.wav'
        return '.audio'
    if 'python' in content_type or 'text' in content_type:
        return '.py'
    return '.file'


# --- File Download Utility ---
def download_attachment(url: str, temp_dir: str) -> Optional[str]:
    """
    Download an attachment from URL to a temporary directory.

    The file name is taken from the last path segment of the URL. When that
    segment is missing or has no extension, a name is generated from a
    nanosecond timestamp plus an extension derived from the response's
    Content-Type header.

    Args:
        url: Location of the file to fetch.
        temp_dir: Existing directory the file is written into.

    Returns the local file path if successful, None otherwise.
    """
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()

        # Extract filename from URL or create one based on content type
        parsed_url = urllib.parse.urlparse(url)
        filename = os.path.basename(parsed_url.path)

        if not filename or '.' not in filename:
            extension = _extension_from_content_type(
                response.headers.get('content-type', '')
            )
            # Nanosecond resolution keeps two downloads made within the same
            # second from overwriting each other.
            filename = f"attachment_{time.time_ns()}{extension}"

        file_path = os.path.join(temp_dir, filename)

        with open(file_path, 'wb') as f:
            f.write(response.content)

        print(f"Downloaded attachment: {url} -> {file_path}")
        return file_path

    except Exception as e:
        # Best-effort utility: any network or filesystem failure is reported
        # and signalled to the caller as None.
        print(f"Failed to download attachment {url}: {e}")
        return None
216
 
217
  # --- Code Processing Tool ---
@@ -339,7 +390,7 @@ class AudioTranscriptionTool:
339
  except:
340
  return f"Audio transcription failed: {e}"
341
 
342
- # --- Enhanced Intelligent Agent with URL Processing ---
343
  class IntelligentAgent:
344
  def __init__(self, debug: bool = True, model_name: str = "meta-llama/Llama-3.1-8B-Instruct"):
345
  self.search = DuckDuckGoSearchTool()
@@ -418,81 +469,87 @@ class IntelligentAgent:
418
 
419
  return "\n\n" + "="*50 + "\n".join(formatted_content) + "\n" + "="*50
420
 
421
- def _detect_and_download_attachments(self, question_data: dict) -> Tuple[List[str], List[str], List[str]]:
422
  """
423
- Detect and download attachments from question data.
424
  Returns (image_files, audio_files, code_files)
425
  """
426
  image_files = []
427
  audio_files = []
428
  code_files = []
429
 
430
- # Create temporary directory for downloads
431
  temp_dir = tempfile.mkdtemp(prefix="agent_attachments_")
432
 
433
  # Check for attachments in various fields
434
  attachments = []
435
 
436
  # Common fields where attachments might be found
437
- attachment_fields = ['attachments', 'files', 'media', 'resources']
438
 
439
  for field in attachment_fields:
440
  if field in question_data:
441
  field_data = question_data[field]
442
  if isinstance(field_data, list):
443
  attachments.extend(field_data)
444
- elif isinstance(field_data, str):
445
  attachments.append(field_data)
446
 
447
- # Also check if the question text contains file URLs (not web URLs)
448
- question_text = question_data.get('question', '')
449
- if 'http' in question_text:
450
- # Only consider URLs that likely point to files, not web pages
451
- urls = re.findall(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', question_text)
452
- for url in urls:
453
- # Check if URL likely points to a file (has file extension)
454
- parsed = urllib.parse.urlparse(url)
455
- path = parsed.path.lower()
456
- if any(path.endswith(ext) for ext in ['.jpg', '.jpeg', '.png', '.gif', '.mp3', '.wav', '.py', '.txt', '.pdf']):
457
- attachments.append(url)
458
-
459
- # Download and categorize attachments
460
- for attachment in attachments:
461
- if isinstance(attachment, dict):
462
- url = attachment.get('url') or attachment.get('link') or attachment.get('file_url')
463
- file_type = attachment.get('type', '').lower()
464
- else:
465
- url = attachment
466
  file_type = ''
467
-
468
- if not url:
469
- continue
470
 
471
- # Download the file
472
- file_path = download_attachment(url, temp_dir)
473
- if not file_path:
474
- continue
475
-
476
- # Categorize based on extension or type
477
- file_ext = Path(file_path).suffix.lower()
478
-
479
- if file_type:
480
- if 'image' in file_type or file_ext in ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp']:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
481
  image_files.append(file_path)
482
- elif 'audio' in file_type or file_ext in ['.mp3', '.wav', '.m4a', '.ogg', '.flac']:
483
  audio_files.append(file_path)
484
- elif 'python' in file_type or 'code' in file_type or file_ext in ['.py', '.txt']:
485
  code_files.append(file_path)
486
- else:
487
- # Auto-detect based on extension
488
- if file_ext in ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp']:
489
- image_files.append(file_path)
490
- elif file_ext in ['.mp3', '.wav', '.m4a', '.ogg', '.flac']:
491
- audio_files.append(file_path)
492
- elif file_ext in ['.py', '.txt']:
493
  code_files.append(file_path)
 
 
 
 
 
 
494
  if self.debug:
495
- print(f"...Found attachments: {len(image_files)} images, {len(audio_files)} audio, {len(code_files)} code files")
496
 
497
  return image_files, audio_files, code_files
498
 
@@ -659,9 +716,6 @@ Answer:"""
659
  time.sleep(10)
660
  search_results = self.search(question)
661
 
662
- #if self.debug:
663
- # print(f"Search results type: {type(search_results)}")
664
-
665
  if not search_results:
666
  return "No search results found. Let me try to answer based on my knowledge:\n\n" + self._answer_with_llm(question, attachment_context, url_context)
667
 
 
165
  results.append(result)
166
  time.sleep(1) # Be respectful to servers
167
  return results
168
+
169
+ # --- File Processing Utility ---
170
def _decode_base64_text(text: str) -> Tuple[Optional[bytes], str]:
    """Try to interpret *text* as base64 and return ``(decoded, mime_type)``.

    ``decoded`` is None when the text should be stored as plain text.
    ``mime_type`` is only non-empty for ``data:<mime>;base64,<payload>`` URIs.
    """
    payload = text.strip()
    mime_type = ''
    explicit = False
    if payload.startswith('data:'):
        header, sep, body = payload.partition(',')
        if sep and header.endswith(';base64'):
            mime_type = header[len('data:'):].split(';')[0].lower()
            payload = body
            explicit = True

    # Short strings such as "test" are valid base64 by accident, so only
    # longer payloads (or explicit data URIs) are considered.
    if not explicit and len(payload) <= 100:
        return None, mime_type

    # Base64 is often wrapped over several lines; join them before the
    # strict check. Any other foreign character (spaces, punctuation) makes
    # validation fail, which is how ordinary text and source code are kept.
    payload = ''.join(payload.splitlines())
    try:
        return base64.b64decode(payload, validate=True), mime_type
    except ValueError:  # binascii.Error is a ValueError subclass
        return None, mime_type


# --- File Processing Utility ---
def save_attachment_to_file(attachment_data: Union[str, bytes, dict], temp_dir: str, file_name: str = None) -> Optional[str]:
    """
    Save attachment data to a temporary file.

    Args:
        attachment_data: Raw bytes, a string (plain text, base64, or a
            base64 data URI), or a dict carrying the payload under 'data'
            or 'content' with optional 'name'/'filename' and
            'type'/'mime_type' entries. A dict without a payload key is
            stored as its string representation.
        temp_dir: Existing directory the file is written into.
        file_name: Name to use when the dict supplies none; defaults to a
            timestamp-based name.

    Returns the local file path if successful, None otherwise.
    """
    try:
        # Determine file name and extension
        if not file_name:
            file_name = f"attachment_{int(time.time())}"

        file_type = ''
        if isinstance(attachment_data, dict):
            if 'data' in attachment_data:
                file_data = attachment_data['data']
            elif 'content' in attachment_data:
                file_data = attachment_data['content']
            else:
                # Try to use the dict as file data directly
                file_data = str(attachment_data)
            # "or" chains tolerate keys that are present but set to None.
            file_type = str(attachment_data.get('type') or attachment_data.get('mime_type') or '').lower()
            file_name = str(attachment_data.get('name') or attachment_data.get('filename') or file_name)
        elif isinstance(attachment_data, (str, bytes)):
            file_data = attachment_data
        else:
            print(f"Unknown attachment data type: {type(attachment_data)}")
            return None

        if not isinstance(file_data, (str, bytes, bytearray, memoryview)):
            # Reject before opening the file so no empty file is left behind.
            print(f"Unknown attachment data type: {type(file_data)}")
            return None

        decoded_data = None
        if isinstance(file_data, str):
            decoded_data, uri_type = _decode_base64_text(file_data)
            if not file_type:
                file_type = uri_type

        # Names come from external data: keep only the final path component
        # so the file cannot be written outside temp_dir.
        file_name = os.path.basename(file_name.replace('\\', '/'))
        if file_name in ('', '.', '..'):
            file_name = f"attachment_{int(time.time())}"

        # Ensure file has an extension
        if '.' not in file_name:
            if 'image' in file_type:
                if 'jpeg' in file_type or 'jpg' in file_type:
                    file_name += '.jpg'
                elif 'png' in file_type:
                    file_name += '.png'
                elif 'gif' in file_type:
                    file_name += '.gif'
                else:
                    file_name += '.img'
            elif 'audio' in file_type:
                # MP3 is typed "audio/mpeg", which does not contain "mp3".
                if 'mp3' in file_type or 'mpeg' in file_type:
                    file_name += '.mp3'
                elif 'wav' in file_type:
                    file_name += '.wav'
                else:
                    file_name += '.audio'
            elif 'python' in file_type or 'text' in file_type:
                file_name += '.py'
            else:
                file_name += '.file'

        file_path = os.path.join(temp_dir, file_name)

        # Save the file
        if isinstance(file_data, str) and decoded_data is None:
            # Plain text
            with open(file_path, 'w', encoding='utf-8') as f:
                f.write(file_data)
        else:
            # Binary data (given as bytes or recovered from base64)
            with open(file_path, 'wb') as f:
                f.write(file_data if decoded_data is None else decoded_data)

        print(f"Saved attachment: {file_path}")
        return file_path

    except Exception as e:
        # Best-effort utility: failures are reported and signalled as None.
        print(f"Failed to save attachment: {e}")
        return None
267
 
268
  # --- Code Processing Tool ---
 
390
  except:
391
  return f"Audio transcription failed: {e}"
392
 
393
+ # --- Enhanced Intelligent Agent with Direct Attachment Processing ---
394
  class IntelligentAgent:
395
  def __init__(self, debug: bool = True, model_name: str = "meta-llama/Llama-3.1-8B-Instruct"):
396
  self.search = DuckDuckGoSearchTool()
 
469
 
470
  return "\n\n" + "="*50 + "\n".join(formatted_content) + "\n" + "="*50
471
 
472
+ def _detect_and_process_direct_attachments(self, question_data: dict) -> Tuple[List[str], List[str], List[str]]:
473
  """
474
+ Detect and process attachments that are directly attached to questions (not as URLs).
475
  Returns (image_files, audio_files, code_files)
476
  """
477
  image_files = []
478
  audio_files = []
479
  code_files = []
480
 
481
+ # Create temporary directory for attachments
482
  temp_dir = tempfile.mkdtemp(prefix="agent_attachments_")
483
 
484
  # Check for attachments in various fields
485
  attachments = []
486
 
487
  # Common fields where attachments might be found
488
+ attachment_fields = ['attachments', 'files', 'media', 'resources', 'file_data', 'file_content']
489
 
490
  for field in attachment_fields:
491
  if field in question_data:
492
  field_data = question_data[field]
493
  if isinstance(field_data, list):
494
  attachments.extend(field_data)
495
+ elif field_data: # Non-empty data
496
  attachments.append(field_data)
497
 
498
+ # Process each attachment
499
+ for i, attachment in enumerate(attachments):
500
+ try:
501
+ # Determine file name
502
+ file_name = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
503
  file_type = ''
 
 
 
504
 
505
+ if isinstance(attachment, dict):
506
+ # Extract metadata if available
507
+ file_name = attachment.get('name') or attachment.get('filename') or f"attachment_{i}"
508
+ file_type = attachment.get('type', '').lower() or attachment.get('mime_type', '').lower()
509
+ else:
510
+ file_name = f"attachment_{i}"
511
+
512
+ # Save attachment to file
513
+ file_path = save_attachment_to_file(attachment, temp_dir, file_name)
514
+
515
+ if not file_path:
516
+ continue
517
+
518
+ # Categorize based on extension or type
519
+ file_ext = Path(file_path).suffix.lower()
520
+
521
+ # Determine category
522
+ is_image = (
523
+ 'image' in file_type or
524
+ file_ext in ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.tiff']
525
+ )
526
+ is_audio = (
527
+ 'audio' in file_type or
528
+ file_ext in ['.mp3', '.wav', '.m4a', '.ogg', '.flac', '.aac']
529
+ )
530
+ is_code = (
531
+ 'python' in file_type or 'code' in file_type or 'text' in file_type or
532
+ file_ext in ['.py', '.txt', '.js', '.html', '.css', '.json', '.xml']
533
+ )
534
+
535
+ # Categorize the file
536
+ if is_image:
537
  image_files.append(file_path)
538
+ elif is_audio:
539
  audio_files.append(file_path)
540
+ elif is_code:
541
  code_files.append(file_path)
542
+ else:
543
+ # Default to code/text for unknown types
 
 
 
 
 
544
  code_files.append(file_path)
545
+
546
+ except Exception as e:
547
+ if self.debug:
548
+ print(f"Error processing attachment {i}: {e}")
549
+ continue
550
+
551
  if self.debug:
552
+ print(f"...Found direct attachments: {len(image_files)} images, {len(audio_files)} audio, {len(code_files)} code files")
553
 
554
  return image_files, audio_files, code_files
555
 
 
716
  time.sleep(10)
717
  search_results = self.search(question)
718
 
 
 
 
719
  if not search_results:
720
  return "No search results found. Let me try to answer based on my knowledge:\n\n" + self._answer_with_llm(question, attachment_context, url_context)
721