martinsu commited on
Commit
d70b82d
·
1 Parent(s): c48121b

Enhance agent.py with image data extraction and YouTube transcript retrieval tools; update .gitignore and requirements.txt to include test files and yt_dlp dependency.

Browse files
Files changed (4) hide show
  1. .gitignore +5 -1
  2. agent.py +150 -4
  3. app.py +7 -0
  4. requirements.txt +1 -1
.gitignore CHANGED
@@ -12,4 +12,8 @@ TEST_SET/
12
  test_results/
13
 
14
  #cursor
15
- .cursor/
 
 
 
 
 
12
  test_results/
13
 
14
  #cursor
15
+ .cursor/
16
+
17
+ #test
18
+ test.py
19
+ test_youtube.py
agent.py CHANGED
@@ -15,7 +15,11 @@ import requests
15
  import json
16
  import time
17
  from daytona_sdk import Daytona, DaytonaConfig
18
-
 
 
 
 
19
 
20
 
21
  # Load environment variables
@@ -485,6 +489,69 @@ def extract_document_data(input_method: str, files: list, prompt: str, json_mode
485
  except Exception as e:
486
  return f"Error extracting document data: {str(e)}"
487
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
488
  @tool
489
  def extract_url_content(url: str) -> str:
490
  """Extract content from a URL using Diffbot API (supports webpages, articles, PDFs, etc.).
@@ -521,7 +588,7 @@ def extract_url_content(url: str) -> str:
521
 
522
  try:
523
  # Make the API request with a timeout
524
- response = requests.get(api_url, params=params, timeout=30) # 30 second timeout
525
  response.raise_for_status() # Raise exception for HTTP errors
526
 
527
  # Parse the response
@@ -559,6 +626,77 @@ def extract_url_content(url: str) -> str:
559
  except Exception as e:
560
  return f"Error extracting content from {url}: {str(e)}"
561
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
562
  class BasicAgent:
563
  def __init__(self):
564
  print("BasicAgent initialized.")
@@ -594,7 +732,7 @@ class BasicAgent:
594
  )
595
 
596
  # Initialize tools
597
- self.tools = [search_web_tavily, search_web_serper, execute_code_securely, execute_shell_command, sandbox_file_operation, extract_document_data, extract_url_content]
598
 
599
  # Bind tools only to the worker model
600
  self.worker_model = self.worker_model_base.bind_tools(self.tools)
@@ -721,6 +859,8 @@ class BasicAgent:
721
  Worker has access to the following tools:
722
  - Web search (using Tavily and Serper)
723
  - Web content extraction
 
 
724
  - Secure code execution (for Python and other languages)
725
  - Secure shell command execution
726
  - Secure file operations
@@ -763,6 +903,8 @@ class BasicAgent:
763
  Worker has access to the following powerful tools:
764
  - Web search (using Tavily and Serper)
765
  - Web content extraction
 
 
766
  - Secure code execution (for Python and other languages)
767
  - Secure shell command execution
768
  - Secure file operations
@@ -870,6 +1012,8 @@ class BasicAgent:
870
  Remember that the worker had access to:
871
  - Web search tools
872
  - Web content extraction
 
 
873
  - Secure code execution
874
  - Secure shell commands
875
  - Secure file operations
@@ -882,6 +1026,8 @@ class BasicAgent:
882
  - Ensure any numerical values, dates, names, or technical terms are correct
883
  - Confirm that the formatting precisely matches what was requested
884
  - Do not add units to the final answer if not explicitly requested
 
 
885
  - Answers tend to be as short as possible, so do not add extra data unless explicitly requested
886
 
887
  If the answer report is correct, format it exactly as asked in the question, and respond with:
@@ -988,7 +1134,7 @@ class BasicAgent:
988
 
989
  try:
990
  # Run the workflow
991
- final_state = self.app.invoke(initial_state, config={"callbacks": [self.langfuse_handler], "recursion_limit": 50})
992
 
993
  # Return the final answer
994
  answer = final_state.get("final_answer", "")
 
15
  import json
16
  import time
17
  from daytona_sdk import Daytona, DaytonaConfig
18
+ import yt_dlp
19
+ import io
20
+ import os
21
+ import tempfile
22
+ from pathlib import Path
23
 
24
 
25
  # Load environment variables
 
489
  except Exception as e:
490
  return f"Error extracting document data: {str(e)}"
491
 
492
@tool
def extract_image_data(input_method: str, images: list, prompt: str, json_mode: bool = False) -> str:
    """Extract visual information from images using Dumpling AI.

    This tool allows you to extract detailed descriptions or specific information from images
    using vision-capable Large Language Models (LLMs). It can identify objects, scenes, text,
    and other visual elements based on your specific prompt.

    Parameters:
    - input_method: How to input images, either "url" or "base64"
    - images: List of image URLs or base64-encoded strings depending on input_method
    - prompt: Specific instructions for what information to extract from the image
    - json_mode: Whether to return structured JSON (true) or free text (false)

    Returns:
    - Extracted visual data from the image based on your prompt, or an
      "Error: ..." string on any failure (the tool never raises to the caller).
    """
    api_key = os.getenv("DUMPLING_API_KEY")
    if not api_key:
        return "Error: DUMPLING_API_KEY environment variable not set"

    try:
        url = "https://app.dumplingai.com/api/v1/extract-image"
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_key}"
        }

        data = {
            "inputMethod": input_method,
            "images": images,
            "prompt": prompt,
            "jsonMode": json_mode
        }

        # Vision extraction can be slow; allow up to 120 s before giving up.
        response = requests.post(url, headers=headers, json=data, timeout=120)
        response.raise_for_status()

        result = response.json()

        # Format the response in a readable way
        formatted_response = "Image Analysis Results:\n\n"
        formatted_response += f"Extracted Data:\n{result.get('results', 'No results found')}\n\n"
        formatted_response += f"Images Processed: {result.get('imageCount', 'Unknown')}\n"
        formatted_response += f"Credit Usage: {result.get('creditUsage', 'Unknown')}\n"

        return formatted_response

    except requests.exceptions.Timeout:
        return "Error: Request to Dumpling AI API timed out after 120 seconds"
    except requests.exceptions.HTTPError as e:
        error_detail = f"HTTP Error: {e.response.status_code}"
        try:
            error_json = e.response.json()
            error_detail += f" - {error_json.get('detail', error_json)}"
        except Exception:
            # Body was not JSON (or had no 'detail'); fall back to raw text,
            # truncated so a huge error page doesn't flood the agent context.
            error_detail += f" - {e.response.text[:500]}"
        return error_detail
    except requests.exceptions.RequestException as e:
        return f"Error making request to Dumpling AI API: {str(e)}"
    except Exception as e:
        return f"Error extracting image data: {str(e)}"
554
+
555
  @tool
556
  def extract_url_content(url: str) -> str:
557
  """Extract content from a URL using Diffbot API (supports webpages, articles, PDFs, etc.).
 
588
 
589
  try:
590
  # Make the API request with a timeout
591
+ response = requests.get(api_url, params=params, timeout=60) # 60 second timeout
592
  response.raise_for_status() # Raise exception for HTTP errors
593
 
594
  # Parse the response
 
626
  except Exception as e:
627
  return f"Error extracting content from {url}: {str(e)}"
628
 
629
@tool
def get_youtube_transcript(url: str) -> str:
    """Get the transcript (captions) from a YouTube video as text.

    This tool extracts the transcript text from YouTube videos, returns the transcript as a string.

    Parameters:
    - url: The YouTube video URL

    Returns:
    - The transcript as a string, or an error message if the transcript couldn't be obtained
    """
    try:
        # TemporaryDirectory cleans itself up on exit; pointing yt_dlp's
        # 'outtmpl' at it avoids os.chdir(), which mutated process-global
        # state and was unsafe if tools run concurrently.
        with tempfile.TemporaryDirectory() as temp_dir:
            ydl_opts = {
                'writesubtitles': True,        # Download subtitles
                'writeautomaticsub': True,     # Download automatic subtitles
                'subtitleslangs': ['en'],      # Specify English language
                'skip_download': True,         # Skip the video itself, only get subtitles
                'outtmpl': os.path.join(temp_dir, 'subtitle'),  # Write into the temp dir
            }

            # Download the subtitles
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                info_dict = ydl.extract_info(url, download=True)
                video_title = info_dict.get('title', 'Unknown Title')

            # Look for subtitle files in the temp directory
            subtitle_files = list(Path(temp_dir).glob("*.vtt")) + list(Path(temp_dir).glob("*.srt"))
            if not subtitle_files:
                return f"No transcript found for YouTube video: '{video_title}'"

            # Read the first subtitle file found
            subtitle_content = subtitle_files[0].read_text(encoding='utf-8')

            # Clean up the subtitle content to remove timestamps and formatting.
            # This is a simple cleaning - more complex parsing may be needed for perfect results.
            cleaned_lines = []
            for line in subtitle_content.split('\n'):
                # Skip time codes, numbering and empty lines
                stripped = line.strip()
                if stripped and not stripped.isdigit() and '-->' not in line and not line.startswith('WEBVTT'):
                    cleaned_lines.append(line)

            transcript = ' '.join(cleaned_lines)
            return f"Transcript from YouTube video: '{video_title}'\n\n{transcript}"

    except Exception as e:
        return f"Error retrieving YouTube transcript: {str(e)}"
699
+
700
  class BasicAgent:
701
  def __init__(self):
702
  print("BasicAgent initialized.")
 
732
  )
733
 
734
  # Initialize tools
735
+ self.tools = [search_web_tavily, search_web_serper, execute_code_securely, execute_shell_command, sandbox_file_operation, extract_document_data, extract_image_data, extract_url_content, get_youtube_transcript]
736
 
737
  # Bind tools only to the worker model
738
  self.worker_model = self.worker_model_base.bind_tools(self.tools)
 
859
  Worker has access to the following tools:
860
  - Web search (using Tavily and Serper)
861
  - Web content extraction
862
+ - Image analysis (can extract visual information from images)
863
+ - Document data extraction (from PDFs, documents, etc.)
864
  - Secure code execution (for Python and other languages)
865
  - Secure shell command execution
866
  - Secure file operations
 
903
  Worker has access to the following powerful tools:
904
  - Web search (using Tavily and Serper)
905
  - Web content extraction
906
+ - Image analysis (can extract visual information from images)
907
+ - Document data extraction (can extract data from PDFs, documents, etc.)
908
  - Secure code execution (for Python and other languages)
909
  - Secure shell command execution
910
  - Secure file operations
 
1012
  Remember that the worker had access to:
1013
  - Web search tools
1014
  - Web content extraction
1015
+ - Image analysis (can extract visual information from images)
1016
+ - Document data extraction (from PDFs, documents, etc.)
1017
  - Secure code execution
1018
  - Secure shell commands
1019
  - Secure file operations
 
1026
  - Ensure any numerical values, dates, names, or technical terms are correct
1027
  - Confirm that the formatting precisely matches what was requested
1028
  - Do not add units to the final answer if not explicitly requested
1029
+ - Do not use money symbols like $ in the final answer if not explicitly requested
1030
+ - Don't use comma separators for integers like 1,000,000, just use 1000000
1031
  - Answers tend to be as short as possible, so do not add extra data unless explicitly requested
1032
 
1033
  If the answer report is correct, format it exactly as asked in the question, and respond with:
 
1134
 
1135
  try:
1136
  # Run the workflow
1137
+ final_state = self.app.invoke(initial_state, config={"callbacks": [self.langfuse_handler], "recursion_limit": 35})
1138
 
1139
  # Return the final answer
1140
  answer = final_state.get("final_answer", "")
app.py CHANGED
@@ -117,6 +117,13 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
117
  print(f"Skipping item with missing task_id or question: {item}")
118
  continue
119
 
 
 
 
 
 
 
 
120
  # Skip if we already have an answer for this question
121
  if task_id in existing_answers_dict:
122
  submitted_answer = existing_answers_dict[task_id]
 
117
  print(f"Skipping item with missing task_id or question: {item}")
118
  continue
119
 
120
+ # Check if the question has an associated file and prepend information
121
+ file_name = item.get("file_name")
122
+ if file_name and file_name != "":
123
+ file_url = f"{api_url}/files/{task_id}"
124
+ question_with_file_info = f"For this task there is a file available, with name {file_name}; it's possible to download it from {file_url}\n\n{question_text}"
125
+ question_text = question_with_file_info
126
+
127
  # Skip if we already have an answer for this question
128
  if task_id in existing_answers_dict:
129
  submitted_answer = existing_answers_dict[task_id]
requirements.txt CHANGED
@@ -7,4 +7,4 @@ langchain-anthropic
7
  anthropic
8
  python-Levenshtein
9
  daytona_sdk
10
-
 
7
  anthropic
8
  python-Levenshtein
9
  daytona_sdk
10
+ yt_dlp