agent code
Alexandre Gazola committed · Commit f66d8b7
Parent(s): 8ee60f3
Files changed:
- audio_to_text_tool.py +61 -0
- botanical_classification_tool.py +190 -0
- constants.py +30 -0
- convert_to_fen_tool copy.py +40 -0
- count_max_distinct_bird_species_tool.py +82 -0
- download_task_file.py +46 -0
- excel_parser_tool.py +42 -0
- image_to_text_tool.py +55 -0
- internet_search_tool.py +50 -0
- python_interpreter_tool.py +55 -0
- sample_youtube_video.py +96 -0
- supervisor.py +77 -0
- video_to_text_tool.py +107 -0
- wikipedia_tool.py +219 -0
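Note: several of the files below import helpers from a utils module (get_base64, get_bytes, get_text_file_contents, clean_text, encode_frame_to_base64, decode_base64_to_frame) that is not part of this commit. A minimal sketch of those helpers, inferred only from how they are called and possibly different from the real implementation:

# Sketch of the missing utils.py, inferred from call sites; not part of this commit.
import base64

import cv2
import numpy as np

def get_base64(file_path: str) -> str:
    """Read a file and return its contents as a base64 string."""
    with open(file_path, "rb") as f:
        return base64.b64encode(f.read()).decode("utf-8")

def get_bytes(file_path: str) -> bytes:
    """Read a file and return its raw bytes."""
    with open(file_path, "rb") as f:
        return f.read()

def get_text_file_contents(file_path: str) -> str:
    """Read a text file and return its contents as a string."""
    with open(file_path, "r", encoding="utf-8") as f:
        return f.read()

def clean_text(text: str) -> str:
    """Collapse whitespace and strip a string; returns '' for None."""
    return " ".join(text.split()) if text else ""

def encode_frame_to_base64(frame) -> str:
    """JPEG-encode an OpenCV frame and return it as a base64 string."""
    ok, buffer = cv2.imencode(".jpg", frame)
    if not ok:
        raise ValueError("Could not encode frame as JPEG.")
    return base64.b64encode(buffer).decode("utf-8")

def decode_base64_to_frame(b64: str):
    """Decode a base64 JPEG string back into an OpenCV frame."""
    data = np.frombuffer(base64.b64decode(b64), dtype=np.uint8)
    return cv2.imdecode(data, cv2.IMREAD_COLOR)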
audio_to_text_tool.py
ADDED
@@ -0,0 +1,61 @@
import os
import subprocess
import tempfile

import whisper
from langchain_core.tools import tool

#@tool
def audio_to_text(file_path: str) -> str:
    """
    Converts an MP3 file to WAV and transcribes it using Whisper.

    Args:
        file_path (str): Path to the MP3 file.

    Returns:
        str: Transcribed text.
    """
    if not os.path.isfile(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    # Create a temporary WAV file to hold the converted audio
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_wav:
        tmp_wav_path = tmp_wav.name

    try:
        # Convert to WAV using ffmpeg
        subprocess.run(
            ["ffmpeg", "-y", "-i", file_path, tmp_wav_path],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=True
        )

        model = whisper.load_model("base")
        result = model.transcribe(tmp_wav_path)

        if result is None or "text" not in result:
            raise ValueError("Transcription failed or result is invalid.")

        return result["text"]

    finally:
        # Clean up the temporary WAV file
        if os.path.exists(tmp_wav_path):
            os.remove(tmp_wav_path)

if __name__ == "__main__":
    try:
        print(audio_to_text("C:\\tmp\\ibm\\audio.mp3"))
    except Exception as e:
        print(f"Error: {e}")
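Note: the @tool decorator is commented out above. If the intent is to expose audio_to_text to the agent like the other tools, one option is to wrap the plain function (a sketch; the name audio_to_text_langchain_tool is illustrative, not part of the commit):

# Sketch: wrap the plain function as a LangChain tool without editing its body.
from langchain_core.tools import tool
from audio_to_text_tool import audio_to_text

audio_to_text_langchain_tool = tool(audio_to_text)
# Invocation then mirrors the other tools in this commit:
# audio_to_text_langchain_tool.invoke({"file_path": "C:\\tmp\\ibm\\audio.mp3"})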
botanical_classification_tool.py
ADDED
@@ -0,0 +1,190 @@
from langchain_core.tools import tool

@tool
def get_botanical_classification(item_name):
    """
    Provides the botanical classification (fruit, vegetable, or other)
    for a given food item, adhering to botanical definitions.

    Args:
        item_name (str): The name of the food item (e.g., "bell pepper", "sweet potato").

    Returns:
        dict: A dictionary containing:
            - 'item': The normalized item name.
            - 'botanical_category': 'fruit', 'vegetable', 'other', or 'unclassified'.
            - 'botanical_part': The botanical part of the plant (e.g., 'matured ovary', 'root', 'leaf'),
              or 'N/A' if not applicable/unknown.
            - 'notes': Any additional botanical notes or clarifications.
    """

    # --- Curated Botanical Database ---
    # This dictionary holds the botanical classifications.
    # It's crucial this data is accurate according to botanical science.
    # Expand it as needed for the full grocery list.
    botanical_data = {
        "sweet potatoes": {
            "botanical_category": "vegetable",
            "botanical_part": "root/tuber",
            "notes": "Edible root of the plant."
        },
        "fresh basil": {
            "botanical_category": "vegetable",
            "botanical_part": "leaf",
            "notes": "Edible leaves of the herb."
        },
        "plums": {
            "botanical_category": "fruit",
            "botanical_part": "matured ovary",
            "notes": "Simple fleshy fruit (drupe)."
        },
        "green beans": {
            "botanical_category": "fruit",
            "botanical_part": "matured ovary (legume)",
            "notes": "Botanically a fruit (legume) containing seeds."
        },
        "rice": {
            "botanical_category": "fruit",
            "botanical_part": "matured ovary (caryopsis)",
            "notes": "A grain, which is botanically a dry fruit (caryopsis)."
        },
        "corn": {
            "botanical_category": "fruit",
            "botanical_part": "matured ovary (caryopsis)",
            "notes": "A grain, which is botanically a dry fruit (caryopsis)."
        },
        "bell pepper": {
            "botanical_category": "fruit",
            "botanical_part": "matured ovary",
            "notes": "Developed from the flower's ovary and contains seeds."
        },
        "whole allspice": {  # Allspice berries
            "botanical_category": "fruit",
            "botanical_part": "dried berry",
            "notes": "Dried unripe berries of the Pimenta dioica plant."
        },
        "acorns": {
            "botanical_category": "fruit",
            "botanical_part": "nut (type of dry fruit)",
            "notes": "A nut, which is botanically a type of dry fruit."
        },
        "broccoli": {
            "botanical_category": "vegetable",
            "botanical_part": "flower/stem",
            "notes": "Edible flower heads and stalks."
        },
        "celery": {
            "botanical_category": "vegetable",
            "botanical_part": "stem/petiole",
            "notes": "Edible leaf stalks."
        },
        "zucchini": {
            "botanical_category": "fruit",
            "botanical_part": "matured ovary",
            "notes": "A type of berry (pepo) from a flowering plant."
        },
        "lettuce": {
            "botanical_category": "vegetable",
            "botanical_part": "leaf",
            "notes": "Edible leaves."
        },
        "peanuts": {
            "botanical_category": "fruit",
            "botanical_part": "legume (matured ovary)",
            "notes": "Botanically a fruit (legume), despite growing underground."
        },
        # Non-plant items or items not strictly fruit/vegetable botanically
        "milk": {
            "botanical_category": "other",
            "botanical_part": "N/A",
            "notes": "Dairy product (animal)."
        },
        "eggs": {
            "botanical_category": "other",
            "botanical_part": "N/A",
            "notes": "Animal product."
        },
        "flour": {
            "botanical_category": "other",
            "botanical_part": "N/A",
            "notes": "Processed grain product (typically wheat, which is a fruit)."
        },
        "whole bean coffee": {
            "botanical_category": "other",  # The bean itself is a seed, not the entire fruit
            "botanical_part": "seed",
            "notes": "The coffee 'bean' is botanically the seed of the coffee cherry (a fruit)."
        },
        "oreos": {
            "botanical_category": "other",
            "botanical_part": "N/A",
            "notes": "Processed food item."
        }
    }

    # Normalize the input item name for lookup
    normalized_item = item_name.strip().lower()

    # Handle simple pluralization/singularization when the exact form is not in the data.
    # This is a simple approach; a proper NLP library would be more robust.
    if normalized_item.endswith("s") and normalized_item[:-1] in botanical_data:
        normalized_item = normalized_item[:-1]
    elif normalized_item + "s" in botanical_data:
        # The plural form exists even though the input is singular
        normalized_item = normalized_item + "s"

    # Retrieve classification
    classification = botanical_data.get(normalized_item)

    if classification:
        return {
            "item": item_name,
            "botanical_category": classification["botanical_category"],
            "botanical_part": classification["botanical_part"],
            "notes": classification["notes"]
        }
    else:
        # The item is not found in the database
        return {
            "item": item_name,
            "botanical_category": "unclassified",
            "botanical_part": "N/A",
            "notes": "Classification not found in the current database."
        }

# --- Example Usage ---
if __name__ == "__main__":
    grocery_list = [
        "milk", "eggs", "flour", "whole bean coffee", "Oreos", "sweet potatoes",
        "fresh basil", "plums", "green beans", "rice", "corn", "bell pepper",
        "whole allspice", "acorns", "broccoli", "celery", "zucchini", "lettuce",
        "peanuts"
    ]

    botanical_fruits = []
    botanical_vegetables = []
    other_items = []
    unclassified_items = []

    print("--- Botanical Classifications ---")
    for item in grocery_list:
        # get_botanical_classification is a LangChain tool, so invoke it with a dict.
        classification = get_botanical_classification.invoke({"item_name": item})
        print(f"'{classification['item']}' -> Category: {classification['botanical_category']}, Part: {classification['botanical_part']} ({classification['notes']})")

        if classification['botanical_category'] == 'fruit':
            botanical_fruits.append(classification['item'])
        elif classification['botanical_category'] == 'vegetable':
            botanical_vegetables.append(classification['item'])
        elif classification['botanical_category'] == 'other':
            other_items.append(classification['item'])
        else:  # unclassified
            unclassified_items.append(classification['item'])

    print("\n--- Summary ---")
    print(f"Botanical Fruits: {', '.join(sorted(botanical_fruits))}")
    print(f"Botanical Vegetables: {', '.join(sorted(botanical_vegetables))}")
    print(f"Other Groceries: {', '.join(sorted(other_items))}")
    if unclassified_items:
        print(f"Unclassified Items: {', '.join(sorted(unclassified_items))}")

    print(get_botanical_classification.invoke({"item_name": "fresh basil"}))
constants.py
ADDED
@@ -0,0 +1,30 @@
# Alternative models considered:
# MODEL = 'gemini-2.0-flash'
# MODEL = 'gemini-2.5-pro-exp-03-25'
MODEL = 'gemini-2.0-flash-exp'

API_KEY = 'AIzaSyDfVJVwfEhMpZCFNg91TWwXBvs0BZsMpPo'

OPENAI_KEY = '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'

PROMPT_LIMITADOR_LLM = """
You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.

However, if I give you tools that access the internet, you may freely use them. In particular, you have a tool that allows you to search the internet. Use it when necessary.

Your primary goal is to assist me using only the tools and data I make available. If a task requires information or capabilities beyond what I have provided, you must clearly state that you cannot fulfill the request due to these limitations.

Do not attempt to:
- Access external websites or databases without the tools that I provide.
- Use any internal knowledge base beyond what I provide.
- Categorize food items based on your own knowledge. Use the provided tools. Run the provided tools on every item of the grocery list!
- Make assumptions or inferences based on information not explicitly given.
- Utilize any built-in tools or functions that I have not specifically presented.

Your responses should be concise and directly address the task at hand, using only the provided resources. If a question cannot be answered or a task cannot be completed with the given constraints, your response should be a polite refusal stating the limitation.

Understood? Acknowledge that you will operate strictly within these constraints.

"""
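Note: API_KEY and OPENAI_KEY are committed in plain text here. If the Space has these configured as secrets instead, a minimal sketch of reading them from environment variables (the variable names below are assumptions, not part of this commit):

import os

# Hypothetical secret names; adjust to whatever is configured for the Space.
API_KEY = os.environ.get("GOOGLE_API_KEY", "")
OPENAI_KEY = os.environ.get("OPENAI_API_KEY", "")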
convert_to_fen_tool copy.py
ADDED
@@ -0,0 +1,40 @@
import constants
import google.generativeai as genai

def image_to_fen(image_bytes):

    genai.configure(api_key=constants.API_KEY)

    model = genai.GenerativeModel(constants.MODEL)

    response = model.generate_content([
        {"mime_type": "image/jpeg", "data": image_bytes},
        "Describe the chessboard in this image and provide the FEN notation."
    ])

    print(response.text)

    return response.text

if __name__ == '__main__':
    # Example usage:
    # 1. Load an image from a file (replace with your image path)
    image_path = r"C:\Users\agazo\Downloads\cca530fc-4052-43b2-b130-b30968d8aa44_file.png"  # Replace with a valid image path
    try:
        with open(image_path, "rb") as image_file:
            image_bytes = image_file.read()
    except FileNotFoundError:
        print(f"Error: File not found at {image_path}. Please make sure the path is correct and the file exists.")
        exit()

    # 2. Call the function
    fen = image_to_fen(image_bytes)
    print(f"FEN: {fen}")

    # Expected FEN result: 1K6/1PP5/P2RBBqP/4n3/Q7/p2b4/1pp3pp/1k2r3 w - - 0 1
count_max_distinct_bird_species_tool.py
ADDED
@@ -0,0 +1,82 @@
from sample_youtube_video import sample_youtube_video
from ultralytics import YOLO
from langchain_core.tools import tool
from utils import decode_base64_to_frame

@tool
def count_max_distinct_bird_species_in_video(youtube_url: str) -> int:
    """
    Count the maximum number of distinct bird species in a YouTube video.

    Args:
        - youtube_url: str: the URL of the YouTube video

    Returns:
        - int: The maximum number of distinct bird species detected in the YouTube video.
    """

    frames = sample_youtube_video(youtube_url, 5)
    return count_max_distinct_bird_species(frames)

def count_max_distinct_bird_species(framesStr: list[str]) -> int:
    """
    Count the maximum number of distinct bird species in a single frame considering a list of frames.

    Args:
        - framesStr (list[str]): A list of frames (base64-encoded JPEG images) to analyze.

    Returns:
        - int: The maximum number of distinct bird species detected in any single frame.
    """

    # Animal class indices from the COCO dataset.
    # NOTE: in the standard COCO label set only ID 14 is "bird"; IDs 15-21 map to other animals.
    bird_class_ids = [
        14, 15, 16, 17, 18, 19, 20, 21
    ]

    # Load YOLOv8 model for object detection (pre-trained or custom-trained for animals)
    model = YOLO("yolov8n.pt")  # Use a pre-trained YOLOv8 model

    max_bird_species_count = 0  # To track the maximum count of distinct bird species

    frames = [decode_base64_to_frame(b64) for b64 in framesStr]

    # Iterate through all frames in the list
    for frame in frames:
        # Perform inference on the current frame
        results = model(frame)

        bird_species_detected = set()  # To track distinct bird species in this frame

        # Parse the detected objects
        for result in results:
            # Convert the class indices from tensor to integer
            detected_classes = result.boxes.cls.int()

            # Iterate over detected classes and keep only bird species
            for detected_class in detected_classes:
                # If the class is a bird (based on the COCO class IDs above)
                if detected_class.item() in bird_class_ids:
                    species_name = result.names[detected_class.item()]  # Get the species name
                    bird_species_detected.add(species_name)

        # Update the maximum number of distinct bird species detected in any frame
        max_bird_species_count = max(max_bird_species_count, len(bird_species_detected))

    # Return the maximum number of distinct bird species detected in any frame
    return max_bird_species_count


if __name__ == "__main__":
    sampled_frames = sample_youtube_video("https://www.youtube.com/watch?v=L1vXCYZAYYM", 5)

    distinct_species_count = count_max_distinct_bird_species(sampled_frames)

    print(f"Distinct species detected: {distinct_species_count}")
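Note: since only one COCO class name is "bird", the bird class ID could also be derived from the model's own names mapping instead of being hardcoded. A sketch, assuming the default COCO-trained weights:

from ultralytics import YOLO

model = YOLO("yolov8n.pt")
# model.names maps class index -> class name for the loaded weights.
bird_class_ids = [idx for idx, name in model.names.items() if name == "bird"]
print(bird_class_ids)  # expected to print [14] for the default COCO-trained weights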
download_task_file.py
ADDED
@@ -0,0 +1,46 @@
import os
import requests

def download_file(task_id: str):
    # Construct the URL
    url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"

    # Send the request to download the file
    response = requests.get(url)

    if response.status_code == 200:
        # Get the content type from the response headers to determine the file extension
        content_type = response.headers.get('Content-Type', '')
        file_extension = ''

        # Map content types to file extensions
        if 'pdf' in content_type:
            file_extension = '.pdf'
        elif 'jpg' in content_type or 'jpeg' in content_type:
            file_extension = '.jpg'
        elif 'png' in content_type:
            file_extension = '.png'
        elif 'txt' in content_type:
            file_extension = '.txt'
        elif 'zip' in content_type:
            file_extension = '.zip'
        elif 'mp3' in content_type:
            file_extension = '.mp3'
        # Add more file types as necessary

        # If the extension can't be determined, default to .bin
        if not file_extension:
            file_extension = '.bin'

        # Save to the current user's Downloads folder
        save_path = os.path.join(os.path.expanduser('~'), 'Downloads', f"{task_id}_file{file_extension}")

        # Save the file with the appropriate extension
        with open(save_path, 'wb') as f:
            f.write(response.content)
        print(f"File successfully downloaded and saved as {save_path}")
    else:
        print(f"Failed to download the file. Status code: {response.status_code}")

task_id = "7bd855d8-463d-4ed5-93ca-5fe35145f733"
download_file(task_id)
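Note: the hand-written content-type table above could also be replaced by the standard library's mimetypes module. A sketch (results for ambiguous types such as audio/mpeg can vary between platforms):

import mimetypes

def extension_for(content_type: str) -> str:
    # Strip any parameters, e.g. "text/plain; charset=utf-8" -> "text/plain".
    guessed = mimetypes.guess_extension(content_type.split(";")[0].strip())
    return guessed or ".bin"

print(extension_for("application/pdf"))  # typically ".pdf"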
excel_parser_tool.py
ADDED
@@ -0,0 +1,42 @@
import base64
import io

import pandas as pd
from langchain_core.tools import tool

from utils import get_base64


@tool
def parse_excel(base64_str: str) -> str:
    """Parses a base64-encoded Excel (.xlsx) file and returns a markdown summary."""
    try:
        file_bytes = base64.b64decode(base64_str)
        excel_file = pd.ExcelFile(io.BytesIO(file_bytes))

        output = []
        for sheet in excel_file.sheet_names:
            df = excel_file.parse(sheet)
            output.append(f"### Sheet: {sheet}\n")
            output.append(df.head(20).to_markdown(index=False))
            output.append("\n")

        return "\n".join(output)
    except Exception as e:
        return f"Error parsing Excel file: {e}"


if __name__ == "__main__":
    filepath = 'C:\\Users\\agazo\\Downloads\\7bd855d8-463d-4ed5-93ca-5fe35145f733_file.xlsx'

    try:
        # parse_excel is a LangChain tool, so invoke it with a dict.
        result = parse_excel.invoke({"base64_str": get_base64(filepath)})
        print(result)

    except FileNotFoundError:
        print(f"File not found: {filepath}")
    except Exception as e:
        print(f"Error: {e}")
image_to_text_tool.py
ADDED
@@ -0,0 +1,55 @@
import constants
import google.generativeai as genai
from langchain_core.tools import tool

@tool
def image_to_text(image_bytes, instructions):
    """
    Generates a text describing an image.

    Args:
        image_bytes (bytes): the bytes of the image to be described.
        instructions (str): instructions to describe the image.

    Returns:
        str: A string describing the image according to the instructions given.
    """

    genai.configure(api_key=constants.API_KEY)
    model = genai.GenerativeModel(constants.MODEL)

    response = model.generate_content(
        [
            {
                "mime_type": "image/jpeg",
                "data": image_bytes
            },
            instructions
        ]
    )
    return response.text

if __name__ == '__main__':
    # Example usage:
    # 1. Load an image from a file (replace with your image path)
    image_path = r"C:\Users\agazo\Downloads\cca530fc-4052-43b2-b130-b30968d8aa44_file.png"  # Replace with a valid image path
    try:
        with open(image_path, "rb") as image_file:
            image_bytes = image_file.read()
    except FileNotFoundError:
        print(f"Error: File not found at {image_path}. Please make sure the path is correct and the file exists.")
        exit()

    # 2. Call the tool
    text = image_to_text.invoke(
        {
            "image_bytes": image_bytes,
            "instructions": 'Describe the chessboard in this image and provide the FEN notation.'
        }
    )
    print(f"FEN: {text}")

    # Expected FEN result: 1K6/1PP5/P2RBBqP/4n3/Q7/p2b4/1pp3pp/1k2r3 w - - 0 1
    # 3r2k1/pp4pp/4b2p/Q7/3n4/PqBBR2P/PP4P1/K7 w - - 0 1
internet_search_tool.py
ADDED
@@ -0,0 +1,50 @@
from duckduckgo_search import DDGS
from utils import clean_text
from langchain_core.tools import tool

@tool
def internet_search(query: str, max_results: int = 10) -> list[dict]:
    """
    Search the Internet for the given query and return a list of results.

    Args:
        query (str): The search query string.
        max_results (int, optional): Maximum number of search results to return. Default is 10.

    Returns:
        list[dict]: A list of dictionaries containing search results. Each dictionary typically includes:
            - 'title': The title of the result.
            - 'href': The URL of the result.
            - 'body': A short snippet or description.

    Raises:
        ValueError: If the query is empty or only whitespace.
    """
    if not query.strip():
        raise ValueError("Query must not be empty.")

    results = []
    with DDGS() as ddgs:
        for r in ddgs.text(query):
            results.append(r)
            if len(results) >= max_results:
                break
    return results

if __name__ == "__main__":
    query = 'Alexandre Gazola'
    try:
        results = internet_search.invoke(
            {
                "query": query,
                "max_results": 5
            }
        )

        for i, result in enumerate(results, 1):
            print(f"\nResult {i}:")
            print(f"Title: {clean_text(result.get('title'))}")
            print(f"URL: {clean_text(result.get('href'))}")
            print(f"Snippet: {clean_text(result.get('body'))}")
    except ValueError as e:
        print(f"Error: {e}")
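Note: recent releases of duckduckgo_search accept max_results directly on DDGS.text, which would make the manual break unnecessary. A sketch, assuming such a release is installed:

from duckduckgo_search import DDGS

def internet_search_simple(query: str, max_results: int = 10) -> list[dict]:
    # DDGS.text accepts max_results in recent duckduckgo_search versions.
    with DDGS() as ddgs:
        return list(ddgs.text(query, max_results=max_results))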
python_interpreter_tool.py
ADDED
@@ -0,0 +1,55 @@
import constants
import google.generativeai as genai
from langchain_core.tools import tool

@tool
def image_to_text(image_bytes, instructions):
    """
    Generates a text describing an image.

    Args:
        image_bytes (bytes): the bytes of the image to be described.
        instructions (str): instructions to describe the image.

    Returns:
        str: A string describing the image according to the instructions given.
    """

    genai.configure(api_key=constants.API_KEY)
    model = genai.GenerativeModel(constants.MODEL)

    response = model.generate_content(
        [
            {
                "mime_type": "image/jpeg",
                "data": image_bytes
            },
            instructions
        ]
    )
    return response.text

if __name__ == '__main__':
    # Example usage:
    # 1. Load an image from a file (replace with your image path)
    image_path = r"C:\Users\agazo\Downloads\cca530fc-4052-43b2-b130-b30968d8aa44_file.png"  # Replace with a valid image path
    try:
        with open(image_path, "rb") as image_file:
            image_bytes = image_file.read()
    except FileNotFoundError:
        print(f"Error: File not found at {image_path}. Please make sure the path is correct and the file exists.")
        exit()

    # 2. Call the tool
    text = image_to_text.invoke(
        {
            "image_bytes": image_bytes,
            "instructions": 'Describe the chessboard in this image and provide the FEN notation.'
        }
    )
    print(f"FEN: {text}")

    # Expected FEN result: 1K6/1PP5/P2RBBqP/4n3/Q7/p2b4/1pp3pp/1k2r3 w - - 0 1
    # 3r2k1/pp4pp/4b2p/Q7/3n4/PqBBR2P/PP4P1/K7 w - - 0 1
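Note: despite its name, this file currently duplicates image_to_text_tool.py. If a Python-interpreter tool is intended (one of the questions in supervisor.py asks for the final numeric output of attached Python code), a minimal sketch of such a tool follows; everything in it is an assumption, not part of the commit, and exec on untrusted code is unsafe outside a sandbox.

import contextlib
import io
from langchain_core.tools import tool

@tool
def run_python(code: str) -> str:
    """
    Executes a snippet of Python code and returns whatever it prints.

    Args:
        code (str): The Python source code to execute.

    Returns:
        str: The captured standard output, or an error message on failure.
    """
    buffer = io.StringIO()
    try:
        # Sketch only: exec on untrusted code is unsafe; sandbox before real use.
        with contextlib.redirect_stdout(buffer):
            exec(code, {"__name__": "__main__"})
        return buffer.getvalue()
    except Exception as e:
        return f"Error while executing code: {e}"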
sample_youtube_video.py
ADDED
@@ -0,0 +1,96 @@
import cv2
import yt_dlp
import os
from typing import List
from utils import encode_frame_to_base64

# Note: the ffmpeg binary must be installed on the machine!


def sample_youtube_video(youtube_url: str, sample_rate: int) -> List[str]:
    """
    Downloads a YouTube video and samples frames from it at a specified interval.

    Args:
        youtube_url (str): The URL of the YouTube video.
        sample_rate (int): The interval (in seconds) at which to sample frames.

    Returns:
        List[str]: A list of sampled frames as base64-encoded strings. Returns an empty list on error.
    """

    frames: List[cv2.Mat] = []

    # Construct the yt_dlp options
    ydl_opts = {
        'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/mp4',  # Get the best quality video and audio in mp4 format
        'outtmpl': '%(title)s.%(ext)s',  # Save with the video title
        'quiet': True,  # Suppress console output
    }

    # Download the video
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info_dict = ydl.extract_info(youtube_url, download=True)
            video_file = ydl.prepare_filename(info_dict)  # Get the downloaded filename
    except Exception as e:
        print(f"Error downloading video: {e}")
        return []

    # Open the video file
    try:
        cap = cv2.VideoCapture(video_file)
    except Exception as e:
        print(f"Error opening video file: {e}")
        return []

    # Get the video's frame rate
    fps = cap.get(cv2.CAP_PROP_FPS)
    if fps <= 0:
        print("Error: Could not determine video frame rate. OpenCV may not be able to read this file.")
        cap.release()
        os.remove(video_file)
        return []

    # Calculate the frame interval
    frame_interval = int(sample_rate * fps)

    frame_count = 0
    success = True
    duration_seconds = float(info_dict.get('duration', 0))

    while success:
        success, frame = cap.read()
        if not success:
            break

        if frame_count % frame_interval == 0:
            # Get the current time stamp
            current_time = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000.0  # in seconds

            if current_time > duration_seconds and duration_seconds != 0:
                break
            frames.append(frame)

        frame_count += 1

    # Release the video capture object and remove the downloaded video file
    cap.release()
    try:
        os.remove(video_file)
        print(f"Deleted downloaded video file: {video_file}")
    except Exception as e:
        print(f"Error deleting video file: {e}")

    print(f"Finished sampling. {len(frames)} frames sampled.")

    return [encode_frame_to_base64(frame) for frame in frames]


if __name__ == "__main__":
    sampled_frames = sample_youtube_video("https://www.youtube.com/watch?v=L1vXCYZAYYM", 5)

    if sampled_frames:
        print(f"Number of frames returned: {len(sampled_frames)}")
    else:
        print("No frames were returned.")
supervisor.py
ADDED
@@ -0,0 +1,77 @@
import constants
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.tools import tool
from langchain.agents import AgentExecutor, create_tool_calling_agent
from langchain_core.runnables import Runnable
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import SystemMessage
from wikipedia_tool import wikipedia_revision_by_year_keyword
from count_max_distinct_bird_species_tool import count_max_distinct_bird_species_in_video
from image_to_text_tool import image_to_text
from utils import get_bytes, get_text_file_contents, get_base64
from internet_search_tool import internet_search
from botanical_classification_tool import get_botanical_classification
from excel_parser_tool import parse_excel

llm = ChatGoogleGenerativeAI(
    model=constants.MODEL,
    api_key=constants.API_KEY,
    temperature=0.7)

tools = [
    wikipedia_revision_by_year_keyword,
    count_max_distinct_bird_species_in_video,
    image_to_text,
    internet_search,
    get_botanical_classification,
    parse_excel
]

prompt = ChatPromptTemplate.from_messages([
    SystemMessage(
        content=(
            constants.PROMPT_LIMITADOR_LLM
        )
    ),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
    MessagesPlaceholder(variable_name="agent_scratchpad"),
])

agent = create_tool_calling_agent(llm, tools, prompt=prompt)
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

questoes = [
    'How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia.',
    'In the video https://www.youtube.com/watch?v=L1vXCYZAYYM, what is the highest number of bird species to be on camera simultaneously?',
    '.rewsna eht sa \"tfel\" drow eht fo etisoppo eht etirw ,ecnetnes siht dnatsrednu uoy fI',  # trivial - the model answers this
    'Review the chess position provided in the image. It is blacks turn. Provide the correct next move for black which guarantees a win. Please provide your response in algebraic notation.',
    'Who nominated the only Featured Article on English Wikipedia about a dinosaur that was promoted in November 2016?',
    'Given this table defining * on the set S = {a, b, c, d, e}\n\n|*|a|b|c|d|e|\n|---|---|---|---|---|---|\n|a|a|b|c|b|d|\n|b|b|c|a|e|c|\n|c|c|a|b|b|a|\n|d|b|e|b|e|d|\n|e|d|b|a|d|c|\n\nprovide the subset of S involved in any possible counter-examples that prove * is not commutative. Provide your answer as a comma separated list of the elements in the set in alphabetical order.',
    ## skipped questions
    'What is the final numeric output from the attached Python code?',  # trivial - the model answers this
    'How many at bats did the Yankee with the most walks in the 1977 regular season have that same season?',
    "What is the surname of the equine veterinarian mentioned in 1.E Exercises from the chemistry materials licensed by Marisa Alviar-Agnew & Henry Agnew under the CK-12 license in LibreText's Introductory Chemistry materials as compiled 08/21/2023?",
    "I'm making a grocery list for my mom, but she's a professor of botany and she's a real stickler when it comes to categorizing things. I need to add different foods to different categories on the grocery list, but if I make a mistake, she won't buy anything inserted in the wrong category. Here's the list I have so far:\n\nmilk, eggs, flour, whole bean coffee, Oreos, sweet potatoes, fresh basil, plums, green beans, rice, corn, bell pepper, whole allspice, acorns, broccoli, celery, zucchini, lettuce, peanuts\n\nI need to make headings for the fruits and vegetables. Could you please create a list of just the vegetables from my list? If you could do that, then I can figure out how to categorize the rest of the list into the appropriate categories. But remember that my mom is a real stickler, so make sure that no botanical fruits end up on the vegetable list, or she won't get them when she's at the store. Please alphabetize the list of vegetables, and place each item in a comma separated list.",
    "Hi, I'm making a pie but I could use some help with my shopping list. I have everything I need for the crust, but I'm not sure about the filling. I got the recipe from my friend Aditi, but she left it as a voice memo and the speaker on my phone is buzzing so I can't quite make out what she's saying. Could you please listen to the recipe and list all of the ingredients that my friend described? I only want the ingredients for the filling, as I have everything I need to make my favorite pie crust. I've attached the recipe as Strawberry pie.mp3.\n\nIn your response, please only list the ingredients, not any measurements. So if the recipe calls for \"a pinch of salt\" or \"two cups of ripe strawberries\" the ingredients on the list would be \"salt\" and \"ripe strawberries\".\n\nPlease format your response as a comma separated list of ingredients. Also, please alphabetize the ingredients.",
    "Who did the actor who played Ray in the Polish-language version of Everybody Loves Raymond play in Magda M.? Give only the first name.",
    "What is the first name of the only Malko Competition recipient from the 20th Century (after 1977) whose nationality on record is a country that no longer exists?",
    "What country had the least number of athletes at the 1928 Summer Olympics? If there's a tie for a number of athletes, return the first in alphabetical order. Give the IOC country code as your answer.",
    "Where were the Vietnamese specimens described by Kuznetzov in Nedoshivina's 2010 paper eventually deposited? Just give me the city name without abbreviations. Look up the web.",
    "The attached Excel file contains the sales of menu items for a local fast-food chain. What were the total sales that the chain made from food (not including drinks)? Express your answer in USD with two decimal places.",
]

# CUB

# expected answer: "Final answer": "broccoli, celery, fresh basil, lettuce, sweet potatoes"

# "Final answer": "89706.00"

response = agent_executor.invoke({
    # "input": questoes[15],
    "input": f"{questoes[15]} The relevant data is provided as a base64 encoded Excel file string. Here is the base64 string: {get_base64(r'C:\Users\agazo\Downloads\7bd855d8-463d-4ed5-93ca-5fe35145f733_file.xlsx')}",
    "chat_history": []
})

print(response)
video_to_text_tool.py
ADDED
@@ -0,0 +1,107 @@
import json
import os
import subprocess

import requests
from pytube import YouTube

import constants


# Replace with your actual OpenAI API key
OPENAI_API_KEY = constants.OPENAI_KEY

# Replace with the URL of the YouTube video you want to transcribe
YOUTUBE_URL = "https://www.youtube.com/watch?v=1htKBjuUWec"


def download_youtube_audio(youtube_url, output_path="."):
    """Downloads the audio from a YouTube video.

    Args:
        youtube_url: The URL of the YouTube video.
        output_path: The directory to save the audio file.

    Returns:
        str: The path to the downloaded audio file (in mp3 format), or None if an error occurs.
    """
    try:
        yt = YouTube(youtube_url)
        audio_stream = yt.streams.filter(only_audio=True).first()
        if audio_stream:
            downloaded_file = audio_stream.download(output_path=output_path, filename="youtube_audio")
            base, ext = os.path.splitext(downloaded_file)
            mp3_file = os.path.join(output_path, f"{base}.mp3")
            subprocess.call(['ffmpeg', '-i', downloaded_file, mp3_file])
            os.remove(downloaded_file)
            return mp3_file
        else:
            print("Error: No audio stream found for this video.")
            return None
    except Exception as e:
        print(f"Error downloading YouTube audio: {e}")
        return None

def transcribe_audio_openai(audio_file_path):
    """
    Transcribes an audio file using the OpenAI Audio API.

    Args:
        audio_file_path: The path to the audio file.

    Returns:
        str: The transcribed text, or None if an error occurs.
    """
    headers = {
        "Authorization": f"Bearer {OPENAI_API_KEY}",
    }
    files = {
        "file": open(audio_file_path, "rb"),
    }
    data = {
        "model": "whisper-1",
    }

    response = None
    try:
        response = requests.post("https://api.openai.com/v1/audio/transcriptions", headers=headers, files=files, data=data)
        response.raise_for_status()  # Raise an exception for bad status codes
        return response.json().get("text")
    except requests.exceptions.RequestException as e:
        print(f"Error during OpenAI API call: {e}")
        if response is not None:
            print(f"Response status code: {response.status_code}")
            try:
                print(f"Response body: {response.json()}")
            except json.JSONDecodeError:
                print(f"Response body (non-JSON): {response.content.decode()}")
        return None
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return None

if __name__ == "__main__":
    youtube_url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ"  # Replace with your YouTube video URL

    # Download the audio from the YouTube video
    audio_file_path = download_youtube_audio(youtube_url)

    if audio_file_path:
        print(f"Audio downloaded to: {audio_file_path}")

        # Transcribe the downloaded audio using OpenAI
        transcription = transcribe_audio_openai(audio_file_path)

        # Clean up the downloaded audio file
        os.remove(audio_file_path)
        print(f"Deleted temporary audio file: {audio_file_path}")

        if transcription:
            print("\nYouTube Video Transcription (via OpenAI):")
            print(transcription)
        else:
            print("Failed to transcribe the audio using OpenAI.")
    else:
        print("Could not download audio from the YouTube video.")
wikipedia_tool.py
ADDED
@@ -0,0 +1,219 @@
import requests
from langchain_core.tools import tool

@tool
def wikipedia_revision_by_year_keyword(keyword: str, year: int) -> dict:
    """
    Search for a Wikipedia page and get the latest revision from that year.
    This tool allows you to search within the page content.
    """
    search_url = (
        f"https://en.wikipedia.org/w/api.php"
        f"?action=query"
        f"&list=search"
        f"&format=json"
        f"&srsearch={requests.utils.quote(keyword)}"
        f"&srlimit=1"
    )
    search_response = requests.get(search_url, verify=False).json()
    search_results = search_response.get("query", {}).get("search", [])
    if not search_results:
        return {"error": f"No Wikipedia page found for '{keyword}'."}
    title = search_results[0]["title"]

    timestamp = f"{year}-12-31T23:59:59Z"
    rev_url = (
        f"https://en.wikipedia.org/w/api.php"
        f"?action=query"
        f"&format=json"
        f"&prop=revisions"
        f"&titles={requests.utils.quote(title)}"
        f"&rvlimit=1"
        f"&rvprop=timestamp|user|comment|content"
        f"&rvdir=older"
        f"&rvstart={timestamp}"
    )
    rev_response = requests.get(rev_url, verify=False).json()
    pages = rev_response.get("query", {}).get("pages", {})
    page = next(iter(pages.values()), {})
    if "revisions" not in page:
        return {"error": f"No revision found for page '{title}' before {timestamp}."}

    rev = page["revisions"][0]
    return {
        "title": title,
        "timestamp": rev["timestamp"],
        "user": rev["user"],
        "comment": rev.get("comment", ""),
        "content": rev.get("*", "[Content omitted]")
    }


USER_AGENT = "MyGenericTool/1.0 ([email protected])"  # Replace with your info

def get_all_featured_articles():
    """
    Retrieves a list of titles of all Featured Articles on English Wikipedia.
    """
    url = "https://en.wikipedia.org/w/api.php"
    params = {
        'action': 'query',
        'format': 'json',
        'list': 'categorymembers',
        'cmtitle': 'Category:Featured articles',
        'cmtype': 'page',
        'cmlimit': 'max'
    }
    headers = {'User-Agent': USER_AGENT}

    try:
        response = requests.get(url, params=params, headers=headers, verify=False)
        response.raise_for_status()
        data = response.json()
        featured_articles = [cm['title'] for cm in data.get('query', {}).get('categorymembers', [])]
        return featured_articles
    except requests.exceptions.RequestException as e:
        print(f"Error fetching featured articles: {e}")
        return []

def get_article_promotion_date(title):
    """
    Retrieves the date (YYYY-MM) when an article was promoted to Featured Article status.
    Looks for the date in the talk page history based on "Wikipedia:Featured article candidates" comments.
    """
    talk_page_title = f"Talk:{title}"
    url = "https://en.wikipedia.org/w/api.php"
    params = {
        'action': 'query',
        'format': 'json',
        'titles': talk_page_title,
        'prop': 'revisions',
        'rvprop': 'timestamp|user|comment',
        'rvlimit': '50',  # Adjust limit as needed
        'rvdir': 'older'
    }
    headers = {'User-Agent': USER_AGENT}

    try:
        response = requests.get(url, params=params, headers=headers, verify=False)
        response.raise_for_status()
        data = response.json()
        page_data = next(iter(data.get('query', {}).get('pages', {}).values()), {})
        revisions = page_data.get('revisions', [])
        for rev in revisions:
            if "Wikipedia:Featured article candidates" in rev.get('comment', ''):
                timestamp = rev.get('timestamp')
                if timestamp and "promoted" in rev.get('comment', '').lower():
                    return timestamp[:7]  # Return YYYY-MM
        return None
    except requests.exceptions.RequestException as e:
        print(f"Error fetching talk page history for {title}: {e}")
        return None

def get_nomination_user(title, promotion_month_year):
    """
    Retrieves the user who nominated a Featured Article promoted in a specific month and year.
    Looks for the nomination discussion on the talk page.
    """
    talk_page_title = f"Talk:{title}"
    url = "https://en.wikipedia.org/w/api.php"
    params = {
        'action': 'query',
        'format': 'json',
        'titles': talk_page_title,
        'prop': 'revisions',
        'rvprop': 'timestamp|user|comment',
        'rvlimit': '500',  # Adjust limit as needed
        'rvdir': 'older'
    }
    headers = {'User-Agent': USER_AGENT}

    try:
        response = requests.get(url, params=params, headers=headers)
        response.raise_for_status()
        data = response.json()
        page_data = next(iter(data.get('query', {}).get('pages', {}).values()), {})
        revisions = page_data.get('revisions', [])
        nomination_start_comment = None
        for rev in reversed(revisions):  # Look from newest to oldest
            if f"Wikipedia:Featured article candidates/{title}" in rev.get('comment', ''):
                nomination_start_comment = rev
                break

        if nomination_start_comment:
            # Now, go back in history to find who initiated this section
            params_history = {
                'action': 'query',
                'format': 'json',
                'titles': talk_page_title,
                'prop': 'revisions',
                'rvprop': 'timestamp|user|comment',
                'rvlimit': '500',
                'rvdir': 'newer',
                'rvstart': nomination_start_comment['timestamp']
            }
            response_history = requests.get(url, params=params_history, headers=headers)
            response_history.raise_for_status()
            data_history = response_history.json()
            page_data_history = next(iter(data_history.get('query', {}).get('pages', {}).values()), {})
            revisions_history = page_data_history.get('revisions', [])
            if revisions_history:
                return revisions_history[0].get('user')  # The first edit in the section is likely the nominator

        return None
    except requests.exceptions.RequestException as e:
        print(f"Error fetching talk page history for {title}: {e}")
        return None

def find_nominator_of_fa_by_promotion_date(month_year):
    """
    Finds the nominator of the (presumably single) Featured Article promoted in the given month and year.

    Args:
        month_year (str): The promotion month and year in 'YYYY-MM' format.

    Returns:
        tuple: A tuple containing the title of the Featured Article and the nominator's username,
               or (None, None) if no single FA was found for that month/year.
    """
    all_featured_articles = get_all_featured_articles()
    promoted_in_month = []
    for article in all_featured_articles:
        promotion_date = get_article_promotion_date(article)
        if promotion_date == month_year:
            promoted_in_month.append(article)

    if len(promoted_in_month) == 1:
        target_article = promoted_in_month[0]
        nominator = get_nomination_user(target_article, month_year)
        return target_article, nominator
    elif not promoted_in_month:
        print(f"No Featured Article was found to be promoted in {month_year}.")
        return None, None
    else:
        print(f"More than one Featured Article was promoted in {month_year}. Please be more specific.")
        return None, None

if __name__ == "__main__":
    import sys
    import io
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')

    featured_articles = get_all_featured_articles()
    if featured_articles:
        print("All Featured Articles:")
        for article in featured_articles:
            print(article)  # Printing Unicode strings should now work if stdout is UTF-8

    target_month_year = "2016-11"  # Example: November 2016
    article_title, nominator_username = find_nominator_of_fa_by_promotion_date(target_month_year)

    if article_title and nominator_username:
        print(f"\nThe Featured Article '{article_title}' was promoted in {target_month_year} and was nominated by: {nominator_username}")
    elif article_title:
        print(f"\nThe Featured Article '{article_title}' was promoted in {target_month_year}, but the nominator could not be determined.")
    elif article_title is None:
        pass  # Message already printed by find_nominator_of_fa_by_promotion_date