Final_Assignment_Template

Running

App Files Files Community

dawid-lorek commited on 3 days ago

Commit

e70ca94

verified ·

1 Parent(s): 703ec74

Update agent.py

Browse files

Files changed (1) hide show

agent.py +109 -44

agent.py CHANGED Viewed

@@ -1,72 +1,137 @@
-from smolagents import LiteLLMModel
-from smolagents import CodeAgent, DuckDuckGoSearchTool, VisitWebpageTool, tool, PythonInterpreterTool
-from youtube_transcript_api import YouTubeTranscriptApi
 import os
 @tool
-def reverse_sentence_tool(reverse_sentence: str) -> str:
     """
-    Receives a sentence where both the word order and the characters in each word are reversed.
-    Returns the sentence with words and order corrected.
     Args:
-        reverse_sentence: A sentence with reversed words and reversed word order.
     Returns:
-        A sentence in natural reading order.
     """
-    inverted_words = reverse_sentence.split(" ")[::-1]
-    correct_words = [word[::-1] for word in inverted_words]
-    return " ".join(correct_words)
 @tool
-def get_youtube_transcript(video_url: str) -> str:
     """
-    Fetches the transcript from a YouTube video if available.
     Args:
-        video_url: Full URL to the YouTube video.
     Returns:
-        Transcript text.
     """
-    video_id = video_url.split("v=")[-1]
-    transcript = YouTubeTranscriptApi.get_transcript(video_id)
-    full_text = " ".join([entry['text'] for entry in transcript])
-    return full_text
 @tool
-def check_answer(answer: str) -> str:
     """
-    Reviews the answer to check that it meets the requirements specified by the user and modifies it if necessary.
     Args:
-        answer (str): The answer of the Agent.
     Returns:
-        str: The final answer.
     """
-    if answer and answer[-1] == '.':
-        answer = answer[:-1]
-    if "St." in answer:
-        answer = answer.replace("St.", "Saint")
-    return answer
-class BasicAgent:
-    def __init__(self):
-        # Odczytaj klucz OpenAI z ENV
-        self.api_key = os.getenv("OPENAI_API_KEY")
-        # Ustaw preferowany model, np. GPT-4o lub inny OpenAI
-        self.model = LiteLLMModel(model_id="gpt-4o", api_key=self.api_key)
-        self.agent = CodeAgent(
             tools=[
                 DuckDuckGoSearchTool(),
-                PythonInterpreterTool(),
                 VisitWebpageTool(),
-                reverse_sentence_tool,
-                get_youtube_transcript,
-                check_answer,
             ],
-            model=self.model
         )
-        print("BasicAgent initialized (OpenAI).")
     def __call__(self, question: str) -> str:
-        print(f"Agent received question: {question[:50]}...")
-        answer = self.agent.run(question)
-        return answer

+# agent.py
 import os
+import requests
+from smolagents import LiteLLMModel, CodeAgent, tool, DuckDuckGoSearchTool, SpeechToTextTool, VisitWebpageTool
+import speech_recognition as sr
+from pydub import AudioSegment
+from PIL import Image
+# Ustaw endpoint API (dostosuj jeśli inny)
+api_url = "https://agents-course-unit4-scoring.hf.space"
+# ==== Narzędzia własne do podpięcia ====
+@tool
+def download_question_file(task_id: str, file_name: str = "", save_dir: str = ".") -> str:
+    """
+    Downloads the file associated with a given task ID and saves it to disk.
+    Args:
+        task_id (str): Unique question/task identifier.
+        file_name (str): Optional file name.
+        save_dir (str): Directory to save.
+    Returns:
+        str: Path to the saved file, or error.
+    """
+    url = f"{api_url}/files/{task_id}"
+    try:
+        resp = requests.get(url, timeout=15)
+        resp.raise_for_status()
+    except requests.exceptions.HTTPError as e:
+        return f"HTTP error: {e.response.status_code}"
+    except Exception as e:
+        return f"Network error: {e}"
+    content_disposition = resp.headers.get("Content-Disposition", "")
+    filename = (
+        content_disposition.split('filename="')[-1].rstrip('"')
+        if "filename=" in content_disposition
+        else file_name if file_name else f"{task_id}.dat"
+    )
+    os.makedirs(save_dir, exist_ok=True)
+    file_path = os.path.join(save_dir, filename)
+    with open(file_path, "wb") as f:
+        f.write(resp.content)
+    return file_path
 @tool
+def read_image(image_path: str) -> Image:
     """
+    Loads image from disk.
     Args:
+        image_path (str): Path to the image file.
     Returns:
+        The image.
     """
+    return Image.open(image_path)
 @tool
+def audio_to_text(audio_path: str) -> str:
     """
+    Converts audio (mp3/wav) to text using Google Speech Recognition.
     Args:
+        audio_path (str): Path to the audio file.
     Returns:
+        str: Recognized text.
     """
+    if audio_path.endswith(".mp3"):
+        source_file = audio_path.replace(".mp3", ".wav")
+        sound = AudioSegment.from_mp3(audio_path)
+        sound.export(source_file, format="wav")
+    else:
+        source_file = audio_path
+    r = sr.Recognizer()
+    audio_file = sr.AudioFile(source_file)
+    with audio_file as source:
+        audio = r.record(source)
+        text = r.recognize_google(audio)
+    return text
 @tool
+def extract_text_from_image(image_path: str) -> str:
     """
+    Extract text from image using pytesseract (OCR).
     Args:
+        image_path: Path to the image file.
     Returns:
+        Extracted text or error message.
     """
+    try:
+        import pytesseract
+        from PIL import Image
+        image = Image.open(image_path)
+        text = pytesseract.image_to_string(image)
+        return text
+    except ImportError:
+        return "Error: pytesseract is not installed."
+    except Exception as e:
+        return f"Error extracting text from image: {str(e)}"
+# ==== AGENT ====
+class GaiaAgent:
+    def __init__(self, model=None, max_steps=8):
+        # Jeśli model nie został przekazany, inicjalizuj domyślnie na OpenAI GPT-4o (lub inny)
+        if model is None:
+            api_key = os.getenv("OPENAI_API_KEY", "")
+            model = LiteLLMModel(
+                model_id="gpt-4o",  # Zmień na swój model jeśli potrzeba
+                api_key=api_key,
+            )
+        self.gaia_agent = CodeAgent(
+            model=model,
             tools=[
                 DuckDuckGoSearchTool(),
+                download_question_file,
+                read_image,
+                audio_to_text,
+                extract_text_from_image,
                 VisitWebpageTool(),
+                SpeechToTextTool()
             ],
+            additional_authorized_imports=["pandas", "numpy", "math", "statistics", "scipy"],
+            max_steps=max_steps
         )
+        # Możesz dodać tu dodatkową konfigurację promptów jeśli chcesz.
     def __call__(self, question: str) -> str:
+        print(f"Agent received question (first 50 chars): {question[:50]}...")
+        if self.gaia_agent:
+            try:
+                answer = self.gaia_agent.run(question)
+                print(f"Agent generated answer: {answer[:50]}..." if len(answer) > 50 else f"Agent generated answer: {answer}")
+                return answer
+            except Exception as e:
+                print(f"Error processing question: {e}")
+                return "An error occurred while processing your question. Please check the agent logs for details."
+        else:
+            return "The agent is not properly initialized. Please check your API keys and configuration."