gaia_final_assignment

Sleeping

App Files Files Community

Dkapsis committed on May 20

Commit

a9182c5

1 Parent(s): 64c3879

audio agent

Browse files

Files changed (9) hide show

__pycache__/agents.cpython-310.pyc +0 -0
__pycache__/multi_agent.cpython-310.pyc +0 -0
__pycache__/prompts.cpython-310.pyc +0 -0
__pycache__/tools.cpython-310.pyc +0 -0
agents.py +13 -2
app.py +3 -1
multi_agent.py +1 -1
prompts.py +11 -0
tools.py +33 -1

__pycache__/agents.cpython-310.pyc CHANGED Viewed

Binary files a/__pycache__/agents.cpython-310.pyc and b/__pycache__/agents.cpython-310.pyc differ

__pycache__/multi_agent.cpython-310.pyc CHANGED Viewed

Binary files a/__pycache__/multi_agent.cpython-310.pyc and b/__pycache__/multi_agent.cpython-310.pyc differ

__pycache__/prompts.cpython-310.pyc CHANGED Viewed

Binary files a/__pycache__/prompts.cpython-310.pyc and b/__pycache__/prompts.cpython-310.pyc differ

__pycache__/tools.cpython-310.pyc CHANGED Viewed

Binary files a/__pycache__/tools.cpython-310.pyc and b/__pycache__/tools.cpython-310.pyc differ

agents.py CHANGED Viewed

@@ -42,13 +42,23 @@ def create_image_analysis_agent(message):
         name="image_analysis_agent",
         description=prompts.get_image_analysis_prompt(message),
         model=InferenceClientModel(IMAGE_ANALYSIS_MODEL),
-        tools=[image_analysis_tool],
         max_steps=2,
     )
 def create_manager_agent(message):
     simple_web_search_agent = create_simple_web_search_agent(message)
     image_analysis_agent = create_image_analysis_agent(message)
     return CodeAgent(
         name="manager_agent",
@@ -60,6 +70,7 @@ def create_manager_agent(message):
         managed_agents=[
             simple_web_search_agent,
             image_analysis_agent,
         ],
         max_steps=10,
         additional_authorized_imports=[
@@ -95,6 +106,6 @@ def create_final_answer_agent(message):
         name="final_answer_agent",
         description="Given a question and an initial answer, return the final refined answer following strict formatting rules.",
         model=InferenceClientModel(FINAL_ANSWER_MODEL),
-        max_steps=2,
         tools=[],
     )

         name="image_analysis_agent",
         description=prompts.get_image_analysis_prompt(message),
         model=InferenceClientModel(IMAGE_ANALYSIS_MODEL),
+        tools=[tools.image_analysis_tool],
+        max_steps=2,
+    )
+def create_audio_analysis_agent(message):
+    """Build the managed agent responsible for audio questions.
+
+    Args:
+        message: The user question; it is passed to
+            prompts.get_audio_analysis_prompt to produce the agent description.
+
+    Returns:
+        A CodeAgent named "audio_analysis_agent" backed by
+        InferenceClientModel(AUDIO_ANALYSIS_MODEL), equipped with
+        tools.audio_analysis_tool only and limited to 2 steps.
+    """
+    return CodeAgent(
+        name="audio_analysis_agent",
+        description=prompts.get_audio_analysis_prompt(message),
+        model=InferenceClientModel(AUDIO_ANALYSIS_MODEL),
+        tools=[tools.audio_analysis_tool],
         max_steps=2,
     )
 def create_manager_agent(message):
     simple_web_search_agent = create_simple_web_search_agent(message)
     image_analysis_agent = create_image_analysis_agent(message)
+    audio_analysis_agent = create_audio_analysis_agent(message)
     return CodeAgent(
         name="manager_agent",
         managed_agents=[
             simple_web_search_agent,
             image_analysis_agent,
+            audio_analysis_agent,
         ],
         max_steps=10,
         additional_authorized_imports=[
         name="final_answer_agent",
         description="Given a question and an initial answer, return the final refined answer following strict formatting rules.",
         model=InferenceClientModel(FINAL_ANSWER_MODEL),
+        max_steps=3,
         tools=[],
     )

app.py CHANGED Viewed

@@ -161,7 +161,9 @@ with gr.Blocks() as demo:
         """
         **Instructions:**
-        4.  who is in the final of champions league this year?
         """
     )

         """
         **Instructions:**
+        1. Who is in the final of champions league in 2025?
+        2. What is the colour of the suit in this image: https://external-content.duckduckgo.com/iu/?u=https%3A%2F%2Fimages.hdqwalls.com%2Fwallpapers%2Fblack-superman-henry-cavill-xa.jpg&f=1&nofb=1&ipt=451cdc8bb05635ac59e50dc567cb68ae38ad45a626622ee7760b2c3ef828d5a7?
+        3. Which of the fruits shown in the 2008 painting “Embroidery from Uzbekistan” were served as part of the October 1949 breakfast menu for the ocean liner that was later used as a floating prop for the film “The Last Voyage”? Give the items as a comma-separated list, ordering them in clockwise order based on their arrangement in the painting starting from the 12 o’clock position. Use the plural form of each fruit.
         """
     )

multi_agent.py CHANGED Viewed

@@ -11,7 +11,7 @@ import agents
 def orchestrate(message, file_path):
     final_prompt = prompts.get_manager_prompt(message, file_path)
-    initial_answer = agents.create_simple_web_search_agent(message).run(message)
     final_answer = agents.create_final_answer_agent(message).run(prompts.get_final_answer_prompt(message, initial_answer))

 def orchestrate(message, file_path):
     final_prompt = prompts.get_manager_prompt(message, file_path)
+    initial_answer = agents.create_manager_agent(message).run(message)
     final_answer = agents.create_final_answer_agent(message).run(prompts.get_final_answer_prompt(message, initial_answer))

prompts.py CHANGED Viewed

@@ -14,6 +14,13 @@ def get_image_analysis_prompt(message, file_path=None):
     return prompt
 def get_manager_prompt(message, file_path=None):
     prompt = f"""Your job is to answer the following question.
         Answer the following question. If needed, delegate to one of your coworkers:\n
@@ -26,6 +33,8 @@ def get_manager_prompt(message, file_path=None):
         In case you cannot answer the question and there is not a good coworker, delegate to the Code Generation Agent.\n.
         Question: {message}
         """
@@ -55,6 +64,8 @@ def get_final_answer_prompt(message: str, initial_answer: str):
         **Example 5:** What is the opposite of bad, worse, worst? good, better, best
         **Final answer:**
         """
     return prompt

     return prompt
+def get_audio_analysis_prompt(message, file_path=None):
+    """Return the instruction text used for the audio analysis agent.
+
+    Args:
+        message: The question; interpolated verbatim at the end of the prompt.
+        file_path: Accepted but not referenced anywhere in this body.
+            NOTE(review): the prompt mentions an audio file yet never names
+            it; confirm whether the path should be included.
+
+    Returns:
+        The formatted prompt string, including its leading newline and
+        trailing indentation.
+    """
+    prompt = f"""
+    As an expert audio analysis assistant, you analyze the audio to answer the question. Given a question and audio file, analyze the audio and answer the question: {message}
+        """
+    return prompt
 def get_manager_prompt(message, file_path=None):
     prompt = f"""Your job is to answer the following question.
         Answer the following question. If needed, delegate to one of your coworkers:\n
         In case you cannot answer the question and there is not a good coworker, delegate to the Code Generation Agent.\n.
+        The final answer must always be a string and no other formats are acceptable.
         Question: {message}
         """
         **Example 5:** What is the opposite of bad, worse, worst? good, better, best
         **Final answer:**
+        The final answer must always be a string and no other formats are acceptable.
         """
     return prompt

tools.py CHANGED Viewed

@@ -66,4 +66,36 @@ def image_analysis_tool(question: str, file_path: str) -> str:
         # You can return this dictionary directly if your model expects JSON format
         return prompt  # Actual agent model will process this
     except Exception as e:
-        raise RuntimeError(f"Image analysis failed: {str(e)}")

         # You can return this dictionary directly if your model expects JSON format
         return prompt  # Actual agent model will process this
     except Exception as e:
+        raise RuntimeError(f"Image analysis failed: {str(e)}")
+@tool
+def audio_analysis_tool(question: str, file_path: str) -> str:
+    """
+    Given a question and an audio file path, analyze the audio to answer the question.
+    Args:
+        question (str): A question about the audio.
+        file_path (str): Path to the audio file.
+    Returns:
+        str: JSON-encoded prompt holding the base64 audio and the question (for agent model to process).
+    Raises:
+        RuntimeError: If processing fails.
+    """
+    # Local import so this block does not depend on a file-level `import json`.
+    import json
+
+    try:
+        # Read and encode audio to base64
+        with open(file_path, "rb") as audio_file:
+            audio_data = base64.b64encode(audio_file.read()).decode("utf-8")
+        # Format the content in a vision+text style prompt, adapted for audio
+        prompt = {
+            "inputs": {
+                "audio": audio_data,
+                "question": question,
+            }
+        }
+        # The signature and docstring promise a str; previously the raw dict
+        # was returned, contradicting the declared return type.
+        return json.dumps(prompt)  # The agent model will process this
+    except Exception as e:
+        # Chain the original exception so the root cause stays in the traceback.
+        raise RuntimeError(f"Audio analysis failed: {str(e)}") from e