Spaces:
Sleeping
Sleeping
audio agent
Browse files- __pycache__/agents.cpython-310.pyc +0 -0
- __pycache__/multi_agent.cpython-310.pyc +0 -0
- __pycache__/prompts.cpython-310.pyc +0 -0
- __pycache__/tools.cpython-310.pyc +0 -0
- agents.py +13 -2
- app.py +3 -1
- multi_agent.py +1 -1
- prompts.py +11 -0
- tools.py +33 -1
__pycache__/agents.cpython-310.pyc
CHANGED
Binary files a/__pycache__/agents.cpython-310.pyc and b/__pycache__/agents.cpython-310.pyc differ
|
|
__pycache__/multi_agent.cpython-310.pyc
CHANGED
Binary files a/__pycache__/multi_agent.cpython-310.pyc and b/__pycache__/multi_agent.cpython-310.pyc differ
|
|
__pycache__/prompts.cpython-310.pyc
CHANGED
Binary files a/__pycache__/prompts.cpython-310.pyc and b/__pycache__/prompts.cpython-310.pyc differ
|
|
__pycache__/tools.cpython-310.pyc
CHANGED
Binary files a/__pycache__/tools.cpython-310.pyc and b/__pycache__/tools.cpython-310.pyc differ
|
|
agents.py
CHANGED
@@ -42,13 +42,23 @@ def create_image_analysis_agent(message):
|
|
42 |
name="image_analysis_agent",
|
43 |
description=prompts.get_image_analysis_prompt(message),
|
44 |
model=InferenceClientModel(IMAGE_ANALYSIS_MODEL),
|
45 |
-
tools=[image_analysis_tool],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
max_steps=2,
|
47 |
)
|
48 |
|
49 |
def create_manager_agent(message):
|
50 |
simple_web_search_agent = create_simple_web_search_agent(message)
|
51 |
image_analysis_agent = create_image_analysis_agent(message)
|
|
|
52 |
|
53 |
return CodeAgent(
|
54 |
name="manager_agent",
|
@@ -60,6 +70,7 @@ def create_manager_agent(message):
|
|
60 |
managed_agents=[
|
61 |
simple_web_search_agent,
|
62 |
image_analysis_agent,
|
|
|
63 |
],
|
64 |
max_steps=10,
|
65 |
additional_authorized_imports=[
|
@@ -95,6 +106,6 @@ def create_final_answer_agent(message):
|
|
95 |
name="final_answer_agent",
|
96 |
description="Given a question and an initial answer, return the final refined answer following strict formatting rules.",
|
97 |
model=InferenceClientModel(FINAL_ANSWER_MODEL),
|
98 |
-
max_steps=
|
99 |
tools=[],
|
100 |
)
|
|
|
42 |
name="image_analysis_agent",
|
43 |
description=prompts.get_image_analysis_prompt(message),
|
44 |
model=InferenceClientModel(IMAGE_ANALYSIS_MODEL),
|
45 |
+
tools=[tools.image_analysis_tool],
|
46 |
+
max_steps=2,
|
47 |
+
)
|
48 |
+
|
49 |
+
def create_audio_analysis_agent(message):
|
50 |
+
return CodeAgent(
|
51 |
+
name="audio_analysis_agent",
|
52 |
+
description=prompts.get_audio_analysis_prompt(message),
|
53 |
+
model=InferenceClientModel(AUDIO_ANALYSIS_MODEL),
|
54 |
+
tools=[tools.audio_analysis_tool],
|
55 |
max_steps=2,
|
56 |
)
|
57 |
|
58 |
def create_manager_agent(message):
|
59 |
simple_web_search_agent = create_simple_web_search_agent(message)
|
60 |
image_analysis_agent = create_image_analysis_agent(message)
|
61 |
+
audio_analysis_agent = create_audio_analysis_agent(message)
|
62 |
|
63 |
return CodeAgent(
|
64 |
name="manager_agent",
|
|
|
70 |
managed_agents=[
|
71 |
simple_web_search_agent,
|
72 |
image_analysis_agent,
|
73 |
+
audio_analysis_agent,
|
74 |
],
|
75 |
max_steps=10,
|
76 |
additional_authorized_imports=[
|
|
|
106 |
name="final_answer_agent",
|
107 |
description="Given a question and an initial answer, return the final refined answer following strict formatting rules.",
|
108 |
model=InferenceClientModel(FINAL_ANSWER_MODEL),
|
109 |
+
max_steps=3,
|
110 |
tools=[],
|
111 |
)
|
app.py
CHANGED
@@ -161,7 +161,9 @@ with gr.Blocks() as demo:
|
|
161 |
"""
|
162 |
**Instructions:**
|
163 |
|
164 |
-
|
|
|
|
|
165 |
|
166 |
"""
|
167 |
)
|
|
|
161 |
"""
|
162 |
**Instructions:**
|
163 |
|
164 |
+
1. Who is in the final of champions league in 2025?
|
165 |
+
2. What is the colour of the suit in this image: https://external-content.duckduckgo.com/iu/?u=https%3A%2F%2Fimages.hdqwalls.com%2Fwallpapers%2Fblack-superman-henry-cavill-xa.jpg&f=1&nofb=1&ipt=451cdc8bb05635ac59e50dc567cb68ae38ad45a626622ee7760b2c3ef828d5a7?
|
166 |
+
3. Which of the fruits shown in the 2008 painting “Embroidery from Uzbekistan” were served as part of the October 1949 breakfast menu for the ocean liner that was later used as a floating prop for the film “The Last Voyage”? Give the items as a comma-separated list, ordering them in clockwise order based on their arrangement in the painting starting from the 12 o’clock position. Use the plural form of each fruit.
|
167 |
|
168 |
"""
|
169 |
)
|
multi_agent.py
CHANGED
@@ -11,7 +11,7 @@ import agents
|
|
11 |
|
12 |
def orchestrate(message, file_path):
|
13 |
final_prompt = prompts.get_manager_prompt(message, file_path)
|
14 |
-
initial_answer = agents.
|
15 |
|
16 |
final_answer = agents.create_final_answer_agent(message).run(prompts.get_final_answer_prompt(message, initial_answer))
|
17 |
|
|
|
11 |
|
12 |
def orchestrate(message, file_path):
|
13 |
final_prompt = prompts.get_manager_prompt(message, file_path)
|
14 |
+
initial_answer = agents.create_manager_agent(message).run(message)
|
15 |
|
16 |
final_answer = agents.create_final_answer_agent(message).run(prompts.get_final_answer_prompt(message, initial_answer))
|
17 |
|
prompts.py
CHANGED
@@ -14,6 +14,13 @@ def get_image_analysis_prompt(message, file_path=None):
|
|
14 |
|
15 |
return prompt
|
16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
def get_manager_prompt(message, file_path=None):
|
18 |
prompt = f"""Your job is to answer the following question.
|
19 |
Answer the following question. If needed, delegate to one of your coworkers:\n
|
@@ -26,6 +33,8 @@ def get_manager_prompt(message, file_path=None):
|
|
26 |
|
27 |
In case you cannot answer the question and there is not a good coworker, delegate to the Code Generation Agent.\n.
|
28 |
|
|
|
|
|
29 |
Question: {message}
|
30 |
"""
|
31 |
|
@@ -55,6 +64,8 @@ def get_final_answer_prompt(message: str, initial_answer: str):
|
|
55 |
**Example 5:** What is the opposite of bad, worse, worst? good, better, best
|
56 |
|
57 |
**Final answer:**
|
|
|
|
|
58 |
"""
|
59 |
|
60 |
return prompt
|
|
|
14 |
|
15 |
return prompt
|
16 |
|
17 |
+
def get_audio_analysis_prompt(message, file_path=None):
|
18 |
+
prompt = f"""
|
19 |
+
As an expert audio analysis assistant, you analyze the audio to answer the question. Given a question and audio file, analyze the audio and answer the question: {message}
|
20 |
+
"""
|
21 |
+
|
22 |
+
return prompt
|
23 |
+
|
24 |
def get_manager_prompt(message, file_path=None):
|
25 |
prompt = f"""Your job is to answer the following question.
|
26 |
Answer the following question. If needed, delegate to one of your coworkers:\n
|
|
|
33 |
|
34 |
In case you cannot answer the question and there is not a good coworker, delegate to the Code Generation Agent.\n.
|
35 |
|
36 |
+
The final answer must always be a string and no other formats are acceptable.
|
37 |
+
|
38 |
Question: {message}
|
39 |
"""
|
40 |
|
|
|
64 |
**Example 5:** What is the opposite of bad, worse, worst? good, better, best
|
65 |
|
66 |
**Final answer:**
|
67 |
+
|
68 |
+
The final answer must always be a string and no other formats are acceptable.
|
69 |
"""
|
70 |
|
71 |
return prompt
|
tools.py
CHANGED
@@ -66,4 +66,36 @@ def image_analysis_tool(question: str, file_path: str) -> str:
|
|
66 |
# You can return this dictionary directly if your model expects JSON format
|
67 |
return prompt # Actual agent model will process this
|
68 |
except Exception as e:
|
69 |
-
raise RuntimeError(f"Image analysis failed: {str(e)}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
66 |
# You can return this dictionary directly if your model expects JSON format
|
67 |
return prompt # Actual agent model will process this
|
68 |
except Exception as e:
|
69 |
+
raise RuntimeError(f"Image analysis failed: {str(e)}")
|
70 |
+
|
71 |
+
@tool
|
72 |
+
def audio_analysis_tool(question: str, file_path: str) -> str:
|
73 |
+
"""
|
74 |
+
Given a question and an audio file path, analyze the audio to answer the question.
|
75 |
+
|
76 |
+
Args:
|
77 |
+
question (str): A question about the audio.
|
78 |
+
file_path (str): Path to the audio file.
|
79 |
+
|
80 |
+
Returns:
|
81 |
+
str: Structured prompt with audio and question (for agent model to process).
|
82 |
+
|
83 |
+
Raises:
|
84 |
+
RuntimeError: If processing fails.
|
85 |
+
"""
|
86 |
+
try:
|
87 |
+
# Read and encode audio to base64
|
88 |
+
with open(file_path, "rb") as audio_file:
|
89 |
+
audio_data = base64.b64encode(audio_file.read()).decode("utf-8")
|
90 |
+
|
91 |
+
# Format the content in a vision+text style prompt, adapted for audio
|
92 |
+
prompt = {
|
93 |
+
"inputs": {
|
94 |
+
"audio": audio_data,
|
95 |
+
"question": question
|
96 |
+
}
|
97 |
+
}
|
98 |
+
|
99 |
+
return prompt # The agent model will process this
|
100 |
+
except Exception as e:
|
101 |
+
raise RuntimeError(f"Audio analysis failed: {str(e)}")
|