Spaces:
Sleeping
Sleeping
Commit
·
23c0e5d
1
Parent(s):
80fa97f
implement history mechanic
Browse files- src/agent.py +36 -6
- src/ui.py +62 -31
src/agent.py
CHANGED
@@ -17,6 +17,7 @@ system_prompt = """You are an expert Audio Processing Assistant with specialized
|
|
17 |
- If a user asks about topics outside the audio domain, politely decline and redirect them back to audio-related assistance
|
18 |
- Be conversational, friendly, and helpful when discussing audio topics
|
19 |
- Share your expertise about audio concepts, techniques, and best practices when relevant
|
|
|
20 |
|
21 |
### Audio Processing Workflow:
|
22 |
When a user requests audio processing and provides input files, follow this structured approach:
|
@@ -59,6 +60,16 @@ When a user requests audio processing and provides input files, follow this stru
|
|
59 |
|
60 |
Remember: Stay focused on audio-related assistance and use your specialized tools to help users achieve their audio processing goals efficiently and effectively."""
|
61 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
class AudioAgent:
|
63 |
def __init__(
|
64 |
self,
|
@@ -87,17 +98,36 @@ class AudioAgent:
|
|
87 |
|
88 |
return agent
|
89 |
|
90 |
-
async def run_agent(self, user_input: str, input_audio_files: list[str]):
|
91 |
if self.agent is None:
|
92 |
self.agent = await self.build_agent()
|
93 |
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
98 |
|
99 |
res = await self.agent.ainvoke(
|
100 |
-
{"messages":
|
101 |
)
|
102 |
|
103 |
return res["structured_response"]
|
|
|
17 |
- If a user asks about topics outside the audio domain, politely decline and redirect them back to audio-related assistance
|
18 |
- Be conversational, friendly, and helpful when discussing audio topics
|
19 |
- Share your expertise about audio concepts, techniques, and best practices when relevant
|
20 |
+
- If user doesn't provide input files, look for old messages to find input files. If many messages, look for the most recent one or ask the user to choose one of them.
|
21 |
|
22 |
### Audio Processing Workflow:
|
23 |
When a user requests audio processing and provides input files, follow this structured approach:
|
|
|
60 |
|
61 |
Remember: Stay focused on audio-related assistance and use your specialized tools to help users achieve their audio processing goals efficiently and effectively."""
|
62 |
|
63 |
+
user_prompt = """
|
64 |
+
User Request: {user_input}
|
65 |
+
Input Audio Files: {input_audio_files}
|
66 |
+
"""
|
67 |
+
|
68 |
+
assistant_prompt = """
|
69 |
+
Assistant Response: {final_response}
|
70 |
+
Output Audio Files: {output_audio_files}
|
71 |
+
"""
|
72 |
+
|
73 |
class AudioAgent:
|
74 |
def __init__(
|
75 |
self,
|
|
|
98 |
|
99 |
return agent
|
100 |
|
101 |
+
async def run_agent(self, user_input: str, input_audio_files: list[str], history: "list | None" = None):
    """Run the agent on the current request, replaying earlier turns as context.

    Args:
        user_input: Text of the current user request.
        input_audio_files: Audio file references attached to the current request.
        history: Optional earlier turns, oldest first. Each item is a dict with
            a "role" ("user" or "assistant") and a "content"; user turns may
            carry "input_files" and assistant turns "output_files". Items with
            any other role are ignored.

    Returns:
        The "structured_response" entry of the agent's result.
    """
    if self.agent is None:
        # Built lazily on first use.
        self.agent = await self.build_agent()

    def _joined(files):
        # `dict.get(key, [])` still yields None when the key is present with a
        # None value, and entries are not guaranteed to be plain strings.
        return "\n".join(str(item) for item in (files or []))

    messages = []
    for turn in history or []:
        role = turn.get("role")
        if role == "user":
            content = user_prompt.format(
                user_input=turn.get("content") or "",
                input_audio_files=_joined(turn.get("input_files")),
            )
        elif role == "assistant":
            content = assistant_prompt.format(
                final_response=turn.get("content") or "",
                output_audio_files=_joined(turn.get("output_files")),
            )
        else:
            continue
        messages.append({"role": role, "content": content})

    current_message = {
        "role": "user",
        "content": user_prompt.format(
            user_input=user_input,
            input_audio_files=_joined(input_audio_files),
        ),
    }
    # The history handed in may already end with the current request; do not
    # send the identical user message twice in a row.
    if not messages or messages[-1] != current_message:
        messages.append(current_message)

    res = await self.agent.ainvoke({"messages": messages})
    return res["structured_response"]
|
src/ui.py
CHANGED
@@ -14,12 +14,12 @@ def get_share_url(path):
|
|
14 |
return path
|
15 |
return f"{demo.share_url}/gradio_api/file={path}"
|
16 |
|
17 |
-
def user_input(user_message, audio_files, history):
|
18 |
"""
|
19 |
Handle user input with text and audio files
|
20 |
"""
|
21 |
if not user_message.strip() and not audio_files:
|
22 |
-
return "",
|
23 |
|
24 |
# Process audio files into URLs
|
25 |
audio_file_urls = []
|
@@ -33,49 +33,79 @@ def user_input(user_message, audio_files, history):
|
|
33 |
|
34 |
audio_file_urls.append(get_share_url(file_path))
|
35 |
|
36 |
-
# Add user message to history
|
37 |
-
history.append({
|
38 |
-
|
|
|
|
|
39 |
|
40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
"""
|
42 |
Generate bot response using the agent
|
43 |
"""
|
44 |
if not history or history[-1]["role"] != "user":
|
45 |
return history, []
|
46 |
|
47 |
-
# Get the user message
|
48 |
-
user_message =
|
49 |
-
|
|
|
50 |
# If message is empty but we have audio files, provide default message
|
51 |
if not user_message.strip() and audio_file_urls:
|
52 |
user_message = "Please process these audio files"
|
53 |
|
54 |
try:
|
55 |
-
# Use the agent's run_agent method
|
56 |
-
result = await agent.run_agent(user_message,
|
57 |
|
58 |
# Extract the final response and audio files from the result
|
59 |
final_response = result.final_response
|
60 |
output_audio_files = result.output_audio_files
|
61 |
|
62 |
-
# Add assistant response to history
|
63 |
-
history.append({
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
|
65 |
return history, output_audio_files
|
66 |
|
67 |
except Exception as e:
|
68 |
-
history.append({
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
return history, []
|
70 |
|
71 |
-
def bot_response_sync(history, audio_file_urls):
|
72 |
"""
|
73 |
Synchronous wrapper for the async bot response
|
74 |
"""
|
75 |
loop = asyncio.new_event_loop()
|
76 |
asyncio.set_event_loop(loop)
|
77 |
try:
|
78 |
-
return loop.run_until_complete(bot_response(history, audio_file_urls))
|
79 |
finally:
|
80 |
loop.close()
|
81 |
|
@@ -89,8 +119,9 @@ def create_interface():
|
|
89 |
**Supported formats**: MP3, WAV, M4A, FLAC, AAC, OGG
|
90 |
""")
|
91 |
|
92 |
-
# Hidden state to store audio file URLs
|
93 |
audio_urls_state = gr.State([])
|
|
|
94 |
|
95 |
with gr.Row():
|
96 |
with gr.Column(scale=2):
|
@@ -126,34 +157,34 @@ def create_interface():
|
|
126 |
send_btn = gr.Button("Ask", variant="primary", scale=1, size="lg")
|
127 |
|
128 |
# Handle user input and bot response
|
129 |
-
def handle_submit(message, files, history):
|
130 |
-
new_msg, new_files, updated_history, audio_urls = user_input(message, files, history)
|
131 |
-
return new_msg, new_files, updated_history, audio_urls
|
132 |
|
133 |
-
def handle_bot_response(history, audio_urls):
|
134 |
-
updated_history, output_files = bot_response_sync(history, audio_urls)
|
135 |
-
return updated_history, output_files
|
136 |
|
137 |
msg.submit(
|
138 |
handle_submit,
|
139 |
-
[msg, audio_files, chatbot],
|
140 |
-
[msg, audio_files, chatbot, audio_urls_state],
|
141 |
queue=False
|
142 |
).then(
|
143 |
handle_bot_response,
|
144 |
-
[chatbot, audio_urls_state],
|
145 |
-
[chatbot, output_audio_files]
|
146 |
)
|
147 |
|
148 |
send_btn.click(
|
149 |
handle_submit,
|
150 |
-
[msg, audio_files, chatbot],
|
151 |
-
[msg, audio_files, chatbot, audio_urls_state],
|
152 |
queue=False
|
153 |
).then(
|
154 |
handle_bot_response,
|
155 |
-
[chatbot, audio_urls_state],
|
156 |
-
[chatbot, output_audio_files]
|
157 |
)
|
158 |
|
159 |
return interface
|
|
|
14 |
return path
|
15 |
return f"{demo.share_url}/gradio_api/file={path}"
|
16 |
|
17 |
+
def user_input(user_message, audio_files, history, custom_history):
|
18 |
"""
|
19 |
Handle user input with text and audio files
|
20 |
"""
|
21 |
if not user_message.strip() and not audio_files:
|
22 |
+
return "", audio_files, history, custom_history
|
23 |
|
24 |
# Process audio files into URLs
|
25 |
audio_file_urls = []
|
|
|
33 |
|
34 |
audio_file_urls.append(get_share_url(file_path))
|
35 |
|
36 |
+
# Add user message to history with input files
|
37 |
+
history.append({
|
38 |
+
"role": "user",
|
39 |
+
"content": user_message,
|
40 |
+
})
|
41 |
|
42 |
+
# Update custom history
|
43 |
+
custom_history.append({
|
44 |
+
"role": "user",
|
45 |
+
"content": user_message,
|
46 |
+
"input_files": audio_file_urls
|
47 |
+
})
|
48 |
+
|
49 |
+
return "", audio_files, history, audio_file_urls, custom_history
|
50 |
+
|
51 |
+
async def bot_response(history, audio_file_urls, custom_history):
    """
    Generate bot response using the agent

    Args:
        history: Chat messages shown in the UI (dicts with "role"/"content").
            The assistant reply is appended in place.
        audio_file_urls: Audio URLs recorded for the pending user turn.
        custom_history: Parallel history that additionally stores
            "input_files" / "output_files" per turn. Updated in place.

    Returns:
        A `(history, output_audio_files)` tuple; the file list is empty when
        there is no pending user turn or the agent call fails.
    """
    if not history or history[-1]["role"] != "user":
        return history, []

    # Get the user message and input files
    last_turn = custom_history[-1] if custom_history else None
    if last_turn is not None and last_turn.get("role") == "user":
        user_message = last_turn.get("content") or ""
        input_files = last_turn.get("input_files") or []
        # Everything before the pending turn is context; the pending turn
        # itself is passed separately, so it must not be replayed as history.
        prior_turns = custom_history[:-1]
    else:
        # The custom history is out of sync with the chat history: fall back
        # to the visible chat message and re-record the pending turn.
        user_message = history[-1].get("content") or ""
        input_files = list(audio_file_urls or [])
        prior_turns = list(custom_history)
        custom_history.append({
            "role": "user",
            "content": user_message,
            "input_files": input_files,
        })

    # If message is empty but we have audio files, provide default message
    if not user_message.strip() and (input_files or audio_file_urls):
        user_message = "Please process these audio files"

    try:
        # Use the agent's run_agent method with the earlier turns as history
        result = await agent.run_agent(user_message, input_files, prior_turns)

        # Extract the final response and audio files from the result
        final_response = result.final_response
        # Normalise a missing file list so the stored turn stays joinable.
        output_audio_files = result.output_audio_files or []

        # Add assistant response to history
        history.append({
            "role": "assistant",
            "content": final_response,
        })

        # Update custom history with the output files
        custom_history.append({
            "role": "assistant",
            "content": final_response,
            "output_files": output_audio_files,
        })

        return history, output_audio_files

    except Exception as e:
        # UI boundary: show the failure in the chat, but keep the traceback.
        import logging

        logging.getLogger(__name__).exception("Agent run failed")
        error_text = f"❌ **Error**: {e}"
        history.append({
            "role": "assistant",
            "content": error_text,
        })
        custom_history.append({
            "role": "assistant",
            "content": error_text,
            "output_files": [],
        })
        return history, []
|
100 |
|
101 |
+
def bot_response_sync(history, audio_file_urls, custom_history):
    """
    Synchronous wrapper for the async bot response

    Runs `bot_response` to completion on a fresh event loop and returns its
    `(history, output_audio_files)` result. `asyncio.run` creates the loop,
    finalises async generators and leftover tasks, closes the loop and
    unregisters it, so no closed loop is left installed as the thread's
    current event loop.
    """
    return asyncio.run(bot_response(history, audio_file_urls, custom_history))
|
111 |
|
|
|
119 |
**Supported formats**: MP3, WAV, M4A, FLAC, AAC, OGG
|
120 |
""")
|
121 |
|
122 |
+
# Hidden state to store audio file URLs and custom history
|
123 |
audio_urls_state = gr.State([])
|
124 |
+
custom_history_state = gr.State([])
|
125 |
|
126 |
with gr.Row():
|
127 |
with gr.Column(scale=2):
|
|
|
157 |
send_btn = gr.Button("Ask", variant="primary", scale=1, size="lg")
|
158 |
|
159 |
# Handle user input and bot response
|
160 |
+
def handle_submit(message, files, history, custom_history):
    """Record the submitted message and return the five event outputs.

    Returns:
        `(new_msg, new_files, updated_history, audio_urls,
        updated_custom_history)`, matching the five output components the
        submit/click events are wired to.
    """
    result = tuple(user_input(message, files, history, custom_history))
    if len(result) == 4:
        # The empty-submission early return of `user_input` omits the audio
        # URL list; fill it in so the outputs still line up instead of
        # failing on unpacking.
        new_msg, new_files, updated_history, updated_custom_history = result
        return new_msg, new_files, updated_history, [], updated_custom_history
    return result
|
163 |
|
164 |
+
def handle_bot_response(history, audio_urls, custom_history):
    """Produce the assistant reply and return the three event outputs.

    Returns:
        `(updated_history, output_files, custom_history)`. The custom history
        object is handed back as received; `bot_response` appends to it in
        place.
    """
    chat_messages, produced_files = bot_response_sync(
        history, audio_urls, custom_history
    )
    return chat_messages, produced_files, custom_history
|
167 |
|
168 |
msg.submit(
|
169 |
handle_submit,
|
170 |
+
[msg, audio_files, chatbot, custom_history_state],
|
171 |
+
[msg, audio_files, chatbot, audio_urls_state, custom_history_state],
|
172 |
queue=False
|
173 |
).then(
|
174 |
handle_bot_response,
|
175 |
+
[chatbot, audio_urls_state, custom_history_state],
|
176 |
+
[chatbot, output_audio_files, custom_history_state]
|
177 |
)
|
178 |
|
179 |
send_btn.click(
|
180 |
handle_submit,
|
181 |
+
[msg, audio_files, chatbot, custom_history_state],
|
182 |
+
[msg, audio_files, chatbot, audio_urls_state, custom_history_state],
|
183 |
queue=False
|
184 |
).then(
|
185 |
handle_bot_response,
|
186 |
+
[chatbot, audio_urls_state, custom_history_state],
|
187 |
+
[chatbot, output_audio_files, custom_history_state]
|
188 |
)
|
189 |
|
190 |
return interface
|