Spaces:

Agents-MCP-Hackathon
/

Audio-Agent

Sleeping

App Files Files Community

YigitSekerci committed on Jun 9

Commit

23c0e5d

1 Parent(s): 80fa97f

implement history mechanic

Browse files

Files changed (2) hide show

src/agent.py +36 -6
src/ui.py +62 -31

src/agent.py CHANGED Viewed

@@ -17,6 +17,7 @@ system_prompt = """You are an expert Audio Processing Assistant with specialized
 - If a user asks about topics outside the audio domain, politely decline and redirect them back to audio-related assistance
 - Be conversational, friendly, and helpful when discussing audio topics
 - Share your expertise about audio concepts, techniques, and best practices when relevant
 ### Audio Processing Workflow:
 When a user requests audio processing and provides input files, follow this structured approach:
@@ -59,6 +60,16 @@ When a user requests audio processing and provides input files, follow this stru
 Remember: Stay focused on audio-related assistance and use your specialized tools to help users achieve their audio processing goals efficiently and effectively."""
 class AudioAgent:
     def __init__(
         self,
@@ -87,17 +98,36 @@ class AudioAgent:
         return agent
-    async def run_agent(self, user_input: str, input_audio_files: list[str]):
         if self.agent is None:
             self.agent = await self.build_agent()
-        input_context = f"""
-        User Request: {user_input}
-        Input Audio Files: {', '.join(input_audio_files) if input_audio_files else 'None'}
-        """
         res = await self.agent.ainvoke(
-            {"messages": [{"role": "user", "content": input_context}]},
         )
         return res["structured_response"]

 - If a user asks about topics outside the audio domain, politely decline and redirect them back to audio-related assistance
 - Be conversational, friendly, and helpful when discussing audio topics
 - Share your expertise about audio concepts, techniques, and best practices when relevant
+- If user doesn't provide input files, look for old messages to find input files. If many messages, look for the most recent one or ask the user to choose one of them.
 ### Audio Processing Workflow:
 When a user requests audio processing and provides input files, follow this structured approach:
 Remember: Stay focused on audio-related assistance and use your specialized tools to help users achieve their audio processing goals efficiently and effectively."""
+user_prompt = """
+User Request: {user_input}
+Input Audio Files: {input_audio_files}
+"""
+assistant_prompt = """
+Assistant Response: {final_response}
+Output Audio Files: {output_audio_files}
+"""
 class AudioAgent:
     def __init__(
         self,
         return agent
+    async def run_agent(self, user_input: str, input_audio_files: list[str], history: list = None):
         if self.agent is None:
             self.agent = await self.build_agent()
+        messages = []
+        if history:
+            for msg in history:
+                if msg["role"] == "user":
+                    input_files = msg.get("input_files", [])
+                    content = user_prompt.format(
+                        user_input=msg["content"],
+                        input_audio_files="\n".join(input_files)
+                    )
+                    messages.append({"role": "user", "content": content})
+                elif msg["role"] == "assistant":
+                    output_files = msg.get("output_files", [])
+                    content = assistant_prompt.format(
+                        final_response=msg["content"],
+                        output_audio_files="\n".join(output_files)
+                    )
+                    messages.append({"role": "assistant", "content": content})
+        current_input = user_prompt.format(
+            user_input=user_input,
+            input_audio_files="\n".join(input_audio_files)
+        )
+        messages.append({"role": "user", "content": current_input})
         res = await self.agent.ainvoke(
+            {"messages": messages},
         )
         return res["structured_response"]

src/ui.py CHANGED Viewed

@@ -14,12 +14,12 @@ def get_share_url(path):
         return path
     return f"{demo.share_url}/gradio_api/file={path}"
-def user_input(user_message, audio_files, history):
     """
     Handle user input with text and audio files
     """
     if not user_message.strip() and not audio_files:
-        return "", [], history
     # Process audio files into URLs
     audio_file_urls = []
@@ -33,49 +33,79 @@ def user_input(user_message, audio_files, history):
             audio_file_urls.append(get_share_url(file_path))
-    # Add user message to history (no uploaded file display)
-    history.append({"role": "user", "content": user_message})
-    return "", [], history, audio_file_urls
-async def bot_response(history, audio_file_urls):
     """
     Generate bot response using the agent
     """
     if not history or history[-1]["role"] != "user":
         return history, []
-    # Get the user message
-    user_message = history[-1]["content"]
     # If message is empty but we have audio files, provide default message
     if not user_message.strip() and audio_file_urls:
         user_message = "Please process these audio files"
     try:
-        # Use the agent's run_agent method
-        result = await agent.run_agent(user_message, audio_file_urls or [])
         # Extract the final response and audio files from the result
         final_response = result.final_response
         output_audio_files = result.output_audio_files
-        # Add assistant response to history (only final_response)
-        history.append({"role": "assistant", "content": final_response})
         return history, output_audio_files
     except Exception as e:
-        history.append({"role": "assistant", "content": f"❌ **Error**: {e}"})
         return history, []
-def bot_response_sync(history, audio_file_urls):
     """
     Synchronous wrapper for the async bot response
     """
     loop = asyncio.new_event_loop()
     asyncio.set_event_loop(loop)
     try:
-        return loop.run_until_complete(bot_response(history, audio_file_urls))
     finally:
         loop.close()
@@ -89,8 +119,9 @@ def create_interface():
         **Supported formats**: MP3, WAV, M4A, FLAC, AAC, OGG
         """)
-        # Hidden state to store audio file URLs
         audio_urls_state = gr.State([])
         with gr.Row():
             with gr.Column(scale=2):
@@ -126,34 +157,34 @@ def create_interface():
             send_btn = gr.Button("Ask", variant="primary", scale=1, size="lg")
         # Handle user input and bot response
-        def handle_submit(message, files, history):
-            new_msg, new_files, updated_history, audio_urls = user_input(message, files, history)
-            return new_msg, new_files, updated_history, audio_urls
-        def handle_bot_response(history, audio_urls):
-            updated_history, output_files = bot_response_sync(history, audio_urls)
-            return updated_history, output_files
         msg.submit(
             handle_submit,
-            [msg, audio_files, chatbot],
-            [msg, audio_files, chatbot, audio_urls_state],
             queue=False
         ).then(
             handle_bot_response,
-            [chatbot, audio_urls_state],
-            [chatbot, output_audio_files]
         )
         send_btn.click(
             handle_submit,
-            [msg, audio_files, chatbot],
-            [msg, audio_files, chatbot, audio_urls_state],
             queue=False
         ).then(
             handle_bot_response,
-            [chatbot, audio_urls_state],
-            [chatbot, output_audio_files]
         )
     return interface

         return path
     return f"{demo.share_url}/gradio_api/file={path}"
+def user_input(user_message, audio_files, history, custom_history):
     """
     Handle user input with text and audio files
     """
     if not user_message.strip() and not audio_files:
+        return "", audio_files, history, custom_history
     # Process audio files into URLs
     audio_file_urls = []
             audio_file_urls.append(get_share_url(file_path))
+    # Add user message to history with input files
+    history.append({
+        "role": "user",
+        "content": user_message,
+    })
+    # Update custom history
+    custom_history.append({
+        "role": "user",
+        "content": user_message,
+        "input_files": audio_file_urls
+    })
+    return "", audio_files, history, audio_file_urls, custom_history
async def bot_response(history, audio_file_urls, custom_history):
    """
    Generate bot response using the agent

    Args:
        history: Chat messages shown in the UI (dicts with "role" and
            "content"); the assistant reply is appended in place.
        audio_file_urls: File URLs attached to the current submission; only
            used to decide whether an empty message gets the default text.
        custom_history: Parallel history that also records "input_files" /
            "output_files" per turn; its last entry is read as the turn
            being answered, and the reply is appended in place.

    Returns:
        A ``(history, output_audio_files)`` tuple; the file list is empty
        when there is nothing to answer or the agent call raised.
    """
    if not history or history[-1]["role"] != "user":
        return history, []
    # Get the user message and input files
    current_turn = custom_history[-1]
    user_message = current_turn["content"]
    input_files = current_turn.get("input_files", [])
    # If message is empty but we have audio files, provide default message
    if not user_message.strip() and audio_file_urls:
        user_message = "Please process these audio files"
    try:
        # The last custom_history entry is the turn being answered and is
        # passed separately as the current request, so hand over only the
        # earlier turns; otherwise the agent would see this message twice.
        previous_turns = custom_history[:-1]
        result = await agent.run_agent(user_message, input_files, previous_turns)
        # Extract the final response and audio files from the result
        final_response = result.final_response
        output_audio_files = result.output_audio_files
        # Add assistant response to the visible chat history
        history.append({
            "role": "assistant",
            "content": final_response,
        })
        # Update custom history, keeping the produced files with the turn
        custom_history.append({
            "role": "assistant",
            "content": final_response,
            "output_files": output_audio_files
        })
        return history, output_audio_files
    except Exception as e:
        # Surface the failure in the chat instead of breaking the UI event.
        error_text = f"❌ **Error**: {e}"
        history.append({
            "role": "assistant",
            "content": error_text,
        })
        custom_history.append({
            "role": "assistant",
            "content": error_text,
            "output_files": []
        })
        return history, []
def bot_response_sync(history, audio_file_urls, custom_history):
    """
    Synchronous wrapper for the async bot response

    Runs ``bot_response`` to completion on a private event loop and returns
    its ``(history, output_audio_files)`` result.
    """
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        return loop.run_until_complete(bot_response(history, audio_file_urls, custom_history))
    finally:
        # Unregister before closing so this thread is not left with a closed
        # loop as its current event loop.
        asyncio.set_event_loop(None)
        loop.close()
         **Supported formats**: MP3, WAV, M4A, FLAC, AAC, OGG
         """)
+        # Hidden state to store audio file URLs and custom history
         audio_urls_state = gr.State([])
+        custom_history_state = gr.State([])
         with gr.Row():
             with gr.Column(scale=2):
             send_btn = gr.Button("Ask", variant="primary", scale=1, size="lg")
         # Handle user input and bot response
+        def handle_submit(message, files, history, custom_history):
+            new_msg, new_files, updated_history, audio_urls, updated_custom_history = user_input(message, files, history, custom_history)
+            return new_msg, new_files, updated_history, audio_urls, updated_custom_history
+        def handle_bot_response(history, audio_urls, custom_history):
+            updated_history, output_files = bot_response_sync(history, audio_urls, custom_history)
+            return updated_history, output_files, custom_history
         msg.submit(
             handle_submit,
+            [msg, audio_files, chatbot, custom_history_state],
+            [msg, audio_files, chatbot, audio_urls_state, custom_history_state],
             queue=False
         ).then(
             handle_bot_response,
+            [chatbot, audio_urls_state, custom_history_state],
+            [chatbot, output_audio_files, custom_history_state]
         )
         send_btn.click(
             handle_submit,
+            [msg, audio_files, chatbot, custom_history_state],
+            [msg, audio_files, chatbot, audio_urls_state, custom_history_state],
             queue=False
         ).then(
             handle_bot_response,
+            [chatbot, audio_urls_state, custom_history_state],
+            [chatbot, output_audio_files, custom_history_state]
         )
     return interface