YigitSekerci committed on
Commit
23c0e5d
·
1 Parent(s): 80fa97f

implement history mechanic

Browse files
Files changed (2) hide show
  1. src/agent.py +36 -6
  2. src/ui.py +62 -31
src/agent.py CHANGED
@@ -17,6 +17,7 @@ system_prompt = """You are an expert Audio Processing Assistant with specialized
17
  - If a user asks about topics outside the audio domain, politely decline and redirect them back to audio-related assistance
18
  - Be conversational, friendly, and helpful when discussing audio topics
19
  - Share your expertise about audio concepts, techniques, and best practices when relevant
 
20
 
21
  ### Audio Processing Workflow:
22
  When a user requests audio processing and provides input files, follow this structured approach:
@@ -59,6 +60,16 @@ When a user requests audio processing and provides input files, follow this stru
59
 
60
  Remember: Stay focused on audio-related assistance and use your specialized tools to help users achieve their audio processing goals efficiently and effectively."""
61
 
 
 
 
 
 
 
 
 
 
 
62
  class AudioAgent:
63
  def __init__(
64
  self,
@@ -87,17 +98,36 @@ class AudioAgent:
87
 
88
  return agent
89
 
90
- async def run_agent(self, user_input: str, input_audio_files: list[str]):
91
  if self.agent is None:
92
  self.agent = await self.build_agent()
93
 
94
- input_context = f"""
95
- User Request: {user_input}
96
- Input Audio Files: {', '.join(input_audio_files) if input_audio_files else 'None'}
97
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
 
99
  res = await self.agent.ainvoke(
100
- {"messages": [{"role": "user", "content": input_context}]},
101
  )
102
 
103
  return res["structured_response"]
 
17
  - If a user asks about topics outside the audio domain, politely decline and redirect them back to audio-related assistance
18
  - Be conversational, friendly, and helpful when discussing audio topics
19
  - Share your expertise about audio concepts, techniques, and best practices when relevant
20
+ - If user doesn't provide input files, look for old messages to find input files. If many messages, look for the most recent one or ask the user to choose one of them.
21
 
22
  ### Audio Processing Workflow:
23
  When a user requests audio processing and provides input files, follow this structured approach:
 
60
 
61
  Remember: Stay focused on audio-related assistance and use your specialized tools to help users achieve their audio processing goals efficiently and effectively."""
62
 
63
+ user_prompt = """
64
+ User Request: {user_input}
65
+ Input Audio Files: {input_audio_files}
66
+ """
67
+
68
+ assistant_prompt = """
69
+ Assistant Response: {final_response}
70
+ Output Audio Files: {output_audio_files}
71
+ """
72
+
73
  class AudioAgent:
74
  def __init__(
75
  self,
 
98
 
99
  return agent
100
 
101
async def run_agent(self, user_input: str, input_audio_files: list[str], history: list = None):
    """Run the agent on the current request, replaying earlier turns first.

    Args:
        user_input: Text of the current user request.
        input_audio_files: Audio file references for the current request.
            ``None`` is treated the same as an empty list.
        history: Optional list of earlier turns, each a dict with ``role``
            and ``content`` keys. User turns may carry ``input_files`` and
            assistant turns ``output_files``. Entries with any other role
            are ignored. The current request is appended by this method,
            so ``history`` should contain only the turns that precede it.

    Returns:
        The ``structured_response`` entry of the agent's result.
    """
    # Build the agent lazily on first use.
    if self.agent is None:
        self.agent = await self.build_agent()

    messages = []
    for msg in history or []:
        role = msg["role"]
        if role == "user":
            content = user_prompt.format(
                user_input=msg["content"],
                # `or []` also covers a key that is present but holds None,
                # which dict.get's default would not catch.
                input_audio_files="\n".join(msg.get("input_files") or []),
            )
        elif role == "assistant":
            content = assistant_prompt.format(
                final_response=msg["content"],
                output_audio_files="\n".join(msg.get("output_files") or []),
            )
        else:
            # Unknown roles are not forwarded to the agent.
            continue
        messages.append({"role": role, "content": content})

    # The current request always goes last.
    current_input = user_prompt.format(
        user_input=user_input,
        input_audio_files="\n".join(input_audio_files or []),
    )
    messages.append({"role": "user", "content": current_input})

    res = await self.agent.ainvoke(
        {"messages": messages},
    )

    return res["structured_response"]
src/ui.py CHANGED
@@ -14,12 +14,12 @@ def get_share_url(path):
14
  return path
15
  return f"{demo.share_url}/gradio_api/file={path}"
16
 
17
- def user_input(user_message, audio_files, history):
18
  """
19
  Handle user input with text and audio files
20
  """
21
  if not user_message.strip() and not audio_files:
22
- return "", [], history
23
 
24
  # Process audio files into URLs
25
  audio_file_urls = []
@@ -33,49 +33,79 @@ def user_input(user_message, audio_files, history):
33
 
34
  audio_file_urls.append(get_share_url(file_path))
35
 
36
- # Add user message to history (no uploaded file display)
37
- history.append({"role": "user", "content": user_message})
38
- return "", [], history, audio_file_urls
 
 
39
 
40
- async def bot_response(history, audio_file_urls):
 
 
 
 
 
 
 
 
 
41
  """
42
  Generate bot response using the agent
43
  """
44
  if not history or history[-1]["role"] != "user":
45
  return history, []
46
 
47
- # Get the user message
48
- user_message = history[-1]["content"]
49
-
 
50
  # If message is empty but we have audio files, provide default message
51
  if not user_message.strip() and audio_file_urls:
52
  user_message = "Please process these audio files"
53
 
54
  try:
55
- # Use the agent's run_agent method
56
- result = await agent.run_agent(user_message, audio_file_urls or [])
57
 
58
  # Extract the final response and audio files from the result
59
  final_response = result.final_response
60
  output_audio_files = result.output_audio_files
61
 
62
- # Add assistant response to history (only final_response)
63
- history.append({"role": "assistant", "content": final_response})
 
 
 
 
 
 
 
 
 
 
64
 
65
  return history, output_audio_files
66
 
67
  except Exception as e:
68
- history.append({"role": "assistant", "content": f"❌ **Error**: {e}"})
 
 
 
 
 
 
 
 
69
  return history, []
70
 
71
- def bot_response_sync(history, audio_file_urls):
72
  """
73
  Synchronous wrapper for the async bot response
74
  """
75
  loop = asyncio.new_event_loop()
76
  asyncio.set_event_loop(loop)
77
  try:
78
- return loop.run_until_complete(bot_response(history, audio_file_urls))
79
  finally:
80
  loop.close()
81
 
@@ -89,8 +119,9 @@ def create_interface():
89
  **Supported formats**: MP3, WAV, M4A, FLAC, AAC, OGG
90
  """)
91
 
92
- # Hidden state to store audio file URLs
93
  audio_urls_state = gr.State([])
 
94
 
95
  with gr.Row():
96
  with gr.Column(scale=2):
@@ -126,34 +157,34 @@ def create_interface():
126
  send_btn = gr.Button("Ask", variant="primary", scale=1, size="lg")
127
 
128
  # Handle user input and bot response
129
- def handle_submit(message, files, history):
130
- new_msg, new_files, updated_history, audio_urls = user_input(message, files, history)
131
- return new_msg, new_files, updated_history, audio_urls
132
 
133
- def handle_bot_response(history, audio_urls):
134
- updated_history, output_files = bot_response_sync(history, audio_urls)
135
- return updated_history, output_files
136
 
137
  msg.submit(
138
  handle_submit,
139
- [msg, audio_files, chatbot],
140
- [msg, audio_files, chatbot, audio_urls_state],
141
  queue=False
142
  ).then(
143
  handle_bot_response,
144
- [chatbot, audio_urls_state],
145
- [chatbot, output_audio_files]
146
  )
147
 
148
  send_btn.click(
149
  handle_submit,
150
- [msg, audio_files, chatbot],
151
- [msg, audio_files, chatbot, audio_urls_state],
152
  queue=False
153
  ).then(
154
  handle_bot_response,
155
- [chatbot, audio_urls_state],
156
- [chatbot, output_audio_files]
157
  )
158
 
159
  return interface
 
14
  return path
15
  return f"{demo.share_url}/gradio_api/file={path}"
16
 
17
+ def user_input(user_message, audio_files, history, custom_history):
18
  """
19
  Handle user input with text and audio files
20
  """
21
  if not user_message.strip() and not audio_files:
22
+ return "", audio_files, history, custom_history
23
 
24
  # Process audio files into URLs
25
  audio_file_urls = []
 
33
 
34
  audio_file_urls.append(get_share_url(file_path))
35
 
36
+ # Add user message to history with input files
37
+ history.append({
38
+ "role": "user",
39
+ "content": user_message,
40
+ })
41
 
42
+ # Update custom history
43
+ custom_history.append({
44
+ "role": "user",
45
+ "content": user_message,
46
+ "input_files": audio_file_urls
47
+ })
48
+
49
+ return "", audio_files, history, audio_file_urls, custom_history
50
+
51
async def bot_response(history, audio_file_urls, custom_history):
    """
    Generate bot response using the agent

    Args:
        history: Chat messages shown in the UI (dicts with ``role`` and
            ``content``). The assistant reply is appended in place.
        audio_file_urls: Audio file URLs of the current submission; only
            used to decide whether an empty message gets the default text.
        custom_history: Parallel history that also records ``input_files``
            and ``output_files`` per turn. Its last entry is expected to be
            the current user turn. The assistant reply is appended in place.

    Returns:
        A ``(history, output_audio_files)`` tuple; the file list is empty
        when there is nothing to answer or the agent call failed.
    """
    if not history or history[-1]["role"] != "user":
        return history, []

    # The current turn is read from custom_history below, so bail out rather
    # than raise if it is empty or does not end with a user turn.
    if not custom_history or custom_history[-1].get("role") != "user":
        return history, []

    # Get the user message and input files
    current_turn = custom_history[-1]
    user_message = current_turn["content"]
    input_files = current_turn.get("input_files") or []

    # If message is empty but we have audio files, provide default message
    if not user_message.strip() and audio_file_urls:
        user_message = "Please process these audio files"

    # run_agent appends the current request itself, so hand it only the
    # earlier turns; passing the full list would send the request twice.
    previous_turns = custom_history[:-1]

    try:
        # Use the agent's run_agent method with history
        result = await agent.run_agent(user_message, input_files, previous_turns)

        # Extract the final response and audio files from the result
        final_response = result.final_response
        output_audio_files = result.output_audio_files
    except Exception as e:
        # UI boundary: surface the failure in the chat instead of crashing.
        error_text = f"❌ **Error**: {e}"
        history.append({
            "role": "assistant",
            "content": error_text,
        })
        custom_history.append({
            "role": "assistant",
            "content": error_text,
            "output_files": []
        })
        return history, []

    # Add assistant response to the displayed history
    history.append({
        "role": "assistant",
        "content": final_response,
    })

    # Update custom history, keeping the output files for later turns
    custom_history.append({
        "role": "assistant",
        "content": final_response,
        "output_files": output_audio_files
    })

    return history, output_audio_files
100
 
101
def bot_response_sync(history, audio_file_urls, custom_history):
    """
    Synchronous wrapper for the async bot response

    Runs ``bot_response`` to completion and returns its
    ``(history, output_audio_files)`` result.
    """
    # asyncio.run creates a fresh loop, runs the coroutine, then closes the
    # loop, so no closed loop is left installed as the thread's current loop.
    return asyncio.run(bot_response(history, audio_file_urls, custom_history))
111
 
 
119
  **Supported formats**: MP3, WAV, M4A, FLAC, AAC, OGG
120
  """)
121
 
122
+ # Hidden state to store audio file URLs and custom history
123
  audio_urls_state = gr.State([])
124
+ custom_history_state = gr.State([])
125
 
126
  with gr.Row():
127
  with gr.Column(scale=2):
 
157
  send_btn = gr.Button("Ask", variant="primary", scale=1, size="lg")
158
 
159
  # Handle user input and bot response
160
+ def handle_submit(message, files, history, custom_history):
161
+ new_msg, new_files, updated_history, audio_urls, updated_custom_history = user_input(message, files, history, custom_history)
162
+ return new_msg, new_files, updated_history, audio_urls, updated_custom_history
163
 
164
+ def handle_bot_response(history, audio_urls, custom_history):
165
+ updated_history, output_files = bot_response_sync(history, audio_urls, custom_history)
166
+ return updated_history, output_files, custom_history
167
 
168
  msg.submit(
169
  handle_submit,
170
+ [msg, audio_files, chatbot, custom_history_state],
171
+ [msg, audio_files, chatbot, audio_urls_state, custom_history_state],
172
  queue=False
173
  ).then(
174
  handle_bot_response,
175
+ [chatbot, audio_urls_state, custom_history_state],
176
+ [chatbot, output_audio_files, custom_history_state]
177
  )
178
 
179
  send_btn.click(
180
  handle_submit,
181
+ [msg, audio_files, chatbot, custom_history_state],
182
+ [msg, audio_files, chatbot, audio_urls_state, custom_history_state],
183
  queue=False
184
  ).then(
185
  handle_bot_response,
186
+ [chatbot, audio_urls_state, custom_history_state],
187
+ [chatbot, output_audio_files, custom_history_state]
188
  )
189
 
190
  return interface