YigitSekerci committed on
Commit
211e40f
Β·
1 Parent(s): 46971d5
Files changed (4) hide show
  1. src/agent.py +6 -56
  2. src/nodes/chat.py +1 -1
  3. src/nodes/processor.py +26 -18
  4. src/ui.py +23 -73
src/agent.py CHANGED
@@ -1,10 +1,8 @@
1
- import asyncio
2
- import re
3
  from dotenv import load_dotenv
 
4
 
5
  from langchain_mcp_adapters.client import MultiServerMCPClient
6
  from langgraph.graph import StateGraph, END, START
7
- from langgraph.prebuilt import ToolNode
8
 
9
  from .state import AgentState
10
  from .nodes.chat import chat_node, chat_node_router
@@ -49,13 +47,10 @@ class AudioAgent:
49
  _graph.add_node("planner", planner_node)
50
  _graph.add_edge("planner", "audio_processor")
51
 
52
- _graph.add_node("audio_processor", processor_node)
 
53
  _graph.add_edge("audio_processor", "validator")
54
 
55
- _graph.add_node("tools", ToolNode(self.tools))
56
- _graph.add_edge("audio_processor", "tools")
57
- _graph.add_edge("tools", "audio_processor")
58
-
59
  _graph.add_node("validator", validator_node)
60
  _graph.add_conditional_edges(
61
  "validator",
@@ -98,7 +93,7 @@ class AudioAgent:
98
  clean_message = '\n'.join(clean_lines).strip()
99
  return clean_message, audio_files
100
 
101
- async def stream_chat(self, user_message: str):
102
  """Stream chat responses with node information."""
103
  if not self.is_initialized:
104
  await self.initialize()
@@ -119,55 +114,10 @@ class AudioAgent:
119
  }
120
 
121
  # Stream the graph execution
122
- current_node = None
123
- async for event in self.graph.astream(initial_state):
124
- for node_name, node_output in event.items():
125
- current_node = node_name
126
-
127
- # Yield any response content from the node
128
- if isinstance(node_output, dict):
129
- # Check for final response
130
- if "final_response" in node_output and node_output["final_response"]:
131
- response_content = node_output["final_response"]
132
- # Stream the response in chunks
133
- for i in range(0, len(response_content), 50):
134
- chunk = response_content[i:i+50]
135
- yield chunk, current_node
136
-
137
- # Check for output audio files
138
- if "output_audio_files" in node_output:
139
- for audio_file in node_output["output_audio_files"]:
140
- if audio_file:
141
- yield f"Audio Ready: {audio_file}", current_node
142
 
143
  def draw_graph(self) -> None:
144
  """Draw the graph to a file."""
145
  graph_image = self.graph.get_graph().draw_mermaid_png()
146
  with open("graph.png", "wb") as f:
147
- f.write(graph_image)
148
-
149
- async def main():
150
- """Test the agent with various scenarios."""
151
- agent = AudioAgent()
152
- await agent.initialize()
153
-
154
- # Test with audio files
155
- test_message = """Remove filler words from this audio
156
-
157
- Audio file: /path/to/audio1.mp3
158
- Audio file: /path/to/audio2.wav"""
159
-
160
- print("Testing agent with audio files...")
161
- async for chunk, node_name in agent.stream_chat(test_message):
162
- print(f"[{node_name}]: {chunk}")
163
-
164
- print("\n" + "="*50 + "\n")
165
-
166
- # Test with just a question
167
- test_question = "What audio processing tools are available?"
168
- print("Testing agent with question...")
169
- async for chunk, node_name in agent.stream_chat(test_question):
170
- print(f"[{node_name}]: {chunk}")
171
-
172
- if __name__ == "__main__":
173
- asyncio.run(main())
 
 
 
1
  from dotenv import load_dotenv
2
+ from functools import partial
3
 
4
  from langchain_mcp_adapters.client import MultiServerMCPClient
5
  from langgraph.graph import StateGraph, END, START
 
6
 
7
  from .state import AgentState
8
  from .nodes.chat import chat_node, chat_node_router
 
47
  _graph.add_node("planner", planner_node)
48
  _graph.add_edge("planner", "audio_processor")
49
 
50
+ processor_node_with_tools = partial(processor_node, tools=self.tools)
51
+ _graph.add_node("audio_processor", processor_node_with_tools)
52
  _graph.add_edge("audio_processor", "validator")
53
 
 
 
 
 
54
  _graph.add_node("validator", validator_node)
55
  _graph.add_conditional_edges(
56
  "validator",
 
93
  clean_message = '\n'.join(clean_lines).strip()
94
  return clean_message, audio_files
95
 
96
+ async def chat(self, user_message: str):
97
  """Stream chat responses with node information."""
98
  if not self.is_initialized:
99
  await self.initialize()
 
114
  }
115
 
116
  # Stream the graph execution
117
+ return self.graph.invoke(initial_state, stream_mode="values")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
 
119
def draw_graph(self) -> None:
    """Render the compiled LangGraph as a Mermaid PNG and save it to graph.png."""
    png_bytes = self.graph.get_graph().draw_mermaid_png()
    with open("graph.png", "wb") as out:
        out.write(png_bytes)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/nodes/chat.py CHANGED
@@ -29,6 +29,6 @@ def chat_node(state: AgentState) -> AgentState:
29
 
30
  def chat_node_router(state: AgentState) -> str:
31
  if state.requires_processing:
32
- return "audio_processor"
33
  else:
34
  return "end"
 
29
 
30
  def chat_node_router(state: AgentState) -> str:
31
  if state.requires_processing:
32
+ return "planner"
33
  else:
34
  return "end"
src/nodes/processor.py CHANGED
@@ -1,24 +1,32 @@
1
- from langchain_openai import ChatOpenAI
2
  from langchain_core.prompts import ChatPromptTemplate
3
  from src.state import AgentState
4
- from operator import itemgetter
5
- from langchain_core.runnables import RunnableParallel
6
 
7
- def processor_node(state: AgentState) -> AgentState:
8
- llm = ChatOpenAI(model="gpt-4.1")
9
- llm = llm.with_structured_output(AgentState)
10
-
11
- prompt = ChatPromptTemplate.from_messages([
12
- ("system", "You are processor that processes the plan and generates a final response to the user."),
13
- ("user", "Current state: {state}")
14
- ])
15
 
16
- chain = (
17
- RunnableParallel({
18
- "state": itemgetter("state")
19
- })
20
- | prompt
21
- | llm
22
  )
23
 
24
- return chain.invoke({"state": state})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from langchain_core.prompts import ChatPromptTemplate
2
  from src.state import AgentState
3
+ from langgraph.prebuilt import create_react_agent
4
+ from pydantic import BaseModel, Field
5
 
6
class ProcessorState(BaseModel):
    """Structured output produced by the ReAct processor agent.

    Captures what the agent accomplished so the caller can merge it
    back into the overall AgentState.
    """

    # default_factory is pydantic's recommended way to declare mutable
    # defaults — avoids relying on the library deep-copying a shared [].
    steps_details: list[str] = Field(default_factory=list, description="The steps that have been completed.")
    final_response: str = Field(default="", description="The final response to the user.")
    output_audio_files: list[str] = Field(default_factory=list, description="The output audio files.")
 
 
 
 
10
 
11
def processor_node(state: AgentState, tools: list) -> AgentState:
    """Execute the plan with a tool-using ReAct agent and fold its results into the state.

    Builds a create_react_agent over the supplied tools, feeds it the current
    state as a user message, and returns a new AgentState whose step details
    and output audio files are extended with the agent's structured response.
    """
    react_agent = create_react_agent(
        model="gpt-4.1",
        tools=tools,
        prompt="You are processor that processes the plan and generates a final response to the user.",
        response_format=ProcessorState,
    )

    # The agent returns a dict; the structured_response key holds the
    # ProcessorState parsed from the model's final answer.
    invocation = react_agent.invoke(
        {"messages": [{"role": "user", "content": f"Current state: {state}"}]}
    )
    processed: ProcessorState = invocation["structured_response"]

    # Rebuild the state explicitly: accumulate step details and audio file
    # lists, replace the final response, and carry everything else through.
    merged = AgentState(
        user_input=state.user_input,
        plan=state.plan,
        requires_processing=state.requires_processing,
        validator_feedback=state.validator_feedback,
        input_audio_files=state.input_audio_files,
        steps_details=state.steps_details + processed.steps_details,
        final_response=processed.final_response,
        output_audio_files=state.output_audio_files + processed.output_audio_files,
    )
    return merged
src/ui.py CHANGED
@@ -34,10 +34,10 @@ def user_input(user_message, audio_files, history):
34
 
35
  async def bot_response(history):
36
  """
37
- Generate bot response showing only the final output
38
  """
39
  if not history or history[-1]["role"] != "user":
40
- return
41
 
42
  user_message = history[-1]["content"]
43
 
@@ -46,45 +46,29 @@ async def bot_response(history):
46
  if not agent.is_initialized:
47
  await agent.initialize()
48
 
49
- # Add empty assistant message to start streaming
50
- history.append({"role": "assistant", "content": ""})
51
- yield history
 
 
 
52
 
53
- # Track final response and audio files
54
- final_response = ""
55
- processed_audio_urls = []
 
 
 
 
 
 
 
 
56
 
57
- # Stream the response and collect final output
58
- async for chunk, node_name in agent.stream_chat(user_message):
59
- # Check if this chunk contains an audio URL
60
- if "Audio Ready" in chunk and "http" in chunk:
61
- processed_audio_urls.append(chunk)
62
- continue
63
-
64
- # Only show output from chat and response_formatter nodes
65
- if node_name in ["chat", "response_formatter"]:
66
- final_response += chunk
67
-
68
- # Build simple display with final response and audio files
69
- formatted_content = final_response
70
-
71
- # Add processed audio files section if any
72
- if processed_audio_urls:
73
- formatted_content += "\n\n## 🎡 Generated Audio Files\n\n"
74
- for audio_url in processed_audio_urls:
75
- formatted_content += f"{audio_url}\n"
76
-
77
- # Update the chat history
78
- history[-1]["content"] = formatted_content.rstrip()
79
- yield history
80
-
81
  except Exception as e:
82
- # Update the last message with error or add error message
83
- if history and history[-1]["role"] == "assistant":
84
- history[-1]["content"] = f"❌ **Error in Agent Processing**: {str(e)}\n\n*Please try rephrasing your request or check if audio files are properly uploaded.*"
85
- else:
86
- history.append({"role": "assistant", "content": f"❌ **Error**: {str(e)}"})
87
- yield history
88
 
89
  def bot_response_sync(history):
90
  """
@@ -93,12 +77,7 @@ def bot_response_sync(history):
93
  loop = asyncio.new_event_loop()
94
  asyncio.set_event_loop(loop)
95
  try:
96
- async_gen = bot_response(history)
97
- while True:
98
- try:
99
- yield loop.run_until_complete(async_gen.__anext__())
100
- except StopAsyncIteration:
101
- break
102
  finally:
103
  loop.close()
104
 
@@ -110,13 +89,6 @@ def create_interface():
110
  title="Audio Agent - Professional Audio Processing",
111
  theme=gr.themes.Soft(),
112
  css="""
113
- .audio-upload-area {
114
- border: 2px dashed #ccc;
115
- border-radius: 10px;
116
- padding: 20px;
117
- text-align: center;
118
- margin: 10px 0;
119
- }
120
  .processed-audio {
121
  background: #f0f9ff;
122
  border: 1px solid #0891b2;
@@ -124,28 +96,6 @@ def create_interface():
124
  padding: 15px;
125
  margin: 10px 0;
126
  }
127
- .thinking-section h2 {
128
- background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
129
- color: white;
130
- padding: 10px 15px;
131
- border-radius: 8px;
132
- margin: 15px 0 10px 0;
133
- }
134
- .thinking-section h3 {
135
- background: #f8f9fa;
136
- border-left: 4px solid #667eea;
137
- padding: 8px 12px;
138
- margin: 10px 0;
139
- border-radius: 0 6px 6px 0;
140
- }
141
- .cot-container {
142
- background: #ffffff;
143
- border: 1px solid #e1e5e9;
144
- border-radius: 12px;
145
- padding: 20px;
146
- margin: 10px 0;
147
- box-shadow: 0 2px 4px rgba(0,0,0,0.05);
148
- }
149
  """
150
  ) as demo:
151
 
 
34
 
35
  async def bot_response(history):
36
  """
37
+ Generate bot response using the simple chat method
38
  """
39
  if not history or history[-1]["role"] != "user":
40
+ return history
41
 
42
  user_message = history[-1]["content"]
43
 
 
46
  if not agent.is_initialized:
47
  await agent.initialize()
48
 
49
+ # Get the response from the agent
50
+ result = await agent.chat(user_message)
51
+
52
+ # Extract the final response and audio files from the result
53
+ final_response = result.get("final_response", "")
54
+ output_audio_files = result.get("output_audio_files", [])
55
 
56
+ # Format the response
57
+ formatted_content = final_response
58
+
59
+ # Add processed audio files section if any
60
+ if output_audio_files:
61
+ formatted_content += "\n\n## 🎡 Generated Audio Files\n\n"
62
+ for audio_file in output_audio_files:
63
+ formatted_content += f"Audio Ready: {audio_file}\n"
64
+
65
+ # Add assistant response to history
66
+ history.append({"role": "assistant", "content": formatted_content.rstrip()})
67
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  except Exception as e:
69
+ history.append({"role": "assistant", "content": f"❌ **Error**: {e}"})
70
+
71
+ return history
 
 
 
72
 
73
  def bot_response_sync(history):
74
  """
 
77
  loop = asyncio.new_event_loop()
78
  asyncio.set_event_loop(loop)
79
  try:
80
+ return loop.run_until_complete(bot_response(history))
 
 
 
 
 
81
  finally:
82
  loop.close()
83
 
 
89
  title="Audio Agent - Professional Audio Processing",
90
  theme=gr.themes.Soft(),
91
  css="""
 
 
 
 
 
 
 
92
  .processed-audio {
93
  background: #f0f9ff;
94
  border: 1px solid #0891b2;
 
96
  padding: 15px;
97
  margin: 10px 0;
98
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  """
100
  ) as demo:
101