# Audio-Agent / src/ui.py
# Ahmet Emre Şafak
# add os
# 0ce56db
import asyncio
import gradio as gr
import os
from agent import AudioAgent
# Global agent instance: stays None until update_agent() successfully builds
# an AudioAgent and stores it here.
agent = None
# Global demo instance: stays None unless this file is run as a script, in
# which case it holds the interface returned by create_interface().
demo = None
def get_share_url(path):
    """Return a URL from which ``path`` can be fetched, or ``path`` itself.

    The base URL is resolved in this order:

    1. the ``AGENT_URL`` environment variable, when set and non-empty;
    2. ``demo.share_url`` of the module-level ``demo``, when it is available;
    3. no base URL at all, in which case ``path`` is returned unchanged.

    Args:
        path: Path of the file to expose.

    Returns:
        ``"<base>/gradio_api/file=<path>"`` when a base URL is known,
        otherwise ``path`` as given.
    """
    base_url = os.environ.get('AGENT_URL')
    if not base_url and demo is not None:
        # share_url can be None (e.g. the interface was launched without a
        # share link); formatting it blindly would yield "None/gradio_api/...".
        base_url = getattr(demo, 'share_url', None)
    if not base_url:
        return path
    # Tolerate a base URL configured with a trailing slash so the result
    # never contains "//gradio_api".
    return f"{base_url.rstrip('/')}/gradio_api/file={path}"
def update_agent(model_name, temperature, api_key):
    """Rebuild the module-level ``agent`` from the given settings.

    Args:
        model_name: Model identifier forwarded to ``AudioAgent``.
        temperature: Sampling temperature; converted with ``float()``.
        api_key: API key forwarded to ``AudioAgent``.

    Returns:
        ``(True, None)`` when the agent was replaced, or ``(False, message)``
        with the stringified exception when construction failed. On failure
        the previous ``agent`` is left untouched.
    """
    global agent
    try:
        replacement = AudioAgent(
            api_key=api_key,
            temperature=float(temperature),
            model_name=model_name,
        )
    except Exception as exc:
        return False, str(exc)
    # Only publish the new instance once construction has fully succeeded.
    agent = replacement
    return True, None
def user_input(user_message, audio_files, history, custom_history, model_name, temperature, api_key):
    """Record one user turn (text plus optional audio files).

    The agent is (re)configured first via ``update_agent``. A submission with
    neither text nor files is ignored. Otherwise the message is appended to
    ``history`` (the chat display) and to ``custom_history`` (which also
    carries the URLs of the uploaded files under ``"input_files"``).

    Args:
        user_message: Text typed by the user.
        audio_files: Uploaded files (objects with a ``name`` attribute or
            plain paths), or a falsy value when nothing was uploaded.
        history: Chat messages shown in the UI; mutated in place.
        custom_history: Agent-facing message list; mutated in place.
        model_name: Forwarded to ``update_agent``.
        temperature: Forwarded to ``update_agent``.
        api_key: Forwarded to ``update_agent``.

    Returns:
        ``("", audio_files, history, custom_history)`` -- the empty string
        clears the message box.

    Raises:
        gr.Error: If ``update_agent`` reports a failure.
    """
    configured, problem = update_agent(model_name, temperature, api_key)
    if not configured:
        raise gr.Error(problem)

    # Nothing typed and nothing uploaded: leave both histories alone.
    if not (user_message.strip() or audio_files):
        return "", audio_files, history, custom_history

    # Uploaded items are either file-like objects exposing `.name` or
    # something that can be turned into a path with str().
    uploaded_urls = [
        get_share_url(item.name if hasattr(item, 'name') else str(item))
        for item in (audio_files or [])
    ]

    # The visible chat only needs the text of the turn ...
    history.append({"role": "user", "content": user_message})
    # ... while the agent-facing history also tracks the attached files.
    custom_history.append({
        "role": "user",
        "content": user_message,
        "input_files": uploaded_urls,
    })
    return "", audio_files, history, custom_history
async def bot_response(history, audio_file_urls, custom_history):
    """Run the agent on the latest user turn and record its reply.

    Args:
        history: Chat messages shown in the UI; the assistant reply is
            appended in place.
        audio_file_urls: Extra audio URLs supplied by the caller. Only used,
            together with the turn's own ``input_files``, to decide whether
            an empty message should be replaced by a default prompt.
        custom_history: Agent-facing message list whose last entry is the
            pending user turn; the assistant reply (with ``"output_files"``)
            is appended in place.

    Returns:
        ``(history, output_audio_files)``. When there is no pending user
        turn, ``(history, [])`` is returned and nothing is modified.

    Raises:
        gr.Error: If no agent is configured, or if the agent call fails (in
            which case the pending user turn is removed from both histories).
    """
    if not agent:
        raise gr.Error("Please configure the agent first")
    if not history or not custom_history or history[-1]["role"] != "user":
        return history, []

    # Get the user message and input files of the pending turn
    pending_turn = custom_history[-1]
    user_message = pending_turn["content"]
    input_files = pending_turn.get("input_files", [])

    # If the message is empty but audio was attached, provide a default
    # message. The turn's own input_files are checked as well, because the
    # audio_file_urls argument may be empty even when files were uploaded.
    if not user_message.strip() and (input_files or audio_file_urls):
        user_message = "Please process these audio files"

    try:
        # Use the agent's run_agent method with the history before this turn
        result = await agent.run_agent(user_message, input_files, custom_history[:-1])
        # Extract the final response and audio files from the result
        final_response = result["final_response"]
        output_audio_files = result["output_audio_files"]
    except Exception as e:
        # Drop the user turn that could not be answered from both histories.
        history.pop()
        custom_history.pop()
        raise gr.Error(str(e)) from e

    # Add assistant response to the visible history
    history.append({
        "role": "assistant",
        "content": final_response,
    })
    # Update custom history, which also tracks the produced files
    custom_history.append({
        "role": "assistant",
        "content": final_response,
        "output_files": output_audio_files,
    })
    return history, output_audio_files
def bot_response_sync(history, audio_file_urls, custom_history):
    """Synchronous wrapper around the async ``bot_response``.

    Runs the coroutine to completion on a fresh event loop and returns its
    result. ``asyncio.run`` is used so the loop is fully cleaned up and no
    closed loop is left installed as the calling thread's current event loop.

    Args:
        history: Forwarded to ``bot_response``.
        audio_file_urls: Forwarded to ``bot_response``.
        custom_history: Forwarded to ``bot_response``.

    Returns:
        Whatever ``bot_response`` returns: ``(history, output_audio_files)``.

    Raises:
        RuntimeError: If called from a thread that already runs an event loop.
        Exception: Anything raised by ``bot_response`` propagates unchanged.
    """
    return asyncio.run(bot_response(history, audio_file_urls, custom_history))
def create_interface():
    """Build the Gradio Blocks interface for the audio agent.

    The layout has a chat column (chatbot plus message box) and a side column
    with the model settings, the upload widget and a download widget that is
    only shown when the agent produced files. Submitting a message first
    records the user turn with ``user_input`` and then generates the reply
    with ``bot_response_sync``.

    Returns:
        The constructed ``gr.Blocks`` object; it is not launched here.
    """
    with gr.Blocks(
        title="Audio Agent - Professional Audio Processing",
        theme=gr.themes.Default(),
    ) as interface:
        gr.Markdown("""
        # Audio Agent - Your AI Audio Assistant
        Upload your audio files and tell me what you need. I'll handle the rest!
        """)

        # Hidden state to store audio file URLs and custom history
        audio_urls_state = gr.State([])
        custom_history_state = gr.State([])

        with gr.Row():
            with gr.Column(scale=4):
                chatbot = gr.Chatbot(
                    type="messages",
                    height=500,
                    show_copy_button=True,
                    show_share_button=False
                )
                msg = gr.Textbox(
                    label="Describe what you want to do?",
                    placeholder="e.g., 'Remove filler words and improve audio quality'",
                    lines=3,
                    submit_btn=True
                )

            with gr.Column(scale=1):
                # Model Configuration
                with gr.Group():
                    model_name = gr.Dropdown(
                        choices=["gpt-4.1", "gpt-4.1-mini", "gpt-4o", "o3"],
                        value="gpt-4.1",
                        label="Model",
                        info="Select the model to use"
                    )
                    temperature = gr.Slider(
                        minimum=0.0,
                        maximum=1.0,
                        value=0.3,
                        step=0.1,
                        label="Temperature",
                        info="Higher values make output more random"
                    )
                    api_key = gr.Textbox(
                        label="OpenAI API Key",
                        placeholder="sk-...",
                        type="password",
                        info="Your OpenAI API key"
                    )

                    # Set temperature to 1.0 (and lock the slider) when the
                    # o3 model is selected; unlock it for every other model.
                    def update_temperature(model):
                        if model == "o3":
                            return gr.update(value=1.0, interactive=False)
                        return gr.update(interactive=True)

                    model_name.change(
                        update_temperature,
                        inputs=[model_name],
                        outputs=[temperature]
                    )

                with gr.Group():
                    audio_files = gr.File(
                        file_count="multiple",
                        file_types=["audio"],
                        label="Upload Audio Files to Process",
                        height=150
                    )
                    output_audio_files = gr.File(
                        file_count="multiple",
                        file_types=["audio"],
                        label="Download Generated Audio",
                        height=150,
                        interactive=False,
                        visible=False  # Start hidden
                    )

        def handle_bot_response(history, audio_urls, custom_history):
            """Generate the reply and show the download widget only if needed."""
            updated_history, output_files = bot_response_sync(history, audio_urls, custom_history)
            output_visible = bool(output_files)  # True if there are files, else False
            return updated_history, gr.update(value=output_files, visible=output_visible), custom_history

        # Step 1 records the user turn (user_input already has the exact
        # signature the event needs, so it is wired in directly); step 2
        # produces the assistant reply.
        msg.submit(
            user_input,
            [msg, audio_files, chatbot, custom_history_state, model_name, temperature, api_key],
            [msg, audio_files, chatbot, custom_history_state],
            queue=False
        ).then(
            handle_bot_response,
            [chatbot, audio_urls_state, custom_history_state],
            [chatbot, output_audio_files, custom_history_state]
        )

        gr.Markdown("""
        ---
        """)

        with gr.Row():
            # The blank line before "**Analysis & Transcription:**" keeps that
            # heading out of the preceding list item when rendered.
            gr.Markdown("""
            ## 🎚️ What I Can Do For You
            **Audio Manipulation:**
            - Merge multiple audio files into one continuous track
            - Cut or trim specific sections from any file
            - Adjust volume levels (increase or decrease)
            - Normalize audio levels for consistency
            - Apply fade-in or fade-out effects for smooth transitions (Mono channel only)
            - Change playback speed (faster or slower, with pitch change)
            - Reverse audio for creative effects
            - Remove silence from beginning or end of files

            **Analysis & Transcription:** (English only)
            - Transcribe speech in audio to text
            - Analyze audio properties (duration, sample rate, etc.)
            """)
            gr.Markdown("""
            ## 💡 Example Requests
            - *"Merge these two audio files and add a fade-in effect"*
            - *"Remove the silence at the beginning of this recording"*
            - *"Transcribe the speech in this audio file"*
            - *"Increase the volume of the first track and normalize both files"*
            - *"Cut out the middle section from 1:30 to 2:45"*
            - *"Make this audio play 1.5x faster"*
            - *"Apply a fade-out effect to the end of this track"*
            """)

    return interface
if __name__ == "__main__":
    # Script entry point. The interface is stored in the module-level `demo`
    # (this assignment runs at module scope), which is the object
    # get_share_url() reads `share_url` from.
    demo = create_interface()
    demo.launch()