Spaces:
Running
Running
"""Main entry point for the Audio Translation Web Application using Gradio | |
Handles file upload, processing pipeline, and UI rendering | |
""" | |
import logging | |
# Root logging setup: every record goes both to "app.log" and to the console.
# NOTE(review): this sits ahead of the remaining imports, presumably so that
# anything they log at import time already uses this format — confirm.
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
_log_handlers = [logging.FileHandler("app.log"), logging.StreamHandler()]
logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT, handlers=_log_handlers)

# Module-level logger used by every function in this file.
logger = logging.getLogger(__name__)
import io
import os
import time
import uuid

import gradio as gr
import numpy as np
import soundfile as sf

from utils.stt import transcribe_audio
from utils.translation import translate_text
from utils.tts import get_tts_engine
# Make sure the scratch directories for uploads and generated audio exist
# before any request handler tries to write into them.
for _temp_dir in ("temp/uploads", "temp/outputs"):
    os.makedirs(_temp_dir, exist_ok=True)
# CSS for styling the Gradio interface.
# - .gradio-container: caps the app width at 1200px and centers it.
# - .output-text: monospace, padded, light-grey rounded box; applied to
#   components that are given elem_classes=["output-text"].
css = """
.gradio-container {
max-width: 1200px;
margin: 0 auto;
}
.output-text {
font-family: monospace;
padding: 10px;
background-color: #f5f5f5;
border-radius: 4px;
}
"""
def handle_file_processing(audio_file):
    """Run the complete processing pipeline on one audio clip.

    Stages, in order:
        1. Speech-to-Text (STT)
        2. Machine Translation
        3. Text-to-Speech (TTS)

    Args:
        audio_file: Tuple containing (sample_rate, audio_data), or ``None``
            when no audio was supplied.

    Returns:
        Tuple containing (english_text, chinese_text, output_audio), where
        output_audio is itself a (sample_rate, audio_data) tuple read back
        from the file produced by the TTS engine.

    Raises:
        gr.Error: If no audio was supplied or any pipeline stage fails. The
            original exception is chained as ``__cause__``.
    """
    logger.info("Starting processing for uploaded audio")

    # Guard outside the try block so this message is not re-wrapped below.
    # Without it, unpacking None surfaces as a cryptic "cannot unpack" error.
    if audio_file is None:
        raise gr.Error("Processing Failed: no audio provided")

    # A random name avoids collisions between concurrent requests, which a
    # timestamp-based name cannot rule out.
    temp_path = os.path.join("temp/uploads", f"upload_{uuid.uuid4().hex}.wav")

    try:
        # Save the uploaded audio to a temporary file for the STT step
        sr, audio_data = audio_file
        sf.write(temp_path, audio_data, sr)
        logger.info(f"Saved uploaded audio to {temp_path}")

        # STT Phase
        logger.info("Beginning STT processing")
        english_text = transcribe_audio(temp_path)
        logger.info(f"STT completed. Text length: {len(english_text)} characters")

        # Translation Phase
        logger.info("Beginning translation")
        chinese_text = translate_text(english_text)
        logger.info(f"Translation completed. Translated length: {len(chinese_text)} characters")

        # TTS Phase
        logger.info("Beginning TTS generation")
        # Initialize TTS engine with appropriate language code for Chinese
        engine = get_tts_engine(lang_code='z')  # 'z' for Mandarin Chinese
        # Generate speech and get the file path
        output_path = engine.generate_speech(chinese_text, voice="zf_xiaobei")
        logger.info(f"TTS completed. Output file: {output_path}")

        # Load the generated audio for Gradio output. Separate names keep the
        # synthesized audio from shadowing the uploaded input above.
        output_data, output_sr = sf.read(output_path)
        return english_text, chinese_text, (output_sr, output_data)
    except Exception as e:
        logger.error(f"Processing failed: {str(e)}", exc_info=True)
        raise gr.Error(f"Processing Failed: {str(e)}") from e
    finally:
        # The upload copy is only needed while STT runs; remove it so the
        # uploads directory does not grow with every request.
        try:
            os.remove(temp_path)
        except FileNotFoundError:
            pass  # Never written (failure happened before/while saving).
        except OSError as cleanup_error:
            logger.warning(f"Could not remove temporary file {temp_path}: {cleanup_error}")
def stream_audio(chinese_text, voice, speed):
    """Stream synthesized speech in chunks for the Gradio interface.

    Args:
        chinese_text: The Chinese text to convert to speech.
        voice: The voice to use (passed through to the TTS engine).
        speed: The speech speed factor (passed through to the TTS engine).

    Yields:
        (sample_rate, chunk_data) tuples, one per chunk produced by the
        engine's ``generate_speech_stream``.

    Raises:
        gr.Error: If there is no text to synthesize.
    """
    # Typical trigger: "Stream Audio" clicked before any translation exists.
    if not chinese_text or not chinese_text.strip():
        raise gr.Error("No translated text available to stream")

    engine = get_tts_engine(lang_code='z')

    # Stream the audio in chunks
    for sample_rate, audio_chunk in engine.generate_speech_stream(
        chinese_text,
        voice=voice,
        speed=speed
    ):
        # Encode and decode each chunk as WAV in memory. This keeps the same
        # soundfile write/read conversion as a temporary .wav file would,
        # but with no disk I/O, no filename collisions between concurrent
        # streams, and nothing left behind if decoding fails.
        wav_buffer = io.BytesIO()
        sf.write(wav_buffer, audio_chunk, sample_rate, format="WAV")
        wav_buffer.seek(0)
        chunk_data, sr = sf.read(wav_buffer)
        yield (sr, chunk_data)
def create_interface():
    """Create and configure the Gradio interface.

    Layout: an input row (audio upload/recording plus TTS settings), an
    output row (recognized text, translated text, synthesized audio), and a
    set of example clips. The "Process Audio" button runs
    ``handle_file_processing``; the "Stream Audio" button streams the
    translated text through ``stream_audio`` with the selected voice/speed.

    Returns:
        Gradio Blocks interface (not yet launched).
    """
    # Dropdown label -> voice ID understood by the TTS engine.
    voice_map = {
        "Xiaobei (Female)": "zf_xiaobei",
        "Yunjian (Male)": "zm_yunjian"
    }

    def get_voice_id(voice_name):
        """Map a dropdown label to a voice ID, falling back to Xiaobei."""
        return voice_map.get(voice_name, "zf_xiaobei")

    def stream_selected_voice(text, voice, speed):
        """Stream ``text`` as speech using the voice chosen in the dropdown.

        This must be a real generator function: Gradio decides whether to
        stream by inspecting the handler itself, so a lambda that merely
        returns a generator object would be treated as a plain return value.
        """
        yield from stream_audio(text, get_voice_id(voice), speed)

    with gr.Blocks(css=css) as interface:
        gr.Markdown("# 🎧 High-Quality Audio Translation System")
        gr.Markdown("Upload English Audio → Get Chinese Speech Output")

        with gr.Row():
            with gr.Column(scale=2):
                # File upload component
                audio_input = gr.Audio(
                    label="Upload English Audio",
                    type="numpy",
                    sources=["upload", "microphone"]
                )
                # Process button
                process_btn = gr.Button("Process Audio", variant="primary")

            with gr.Column(scale=1):
                # TTS Settings. gr.Group is used because gr.Box is not
                # available in the Gradio releases that accept `sources=`.
                with gr.Group():
                    gr.Markdown("### TTS Settings")
                    voice_dropdown = gr.Dropdown(
                        choices=list(voice_map),
                        value="Xiaobei (Female)",
                        label="Select Voice"
                    )
                    speed_slider = gr.Slider(
                        minimum=0.5,
                        maximum=2.0,
                        value=1.0,
                        step=0.1,
                        label="Speech Speed"
                    )

        # Output section
        with gr.Row():
            with gr.Column(scale=2):
                # Text outputs
                english_output = gr.Textbox(
                    label="Recognition Results",
                    lines=5,
                    elem_classes=["output-text"]
                )
                chinese_output = gr.Textbox(
                    label="Translation Results",
                    lines=5,
                    elem_classes=["output-text"]
                )

            with gr.Column(scale=1):
                # Audio output
                audio_output = gr.Audio(
                    label="Audio Output",
                    type="numpy"
                )
                # Stream button
                stream_btn = gr.Button("Stream Audio")
                # Download button is automatically provided by gr.Audio

        # Set up event handlers
        process_btn.click(
            fn=handle_file_processing,
            inputs=[audio_input],
            outputs=[english_output, chinese_output, audio_output]
        )

        # Stream button handler
        stream_btn.click(
            fn=stream_selected_voice,
            inputs=[chinese_output, voice_dropdown, speed_slider],
            outputs=audio_output
        )

        # Examples
        gr.Examples(
            examples=[
                ["examples/sample1.mp3"],
                ["examples/sample2.wav"]
            ],
            inputs=audio_input
        )

    return interface
def main():
    """Application entry point: build the Gradio UI and start serving it."""
    logger.info("Starting Gradio application")
    create_interface().launch()


# Only start the server when run as a script, not when imported.
if __name__ == "__main__":
    main()