""" Main entry point for the Audio Translation Web Application Handles file upload, processing pipeline, and UI rendering """ # Configure logging first import logging logging.basicConfig( level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', handlers=[ logging.FileHandler("app.log"), logging.StreamHandler() ] ) logger = logging.getLogger(__name__) import streamlit as st import os import time import subprocess from utils.stt import transcribe_audio from utils.translation import translate_text from utils.tts_dummy import generate_speech # Hugging Face Spaces Setup Automation def setup_huggingface_space(): """Automatically configure Hugging Face Space requirements""" logger.debug("Running Hugging Face space setup") st.sidebar.header("Space Configuration") try: subprocess.run(["espeak-ng", "--version"], check=True, capture_output=True) logger.debug("espeak-ng verification successful") except (FileNotFoundError, subprocess.CalledProcessError): logger.error("Missing espeak-ng dependency") st.sidebar.error(""" **Missing System Dependencies!** Add this to your Space settings: ```txt apt-get update && apt-get install -y espeak-ng ``` """) st.stop() model_dir = "./kokoro" required_files = [ f"{model_dir}/kokoro-v0_19.pth", f"{model_dir}/voices/af_bella.pt" ] if not all(os.path.exists(f) for f in required_files): logger.error("Missing model files in %s", model_dir) st.sidebar.warning(""" **Missing Model Files!** Add this to your Space settings: ```txt git clone https://huggingface.co/hexgrad/Kokoro-82M ./kokoro ``` """) st.stop() # Initialize environment configurations os.makedirs("temp/uploads", exist_ok=True) os.makedirs("temp/outputs", exist_ok=True) def configure_page(): """Set up Streamlit page configuration""" logger.debug("Configuring Streamlit page") st.set_page_config( page_title="Audio Translator", page_icon="🎧", layout="wide", initial_sidebar_state="expanded" ) st.markdown(""" """, unsafe_allow_html=True) def handle_file_processing(upload_path): """ Execute the complete processing pipeline: 1. Speech-to-Text (STT) 2. Machine Translation 3. Text-to-Speech (TTS) """ logger.info(f"Starting processing for: {upload_path}") progress_bar = st.progress(0) status_text = st.empty() try: # STT Phase logger.debug("Beginning STT processing") status_text.markdown("🔍 **Performing Speech Recognition...**") with st.spinner("Initializing Whisper model..."): english_text = transcribe_audio(upload_path) progress_bar.progress(30) logger.info(f"STT completed. Text length: {len(english_text)} characters") # Translation Phase logger.debug("Beginning translation") status_text.markdown("🌐 **Translating Content...**") with st.spinner("Loading translation model..."): chinese_text = translate_text(english_text) progress_bar.progress(60) logger.info(f"Translation completed. Translated length: {len(chinese_text)} characters") # TTS Phase logger.debug("Beginning TTS generation") status_text.markdown("🎵 **Generating Chinese Speech...**") with st.spinner("Initializing TTS engine..."): output_path = generate_speech(chinese_text, language="zh") progress_bar.progress(100) logger.info(f"TTS completed. Output file: {output_path}") # Display results # Display results status_text.success("✅ Processing Complete!") return english_text, chinese_text, output_path except Exception as e: logger.error(f"Processing failed: {str(e)}", exc_info=True) status_text.error(f"❌ Processing Failed: {str(e)}") st.exception(e) raise def render_results(english_text, chinese_text, output_path): """Display processing results in organized columns""" logger.debug("Rendering results") st.divider() col1, col2 = st.columns([2, 1]) with col1: st.subheader("Recognition Results") st.code(english_text, language="text") st.subheader("Translation Results") st.code(chinese_text, language="text") with col2: st.subheader("Audio Output") st.audio(output_path) with open(output_path, "rb") as f: st.download_button( label="Download Audio", data=f, file_name="translated_audio.wav", mime="audio/wav" ) def main(): """Main application workflow""" logger.info("Starting application") # setup_huggingface_space() # First-run configuration checks configure_page() st.title("🎧 High-Quality Audio Translation System") st.markdown("Upload English Audio → Get Chinese Speech Output") uploaded_file = st.file_uploader( "Select Audio File (MP3/WAV)", type=["mp3", "wav"], accept_multiple_files=False ) if uploaded_file: logger.info(f"File uploaded: {uploaded_file.name}") upload_path = os.path.join("temp/uploads", uploaded_file.name) with open(upload_path, "wb") as f: f.write(uploaded_file.getbuffer()) results = handle_file_processing(upload_path) if results: render_results(*results) if __name__ == "__main__": main()