File size: 4,663 Bytes
cd1309d
 
 
 
 
 
 
 
2d2f2b9
cd1309d
 
 
 
2d2f2b9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cd1309d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2d2f2b9
cd1309d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2d2f2b9
cd1309d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2d2f2b9
cd1309d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
"""
Main entry point for the Audio Translation Web Application
Handles file upload, processing pipeline, and UI rendering
"""

import streamlit as st
import os
import time
import subprocess
from utils.stt import transcribe_audio
from utils.translation import translate_text
from utils.tts import generate_speech

# Hugging Face Spaces Setup Automation
def setup_huggingface_space():
    """Check the runtime prerequisites and halt the app if any are missing.

    Two checks are performed, in order:

    1. ``espeak-ng --version`` is executed; if the binary is absent or the
       command exits with a non-zero status, an error is shown in the
       sidebar and the script run is stopped via ``st.stop()``.
    2. The expected model files under ``./kokoro`` are looked up on disk; if
       any is missing, a warning is shown in the sidebar and the script run
       is stopped via ``st.stop()``.
    """
    st.sidebar.header("Space Configuration")

    # System dependency probe: the command's output is captured and discarded,
    # only success or failure of the invocation matters.
    probe_command = ["espeak-ng", "--version"]
    try:
        subprocess.run(probe_command, check=True, capture_output=True)
    except (subprocess.CalledProcessError, FileNotFoundError):
        st.sidebar.error("""
        **Missing System Dependencies!** Add this to your Space settings:
        ```txt
        apt-get update && apt-get install -y espeak-ng
        ```
        """)
        st.stop()

    # Model file probe: every listed file must exist.
    model_dir = "./kokoro"
    expected_paths = (
        f"{model_dir}/kokoro-v0_19.pth",
        f"{model_dir}/voices/af_bella.pt",
    )
    if any(not os.path.exists(path) for path in expected_paths):
        st.sidebar.warning("""
        **Missing Model Files!** Add this to your Space settings:
        ```txt
        git clone https://huggingface.co/hexgrad/Kokoro-82M ./kokoro
        ```
        """)
        st.stop()

# Initialize environment configurations
# Working directories are created at import time; existing ones are left as-is.
for _work_dir in ("temp/uploads", "temp/outputs"):
    os.makedirs(_work_dir, exist_ok=True)

def configure_page():
    """Apply the page-level Streamlit settings and inject the CSS overrides."""
    page_settings = {
        "page_title": "Audio Translator",
        "page_icon": "🎧",
        "layout": "wide",
        "initial_sidebar_state": "expanded",
    }
    st.set_page_config(**page_settings)

    # Raw CSS injected into the page: the #MainMenu and .stDeployButton rules
    # hide those elements, the remaining rules adjust margin and padding.
    css_overrides = """
        <style>
            .reportview-container {margin-top: -2em;}
            #MainMenu {visibility: hidden;}
            .stDeployButton {display:none;}
            .stAlert {padding: 20px !important;}
        </style>
    """
    st.markdown(css_overrides, unsafe_allow_html=True)

def handle_file_processing(upload_path):
    """
    Execute the complete processing pipeline:
    1. Speech-to-Text (STT)
    2. Machine Translation
    3. Text-to-Speech (TTS)

    A progress bar and a status line are rendered and updated after each
    phase.

    Args:
        upload_path: Path of the saved audio file; passed to
            ``transcribe_audio``.

    Returns:
        A tuple ``(english_text, chinese_text, output_path)`` holding the
        results of ``transcribe_audio``, ``translate_text`` and
        ``generate_speech`` respectively.

    Raises:
        Exception: Any error raised by a pipeline phase is shown in the UI
            (status line plus ``st.exception``) and then re-raised unchanged.
    """
    progress_bar = st.progress(0)
    status_text = st.empty()

    try:
        # STT Phase
        status_text.markdown("🔍 **Performing Speech Recognition...**")
        english_text = transcribe_audio(upload_path)
        progress_bar.progress(30)

        # Translation Phase
        status_text.markdown("🌐 **Translating Content...**")
        chinese_text = translate_text(english_text)
        progress_bar.progress(60)

        # TTS Phase
        status_text.markdown("🎵 **Generating Chinese Speech...**")
        output_path = generate_speech(chinese_text, language="zh")
        progress_bar.progress(100)

        # Display results
        status_text.success("✅ Processing Complete!")
        return english_text, chinese_text, output_path

    except Exception as e:
        # Report the failure in the UI, then let the caller decide what to do.
        status_text.error(f"❌ Processing Failed: {e}")
        st.exception(e)
        raise

def render_results(english_text, chinese_text, output_path):
    """Render the pipeline output in a two-column layout.

    The wider left column shows the recognized text and its translation as
    plain-text code blocks; the right column shows an audio player for
    ``output_path`` and a download button serving the same file.

    Args:
        english_text: Text shown under "Recognition Results".
        chinese_text: Text shown under "Translation Results".
        output_path: Path of the generated audio file; opened in binary mode
            for the download button.
    """
    st.divider()

    text_column, audio_column = st.columns([2, 1])

    with text_column:
        text_sections = (
            ("Recognition Results", english_text),
            ("Translation Results", chinese_text),
        )
        for heading, body in text_sections:
            st.subheader(heading)
            st.code(body, language="text")

    with audio_column:
        st.subheader("Audio Output")
        st.audio(output_path)
        with open(output_path, "rb") as audio_file:
            st.download_button(
                label="Download Audio",
                data=audio_file,
                file_name="translated_audio.wav",
                mime="audio/wav",
            )

def main():
    """Main application workflow.

    Configures the page, runs the environment checks, renders the upload
    widget and, once a file is provided, saves it under ``temp/uploads``,
    runs the processing pipeline and renders the results.
    """
    # st.set_page_config() must be the first Streamlit command on the page, so
    # it has to run before the environment checks, which write to the sidebar.
    configure_page()
    setup_huggingface_space()  # First-run configuration checks
    st.title("🎧 High-Quality Audio Translation System")
    st.markdown("Upload English Audio → Get Chinese Speech Output")

    # File uploader widget
    uploaded_file = st.file_uploader(
        "Select Audio File (MP3/WAV)",
        type=["mp3", "wav"],
        accept_multiple_files=False
    )

    if not uploaded_file:
        return

    # The file name is supplied by the client: keep only its final path
    # component so the saved file cannot land outside the upload directory.
    safe_name = os.path.basename(uploaded_file.name)
    upload_path = os.path.join("temp/uploads", safe_name)
    with open(upload_path, "wb") as f:
        f.write(uploaded_file.getbuffer())

    # Execute processing pipeline
    try:
        results = handle_file_processing(upload_path)
    except Exception:
        # handle_file_processing has already shown the error and traceback in
        # the UI before re-raising; letting it propagate would show it twice.
        return

    if results:
        render_results(*results)

# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()