Spaces: Build error
Michael Hu committed · 8b93773
1 Parent(s): 94a90b6
try fixing gr.blocks issue
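This commit drops the gr.Blocks layout in app.py and rebuilds the UI with a single gr.Interface call. For orientation, here is a minimal, self-contained sketch of the two construction styles involved, assuming only the public Gradio API; the echo function and labels are placeholders, not the app's code:

import gradio as gr

def echo(text):
    # Placeholder standing in for the app's real processing pipeline.
    return text

# Blocks style (what the old code used): the layout is built explicitly inside a
# context manager and event handlers are wired up by hand with .click().
with gr.Blocks(css=".gradio-container {max-width: 1200px !important;}") as blocks_demo:
    inp = gr.Textbox(label="Input")
    out = gr.Textbox(label="Output")
    gr.Button("Run").click(fn=echo, inputs=inp, outputs=out)

# Interface style (what the new code uses): function, inputs, and outputs are
# declared in one constructor call and Gradio generates the layout and wiring.
interface_demo = gr.Interface(fn=echo, inputs=gr.Textbox(label="Input"),
                              outputs=gr.Textbox(label="Output"))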
app.py
CHANGED
@@ -200,7 +200,7 @@ def process_audio_pipeline(
     return error_msg, "", "", None, f"System Error: {str(e)}"
 
 def create_interface():
-    """Create and configure the Gradio interface"""
+    """Create and configure the Gradio interface using gr.Interface for better compatibility"""
 
     # Initialize application
     initialize_application()
@@ -208,9 +208,6 @@ def create_interface():
     # Get supported configurations
     config = get_supported_configurations()
 
-    # Voice options mapping
-    voice_options = ["kokoro", "dia", "cosyvoice2", "dummy"]
-
     # Language options mapping
     language_options = {
         "Chinese (Mandarin)": "zh",
@@ -220,132 +217,62 @@ def create_interface():
         "English": "en"
     }
 
-
-
-
-
-        css="""
-        .gradio-container {
-            max-width: 1200px !important;
-        }
-        .audio-player {
-            width: 100%;
-        }
-        """
-    ) as interface:
-
-        gr.Markdown("# 🎧 High-Quality Audio Translation System")
-        gr.Markdown("Upload English Audio → Get Chinese Speech Output")
-
-        with gr.Row():
-            with gr.Column(scale=2):
-                # Audio input
-                audio_input = gr.Audio(
-                    label=f"Upload Audio File ({', '.join(config['audio_formats']).upper()})",
-                    type="filepath",
-                    format="wav"
-                )
-
-                # Model selection
-                asr_model = gr.Dropdown(
-                    choices=config['asr_models'],
-                    value=config['asr_models'][0] if config['asr_models'] else "parakeet",
-                    label="Speech Recognition Model",
-                    info="Choose the ASR model for speech recognition"
-                )
-
-                # Language selection
-                target_language = gr.Dropdown(
-                    choices=list(language_options.keys()),
-                    value="Chinese (Mandarin)",
-                    label="Target Language",
-                    info="Select the target language for translation"
-                )
-
-            with gr.Column(scale=1):
-                # TTS Settings
-                gr.Markdown("### TTS Settings")
-
-                voice = gr.Dropdown(
-                    choices=voice_options,
-                    value="kokoro",
-                    label="Voice"
-                )
-
-                speed = gr.Slider(
-                    minimum=config['speed_range']['min'],
-                    maximum=config['speed_range']['max'],
-                    value=1.0,
-                    step=0.1,
-                    label="Speech Speed"
-                )
-
-        # Process button
-        process_btn = gr.Button("🚀 Process Audio", variant="primary", size="lg")
-
-        # Status message
-        status_output = gr.Markdown(label="Status")
-
-        # Results section
-        with gr.Row():
-            with gr.Column(scale=2):
-                # Text outputs
-                original_text = gr.Textbox(
-                    label="Recognition Results",
-                    lines=4,
-                    max_lines=8,
-                    interactive=False
-                )
-
-                translated_text = gr.Textbox(
-                    label="Translation Results",
-                    lines=4,
-                    max_lines=8,
-                    interactive=False
-                )
-
-                # Processing details
-                with gr.Accordion("Processing Details", open=False):
-                    processing_details = gr.Code(
-                        label="Metadata",
-                        language="json",
-                        interactive=False
-                    )
-
-            with gr.Column(scale=1):
-                # Audio output
-                audio_output = gr.Audio(
-                    label="Audio Output",
-                    interactive=False
-                )
+    def process_wrapper(audio_file, asr_model_val, target_lang_val, voice_val, speed_val):
+        """Wrapper function for processing"""
+        # Map display language to code
+        target_lang_code = language_options.get(target_lang_val, "zh")
 
-        def process_wrapper(audio_file, asr_model_val, target_lang_val, voice_val, speed_val):
-            """Wrapper function for processing"""
-            # Map display language to code
-            target_lang_code = language_options.get(target_lang_val, "zh")
-
-            return process_audio_pipeline(
-                audio_file=audio_file,
-                asr_model=asr_model_val,
-                target_language=target_lang_code,
-                voice=voice_val,
-                speed=speed_val,
-                source_language="en"
-            )
-
-        process_btn.click(
-            fn=process_wrapper,
-            inputs=[audio_input, asr_model, target_language, voice, speed],
-            outputs=[status_output, original_text, translated_text, audio_output, processing_details]
-        )
-
-        # Add examples if needed
-        gr.Examples(
-            examples=[],
-            inputs=[audio_input, asr_model, target_language, voice, speed],
-            label="Example Configurations"
+        return process_audio_pipeline(
+            audio_file=audio_file,
+            asr_model=asr_model_val,
+            target_language=target_lang_code,
+            voice=voice_val,
+            speed=speed_val,
+            source_language="en"
         )
 
+    # Create the interface using gr.Interface for better compatibility
+    interface = gr.Interface(
+        fn=process_wrapper,
+        inputs=[
+            gr.Audio(label="Upload Audio File", type="filepath"),
+            gr.Dropdown(
+                choices=config['asr_models'],
+                value=config['asr_models'][0] if config['asr_models'] else "parakeet",
+                label="Speech Recognition Model"
+            ),
+            gr.Dropdown(
+                choices=list(language_options.keys()),
+                value="Chinese (Mandarin)",
+                label="Target Language"
+            ),
+            gr.Dropdown(
+                choices=config['voices'],
+                value="kokoro",
+                label="Voice"
+            ),
+            gr.Slider(
+                minimum=config['speed_range']['min'],
+                maximum=config['speed_range']['max'],
+                value=1.0,
+                step=0.1,
+                label="Speech Speed"
+            )
+        ],
+        outputs=[
+            gr.Textbox(label="Status"),
+            gr.Textbox(label="Recognition Results"),
+            gr.Textbox(label="Translation Results"),
+            gr.Audio(label="Audio Output"),
+            gr.Code(label="Processing Details", language="json")
+        ],
+        title="🎧 High-Quality Audio Translation System",
+        description="Upload English Audio → Get Chinese Speech Output",
+        examples=[
+            # Add example configurations if needed
+        ]
+    )
+
     return interface
 
 def main():
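The diff ends at def main(), whose body is unchanged and therefore not shown. Assuming the usual Gradio pattern for a Space, launching the rebuilt interface would look roughly like the sketch below; the launch() arguments are assumptions, not taken from this commit:

def main():
    # Build the gr.Interface assembled in create_interface() and start the server.
    # 0.0.0.0:7860 is the conventional host/port for a Hugging Face Space (assumed here).
    interface = create_interface()
    interface.launch(server_name="0.0.0.0", server_port=7860)

if __name__ == "__main__":
    main()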