Michael Hu committed
Commit 8b93773 · 1 Parent(s): 94a90b6

try fixing gr.blocks issue

Files changed (1):
  1. app.py +54 -127
app.py CHANGED
@@ -200,7 +200,7 @@ def process_audio_pipeline(
         return error_msg, "", "", None, f"System Error: {str(e)}"
 
 def create_interface():
-    """Create and configure the Gradio interface"""
+    """Create and configure the Gradio interface using gr.Interface for better compatibility"""
 
     # Initialize application
     initialize_application()
@@ -208,9 +208,6 @@ def create_interface():
     # Get supported configurations
     config = get_supported_configurations()
 
-    # Voice options mapping
-    voice_options = ["kokoro", "dia", "cosyvoice2", "dummy"]
-
     # Language options mapping
     language_options = {
         "Chinese (Mandarin)": "zh",
@@ -220,132 +217,62 @@
         "English": "en"
     }
 
-    # Create the interface
-    with gr.Blocks(
-        title="🎧 High-Quality Audio Translation System",
-        theme=gr.themes.Soft(),
-        css="""
-        .gradio-container {
-            max-width: 1200px !important;
-        }
-        .audio-player {
-            width: 100%;
-        }
-        """
-    ) as interface:
-
-        gr.Markdown("# 🎧 High-Quality Audio Translation System")
-        gr.Markdown("Upload English Audio → Get Chinese Speech Output")
-
-        with gr.Row():
-            with gr.Column(scale=2):
-                # Audio input
-                audio_input = gr.Audio(
-                    label=f"Upload Audio File ({', '.join(config['audio_formats']).upper()})",
-                    type="filepath",
-                    format="wav"
-                )
-
-                # Model selection
-                asr_model = gr.Dropdown(
-                    choices=config['asr_models'],
-                    value=config['asr_models'][0] if config['asr_models'] else "parakeet",
-                    label="Speech Recognition Model",
-                    info="Choose the ASR model for speech recognition"
-                )
-
-                # Language selection
-                target_language = gr.Dropdown(
-                    choices=list(language_options.keys()),
-                    value="Chinese (Mandarin)",
-                    label="Target Language",
-                    info="Select the target language for translation"
-                )
-
-            with gr.Column(scale=1):
-                # TTS Settings
-                gr.Markdown("### TTS Settings")
-
-                voice = gr.Dropdown(
-                    choices=voice_options,
-                    value="kokoro",
-                    label="Voice"
-                )
-
-                speed = gr.Slider(
-                    minimum=config['speed_range']['min'],
-                    maximum=config['speed_range']['max'],
-                    value=1.0,
-                    step=0.1,
-                    label="Speech Speed"
-                )
-
-        # Process button
-        process_btn = gr.Button("🚀 Process Audio", variant="primary", size="lg")
-
-        # Status message
-        status_output = gr.Markdown(label="Status")
-
-        # Results section
-        with gr.Row():
-            with gr.Column(scale=2):
-                # Text outputs
-                original_text = gr.Textbox(
-                    label="Recognition Results",
-                    lines=4,
-                    max_lines=8,
-                    interactive=False
-                )
-
-                translated_text = gr.Textbox(
-                    label="Translation Results",
-                    lines=4,
-                    max_lines=8,
-                    interactive=False
-                )
-
-                # Processing details
-                with gr.Accordion("Processing Details", open=False):
-                    processing_details = gr.Code(
-                        label="Metadata",
-                        language="json",
-                        interactive=False
-                    )
-
-            with gr.Column(scale=1):
-                # Audio output
-                audio_output = gr.Audio(
-                    label="Audio Output",
-                    interactive=False
-                )
+    def process_wrapper(audio_file, asr_model_val, target_lang_val, voice_val, speed_val):
+        """Wrapper function for processing"""
+        # Map display language to code
+        target_lang_code = language_options.get(target_lang_val, "zh")
 
-        # Wire up the processing function
-        def process_wrapper(audio_file, asr_model_val, target_lang_val, voice_val, speed_val):
-            # Map display language to code
-            target_lang_code = language_options.get(target_lang_val, "zh")
-
-            return process_audio_pipeline(
-                audio_file=audio_file,
-                asr_model=asr_model_val,
-                target_language=target_lang_code,
-                voice=voice_val,
-                speed=speed_val,
-                source_language="en"
-            )
-
-        process_btn.click(
-            fn=process_wrapper,
-            inputs=[audio_input, asr_model, target_language, voice, speed],
-            outputs=[status_output, original_text, translated_text, audio_output, processing_details]
-        )
-
-        # Add examples if needed
-        gr.Examples(
-            examples=[],
-            inputs=[audio_input, asr_model, target_language, voice, speed],
-            label="Example Configurations"
+        return process_audio_pipeline(
+            audio_file=audio_file,
+            asr_model=asr_model_val,
+            target_language=target_lang_code,
+            voice=voice_val,
+            speed=speed_val,
+            source_language="en"
         )
 
+    # Create the interface using gr.Interface for better compatibility
+    interface = gr.Interface(
+        fn=process_wrapper,
+        inputs=[
+            gr.Audio(label="Upload Audio File", type="filepath"),
+            gr.Dropdown(
+                choices=config['asr_models'],
+                value=config['asr_models'][0] if config['asr_models'] else "parakeet",
+                label="Speech Recognition Model"
+            ),
+            gr.Dropdown(
+                choices=list(language_options.keys()),
+                value="Chinese (Mandarin)",
+                label="Target Language"
+            ),
+            gr.Dropdown(
+                choices=config['voices'],
+                value="kokoro",
+                label="Voice"
+            ),
+            gr.Slider(
+                minimum=config['speed_range']['min'],
+                maximum=config['speed_range']['max'],
+                value=1.0,
+                step=0.1,
+                label="Speech Speed"
+            )
+        ],
+        outputs=[
+            gr.Textbox(label="Status"),
+            gr.Textbox(label="Recognition Results"),
+            gr.Textbox(label="Translation Results"),
+            gr.Audio(label="Audio Output"),
+            gr.Code(label="Processing Details", language="json")
+        ],
+        title="🎧 High-Quality Audio Translation System",
+        description="Upload English Audio → Get Chinese Speech Output",
+        examples=[
+            # Add example configurations if needed
+        ]
+    )
+
     return interface
 
 def main():
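
For reference, a minimal, self-contained sketch of the gr.Interface pattern the new create_interface() adopts: one function wired to typed inputs and outputs instead of a hand-built gr.Blocks layout. The echo_pipeline function, its dropdown choices, and the slider range below are illustrative placeholders, not code from this repository.

import gradio as gr

def echo_pipeline(audio_file, asr_model, target_language, speed):
    # Stand-in for the real ASR -> translation -> TTS pipeline (not the repo's
    # process_audio_pipeline): it only echoes the uploaded audio back and
    # reports the options it received.
    status = "OK" if audio_file else "No audio provided"
    recognized = f"(recognized text would appear here; model={asr_model})"
    translated = f"(translated text would appear here; target={target_language}, speed={speed})"
    return status, recognized, translated, audio_file

demo = gr.Interface(
    fn=echo_pipeline,
    inputs=[
        gr.Audio(label="Upload Audio File", type="filepath"),
        gr.Dropdown(choices=["parakeet"], value="parakeet", label="Speech Recognition Model"),
        gr.Dropdown(choices=["Chinese (Mandarin)", "English"], value="Chinese (Mandarin)", label="Target Language"),
        gr.Slider(minimum=0.5, maximum=2.0, value=1.0, step=0.1, label="Speech Speed"),
    ],
    outputs=[
        gr.Textbox(label="Status"),
        gr.Textbox(label="Recognition Results"),
        gr.Textbox(label="Translation Results"),
        gr.Audio(label="Audio Output"),
    ],
    title="Audio Translation (sketch)",
    description="Upload English audio, get placeholder outputs back.",
)

if __name__ == "__main__":
    demo.launch()

Compared with the removed gr.Blocks version, this shape gives up the custom two-column layout, CSS, and the "Processing Details" accordion in exchange for Gradio's stock Interface layout, which is the trade the commit makes to work around the gr.Blocks issue.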