qqwjq1981 committed
Commit 306be63 · verified · 1 Parent(s): 6c15ec0

Update app.py

Files changed (1): app.py (+30, -10)

app.py CHANGED
@@ -281,15 +281,22 @@ def transcribe_video_with_speakers(video_path):
 
     return transcript_with_speakers, detected_language
 
-def segment_audio_from_video(video_path):
+def segment_audio_from_video(video_path, separate_background=True):
     # Extract audio from video
     video = VideoFileClip(video_path)
     audio_path = "audio.wav"
     video.audio.write_audiofile(audio_path)
     logger.info(f"Audio extracted from video: {audio_path}")
 
-    segment_result, speech_audio_path = segment_background_audio(audio_path)
-    print(f"Saved non-speech (background) audio to local")
+    segment_result = None
+    speech_audio_path = audio_path
+
+    if separate_background:
+        # Assuming segment_background_audio returns (segment_result, speech_audio_path)
+        segment_result, speech_audio_path = segment_background_audio(audio_path)
+        print("Saved non-speech (background) audio locally")
+    else:
+        logger.info("Background audio separation skipped (separate_background=False).")
 
     device = "cuda" if torch.cuda.is_available() else "cpu"
     logger.info(f"Using device: {device}")
@@ -1333,7 +1340,8 @@ def calibrated_speed(text, desired_duration):
     slope = (1.7 - 1.0) / (25.2 - 14)
     return 1.0 + slope * (cps - 14)
 
-def upload_and_manage(file, target_language, process_mode):
+# Modified upload_and_manage function
+def upload_and_manage(file, target_language, process_mode, separate_background_audio):  # Added separate_background_audio
     if file is None:
         logger.info("No file uploaded. Please upload a video/audio file.")
         return None, [], None, "No file uploaded. Please upload a video/audio file."
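
Gradio binds click inputs to handler parameters by position, which is why the new parameter is appended last. A sketch of the mapping (component names are the ones wired up in the @@ -1475 hunk below):

```python
# Positional mapping from gr.Button.click inputs to handler parameters:
# inputs=[file_input, language_input, process_mode, separate_background_checkbox]
#             |              |              |                  |
#             v              v              v                  v
def upload_and_manage(file, target_language, process_mode, separate_background_audio):
    ...
```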
@@ -1343,7 +1351,7 @@ def upload_and_manage(file, target_language, process_mode):
     logger.info(f"Started processing file: {file.name}")
 
     # Define paths for audio and output files
-    audio_path = "audio.wav"
+    audio_path = "audio.wav"  # This will be the full extracted audio
     output_video_path = "output_video.mp4"
     voiceover_path = "voiceover.wav"
     translated_json_filepath = "translated_output.json"
@@ -1352,7 +1360,11 @@ def upload_and_manage(file, target_language, process_mode):
 
     # Step 1: Segment audio from the uploaded video/audio file
     logger.info("Segmenting audio...")
-    temp_audio_for_vad, background_audio_path, speech_segments = segment_audio_from_video(file.name)
+    # Pass the separate_background_audio boolean from the Gradio input
+    temp_audio_for_vad, background_audio_path, speech_segments = segment_audio_from_video(
+        file.name,
+        separate_background=separate_background_audio
+    )
     if not speech_segments:
         raise Exception("No speech segments detected in the audio.")
     logger.info(f"Audio segmentation completed. Found {len(speech_segments)} segments.")
@@ -1386,7 +1398,7 @@ def upload_and_manage(file, target_language, process_mode):
     with open(translated_json_filepath, "w", encoding="utf-8") as f:
         json.dump(translated_json, f, ensure_ascii=False, indent=4)
     logger.info(f"Translated JSON saved to {translated_json_filepath}")
-
+
     # Step 3: Add transcript to video based on timestamps
     logger.info("Adding translated transcript to video...")
     add_transcript_voiceover(file.name, translated_json, output_video_path, process_mode, target_language, background_audio_path = background_audio_path)
@@ -1430,7 +1442,15 @@ def build_interface():
            process_mode = gr.Radio(choices=[("Transcription Only", 1),
                                             ("Transcription with Premium Voice", 2),
                                             ("Transcription with Voice Clone", 3)],
-                                    label="Choose Processing Type", value=1)
+                                    label="Choose Processing Type", value=1)
+
+            # New Gradio Checkbox for background audio separation
+            separate_background_checkbox = gr.Checkbox(
+                label="Separate Background Audio (Recommended)",
+                value=True,  # Default to True
+                interactive=True
+            )
+
            submit_button = gr.Button("Post and Process")
        with gr.Column(scale=8):
            gr.Markdown("## Edit Translations")
@@ -1475,7 +1495,7 @@ def build_interface():
    )
    submit_button.click(
        upload_and_manage,
-        inputs=[file_input, language_input, process_mode],
+        inputs=[file_input, language_input, process_mode, separate_background_checkbox],  # Add checkbox as input
        outputs=[editable_table, processed_video_output, translated_json_download, elapsed_time_display]
    )
    # Connect submit button to save_feedback_db function
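
For context, a self-contained toy of the checkbox-to-handler pattern this commit adds (toy names, not from app.py):

```python
import gradio as gr

def process(text, do_separate):
    # do_separate receives the checkbox's boolean value positionally
    return f"separate_background={do_separate}: {text}"

with gr.Blocks() as demo:
    text_in = gr.Textbox(label="Input")
    separate = gr.Checkbox(label="Separate Background Audio", value=True)
    out = gr.Textbox(label="Result")
    gr.Button("Run").click(process, inputs=[text_in, separate], outputs=out)

demo.launch()
```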
@@ -1489,4 +1509,4 @@ def build_interface():
    tts_model = None
    # Launch the Gradio interface
    demo = build_interface()
-    demo.launch()
+    demo.launch()