Update app.py
app.py
CHANGED
@@ -281,15 +281,22 @@ def transcribe_video_with_speakers(video_path):
 
     return transcript_with_speakers, detected_language
 
-def segment_audio_from_video(video_path):
+def segment_audio_from_video(video_path, separate_background = True):
     # Extract audio from video
     video = VideoFileClip(video_path)
     audio_path = "audio.wav"
     video.audio.write_audiofile(audio_path)
     logger.info(f"Audio extracted from video: {audio_path}")
 
-    segment_result, speech_audio_path = segment_background_audio(audio_path)
-    print(f"Saved non-speech (background) audio to local")
+    segment_result = None
+    speech_audio_path = audio_path
+
+    if separate_background:
+        # Assuming segment_background_audio returns a tuple (segment_result, speech_audio_path)
+        segment_result, speech_audio_path = segment_background_audio(audio_path)
+        print(f"Saved non-speech (background) audio to local")
+    else:
+        logger.info("Background audio separation skipped as per separate_background=False.")
 
     device = "cuda" if torch.cuda.is_available() else "cpu"
     logger.info(f"Using device: {device}")
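Note on the new default path: when separate_background is False, segment_result stays None and speech_audio_path falls back to the full extracted mix, so everything downstream must tolerate a missing background track. A minimal sketch of that pattern, with separate_track() as a hypothetical stand-in for the app's segment_background_audio():

def separate_track(audio_path):
    # Hypothetical stand-in: a real implementation would run a
    # source-separation model and write both tracks to disk.
    return "background_audio.wav", "speech_audio.wav"

def extract_speech(audio_path, separate_background=True):
    background = None      # stays None when separation is skipped
    speech = audio_path    # fall back to the full mix
    if separate_background:
        background, speech = separate_track(audio_path)
    return background, speech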
@@ -1333,7 +1340,8 @@ def calibrated_speed(text, desired_duration):
     slope = (1.7 - 1.0) / (25.2 - 14)
     return 1.0 + slope * (cps - 14)
 
-def upload_and_manage(file, target_language, process_mode):
+# Modified upload_and_manage function
+def upload_and_manage(file, target_language, process_mode, separate_background_audio): # Added separate_background_audio
     if file is None:
         logger.info("No file uploaded. Please upload a video/audio file.")
         return None, [], None, "No file uploaded. Please upload a video/audio file."
@@ -1343,7 +1351,7 @@ def upload_and_manage(file, target_language, process_mode):
     logger.info(f"Started processing file: {file.name}")
 
     # Define paths for audio and output files
-    audio_path = "audio.wav"
+    audio_path = "audio.wav" # This will be the full extracted audio
     output_video_path = "output_video.mp4"
     voiceover_path = "voiceover.wav"
     translated_json_filepath = "translated_output.json"
@@ -1352,7 +1360,11 @@ def upload_and_manage(file, target_language, process_mode):
 
     # Step 1: Segment audio from the uploaded video/audio file
     logger.info("Segmenting audio...")
-    temp_audio_for_vad, background_audio_path, speech_segments = segment_audio_from_video(file.name)
+    # Pass the separate_background_audio boolean from the Gradio input
+    temp_audio_for_vad, background_audio_path, speech_segments = segment_audio_from_video(
+        file.name,
+        separate_background=separate_background_audio
+    )
     if not speech_segments:
        raise Exception("No speech segments detected in the audio.")
     logger.info(f"Audio segmentation completed. Found {len(speech_segments)} segments.")
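The call above unpacks three values, so segment_audio_from_video must always return (temp_audio_for_vad, background_audio_path, speech_segments), including on the skip path. A sketch of that return contract; the None-when-skipped handling is an assumption about the skip path, and the stub below is illustrative rather than the app's code:

def segment_audio_from_video_stub(path, separate_background=True):
    # Placeholder speech segments; stands in for the real VAD output.
    speech_segments = [(0.0, 1.5)]
    background_audio_path = "background_audio.wav" if separate_background else None
    return "audio.wav", background_audio_path, speech_segments

audio, background, segments = segment_audio_from_video_stub("input.mp4", separate_background=False)
assert background is None and segments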
@@ -1386,7 +1398,7 @@ def upload_and_manage(file, target_language, process_mode):
     with open(translated_json_filepath, "w", encoding="utf-8") as f:
         json.dump(translated_json, f, ensure_ascii=False, indent=4)
     logger.info(f"Translated JSON saved to {translated_json_filepath}")
-
+
     # Step 3: Add transcript to video based on timestamps
     logger.info("Adding translated transcript to video...")
     add_transcript_voiceover(file.name, translated_json, output_video_path, process_mode, target_language, background_audio_path = background_audio_path)
@@ -1430,7 +1442,15 @@ def build_interface():
     process_mode = gr.Radio(choices=[("Transcription Only", 1),
                                      ("Transcription with Premium Voice", 2),
                                      ("Transcription with Voice Clone", 3)],
-                            label="Choose Processing Type", value=1)
+                                     label="Choose Processing Type", value=1)
+
+    # New Gradio Checkbox for background audio separation
+    separate_background_checkbox = gr.Checkbox(
+        label="Separate Background Audio (Recommended)",
+        value=True, # Default to True
+        interactive=True
+    )
+
     submit_button = gr.Button("Post and Process")
     with gr.Column(scale=8):
         gr.Markdown("## Edit Translations")
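The checkbox only takes effect once it is also wired into the click handler's inputs (done in the next hunk): Gradio passes inputs positionally, so the order of the inputs list must match the handler's parameter order. A self-contained sketch of the pattern, with a toy handler standing in for upload_and_manage:

import gradio as gr

def process(separate_background: bool) -> str:
    return "separating background" if separate_background else "skipping separation"

with gr.Blocks() as demo:
    separate_background_checkbox = gr.Checkbox(
        label="Separate Background Audio (Recommended)", value=True, interactive=True
    )
    submit = gr.Button("Post and Process")
    status = gr.Textbox(label="Status")
    # The checkbox's value arrives as the handler's first positional argument.
    submit.click(process, inputs=[separate_background_checkbox], outputs=[status])

# demo.launch()  # uncomment to run locally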
@@ -1475,7 +1495,7 @@ def build_interface():
     )
     submit_button.click(
         upload_and_manage,
-        inputs=[file_input, language_input, process_mode],
+        inputs=[file_input, language_input, process_mode, separate_background_checkbox], # Add checkbox as input
         outputs=[editable_table, processed_video_output, translated_json_download, elapsed_time_display]
     )
     # Connect submit button to save_feedback_db function
@@ -1489,4 +1509,4 @@ def build_interface():
     tts_model = None
     # Launch the Gradio interface
     demo = build_interface()
-demo.launch()
+demo.launch()