Update app.py
app.py CHANGED
@@ -300,7 +300,7 @@ def segment_audio_from_video(video_path):
         logger.info("Audio transcription completed")
     except Exception as e:
         logger.error(f"❌ WhisperX pipeline failed: {e}")
-        return audio_path, []
+        return audio_path, segment_result, []
 
     # Return segment boundaries (only timestamps, not text)
     transcript_with_speakers = [
@@ -312,7 +312,7 @@ def segment_audio_from_video(video_path):
         if segment["end"] > segment["start"]
     ]
 
-    return audio_path, transcript_with_speakers
+    return audio_path, segment_result, transcript_with_speakers
 
 def clean_transcribed_text(text: str) -> str:
     """
@@ -1388,7 +1388,7 @@ def upload_and_manage(file, target_language, process_mode):
 
     # Step 1: Segment audio from the uploaded video/audio file
     logger.info("Segmenting audio...")
-    temp_audio_for_vad, speech_segments = segment_audio_from_video(file.name)
+    temp_audio_for_vad, background_audio_path, speech_segments = segment_audio_from_video(file.name)
     if not speech_segments:
         raise Exception("No speech segments detected in the audio.")
     logger.info(f"Audio segmentation completed. Found {len(speech_segments)} segments.")
@@ -1417,7 +1417,7 @@ def upload_and_manage(file, target_language, process_mode):
 
     # Step 3: Add transcript to video based on timestamps
     logger.info("Adding translated transcript to video...")
-    add_transcript_voiceover(file.name, translated_json, output_video_path, process_mode, target_language)
+    add_transcript_voiceover(file.name, translated_json, output_video_path, process_mode, target_language, background_audio_path = background_audio_path)
     logger.info(f"Transcript added to video. Output video saved at {output_video_path}")
 
     # Convert translated JSON into a format for the editable table
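Read together, the hunks widen the contract of segment_audio_from_video from a two-tuple to a three-tuple: alongside the extracted audio path and the speech segments it now also returns segment_result, which upload_and_manage unpacks as background_audio_path and forwards to add_transcript_voiceover as a new keyword argument, presumably so the separated background track can be reused when the output video is assembled. The sketch below only restates that interface with hypothetical stub bodies and sample values; it is not the actual app.py implementation.

# Minimal sketch of the post-commit interface. Bodies and sample values are
# hypothetical stand-ins; the real app.py runs WhisperX and produces the
# separated background track itself.

def segment_audio_from_video(video_path):
    audio_path = video_path + ".speech.wav"           # placeholder: extracted audio
    segment_result = video_path + ".background.wav"   # placeholder: background track
    speech_segments = [{"start": 0.0, "end": 1.5}]    # placeholder segment boundaries
    # New contract: three return values instead of two (the error path in the
    # diff likewise returns three, with an empty segment list).
    return audio_path, segment_result, speech_segments

def add_transcript_voiceover(video_path, translated_json, output_video_path,
                             process_mode, target_language,
                             background_audio_path=None):
    # Stub only: the real function adds the translated transcript/voiceover;
    # the new keyword argument carries the background track's path.
    print("background track:", background_audio_path)

# Caller side, mirroring upload_and_manage: unpack three values, forward the path.
temp_audio_for_vad, background_audio_path, speech_segments = segment_audio_from_video("input.mp4")
add_transcript_voiceover("input.mp4", [], "output.mp4", "dummy_mode", "en",
                         background_audio_path=background_audio_path)

One caveat visible in the first hunk: the error-path return now also references segment_result, so that name must already be bound before the WhisperX call fails for the fallback return to work; whether it is cannot be seen from this diff.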