Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -605,6 +605,9 @@ def solve_optimal_alignment(original_segments, generated_durations, total_durati
|
|
605 |
d = np.array(generated_durations)
|
606 |
m = np.array([(seg['start'] + seg['end']) / 2 for seg in original_segments])
|
607 |
|
|
|
|
|
|
|
608 |
try:
|
609 |
s = cp.Variable(N)
|
610 |
objective = cp.Minimize(cp.sum_squares(s + d / 2 - m))
|
@@ -1043,7 +1046,6 @@ def process_entry(entry, i, tts_model, video_width, video_height, process_mode,
|
|
1043 |
|
1044 |
return i, txt_clip, audio_segment, actual_duration, error_message
|
1045 |
|
1046 |
-
|
1047 |
def add_transcript_voiceover(video_path, translated_json, output_path, process_mode, target_language="en", speaker_sample_paths=None, background_audio_path="background_segments.wav"):
|
1048 |
|
1049 |
video = VideoFileClip(video_path)
|
@@ -1082,10 +1084,13 @@ def add_transcript_voiceover(video_path, translated_json, output_path, process_m
|
|
1082 |
err = f"❌ Unexpected error in future result: {e}"
|
1083 |
error_messages.append(err)
|
1084 |
|
|
|
1085 |
results.sort(key=lambda x: x[0])
|
1086 |
-
|
1087 |
-
|
1088 |
-
|
|
|
|
|
1089 |
# Align using optimization (modifies translated_json in-place)
|
1090 |
translated_json = solve_optimal_alignment(translated_json, generated_durations, video.duration)
|
1091 |
|
@@ -1306,7 +1311,8 @@ def upload_and_manage(file, target_language, process_mode):
|
|
1306 |
transcription_json, source_language = transcribe_video_with_speakers_11labs(file.name)
|
1307 |
logger.info(f"Transcription completed. Detected source language: {source_language}")
|
1308 |
|
1309 |
-
transcription_json_merged =
|
|
|
1310 |
# Step 2: Translate the transcription
|
1311 |
logger.info(f"Translating transcription from {source_language} to {target_language}...")
|
1312 |
translated_json_raw = translate_text(transcription_json_merged, source_language, target_language)
|
|
|
605 |
d = np.array(generated_durations)
|
606 |
m = np.array([(seg['start'] + seg['end']) / 2 for seg in original_segments])
|
607 |
|
608 |
+
if N == 0 or len(generated_durations) == 0:
|
609 |
+
logger.warning("⚠️ Alignment skipped: empty segments or durations.")
|
610 |
+
return original_segments # or raise an error, depending on your app logic
|
611 |
try:
|
612 |
s = cp.Variable(N)
|
613 |
objective = cp.Minimize(cp.sum_squares(s + d / 2 - m))
|
|
|
1046 |
|
1047 |
return i, txt_clip, audio_segment, actual_duration, error_message
|
1048 |
|
|
|
1049 |
def add_transcript_voiceover(video_path, translated_json, output_path, process_mode, target_language="en", speaker_sample_paths=None, background_audio_path="background_segments.wav"):
|
1050 |
|
1051 |
video = VideoFileClip(video_path)
|
|
|
1084 |
err = f"❌ Unexpected error in future result: {e}"
|
1085 |
error_messages.append(err)
|
1086 |
|
1087 |
+
# Sort and filter together
|
1088 |
results.sort(key=lambda x: x[0])
|
1089 |
+
filtered = [(translated_json[i], txt, aud, dur) for i, txt, aud, dur in results if dur > 0]
|
1090 |
+
|
1091 |
+
translated_json = [entry for entry, _, _, _ in filtered]
|
1092 |
+
generated_durations = [dur for _, _, _, dur in filtered]
|
1093 |
+
|
1094 |
# Align using optimization (modifies translated_json in-place)
|
1095 |
translated_json = solve_optimal_alignment(translated_json, generated_durations, video.duration)
|
1096 |
|
|
|
1311 |
transcription_json, source_language = transcribe_video_with_speakers_11labs(file.name)
|
1312 |
logger.info(f"Transcription completed. Detected source language: {source_language}")
|
1313 |
|
1314 |
+
transcription_json_merged = transcription_json
|
1315 |
+
#post_edit_transcribed_segments(transcription_json, file.name, source_language)
|
1316 |
# Step 2: Translate the transcription
|
1317 |
logger.info(f"Translating transcription from {source_language} to {target_language}...")
|
1318 |
translated_json_raw = translate_text(transcription_json_merged, source_language, target_language)
|