qqwjq1981 committed on
Commit
281e1f6
·
verified ·
1 Parent(s): 3f4f0cf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -5
app.py CHANGED
@@ -605,6 +605,9 @@ def solve_optimal_alignment(original_segments, generated_durations, total_durati
605
  d = np.array(generated_durations)
606
  m = np.array([(seg['start'] + seg['end']) / 2 for seg in original_segments])
607
 
 
 
 
608
  try:
609
  s = cp.Variable(N)
610
  objective = cp.Minimize(cp.sum_squares(s + d / 2 - m))
@@ -1043,7 +1046,6 @@ def process_entry(entry, i, tts_model, video_width, video_height, process_mode,
1043
 
1044
  return i, txt_clip, audio_segment, actual_duration, error_message
1045
 
1046
-
1047
  def add_transcript_voiceover(video_path, translated_json, output_path, process_mode, target_language="en", speaker_sample_paths=None, background_audio_path="background_segments.wav"):
1048
 
1049
  video = VideoFileClip(video_path)
@@ -1082,10 +1084,13 @@ def add_transcript_voiceover(video_path, translated_json, output_path, process_m
1082
  err = f"❌ Unexpected error in future result: {e}"
1083
  error_messages.append(err)
1084
 
 
1085
  results.sort(key=lambda x: x[0])
1086
- text_clips = [clip for _, clip, _, _ in results if clip]
1087
- generated_durations = [dur for _, _, _, dur in results if dur > 0]
1088
-
 
 
1089
  # Align using optimization (modifies translated_json in-place)
1090
  translated_json = solve_optimal_alignment(translated_json, generated_durations, video.duration)
1091
 
@@ -1306,7 +1311,8 @@ def upload_and_manage(file, target_language, process_mode):
1306
  transcription_json, source_language = transcribe_video_with_speakers_11labs(file.name)
1307
  logger.info(f"Transcription completed. Detected source language: {source_language}")
1308
 
1309
- transcription_json_merged = post_edit_transcribed_segments(transcription_json, file.name, source_language)
 
1310
  # Step 2: Translate the transcription
1311
  logger.info(f"Translating transcription from {source_language} to {target_language}...")
1312
  translated_json_raw = translate_text(transcription_json_merged, source_language, target_language)
 
605
  d = np.array(generated_durations)
606
  m = np.array([(seg['start'] + seg['end']) / 2 for seg in original_segments])
607
 
608
+ if N == 0 or len(generated_durations) == 0:
609
+ logger.warning("⚠️ Alignment skipped: empty segments or durations.")
610
+ return original_segments # or raise an error, depending on your app logic
611
  try:
612
  s = cp.Variable(N)
613
  objective = cp.Minimize(cp.sum_squares(s + d / 2 - m))
 
1046
 
1047
  return i, txt_clip, audio_segment, actual_duration, error_message
1048
 
 
1049
  def add_transcript_voiceover(video_path, translated_json, output_path, process_mode, target_language="en", speaker_sample_paths=None, background_audio_path="background_segments.wav"):
1050
 
1051
  video = VideoFileClip(video_path)
 
1084
  err = f"❌ Unexpected error in future result: {e}"
1085
  error_messages.append(err)
1086
 
1087
+ # Sort and filter together
1088
  results.sort(key=lambda x: x[0])
1089
+ filtered = [(translated_json[i], txt, aud, dur) for i, txt, aud, dur in results if dur > 0]
1090
+
1091
+ translated_json = [entry for entry, _, _, _ in filtered]
1092
+ generated_durations = [dur for _, _, _, dur in filtered]
1093
+
1094
  # Align using optimization (modifies translated_json in-place)
1095
  translated_json = solve_optimal_alignment(translated_json, generated_durations, video.duration)
1096
 
 
1311
  transcription_json, source_language = transcribe_video_with_speakers_11labs(file.name)
1312
  logger.info(f"Transcription completed. Detected source language: {source_language}")
1313
 
1314
+ transcription_json_merged = transcription_json
1315
+ #post_edit_transcribed_segments(transcription_json, file.name, source_language)
1316
  # Step 2: Translate the transcription
1317
  logger.info(f"Translating transcription from {source_language} to {target_language}...")
1318
  translated_json_raw = translate_text(transcription_json_merged, source_language, target_language)