qqwjq1981 committed on
Commit
13e8c37
·
verified ·
1 Parent(s): ddd2e4a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -47
app.py CHANGED
@@ -985,53 +985,6 @@ def merge_speaker_and_time_from_whisperx(
985
 
986
  return merged
987
 
988
- # def merge_speaker_and_time_from_whisperx(ocr_json, whisperx_json, text_sim_threshold=80, replace_threshold=90):
989
- # merged = []
990
- # used_whisperx = set()
991
-
992
- # for ocr in ocr_json:
993
- # ocr_start = ocr["start"]
994
- # ocr_end = ocr["end"]
995
- # ocr_text = ocr["text"]
996
-
997
- # best_match = None
998
- # best_score = -1
999
- # best_idx = None
1000
-
1001
- # for idx, wx in enumerate(whisperx_json):
1002
- # wx_start, wx_end = wx["start"], wx["end"]
1003
- # wx_text = wx["text"]
1004
-
1005
- # if idx in used_whisperx:
1006
- # continue # Already matched
1007
-
1008
- # time_center_diff = abs((ocr_start + ocr_end)/2 - (wx_start + wx_end)/2)
1009
- # if time_center_diff > 3:
1010
- # continue
1011
-
1012
- # sim = fuzz.ratio(ocr_text, wx_text)
1013
- # if sim > best_score:
1014
- # best_score = sim
1015
- # best_match = wx
1016
- # best_idx = idx
1017
-
1018
- # new_entry = copy.deepcopy(ocr)
1019
- # if best_match:
1020
- # new_entry["speaker"] = best_match.get("speaker", "UNKNOWN")
1021
- # new_entry["ocr_similarity"] = best_score
1022
-
1023
- # if best_score >= replace_threshold:
1024
- # new_entry["start"] = best_match["start"]
1025
- # new_entry["end"] = best_match["end"]
1026
- # used_whisperx.add(best_idx) # Mark used
1027
-
1028
- # else:
1029
- # new_entry["speaker"] = "UNKNOWN"
1030
- # new_entry["ocr_similarity"] = None
1031
-
1032
- # merged.append(new_entry)
1033
- # return merged
1034
-
1035
  def realign_ocr_segments(merged_ocr_json, min_gap=0.2):
1036
  """
1037
  Realign OCR segments to avoid overlaps using midpoint-based adjustment.
@@ -1166,6 +1119,8 @@ def add_transcript_voiceover(video_path, translated_json, output_path, process_m
1166
 
1167
  # Sort and filter together
1168
  results.sort(key=lambda x: x[0])
 
 
1169
  filtered = [(translated_json[i], txt, aud, dur) for i, txt, aud, dur in results if dur > 0]
1170
 
1171
  translated_json = [entry for entry, _, _, _ in filtered]
 
985
 
986
  return merged
987
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
988
  def realign_ocr_segments(merged_ocr_json, min_gap=0.2):
989
  """
990
  Realign OCR segments to avoid overlaps using midpoint-based adjustment.
 
1119
 
1120
  # Sort and filter together
1121
  results.sort(key=lambda x: x[0])
1122
+ text_clips = [clip for _, clip, _, _ in results if clip]
1123
+
1124
  filtered = [(translated_json[i], txt, aud, dur) for i, txt, aud, dur in results if dur > 0]
1125
 
1126
  translated_json = [entry for entry, _, _, _ in filtered]