Update app.py

app.py CHANGED
@@ -985,53 +985,6 @@ def merge_speaker_and_time_from_whisperx(
 
     return merged
 
-# def merge_speaker_and_time_from_whisperx(ocr_json, whisperx_json, text_sim_threshold=80, replace_threshold=90):
-#     merged = []
-#     used_whisperx = set()
-
-#     for ocr in ocr_json:
-#         ocr_start = ocr["start"]
-#         ocr_end = ocr["end"]
-#         ocr_text = ocr["text"]
-
-#         best_match = None
-#         best_score = -1
-#         best_idx = None
-
-#         for idx, wx in enumerate(whisperx_json):
-#             wx_start, wx_end = wx["start"], wx["end"]
-#             wx_text = wx["text"]
-
-#             if idx in used_whisperx:
-#                 continue  # Already matched
-
-#             time_center_diff = abs((ocr_start + ocr_end)/2 - (wx_start + wx_end)/2)
-#             if time_center_diff > 3:
-#                 continue
-
-#             sim = fuzz.ratio(ocr_text, wx_text)
-#             if sim > best_score:
-#                 best_score = sim
-#                 best_match = wx
-#                 best_idx = idx
-
-#         new_entry = copy.deepcopy(ocr)
-#         if best_match:
-#             new_entry["speaker"] = best_match.get("speaker", "UNKNOWN")
-#             new_entry["ocr_similarity"] = best_score
-
-#             if best_score >= replace_threshold:
-#                 new_entry["start"] = best_match["start"]
-#                 new_entry["end"] = best_match["end"]
-#             used_whisperx.add(best_idx)  # Mark used
-
-#         else:
-#             new_entry["speaker"] = "UNKNOWN"
-#             new_entry["ocr_similarity"] = None
-
-#         merged.append(new_entry)
-#     return merged
-
 def realign_ocr_segments(merged_ocr_json, min_gap=0.2):
     """
     Realign OCR segments to avoid overlaps using midpoint-based adjustment.
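The block deleted above was a commented-out duplicate of the live merge_speaker_and_time_from_whisperx implementation, whose `return merged` survives in the context lines. Its heuristic pairs each OCR caption with the closest unused WhisperX segment: candidates whose time centers differ by more than 3 seconds are skipped, and the rest are ranked by fuzzy text similarity. A minimal sketch of that candidate search, assuming fuzz.ratio comes from rapidfuzz (fuzzywuzzy exposes the same call) and using best_whisperx_match as a hypothetical helper name:

from rapidfuzz import fuzz  # assumption: the app may import fuzz from fuzzywuzzy instead

def best_whisperx_match(ocr_seg, whisperx_json, used, max_center_gap=3.0):
    # Return (idx, segment, score) for the best unused WhisperX candidate,
    # or (None, None, -1) when nothing is close enough in time.
    ocr_center = (ocr_seg["start"] + ocr_seg["end"]) / 2
    best_idx, best_seg, best_score = None, None, -1
    for idx, wx in enumerate(whisperx_json):
        if idx in used:
            continue  # a WhisperX segment may back at most one caption
        if abs(ocr_center - (wx["start"] + wx["end"]) / 2) > max_center_gap:
            continue  # time centers too far apart to be the same utterance
        score = fuzz.ratio(ocr_seg["text"], wx["text"])
        if score > best_score:
            best_idx, best_seg, best_score = idx, wx, score
    return best_idx, best_seg, best_score

In the deleted version, a score at or above replace_threshold also copied the matched segment's start/end times onto the caption and marked that segment as used.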
@@ -1166,6 +1119,8 @@ def add_transcript_voiceover(video_path, translated_json, output_path, process_m
 
     # Sort and filter together
     results.sort(key=lambda x: x[0])
+    text_clips = [clip for _, clip, _, _ in results if clip]
+
     filtered = [(translated_json[i], txt, aud, dur) for i, txt, aud, dur in results if dur > 0]
 
     translated_json = [entry for entry, _, _, _ in filtered]
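The added text_clips line collects each segment's text overlay before the zero-duration filter runs, so a caption clip is retained even when its audio clip ends up empty. A small stand-in run of that pattern, assuming each results entry is an (index, text_clip, audio_clip, duration) tuple as the surrounding comprehensions imply (plain strings stand in for clip objects, and the bare index stands in for translated_json[i]):

# Stand-in tuples in the (index, text_clip, audio_clip, duration) shape
# implied by the comprehensions above.
results = [
    (2, "txt-c", "aud-c", 1.5),
    (0, "txt-a", "aud-a", 0.0),  # zero-duration audio: dropped by the filter
    (1, None,    "aud-b", 2.0),  # missing text clip: skipped by text_clips
]

results.sort(key=lambda x: x[0])  # restore segment order (index is the sort key)
text_clips = [clip for _, clip, _, _ in results if clip]
filtered = [(i, txt, aud, dur) for i, txt, aud, dur in results if dur > 0]

print(text_clips)  # ['txt-a', 'txt-c']
print(filtered)    # [(1, None, 'aud-b', 2.0), (2, 'txt-c', 'aud-c', 1.5)]

Note the divergence the two comprehensions create: 'txt-a' survives in text_clips even though its segment is dropped from filtered.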