Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -638,30 +638,37 @@ def post_edit_transcribed_segments(transcription_json, video_path,
|
|
| 638 |
updated_entry["ocr_similarity"] = best_score if best_score >= 0 else None
|
| 639 |
|
| 640 |
merged_segments.append(updated_entry)
|
| 641 |
-
|
| 642 |
# Step 4: Insert unused OCR segments (Phase 2)
|
| 643 |
inserted_segments = []
|
| 644 |
for ocr_idx, ocr in enumerate(collapsed_ocr):
|
| 645 |
-
if ocr_idx
|
| 646 |
-
|
| 647 |
-
|
| 648 |
-
|
| 649 |
-
|
| 650 |
-
|
| 651 |
-
|
| 652 |
-
|
| 653 |
-
|
| 654 |
-
|
| 655 |
-
|
| 656 |
-
|
| 657 |
-
|
| 658 |
-
|
| 659 |
-
|
| 660 |
-
|
| 661 |
-
|
| 662 |
-
|
| 663 |
-
|
| 664 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 665 |
|
| 666 |
# Step 5: Combine and sort
|
| 667 |
final_segments = merged_segments + inserted_segments
|
|
|
|
| 638 |
updated_entry["ocr_similarity"] = best_score if best_score >= 0 else None
|
| 639 |
|
| 640 |
merged_segments.append(updated_entry)
|
| 641 |
+
|
| 642 |
# Step 4: Insert unused OCR segments (Phase 2)
|
| 643 |
inserted_segments = []
|
| 644 |
for ocr_idx, ocr in enumerate(collapsed_ocr):
|
| 645 |
+
if ocr_idx in used_ocr_indices:
|
| 646 |
+
continue
|
| 647 |
+
|
| 648 |
+
# Check for fuzzy duplicates in WhisperX
|
| 649 |
+
duplicate = False
|
| 650 |
+
for whisper_seg in transcription_json:
|
| 651 |
+
if abs(ocr["start"] - whisper_seg["start"]) < time_tolerance or abs(ocr["end"] - whisper_seg["end"]) < time_tolerance:
|
| 652 |
+
sim = fuzz.ratio(ocr["text"], whisper_seg["text"])
|
| 653 |
+
if sim >= text_similarity_threshold:
|
| 654 |
+
duplicate = True
|
| 655 |
+
break
|
| 656 |
+
|
| 657 |
+
if duplicate:
|
| 658 |
+
logger.debug(f"🟡 Skipping near-duplicate OCR: '{ocr['text']}'")
|
| 659 |
+
continue
|
| 660 |
+
|
| 661 |
+
# Infer speaker from nearest WhisperX entry
|
| 662 |
+
nearby = sorted(transcription_json, key=lambda x: abs(x["start"] - ocr["start"]))
|
| 663 |
+
speaker_guess = nearby[0].get("speaker", "unknown") if nearby else "unknown"
|
| 664 |
+
|
| 665 |
+
inserted_segment = {
|
| 666 |
+
"start": ocr["start"],
|
| 667 |
+
"end": ocr["end"],
|
| 668 |
+
"text": ocr["text"],
|
| 669 |
+
"speaker": speaker_guess
|
| 670 |
+
}
|
| 671 |
+
inserted_segments.append(inserted_segment)
|
| 672 |
|
| 673 |
# Step 5: Combine and sort
|
| 674 |
final_segments = merged_segments + inserted_segments
|