qqwjq1981 committed on
Commit
1320e5b
·
verified ·
1 Parent(s): c34772c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -12
app.py CHANGED
@@ -482,19 +482,40 @@ def solve_optimal_alignment(original_segments, generated_durations, total_durati
482
  )
483
 
484
  return original_segments
485
-
 
 
 
 
 
 
 
 
 
486
  def ocr_frame_worker(args):
487
  frame_idx, frame_time, frame = args
488
- ocr = PaddleOCR(use_angle_cls=True, lang="ch") # Initialize OCR inside worker
489
- result = ocr.ocr(frame, cls=True)
490
- texts = [line[1][0] for line in result[0]] if result[0] else []
491
- combined_text = " ".join(texts).strip()
492
- return {"time": frame_time, "text": combined_text}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
493
 
494
  def frame_is_in_audio_segments(frame_time, audio_segments, tolerance=0.2):
495
- """
496
- Check if the frame_time falls within any audio segment (plus/minus tolerance).
497
- """
498
  for segment in audio_segments:
499
  start, end = segment["start"], segment["end"]
500
  if (start - tolerance) <= frame_time <= (end + tolerance):
@@ -518,19 +539,20 @@ def extract_ocr_subtitles_parallel(video_path, transcription_json, interval_sec=
518
  cap.release()
519
 
520
  ocr_results = []
521
- with concurrent.futures.ProcessPoolExecutor(max_workers=num_workers) as executor:
522
  futures = [executor.submit(ocr_frame_worker, frame) for frame in frames]
523
-
524
  for f in tqdm(concurrent.futures.as_completed(futures), total=len(futures)):
525
  try:
526
  result = f.result()
527
  if result["text"]:
528
  ocr_results.append(result)
529
  except Exception as e:
530
- print(f"⚠️ OCR failed for a frame: {e}")
531
 
532
  return ocr_results
533
 
 
534
  def collapse_ocr_subtitles(ocr_json, text_similarity_threshold=90):
535
  collapsed = []
536
  current = None
 
482
  )
483
 
484
  return original_segments
485
+
486
# Shared PaddleOCR instance. It is created lazily on first use and reused by
# every worker; ocr_lock ensures concurrent workers construct it at most once.
ocr_model = None
ocr_lock = threading.Lock()


def init_ocr_model():
    """Create the shared PaddleOCR model if it has not been created yet.

    Safe to call repeatedly and from several threads: the model is built at
    most once, and later calls return without taking the lock.
    """
    global ocr_model
    # Fast path: once the model exists there is nothing to synchronise, so
    # workers do not queue on the lock for every frame.
    if ocr_model is not None:
        return
    with ocr_lock:
        # Re-check under the lock; another thread may have built the model
        # while this one was waiting.
        if ocr_model is None:
            ocr_model = PaddleOCR(use_angle_cls=True, lang="ch")


def ocr_frame_worker(args):
    """Run OCR on one video frame and return its recognised text.

    Args:
        args: A ``(frame_idx, frame_time, frame)`` tuple. ``frame`` is
            expected to be a NumPy image array; ``frame_time`` is echoed back
            in the result.

    Returns:
        A dict ``{"time": frame_time, "text": <str>}``. ``text`` is the
        space-joined recognised strings, or ``""`` when the frame is missing,
        not an ndarray, empty, contains no text, or OCR raised.
    """
    _, frame_time, frame = args

    # Validate before touching the model: the type check must come first
    # (``.size`` only exists on arrays), and rejecting bad frames early avoids
    # loading the OCR model for input that can never be processed.
    if not isinstance(frame, np.ndarray) or frame.size == 0:
        return {"time": frame_time, "text": ""}

    if frame.dtype != np.uint8:
        frame = frame.astype(np.uint8)

    init_ocr_model()  # Ensure the shared model exists before first use

    try:
        result = ocr_model.ocr(frame, cls=True)
        # NOTE(review): assumes result[0] is a list of (box, (text, score))
        # entries, or None/empty when nothing was detected — confirm against
        # the installed PaddleOCR version.
        detections = result[0] if result else None
        texts = [line[1][0] for line in detections] if detections else []
        combined_text = " ".join(texts).strip()
        return {"time": frame_time, "text": combined_text}
    except Exception as e:
        # Best effort: a single bad frame must not abort the whole batch.
        print(f"⚠️ OCR failed at {frame_time:.2f}s: {e}")
        return {"time": frame_time, "text": ""}
517
 
518
  def frame_is_in_audio_segments(frame_time, audio_segments, tolerance=0.2):
 
 
 
519
  for segment in audio_segments:
520
  start, end = segment["start"], segment["end"]
521
  if (start - tolerance) <= frame_time <= (end + tolerance):
 
539
  cap.release()
540
 
541
  ocr_results = []
542
+ with concurrent.futures.ThreadPoolExecutor(max_workers=num_workers) as executor:
543
  futures = [executor.submit(ocr_frame_worker, frame) for frame in frames]
544
+
545
  for f in tqdm(concurrent.futures.as_completed(futures), total=len(futures)):
546
  try:
547
  result = f.result()
548
  if result["text"]:
549
  ocr_results.append(result)
550
  except Exception as e:
551
+ print(f"⚠️ OCR worker failed: {e}")
552
 
553
  return ocr_results
554
 
555
+
556
  def collapse_ocr_subtitles(ocr_json, text_similarity_threshold=90):
557
  collapsed = []
558
  current = None