awacke1 committed on
Commit
f234b10
·
verified ·
1 Parent(s): c42f181

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -20
app.py CHANGED
@@ -499,39 +499,76 @@ def process_pdf_tab(pdf_file, max_pages, voice):
499
  return
500
  audio_processor = AudioProcessor() # Instance for this run
501
  try:
502
- reader=PdfReader(pdf_file); total_pages=min(len(reader.pages),max_pages);
 
 
 
 
 
503
  st.write(f"Processing first {total_pages} pages of '{pdf_file.name}'...")
504
  texts, audios={}, {}; page_threads = []; results_lock = threading.Lock()
505
 
 
506
  def process_page_sync(page_num, page_text):
507
- # Runs async audio generation in a separate thread
508
- async def run_async_audio(): return await audio_processor.create_audio(page_text, voice)
509
- try: audio_path = asyncio.run(run_async_audio());
510
- if audio_path:
511
- with results_lock: audios[page_num] = audio_path
512
- except Exception as page_e: print(f"Err process page {page_num+1}: {page_e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
513
 
514
  # Start threads
515
  for i in range(total_pages):
516
- text=reader.pages[i].extract_text();
517
- if text: texts[i]=text; thread = threading.Thread(target=process_page_sync, args=(i, text)); page_threads.append(thread); thread.start()
518
- else: texts[i] = "[No text extracted]"
519
-
520
- # Display results as they become available (or after join)
521
- st.progress(0) # Placeholder for progress
522
- # Wait for threads and display - consider using st.empty() for updates
523
- for thread in page_threads: thread.join()
524
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
525
  for i in range(total_pages):
526
  with st.expander(f"Page {i+1}"):
527
  st.markdown(texts.get(i, "[Error getting text]"))
528
- audio_file = audios.get(i)
529
  if audio_file: play_and_download_audio(audio_file)
530
  else: st.caption("Audio generation failed or pending.")
531
 
532
- except Exception as pdf_e: st.error(f"Err read PDF: {pdf_e}"); st.exception(pdf_e)
533
-
534
-
535
  # ==============================================================================
536
  # WebSocket Server Logic
537
  # ==============================================================================
 
499
  return
500
  audio_processor = AudioProcessor() # Instance for this run
501
  try:
502
+ reader=PdfReader(pdf_file)
503
+ # Check if PDF is password protected (optional but good practice)
504
+ if reader.is_encrypted:
505
+ st.warning("PDF is encrypted and cannot be processed.")
506
+ return
507
+ total_pages=min(len(reader.pages),max_pages);
508
  st.write(f"Processing first {total_pages} pages of '{pdf_file.name}'...")
509
  texts, audios={}, {}; page_threads = []; results_lock = threading.Lock()
510
 
511
+ # --- Corrected process_page_sync function ---
512
  def process_page_sync(page_num, page_text):
513
+ # Runs async audio generation using asyncio.run in this thread
514
+ async def run_async_audio():
515
+ # Ensure audio_processor is accessible (it is, from outer scope)
516
+ return await audio_processor.create_audio(page_text, voice)
517
+ try: # Start of the try block
518
+ # It's generally better not to run asyncio.run inside threads repeatedly
519
+ # if the main loop is async, but in Streamlit context this might be necessary.
520
+ audio_path = asyncio.run(run_async_audio()) # Attempt to run async func
521
+ if audio_path: # Check result *inside* the try block
522
+ with results_lock:
523
+ audios[page_num] = audio_path # Update shared dict safely
524
+ except RuntimeError as run_err:
525
+ # Handle cases where asyncio.run is called from an already running loop
526
+ # This might happen depending on Streamlit's internal async handling
527
+ print(f"RuntimeError processing page {page_num+1} (asyncio loop issue?): {run_err}")
528
+ # Fallback? Or just log the error.
529
+ except Exception as page_e: # Correctly indented except block
530
+ print(f"Err process page {page_num+1}: {page_e}")
531
+ # --- End of corrected function ---
532
 
533
  # Start threads
534
  for i in range(total_pages):
535
+ try:
536
+ page = reader.pages[i]
537
+ text = page.extract_text()
538
+ if text and text.strip(): # Check if text extraction yielded something meaningful
539
+ texts[i]=text
540
+ # Start a new thread for each page's audio processing
541
+ thread = threading.Thread(target=process_page_sync, args=(i, text))
542
+ page_threads.append(thread)
543
+ thread.start()
544
+ else: texts[i] = "[No text extracted or page empty]"
545
+ except Exception as extract_e:
546
+ print(f"Error extracting text from page {i+1}: {extract_e}")
547
+ texts[i] = f"[Error extracting text: {extract_e}]"
548
+
549
+
550
+ # Wait for threads and display results
551
+ progress_bar = st.progress(0.0)
552
+ total_threads = len(page_threads)
553
+ completed_threads = 0
554
+ while completed_threads < total_threads:
555
+ completed_threads = total_threads - sum(t.is_alive() for t in page_threads)
556
+ progress = completed_threads / total_threads if total_threads > 0 else 1.0
557
+ progress_bar.progress(progress)
558
+ time.sleep(0.2) # Brief sleep to avoid busy-waiting
559
+
560
+ progress_bar.progress(1.0) # Ensure it reaches 100%
561
+
562
+ # Display results after all threads are done (or tried)
563
+ st.write("Processing complete. Displaying results:")
564
  for i in range(total_pages):
565
  with st.expander(f"Page {i+1}"):
566
  st.markdown(texts.get(i, "[Error getting text]"))
567
+ audio_file = audios.get(i) # Get result from shared dict
568
  if audio_file: play_and_download_audio(audio_file)
569
  else: st.caption("Audio generation failed or pending.")
570
 
571
+ except Exception as pdf_e: st.error(f"Error reading PDF: {pdf_e}"); st.exception(pdf_e)
 
 
572
  # ==============================================================================
573
  # WebSocket Server Logic
574
  # ==============================================================================