Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -499,39 +499,76 @@ def process_pdf_tab(pdf_file, max_pages, voice):
|
|
| 499 |
return
|
| 500 |
audio_processor = AudioProcessor() # Instance for this run
|
| 501 |
try:
|
| 502 |
-
reader=PdfReader(pdf_file)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 503 |
st.write(f"Processing first {total_pages} pages of '{pdf_file.name}'...")
|
| 504 |
texts, audios={}, {}; page_threads = []; results_lock = threading.Lock()
|
| 505 |
|
|
|
|
| 506 |
def process_page_sync(page_num, page_text):
|
| 507 |
-
# Runs async audio generation in
|
| 508 |
-
async def run_async_audio():
|
| 509 |
-
|
| 510 |
-
|
| 511 |
-
|
| 512 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 513 |
|
| 514 |
# Start threads
|
| 515 |
for i in range(total_pages):
|
| 516 |
-
|
| 517 |
-
|
| 518 |
-
|
| 519 |
-
|
| 520 |
-
|
| 521 |
-
|
| 522 |
-
|
| 523 |
-
|
| 524 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 525 |
for i in range(total_pages):
|
| 526 |
with st.expander(f"Page {i+1}"):
|
| 527 |
st.markdown(texts.get(i, "[Error getting text]"))
|
| 528 |
-
audio_file = audios.get(i)
|
| 529 |
if audio_file: play_and_download_audio(audio_file)
|
| 530 |
else: st.caption("Audio generation failed or pending.")
|
| 531 |
|
| 532 |
-
except Exception as pdf_e: st.error(f"
|
| 533 |
-
|
| 534 |
-
|
| 535 |
# ==============================================================================
|
| 536 |
# WebSocket Server Logic
|
| 537 |
# ==============================================================================
|
|
|
|
| 499 |
return
|
| 500 |
audio_processor = AudioProcessor() # Instance for this run
|
| 501 |
try:
|
| 502 |
+
reader=PdfReader(pdf_file)
|
| 503 |
+
# Check if PDF is password protected (optional but good practice)
|
| 504 |
+
if reader.is_encrypted:
|
| 505 |
+
st.warning("PDF is encrypted and cannot be processed.")
|
| 506 |
+
return
|
| 507 |
+
total_pages=min(len(reader.pages),max_pages);
|
| 508 |
st.write(f"Processing first {total_pages} pages of '{pdf_file.name}'...")
|
| 509 |
texts, audios={}, {}; page_threads = []; results_lock = threading.Lock()
|
| 510 |
|
| 511 |
+
# --- Corrected process_page_sync function ---
|
| 512 |
def process_page_sync(page_num, page_text):
    """Generate audio for a single page and record it in the shared results.

    Used as the target of the per-page worker threads. The coroutine
    ``audio_processor.create_audio`` is driven to completion with
    ``asyncio.run`` inside the calling thread, and a truthy result is stored
    in ``audios`` under ``page_num`` while holding ``results_lock``.

    Failures are printed and swallowed, so a page that fails simply has no
    entry in ``audios``.

    Args:
        page_num: Zero-based page index; also the key written into ``audios``.
        page_text: Text handed to ``audio_processor.create_audio``.
    """

    async def _synthesize_page():
        # audio_processor and voice are picked up from the enclosing scope.
        speech = audio_processor.create_audio(page_text, voice)
        return await speech

    try:
        # asyncio.run is called once per worker thread here.
        # NOTE(review): may be costly for many pages; kept as in the original.
        generated_path = asyncio.run(_synthesize_page())
        if not generated_path:
            # Nothing usable came back; leave this page without an entry.
            return
        # The results dict is shared between all page threads.
        with results_lock:
            audios[page_num] = generated_path
    except RuntimeError as run_err:
        # Presumably raised when asyncio.run is invoked while an event loop
        # is already running in this thread; log only, no fallback.
        print(f"RuntimeError processing page {page_num+1} (asyncio loop issue?): {run_err}")
    except Exception as page_e:
        # Any other failure is logged so the remaining pages can proceed.
        print(f"Err process page {page_num+1}: {page_e}")
|
| 531 |
+
# --- End of corrected function ---
|
| 532 |
|
| 533 |
# Start threads
|
| 534 |
for i in range(total_pages):
|
| 535 |
+
try:
|
| 536 |
+
page = reader.pages[i]
|
| 537 |
+
text = page.extract_text()
|
| 538 |
+
if text and text.strip(): # Check if text extraction yielded something meaningful
|
| 539 |
+
texts[i]=text
|
| 540 |
+
# Start a new thread for each page's audio processing
|
| 541 |
+
thread = threading.Thread(target=process_page_sync, args=(i, text))
|
| 542 |
+
page_threads.append(thread)
|
| 543 |
+
thread.start()
|
| 544 |
+
else: texts[i] = "[No text extracted or page empty]"
|
| 545 |
+
except Exception as extract_e:
|
| 546 |
+
print(f"Error extracting text from page {i+1}: {extract_e}")
|
| 547 |
+
texts[i] = f"[Error extracting text: {extract_e}]"
|
| 548 |
+
|
| 549 |
+
|
| 550 |
+
# Wait for threads and display results
|
| 551 |
+
progress_bar = st.progress(0.0)
|
| 552 |
+
total_threads = len(page_threads)
|
| 553 |
+
completed_threads = 0
|
| 554 |
+
while completed_threads < total_threads:
|
| 555 |
+
completed_threads = total_threads - sum(t.is_alive() for t in page_threads)
|
| 556 |
+
progress = completed_threads / total_threads if total_threads > 0 else 1.0
|
| 557 |
+
progress_bar.progress(progress)
|
| 558 |
+
time.sleep(0.2) # Brief sleep to avoid busy-waiting
|
| 559 |
+
|
| 560 |
+
progress_bar.progress(1.0) # Ensure it reaches 100%
|
| 561 |
+
|
| 562 |
+
# Display results after all threads are done (or tried)
|
| 563 |
+
st.write("Processing complete. Displaying results:")
|
| 564 |
for i in range(total_pages):
|
| 565 |
with st.expander(f"Page {i+1}"):
|
| 566 |
st.markdown(texts.get(i, "[Error getting text]"))
|
| 567 |
+
audio_file = audios.get(i) # Get result from shared dict
|
| 568 |
if audio_file: play_and_download_audio(audio_file)
|
| 569 |
else: st.caption("Audio generation failed or pending.")
|
| 570 |
|
| 571 |
+
except Exception as pdf_e: st.error(f"Error reading PDF: {pdf_e}"); st.exception(pdf_e)
|
|
|
|
|
|
|
| 572 |
# ==============================================================================
|
| 573 |
# WebSocket Server Logic
|
| 574 |
# ==============================================================================
|