Spaces:

awacke1
/

Happy-Valley-Game

Sleeping

App Files Files Community

awacke1 committed on Apr 6

Commit

f234b10

verified ·

1 Parent(s): c42f181

Update app.py

Browse files

Files changed (1) hide show

app.py +57 -20

app.py CHANGED Viewed

@@ -499,39 +499,76 @@ def process_pdf_tab(pdf_file, max_pages, voice):
            return
       audio_processor = AudioProcessor() # Instance for this run
       try:
-          reader=PdfReader(pdf_file); total_pages=min(len(reader.pages),max_pages);
           st.write(f"Processing first {total_pages} pages of '{pdf_file.name}'...")
           texts, audios={}, {}; page_threads = []; results_lock = threading.Lock()
           def process_page_sync(page_num, page_text):
-              # Runs async audio generation in a separate thread
-              async def run_async_audio(): return await audio_processor.create_audio(page_text, voice)
-              try: audio_path = asyncio.run(run_async_audio());
-                   if audio_path:
-                        with results_lock: audios[page_num] = audio_path
-              except Exception as page_e: print(f"Err process page {page_num+1}: {page_e}")
           # Start threads
           for i in range(total_pages):
-               text=reader.pages[i].extract_text();
-               if text: texts[i]=text; thread = threading.Thread(target=process_page_sync, args=(i, text)); page_threads.append(thread); thread.start()
-               else: texts[i] = "[No text extracted]"
-          # Display results as they become available (or after join)
-          st.progress(0) # Placeholder for progress
-          # Wait for threads and display - consider using st.empty() for updates
-          for thread in page_threads: thread.join()
           for i in range(total_pages):
                 with st.expander(f"Page {i+1}"):
                      st.markdown(texts.get(i, "[Error getting text]"))
-                     audio_file = audios.get(i)
                      if audio_file: play_and_download_audio(audio_file)
                      else: st.caption("Audio generation failed or pending.")
-      except Exception as pdf_e: st.error(f"Err read PDF: {pdf_e}"); st.exception(pdf_e)
 # ==============================================================================
 # WebSocket Server Logic
 # ==============================================================================

            return
       audio_processor = AudioProcessor() # Instance for this run
       try:
+          reader=PdfReader(pdf_file)
+          # Check if PDF is password protected (optional but good practice)
+          if reader.is_encrypted:
+              st.warning("PDF is encrypted and cannot be processed.")
+              return
+          total_pages=min(len(reader.pages),max_pages);
           st.write(f"Processing first {total_pages} pages of '{pdf_file.name}'...")
           texts, audios={}, {}; page_threads = []; results_lock = threading.Lock()
+          # --- Corrected process_page_sync function ---
           def process_page_sync(page_num, page_text):
+              # Runs async audio generation using asyncio.run in this thread
+              async def run_async_audio():
+                  # Ensure audio_processor is accessible (it is, from outer scope)
+                  return await audio_processor.create_audio(page_text, voice)
+              try: # Start of the try block
+                  # It's generally better not to run asyncio.run inside threads repeatedly
+                  # if the main loop is async, but in Streamlit context this might be necessary.
+                  audio_path = asyncio.run(run_async_audio()) # Attempt to run async func
+                  if audio_path: # Check result *inside* the try block
+                      with results_lock:
+                           audios[page_num] = audio_path # Update shared dict safely
+              except RuntimeError as run_err:
+                   # Handle cases where asyncio.run is called from an already running loop
+                   # This might happen depending on Streamlit's internal async handling
+                   print(f"RuntimeError processing page {page_num+1} (asyncio loop issue?): {run_err}")
+                   # Fallback? Or just log the error.
+              except Exception as page_e: # Correctly indented except block
+                  print(f"Err process page {page_num+1}: {page_e}")
+          # --- End of corrected function ---
           # Start threads
           for i in range(total_pages):
+               try:
+                   page = reader.pages[i]
+                   text = page.extract_text()
+                   if text and text.strip(): # Check if text extraction yielded something meaningful
+                       texts[i]=text
+                       # Start a new thread for each page's audio processing
+                       thread = threading.Thread(target=process_page_sync, args=(i, text))
+                       page_threads.append(thread)
+                       thread.start()
+                   else: texts[i] = "[No text extracted or page empty]"
+               except Exception as extract_e:
+                    print(f"Error extracting text from page {i+1}: {extract_e}")
+                    texts[i] = f"[Error extracting text: {extract_e}]"
+          # Wait for threads and display results
+          progress_bar = st.progress(0.0)
+          total_threads = len(page_threads)
+          completed_threads = 0
+          while completed_threads < total_threads:
+               completed_threads = total_threads - sum(t.is_alive() for t in page_threads)
+               progress = completed_threads / total_threads if total_threads > 0 else 1.0
+               progress_bar.progress(progress)
+               time.sleep(0.2) # Brief sleep to avoid busy-waiting
+          progress_bar.progress(1.0) # Ensure it reaches 100%
+          # Display results after all threads are done (or tried)
+          st.write("Processing complete. Displaying results:")
           for i in range(total_pages):
                 with st.expander(f"Page {i+1}"):
                      st.markdown(texts.get(i, "[Error getting text]"))
+                     audio_file = audios.get(i) # Get result from shared dict
                      if audio_file: play_and_download_audio(audio_file)
                      else: st.caption("Audio generation failed or pending.")
+      except Exception as pdf_e: st.error(f"Error reading PDF: {pdf_e}"); st.exception(pdf_e)
 # ==============================================================================
 # WebSocket Server Logic
 # ==============================================================================