raksama19 committed on
Commit
fd1e4ad
·
verified ·
1 Parent(s): bc0e0a2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -24
app.py CHANGED
@@ -131,13 +131,14 @@ def convert_pdf_to_images_gradio(pdf_file):
131
  raise Exception(f"Error converting PDF: {str(e)}")
132
 
133
 
134
- def process_pdf_document(pdf_file, model, progress=gr.Progress()):
135
  """Process uploaded PDF file page by page"""
136
  if pdf_file is None:
137
  return "No PDF file uploaded", ""
138
 
139
  try:
140
- progress(0.1, desc="Converting PDF to images...")
 
141
  images = convert_pdf_to_images_gradio(pdf_file)
142
 
143
  if not images:
@@ -146,8 +147,8 @@ def process_pdf_document(pdf_file, model, progress=gr.Progress()):
146
  all_results = []
147
 
148
  for page_idx, pil_image in enumerate(images):
149
- progress((page_idx + 1) / len(images) * 0.8 + 0.1,
150
- desc=f"Processing page {page_idx + 1}/{len(images)}...")
151
 
152
  layout_output = model.chat("Parse the reading order of this document.", pil_image)
153
 
@@ -172,7 +173,8 @@ def process_pdf_document(pdf_file, model, progress=gr.Progress()):
172
  }
173
  all_results.append(page_result)
174
 
175
- progress(1.0, desc="Processing complete!")
 
176
 
177
  combined_markdown = "\n\n---\n\n".join([
178
  f"# Page {result['page_number']}\n\n{result['markdown']}"
@@ -322,31 +324,64 @@ def start_processing():
322
  """Immediately update status when processing starts"""
323
  return "🔄 Processing PDF... Please wait", gr.Tabs(visible=False)
324
 
325
- def process_uploaded_pdf(pdf_file, progress=gr.Progress()):
326
- """Main processing function for uploaded PDF"""
327
  global processed_markdown, show_results_tab
328
 
329
  if dolphin_model is None:
330
- return "❌ Model not loaded", gr.Tabs(visible=False)
 
331
 
332
  if pdf_file is None:
333
- return "❌ No PDF uploaded", gr.Tabs(visible=False)
 
334
 
335
  try:
336
- combined_markdown, status = process_pdf_document(pdf_file, dolphin_model, progress)
 
 
 
 
 
 
 
 
337
 
338
  if status == "processing_complete":
339
  processed_markdown = combined_markdown
340
  show_results_tab = True
341
- return "✅ PDF processed successfully! Check the 'Document' tab above.", gr.Tabs(visible=True)
342
  else:
343
  show_results_tab = False
344
- return combined_markdown, gr.Tabs(visible=False)
345
 
346
  except Exception as e:
347
  show_results_tab = False
348
  error_msg = f"❌ Error processing PDF: {str(e)}"
349
- return error_msg, gr.Tabs(visible=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
350
 
351
 
352
  def get_processed_markdown():
@@ -494,19 +529,10 @@ with gr.Blocks(
494
 
495
  # Event handlers
496
  process_btn.click(
497
- fn=start_processing,
498
- outputs=[status_output, results_tab]
499
- ).then(
500
- fn=lambda: gr.HTML(visible=True),
501
- outputs=[progress_space]
502
- ).then(
503
- fn=process_uploaded_pdf,
504
  inputs=[pdf_input],
505
  outputs=[status_output, results_tab],
506
- show_progress="full"
507
- ).then(
508
- fn=lambda: gr.HTML(visible=False),
509
- outputs=[progress_space]
510
  ).then(
511
  fn=get_processed_markdown,
512
  outputs=[markdown_display]
 
131
  raise Exception(f"Error converting PDF: {str(e)}")
132
 
133
 
134
+ def process_pdf_document(pdf_file, model, progress_callback=None):
135
  """Process uploaded PDF file page by page"""
136
  if pdf_file is None:
137
  return "No PDF file uploaded", ""
138
 
139
  try:
140
+ if progress_callback:
141
+ progress_callback("🔄 Converting PDF to images...")
142
  images = convert_pdf_to_images_gradio(pdf_file)
143
 
144
  if not images:
 
147
  all_results = []
148
 
149
  for page_idx, pil_image in enumerate(images):
150
+ if progress_callback:
151
+ progress_callback(f"🔄 Processing page {page_idx + 1}/{len(images)}...")
152
 
153
  layout_output = model.chat("Parse the reading order of this document.", pil_image)
154
 
 
173
  }
174
  all_results.append(page_result)
175
 
176
+ if progress_callback:
177
+ progress_callback("🔄 Finalizing document...")
178
 
179
  combined_markdown = "\n\n---\n\n".join([
180
  f"# Page {result['page_number']}\n\n{result['markdown']}"
 
324
  """Immediately update status when processing starts"""
325
  return "🔄 Processing PDF... Please wait", gr.Tabs(visible=False)
326
 
327
+ def process_uploaded_pdf_with_progress(pdf_file):
328
+ """Main processing function with custom progress updates"""
329
  global processed_markdown, show_results_tab
330
 
331
  if dolphin_model is None:
332
+ yield "❌ Model not loaded", gr.Tabs(visible=False)
333
+ return
334
 
335
  if pdf_file is None:
336
+ yield "❌ No PDF uploaded", gr.Tabs(visible=False)
337
+ return
338
 
339
  try:
340
+ def update_progress(message):
341
+ return message
342
+
343
+ # Process with custom progress callback
344
+ for progress_msg in process_pdf_document_with_updates(pdf_file, dolphin_model):
345
+ yield progress_msg, gr.Tabs(visible=False)
346
+
347
+ # Final result
348
+ combined_markdown, status = process_pdf_document(pdf_file, dolphin_model)
349
 
350
  if status == "processing_complete":
351
  processed_markdown = combined_markdown
352
  show_results_tab = True
353
+ yield "✅ PDF processed successfully! Check the 'Document' tab above.", gr.Tabs(visible=True)
354
  else:
355
  show_results_tab = False
356
+ yield combined_markdown, gr.Tabs(visible=False)
357
 
358
  except Exception as e:
359
  show_results_tab = False
360
  error_msg = f"❌ Error processing PDF: {str(e)}"
361
+ yield error_msg, gr.Tabs(visible=False)
362
+
363
+ def process_pdf_document_with_updates(pdf_file, model):
364
+ """Generator that yields progress updates"""
365
+ try:
366
+ yield "🔄 Converting PDF to images..."
367
+ images = convert_pdf_to_images_gradio(pdf_file)
368
+
369
+ if not images:
370
+ yield "❌ Failed to convert PDF to images"
371
+ return
372
+
373
+ for page_idx, pil_image in enumerate(images):
374
+ yield f"🔄 Processing page {page_idx + 1}/{len(images)}..."
375
+
376
+ # Small delay to show progress
377
+ import time
378
+ time.sleep(0.1)
379
+
380
+ yield "🔄 Finalizing document..."
381
+ time.sleep(0.5)
382
+
383
+ except Exception as e:
384
+ yield f"❌ Error: {str(e)}"
385
 
386
 
387
  def get_processed_markdown():
 
529
 
530
  # Event handlers
531
  process_btn.click(
532
+ fn=process_uploaded_pdf_with_progress,
 
 
 
 
 
 
533
  inputs=[pdf_input],
534
  outputs=[status_output, results_tab],
535
+ show_progress=False
 
 
 
536
  ).then(
537
  fn=get_processed_markdown,
538
  outputs=[markdown_display]