raksama19 committed on
Commit
d3ca789
·
verified ·
1 Parent(s): fd1e4ad

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -57
app.py CHANGED
@@ -131,14 +131,13 @@ def convert_pdf_to_images_gradio(pdf_file):
131
  raise Exception(f"Error converting PDF: {str(e)}")
132
 
133
 
134
- def process_pdf_document(pdf_file, model, progress_callback=None):
135
  """Process uploaded PDF file page by page"""
136
  if pdf_file is None:
137
  return "No PDF file uploaded", ""
138
 
139
  try:
140
- if progress_callback:
141
- progress_callback("🔄 Converting PDF to images...")
142
  images = convert_pdf_to_images_gradio(pdf_file)
143
 
144
  if not images:
@@ -147,8 +146,8 @@ def process_pdf_document(pdf_file, model, progress_callback=None):
147
  all_results = []
148
 
149
  for page_idx, pil_image in enumerate(images):
150
- if progress_callback:
151
- progress_callback(f"🔄 Processing page {page_idx + 1}/{len(images)}...")
152
 
153
  layout_output = model.chat("Parse the reading order of this document.", pil_image)
154
 
@@ -173,8 +172,7 @@ def process_pdf_document(pdf_file, model, progress_callback=None):
173
  }
174
  all_results.append(page_result)
175
 
176
- if progress_callback:
177
- progress_callback("🔄 Finalizing document...")
178
 
179
  combined_markdown = "\n\n---\n\n".join([
180
  f"# Page {result['page_number']}\n\n{result['markdown']}"
@@ -320,68 +318,31 @@ processed_markdown = ""
320
  show_results_tab = False
321
 
322
 
323
- def start_processing():
324
- """Immediately update status when processing starts"""
325
- return "🔄 Processing PDF... Please wait", gr.Tabs(visible=False)
326
-
327
- def process_uploaded_pdf_with_progress(pdf_file):
328
- """Main processing function with custom progress updates"""
329
  global processed_markdown, show_results_tab
330
 
331
  if dolphin_model is None:
332
- yield "❌ Model not loaded", gr.Tabs(visible=False)
333
- return
334
 
335
  if pdf_file is None:
336
- yield "❌ No PDF uploaded", gr.Tabs(visible=False)
337
- return
338
 
339
  try:
340
- def update_progress(message):
341
- return message
342
-
343
- # Process with custom progress callback
344
- for progress_msg in process_pdf_document_with_updates(pdf_file, dolphin_model):
345
- yield progress_msg, gr.Tabs(visible=False)
346
-
347
- # Final result
348
- combined_markdown, status = process_pdf_document(pdf_file, dolphin_model)
349
 
350
  if status == "processing_complete":
351
  processed_markdown = combined_markdown
352
  show_results_tab = True
353
- yield "✅ PDF processed successfully! Check the 'Document' tab above.", gr.Tabs(visible=True)
354
  else:
355
  show_results_tab = False
356
- yield combined_markdown, gr.Tabs(visible=False)
357
 
358
  except Exception as e:
359
  show_results_tab = False
360
  error_msg = f"❌ Error processing PDF: {str(e)}"
361
- yield error_msg, gr.Tabs(visible=False)
362
-
363
- def process_pdf_document_with_updates(pdf_file, model):
364
- """Generator that yields progress updates"""
365
- try:
366
- yield "🔄 Converting PDF to images..."
367
- images = convert_pdf_to_images_gradio(pdf_file)
368
-
369
- if not images:
370
- yield "❌ Failed to convert PDF to images"
371
- return
372
-
373
- for page_idx, pil_image in enumerate(images):
374
- yield f"🔄 Processing page {page_idx + 1}/{len(images)}..."
375
-
376
- # Small delay to show progress
377
- import time
378
- time.sleep(0.1)
379
-
380
- yield "🔄 Finalizing document..."
381
- time.sleep(0.5)
382
-
383
- except Exception as e:
384
- yield f"❌ Error: {str(e)}"
385
 
386
 
387
  def get_processed_markdown():
@@ -395,7 +356,7 @@ def clear_all():
395
  global processed_markdown, show_results_tab
396
  processed_markdown = ""
397
  show_results_tab = False
398
- return None, "Upload a PDF to get started", gr.Tabs(visible=False)
399
 
400
 
401
  # Create Gradio interface
@@ -484,9 +445,9 @@ with gr.Blocks(
484
  elem_id="progress-container"
485
  )
486
 
487
- # Status and progress
488
  status_output = gr.Markdown(
489
- "Upload a PDF to get started",
490
  elem_classes="status-message"
491
  )
492
 
@@ -529,10 +490,10 @@ with gr.Blocks(
529
 
530
  # Event handlers
531
  process_btn.click(
532
- fn=process_uploaded_pdf_with_progress,
533
  inputs=[pdf_input],
534
  outputs=[status_output, results_tab],
535
- show_progress=False
536
  ).then(
537
  fn=get_processed_markdown,
538
  outputs=[markdown_display]
 
131
  raise Exception(f"Error converting PDF: {str(e)}")
132
 
133
 
134
+ def process_pdf_document(pdf_file, model, progress=gr.Progress()):
135
  """Process uploaded PDF file page by page"""
136
  if pdf_file is None:
137
  return "No PDF file uploaded", ""
138
 
139
  try:
140
+ progress(0.1, desc="Converting PDF to images...")
 
141
  images = convert_pdf_to_images_gradio(pdf_file)
142
 
143
  if not images:
 
146
  all_results = []
147
 
148
  for page_idx, pil_image in enumerate(images):
149
+ progress((page_idx + 1) / len(images) * 0.8 + 0.1,
150
+ desc=f"Processing page {page_idx + 1}/{len(images)}...")
151
 
152
  layout_output = model.chat("Parse the reading order of this document.", pil_image)
153
 
 
172
  }
173
  all_results.append(page_result)
174
 
175
+ progress(1.0, desc="Processing complete!")
 
176
 
177
  combined_markdown = "\n\n---\n\n".join([
178
  f"# Page {result['page_number']}\n\n{result['markdown']}"
 
318
  show_results_tab = False
319
 
320
 
321
+ def process_uploaded_pdf(pdf_file, progress=gr.Progress()):
322
+ """Main processing function for uploaded PDF"""
 
 
 
 
323
  global processed_markdown, show_results_tab
324
 
325
  if dolphin_model is None:
326
+ return "❌ Model not loaded", gr.Tabs(visible=False)
 
327
 
328
  if pdf_file is None:
329
+ return "❌ No PDF uploaded", gr.Tabs(visible=False)
 
330
 
331
  try:
332
+ combined_markdown, status = process_pdf_document(pdf_file, dolphin_model, progress)
 
 
 
 
 
 
 
 
333
 
334
  if status == "processing_complete":
335
  processed_markdown = combined_markdown
336
  show_results_tab = True
337
+ return "✅ PDF processed successfully! Check the 'Document' tab above.", gr.Tabs(visible=True)
338
  else:
339
  show_results_tab = False
340
+ return combined_markdown, gr.Tabs(visible=False)
341
 
342
  except Exception as e:
343
  show_results_tab = False
344
  error_msg = f"❌ Error processing PDF: {str(e)}"
345
+ return error_msg, gr.Tabs(visible=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
346
 
347
 
348
  def get_processed_markdown():
 
356
  global processed_markdown, show_results_tab
357
  processed_markdown = ""
358
  show_results_tab = False
359
+ return None, "✅ Ready to process your PDF", gr.Tabs(visible=False)
360
 
361
 
362
  # Create Gradio interface
 
445
  elem_id="progress-container"
446
  )
447
 
448
+ # Status output (hidden during processing)
449
  status_output = gr.Markdown(
450
+ "✅ Ready to process your PDF",
451
  elem_classes="status-message"
452
  )
453
 
 
490
 
491
  # Event handlers
492
  process_btn.click(
493
+ fn=process_uploaded_pdf,
494
  inputs=[pdf_input],
495
  outputs=[status_output, results_tab],
496
+ show_progress=True
497
  ).then(
498
  fn=get_processed_markdown,
499
  outputs=[markdown_display]