Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -131,14 +131,13 @@ def convert_pdf_to_images_gradio(pdf_file):
|
|
131 |
raise Exception(f"Error converting PDF: {str(e)}")
|
132 |
|
133 |
|
134 |
-
def process_pdf_document(pdf_file, model,
|
135 |
"""Process uploaded PDF file page by page"""
|
136 |
if pdf_file is None:
|
137 |
return "No PDF file uploaded", ""
|
138 |
|
139 |
try:
|
140 |
-
|
141 |
-
progress_callback("π Converting PDF to images...")
|
142 |
images = convert_pdf_to_images_gradio(pdf_file)
|
143 |
|
144 |
if not images:
|
@@ -147,8 +146,8 @@ def process_pdf_document(pdf_file, model, progress_callback=None):
|
|
147 |
all_results = []
|
148 |
|
149 |
for page_idx, pil_image in enumerate(images):
|
150 |
-
|
151 |
-
|
152 |
|
153 |
layout_output = model.chat("Parse the reading order of this document.", pil_image)
|
154 |
|
@@ -173,8 +172,7 @@ def process_pdf_document(pdf_file, model, progress_callback=None):
|
|
173 |
}
|
174 |
all_results.append(page_result)
|
175 |
|
176 |
-
|
177 |
-
progress_callback("π Finalizing document...")
|
178 |
|
179 |
combined_markdown = "\n\n---\n\n".join([
|
180 |
f"# Page {result['page_number']}\n\n{result['markdown']}"
|
@@ -320,68 +318,31 @@ processed_markdown = ""
|
|
320 |
show_results_tab = False
|
321 |
|
322 |
|
323 |
-
def
|
324 |
-
"""
|
325 |
-
return "π Processing PDF... Please wait", gr.Tabs(visible=False)
|
326 |
-
|
327 |
-
def process_uploaded_pdf_with_progress(pdf_file):
|
328 |
-
"""Main processing function with custom progress updates"""
|
329 |
global processed_markdown, show_results_tab
|
330 |
|
331 |
if dolphin_model is None:
|
332 |
-
|
333 |
-
return
|
334 |
|
335 |
if pdf_file is None:
|
336 |
-
|
337 |
-
return
|
338 |
|
339 |
try:
|
340 |
-
|
341 |
-
return message
|
342 |
-
|
343 |
-
# Process with custom progress callback
|
344 |
-
for progress_msg in process_pdf_document_with_updates(pdf_file, dolphin_model):
|
345 |
-
yield progress_msg, gr.Tabs(visible=False)
|
346 |
-
|
347 |
-
# Final result
|
348 |
-
combined_markdown, status = process_pdf_document(pdf_file, dolphin_model)
|
349 |
|
350 |
if status == "processing_complete":
|
351 |
processed_markdown = combined_markdown
|
352 |
show_results_tab = True
|
353 |
-
|
354 |
else:
|
355 |
show_results_tab = False
|
356 |
-
|
357 |
|
358 |
except Exception as e:
|
359 |
show_results_tab = False
|
360 |
error_msg = f"❌ Error processing PDF: {str(e)}"
|
361 |
-
|
362 |
-
|
363 |
-
def process_pdf_document_with_updates(pdf_file, model):
|
364 |
-
"""Generator that yields progress updates"""
|
365 |
-
try:
|
366 |
-
yield "π Converting PDF to images..."
|
367 |
-
images = convert_pdf_to_images_gradio(pdf_file)
|
368 |
-
|
369 |
-
if not images:
|
370 |
-
yield "β Failed to convert PDF to images"
|
371 |
-
return
|
372 |
-
|
373 |
-
for page_idx, pil_image in enumerate(images):
|
374 |
-
yield f"π Processing page {page_idx + 1}/{len(images)}..."
|
375 |
-
|
376 |
-
# Small delay to show progress
|
377 |
-
import time
|
378 |
-
time.sleep(0.1)
|
379 |
-
|
380 |
-
yield "π Finalizing document..."
|
381 |
-
time.sleep(0.5)
|
382 |
-
|
383 |
-
except Exception as e:
|
384 |
-
yield f"β Error: {str(e)}"
|
385 |
|
386 |
|
387 |
def get_processed_markdown():
|
@@ -395,7 +356,7 @@ def clear_all():
|
|
395 |
global processed_markdown, show_results_tab
|
396 |
processed_markdown = ""
|
397 |
show_results_tab = False
|
398 |
-
return None, "
|
399 |
|
400 |
|
401 |
# Create Gradio interface
|
@@ -484,9 +445,9 @@ with gr.Blocks(
|
|
484 |
elem_id="progress-container"
|
485 |
)
|
486 |
|
487 |
-
# Status
|
488 |
status_output = gr.Markdown(
|
489 |
-
"
|
490 |
elem_classes="status-message"
|
491 |
)
|
492 |
|
@@ -529,10 +490,10 @@ with gr.Blocks(
|
|
529 |
|
530 |
# Event handlers
|
531 |
process_btn.click(
|
532 |
-
fn=
|
533 |
inputs=[pdf_input],
|
534 |
outputs=[status_output, results_tab],
|
535 |
-
show_progress=
|
536 |
).then(
|
537 |
fn=get_processed_markdown,
|
538 |
outputs=[markdown_display]
|
|
|
131 |
raise Exception(f"Error converting PDF: {str(e)}")
|
132 |
|
133 |
|
134 |
+
def process_pdf_document(pdf_file, model, progress=gr.Progress()):
|
135 |
"""Process uploaded PDF file page by page"""
|
136 |
if pdf_file is None:
|
137 |
return "No PDF file uploaded", ""
|
138 |
|
139 |
try:
|
140 |
+
progress(0.1, desc="Converting PDF to images...")
|
|
|
141 |
images = convert_pdf_to_images_gradio(pdf_file)
|
142 |
|
143 |
if not images:
|
|
|
146 |
all_results = []
|
147 |
|
148 |
for page_idx, pil_image in enumerate(images):
|
149 |
+
progress((page_idx + 1) / len(images) * 0.8 + 0.1,
|
150 |
+
desc=f"Processing page {page_idx + 1}/{len(images)}...")
|
151 |
|
152 |
layout_output = model.chat("Parse the reading order of this document.", pil_image)
|
153 |
|
|
|
172 |
}
|
173 |
all_results.append(page_result)
|
174 |
|
175 |
+
progress(1.0, desc="Processing complete!")
|
|
|
176 |
|
177 |
combined_markdown = "\n\n---\n\n".join([
|
178 |
f"# Page {result['page_number']}\n\n{result['markdown']}"
|
|
|
318 |
show_results_tab = False
|
319 |
|
320 |
|
321 |
+
def process_uploaded_pdf(pdf_file, progress=gr.Progress()):
|
322 |
+
"""Main processing function for uploaded PDF"""
|
|
|
|
|
|
|
|
|
323 |
global processed_markdown, show_results_tab
|
324 |
|
325 |
if dolphin_model is None:
|
326 |
+
return "❌ Model not loaded", gr.Tabs(visible=False)
|
|
|
327 |
|
328 |
if pdf_file is None:
|
329 |
+
return "❌ No PDF uploaded", gr.Tabs(visible=False)
|
|
|
330 |
|
331 |
try:
|
332 |
+
combined_markdown, status = process_pdf_document(pdf_file, dolphin_model, progress)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
333 |
|
334 |
if status == "processing_complete":
|
335 |
processed_markdown = combined_markdown
|
336 |
show_results_tab = True
|
337 |
+
return "β
PDF processed successfully! Check the 'Document' tab above.", gr.Tabs(visible=True)
|
338 |
else:
|
339 |
show_results_tab = False
|
340 |
+
return combined_markdown, gr.Tabs(visible=False)
|
341 |
|
342 |
except Exception as e:
|
343 |
show_results_tab = False
|
344 |
error_msg = f"❌ Error processing PDF: {str(e)}"
|
345 |
+
return error_msg, gr.Tabs(visible=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
346 |
|
347 |
|
348 |
def get_processed_markdown():
|
|
|
356 |
global processed_markdown, show_results_tab
|
357 |
processed_markdown = ""
|
358 |
show_results_tab = False
|
359 |
+
return None, "✅ Ready to process your PDF", gr.Tabs(visible=False)
|
360 |
|
361 |
|
362 |
# Create Gradio interface
|
|
|
445 |
elem_id="progress-container"
|
446 |
)
|
447 |
|
448 |
+
# Status output (hidden during processing)
|
449 |
status_output = gr.Markdown(
|
450 |
+
"✅ Ready to process your PDF",
|
451 |
elem_classes="status-message"
|
452 |
)
|
453 |
|
|
|
490 |
|
491 |
# Event handlers
|
492 |
process_btn.click(
|
493 |
+
fn=process_uploaded_pdf,
|
494 |
inputs=[pdf_input],
|
495 |
outputs=[status_output, results_tab],
|
496 |
+
show_progress=True
|
497 |
).then(
|
498 |
fn=get_processed_markdown,
|
499 |
outputs=[markdown_display]
|