Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -131,13 +131,14 @@ def convert_pdf_to_images_gradio(pdf_file):
|
|
131 |
raise Exception(f"Error converting PDF: {str(e)}")
|
132 |
|
133 |
|
134 |
-
def process_pdf_document(pdf_file, model,
|
135 |
"""Process uploaded PDF file page by page"""
|
136 |
if pdf_file is None:
|
137 |
return "No PDF file uploaded", ""
|
138 |
|
139 |
try:
|
140 |
-
|
|
|
141 |
images = convert_pdf_to_images_gradio(pdf_file)
|
142 |
|
143 |
if not images:
|
@@ -146,8 +147,8 @@ def process_pdf_document(pdf_file, model, progress=gr.Progress()):
|
|
146 |
all_results = []
|
147 |
|
148 |
for page_idx, pil_image in enumerate(images):
|
149 |
-
|
150 |
-
|
151 |
|
152 |
layout_output = model.chat("Parse the reading order of this document.", pil_image)
|
153 |
|
@@ -172,7 +173,8 @@ def process_pdf_document(pdf_file, model, progress=gr.Progress()):
|
|
172 |
}
|
173 |
all_results.append(page_result)
|
174 |
|
175 |
-
|
|
|
176 |
|
177 |
combined_markdown = "\n\n---\n\n".join([
|
178 |
f"# Page {result['page_number']}\n\n{result['markdown']}"
|
@@ -322,31 +324,64 @@ def start_processing():
|
|
322 |
"""Immediately update status when processing starts"""
|
323 |
return "π Processing PDF... Please wait", gr.Tabs(visible=False)
|
324 |
|
325 |
-
def
|
326 |
-
"""Main processing function
|
327 |
global processed_markdown, show_results_tab
|
328 |
|
329 |
if dolphin_model is None:
|
330 |
-
|
|
|
331 |
|
332 |
if pdf_file is None:
|
333 |
-
|
|
|
334 |
|
335 |
try:
|
336 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
337 |
|
338 |
if status == "processing_complete":
|
339 |
processed_markdown = combined_markdown
|
340 |
show_results_tab = True
|
341 |
-
|
342 |
else:
|
343 |
show_results_tab = False
|
344 |
-
|
345 |
|
346 |
except Exception as e:
|
347 |
show_results_tab = False
|
348 |
error_msg = f"β Error processing PDF: {str(e)}"
|
349 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
350 |
|
351 |
|
352 |
def get_processed_markdown():
|
@@ -494,19 +529,10 @@ with gr.Blocks(
|
|
494 |
|
495 |
# Event handlers
|
496 |
process_btn.click(
|
497 |
-
fn=
|
498 |
-
outputs=[status_output, results_tab]
|
499 |
-
).then(
|
500 |
-
fn=lambda: gr.HTML(visible=True),
|
501 |
-
outputs=[progress_space]
|
502 |
-
).then(
|
503 |
-
fn=process_uploaded_pdf,
|
504 |
inputs=[pdf_input],
|
505 |
outputs=[status_output, results_tab],
|
506 |
-
show_progress=
|
507 |
-
).then(
|
508 |
-
fn=lambda: gr.HTML(visible=False),
|
509 |
-
outputs=[progress_space]
|
510 |
).then(
|
511 |
fn=get_processed_markdown,
|
512 |
outputs=[markdown_display]
|
|
|
131 |
raise Exception(f"Error converting PDF: {str(e)}")
|
132 |
|
133 |
|
134 |
+
def process_pdf_document(pdf_file, model, progress_callback=None):
|
135 |
"""Process uploaded PDF file page by page"""
|
136 |
if pdf_file is None:
|
137 |
return "No PDF file uploaded", ""
|
138 |
|
139 |
try:
|
140 |
+
if progress_callback:
|
141 |
+
progress_callback("π Converting PDF to images...")
|
142 |
images = convert_pdf_to_images_gradio(pdf_file)
|
143 |
|
144 |
if not images:
|
|
|
147 |
all_results = []
|
148 |
|
149 |
for page_idx, pil_image in enumerate(images):
|
150 |
+
if progress_callback:
|
151 |
+
progress_callback(f"π Processing page {page_idx + 1}/{len(images)}...")
|
152 |
|
153 |
layout_output = model.chat("Parse the reading order of this document.", pil_image)
|
154 |
|
|
|
173 |
}
|
174 |
all_results.append(page_result)
|
175 |
|
176 |
+
if progress_callback:
|
177 |
+
progress_callback("π Finalizing document...")
|
178 |
|
179 |
combined_markdown = "\n\n---\n\n".join([
|
180 |
f"# Page {result['page_number']}\n\n{result['markdown']}"
|
|
|
324 |
"""Immediately update status when processing starts"""
|
325 |
return "π Processing PDF... Please wait", gr.Tabs(visible=False)
|
326 |
|
327 |
+
def process_uploaded_pdf_with_progress(pdf_file):
    """Process an uploaded PDF and stream status updates to the UI.

    This is a generator. Every item it yields is a
    ``(status_message, results_tab_update)`` pair, matching the two outputs
    (status text and results tab) that the click handler binds to it.

    Args:
        pdf_file: The uploaded PDF as handed over by the Gradio input
            component, or ``None`` when nothing was uploaded.

    Yields:
        tuple: A status string and a ``gr.Tabs`` update. The tab is made
        visible only after ``process_pdf_document`` reports
        ``"processing_complete"``.

    Side effects:
        Sets the module-level ``processed_markdown`` on success and
        ``show_results_tab`` on success, on a non-complete status, and on
        error. The two early guard exits leave both globals untouched.
    """
    global processed_markdown, show_results_tab

    if dolphin_model is None:
        yield "β Model not loaded", gr.Tabs(visible=False)
        return

    if pdf_file is None:
        yield "β No PDF uploaded", gr.Tabs(visible=False)
        return

    try:
        # Relay the progress messages first, keeping the results tab hidden.
        # NOTE(review): process_pdf_document_with_updates converts the PDF to
        # images itself and process_pdf_document converts it again below, so
        # the conversion cost is paid twice and the messages are emitted
        # before any page is actually parsed. Consider folding both into a
        # single generator.
        for progress_msg in process_pdf_document_with_updates(pdf_file, dolphin_model):
            yield progress_msg, gr.Tabs(visible=False)

        # The real work: returns the document text plus a status marker.
        combined_markdown, status = process_pdf_document(pdf_file, dolphin_model)

        if status == "processing_complete":
            processed_markdown = combined_markdown
            show_results_tab = True
            yield (
                "✅ PDF processed successfully! Check the 'Document' tab above.",
                gr.Tabs(visible=True),
            )
        else:
            # On any other status the first element carries the failure
            # message (e.g. "No PDF file uploaded"), so surface it as status.
            show_results_tab = False
            yield combined_markdown, gr.Tabs(visible=False)

    except Exception as e:
        # Top-level UI boundary: report the failure instead of crashing the
        # event handler.
        show_results_tab = False
        error_msg = f"β Error processing PDF: {e}"
        yield error_msg, gr.Tabs(visible=False)
|
362 |
+
|
363 |
+
def process_pdf_document_with_updates(pdf_file, model):
    """Yield human-readable progress messages for a PDF.

    This generator only reports progress: it converts the PDF to images to
    learn the page count and then emits one message per page. It never calls
    ``model``; the parameter is accepted but unused here.

    Args:
        pdf_file: The uploaded PDF, passed straight to
            ``convert_pdf_to_images_gradio``.
        model: Unused in this function.

    Yields:
        str: A status message. Any exception raised along the way is caught
        and reported as a final ``"β Error: ..."`` message instead of
        propagating to the caller.
    """
    # Imported once up front: the original imported inside the page loop, so
    # the import statement ran on every iteration and the sleep after the
    # loop depended on the loop body having executed.
    import time

    try:
        yield "π Converting PDF to images..."
        images = convert_pdf_to_images_gradio(pdf_file)

        if not images:
            yield "β Failed to convert PDF to images"
            return

        total_pages = len(images)
        for page_idx, _pil_image in enumerate(images):
            yield f"π Processing page {page_idx + 1}/{total_pages}..."

            # Small delay so each message is visible in the UI.
            time.sleep(0.1)

        yield "π Finalizing document..."
        time.sleep(0.5)

    except Exception as e:
        yield f"β Error: {e}"
|
385 |
|
386 |
|
387 |
def get_processed_markdown():
|
|
|
529 |
|
530 |
# Event handlers
|
531 |
process_btn.click(
|
532 |
+
fn=process_uploaded_pdf_with_progress,
|
|
|
|
|
|
|
|
|
|
|
|
|
533 |
inputs=[pdf_input],
|
534 |
outputs=[status_output, results_tab],
|
535 |
+
show_progress=False
|
|
|
|
|
|
|
536 |
).then(
|
537 |
fn=get_processed_markdown,
|
538 |
outputs=[markdown_display]
|