AkashDataScience committed on
Commit
8dc2d5d
·
1 Parent(s): 1402288

Adding time

Browse files
Files changed (1) hide show
  1. app.py +23 -22
app.py CHANGED
@@ -12,6 +12,7 @@ from docling_core.types.doc.document import DocTagsDocument
12
  from transformers import AutoProcessor, AutoModelForVision2Seq
13
  from transformers.image_utils import load_image
14
  from pathlib import Path
 
15
 
16
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
17
 
@@ -36,34 +37,35 @@ def get_pdf_page_count(pdf_path):
36
  return len(reader.pages)
37
 
38
  def get_page_image(pdf_path, page_num):
 
39
  images = convert_from_path(pdf_path, first_page=page_num, last_page=page_num)
40
  page_image = images[0]
41
- return page_image
 
42
 
43
  def get_docling_ocr(pdf_path, page_num):
 
44
  result = converter.convert(pdf_path, page_range=(page_num, page_num))
45
  markdown_text_docling = result.document.export_to_markdown()
46
- return markdown_text_docling
 
47
 
48
  def get_paddle_ocr(pdf_path, page_num):
49
- page_image = get_page_image(pdf_path, page_num)
50
-
51
  output = pipeline.predict(input=np.array(page_image))
52
-
53
  markdown_list = []
54
-
55
  for res in output:
56
  md_info = res.markdown
57
  markdown_list.append(md_info)
58
-
59
  markdown_text_paddleOCR = pipeline.concatenate_markdown_pages(markdown_list)
60
- return markdown_text_paddleOCR
 
61
 
62
  def get_smoldocling_ocr(pdf_path, page_num):
63
- page_image = get_page_image(pdf_path, page_num)
 
64
  image = load_image(page_image)
65
-
66
- # Create input messages
67
  messages = [
68
  {
69
  "role": "user",
@@ -73,12 +75,9 @@ def get_smoldocling_ocr(pdf_path, page_num):
73
  ]
74
  },
75
  ]
76
-
77
- # Prepare inputs
78
  prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
79
  inputs = processor(text=prompt, images=[image], return_tensors="pt")
80
  inputs = inputs.to(DEVICE)
81
-
82
  generated_ids = model.generate(**inputs, max_new_tokens=8192)
83
  prompt_length = inputs.input_ids.shape[1]
84
  trimmed_generated_ids = generated_ids[:, prompt_length:]
@@ -86,13 +85,11 @@ def get_smoldocling_ocr(pdf_path, page_num):
86
  trimmed_generated_ids,
87
  skip_special_tokens=False,
88
  )[0].lstrip()
89
-
90
- # Populate document
91
  doctags_doc = DocTagsDocument.from_doctags_and_image_pairs([doctags], [image])
92
  doc = DoclingDocument.load_from_doctags(doctags_doc, document_name="Document")
93
-
94
  markdown_text_smoldocling = doc.export_to_markdown()
95
- return markdown_text_smoldocling
 
96
 
97
  title = "OCR Arena"
98
  description = "A simple Gradio interface to extract text from PDFs and compare OCR models"
@@ -117,16 +114,20 @@ with gr.Blocks(theme=gr.themes.Glass()) as demo:
117
  clear_btn = gr.ClearButton(components=[pdf, page_num])
118
  submit_btn = gr.Button("Submit", variant='primary')
119
 
120
- submit_btn.click(get_page_image, inputs=[pdf, page_num], outputs=original).then(
121
- get_docling_ocr, inputs=[pdf, page_num], outputs=docling_ocr_out).then(
122
- get_paddle_ocr, inputs=[pdf, page_num], outputs=paddle_ocr_out).then(
123
- get_smoldocling_ocr, inputs=[pdf, page_num], outputs=smoldocling_ocr_out)
124
 
125
  with gr.Column():
126
  original = gr.Image(width=640, height=640, label="Original Page", interactive=False)
 
127
  docling_ocr_out = gr.Textbox(label="Docling OCR Output", type="text", interactive=False)
 
128
  paddle_ocr_out = gr.Textbox(label="Paddle OCR Output", type="text", interactive=False)
 
129
  smoldocling_ocr_out = gr.Textbox(label="SmolDocling OCR Output", type="text", interactive=False)
 
130
 
131
  examples_obj = gr.Examples(examples=examples, inputs=[pdf])
132
 
 
12
  from transformers import AutoProcessor, AutoModelForVision2Seq
13
  from transformers.image_utils import load_image
14
  from pathlib import Path
15
+ import time
16
 
17
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
18
 
 
37
  return len(reader.pages)
38
 
39
  def get_page_image(pdf_path, page_num):
40
+ start = time.time()
41
  images = convert_from_path(pdf_path, first_page=page_num, last_page=page_num)
42
  page_image = images[0]
43
+ runtime = time.time() - start
44
+ return page_image, f"{runtime:.2f} s"
45
 
46
  def get_docling_ocr(pdf_path, page_num):
47
+ start = time.time()
48
  result = converter.convert(pdf_path, page_range=(page_num, page_num))
49
  markdown_text_docling = result.document.export_to_markdown()
50
+ runtime = time.time() - start
51
+ return markdown_text_docling, f"{runtime:.2f} s"
52
 
53
  def get_paddle_ocr(pdf_path, page_num):
54
+ start = time.time()
55
+ page_image = get_page_image(pdf_path, page_num)[0]
56
  output = pipeline.predict(input=np.array(page_image))
 
57
  markdown_list = []
 
58
  for res in output:
59
  md_info = res.markdown
60
  markdown_list.append(md_info)
 
61
  markdown_text_paddleOCR = pipeline.concatenate_markdown_pages(markdown_list)
62
+ runtime = time.time() - start
63
+ return markdown_text_paddleOCR, f"{runtime:.2f} s"
64
 
65
  def get_smoldocling_ocr(pdf_path, page_num):
66
+ start = time.time()
67
+ page_image = get_page_image(pdf_path, page_num)[0]
68
  image = load_image(page_image)
 
 
69
  messages = [
70
  {
71
  "role": "user",
 
75
  ]
76
  },
77
  ]
 
 
78
  prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
79
  inputs = processor(text=prompt, images=[image], return_tensors="pt")
80
  inputs = inputs.to(DEVICE)
 
81
  generated_ids = model.generate(**inputs, max_new_tokens=8192)
82
  prompt_length = inputs.input_ids.shape[1]
83
  trimmed_generated_ids = generated_ids[:, prompt_length:]
 
85
  trimmed_generated_ids,
86
  skip_special_tokens=False,
87
  )[0].lstrip()
 
 
88
  doctags_doc = DocTagsDocument.from_doctags_and_image_pairs([doctags], [image])
89
  doc = DoclingDocument.load_from_doctags(doctags_doc, document_name="Document")
 
90
  markdown_text_smoldocling = doc.export_to_markdown()
91
+ runtime = time.time() - start
92
+ return markdown_text_smoldocling, f"{runtime:.2f} s"
93
 
94
  title = "OCR Arena"
95
  description = "A simple Gradio interface to extract text from PDFs and compare OCR models"
 
114
  clear_btn = gr.ClearButton(components=[pdf, page_num])
115
  submit_btn = gr.Button("Submit", variant='primary')
116
 
117
+ submit_btn.click(get_page_image, inputs=[pdf, page_num], outputs=[original, original_runtime]).then(
118
+ get_docling_ocr, inputs=[pdf, page_num], outputs=[docling_ocr_out, docling_ocr_runtime]).then(
119
+ get_paddle_ocr, inputs=[pdf, page_num], outputs=[paddle_ocr_out, paddle_ocr_runtime]).then(
120
+ get_smoldocling_ocr, inputs=[pdf, page_num], outputs=[smoldocling_ocr_out, smoldocling_ocr_runtime])
121
 
122
  with gr.Column():
123
  original = gr.Image(width=640, height=640, label="Original Page", interactive=False)
124
+ original_runtime = gr.Textbox(label="Image Extraction Time", type="text", interactive=False)
125
  docling_ocr_out = gr.Textbox(label="Docling OCR Output", type="text", interactive=False)
126
+ docling_ocr_runtime = gr.Textbox(label="Docling OCR Time", type="text", interactive=False)
127
  paddle_ocr_out = gr.Textbox(label="Paddle OCR Output", type="text", interactive=False)
128
+ paddle_ocr_runtime = gr.Textbox(label="Paddle OCR Time", type="text", interactive=False)
129
  smoldocling_ocr_out = gr.Textbox(label="SmolDocling OCR Output", type="text", interactive=False)
130
+ smoldocling_ocr_runtime = gr.Textbox(label="SmolDocling OCR Time", type="text", interactive=False)
131
 
132
  examples_obj = gr.Examples(examples=examples, inputs=[pdf])
133