Pavan147 committed on
Commit
8dc569d
·
verified ·
1 Parent(s): aa63203

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -35
app.py CHANGED
@@ -98,7 +98,6 @@
98
  # demo.launch()
99
 
100
  import re
101
- import json
102
  import gradio as gr
103
  from transformers import AutoProcessor, AutoModelForImageTextToText
104
  from PIL import Image
@@ -107,28 +106,7 @@ from PIL import Image
107
  processor = AutoProcessor.from_pretrained("ds4sd/SmolDocling-256M-preview")
108
  model = AutoModelForImageTextToText.from_pretrained("ds4sd/SmolDocling-256M-preview")
109
 
110
- def parse_docling_to_json(docling_text):
111
- # Remove unwanted tags like <otsl>, </otsl>, <loc_...>
112
- cleaned = re.sub(r"</?otsl>|<loc_[^>]+>", "", docling_text)
113
-
114
- # Split by line break <nl>
115
- lines = cleaned.split("<nl>")
116
- table = []
117
- for line in lines:
118
- if not line.strip():
119
- continue
120
- # Extract all <fcel> values
121
- cells = re.findall(r"<fcel>([^<]+)", line)
122
- # Convert to floats if possible
123
- try:
124
- row = [float(cell) for cell in cells]
125
- except ValueError:
126
- # If conversion fails, keep as string
127
- row = cells
128
- table.append(row)
129
- return json.dumps(table, indent=2)
130
-
131
- def smoldocling_readimage(image, prompt_text):
132
  messages = [
133
  {"role": "user", "content": [{"type": "image"}, {"type": "text", "text": prompt_text}]}
134
  ]
@@ -138,21 +116,49 @@ def smoldocling_readimage(image, prompt_text):
138
  prompt_length = inputs.input_ids.shape[1]
139
  generated = outputs[:, prompt_length:]
140
  result = processor.batch_decode(generated, skip_special_tokens=False)[0]
141
-
142
- # Parse raw docling output to JSON
143
- json_output = parse_docling_to_json(result)
144
- return f"<pre>{json_output}</pre>"
145
-
146
- # Gradio UI
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  demo = gr.Interface(
148
- fn=smoldocling_readimage,
149
  inputs=[
150
- gr.Image(type="pil", label="Upload Image"),
151
- gr.Textbox(lines=1, placeholder="Enter prompt (e.g. Convert to docling)", label="Prompt"),
152
  ],
153
- outputs="html",
154
- title="SmolDocling Web App",
155
- description="Upload a document image and convert it to structured docling format."
156
  )
157
 
158
  demo.launch()
 
98
  # demo.launch()
99
 
100
  import re
 
101
  import gradio as gr
102
  from transformers import AutoProcessor, AutoModelForImageTextToText
103
  from PIL import Image
 
106
  processor = AutoProcessor.from_pretrained("ds4sd/SmolDocling-256M-preview")
107
  model = AutoModelForImageTextToText.from_pretrained("ds4sd/SmolDocling-256M-preview")
108
 
109
+ def smoldocling_readimage(image, prompt_text="Convert to docling"):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  messages = [
111
  {"role": "user", "content": [{"type": "image"}, {"type": "text", "text": prompt_text}]}
112
  ]
 
116
  prompt_length = inputs.input_ids.shape[1]
117
  generated = outputs[:, prompt_length:]
118
  result = processor.batch_decode(generated, skip_special_tokens=False)[0]
119
+ return result.replace("<end_of_utterance>", "").strip()
120
+
121
def extract_numbers(docling_text):
    """Return every numeric value found in *docling_text* as a list of floats.

    Markup tags such as ``<loc_57>`` or ``<fcel>`` are blanked out first so
    that digits belonging to a tag name are not reported as document values.
    A leading ``+``/``-`` is kept as the sign only when it does not directly
    follow a digit, letter, underscore or dot, so ``"-5"`` yields ``-5.0``
    while ``"3-4"`` yields ``3.0`` and ``4.0``.

    Args:
        docling_text: Text to scan, e.g. decoded model output.

    Returns:
        The numbers in order of appearance; an empty list when none are found.
    """
    # Replace each tag with a space (not "") so neighbouring values such as
    # "12<nl>34" stay two separate numbers instead of fusing into "1234".
    plain_text = re.sub(r"</?[A-Za-z_]\w*>", " ", docling_text)

    # The sign is grouped with BOTH alternatives; the original pattern
    # "[-+]?\d*\.\d+|\d+" attached it to decimals only, dropping the sign of
    # every integer.
    tokens = re.findall(r"(?:(?<![\w.])[-+])?(?:\d*\.\d+|\d+)", plain_text)
    return [float(token) for token in tokens]
125
+
126
def compare_outputs(img1, img2):
    """Run both images through the model and report how similar their numbers are.

    Each image is converted to text with ``smoldocling_readimage``; the numbers
    in each text are pulled out with ``extract_numbers`` and compared position
    by position. Two values count as a match when they differ by less than
    1e-3. The percentage is computed against the longer of the two number
    lists, so extra values on either side lower the score.

    Args:
        img1: First image, passed through to ``smoldocling_readimage``.
        img2: Second image, passed through to ``smoldocling_readimage``.

    Returns:
        A plain-text report containing both model outputs, the similarity
        percentage and the match count, or a short prompt asking for both
        images when either one is missing.
    """
    # NOTE(review): an image input left empty in the UI presumably arrives
    # here as None -- confirm against the Gradio Image component docs.
    if img1 is None or img2 is None:
        return "Please upload both images before comparing."

    # Extract docling text from both images
    output1 = smoldocling_readimage(img1)
    output2 = smoldocling_readimage(img2)

    # Extract numbers from both outputs
    nums1 = extract_numbers(output1)
    nums2 = extract_numbers(output2)

    # zip() stops at the shorter list, which is exactly the positional overlap.
    matches = sum(1 for a, b in zip(nums1, nums2) if abs(a - b) < 1e-3)

    # Score against the longer list; guard the division when no numbers exist.
    total = max(len(nums1), len(nums2))
    accuracy = (matches / total) * 100 if total > 0 else 0

    return (
        f"Output for Image 1:\n{output1}\n\n"
        f"Output for Image 2:\n{output2}\n\n"
        f"Similarity Accuracy: {accuracy:.2f}%\n"
        f"Matching Values: {matches} out of {total}"
    )
151
+
152
# Gradio UI: two PIL image uploads go to compare_outputs, and the returned
# report is shown as plain text.
demo = gr.Interface(
    title="SmolDocling Image Comparison",
    description="Upload two document images. This app extracts data from both and compares similarity.",
    fn=compare_outputs,
    inputs=[
        gr.Image(label="Upload Image 1", type="pil"),
        gr.Image(label="Upload Image 2", type="pil"),
    ],
    outputs="text",
)
163
 
164
  demo.launch()