Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -98,7 +98,6 @@
|
|
98 |
# demo.launch()
|
99 |
|
100 |
import re
|
101 |
-
import json
|
102 |
import gradio as gr
|
103 |
from transformers import AutoProcessor, AutoModelForImageTextToText
|
104 |
from PIL import Image
|
@@ -107,28 +106,7 @@ from PIL import Image
|
|
107 |
processor = AutoProcessor.from_pretrained("ds4sd/SmolDocling-256M-preview")
|
108 |
model = AutoModelForImageTextToText.from_pretrained("ds4sd/SmolDocling-256M-preview")
|
109 |
|
110 |
-
def
|
111 |
-
# Remove unwanted tags like <otsl>, </otsl>, <loc_...>
|
112 |
-
cleaned = re.sub(r"</?otsl>|<loc_[^>]+>", "", docling_text)
|
113 |
-
|
114 |
-
# Split by line break <nl>
|
115 |
-
lines = cleaned.split("<nl>")
|
116 |
-
table = []
|
117 |
-
for line in lines:
|
118 |
-
if not line.strip():
|
119 |
-
continue
|
120 |
-
# Extract all <fcel> values
|
121 |
-
cells = re.findall(r"<fcel>([^<]+)", line)
|
122 |
-
# Convert to floats if possible
|
123 |
-
try:
|
124 |
-
row = [float(cell) for cell in cells]
|
125 |
-
except ValueError:
|
126 |
-
# If conversion fails, keep as string
|
127 |
-
row = cells
|
128 |
-
table.append(row)
|
129 |
-
return json.dumps(table, indent=2)
|
130 |
-
|
131 |
-
def smoldocling_readimage(image, prompt_text):
|
132 |
messages = [
|
133 |
{"role": "user", "content": [{"type": "image"}, {"type": "text", "text": prompt_text}]}
|
134 |
]
|
@@ -138,21 +116,49 @@ def smoldocling_readimage(image, prompt_text):
|
|
138 |
prompt_length = inputs.input_ids.shape[1]
|
139 |
generated = outputs[:, prompt_length:]
|
140 |
result = processor.batch_decode(generated, skip_special_tokens=False)[0]
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
147 |
demo = gr.Interface(
|
148 |
-
fn=
|
149 |
inputs=[
|
150 |
-
gr.Image(type="pil", label="Upload Image"),
|
151 |
-
gr.
|
152 |
],
|
153 |
-
outputs="
|
154 |
-
title="SmolDocling
|
155 |
-
description="Upload
|
156 |
)
|
157 |
|
158 |
demo.launch()
|
|
|
98 |
# demo.launch()
|
99 |
|
100 |
import re
|
|
|
101 |
import gradio as gr
|
102 |
from transformers import AutoProcessor, AutoModelForImageTextToText
|
103 |
from PIL import Image
|
|
|
106 |
processor = AutoProcessor.from_pretrained("ds4sd/SmolDocling-256M-preview")
|
107 |
model = AutoModelForImageTextToText.from_pretrained("ds4sd/SmolDocling-256M-preview")
|
108 |
|
109 |
+
def smoldocling_readimage(image, prompt_text="Convert to docling"):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
messages = [
|
111 |
{"role": "user", "content": [{"type": "image"}, {"type": "text", "text": prompt_text}]}
|
112 |
]
|
|
|
116 |
prompt_length = inputs.input_ids.shape[1]
|
117 |
generated = outputs[:, prompt_length:]
|
118 |
result = processor.batch_decode(generated, skip_special_tokens=False)[0]
|
119 |
+
return result.replace("<end_of_utterance>", "").strip()
|
120 |
+
|
121 |
+
def extract_numbers(docling_text):
    """Return every numeric literal found in *docling_text* as a list of floats.

    Both integers and decimals (including forms such as ``.5``) are
    recognised, in the order they appear. A leading ``+`` or ``-`` is kept as
    the number's sign for integers as well as decimals, unless it directly
    follows a digit, in which case it is treated as a separator
    (``"1-2"`` yields ``[1.0, 2.0]``).

    NOTE(review): the scan is purely textual, so digits embedded in any
    markup tokens present in the text are picked up too — confirm whether the
    caller should strip such tokens first.

    Args:
        docling_text: Text to scan for numbers.

    Returns:
        A list of floats; empty when the text contains no digits.
    """
    # The alternatives are grouped so the optional sign applies to integers
    # too; previously "-5" was parsed as 5.0 because the sign was only bound
    # to the decimal branch of the alternation.
    number_pattern = r"(?:(?<!\d)[-+])?(?:\d*\.\d+|\d+)"
    return [float(token) for token in re.findall(number_pattern, docling_text)]
|
125 |
+
|
126 |
+
def compare_outputs(img1, img2):
    """Build a text report comparing the numbers read from two images.

    Each image is passed to ``smoldocling_readimage``; the numbers in the two
    resulting texts are pulled out with ``extract_numbers`` and compared
    position by position. Two values count as a match when they differ by
    less than 1e-3. The percentage is taken over the longer of the two number
    lists, so surplus values on either side count as mismatches.

    Args:
        img1: First image, forwarded unchanged to ``smoldocling_readimage``.
        img2: Second image, forwarded unchanged to ``smoldocling_readimage``.

    Returns:
        A string containing both raw outputs, the similarity percentage and
        the number of matching values.
    """
    output1, output2 = smoldocling_readimage(img1), smoldocling_readimage(img2)

    values_a = extract_numbers(output1)
    values_b = extract_numbers(output2)

    # zip stops at the shorter list, so only positions present in both
    # outputs are compared.
    matches = sum(1 for a, b in zip(values_a, values_b) if abs(a - b) < 1e-3)

    # Divide by the longer list; guard against two outputs with no numbers.
    total = max(len(values_a), len(values_b))
    if total > 0:
        accuracy = (matches / total) * 100
    else:
        accuracy = 0

    return (
        f"Output for Image 1:\n{output1}\n\n"
        f"Output for Image 2:\n{output2}\n\n"
        f"Similarity Accuracy: {accuracy:.2f}%\n"
        f"Matching Values: {matches} out of {total}"
    )
|
151 |
+
|
152 |
+
# Gradio UI: take 2 images, output similarity report
|
153 |
# Wire compare_outputs to a two-image form; the report is rendered as text.
demo = gr.Interface(
    title="SmolDocling Image Comparison",
    description="Upload two document images. This app extracts data from both and compares similarity.",
    fn=compare_outputs,
    inputs=[
        gr.Image(type="pil", label="Upload Image 1"),
        gr.Image(type="pil", label="Upload Image 2"),
    ],
    outputs="text",
)

# Start the Gradio app.
demo.launch()
|