Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -62,7 +62,9 @@
|
|
62 |
# )
|
63 |
|
64 |
# demo.launch()
|
|
|
65 |
import re
|
|
|
66 |
import gradio as gr
|
67 |
from transformers import AutoProcessor, AutoModelForImageTextToText
|
68 |
from PIL import Image
|
@@ -82,12 +84,11 @@ def extract_values(docling_text):
|
|
82 |
continue
|
83 |
# Extract numbers inside <fcel> tags
|
84 |
values = re.findall(r"<fcel>(.*?)<fcel>", row)
|
85 |
-
# Convert to float list
|
86 |
float_values = [float(v) for v in values]
|
87 |
result.append(float_values)
|
88 |
return result
|
89 |
|
90 |
-
def
|
91 |
messages = [
|
92 |
{"role": "user", "content": [{"type": "image"}, {"type": "text", "text": prompt_text}]}
|
93 |
]
|
@@ -97,20 +98,50 @@ def smoldocling_readimage(image, prompt_text):
|
|
97 |
prompt_length = inputs.input_ids.shape[1]
|
98 |
generated = outputs[:, prompt_length:]
|
99 |
raw_result = processor.batch_decode(generated, skip_special_tokens=False)[0]
|
100 |
-
|
101 |
-
|
102 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
103 |
|
104 |
-
# Gradio UI
|
105 |
demo = gr.Interface(
|
106 |
-
fn=
|
107 |
inputs=[
|
108 |
-
gr.Image(type="pil", label="Upload Image"),
|
|
|
109 |
gr.Textbox(lines=1, placeholder="Enter prompt (e.g. Convert to docling)", label="Prompt"),
|
110 |
],
|
111 |
outputs="text",
|
112 |
-
title="SmolDocling
|
113 |
-
description="Upload
|
114 |
)
|
115 |
|
116 |
demo.launch()
|
|
|
62 |
# )
|
63 |
|
64 |
# demo.launch()
|
65 |
+
|
66 |
import re
|
67 |
+
import numpy as np
|
68 |
import gradio as gr
|
69 |
from transformers import AutoProcessor, AutoModelForImageTextToText
|
70 |
from PIL import Image
|
|
|
84 |
continue
|
85 |
# Extract numbers inside <fcel> tags
|
86 |
values = re.findall(r"<fcel>(.*?)<fcel>", row)
|
|
|
87 |
float_values = [float(v) for v in values]
|
88 |
result.append(float_values)
|
89 |
return result
|
90 |
|
91 |
+
def get_array_from_image(image, prompt_text):
|
92 |
messages = [
|
93 |
{"role": "user", "content": [{"type": "image"}, {"type": "text", "text": prompt_text}]}
|
94 |
]
|
|
|
98 |
prompt_length = inputs.input_ids.shape[1]
|
99 |
generated = outputs[:, prompt_length:]
|
100 |
raw_result = processor.batch_decode(generated, skip_special_tokens=False)[0]
|
101 |
+
return extract_values(raw_result)
|
102 |
+
|
103 |
+
def _flatten_numeric(values):
    """Return every number in *values* as a flat 1-D float array."""
    try:
        return np.asarray(values, dtype=float).ravel()
    except ValueError:
        # Rows of unequal length cannot form a rectangular array, so flatten
        # each row separately and stitch the pieces together.
        return np.concatenate(
            [np.asarray(row, dtype=float).ravel() for row in values]
        )


def compare_arrays(arr1, arr2):
    """Score how similar two numeric tables are, as a percentage.

    Both inputs are flattened (rows may have different lengths), truncated
    to the shorter of the two, and compared element by element. The score
    is ``1 - MAE / scale`` clamped to be non-negative, where ``scale`` is
    the largest absolute value present in either truncated input.

    Args:
        arr1: Nested list (or array-like) of numbers.
        arr2: Nested list (or array-like) of numbers.

    Returns:
        Similarity in percent, rounded to two decimals. ``0.0`` when either
        input contains no values.
    """
    flat1 = _flatten_numeric(arr1)
    flat2 = _flatten_numeric(arr2)

    # If the inputs differ in size, compare only the overlapping prefix.
    overlap = min(flat1.size, flat2.size)
    if overlap == 0:
        return 0.0  # no data to compare

    flat1 = flat1[:overlap]
    flat2 = flat2[:overlap]

    mae = float(np.mean(np.abs(flat1 - flat2)))
    # Normalise by magnitude, not by signed maximum: with all-negative data
    # the signed maximum would fall back to the 1e-6 floor and zero the score.
    scale = max(
        float(np.abs(flat1).max()),
        float(np.abs(flat2).max()),
        1e-6,  # avoid division by zero when every value is 0
    )
    similarity = 1.0 - mae / scale

    return round(max(0.0, similarity) * 100, 2)
|
123 |
+
|
124 |
+
def process_two_images(image1, image2, prompt_text):
    """Extract values from two images and report how similar they are.

    Each image is passed to ``get_array_from_image`` with the same prompt,
    the two results are scored with ``compare_arrays``, and everything is
    rendered into a single text report.

    Args:
        image1: First image, forwarded to ``get_array_from_image``.
        image2: Second image, forwarded to ``get_array_from_image``.
        prompt_text: Prompt forwarded unchanged for both images.

    Returns:
        A string listing both extracted value sets followed by the
        similarity score.
    """
    # Image 1 is processed before image 2, as in the original call order.
    values_first, values_second = [
        get_array_from_image(picture, prompt_text)
        for picture in (image1, image2)
    ]
    score = compare_arrays(values_first, values_second)

    sections = [
        f"Extracted values from Image 1:\n{values_first}\n\n",
        f"Extracted values from Image 2:\n{values_second}\n\n",
        f"Similarity Accuracy: {score} %",
    ]
    return "".join(sections)
|
134 |
|
|
|
135 |
# Gradio front end: two image uploads and a prompt box feed
# process_two_images, whose text report is shown as the output.
demo = gr.Interface(
    title="SmolDocling Image Comparison",
    description="Upload two document images, extract numeric arrays, and compare their similarity.",
    fn=process_two_images,
    inputs=[
        gr.Image(type="pil", label="Upload Image 1"),
        gr.Image(type="pil", label="Upload Image 2"),
        gr.Textbox(lines=1, placeholder="Enter prompt (e.g. Convert to docling)", label="Prompt"),
    ],
    outputs="text",
)

# Start the web app as soon as the module is executed.
demo.launch()
|