Pavan147 committed on
Commit
62320a7
·
verified ·
1 Parent(s): 8e81891

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -10
app.py CHANGED
@@ -62,7 +62,9 @@
62
  # )
63
 
64
  # demo.launch()
 
65
  import re
 
66
  import gradio as gr
67
  from transformers import AutoProcessor, AutoModelForImageTextToText
68
  from PIL import Image
@@ -82,12 +84,11 @@ def extract_values(docling_text):
82
  continue
83
  # Extract numbers inside <fcel> tags
84
  values = re.findall(r"<fcel>(.*?)<fcel>", row)
85
- # Convert to float list
86
  float_values = [float(v) for v in values]
87
  result.append(float_values)
88
  return result
89
 
90
- def smoldocling_readimage(image, prompt_text):
91
  messages = [
92
  {"role": "user", "content": [{"type": "image"}, {"type": "text", "text": prompt_text}]}
93
  ]
@@ -97,20 +98,50 @@ def smoldocling_readimage(image, prompt_text):
97
  prompt_length = inputs.input_ids.shape[1]
98
  generated = outputs[:, prompt_length:]
99
  raw_result = processor.batch_decode(generated, skip_special_tokens=False)[0]
100
- # Clean and extract numeric values
101
- values_array = extract_values(raw_result)
102
- return str(values_array)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
 
104
- # Gradio UI
105
  demo = gr.Interface(
106
- fn=smoldocling_readimage,
107
  inputs=[
108
- gr.Image(type="pil", label="Upload Image"),
 
109
  gr.Textbox(lines=1, placeholder="Enter prompt (e.g. Convert to docling)", label="Prompt"),
110
  ],
111
  outputs="text",
112
- title="SmolDocling Web App",
113
- description="Upload a document image and convert it to structured docling format."
114
  )
115
 
116
  demo.launch()
 
62
  # )
63
 
64
  # demo.launch()
65
+
66
  import re
67
+ import numpy as np
68
  import gradio as gr
69
  from transformers import AutoProcessor, AutoModelForImageTextToText
70
  from PIL import Image
 
84
  continue
85
  # Extract numbers inside <fcel> tags
86
  values = re.findall(r"<fcel>(.*?)<fcel>", row)
 
87
  float_values = [float(v) for v in values]
88
  result.append(float_values)
89
  return result
90
 
91
+ def get_array_from_image(image, prompt_text):
92
  messages = [
93
  {"role": "user", "content": [{"type": "image"}, {"type": "text", "text": prompt_text}]}
94
  ]
 
98
  prompt_length = inputs.input_ids.shape[1]
99
  generated = outputs[:, prompt_length:]
100
  raw_result = processor.batch_decode(generated, skip_special_tokens=False)[0]
101
+ return extract_values(raw_result)
102
+
103
def _flatten_values(rows):
    """Flatten a nested sequence of numbers into a 1-D float array.

    Rows of unequal length cannot be turned into a rectangular array, so in
    that case each row is converted on its own and the pieces are joined.
    """
    try:
        return np.asarray(rows, dtype=float).ravel()
    except ValueError:
        # Ragged input: NumPy refuses to build a rectangular float array.
        return np.concatenate(
            [np.asarray(row, dtype=float).ravel() for row in rows]
        )


def compare_arrays(arr1, arr2):
    """Return a similarity percentage between two tables of numbers.

    Both inputs are flattened to 1-D. When their lengths differ, only the
    overlapping prefix is compared. The score is
    ``(1 - MAE / scale) * 100``, where ``MAE`` is the mean absolute
    difference and ``scale`` is the largest absolute value found in the
    compared data. The score is clamped so it never drops below 0.

    Args:
        arr1: Nested list (rows of numbers) for the first table.
        arr2: Nested list (rows of numbers) for the second table.

    Returns:
        The similarity as a float rounded to two decimals, or ``0.0`` when
        either input holds no values.
    """
    flat1 = _flatten_values(arr1)
    flat2 = _flatten_values(arr2)

    # Compare only the part both tables have in common.
    overlap = min(len(flat1), len(flat2))
    if overlap == 0:
        return 0.0  # no data to compare

    flat1 = flat1[:overlap]
    flat2 = flat2[:overlap]

    mae = float(np.mean(np.abs(flat1 - flat2)))

    # Normalise by magnitude rather than by the raw maximum: with negative
    # data the raw maximum can be <= 0, which would collapse the scale to the
    # epsilon and force the score to 0 for any non-identical input.
    scale = max(
        float(np.max(np.abs(flat1))),
        float(np.max(np.abs(flat2))),
        1e-6,  # avoid division by zero when every value is 0
    )

    similarity = 1.0 - (mae / scale)
    similarity_percent = max(0.0, similarity) * 100.0  # clamp to >= 0

    return round(float(similarity_percent), 2)
123
+
124
def process_two_images(image1, image2, prompt_text):
    """Extract values from two images and build a text comparison report.

    Each image is passed, together with the same prompt, to
    ``get_array_from_image``; the two results are then scored with
    ``compare_arrays``.

    Args:
        image1: First image to read.
        image2: Second image to read.
        prompt_text: Prompt forwarded unchanged for both images.

    Returns:
        A single string listing the values extracted from each image followed
        by the similarity score.
    """
    # Image 1 is processed before image 2, same as reading them one by one.
    first_values, second_values = [
        get_array_from_image(picture, prompt_text)
        for picture in (image1, image2)
    ]
    score = compare_arrays(first_values, second_values)

    report_sections = [
        f"Extracted values from Image 1:\n{first_values}\n\n",
        f"Extracted values from Image 2:\n{second_values}\n\n",
        f"Similarity Accuracy: {score} %",
    ]
    return "".join(report_sections)
134
 
 
135
# Web UI: two image uploads and a one-line prompt box are passed to
# process_two_images, and its text report is displayed as the output.
demo = gr.Interface(
    title="SmolDocling Image Comparison",
    description="Upload two document images, extract numeric arrays, and compare their similarity.",
    fn=process_two_images,
    inputs=[
        gr.Image(type="pil", label="Upload Image 1"),
        gr.Image(type="pil", label="Upload Image 2"),
        gr.Textbox(
            lines=1,
            placeholder="Enter prompt (e.g. Convert to docling)",
            label="Prompt",
        ),
    ],
    outputs="text",
)

# Start serving the interface.
demo.launch()