Spaces:

Pavan147
/

Docling_Image

Sleeping

App Files Files Community

Pavan147 committed 6 days ago

Commit

8dc569d

verified ·

1 Parent(s): aa63203

Update app.py

Browse files

Files changed (1) hide show

app.py +41 -35

app.py CHANGED Viewed

@@ -98,7 +98,6 @@
 # demo.launch()
 import re
-import json
 import gradio as gr
 from transformers import AutoProcessor, AutoModelForImageTextToText
 from PIL import Image
@@ -107,28 +106,7 @@ from PIL import Image
 processor = AutoProcessor.from_pretrained("ds4sd/SmolDocling-256M-preview")
 model = AutoModelForImageTextToText.from_pretrained("ds4sd/SmolDocling-256M-preview")
-def parse_docling_to_json(docling_text):
-    # Turn docling table markup into a JSON string: a list of rows, each row
-    # a list of cell values, serialized with 2-space indentation.
-    # Remove unwanted tags like <otsl>, </otsl>, <loc_...>
-    cleaned = re.sub(r"</?otsl>|<loc_[^>]+>", "", docling_text)
-    # Split by line break <nl>
-    lines = cleaned.split("<nl>")
-    table = []
-    for line in lines:
-        # Skip segments that are empty or whitespace-only
-        if not line.strip():
-            continue
-        # Extract all <fcel> values (the text after each <fcel> up to the next "<");
-        # a segment with no <fcel> match still contributes an empty row below
-        cells = re.findall(r"<fcel>([^<]+)", line)
-        # Convert to floats if possible
-        # NOTE(review): conversion is all-or-nothing per row -- a single
-        # non-numeric cell leaves every cell in that row as a string.
-        try:
-            row = [float(cell) for cell in cells]
-        except ValueError:
-            # If conversion fails, keep as string
-            row = cells
-        table.append(row)
-    return json.dumps(table, indent=2)
-def smoldocling_readimage(image, prompt_text):
     messages = [
         {"role": "user", "content": [{"type": "image"}, {"type": "text", "text": prompt_text}]}
     ]
@@ -138,21 +116,49 @@ def smoldocling_readimage(image, prompt_text):
     prompt_length = inputs.input_ids.shape[1]
     generated = outputs[:, prompt_length:]
     result = processor.batch_decode(generated, skip_special_tokens=False)[0]
-    # Parse raw docling output to JSON
-    json_output = parse_docling_to_json(result)
-    return f"<pre>{json_output}</pre>"
-# Gradio UI
 demo = gr.Interface(
-    fn=smoldocling_readimage,
     inputs=[
-        gr.Image(type="pil", label="Upload Image"),
-        gr.Textbox(lines=1, placeholder="Enter prompt (e.g. Convert to docling)", label="Prompt"),
     ],
-    outputs="html",
-    title="SmolDocling Web App",
-    description="Upload a document image and convert it to structured docling format."
 )
 demo.launch()

 # demo.launch()
 import re
 import gradio as gr
 from transformers import AutoProcessor, AutoModelForImageTextToText
 from PIL import Image
 processor = AutoProcessor.from_pretrained("ds4sd/SmolDocling-256M-preview")
 model = AutoModelForImageTextToText.from_pretrained("ds4sd/SmolDocling-256M-preview")
+def smoldocling_readimage(image, prompt_text="Convert to docling"):
     messages = [
         {"role": "user", "content": [{"type": "image"}, {"type": "text", "text": prompt_text}]}
     ]
     prompt_length = inputs.input_ids.shape[1]
     generated = outputs[:, prompt_length:]
     result = processor.batch_decode(generated, skip_special_tokens=False)[0]
+    return result.replace("<end_of_utterance>", "").strip()
def extract_numbers(docling_text):
    """Return every numeric literal found in *docling_text* as a float.

    Integers and decimals are both recognised (``"7"``, ``"3.14"``, ``".5"``),
    each with an optional leading ``+`` or ``-`` sign. Digits are matched
    wherever they occur in the text, including inside markup tags.

    Args:
        docling_text: The text to scan.

    Returns:
        The matched numbers as floats, in order of appearance. The list is
        empty when the text contains no digits.
    """
    # The alternation is grouped so the optional sign applies to integers as
    # well as decimals; ungrouped, "-5" was read as 5.0 while "-5.0" kept
    # its sign.
    numbers = re.findall(r"[-+]?(?:\d*\.\d+|\d+)", docling_text)
    return [float(number) for number in numbers]
def compare_outputs(img1, img2):
    """Compare the numbers the model extracts from two images.

    Each image is passed to ``smoldocling_readimage``; the numbers in the two
    resulting texts are pulled out with ``extract_numbers`` and compared
    position by position. Two numbers match when they differ by less than
    1e-3. The accuracy is the number of matches divided by the length of the
    longer number list, so surplus numbers on either side count as misses.

    Args:
        img1: First image, as delivered by the Gradio image input.
        img2: Second image, as delivered by the Gradio image input.

    Returns:
        A plain-text report containing both raw outputs, the similarity
        percentage and the match count, or a short prompt asking for both
        images when either one is missing.
    """
    # Gradio hands the callback None for an image slot that was left empty;
    # answer with a readable message instead of failing inside the model call.
    if img1 is None or img2 is None:
        return "Please upload both images before comparing."

    # Extract docling text from both images
    output1 = smoldocling_readimage(img1)
    output2 = smoldocling_readimage(img2)

    # Extract numbers from both outputs
    nums1 = extract_numbers(output1)
    nums2 = extract_numbers(output2)

    # Positional comparison; zip stops at the shorter list, so only positions
    # present in both outputs can count as matches.
    matches = sum(1 for a, b in zip(nums1, nums2) if abs(a - b) < 1e-3)

    # Normalise by the longer list so missing or extra values lower the score.
    total = max(len(nums1), len(nums2))
    accuracy = (matches / total) * 100 if total > 0 else 0

    return (
        f"Output for Image 1:\n{output1}\n\n"
        f"Output for Image 2:\n{output2}\n\n"
        f"Similarity Accuracy: {accuracy:.2f}%\n"
        f"Matching Values: {matches} out of {total}"
    )
+# Gradio UI: take 2 images, output similarity report
+# The two image inputs below feed compare_outputs; its returned string is
+# shown in a single text output.
 demo = gr.Interface(
+    fn=compare_outputs,
     inputs=[
+        # Both inputs are configured with type="pil".
+        gr.Image(type="pil", label="Upload Image 1"),
+        gr.Image(type="pil", label="Upload Image 2"),
     ],
+    outputs="text",
+    title="SmolDocling Image Comparison",
+    description="Upload two document images. This app extracts data from both and compares similarity."
 )
+# Launch the interface when this file is executed.
 demo.launch()