Spaces:

Pavan147
/

Docling_Image

Sleeping

App Files Files Community

Pavan147 committed on 6 days ago

Commit

aa63203

verified ·

1 Parent(s): 1b7aff0

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -11

app.py CHANGED Viewed

@@ -98,6 +98,7 @@
 # demo.launch()
 import re
 import gradio as gr
 from transformers import AutoProcessor, AutoModelForImageTextToText
 from PIL import Image
@@ -106,6 +107,27 @@ from PIL import Image
 processor = AutoProcessor.from_pretrained("ds4sd/SmolDocling-256M-preview")
 model = AutoModelForImageTextToText.from_pretrained("ds4sd/SmolDocling-256M-preview")
 def smoldocling_readimage(image, prompt_text):
     messages = [
         {"role": "user", "content": [{"type": "image"}, {"type": "text", "text": prompt_text}]}
@@ -115,17 +137,11 @@ def smoldocling_readimage(image, prompt_text):
     outputs = model.generate(**inputs, max_new_tokens=1024)
     prompt_length = inputs.input_ids.shape[1]
     generated = outputs[:, prompt_length:]
-    raw_result = processor.batch_decode(generated, skip_special_tokens=False)[0]
-    # Remove all tags like <tag> and </tag>
-    text_without_tags = re.sub(r'<.*?>', '', raw_result)
-    # Extract all numbers (integers or decimals)
-    numbers = re.findall(r'\d+\.\d+|\d+', text_without_tags)
-    # Join numbers with commas
-    cleaned_result = ",".join(numbers)
-    return cleaned_result
 # Gradio UI
 demo = gr.Interface(
@@ -134,7 +150,7 @@ demo = gr.Interface(
         gr.Image(type="pil", label="Upload Image"),
         gr.Textbox(lines=1, placeholder="Enter prompt (e.g. Convert to docling)", label="Prompt"),
     ],
-    outputs="text",
     title="SmolDocling Web App",
     description="Upload a document image and convert it to structured docling format."
 )

 # demo.launch()
 import re
+import json
 import gradio as gr
 from transformers import AutoProcessor, AutoModelForImageTextToText
 from PIL import Image
 processor = AutoProcessor.from_pretrained("ds4sd/SmolDocling-256M-preview")
 model = AutoModelForImageTextToText.from_pretrained("ds4sd/SmolDocling-256M-preview")
+def parse_docling_to_json(docling_text):
+    # Remove unwanted tags like <otsl>, </otsl>, <loc_...>
+    cleaned = re.sub(r"</?otsl>|<loc_[^>]+>", "", docling_text)
+    # Split by line break <nl>
+    lines = cleaned.split("<nl>")
+    table = []
+    for line in lines:
+        if not line.strip():
+            continue
+        # Extract all <fcel> values
+        cells = re.findall(r"<fcel>([^<]+)", line)
+        # Convert to floats if possible
+        try:
+            row = [float(cell) for cell in cells]
+        except ValueError:
+            # If conversion fails, keep as string
+            row = cells
+        table.append(row)
+    return json.dumps(table, indent=2)
 def smoldocling_readimage(image, prompt_text):
     messages = [
         {"role": "user", "content": [{"type": "image"}, {"type": "text", "text": prompt_text}]}
     outputs = model.generate(**inputs, max_new_tokens=1024)
     prompt_length = inputs.input_ids.shape[1]
     generated = outputs[:, prompt_length:]
+    result = processor.batch_decode(generated, skip_special_tokens=False)[0]
+    # Parse raw docling output to JSON
+    json_output = parse_docling_to_json(result)
+    return f"<pre>{json_output}</pre>"
 # Gradio UI
 demo = gr.Interface(
         gr.Image(type="pil", label="Upload Image"),
         gr.Textbox(lines=1, placeholder="Enter prompt (e.g. Convert to docling)", label="Prompt"),
     ],
+    outputs="html",
     title="SmolDocling Web App",
     description="Upload a document image and convert it to structured docling format."
 )