Pavan147 committed on
Commit
aa63203
·
verified ·
1 Parent(s): 1b7aff0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -11
app.py CHANGED
@@ -98,6 +98,7 @@
98
  # demo.launch()
99
 
100
  import re
 
101
  import gradio as gr
102
  from transformers import AutoProcessor, AutoModelForImageTextToText
103
  from PIL import Image
@@ -106,6 +107,27 @@ from PIL import Image
106
  processor = AutoProcessor.from_pretrained("ds4sd/SmolDocling-256M-preview")
107
  model = AutoModelForImageTextToText.from_pretrained("ds4sd/SmolDocling-256M-preview")
108
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  def smoldocling_readimage(image, prompt_text):
110
  messages = [
111
  {"role": "user", "content": [{"type": "image"}, {"type": "text", "text": prompt_text}]}
@@ -115,17 +137,11 @@ def smoldocling_readimage(image, prompt_text):
115
  outputs = model.generate(**inputs, max_new_tokens=1024)
116
  prompt_length = inputs.input_ids.shape[1]
117
  generated = outputs[:, prompt_length:]
118
- raw_result = processor.batch_decode(generated, skip_special_tokens=False)[0]
119
-
120
- # Remove all tags like <tag> and </tag>
121
- text_without_tags = re.sub(r'<.*?>', '', raw_result)
122
-
123
- # Extract all numbers (integers or decimals)
124
- numbers = re.findall(r'\d+\.\d+|\d+', text_without_tags)
125
 
126
- # Join numbers with commas
127
- cleaned_result = ",".join(numbers)
128
- return cleaned_result
129
 
130
  # Gradio UI
131
  demo = gr.Interface(
@@ -134,7 +150,7 @@ demo = gr.Interface(
134
  gr.Image(type="pil", label="Upload Image"),
135
  gr.Textbox(lines=1, placeholder="Enter prompt (e.g. Convert to docling)", label="Prompt"),
136
  ],
137
- outputs="text",
138
  title="SmolDocling Web App",
139
  description="Upload a document image and convert it to structured docling format."
140
  )
 
98
  # demo.launch()
99
 
100
  import re
101
+ import json
102
  import gradio as gr
103
  from transformers import AutoProcessor, AutoModelForImageTextToText
104
  from PIL import Image
 
107
  processor = AutoProcessor.from_pretrained("ds4sd/SmolDocling-256M-preview")
108
  model = AutoModelForImageTextToText.from_pretrained("ds4sd/SmolDocling-256M-preview")
109
 
110
def parse_docling_to_json(docling_text):
    """Convert table markup made of ``<fcel>``/``<nl>`` tags into JSON rows.

    ``<otsl>``/``</otsl>`` wrappers and ``<loc_...>`` tags are removed, the
    remainder is split into rows on ``<nl>``, and the text following each
    ``<fcel>`` tag in a row becomes one cell.

    A row is emitted as numbers only when *every* cell in it parses as a
    finite float; otherwise all of the row's cells are kept as the original
    strings. Segments that contain no ``<fcel>`` cell at all (blank segments
    or leftover markup after the last ``<nl>``) are skipped instead of being
    emitted as empty rows.

    Args:
        docling_text: Raw decoded markup string. ``None`` or an empty string
            yields an empty table.

    Returns:
        A JSON string (``indent=2``) encoding a list of rows, where each row
        is a list of floats or a list of strings.
    """
    # Function-scope import so this block does not depend on the file header.
    import math

    if not docling_text:
        return json.dumps([], indent=2)

    # Drop wrapper and location tags; they carry no cell content.
    cleaned = re.sub(r"</?otsl>|<loc_[^>]+>", "", docling_text)
    cell_pattern = re.compile(r"<fcel>([^<]+)")

    table = []
    for segment in cleaned.split("<nl>"):
        cells = cell_pattern.findall(segment)
        if not cells:
            # Nothing tabular here (blank or stray tags): not a real row.
            continue
        try:
            row = [float(cell) for cell in cells]
        except ValueError:
            # At least one cell is non-numeric: keep the whole row as text.
            row = cells
        else:
            # float() accepts "nan"/"inf", which json.dumps would write as
            # bare NaN/Infinity (not valid JSON); keep such rows as text.
            if not all(math.isfinite(value) for value in row):
                row = cells
        table.append(row)
    return json.dumps(table, indent=2)
130
+
131
  def smoldocling_readimage(image, prompt_text):
132
  messages = [
133
  {"role": "user", "content": [{"type": "image"}, {"type": "text", "text": prompt_text}]}
 
137
  outputs = model.generate(**inputs, max_new_tokens=1024)
138
  prompt_length = inputs.input_ids.shape[1]
139
  generated = outputs[:, prompt_length:]
140
+ result = processor.batch_decode(generated, skip_special_tokens=False)[0]
 
 
 
 
 
 
141
 
142
+ # Parse raw docling output to JSON
143
+ json_output = parse_docling_to_json(result)
144
+ return f"<pre>{json_output}</pre>"
145
 
146
  # Gradio UI
147
  demo = gr.Interface(
 
150
  gr.Image(type="pil", label="Upload Image"),
151
  gr.Textbox(lines=1, placeholder="Enter prompt (e.g. Convert to docling)", label="Prompt"),
152
  ],
153
+ outputs="html",
154
  title="SmolDocling Web App",
155
  description="Upload a document image and convert it to structured docling format."
156
  )