Pavan147 committed on
Commit
1b7aff0
·
verified ·
1 Parent(s): d30603a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -4
app.py CHANGED
@@ -63,6 +63,41 @@
63
 
64
  # demo.launch()
65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  import gradio as gr
67
  from transformers import AutoProcessor, AutoModelForImageTextToText
68
  from PIL import Image
@@ -80,8 +115,17 @@ def smoldocling_readimage(image, prompt_text):
80
  outputs = model.generate(**inputs, max_new_tokens=1024)
81
  prompt_length = inputs.input_ids.shape[1]
82
  generated = outputs[:, prompt_length:]
83
- result = processor.batch_decode(generated, skip_special_tokens=False)[0]
84
- return result.replace("<end_of_utterance>", "").strip()
 
 
 
 
 
 
 
 
 
85
 
86
  # Gradio UI
87
  demo = gr.Interface(
@@ -90,9 +134,9 @@ demo = gr.Interface(
90
  gr.Image(type="pil", label="Upload Image"),
91
  gr.Textbox(lines=1, placeholder="Enter prompt (e.g. Convert to docling)", label="Prompt"),
92
  ],
93
- outputs="html",
94
  title="SmolDocling Web App",
95
  description="Upload a document image and convert it to structured docling format."
96
  )
97
 
98
- demo.launch()
 
63
 
64
  # demo.launch()
65
 
66
+ # import gradio as gr
67
+ # from transformers import AutoProcessor, AutoModelForImageTextToText
68
+ # from PIL import Image
69
+
70
+ # # Load model & processor once at startup
71
+ # processor = AutoProcessor.from_pretrained("ds4sd/SmolDocling-256M-preview")
72
+ # model = AutoModelForImageTextToText.from_pretrained("ds4sd/SmolDocling-256M-preview")
73
+
74
+ # def smoldocling_readimage(image, prompt_text):
75
+ # messages = [
76
+ # {"role": "user", "content": [{"type": "image"}, {"type": "text", "text": prompt_text}]}
77
+ # ]
78
+ # prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
79
+ # inputs = processor(text=prompt, images=[image], return_tensors="pt")
80
+ # outputs = model.generate(**inputs, max_new_tokens=1024)
81
+ # prompt_length = inputs.input_ids.shape[1]
82
+ # generated = outputs[:, prompt_length:]
83
+ # result = processor.batch_decode(generated, skip_special_tokens=False)[0]
84
+ # return result.replace("<end_of_utterance>", "").strip()
85
+
86
+ # # Gradio UI
87
+ # demo = gr.Interface(
88
+ # fn=smoldocling_readimage,
89
+ # inputs=[
90
+ # gr.Image(type="pil", label="Upload Image"),
91
+ # gr.Textbox(lines=1, placeholder="Enter prompt (e.g. Convert to docling)", label="Prompt"),
92
+ # ],
93
+ # outputs="html",
94
+ # title="SmolDocling Web App",
95
+ # description="Upload a document image and convert it to structured docling format."
96
+ # )
97
+
98
+ # demo.launch()
99
+
100
+ import re
101
  import gradio as gr
102
  from transformers import AutoProcessor, AutoModelForImageTextToText
103
  from PIL import Image
 
115
  outputs = model.generate(**inputs, max_new_tokens=1024)
116
  prompt_length = inputs.input_ids.shape[1]
117
  generated = outputs[:, prompt_length:]
118
+ raw_result = processor.batch_decode(generated, skip_special_tokens=False)[0]
119
+
120
+ # Remove all tags like <tag> and </tag>
121
+ text_without_tags = re.sub(r'<.*?>', '', raw_result)
122
+
123
+ # Extract all numbers (integers or decimals)
124
+ numbers = re.findall(r'\d+\.\d+|\d+', text_without_tags)
125
+
126
+ # Join numbers with commas
127
+ cleaned_result = ",".join(numbers)
128
+ return cleaned_result
129
 
130
  # Gradio UI
131
  demo = gr.Interface(
 
134
  gr.Image(type="pil", label="Upload Image"),
135
  gr.Textbox(lines=1, placeholder="Enter prompt (e.g. Convert to docling)", label="Prompt"),
136
  ],
137
+ outputs="text",
138
  title="SmolDocling Web App",
139
  description="Upload a document image and convert it to structured docling format."
140
  )
141
 
142
+ demo.launch()