Pavan147 committed on
Commit
4e8d812
·
verified ·
1 Parent(s): fcd0714

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -11
app.py CHANGED
@@ -62,16 +62,24 @@
62
  # )
63
 
64
  # demo.launch()
65
-
66
  import gradio as gr
67
  from transformers import AutoProcessor, AutoModelForImageTextToText
68
  from PIL import Image
69
- import json
70
 
71
  # Load model & processor once at startup
72
  processor = AutoProcessor.from_pretrained("ds4sd/SmolDocling-256M-preview")
73
  model = AutoModelForImageTextToText.from_pretrained("ds4sd/SmolDocling-256M-preview")
74
 
 
 
 
 
 
 
 
 
 
75
  def smoldocling_readimage(image, prompt_text):
76
  messages = [
77
  {"role": "user", "content": [{"type": "image"}, {"type": "text", "text": prompt_text}]}
@@ -84,12 +92,8 @@ def smoldocling_readimage(image, prompt_text):
84
  result = processor.batch_decode(generated, skip_special_tokens=False)[0]
85
  clean_result = result.replace("<end_of_utterance>", "").strip()
86
 
87
- # Try to parse as JSON
88
- try:
89
- json_result = json.loads(clean_result)
90
- return json_result
91
- except json.JSONDecodeError:
92
- return {"error": "Output is not valid JSON", "raw_output": clean_result}
93
 
94
  # Gradio UI
95
  demo = gr.Interface(
@@ -99,9 +103,8 @@ demo = gr.Interface(
99
  gr.Textbox(lines=1, placeholder="Enter prompt (e.g. Convert to docling)", label="Prompt"),
100
  ],
101
  outputs=gr.JSON(),
102
- title="SmolDocling Web App",
103
- description="Upload a document image and convert it to structured docling format."
104
  )
105
 
106
  demo.launch()
107
-
 
62
  # )
63
 
64
  # demo.launch()
 
65
  import gradio as gr
66
  from transformers import AutoProcessor, AutoModelForImageTextToText
67
  from PIL import Image
68
+ import re
69
 
70
  # Load model & processor once at startup
71
  processor = AutoProcessor.from_pretrained("ds4sd/SmolDocling-256M-preview")
72
  model = AutoModelForImageTextToText.from_pretrained("ds4sd/SmolDocling-256M-preview")
73
 
74
def extract_numbers_from_docling(docling_text):
    """Return every numeric value found in the text content of *docling_text*.

    Markup tokens of the form ``<name>`` / ``</name>`` (for example
    ``<fcel>``, ``<nl>`` or ``<loc_12>``) are dropped before scanning, so
    digits that are part of a tag name are not reported as values.

    Args:
        docling_text: Raw model output, possibly containing markup tags.

    Returns:
        A list of numbers in order of appearance. Values written without a
        decimal point become ``int``; values with one become ``float``.
        An optional leading ``+`` or ``-`` is honoured for both forms.
    """
    # Replace each tag with a space (not an empty string) so that values on
    # either side of a tag, e.g. "1<fcel>2", are not fused into one number.
    content_only = re.sub(r"</?[A-Za-z_][\w-]*>", " ", docling_text)

    # The sign must apply to the whole alternation; without the group it
    # would bind only to the decimal branch and "-5" would be read as 5.
    tokens = re.findall(r"[-+]?(?:\d*\.\d+|\d+)", content_only)

    # Decide int vs. float from the presence of a decimal point, which keeps
    # signed integers such as "-5" as int rather than falling back to float.
    return [float(tok) if "." in tok else int(tok) for tok in tokens]
82
+
83
  def smoldocling_readimage(image, prompt_text):
84
  messages = [
85
  {"role": "user", "content": [{"type": "image"}, {"type": "text", "text": prompt_text}]}
 
92
  result = processor.batch_decode(generated, skip_special_tokens=False)[0]
93
  clean_result = result.replace("<end_of_utterance>", "").strip()
94
 
95
+ numbers = extract_numbers_from_docling(clean_result)
96
+ return numbers
 
 
 
 
97
 
98
  # Gradio UI
99
  demo = gr.Interface(
 
103
  gr.Textbox(lines=1, placeholder="Enter prompt (e.g. Convert to docling)", label="Prompt"),
104
  ],
105
  outputs=gr.JSON(),
106
+ title="SmolDocling Web App - Extract Numbers",
107
+ description="Upload a document image and extract numeric values as a list."
108
  )
109
 
110
  demo.launch()