Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -62,16 +62,24 @@
|
|
62 |
# )
|
63 |
|
64 |
# demo.launch()
|
65 |
-
|
66 |
import gradio as gr
|
67 |
from transformers import AutoProcessor, AutoModelForImageTextToText
|
68 |
from PIL import Image
|
69 |
-
import
|
70 |
|
71 |
# Load model & processor once at startup
|
72 |
processor = AutoProcessor.from_pretrained("ds4sd/SmolDocling-256M-preview")
|
73 |
model = AutoModelForImageTextToText.from_pretrained("ds4sd/SmolDocling-256M-preview")
|
74 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
75 |
def smoldocling_readimage(image, prompt_text):
|
76 |
messages = [
|
77 |
{"role": "user", "content": [{"type": "image"}, {"type": "text", "text": prompt_text}]}
|
@@ -84,12 +92,8 @@ def smoldocling_readimage(image, prompt_text):
|
|
84 |
result = processor.batch_decode(generated, skip_special_tokens=False)[0]
|
85 |
clean_result = result.replace("<end_of_utterance>", "").strip()
|
86 |
|
87 |
-
|
88 |
-
|
89 |
-
json_result = json.loads(clean_result)
|
90 |
-
return json_result
|
91 |
-
except json.JSONDecodeError:
|
92 |
-
return {"error": "Output is not valid JSON", "raw_output": clean_result}
|
93 |
|
94 |
# Gradio UI
|
95 |
demo = gr.Interface(
|
@@ -99,9 +103,8 @@ demo = gr.Interface(
|
|
99 |
gr.Textbox(lines=1, placeholder="Enter prompt (e.g. Convert to docling)", label="Prompt"),
|
100 |
],
|
101 |
outputs=gr.JSON(),
|
102 |
-
title="SmolDocling Web App",
|
103 |
-
description="Upload a document image and
|
104 |
)
|
105 |
|
106 |
demo.launch()
|
107 |
-
|
|
|
62 |
# )
|
63 |
|
64 |
# demo.launch()
|
|
|
65 |
import gradio as gr
|
66 |
from transformers import AutoProcessor, AutoModelForImageTextToText
|
67 |
from PIL import Image
|
68 |
+
import re
|
69 |
|
70 |
# Load model & processor once at startup
|
71 |
processor = AutoProcessor.from_pretrained("ds4sd/SmolDocling-256M-preview")
|
72 |
model = AutoModelForImageTextToText.from_pretrained("ds4sd/SmolDocling-256M-preview")
|
73 |
|
74 |
+
def extract_numbers_from_docling(docling_text):
    """Return every numeric literal found in *docling_text*, in order.

    The whole string is scanned as-is: no tags are stripped, so digits that
    appear inside markup are returned as well. An optional leading ``+`` or
    ``-`` is honoured for both integers and decimals.

    Args:
        docling_text: Text to scan. ``None`` or an empty string yields ``[]``.

    Returns:
        A list of numbers in order of appearance; tokens containing a
        decimal point become ``float``, all others become ``int``.
    """
    if not docling_text:
        return []

    # The sign must be grouped with BOTH alternatives. Without the group,
    # "[-+]?" binds only to the decimal branch and "-5" is read as 5.
    tokens = re.findall(r"[-+]?(?:\d*\.\d+|\d+)", docling_text)

    # A token is a float exactly when it carries a decimal point; int()
    # accepts the optional sign, so no separate digit check is needed.
    return [float(tok) if "." in tok else int(tok) for tok in tokens]
|
82 |
+
|
83 |
def smoldocling_readimage(image, prompt_text):
|
84 |
messages = [
|
85 |
{"role": "user", "content": [{"type": "image"}, {"type": "text", "text": prompt_text}]}
|
|
|
92 |
result = processor.batch_decode(generated, skip_special_tokens=False)[0]
|
93 |
clean_result = result.replace("<end_of_utterance>", "").strip()
|
94 |
|
95 |
+
numbers = extract_numbers_from_docling(clean_result)
|
96 |
+
return numbers
|
|
|
|
|
|
|
|
|
97 |
|
98 |
# Gradio UI
|
99 |
demo = gr.Interface(
|
|
|
103 |
gr.Textbox(lines=1, placeholder="Enter prompt (e.g. Convert to docling)", label="Prompt"),
|
104 |
],
|
105 |
outputs=gr.JSON(),
|
106 |
+
title="SmolDocling Web App - Extract Numbers",
|
107 |
+
description="Upload a document image and extract numeric values as a list."
|
108 |
)
|
109 |
|
110 |
demo.launch()
|
|