Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -7,12 +7,36 @@ from google import genai
|
|
7 |
from google.genai import types
|
8 |
from pdf2image import convert_from_bytes
|
9 |
|
10 |
-
# Constants
|
11 |
DETECTION_PROMPT = """\
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
"""
|
17 |
|
18 |
TEXT_EXTRACTION_PROMPT = "Extract the text in this image. Return only the exact text, nothing else."
|
@@ -102,7 +126,7 @@ if uploaded_file and st.button("Analyze"):
|
|
102 |
col1, col2 = st.columns(2)
|
103 |
|
104 |
with col1:
|
105 |
-
st.image(image, caption="Original",
|
106 |
|
107 |
with col2:
|
108 |
# Get bounding boxes
|
@@ -126,7 +150,7 @@ if uploaded_file and st.button("Analyze"):
|
|
126 |
annotated = draw_bounding_boxes(image.copy(), boxes)
|
127 |
st.image(annotated,
|
128 |
caption=f"Detected {len(boxes)} text regions",
|
129 |
-
|
130 |
|
131 |
# Display extracted texts
|
132 |
if any(texts):
|
|
|
7 |
from google.genai import types
|
8 |
from pdf2image import convert_from_bytes
|
9 |
|
|
|
10 |
DETECTION_PROMPT = """\
|
11 |
+
Analyze this document image and identify ALL visible text regions including:
|
12 |
+
- Paragraphs
|
13 |
+
- Headers/footers
|
14 |
+
- Tables
|
15 |
+
- Captions
|
16 |
+
- Labels
|
17 |
+
- Sidebars
|
18 |
+
- Any text fragments
|
19 |
+
|
20 |
+
For EACH text region:
|
21 |
+
1. Identify precise boundaries containing ALL text characters
|
22 |
+
2. Exclude whitespace/padding around text
|
23 |
+
3. Return coordinates as a Python list of lists in STRICT format:
|
24 |
+
[[xmin, ymin, xmax, ymax]] with values normalized between 0-1 (relative to image dimensions)
|
25 |
+
|
26 |
+
CRITICAL RULES:
|
27 |
+
- Include even small text fragments
|
28 |
+
- Split overlapping regions into separate boxes
|
29 |
+
- Maintain original text reading order in list
|
30 |
+
- Never omit text regions even if partially visible
|
31 |
+
- Never add non-text elements
|
32 |
+
- Coordinates must be precise to 3 decimal places
|
33 |
+
|
34 |
+
Example response for 3 regions:
|
35 |
+
[[0.042, 0.118, 0.247, 0.184],
|
36 |
+
[0.301, 0.395, 0.503, 0.551],
|
37 |
+
[0.612, 0.723, 0.891, 0.798]]
|
38 |
+
|
39 |
+
ONLY RETURN THE PYTHON LIST, NO OTHER TEXT!
|
40 |
"""
|
41 |
|
42 |
TEXT_EXTRACTION_PROMPT = "Extract the text in this image. Return only the exact text, nothing else."
|
|
|
126 |
col1, col2 = st.columns(2)
|
127 |
|
128 |
with col1:
|
129 |
+
st.image(image, caption="Original", use_container_width=True)
|
130 |
|
131 |
with col2:
|
132 |
# Get bounding boxes
|
|
|
150 |
annotated = draw_bounding_boxes(image.copy(), boxes)
|
151 |
st.image(annotated,
|
152 |
caption=f"Detected {len(boxes)} text regions",
|
153 |
+
use_container_width=True)
|
154 |
|
155 |
# Display extracted texts
|
156 |
if any(texts):
|