pdf_gemini

Sleeping

App Files Files Community

Sebbe33 committed on Feb 18

Commit

c45c762

verified ·

1 Parent(s): d2aded5

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -7

app.py CHANGED Viewed

@@ -7,12 +7,36 @@ from google import genai
 from google.genai import types
 from pdf2image import convert_from_bytes
-# Constants
 DETECTION_PROMPT = """\
-Identify ALL text regions in this document. Return bounding boxes as a Python list of lists
-in format [[xmin, ymin, xmax, ymax]] where coordinates are normalized between 0-1.
-Only return the list, nothing else. Example:
-[[0.05, 0.12, 0.25, 0.18], [0.30, 0.40, 0.50, 0.55]]
 """
 TEXT_EXTRACTION_PROMPT = "Extract the text in this image. Return only the exact text, nothing else."
@@ -102,7 +126,7 @@ if uploaded_file and st.button("Analyze"):
                     col1, col2 = st.columns(2)
                     with col1:
-                        st.image(image, caption="Original", use_column_width=True)
                     with col2:
                         # Get bounding boxes
@@ -126,7 +150,7 @@ if uploaded_file and st.button("Analyze"):
                         annotated = draw_bounding_boxes(image.copy(), boxes)
                         st.image(annotated,
                                caption=f"Detected {len(boxes)} text regions",
-                               use_column_width=True)
                         # Display extracted texts
                         if any(texts):

 from google.genai import types
 from pdf2image import convert_from_bytes
 DETECTION_PROMPT = """\
+Analyze this document image and identify ALL visible text regions including:
+- Paragraphs
+- Headers/footers
+- Tables
+- Captions
+- Labels
+- Sidebars
+- Any text fragments
+For EACH text region:
+1. Identify precise boundaries containing ALL text characters
+2. Exclude whitespace/padding around text
+3. Return coordinates as a Python list of lists in STRICT format:
+[[xmin, ymin, xmax, ymax]] with values normalized between 0-1 (relative to image dimensions)
+CRITICAL RULES:
+- Include even small text fragments
+- Split overlapping regions into separate boxes
+- Maintain original text reading order in list
+- Never omit text regions even if partially visible
+- Never add non-text elements
+- Coordinates must be precise to 3 decimal places
+Example response for 3 regions:
+[[0.042, 0.118, 0.247, 0.184],
+ [0.301, 0.395, 0.503, 0.551],
+ [0.612, 0.723, 0.891, 0.798]]
+ONLY RETURN THE PYTHON LIST, NO OTHER TEXT!
 """
 TEXT_EXTRACTION_PROMPT = "Extract the text in this image. Return only the exact text, nothing else."
                     col1, col2 = st.columns(2)
                     with col1:
+                        st.image(image, caption="Original", use_container_width=True)
                     with col2:
                         # Get bounding boxes
                         annotated = draw_bounding_boxes(image.copy(), boxes)
                         st.image(annotated,
                                caption=f"Detected {len(boxes)} text regions",
+                               use_container_width=True)
                         # Display extracted texts
                         if any(texts):