pdf_gemini

Sleeping

Sebbe33 committed on Feb 19

Commit

7c8a4dc

verified ·

1 Parent(s): c45c762

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -8,35 +8,38 @@ from google.genai import types
 from pdf2image import convert_from_bytes
 DETECTION_PROMPT = """\
-Analyze this document image and identify ALL visible text regions including:
-- Paragraphs
-- Headers/footers
-- Tables
-- Captions
-- Labels
-- Sidebars
-- Any text fragments
-For EACH text region:
-1. Identify precise boundaries containing ALL text characters
-2. Exclude whitespace/padding around text
-3. Return coordinates as a Python list of lists in STRICT format:
-[[xmin, ymin, xmax, ymax]] with values normalized between 0-1 (relative to image dimensions)
-CRITICAL RULES:
-- Include even small text fragments
-- Split overlapping regions into separate boxes
-- Maintain original text reading order in list
-- Never omit text regions even if partially visible
-- Never add non-text elements
-- Coordinates must be precise to 3 decimal places
-Example response for 3 regions:
-[[0.042, 0.118, 0.247, 0.184],
- [0.301, 0.395, 0.503, 0.551],
- [0.612, 0.723, 0.891, 0.798]]
-ONLY RETURN THE PYTHON LIST, NO OTHER TEXT!
 """
 TEXT_EXTRACTION_PROMPT = "Extract the text in this image. Return only the exact text, nothing else."

 from pdf2image import convert_from_bytes
 DETECTION_PROMPT = """\
+Analyze this document image and identify text regions following these rules:
+1. GROUP RELATED CONTENT:
+- Full tables as SINGLE regions (including headers and all rows)
+- Paragraphs as SINGLE rectangular blocks (multiple lines as one box)
+- Keep text columns intact
+- Treat list items as single region if visually grouped
+2. TEXT REGION REQUIREMENTS:
+- Boundaries must tightly wrap text content
+- Include 2% padding around text clusters
+- Exclude isolated decorative elements
+- Merge adjacent text fragments with ≤1% spacing
+3. COORDINATE FORMAT:
+Python list of lists [[xmin, ymin, xmax, ymax]]
+- Normalized 0-1 with 3 decimal places
+- Ordered top-to-bottom, left-to-right
+- Table example: [[0.12, 0.35, 0.88, 0.65]] for full table
+4. SPECIAL CASES:
+- Table cells should NOT have individual boxes
+- Page headers/footers as separate regions
+- Text wrapped around images as distinct regions
+Example response for table + 2 paragraphs:
+[[0.07, 0.12, 0.93, 0.28],  # Header
+ [0.12, 0.35, 0.88, 0.65],  # Full table
+ [0.10, 0.70, 0.90, 0.85],  # First paragraph
+ [0.10, 0.88, 0.90, 0.95]]  # Second paragraph
+ONLY RETURN THE PYTHON LIST! No explanations.
 """
 TEXT_EXTRACTION_PROMPT = "Extract the text in this image. Return only the exact text, nothing else."