Sebbe33 committed on
Commit
7c8a4dc
·
verified ·
1 Parent(s): c45c762

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -29
app.py CHANGED
@@ -8,35 +8,38 @@ from google.genai import types
8
  from pdf2image import convert_from_bytes
9
 
10
  DETECTION_PROMPT = """\
11
- Analyze this document image and identify ALL visible text regions including:
12
- - Paragraphs
13
- - Headers/footers
14
- - Tables
15
- - Captions
16
- - Labels
17
- - Sidebars
18
- - Any text fragments
19
-
20
- For EACH text region:
21
- 1. Identify precise boundaries containing ALL text characters
22
- 2. Exclude whitespace/padding around text
23
- 3. Return coordinates as a Python list of lists in STRICT format:
24
- [[xmin, ymin, xmax, ymax]] with values normalized between 0-1 (relative to image dimensions)
25
-
26
- CRITICAL RULES:
27
- - Include even small text fragments
28
- - Split overlapping regions into separate boxes
29
- - Maintain original text reading order in list
30
- - Never omit text regions even if partially visible
31
- - Never add non-text elements
32
- - Coordinates must be precise to 3 decimal places
33
-
34
- Example response for 3 regions:
35
- [[0.042, 0.118, 0.247, 0.184],
36
- [0.301, 0.395, 0.503, 0.551],
37
- [0.612, 0.723, 0.891, 0.798]]
38
-
39
- ONLY RETURN THE PYTHON LIST, NO OTHER TEXT!
 
 
 
40
  """
41
 
42
  TEXT_EXTRACTION_PROMPT = "Extract the text in this image. Return only the exact text, nothing else."
 
8
  from pdf2image import convert_from_bytes
9
 
10
  DETECTION_PROMPT = """\
11
+ Analyze this document image and identify text regions following these rules:
12
+
13
+ 1. GROUP RELATED CONTENT:
14
+ - Full tables as SINGLE regions (including headers and all rows)
15
+ - Paragraphs as SINGLE rectangular blocks (multiple lines as one box)
16
+ - Keep text columns intact
17
+ - Treat list items as single region if visually grouped
18
+
19
+ 2. TEXT REGION REQUIREMENTS:
20
+ - Boundaries must tightly wrap text content
21
+ - Include 2% padding around text clusters
22
+ - Exclude isolated decorative elements
23
+ - Merge adjacent text fragments with ≤1% spacing
24
+
25
+ 3. COORDINATE FORMAT:
26
+ Python list of lists [[xmin, ymin, xmax, ymax]]
27
+ - Normalized 0-1 with 3 decimal places
28
+ - Ordered top-to-bottom, left-to-right
29
+ - Table example: [[0.12, 0.35, 0.88, 0.65]] for full table
30
+
31
+ 4. SPECIAL CASES:
32
+ - Table cells should NOT have individual boxes
33
+ - Page headers/footers as separate regions
34
+ - Text wrapped around images as distinct regions
35
+
36
+ Example response for table + 2 paragraphs:
37
+ [[0.07, 0.12, 0.93, 0.28], # Header
38
+ [0.12, 0.35, 0.88, 0.65], # Full table
39
+ [0.10, 0.70, 0.90, 0.85], # First paragraph
40
+ [0.10, 0.88, 0.90, 0.95]] # Second paragraph
41
+
42
+ ONLY RETURN THE PYTHON LIST! No explanations.
43
  """
44
 
45
  TEXT_EXTRACTION_PROMPT = "Extract the text in this image. Return only the exact text, nothing else."