Sebbe33 committed on
Commit
6018547
·
verified ·
1 Parent(s): 316d102

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -24
app.py CHANGED
@@ -8,12 +8,10 @@ from google.genai import types
8
  from pdf2image import convert_from_bytes
9
 
10
  # Constants
11
- GET_NODE_BOUNDING_BOXES_PROMPT = """\
12
- Please provide me strict bounding boxes that encompasses the following text in the attached image? I'm trying to draw a rectangle around the text.
13
- - Use the top-left coordinate system
14
- - Values should be percentages of the image width and height (0 to 1)
15
-
16
- {nodes}
17
  """
18
 
19
  # Helper functions
@@ -39,20 +37,19 @@ def draw_bounding_boxes(image, boxes):
39
  ymin * height,
40
  xmax * width,
41
  ymax * height
42
- ], outline="#00FF00", width=3)
43
  return image
44
 
45
  # Streamlit UI
46
- st.title("PDF Themenerkennung mit Gemini")
47
  col1, col2 = st.columns(2)
48
 
49
  with col1:
50
- uploaded_file = st.file_uploader("PDF hochladen", type=["pdf"])
51
- topic_name = st.text_input("Thema zur Erkennung", placeholder="z.B. 'Überschrift', 'Tabelle', 'Absatz'")
52
 
53
- if uploaded_file and topic_name:
54
- if st.button("Analysieren"):
55
- with st.spinner("Analysiere PDF..."):
56
  try:
57
  # Convert PDF to images
58
  pdf_bytes = uploaded_file.read()
@@ -72,27 +69,26 @@ with col1:
72
  mime_type="image/png"
73
  )
74
 
75
- # Get topic boxes using new prompt
76
- detection_prompt = GET_NODE_BOUNDING_BOXES_PROMPT.format(nodes=topic_name)
77
  box_response = client.models.generate_content(
78
  model="gemini-2.0-flash-exp",
79
- contents=[detection_prompt, image_part]
80
  )
81
 
82
  # Get description
83
  desc_response = client.models.generate_content(
84
  model="gemini-2.0-flash-exp",
85
- contents=["Beschreibe diesen Dokumentenausschnitt detailliert.", image_part]
86
  )
87
 
88
  # Process boxes
89
  try:
90
  boxes = parse_list_boxes(box_response.text)
91
  except Exception as e:
92
- st.error(f"Fehler bei Seite {page_num+1}: {str(e)}")
93
  boxes = []
94
 
95
- # Draw boxes with corrected coordinates
96
  annotated_image = image.copy()
97
  if boxes:
98
  annotated_image = draw_bounding_boxes(annotated_image, boxes)
@@ -106,15 +102,15 @@ with col1:
106
 
107
  # Display results
108
  with col2:
109
- st.write(f"## Ergebnisse ({len(results)} Seiten)")
110
- tabs = st.tabs([f"Seite {res['page']}" for res in results])
111
 
112
  for tab, res in zip(tabs, results):
113
  with tab:
114
  st.image(res["image"],
115
- caption=f"Seite {res['page']} - {res['boxes']} {topic_name} erkannt",
116
  use_container_width=True)
117
- st.write("**Beschreibung:**", res["description"])
118
 
119
  except Exception as e:
120
- st.error(f"Fehler: {str(e)}")
 
8
  from pdf2image import convert_from_bytes
9
 
10
  # Constants
11
+ DETECTION_PROMPT = """\
12
+ Identify all text regions in this document. Provide bounding boxes in the format [xmin, ymin, xmax, ymax]
13
+ as percentages of the image dimensions. Return only a Python-style list of lists without any additional text.
14
+ Example: [[0.1, 0.2, 0.4, 0.5], [0.6, 0.7, 0.8, 0.9]]
 
 
15
  """
16
 
17
  # Helper functions
 
37
  ymin * height,
38
  xmax * width,
39
  ymax * height
40
+ ], outline="#00FF00", width=2)
41
  return image
42
 
43
  # Streamlit UI
44
+ st.title("PDF Text Region Detection with Gemini")
45
  col1, col2 = st.columns(2)
46
 
47
  with col1:
48
+ uploaded_file = st.file_uploader("Upload PDF", type=["pdf"])
 
49
 
50
+ if uploaded_file:
51
+ if st.button("Analyze Document"):
52
+ with st.spinner("Analyzing PDF..."):
53
  try:
54
  # Convert PDF to images
55
  pdf_bytes = uploaded_file.read()
 
69
  mime_type="image/png"
70
  )
71
 
72
+ # Get all text boxes
 
73
  box_response = client.models.generate_content(
74
  model="gemini-2.0-flash-exp",
75
+ contents=[DETECTION_PROMPT, image_part]
76
  )
77
 
78
  # Get description
79
  desc_response = client.models.generate_content(
80
  model="gemini-2.0-flash-exp",
81
+ contents=["Describe this document section in detail.", image_part]
82
  )
83
 
84
  # Process boxes
85
  try:
86
  boxes = parse_list_boxes(box_response.text)
87
  except Exception as e:
88
+ st.error(f"Error on page {page_num+1}: {str(e)}")
89
  boxes = []
90
 
91
+ # Draw boxes
92
  annotated_image = image.copy()
93
  if boxes:
94
  annotated_image = draw_bounding_boxes(annotated_image, boxes)
 
102
 
103
  # Display results
104
  with col2:
105
+ st.write(f"## Results ({len(results)} pages)")
106
+ tabs = st.tabs([f"Page {res['page']}" for res in results])
107
 
108
  for tab, res in zip(tabs, results):
109
  with tab:
110
  st.image(res["image"],
111
+ caption=f"Page {res['page']} - {res['boxes']} text regions detected",
112
  use_container_width=True)
113
+ st.write("**Description:**", res["description"])
114
 
115
  except Exception as e:
116
+ st.error(f"Error: {str(e)}")