Sebbe33 committed on
Commit
7e4f227
·
verified ·
1 Parent(s): 355b6ef

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -78
app.py CHANGED
@@ -5,123 +5,112 @@ import streamlit as st
5
  from PIL import Image, ImageDraw
6
  from google import genai
7
  from google.genai import types
 
8
 
# Helper functions
def parse_list_boxes(text):
    """Extract bounding boxes from a model response.

    Scans ``text`` for bracketed groups of exactly four comma-separated,
    non-negative decimal numbers, e.g. ``[0.1, 0.2, 0.3, 0.4]``, and returns
    them in order of appearance. Groups containing malformed numbers such as
    ``1.2.3`` are skipped instead of raising ``ValueError``.

    Args:
        text: Raw response text. ``None`` or an empty string yields ``[]``.

    Returns:
        A list of four-element float lists, one per matched group.
    """
    if not text:
        return []

    # One well-formed decimal: "12", "12.", "12.5" or ".5".
    number = r'(\d+(?:\.\d*)?|\.\d+)'
    # Four such numbers, tolerating whitespace around brackets and commas.
    pattern = r'\[\s*' + r'\s*,\s*'.join([number] * 4) + r'\s*\]'
    return [[float(value) for value in match] for match in re.findall(pattern, text)]
15
 
def draw_bounding_boxes(image, boxes):
    """Draw bounding boxes onto ``image`` in place and return it.

    Args:
        image: PIL image to draw on; it is modified in place.
        boxes: Iterable of ``[ymin, xmin, ymax, xmax]`` sequences given as
            fractions of the image height and width. Values outside 0-1 are
            clamped, and each axis is ordered so a box whose corners arrive
            swapped is still drawn.

    Returns:
        The same ``image`` object that was passed in.

    Raises:
        IndexError: If a box has fewer than four values.
    """
    draw = ImageDraw.Draw(image)
    width, height = image.size

    for box in boxes:
        # Keep every coordinate inside the 0-1 range.
        ymin, xmin, ymax, xmax = (max(0.0, min(1.0, box[i])) for i in range(4))

        # Recent Pillow releases raise ValueError when the second corner lies
        # before the first, so order each axis before drawing.
        top, bottom = sorted((ymin, ymax))
        left, right = sorted((xmin, xmax))

        # Draw the frame: neon green with thick lines.
        draw.rectangle(
            [left * width, top * height, right * width, bottom * height],
            outline="#00FF00",
            width=7,
        )
    return image
36
 
# Streamlit UI: upload an image, name an object, and let Gemini describe the
# image and locate the object. Results are rendered in the right-hand column.
st.title("Objekterkennung mit Gemini")
col1, col2 = st.columns(2)

with col1:
    uploaded_file = st.file_uploader("Bild hochladen", type=["jpg", "png", "jpeg"])
    object_name = st.text_input("Objekt zur Erkennung", placeholder="z.B. 'Auto', 'Person'")

    if uploaded_file and object_name:
        image = Image.open(uploaded_file)
        width, height = image.size
        st.image(image, caption="Hochgeladenes Bild", use_container_width=True)

        if st.button("Analysieren"):
            with st.spinner("Analysiere Bild..."):
                try:
                    # Image preparation: re-encode in the uploaded format.
                    image_bytes = io.BytesIO()
                    image.save(image_bytes, format=image.format)
                    image_part = types.Part.from_bytes(
                        data=image_bytes.getvalue(),
                        mime_type=f"image/{image.format.lower()}",
                    )

                    # API client
                    client = genai.Client(api_key=os.getenv("KEY"))

                    # Image description
                    desc_response = client.models.generate_content(
                        model="gemini-2.0-flash-exp",
                        contents=["Beschreibe dieses Bild detailliert.", image_part],
                    )

                    # Object detection
                    detection_prompt = (
                        f"Gib exakt 4 Dezimalzahlen pro Box für alle {object_name} im Format "
                        "[ymin, xmin, ymax, xmax] als reine Python-Liste ohne weiteren Text. "
                        "Beispiel: [[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8]]"
                    )
                    box_response = client.models.generate_content(
                        model="gemini-2.0-flash-exp",
                        contents=[detection_prompt, image_part],
                    )

                    # Processing. The response text may be missing; treat
                    # that the same as "no boxes" rather than a parse error.
                    try:
                        boxes = parse_list_boxes(box_response.text or "")
                        st.write("**Parsed Boxes:**", boxes)
                    except Exception as e:
                        st.error(f"Parsing Error: {str(e)}")
                        boxes = []

                    annotated_image = image.copy()

                    if boxes:
                        annotated_image = draw_bounding_boxes(annotated_image, boxes)
                        result_text = f"{len(boxes)} {object_name} erkannt"

                        # Zoom onto the first box. Clamp and order its
                        # coordinates the same way the drawing code does, so
                        # the crop rectangle can never be inverted or fall
                        # outside the image.
                        ymin, xmin, ymax, xmax = (
                            max(0.0, min(1.0, value)) for value in boxes[0]
                        )
                        top, bottom = sorted((ymin, ymax))
                        left, right = sorted((xmin, xmax))
                        zoom_area = (
                            max(0, int(left * width - 50)),
                            max(0, int(top * height - 50)),
                            min(width, int(right * width + 50)),
                            min(height, int(bottom * height + 50)),
                        )
                        zoomed_image = annotated_image.crop(zoom_area)
                    else:
                        result_text = "Keine Objekte gefunden"
                        zoomed_image = None

                    # Show results
                    with col2:
                        st.write("## Objekterkennung:")
                        st.write(result_text)

                        if boxes:
                            st.image(
                                [annotated_image, zoomed_image],
                                caption=["Gesamtbild", "Zoom auf Erkennung"],
                                width=400,
                            )
                        else:
                            st.image(annotated_image, caption="Keine Objekte erkannt", width=400)

                        st.write("## Beschreibung:")
                        st.write(desc_response.text or "")

                except Exception as e:
                    # Top-level boundary: surface any failure in the UI.
                    st.error(f"Fehler: {str(e)}")
 
5
  from PIL import Image, ImageDraw
6
  from google import genai
7
  from google.genai import types
8
+ from pdf2image import convert_from_bytes
9
 
# Helper functions
def parse_list_boxes(text):
    """Extract bounding boxes from a model response.

    Scans ``text`` for bracketed groups of exactly four comma-separated,
    non-negative decimal numbers, e.g. ``[0.1, 0.2, 0.3, 0.4]``, and returns
    them in order of appearance. Groups containing malformed numbers such as
    ``1.2.3`` are skipped instead of raising ``ValueError``.

    Args:
        text: Raw response text. ``None`` or an empty string yields ``[]``.

    Returns:
        A list of four-element float lists, one per matched group.
    """
    if not text:
        return []

    # One well-formed decimal: "12", "12.", "12.5" or ".5".
    number = r'(\d+(?:\.\d*)?|\.\d+)'
    # Four such numbers, tolerating whitespace around brackets and commas.
    pattern = r'\[\s*' + r'\s*,\s*'.join([number] * 4) + r'\s*\]'
    return [[float(value) for value in match] for match in re.findall(pattern, text)]
16
 
def draw_bounding_boxes(image, boxes):
    """Draw bounding boxes onto ``image`` in place and return it.

    Args:
        image: PIL image to draw on; it is modified in place.
        boxes: Iterable of ``[ymin, xmin, ymax, xmax]`` sequences given as
            fractions of the image height and width. Values outside 0-1 are
            clamped, and each axis is ordered so a box whose corners arrive
            swapped is still drawn.

    Returns:
        The same ``image`` object that was passed in.

    Raises:
        IndexError: If a box has fewer than four values.
    """
    draw = ImageDraw.Draw(image)
    width, height = image.size

    for box in boxes:
        # Keep every coordinate inside the 0-1 range.
        ymin, xmin, ymax, xmax = (max(0.0, min(1.0, box[i])) for i in range(4))

        # Recent Pillow releases raise ValueError when the second corner lies
        # before the first, so order each axis before drawing.
        top, bottom = sorted((ymin, ymax))
        left, right = sorted((xmin, xmax))

        draw.rectangle(
            [left * width, top * height, right * width, bottom * height],
            outline="#00FF00",
            width=3,
        )
    return image
35
 
# Streamlit UI: upload a PDF, name a topic, and let Gemini locate and describe
# that topic on every page. Results are rendered as one tab per page in the
# right-hand column.
st.title("PDF Themenerkennung mit Gemini")
col1, col2 = st.columns(2)

with col1:
    uploaded_file = st.file_uploader("PDF hochladen", type=["pdf"])
    topic_name = st.text_input(
        "Thema zur Erkennung",
        placeholder="z.B. 'Überschrift', 'Tabelle', 'Absatz'",
    )

    # The button is only offered once both inputs are present.
    if uploaded_file and topic_name and st.button("Analysieren"):
        with st.spinner("Analysiere PDF..."):
            try:
                # Convert PDF to images. getvalue() returns the whole upload
                # regardless of the current stream position, unlike read().
                pdf_bytes = uploaded_file.getvalue()
                images = convert_from_bytes(pdf_bytes)
                results = []

                # Initialize client once and reuse it for every page.
                client = genai.Client(api_key=os.getenv("KEY"))

                # The prompt does not depend on the page, so build it once.
                detection_prompt = (
                    f"Identifiziere alle {topic_name} Bereiche in diesem Dokument. "
                    "Gib Bounding Boxes im Format [ymin, xmin, ymax, xmax] "
                    "als reine Python-Liste ohne weiteren Text. "
                    "Beispiel: [[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8]]"
                )

                for page_num, image in enumerate(images, start=1):
                    # Prepare image: encode the rendered page as PNG.
                    img_byte_arr = io.BytesIO()
                    image.save(img_byte_arr, format='PNG')

                    image_part = types.Part.from_bytes(
                        data=img_byte_arr.getvalue(),
                        mime_type="image/png",
                    )

                    # Get topic boxes
                    box_response = client.models.generate_content(
                        model="gemini-2.0-flash-exp",
                        contents=[detection_prompt, image_part],
                    )

                    # Get description
                    desc_response = client.models.generate_content(
                        model="gemini-2.0-flash-exp",
                        contents=["Beschreibe diesen Dokumentenausschnitt detailliert.", image_part],
                    )

                    # Process boxes. The response text may be missing; treat
                    # that as "no boxes" rather than a per-page error.
                    try:
                        boxes = parse_list_boxes(box_response.text or "")
                    except Exception as e:
                        st.error(f"Fehler bei Seite {page_num}: {str(e)}")
                        boxes = []

                    # Draw boxes on a copy so the rendered page stays untouched.
                    annotated_image = image.copy()
                    if boxes:
                        annotated_image = draw_bounding_boxes(annotated_image, boxes)

                    results.append({
                        "page": page_num,
                        "image": annotated_image,
                        "description": desc_response.text or "",
                        "boxes": len(boxes),
                    })

                # Display results
                with col2:
                    st.write(f"## Ergebnisse ({len(results)} Seiten)")

                    if results:
                        tabs = st.tabs([f"Seite {res['page']}" for res in results])

                        for tab, res in zip(tabs, results):
                            with tab:
                                st.image(
                                    res["image"],
                                    caption=f"Seite {res['page']} - {res['boxes']} {topic_name} erkannt",
                                    use_container_width=True,
                                )
                                st.write("**Beschreibung:**", res["description"])
                    else:
                        # st.tabs needs at least one label, so skip it when
                        # the PDF produced no pages.
                        st.warning("Keine Seiten im PDF gefunden")

            except Exception as e:
                # Top-level boundary: surface any failure in the UI.
                st.error(f"Fehler: {str(e)}")