import os
import re
import io
import streamlit as st
from PIL import Image, ImageDraw
from google import genai
from google.genai import types

# Helper functions
def parse_list_boxes(text):
    """Extract bounding boxes from the response text."""
    pattern = r'\[([\d\.]+),\s*([\d\.]+),\s*([\d\.]+),\s*([\d\.]+)\]'
    matches = re.findall(pattern, text)
    return [[float(m) for m in match] for match in matches]
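
# Illustrative example (values made up): for a model reply such as
# "[[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8]]", parse_list_boxes returns
# [[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8]] -- one
# [ymin, xmin, ymax, xmax] entry per detected object.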

def draw_bounding_boxes(image, boxes):
    """Draw bounding boxes onto the image."""
    draw = ImageDraw.Draw(image)
    width, height = image.size
    for box in boxes:
        # Clamp all values to the 0-1 range
        ymin = max(0.0, min(1.0, box[0]))
        xmin = max(0.0, min(1.0, box[1]))
        ymax = max(0.0, min(1.0, box[2]))
        xmax = max(0.0, min(1.0, box[3]))
        # Draw the frame
        draw.rectangle([
            xmin * width,
            ymin * height,
            xmax * width,
            ymax * height
        ], outline="#00FF00", width=7)  # neon green with thick lines
    return image
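
# Minimal usage sketch (hypothetical file name, made-up coordinates):
#   img = Image.open("example.jpg")
#   img = draw_bounding_boxes(img, [[0.1, 0.2, 0.3, 0.4]])
# Boxes are expected as normalized [ymin, xmin, ymax, xmax] values in the
# 0-1 range, matching the format requested from Gemini below.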

# Streamlit UI
st.title("Object Detection with Gemini")
col1, col2 = st.columns(2)

with col1:
    uploaded_file = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])
    object_name = st.text_input("Object to detect", placeholder="e.g. 'car', 'person'")

if uploaded_file and object_name:
    image = Image.open(uploaded_file)
    width, height = image.size
    st.image(image, caption="Uploaded image", use_container_width=True)
    if st.button("Analyze"):
        with st.spinner("Analyzing image..."):
            try:
                # Prepare the image: re-encode it to bytes for the API request
                image_bytes = io.BytesIO()
                image.save(image_bytes, format=image.format)
                image_part = types.Part.from_bytes(
                    data=image_bytes.getvalue(),
                    mime_type=f"image/{image.format.lower()}"
                )
                # API client (reads the Gemini API key from the KEY env variable)
                client = genai.Client(api_key=os.getenv("KEY"))
                # Image description
                desc_response = client.models.generate_content(
                    model="gemini-2.0-flash-exp",
                    contents=["Describe this image in detail.", image_part]
                )
                # Object detection
                detection_prompt = (
                    f"Return exactly 4 decimal numbers per box for every {object_name}, "
                    "in the format [ymin, xmin, ymax, xmax], as a plain Python list "
                    "with no additional text. "
                    "Example: [[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8]]"
                )
                box_response = client.models.generate_content(
                    model="gemini-2.0-flash-exp",
                    contents=[detection_prompt, image_part]
                )
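                # The prompt above requests a bare Python list, e.g.
                # "[[0.1, 0.2, 0.3, 0.4]]". The regex-based parser below stays
                # robust if the model still wraps the list in code fences or
                # prose, since it only extracts groups of four numbers.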
                # Parse the detected boxes
                try:
                    boxes = parse_list_boxes(box_response.text)
                    st.write("**Parsed Boxes:**", boxes)
                except Exception as e:
                    st.error(f"Parsing Error: {str(e)}")
                    boxes = []
                annotated_image = image.copy()
                if boxes:
                    annotated_image = draw_bounding_boxes(annotated_image, boxes)
                    result_text = f"{len(boxes)} {object_name} detected"
                    # Zoom in on the first box, with a 50 px margin clamped to the image bounds
                    ymin, xmin, ymax, xmax = boxes[0]
                    zoom_area = (
                        max(0, int(xmin * width - 50)),
                        max(0, int(ymin * height - 50)),
                        min(width, int(xmax * width + 50)),
                        min(height, int(ymax * height + 50))
                    )
                    zoomed_image = annotated_image.crop(zoom_area)
                else:
                    result_text = "No objects found"
                    zoomed_image = None
                # Display the results
                with col2:
                    st.write("## Object detection:")
                    st.write(result_text)
                    if boxes:
                        st.image(
                            [annotated_image, zoomed_image],
                            caption=["Full image", "Zoom on detection"],
                            width=400
                        )
                    else:
                        st.image(annotated_image, caption="No objects detected", width=400)
                    st.write("## Description:")
                    st.write(desc_response.text)
            except Exception as e:
                st.error(f"Error: {str(e)}")