gemini_vision_objects

Running

App Files Files Community

Sebbe33 committed on Feb 12

Commit

6c79114

verified ·

1 Parent(s): 40a87e2

Update app.py

Browse files

Files changed (1) hide show

app.py +70 -27

app.py CHANGED Viewed

@@ -1,49 +1,92 @@
 import os
-import streamlit as st
-from PIL import Image
 import io
-from google import genai
 from google.genai import types
-# API-Schlüssel laden
-#genai.configure(api_key=os.get("KEY"))
 st.title("Bildanalyse mit Gemini")
 col1, col2 = st.columns(2)
 with col1:
     uploaded_file = st.file_uploader("Bild hochladen", type=["jpg", "png", "jpeg"])
-    if uploaded_file is not None:
         image = Image.open(uploaded_file)
         st.image(image, caption="Hochgeladenes Bild", use_container_width=True)
         if st.button("Analysieren"):
             with st.spinner("Analysiere Bild..."):
                 try:
-                    # Bild in Bytes umwandeln
                     image_bytes = io.BytesIO()
                     image.save(image_bytes, format=image.format)
-                    image_bytes = image_bytes.getvalue()
-                    # Anfrage an Gemini senden
-                    client = genai.Client(api_key=os.getenv("KEY")) # Client innerhalb der Funktion erstellen
-                    response = client.models.generate_content(
-                        model="gemini-2.0-flash-exp",  # Oder "gemini-2.0-flash-exp", je nach Verfügbarkeit
-                        contents=["Beschreibe dieses Bild und identifiziere das Hauptobjekt.", types.Part.from_bytes(data=image_bytes, mime_type=f"image/{image.format.lower()}")
-                        ]
                     )
                     with col2:
-                      # Antwort anzeigen
-                        st.write("## Analyseergebnis:")
-                        st.write(response.text)
                 except Exception as e:
-                    st.error(f"Ein Fehler ist aufgetreten: {e}")

 import os
+import re
 import io
+import streamlit as st
+from PIL import Image, ImageDraw
+from google import genai
 from google.genai import types
+# Hilfsfunktionen
def parse_list_boxes(text):
    """Extract bounding boxes from a model response.

    Scans ``text`` for bracketed groups of exactly four comma-separated,
    non-negative decimal numbers (for example ``[0.1, 0.2, 0.8, 0.9]``) and
    returns them in order of appearance. Whitespace around the numbers is
    tolerated; groups containing malformed numbers such as ``1.2.3`` are
    skipped instead of raising.

    Args:
        text: Raw response text. ``None`` or an empty string yields ``[]``.

    Returns:
        A list with one ``[a, b, c, d]`` list of floats per match. The
        meaning of the four values is defined by the caller.
    """
    if not text:
        return []
    # A single well-formed decimal: "12", "12.", "12.5" or ".5". The previous
    # character class also accepted "1.2.3" or "..", which float() rejects.
    number = r'(\d+(?:\.\d*)?|\.\d+)'
    pattern = r'\[\s*' + r'\s*,\s*'.join([number] * 4) + r'\s*\]'
    return [[float(value) for value in match] for match in re.findall(pattern, text)]
def draw_bounding_boxes(image, boxes):
    """Draw a red rectangle for every box onto ``image`` and return it.

    Args:
        image: A PIL image. It is modified in place, so pass a copy if the
            original must stay untouched.
        boxes: Iterable of ``[ymin, xmin, ymax, xmax]`` sequences. Values may
            be fractions in the range 0-1 or coordinates normalized to
            0-1000 (the convention described in the Gemini API docs).

    Returns:
        The same ``image`` object, with the rectangles drawn on it.
    """
    if not boxes:
        return image
    draw = ImageDraw.Draw(image)
    width, height = image.size
    # Heuristic: any coordinate above 1 means the whole set is on the 0-1000
    # scale rather than 0-1. Decided once for all boxes so a small box cannot
    # be scaled differently from its neighbours.
    scale = 1000.0 if any(value > 1 for box in boxes for value in box) else 1.0
    for ymin, xmin, ymax, xmax in boxes:
        # Order each corner pair: a swapped pair would otherwise be handed to
        # ImageDraw.rectangle as an inverted rectangle.
        left, right = sorted((xmin / scale * width, xmax / scale * width))
        top, bottom = sorted((ymin / scale * height, ymax / scale * height))
        draw.rectangle([left, top, right, bottom], outline="red", width=3)
    return image
# Streamlit UI: inputs and preview in the left column, results in the right.
st.title("Bildanalyse mit Gemini")
col1, col2 = st.columns(2)
with col1:
    uploaded_file = st.file_uploader("Bild hochladen", type=["jpg", "png", "jpeg"])
    object_name = st.text_input("Objekt zur Erkennung", placeholder="z.B. 'Auto', 'Person'")
    # Nothing is shown until both an image and an object name are provided.
    if uploaded_file and object_name:
        image = Image.open(uploaded_file)
        st.image(image, caption="Hochgeladenes Bild", use_container_width=True)
        if st.button("Analysieren"):
            with st.spinner("Analysiere Bild..."):
                try:
                    # Image preparation: re-encode in the uploaded format so
                    # the MIME type sent to the API matches the bytes.
                    image_bytes = io.BytesIO()
                    image.save(image_bytes, format=image.format)
                    image_part = types.Part.from_bytes(
                        data=image_bytes.getvalue(),
                        mime_type=f"image/{image.format.lower()}"
                    )
                    # API client; the key is read from the KEY environment variable.
                    client = genai.Client(api_key=os.getenv("KEY"))
                    # Image description
                    # NOTE(review): confirm "gemini-1.0-pro-vision" is still
                    # served; the previous revision used a 2.0 model.
                    desc_response = client.models.generate_content(
                        model="gemini-1.0-pro-vision",
                        contents=["Beschreibe dieses Bild detailliert.", image_part]
                    )
                    # Object detection
                    detection_prompt = (
                        f"Gib alle Bounding Boxes für {object_name} im Format "
                        "[ymin, xmin, ymax, xmax] als Liste. Nur die Liste zurückgeben!"
                    )
                    box_response = client.models.generate_content(
                        model="gemini-1.0-pro-vision",
                        contents=[detection_prompt, image_part]
                    )
                    # Post-processing: the response text may be None when the
                    # model returns no text part, so fall back to "".
                    boxes = parse_list_boxes(box_response.text or "")
                    # Draw on a copy so the uploaded preview stays unmarked.
                    annotated_image = image.copy()
                    if boxes:
                        annotated_image = draw_bounding_boxes(annotated_image, boxes)
                        result_text = f"{len(boxes)} {object_name} erkannt"
                    else:
                        result_text = "Keine Objekte gefunden"
                    # Show results
                    with col2:
                        st.write("## Beschreibung:")
                        st.write(desc_response.text)
                        st.write("## Objekterkennung:")
                        st.write(result_text)
                        # use_container_width matches the preview above and
                        # replaces the deprecated use_column_width.
                        st.image(annotated_image, caption="Erkannte Objekte", use_container_width=True)
                except Exception as e:
                    # Top-level UI boundary: report any failure to the user.
                    st.error(f"Fehler: {e}")