Spaces:
Running
Running
File size: 5,044 Bytes
6459986 6c79114 d1dce8a 6c79114 1d8d466 5f554b3 6c79114 5f554b3 cdb1e78 b908919 cdb1e78 b908919 6c79114 d1dce8a 6c79114 355b6ef 02d80b6 80e2b7f 6c79114 80e2b7f b908919 80e2b7f 6c79114 80e2b7f 6c79114 80e2b7f 6c79114 80e2b7f 6c79114 d9ec95b 6c79114 b908919 6c79114 718c9df 6c79114 b908919 6c79114 b908919 6c79114 b908919 6c79114 b908919 6c79114 40a87e2 ea8272c b908919 ea8272c b908919 d57a6ad 80e2b7f 6c79114 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 |
import os
import re
import io
import streamlit as st
from PIL import Image, ImageDraw
from google import genai
from google.genai import types
# Hilfsfunktionen
# One non-negative decimal number: "12", "0.5", "1." or ".5".  A strict
# pattern keeps tokens such as "..." or "1.2.3" from ever reaching float().
_BOX_NUMBER = r"(\d+\.?\d*|\.\d+)"

# Four comma-separated numbers inside one pair of square brackets.  Optional
# whitespace is tolerated next to the brackets and around the commas.
_BOX_PATTERN = re.compile(
    r"\[\s*" + r"\s*,\s*".join([_BOX_NUMBER] * 4) + r"\s*\]"
)


def parse_list_boxes(text):
    """Extract bounding boxes from a model response text.

    Every occurrence of ``[a, b, c, d]`` (four non-negative decimal numbers)
    in ``text`` yields one box.  Bracket groups that do not consist of
    exactly four well-formed numbers are skipped instead of raising.

    Args:
        text: Response text to scan.  ``None`` or an empty string is treated
            as "no boxes".

    Returns:
        A list of boxes, each a list of four floats in the order in which
        they appear in the text.
    """
    if not text:
        return []
    return [
        [float(number) for number in match]
        for match in _BOX_PATTERN.findall(text)
    ]
def draw_bounding_boxes(image, boxes, outline="#00FF00", line_width=7):
    """Draw bounding boxes onto ``image`` and return it.

    The image is modified in place; pass a copy if the original must stay
    untouched.

    Args:
        image: Image object to draw on; its ``size`` supplies the pixel
            width and height used for scaling.
        boxes: Iterable of ``[ymin, xmin, ymax, xmax]`` sequences.  Each
            value is clamped into the range 0-1 and then scaled by the
            image height (y values) or width (x values).
        outline: Outline colour of the rectangles (default neon green).
        line_width: Outline thickness in pixels.

    Returns:
        The same ``image`` object, with one rectangle drawn per box.
    """
    draw = ImageDraw.Draw(image)
    width, height = image.size
    for box in boxes:
        # Make sure every value lies between 0 and 1.
        ymin = max(0.0, min(1.0, box[0]))
        xmin = max(0.0, min(1.0, box[1]))
        ymax = max(0.0, min(1.0, box[2]))
        xmax = max(0.0, min(1.0, box[3]))
        # The values come from model output and may arrive with swapped
        # corners; order them so the rectangle is always well-formed.
        left, right = sorted((xmin * width, xmax * width))
        top, bottom = sorted((ymin * height, ymax * height))
        draw.rectangle(
            [left, top, right, bottom],
            outline=outline,
            width=line_width,
        )
    return image
# Streamlit UI
# Model used for both the image description and the detection request.
GEMINI_MODEL = "gemini-2.0-flash-exp"

# Page layout: inputs and the uploaded image on the left, results on the right.
st.title("Objekterkennung mit Gemini")
col1, col2 = st.columns(2)

with col1:
    uploaded_file = st.file_uploader("Bild hochladen", type=["jpg", "png", "jpeg"])
    object_name = st.text_input("Objekt zur Erkennung", placeholder="z.B. 'Auto', 'Person'")

    if uploaded_file and object_name:
        image = Image.open(uploaded_file)
        width, height = image.size
        st.image(image, caption="Hochgeladenes Bild", use_container_width=True)

        if st.button("Analysieren"):
            with st.spinner("Analysiere Bild..."):
                try:
                    # Image preparation: re-encode the image in its own
                    # format and wrap the bytes as a request part.
                    image_bytes = io.BytesIO()
                    image.save(image_bytes, format=image.format)
                    image_part = types.Part.from_bytes(
                        data=image_bytes.getvalue(),
                        mime_type=f"image/{image.format.lower()}"
                    )

                    # API client; the key is read from the KEY environment
                    # variable.
                    client = genai.Client(api_key=os.getenv("KEY"))

                    # Image description
                    desc_response = client.models.generate_content(
                        model=GEMINI_MODEL,
                        contents=["Beschreibe dieses Bild detailliert.", image_part]
                    )

                    # Object detection
                    detection_prompt = (
                        f"Gib exakt 4 Dezimalzahlen pro Box für alle {object_name} im Format "
                        "[ymin, xmin, ymax, xmax] als reine Python-Liste ohne weiteren Text. "
                        "Beispiel: [[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8]]"
                    )
                    box_response = client.models.generate_content(
                        model=GEMINI_MODEL,
                        contents=[detection_prompt, image_part]
                    )

                    # Processing: a response that cannot be parsed is
                    # reported and then treated as "no boxes" (best effort).
                    try:
                        boxes = parse_list_boxes(box_response.text)
                        st.write("**Parsed Boxes:**", boxes)
                    except Exception as e:
                        st.error(f"Parsing Error: {str(e)}")
                        boxes = []

                    annotated_image = image.copy()
                    if boxes:
                        annotated_image = draw_bounding_boxes(annotated_image, boxes)
                        result_text = f"{len(boxes)} {object_name} erkannt"

                        # Zoom onto the first box.  Clamp the values to 0-1
                        # and order the corners, so that out-of-range or
                        # swapped model output still yields a valid crop
                        # region with a 50 px margin.
                        ymin, xmin, ymax, xmax = (
                            max(0.0, min(1.0, value)) for value in boxes[0]
                        )
                        left, right = sorted((xmin * width, xmax * width))
                        top, bottom = sorted((ymin * height, ymax * height))
                        zoom_area = (
                            max(0, int(left - 50)),
                            max(0, int(top - 50)),
                            min(width, int(right + 50)),
                            min(height, int(bottom + 50))
                        )
                        zoomed_image = annotated_image.crop(zoom_area)
                    else:
                        result_text = "Keine Objekte gefunden"
                        zoomed_image = None

                    # Show the results in the right-hand column.
                    with col2:
                        st.write("## Objekterkennung:")
                        st.write(result_text)
                        if boxes:
                            st.image(
                                [annotated_image, zoomed_image],
                                caption=["Gesamtbild", "Zoom auf Erkennung"],
                                width=400
                            )
                        else:
                            st.image(annotated_image, caption="Keine Objekte erkannt", width=400)
                        st.write("## Beschreibung:")
                        st.write(desc_response.text)
                except Exception as e:
                    # Top-level boundary of the analysis run: show any
                    # failure to the user.
                    st.error(f"Fehler: {str(e)}")