gemini_vision_objects

Running

App Files Files Community

gemini_vision_objects / app.py

Sebbe33

Update app.py

d57a6ad verified 7 months ago

raw

history blame

5.15 kB

	import os
	import re
	import io
	import streamlit as st
	from PIL import Image, ImageDraw
	from google import genai
	from google.genai import types

	# Helper functions
	def parse_list_boxes(text):
	    """Extract four-number bounding boxes from a model response.

	    Scans ``text`` for bracketed groups of exactly four comma-separated,
	    non-negative decimal numbers such as ``[0.1, 0.2, 0.3, 0.4]``.
	    Whitespace (including line breaks) around the numbers and commas is
	    optional, so compact output like ``[0.1,0.2,0.3,0.4]`` is accepted too.
	    Groups containing malformed numbers (e.g. ``1.2.3``) are skipped
	    instead of raising.

	    Args:
	        text: Raw response text. ``None`` or an empty string yields ``[]``.

	    Returns:
	        A list of four-element float lists, in order of appearance. The
	        callers in this file read each one as ``[ymin, xmin, ymax, xmax]``.
	    """
	    if not text:
	        return []

	    # One well-formed decimal: "1", "1.", "1.5" or ".5" - never "1.2.3".
	    number = r'(\d+(?:\.\d*)?|\.\d+)'
	    pattern = r'\[\s*' + r'\s*,\s*'.join([number] * 4) + r'\s*\]'
	    return [[float(value) for value in match] for match in re.findall(pattern, text)]

	def draw_bounding_boxes(image, boxes, color="#00FF00", line_width=7):
	    """Draw bounding boxes onto ``image`` in place and return it.

	    Args:
	        image: PIL image to annotate. It is modified in place.
	        boxes: Iterable of ``[ymin, xmin, ymax, xmax]`` sequences whose
	            values are fractions of the image height/width. Values outside
	            the 0-1 range are clamped to it.
	        color: Outline colour passed to ``ImageDraw.rectangle``. Defaults to
	            neon green.
	        line_width: Outline thickness in pixels. Defaults to a thick 7 px.

	    Returns:
	        The same ``image`` object, with one rectangle drawn per box.
	    """
	    draw = ImageDraw.Draw(image)
	    width, height = image.size

	    def clamp(value):
	        # Keep every coordinate inside the 0-1 range.
	        return max(0.0, min(1.0, value))

	    for box in boxes:
	        ymin, xmin, ymax, xmax = (clamp(box[i]) for i in range(4))

	        # ImageDraw.rectangle requires x1 >= x0 and y1 >= y0 (recent Pillow
	        # versions raise ValueError otherwise), so put swapped corners
	        # into ascending order before drawing.
	        left, right = sorted((xmin * width, xmax * width))
	        top, bottom = sorted((ymin * height, ymax * height))

	        draw.rectangle([left, top, right, bottom], outline=color, width=line_width)
	    return image

	# Streamlit UI: upload an image, name an object, then ask Gemini for a
	# description and for bounding boxes of that object.
	st.title("Bildanalyse mit Gemini")
	col1, col2 = st.columns(2)

	# NOTE(review): the original indentation was lost. The preview image and the
	# analysis button are assumed to live inside the left column - confirm.
	with col1:
	    uploaded_file = st.file_uploader("Bild hochladen", type=["jpg", "png", "jpeg"])
	    object_name = st.text_input("Objekt zur Erkennung", placeholder="z.B. 'Auto', 'Person'")

	    if uploaded_file and object_name:
	        image = Image.open(uploaded_file)
	        width, height = image.size
	        st.image(image, caption="Hochgeladenes Bild", use_container_width=True)

	        if st.button("Analysieren"):
	            with st.spinner("Analysiere Bild..."):
	                try:
	                    # Image preparation: re-encode in the detected format and
	                    # wrap the bytes as a request part.
	                    image_bytes = io.BytesIO()
	                    image.save(image_bytes, format=image.format)
	                    image_part = types.Part.from_bytes(
	                        data=image_bytes.getvalue(),
	                        mime_type=f"image/{image.format.lower()}"
	                    )

	                    # API client; the key is read from the KEY environment variable.
	                    client = genai.Client(api_key=os.getenv("KEY"))

	                    # Image description
	                    desc_response = client.models.generate_content(
	                        model="gemini-2.0-flash-exp",
	                        contents=["Beschreibe dieses Bild detailliert.", image_part]
	                    )

	                    # Object detection
	                    detection_prompt = (
	                        f"Gib exakt 4 Dezimalzahlen pro Box für alle {object_name} im Format "
	                        "[ymin, xmin, ymax, xmax] als reine Python-Liste ohne weiteren Text. "
	                        "Beispiel: [[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8]]"
	                    )
	                    box_response = client.models.generate_content(
	                        model="gemini-2.0-flash-exp",
	                        contents=[detection_prompt, image_part]
	                    )

	                    # Debug output
	                    st.write("Raw API Response:", box_response.text)

	                    # Parse the boxes; a malformed or missing response text
	                    # is reported and treated as "nothing found".
	                    try:
	                        boxes = parse_list_boxes(box_response.text)
	                        st.write("Parsed Boxes:", boxes)
	                    except (TypeError, ValueError) as e:
	                        st.error(f"Parsing Error: {str(e)}")
	                        boxes = []

	                    annotated_image = image.copy()

	                    if boxes:
	                        annotated_image = draw_bounding_boxes(annotated_image, boxes)
	                        result_text = f"{len(boxes)} {object_name} erkannt"

	                        # Zoom onto the first box. Clamp to 0-1 as the drawing
	                        # code does and order the corners, so out-of-range or
	                        # swapped values cannot produce an inverted crop box.
	                        ymin, xmin, ymax, xmax = (
	                            max(0.0, min(1.0, value)) for value in boxes[0]
	                        )
	                        left, right = sorted((xmin * width, xmax * width))
	                        top, bottom = sorted((ymin * height, ymax * height))
	                        # 50 px of context around the box, limited to the image.
	                        zoom_area = (
	                            max(0, int(left - 50)),
	                            max(0, int(top - 50)),
	                            min(width, int(right + 50)),
	                            min(height, int(bottom + 50))
	                        )
	                        zoomed_image = annotated_image.crop(zoom_area)

	                    else:
	                        result_text = "Keine Objekte gefunden"
	                        zoomed_image = None

	                    # Show the results in the right column
	                    with col2:

	                        st.write("## Objekterkennung:")
	                        st.write(result_text)

	                        if boxes:
	                            st.image(
	                                [annotated_image, zoomed_image],
	                                caption=["Gesamtbild", "Zoom auf Erkennung"],
	                                width=400
	                            )
	                        else:
	                            st.image(annotated_image, caption="Keine Objekte erkannt", width=400)

	                        st.write("## Beschreibung:")
	                        st.write(desc_response.text)
	                except Exception as e:
	                    # Top-level boundary: surface any failure in the UI.
	                    st.error(f"Fehler: {str(e)}")