Spaces:
Running
Running
import os | |
import re | |
import io | |
import streamlit as st | |
from PIL import Image, ImageDraw | |
from google import genai | |
from google.genai import types | |
# Hilfsfunktionen | |
def parse_list_boxes(text):
    """Extract bounding boxes from the model's reply text.

    Finds every ``[a, b, c, d]`` group of four non-negative decimal numbers
    in *text* and returns them as a list of ``[float, float, float, float]``
    lists (expected order: ymin, xmin, ymax, xmax).

    The number pattern allows at most one decimal point, so a malformed
    token such as ``1.2.3`` simply fails to match instead of being captured
    and then raising ValueError inside float() (the old ``[\\d\\.]+`` pattern
    did exactly that).
    """
    number = r'\d+(?:\.\d+)?'  # digits with at most one optional fraction part
    pattern = rf'\[({number}),\s*({number}),\s*({number}),\s*({number})\]'
    return [[float(value) for value in match] for match in re.findall(pattern, text)]
def draw_bounding_boxes(image, boxes):
    """Draw one rectangle per box onto *image* (mutated in place) and return it.

    Parameters:
        image: PIL image to annotate (modified in place).
        boxes: iterable of ``[ymin, xmin, ymax, xmax]`` lists in normalized
            0-1 coordinates, as produced by ``parse_list_boxes``.

    Every coordinate is clamped into [0, 1], and each corner pair is sorted
    so the rectangle is always well-formed: Pillow >= 9 raises ValueError
    from ``ImageDraw.rectangle`` when x1 < x0 or y1 < y0, which could happen
    whenever the model emits coordinates in the wrong order.
    """
    draw = ImageDraw.Draw(image)
    width, height = image.size
    for box in boxes:
        # Clamp all four normalized values into the unit square.
        ymin, xmin, ymax, xmax = (max(0.0, min(1.0, v)) for v in box[:4])
        # Guarantee x0 <= x1 and y0 <= y1 regardless of model output order.
        x0, x1 = sorted((xmin * width, xmax * width))
        y0, y1 = sorted((ymin * height, ymax * height))
        # Neon green with a thick outline so the box is visible on any photo.
        draw.rectangle([x0, y0, x1, y1], outline="#00FF00", width=7)
    return image
# Streamlit UI — flat script: upload an image, name a target object, then ask
# Gemini for a free-text description plus normalized bounding boxes.
st.title("Objekterkennung mit Gemini")
col1, col2 = st.columns(2)
with col1:
    uploaded_file = st.file_uploader("Bild hochladen", type=["jpg", "png", "jpeg"])
    object_name = st.text_input("Objekt zur Erkennung", placeholder="z.B. 'Auto', 'Person'")
if uploaded_file and object_name:
    image = Image.open(uploaded_file)
    width, height = image.size
    st.image(image, caption="Hochgeladenes Bild", use_container_width=True)
    if st.button("Analysieren"):
        with st.spinner("Analysiere Bild..."):
            try:
                # Image preparation: re-encode the upload in its original
                # format so the raw bytes can be passed to the API.
                # NOTE(review): image.format can be None for images not read
                # from a file — confirm uploads always carry a format.
                image_bytes = io.BytesIO()
                image.save(image_bytes, format=image.format)
                image_part = types.Part.from_bytes(
                    data=image_bytes.getvalue(),
                    mime_type=f"image/{image.format.lower()}"
                )
                # API client; reads the key from the KEY environment variable.
                client = genai.Client(api_key=os.getenv("KEY"))
                # Request 1: free-text description of the whole image.
                desc_response = client.models.generate_content(
                    model="gemini-2.0-flash-exp",
                    contents=["Beschreibe dieses Bild detailliert.", image_part]
                )
                # Request 2: bounding boxes, requested as a bare Python list
                # of [ymin, xmin, ymax, xmax] values (prompt text is German).
                detection_prompt = (
                    f"Gib exakt 4 Dezimalzahlen pro Box für alle {object_name} im Format "
                    "[ymin, xmin, ymax, xmax] als reine Python-Liste ohne weiteren Text. "
                    "Beispiel: [[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8]]"
                )
                box_response = client.models.generate_content(
                    model="gemini-2.0-flash-exp",
                    contents=[detection_prompt, image_part]
                )
                # Parse the reply; on any parsing failure fall back to "no boxes"
                # so the description below is still shown.
                try:
                    boxes = parse_list_boxes(box_response.text)
                    st.write("**Parsed Boxes:**", boxes)
                except Exception as e:
                    st.error(f"Parsing Error: {str(e)}")
                    boxes = []
                # Annotate a copy so the originally displayed image stays clean.
                annotated_image = image.copy()
                if boxes:
                    annotated_image = draw_bounding_boxes(annotated_image, boxes)
                    result_text = f"{len(boxes)} {object_name} erkannt"
                    # Zoom onto the first detected box, padded by 50 px on each
                    # side and clamped to the image bounds.
                    ymin, xmin, ymax, xmax = boxes[0]
                    zoom_area = (
                        max(0, int(xmin * width - 50)),
                        max(0, int(ymin * height - 50)),
                        min(width, int(xmax * width + 50)),
                        min(height, int(ymax * height + 50))
                    )
                    zoomed_image = annotated_image.crop(zoom_area)
                else:
                    result_text = "Keine Objekte gefunden"
                    zoomed_image = None
                # Show results in the right-hand column.
                with col2:
                    st.write("## Objekterkennung:")
                    st.write(result_text)
                    if boxes:
                        st.image(
                            [annotated_image, zoomed_image],
                            caption=["Gesamtbild", "Zoom auf Erkennung"],
                            width=400
                        )
                    else:
                        st.image(annotated_image, caption="Keine Objekte erkannt", width=400)
                    st.write("## Beschreibung:")
                    st.write(desc_response.text)
            # Broad top-level boundary: surface any API/image error in the UI.
            except Exception as e:
                st.error(f"Fehler: {str(e)}")