gemini_vision_objects

Running

App Files Files Community

gemini_vision_objects / app.py

Sebbe33

Update app.py

d9ec95b verified 6 months ago

raw

history blame

3.5 kB

	import os
	import re
	import io
	import streamlit as st
	from PIL import Image, ImageDraw
	from google import genai
	from google.genai import types

	# Hilfsfunktionen
	def parse_list_boxes(text):
	    """Extract bounding boxes from a model response text.

	    Every bracketed group of exactly four comma-separated, non-negative
	    decimal numbers (for example ``[0.1, 0.2, 0.3, 0.4]`` or
	    ``[100,200,300,400]``) is returned, in order of appearance. Whitespace
	    around the numbers and commas is optional. Groups containing malformed
	    numbers such as ``1.2.3`` are skipped instead of raising.

	    Args:
	        text: Response text to scan. ``None`` or an empty string yields
	            an empty list.

	    Returns:
	        A list of four-element lists of floats, one per matched group.
	    """
	    if not text:
	        return []
	    # One well-formed decimal: "12", "12.", "12.5" or ".5" -- anything that
	    # float() is guaranteed to accept.
	    number = r'(\d+(?:\.\d*)?|\.\d+)'
	    pattern = r'\[\s*' + r'\s*,\s*'.join([number] * 4) + r'\s*\]'
	    return [[float(value) for value in match]
	            for match in re.findall(pattern, text)]

	def draw_bounding_boxes(image, boxes):
	    """Draw a red rectangle for each bounding box onto the image.

	    Args:
	        image: Image to annotate; it is drawn on in place via
	            ``ImageDraw.Draw``.
	        boxes: Iterable of ``[ymin, xmin, ymax, xmax]`` coordinates,
	            normalized either to 0..1 or to 0..1000.

	    Returns:
	        The same ``image`` object that was passed in.
	    """
	    boxes = list(boxes)
	    draw = ImageDraw.Draw(image)
	    width, height = image.size
	    # Gemini documents its box output as normalized to 0..1000, while the
	    # original code expected 0..1 fractions. Any value above 1 cannot be a
	    # fraction, so it switches the whole batch to the 0..1000 scale.
	    # NOTE(review): a batch whose values are all <= 1 is treated as
	    # fractions -- confirm this heuristic against real model output.
	    if any(value > 1 for box in boxes for value in box):
	        scale = 1000.0
	    else:
	        scale = 1.0
	    for box in boxes:
	        ymin, xmin, ymax, xmax = box
	        # Order the corners: newer Pillow releases raise ValueError when
	        # the second corner lies before the first.
	        left, right = sorted((xmin / scale * width, xmax / scale * width))
	        top, bottom = sorted((ymin / scale * height, ymax / scale * height))
	        draw.rectangle([left, top, right, bottom], outline="red", width=3)
	    return image

	# Streamlit UI
	# Flow: upload an image and name an object in the left column; on
	# "Analysieren" the image is sent to Gemini twice (description and
	# bounding-box detection) and the results are shown in the right column.

	# One model for both requests. The detection request previously named the
	# legacy "gemini-1.0-pro-vision" model.
	# NOTE(review): that model appears to be retired from the Gemini API --
	# confirm against the current model list.
	GEMINI_MODEL = "gemini-2.0-flash-exp"

	st.title("Bildanalyse mit Gemini")
	col1, col2 = st.columns(2)

	with col1:
	    uploaded_file = st.file_uploader("Bild hochladen", type=["jpg", "png", "jpeg"])
	    object_name = st.text_input("Objekt zur Erkennung", placeholder="z.B. 'Auto', 'Person'")

	    if uploaded_file and object_name:
	        image = Image.open(uploaded_file)
	        st.image(image, caption="Hochgeladenes Bild", use_container_width=True)

	        if st.button("Analysieren"):
	            with st.spinner("Analysiere Bild..."):
	                try:
	                    # Image preparation: re-encode in the original format so
	                    # the bytes match the declared MIME type.
	                    image_bytes = io.BytesIO()
	                    image.save(image_bytes, format=image.format)
	                    image_part = types.Part.from_bytes(
	                        data=image_bytes.getvalue(),
	                        mime_type=f"image/{image.format.lower()}",
	                    )

	                    # API client; the key is read from the KEY env variable.
	                    client = genai.Client(api_key=os.getenv("KEY"))

	                    # Image description
	                    desc_response = client.models.generate_content(
	                        model=GEMINI_MODEL,
	                        contents=["Beschreibe dieses Bild detailliert.", image_part],
	                    )

	                    # Object detection
	                    detection_prompt = (
	                        f"Gib alle Bounding Boxes für {object_name} im Format "
	                        "[ymin, xmin, ymax, xmax] als Liste. Nur die Liste zurückgeben!"
	                    )
	                    box_response = client.models.generate_content(
	                        model=GEMINI_MODEL,
	                        contents=[detection_prompt, image_part],
	                    )

	                    # Post-processing. Annotate an RGB copy so the red
	                    # outline also renders on grayscale/palette uploads and
	                    # the preview image stays untouched.
	                    boxes = parse_list_boxes(box_response.text)
	                    annotated_image = image.convert("RGB")

	                    if boxes:
	                        annotated_image = draw_bounding_boxes(annotated_image, boxes)
	                        result_text = f"{len(boxes)} {object_name} erkannt"
	                    else:
	                        result_text = "Keine Objekte gefunden"

	                    # Show results
	                    with col2:
	                        st.write("## Beschreibung:")
	                        st.write(desc_response.text)

	                        st.write("## Objekterkennung:")
	                        st.write(result_text)
	                        # use_container_width matches the preview above;
	                        # use_column_width is deprecated in Streamlit.
	                        st.image(annotated_image, caption="Erkannte Objekte", use_container_width=True)

	                except Exception as e:
	                    # Top-level UI boundary: surface any failure to the user.
	                    st.error(f"Fehler: {e}")