import os
import re
import io
import streamlit as st
from PIL import Image, ImageDraw
from google import genai
from google.genai import types

# Helper functions
def parse_list_boxes(text):
    """Extrahiert Bounding Boxes aus dem Antworttext"""
    pattern = r'\[([\d\.]+),\s*([\d\.]+),\s*([\d\.]+),\s*([\d\.]+)\]'
    matches = re.findall(pattern, text)
    return [[float(m) for m in match] for match in matches]

def draw_bounding_boxes(image, boxes):
    """Zeichnet Bounding Boxes auf das Bild"""
    draw = ImageDraw.Draw(image)
    width, height = image.size
    
    for box in boxes:
        # Clamp all coordinates to the range 0-1
        ymin = max(0.0, min(1.0, box[0]))
        xmin = max(0.0, min(1.0, box[1]))
        ymax = max(0.0, min(1.0, box[2]))
        xmax = max(0.0, min(1.0, box[3]))
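        # Assumption: if the model returns coordinates on a 0-1000 scale instead of
        # 0-1 (some Gemini detection examples do), clamping alone collapses the box;
        # in that case the values would need to be divided by 1000 first.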

        # Draw the box outline
        draw.rectangle([
            xmin * width,
            ymin * height,
            xmax * width,
            ymax * height
        ], outline="#00FF00", width=7)  # Neon green mit dicken Linien
    return image

# Streamlit UI
st.title("Objekterkennung mit Gemini")
col1, col2 = st.columns(2)

with col1:
    uploaded_file = st.file_uploader("Bild hochladen", type=["jpg", "png", "jpeg"])
    object_name = st.text_input("Objekt zur Erkennung", placeholder="z.B. 'Auto', 'Person'")

    if uploaded_file and object_name:
        image = Image.open(uploaded_file)
        width, height = image.size
        st.image(image, caption="Hochgeladenes Bild", use_container_width=True)

        if st.button("Analysieren"):
            with st.spinner("Analysiere Bild..."):
                try:
                    # Prepare the image for the API request
                    image_bytes = io.BytesIO()
                    image.save(image_bytes, format=image.format)
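                    # image.format is set by PIL for files opened from an upload,
                    # so it can be reused to derive the MIME type below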
                    image_part = types.Part.from_bytes(
                        data=image_bytes.getvalue(),
                        mime_type=f"image/{image.format.lower()}"
                    )

                    # API client
                    client = genai.Client(api_key=os.getenv("KEY"))
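                    # The API key is read from the environment variable "KEY"; if no
                    # api_key were passed, genai.Client() would typically fall back to
                    # GOOGLE_API_KEY (assumption about the SDK default).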

                    # Image description
                    desc_response = client.models.generate_content(
                        model="gemini-2.0-flash-exp",
                        contents=["Beschreibe dieses Bild detailliert.", image_part]
                    )

                    # Object detection
                    detection_prompt = (
                        f"Gib exakt 4 Dezimalzahlen pro Box für alle {object_name} im Format "
                        "[ymin, xmin, ymax, xmax] als reine Python-Liste ohne weiteren Text. "
                        "Beispiel: [[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8]]"
                    )
                    box_response = client.models.generate_content(
                        model="gemini-2.0-flash-exp",
                        contents=[detection_prompt, image_part]
                    )
                    
                    # Parse the detection response
                    try:
                        boxes = parse_list_boxes(box_response.text)
                        st.write("**Parsed Boxes:**", boxes)
                    except Exception as e:
                        st.error(f"Parsing Error: {str(e)}")
                        boxes = []

                    annotated_image = image.copy()
                    
                    if boxes:
                        annotated_image = draw_bounding_boxes(annotated_image, boxes)
                        result_text = f"{len(boxes)} {object_name} erkannt"

                        # Zoom in on the first detected box
                        ymin, xmin, ymax, xmax = boxes[0]
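                        # Convert normalized coordinates to pixels and add a 50 px
                        # margin, clamped to the image bounds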
                        zoom_area = (
                            max(0, int(xmin * width - 50)),
                            max(0, int(ymin * height - 50)),
                            min(width, int(xmax * width + 50)),
                            min(height, int(ymax * height + 50))
                        )
                        zoomed_image = annotated_image.crop(zoom_area)

                    else:
                        result_text = "Keine Objekte gefunden"
                        zoomed_image = None

                    # Display results
                    with col2:
                        
                        st.write("## Objekterkennung:")
                        st.write(result_text)
                        
                        if boxes:
                            st.image(
                                [annotated_image, zoomed_image],
                                caption=["Gesamtbild", "Zoom auf Erkennung"],
                                width=400
                            )
                        else:
                            st.image(annotated_image, caption="Keine Objekte erkannt", width=400)
                        
                        st.write("## Beschreibung:")
                        st.write(desc_response.text)
                except Exception as e:
                    st.error(f"Fehler: {str(e)}")