valegro committed
Commit ed309ba · verified · 1 Parent(s): cf5fa6f

Update app.py

Files changed (1)
  1. app.py +51 -93
app.py CHANGED
@@ -1,96 +1,54 @@
-import streamlit as st
-import numpy as np
-import torch
 from PIL import Image
-import cv2
-import matplotlib.pyplot as plt
-from huggingface_hub import hf_hub_download
-from segment_anything import SamPredictor, sam_model_registry
-from groundingdino.util.inference import load_model, predict, annotate
-
-# App title
-st.title("🔍 Zero-Shot Recognition with GroundingDINO + SAM")
-
-# Model setup from the Hugging Face Hub
-@st.cache_resource
-def load_sam():
-    checkpoint = hf_hub_download(
-        repo_id="SegmentAnything/sam_vit_b",
-        filename="sam_vit_b_01ec64.pth"
-    )
-    model = sam_model_registry["vit_b"](checkpoint=checkpoint)
-    return SamPredictor(model.to("cuda" if torch.cuda.is_available() else "cpu"))
-
-@st.cache_resource
-def load_grounding_dino():
-    config_path = hf_hub_download(
-        repo_id="IDEA-Research/grounding-dino-tiny",
-        filename="GroundingDINO_SwinT_OGC.py"
-    )
-    checkpoint_path = hf_hub_download(
-        repo_id="IDEA-Research/grounding-dino-tiny",
-        filename="groundingdino_tiny.pth"
-    )
-    model = load_model(config_path, checkpoint_path)
-    return model
-
-sam = load_sam()
-grounding_dino = load_grounding_dino()
-
-# User image upload
-uploaded_image = st.file_uploader("📷 Upload an image", type=['jpg', 'jpeg', 'png'])
-
-prompt = st.text_input("📝 Enter the classes to detect (comma-separated)",
-                       value="lamiera, foro circolare, vite, bullone, scanalatura")
-
-if uploaded_image is not None:
-    image = Image.open(uploaded_image).convert("RGB")
-    img_array = np.array(image)
-
-    st.image(image, caption="Uploaded image", use_column_width=True)
-
-    if st.button("▶️ Run detection"):
-        # GroundingDINO prediction
-        boxes, logits, phrases = predict(
-            model=grounding_dino,
-            image=img_array,
-            caption=prompt,
-            box_threshold=0.3,
-            text_threshold=0.25,
-            device="cuda" if torch.cuda.is_available() else "cpu"
-        )
-
-        annotated_frame = annotate(image_source=img_array, boxes=boxes, logits=logits, phrases=phrases)
-
-        st.subheader("GroundingDINO result")
-        st.image(annotated_frame, caption="GroundingDINO annotation")
-
-        # SAM segmentation
-        sam.set_image(img_array)
-        H, W, _ = img_array.shape
-        boxes_scaled = boxes * torch.tensor([W, H, W, H], device=boxes.device)
-        boxes_scaled = boxes_scaled.cpu().numpy()
-
-        masks, scores, _ = sam.predict_torch(
-            point_coords=None,
-            point_labels=None,
-            boxes=torch.tensor(boxes_scaled, device=sam.device),
-            multimask_output=False,
         )

-        # Display the segmented masks
-        st.subheader("Segment Anything (SAM) result")
-        plt.figure(figsize=(10, 10))
-        plt.imshow(img_array)
-        for mask in masks:
-            mask_np = mask[0].cpu().numpy()
-            plt.contour(mask_np, colors='red', linewidths=1.5)
-        plt.axis('off')
-
-        st.pyplot(plt.gcf())
-        plt.close()
-
-        # Results table
-        st.subheader("🔖 Results table")
-        result_data = [{"Class": phrase, "Confidence": round(logit.item(), 2)} for phrase, logit in zip(phrases, logits)]
-        st.table(result_data)
 
+import gradio as gr, numpy as np
+from utils import SAM, GD
+from groundingdino.util.utils import clean_text
 from PIL import Image
+import cv2, torch
+
+def pipeline(image, prompt):
+    # 1. Segment with SAM (the utils wrapper is assumed to handle fully unprompted prediction)
+    img_cv = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
+    SAM.set_image(img_cv)
+    masks, _, _ = SAM.predict(box=None, point_coords=None, point_labels=None, multimask_output=False)
+
+    annotated = np.array(image)  # cv2 drawing functions need a NumPy array, not a PIL Image
+    boxes = []
+
+    # Derive a bounding box from each mask
+    for m in masks:
+        coords = np.argwhere(m)
+        y1, x1 = coords.min(0)
+        y2, x2 = coords.max(0)
+        boxes.append(np.array([x1, y1, x2, y2]))
+
+    if boxes:
+        # 2. Zero-shot grounding with GroundingDINO
+        dino_out = GD.predict_with_caption(
+            image=np.array(image),
+            captions=[prompt] * len(boxes),
+            boxes=np.vstack(boxes)
         )
+        for box, text in zip(dino_out["boxes"], dino_out["captions"]):
+            x1, y1, x2, y2 = map(int, box)
+            cv2.rectangle(annotated, (x1, y1), (x2, y2), (255, 0, 0), 2)
+            cv2.putText(annotated, clean_text(text), (x1, y1 - 6),
+                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 2)
+
+    return Image.fromarray(annotated)
+
+demo = gr.Interface(
+    fn=pipeline,
+    inputs=[
+        gr.Image(type="pil"),
+        gr.Textbox(value="lamiera, foro circolare, vite, bullone, scanalatura")
+    ],
+    outputs=gr.Image(type="pil"),
+    title="Zero-Shot Mechanical Part Finder",
+    description=(
+        "Upload a photo of end-of-life mechanical components and enter the labels "
+        "you want to search for (comma-separated). The system segments with SAM "
+        "and grounds the results zero-shot with GroundingDINO."
+    )
+)

+if __name__ == "__main__":
+    demo.launch()
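
The new app.py depends on a project-local utils module (providing SAM and GD) that is not part of this commit. Below is a minimal sketch of what such a module could look like, assuming it reuses the same Hub checkpoints the removed Streamlit version downloaded; the _GroundingDINO wrapper class, its predict_with_caption return shape (a dict with "boxes" and "captions" keys), and the single-caption simplification are all inferred from the call sites in app.py, not taken from the real utils.py.

# utils.py — hypothetical sketch, not part of this commit
import numpy as np
import torch
from PIL import Image
from huggingface_hub import hf_hub_download
from segment_anything import SamPredictor, sam_model_registry
from groundingdino.util.inference import load_model, predict
import groundingdino.datasets.transforms as T
from torchvision.ops import box_convert

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

def _load_sam():
    # Same repo/filename the removed app.py downloaded
    checkpoint = hf_hub_download(repo_id="SegmentAnything/sam_vit_b",
                                 filename="sam_vit_b_01ec64.pth")
    model = sam_model_registry["vit_b"](checkpoint=checkpoint)
    return SamPredictor(model.to(DEVICE))

class _GroundingDINO:
    def __init__(self):
        # Config/checkpoint IDs mirror the removed app.py
        config = hf_hub_download(repo_id="IDEA-Research/grounding-dino-tiny",
                                 filename="GroundingDINO_SwinT_OGC.py")
        ckpt = hf_hub_download(repo_id="IDEA-Research/grounding-dino-tiny",
                               filename="groundingdino_tiny.pth")
        self.model = load_model(config, ckpt)
        # Standard GroundingDINO preprocessing (same as load_image in the repo)
        self.transform = T.Compose([
            T.RandomResize([800], max_size=1333),
            T.ToTensor(),
            T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ])

    def predict_with_caption(self, image, captions, boxes=None):
        # app.py passes one caption per SAM box, but every entry carries the same
        # prompt, so a single forward pass suffices; `boxes` is accepted only for
        # signature compatibility and ignored here.
        tensor, _ = self.transform(Image.fromarray(image), None)
        pred_boxes, logits, phrases = predict(
            model=self.model, image=tensor, caption=captions[0],
            box_threshold=0.3, text_threshold=0.25, device=DEVICE)
        h, w, _ = image.shape
        # predict() returns normalized cxcywh boxes; convert to pixel xyxy for drawing
        xyxy = box_convert(pred_boxes * torch.tensor([w, h, w, h]),
                           in_fmt="cxcywh", out_fmt="xyxy").numpy()
        return {"boxes": xyxy, "captions": phrases}

# NB: a plain SamPredictor requires at least one prompt (point or box); the fully
# unprompted SAM.predict(...) call in app.py suggests the real utils module wraps
# something like SamAutomaticMaskGenerator instead.
SAM = _load_sam()
GD = _GroundingDINO()

Loading both models at import time plays the same role as the @st.cache_resource decorators in the removed version: the heavy checkpoints are fetched once per process rather than once per request. One thing worth verifying against the installed GroundingDINO revision is the clean_text helper that app.py imports from groundingdino.util.utils; the commonly used helpers in that module are clean_state_dict and get_phrases_from_posmap.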