import gradio as gr
import numpy as np
import torch
from PIL import Image, ImageDraw
from huggingface_hub import hf_hub_download
# download the SAM and GroundingDINO checkpoints directly from the HF Hub
SAM_CHECKPOINT = hf_hub_download("facebook/sam-vit-base", "sam_vit_b.pth")
GDINO_CONFIG = hf_hub_download("IDEA-Research/GroundingDINO", "GroundingDINO_SwinT_OGC.py")
GDINO_CHECKPT = hf_hub_download("IDEA-Research/GroundingDINO", "groundingdino_swint_ogc.pth")
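
# Assumes the `segment-anything` and `groundingdino` packages are installed
# (e.g. via the Space's requirements.txt); neither is pulled in by gradio.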
# real imports in place of the original import pseudocode
import groundingdino.datasets.transforms as T
from groundingdino.util import box_ops
from groundingdino.util.inference import load_model, predict
from segment_anything import sam_model_registry, SamPredictor

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
sam = sam_model_registry["vit_b"](checkpoint=SAM_CHECKPOINT).to(DEVICE)
sam_predictor = SamPredictor(sam)
gdino = load_model(GDINO_CONFIG, GDINO_CHECKPT, device=DEVICE)

# standard GroundingDINO preprocessing (resize + ImageNet normalization)
gdino_transform = T.Compose([
    T.RandomResize([800], max_size=1333),
    T.ToTensor(),
    T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])
def recognize(img, prompt, conf):
    # The original pseudocode segmented first and then classified each mask;
    # the public APIs support the reverse (Grounded-SAM) order used here:
    # detect boxes from the text prompt, then segment each box with SAM.
    pil = Image.fromarray(img).convert("RGB")
    w, h = pil.size

    # 1. zero-shot detection: GroundingDINO matches boxes to the text prompt
    image_tensor, _ = gdino_transform(pil, None)
    boxes, scores, labels = predict(
        model=gdino, image=image_tensor, caption=prompt,
        box_threshold=conf, text_threshold=conf, device=DEVICE,
    )
    # boxes come back as normalized cxcywh; convert to absolute xyxy pixels
    boxes_xyxy = box_ops.box_cxcywh_to_xyxy(boxes) * torch.tensor([w, h, w, h])

    # 2. zero-shot segmentation: SAM turns each detected box into a pixel mask
    sam_predictor.set_image(np.array(pil))
    out = pil.convert("RGBA")
    draw = ImageDraw.Draw(out, "RGBA")
    results = []
    for box, score, label in zip(boxes_xyxy, scores, labels):
        if score < conf:
            continue
        masks, _, _ = sam_predictor.predict(box=box.numpy(), multimask_output=False)
        bbox = tuple(int(v) for v in box.tolist())
        area = int(masks[0].sum())
        draw.rectangle(bbox, outline=(255, 0, 0, 180), width=3)
        draw.text((bbox[0], bbox[1] - 10), f"{label} {float(score):.2f}", fill=(255, 0, 0, 180))
        results.append({"label": label, "score": float(score), "area": area, "bbox": bbox})
    return np.array(out), results
app = gr.Interface(
    fn=recognize,
    inputs=[
        gr.Image(type="numpy", label="Upload Image"),
        gr.Textbox(label="Prompt (comma-separated)"),
        gr.Slider(0, 1, value=0.25, label="Confidence Threshold"),
    ],
    outputs=[
        gr.Image(label="Overlay"),
        gr.JSON(label="Detections"),
    ],
    title="Zero-Shot Component Recognition",
    description="Segments and classifies mechanical components in photos, with no task-specific training.",
)
if __name__ == "__main__":
    app.launch()
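
# Optional local smoke test (hypothetical file name and prompt; uncomment to
# run the pipeline without the Gradio UI):
#   img = np.array(Image.open("example.jpg").convert("RGB"))
#   overlay, detections = recognize(img, "bolt, gear, bearing", 0.25)
#   print(detections)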