IZERE HIRWA Roger committed on
Commit
5ee91ca
·
1 Parent(s): 85b6fb8
Files changed (2)
  1. app.py +15 -5
  2. result.png +5 -0
app.py CHANGED
@@ -19,7 +19,7 @@ from flask import Flask, request, send_file
 from flask_cors import CORS
 
 import torch
-from groundingdino.util.inference import Model as GroundingModel
+from groundingdino.util.inference import load_model, predict
 from segment_anything import sam_model_registry, SamPredictor
 
 # ─── Load models once ───────────────────────────────────────────────────────────
@@ -29,7 +29,8 @@ DINO_CONFIG = "GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py"
 DINO_CKPT = "weights/groundingdino_swint_ogc.pth"
 SAM_CKPT = "weights/sam_vit_h_4b8939.pth"
 
-grounder = GroundingModel(model_config_path=DINO_CONFIG, model_checkpoint_path=DINO_CKPT, device=device)
+# Load GroundingDINO model
+grounder = load_model(DINO_CONFIG, DINO_CKPT)
 sam = sam_model_registry["vit_h"](checkpoint=SAM_CKPT).to(device)
 predictor = SamPredictor(sam)
 
@@ -38,14 +39,23 @@ app = Flask(__name__)
 CORS(app)
 
 def segment(image_pil: Image.Image, prompt: str):
-    # 1) Run GroundingDINO to get boxes for the prompt
-    boxes, _, _ = grounder.predict(image_pil, prompt=prompt, box_threshold=0.3, text_threshold=0.25)
+    # Convert PIL image to numpy array
+    image_np = np.array(image_pil)
+
+    # Run GroundingDINO to get boxes for the prompt
+    boxes, _, _ = predict(
+        model=grounder,
+        image=image_np,
+        caption=prompt,
+        box_threshold=0.3,
+        text_threshold=0.25
+    )
     if boxes.size == 0:
         raise ValueError("No boxes found for prompt.")
 
     # 2) Largest box → mask via SAM
     box = boxes[np.argmax((boxes[:,2]-boxes[:,0])*(boxes[:,3]-boxes[:,1]))]
-    predictor.set_image(np.array(image_pil))
+    predictor.set_image(image_np)
     masks, _, _ = predictor.predict(box=box)
     mask = masks[0]  # boolean HxW
 
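A note on the new API: in the upstream GroundingDINO repo, `load_model` and `predict` do come from `groundingdino.util.inference`, but there `predict` expects the normalized image tensor produced by `load_image`, not a raw numpy array, and it returns boxes as normalized cxcywh torch tensors (on which `boxes.size == 0` is not a valid emptiness check, since `Tensor.size` is a method). Whether the GroundingDINO version pinned in this Space behaves differently is worth verifying. For reference, a minimal end-to-end sketch following the upstream README, reusing the config and checkpoint paths from this commit; the image path and prompt below are placeholders:

import numpy as np
import torch
from torchvision.ops import box_convert
from groundingdino.util.inference import load_model, load_image, predict
from segment_anything import sam_model_registry, SamPredictor

device = "cuda" if torch.cuda.is_available() else "cpu"

grounder = load_model(
    "GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py",
    "weights/groundingdino_swint_ogc.pth",
)
sam = sam_model_registry["vit_h"](checkpoint="weights/sam_vit_h_4b8939.pth").to(device)
predictor = SamPredictor(sam)

# load_image returns both an HxWx3 RGB array (for SAM) and the
# normalized tensor that GroundingDINO's predict() expects.
image_np, image_tensor = load_image("input.jpg")  # placeholder path

boxes, logits, phrases = predict(
    model=grounder,
    image=image_tensor,   # transformed tensor, not the raw array
    caption="a dog",      # placeholder prompt
    box_threshold=0.3,
    text_threshold=0.25,
    device=device,
)
if boxes.shape[0] == 0:
    raise ValueError("No boxes found for prompt.")

# GroundingDINO returns normalized cxcywh boxes; SAM wants pixel xyxy.
h, w = image_np.shape[:2]
boxes_xyxy = box_convert(boxes * torch.tensor([w, h, w, h]), "cxcywh", "xyxy").numpy()

# Keep the largest box, mirroring segment() above.
areas = (boxes_xyxy[:, 2] - boxes_xyxy[:, 0]) * (boxes_xyxy[:, 3] - boxes_xyxy[:, 1])
box = boxes_xyxy[np.argmax(areas)]

predictor.set_image(image_np)
masks, _, _ = predictor.predict(box=box, multimask_output=False)
mask = masks[0]  # boolean HxW

Converting to pixel xyxy before SamPredictor.predict matters because SAM interprets box prompts in absolute pixel coordinates.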
result.png ADDED
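The route handler itself is outside this diff, but given the `Flask`, `request`, and `send_file` imports and the newly committed result.png, the app presumably exposes an endpoint along these lines. This is a hypothetical sketch: the route path, form field names, and PNG serialization are illustrative assumptions, not taken from the commit:

import io
from PIL import Image

# Hypothetical route sketch; endpoint and field names are illustrative.
@app.route("/segment", methods=["POST"])
def segment_endpoint():
    image_pil = Image.open(request.files["image"].stream).convert("RGB")
    prompt = request.form.get("prompt", "")
    mask = segment(image_pil, prompt)  # boolean HxW array from segment() above

    # Serialize the mask as a PNG and stream it back to the client.
    buf = io.BytesIO()
    Image.fromarray(mask.astype("uint8") * 255).save(buf, format="PNG")
    buf.seek(0)
    return send_file(buf, mimetype="image/png")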