Spaces:
Sleeping
Sleeping
IZERE HIRWA Roger
committed on
Commit
·
d41ddc1
1
Parent(s):
8600871
- Dockerfile +42 -0
- app.py +85 -0
- requirements.txt +12 -0
- spaces.yaml +1 -0
Dockerfile
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# ────────────────────────────────────────────────────────────────────────────────
|
2 |
+
# Grounded-SAM CPU Docker image with Flask API
|
3 |
+
# ────────────────────────────────────────────────────────────────────────────────
|
4 |
+
FROM python:3.10-slim
|
5 |
+
|
6 |
+
ENV DEBIAN_FRONTEND=noninteractive \
|
7 |
+
PYTHONUNBUFFERED=1 \
|
8 |
+
AM_I_DOCKER=True \
|
9 |
+
BUILD_WITH_CUDA=False \
|
10 |
+
# Hugging Face cache inside the container (optional)
|
11 |
+
HF_HOME=/opt/hf_cache
|
12 |
+
|
13 |
+
# ─── OS packages ───
|
14 |
+
RUN apt-get update && \
|
15 |
+
apt-get install -y --no-install-recommends \
|
16 |
+
git wget ffmpeg libgl1 && \
|
17 |
+
apt-get clean && rm -rf /var/lib/apt/lists/*
|
18 |
+
|
19 |
+
# ─── Code ───
|
20 |
+
WORKDIR /workspace
|
21 |
+
COPY requirements.txt ./requirements.txt
|
22 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
23 |
+
|
24 |
+
# Segment-Anything & GroundingDINO in editable mode
|
25 |
+
RUN git clone --depth 1 https://github.com/facebookresearch/segment-anything.git && \
|
26 |
+
pip install -e segment-anything
|
27 |
+
|
28 |
+
RUN git clone --depth 1 https://github.com/IDEA-Research/GroundingDINO.git && \
|
29 |
+
pip install --no-build-isolation -e GroundingDINO
|
30 |
+
|
31 |
+
# Flask API
|
32 |
+
COPY app.py ./app.py
|
33 |
+
|
34 |
+
# Download pretrained checkpoints at build time (comment out to download on first run)
|
35 |
+
RUN mkdir -p weights && \
|
36 |
+
wget -q -O weights/sam_vit_h_4b8939.pth \
|
37 |
+
https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth && \
|
38 |
+
wget -q -O weights/groundingdino_swint_ogc.pth \
|
39 |
+
https://github.com/IDEA-Research/GroundingDINO/releases/download/v0.1.0-alpha/groundingdino_swint_ogc.pth
|
40 |
+
|
41 |
+
EXPOSE 7860
|
42 |
+
ENTRYPOINT ["python", "app.py", "--host", "0.0.0.0", "--port", "7860"]
|
app.py
ADDED
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Grounded-SAM Flask API (CPU only)
|
3 |
+
|
4 |
+
POST /segment
|
5 |
+
Body (multipart/form-data):
|
6 |
+
- image: the house photo
|
7 |
+
- prompt: text prompt, e.g. "roof sheet"
|
8 |
+
Query params:
|
9 |
+
- overlay (bool, default=false): if true, returns a PNG overlay instead
|
10 |
+
Returns:
|
11 |
+
- image/png mask (single channel) OR overlay
|
12 |
+
"""
|
13 |
+
import io
|
14 |
+
import os
|
15 |
+
import argparse
|
16 |
+
import numpy as np
|
17 |
+
from PIL import Image
|
18 |
+
from flask import Flask, request, send_file
|
19 |
+
from flask_cors import CORS
|
20 |
+
|
21 |
+
import torch
|
22 |
+
from groundingdino.util.inference import Model as GroundingModel
|
23 |
+
from segment_anything import sam_model_registry, SamPredictor
|
24 |
+
|
25 |
+
# ─── Load models once ───────────────────────────────────────────────────────────
# Everything runs on CPU; the models are loaded a single time at import so that
# each request only pays for inference.
device = torch.device("cpu")

# GroundingDINO's Model wrapper needs BOTH the model config file and the
# checkpoint. The default config path assumes the GroundingDINO repository is
# cloned next to this file (as the Docker image does); override it with the
# DINO_CONFIG environment variable when the layout differs.
DINO_CONFIG = os.environ.get(
    "DINO_CONFIG",
    "GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py",
)
DINO_CKPT = "weights/groundingdino_swint_ogc.pth"
SAM_CKPT = "weights/sam_vit_h_4b8939.pth"

grounder = GroundingModel(
    model_config_path=DINO_CONFIG,
    model_checkpoint_path=DINO_CKPT,
    device=str(device),  # the wrapper's signature documents a string device
)
sam = sam_model_registry["vit_h"](checkpoint=SAM_CKPT).to(device)
predictor = SamPredictor(sam)

# ─── Flask app ──────────────────────────────────────────────────────────────────
app = Flask(__name__)
CORS(app)  # allow cross-origin requests to the API
|
38 |
+
|
39 |
+
def segment(image_pil: Image.Image, prompt: str):
    """Segment the largest region of *image_pil* that matches *prompt*.

    GroundingDINO proposes boxes for the text prompt; the box with the largest
    area is then handed to SAM as a box prompt.

    Args:
        image_pil: RGB input image.
        prompt: Free-text description of the object to segment.

    Returns:
        A boolean HxW mask for the selected box.

    Raises:
        ValueError: If GroundingDINO returns no box for the prompt.
    """
    image_rgb = np.array(image_pil)

    # 1) Run GroundingDINO to get boxes for the prompt. The Model wrapper
    #    exposes predict_with_caption (there is no plain predict) and expects
    #    an OpenCV-style BGR array, so flip the channel axis.
    detections, _ = grounder.predict_with_caption(
        image=np.ascontiguousarray(image_rgb[:, :, ::-1]),
        caption=prompt,
        box_threshold=0.3,
        text_threshold=0.25,
    )
    boxes = np.asarray(detections.xyxy)  # N x 4, (x0, y0, x1, y1)
    if boxes.shape[0] == 0:
        raise ValueError("No boxes found for prompt.")

    # 2) Largest box -> mask via SAM
    areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    box = boxes[np.argmax(areas)]
    predictor.set_image(image_rgb)
    masks, scores, _ = predictor.predict(box=box)

    # SAM returns several candidate masks by default; keep the one it scores
    # highest instead of whichever happens to come first.
    return masks[int(np.argmax(scores))]
|
52 |
+
|
53 |
+
@app.route("/segment", methods=["POST"])
def segment_endpoint():
    """Handle ``POST /segment``.

    Expects a multipart form with an ``image`` file and a ``prompt`` text
    field. With ``?overlay=true`` the response is the input image with the
    masked pixels painted red; otherwise it is a single-channel mask.

    Returns:
        A PNG response on success, or a JSON error body with status 400
        (missing/invalid input) or 422 (nothing matched the prompt).
    """
    if "image" not in request.files or "prompt" not in request.form:
        return {"error": "image file and prompt are required."}, 400

    # A whitespace-only prompt carries no information; treat it as missing.
    prompt = request.form["prompt"].strip()
    if not prompt:
        return {"error": "image file and prompt are required."}, 400

    # The upload is untrusted: a corrupt or non-image file must produce a
    # client error rather than an unhandled exception (HTTP 500).
    try:
        image = Image.open(request.files["image"].stream).convert("RGB")
    except (OSError, ValueError, Image.DecompressionBombError):
        return {"error": "uploaded file is not a readable image."}, 400

    try:
        mask = segment(image, prompt)
    except ValueError as e:
        return {"error": str(e)}, 422

    overlay = request.args.get("overlay", "false").lower() == "true"
    if overlay:
        colored = np.array(image).copy()
        colored[mask] = [255, 0, 0]  # red overlay
        out_img = Image.fromarray(colored)
    else:
        # Boolean mask -> 0/255 grayscale image.
        out_img = Image.fromarray((mask * 255).astype(np.uint8))

    buf = io.BytesIO()
    out_img.save(buf, format="PNG")
    buf.seek(0)  # rewind so send_file streams from the start
    return send_file(buf, mimetype="image/png")
|
78 |
+
|
79 |
+
# ─── CLI ────────────────────────────────────────────────────────────────────────
# Script entry point: read the bind address and port from the command line and
# start the Flask server with them.
if __name__ == "__main__":
    cli = argparse.ArgumentParser()
    cli.add_argument("--host", default="127.0.0.1")
    cli.add_argument("--port", type=int, default=7860)
    opts = cli.parse_args()

    app.run(host=opts.host, port=opts.port)
|
requirements.txt
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Core ML
|
2 |
+
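torch==2.3.0 # NOTE(review): the default PyPI Linux wheel bundles CUDA libraries; a CPU-only build comes from https://download.pytorch.org/whl/cpu - confirm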
|
3 |
+
torchvision==0.18.0
|
4 |
+
# Grounded-SAM additional deps
|
5 |
+
diffusers==0.27.0
|
6 |
+
opencv-python-headless==4.10.0.82
|
7 |
+
Pillow>=10.1
|
8 |
+
pycocotools==2.0.7
|
9 |
+
matplotlib==3.9.0
|
10 |
+
# API
|
11 |
+
flask==3.0.3
|
12 |
+
flask-cors==4.0.0
|
spaces.yaml
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
sdk : "docker"
|