File size: 2,868 Bytes
cabf51c
a109861
c782801
c23b6bb
 
5a07d09
 
68aee5f
1265529
bc2dc1a
 
 
 
6148b9b
1265529
cabf51c
1265529
 
a7e2698
02a0351
 
cabf51c
 
 
bc2dc1a
 
8d27209
bc2dc1a
 
687617e
bc2dc1a
 
 
 
 
 
 
1265529
6ab8d85
1265529
bc2dc1a
 
dfa7b0a
1265529
 
 
 
 
 
dfa7b0a
 
1265529
 
bc2dc1a
df2d919
dfa7b0a
 
 
 
 
 
 
4fea159
 
 
 
 
 
1265529
bc2dc1a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import spaces
import os
import sys

# Runtime bootstrap: install detectron2, fetch the Mr. DETR sources, drop the
# local multi_scale_deform_attn.py override into the checkout, and install the
# checkout as a package. Each entry runs in its own shell through os.system,
# so a trailing "cd .." does not affect later commands.
_SETUP_COMMANDS = (
    "pip install git+https://github.com/facebookresearch/detectron2.git",
    "git clone https://github.com/Visual-AI/Mr.DETR.git MrDETR && cd MrDETR && rm -f requirements.txt && cd ..",
    # "&&" (not "&") before the final "cd ..": a single "&" makes the shell run
    # the whole cp/cd/pip chain in the background, so os.system could return
    # before the file was copied and the package installed.
    "cp multi_scale_deform_attn.py MrDETR/detrex/layers/ && cd MrDETR && pip install . && cd ..",
)

for _command in _SETUP_COMMANDS:
    _status = os.system(_command)
    if _status != 0:
        # os.system never raises; report the failure but keep starting up,
        # matching the original best-effort behaviour.
        print(
            f"[setup] command exited with status {_status}: {_command}",
            file=sys.stderr,
        )

# Make the cloned checkout importable (the imports below rely on it).
sys.path.append("MrDETR/")

import gradio as gr
from demo.predictors import VisualizationDemo
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.config import LazyConfig, instantiate
import numpy as np
from PIL import Image


if __name__ == "__main__":
    # Shut down any Gradio apps already running in this process.
    gr.close_all()

    # Load the lazy config from the cloned checkout and target CUDA.
    cfg = LazyConfig.load("MrDETR/projects/mr_detr_align/configs/deformable_detr_swinl_two_stage_12ep_plusplus.py")
    cfg["model"].device = "cuda"
    cfg["train"].device = "cuda"

    # Instantiate the model and load the released checkpoint into it.
    model = instantiate(cfg.model)
    checkpointer = DetectionCheckpointer(model)
    checkpointer.load("https://github.com/Visual-AI/Mr.DETR/releases/download/weights/MrDETR_align_swinL_12ep_900q_safe.pth")

    model.eval()
    model.cuda()
    vis_demo = VisualizationDemo(
        model=model,
        min_size_test=800,
        max_size_test=1333,
        img_format="RGB",
        metadata_dataset="coco_2017_val",
    )

    @spaces.GPU
    def inference(img, confidence):
        """Run detection on one image and return the rendered visualization.

        Args:
            img: The PIL image delivered by the Gradio image input, or None
                when the user submitted without providing an image.
            confidence: Threshold from the slider, forwarded unchanged to
                ``vis_demo.run_on_image``.

        Returns:
            A PIL image built from the visualizer output.

        Raises:
            gr.Error: If no image was provided.
        """
        if img is None:
            # Without this guard np.array(None) would be handed to the
            # predictor and fail with an unrelated, opaque error.
            raise gr.Error("Please provide an input image.")

        pixels = np.array(img)
        _, vis_output = vis_demo.run_on_image(pixels, confidence)
        # NOTE(review): the channel reversal presumably restores RGB order
        # from what the visualizer returns; confirm against VisualizationDemo.
        rendered = vis_output.get_image()[:, :, ::-1]
        return Image.fromarray(rendered)

    demo = gr.Interface(
        fn=inference,
        inputs=[
            gr.Image(type="pil", image_mode="RGB"),
            gr.Slider(minimum=0.0, maximum=1.0, value=0.5, step=0.05),
        ],
        outputs="image",
        examples=[
            ["MrDETR/assets/000000014226.jpg", 0.5],
            ["MrDETR/assets/000000028449.jpg", 0.3],
            ["MrDETR/assets/000000070048.jpg", 0.5],
            ["MrDETR/assets/000000218997.jpg", 0.5],
            ["MrDETR/assets/000000279774.jpg", 0.5],
            ["MrDETR/assets/000000434459.jpg", 0.5],
            ["MrDETR/assets/000000448448.jpg", 0.5],
            ["MrDETR/assets/000000560474.jpg", 0.5],
        ],
        title="[CVPR 2025] Mr. DETR: Instructive Multi-Route Training for Detection Transformers",
        description='''
            [![Paper](https://img.shields.io/badge/arXiv-2412.10028-red)](https://arxiv.org/abs/2412.10028)
            [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/mr-detr-instructive-multi-route-training-for/object-detection-on-coco-2017-val)](https://paperswithcode.com/sota/object-detection-on-coco-2017-val?p=mr-detr-instructive-multi-route-training-for)
        '''
    )
    demo.launch()