UTOPIA-DXD xinlongwang committed on
Commit
505446f
·
0 Parent(s):

Duplicate from BAAI/SegGPT

Browse files

Co-authored-by: Xinlong Wang <[email protected]>

.gitattributes ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ rainbow.gif filter=lfs diff=lfs merge=lfs -text
36
+ rainbow_.gif filter=lfs diff=lfs merge=lfs -text
37
+ rainbow__.gif filter=lfs diff=lfs merge=lfs -text
38
+ rainbow2.gif filter=lfs diff=lfs merge=lfs -text
39
+ videos/jeep-moving.jpg filter=lfs diff=lfs merge=lfs -text
40
+ videos/a_car_is_moving_on_the_road_40.mp4 filter=lfs diff=lfs merge=lfs -text
41
+ videos/a_man_in_parkour_100.jpg filter=lfs diff=lfs merge=lfs -text
42
+ videos/a_man_in_parkour_100.mp4 filter=lfs diff=lfs merge=lfs -text
43
+ videos/child-riding_lego.jpg filter=lfs diff=lfs merge=lfs -text
44
+ videos/child-riding_lego.mp4 filter=lfs diff=lfs merge=lfs -text
45
+ videos/jeep-moving.mp4 filter=lfs diff=lfs merge=lfs -text
46
+ videos/a_car_is_moving_on_the_road_40.jpg filter=lfs diff=lfs merge=lfs -text
47
+ videos/a_man_is_surfing_3_30.jpg filter=lfs diff=lfs merge=lfs -text
48
+ videos/a_man_is_surfing_3_30.mp4 filter=lfs diff=lfs merge=lfs -text
49
+ videos/horse-running.jpg filter=lfs diff=lfs merge=lfs -text
50
+ videos/horse-running.mp4 filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: SegGPT
3
+ emoji: 🏢
4
+ colorFrom: gray
5
+ colorTo: indigo
6
+ sdk: gradio
7
+ sdk_version: 3.22.1
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ duplicated_from: BAAI/SegGPT
12
+ ---
13
+
14
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,211 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+
3
+ import sys
4
+ import io
5
+ import requests
6
+ import json
7
+ import base64
8
+ from PIL import Image
9
+ import numpy as np
10
+ import gradio as gr
11
+
12
+
13
def inference_mask1_sam(prompt,
                        img,
                        img_):
    """Segment two test images with SAM+SegGPT, guided by a prompt scribble.

    Args:
        prompt: dict from ``gr.ImageMask`` with "image" and "mask" numpy arrays.
        img: first test image (numpy array from ``gr.Image``).
        img_: second test image (numpy array from ``gr.Image``).

    Returns:
        List of uint8 numpy images decoded from the backend response, with the
        first entry (which echoes the prompt image) removed.

    Raises:
        requests.HTTPError: if the backend answers with an error status.
    """
    payload = {
        "useSam": 1,  # ask the backend to run SAM on the scribble first
        "pimage": resizeImg(prompt["image"]),
        "pmask": resizeImg(prompt["mask"]),
        "img": resizeImg(img),
        "img_": resizeImg(img_),
    }
    # timeout added so a stalled backend cannot hang the Gradio worker forever
    r = requests.post("http://120.92.79.209/painter/run", json=payload, timeout=120)
    r.raise_for_status()  # fail loudly on HTTP errors instead of on JSON parsing
    frames = r.json()  # list of base64-encoded images (was json.loads(r.text))
    res = [
        np.uint8(np.array(Image.open(io.BytesIO(base64.b64decode(b)))))
        for b in frames
    ]
    return res[1:]  # remove prompt image
35
+
36
def inference_mask1(prompt,
                    img,
                    img_):
    """Segment two test images with SegGPT from a hand-drawn prompt mask.

    Same wire protocol as :func:`inference_mask1_sam` but without the
    ``useSam`` flag, so the user's mask is used directly.

    Args:
        prompt: dict from ``gr.ImageMask`` with "image" and "mask" numpy arrays.
        img: first test image (numpy array from ``gr.Image``).
        img_: second test image (numpy array from ``gr.Image``).

    Returns:
        List of uint8 numpy images decoded from the backend response.

    Raises:
        requests.HTTPError: if the backend answers with an error status.
    """
    payload = {
        "pimage": resizeImg(prompt["image"]),
        "pmask": resizeImg(prompt["mask"]),
        "img": resizeImg(img),
        "img_": resizeImg(img_),
    }
    # timeout added so a stalled backend cannot hang the Gradio worker forever
    r = requests.post("http://120.92.79.209/painter/run", json=payload, timeout=120)
    r.raise_for_status()  # fail loudly on HTTP errors instead of on JSON parsing
    frames = r.json()  # list of base64-encoded images (was json.loads(r.text))
    return [
        np.uint8(np.array(Image.open(io.BytesIO(base64.b64decode(b)))))
        for b in frames
    ]
55
+
56
+
57
+
58
def inference_mask_video(
    prompt,
    vid,
    request: gr.Request,
):
    """Segment a whole video with SAM+SegGPT from one prompt scribble.

    Args:
        prompt: dict from ``gr.ImageMask`` with "image" and "mask" numpy arrays.
        vid: filesystem path of the uploaded video (from ``gr.Video``).
        request: Gradio request object (unused here but part of the signature).

    Returns:
        ``[mask, url]`` — the SAM mask as a uint8 numpy image and the URL of
        the segmented video produced by the backend.

    Raises:
        requests.HTTPError: if the backend answers with an error status.
    """
    # Context manager closes the video handle; the original leaked it.
    with open(vid, 'rb') as video_file:
        files = {
            "pimage": resizeImgIo(prompt["image"]),
            "pmask": resizeImgIo(prompt["mask"]),
            "video": video_file,
        }
        # Generous timeout: video inference is slow but must not hang forever.
        r = requests.post("http://120.92.79.209/painter/runVideo", files=files, timeout=600)
    r.raise_for_status()  # fail loudly on HTTP errors instead of on JSON parsing
    a = r.json()  # {"mask": <base64 image>, "url": <result video URL>}
    mask = np.uint8(np.array(Image.open(io.BytesIO(base64.b64decode(a["mask"])))))
    return [mask, a["url"]]
80
+
81
+
82
def resizeImg(img, size=(448, 448)):
    """Resize a numpy image and return it as base64-encoded WEBP text.

    Args:
        img: image as a numpy array (anything ``PIL.Image.fromarray`` accepts).
        size: (width, height) target; default 448x448 matches the backend.

    Returns:
        str: ASCII base64 encoding of the WEBP bytes, ready for a JSON payload.
    """
    rgb = Image.fromarray(img).convert("RGB").resize(size)
    buf = io.BytesIO()
    rgb.save(buf, format="WEBP")
    return base64.b64encode(buf.getvalue()).decode('ascii')
89
+
90
def resizeImgIo(img, size=(448, 448)):
    """Resize a numpy image and return the WEBP bytes as a binary stream.

    Same pipeline as :func:`resizeImg`, but returns an ``io.BytesIO`` suitable
    for a multipart file upload instead of a base64 string.

    Args:
        img: image as a numpy array (anything ``PIL.Image.fromarray`` accepts).
        size: (width, height) target; default 448x448 matches the backend.

    Returns:
        io.BytesIO: fresh stream positioned at 0, containing the WEBP bytes.
    """
    rgb = Image.fromarray(img).convert("RGB").resize(size)
    buf = io.BytesIO()
    rgb.save(buf, format="WEBP")
    return io.BytesIO(buf.getvalue())
97
+
98
+
99
# define app features and run

# Example triplets for the "General 1-shot" tab:
# [prompt image, test image 1, test image 2] — paths relative to the app root.
examples = [
    ['./images/hmbb_1.jpg', './images/hmbb_2.jpg', './images/hmbb_3.jpg'],
    ['./images/rainbow_1.jpg', './images/rainbow_2.jpg', './images/rainbow_3.jpg'],
    ['./images/earth_1.jpg', './images/earth_2.jpg', './images/earth_3.jpg'],
    ['./images/obj_1.jpg', './images/obj_2.jpg', './images/obj_3.jpg'],
    ['./images/ydt_2.jpg', './images/ydt_1.jpg', './images/ydt_3.jpg'],
]

# Example triplets for the SAM+SegGPT tab (same [prompt, img1, img2] layout).
examples_sam = [
    ['./images/nc_1.jpg', './images/nc_2.jpg', './images/nc_3.jpg'],
    ['./images/street_1.jpg', './images/street_2.jpg', './images/street_3.jpg'],
    ['./images/hmbb_1.jpg', './images/hmbb_2.jpg', './images/hmbb_3.jpg'],
    ['./images/earth_1.jpg', './images/earth_2.jpg', './images/earth_3.jpg'],
    ['./images/ydt_2.jpg', './images/ydt_1.jpg', './images/ydt_3.jpg'],
]

# [prompt frame, video] pairs for the video tab; the .jpg is a still used to
# draw the prompt scribble on.
examples_video = [
    ['./videos/horse-running.jpg', './videos/horse-running.mp4'],
    ['./videos/a_man_is_surfing_3_30.jpg', './videos/a_man_is_surfing_3_30.mp4'],
    ['./videos/a_car_is_moving_on_the_road_40.jpg', './videos/a_car_is_moving_on_the_road_40.mp4'],
    ['./videos/jeep-moving.jpg', './videos/jeep-moving.mp4'],
    ['./videos/child-riding_lego.jpg', './videos/child-riding_lego.mp4'],
]
124
+
125
+
126
+
127
# "General 1-shot" tab: the user paints the prompt mask by hand (no SAM),
# then SegGPT segments up to two test images. Uses gradio 3.22 APIs
# (`brush_radius`, `.style(...)`) — do not bump the SDK without porting these.
demo_mask = gr.Interface(fn=inference_mask1,
    inputs=[gr.ImageMask(brush_radius=8, label="prompt (提示图)"), gr.Image(label="img1 (测试图1)"), gr.Image(label="img2 (测试图2)")],
    #outputs=[gr.Image(shape=(448, 448), label="output1 (输出图1)"), gr.Image(shape=(448, 448), label="output2 (输出图2)")],
    outputs=[gr.Image(label="output1 (输出图1)").style(height=256, width=256), gr.Image(label="output2 (输出图2)").style(height=256, width=256)],
    #outputs=gr.Gallery(label="outputs (输出图)"),
    examples=examples,
    #title="SegGPT for Any Segmentation<br>(Painter Inside)",
    description="<p> \
Choose an example below &#128293; &#128293; &#128293; <br>\
Or, upload by yourself: <br>\
1. Upload images to be tested to 'img1' and/or 'img2'. <br>2. Upload a prompt image to 'prompt' and draw a mask. <br>\
<br> \
💎 The more accurate you annotate, the more accurate the model predicts. <br>\
💎 Examples below were never trained and are randomly selected for testing in the wild. <br>\
💎 Current UI interface only unleashes a small part of the capabilities of SegGPT, i.e., 1-shot case. \
</p>",
    cache_examples=False,    # examples hit the remote backend; don't precompute
    allow_flagging="never",
)
146
+
147
+
148
+
149
# "SAM+SegGPT" tab: the user only draws a point/line; SAM turns it into a
# mask, then SegGPT segments both test images. Three outputs expected from
# inference_mask1_sam: [SAM mask, result for img1, result for img2]
# (presumably — the backend response layout is not visible here; verify).
demo_mask_sam = gr.Interface(fn=inference_mask1_sam,
    inputs=[gr.ImageMask(brush_radius=4, label="prompt (提示图)"), gr.Image(label="img1 (测试图1)"), gr.Image(label="img2 (测试图2)")],
    #outputs=[gr.Image(shape=(448, 448), label="output1 (输出图1)"), gr.Image(shape=(448, 448), label="output2 (输出图2)")],
    # outputs=[gr.Image(label="output1 (输出图1)").style(height=256, width=256), gr.Image(label="output2 (输出图2)").style(height=256, width=256)],
    #outputs=gr.Gallery(label="outputs (输出图)"),
    outputs=[gr.Image(label="SAM output (mask)").style(height=256, width=256),gr.Image(label="output1 (输出图1)").style(height=256, width=256), gr.Image(label="output2 (输出图2)").style(height=256, width=256)],
    # outputs=[gr.Image(label="output3 (输出图1)").style(height=256, width=256), gr.Image(label="output4 (输出图2)").style(height=256, width=256)],
    examples=examples_sam,
    #title="SegGPT for Any Segmentation<br>(Painter Inside)",
    description="<p> \
<strong>SAM+SegGPT: One touch for segmentation in all images or videos.</strong> <br>\
Choose an example below &#128293; &#128293; &#128293; <br>\
Or, upload by yourself: <br>\
1. Upload images to be tested to 'img1' and 'img2'. <br>2. Upload a prompt image to 'prompt' and draw <strong>a point or line on the target</strong>. <br>\
<br> \
💎 SAM segments the target with any point or scribble, then SegGPT segments all other images. <br>\
💎 Examples below were never trained and are randomly selected for testing in the wild. <br>\
💎 Current UI interface only unleashes a small part of the capabilities of SegGPT, i.e., 1-shot case. \
</p>",
    cache_examples=False,    # examples hit the remote backend; don't precompute
    allow_flagging="never",
)
171
+
172
# Video tab: one scribble on a still frame → SAM mask → SegGPT propagates the
# segmentation through the clip. Outputs are the SAM mask and the URL of the
# rendered result video returned by the backend.
demo_mask_video = gr.Interface(fn=inference_mask_video,
    inputs=[gr.ImageMask(label="prompt (提示图)"), gr.Video(label="video (测试视频)").style(height=448, width=448)],
    outputs=[gr.Image(label="SAM output (mask)").style(height=256, width=256), gr.Video().style(height=448, width=448)],
    examples=examples_video,
    description="<p> \
<strong>SegGPT+SAM: One touch for any segmentation in a video.</strong> <br>\
Choose an example below &#128293; &#128293; &#128293; <br>\
Or, upload by yourself: <br>\
1. Upload a video to be tested to 'video'. If failed, please check the codec, we recommend h.264 by default. <br>2. Upload a prompt image to 'prompt' and draw <strong>a point or line on the target</strong>. <br>\
<br> \
💎 SAM segments the target with any point or scribble, then SegGPT segments the whole video. <br>\
💎 Examples below were never trained and are randomly selected for testing in the wild. <br>\
💎 Current UI interface only unleashes a small part of the capabilities of SegGPT, i.e., 1-shot case. <br> \
Note: we only take the first 16 frames for the demo. \
</p>",
    cache_examples=False,    # examples hit the remote backend; don't precompute
    allow_flagging="never",
)
190
+
191
+
192
+
193
+
194
# HTML banner rendered above the tabbed interface; `file/rainbow2.gif` is
# served by Gradio's static file route from the repo root.
title = "SegGPT: Segmenting Everything In Context<br> \
<div align='center'> \
<h2><a href='https://arxiv.org/abs/2304.03284' target='_blank' rel='noopener'>[paper]</a> \
<a href='https://github.com/baaivision/Painter' target='_blank' rel='noopener'>[code]</a></h2> \
<br> \
<image src='file/rainbow2.gif' width='720px' /> \
<h2>SegGPT performs arbitrary segmentation tasks in images or videos via in-context inference, such as object instance, stuff, part, contour, and text, with only one single model.</h2> \
</div> \
"

# Tab order puts the SAM-assisted flow first; labels mix English and Chinese.
demo = gr.TabbedInterface([demo_mask_sam, demo_mask_video, demo_mask], ['SAM+SegGPT (一触百通)', '🎬Anything in a Video', 'General 1-shot'], title=title)

#demo.launch(share=True, auth=("baai", "vision"))
# enable_queue=False: requests are handled synchronously (gradio 3.x flag,
# removed in gradio 4 — port before upgrading the pinned SDK version).
demo.launch(enable_queue=False)
#demo.launch(server_name="0.0.0.0", server_port=34311)
# -
210
+
211
+
images/earth_1.jpg ADDED
images/earth_2.jpg ADDED
images/earth_3.jpg ADDED
images/hmbb_1.jpg ADDED
images/hmbb_2.jpg ADDED
images/hmbb_3.jpg ADDED
images/nc_1.jpg ADDED
images/nc_2.jpg ADDED
images/nc_3.jpg ADDED
images/obj_1.jpg ADDED
images/obj_2.jpg ADDED
images/obj_3.jpg ADDED
images/rainbow_1.jpg ADDED
images/rainbow_2.jpg ADDED
images/rainbow_3.jpg ADDED
images/street_1.jpg ADDED
images/street_2.jpg ADDED
images/street_3.jpg ADDED
images/tom_1.jpg ADDED
images/tom_2.jpg ADDED
images/tom_3.jpg ADDED
images/xray_1.jpg ADDED
images/xray_2.jpg ADDED
images/xray_3.jpg ADDED
images/ydt_1.jpg ADDED
images/ydt_2.jpg ADDED
images/ydt_3.jpg ADDED
rainbow2.gif ADDED

Git LFS Details

  • SHA256: 58a8540803d25c4db5ef8857362df37ec96014c14184286922cc8ecb079a0125
  • Pointer size: 132 Bytes
  • Size of remote file: 3.22 MB
seggpt_teaser.png ADDED
videos/.DS_Store ADDED
Binary file (6.15 kB). View file
 
videos/a_car_is_moving_on_the_road_40.jpg ADDED

Git LFS Details

  • SHA256: d59d813aab4e084395cf3440e4e382b1e505b150891705c9a357c3c7ba432061
  • Pointer size: 131 Bytes
  • Size of remote file: 891 kB
videos/a_car_is_moving_on_the_road_40.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e69783eed3294b0a76c147d46ce8705a46c21ae4122edc0fd9a2b57ee453954
3
+ size 248227
videos/a_man_in_parkour_100.jpg ADDED

Git LFS Details

  • SHA256: 25c0c3e0b7ca78d19735d21adc0f544f37204b9df4dc5cd8cb1ff1334eac1c27
  • Pointer size: 131 Bytes
  • Size of remote file: 797 kB
videos/a_man_in_parkour_100.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dda65f31336de9f9ed607fe60443164a8529b98d949b7cb8e068a2245352e2e3
3
+ size 1020054
videos/a_man_is_surfing_3_30.jpg ADDED

Git LFS Details

  • SHA256: ad718666508641ceffd5866b0dc23cd1160233178f98c7e3d5387361aa186d80
  • Pointer size: 131 Bytes
  • Size of remote file: 461 kB
videos/a_man_is_surfing_3_30.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73633b80752cefe0e68fb3bf6d1117d5bc1c094c4198c871beb7b59c6856f2f7
3
+ size 301229
videos/child-riding_lego.jpg ADDED

Git LFS Details

  • SHA256: 738a855aff8883a4d3a15b619f5e00fee99d4004e409832801b8fead5c362e47
  • Pointer size: 130 Bytes
  • Size of remote file: 21.5 kB
videos/child-riding_lego.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24a9ffd1a3a430851bb864f3fe1da4e84aed8d9694aea42d1f1578e7ef4818b4
3
+ size 117189
videos/horse-running.jpg ADDED

Git LFS Details

  • SHA256: a9caafbe8a2f340b9632db09806cc73ca8e186f825ba75843c93413a25bca02d
  • Pointer size: 131 Bytes
  • Size of remote file: 598 kB
videos/horse-running.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dfd8b6c99776f291bf4c8787721387d8764c85b787741c665dee49dfb6442630
3
+ size 383635
videos/jeep-moving.jpg ADDED

Git LFS Details

  • SHA256: e61812ffd152c44f7b67e4a0d33f2d79c9d074fa431f6ea83a316a05f6f25a88
  • Pointer size: 130 Bytes
  • Size of remote file: 36.9 kB
videos/jeep-moving.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92c2b38d7c52d8a19be7aa7f568d1d07b5fc433cbd369f45e028325230ad76ba
3
+ size 150698