Update app.py
app.py
CHANGED
@@ -1,5 +1,32 @@
-import spaces
+try:
+    import spaces
+
+    IN_SPACES = True
+except ImportError:
+    from functools import wraps
+    import inspect
+
+    class spaces:
+        @staticmethod
+        def GPU(duration):
+            def decorator(func):
+                @wraps(func)  # Preserves the original function's metadata
+                def wrapper(*args, **kwargs):
+                    if inspect.isgeneratorfunction(func):
+                        # If the decorated function is a generator, yield from it
+                        yield from func(*args, **kwargs)
+                    else:
+                        # For regular functions, just return the result
+                        return func(*args, **kwargs)
+
+                return wrapper
+
+            return decorator
+
+    IN_SPACES = False
+
 import torch
+from queue import Queue
 import os
 import gradio as gr
 from threading import Thread
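Review note: the try/except shim above lets the same file run outside Hugging Face Spaces, where the `spaces` package is unavailable. One subtlety in the fallback: because `wrapper` contains a `yield from`, Python treats `wrapper` itself as a generator function no matter which branch executes, so a plain (non-generator) function decorated by this shim would hand back a generator object instead of its return value. That is harmless here since the handlers this decorator is applied to in the diff all appear to stream via yield, but a variant that branches at decoration time sidesteps the issue entirely. A minimal sketch, not part of the commit:

    from functools import wraps
    import inspect

    def GPU(duration):
        def decorator(func):
            if inspect.isgeneratorfunction(func):
                @wraps(func)
                def gen_wrapper(*args, **kwargs):
                    # Delegate to the generator so callers can stream from it
                    yield from func(*args, **kwargs)

                return gen_wrapper

            @wraps(func)
            def wrapper(*args, **kwargs):
                # Plain functions keep their return value
                return func(*args, **kwargs)

            return wrapper

        return decorator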
@@ -11,24 +38,24 @@ from transformers import (
 from PIL import ImageDraw
 from torchvision.transforms.v2 import Resize
 
-import subprocess
+if IN_SPACES:
+    import subprocess
 
-subprocess.run(
-    "pip install flash-attn --no-build-isolation",
-    env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
-    shell=True,
-)
+    subprocess.run(
+        "pip install flash-attn --no-build-isolation",
+        env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
+        shell=True,
+    )
 
 auth_token = os.environ.get("TOKEN_FROM_SECRET") or True
 tokenizer = AutoTokenizer.from_pretrained("vikhyatk/moondream2")
 moondream = AutoModelForCausalLM.from_pretrained(
     "vikhyatk/moondream-next",
-    revision="591ff5569240caf61126be6b080ff5c9370b87d4",
     trust_remote_code=True,
     torch_dtype=torch.float16,
     device_map={"": "cuda"},
     attn_implementation="flash_attention_2",
-    token=auth_token,
+    token=auth_token if IN_SPACES else None,
 )
 moondream.eval()
 
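Review note: `subprocess.run(..., env=...)` replaces the child's environment wholesale rather than extending it, so the pip invocation above runs without `PATH`, `HOME`, or any CUDA variables from the parent. It apparently works on Spaces, but merging with `os.environ` is the safer idiom. A hedged sketch:

    import os
    import subprocess

    subprocess.run(
        "pip install flash-attn --no-build-isolation",
        # Extend, rather than replace, the parent environment
        env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
        shell=True,
    )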
@@ -36,17 +63,20 @@ moondream.eval()
 @spaces.GPU(duration=10)
 def answer_question(img, prompt):
     if img is None:
-        yield ""
+        yield "", ""
         return
 
     image_embeds = moondream.encode_image(img)
     streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)
+    queue = Queue()
     thread = Thread(
         target=moondream.answer_question,
         kwargs={
             "image_embeds": image_embeds,
             "question": prompt,
             "tokenizer": tokenizer,
+            "allow_cot": True,
+            "result_queue": queue,
             "streamer": streamer,
         },
     )
@@ -55,7 +85,11 @@ def answer_question(img, prompt):
     buffer = ""
     for new_text in streamer:
         buffer += new_text
-        yield buffer.strip()
+        yield buffer.strip(), "Thinking..."
+
+    answer = queue.get()
+    # yield answer["answer"], answer["thought"]
+    yield answer["answer"], ""
 
 
 @spaces.GPU(duration=10)
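Review note: the reworked handler streams partial text from the `TextIteratorStreamer` while the worker thread publishes its final structured result into the `Queue`; `queue.get()` blocks until `moondream.answer_question` calls `put`, and the commented-out line suggests that result is a dict with `answer` and `thought` keys. The producer/consumer shape in isolation, with a toy worker standing in for the model call:

    from queue import Queue
    from threading import Thread

    def toy_worker(result_queue):
        # Stand-in for moondream.answer_question: stream tokens elsewhere,
        # then publish the final result for the consumer.
        result_queue.put({"answer": "a red bicycle", "thought": "the frame is red"})

    queue = Queue()
    Thread(target=toy_worker, kwargs={"result_queue": queue}).start()

    answer = queue.get()  # blocks until toy_worker calls put()
    print(answer["answer"])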
@@ -84,6 +118,10 @@ def caption(img, mode):
 
 @spaces.GPU(duration=10)
 def detect(img, object):
+    if img is None:
+        yield "", gr.update(visible=False, value=None)
+        return
+
     w, h = img.size
     if w > 768 or h > 768:
         img = Resize(768)(img)
@@ -97,7 +135,7 @@ def detect(img, object):
             width=3,
         )
 
-    return gr.update(visible=True, value=img)
+    yield f"{len(objs)} detected", gr.update(visible=True, value=img)
 
 
 js = """
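Review note: `detect` now yields a status string for the new `thought` output plus a `gr.update(...)` that simultaneously sets the annotated image and toggles its visibility. The show/hide idiom in a minimal standalone form (hypothetical demo, independent of this app):

    import gradio as gr

    def show(img):
        if img is None:
            # Hide the output component and clear any stale value
            return gr.update(visible=False, value=None)
        return gr.update(visible=True, value=img)

    with gr.Blocks() as demo:
        inp = gr.Image(type="pil", label="Input")
        out = gr.Image(visible=False, label="Output")
        inp.change(show, inp, out)

    demo.launch()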
@@ -173,22 +211,27 @@ js = """
 
     // Dark mode colors
     var darkColors = {
+        /*
         1: '#4a5788', // Deep blue-grey
         2: '#4c5a8d',
         3: '#4e5d92',
         4: '#506097',
         5: '#52639c' // Brighter blue-grey
+        */
+        1: 'rgb(16, 20, 32)',
+        2: 'rgb(21, 25, 39)',
+        3: 'rgb(26, 30, 46)',
+        4: 'rgb(31, 35, 53)',
+        5: 'rgb(36, 40, 60)'
     };
 
     return isDarkMode ? darkColors[age] : lightColors[age];
 }
 
 function draw() {
-
-
-    ctx.fillStyle = isDarkMode ? '#333' : '#f0f0f0';
+    var isDarkMode = document.body.classList.contains('dark');
+    ctx.fillStyle = isDarkMode ? '#0b0f19' : '#f0f0f0';
     ctx.fillRect(0, 0, canvas.width, canvas.height);
-
     for (var i = 0; i < cols; i++) {
         for (var j = 0; j < rows; j++) {
             if (grid[i][j]) {
@@ -220,6 +263,10 @@ css = """
     font-size: 1.4rem !important;
 }
 
+.chain-of-thought span p {
+    opacity: 0.7 !important;
+}
+
 #life-canvas {
     position: fixed;
     top: 0;
@@ -262,9 +309,9 @@ with gr.Blocks(title="moondream vl (new)", css=css, js=js) as demo:
                     )
                     submit = gr.Button("Submit")
            img = gr.Image(type="pil", label="Upload an Image")
-            submit.click(answer_question, [img, prompt], output)
-            prompt.submit(answer_question, [img, prompt], output)
-            img.change(answer_question, [img, prompt], output)
+            submit.click(answer_question, [img, prompt], [output, thought])
+            prompt.submit(answer_question, [img, prompt], [output, thought])
+            img.change(answer_question, [img, prompt], [output, thought])
         elif mode == "Caption":
             with gr.Group():
                 with gr.Row():
@@ -278,7 +325,7 @@ with gr.Blocks(title="moondream vl (new)", css=css, js=js) as demo:
            img = gr.Image(type="pil", label="Upload an Image")
            submit.click(caption, [img, caption_mode], output)
            img.change(caption, [img, caption_mode], output)
-        else:
+        elif mode == "Detect":
             with gr.Group():
                 with gr.Row():
                     prompt = gr.Textbox(
@@ -288,18 +335,21 @@ with gr.Blocks(title="moondream vl (new)", css=css, js=js) as demo:
                     )
                     submit = gr.Button("Submit")
            img = gr.Image(type="pil", label="Upload an Image")
-            submit.click(detect, [img, prompt], ann)
-            prompt.submit(detect, [img, prompt], ann)
-            img.change(detect, [img, prompt], ann)
+            submit.click(detect, [img, prompt], [thought, ann])
+            prompt.submit(detect, [img, prompt], [thought, ann])
+            img.change(detect, [img, prompt], [thought, ann])
+        else:
+            gr.Markdown("Coming soon!")
 
         with gr.Column():
-            output = gr.Markdown(label="Response", elem_classes=["output-text"])
-            ann = gr.Image(visible=False)
-
-            mode_radio.change(
-                lambda: ("", gr.update(visible=False, value=None)),
-                [],
-                [output, ann],
-            )
+            thought = gr.Markdown(elem_classes=["chain-of-thought"])
+            output = gr.Markdown(label="Response", elem_classes=["output-text"])
+            ann = gr.Image(visible=False)
+
+            mode_radio.change(
+                lambda: ("", "", gr.update(visible=False, value=None)),
+                [],
+                [output, thought, ann],
+            )
 
 demo.queue().launch()
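Review note: the new `mode_radio.change` handler takes no inputs (hence the empty list) and returns a 3-tuple that clears `output` and `thought` and hides `ann` whenever the user switches modes, so stale results from the previous mode never linger. The same reset idiom in isolation (hypothetical component names):

    import gradio as gr

    with gr.Blocks() as demo:
        mode = gr.Radio(["Query", "Caption", "Detect"], value="Query")
        text = gr.Markdown()
        image = gr.Image(visible=False)

        # No inputs; the returned tuple maps onto the outputs list in order.
        mode.change(lambda: ("", gr.update(visible=False, value=None)), [], [text, image])

    demo.launch()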