vikhyatk committed on
Commit
d382dff
·
verified ·
1 Parent(s): a40aa2c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -6
app.py CHANGED
@@ -25,8 +25,6 @@ except ImportError:
25
 
26
  IN_SPACES = False
27
 
28
- print("IN_SPACES", IN_SPACES)
29
-
30
  import torch
31
  import os
32
  import gradio as gr
@@ -52,8 +50,6 @@ if IN_SPACES:
52
  )
53
 
54
  auth_token = os.environ.get("TOKEN_FROM_SECRET") or True
55
- os.environ["HF_TOKEN"] = auth_token
56
-
57
  tokenizer = AutoTokenizer.from_pretrained("vikhyatk/moondream-next")
58
  moondream = AutoModelForCausalLM.from_pretrained(
59
  "vikhyatk/moondream-next",
@@ -61,6 +57,7 @@ moondream = AutoModelForCausalLM.from_pretrained(
61
  torch_dtype=torch.float16,
62
  device_map={"": "cuda"},
63
  attn_implementation="flash_attention_2",
 
64
  )
65
 
66
  # CKPT_DIRS = ["/tmp/md-ckpt/ckpt/ft/song-moon-4c-s15/s72001/"]
@@ -217,6 +214,32 @@ def detect(img, object):
217
  )
218
 
219
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
220
  js = """
221
  function createBgAnimation() {
222
  var canvas = document.createElement('canvas');
@@ -375,7 +398,7 @@ with gr.Blocks(title="moondream vl (new)", css=css, js=js) as demo:
375
  """
376
  )
377
  mode_radio = gr.Radio(
378
- ["Caption", "Query", "Detect"],
379
  show_label=False,
380
  value=lambda: "Caption",
381
  )
@@ -427,6 +450,19 @@ with gr.Blocks(title="moondream vl (new)", css=css, js=js) as demo:
427
  submit.click(detect, [img, prompt], [thought, ann])
428
  prompt.submit(detect, [img, prompt], [thought, ann])
429
  img.change(detect, [img, prompt], [thought, ann])
 
 
 
 
 
 
 
 
 
 
 
 
 
430
  else:
431
  gr.Markdown("Coming soon!")
432
 
@@ -473,4 +509,4 @@ with gr.Blocks(title="moondream vl (new)", css=css, js=js) as demo:
473
  [output, thought, ann],
474
  )
475
 
476
- demo.queue().launch()
 
25
 
26
  IN_SPACES = False
27
 
 
 
28
  import torch
29
  import os
30
  import gradio as gr
 
50
  )
51
 
52
  auth_token = os.environ.get("TOKEN_FROM_SECRET") or True
 
 
53
  tokenizer = AutoTokenizer.from_pretrained("vikhyatk/moondream-next")
54
  moondream = AutoModelForCausalLM.from_pretrained(
55
  "vikhyatk/moondream-next",
 
57
  torch_dtype=torch.float16,
58
  device_map={"": "cuda"},
59
  attn_implementation="flash_attention_2",
60
+ token=auth_token if IN_SPACES else None,
61
  )
62
 
63
  # CKPT_DIRS = ["/tmp/md-ckpt/ckpt/ft/song-moon-4c-s15/s72001/"]
 
214
  )
215
 
216
 
217
@spaces.GPU(duration=10)
def point(img, object):
    """Mark every location reported for ``object`` on ``img`` with a dot.

    Generator used as a Gradio event handler; the UI wires its two yielded
    values to the ``thought`` and ``ann`` outputs. It yields exactly once:

    * when ``img`` is ``None``: an empty string and an update that hides
      and clears the annotated-image output;
    * otherwise: a ``{"text": ..., "entities": []}`` payload holding the
      number of points found, and an update that shows the marked-up image.

    Args:
        img: Image to search, or ``None`` when nothing has been uploaded.
            Only ``.size`` is read here before it is handed to ``Resize``,
            ``moondream.point`` and ``ImageDraw.Draw``.
        object: Text describing what to point at; passed straight through
            to ``moondream.point``.
    """
    if img is None:
        yield "", gr.update(visible=False, value=None)
        return

    # Images with any side above 768 px go through Resize(768) first.
    # NOTE(review): Resize is imported outside this view -- confirm how it
    # treats the longer vs. shorter edge.
    if max(img.size) > 768:
        img = Resize(768)(img)
    width, height = img.size

    hits = moondream.point(img, object, tokenizer)

    # Each hit's "x"/"y" is scaled by the image width/height, so they are
    # presumably normalised coordinates -- verify against the model API.
    radius = 5
    canvas = ImageDraw.Draw(img)
    for hit in hits:
        cx = hit["x"] * width
        cy = hit["y"] * height
        canvas.ellipse(
            (cx - radius, cy - radius, cx + radius, cy + radius),
            fill="red",
            outline="blue",
            width=2,
        )

    summary = {"text": f"{len(hits)} detected", "entities": []}
    yield summary, gr.update(visible=True, value=img)
241
+
242
+
243
  js = """
244
  function createBgAnimation() {
245
  var canvas = document.createElement('canvas');
 
398
  """
399
  )
400
  mode_radio = gr.Radio(
401
+ ["Caption", "Query", "Detect", "Point"],
402
  show_label=False,
403
  value=lambda: "Caption",
404
  )
 
450
  submit.click(detect, [img, prompt], [thought, ann])
451
  prompt.submit(detect, [img, prompt], [thought, ann])
452
  img.change(detect, [img, prompt], [thought, ann])
453
+ elif mode == "Point":
454
+ with gr.Group():
455
+ with gr.Row():
456
+ prompt = gr.Textbox(
457
+ label="Object",
458
+ value="Cat",
459
+ scale=4,
460
+ )
461
+ submit = gr.Button("Submit")
462
+ img = gr.Image(type="pil", label="Upload an Image")
463
+ submit.click(point, [img, prompt], [thought, ann])
464
+ prompt.submit(point, [img, prompt], [thought, ann])
465
+ img.change(point, [img, prompt], [thought, ann])
466
  else:
467
  gr.Markdown("Coming soon!")
468
 
 
509
  [output, thought, ann],
510
  )
511
 
512
+ demo.queue().launch(share=True)