Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -56,7 +56,6 @@ moondream = AutoModelForCausalLM.from_pretrained(
|
|
56 |
torch_dtype=torch.float16,
|
57 |
device_map={"": "cuda"},
|
58 |
attn_implementation="flash_attention_2",
|
59 |
-
revision="a23030ab157f2d0f5bb2df6c6d43623904727ec2",
|
60 |
)
|
61 |
|
62 |
# CKPT_DIRS = ["/tmp/md-ckpt/ckpt/ft/song-moon-4c-s15/s72001/"]
|
@@ -78,7 +77,8 @@ def convert_to_entities(text, coords):
|
|
78 |
Converts a string with special markers into an entity representation.
|
79 |
Markers:
|
80 |
- <|coord|> pairs indicate coordinate markers
|
81 |
-
- <|
|
|
|
82 |
- <|end_ground|> indicates the end of a ground term
|
83 |
|
84 |
Returns:
|
@@ -103,10 +103,15 @@ def convert_to_entities(text, coords):
|
|
103 |
entity.append(coords.pop(0))
|
104 |
continue
|
105 |
|
106 |
-
elif text[i : i +
|
107 |
in_entity = True
|
108 |
entity_start = current_pos
|
109 |
-
i +=
|
|
|
|
|
|
|
|
|
|
|
110 |
continue
|
111 |
|
112 |
elif text[i : i + 14] == "<|end_ground|>":
|
@@ -484,18 +489,21 @@ with gr.Blocks(title="moondream vl (new)", css=css, js=js) as demo:
|
|
484 |
w, h = img.size
|
485 |
|
486 |
coords = json.loads(evt.value[1])
|
487 |
-
if len(coords) !=
|
488 |
raise ValueError("Only points supported right now.")
|
489 |
-
coords[0] = int(coords[0] * w)
|
490 |
-
coords[1] = int(coords[1] * h)
|
491 |
|
492 |
img_clone = img.copy()
|
493 |
draw = ImageDraw.Draw(img_clone)
|
494 |
-
|
495 |
-
|
496 |
-
|
497 |
-
|
498 |
-
|
|
|
|
|
|
|
|
|
|
|
499 |
|
500 |
return gr.update(visible=True, value=img_clone)
|
501 |
|
|
|
56 |
torch_dtype=torch.float16,
|
57 |
device_map={"": "cuda"},
|
58 |
attn_implementation="flash_attention_2",
|
|
|
59 |
)
|
60 |
|
61 |
# CKPT_DIRS = ["/tmp/md-ckpt/ckpt/ft/song-moon-4c-s15/s72001/"]
|
|
|
77 |
Converts a string with special markers into an entity representation.
|
78 |
Markers:
|
79 |
- <|coord|> pairs indicate coordinate markers
|
80 |
+
- <|start_ground_points|> indicates the start of grounding
|
81 |
+
- <|start_ground_text|> indicates the start of a ground term
|
82 |
- <|end_ground|> indicates the end of a ground term
|
83 |
|
84 |
Returns:
|
|
|
103 |
entity.append(coords.pop(0))
|
104 |
continue
|
105 |
|
106 |
+
elif text[i : i + 23] == "<|start_ground_points|>":
|
107 |
in_entity = True
|
108 |
entity_start = current_pos
|
109 |
+
i += 23
|
110 |
+
continue
|
111 |
+
|
112 |
+
elif text[i : i + 21] == "<|start_ground_text|>":
|
113 |
+
entity_start = current_pos
|
114 |
+
i += 21
|
115 |
continue
|
116 |
|
117 |
elif text[i : i + 14] == "<|end_ground|>":
|
|
|
489 |
w, h = img.size
|
490 |
|
491 |
coords = json.loads(evt.value[1])
|
492 |
+
if len(coords) % 2 != 0:
|
493 |
raise ValueError("Only points supported right now.")
|
|
|
|
|
494 |
|
495 |
img_clone = img.copy()
|
496 |
draw = ImageDraw.Draw(img_clone)
|
497 |
+
|
498 |
+
for i in range(0, len(coords), 2): # Step by 2 to handle x,y pairs
|
499 |
+
x = int(coords[i] * w)
|
500 |
+
y = int(coords[i + 1] * h)
|
501 |
+
draw.ellipse(
|
502 |
+
(x - 3, y - 3, x + 3, y + 3),
|
503 |
+
fill="red",
|
504 |
+
outline="red",
|
505 |
+
)
|
506 |
+
|
507 |
|
508 |
return gr.update(visible=True, value=img_clone)
|
509 |
|