test-space

Runtime error

App Files Files Community

vikhyatk committed on Nov 20, 2024

Commit

5912d41

verified ·

1 Parent(s): 718a94f

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -12

app.py CHANGED Viewed

@@ -56,7 +56,6 @@ moondream = AutoModelForCausalLM.from_pretrained(
     torch_dtype=torch.float16,
     device_map={"": "cuda"},
     attn_implementation="flash_attention_2",
-    revision="a23030ab157f2d0f5bb2df6c6d43623904727ec2",
 )
 # CKPT_DIRS = ["/tmp/md-ckpt/ckpt/ft/song-moon-4c-s15/s72001/"]
@@ -78,7 +77,8 @@ def convert_to_entities(text, coords):
     Converts a string with special markers into an entity representation.
     Markers:
     - <|coord|> pairs indicate coordinate markers
-    - <|start_ground|> indicates the start of a ground term
     - <|end_ground|> indicates the end of a ground term
     Returns:
@@ -103,10 +103,15 @@ def convert_to_entities(text, coords):
             entity.append(coords.pop(0))
             continue
-        elif text[i : i + 16] == "<|start_ground|>":
             in_entity = True
             entity_start = current_pos
-            i += 16
             continue
         elif text[i : i + 14] == "<|end_ground|>":
@@ -484,18 +489,21 @@ with gr.Blocks(title="moondream vl (new)", css=css, js=js) as demo:
                 w, h = img.size
             coords = json.loads(evt.value[1])
-            if len(coords) != 2:
                 raise ValueError("Only points supported right now.")
-            coords[0] = int(coords[0] * w)
-            coords[1] = int(coords[1] * h)
             img_clone = img.copy()
             draw = ImageDraw.Draw(img_clone)
-            draw.ellipse(
-                (coords[0] - 3, coords[1] - 3, coords[0] + 3, coords[1] + 3),
-                fill="red",
-                outline="red",
-            )
             return gr.update(visible=True, value=img_clone)

     torch_dtype=torch.float16,
     device_map={"": "cuda"},
     attn_implementation="flash_attention_2",
 )
 # CKPT_DIRS = ["/tmp/md-ckpt/ckpt/ft/song-moon-4c-s15/s72001/"]
     Converts a string with special markers into an entity representation.
     Markers:
     - <|coord|> pairs indicate coordinate markers
+    - <|start_ground_points|> indicates the start of grounding
+    - <|start_ground_text|> indicates the start of a ground term
     - <|end_ground|> indicates the end of a ground term
     Returns:
             entity.append(coords.pop(0))
             continue
+        elif text[i : i + 23] == "<|start_ground_points|>":
             in_entity = True
             entity_start = current_pos
+            i += 23
+            continue
+        elif text[i : i + 21] == "<|start_ground_text|>":
+            entity_start = current_pos
+            i += 21
             continue
         elif text[i : i + 14] == "<|end_ground|>":
                 w, h = img.size
             coords = json.loads(evt.value[1])
+            if len(coords) % 2 != 0:
                 raise ValueError("Only points supported right now.")
             img_clone = img.copy()
             draw = ImageDraw.Draw(img_clone)
+            for i in range(0, len(coords), 2):  # Step by 2 to handle x,y pairs
+                x = int(coords[i] * w)
+                y = int(coords[i + 1] * h)
+                draw.ellipse(
+                    (x - 3, y - 3, x + 3, y + 3),
+                    fill="red",
+                    outline="red",
+                )
             return gr.update(visible=True, value=img_clone)