vikhyatk committed on
Commit
5912d41
·
verified ·
1 Parent(s): 718a94f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -12
app.py CHANGED
@@ -56,7 +56,6 @@ moondream = AutoModelForCausalLM.from_pretrained(
56
  torch_dtype=torch.float16,
57
  device_map={"": "cuda"},
58
  attn_implementation="flash_attention_2",
59
- revision="a23030ab157f2d0f5bb2df6c6d43623904727ec2",
60
  )
61
 
62
  # CKPT_DIRS = ["/tmp/md-ckpt/ckpt/ft/song-moon-4c-s15/s72001/"]
@@ -78,7 +77,8 @@ def convert_to_entities(text, coords):
78
  Converts a string with special markers into an entity representation.
79
  Markers:
80
  - <|coord|> pairs indicate coordinate markers
81
- - <|start_ground|> indicates the start of a ground term
 
82
  - <|end_ground|> indicates the end of a ground term
83
 
84
  Returns:
@@ -103,10 +103,15 @@ def convert_to_entities(text, coords):
103
  entity.append(coords.pop(0))
104
  continue
105
 
106
- elif text[i : i + 16] == "<|start_ground|>":
107
  in_entity = True
108
  entity_start = current_pos
109
- i += 16
 
 
 
 
 
110
  continue
111
 
112
  elif text[i : i + 14] == "<|end_ground|>":
@@ -484,18 +489,21 @@ with gr.Blocks(title="moondream vl (new)", css=css, js=js) as demo:
484
  w, h = img.size
485
 
486
  coords = json.loads(evt.value[1])
487
- if len(coords) != 2:
488
  raise ValueError("Only points supported right now.")
489
- coords[0] = int(coords[0] * w)
490
- coords[1] = int(coords[1] * h)
491
 
492
  img_clone = img.copy()
493
  draw = ImageDraw.Draw(img_clone)
494
- draw.ellipse(
495
- (coords[0] - 3, coords[1] - 3, coords[0] + 3, coords[1] + 3),
496
- fill="red",
497
- outline="red",
498
- )
 
 
 
 
 
499
 
500
  return gr.update(visible=True, value=img_clone)
501
 
 
56
  torch_dtype=torch.float16,
57
  device_map={"": "cuda"},
58
  attn_implementation="flash_attention_2",
 
59
  )
60
 
61
  # CKPT_DIRS = ["/tmp/md-ckpt/ckpt/ft/song-moon-4c-s15/s72001/"]
 
77
  Converts a string with special markers into an entity representation.
78
  Markers:
79
  - <|coord|> pairs indicate coordinate markers
80
+ - <|start_ground_points|> indicates the start of grounding
81
+ - <|start_ground_text|> indicates the start of a ground term
82
  - <|end_ground|> indicates the end of a ground term
83
 
84
  Returns:
 
103
  entity.append(coords.pop(0))
104
  continue
105
 
106
+ elif text[i : i + 23] == "<|start_ground_points|>":
107
  in_entity = True
108
  entity_start = current_pos
109
+ i += 23
110
+ continue
111
+
112
+ elif text[i : i + 21] == "<|start_ground_text|>":
113
+ entity_start = current_pos
114
+ i += 21
115
  continue
116
 
117
  elif text[i : i + 14] == "<|end_ground|>":
 
489
  w, h = img.size
490
 
491
  coords = json.loads(evt.value[1])
492
+ if len(coords) % 2 != 0:
493
  raise ValueError("Only points supported right now.")
 
 
494
 
495
  img_clone = img.copy()
496
  draw = ImageDraw.Draw(img_clone)
497
+
498
+ for i in range(0, len(coords), 2): # Step by 2 to handle x,y pairs
499
+ x = int(coords[i] * w)
500
+ y = int(coords[i + 1] * h)
501
+ draw.ellipse(
502
+ (x - 3, y - 3, x + 3, y + 3),
503
+ fill="red",
504
+ outline="red",
505
+ )
506
+
507
 
508
  return gr.update(visible=True, value=img_clone)
509