vikhyatk committed on
Commit
92bb3e8
·
verified ·
1 Parent(s): e308ff6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -5
app.py CHANGED
@@ -126,11 +126,17 @@ def convert_to_entities(text, coords):
126
 
127
 
128
  @spaces.GPU(duration=30)
129
- def answer_question(img, prompt):
130
  buffer = ""
 
 
 
 
 
 
131
  for new_text in moondream.query(img, prompt, stream=True)["answer"]:
132
  buffer += new_text
133
- yield buffer.strip(), {"text": "Thinking...", "entities": []}
134
 
135
 
136
  @spaces.GPU(duration=10)
@@ -272,10 +278,12 @@ with gr.Blocks(title="moondream vl (new)", css=css, js=js) as demo:
272
  scale=4,
273
  )
274
  submit = gr.Button("Submit")
 
275
  img = gr.Image(type="pil", label="Upload an Image")
276
- submit.click(answer_question, [img, prompt], [output, thought])
277
- prompt.submit(answer_question, [img, prompt], [output, thought])
278
- img.change(answer_question, [img, prompt], [output, thought])
 
279
  img.change(lambda img: img, [img], [input_image])
280
  elif mode == "Caption":
281
  with gr.Group():
 
126
 
127
 
128
@spaces.GPU(duration=30)
def answer_question(img, prompt, reasoning):
    """Stream an answer for *prompt* about *img*, with optional reasoning.

    Generator for the Gradio handlers: each iteration yields
    ``(answer_so_far, highlighted_text_value)`` where the second element is a
    ``{"text": ..., "entities": [...]}`` dict for a HighlightedText component.

    Args:
        img: PIL image from the gr.Image input.
        prompt: user question string.
        reasoning: bool from the "Enable reasoning" checkbox; when False the
            reasoning panel shows a fixed placeholder and no entities.
    """
    resp = moondream.query(img, prompt, stream=True, reasoning=reasoning)

    if reasoning:
        # NOTE(review): assumes the streaming response exposes the full
        # reasoning payload up front, before the answer stream is consumed —
        # confirm against the moondream client docs.
        reasoning_text = resp["reasoning"]["text"]
        entities = [
            {"start": g["start_idx"], "end": g["end_idx"], "entity": json.dumps(g["points"])}
            for g in resp["reasoning"]["grounding"]
        ]
    else:
        reasoning_text = "[reasoning disabled]"
        entities = []

    thought = {"text": reasoning_text, "entities": entities}

    buffer = ""
    # Reuse the answer stream from the response above. The original code
    # issued a SECOND moondream.query(img, prompt, stream=True) call here,
    # running inference twice per request and streaming an answer that was
    # not the one produced alongside the displayed reasoning.
    for new_text in resp["answer"]:
        buffer += new_text
        yield buffer.strip(), thought
140
 
141
 
142
  @spaces.GPU(duration=10)
 
278
  scale=4,
279
  )
280
  submit = gr.Button("Submit")
281
+ reasoning = gr.Checkbox(label="Enable reasoning")
282
  img = gr.Image(type="pil", label="Upload an Image")
283
+ submit.click(answer_question, [img, prompt, reasoning], [output, thought])
284
+ prompt.submit(answer_question, [img, prompt, reasoning], [output, thought])
285
+ reasoning.change(answer_question, [img, prompt, reasoning], [output, thought])
286
+ img.change(answer_question, [img, prompt, reasoning], [output, thought])
287
  img.change(lambda img: img, [img], [input_image])
288
  elif mode == "Caption":
289
  with gr.Group():