Spaces:
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -126,11 +126,17 @@ def convert_to_entities(text, coords):
|
|
126 |
|
127 |
|
128 |
@spaces.GPU(duration=30)
|
129 |
-
def answer_question(img, prompt):
|
130 |
buffer = ""
|
|
|
|
|
|
|
|
|
|
|
|
|
131 |
for new_text in moondream.query(img, prompt, stream=True)["answer"]:
|
132 |
buffer += new_text
|
133 |
-
yield buffer.strip(), {"text":
|
134 |
|
135 |
|
136 |
@spaces.GPU(duration=10)
|
@@ -272,10 +278,12 @@ with gr.Blocks(title="moondream vl (new)", css=css, js=js) as demo:
|
|
272 |
scale=4,
|
273 |
)
|
274 |
submit = gr.Button("Submit")
|
|
|
275 |
img = gr.Image(type="pil", label="Upload an Image")
|
276 |
-
submit.click(answer_question, [img, prompt], [output, thought])
|
277 |
-
prompt.submit(answer_question, [img, prompt], [output, thought])
|
278 |
-
|
|
|
279 |
img.change(lambda img: img, [img], [input_image])
|
280 |
elif mode == "Caption":
|
281 |
with gr.Group():
|
|
|
@spaces.GPU(duration=30)
def answer_question(img, prompt, reasoning):
    """Stream an answer to *prompt* about *img*, optionally with grounded reasoning.

    Generator used as a gradio event handler. Yields ``(answer_so_far, thought)``
    tuples, where ``thought`` is a dict in gr.HighlightedText form:
    ``{"text": ..., "entities": [{"start", "end", "entity"}, ...]}``.

    Args:
        img: PIL image to query (gr.Image with type="pil").
        prompt: user question text.
        reasoning: bool — when True, request the model's reasoning trace and
            its grounding spans; when False, show a placeholder instead.
    """
    buffer = ""
    # Single streaming query; `resp` carries both the reasoning trace and the
    # answer stream. (Previously the loop below issued a second, non-reasoning
    # query, running the model twice and discarding this response's answer.)
    resp = moondream.query(img, prompt, stream=True, reasoning=reasoning)
    reasoning_text = resp["reasoning"]["text"] if reasoning else "[reasoning disabled]"
    # Each grounding span is encoded as a HighlightedText entity whose label is
    # the JSON-serialized point list, so the frontend can decode and plot it.
    entities = [
        {"start": g["start_idx"], "end": g["end_idx"], "entity": json.dumps(g["points"])}
        for g in resp["reasoning"]["grounding"]
    ] if reasoning else []
    # Stream the answer from the SAME response object — do not re-query.
    for new_text in resp["answer"]:
        buffer += new_text
        yield buffer.strip(), {"text": reasoning_text, "entities": entities}
|
140 |
|
141 |
|
142 |
@spaces.GPU(duration=10)
|
|
|
278 |
scale=4,
|
279 |
)
|
280 |
submit = gr.Button("Submit")
|
281 |
+
reasoning = gr.Checkbox(label="Enable reasoning")
|
282 |
img = gr.Image(type="pil", label="Upload an Image")
|
283 |
+
submit.click(answer_question, [img, prompt, reasoning], [output, thought])
|
284 |
+
prompt.submit(answer_question, [img, prompt, reasoning], [output, thought])
|
285 |
+
reasoning.change(answer_question, [img, prompt, reasoning], [output, thought])
|
286 |
+
img.change(answer_question, [img, prompt, reasoning], [output, thought])
|
287 |
img.change(lambda img: img, [img], [input_image])
|
288 |
elif mode == "Caption":
|
289 |
with gr.Group():
|