yu-rp committed
Commit · 3a08b56
Parent(s): bf90ae0
update

Browse files:
- app.py +31 -5
- gallery/1_resize.jpg +3 -0
- gallery/2_resize.jpg +3 -0
- gallery/3_resize.jpg +3 -0
- model.py +20 -0
app.py
CHANGED

@@ -31,6 +31,19 @@ from model import (
     get_model,
 )
 
+# seed for reproducibility
+import random
+import numpy as np
+import torch
+seed = 42
+random.seed(seed)
+np.random.seed(seed)
+torch.manual_seed(seed)
+if torch.cuda.is_available():
+    torch.cuda.manual_seed_all(seed)
+torch.backends.cudnn.deterministic = True
+torch.backends.cudnn.benchmark = False
+
 logger = build_logger("dimple", "dimple.log")
 
 no_change_btn = gr.Button()

@@ -233,8 +246,6 @@ def http_bot(
     )
 
     thread = threading.Thread(target=run_generate)
-    thread.start()
-
     logger.info(f"==== wait for first token ====\n")
     state.append_message(Conversation.ASSISTANT, state.streaming_placeholder)
     yield (

@@ -243,6 +254,10 @@ def http_bot(
         gr.MultimodalTextbox(interactive=False),
     ) + (disable_btn,) * 5
 
+    num_steps = 0
+    start_tstamp = time.time()
+    thread.start()
+
     try:
         # Stream output
         for ans in streamer:

@@ -252,11 +267,22 @@ def http_bot(
             ans = ans[0]
 
             state.update_message(Conversation.ASSISTANT, ans, None)
+            num_steps += 1
             yield (
                 state,
                 state.to_gradio_chatbot(),
                 gr.MultimodalTextbox(interactive=False),
             ) + (disable_btn,) * 5
+        end_tstamp = time.time()
+        total_time = end_tstamp - start_tstamp
+        tps = int(max_new_tokens) / total_time
+        stat_info = f"\n\n[#Tokens: {int(max_new_tokens)}, #Steps: {int(num_steps)}, TPS: {tps:.2f} tokens/s, Latency: {total_time:.2f}s]"
+        state.update_message(Conversation.ASSISTANT, ans+stat_info, None)
+        yield (
+            state,
+            state.to_gradio_chatbot(),
+            gr.MultimodalTextbox(interactive=False),
+        ) + (disable_btn,) * 5
     except Exception as e:
         state.update_message(Conversation.ASSISTANT, server_error_msg, None)
         yield (

@@ -422,7 +448,7 @@ def build_demo():
         [
             {
                 "files": [
-                    "gallery/
+                    "gallery/1_resize.jpg",
                 ],
                 "text": "Please help me describe the image.",
             }

@@ -430,7 +456,7 @@ def build_demo():
         [
             {
                 "files": [
-                    "gallery/
+                    "gallery/2_resize.jpg",
                 ],
                 "text": "First please perform reasoning, and think step by step to provide best answer to the following question: Is this airplane taking off or landing?",
             }

@@ -438,7 +464,7 @@ def build_demo():
         [
             {
                 "files": [
-                    "gallery/
+                    "gallery/3_resize.jpg",
                 ],
                 "text": "First please perform reasoning, and think step by step to provide best answer to the following question: What is the lamp on, a side table or a nightstand?",
             }
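The reordering in http_bot is what makes the new throughput stats meaningful: thread.start() now runs right after start_tstamp is taken, so the measured window covers only the decode loop, and each item the streamer yields counts as one decoding step. Below is a minimal standalone sketch of the same accounting; run_generate, streamer, and max_new_tokens are the names used in the diff, but the helper itself is illustrative, not part of the commit:

import threading
import time

def timed_stream(run_generate, streamer, max_new_tokens):
    # Mirror the commit's ordering: take the timestamp, then start the
    # generation thread, so setup cost is excluded from the TPS figure.
    num_steps = 0
    start_tstamp = time.time()
    thread = threading.Thread(target=run_generate)
    thread.start()
    ans = ""
    for ans in streamer:
        # one queue item per decoding step (the full sequence each time)
        num_steps += 1
    total_time = time.time() - start_tstamp
    tps = int(max_new_tokens) / total_time
    return ans, num_steps, tps, total_time

Note that TPS is derived from max_new_tokens rather than from the tokens actually produced, so the reported rate assumes the full token budget is generated on every run.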
gallery/1_resize.jpg
ADDED (image stored via Git LFS)

gallery/2_resize.jpg
ADDED (image stored via Git LFS)

gallery/3_resize.jpg
ADDED (image stored via Git LFS)
model.py
CHANGED

@@ -5,8 +5,14 @@ from transformers import AutoProcessor, AutoModel, TextIteratorStreamer
 class FullSequenceStreamer(TextIteratorStreamer):
     def __init__(self, tokenizer, **kwargs):
         super().__init__(tokenizer, **kwargs)
+        self.mask_token = tokenizer.mask_token_id
+        self.placeholder_token = tokenizer.convert_tokens_to_ids("_")
+        self.placeholder_token = tokenizer.encode("␣")[0]
 
     def put(self, value, stream_end=False):
+        # change mask tokens to space token
+        value = value.clone()
+        value[value == self.mask_token] = self.placeholder_token
         # Assume full token_ids are passed in every time
         decoded = self.tokenizer.batch_decode(value, **self.decode_kwargs)
         self.text_queue.put(decoded)

@@ -33,3 +39,17 @@ def get_model(device):
 
     return model, processor
 
+from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer, AutoProcessor
+
+def get_qwen(device):
+
+    model_name = "Qwen/Qwen2-VL-7B-Instruct"
+    processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct")
+    model = Qwen2VLForConditionalGeneration.from_pretrained(
+        model_name,
+        torch_dtype=torch.bfloat16,
+    )
+    model = model.eval()
+    model = model.to(device)
+
+    return model, processor
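FullSequenceStreamer now maps still-masked positions to a visible placeholder before decoding; note that the second placeholder_token assignment overrides the first, so the "␣" encoding is what actually renders. Below is a minimal sketch of driving the streamer, assuming get_model and FullSequenceStreamer from this file and a Dimple-style generate() that pushes the full token matrix to streamer.put() once per step; inputs stands in for processor-prepared multimodal inputs and is not part of the commit:

import threading

# a minimal sketch, assuming the loader and streamer defined in model.py
model, processor = get_model("cuda")
streamer = FullSequenceStreamer(processor.tokenizer)

def run_generate():
    # generate() is assumed to call streamer.put(token_ids)
    # with the complete token sequence at every decoding step
    model.generate(**inputs, max_new_tokens=64, streamer=streamer)

threading.Thread(target=run_generate).start()
for decoded in streamer:
    # decoded is a list from batch_decode; masked slots show as "␣"
    print(decoded[0])

The new get_qwen() helper is a separate loader for Qwen2-VL-7B-Instruct with the same (model, processor) return shape, so it can be called the same way as get_model(device).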