yu-rp committed on
Commit 3a08b56 · 1 Parent(s): bf90ae0
Files changed (5)
  1. app.py +31 -5
  2. gallery/1_resize.jpg +3 -0
  3. gallery/2_resize.jpg +3 -0
  4. gallery/3_resize.jpg +3 -0
  5. model.py +20 -0
app.py CHANGED
```diff
@@ -31,6 +31,19 @@ from model import (
     get_model,
 )
 
+# seed for reproducibility
+import random
+import numpy as np
+import torch
+seed = 42
+random.seed(seed)
+np.random.seed(seed)
+torch.manual_seed(seed)
+if torch.cuda.is_available():
+    torch.cuda.manual_seed_all(seed)
+    torch.backends.cudnn.deterministic = True
+    torch.backends.cudnn.benchmark = False
+
 logger = build_logger("dimple", "dimple.log")
 
 no_change_btn = gr.Button()
@@ -233,8 +246,6 @@ def http_bot(
     )
 
     thread = threading.Thread(target=run_generate)
-    thread.start()
-
     logger.info(f"==== wait for first token ====\n")
     state.append_message(Conversation.ASSISTANT, state.streaming_placeholder)
     yield (
@@ -243,6 +254,10 @@ def http_bot(
         gr.MultimodalTextbox(interactive=False),
     ) + (disable_btn,) * 5
 
+    num_steps = 0
+    start_tstamp = time.time()
+    thread.start()
+
     try:
         # Stream output
         for ans in streamer:
@@ -252,11 +267,22 @@ def http_bot(
             ans = ans[0]
 
             state.update_message(Conversation.ASSISTANT, ans, None)
+            num_steps += 1
             yield (
                 state,
                 state.to_gradio_chatbot(),
                 gr.MultimodalTextbox(interactive=False),
             ) + (disable_btn,) * 5
+        end_tstamp = time.time()
+        total_time = end_tstamp - start_tstamp
+        tps = int(max_new_tokens) / total_time
+        stat_info = f"\n\n[#Tokens: {int(max_new_tokens)}, #Steps: {int(num_steps)}, TPS: {tps:.2f} tokens/s, Latency: {total_time:.2f}s]"
+        state.update_message(Conversation.ASSISTANT, ans+stat_info, None)
+        yield (
+            state,
+            state.to_gradio_chatbot(),
+            gr.MultimodalTextbox(interactive=False),
+        ) + (disable_btn,) * 5
     except Exception as e:
         state.update_message(Conversation.ASSISTANT, server_error_msg, None)
         yield (
@@ -422,7 +448,7 @@ def build_demo():
                 [
                     {
                         "files": [
-                            "gallery/1.jpg",
+                            "gallery/1_resize.jpg",
                         ],
                         "text": "Please help me describe the image.",
                     }
@@ -430,7 +456,7 @@ def build_demo():
                 [
                     {
                         "files": [
-                            "gallery/2.jpg",
+                            "gallery/2_resize.jpg",
                         ],
                         "text": "First please perform reasoning, and think step by step to provide best answer to the following question: Is this airplane taking off or landing?",
                     }
@@ -438,7 +464,7 @@ def build_demo():
                 [
                     {
                         "files": [
-                            "gallery/3.jpg",
+                            "gallery/3_resize.jpg",
                         ],
                         "text": "First please perform reasoning, and think step by step to provide best answer to the following question: What is the lamp on, a side table or a nightstand?",
                     }
```
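
Two notes on the `http_bot` changes above. Deferring `thread.start()` until just after `start_tstamp` is recorded means the reported latency covers generation only, not the placeholder-message round trip. Also, TPS is computed from the fixed `max_new_tokens` budget rather than from the tokens actually decoded, which fits a decoder that fills a fixed-length block over `num_steps` denoising iterations (one streamer yield per step). A minimal sketch of the same bookkeeping; `throughput_stats` is a hypothetical helper name, not part of the commit:

```python
import time

def throughput_stats(max_new_tokens: int, num_steps: int, start: float, end: float) -> str:
    """Hypothetical helper mirroring the stat line http_bot appends to each answer."""
    total_time = end - start
    tps = int(max_new_tokens) / total_time  # token budget / wall time, as in the commit
    return (
        f"\n\n[#Tokens: {int(max_new_tokens)}, #Steps: {num_steps}, "
        f"TPS: {tps:.2f} tokens/s, Latency: {total_time:.2f}s]"
    )

# Example: a 256-token budget filled in 64 steps over 4 seconds -> TPS: 64.00 tokens/s.
print(throughput_stats(256, 64, start=0.0, end=4.0))
```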
gallery/1_resize.jpg ADDED

Git LFS Details

  • SHA256: 51e7b7e8afbc26df85f33e662099d9c8f0e93e888efb13c251447cda110b5b8d
  • Pointer size: 131 Bytes
  • Size of remote file: 106 kB
gallery/2_resize.jpg ADDED

Git LFS Details

  • SHA256: 8140b68ef6c750df8616189336d55a23d4434796242f59a5806d26affb774d70
  • Pointer size: 130 Bytes
  • Size of remote file: 51.7 kB
gallery/3_resize.jpg ADDED

Git LFS Details

  • SHA256: d63becfcf5fcce9e87c21786bd50123830eeab82f383c992bb4faedbb7a62d2c
  • Pointer size: 130 Bytes
  • Size of remote file: 45.2 kB
model.py CHANGED
```diff
@@ -5,8 +5,14 @@ from transformers import AutoProcessor, AutoModel, TextIteratorStreamer
 class FullSequenceStreamer(TextIteratorStreamer):
     def __init__(self, tokenizer, **kwargs):
         super().__init__(tokenizer, **kwargs)
+        self.mask_token = tokenizer.mask_token_id
+        self.placeholder_token = tokenizer.convert_tokens_to_ids("_")
+        self.placeholder_token = tokenizer.encode("␣")[0]
 
     def put(self, value, stream_end=False):
+        # change mask tokens to space token
+        value = value.clone()
+        value[value == self.mask_token] = self.placeholder_token
         # Assume full token_ids are passed in every time
         decoded = self.tokenizer.batch_decode(value, **self.decode_kwargs)
         self.text_queue.put(decoded)
@@ -33,3 +39,17 @@ def get_model(device):
 
     return model, processor
 
+from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer, AutoProcessor
+
+def get_qwen(device):
+
+    model_name = "Qwen/Qwen2-VL-7B-Instruct"
+    processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct")
+    model = Qwen2VLForConditionalGeneration.from_pretrained(
+        model_name,
+        torch_dtype=torch.bfloat16,
+    )
+    model = model.eval()
+    model = model.to(device)
+
+    return model, processor
```
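
The `FullSequenceStreamer` change above makes still-masked positions visible while streaming: every `put` receives the full token-id tensor, masked slots are swapped for a placeholder id before decoding (the second `placeholder_token` assignment, via `tokenizer.encode("␣")[0]`, overrides the first), so undecoded positions render as `␣` in the chat window. A minimal consumer sketch, assuming a tokenizer with a `mask_token_id` and a `generate` call that accepts this streamer, as `run_generate` in app.py does; `model`, `processor`, and `inputs` are placeholders here:

```python
import threading

# Sketch only: batch size 1, objects assumed to come from get_model(...)
# and the demo's usual preprocessing.
streamer = FullSequenceStreamer(processor.tokenizer, skip_special_tokens=True)

thread = threading.Thread(target=lambda: model.generate(**inputs, streamer=streamer))
thread.start()

for ans in streamer:   # one full decoded sequence per denoising step
    print(ans[0])      # batch_decode returns a list of strings
thread.join()
```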
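
The new `get_qwen` helper loads a plain autoregressive Qwen2-VL baseline alongside the Dimple model; note it relies on `torch` already being imported at module level in model.py. A usage sketch following the standard Qwen2-VL chat-template flow from the transformers documentation; the device string and the gallery image are placeholder choices:

```python
from PIL import Image

model, processor = get_qwen("cuda")

messages = [{
    "role": "user",
    "content": [
        {"type": "image"},
        {"type": "text", "text": "Please help me describe the image."},
    ],
}]
prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
image = Image.open("gallery/1_resize.jpg")

inputs = processor(text=[prompt], images=[image], return_tensors="pt").to("cuda")
gen = model.generate(**inputs, max_new_tokens=128)
gen = gen[:, inputs["input_ids"].shape[1]:]  # drop the echoed prompt tokens
print(processor.batch_decode(gen, skip_special_tokens=True)[0])
```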