cyrus28214 committed
Commit 80cd182 · unverified · 1 Parent(s): cde52cf
Files changed (1)
  1. app.py +25 -25
app.py CHANGED
@@ -3,6 +3,7 @@ import torch
 from PIL import Image
 from threading import Thread
 from transformers import AutoProcessor, AutoModelForVision2Seq, TextIteratorStreamer
+from transformers.image_utils import load_image
 import spaces
 
 device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -26,46 +27,45 @@ def respond(
     temperature,
     top_p,
 ):
+    text = message["text"]
+    files = message["files"]
+    all_images = []
+    current_message_images = []
+
     messages = [{"role": "system", "content": system_message}]
 
     print(message)
     print(history)
 
-    messages.extend(history)
-
-    images = []
-    if message["files"]:
-        pil_image = Image.open(message["files"][0]).convert("RGB")
-        images.append(pil_image)
-
-    current_user_message = {"role": "user", "content": message["text"]}
-    messages.append(current_user_message)
-
+    current_message_images = [load_image(image) for image in files]
+    messages.append({
+        "role": "user",
+        "content": [
+            *[{"type": "image", "image": image} for image in current_message_images],
+            {"type": "text", "text": text},
+        ],
+    })
     prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-    inputs = processor(text=prompt, images=images, return_tensors="pt").to(device, torch_dtype)
-
-    streamer = TextIteratorStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=True)
+    inputs = processor(
+        text=[prompt],
+        images=current_message_images if current_message_images else None,
+        return_tensors="pt",
+        padding=True,
+    ).to(device, torch_dtype)
 
-    generation_kwargs = dict(
-        **inputs,
-        streamer=streamer,
-        max_new_tokens=max_tokens,
-        do_sample=True,
-        temperature=temperature,
-        top_p=top_p,
-    )
+    streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
+    generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024)
 
     thread = Thread(target=model.generate, kwargs=generation_kwargs)
     thread.start()
 
-    response = ""
+    buffer = ""
     for new_text in streamer:
-        response += new_text
-        yield response
+        buffer += new_text
+        yield buffer
 
 demo = gr.ChatInterface(
     respond,
-    type='messages',
     multimodal=True,
     additional_inputs=[
         gr.Textbox(value="You are a helpful and friendly multimodal assistant. You can analyze images and answer questions about them.", label="System message"),
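For context, the new handler reads message["text"] and message["files"], which is the input contract of gr.ChatInterface when multimodal=True: each user turn arrives as a dict holding a text string and a list of local file paths. A minimal standalone sketch of that contract (the echo handler is illustrative only, not part of this commit):

import gradio as gr

def echo(message, history):
    # With multimodal=True, message is {"text": str, "files": list[str]};
    # each entry of "files" is a local path to an uploaded file.
    return f"Got {message['text']!r} and {len(message['files'])} file(s)."

demo = gr.ChatInterface(echo, multimodal=True)

if __name__ == "__main__":
    demo.launch()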
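The substance of the change is the message layout: each user turn becomes a list of typed content parts (image parts first, then a text part) that processor.apply_chat_template renders into the prompt string, with the PIL images passed separately to the processor call. A minimal sketch of that layout, assuming a placeholder SmolVLM checkpoint and a sample documentation image URL; the hunks shown here do not pin the model:

from transformers import AutoProcessor
from transformers.image_utils import load_image

# Placeholder checkpoint (assumption): any processor whose chat template
# understands {"type": "image"} / {"type": "text"} parts behaves the same way.
processor = AutoProcessor.from_pretrained("HuggingFaceTB/SmolVLM-Instruct")

# load_image accepts local paths or URLs; sample image is an assumption.
image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg")

messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "image": image},  # one part per attached image
            {"type": "text", "text": "What is in this picture?"},
        ],
    },
]

# tokenize=False returns the rendered prompt string; add_generation_prompt=True
# appends the assistant header so generation continues as a reply.
prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

# The images themselves go to the processor call, mirroring the diff above.
inputs = processor(text=[prompt], images=[image], return_tensors="pt", padding=True)
print(prompt)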
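The streaming half of respond() is model-agnostic: model.generate runs on a worker thread, TextIteratorStreamer is the thread-safe queue between the two threads, and the consumer loop yields a growing buffer. A minimal sketch of the same pattern, assuming a small text-only gpt2 checkpoint purely for illustration:

from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # placeholder checkpoint
model = AutoModelForCausalLM.from_pretrained("gpt2")

inputs = tokenizer("The quick brown fox", return_tensors="pt")
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

# generate() blocks until finished, so it runs on a background thread while
# the main thread drains the streamer; iteration ends when generation stops.
thread = Thread(target=model.generate, kwargs=dict(inputs, streamer=streamer, max_new_tokens=32))
thread.start()

buffer = ""
for new_text in streamer:
    buffer += new_text
    print(buffer)
thread.join()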