cyrus28214 committed on
Commit
cde52cf
·
1 Parent(s): 1d23ed0
Files changed (1) hide show
  1. app.py +47 -20
app.py CHANGED
@@ -1,9 +1,21 @@
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
 
 
 
3
  import spaces
4
- from transformers import pipeline
5
 
6
- pipe = pipeline("image-text-to-text", model="HuggingFaceTB/SmolVLM2-2.2B-Instruct")
 
 
 
 
 
 
 
 
 
 
7
 
8
  @spaces.GPU
9
  def respond(
@@ -16,35 +28,49 @@ def respond(
16
  ):
17
  messages = [{"role": "system", "content": system_message}]
18
 
 
 
 
19
  messages.extend(history)
20
 
21
- messages.append({"role": "user", "content": message})
 
 
 
22
 
23
- response = ""
 
 
 
 
24
 
25
- for message in pipe(
26
- messages,
27
- max_tokens=max_tokens,
28
- stream=True,
 
 
 
29
  temperature=temperature,
30
  top_p=top_p,
31
- ):
32
- token = message.choices[0].delta.content
33
 
34
- response += token
35
- yield response
36
 
 
 
 
 
37
 
38
- """
39
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
40
- """
41
  demo = gr.ChatInterface(
42
  respond,
43
  type='messages',
 
44
  additional_inputs=[
45
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
46
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
47
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
48
  gr.Slider(
49
  minimum=0.1,
50
  maximum=1.0,
@@ -53,8 +79,9 @@ demo = gr.ChatInterface(
53
  label="Top-p (nucleus sampling)",
54
  ),
55
  ],
 
 
56
  )
57
 
58
-
59
  if __name__ == "__main__":
60
- demo.launch()
 
1
  import gradio as gr
2
+ import torch
3
+ from PIL import Image
4
+ from threading import Thread
5
+ from transformers import AutoProcessor, AutoModelForVision2Seq, TextIteratorStreamer
6
  import spaces
 
7
 
8
+ device = "cuda" if torch.cuda.is_available() else "cpu"
9
+ torch_dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32
10
+
11
+ MODEL_ID = "HuggingFaceTB/SmolVLM-256M-Instruct"
12
+ processor = AutoProcessor.from_pretrained(MODEL_ID)
13
+ model = AutoModelForVision2Seq.from_pretrained(
14
+ MODEL_ID,
15
+ torch_dtype=torch_dtype,
16
+ trust_remote_code=True
17
+ ).to(device)
18
+
19
 
20
  @spaces.GPU
21
  def respond(
 
28
  ):
29
  messages = [{"role": "system", "content": system_message}]
30
 
31
+ print(message)
32
+ print(history)
33
+
34
  messages.extend(history)
35
 
36
+ images = []
37
+ if message["files"]:
38
+ pil_image = Image.open(message["files"][0]).convert("RGB")
39
+ images.append(pil_image)
40
 
41
+ current_user_message = {"role": "user", "content": message["text"]}
42
+ messages.append(current_user_message)
43
+
44
+ prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
45
+ inputs = processor(text=prompt, images=images, return_tensors="pt").to(device, torch_dtype)
46
 
47
+ streamer = TextIteratorStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=True)
48
+
49
+ generation_kwargs = dict(
50
+ **inputs,
51
+ streamer=streamer,
52
+ max_new_tokens=max_tokens,
53
+ do_sample=True,
54
  temperature=temperature,
55
  top_p=top_p,
56
+ )
 
57
 
58
+ thread = Thread(target=model.generate, kwargs=generation_kwargs)
59
+ thread.start()
60
 
61
+ response = ""
62
+ for new_text in streamer:
63
+ response += new_text
64
+ yield response
65
 
 
 
 
66
  demo = gr.ChatInterface(
67
  respond,
68
  type='messages',
69
+ multimodal=True,
70
  additional_inputs=[
71
+ gr.Textbox(value="You are a helpful and friendly multimodal assistant. You can analyze images and answer questions about them.", label="System message"),
72
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
73
+ gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.1, label="Temperature"),
74
  gr.Slider(
75
  minimum=0.1,
76
  maximum=1.0,
 
79
  label="Top-p (nucleus sampling)",
80
  ),
81
  ],
82
+ title="Chatbot",
83
+ description="Ask me anything or upload an image. This version uses AutoModel and AutoProcessor directly.",
84
  )
85
 
 
86
  if __name__ == "__main__":
87
+ demo.launch()