cyrus28214 committed on
Commit
9fecce5
·
unverified ·
1 Parent(s): a033194
Files changed (2) hide show
  1. app.py +18 -43
  2. requirements.txt +2 -1
app.py CHANGED
@@ -1,64 +1,39 @@
1
  import gradio as gr
2
- from transformers import AutoProcessor, AutoModelForImageTextToText, TextIteratorStreamer
3
- from transformers.image_utils import load_image
4
  from threading import Thread
5
  import torch
6
  import spaces
7
 
8
  MODEL_ID = "csfufu/Revisual-R1-final"
9
  processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
10
- model = AutoModelForImageTextToText.from_pretrained(
11
- MODEL_ID,
12
- trust_remote_code=True,
13
- torch_dtype=torch.bfloat16
14
- ).to("cuda").eval()
15
 
16
  @spaces.GPU
17
  def respond(input_dict, history):
18
  text = input_dict["text"]
19
  files = input_dict["files"]
20
 
21
- all_images = []
22
- current_message_images = []
23
- messages = []
24
-
25
- for val in history:
26
- if val[0]:
27
- if isinstance(val[0], str):
28
- messages.append({
29
- "role": "user",
30
- "content": [
31
- *[{"type": "image", "image": image} for image in current_message_images],
32
- {"type": "text", "text": val[0]},
33
- ],
34
- })
35
- current_message_images = []
36
-
37
- else:
38
- # Load messages. These will be appended to the first user text message that comes after
39
- current_message_images = [load_image(image) for image in val[0]]
40
- all_images += current_message_images
41
-
42
- if val[1]:
43
- messages.append({"role": "assistant", "content": val[1]})
44
-
45
- current_message_images = [load_image(image) for image in files]
46
- all_images += current_message_images
47
- messages.append({
48
- "role": "user",
49
- "content": [
50
- *[{"type": "image", "image": image} for image in current_message_images],
51
- {"type": "text", "text": text},
52
- ],
53
- })
54
-
55
  prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
56
  inputs = processor(
57
  text=[prompt],
58
- images=all_images if all_images else None,
 
59
  return_tensors="pt",
60
  padding=True,
61
- ).to("cuda")
62
 
63
  streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
64
  generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024)
 
1
  import gradio as gr
2
+ from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration, TextIteratorStreamer
3
+ from qwen_vl_utils import process_vision_info
4
  from threading import Thread
5
  import torch
6
  import spaces
7
 
8
  MODEL_ID = "csfufu/Revisual-R1-final"
9
  processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
10
+ model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
11
+ MODEL_ID, torch_dtype="auto", device_map="auto"
12
+ )
 
 
13
 
14
  @spaces.GPU
15
  def respond(input_dict, history):
16
  text = input_dict["text"]
17
  files = input_dict["files"]
18
 
19
+ messages = [
20
+ {
21
+ "role": "user",
22
+ "content": [
23
+ { "type": "text", "text": text },
24
+ *[{"type": "image", "image": image} for image in files]
25
+ ]
26
+ }
27
+ ]
28
+ image_inputs, video_inputs = process_vision_info(messages)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
30
  inputs = processor(
31
  text=[prompt],
32
+ images=image_inputs,
33
+ videos=video_inputs,
34
  return_tensors="pt",
35
  padding=True,
36
+ ).to(model.device)
37
 
38
  streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
39
  generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024)
requirements.txt CHANGED
@@ -1,4 +1,5 @@
1
  huggingface_hub
2
  transformers
3
  torchvision
4
- pydantic==2.10.6
 
 
1
  huggingface_hub
2
  transformers
3
  torchvision
4
+ pydantic==2.10.6
5
+ qwen_vl_utils