cyrus28214 committed
Commit 12c4ba7 · unverified · 1 Parent(s): 4745ba0
Files changed (1): app.py (+5 -36)
app.py CHANGED

@@ -5,7 +5,7 @@ from threading import Thread
 import torch
 import spaces

-MODEL_ID = "csfufu/Revisual-R1-final"
+MODEL_ID = "TIGER-Lab/VL-Rethinker-7B"
 processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
 model = AutoModelForImageTextToText.from_pretrained(
     MODEL_ID,
@@ -14,42 +14,15 @@ model = AutoModelForImageTextToText.from_pretrained(
 ).to("cuda").eval()

 @spaces.GPU
-def inference(input_dict, history):
+def respond(input_dict, chat_history):
     text = input_dict["text"]
     files = input_dict["files"]

-    """
-    Create chat history
-    Example history value:
-    [
-        [('pixel.png',), None],
-        ['ignore this image. just say "hi" and nothing else', 'Hi!'],
-        ['just say "hi" and nothing else', 'Hi!']
-    ]
-    """
     all_images = []
     current_message_images = []
     messages = []

-    for val in history:
-        if val[0]:
-            if isinstance(val[0], str):
-                messages.append({
-                    "role": "user",
-                    "content": [
-                        *[{"type": "image", "image": image} for image in current_message_images],
-                        {"type": "text", "text": val[0]},
-                    ],
-                })
-                current_message_images = []
-
-            else:
-                # Load messages. These will be appended to the first user text message that comes after
-                current_message_images = [load_image(image) for image in val[0]]
-                all_images += current_message_images
-
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
+    messages.append(chat_history)

     current_message_images = [load_image(image) for image in files]
     all_images += current_message_images
@@ -61,11 +34,6 @@ def inference(input_dict, history):
         ],
     })

-    #print(messages)
-
-    """
-    Generate and stream text
-    """
     prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     inputs = processor(
         text=[prompt],
@@ -86,7 +54,8 @@ def inference(input_dict, history):
     yield buffer

 demo = gr.ChatInterface(
-    fn=inference,
+    fn=respond,
+    type='messages',
     multimodal=True,
 )
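
For context: the tuple-parsing loop could be deleted because the new type='messages' setting makes Gradio deliver chat_history as a list of OpenAI-style role/content dicts rather than [user, bot] pairs. A minimal sketch of that shape, with illustrative values that are not from the commit (the committed handler folds the list in with a single append; the flattened form below is the structure apply_chat_template consumes):

# Sketch: history as delivered by gr.ChatInterface(type='messages').
# Values are illustrative, not taken from the commit.
chat_history = [
    {"role": "user", "content": 'just say "hi" and nothing else'},
    {"role": "assistant", "content": "Hi!"},
]

messages = []
messages.extend(chat_history)  # one dict per prior turn
messages.append({
    "role": "user",
    "content": [{"type": "text", "text": "now answer in French"}],
})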
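
The generation code between the last two hunks is unchanged, so the diff truncates it; the "from threading import Thread" context and the "yield buffer" loop suggest the usual TextIteratorStreamer pattern. A hedged sketch of that pattern only, assuming transformers' TextIteratorStreamer, a processor that exposes a .tokenizer attribute, and an invented max_new_tokens value:

# Hypothetical sketch of the (unchanged, truncated) streaming tail of
# respond(); not the committed code.
from threading import Thread
from transformers import TextIteratorStreamer

def stream_reply(model, processor, inputs, max_new_tokens=1024):
    streamer = TextIteratorStreamer(
        processor.tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    # Run model.generate() on a worker thread so decoded text can be
    # consumed from the streamer as it is produced.
    gen_kwargs = dict(**inputs, streamer=streamer, max_new_tokens=max_new_tokens)
    Thread(target=model.generate, kwargs=gen_kwargs).start()

    buffer = ""
    for new_text in streamer:
        buffer += new_text
        yield buffer  # ChatInterface re-renders the growing reply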