Daemontatox committed
Commit df30043 · verified · 1 Parent(s): 8fbc686

Update app.py

Files changed (1)
  1. app.py +63 -121
app.py CHANGED
@@ -1,158 +1,100 @@
-
-import subprocess
-
-subprocess.run(
-    'pip install --force-reinstall --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git ',
-    shell=True
-)
-
-
-subprocess.run(
-    'pip install unsloth_zoo',
-    shell=True
-)
-
-from transformers import AutoTokenizer, TextStreamer
 from PIL import Image
 import torch
 from threading import Thread
 import gradio as gr
 from gradio import FileData
 import time
 import spaces
-from unsloth import FastVisionModel
-
-
-# Load model and tokenizer
 ckpt = "Daemontatox/DocumentLlama"
-model, tokenizer = FastVisionModel.from_pretrained(
-    ckpt,
-    load_in_4bit=True,
-    use_gradient_checkpointing="unsloth",
-)
 
-# Enable inference mode
-FastVisionModel.for_inference(model)
 
  @spaces.GPU()
 def bot_streaming(message, history, max_new_tokens=2048):
     txt = message["text"]
-    messages = []
     images = []
 
-    # Process history
-    for i, msg in enumerate(history):
         if isinstance(msg[0], tuple):
-            messages.append({
-                "role": "user",
-                "content": [
-                    {"type": "text", "text": history[i+1][0]},
-                    {"type": "image"}
-                ]
-            })
-            messages.append({
-                "role": "assistant",
-                "content": [{"type": "text", "text": history[i+1][1]}]
-            })
             images.append(Image.open(msg[0][0]).convert("RGB"))
         elif isinstance(history[i-1], tuple) and isinstance(msg[0], str):
             pass
-        elif isinstance(history[i-1][0], str) and isinstance(msg[0], str):
-            messages.append({
-                "role": "user",
-                "content": [{"type": "text", "text": msg[0]}]
-            })
-            messages.append({
-                "role": "assistant",
-                "content": [{"type": "text", "text": msg[1]}]
-            })
 
-    # Handle current message
     if len(message["files"]) == 1:
-        if isinstance(message["files"][0], str):  # examples
             image = Image.open(message["files"][0]).convert("RGB")
-        else:  # regular input
             image = Image.open(message["files"][0]["path"]).convert("RGB")
         images.append(image)
-        messages.append({
-            "role": "user",
-            "content": [
-                {"type": "image"},
-                {"type": "text", "text": txt}
-            ]
-        })
     else:
-        messages.append({
-            "role": "user",
-            "content": [{"type": "text", "text": txt}]
-        })
 
-    # Prepare inputs
-    input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
-
-    if images:
-        inputs = tokenizer(
-            images[-1],  # Use the last image
-            input_text,
-            add_special_tokens=False,
-            return_tensors="pt"
-        ).to("cuda")
-    else:
-        inputs = tokenizer(
-            input_text,
-            add_special_tokens=False,
-            return_tensors="pt"
-        ).to("cuda")
 
-    # Setup streaming
-    text_streamer = TextStreamer(tokenizer, skip_prompt=True)
-    buffer = ""
 
-    def generate():
-        nonlocal buffer
-        output_ids = model.generate(
-            **inputs,
-            streamer=text_streamer,
-            max_new_tokens=max_new_tokens,
-            use_cache=True,
-            temperature=1.5,
-            min_p=0.1
-        )
 
-    thread = Thread(target=generate)
     thread.start()
 
-    for new_text in text_streamer:
         buffer += new_text
         time.sleep(0.01)
         yield buffer
 
-# Setup Gradio interface
-demo = gr.ChatInterface(
-    fn=bot_streaming,
-    title="Document Analyzer",
-    examples=[
-        [{"text": "Which era does this piece belong to? Give details about the era.", "files":["./examples/rococo.jpg"]}, 200],
-        [{"text": "Where do the droughts happen according to this diagram?", "files":["./examples/weather_events.png"]}, 250],
-        [{"text": "What happens when you take out white cat from this chain?", "files":["./examples/ai2d_test.jpg"]}, 250],
-        [{"text": "How long does it take from invoice date to due date? Be short and concise.", "files":["./examples/invoice.png"]}, 250],
-        [{"text": "Where to find this monument? Can you give me other recommendations around the area?", "files":["./examples/wat_arun.jpg"]}, 250],
-    ],
-    textbox=gr.MultimodalTextbox(),
-    additional_inputs=[
-        gr.Slider(
-            minimum=10,
-            maximum=500,
-            value=2048,
-            step=10,
-            label="Maximum number of new tokens to generate",
-        )
-    ],
-    cache_examples=False,
-    description="MllM",
-    stop_btn="Stop Generation",
-    fill_height=True,
-    multimodal=True
-)
 
 demo.launch(debug=True)
 
+from transformers import MllamaForConditionalGeneration, AutoProcessor, TextIteratorStreamer
 from PIL import Image
+import requests
 import torch
 from threading import Thread
 import gradio as gr
 from gradio import FileData
 import time
 import spaces
 ckpt = "Daemontatox/DocumentLlama"
+# 4-bit quantized load; bitsandbytes models do not support .to("cuda"),
+# so device placement is left to device_map instead.
+model = MllamaForConditionalGeneration.from_pretrained(ckpt,
+    torch_dtype=torch.bfloat16, load_in_4bit=True, device_map="auto")
+processor = AutoProcessor.from_pretrained(ckpt)
 
 
  @spaces.GPU()
 def bot_streaming(message, history, max_new_tokens=2048):
     txt = message["text"]
+    ext_buffer = f"{txt}"
+
+    messages = []
     images = []
 
+    for i, msg in enumerate(history):
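+        # Image turns sit in Gradio's history as (filepath,) tuples; the
+        # paired prompt and reply live in the following history entry.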
         if isinstance(msg[0], tuple):
+            messages.append({"role": "user", "content": [{"type": "text", "text": history[i+1][0]}, {"type": "image"}]})
+            messages.append({"role": "assistant", "content": [{"type": "text", "text": history[i+1][1]}]})
             images.append(Image.open(msg[0][0]).convert("RGB"))
         elif isinstance(history[i-1], tuple) and isinstance(msg[0], str):
+            # messages are already handled
             pass
+        elif isinstance(history[i-1][0], str) and isinstance(msg[0], str):  # text only turn
+            messages.append({"role": "user", "content": [{"type": "text", "text": msg[0]}]})
+            messages.append({"role": "assistant", "content": [{"type": "text", "text": msg[1]}]})
 
+    # add current message
     if len(message["files"]) == 1:
+        if isinstance(message["files"][0], str):  # examples
             image = Image.open(message["files"][0]).convert("RGB")
+        else:  # regular input
             image = Image.open(message["files"][0]["path"]).convert("RGB")
         images.append(image)
+        messages.append({"role": "user", "content": [{"type": "text", "text": txt}, {"type": "image"}]})
     else:
+        messages.append({"role": "user", "content": [{"type": "text", "text": txt}]})
 
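+    # Render the chat into the model's prompt string; each {"type": "image"}
+    # entry becomes an image placeholder the processor pairs with the PIL images.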
+    texts = processor.apply_chat_template(messages, add_generation_prompt=True)
+
+    if images == []:
+        inputs = processor(text=texts, return_tensors="pt").to("cuda")
+    else:
+        inputs = processor(text=texts, images=images, return_tensors="pt").to("cuda")
+    streamer = TextIteratorStreamer(processor, skip_special_tokens=True, skip_prompt=True)
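+    # TextIteratorStreamer lets generate() run in a background thread while
+    # this generator yields the decoded text as it is produced.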
 
+    generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=max_new_tokens)
+    generated_text = ""
 
+    thread = Thread(target=model.generate, kwargs=generation_kwargs)
     thread.start()
+    buffer = ""
 
+    for new_text in streamer:
         buffer += new_text
+        generated_text_without_prompt = buffer
         time.sleep(0.01)
         yield buffer
 
+demo = gr.ChatInterface(fn=bot_streaming, title="Document Analyzer", examples=[
+    [{"text": "Which era does this piece belong to? Give details about the era.", "files":["./examples/rococo.jpg"]},
+     200],
+    [{"text": "Where do the droughts happen according to this diagram?", "files":["./examples/weather_events.png"]},
+     250],
+    [{"text": "What happens when you take out white cat from this chain?", "files":["./examples/ai2d_test.jpg"]},
+     250],
+    [{"text": "How long does it take from invoice date to due date? Be short and concise.", "files":["./examples/invoice.png"]},
+     250],
+    [{"text": "Where to find this monument? Can you give me other recommendations around the area?", "files":["./examples/wat_arun.jpg"]},
+     250],
+    ],
+    textbox=gr.MultimodalTextbox(),
+    additional_inputs=[gr.Slider(
+        minimum=10,
+        maximum=2048,  # must cover the 2048 default below
+        value=2048,
+        step=10,
+        label="Maximum number of new tokens to generate",
+    )],
+    cache_examples=False,
+    description="MllM",
+    stop_btn="Stop Generation",
+    fill_height=True,
+    multimodal=True)
+
 demo.launch(debug=True)
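
Note: passing load_in_4bit=True straight to from_pretrained is the deprecated spelling of 4-bit loading. A minimal sketch of the equivalent load with an explicit BitsAndBytesConfig (assuming the Space has bitsandbytes installed; quantized weights are placed via device_map, not .to()):

    import torch
    from transformers import (AutoProcessor, BitsAndBytesConfig,
                              MllamaForConditionalGeneration)

    ckpt = "Daemontatox/DocumentLlama"

    # Explicit 4-bit config, equivalent to the bare load_in_4bit=True kwarg.
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
    )

    model = MllamaForConditionalGeneration.from_pretrained(
        ckpt,
        quantization_config=bnb_config,
        device_map="auto",  # let accelerate place the quantized weights
    )
    processor = AutoProcessor.from_pretrained(ckpt)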